Diffstat (limited to 'ggml.c')
-rw-r--r--  ggml.c  482
1 file changed, 481 insertions(+), 1 deletion(-)
diff --git a/ggml.c b/ggml.c
index b4570eb..4cd0d72 100644
--- a/ggml.c
+++ b/ggml.c
@@ -4030,6 +4030,14 @@ void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
ctx->no_alloc = no_alloc;
}
+void * ggml_get_mem_buffer(struct ggml_context * ctx) {
+ return ctx->mem_buffer;
+}
+
+size_t ggml_get_mem_size(struct ggml_context * ctx) {
+ return ctx->mem_size;
+}
+
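
For illustration only (not part of this commit), a minimal sketch of how the two new accessors might be used, e.g. to snapshot the arena backing a context; ctx_snapshot is a hypothetical helper:

    #include <stdlib.h>
    #include <string.h>
    #include "ggml.h"

    // copy the whole arena backing a context (caller frees the result)
    static void * ctx_snapshot(struct ggml_context * ctx) {
        const size_t size = ggml_get_mem_size(ctx);
        void * copy = malloc(size);
        if (copy) {
            memcpy(copy, ggml_get_mem_buffer(ctx), size);
        }
        return copy;
    }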
// IMPORTANT:
// when creating "opt" tensors, always save and load the scratch buffer
// this is an error prone process, but it is necessary to support inplace
@@ -4523,6 +4531,23 @@ struct ggml_tensor * ggml_view_tensor(
return result;
}
+struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name) {
+ struct ggml_object * obj = ctx->objects_begin;
+
+ char * const mem_buffer = ctx->mem_buffer;
+
+ while (obj != NULL) {
+ struct ggml_tensor * cur = (struct ggml_tensor *)(mem_buffer + obj->offs);
+ if (strcmp(cur->name, name) == 0) {
+ return cur;
+ }
+
+ obj = obj->next;
+ }
+
+ return NULL;
+}
+
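
A usage sketch (not from this commit); it assumes a live context ctx, and the tensor name is hypothetical:

    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 768, 768);
    ggml_set_name(w, "model/wq");

    // later: recover the tensor by name instead of keeping the pointer around
    struct ggml_tensor * found = ggml_get_tensor(ctx, "model/wq");
    // found == w; NULL if no object in ctx carries that name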
////////////////////////////////////////////////////////////////////////////////
// ggml_dup
@@ -14532,7 +14557,7 @@ void ggml_graph_reset(struct ggml_cgraph * cgraph) {
}
}
-struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const char * name) {
+struct ggml_tensor * ggml_graph_get_tensor(struct ggml_cgraph * cgraph, const char * name) {
for (int i = 0; i < cgraph->n_leafs; i++) {
struct ggml_tensor * leaf = cgraph->leafs[i];
@@ -14552,6 +14577,461 @@ struct ggml_tensor * ggml_get_tensor_by_name(struct ggml_cgraph * cgraph, const
return NULL;
}
+static void ggml_graph_export_leaf(const struct ggml_tensor * tensor, FILE * fout) {
+ const int64_t * ne = tensor->ne;
+ const size_t * nb = tensor->nb;
+
+ fprintf(fout, "%-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %16p %16s\n",
+ ggml_type_name(tensor->type),
+ ggml_op_name (tensor->op),
+ tensor->n_dims,
+ (long long) ne[0], (long long) ne[1], (long long) ne[2], (long long) ne[3], // cast: %lld expects long long; int64_t may be long
+ nb[0], nb[1], nb[2], nb[3],
+ tensor->data,
+ tensor->name);
+}
+
+static void ggml_graph_export_node(const struct ggml_tensor * tensor, const char * arg, FILE * fout) {
+ const int64_t * ne = tensor->ne;
+ const size_t * nb = tensor->nb;
+
+ fprintf(fout, "%-6s %-6s %-12s %8d %8lld %8lld %8lld %8lld %16zu %16zu %16zu %16zu %8d %16p %16s\n",
+ arg,
+ ggml_type_name(tensor->type),
+ ggml_op_name (tensor->op),
+ tensor->n_dims,
+ (long long) ne[0], (long long) ne[1], (long long) ne[2], (long long) ne[3],
+ nb[0], nb[1], nb[2], nb[3],
+ tensor->n_tasks,
+ tensor->data,
+ tensor->name);
+}
+
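
For reference, the binary layout written by the function below, reconstructed from the code (this change documents it nowhere else): a header of magic, version, n_leafs and n_nodes as uint32 plus size_eval as uint64; then one record per leaf (type/op/n_dims as uint32, GGML_MAX_DIMS pairs of ne/nb as uint64, the data address as uint64, GGML_MAX_NAME name bytes, followed by the raw tensor data); then one record per node (the same fields minus the raw data, plus 2 + GGML_MAX_OPT int32 argument indices, where -1 means unused, values below GGML_MAX_NODES are leaf indices, and larger values encode GGML_MAX_NODES + node index). Note the asserts at the top of the function: a graph that has already been computed typically carries a work buffer and cannot be exported.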
+void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
+ assert(cgraph->work == NULL);
+ assert(cgraph->work_size == 0);
+
+ uint64_t size_eval = 0;
+
+ // compute size of intermediate results
+ // TODO: does not take into account scratch buffers !!!!
+ for (int i = 0; i < cgraph->n_nodes; ++i) {
+ size_eval += ggml_nbytes(cgraph->nodes[i]);
+ }
+
+ // print
+ {
+ FILE * fout = stdout;
+
+ fprintf(fout, "\n");
+ fprintf(fout, "%-16s %8x\n", "magic", GGML_FILE_MAGIC);
+ fprintf(fout, "%-16s %8d\n", "version", GGML_FILE_VERSION);
+ fprintf(fout, "%-16s %8d\n", "leafs", cgraph->n_leafs);
+ fprintf(fout, "%-16s %8d\n", "nodes", cgraph->n_nodes);
+ fprintf(fout, "%-16s %8llu\n", "eval", size_eval);
+
+ // header
+ fprintf(fout, "\n");
+ fprintf(fout, "%-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %16s %16s\n",
+ "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "DATA", "NAME");
+
+ for (int i = 0; i < cgraph->n_leafs; ++i) {
+ ggml_graph_export_leaf(cgraph->leafs[i], fout);
+
+ GGML_ASSERT(cgraph->leafs[i]->op == GGML_OP_NONE);
+ GGML_ASSERT(cgraph->leafs[i]->src0 == NULL);
+ GGML_ASSERT(cgraph->leafs[i]->src1 == NULL);
+ }
+
+ // header
+ fprintf(fout, "\n");
+ fprintf(fout, "%-6s %-6s %-12s %8s %8s %8s %8s %8s %16s %16s %16s %16s %8s %16s %16s\n",
+ "ARG", "TYPE", "OP", "NDIMS", "NE0", "NE1", "NE2", "NE3", "NB0", "NB1", "NB2", "NB3", "NTASKS", "DATA", "NAME");
+
+ for (int i = 0; i < cgraph->n_nodes; ++i) {
+ ggml_graph_export_node(cgraph->nodes[i], "DST", fout);
+
+ if (cgraph->nodes[i]->src0) {
+ ggml_graph_export_node(cgraph->nodes[i]->src0, "SRC0", fout);
+ }
+
+ if (cgraph->nodes[i]->src1) {
+ ggml_graph_export_node(cgraph->nodes[i]->src1, "SRC1", fout);
+ }
+
+ for (int j = 0; j < GGML_MAX_OPT; ++j) {
+ if (cgraph->nodes[i]->opt[j]) {
+ ggml_graph_export_node(cgraph->nodes[i]->opt[j], "OPT", fout);
+ }
+ }
+
+ fprintf(fout, "\n");
+ }
+
+ fprintf(fout, "\n");
+ }
+
+ // write binary data
+ {
+ FILE * fout = fopen(fname, "wb");
+
+ if (!fout) {
+ fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
+ return;
+ }
+
+ // header
+ {
+ const uint32_t magic = GGML_FILE_MAGIC;
+ const uint32_t version = GGML_FILE_VERSION;
+ const uint32_t n_leafs = cgraph->n_leafs;
+ const uint32_t n_nodes = cgraph->n_nodes;
+
+ fwrite(&magic, sizeof(uint32_t), 1, fout);
+ fwrite(&version, sizeof(uint32_t), 1, fout);
+ fwrite(&n_leafs, sizeof(uint32_t), 1, fout);
+ fwrite(&nodes, sizeof(uint32_t), 1, fout);
+ fwrite(&size_eval, sizeof(uint64_t), 1, fout);
+ }
+
+ // leafs
+ {
+ for (int i = 0; i < cgraph->n_leafs; ++i) {
+ const struct ggml_tensor * tensor = cgraph->leafs[i];
+
+ const uint32_t type = tensor->type;
+ const uint32_t op = tensor->op;
+ const uint32_t n_dims = tensor->n_dims;
+
+ fwrite(&type, sizeof(uint32_t), 1, fout);
+ fwrite(&op, sizeof(uint32_t), 1, fout);
+ fwrite(&n_dims, sizeof(uint32_t), 1, fout);
+
+ for (int j = 0; j < GGML_MAX_DIMS; ++j) {
+ const uint64_t ne = tensor->ne[j];
+ const uint64_t nb = tensor->nb[j];
+
+ fwrite(&ne, sizeof(uint64_t), 1, fout);
+ fwrite(&nb, sizeof(uint64_t), 1, fout);
+ }
+
+ // store the pointer address (informational; the importer reads and discards it)
+ {
+ const uint64_t ptr = (uint64_t) tensor->data;
+
+ fwrite(&ptr, sizeof(uint64_t), 1, fout);
+ }
+
+ fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
+
+ // dump the data
+ // TODO: pad this to 32 byte boundary
+ {
+ const size_t size = ggml_nbytes(tensor);
+
+ fwrite(tensor->data, sizeof(char), size, fout);
+ }
+ }
+ }
+
+ // nodes
+ {
+ for (int i = 0; i < cgraph->n_nodes; ++i) {
+ const struct ggml_tensor * tensor = cgraph->nodes[i];
+
+ const uint32_t type = tensor->type;
+ const uint32_t op = tensor->op;
+ const uint32_t n_dims = tensor->n_dims;
+
+ fwrite(&type, sizeof(uint32_t), 1, fout);
+ fwrite(&op, sizeof(uint32_t), 1, fout);
+ fwrite(&n_dims, sizeof(uint32_t), 1, fout);
+
+ for (int j = 0; j < GGML_MAX_DIMS; ++j) {
+ const uint64_t ne = tensor->ne[j];
+ const uint64_t nb = tensor->nb[j];
+
+ fwrite(&ne, sizeof(uint64_t), 1, fout);
+ fwrite(&nb, sizeof(uint64_t), 1, fout);
+ }
+
+ // store the pointer address (informational; the importer reads and discards it)
+ {
+ const uint64_t ptr = (uint64_t) tensor->data;
+
+ fwrite(&ptr, sizeof(uint64_t), 1, fout);
+ }
+
+ fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
+
+ // output the op arguments
+ {
+ struct ggml_tensor * args[2 + GGML_MAX_OPT] = { NULL };
+
+ args[0] = tensor->src0;
+ args[1] = tensor->src1;
+
+ for (int j = 0; j < GGML_MAX_OPT; ++j) {
+ args[2 + j] = tensor->opt[j];
+ }
+
+ for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
+ if (args[j]) {
+ int32_t idx = -1;
+
+ // check if leaf
+ {
+ for (int k = 0; k < cgraph->n_leafs; ++k) {
+ if (args[j] == cgraph->leafs[k]) {
+ idx = k;
+ break;
+ }
+ }
+ }
+
+ // check if node
+ if (idx == -1) {
+ for (int k = 0; k < cgraph->n_nodes; ++k) {
+ if (args[j] == cgraph->nodes[k]) {
+ idx = GGML_MAX_NODES + k;
+ break;
+ }
+ }
+ }
+
+ if (idx == -1) {
+ fprintf(stderr, "%s: failed to find tensor, arg = %d, node = %d\n", __func__, j, i);
+ return;
+ }
+
+ fwrite(&idx, sizeof(int32_t), 1, fout);
+ } else {
+ const int32_t nul = -1;
+
+ fwrite(&nul, sizeof(int32_t), 1, fout);
+ }
+ }
+ }
+ }
+ }
+
+ fclose(fout);
+ }
+}
+
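
A usage sketch for the export side (illustrative, not part of this commit); the graph is exported before any ggml_graph_compute call so the work-buffer asserts above hold:

    struct ggml_init_params params = {
        .mem_size   = 16*1024*1024,
        .mem_buffer = NULL,
        .no_alloc   = false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // leaf data left uninitialized for brevity
    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    struct ggml_tensor * b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);
    ggml_set_name(a, "a");
    ggml_set_name(b, "b");

    struct ggml_tensor * c = ggml_add(ctx, a, b);
    ggml_set_name(c, "c");

    struct ggml_cgraph gf = ggml_build_forward(c);

    ggml_graph_export(&gf, "graph.ggml");
    ggml_free(ctx);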
+struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context ** ctx_data, struct ggml_context ** ctx_eval) {
+ assert(*ctx_data == NULL); // receives a context that holds the raw file contents
+ assert(*ctx_eval == NULL); // receives a context that holds the tensors; leaf data points into ctx_data
+
+ struct ggml_cgraph result = { 0 };
+
+ struct ggml_tensor * data = NULL;
+
+ // read file into data
+ {
+ FILE * fin = fopen(fname, "rb");
+
+ if (!fin) {
+ fprintf(stderr, "%s: failed to open %s\n", __func__, fname);
+ return result;
+ }
+
+ size_t fsize = 0;
+
+ fseek(fin, 0, SEEK_END);
+ fsize = ftell(fin);
+ fseek(fin, 0, SEEK_SET);
+
+ // create the data context
+ {
+ const size_t overhead = 1*ggml_tensor_overhead();
+
+ struct ggml_init_params params = {
+ .mem_size = fsize + overhead,
+ .mem_buffer = NULL,
+ .no_alloc = false,
+ };
+
+ *ctx_data = ggml_init(params);
+
+ if (!*ctx_data) {
+ fprintf(stderr, "%s: failed to create ggml context\n", __func__);
+ return result;
+ }
+ }
+
+ data = ggml_new_tensor_1d(*ctx_data, GGML_TYPE_I8, fsize);
+
+ const size_t nread = fread(data->data, sizeof(char), fsize, fin);
+ if (nread != fsize) {
+ fprintf(stderr, "%s: failed to read %s\n", __func__, fname);
+ fclose(fin);
+ return result;
+ }
+
+ fclose(fin);
+ }
+
+ // populate result
+ {
+ char * ptr = (char *) data->data;
+
+ const uint32_t magic = *(const uint32_t *) ptr; ptr += sizeof(magic);
+
+ if (magic != GGML_FILE_MAGIC) {
+ fprintf(stderr, "%s: invalid magic number, got %08x\n", __func__, magic);
+ return result;
+ }
+
+ const uint32_t version = *(const uint32_t *) ptr; ptr += sizeof(version);
+
+ if (version != GGML_FILE_VERSION) {
+ fprintf(stderr, "%s: invalid version number\n", __func__);
+ return result;
+ }
+
+ const uint32_t n_leafs = *(const uint32_t *) ptr; ptr += sizeof(n_leafs);
+ const uint32_t n_nodes = *(const uint32_t *) ptr; ptr += sizeof(n_nodes);
+ const uint64_t size_eval = *(const uint64_t *) ptr; ptr += sizeof(size_eval);
+
+ result.n_leafs = n_leafs;
+ result.n_nodes = n_nodes;
+
+ // create the eval context
+ {
+ const size_t overhead = (n_leafs + n_nodes)*ggml_tensor_overhead();
+
+ struct ggml_init_params params = {
+ .mem_size = size_eval + overhead,
+ .mem_buffer = NULL,
+ .no_alloc = true,
+ };
+
+ *ctx_eval = ggml_init(params);
+
+ if (!*ctx_eval) {
+ fprintf(stderr, "%s: failed to create ggml context\n", __func__);
+ return result;
+ }
+ }
+
+ // leafs
+ {
+ uint32_t type;
+ uint32_t op;
+ uint32_t n_dims;
+
+ for (uint32_t i = 0; i < n_leafs; ++i) {
+ type = *(const uint32_t *) ptr; ptr += sizeof(type);
+ op = *(const uint32_t *) ptr; ptr += sizeof(op);
+ n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
+
+ int64_t ne[GGML_MAX_DIMS];
+ size_t nb[GGML_MAX_DIMS];
+
+ for (int j = 0; j < GGML_MAX_DIMS; ++j) {
+ uint64_t ne_cur;
+ uint64_t nb_cur;
+
+ ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
+ nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
+
+ ne[j] = ne_cur;
+ nb[j] = nb_cur;
+ }
+
+ struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
+
+ tensor->op = (enum ggml_op) op;
+
+ uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur); // address at export time; informational only, not used here
+
+ memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
+
+ tensor->data = (void *) ptr;
+
+ for (int j = 0; j < GGML_MAX_DIMS; ++j) {
+ tensor->nb[j] = nb[j];
+ }
+
+ result.leafs[i] = tensor;
+
+ ptr += ggml_nbytes(tensor);
+
+ fprintf(stderr, "%s: loaded leaf %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
+ }
+ }
+
+ ggml_set_no_alloc(*ctx_eval, false); // leafs point into the file data; nodes below get buffers from ctx_eval
+
+ // nodes
+ {
+ uint32_t type;
+ uint32_t op;
+ uint32_t n_dims;
+
+ for (uint32_t i = 0; i < n_nodes; ++i) {
+ type = *(const uint32_t *) ptr; ptr += sizeof(type);
+ op = *(const uint32_t *) ptr; ptr += sizeof(op);
+ n_dims = *(const uint32_t *) ptr; ptr += sizeof(n_dims);
+
+ int64_t ne[GGML_MAX_DIMS];
+ size_t nb[GGML_MAX_DIMS];
+
+ for (int j = 0; j < GGML_MAX_DIMS; ++j) {
+ uint64_t ne_cur;
+ uint64_t nb_cur;
+
+ ne_cur = *(const uint64_t *) ptr; ptr += sizeof(ne_cur);
+ nb_cur = *(const uint64_t *) ptr; ptr += sizeof(nb_cur);
+
+ ne[j] = ne_cur;
+ nb[j] = nb_cur;
+ }
+
+ struct ggml_tensor * tensor = ggml_new_tensor(*ctx_eval, (enum ggml_type) type, n_dims, ne);
+
+ tensor->op = (enum ggml_op) op;
+
+ uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur); // address at export time; informational only, not used here
+
+ memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
+
+ for (int j = 0; j < GGML_MAX_DIMS; ++j) {
+ tensor->nb[j] = nb[j];
+ }
+
+ // parse args
+ {
+ struct ggml_tensor ** args[2 + GGML_MAX_OPT] = {
+ &tensor->src0,
+ &tensor->src1,
+ };
+
+ for (int j = 0; j < GGML_MAX_OPT; ++j) {
+ args[2 + j] = &tensor->opt[j];
+ }
+
+ for (int j = 0; j < 2 + GGML_MAX_OPT; ++j) {
+ const int32_t arg_idx = *(const int32_t *) ptr; ptr += sizeof(arg_idx);
+
+ if (arg_idx == -1) {
+ continue;
+ }
+
+ if (arg_idx < GGML_MAX_NODES) {
+ *args[j] = result.leafs[arg_idx];
+ } else {
+ *args[j] = result.nodes[arg_idx - GGML_MAX_NODES];
+ }
+ }
+ }
+
+ result.nodes[i] = tensor;
+
+ fprintf(stderr, "%s: loaded node %d: '%16s', %3d dims, %9zu bytes\n", __func__, i, tensor->name, n_dims, ggml_nbytes(tensor));
+ }
+ }
+ }
+
+ return result;
+}
+
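
And the matching import sketch (again illustrative): the two contexts come back owned by the caller. Note that evaluating the graph may allocate a work buffer from the supplied context, which size_eval does not budget for, so a real caller may need to reserve extra space in ctx_eval:

    struct ggml_context * ctx_data = NULL;
    struct ggml_context * ctx_eval = NULL;

    struct ggml_cgraph gf = ggml_graph_import("graph.ggml", &ctx_data, &ctx_eval);

    struct ggml_tensor * c = ggml_graph_get_tensor(&gf, "c");

    ggml_graph_compute(ctx_eval, &gf);

    ggml_free(ctx_eval);
    ggml_free(ctx_data);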
void ggml_graph_print(const struct ggml_cgraph * cgraph) {
int64_t perf_total_per_op_us[GGML_OP_COUNT] = {0};