aboutsummaryrefslogtreecommitdiff
path: root/ggml.h
diff options
context:
space:
mode:
authorslaren <slarengh@gmail.com>2023-07-26 15:56:53 +0200
committerGitHub <noreply@github.com>2023-07-26 15:56:53 +0200
commit5488fb789ea5692268309baa76f67598155060be (patch)
tree0609924b82a47ce97806b9201b75ec49bdf76c31 /ggml.h
parenteb542d39324574a6778fad9ba9e34ba7a14a82a3 (diff)
ggml : allocate graphs in a context (#2392)
* ggml : graph allocation in contexts * allocate work buffer as a ggml_object in ggml_graph_compute_with_ctx * llama.cpp : allocate graph in the context * add GGML_PAD --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml.h')
-rw-r--r--ggml.h21
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/ggml.h b/ggml.h
index c309f13..9919cce 100644
--- a/ggml.h
+++ b/ggml.h
@@ -208,6 +208,7 @@
#define GGML_UNUSED(x) (void)(x)
+#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
#define GGML_ASSERT(x) \
do { \
@@ -396,6 +397,12 @@ extern "C" {
GGML_UNARY_OP_SILU,
};
+ enum ggml_object_type {
+ GGML_OBJECT_TENSOR,
+ GGML_OBJECT_GRAPH,
+ GGML_OBJECT_WORK_BUFFER
+ };
+
// ggml object
struct ggml_object {
size_t offs;
@@ -403,7 +410,9 @@ extern "C" {
struct ggml_object * next;
- char padding[8];
+ enum ggml_object_type type;
+
+ char padding[4];
};
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
@@ -424,7 +433,7 @@ extern "C" {
enum ggml_op op;
// op params - allocated as int32_t for alignment
- int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(uint32_t)];
+ int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
bool is_param;
@@ -485,6 +494,8 @@ extern "C" {
int64_t perf_time_us;
};
+ static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
+
// scratch buffer
struct ggml_scratch {
size_t offs;
@@ -1391,11 +1402,17 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * tensor);
+
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
+ // graph allocation in a context
+ GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx);
+ GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
+ GGML_API size_t ggml_graph_overhead(void);
+
// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);