aboutsummaryrefslogtreecommitdiff
path: root/ggml.h
diff options
context:
space:
mode:
authorslaren <slarengh@gmail.com>2023-07-26 15:56:53 +0200
committerGitHub <noreply@github.com>2023-07-26 15:56:53 +0200
commit5488fb789ea5692268309baa76f67598155060be (patch)
tree0609924b82a47ce97806b9201b75ec49bdf76c31 /ggml.h
parenteb542d39324574a6778fad9ba9e34ba7a14a82a3 (diff)
ggml : allocate graphs in a context (#2392)
* ggml : graph allocation in contexts * allocate work buffer as a ggml_object in ggml_graph_compute_with_ctx * llama.cpp : allocate graph in the context * add GGML_PAD --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml.h')
-rw-r--r--ggml.h21
1 file changed, 19 insertions(+), 2 deletions(-)
diff --git a/ggml.h b/ggml.h
index c309f13..9919cce 100644
--- a/ggml.h
+++ b/ggml.h
@@ -208,6 +208,7 @@
#define GGML_UNUSED(x) (void)(x)
+#define GGML_PAD(x, n) (((x) + (n) - 1) & ~((n) - 1))
#define GGML_ASSERT(x) \
do { \
@@ -396,6 +397,12 @@ extern "C" {
GGML_UNARY_OP_SILU,
};
+ enum ggml_object_type {
+ GGML_OBJECT_TENSOR,
+ GGML_OBJECT_GRAPH,
+ GGML_OBJECT_WORK_BUFFER
+ };
+
// ggml object
struct ggml_object {
size_t offs;
@@ -403,7 +410,9 @@ extern "C" {
struct ggml_object * next;
- char padding[8];
+ enum ggml_object_type type;
+
+ char padding[4];
};
static const size_t GGML_OBJECT_SIZE = sizeof(struct ggml_object);
@@ -424,7 +433,7 @@ extern "C" {
enum ggml_op op;
// op params - allocated as int32_t for alignment
- int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(uint32_t)];
+ int32_t op_params[GGML_MAX_OP_PARAMS / sizeof(int32_t)];
bool is_param;
@@ -485,6 +494,8 @@ extern "C" {
int64_t perf_time_us;
};
+ static const size_t GGML_GRAPH_SIZE = sizeof(struct ggml_cgraph);
+
// scratch buffer
struct ggml_scratch {
size_t offs;
@@ -1391,11 +1402,17 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * tensor);
+
GGML_API void ggml_build_forward_expand(struct ggml_cgraph * cgraph, struct ggml_tensor * tensor);
GGML_API struct ggml_cgraph ggml_build_forward (struct ggml_tensor * tensor);
GGML_API struct ggml_cgraph ggml_build_backward(struct ggml_context * ctx, struct ggml_cgraph * gf, bool keep);
+ // graph allocation in a context
+ GGML_API struct ggml_cgraph * ggml_new_graph (struct ggml_context * ctx);
+ GGML_API struct ggml_cgraph * ggml_build_forward_ctx(struct ggml_context * ctx, struct ggml_tensor * tensor);
+ GGML_API size_t ggml_graph_overhead(void);
+
// ggml_graph_plan() has to be called before ggml_graph_compute()
// when plan.work_size > 0, caller must allocate memory for plan.work_data
GGML_API struct ggml_cplan ggml_graph_plan (struct ggml_cgraph * cgraph, int n_threads /*= GGML_DEFAULT_N_THREADS*/);