ggml : change ggml_graph_compute() API to not require context (#1999)

* ggml_graph_compute: deprecate using ggml_context, try resolve issue #287 * rewrite: no longer consider backward compitability; plan and make_plan * minor: rename ctx as plan; const * remove ggml_graph_compute from tests/test-grad0.c, but current change breaks backward * add static ggml_graph_compute_sugar() * minor: update comments * reusable buffers * ggml : more consistent naming + metal fixes * ggml : fix docs * tests : disable grad / opt + minor naming changes * ggml : add ggml_graph_compute_with_ctx() - backwards compatible API - deduplicates a lot of copy-paste * ci : enable test-grad0 * examples : factor out plan allocation into a helper function * llama : factor out plan stuff into a helper function * ci : fix env * llama : fix duplicate symbols + refactor example benchmark * ggml : remove obsolete assert + refactor n_tasks section * ggml : fix indentation in switch * llama : avoid unnecessary bool * ggml : remove comments from source file and match order in header --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
author: Qingyou Meng <meng.qingyou@gmail.com> 2023-07-08 00:24:01 +0800
committer: GitHub <noreply@github.com> 2023-07-07 19:24:01 +0300
commit: 1d656d6360359cfdaaf5d64ed9690047b600dbcb (patch)
tree: ea41daf563633ab0552f24fd0bacce51833e04eb /ggml-metal.m
parent: 72421402834141df6cbdcf595fe46dbd11874dce (diff)
1 files changed, 9 insertions, 2 deletions
diff --git a/ggml-metal.m b/ggml-metal.m
index fd69c41..3f15f79 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -25,6 +25,8 @@ struct ggml_metal_buffer {
 };
 
 struct ggml_metal_context {
+    int n_cb;
+
     float * logits;
 
     id<MTLDevice>       device;
@@ -86,11 +88,12 @@ static NSString * const msl_library_source = @"see metal.metal";
 @implementation GGMLMetalClass
 @end
 
-struct ggml_metal_context * ggml_metal_init(void) {
+struct ggml_metal_context * ggml_metal_init(int n_cb) {
     fprintf(stderr, "%s: allocating\n", __func__);
 
     struct ggml_metal_context * ctx = malloc(sizeof(struct ggml_metal_context));
 
+    ctx->n_cb   = n_cb;
     ctx->device = MTLCreateSystemDefaultDevice();
     ctx->queue  = [ctx->device newCommandQueue];
     ctx->n_buffers = 0;
@@ -208,6 +211,10 @@ void ggml_metal_free(struct ggml_metal_context * ctx) {
     free(ctx);
 }
 
+void ggml_metal_set_n_cb(struct ggml_metal_context * ctx, int n_cb) {
+    ctx->n_cb = n_cb;
+}
+
 // finds the Metal buffer that contains the tensor data on the GPU device
 // the assumption is that there is 1-to-1 mapping between the host and device memory buffers, so we can find the
 // Metal buffer based on the host memory pointer
@@ -354,7 +361,7 @@ void ggml_metal_graph_compute(
     // create multiple command buffers and enqueue them
     // then, we encode the graph into the command buffers in parallel
 
-    const int n_cb = gf->n_threads;
+    const int n_cb = ctx->n_cb;
 
     NSMutableArray * command_buffers = [NSMutableArray arrayWithCapacity:n_cb];
author	Qingyou Meng <meng.qingyou@gmail.com>	2023-07-08 00:24:01 +0800
committer	GitHub <noreply@github.com>	2023-07-07 19:24:01 +0300
commit	1d656d6360359cfdaaf5d64ed9690047b600dbcb (patch)
tree	ea41daf563633ab0552f24fd0bacce51833e04eb /ggml-metal.m
parent	72421402834141df6cbdcf595fe46dbd11874dce (diff)