From ce2c7d72e2d06988b5ddec6811ab923254542077 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Sun, 18 Jun 2023 09:09:47 +0300
Subject: metal : handle buffers larger than device's maxBufferLength (#1826)

* metal : handle buffers larger than device's maxBufferLength

* metal : print more verbose device info + handle errors

* metal : fix prints for overlapping views

* metal : minimize view overlap to try to utilize device memory better
---
 ggml.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

(limited to 'ggml.c')

diff --git a/ggml.c b/ggml.c
index 0eda7f3..78c3653 100644
--- a/ggml.c
+++ b/ggml.c
@@ -4154,14 +4154,34 @@ void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc) {
     ctx->no_alloc = no_alloc;
 }
 
-void * ggml_get_mem_buffer(struct ggml_context * ctx) {
+void * ggml_get_mem_buffer(const struct ggml_context * ctx) {
     return ctx->mem_buffer;
 }
 
-size_t ggml_get_mem_size(struct ggml_context * ctx) {
+size_t ggml_get_mem_size(const struct ggml_context * ctx) {
     return ctx->mem_size;
 }
 
+size_t ggml_get_max_tensor_size(const struct ggml_context * ctx) {
+    size_t max_size = 0;
+
+    struct ggml_object * obj = ctx->objects_begin;
+
+    while (obj != NULL) {
+        struct ggml_tensor * tensor = (struct ggml_tensor *) ((char *) ctx->mem_buffer + obj->offs);
+
+        const size_t size = ggml_nbytes(tensor);
+
+        if (max_size < size) {
+            max_size = size;
+        }
+
+        obj = obj->next;
+    }
+
+    return max_size;
+}
+
 // IMPORTANT:
 // when creating "opt" tensors, always save and load the scratch buffer
 // this is an error prone process, but it is necessary to support inplace
-- 
cgit v1.2.3