diff options
author | Spencer Sutton <spencersutton@users.noreply.github.com> | 2023-06-05 23:28:17 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-06 06:28:17 +0300 |
commit | 590250f7a9847bc9c83aa063dbaac8fa0fea27c8 (patch) | |
tree | 8ee19cf55a027e846ba83efb183c195d9182f7aa | |
parent | f4c55d3bd7e124b101bc974cbbf0e0dbbc32d5a3 (diff) |
metal : add checks for buffer size (#1706)
Co-authored-by: Spencer Sutton <Spencer.Sutton@precisely.com>
-rw-r--r-- | ggml-metal.m | 5 | ||||
-rw-r--r-- | llama.cpp | 27 |
2 files changed, 25 insertions, 7 deletions
diff --git a/ggml-metal.m b/ggml-metal.m
index 82c6596..d721ac6 100644
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -204,6 +204,11 @@ bool ggml_metal_add_buffer(
         ctx->buffers[ctx->n_buffers].name = name;
         ctx->buffers[ctx->n_buffers].data = data;
         ctx->buffers[ctx->n_buffers].size = size;
+
+        if (ctx->device.maxBufferLength < aligned_size) {
+            fprintf(stderr, "%s: buffer '%s' size %zu is larger than buffer maximum of %zu\n", __func__, name, aligned_size, ctx->device.maxBufferLength);
+            return false;
+        }
         ctx->buffers[ctx->n_buffers].metal = [ctx->device newBufferWithBytesNoCopy:data length:aligned_size options:MTLResourceStorageModeShared deallocator:nil];

         if (ctx->buffers[ctx->n_buffers].metal == nil) {
diff --git a/llama.cpp b/llama.cpp
--- a/llama.cpp
+++ b/llama.cpp
@@ -2405,17 +2405,30 @@ struct llama_context * llama_init_from_file(
         // this allocates all Metal resources and memory buffers
         ctx->ctx_metal = ggml_metal_init();

+        void *data_ptr = NULL;
+        size_t data_size = 0;
         if (params.use_mmap) {
-            ggml_metal_add_buffer(ctx->ctx_metal, "data", ctx->model.mapping->addr, ctx->model.mapping->size);
-            ggml_metal_add_buffer(ctx->ctx_metal, "eval", ctx->buf_compute.addr, ctx->buf_compute.size);
+            data_ptr = ctx->model.mapping->addr;
+            data_size= ctx->model.mapping->size;
         } else {
-            ggml_metal_add_buffer(ctx->ctx_metal, "data", ggml_get_mem_buffer(ctx->model.ctx), ggml_get_mem_size(ctx->model.ctx));
-            ggml_metal_add_buffer(ctx->ctx_metal, "eval", ctx->buf_compute.addr, ctx->buf_compute.size);
+            data_ptr = ggml_get_mem_buffer(ctx->model.ctx);
+            data_size= ggml_get_mem_size(ctx->model.ctx);
         }

-        ggml_metal_add_buffer(ctx->ctx_metal, "kv", ctx->model.kv_self.buf.addr, ctx->model.kv_self.buf.size);
-        ggml_metal_add_buffer(ctx->ctx_metal, "scr0", ctx->buf_scratch[0].addr, ctx->buf_scratch[0].size);
-        ggml_metal_add_buffer(ctx->ctx_metal, "scr1", ctx->buf_scratch[1].addr, ctx->buf_scratch[1].size);
+#define LLAMA_METAL_CHECK_BUF(result) \
+        if (!(result)) { \
+            fprintf(stderr, "%s: failed to add buffer\n", __func__); \
+            llama_free(ctx); \
+            return NULL; \
+        }
+
+        LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "data", data_ptr, data_size));
+        LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "eval", ctx->buf_compute.addr, ctx->buf_compute.size));
+
+        LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "kv", ctx->model.kv_self.buf.addr, ctx->model.kv_self.buf.size));
+        LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr0", ctx->buf_scratch[0].addr, ctx->buf_scratch[0].size));
+        LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr1", ctx->buf_scratch[1].addr, ctx->buf_scratch[1].size));
+#undef LLAMA_METAL_CHECK_BUF
     }
 #endif