aboutsummaryrefslogtreecommitdiff
path: root/llama.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'llama.cpp')
-rw-r--r--llama.cpp26
1 files changed, 16 insertions, 10 deletions
diff --git a/llama.cpp b/llama.cpp
index a2916b3..c165d32 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -2696,16 +2696,21 @@ struct llama_context * llama_init_from_file(
// this allocates all Metal resources and memory buffers
ctx->ctx_metal = ggml_metal_init();
- void *data_ptr = NULL;
+ void * data_ptr = NULL;
size_t data_size = 0;
+
if (params.use_mmap) {
- data_ptr = ctx->model.mapping->addr;
- data_size= ctx->model.mapping->size;
+ data_ptr = ctx->model.mapping->addr;
+ data_size = ctx->model.mapping->size;
} else {
- data_ptr = ggml_get_mem_buffer(ctx->model.ctx);
- data_size= ggml_get_mem_size(ctx->model.ctx);
+ data_ptr = ggml_get_mem_buffer(ctx->model.ctx);
+ data_size = ggml_get_mem_size (ctx->model.ctx);
}
+ const size_t max_size = ggml_get_max_tensor_size(ctx->model.ctx);
+
+ printf("%s: max tensor size = %8.2f MB\n", __func__, max_size/1024.0/1024.0);
+
#define LLAMA_METAL_CHECK_BUF(result) \
if (!(result)) { \
fprintf(stderr, "%s: failed to add buffer\n", __func__); \
@@ -2713,12 +2718,13 @@ struct llama_context * llama_init_from_file(
return NULL; \
}
- LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "data", data_ptr, data_size));
- LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "eval", ctx->buf_compute.addr, ctx->buf_compute.size));
+ LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "data", data_ptr, data_size, max_size));
+
+ LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "eval", ctx->buf_compute.addr, ctx->buf_compute.size, 0));
+ LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "kv", ctx->model.kv_self.buf.addr, ctx->model.kv_self.buf.size, 0));
- LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "kv", ctx->model.kv_self.buf.addr, ctx->model.kv_self.buf.size));
- LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr0", ctx->buf_scratch[0].addr, ctx->buf_scratch[0].size));
- LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr1", ctx->buf_scratch[1].addr, ctx->buf_scratch[1].size));
+ LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr0", ctx->buf_scratch[0].addr, ctx->buf_scratch[0].size, 0));
+ LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr1", ctx->buf_scratch[1].addr, ctx->buf_scratch[1].size, 0));
#undef LLAMA_METAL_CHECK_BUF
}
#endif