diff options
Diffstat (limited to 'llama.cpp')
-rw-r--r-- | llama.cpp | 26 |
1 files changed, 16 insertions, 10 deletions
@@ -2696,16 +2696,21 @@ struct llama_context * llama_init_from_file( // this allocates all Metal resources and memory buffers ctx->ctx_metal = ggml_metal_init(); - void *data_ptr = NULL; + void * data_ptr = NULL; size_t data_size = 0; + if (params.use_mmap) { - data_ptr = ctx->model.mapping->addr; - data_size= ctx->model.mapping->size; + data_ptr = ctx->model.mapping->addr; + data_size = ctx->model.mapping->size; } else { - data_ptr = ggml_get_mem_buffer(ctx->model.ctx); - data_size= ggml_get_mem_size(ctx->model.ctx); + data_ptr = ggml_get_mem_buffer(ctx->model.ctx); + data_size = ggml_get_mem_size (ctx->model.ctx); } + const size_t max_size = ggml_get_max_tensor_size(ctx->model.ctx); + + printf("%s: max tensor size = %8.2f MB\n", __func__, max_size/1024.0/1024.0); + #define LLAMA_METAL_CHECK_BUF(result) \ if (!(result)) { \ fprintf(stderr, "%s: failed to add buffer\n", __func__); \ @@ -2713,12 +2718,13 @@ struct llama_context * llama_init_from_file( return NULL; \ } - LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "data", data_ptr, data_size)); - LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "eval", ctx->buf_compute.addr, ctx->buf_compute.size)); + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "data", data_ptr, data_size, max_size)); + + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "eval", ctx->buf_compute.addr, ctx->buf_compute.size, 0)); + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "kv", ctx->model.kv_self.buf.addr, ctx->model.kv_self.buf.size, 0)); - LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "kv", ctx->model.kv_self.buf.addr, ctx->model.kv_self.buf.size)); - LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr0", ctx->buf_scratch[0].addr, ctx->buf_scratch[0].size)); - LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr1", ctx->buf_scratch[1].addr, ctx->buf_scratch[1].size)); + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr0", ctx->buf_scratch[0].addr, ctx->buf_scratch[0].size, 0)); + LLAMA_METAL_CHECK_BUF(ggml_metal_add_buffer(ctx->ctx_metal, "scr1", ctx->buf_scratch[1].addr, ctx->buf_scratch[1].size, 0)); #undef LLAMA_METAL_CHECK_BUF } #endif |