diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-06-18 09:09:47 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-18 09:09:47 +0300 |
commit | ce2c7d72e2d06988b5ddec6811ab923254542077 (patch) | |
tree | b93a7ec0766344197763c657df8bb7fdd4894077 /ggml-metal.h | |
parent | 57cd69460f736031a3fc54af1e97c03f80128478 (diff) |
metal : handle buffers larger than device's maxBufferLength (#1826)
* metal : handle buffers larger than device's maxBufferLength
* metal : print more verbose device info + handle errors
* metal : fix prints for overlapping views
* metal : minimize view overlap to try to utilize device memory better
Diffstat (limited to 'ggml-metal.h')
-rw-r--r-- | ggml-metal.h | 5 |
1 files changed, 4 insertions, 1 deletions
```diff
diff --git a/ggml-metal.h b/ggml-metal.h
index 033c4d8..b9e50ac 100644
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -41,12 +41,15 @@ void ggml_metal_free(struct ggml_metal_context * ctx);
 // - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
 // - the mapping is used during computation to determine the arguments of the compute kernels
 // - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
+// - max_size specifies the maximum size of a tensor and is used to create shared views such
+// that it is guaranteed that the tensor will fit in at least one of the views
 //
 bool ggml_metal_add_buffer(
     struct ggml_metal_context * ctx,
     const char * name,
     void * data,
-    size_t size);
+    size_t size,
+    size_t max_size);
 
 // set data from host memory into the device
 void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
```