metal : handle buffers larger than device's maxBufferLength (#1826)

* metal : handle buffers larger than device's maxBufferLength
* metal : print more verbose device info + handle errors
* metal : fix prints for overlapping views
* metal : minimize view overlap to try to utilize device memory better
author: Georgi Gerganov <ggerganov@gmail.com> 2023-06-18 09:09:47 +0300
committer: GitHub <noreply@github.com> 2023-06-18 09:09:47 +0300
commit: ce2c7d72e2d06988b5ddec6811ab923254542077 (patch)
tree: b93a7ec0766344197763c657df8bb7fdd4894077 /ggml-metal.h
parent: 57cd69460f736031a3fc54af1e97c03f80128478 (diff)
1 files changed, 4 insertions, 1 deletions
diff --git a/ggml-metal.h b/ggml-metal.h
index 033c4d8..b9e50ac 100644
--- a/ggml-metal.h
+++ b/ggml-metal.h
@@ -41,12 +41,15 @@ void ggml_metal_free(struct ggml_metal_context * ctx);
 // - make sure to map all buffers used in the graph before calling ggml_metal_graph_compute
 // - the mapping is used during computation to determine the arguments of the compute kernels
 // - you don't need to keep the host memory buffer allocated as it is never accessed by Metal
+// - max_size specifies the maximum size of a tensor and is used to create shared views such
+//   that it is guaranteed that the tensor will fit in at least one of the views
 //
 bool ggml_metal_add_buffer(
         struct ggml_metal_context * ctx,
                        const char * name,
                              void * data,
-                           size_t   size);
+                           size_t   size,
+                           size_t   max_size);
 
 // set data from host memory into the device
 void ggml_metal_set_tensor(struct ggml_metal_context * ctx, struct ggml_tensor * t);
author	Georgi Gerganov <ggerganov@gmail.com>	2023-06-18 09:09:47 +0300
committer	GitHub <noreply@github.com>	2023-06-18 09:09:47 +0300
commit	ce2c7d72e2d06988b5ddec6811ab923254542077 (patch)
tree	b93a7ec0766344197763c657df8bb7fdd4894077 /ggml-metal.h
parent	57cd69460f736031a3fc54af1e97c03f80128478 (diff)