aboutsummaryrefslogtreecommitdiff
path: root/llama-util.h
diff options
context:
space:
mode:
author kiltyj <kiltyj@gmail.com> 2023-06-05 13:24:04 -0700
committer GitHub <noreply@github.com> 2023-06-05 23:24:04 +0300
commit 9d0693bce38013364b1042568d9083353bfff48f (patch)
tree 8311cb168defca62e9b2689571c4b641ea7654b9 /llama-util.h
parent efe05076323f5c6bafece109e21cce046f5e4b07 (diff)
metal : use shared buffers between CPU and GPU (#1696)
* Use MTLDevice.newBufferWithBytesNoCopy to share buffers between CPU and GPU
* Page-align buffers used by Metal
* Remove trailing whitespace
* Only import unistd.h for Metal builds
* metal : remove unnecessary copies

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama-util.h')
-rw-r--r-- llama-util.h 16
1 files changed, 16 insertions, 0 deletions
diff --git a/llama-util.h b/llama-util.h
index 3cac9f6..4f8a429 100644
--- a/llama-util.h
+++ b/llama-util.h
@@ -405,13 +405,29 @@ struct llama_buffer {
llama_buffer() = default;
void resize(size_t len) {
+#ifdef GGML_USE_METAL
+ free(addr);
+ int result = posix_memalign((void **) &addr, getpagesize(), len);
+ if (result == 0) {
+ memset(addr, 0, len);
+ }
+ else {
+ addr = NULL;
+ }
+#else
delete[] addr;
addr = new uint8_t[len];
+#endif
size = len;
}
~llama_buffer() {
+#ifdef GGML_USE_METAL
+ free(addr);
+#else
delete[] addr;
+#endif
+ addr = NULL;
}
// disable copy and move