diff options
author | slaren <2141330+slaren@users.noreply.github.com> | 2023-05-01 13:32:22 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-01 13:32:22 +0200 |
commit | b925f1f1b082319ee69943f8d1a83ac9b6ff09ca (patch) | |
tree | cb636a894e6b11918aafce061f3836a24b021e4f /llama-util.h | |
parent | 90b19bd6eee943832584f9cac0b6f9ea29cc42a4 (diff) |
cuBLAS: fall back to pageable memory if pinned alloc fails (#1233)
* cuBLAS: fall back to pageable memory if pinned alloc fails
* cuBLAS: do not use pinned memory if env variable GGML_CUDA_NO_PINNED is set
Diffstat (limited to 'llama-util.h')
-rw-r--r-- | llama-util.h | 42 |
1 file changed, 38 insertions, 4 deletions
diff --git a/llama-util.h b/llama-util.h index ca4dd16..5f9f70e 100644 --- a/llama-util.h +++ b/llama-util.h @@ -395,6 +395,8 @@ struct llama_buffer { uint8_t * addr = NULL; size_t size = 0; + llama_buffer() = default; + void resize(size_t size) { delete[] addr; addr = new uint8_t[size]; @@ -404,27 +406,59 @@ struct llama_buffer { ~llama_buffer() { delete[] addr; } + + // disable copy and move + llama_buffer(const llama_buffer&) = delete; + llama_buffer(llama_buffer&&) = delete; + llama_buffer& operator=(const llama_buffer&) = delete; + llama_buffer& operator=(llama_buffer&&) = delete; }; #ifdef GGML_USE_CUBLAS #include "ggml-cuda.h" struct llama_ctx_buffer { uint8_t * addr = NULL; + bool is_cuda; size_t size = 0; + llama_ctx_buffer() = default; + void resize(size_t size) { + free(); + + addr = (uint8_t *) ggml_cuda_host_malloc(size); if (addr) { - ggml_cuda_host_free(addr); + is_cuda = true; + } + else { + // fall back to pageable memory + addr = new uint8_t[size]; + is_cuda = false; } - addr = (uint8_t *) ggml_cuda_host_malloc(size); this->size = size; } - ~llama_ctx_buffer() { + void free() { if (addr) { - ggml_cuda_host_free(addr); + if (is_cuda) { + ggml_cuda_host_free(addr); + } + else { + delete[] addr; + } } + addr = NULL; } + + ~llama_ctx_buffer() { + free(); + } + + // disable copy and move + llama_ctx_buffer(const llama_ctx_buffer&) = delete; + llama_ctx_buffer(llama_ctx_buffer&&) = delete; + llama_ctx_buffer& operator=(const llama_ctx_buffer&) = delete; + llama_ctx_buffer& operator=(llama_ctx_buffer&&) = delete; }; #else typedef llama_buffer llama_ctx_buffer; |