author    slaren <2141330+slaren@users.noreply.github.com>  2023-05-01 13:32:22 +0200
committer GitHub <noreply@github.com>  2023-05-01 13:32:22 +0200
commit    b925f1f1b082319ee69943f8d1a83ac9b6ff09ca (patch)
tree      cb636a894e6b11918aafce061f3836a24b021e4f /llama.cpp
parent    90b19bd6eee943832584f9cac0b6f9ea29cc42a4 (diff)
cuBLAS: fall back to pageable memory if pinned alloc fails (#1233)
* cuBLAS: fall back to pageable memory if pinned alloc fails
* cuBLAS: do not use pinned memory if env variable GGML_CUDA_NO_PINNED is set
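The pattern described in the commit message can be sketched roughly as follows. This is only an illustration of the behavior (try a pinned CUDA host allocation, honor the GGML_CUDA_NO_PINNED opt-out, and return NULL so the caller can fall back to a pageable malloc); the helper name and warning text below are assumptions, not the actual code in ggml-cuda.cu, which this diff does not show.

    #include <cuda_runtime.h>
    #include <cstdio>
    #include <cstdlib>

    // Hypothetical pinned-host allocator illustrating the commit's behavior:
    // returns NULL when pinned memory is disabled or unavailable, so the
    // caller can fall back to a regular pageable allocation.
    static void * cuda_host_malloc_or_null(size_t size) {
        // Respect the opt-out: never use pinned memory if GGML_CUDA_NO_PINNED is set.
        if (getenv("GGML_CUDA_NO_PINNED") != NULL) {
            return NULL;
        }
        void * ptr = NULL;
        cudaError_t err = cudaMallocHost(&ptr, size);
        if (err != cudaSuccess) {
            // Pinned allocation failed (e.g. locked-memory limit reached):
            // warn and signal the caller to fall back to pageable memory.
            fprintf(stderr, "WARNING: failed to allocate %.2f MB of pinned memory: %s\n",
                    size / 1024.0 / 1024.0, cudaGetErrorString(err));
            return NULL;
        }
        return ptr;
    }

A caller would then fall back along the lines of: allocate with cuda_host_malloc_or_null(), and if that returns NULL, use plain malloc() and continue with pageable memory instead of aborting.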
Diffstat (limited to 'llama.cpp')
-rw-r--r--  llama.cpp  |  3
1 file changed, 1 insertion(+), 2 deletions(-)
diff --git a/llama.cpp b/llama.cpp
index 3d82113..0d094a5 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -727,8 +727,7 @@ struct llama_model_loader {
LLAMA_ASSERT(offset == lt.size);
} else if (lt.split_type == SPLIT_BY_COLUMNS) {
// Let's load the data into temporary buffers to ensure the OS performs large loads.
- std::vector<llama_buffer> tmp_bufs;
- tmp_bufs.resize(lt.shards.size());
+ std::vector<llama_buffer> tmp_bufs(lt.shards.size());
for (size_t i = 0; i < lt.shards.size(); i++) {
llama_load_tensor_shard & shard = lt.shards.at(i);
llama_file & file = file_loaders.at(shard.file_idx)->file;