aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Johannes Gäßler <johannesg@5d6.de> 2023-08-07 10:09:40 +0200
committer GitHub <noreply@github.com> 2023-08-07 10:09:40 +0200
commit 3d9a55181603e85a26378a850a14068034e5002d (patch)
tree 7053da3c2538ae03ba6ca3c36b84f7b3252df721
parent f6f9896ac3d2ff207e18f87dab85d126ceef5236 (diff)
Fixed mmap prefetch for GPU offloading (#2529)
-rw-r--r-- llama-util.h 2
-rw-r--r-- llama.cpp 6
2 files changed, 4 insertions, 4 deletions
diff --git a/llama-util.h b/llama-util.h
index 3fc03ce..6e9e39d 100644
--- a/llama-util.h
+++ b/llama-util.h
@@ -219,7 +219,7 @@ struct llama_mmap {
// prefetch/readahead impairs performance on NUMA systems
if (numa) { prefetch = 0; }
#ifdef __linux__
- if (prefetch) { flags |= MAP_POPULATE; }
+ if (prefetch >= file->size) { flags |= MAP_POPULATE; }
#endif
addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
if (addr == MAP_FAILED) {
diff --git a/llama.cpp b/llama.cpp
index 8397398..39aefd4 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -747,12 +747,12 @@ struct llama_model_loader {
void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
size_t data_size = 0;
- size_t prefetch_size = 0;
+ size_t prefetch_size = file_loader->file.size;
size_t lock_size = 0;
for (const llama_load_tensor & lt : tensors_map.tensors) {
data_size += lt.size;
- if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
- prefetch_size += lt.size;
+ if (lt.ggml_tensor->backend != GGML_BACKEND_CPU) {
+ prefetch_size -= lt.size;
}
}