author     Johannes Gäßler <johannesg@5d6.de>   2023-08-07 10:09:40 +0200
committer  GitHub <noreply@github.com>          2023-08-07 10:09:40 +0200
commit     3d9a55181603e85a26378a850a14068034e5002d
tree       7053da3c2538ae03ba6ca3c36b84f7b3252df721
parent     f6f9896ac3d2ff207e18f87dab85d126ceef5236
Fixed mmap prefetch for GPU offloading (#2529)
 llama-util.h | 2 +-
 llama.cpp    | 6 +++---
 2 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/llama-util.h b/llama-util.h
index 3fc03ce..6e9e39d 100644
--- a/llama-util.h
+++ b/llama-util.h
@@ -219,7 +219,7 @@ struct llama_mmap {
         // prefetch/readahead impairs performance on NUMA systems
         if (numa) { prefetch = 0; }
 #ifdef __linux__
-        if (prefetch) { flags |= MAP_POPULATE; }
+        if (prefetch >= file->size) { flags |= MAP_POPULATE; }
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
diff --git a/llama.cpp b/llama.cpp
--- a/llama.cpp
+++ b/llama.cpp
@@ -747,12 +747,12 @@ struct llama_model_loader {
     void load_all_data(llama_progress_callback progress_callback, void * progress_callback_user_data, llama_mlock * lmlock) {
         size_t data_size = 0;
-        size_t prefetch_size = 0;
+        size_t prefetch_size = file_loader->file.size;
         size_t lock_size = 0;
         for (const llama_load_tensor & lt : tensors_map.tensors) {
             data_size += lt.size;
-            if (lt.ggml_tensor->backend == GGML_BACKEND_CPU) {
-                prefetch_size += lt.size;
+            if (lt.ggml_tensor->backend != GGML_BACKEND_CPU) {
+                prefetch_size -= lt.size;
             }
         }
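The two hunks work together. MAP_POPULATE faults the entire mapping into memory up front, so the old `if (prefetch)` test pulled the whole model file into RAM even when most tensors were offloaded to the GPU; the new test requests eager population only when the prefetch request covers the whole file. The llama.cpp hunk makes the accounting consistent with that test: instead of summing the sizes of CPU-resident tensors (a sum that excludes file headers and so never reaches file->size), prefetch_size starts at the full file size and subtracts each offloaded tensor, so the no-offload case still satisfies prefetch >= file->size. Below is a minimal, self-contained C sketch of this policy, not the repository's code: map_with_prefetch is a hypothetical helper, and the madvise(MADV_WILLNEED) path for a partial prefetch is an assumption about how a prefix-only prefetch could be honored, since that part is not shown in this diff.

#define _GNU_SOURCE           // for MAP_POPULATE on Linux
#include <sys/mman.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

// Map `path` read-only and prefetch up to `prefetch` bytes.
// Returns the mapping address, or NULL on failure.
static void * map_with_prefetch(const char * path, size_t prefetch) {
    int fd = open(path, O_RDONLY);
    if (fd == -1) { return NULL; }

    struct stat st;
    if (fstat(fd, &st) == -1) { close(fd); return NULL; }
    size_t file_size = (size_t) st.st_size;

    int flags = MAP_SHARED;
#ifdef __linux__
    // Only populate eagerly when the request covers the whole file:
    // MAP_POPULATE cannot be limited to a prefix of the mapping.
    if (prefetch >= file_size) { flags |= MAP_POPULATE; }
#endif
    void * addr = mmap(NULL, file_size, PROT_READ, flags, fd, 0);
    close(fd); // the mapping stays valid after the descriptor is closed
    if (addr == MAP_FAILED) { return NULL; }

    if (prefetch > 0 && prefetch < file_size) {
        // Partial prefetch (assumed behavior, not part of this diff):
        // hint that the first `prefetch` bytes will be needed soon,
        // without forcing the offloaded tail into memory.
        if (madvise(addr, prefetch, MADV_WILLNEED) != 0) {
            perror("madvise");
        }
    }
    return addr;
}

On non-Linux POSIX systems the flags simply omit MAP_POPULATE and pages are populated lazily on first access, which mirrors the #ifdef __linux__ guard in the patch.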