diff options
author | zrm <trustiosity.zrm@gmail.com> | 2023-06-26 13:57:59 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-26 20:57:59 +0300 |
commit | b853d456018b10820686362af41b2f2f75f1eec6 (patch) | |
tree | 264e68c8555d8509a5ac27f01eed5e6c69940174 /llama-util.h | |
parent | 9225baef71407d799a6f7f563b77fd7f82791416 (diff) |
ggml : add NUMA support (#1556)
* detect NUMA systems and pin work threads to nodes (linux)
* disable mmap prefetch/readahead for NUMA systems
* avoid sending finalize op to thread pool if it does nothing
* silence robot
* fix args
* make --numa a param
* recommendation that n_nodes evenly divide n_threads did not warrant such aggressive enforcement
* lower synchronization overhead
* statically allocate
* move numa state to g_state
* add description for --numa
* ggml : minor style changes
* ggml : minor style + try fix sanitizer build
* llama : allow to initialize backend with NUMA support
* llama : avoid ggml include in llama-util.h
* ggml : style / formatting
* ggml : fix handling of ops with n_threads > n_tasks > 1
* server : utilize numa parameter
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama-util.h')
-rw-r--r-- | llama-util.h | 24 |
1 files changed, 19 insertions, 5 deletions
```diff
diff --git a/llama-util.h b/llama-util.h
index 4f8a429..042ebe4 100644
--- a/llama-util.h
+++ b/llama-util.h
@@ -172,12 +172,14 @@ struct llama_mmap {
 #ifdef _POSIX_MAPPED_FILES
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
+    llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
         size = file->size;
         int fd = fileno(file->fp);
         int flags = MAP_SHARED;
+        // prefetch/readahead impairs performance on NUMA systems
+        if (numa) { prefetch = 0; }
 #ifdef __linux__
-        flags |= MAP_POPULATE;
+        if (prefetch) { flags |= MAP_POPULATE; }
 #endif
         addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
         if (addr == MAP_FAILED) {
@@ -191,6 +193,14 @@ struct llama_mmap {
                         strerror(errno));
             }
         }
+        if (numa) {
+            // advise the kernel not to use readahead
+            // (because the next page might not belong on the same node)
+            if (madvise(addr, file->size, MADV_RANDOM)) {
+                fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n",
+                        strerror(errno));
+            }
+        }
     }
 
     ~llama_mmap() {
@@ -199,7 +209,9 @@ struct llama_mmap {
 #elif defined(_WIN32)
     static constexpr bool SUPPORTED = true;
 
-    llama_mmap(struct llama_file * file, bool prefetch = true) {
+    llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
+        (void) numa;
+
         size = file->size;
 
         HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
@@ -244,8 +256,10 @@ struct llama_mmap {
 #else
     static constexpr bool SUPPORTED = false;
 
-    llama_mmap(struct llama_file *, bool prefetch = true) {
-        (void)prefetch;
+    llama_mmap(struct llama_file *, bool prefetch = true, bool numa = false) {
+        (void) prefetch;
+        (void) numa;
+
         throw std::runtime_error(std::string("mmap not supported"));
     }
 #endif
```