From b853d456018b10820686362af41b2f2f75f1eec6 Mon Sep 17 00:00:00 2001 From: zrm Date: Mon, 26 Jun 2023 13:57:59 -0400 Subject: ggml : add NUMA support (#1556) * detect NUMA systems and pin work threads to nodes (linux) * disable mmap prefetch/readahead for NUMA systems * avoid sending finalize op to thread pool if it does nothing * silence robot * fix args * make --numa a param * recommendation that n_nodes evenly divide n_threads did not warrant such aggressive enforcement * lower synchronization overhead * statically allocate * move numa state to g_state * add description for --numa * ggml : minor style changes * ggml : minor style + try fix sanitizer build * llama : allow to initialize backend with NUMA support * llama : avoid ggml include in llama-util.h * ggml : style / formatting * ggml : fix handling of ops with n_threads > n_tasks > 1 * server : utilize numa parameter --------- Co-authored-by: Georgi Gerganov --- llama-util.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'llama-util.h') diff --git a/llama-util.h b/llama-util.h index 4f8a429..042ebe4 100644 --- a/llama-util.h +++ b/llama-util.h @@ -172,12 +172,14 @@ struct llama_mmap { #ifdef _POSIX_MAPPED_FILES static constexpr bool SUPPORTED = true; - llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) { + llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) { size = file->size; int fd = fileno(file->fp); int flags = MAP_SHARED; + // prefetch/readahead impairs performance on NUMA systems + if (numa) { prefetch = 0; } #ifdef __linux__ - flags |= MAP_POPULATE; + if (prefetch) { flags |= MAP_POPULATE; } #endif addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0); if (addr == MAP_FAILED) { @@ -191,6 +193,14 @@ struct llama_mmap { strerror(errno)); } } + if (numa) { + // advise the kernel not to use readahead + // (because the next page might not belong on the same node) + if (madvise(addr, file->size, MADV_RANDOM)) { + fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n", + strerror(errno)); + } + } } ~llama_mmap() { @@ -199,7 +209,9 @@ struct llama_mmap { #elif defined(_WIN32) static constexpr bool SUPPORTED = true; - llama_mmap(struct llama_file * file, bool prefetch = true) { + llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) { + (void) numa; + size = file->size; HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp)); @@ -244,8 +256,10 @@ struct llama_mmap { #else static constexpr bool SUPPORTED = false; - llama_mmap(struct llama_file *, bool prefetch = true) { - (void)prefetch; + llama_mmap(struct llama_file *, bool prefetch = true, bool numa = false) { + (void) prefetch; + (void) numa; + throw std::runtime_error(std::string("mmap not supported")); } #endif -- cgit v1.2.3