aboutsummaryrefslogtreecommitdiff
path: root/llama-util.h
diff options
context:
space:
mode:
author zrm <trustiosity.zrm@gmail.com> 2023-06-26 13:57:59 -0400
committer GitHub <noreply@github.com> 2023-06-26 20:57:59 +0300
commit b853d456018b10820686362af41b2f2f75f1eec6 (patch)
tree 264e68c8555d8509a5ac27f01eed5e6c69940174 /llama-util.h
parent 9225baef71407d799a6f7f563b77fd7f82791416 (diff)
ggml : add NUMA support (#1556)
* detect NUMA systems and pin work threads to nodes (linux)
* disable mmap prefetch/readahead for NUMA systems
* avoid sending finalize op to thread pool if it does nothing
* silence robot
* fix args
* make --numa a param
* recommendation that n_nodes evenly divide n_threads did not warrant such aggressive enforcement
* lower synchronization overhead
* statically allocate
* move numa state to g_state
* add description for --numa
* ggml : minor style changes
* ggml : minor style + try fix sanitizer build
* llama : allow to initialize backend with NUMA support
* llama : avoid ggml include in llama-util.h
* ggml : style / formatting
* ggml : fix handling of ops with n_threads > n_tasks > 1
* server : utilize numa parameter

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama-util.h')
-rw-r--r-- llama-util.h 24
1 files changed, 19 insertions, 5 deletions
diff --git a/llama-util.h b/llama-util.h
index 4f8a429..042ebe4 100644
--- a/llama-util.h
+++ b/llama-util.h
@@ -172,12 +172,14 @@ struct llama_mmap {
#ifdef _POSIX_MAPPED_FILES
static constexpr bool SUPPORTED = true;
- llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */) {
+ llama_mmap(struct llama_file * file, size_t prefetch = (size_t) -1 /* -1 = max value */, bool numa = false) {
size = file->size;
int fd = fileno(file->fp);
int flags = MAP_SHARED;
+ // prefetch/readahead impairs performance on NUMA systems
+ if (numa) { prefetch = 0; }
#ifdef __linux__
- flags |= MAP_POPULATE;
+ if (prefetch) { flags |= MAP_POPULATE; }
#endif
addr = mmap(NULL, file->size, PROT_READ, flags, fd, 0);
if (addr == MAP_FAILED) {
@@ -191,6 +193,14 @@ struct llama_mmap {
strerror(errno));
}
}
+ if (numa) {
+ // advise the kernel not to use readahead
+ // (because the next page might not belong on the same node)
+ if (madvise(addr, file->size, MADV_RANDOM)) {
+ fprintf(stderr, "warning: madvise(.., MADV_RANDOM) failed: %s\n",
+ strerror(errno));
+ }
+ }
}
~llama_mmap() {
@@ -199,7 +209,9 @@ struct llama_mmap {
#elif defined(_WIN32)
static constexpr bool SUPPORTED = true;
- llama_mmap(struct llama_file * file, bool prefetch = true) {
+ llama_mmap(struct llama_file * file, bool prefetch = true, bool numa = false) {
+ (void) numa;
+
size = file->size;
HANDLE hFile = (HANDLE) _get_osfhandle(_fileno(file->fp));
@@ -244,8 +256,10 @@ struct llama_mmap {
#else
static constexpr bool SUPPORTED = false;
- llama_mmap(struct llama_file *, bool prefetch = true) {
- (void)prefetch;
+ llama_mmap(struct llama_file *, bool prefetch = true, bool numa = false) {
+ (void) prefetch;
+ (void) numa;
+
throw std::runtime_error(std::string("mmap not supported"));
}
#endif