diff options
Diffstat (limited to 'examples/common.cpp')
-rw-r--r-- | examples/common.cpp | 9 |
1 file changed, 6 insertions, 3 deletions
diff --git a/examples/common.cpp b/examples/common.cpp
index b27aa6c..f909eed 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -1,7 +1,5 @@
 #include "common.h"
 
-#include "ggml.h"
-
 #include <cassert>
 #include <cstring>
 #include <fstream>
@@ -161,6 +159,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.use_color = true;
         } else if (arg == "--mlock") {
             params.use_mlock = true;
+        } else if (arg == "--no-mmap") {
+            params.use_mmap = false;
         } else if (arg == "--mtest") {
             params.mem_test = true;
         } else if (arg == "--verbose-prompt") {
@@ -240,9 +240,12 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, " -b N, --batch_size N batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stderr, " --perplexity compute perplexity over the prompt\n");
     fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
-    if (ggml_mlock_supported()) {
+    if (llama_mlock_supported()) {
         fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
     }
+    if (llama_mmap_supported()) {
+        fprintf(stderr, " --no-mmap do not memory-map model (slower load but may reduce pageouts if not using mlock)\n");
+    }
     fprintf(stderr, " --mtest compute maximum memory usage\n");
     fprintf(stderr, " --verbose-prompt print prompt before generation\n");
     fprintf(stderr, " -m FNAME, --model FNAME\n");