| author | Stephan Walter <stephan@walter.name> | 2023-05-17 22:12:01 +0000 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-05-17 22:12:01 +0000 |
| commit | dc271c52ed65e7c8dfcbaaf84dabb1f788e4f3d0 (patch) | |
| tree | 7057c49800c88566dd2bd7d4df051df6c53f7b89 /examples | |
| parent | c238b5873a1ea496db03ffcfe124c9d0d83afbc6 (diff) | |
Remove unused n_parts parameter (#1509)
Diffstat (limited to 'examples')
| -rw-r--r-- | examples/common.cpp | 8 |
| -rw-r--r-- | examples/common.h | 1 |
| -rw-r--r-- | examples/quantize-stats/quantize-stats.cpp | 1 |
| -rw-r--r-- | examples/save-load-state/save-load-state.cpp | 1 |
4 files changed, 0 insertions, 11 deletions
diff --git a/examples/common.cpp b/examples/common.cpp
index 259880a..a6abc49 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -321,12 +321,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
-        } else if (arg == "--n-parts") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            params.n_parts = std::stoi(argv[i]);
         } else if (arg == "-h" || arg == "--help") {
             gpt_print_usage(argc, argv, default_params);
             exit(0);
@@ -418,7 +412,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, " --no-penalize-nl do not penalize newline token\n");
     fprintf(stderr, " --memory-f32 use f32 instead of f16 for memory key+value\n");
     fprintf(stderr, " --temp N temperature (default: %.1f)\n", (double)params.temp);
-    fprintf(stderr, " --n-parts N number of model parts (default: -1 = determine from dimensions)\n");
     fprintf(stderr, " -b N, --batch-size N batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stderr, " --perplexity compute perplexity over the prompt\n");
     fprintf(stderr, " --keep number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
@@ -473,7 +466,6 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();

     lparams.n_ctx = params.n_ctx;
-    lparams.n_parts = params.n_parts;
     lparams.n_gpu_layers = params.n_gpu_layers;
     lparams.seed = params.seed;
     lparams.f16_kv = params.memory_f16;
diff --git a/examples/common.h b/examples/common.h
index f4e07a2..2ad20ba 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -24,7 +24,6 @@ struct gpt_params {
     int32_t seed = -1; // RNG seed
     int32_t n_threads = get_num_physical_cores();
     int32_t n_predict = -1; // new tokens to predict
-    int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx = 512; // context size
     int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep = 0; // number of tokens to keep from initial prompt
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 9a2aa7c..085fdde 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -321,7 +321,6 @@ int main(int argc, char ** argv) {
         auto lparams = llama_context_default_params();

         lparams.n_ctx = 256;
-        lparams.n_parts = 1;
         lparams.seed = 1;
         lparams.f16_kv = false;
         lparams.use_mlock = false;
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 3559695..91f04b6 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -26,7 +26,6 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();

     lparams.n_ctx = params.n_ctx;
-    lparams.n_parts = params.n_parts;
     lparams.seed = params.seed;
     lparams.f16_kv = params.memory_f16;
     lparams.use_mmap = params.use_mmap;
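For context, after this patch the example code paths shown above configure a context without ever touching n_parts. The snippet below is a minimal sketch, not part of the commit; it assumes the llama.h API available at this revision (llama_context_default_params, llama_init_from_file, llama_free) and uses hypothetical parameter values for illustration:

```cpp
// Minimal sketch of post-patch context setup (assumed llama.h API of this
// revision; the n_ctx/seed/f16_kv values below are illustrative, not from
// the commit).
#include "llama.h"

#include <cstdio>

int main(int argc, char ** argv) {
    if (argc < 2) {
        fprintf(stderr, "usage: %s <model-path>\n", argv[0]);
        return 1;
    }

    llama_context_params lparams = llama_context_default_params();
    lparams.n_ctx  = 512;   // context size
    lparams.seed   = 1234;  // RNG seed
    lparams.f16_kv = true;  // f16 key/value cache
    // note: no lparams.n_parts assignment any more

    llama_context * ctx = llama_init_from_file(argv[1], lparams);
    if (ctx == NULL) {
        fprintf(stderr, "failed to load model '%s'\n", argv[1]);
        return 1;
    }

    llama_free(ctx);
    return 0;
}
```

The shape mirrors what llama_init_from_gpt_params, quantize-stats, and save-load-state do after this change: take the defaults, override the fields you care about, and pass the struct to the loader, with model partitioning no longer exposed as a knob.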