author     Stephan Walter <stephan@walter.name>      2023-05-17 22:12:01 +0000
committer  GitHub <noreply@github.com>               2023-05-17 22:12:01 +0000
commit     dc271c52ed65e7c8dfcbaaf84dabb1f788e4f3d0 (patch)
tree       7057c49800c88566dd2bd7d4df051df6c53f7b89 /examples
parent     c238b5873a1ea496db03ffcfe124c9d0d83afbc6 (diff)

    Remove unused n_parts parameter (#1509)
Diffstat (limited to 'examples')
-rw-r--r--  examples/common.cpp                          | 8
-rw-r--r--  examples/common.h                            | 1
-rw-r--r--  examples/quantize-stats/quantize-stats.cpp   | 1
-rw-r--r--  examples/save-load-state/save-load-state.cpp | 1
4 files changed, 0 insertions, 11 deletions
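
Note: this cgit view is limited to examples/, so the removal of the
n_parts field from llama_context_params itself (in llama.h/llama.cpp) is
not shown here. For orientation, here is a minimal sketch of context
creation after this change, assuming the llama.h API of this era
(llama_context_default_params, llama_init_from_file, llama_free); the
model path is a placeholder:

    #include "llama.h"

    int main() {
        // model partitioning is no longer a caller concern: there is no
        // n_parts field left to set
        auto lparams = llama_context_default_params();
        lparams.n_ctx = 512; // context size, matching the gpt_params default
        lparams.seed  = -1;  // -1 = random seed

        llama_context * ctx = llama_init_from_file("models/7B/ggml-model.bin", lparams);
        if (ctx == NULL) {
            return 1;
        }
        llama_free(ctx);
        return 0;
    }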
diff --git a/examples/common.cpp b/examples/common.cpp
index 259880a..a6abc49 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -321,12 +321,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 invalid_param = true;
                 break;
             }
-        } else if (arg == "--n-parts") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            params.n_parts = std::stoi(argv[i]);
         } else if (arg == "-h" || arg == "--help") {
             gpt_print_usage(argc, argv, default_params);
             exit(0);
@@ -418,7 +412,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "  --no-penalize-nl      do not penalize newline token\n");
     fprintf(stderr, "  --memory-f32          use f32 instead of f16 for memory key+value\n");
     fprintf(stderr, "  --temp N              temperature (default: %.1f)\n", (double)params.temp);
-    fprintf(stderr, "  --n-parts N           number of model parts (default: -1 = determine from dimensions)\n");
     fprintf(stderr, "  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
     fprintf(stderr, "  --perplexity          compute perplexity over the prompt\n");
     fprintf(stderr, "  --keep                number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep);
@@ -473,7 +466,6 @@ struct llama_context * llama_init_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();
 
     lparams.n_ctx        = params.n_ctx;
-    lparams.n_parts      = params.n_parts;
     lparams.n_gpu_layers = params.n_gpu_layers;
     lparams.seed         = params.seed;
     lparams.f16_kv       = params.memory_f16;
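
With the --n-parts branch deleted, a stale --n-parts on the command line
now falls through to the parser's catch-all branch, which lies outside
the hunks above. A self-contained sketch of that pattern (hypothetical
flag names, not the commit's code):

    #include <cstdio>
    #include <cstdlib>
    #include <string>

    static void parse_args(int argc, char ** argv) {
        for (int i = 1; i < argc; i++) {
            std::string arg = argv[i];
            if (arg == "-c" || arg == "--ctx-size") {
                if (++i >= argc) {
                    fprintf(stderr, "error: missing value for %s\n", arg.c_str());
                    exit(1);
                }
                int n_ctx = std::stoi(argv[i]); // consume and parse the value
                (void) n_ctx;
            // a "--n-parts" branch used to sit in this chain; with it gone,
            // "--n-parts" reaches the catch-all below
            } else {
                fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
                exit(1);
            }
        }
    }

    int main(int argc, char ** argv) {
        parse_args(argc, argv);
        return 0;
    }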
diff --git a/examples/common.h b/examples/common.h
index f4e07a2..2ad20ba 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -24,7 +24,6 @@ struct gpt_params {
     int32_t seed      = -1;  // RNG seed
     int32_t n_threads = get_num_physical_cores();
     int32_t n_predict = -1;  // new tokens to predict
-    int32_t n_parts   = -1;  // amount of model parts (-1 = determine from model dimensions)
     int32_t n_ctx     = 512; // context size
     int32_t n_batch   = 512; // batch size for prompt processing (must be >=32 to use BLAS)
     int32_t n_keep    = 0;   // number of tokens to keep from initial prompt
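
For reference, examples/common.h as it reads after the change; this
excerpt is reconstructed from the hunk's context lines, and fields
outside the hunk are elided:

    struct gpt_params {
        int32_t seed      = -1;  // RNG seed
        int32_t n_threads = get_num_physical_cores();
        int32_t n_predict = -1;  // new tokens to predict
        int32_t n_ctx     = 512; // context size
        int32_t n_batch   = 512; // batch size for prompt processing (must be >=32 to use BLAS)
        int32_t n_keep    = 0;   // number of tokens to keep from initial prompt
        // ... remaining fields unchanged ...
    };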
diff --git a/examples/quantize-stats/quantize-stats.cpp b/examples/quantize-stats/quantize-stats.cpp
index 9a2aa7c..085fdde 100644
--- a/examples/quantize-stats/quantize-stats.cpp
+++ b/examples/quantize-stats/quantize-stats.cpp
@@ -321,7 +321,6 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();
 
     lparams.n_ctx     = 256;
-    lparams.n_parts   = 1;
     lparams.seed      = 1;
     lparams.f16_kv    = false;
     lparams.use_mlock = false;
diff --git a/examples/save-load-state/save-load-state.cpp b/examples/save-load-state/save-load-state.cpp
index 3559695..91f04b6 100644
--- a/examples/save-load-state/save-load-state.cpp
+++ b/examples/save-load-state/save-load-state.cpp
@@ -26,7 +26,6 @@ int main(int argc, char ** argv) {
     auto lparams = llama_context_default_params();
 
     lparams.n_ctx    = params.n_ctx;
-    lparams.n_parts  = params.n_parts;
     lparams.seed     = params.seed;
     lparams.f16_kv   = params.memory_f16;
     lparams.use_mmap = params.use_mmap;
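
The last two hunks are the same mechanical cleanup in two standalone
examples: drop the one-line copy of the removed field. Downstream code
migrates the same way. A sketch, where make_lparams is a hypothetical
helper and gpt_params/llama.h are assumed as above:

    #include "common.h"
    #include "llama.h"

    static llama_context_params make_lparams(const gpt_params & params) {
        auto lparams = llama_context_default_params();
        lparams.n_ctx     = params.n_ctx;
        // lparams.n_parts = params.n_parts; // delete: field removed by this PR
        lparams.seed      = params.seed;
        lparams.f16_kv    = params.memory_f16;
        lparams.use_mmap  = params.use_mmap;
        lparams.use_mlock = params.use_mlock;
        return lparams;
    }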