about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Georgi Gerganov <ggerganov@gmail.com> 2023-03-25 17:16:50 +0200
committer Georgi Gerganov <ggerganov@gmail.com> 2023-03-25 17:17:16 +0200
commit 502a400192013d3e95ed87b777e8fa3bec45713c (patch)
tree 137ea5f8ca5ab0871ada8e82def3da0ee991c2a1
parent 09aecbf6283bbce9449e2d96000073145aaaf5fc (diff)
Disable prompt verbosity by default and add option to enable (#480)
-rw-r--r-- main.cpp 15
-rw-r--r-- utils.cpp 3
-rw-r--r-- utils.h 1
3 files changed, 13 insertions, 6 deletions
diff --git a/main.cpp b/main.cpp
index 32c3a68..77260bb 100644
--- a/main.cpp
+++ b/main.cpp
@@ -275,13 +275,16 @@ int main(int argc, char ** argv) {
// determine newline token
auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
- fprintf(stderr, "\n");
- fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
- fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
- for (int i = 0; i < (int) embd_inp.size(); i++) {
- fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
+ if (params.verbose_prompt) {
+ fprintf(stderr, "\n");
+ fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
+ fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
+ for (int i = 0; i < (int) embd_inp.size(); i++) {
+ fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
+ }
+ fprintf(stderr, "\n");
}
- fprintf(stderr, "\n");
+
if (params.interactive) {
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
struct sigaction sigint_action;
diff --git a/utils.cpp b/utils.cpp
index 319924c..cea3096 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -134,6 +134,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
params.use_mlock = true;
} else if (arg == "--mtest") {
params.mem_test = true;
+ } else if (arg == "--verbose_prompt") {
+ params.verbose_prompt = true;
} else if (arg == "-r" || arg == "--reverse-prompt") {
if (++i >= argc) {
invalid_param = true;
@@ -212,6 +214,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
}
fprintf(stderr, " --mtest compute maximum memory usage\n");
+ fprintf(stderr, " --verbose-prompt print prompt before generation\n");
fprintf(stderr, " -m FNAME, --model FNAME\n");
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
fprintf(stderr, "\n");
diff --git a/utils.h b/utils.h
index 0690ef7..dede803 100644
--- a/utils.h
+++ b/utils.h
@@ -48,6 +48,7 @@ struct gpt_params {
bool perplexity = false; // compute perplexity over the prompt
bool use_mlock = false; // use mlock to keep model in memory
bool mem_test = false; // compute maximum memory usage
+ bool verbose_prompt = false; // print prompt tokens before generation
};
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);