author     Georgi Gerganov <ggerganov@gmail.com>    2023-03-25 17:16:50 +0200
committer  Georgi Gerganov <ggerganov@gmail.com>    2023-03-25 17:17:16 +0200
commit     502a400192013d3e95ed87b777e8fa3bec45713c
tree       137ea5f8ca5ab0871ada8e82def3da0ee991c2a1
parent     09aecbf6283bbce9449e2d96000073145aaaf5fc
Disable prompt verbosity by default and add option to enable (#480)
-rw-r--r--  main.cpp  | 15 +++++++++------
-rw-r--r--  utils.cpp |  3 +++
-rw-r--r--  utils.h   |  1 +
3 files changed, 13 insertions(+), 6 deletions(-)
diff --git a/main.cpp b/main.cpp
--- a/main.cpp
+++ b/main.cpp
@@ -275,13 +275,16 @@ int main(int argc, char ** argv) {
     // determine newline token
     auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
 
-    fprintf(stderr, "\n");
-    fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
-    fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
-    for (int i = 0; i < (int) embd_inp.size(); i++) {
-        fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
+    if (params.verbose_prompt) {
+        fprintf(stderr, "\n");
+        fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
+        fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
+        for (int i = 0; i < (int) embd_inp.size(); i++) {
+            fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
+        }
+        fprintf(stderr, "\n");
     }
-    fprintf(stderr, "\n");
+
     if (params.interactive) {
 #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
         struct sigaction sigint_action;
diff --git a/utils.cpp b/utils.cpp
--- a/utils.cpp
+++ b/utils.cpp
@@ -134,6 +134,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.use_mlock = true;
         } else if (arg == "--mtest") {
             params.mem_test = true;
+        } else if (arg == "--verbose-prompt") {
+            params.verbose_prompt = true;
         } else if (arg == "-r" || arg == "--reverse-prompt") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -212,6 +214,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
         fprintf(stderr, "  --mlock               force system to keep model in RAM rather than swapping or compressing\n");
     }
     fprintf(stderr, "  --mtest               compute maximum memory usage\n");
+    fprintf(stderr, "  --verbose-prompt      print prompt before generation\n");
     fprintf(stderr, "  -m FNAME, --model FNAME\n");
     fprintf(stderr, "                        model path (default: %s)\n", params.model.c_str());
     fprintf(stderr, "\n");
diff --git a/utils.h b/utils.h
--- a/utils.h
+++ b/utils.h
@@ -48,6 +48,7 @@ struct gpt_params {
     bool perplexity     = false; // compute perplexity over the prompt
     bool use_mlock      = false; // use mlock to keep model in memory
     bool mem_test       = false; // compute maximum memory usage
+    bool verbose_prompt = false; // print prompt tokens before generation
 };
 
 bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
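For reference, here is a minimal, self-contained sketch of the pattern this commit applies: the token-by-token prompt dump is gated behind an opt-in boolean that defaults to false. This is illustrative code, not llama.cpp itself; the option parsing and the byte-per-token "tokenizer" are simplified stand-ins, and gpt_params_sketch is a hypothetical name.

#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

struct gpt_params_sketch {
    std::string prompt;
    bool verbose_prompt = false; // quiet by default, as in the commit
};

int main(int argc, char ** argv) {
    gpt_params_sketch params;

    // minimal stand-in for gpt_params_parse()
    for (int i = 1; i < argc; i++) {
        if (strcmp(argv[i], "--verbose-prompt") == 0) {
            params.verbose_prompt = true;
        } else if (strcmp(argv[i], "-p") == 0 && i + 1 < argc) {
            params.prompt = argv[++i];
        }
    }

    // stand-in "tokenizer": one token per byte of the prompt
    std::vector<int> embd_inp;
    for (unsigned char c : params.prompt) {
        embd_inp.push_back(c);
    }

    // the gated dump, shaped like the block the commit wraps in main.cpp
    if (params.verbose_prompt) {
        fprintf(stderr, "\n");
        fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
        fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
        for (int i = 0; i < (int) embd_inp.size(); i++) {
            fprintf(stderr, "%6d -> '%c'\n", embd_inp[i], (char) embd_inp[i]);
        }
        fprintf(stderr, "\n");
    }
    return 0;
}

Built as-is, the program prints nothing extra by default; invoked with --verbose-prompt -p "hello" it echoes the prompt and its tokens to stderr, matching the default-quiet behavior this commit gives main.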