diff options
author | Bach Le <bach@bullno1.com> | 2023-07-12 00:18:43 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-11 19:18:43 +0300 |
commit | c9c74b4e3f9dcfab8b0032749ff8a579ab4e4d8d (patch) | |
tree | 651d6915218efa83cad8745310f7d1114ca21e2a /examples/common.cpp | |
parent | 3ec7e596b2ba3f43c22f441254ca2bcfa91102ba (diff) |
llama : add classifier-free guidance (#2135)
* Initial implementation
* Remove debug print
* Restore signature of llama_init_from_gpt_params
* Free guidance context
* Make freeing of guidance_ctx conditional
* Make Classifier-Free Guidance a sampling function
* Correct typo. CFG already means context-free grammar.
* Record sampling time in llama_sample_classifier_free_guidance
* Shift all values by the max value before applying logsoftmax
* Fix styling based on review
Diffstat (limited to 'examples/common.cpp')
-rw-r--r-- | examples/common.cpp | 30 |
1 file changed, 29 insertions, 1 deletion
```diff
diff --git a/examples/common.cpp b/examples/common.cpp
index fad1688..fd551c9 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -236,6 +236,24 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.mirostat_tau = std::stof(argv[i]);
+        } else if (arg == "--cfg-negative-prompt") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.cfg_negative_prompt = argv[i];
+        } else if (arg == "--cfg-scale") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.cfg_scale = std::stof(argv[i]);
+        } else if (arg == "--cfg-smooth-factor") {
+            if (++i >= argc) {
+                invalid_param = true;
+                break;
+            }
+            params.cfg_smooth_factor = std::stof(argv[i]);
         } else if (arg == "-b" || arg == "--batch-size") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -469,6 +487,10 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, " modifies the likelihood of token appearing in the completion,\n");
     fprintf(stderr, " i.e. `--logit-bias 15043+1` to increase likelihood of token ' Hello',\n");
     fprintf(stderr, " or `--logit-bias 15043-1` to decrease likelihood of token ' Hello'\n");
+    fprintf(stderr, " --cfg-negative-prompt PROMPT \n");
+    fprintf(stderr, " negative prompt to use for guidance. (default: empty)\n");
+    fprintf(stderr, " --cfg-scale N strength of guidance (default: %f, 1.0 = disable)\n", params.cfg_scale);
+    fprintf(stderr, " --cfg-smooth-factor N smooth factor between old and new logits (default: %f, 1.0 = no smoothing)\n", params.cfg_smooth_factor);
     fprintf(stderr, " -c N, --ctx-size N size of the prompt context (default: %d)\n", params.n_ctx);
     fprintf(stderr, " --ignore-eos ignore end of stream token and continue generating (implies --logit-bias 2-inf)\n");
     fprintf(stderr, " --no-penalize-nl do not penalize newline token\n");
@@ -535,7 +557,7 @@ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::s
     return res;
 }
 
-std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params) {
+struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params) {
     auto lparams = llama_context_default_params();
 
     lparams.n_ctx = params.n_ctx;
@@ -551,6 +573,12 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par
     lparams.logits_all = params.perplexity;
     lparams.embedding = params.embedding;
 
+    return lparams;
+}
+
+std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params) {
+    auto lparams = llama_context_params_from_gpt_params(params);
+
     llama_model * model = llama_load_model_from_file(params.model.c_str(), lparams);
     if (model == NULL) {
         fprintf(stderr, "%s: error: failed to load model '%s'\n", __func__, params.model.c_str());
```