From c9c74b4e3f9dcfab8b0032749ff8a579ab4e4d8d Mon Sep 17 00:00:00 2001
From: Bach Le <bach@bullno1.com>
Date: Wed, 12 Jul 2023 00:18:43 +0800
Subject: llama : add classifier-free guidance (#2135)

* Initial implementation

* Remove debug print

* Restore signature of llama_init_from_gpt_params

* Free guidance context

* Make freeing of guidance_ctx conditional

* Make Classifier-Free Guidance a sampling function

* Correct typo. CFG already means context-free grammar.

* Record sampling time in llama_sample_classifier_free_guidance

* Shift all values by the max value before applying logsoftmax

* Fix styling based on review
---
 examples/common.h | 7 +++++++
 1 file changed, 7 insertions(+)

(limited to 'examples/common.h')
diff --git a/examples/common.h b/examples/common.h
index 96f2228..6315df9 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -48,6 +48,12 @@ struct gpt_params {
     float   mirostat_tau      = 5.00f; // target entropy
     float   mirostat_eta      = 0.10f; // learning rate
 
+    // Classifier-Free Guidance
+    // https://arxiv.org/abs/2306.17806
+    std::string cfg_negative_prompt;       // string to help guidance
+    float       cfg_scale         = 1.f;   // How strong is guidance
+    float       cfg_smooth_factor = 1.f;   // Smooth factor between old and new logits
+
     std::string model             = "models/7B/ggml-model.bin"; // model path
     std::string model_alias       = "unknown"; // model alias
     std::string prompt            = "";
@@ -99,6 +105,7 @@ std::vector<llama_token> llama_tokenize(struct llama_context * ctx, const std::s
 //
 
 std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_params(const gpt_params & params);
+struct llama_context_params llama_context_params_from_gpt_params(const gpt_params & params);
 
 //
 // Console utils
-- 
cgit v1.2.3