about | summary | refs | log | tree | commit | diff
path: root/examples
diff options
context:
space:
mode:
Diffstat (limited to 'examples')
-rw-r--r-- examples/common.cpp | 3
-rw-r--r-- examples/common.h | 1
-rw-r--r-- examples/main/main.cpp | 4
3 files changed, 6 insertions, 2 deletions
diff --git a/examples/common.cpp b/examples/common.cpp
index c373462..f5d886a 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -132,6 +132,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
params.path_prompt_cache = argv[i];
} else if (arg == "--prompt-cache-all") {
params.prompt_cache_all = true;
+ } else if (arg == "--prompt-cache-ro") {
+ params.prompt_cache_ro = true;
} else if (arg == "-f" || arg == "--file") {
if (++i >= argc) {
invalid_param = true;
@@ -432,6 +434,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
fprintf(stderr, " --prompt-cache FNAME file to cache prompt state for faster startup (default: none)\n");
fprintf(stderr, " --prompt-cache-all if specified, saves user input and generations to cache as well.\n");
fprintf(stderr, " not supported with --interactive or other interactive options\n");
+ fprintf(stderr, " --prompt-cache-ro if specified, uses the prompt cache but does not update it.\n");
fprintf(stderr, " --random-prompt start with a randomized prompt.\n");
fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n");
fprintf(stderr, " --in-suffix STRING string to suffix after user inputs with (default: empty)\n");
diff --git a/examples/common.h b/examples/common.h
index 12b4973..826e2ae 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -62,6 +62,7 @@ struct gpt_params {
bool use_color = false; // use color to distinguish generations and inputs
bool interactive = false; // interactive mode
bool prompt_cache_all = false; // save user input and generations to prompt cache
+ bool prompt_cache_ro = false; // open the prompt cache read-only and do not update it
bool embedding = false; // get only sentence embedding
bool interactive_first = false; // wait for user input immediately
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index b4d1293..de63faa 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -417,7 +417,7 @@ int main(int argc, char ** argv) {
const bool penalize_nl = params.penalize_nl;
// optionally save the session on first sample (for faster prompt loading next time)
- if (!path_session.empty() && need_to_save_session) {
+ if (!path_session.empty() && need_to_save_session && !params.prompt_cache_ro) {
need_to_save_session = false;
llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
}
@@ -630,7 +630,7 @@ int main(int argc, char ** argv) {
}
}
- if (!path_session.empty() && params.prompt_cache_all) {
+ if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) {
fprintf(stderr, "\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
}