author     Willy Tarreau <w@1wt.eu>        2023-06-07 04:10:17 +0200
committer  GitHub <noreply@github.com>     2023-06-06 22:10:17 -0400
commit     35a84916fb029905c44746127026079268216e7a
tree       e7baab7b8c74528460690694902eb7d79bae8c24 /examples
parent     2d7bf110edd8c49209401a16132052cba706ffd0
main: add the possibility to open the prompt cache read-only (#1640)
The prompt cache provides a nice speed-up when the same prompt
prefix is reused across multiple evaluations, but using the cache also
updates it, which is not always desirable. One use case is a large
prompt whose first part contains some context and usage rules, and
whose second part contains the variable data of the problem being
studied. In this case it is desirable to save the first part once, and
to always reuse it as-is without it being updated by the second part.
The new argument --prompt-cache-ro enables this read-only mode on the
prompt cache. The parts of the prompt that match the cache are loaded
from it, but the cache itself is never modified. Here this reduced a
total analysis time from 112s to 49.7s, without having to back up
and restore a copy of the prompt cache, which takes significant time
at 500 MB.
Signed-off-by: Willy Tarreau <w@1wt.eu>
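
For context, a minimal usage sketch of the workflow this commit enables (the model path and file names below are illustrative, not part of the commit): first populate the cache from the fixed prefix alone, then run each analysis with the full prompt plus --prompt-cache-ro so the cached prefix is reused but never rewritten.

    # one-time: save the evaluated state of the fixed prefix
    ./main -m models/7B/ggml-model.bin --prompt-cache prefix.bin -f prefix.txt

    # each run: full prompt = prefix + variable part; the cache is read, not updated
    ./main -m models/7B/ggml-model.bin --prompt-cache prefix.bin --prompt-cache-ro -f full_prompt.txt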
Diffstat (limited to 'examples')

 examples/common.cpp    | 3 +++
 examples/common.h      | 1 +
 examples/main/main.cpp | 4 ++--
 3 files changed, 6 insertions(+), 2 deletions(-)
diff --git a/examples/common.cpp b/examples/common.cpp
index c373462..f5d886a 100644
--- a/examples/common.cpp
+++ b/examples/common.cpp
@@ -132,6 +132,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
             params.path_prompt_cache = argv[i];
         } else if (arg == "--prompt-cache-all") {
             params.prompt_cache_all = true;
+        } else if (arg == "--prompt-cache-ro") {
+            params.prompt_cache_ro = true;
         } else if (arg == "-f" || arg == "--file") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -432,6 +434,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stderr, "  --prompt-cache FNAME  file to cache prompt state for faster startup (default: none)\n");
     fprintf(stderr, "  --prompt-cache-all    if specified, saves user input and generations to cache as well.\n");
     fprintf(stderr, "                        not supported with --interactive or other interactive options\n");
+    fprintf(stderr, "  --prompt-cache-ro     if specified, uses the prompt cache but does not update it.\n");
     fprintf(stderr, "  --random-prompt       start with a randomized prompt.\n");
     fprintf(stderr, "  --in-prefix STRING    string to prefix user inputs with (default: empty)\n");
     fprintf(stderr, "  --in-suffix STRING    string to suffix after user inputs with (default: empty)\n");
diff --git a/examples/common.h b/examples/common.h
index 12b4973..826e2ae 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -62,6 +62,7 @@ struct gpt_params {
     bool use_color         = false; // use color to distinguish generations and inputs
     bool interactive       = false; // interactive mode
     bool prompt_cache_all  = false; // save user input and generations to prompt cache
+    bool prompt_cache_ro   = false; // open the prompt cache read-only and do not update it
 
     bool embedding         = false; // get only sentence embedding
     bool interactive_first = false; // wait for user input immediately
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index b4d1293..de63faa 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -417,7 +417,7 @@ int main(int argc, char ** argv) {
             const bool penalize_nl = params.penalize_nl;
 
             // optionally save the session on first sample (for faster prompt loading next time)
-            if (!path_session.empty() && need_to_save_session) {
+            if (!path_session.empty() && need_to_save_session && !params.prompt_cache_ro) {
                 need_to_save_session = false;
                 llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
             }
@@ -630,7 +630,7 @@ int main(int argc, char ** argv) {
         }
     }
 
-    if (!path_session.empty() && params.prompt_cache_all) {
+    if (!path_session.empty() && params.prompt_cache_all && !params.prompt_cache_ro) {
         fprintf(stderr, "\n%s: saving final output to session file '%s'\n", __func__, path_session.c_str());
         llama_save_session_file(ctx, path_session.c_str(), session_tokens.data(), session_tokens.size());
     }