about summary refs log tree commit diff
path: root/examples/main
diff options
context:
space:
mode:
authorGeorgi Gerganov <ggerganov@gmail.com>2023-06-13 20:20:07 +0300
committerGitHub <noreply@github.com>2023-06-13 20:20:07 +0300
commit2347e45e7bdb09c9a7d74b2c0bc86c2b65f0c343 (patch)
treef3fb417af8c27a472e82a171d9dfb47ff8e2034e /examples/main
parent74d4cfa3438cb58bd177eed30014e6588694aaa8 (diff)
llama : do a warm-up eval at start for better timings (#1824)
Diffstat (limited to 'examples/main')
-rw-r--r--  examples/main/main.cpp  7
1 file changed, 7 insertions, 0 deletions
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 66d5631..efa913e 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -331,6 +331,13 @@ int main(int argc, char ** argv) {
std::vector<llama_token> embd;
+ // do one empty run to warm up the model
+ {
+ const std::vector<llama_token> tmp = { llama_token_bos(), };
+ llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
+ llama_reset_timings(ctx);
+ }
+
while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
// predict
if (embd.size() > 0) {