diff options
author | Tobias Lütke <tobi@shopify.com> | 2023-07-05 16:51:13 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-05 16:51:13 -0400 |
commit | 31cfbb1013a482e89c72146e2063ac4362becae7 (patch) | |
tree | b6479860979f90d185a132efe883ae7fb62b07f5 /llama.h | |
parent | 983b555e9ddb36703cee4d22642afe958de093b7 (diff) |
Expose generation timings from server & update completions.js (#2116)
* use javascript generators as much cleaner API
Also add ways to access completion as promise and EventSource
* export llama_timings as struct and expose them in server
* update readme, update baked includes
* llama : uniform variable names + struct init
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 15 |
1 file changed, 15 insertions, 0 deletions
@@ -134,6 +134,20 @@ extern "C" {
         bool quantize_output_tensor; // quantize output.weight
     } llama_model_quantize_params;
 
+    // performance timing information
+    struct llama_timings {
+        double t_start_ms;
+        double t_end_ms;
+        double t_load_ms;
+        double t_sample_ms;
+        double t_p_eval_ms;
+        double t_eval_ms;
+
+        int32_t n_sample;
+        int32_t n_p_eval;
+        int32_t n_eval;
+    };
+
     LLAMA_API struct llama_context_params llama_context_default_params();
     LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params();
 
@@ -331,6 +345,7 @@ extern "C" {
     LLAMA_API llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates);
 
     // Performance information
+    LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
     LLAMA_API void llama_print_timings(struct llama_context * ctx);
     LLAMA_API void llama_reset_timings(struct llama_context * ctx);
 