diff options
author | Tobias Lütke <tobi@shopify.com> | 2023-07-05 16:51:13 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-05 16:51:13 -0400 |
commit | 31cfbb1013a482e89c72146e2063ac4362becae7 (patch) | |
tree | b6479860979f90d185a132efe883ae7fb62b07f5 /llama.h | |
parent | 983b555e9ddb36703cee4d22642afe958de093b7 (diff) |
Expose generation timings from server & update completions.js (#2116)
* use javascript generators as much cleaner API
Also add ways to access completion as promise and EventSource
* export llama_timings as struct and expose them in server
* update readme, update baked includes
* llama : uniform variable names + struct init
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 15 |
1 file changed, 15 insertions, 0 deletions
@@ -134,6 +134,20 @@ extern "C" {
         bool quantize_output_tensor; // quantize output.weight
     } llama_model_quantize_params;
 
+    // performance timing information
+    struct llama_timings {
+        double t_start_ms;
+        double t_end_ms;
+        double t_load_ms;
+        double t_sample_ms;
+        double t_p_eval_ms;
+        double t_eval_ms;
+
+        int32_t n_sample;
+        int32_t n_p_eval;
+        int32_t n_eval;
+    };
+
     LLAMA_API struct llama_context_params llama_context_default_params();
     LLAMA_API struct llama_model_quantize_params llama_model_quantize_default_params();
 
@@ -331,6 +345,7 @@ extern "C" {
     LLAMA_API llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates);
 
     // Performance information
+    LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx);
     LLAMA_API void llama_print_timings(struct llama_context * ctx);
     LLAMA_API void llama_reset_timings(struct llama_context * ctx);
 