diff options
author | Evan Jones <evan.q.jones@gmail.com> | 2023-05-02 22:26:13 -0400 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-02 22:26:13 -0400 |
commit | e216aa04633892b972d013719e38b59fd4917341 (patch) | |
tree | 53ffbc32f976fb799dd679204fa7e15f887f9e6b /llama.h | |
parent | 2485d7a4d39406cd0f468e35551b472cceb5bd61 (diff) |
llama : only copy used KV cache in get / set state (#1272)
* llama : only copy used KV cache in get / set state
* switch to ggml for copying k, v
* avoid designated initializers
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 5 |
1 files changed, 3 insertions, 2 deletions
```diff
@@ -23,7 +23,7 @@
 #define LLAMA_FILE_MAGIC 'ggjt'
 #define LLAMA_FILE_MAGIC_UNVERSIONED 'ggml'
 #define LLAMA_SESSION_MAGIC 'ggsn'
-#define LLAMA_SESSION_VERSION 0
+#define LLAMA_SESSION_VERSION 1
 
 #ifdef __cplusplus
 extern "C" {
@@ -127,7 +127,8 @@ extern "C" {
     // Sets the current rng seed.
     LLAMA_API void llama_set_rng_seed(struct llama_context * ctx, int seed);
 
-    // Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
+    // Returns the maximum size in bytes of the state (rng, logits, embedding
+    // and kv_cache) - will often be smaller after compacting tokens
     LLAMA_API size_t llama_get_state_size(const struct llama_context * ctx);
 
     // Copies the state to the specified destination address.
```