diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-04-24 07:40:02 +0300 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-24 07:40:02 +0300 |
commit | c4fe84fb0d28851a5c10e5a633f82ae2ba3b7fae (patch) | |
tree | 65b4a9d520b9ecc1b092363b198c0298b85086c9 /llama.h | |
parent | 1d78fecdab4087028a38517e86ed129f077174d8 (diff) |
llama : refactor get / set state + remove redundant kv cache API (#1143)
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 14 |
1 files changed, 0 insertions, 14 deletions
@@ -112,23 +112,9 @@ extern "C" {
                              const char * path_base_model,
                                     int   n_threads);
 
-    // Returns the KV cache that will contain the context for the
-    // ongoing prediction with the model.
-    LLAMA_API const uint8_t * llama_get_kv_cache(struct llama_context * ctx);
-
-    // Returns the size of the KV cache
-    LLAMA_API size_t llama_get_kv_cache_size(struct llama_context * ctx);
-
     // Returns the number of tokens in the KV cache
     LLAMA_API int llama_get_kv_cache_token_count(struct llama_context * ctx);
 
-    // Sets the KV cache containing the current context for the model
-    LLAMA_API void llama_set_kv_cache(
-            struct llama_context * ctx,
-         const uint8_t * kv_cache,
-                  size_t   n_size,
-                     int   n_token_count);
-
     // Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
     LLAMA_API size_t llama_get_state_size(struct llama_context * ctx);