aboutsummaryrefslogtreecommitdiff
path: root/llama.h
diff options
context:
space:
mode:
authorGeorgi Gerganov <ggerganov@gmail.com>2023-04-24 07:40:02 +0300
committerGitHub <noreply@github.com>2023-04-24 07:40:02 +0300
commitc4fe84fb0d28851a5c10e5a633f82ae2ba3b7fae (patch)
tree65b4a9d520b9ecc1b092363b198c0298b85086c9 /llama.h
parent1d78fecdab4087028a38517e86ed129f077174d8 (diff)
llama : refactor get / set state + remove redundant kv cache API (#1143)
Diffstat (limited to 'llama.h')
-rw-r--r--llama.h14
1 file changed, 0 insertions, 14 deletions
diff --git a/llama.h b/llama.h
index f68a0cb..e9e3abe 100644
--- a/llama.h
+++ b/llama.h
@@ -112,23 +112,9 @@ extern "C" {
const char * path_base_model,
int n_threads);
- // Returns the KV cache that will contain the context for the
- // ongoing prediction with the model.
- LLAMA_API const uint8_t * llama_get_kv_cache(struct llama_context * ctx);
-
- // Returns the size of the KV cache
- LLAMA_API size_t llama_get_kv_cache_size(struct llama_context * ctx);
-
// Returns the number of tokens in the KV cache
LLAMA_API int llama_get_kv_cache_token_count(struct llama_context * ctx);
- // Sets the KV cache containing the current context for the model
- LLAMA_API void llama_set_kv_cache(
- struct llama_context * ctx,
- const uint8_t * kv_cache,
- size_t n_size,
- int n_token_count);
-
// Returns the size in bytes of the state (rng, logits, embedding and kv_cache)
LLAMA_API size_t llama_get_state_size(struct llama_context * ctx);