about summary refs log tree commit diff
path: root/llama.cpp
diff options
context:
space:
mode:
author	Stephan Walter <stephan@walter.name>	2023-04-02 07:18:53 +0000
committer	GitHub <noreply@github.com>	2023-04-02 10:18:53 +0300
commit	81040f10aae3160317c5787c9c59acb219927826 (patch)
tree	6cd8c03882662779c13d7be51e5576b5133a4f7c /llama.cpp
parent	c4f89d8d73aab4318a6c61e3835135adfcf55407 (diff)
llama : do not allocate KV cache for "vocab_only == true" (#682)
Fixes sanitizer CI
Diffstat (limited to 'llama.cpp')
-rw-r--r--	llama.cpp	| 2
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/llama.cpp b/llama.cpp
index bed2420..1b3157c 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -1608,7 +1608,7 @@ struct llama_context * llama_init_from_file(
}
// reserve memory for context buffers
- {
+ if (!params.vocab_only) {
if (!kv_cache_init(ctx->model.hparams, ctx->model.kv_self, memory_type, ctx->model.hparams.n_ctx)) {
fprintf(stderr, "%s: kv_cache_init() failed for self-attention cache\n", __func__);
llama_free(ctx);