aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgi Gerganov <ggerganov@gmail.com> 2023-06-17 19:30:22 +0300
committer Georgi Gerganov <ggerganov@gmail.com> 2023-06-17 19:31:20 +0300
commit 051e1b0e6a6e3aee7d989b47760980e6fda5861c (patch)
tree b2b2cdb4d59003606c4dce3751273a3911d3b7e0
parent 86c7571864ff331f8cdb9e092f3abeb123729a56 (diff)
llama : fix kv_cache `n` init (close #1903)
-rw-r--r-- .gitignore 1
-rw-r--r-- examples/CMakeLists.txt 1
-rw-r--r-- llama.cpp 2
3 files changed, 4 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
index e68fd72..e7bfd52 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,6 +34,7 @@ models/*
/perplexity
/embedding
/train-text-from-scratch
+/simple
/benchmark-matmult
/vdot
/server
diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt
index de005f3..cf9c4a2 100644
--- a/examples/CMakeLists.txt
+++ b/examples/CMakeLists.txt
@@ -38,6 +38,7 @@ else()
add_subdirectory(benchmark)
add_subdirectory(baby-llama)
add_subdirectory(train-text-from-scratch)
+ add_subdirectory(simple)
if (LLAMA_METAL)
add_subdirectory(metal)
endif()
diff --git a/llama.cpp b/llama.cpp
index a50846f..a2916b3 100644
--- a/llama.cpp
+++ b/llama.cpp
@@ -886,6 +886,7 @@ static bool kv_cache_init(
const int64_t n_elements = n_embd*n_mem;
cache.buf.resize(2u*n_elements*ggml_type_size(wtype) + 2u*MB);
+ cache.n = 0;
struct ggml_init_params params;
params.mem_size = cache.buf.size;
@@ -904,6 +905,7 @@ static bool kv_cache_init(
ggml_set_name(cache.k, "cache_k");
ggml_set_name(cache.v, "cache_v");
+ (void) n_gpu_layers;
#ifdef GGML_USE_CUBLAS
if (n_gpu_layers > n_layer + 1) {
ggml_cuda_assign_buffers_no_scratch(cache.v);