commit    2d7bf110edd8c49209401a16132052cba706ffd0
tree      a27683df26cb2126bcb9cdf090e305a3e36d2a07
parent    2a4e41a086ce80da68c402457c75c77e52dcc698
author    Georgi Gerganov <ggerganov@gmail.com>  2023-06-06 22:54:39 +0300
committer Georgi Gerganov <ggerganov@gmail.com>  2023-06-06 22:54:39 +0300
llama : fix vram_scratch var
 llama.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/llama.cpp b/llama.cpp
--- a/llama.cpp
+++ b/llama.cpp
@@ -1076,6 +1076,7 @@ static void llama_model_load_internal(
 
     // prepare memory for the weights
     size_t vram_weights = 0;
+    size_t vram_scratch = 0;
     {
         const uint32_t n_embd  = hparams.n_embd;
         const uint32_t n_layer = hparams.n_layer;
@@ -1152,8 +1153,9 @@ static void llama_model_load_internal(
     fprintf(stderr, "%s: mem required  = %7.2f MB (+ %7.2f MB per state)\n", __func__,
             mem_required / 1024.0 / 1024.0, mem_required_state / 1024.0 / 1024.0);
 
+    (void) vram_scratch;
 #ifdef GGML_USE_CUBLAS
-    const size_t vram_scratch = n_batch * MB;
+    vram_scratch = n_batch * MB;
     ggml_cuda_set_scratch_size(vram_scratch);
     if (n_gpu_layers > 0) {
         fprintf(stderr, "%s: allocating batch_size x 1 MB = %ld MB VRAM for the scratch buffer\n",
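The change hoists vram_scratch out of the GGML_USE_CUBLAS block so the variable is declared in every build, and the (void) cast keeps non-CUDA builds from emitting an unused-variable warning now that the only assignment is conditionally compiled. Below is a minimal, self-contained sketch of that pattern; the function name load_model, the MB constant, and the printf output are hypothetical stand-ins, not llama.cpp code.

// Sketch of the "(void) var" unused-variable suppression used in the fix above.
// Hypothetical example; only the pattern itself mirrors llama.cpp.
#include <cstddef>
#include <cstdio>

static const size_t MB = 1024 * 1024;

static void load_model(size_t n_batch) {
    // Declared unconditionally so the variable exists in every build
    // (it can no longer be const, since the assignment is conditional).
    size_t vram_scratch = 0;

    // Without this cast, builds where GGML_USE_CUBLAS is undefined would
    // warn about an unused variable (e.g. under -Wunused-variable).
    (void) vram_scratch;
#ifdef GGML_USE_CUBLAS
    vram_scratch = n_batch * MB;
    std::printf("scratch buffer: %zu MB\n", vram_scratch / MB);
#else
    (void) n_batch; // same trick: n_batch is only read in the CUDA path
#endif
}

int main() {
    load_model(512);
    return 0;
}

Compared with wrapping the declaration in its own #ifdef, this keeps the declaration in one place and adds only a single no-op statement to non-CUDA builds.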