From ae178ab46bfd6ecb2422da5dad441a4e2fef8b7e Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Fri, 21 Jul 2023 13:10:51 +0300
Subject: llama : make tensor_split ptr instead of array (#2272)

---
 llama.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'llama.h')

diff --git a/llama.h b/llama.h
index b676a38..c565f6a 100644
--- a/llama.h
+++ b/llama.h
@@ -88,7 +88,8 @@ extern "C" {
 int32_t n_batch; // prompt processing batch size
 int32_t n_gpu_layers; // number of layers to store in VRAM
 int32_t main_gpu; // the GPU that is used for scratch and small tensors
-float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
+
+const float * tensor_split; // how to split layers across multiple GPUs (size: LLAMA_MAX_DEVICES)
 
 // ref: https://github.com/ggerganov/llama.cpp/pull/2054
 float rope_freq_base; // RoPE base frequency
-- 
cgit v1.2.3