diff options
author | Ettore Di Giacinto <mudler@users.noreply.github.com> | 2023-06-20 03:24:39 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-20 04:24:39 +0300 |
commit | aacdbd40562684665b6f7b8ba6695b7a2088bbb0 (patch) | |
tree | b9fe34e235b99d4c5ae07ee5c07f9375c86c9d17 /llama.h | |
parent | 20568fe60f00155fa25e92eb3a7f6b911d557967 (diff) |
llama : fix params struct alignment (#1936)
* Workaround struct misalignment during value-copy
Signed-off-by: mudler <mudler@localai.io>
* Move booleans at the bottom of the structure
Signed-off-by: mudler <mudler@localai.io>
* Add comment
Signed-off-by: mudler <mudler@localai.io>
---------
Signed-off-by: mudler <mudler@localai.io>
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 17 |
1 file changed, 8 insertions, 9 deletions
@@ -71,28 +71,27 @@ extern "C" { typedef void (*llama_progress_callback)(float progress, void *ctx); - struct llama_context_params { + struct llama_context_params { + int seed; // RNG seed, -1 for random int n_ctx; // text context int n_batch; // prompt processing batch size int n_gpu_layers; // number of layers to store in VRAM int main_gpu; // the GPU that is used for scratch and small tensors float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs - bool low_vram; // if true, reduce VRAM usage at the cost of performance - int seed; // RNG seed, -1 for random + // called with a progress value between 0 and 1, pass NULL to disable + llama_progress_callback progress_callback; + // context pointer passed to the progress callback + void * progress_callback_user_data; + // Keep the booleans together to avoid misalignment during copy-by-value. + bool low_vram; // if true, reduce VRAM usage at the cost of performance bool f16_kv; // use fp16 for KV cache bool logits_all; // the llama_eval() call computes all logits, not just the last one bool vocab_only; // only load the vocabulary, no weights bool use_mmap; // use mmap if possible bool use_mlock; // force system to keep model in RAM bool embedding; // embedding mode only - - // called with a progress value between 0 and 1, pass NULL to disable - llama_progress_callback progress_callback; - // context pointer passed to the progress callback - void * progress_callback_user_data; }; - // model file types enum llama_ftype { LLAMA_FTYPE_ALL_F32 = 0, |