path: root/llama.h
author    Ettore Di Giacinto <mudler@users.noreply.github.com>    2023-06-20 03:24:39 +0200
committer GitHub <noreply@github.com>    2023-06-20 04:24:39 +0300
commit    aacdbd40562684665b6f7b8ba6695b7a2088bbb0 (patch)
tree      b9fe34e235b99d4c5ae07ee5c07f9375c86c9d17 /llama.h
parent    20568fe60f00155fa25e92eb3a7f6b911d557967 (diff)
llama : fix params struct alignment (#1936)
* Workaround struct misalignment during value-copy

Signed-off-by: mudler <mudler@localai.io>

* Move booleans to the bottom of the structure

Signed-off-by: mudler <mudler@localai.io>

* Add comment

Signed-off-by: mudler <mudler@localai.io>

---------

Signed-off-by: mudler <mudler@localai.io>
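Why the reorder helps: a bool sandwiched between wider members forces the compiler to emit interior padding, which shifts the offset of every field that follows; a consumer that copies the struct by value against a mismatched layout (the misalignment this patch works around) then reads members at the wrong offsets. The following is a minimal, hypothetical C sketch (struct and field names are invented for illustration, not taken from llama.h) that makes the offset shift visible:

    #include <stdbool.h>
    #include <stddef.h>
    #include <stdio.h>

    /* bool placed between int and pointer members: the compiler inserts
       interior padding after it so the next field stays naturally aligned */
    struct params_interleaved {
        int   n_ctx;
        bool  low_vram;   /* 1 byte, then 3 bytes of padding before seed */
        int   seed;
        void *user_data;
    };

    /* same fields with the bool moved to the tail: padding sits only at
       the end, so member offsets match a naive field-by-field mirror */
    struct params_grouped {
        int   n_ctx;
        int   seed;
        void *user_data;
        bool  low_vram;
    };

    int main(void) {
        printf("interleaved: sizeof=%zu offsetof(seed)=%zu\n",
               sizeof(struct params_interleaved),
               offsetof(struct params_interleaved, seed));
        printf("grouped:     sizeof=%zu offsetof(seed)=%zu\n",
               sizeof(struct params_grouped),
               offsetof(struct params_grouped, seed));
        return 0;
    }

On a typical 64-bit ABI both structs occupy 24 bytes, but seed sits at offset 8 in the interleaved layout versus 4 in the grouped one; that silent shift is exactly what corrupts a value-copy performed against the wrong layout.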
Diffstat (limited to 'llama.h')
-rw-r--r--  llama.h  17
1 file changed, 8 insertions, 9 deletions
diff --git a/llama.h b/llama.h
index 1241ba6..0de530d 100644
--- a/llama.h
+++ b/llama.h
@@ -71,28 +71,27 @@ extern "C" {
     typedef void (*llama_progress_callback)(float progress, void *ctx);
 
-   struct llama_context_params {
+    struct llama_context_params {
+        int seed;                              // RNG seed, -1 for random
         int n_ctx;                             // text context
         int n_batch;                           // prompt processing batch size
         int n_gpu_layers;                      // number of layers to store in VRAM
         int main_gpu;                          // the GPU that is used for scratch and small tensors
         float tensor_split[LLAMA_MAX_DEVICES]; // how to split layers across multiple GPUs
-        bool low_vram; // if true, reduce VRAM usage at the cost of performance
-        int seed; // RNG seed, -1 for random
+        // called with a progress value between 0 and 1, pass NULL to disable
+        llama_progress_callback progress_callback;
+        // context pointer passed to the progress callback
+        void * progress_callback_user_data;
+        // Keep the booleans together to avoid misalignment during copy-by-value.
+        bool low_vram;   // if true, reduce VRAM usage at the cost of performance
         bool f16_kv;     // use fp16 for KV cache
         bool logits_all; // the llama_eval() call computes all logits, not just the last one
         bool vocab_only; // only load the vocabulary, no weights
         bool use_mmap;   // use mmap if possible
         bool use_mlock;  // force system to keep model in RAM
         bool embedding;  // embedding mode only
-
-        // called with a progress value between 0 and 1, pass NULL to disable
-        llama_progress_callback progress_callback;
-        // context pointer passed to the progress callback
-        void * progress_callback_user_data;
     };
-
     // model file types
     enum llama_ftype {
         LLAMA_FTYPE_ALL_F32 = 0,
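For context, a minimal usage sketch, assuming the llama.h API as of this commit (which exposes llama_context_default_params()); the function returns the params struct by value, and that copy is the path the reordering is meant to keep well-defined for downstream bindings:

    #include <stdio.h>
    #include "llama.h"

    int main(void) {
        /* returned by value: this copy is what misbehaved in bindings
           while the bool members sat in the middle of the struct */
        struct llama_context_params params = llama_context_default_params();
        params.seed  = 42;    /* seed is now the first member */
        params.n_ctx = 2048;
        printf("n_ctx=%d seed=%d\n", params.n_ctx, params.seed);
        return 0;
    }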