From 1b78ed20818b72306edc7208b9bfb69a1a0d3297 Mon Sep 17 00:00:00 2001
From: Kerfuffle <44031344+KerfuffleV2@users.noreply.github.com>
Date: Sun, 28 May 2023 11:48:57 -0600
Subject: Only show -ngl option when relevant + other doc/arg handling updates
 (#1625)

1. Add a `LLAMA_SUPPORTS_GPU_OFFLOAD` define to `llama.h` (defined when
   compiled with CLBlast or cuBLAS).
2. Update the argument handling in the common example code to only show
   the `-ngl`, `--n-gpu-layers` option when GPU offload is possible.
3. Add an entry for the `-ngl`, `--n-gpu-layers` option to the `main`
   and `server` examples documentation.
4. Update the `main` and `server` examples documentation to use the new
   dash-separator argument format.
5. Update the `server` example to use dash separators for its arguments
   and add `-ngl` to `--help` (only shown when compiled with the
   appropriate support). It will still accept `--memory_f32` and
   `--ctx_size` for compatibility.
6. Add a warning discouraging use of `--memory-f32` to the `--help`
   text and documentation of the `main` and `server` examples.

Rationale: https://github.com/ggerganov/llama.cpp/discussions/1593#discussioncomment-6004356
---
 llama.h | 5 +++++
 1 file changed, 5 insertions(+)

(limited to 'llama.h')

diff --git a/llama.h b/llama.h
index 37bae53..c6b0a28 100644
--- a/llama.h
+++ b/llama.h
@@ -31,6 +31,11 @@
 #define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
 #define LLAMA_SESSION_VERSION 1
 
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
+// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
+#define LLAMA_SUPPORTS_GPU_OFFLOAD
+#endif
+
 #ifdef __cplusplus
 extern "C" {
 #endif
-- 
cgit v1.2.3
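
For reference, a minimal sketch of how downstream code might consume the new define, in the spirit of the common example code this commit updates. The helper names, help text, and argument-parsing loop below are hypothetical; only `LLAMA_SUPPORTS_GPU_OFFLOAD` and the `-ngl`, `--n-gpu-layers` spellings come from the patch itself:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "llama.h" // provides LLAMA_SUPPORTS_GPU_OFFLOAD when built with CLBlast or cuBLAS

// Hypothetical sketch: only advertise -ngl in --help when it can take effect.
static void print_usage(void) {
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
    fprintf(stderr, "  -ngl N, --n-gpu-layers N  number of layers to offload to the GPU\n");
#endif
}

// Hypothetical sketch: still parse the flag without GPU support, but warn
// instead of silently accepting a value that would do nothing.
static int parse_n_gpu_layers(int argc, char ** argv) {
    for (int i = 1; i < argc - 1; i++) {
        if (strcmp(argv[i], "-ngl") == 0 || strcmp(argv[i], "--n-gpu-layers") == 0) {
#ifdef LLAMA_SUPPORTS_GPU_OFFLOAD
            return atoi(argv[i + 1]);
#else
            fprintf(stderr, "warning: not compiled with GPU offload support, ignoring %s\n", argv[i]);
#endif
        }
    }
    return 0; // default: keep all layers on the CPU
}
```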