Diffstat (limited to 'llama.h')
-rw-r--r--  llama.h  8
1 file changed, 7 insertions(+), 1 deletion(-)
diff --git a/llama.h b/llama.h
index c6b0a28..87fa973 100644
--- a/llama.h
+++ b/llama.h
@@ -31,7 +31,7 @@
#define LLAMA_SESSION_MAGIC LLAMA_FILE_MAGIC_GGSN
#define LLAMA_SESSION_VERSION 1
-#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST)
+#if defined(GGML_USE_CUBLAS) || defined(GGML_USE_CLBLAST) || defined(GGML_USE_METAL)
// Defined when llama.cpp is compiled with support for offloading model layers to GPU.
#define LLAMA_SUPPORTS_GPU_OFFLOAD
#endif
@@ -173,6 +173,12 @@ extern "C" {
int n_past,
int n_threads);
+ // Export a static computation graph for context of 511 and batch size of 1
+ // NOTE: since this functionality is mostly for debugging and demonstration purposes, we hardcode these
+ // parameters here to keep things simple
+ // IMPORTANT: do not use for anything other than debugging and testing!
+ LLAMA_API int llama_eval_export(struct llama_context * ctx, const char * fname);
+
// Convert the provided text into tokens.
// The tokens pointer must be large enough to hold the resulting tokens.
// Returns the number of tokens on success, no more than n_max_tokens