author    Didzis Gosko <didzis@users.noreply.github.com>    2023-06-24 11:47:58 +0300
committer GitHub <noreply@github.com>    2023-06-24 11:47:58 +0300
commit    527b6fba1d237befb324fd846bda7418c0fa394d (patch)
tree      360b44abac0c9a53739444b8ba9e4ccf903938cd /llama.h
parent    d7b7484f74d486f77feb4c0b7af7e1718ed91651 (diff)
llama : make model stateless and context stateful (llama_state) (#1797)
* llama : make model stateless and context stateful
* llama : minor cleanup
* llama : update internal API declaration
* Apply suggestions from code review

  fix style

  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* Missing model memory release
* Fix style
* Add deprecated warning for public API function llama_init_from_file
* Update public API use cases: move away from deprecated llama_init_from_file
* Deprecate public API function llama_apply_lora_from_file

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.h')
-rw-r--r--  llama.h | 35 +++++++++++++++++++++++++++++++----
1 file changed, 31 insertions(+), 4 deletions(-)
diff --git a/llama.h b/llama.h
index 0de530d..a833a7f 100644
--- a/llama.h
+++ b/llama.h
@@ -26,6 +26,14 @@
# define LLAMA_API
#endif
+#ifdef __GNUC__
+# define DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
+#elif defined(_MSC_VER)
+# define DEPRECATED(func, hint) __declspec(deprecated(hint)) func
+#else
+# define DEPRECATED(func, hint) func
+#endif
+
#define LLAMA_FILE_MAGIC_GGJT 0x67676a74u // 'ggjt'
#define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
#define LLAMA_FILE_MAGIC_GGMF 0x67676d66u // 'ggmf'
@@ -53,6 +61,7 @@ extern "C" {
// TODO: show sample usage
//
+ struct llama_model;
struct llama_context;
typedef int llama_token;
@@ -136,12 +145,23 @@ extern "C" {
LLAMA_API int64_t llama_time_us();
+ LLAMA_API struct llama_model * llama_load_model_from_file(
+ const char * path_model,
+ struct llama_context_params params);
+
+ LLAMA_API void llama_free_model(struct llama_model * model);
+
+ LLAMA_API struct llama_context * llama_new_context_with_model(
+ struct llama_model * model,
+ struct llama_context_params params);
+
// Various functions for loading a ggml llama model.
// Allocate (almost) all memory needed for the model.
// Return NULL on failure
- LLAMA_API struct llama_context * llama_init_from_file(
+ LLAMA_API DEPRECATED(struct llama_context * llama_init_from_file(
const char * path_model,
- struct llama_context_params params);
+ struct llama_context_params params),
+ "please use llama_load_model_from_file combined with llama_new_context_with_model instead");
// Frees all allocated memory
LLAMA_API void llama_free(struct llama_context * ctx);
@@ -158,10 +178,17 @@ extern "C" {
// The model needs to be reloaded before applying a new adapter, otherwise the adapter
// will be applied on top of the previous one
// Returns 0 on success
- LLAMA_API int llama_apply_lora_from_file(
+ LLAMA_API DEPRECATED(int llama_apply_lora_from_file(
struct llama_context * ctx,
const char * path_lora,
const char * path_base_model,
+ int n_threads),
+ "please use llama_model_apply_lora_from_file instead");
+
+ LLAMA_API int llama_model_apply_lora_from_file(
+ const struct llama_model * model,
+ const char * path_lora,
+ const char * path_base_model,
int n_threads);
// Returns the number of tokens in the KV cache
@@ -310,7 +337,7 @@ extern "C" {
#include <string>
struct ggml_tensor;
-std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
+const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
#endif
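
A note on the DEPRECATED macro added at the top of the header: it wraps the entire declaration rather than just the function name because GCC and Clang attach __attribute__((deprecated(...))) after the declarator, while MSVC's __declspec(deprecated(...)) must come before it. As an illustration (this expansion is not part of the patch itself), the deprecated llama_init_from_file declaration expands under GCC/Clang to:

/* Illustrative GCC/Clang expansion of the DEPRECATED(...) wrapper above;
 * under MSVC, __declspec(deprecated("...")) would be prepended instead. */
LLAMA_API struct llama_context * llama_init_from_file(
        const char * path_model,
        struct llama_context_params params)
        __attribute__((deprecated("please use llama_load_model_from_file combined with llama_new_context_with_model instead")));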
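
For downstream code, the practical effect of the patch is that loading a model and creating an evaluation context are now separate steps, and a single loaded model can back multiple contexts. A minimal migration sketch follows (not part of the commit; it assumes llama_context_default_params() from elsewhere in this header, and "model.bin" is a placeholder path):

#include <stdio.h>
#include "llama.h"

int main(void) {
    struct llama_context_params params = llama_context_default_params();

    // Before this patch, one call did both jobs:
    //   struct llama_context * ctx = llama_init_from_file("model.bin", params);

    // After: load the (stateless) model once...
    struct llama_model * model = llama_load_model_from_file("model.bin", params);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // ...then create one or more (stateful) contexts from it,
    // each with its own KV cache and evaluation state.
    struct llama_context * ctx = llama_new_context_with_model(model, params);
    if (ctx == NULL) {
        llama_free_model(model);
        return 1;
    }

    // ... tokenize and evaluate with ctx ...

    llama_free(ctx);         // releases the context
    llama_free_model(model); // the model is now released separately
    return 0;
}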
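
The LoRA entry point moves in the same direction: the replacement llama_model_apply_lora_from_file operates on the model rather than a context, so an adapter is applied before contexts are created from it. A hedged sketch, reusing the model from the example above; "adapter.bin" is a placeholder path, and passing NULL for path_base_model is assumed to keep the convention of the deprecated function (use the loaded model's own layers as the base):

// Sketch: applying a LoRA adapter with the new model-level API.
int err = llama_model_apply_lora_from_file(
        model,         // const struct llama_model *, loaded as above
        "adapter.bin", // path_lora (placeholder)
        NULL,          // path_base_model (optional)
        4);            // n_threads
if (err != 0) {
    fprintf(stderr, "failed to apply LoRA adapter\n");
}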