author | Didzis Gosko <didzis@users.noreply.github.com> | 2023-06-24 11:47:58 +0300
committer | GitHub <noreply@github.com> | 2023-06-24 11:47:58 +0300
commit | 527b6fba1d237befb324fd846bda7418c0fa394d (patch)
tree | 360b44abac0c9a53739444b8ba9e4ccf903938cd /llama.h
parent | d7b7484f74d486f77feb4c0b7af7e1718ed91651 (diff)
llama : make model stateless and context stateful (llama_state) (#1797)
* llama : make model stateless and context stateful
* llama : minor cleanup
* llama : update internal API declaration
* Apply suggestions from code review
fix style
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* Missing model memory release
* Fix style
* Add deprecated warning for public API function llama_init_from_file
* Update public API use cases: move away from deprecated llama_init_from_file
* Deprecate public API function llama_apply_lora_from_file
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'llama.h')
-rw-r--r-- | llama.h | 35
1 file changed, 31 insertions, 4 deletions
```diff
@@ -26,6 +26,14 @@
 #    define LLAMA_API
 #endif
 
+#ifdef __GNUC__
+#    define DEPRECATED(func, hint) func __attribute__((deprecated(hint)))
+#elif defined(_MSC_VER)
+#    define DEPRECATED(func, hint) __declspec(deprecated(hint)) func
+#else
+#    define DEPRECATED(func, hint) func
+#endif
+
 #define LLAMA_FILE_MAGIC_GGJT 0x67676a74u // 'ggjt'
 #define LLAMA_FILE_MAGIC_GGLA 0x67676c61u // 'ggla'
 #define LLAMA_FILE_MAGIC_GGMF 0x67676d66u // 'ggmf'
@@ -53,6 +61,7 @@ extern "C" {
     // TODO: show sample usage
     //
 
+    struct llama_model;
     struct llama_context;
 
     typedef int llama_token;
@@ -136,12 +145,23 @@ extern "C" {
 
     LLAMA_API int64_t llama_time_us();
 
+    LLAMA_API struct llama_model * llama_load_model_from_file(
+                             const char * path_model,
+            struct llama_context_params   params);
+
+    LLAMA_API void llama_free_model(struct llama_model * model);
+
+    LLAMA_API struct llama_context * llama_new_context_with_model(
+                     struct llama_model * model,
+            struct llama_context_params   params);
+
     // Various functions for loading a ggml llama model.
     // Allocate (almost) all memory needed for the model.
     // Return NULL on failure
-    LLAMA_API struct llama_context * llama_init_from_file(
+    LLAMA_API DEPRECATED(struct llama_context * llama_init_from_file(
                              const char * path_model,
-            struct llama_context_params   params);
+            struct llama_context_params   params),
+            "please use llama_load_model_from_file combined with llama_new_context_with_model instead");
 
     // Frees all allocated memory
     LLAMA_API void llama_free(struct llama_context * ctx);
@@ -158,10 +178,17 @@ extern "C" {
     // The model needs to be reloaded before applying a new adapter, otherwise the adapter
     // will be applied on top of the previous one
     // Returns 0 on success
-    LLAMA_API int llama_apply_lora_from_file(
+    LLAMA_API DEPRECATED(int llama_apply_lora_from_file(
            struct llama_context * ctx,
                       const char * path_lora,
                       const char * path_base_model,
+                             int   n_threads),
+            "please use llama_model_apply_lora_from_file instead");
+
+    LLAMA_API int llama_model_apply_lora_from_file(
+            const struct llama_model * model,
+                          const char * path_lora,
+                          const char * path_base_model,
                              int   n_threads);
 
     // Returns the number of tokens in the KV cache
@@ -310,7 +337,7 @@ extern "C" {
 #include <string>
 
 struct ggml_tensor;
 
-std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
+const std::vector<std::pair<std::string, struct ggml_tensor *>>& llama_internal_get_tensor_map(struct llama_context * ctx);
 
 #endif
```
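For illustration, a minimal sketch of how a caller migrates from the deprecated llama_init_from_file to the split API added by this commit (the model path, adapter path, and thread count below are placeholders, not part of the change):

```c
// Minimal migration sketch for the model/context split introduced here.
// The model path, LoRA path, and thread count are illustrative placeholders.
#include <stdio.h>

#include "llama.h"

int main(void) {
    struct llama_context_params params = llama_context_default_params();

    // Before this commit: struct llama_context * ctx = llama_init_from_file(path, params);
    // Now the weights are loaded once, independently of any context:
    struct llama_model * model = llama_load_model_from_file("models/7B/ggml-model.bin", params);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        return 1;
    }

    // LoRA adapters now apply to the model rather than to a context,
    // replacing the deprecated llama_apply_lora_from_file:
    // llama_model_apply_lora_from_file(model, "adapter.bin", NULL, 4);

    // One or more contexts can then be created on top of the shared model:
    struct llama_context * ctx = llama_new_context_with_model(model, params);
    if (ctx == NULL) {
        llama_free_model(model);
        return 1;
    }

    // ... tokenize and evaluate with ctx as before ...

    llama_free(ctx);         // frees the context only
    llama_free_model(model); // model memory must now be released separately
    return 0;
}
```

With this split, llama_free releases only the context state; the weights stay alive until llama_free_model (see the "Missing model memory release" fixup above), so several contexts can share a single loaded model.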