path: root/llama.h
author    ningshanwutuobang <ningshanwutuobang@gmail.com>  2023-06-28 23:53:37 +0800
committer GitHub <noreply@github.com>  2023-06-28 18:53:37 +0300
commit cfa0750bc9dbc2d957a91b8ed09ab0035d8f3d4e (patch)
tree   c8d6d6e6548d4f03899704f64bce6939e471e4e6 /llama.h
parent 9d23589d638dc74577d5ff880e6d4248b795f12e (diff)
llama : support input embeddings directly (#1910)
* add interface for float input
* fix inpL shape and type
* add examples of input floats
* add test example for embd input
* fix sampling
* add free for context
* fix the end condition for generating
* add examples for llava.py
* add README for llava.py
* add example of PandaGPT
* refactor the interface and fix the styles
* add cmake build for embd-input
* add MiniGPT-4 example
* change the order of the args of llama_eval_internal
* fix CI error
Diffstat (limited to 'llama.h')
-rw-r--r--  llama.h | 8
1 file changed, 8 insertions, 0 deletions
diff --git a/llama.h b/llama.h
index 76239be..c2f2e53 100644
--- a/llama.h
+++ b/llama.h
@@ -226,6 +226,14 @@ extern "C" {
int n_past,
int n_threads);
+ // Same as llama_eval, but uses a float matrix of embeddings as input directly.
+ LLAMA_API int llama_eval_embd(
+ struct llama_context * ctx,
+ const float * embd,
+ int n_tokens,
+ int n_past,
+ int n_threads);
+
// Export a static computation graph for context of 511 and batch size of 1
// NOTE: since this functionality is mostly for debugging and demonstration purposes, we hardcode these
// parameters here to keep things simple
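
For context, a minimal sketch (not part of this commit) of how the new llama_eval_embd call could be driven from C. It assumes the pre-GGUF API of this era (llama_init_from_file, llama_context_default_params, llama_n_embd); the model path and thread count are placeholders, and the input matrix is expected to hold n_tokens contiguous rows of llama_n_embd(ctx) floats each.

    #include <stdio.h>
    #include <stdlib.h>
    #include "llama.h"

    int main(void) {
        struct llama_context_params params = llama_context_default_params();

        // "model.bin" is a placeholder path, not one referenced by this commit
        struct llama_context * ctx = llama_init_from_file("model.bin", params);
        if (!ctx) {
            fprintf(stderr, "failed to load model\n");
            return 1;
        }

        const int n_tokens = 4;                  // number of embedding rows to feed
        const int n_embd   = llama_n_embd(ctx);  // embedding width of the model

        // one contiguous row of n_embd floats per "virtual" token
        float * embd = (float *) calloc((size_t) n_tokens * n_embd, sizeof(float));

        // ... fill embd with externally computed embeddings
        //     (e.g. from an image encoder, as in the llava.py example) ...

        // evaluate the embeddings in place of token ids; returns 0 on success
        if (llama_eval_embd(ctx, embd, n_tokens, /*n_past=*/0, /*n_threads=*/4) != 0) {
            fprintf(stderr, "llama_eval_embd failed\n");
        }

        free(embd);
        llama_free(ctx);
        return 0;
    }

The call mirrors llama_eval exactly, except that the token-id array is replaced by the float matrix, so existing n_past bookkeeping and sampling code carries over unchanged.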