about | summary | refs | log | tree | commit | diff
path: root/ggml.h
diff options
context:
space:
mode:
author Georgi Gerganov <ggerganov@gmail.com> 2023-04-05 22:07:33 +0300
committer GitHub <noreply@github.com> 2023-04-05 22:07:33 +0300
commit 986b6ce9f99503c51ec5afd8a10baa32359434c6 (patch)
tree f4655b45b130b908729eb1407ca9e016c05f21a4 /ggml.h
parent 34162989297fdfe3ab7305451ce55bc87e3f4c9c (diff)
ggml, llama : avoid heavy V transpose + improvements (#775)
ggml :
- added ggml_view_3d()
- ggml_view_tensor() now inherits the stride too
- reimplement ggml_cpy() to account for dst stride
- no longer require tensor->data to be memory aligned

llama :
- compute RoPE on 32-bit tensors (should be more accurate)
- store RoPE-ed K in the KV cache
- store transposed V in the KV cache (significant speed-up)
- avoid unnecessary Q copy
Diffstat (limited to 'ggml.h')
-rw-r--r-- ggml.h 10
1 files changed, 10 insertions, 0 deletions
diff --git a/ggml.h b/ggml.h
index ad962b1..3c94efc 100644
--- a/ggml.h
+++ b/ggml.h
@@ -558,6 +558,16 @@ struct ggml_tensor * ggml_view_2d(
size_t nb1, // row stride in bytes
size_t offset);
+struct ggml_tensor * ggml_view_3d( // 3-D variant of ggml_view_2d added by this commit; behavior is defined in ggml.c (not shown here)
+ struct ggml_context * ctx,
+ struct ggml_tensor * a, // NOTE(review): presumably the tensor being viewed - confirm against ggml.c
+ int64_t ne0, // NOTE(review): presumably the view's element count along dim 0 - confirm
+ int64_t ne1, // NOTE(review): presumably the view's element count along dim 1 - confirm
+ int64_t ne2, // NOTE(review): presumably the view's element count along dim 2 - confirm
+ size_t nb1, // row stride in bytes
+ size_t nb2, // slice stride in bytes
+ size_t offset); // NOTE(review): presumably a byte offset into a's data, mirroring ggml_view_2d - confirm
+
struct ggml_tensor * ggml_permute(
struct ggml_context * ctx,
struct ggml_tensor * a,