author    Georgi Gerganov <ggerganov@gmail.com>  2023-06-19 18:12:33 +0300
committer GitHub <noreply@github.com>            2023-06-19 18:12:33 +0300
commit    b97ca431db35ec96a339a721acb1219c1dd78bed (patch)
tree      9a43c273f94fea58e92fc2d624172c0ec97dd71d /ggml.h
parent    1e3abfcef073e73c2b31e8570cb06c5cb2fd1f55 (diff)
ggml : sync latest ggml repo (#1924)

* ggml : sync latest ggml repo
* ggml : remove unused comments
* ggml : asserts
Diffstat (limited to 'ggml.h')
-rw-r--r--  ggml.h | 144
1 file changed, 137 insertions, 7 deletions
diff --git a/ggml.h b/ggml.h
index 1380c53..18c7855 100644
--- a/ggml.h
+++ b/ggml.h
@@ -303,6 +303,7 @@ extern "C" {
GGML_OP_STEP,
GGML_OP_RELU,
GGML_OP_GELU,
+ GGML_OP_GELU_QUICK,
GGML_OP_SILU,
GGML_OP_SILU_BACK,
GGML_OP_NORM, // normalize
@@ -331,12 +332,15 @@ extern "C" {
GGML_OP_ROPE_BACK,
GGML_OP_ALIBI,
GGML_OP_CLAMP,
- GGML_OP_CONV_1D_1S,
- GGML_OP_CONV_1D_2S,
+ GGML_OP_CONV_1D_S1_PH,
+ GGML_OP_CONV_1D_S2_PH,
+ GGML_OP_CONV_2D_SK_P0,
GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF,
GGML_OP_FLASH_ATTN_BACK,
+ GGML_OP_WIN_PART,
+ GGML_OP_WIN_UNPART,
GGML_OP_MAP_UNARY,
GGML_OP_MAP_BINARY,
@@ -557,8 +561,8 @@ extern "C" {
GGML_API void * ggml_get_data (const struct ggml_tensor * tensor);
GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);
- GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
- GGML_API void ggml_set_name(struct ggml_tensor * tensor, const char * name);
+ GGML_API const char * ggml_get_name(const struct ggml_tensor * tensor);
+ GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
//
// operations on tensors with backpropagation
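Note on the ggml_set_name change above: the setter now returns the tensor it was given, so naming a node can be folded into the expression that creates it. A minimal sketch (ggml_mul_mat is an existing ggml API; the tensor names here are illustrative):

    // name the product node as it is created; the returned pointer
    // is the same tensor that was passed in
    struct ggml_tensor * logits = ggml_set_name(ggml_mul_mat(ctx, model.output_w, cur), "logits");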
@@ -611,24 +615,47 @@ extern "C" {
struct ggml_tensor * a,
struct ggml_tensor * b);
+ GGML_API struct ggml_tensor * ggml_sub_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
GGML_API struct ggml_tensor * ggml_mul(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b);
+ GGML_API struct ggml_tensor * ggml_mul_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
GGML_API struct ggml_tensor * ggml_div(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b);
+ GGML_API struct ggml_tensor * ggml_div_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
GGML_API struct ggml_tensor * ggml_sqr(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_sqr_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_sqrt(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_sqrt_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_log(
struct ggml_context * ctx,
struct ggml_tensor * a);
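The new _inplace variants follow the usual ggml convention: rather than allocating a fresh result tensor, the returned tensor reuses the first operand's data buffer, which saves memory in large graphs. A hedged sketch of the difference, assuming a and b are existing F32 tensors of compatible shape:

    // out-of-place: the result gets its own data allocation in ctx
    struct ggml_tensor * y = ggml_mul(ctx, a, b);

    // in-place: the result is backed by a's buffer, so a's old contents
    // are overwritten when the graph is computed
    struct ggml_tensor * z = ggml_mul_inplace(ctx, a, b);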
@@ -668,31 +695,67 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_abs_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_sgn(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_sgn_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_neg(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_neg_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_step(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_step_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_relu(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_relu_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
// TODO: double-check this computation is correct
GGML_API struct ggml_tensor * ggml_gelu(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_gelu_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_gelu_quick(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_gelu_quick_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
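ggml_gelu_quick is the cheaper, sigmoid-based GELU approximation. As a scalar reference of the math only (a sketch, not ggml's actual kernel, which lives in ggml.c):

    #include <math.h>

    // "quick" GELU: x * sigmoid(1.702 * x) == x / (1 + exp(-1.702 * x))
    static float gelu_quick_ref(float x) {
        return x / (1.0f + expf(-1.702f * x));
    }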
GGML_API struct ggml_tensor * ggml_silu(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_silu_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
// a - x
// b - dy
GGML_API struct ggml_tensor * ggml_silu_back(
@@ -706,10 +769,18 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_norm_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_rms_norm(
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_rms_norm_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
// a - x
// b - dy
GGML_API struct ggml_tensor * ggml_rms_norm_back(
@@ -999,16 +1070,55 @@ extern "C" {
float min,
float max);
- // padding = 1
+ // TODO: implement general-purpose convolutions
+ // GGML_API struct ggml_tensor * ggml_conv_1d(
+ // struct ggml_context * ctx,
+ // struct ggml_tensor * a,
+ // struct ggml_tensor * b,
+ // int s0,
+ // int p0,
+ // int d0);
+ //
+ // GGML_API struct ggml_tensor * ggml_conv_2d(
+ // struct ggml_context * ctx,
+ // struct ggml_tensor * a,
+ // struct ggml_tensor * b,
+ // int s0,
+ // int s1,
+ // int p0,
+ // int p1,
+ // int d0,
+ // int d1);
+
+ // padding = half
// TODO: we don't support extra parameters for now
// that's why we are hard-coding the stride, padding, and dilation
// not great ..
- GGML_API struct ggml_tensor * ggml_conv_1d_1s(
+ // example:
+ // a: 3 80 768 1
+ // b: 3000 80 1 1
+ // res: 3000 768 1 1
+ // used in whisper
+ GGML_API struct ggml_tensor * ggml_conv_1d_s1_ph(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b);
- GGML_API struct ggml_tensor * ggml_conv_1d_2s(
+ // used in whisper
+ GGML_API struct ggml_tensor * ggml_conv_1d_s2_ph(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b);
+
+ // kernel size is a->ne[0] x a->ne[1]
+ // stride is equal to kernel size
+ // padding is zero
+ // example:
+ // a: 16 16 3 768
+ // b: 1024 1024 3 1
+ // res: 64 64 768 1
+ // used in sam
+ GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * b);
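Putting the shape comments above together, a whisper-style use of the 1D convolution might look as follows (a sketch; mel and model.conv1_w are hypothetical names, and ggml lists dimensions innermost first):

    // a (kernel): [3, 80, 768]   b (mel input): [3000, 80]
    // stride 1, "half" padding keeps all 3000 positions -> res: [3000, 768]
    struct ggml_tensor * cur = ggml_conv_1d_s1_ph(ctx, model.conv1_w, mel);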
@@ -1036,6 +1146,26 @@ extern "C" {
struct ggml_tensor * c0,
struct ggml_tensor * c1);
+ // partition into non-overlapping windows with padding if needed
+ // example:
+ // a: 768 64 64 1
+ // w: 14
+ // res: 768 14 14 25
+ // used in sam
+ GGML_API struct ggml_tensor * ggml_win_part(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int w);
+
+ // reverse of ggml_win_part
+ // used in sam
+ GGML_API struct ggml_tensor * ggml_win_unpart(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ int w0,
+ int h0,
+ int w);
+
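For the shapes in the comment above: 64 is not a multiple of w = 14, so the 64x64 grid is padded up to 70x70, giving 70/14 = 5 windows per side and 5*5 = 25 windows total, which is where res = 768 14 14 25 comes from. A round-trip sketch, assuming x is the [768, 64, 64, 1] tensor:

    // partition into 14x14 windows, padding as needed ...
    struct ggml_tensor * win = ggml_win_part(ctx, x, 14);             // -> [768, 14, 14, 25]
    // ... and reassemble; w0/h0 carry the original spatial extent so
    // the padding can be stripped again
    struct ggml_tensor * out = ggml_win_unpart(ctx, win, 64, 64, 14); // -> [768, 64, 64, 1]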
// Mapping operations
typedef void (*ggml_unary_op_f32_t)(const int, float *, const float *);
typedef void (*ggml_binary_op_f32_t)(const int, float *, const float *, const float *);
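The mapping typedefs take (n, dst, src) for unary and (n, dst, src0, src1) for binary F32 ops. A sketch of a custom element-wise op, assuming the ggml_map_unary_f32 entry point that the header declares alongside these typedefs:

    // hypothetical op matching ggml_unary_op_f32_t: leaky ReLU
    static void leaky_relu_f32(const int n, float * dst, const float * src) {
        for (int i = 0; i < n; ++i) {
            dst[i] = src[i] > 0.0f ? src[i] : 0.1f*src[i];
        }
    }

    // applied to a tensor x inside a graph:
    // struct ggml_tensor * y = ggml_map_unary_f32(ctx, x, leaky_relu_f32);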