author     Georgi Gerganov <ggerganov@gmail.com>   2023-07-04 21:54:11 +0300
committer  GitHub <noreply@github.com>             2023-07-04 21:54:11 +0300
commit     ed9a54e5129a11c2a5b555e1dc65e875e3c37b4f (patch)
tree       762f68c87fc160b4d646a04bd07f090c392556eb
parent     f257fd255044decffad93dee2502875ce66ad80c (diff)
ggml : sync latest (new ops, macros, refactoring) (#2106)
- add ggml_argmax()
- add ggml_tanh()
- add ggml_elu()
- refactor ggml_conv_1d() and variants
- refactor ggml_conv_2d() and variants
- add helper macros to reduce code duplication in ggml.c
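The helper macros mentioned above replace the long blocks of hand-written ne/nb locals that used to be copied into every compute function (see the many deleted "const int64_t ne00 = src0->ne[0];" blocks in the diff below). Their definitions live in ggml.h and are not part of this diff; as a rough sketch (simplified — the real macros also mark each local as used to silence unused-variable warnings), GGML_TENSOR_LOCALS expands to four const locals per tensor member:

#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
    const type prefix##0 = (pointer)->array[0]; \
    const type prefix##1 = (pointer)->array[1]; \
    const type prefix##2 = (pointer)->array[2]; \
    const type prefix##3 = (pointer)->array[3];

// so, inside a compute function:
//   GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);  // -> ne00, ne01, ne02, ne03
//   GGML_TENSOR_LOCALS(size_t,  nb,  dst,  nb);  // -> nb0,  nb1,  nb2,  nb3
// which is what GGML_TENSOR_UNARY_OP_LOCALS / GGML_TENSOR_BINARY_OP_LOCALS bundle together.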
-rw-r--r--   ggml.c                  1506
-rw-r--r--   ggml.h                   118
-rwxr-xr-x   scripts/sync-ggml.sh      11
3 files changed, 586 insertions, 1049 deletions
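For context, below is a minimal usage sketch of the public API after this sync (not part of the commit; shapes, sizes and the build setup are arbitrary assumptions). ggml_conv_1d() now takes explicit stride/padding/dilation (s0, p0, d0), while ggml_conv_1d_ph() keeps the old half-padding behaviour; at this point the forward pass still asserts d0 == 1, half padding and stride 1 or 2, so the sketch sticks to those values.

#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params ip = {
        /*.mem_size   =*/ 64*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(ip);

    // conv kernel: [kernel_width, in_channels, out_channels]; input: [length, in_channels]
    struct ggml_tensor * w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 3, 8, 16);
    struct ggml_tensor * x = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 64, 8);
    ggml_set_f32(w, 0.1f);
    ggml_set_f32(x, 1.0f);

    // new explicit-parameter API: stride 1, half padding (kernel_width/2), dilation 1
    struct ggml_tensor * c = ggml_conv_1d(ctx, w, x, /*s0*/ 1, /*p0*/ (int)(w->ne[0]/2), /*d0*/ 1);

    // new element-wise ops added by this sync
    struct ggml_tensor * t = ggml_tanh(ctx, c);
    struct ggml_tensor * e = ggml_elu (ctx, c);

    // new argmax: one I32 index per row of a matrix
    struct ggml_tensor * m  = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 10, 4);
    ggml_set_f32(m, 0.0f);
    struct ggml_tensor * am = ggml_argmax(ctx, m);

    struct ggml_cgraph gf = ggml_build_forward(t);
    ggml_build_forward_expand(&gf, e);
    ggml_build_forward_expand(&gf, am);
    ggml_graph_compute(ctx, &gf);

    printf("conv_1d output: %lld x %lld\n", (long long) t->ne[0], (long long) t->ne[1]);

    ggml_free(ctx);
    return 0;
}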
diff --git a/ggml.c b/ggml.c
index afeb72f..88cbed7 100644
--- a/ggml.c
+++ b/ggml.c
@@ -220,9 +220,27 @@ inline static void* ggml_aligned_malloc(size_t size) {
#define GGML_ALIGNED_FREE(ptr) free(ptr)
#endif
-#define UNUSED(x) (void)(x)
+#define UNUSED GGML_UNUSED
#define SWAP(x, y, T) do { T SWAP = x; x = y; y = SWAP; } while (0)
+//
+// tensor access macros
+//
+
+#define GGML_TENSOR_UNARY_OP_LOCALS \
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+
+#define GGML_TENSOR_BINARY_OP_LOCALS \
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne); \
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb); \
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne); \
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb); \
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne); \
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
+
#if defined(GGML_USE_ACCELERATE)
#include <Accelerate/Accelerate.h>
#if defined(GGML_USE_CLBLAST) // allow usage of CLBlast alongside Accelerate functions
@@ -3447,6 +3465,8 @@ inline static void ggml_vec_log_f32 (const int n, float * y, const float * x) {
inline static void ggml_vec_abs_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = fabsf(x[i]); }
inline static void ggml_vec_sgn_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : ((x[i] < 0.f) ? -1.f : 0.f); }
inline static void ggml_vec_step_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? 1.f : 0.f; }
+inline static void ggml_vec_tanh_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = tanhf(x[i]); }
+inline static void ggml_vec_elu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : expf(x[i])-1; }
inline static void ggml_vec_relu_f32 (const int n, float * y, const float * x) { for (int i = 0; i < n; ++i) y[i] = (x[i] > 0.f) ? x[i] : 0.f; }
static const float GELU_COEF_A = 0.044715f;
@@ -3598,6 +3618,16 @@ inline static void ggml_vec_norm_inv_f32(const int n, float * s, const float * x
*s = 1.f/(*s);
}
+inline static void ggml_vec_argmax_f32(const int n, int * s, const float * x) {
+ float max = -INFINITY;
+ int idx = 0;
+ for (int i = 0; i < n; ++i) {
+ max = MAX(max, x[i]);
+ if (max == x[i]) { idx = i; }
+ }
+ *s = idx;
+}
+
//
// data types
//
@@ -3707,12 +3737,15 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"SUM",
"SUM_ROWS",
"MEAN",
+ "ARGMAX",
"REPEAT",
"REPEAT_BACK",
"ABS",
"SGN",
"NEG",
"STEP",
+ "TANH",
+ "ELU",
"RELU",
"GELU",
"GELU_QUICK",
@@ -3744,9 +3777,8 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"ROPE_BACK",
"ALIBI",
"CLAMP",
- "CONV_1D_S1_PH",
- "CONV_1D_S2_PH",
- "CONV_2D_SK_P0",
+ "CONV_1D",
+ "CONV_2D",
"FLASH_ATTN",
"FLASH_FF",
@@ -3765,7 +3797,7 @@ static const char * GGML_OP_NAME[GGML_OP_COUNT] = {
"CROSS_ENTROPY_LOSS_BACK",
};
-static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
+static_assert(GGML_OP_COUNT == 66, "GGML_OP_COUNT != 66");
static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"none",
@@ -3783,12 +3815,15 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"Σx",
"Σx_k",
"Σx/n",
+ "argmax(x)",
"repeat(x)",
"repeat_back(x)",
"abs(x)",
"sgn(x)",
"-x",
"step(x)",
+ "tanh(x)",
+ "elu(x)",
"relu(x)",
"gelu(x)",
"gelu_quick(x)",
@@ -3820,9 +3855,8 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"rope_back(x)",
"alibi(x)",
"clamp(x)",
- "conv_1d_s1_ph(x)",
- "conv_1d_s2_ph(x)",
- "conv_2d_sk_p0(x)",
+ "conv_1d(x)",
+ "conv_2d(x)",
"flash_attn(x)",
"flash_ff(x)",
@@ -3841,7 +3875,7 @@ static const char * GGML_OP_SYMBOL[GGML_OP_COUNT] = {
"cross_entropy_loss_back(x,y)",
};
-static_assert(GGML_OP_COUNT == 64, "GGML_OP_COUNT != 64");
+static_assert(GGML_OP_COUNT == 66, "GGML_OP_COUNT != 66");
static_assert(sizeof(struct ggml_object)%GGML_MEM_ALIGN == 0, "ggml_object size must be a multiple of GGML_MEM_ALIGN");
static_assert(sizeof(struct ggml_tensor)%GGML_MEM_ALIGN == 0, "ggml_tensor size must be a multiple of GGML_MEM_ALIGN");
@@ -3867,9 +3901,8 @@ static void ggml_setup_op_has_task_pass(void) {
p[GGML_OP_GET_ROWS_BACK ] = true;
p[GGML_OP_DIAG_MASK_INF ] = true;
p[GGML_OP_DIAG_MASK_ZERO ] = true;
- p[GGML_OP_CONV_1D_S1_PH ] = true;
- p[GGML_OP_CONV_1D_S2_PH ] = true;
- p[GGML_OP_CONV_2D_SK_P0 ] = true;
+ p[GGML_OP_CONV_1D ] = true;
+ p[GGML_OP_CONV_2D ] = true;
p[GGML_OP_FLASH_ATTN_BACK ] = true;
p[GGML_OP_CROSS_ENTROPY_LOSS ] = true;
}
@@ -5440,6 +5473,30 @@ struct ggml_tensor * ggml_mean(
return result;
}
+// ggml_argmax
+
+struct ggml_tensor * ggml_argmax(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ GGML_ASSERT(ggml_is_matrix(a));
+ bool is_node = false;
+
+ if (a->grad) {
+ GGML_ASSERT(false);
+ is_node = true;
+ }
+
+ int64_t ne[GGML_MAX_DIMS] = { a->ne[1], 1, 1, 1 };
+ struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_I32, a->n_dims, ne);
+
+ result->op = GGML_OP_ARGMAX;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src0 = a;
+ result->src1 = NULL;
+
+ return result;
+}
+
// ggml_repeat
struct ggml_tensor * ggml_repeat(
@@ -5633,6 +5690,74 @@ struct ggml_tensor * ggml_step_inplace(
return ggml_step_impl(ctx, a, true);
}
+// ggml_tanh
+
+struct ggml_tensor * ggml_tanh_impl(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ bool inplace) {
+ bool is_node = false;
+
+ if (!inplace && (a->grad)) {
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+ result->op = GGML_OP_TANH;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src0 = a;
+ result->src1 = NULL;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_tanh(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ return ggml_tanh_impl(ctx, a, false);
+}
+
+struct ggml_tensor * ggml_tanh_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ return ggml_tanh_impl(ctx, a, true);
+}
+
+// ggml_elu
+
+struct ggml_tensor * ggml_elu_impl(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a,
+ bool inplace) {
+ bool is_node = false;
+
+ if (!inplace && (a->grad)) {
+ is_node = true;
+ }
+
+ struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
+
+ result->op = GGML_OP_ELU;
+ result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
+ result->src0 = a;
+ result->src1 = NULL;
+
+ return result;
+}
+
+struct ggml_tensor * ggml_elu(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ return ggml_elu_impl(ctx, a, false);
+}
+
+struct ggml_tensor * ggml_elu_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a) {
+ return ggml_elu_impl(ctx, a, true);
+}
+
// ggml_relu
struct ggml_tensor * ggml_relu_impl(
@@ -6874,6 +6999,8 @@ struct ggml_tensor * ggml_rope_back(
int n_dims,
int mode) {
GGML_ASSERT(n_past >= 0);
+ GGML_ASSERT((mode & 4) == 0 && "ggml_rope_back() for ChatGLM not implemented yet");
+
bool is_node = false;
if (a->grad) {
@@ -6974,15 +7101,21 @@ struct ggml_tensor * ggml_clamp(
return result;
}
-// ggml_conv_1d_s1_ph
+// ggml_conv_1d
+
+static int64_t ggml_calc_conv_output_size(int64_t ins, int64_t ks, int s, int p, int d) {
+ return (ins + 2 * p - d * (ks - 1) - 1) / s + 1;
+}
-struct ggml_tensor * ggml_conv_1d_s1_ph(
+GGML_API struct ggml_tensor * ggml_conv_1d(
struct ggml_context * ctx,
struct ggml_tensor * a,
- struct ggml_tensor * b) {
+ struct ggml_tensor * b,
+ int s0,
+ int p0,
+ int d0) {
GGML_ASSERT(ggml_is_matrix(b));
GGML_ASSERT(a->ne[1] == b->ne[1]);
- GGML_ASSERT(a->ne[3] == 1);
bool is_node = false;
if (a->grad || b->grad) {
@@ -6990,26 +7123,43 @@ struct ggml_tensor * ggml_conv_1d_s1_ph(
is_node = true;
}
- const int64_t ne[4] = { b->ne[0], a->ne[2], 1, 1, };
- struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+ const int64_t ne[4] = {
+ ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
+ a->ne[2], 1, 1,
+ };
+ struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+
+ ggml_scratch_save(ctx);
+ struct ggml_tensor* c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 3);
+ ((int32_t*)c->data)[0] = s0;
+ ((int32_t*)c->data)[1] = p0;
+ ((int32_t*)c->data)[2] = d0;
+ ggml_scratch_load(ctx);
- result->op = GGML_OP_CONV_1D_S1_PH;
+ result->op = GGML_OP_CONV_1D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = b;
+ result->opt[0] = c;
return result;
}
-// ggml_conv_1d_s2_ph
+// ggml_conv_2d
-struct ggml_tensor * ggml_conv_1d_s2_ph(
- struct ggml_context * ctx,
- struct ggml_tensor * a,
- struct ggml_tensor * b) {
- GGML_ASSERT(ggml_is_matrix(b));
- GGML_ASSERT(a->ne[1] == b->ne[1]);
- GGML_ASSERT(a->ne[3] == 1);
+struct ggml_tensor* ggml_conv_2d(
+ struct ggml_context* ctx,
+ struct ggml_tensor * a,
+ struct ggml_tensor * b,
+ int s0,
+ int s1,
+ int p0,
+ int p1,
+ int d0,
+ int d1) {
+
+ GGML_ASSERT(b->ne[3] == 1);
+ GGML_ASSERT(a->ne[2] == b->ne[2]);
bool is_node = false;
if (a->grad || b->grad) {
@@ -7017,43 +7167,42 @@ struct ggml_tensor * ggml_conv_1d_s2_ph(
is_node = true;
}
- const int64_t ne[4] = { b->ne[0]/2, a->ne[2], 1, 1, };
- struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 2, ne);
+ const int64_t ne[4] = {
+ ggml_calc_conv_output_size(b->ne[0], a->ne[0], s0, p0, d0),
+ ggml_calc_conv_output_size(b->ne[1], a->ne[1], s1, p1, d1),
+ a->ne[3], 1,
+ };
+ struct ggml_tensor* result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
+
+ ggml_scratch_save(ctx);
+ struct ggml_tensor* c = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, 6);
+ ((int32_t*)c->data)[0] = s0;
+ ((int32_t*)c->data)[1] = s1;
+ ((int32_t*)c->data)[2] = p0;
+ ((int32_t*)c->data)[3] = p1;
+ ((int32_t*)c->data)[4] = d0;
+ ((int32_t*)c->data)[5] = d1;
+ ggml_scratch_load(ctx);
- result->op = GGML_OP_CONV_1D_S2_PH;
+ result->op = GGML_OP_CONV_2D;
result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
result->src0 = a;
result->src1 = b;
+ result->opt[0] = c;
return result;
+
}
-// ggml_conv_2d_sk_p0
+// ggml_conv_1d_ph
-struct ggml_tensor * ggml_conv_2d_sk_p0(
+struct ggml_tensor* ggml_conv_1d_ph(
struct ggml_context * ctx,
struct ggml_tensor * a,
- struct ggml_tensor * b) {
- GGML_ASSERT(b->ne[3] == 1);
- GGML_ASSERT(a->ne[2] == b->ne[2]);
- GGML_ASSERT(b->ne[0] % a->ne[0] == 0);
- GGML_ASSERT(b->ne[1] % a->ne[1] == 0);
- bool is_node = false;
-
- if (a->grad || b->grad) {
- GGML_ASSERT(false); // TODO: implement backward
- is_node = true;
- }
-
- const int64_t ne[4] = { b->ne[0]/a->ne[0], b->ne[1]/a->ne[1], a->ne[3], 1, };
- struct ggml_tensor * result = ggml_new_tensor(ctx, GGML_TYPE_F32, 4, ne);
-
- result->op = GGML_OP_CONV_2D_SK_P0;
- result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
- result->src0 = a;
- result->src1 = b;
-
- return result;
+ struct ggml_tensor * b,
+ int s,
+ int d) {
+ return ggml_conv_1d(ctx, a, b, s, a->ne[0] / 2, d);
}
// ggml_flash_attn
@@ -7603,25 +7752,7 @@ static void ggml_compute_forward_dup_f16(
return;
}
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
@@ -7892,25 +8023,7 @@ static void ggml_compute_forward_dup_f32(
return;
}
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const int ith = params->ith; // thread index
const int nth = params->nth; // number of threads
@@ -8208,24 +8321,8 @@ static void ggml_compute_forward_add_f32(
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -8294,28 +8391,12 @@ static void ggml_compute_forward_add_f16_f32(
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
- GGML_ASSERT(dst->type == GGML_TYPE_F16);
+ GGML_ASSERT(dst->type == GGML_TYPE_F16);
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
@@ -8364,24 +8445,8 @@ static void ggml_compute_forward_add_f16_f16(
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F16);
@@ -8431,25 +8496,8 @@ static void ggml_compute_forward_add_q_f32(
}
const int nr = ggml_nrows(src0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
@@ -8570,19 +8618,8 @@ static void ggml_compute_forward_add1_f32(
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -8636,23 +8673,12 @@ static void ggml_compute_forward_add1_f16_f32(
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F32);
- GGML_ASSERT(dst->type == GGML_TYPE_F16);
+ GGML_ASSERT(dst->type == GGML_TYPE_F16);
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
@@ -8697,23 +8723,12 @@ static void ggml_compute_forward_add1_f16_f16(
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
GGML_ASSERT(src0->type == GGML_TYPE_F16);
GGML_ASSERT(src1->type == GGML_TYPE_F16);
- GGML_ASSERT(dst->type == GGML_TYPE_F16);
+ GGML_ASSERT(dst->type == GGML_TYPE_F16);
GGML_ASSERT( nb0 == sizeof(ggml_fp16_t));
GGML_ASSERT(nb00 == sizeof(ggml_fp16_t));
@@ -8758,19 +8773,8 @@ static void ggml_compute_forward_add1_q_f32(
const int nth = params->nth;
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const enum ggml_type type = src0->type;
dequantize_row_q_t const dequantize_row_q = quantize_fns[type].dequantize_row_q;
@@ -8902,15 +8906,8 @@ static void ggml_compute_forward_acc_f32(
const int nr = ggml_nrows(src1);
const int nc = src1->ne[0];
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
// src0 and dst as viewed during acc
const size_t nb0 = ggml_element_size(src0);
@@ -8999,24 +8996,8 @@ static void ggml_compute_forward_sub_f32(
}
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -9106,29 +9087,7 @@ static void ggml_compute_forward_mul_f32(
const int64_t nr = ggml_nrows(src0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -9216,24 +9175,8 @@ static void ggml_compute_forward_div_f32(
}
const int nr = ggml_nrows(src0);
- const int64_t ne0 = src0->ne[0];
- const int64_t ne1 = src0->ne[1];
- const int64_t ne2 = src0->ne[2];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
GGML_ASSERT( nb0 == sizeof(float));
GGML_ASSERT(nb00 == sizeof(float));
@@ -9440,14 +9383,8 @@ static void ggml_compute_forward_sum_f32(
assert(ggml_is_scalar(dst));
assert(src0->nb[0] == sizeof(float));
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
+ GGML_TENSOR_LOCALS(size_t, nb0, src0, nb);
ggml_float sum = 0;
ggml_float row_sum = 0;
@@ -9496,29 +9433,13 @@ static void ggml_compute_forward_sum_rows_f32(
GGML_ASSERT(src0->nb[0] == sizeof(float));
GGML_ASSERT(dst->nb[0] == sizeof(float));
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
GGML_ASSERT(ne0 == 1);
GGML_ASSERT(ne1 == ne01);
GGML_ASSERT(ne2 == ne02);
GGML_ASSERT(ne3 == ne03);
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
for (int64_t i3 = 0; i3 < ne03; i3++) {
for (int64_t i2 = 0; i2 < ne02; i2++) {
for (int64_t i1 = 0; i1 < ne01; i1++) {
@@ -9562,19 +9483,7 @@ static void ggml_compute_forward_mean_f32(
assert(src0->nb[0] == sizeof(float));
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
assert(ne0 == 1);
assert(ne1 == ne01);
@@ -9586,10 +9495,6 @@ static void ggml_compute_forward_mean_f32(
UNUSED(ne2);
UNUSED(ne3);
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
for (int64_t i03 = 0; i03 < ne03; i03++) {
for (int64_t i02 = 0; i02 < ne02; i02++) {
for (int64_t i01 = 0; i01 < ne01; i01++) {
@@ -9619,38 +9524,66 @@ static void ggml_compute_forward_mean(
}
}
-// ggml_compute_forward_repeat
+// ggml_compute_forward_argmax
-static void ggml_compute_forward_repeat_f32(
+static void ggml_compute_forward_argmax_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
struct ggml_tensor * dst) {
- GGML_ASSERT(params->ith == 0);
- GGML_ASSERT(ggml_can_repeat(src0, dst));
+ assert(params->ith == 0);
if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
return;
}
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
+ assert(src0->nb[0] == sizeof(float));
+ assert(dst->nb[0] == sizeof(float));
const int64_t ne00 = src0->ne[0];
const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
- const size_t nb00 = src0->nb[0];
const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
+ const size_t nb0 = dst->nb[0];
+
+ for (int64_t i1 = 0; i1 < ne01; i1++) {
+ float * src = (float *) ((char *) src0->data + i1*nb01);
+ int32_t * dst_ = (int32_t *) ((char *) dst->data + i1*nb0);
+ int v = 0;
+ ggml_vec_argmax_f32(ne00, &v, src);
+ dst_[0] = v;
+ }
+}
+
+static void ggml_compute_forward_argmax(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_argmax_f32(params, src0, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
+// ggml_compute_forward_repeat
+
+static void ggml_compute_forward_repeat_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ GGML_ASSERT(params->ith == 0);
+ GGML_ASSERT(ggml_can_repeat(src0, dst));
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ GGML_TENSOR_UNARY_OP_LOCALS;
// guaranteed to be an integer due to the check in ggml_can_repeat
const int nr0 = (int)(ne0/ne00);
@@ -9711,25 +9644,7 @@ static void ggml_compute_forward_repeat_back_f32(
return;
}
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
// guaranteed to be an integer due to the check in ggml_can_repeat
const int nr0 = (int)(ne00/ne0);
@@ -9959,6 +9874,90 @@ static void ggml_compute_forward_step(
}
}
+// ggml_compute_forward_tanh
+
+static void ggml_compute_forward_tanh_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ assert(params->ith == 0);
+ assert(ggml_are_same_shape(src0, dst));
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ const int n = ggml_nrows(src0);
+ const int nc = src0->ne[0];
+
+ assert(dst->nb[0] == sizeof(float));
+ assert(src0->nb[0] == sizeof(float));
+
+ for (int i = 0; i < n; i++) {
+ ggml_vec_tanh_f32(nc,
+ (float *) ((char *) dst->data + i*( dst->nb[1])),
+ (float *) ((char *) src0->data + i*(src0->nb[1])));
+ }
+}
+
+static void ggml_compute_forward_tanh(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_tanh_f32(params, src0, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
+// ggml_compute_forward_elu
+
+static void ggml_compute_forward_elu_f32(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ assert(params->ith == 0);
+ assert(ggml_are_same_shape(src0, dst));
+
+ if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
+ return;
+ }
+
+ const int n = ggml_nrows(src0);
+ const int nc = src0->ne[0];
+
+ assert(dst->nb[0] == sizeof(float));
+ assert(src0->nb[0] == sizeof(float));
+
+ for (int i = 0; i < n; i++) {
+ ggml_vec_elu_f32(nc,
+ (float *) ((char *) dst->data + i*( dst->nb[1])),
+ (float *) ((char *) src0->data + i*(src0->nb[1])));
+ }
+}
+
+static void ggml_compute_forward_elu(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ struct ggml_tensor * dst) {
+ switch (src0->type) {
+ case GGML_TYPE_F32:
+ {
+ ggml_compute_forward_elu_f32(params, src0, dst);
+ } break;
+ default:
+ {
+ GGML_ASSERT(false);
+ } break;
+ }
+}
+
// ggml_compute_forward_relu
static void ggml_compute_forward_relu_f32(
@@ -10260,18 +10259,7 @@ static void ggml_compute_forward_norm_f32(
const int ith = params->ith;
const int nth = params->nth;
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const float eps = 1e-5f; // TODO: make this a parameter
@@ -10337,18 +10325,7 @@ static void ggml_compute_forward_rms_norm_f32(
const int ith = params->ith;
const int nth = params->nth;
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
const float eps = 1e-6f; // TODO: make this a parameter
@@ -10413,22 +10390,7 @@ static void ggml_compute_forward_rms_norm_back_f32(
const int ith = params->ith;
const int nth = params->nth;
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
-
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const float eps = 1e-6f; // TODO: make this a parameter
@@ -10624,41 +10586,7 @@ static void ggml_compute_forward_mul_mat_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
-#if defined(GGML_USE_ACCELERATE) || defined(GGML_USE_OPENBLAS)
- const int64_t ne10 = src1->ne[0];
-#endif
- const int64_t ne11 = src1->ne[1];
-#ifndef NDEBUG
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int nb00 = src0->nb[0];
-#endif
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
-#ifndef NDEBUG
- const int nb10 = src1->nb[0];
-#endif
- const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- const int nb13 = src1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
@@ -10795,37 +10723,10 @@ static void ggml_compute_forward_mul_mat_f16_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
//const int64_t ne = ne0*ne1*ne2*ne3;
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- const int nb13 = src1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
-
const int ith = params->ith;
const int nth = params->nth;
@@ -10995,35 +10896,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- const int nb13 = src1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
@@ -11039,7 +10912,7 @@ static void ggml_compute_forward_mul_mat_q_f32(
enum ggml_type const vec_dot_type = quantize_fns[type].vec_dot_type;
// we don't support permuted src0 or src1
- GGML_ASSERT(nb00 == (int) GGML_TYPE_SIZE[type]);
+ GGML_ASSERT(nb00 == GGML_TYPE_SIZE[type]);
GGML_ASSERT(nb10 == sizeof(float));
// dst cannot be transposed or permuted
@@ -11233,35 +11106,7 @@ static void ggml_compute_forward_out_prod_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- //const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- const int nb13 = src1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
@@ -11496,15 +11341,8 @@ static void ggml_compute_forward_set_f32(
const int nr = ggml_nrows(src1);
const int nc = src1->ne[0];
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- const int64_t ne12 = src1->ne[2];
- const int64_t ne13 = src1->ne[3];
-
- const size_t nb10 = src1->nb[0];
- const size_t nb11 = src1->nb[1];
- const size_t nb12 = src1->nb[2];
- const size_t nb13 = src1->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
+ GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
// src0 and dst as viewed during set
const size_t nb0 = ggml_element_size(src0);
@@ -11895,29 +11733,14 @@ static void ggml_compute_forward_diag_f32(
// TODO: handle transposed/permuted matrices
- const int ne00 = src0->ne[0];
- const int ne01 = src0->ne[1];
- const int ne02 = src0->ne[2];
- const int ne03 = src0->ne[3];
- const int ne0 = dst->ne[0];
- const int ne1 = dst->ne[1];
- const int ne2 = dst->ne[2];
- const int ne3 = dst->ne[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
+
GGML_ASSERT(ne00 == ne0);
GGML_ASSERT(ne00 == ne1);
GGML_ASSERT(ne01 == 1);
GGML_ASSERT(ne02 == ne2);
GGML_ASSERT(ne03 == ne3);
- const int nb00 = src0->nb[0];
- //const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
-
GGML_ASSERT(nb00 == sizeof(float));
GGML_ASSERT(nb0 == sizeof(float));
@@ -12494,20 +12317,7 @@ static void ggml_compute_forward_rope_f32(
assert(n_past >= 0);
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -12634,20 +12444,7 @@ static void ggml_compute_forward_rope_f16(
assert(n_past >= 0);
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
+ GGML_TENSOR_UNARY_OP_LOCALS;
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -12800,21 +12597,7 @@ static void ggml_compute_forward_rope_back_f32(
assert(n_past >= 0);
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
+ GGML_TENSOR_UNARY_OP_LOCALS;
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -12913,21 +12696,7 @@ static void ggml_compute_forward_rope_back_f16(
assert(n_past >= 0);
- const size_t nb00 = src0->nb[0];
- const size_t nb01 = src0->nb[1];
- const size_t nb02 = src0->nb[2];
- const size_t nb03 = src0->nb[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const size_t nb0 = dst->nb[0];
- const size_t nb1 = dst->nb[1];
- const size_t nb2 = dst->nb[2];
- const size_t nb3 = dst->nb[3];
-
+ GGML_TENSOR_UNARY_OP_LOCALS;
//printf("ne0: %d, ne1: %d, ne2: %d, ne3: %d\n", ne0, ne1, ne2, ne3);
//printf("n_past = %d, ne2 = %d\n", n_past, ne2);
@@ -13025,7 +12794,7 @@ static void ggml_compute_forward_rope_back(
}
}
-// ggml_compute_forward_conv_1d_s1_ph
+// ggml_compute_forward_conv_1d
static void ggml_compute_forward_conv_1d_s1_ph_f16_f32(
const struct ggml_compute_params * params,
@@ -13039,36 +12808,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f16_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- //const int64_t ne12 = src1->ne[2];
- //const int64_t ne13 = src1->ne[3];
-
- //const int64_t ne0 = dst->ne[0];
- //const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
- //const int64_t ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- //const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- //const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- //const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
@@ -13159,36 +12899,7 @@ static void ggml_compute_forward_conv_1d_s1_ph_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- //const int64_t ne12 = src1->ne[2];
- //const int64_t ne13 = src1->ne[3];
-
- //const int64_t ne0 = dst->ne[0];
- //const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
- //const int64_t ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- //const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- //const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- //const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
@@ -13288,8 +12999,6 @@ static void ggml_compute_forward_conv_1d_s1_ph(
}
}
-// ggml_compute_forward_conv_1d_s2_ph
-
static void ggml_compute_forward_conv_1d_s2_ph_f16_f32(
const struct ggml_compute_params * params,
const struct ggml_tensor * src0,
@@ -13302,36 +13011,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f16_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- //const int64_t ne12 = src1->ne[2];
- //const int64_t ne13 = src1->ne[3];
-
- //const int64_t ne0 = dst->ne[0];
- //const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
- //const int64_t ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- //const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- //const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- //const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
@@ -13422,36 +13102,7 @@ static void ggml_compute_forward_conv_1d_s2_ph_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne10 = src1->ne[0];
- const int64_t ne11 = src1->ne[1];
- //const int64_t ne12 = src1->ne[2];
- //const int64_t ne13 = src1->ne[3];
-
- //const int64_t ne0 = dst->ne[0];
- //const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
- //const int64_t ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- const int nb01 = src0->nb[1];
- const int nb02 = src0->nb[2];
- //const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- const int nb11 = src1->nb[1];
- //const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- //const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
@@ -13551,6 +13202,28 @@ static void ggml_compute_forward_conv_1d_s2_ph(
}
}
+// ggml_compute_forward_conv_1d
+
+static void ggml_compute_forward_conv_1d(
+ const struct ggml_compute_params * params,
+ const struct ggml_tensor * src0,
+ const struct ggml_tensor * src1,
+ const struct ggml_tensor * opt0,
+ struct ggml_tensor * dst) {
+ const int32_t s0 = ((const int32_t*)(opt0->data))[0];
+ const int32_t p0 = ((const int32_t*)(opt0->data))[1];
+ const int32_t d0 = ((const int32_t*)(opt0->data))[2];
+ GGML_ASSERT(d0 == 1); // dilation not supported
+ GGML_ASSERT(p0 == src0->ne[0]/2); // only half padding supported
+ if (s0 == 1) {
+ ggml_compute_forward_conv_1d_s1_ph(params, src0, src1, dst);
+ } else if (s0 == 2) {
+ ggml_compute_forward_conv_1d_s2_ph(params, src0, src1, dst);
+ } else {
+ GGML_ASSERT(false); // only stride 1 and 2 supported
+ };
+}
+
// ggml_compute_forward_conv_2d_sk_p0
static void ggml_compute_forward_conv_2d_sk_p0_f16_f32(
@@ -13565,36 +13238,7 @@ static void ggml_compute_forward_conv_2d_sk_p0_f16_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int ne00 = src0->ne[0];
- const int ne01 = src0->ne[1];
- const int ne02 = src0->ne[2];
- //const int ne03 = src0->ne[3];
-
- const int ne10 = src1->ne[0];
- //const int ne11 = src1->ne[1];
- const int ne12 = src1->ne[2];
- //const int ne13 = src1->ne[3];
-
- const int ne0 = dst->ne[0];
- const int ne1 = dst->ne[1];
- const int ne2 = dst->ne[2];
- //const int ne3 = dst->ne[3];
- //const int ne = ne0*ne1*ne2*ne3;
-
- const int nb00 = src0->nb[0];
- //const int nb01 = src0->nb[1];
- //const int nb02 = src0->nb[2];
- const int nb03 = src0->nb[3];
-
- const int nb10 = src1->nb[0];
- //const int nb11 = src1->nb[1];
- const int nb12 = src1->nb[2];
- //const int nb13 = src1->nb[3];
-
- //const int nb0 = dst->nb[0];
- //const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- //const int nb3 = dst->nb[3];
+ GGML_TENSOR_BINARY_OP_LOCALS;
const int ith = params->ith;
const int nth = params->nth;
@@ -13687,6 +13331,34 @@ static void ggml_compute_forward_conv_2d_sk_p0(
}
}
+// ggml_compute_forward_conv_2d
+
+static void ggml_compute_forward_conv_2d(
+ const struct ggml_compute_params* params,
+ const struct ggml_tensor* src0,
+ const struct ggml_tensor* src1,
+ const struct ggml_tensor* opt0,
+ struct ggml_tensor* dst) {
+ const int32_t s0 = ((const int32_t*)(opt0->data))[0];
+ const int32_t s1 = ((const int32_t*)(opt0->data))[1];
+ const int32_t p0 = ((const int32_t*)(opt0->data))[2];
+ const int32_t p1 = ((const int32_t*)(opt0->data))[3];
+ const int32_t d0 = ((const int32_t*)(opt0->data))[4];
+ const int32_t d1 = ((const int32_t*)(opt0->data))[5];
+ GGML_ASSERT(d0 == 1); // dilation not supported
+ GGML_ASSERT(d1 == 1);
+ GGML_ASSERT(p0 == 0); // padding not supported
+ GGML_ASSERT(p1 == 0);
+
+ if (s0 == src0->ne[0] && s1 == src0->ne[1]) {
+ ggml_compute_forward_conv_2d_sk_p0(params, src0, src1, dst);
+ }
+ else {
+ GGML_ASSERT(false); // only stride equal to kernel size is supported
+ };
+}
+
+
// ggml_compute_forward_flash_attn
static void ggml_compute_forward_flash_attn_f32(
@@ -13699,45 +13371,14 @@ static void ggml_compute_forward_flash_attn_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t neq0 = q->ne[0];
- const int64_t neq1 = q->ne[1];
- const int64_t neq2 = q->ne[2];
- const int64_t neq3 = q->ne[3];
-
- const int64_t nek0 = k->ne[0];
- const int64_t nek1 = k->ne[1];
- //const int64_t nek2 = k->ne[2];
- //const int64_t nek3 = k->ne[3];
-
- //const int64_t nev0 = v->ne[0];
- const int64_t nev1 = v->ne[1];
- //const int64_t nev2 = v->ne[2];
- //const int64_t nev3 = v->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
-
- const int nbk0 = k->nb[0];
- const int nbk1 = k->nb[1];
- const int nbk2 = k->nb[2];
- const int nbk3 = k->nb[3];
-
- const int nbq0 = q->nb[0];
- const int nbq1 = q->nb[1];
- const int nbq2 = q->nb[2];
- const int nbq3 = q->nb[3];
-
- const int nbv0 = v->nb[0];
- const int nbv1 = v->nb[1];
- const int nbv2 = v->nb[2];
- const int nbv3 = v->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
const int ith = params->ith;
const int nth = params->nth;
@@ -13908,45 +13549,14 @@ static void ggml_compute_forward_flash_attn_f16(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t neq0 = q->ne[0];
- const int64_t neq1 = q->ne[1];
- const int64_t neq2 = q->ne[2];
- const int64_t neq3 = q->ne[3];
-
- const int64_t nek0 = k->ne[0];
- const int64_t nek1 = k->ne[1];
- //const int64_t nek2 = k->ne[2];
- //const int64_t nek3 = k->ne[3];
-
- //const int64_t nev0 = v->ne[0];
- const int64_t nev1 = v->ne[1];
- //const int64_t nev2 = v->ne[2];
- //const int64_t nev3 = v->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- //const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
-
- const int nbk0 = k->nb[0];
- const int nbk1 = k->nb[1];
- const int nbk2 = k->nb[2];
- const int nbk3 = k->nb[3];
-
- const int nbq0 = q->nb[0];
- const int nbq1 = q->nb[1];
- const int nbq2 = q->nb[2];
- const int nbq3 = q->nb[3];
-
- const int nbv0 = v->nb[0];
- const int nbv1 = v->nb[1];
- const int nbv2 = v->nb[2];
- const int nbv3 = v->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
const int ith = params->ith;
const int nth = params->nth;
@@ -14180,65 +13790,18 @@ static void ggml_compute_forward_flash_ff_f16(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t nea0 = a->ne[0];
- const int64_t nea1 = a->ne[1];
- const int64_t nea2 = a->ne[2];
- const int64_t nea3 = a->ne[3];
-
- const int64_t neb00 = b0->ne[0];
- const int64_t neb01 = b0->ne[1];
- //const int64_t neb02 = b0->ne[2];
- //const int64_t neb03 = b0->ne[3];
-
- const int64_t neb10 = b1->ne[0];
- const int64_t neb11 = b1->ne[1];
- //const int64_t neb12 = b1->ne[2];
- //const int64_t neb13 = b1->ne[3];
-
- const int64_t nec00 = c0->ne[0];
- const int64_t nec01 = c0->ne[1];
- //const int64_t nec02 = c0->ne[2];
- //const int64_t nec03 = c0->ne[3];
-
- const int64_t nec10 = c1->ne[0];
- const int64_t nec11 = c1->ne[1];
- //const int64_t nec12 = c1->ne[2];
- //const int64_t nec13 = c1->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- //const int64_t ne3 = dst->ne[3];
-
- const int nba0 = a->nb[0];
- const int nba1 = a->nb[1];
- const int nba2 = a->nb[2];
- const int nba3 = a->nb[3];
-
- const int nbb00 = b0->nb[0];
- const int nbb01 = b0->nb[1];
- const int nbb02 = b0->nb[2];
- const int nbb03 = b0->nb[3];
-
- const int nbb10 = b1->nb[0];
- //const int nbb11 = b1->nb[1];
- //const int nbb12 = b1->nb[2];
- //const int nbb13 = b1->nb[3];
-
- const int nbc00 = c0->nb[0];
- const int nbc01 = c0->nb[1];
- const int nbc02 = c0->nb[2];
- const int nbc03 = c0->nb[3];
-
- const int nbc10 = c1->nb[0];
- //const int nbc11 = c1->nb[1];
- //const int nbc12 = c1->nb[2];
- //const int nbc13 = c1->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, nea, a, ne);
+ GGML_TENSOR_LOCALS(size_t, nba, a, nb);
+ GGML_TENSOR_LOCALS(int64_t, neb0, b0, ne);
+ GGML_TENSOR_LOCALS(size_t, nbb0, b0, nb);
+ GGML_TENSOR_LOCALS(int64_t, neb1, b1, ne);
+ GGML_TENSOR_LOCALS(size_t, nbb1, b1, nb);
+ GGML_TENSOR_LOCALS(int64_t, nec0, c0, ne);
+ GGML_TENSOR_LOCALS(size_t, nbc0, c0, nb);
+ GGML_TENSOR_LOCALS(int64_t, nec1, c1, ne);
+ GGML_TENSOR_LOCALS(size_t, nbc1, c1, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
const int ith = params->ith;
const int nth = params->nth;
@@ -14386,55 +13949,16 @@ static void ggml_compute_forward_flash_attn_back_f32(
int64_t t0 = ggml_perf_time_us();
UNUSED(t0);
- const int64_t neq0 = q->ne[0];
- const int64_t neq1 = q->ne[1];
- const int64_t neq2 = q->ne[2];
- const int64_t neq3 = q->ne[3];
-
- const int64_t nek0 = k->ne[0];
- const int64_t nek1 = k->ne[1];
- //const int64_t nek2 = k->ne[2];
- //const int64_t nek3 = k->ne[3];
-
- const int64_t nev0 = v->ne[0];
- const int64_t nev1 = v->ne[1];
- //const int64_t nev2 = v->ne[2];
- //const int64_t nev3 = v->ne[3];
-
- const int64_t ned0 = d->ne[0];
- const int64_t ned1 = d->ne[1];
- //const int64_t ned2 = d->ne[2];
- //const int64_t ned3 = d->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3];
-
- const int nbk0 = k->nb[0];
- const int nbk1 = k->nb[1];
- const int nbk2 = k->nb[2];
- const int nbk3 = k->nb[3];
-
- const int nbq0 = q->nb[0];
- const int nbq1 = q->nb[1];
- const int nbq2 = q->nb[2];
- const int nbq3 = q->nb[3];
-
- const int nbv0 = v->nb[0];
- const int nbv1 = v->nb[1];
- const int nbv2 = v->nb[2];
- const int nbv3 = v->nb[3];
-
- const int nbd0 = d->nb[0];
- const int nbd1 = d->nb[1];
- const int nbd2 = d->nb[2];
- const int nbd3 = d->nb[3];
-
- const int nb0 = dst->nb[0];
- const int nb1 = dst->nb[1];
- const int nb2 = dst->nb[2];
- const int nb3 = dst->nb[3];
+ GGML_TENSOR_LOCALS(int64_t, neq, q, ne);
+ GGML_TENSOR_LOCALS(size_t, nbq, q, nb);
+ GGML_TENSOR_LOCALS(int64_t, nek, k, ne);
+ GGML_TENSOR_LOCALS(size_t, nbk, k, nb);
+ GGML_TENSOR_LOCALS(int64_t, nev, v, ne);
+ GGML_TENSOR_LOCALS(size_t, nbv, v, nb);
+ GGML_TENSOR_LOCALS(int64_t, ned, d, ne);
+ GGML_TENSOR_LOCALS(size_t, nbd, d, nb);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
+ GGML_TENSOR_LOCALS(size_t, nb, dst, nb);
const int ith = params->ith;
const int nth = params->nth;
@@ -14792,15 +14316,8 @@ static void ggml_compute_forward_win_part_f32(
return;
}
- const int64_t ne00 = src0->ne[0]; UNUSED(ne00);
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- const int64_t ne03 = src0->ne[3]; UNUSED(ne03);
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
- const int64_t ne3 = dst->ne[3]; UNUSED(ne3);
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
const int32_t nep0 = ((const int32_t *)(opt0->data))[0];
const int32_t nep1 = ((const int32_t *)(opt0->data))[1];
@@ -14863,14 +14380,8 @@ static void ggml_compute_forward_win_unpart_f32(
return;
}
- const int64_t ne00 = src0->ne[0];
- const int64_t ne01 = src0->ne[1];
- const int64_t ne02 = src0->ne[2];
- //const int64_t ne03 = src0->ne[3];
-
- const int64_t ne0 = dst->ne[0];
- const int64_t ne1 = dst->ne[1];
- const int64_t ne2 = dst->ne[2];
+ GGML_TENSOR_LOCALS(int64_t, ne0, src0, ne);
+ GGML_TENSOR_LOCALS(int64_t, ne, dst, ne);
const int32_t w = ((const int32_t *)(opt0->data))[0];
@@ -15468,6 +14979,10 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
{
ggml_compute_forward_mean(params, tensor->src0, tensor);
} break;
+ case GGML_OP_ARGMAX:
+ {
+ ggml_compute_forward_argmax(params, tensor->src0, tensor);
+ } break;
case GGML_OP_REPEAT:
{
ggml_compute_forward_repeat(params, tensor->src0, tensor);
@@ -15492,6 +15007,14 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
{
ggml_compute_forward_step(params, tensor->src0, tensor);
} break;
+ case GGML_OP_TANH:
+ {
+ ggml_compute_forward_tanh(params, tensor->src0, tensor);
+ } break;
+ case GGML_OP_ELU:
+ {
+ ggml_compute_forward_elu(params, tensor->src0, tensor);
+ } break;
case GGML_OP_RELU:
{
ggml_compute_forward_relu(params, tensor->src0, tensor);
@@ -15608,17 +15131,13 @@ static void ggml_compute_forward(struct ggml_compute_params * params, struct ggm
{
ggml_compute_forward_clamp(params, tensor->src0, tensor->src1, tensor);
} break;
- case GGML_OP_CONV_1D_S1_PH:
+ case GGML_OP_CONV_1D:
{
- ggml_compute_forward_conv_1d_s1_ph(params, tensor->src0, tensor->src1, tensor);
+ ggml_compute_forward_conv_1d(params, tensor->src0, tensor->src1, tensor->opt[0], tensor);
} break;
- case GGML_OP_CONV_1D_S2_PH:
+ case GGML_OP_CONV_2D:
{
- ggml_compute_forward_conv_1d_s2_ph(params, tensor->src0, tensor->src1, tensor);
- } break;
- case GGML_OP_CONV_2D_SK_P0:
- {
- ggml_compute_forward_conv_2d_sk_p0(params, tensor->src0, tensor->src1, tensor);
+ ggml_compute_forward_conv_2d(params, tensor->src0, tensor->src1, tensor->opt[0], tensor);
} break;
case GGML_OP_FLASH_ATTN:
{
@@ -15867,6 +15386,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
}
} break;
case GGML_OP_MEAN:
+ case GGML_OP_ARGMAX:
{
GGML_ASSERT(false); // TODO: implement
} break;
@@ -15920,6 +15440,14 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
// noop
}
} break;
+ case GGML_OP_TANH:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
+ case GGML_OP_ELU:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
case GGML_OP_RELU:
{
if (src0->grad) {
@@ -15939,14 +15467,6 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
{
GGML_ASSERT(false); // TODO: not implemented
} break;
- case GGML_OP_ALIBI:
- {
- GGML_ASSERT(false); // TODO: not implemented
- } break;
- case GGML_OP_CLAMP:
- {
- GGML_ASSERT(false); // TODO: not implemented
- } break;
case GGML_OP_SILU:
{
// necessary for llama
@@ -16263,7 +15783,7 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
// necessary for llama
if (src0->grad) {
assert(src1->type == GGML_TYPE_I32);
- assert(ggml_nelements(src1) == 3);
+ assert(ggml_nelements(src1) == 4);
const int n_past = ((int32_t *) src1->data)[0];
const int n_dims = ((int32_t *) src1->data)[1];
const int mode = ((int32_t *) src1->data)[2];
@@ -16303,15 +15823,19 @@ static void ggml_compute_backward(struct ggml_context * ctx, struct ggml_tensor
// noop
}
} break;
- case GGML_OP_CONV_1D_S1_PH:
+ case GGML_OP_ALIBI:
{
GGML_ASSERT(false); // TODO: not implemented
} break;
- case GGML_OP_CONV_1D_S2_PH:
+ case GGML_OP_CLAMP:
{
GGML_ASSERT(false); // TODO: not implemented
} break;
- case GGML_OP_CONV_2D_SK_P0:
+ case GGML_OP_CONV_1D:
+ {
+ GGML_ASSERT(false); // TODO: not implemented
+ } break;
+ case GGML_OP_CONV_2D:
{
GGML_ASSERT(false); // TODO: not implemented
} break;
@@ -16968,12 +16492,15 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
case GGML_OP_SUM:
case GGML_OP_SUM_ROWS:
case GGML_OP_MEAN:
+ case GGML_OP_ARGMAX:
case GGML_OP_REPEAT:
case GGML_OP_REPEAT_BACK:
case GGML_OP_ABS:
case GGML_OP_SGN:
case GGML_OP_NEG:
case GGML_OP_STEP:
+ case GGML_OP_TANH:
+ case GGML_OP_ELU:
case GGML_OP_RELU:
{
node->n_tasks = 1;
@@ -17087,8 +16614,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
{
node->n_tasks = 1; //TODO
} break;
- case GGML_OP_CONV_1D_S1_PH:
- case GGML_OP_CONV_1D_S2_PH:
+ case GGML_OP_CONV_1D:
{
node->n_tasks = n_threads;
@@ -17117,7 +16643,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph)
work_size = MAX(work_size, cur);
} break;
- case GGML_OP_CONV_2D_SK_P0:
+ case GGML_OP_CONV_2D:
{
node->n_tasks = n_threads;
@@ -17479,13 +17005,6 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
fwrite(&nb, sizeof(uint64_t), 1, fout);
}
- // store the pointer address
- {
- const uint64_t ptr = (uint64_t) tensor->data;
-
- fwrite(&ptr, sizeof(uint64_t), 1, fout);
- }
-
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
// dump the data
@@ -17519,13 +17038,6 @@ void ggml_graph_export(const struct ggml_cgraph * cgraph, const char * fname) {
fwrite(&nb, sizeof(uint64_t), 1, fout);
}
- // store the pointer address
- {
- const uint64_t ptr = (uint64_t) tensor->data;
-
- fwrite(&ptr, sizeof(uint64_t), 1, fout);
- }
-
fwrite(tensor->name, sizeof(char), GGML_MAX_NAME, fout);
// output the op arguments
@@ -17710,8 +17222,6 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
tensor->op = (enum ggml_op) op;
- uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur);
-
memcpy(tensor->name, ptr, GGML_MAX_NAME); ptr += GGML_MAX_NAME;
tensor->data = (void *) ptr;
@@ -17757,8 +17267,6 @@ struct ggml_cgraph ggml_graph_import(const char * fname, struct ggml_context **
nb[j] = nb_cur;
}
- uint64_t ptr_cur = *(const uint64_t *) ptr; ptr += sizeof(ptr_cur); // TODO: not yet used
-
const char * ptr_name = ptr; ptr += GGML_MAX_NAME;
const int32_t * ptr_arg_idx = (const int32_t *) ptr; ptr += (2 + GGML_MAX_OPT)*sizeof(int32_t);
diff --git a/ggml.h b/ggml.h
index 11b51f8..0af96c7 100644
--- a/ggml.h
+++ b/ggml.h
@@ -201,6 +201,8 @@
#define GGML_MAX_NAME 48
#define GGML_DEFAULT_N_THREADS 4
+#define GGML_UNUSED(x) (void)(x)
+
#define GGML_ASSERT(x) \
do { \
if (!(x)) { \
@@ -209,6 +211,30 @@
} \
} while (0)
+// used to copy a tensor's number of elements (ne) and stride in bytes (nb) into local variables.
+// the main purpose is to reduce code duplication and improve readability.
+//
+// example:
+//
+// GGML_TENSOR_LOCALS(int64_t, ne1, src1, ne);
+// GGML_TENSOR_LOCALS(size_t, nb1, src1, nb);
+//
+#define GGML_TENSOR_LOCALS_1(type, prefix, pointer, array) \
+ const type prefix##0 = (pointer)->array[0]; \
+ GGML_UNUSED(prefix##0);
+#define GGML_TENSOR_LOCALS_2(type, prefix, pointer, array) \
+ GGML_TENSOR_LOCALS_1 (type, prefix, pointer, array) \
+ const type prefix##1 = (pointer)->array[1]; \
+ GGML_UNUSED(prefix##1);
+#define GGML_TENSOR_LOCALS_3(type, prefix, pointer, array) \
+ GGML_TENSOR_LOCALS_2 (type, prefix, pointer, array) \
+ const type prefix##2 = (pointer)->array[2]; \
+ GGML_UNUSED(prefix##2);
+#define GGML_TENSOR_LOCALS(type, prefix, pointer, array) \
+ GGML_TENSOR_LOCALS_3 (type, prefix, pointer, array) \
+ const type prefix##3 = (pointer)->array[3]; \
+ GGML_UNUSED(prefix##3);
+
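As a sketch of intended use (an assumption, not code from this patch): after GGML_TENSOR_LOCALS(size_t, nb1, src1, nb), a compute kernel can address element (i0, i1, i2, i3) of an F32 tensor through the byte strides:

    const float v = *(const float *)((const char *) src1->data
                                     + i0*nb10 + i1*nb11 + i2*nb12 + i3*nb13);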
#ifdef __cplusplus
extern "C" {
#endif
@@ -295,12 +321,15 @@ extern "C" {
GGML_OP_SUM,
GGML_OP_SUM_ROWS,
GGML_OP_MEAN,
+ GGML_OP_ARGMAX,
GGML_OP_REPEAT,
GGML_OP_REPEAT_BACK,
GGML_OP_ABS,
GGML_OP_SGN,
GGML_OP_NEG,
GGML_OP_STEP,
+ GGML_OP_TANH,
+ GGML_OP_ELU,
GGML_OP_RELU,
GGML_OP_GELU,
GGML_OP_GELU_QUICK,
@@ -332,9 +361,8 @@ extern "C" {
GGML_OP_ROPE_BACK,
GGML_OP_ALIBI,
GGML_OP_CLAMP,
- GGML_OP_CONV_1D_S1_PH,
- GGML_OP_CONV_1D_S2_PH,
- GGML_OP_CONV_2D_SK_P0,
+ GGML_OP_CONV_1D,
+ GGML_OP_CONV_2D,
GGML_OP_FLASH_ATTN,
GGML_OP_FLASH_FF,
@@ -690,6 +718,11 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
+ // argmax along rows
+ GGML_API struct ggml_tensor * ggml_argmax(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
// if a is the same shape as b, and a is not parameter, return a
// otherwise, return a new tensor: repeat(a) to fit in b
GGML_API struct ggml_tensor * ggml_repeat(
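A minimal usage sketch for the new op; the result type and shape (I32, one index per row) are inferred from the "argmax along rows" comment rather than stated in this diff, and the graph calls follow the ggml_graph_compute(ctx, cgraph) signature visible in ggml.c above:

    struct ggml_tensor * a   = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, ne0, ne1);
    struct ggml_tensor * idx = ggml_argmax(ctx, a);

    struct ggml_cgraph gf = ggml_build_forward(idx);
    ggml_graph_compute(ctx, &gf);

    const int32_t * row_idx = (const int32_t *) idx->data; // row_idx[i] = argmax of row i (assumed layout)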
@@ -734,6 +767,22 @@ extern "C" {
struct ggml_context * ctx,
struct ggml_tensor * a);
+ GGML_API struct ggml_tensor * ggml_tanh(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_tanh_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_elu(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
+ GGML_API struct ggml_tensor * ggml_elu_inplace(
+ struct ggml_context * ctx,
+ struct ggml_tensor * a);
+
GGML_API struct ggml_tensor * ggml_relu(
struct ggml_context * ctx,
struct ggml_tensor * a);
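As with the other unary ops in this header, each new activation comes in a regular and an _inplace variant; a minimal sketch, assuming the usual ggml convention that _inplace writes the result into its input's buffer:

    struct ggml_tensor * t = ggml_tanh(ctx, x);         // allocates a new result tensor
    struct ggml_tensor * u = ggml_elu_inplace(ctx, t);  // result reuses t's storage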
@@ -1084,58 +1133,33 @@ extern "C" {
float min,
float max);
- // TODO: implement general-purpose convolutions
- // GGML_API struct ggml_tensor * ggml_conv_1d(
- // struct ggml_context * ctx,
- // struct ggml_tensor * a,
- // struct ggml_tensor * b,
- // int s0
- // int p0,
- // int d0);
- //
- // GGML_API struct ggml_tensor * ggml_conv_2d(
- // struct ggml_context * ctx,
- // struct ggml_tensor * a,
- // struct ggml_tensor * b,
- // int s0,
- // int s1,
- // int p0,
- // int p1,
- // int d0,
- // int d1);
-
- // padding = half
- // TODO: we don't support extra parameters for now
- // that's why we are hard-coding the stride, padding, and dilation
- // not great ..
- // example:
- // a: 3 80 768 1
- // b: 3000 80 1 1
- // res: 3000 768 1 1
- // used in whisper
- GGML_API struct ggml_tensor * ggml_conv_1d_s1_ph(
+ GGML_API struct ggml_tensor * ggml_conv_1d(
struct ggml_context * ctx,
struct ggml_tensor * a,
- struct ggml_tensor * b);
+ struct ggml_tensor * b,
+ int s0, // stride
+ int p0, // padding
+ int d0); // dilation
- // used in whisper
- GGML_API struct ggml_tensor * ggml_conv_1d_s2_ph(
+ GGML_API struct ggml_tensor * ggml_conv_2d(
struct ggml_context * ctx,
struct ggml_tensor * a,
- struct ggml_tensor * b);
+ struct ggml_tensor * b,
+ int s0,
+ int s1,
+ int p0,
+ int p1,
+ int d0,
+ int d1);
- // kernel size is a->ne[0] x a->ne[1]
- // stride is equal to kernel size
- // padding is zero
- // example:
- // a: 16 16 3 768
- // b: 1024 1024 3 1
- // res: 64 64 768 1
- // used in sam
- GGML_API struct ggml_tensor * ggml_conv_2d_sk_p0(
+ // conv_1d with padding = half
+ // alias for ggml_conv_1d(a, b, s, a->ne[0]/2, d)
+ GGML_API struct ggml_tensor* ggml_conv_1d_ph(
struct ggml_context * ctx,
struct ggml_tensor * a,
- struct ggml_tensor * b);
+ struct ggml_tensor * b,
+ int s,
+ int d);
GGML_API struct ggml_tensor * ggml_flash_attn(
struct ggml_context * ctx,
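For orientation, the shape examples dropped from the old comments map onto the generalized API as follows (a sketch; p0 = a->ne[0]/2 follows from the ggml_conv_1d_ph alias above, and the output-length formula is standard convolution arithmetic, not something stated in this patch):

    // whisper-style conv (formerly ggml_conv_1d_s1_ph): a: 3 80 768, b: 3000 80
    struct ggml_tensor * c1 = ggml_conv_1d_ph(ctx, a, b, /*s=*/1, /*d=*/1);
    // == ggml_conv_1d(ctx, a, b, 1, a->ne[0]/2, 1)
    // L_out = (L_in + 2*p0 - d0*(k-1) - 1)/s0 + 1 = (3000 + 2 - 2 - 1)/1 + 1 = 3000

    // sam-style conv (formerly ggml_conv_2d_sk_p0): stride = kernel size, zero padding
    // a: 16 16 3 768, b: 1024 1024 3 1 -> res: 64 64 768 1
    struct ggml_tensor * c2 = ggml_conv_2d(ctx, a, b, /*s0=*/16, /*s1=*/16, 0, 0, 1, 1);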
diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh
index e6e39ff..574e518 100755
--- a/scripts/sync-ggml.sh
+++ b/scripts/sync-ggml.sh
@@ -1,6 +1,11 @@
#!/bin/bash
-cp -rpv ../ggml/src/ggml.c ./ggml.c
-cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
-cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
+cp -rpv ../ggml/src/ggml.c ./ggml.c
+cp -rpv ../ggml/src/ggml-cuda.h ./ggml-cuda.h
+cp -rpv ../ggml/src/ggml-cuda.cu ./ggml-cuda.cu
+cp -rpv ../ggml/src/ggml-opencl.h ./ggml-opencl.h
+cp -rpv ../ggml/src/ggml-opencl.cpp ./ggml-opencl.cpp
+cp -rpv ../ggml/src/ggml-metal.h ./ggml-metal.h
+cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m
+cp -rpv ../ggml/src/ggml-metal.metal ./ggml-metal.metal
cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h