From 5b2b2dc6ae8086bff7c9b3c17fb435cf319b7185 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Mon, 24 Jul 2023 14:46:21 +0300 Subject: ggml : sync (unary ops refactor, static-correctness) (#2370) * ggml : sync (unary ops, tests) ggml-ci * tests : remove unnecessary funcs --- tests/test-grad0.c | 479 +++++++++++++++++++++++++++++++++++++++++++---------- tests/test-opt.c | 6 +- 2 files changed, 397 insertions(+), 88 deletions(-) (limited to 'tests') diff --git a/tests/test-grad0.c b/tests/test-grad0.c index 01467bc..ef20bce 100644 --- a/tests/test-grad0.c +++ b/tests/test-grad0.c @@ -64,7 +64,7 @@ void get_random_dims(int64_t * dims, int ndims) { } } -struct ggml_tensor * get_random_tensor( +struct ggml_tensor * get_random_tensor_f32( struct ggml_context * ctx0, int ndims, int64_t ne[], @@ -112,7 +112,55 @@ struct ggml_tensor * get_random_tensor( return result; } -struct ggml_tensor * get_random_tensor_int( +struct ggml_tensor * get_random_tensor_f16( + struct ggml_context * ctx0, + int ndims, + int64_t ne[], + float fmin, + float fmax) { + struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne); + + switch (ndims) { + case 1: + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + break; + case 2: + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + break; + case 3: + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + break; + case 4: + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + } + break; + default: + assert(false); + }; + + return result; +} + +struct ggml_tensor * get_random_tensor_i32( struct ggml_context * ctx0, int ndims, int64_t ne[], @@ -160,23 +208,6 @@ struct ggml_tensor * get_random_tensor_int( return result; } -float get_element(const struct ggml_tensor * t, int idx) { - if (t->type == GGML_TYPE_F32) { - return ((float *)t->data)[idx]; - } - - if (t->type == GGML_TYPE_I32) { - return ((int32_t *)t->data)[idx]; - } - - assert(false); - return INFINITY; -} - -void set_element(struct ggml_tensor * t, int idx, float value) { - ((float *)t->data)[idx] = value; -} - void print_elements(const char* label, const struct ggml_tensor * t) { if (!t) { printf("%s: %s = null\n", __func__, label); @@ -186,7 +217,7 @@ void print_elements(const char* label, const struct ggml_tensor * t) { printf("%s: %s = [", __func__, label); for (int k = 0; k < nelements; ++k) { if (k > 0) { printf(", "); } - printf("%.5f", get_element(t, k)); + printf("%.5f", ggml_get_f32_1d(t, k)); } printf("] shape: ["); for (int k = 0; k < t->n_dims; ++k) { @@ -237,23 +268,23 @@ bool check_gradient( const int nelements = ggml_nelements(x[i]); for (int k = 0; k < nelements; ++k) { // compute gradient using finite differences - const float x0 = get_element(x[i], k); + const float x0 = ggml_get_f32_1d(x[i], k); const float xm = x0 - eps; const float xp = x0 + eps; - set_element(x[i], k, xp); + ggml_set_f32_1d(x[i], k, xp); ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); const float f0 = ggml_get_f32_1d(f, 0); - set_element(x[i], k, xm); + ggml_set_f32_1d(x[i], k, xm); ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); const float f1 = ggml_get_f32_1d(f, 0); const float g0 = (f0 - f1)/(2.0f*eps); - set_element(x[i], k, x0); + ggml_set_f32_1d(x[i], k, x0); // compute gradient using backward graph ggml_graph_reset (&gf); @@ -261,7 +292,7 @@ bool check_gradient( ggml_graph_compute_with_ctx(ctx0, &gb, n_threads); - const float g1 = get_element(x[i]->grad, k); + const float g1 = ggml_get_f32_1d(x[i]->grad, k); const float error_abs = fabsf(g0 - g1); const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0; @@ -392,19 +423,35 @@ int main(int argc, const char ** argv) { struct ggml_tensor * x[MAX_NARGS]; - // add + // add f32 { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); - check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); + check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); + } + } + + // add f16 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); + + check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f); } } @@ -414,7 +461,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -430,7 +477,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -446,7 +493,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -462,7 +509,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -478,7 +525,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -494,7 +541,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -510,7 +557,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -527,7 +574,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 4; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -537,6 +584,40 @@ int main(int argc, const char ** argv) { } } + // mean, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0])); + + check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // argmax + if (0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0])); + + check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + // repeat { int64_t ne2[4]; @@ -549,15 +630,36 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 2; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1])))); check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); } + } + + // repeat back + { + int64_t ne2[4]; + get_random_dims(ne2, 4); + + ne2[0] = ne[0] * ne2[0]; + ne2[1] = ne[1] * ne2[1]; + ne2[2] = 1; + ne2[3] = 1; + + const int nargs = 1; + for (int ndims = 1; ndims <= 2; ++ndims) { + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0])))); + check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); + } } // abs (finite differences do not work) @@ -566,7 +668,7 @@ int main(int argc, const char ** argv) { // for (int ndims = 1; ndims <= 2; ++ndims) { // for (int i = 0; i < nargs; ++i) { - // x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + // x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); // ggml_set_param(ctx0, x[i]); // } @@ -576,17 +678,82 @@ int main(int argc, const char ** argv) { // } //} + // sgn + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0])); + + check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // neg + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0])); + + check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // step + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0])); + + check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // tanh, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0])); + + check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + // mul_mat { const int nargs = 2; for (int ndims = 2; ndims <= 2; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); { int64_t ne2[4]; get_random_dims(ne2, 4); ne2[0] = ne[0]; - x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); } ggml_set_param(ctx0, x[0]); @@ -602,13 +769,63 @@ int main(int argc, const char ** argv) { } } + // elu, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0])); + + check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // relu + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0])); + + check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // gelu, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0])); + + check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + // silu { const int nargs = 1; for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -629,7 +846,7 @@ int main(int argc, const char ** argv) { for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } @@ -647,8 +864,8 @@ int main(int argc, const char ** argv) { ne2[0] = 1; for (int ndims = 1; ndims <= 2; ++ndims) { - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); ggml_set_param(ctx0, x[1]); @@ -659,20 +876,37 @@ int main(int argc, const char ** argv) { } } - // cpy + // cpy f32 { const int nargs = 2; for (int ndims = 1; ndims <= 2; ++ndims) { for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[i]); } // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); - check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // cpy f16 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); + + check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); } } @@ -689,8 +923,8 @@ int main(int argc, const char ** argv) { for (int i = 0; i < ndims; ++i) { ne2[0] *= ne[i]; } - x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -712,8 +946,8 @@ int main(int argc, const char ** argv) { for (int i = 0; i < ndims; ++i) { ne2[0] *= ne[i]; } - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -729,7 +963,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 1); @@ -737,7 +971,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 1); } - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); @@ -758,7 +992,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 2; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 2); @@ -766,7 +1000,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 2); } - x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -790,7 +1024,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 3; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 3); @@ -798,7 +1032,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 3); } - x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -824,7 +1058,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 4; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 4); @@ -832,7 +1066,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 4); } - x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -858,7 +1092,7 @@ int main(int argc, const char ** argv) { const int nargs = 2; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 1); @@ -866,7 +1100,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 1); } - x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); @@ -887,7 +1121,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 2; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); get_random_dims(ne2, 2); @@ -895,7 +1129,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 2); } - x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[1]); max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); @@ -915,7 +1149,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -941,7 +1175,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); get_random_dims(ne2, 2); while (ne2[0]*ne2[1] > ggml_nelements(x[0])) { @@ -971,7 +1205,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; for (int ndims = 1; ndims <= 4; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); get_random_dims(ne2, 3); while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) { @@ -1010,7 +1244,7 @@ int main(int argc, const char ** argv) { for (int i=ndims; i<4; ++i) { ne2[i] = 1; } - x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -1043,7 +1277,7 @@ int main(int argc, const char ** argv) { for (int i=ndims; i<4; ++i) { ne2[i] = 1; } - x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -1060,8 +1294,8 @@ int main(int argc, const char ** argv) { int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1}; const int nargs = 1; const int ndims = 2; - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]); ggml_set_param(ctx0, x[0]); @@ -1075,7 +1309,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; const int ndims = 2; - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); int n_past = irand(ne[0]); @@ -1090,7 +1324,7 @@ int main(int argc, const char ** argv) { const int nargs = 1; const int ndims = 2; - x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); int n_past = irand(ne[0]); @@ -1108,7 +1342,7 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 4); for (int ndims = 1; ndims <= 3; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0])); @@ -1125,8 +1359,8 @@ int main(int argc, const char ** argv) { get_random_dims(ne2, 4); for (int ndims = 1; ndims <= 3; ++ndims) { - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor(ctx0, ndims, ne2, 0.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f); ggml_set_param(ctx0, x[0]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1])); @@ -1136,7 +1370,7 @@ int main(int argc, const char ** argv) { } } - // rope + // rope f32 { const int nargs = 1; @@ -1148,7 +1382,7 @@ int main(int argc, const char ** argv) { for (int ndims = 3; ndims <= 4; ++ndims) { for (int mode = 0; mode < 4; ++mode) { for (int n_past = 1; n_past < ne2[2]; ++n_past) { - x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); ggml_set_param(ctx0, x[0]); @@ -1163,14 +1397,89 @@ int main(int argc, const char ** argv) { struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); - GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); - check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); + GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); + check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); + } + } + } + } + + // rope f16 + { + const int nargs = 1; + + int64_t ne2[4]; + get_random_dims(ne2, 4); + ne2[0] += ne2[0] % 2; + int n_rot = ne2[0]; + + for (int ndims = 3; ndims <= 4; ++ndims) { + for (int mode = 0; mode < 4; ++mode) { + for (int n_past = 1; n_past < ne2[2]; ++n_past) { + x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f); + + ggml_set_param(ctx0, x[0]); + + const bool skip_past = (mode & 1); + if (skip_past) { + // we have no past, so this would have to work on uninitialized memory. + // we only test the gradients here; + // skip_past should have no influence on gradient computation. + // so when other modes work, we assume that this does as well. + continue; + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); + + GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); + check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); + } + } + } + } + + // flash_attn f32 + { + const int nargs = 3; + + int64_t ne2[4]; + + get_random_dims(ne2, 4); + int64_t D = ne2[0]; + int64_t N = ne2[1]; + int64_t M = ne2[2] + N; + int64_t B = ne2[3]; + + for (int masked = 0; masked <= 1; ++masked) { + for (int ndims = 2; ndims <= 4; ++ndims) { + int64_t neq[4] = { D, N, B, ne[3] }; + int64_t nek[4] = { D, M, B, ne[3] }; + int64_t nev[4] = { M, D, B, ne[3] }; + if (ndims == 2) { + neq[2] = 1; neq[3] = 1; + nek[2] = 1; nek[3] = 1; + nev[2] = 1; nev[3] = 1; + } else if (ndims == 3) { + neq[3] = 1; + nek[3] = 1; + nev[3] = 1; } + x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f); + x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f); + x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f); + ggml_set_param(ctx0, x[0]); + ggml_set_param(ctx0, x[1]); + ggml_set_param(ctx0, x[2]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); + + check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); } } } - // flash_attn + // flash_attn f16, not yet fully implemented + if(0) { const int nargs = 3; @@ -1196,16 +1505,16 @@ int main(int argc, const char ** argv) { nek[3] = 1; nev[3] = 1; } - x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f); - x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f); - x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f); + x[0] = get_random_tensor_f16(ctx0, ndims, neq, -0.1250f, 0.1250f); + x[1] = get_random_tensor_f16(ctx0, ndims, nek, -0.1250f, 0.1250f); + x[2] = get_random_tensor_f16(ctx0, ndims, nev, -0.1250f, 0.1250f); ggml_set_param(ctx0, x[0]); ggml_set_param(ctx0, x[1]); ggml_set_param(ctx0, x[2]); struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); - check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); + check_gradient("flash_attn f16", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); } } } diff --git a/tests/test-opt.c b/tests/test-opt.c index 5531814..4eef62b 100644 --- a/tests/test-opt.c +++ b/tests/test-opt.c @@ -125,9 +125,9 @@ int main(void) { }; struct ggml_context * ctx = ggml_init(params); - int64_t ne1[4] = {4, 1024, 1, 1}; - int64_t ne2[4] = {4, 2048, 1, 1};; - int64_t ne3[4] = {1024, 2048, 1, 1}; + int64_t ne1[4] = {4, 128, 1, 1}; + int64_t ne2[4] = {4, 256, 1, 1};; + int64_t ne3[4] = {128, 256, 1, 1}; struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1); struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1); -- cgit v1.2.3