From 81844fbcfd93a162b7aeaea9e4f2ab1358f7f97e Mon Sep 17 00:00:00 2001 From: Eve <139727413+netrunnereve@users.noreply.github.com> Date: Wed, 2 Aug 2023 04:06:19 -0400 Subject: tests : Fix compilation warnings (Linux/GCC) (#2451) * fix hellaswag print format, cast away warning in test-double-float * c++11 cannot use designated initializers * add static to test-grad0.c internal functions * use memcpy in test-double-float.c * port c tests to c++ * use initializer list for ggml_init_params --- Makefile | 6 +- examples/common.cpp | 2 +- scripts/sync-ggml.sh | 4 +- tests/CMakeLists.txt | 6 +- tests/test-double-float.c | 53 -- tests/test-double-float.cpp | 55 ++ tests/test-grad0.c | 1525 ------------------------------------------- tests/test-grad0.cpp | 1525 +++++++++++++++++++++++++++++++++++++++++++ tests/test-opt.c | 211 ------ tests/test-opt.cpp | 212 ++++++ 10 files changed, 1801 insertions(+), 1798 deletions(-) delete mode 100644 tests/test-double-float.c create mode 100644 tests/test-double-float.cpp delete mode 100644 tests/test-grad0.c create mode 100644 tests/test-grad0.cpp delete mode 100644 tests/test-opt.c create mode 100644 tests/test-opt.cpp diff --git a/Makefile b/Makefile index 100614b..a692a39 100644 --- a/Makefile +++ b/Makefile @@ -411,13 +411,13 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) -tests/test-double-float: tests/test-double-float.c build-info.h ggml.o llama.o common.o $(OBJS) +tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) -tests/test-grad0: tests/test-grad0.c build-info.h ggml.o llama.o common.o $(OBJS) +tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) -tests/test-opt: tests/test-opt.c build-info.h ggml.o llama.o common.o $(OBJS) +tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS) diff --git a/examples/common.cpp b/examples/common.cpp index e643984..3e7c3b6 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -572,7 +572,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stdout, " --temp N temperature (default: %.1f)\n", (double)params.temp); fprintf(stdout, " --perplexity compute perplexity over each ctx window of the prompt\n"); fprintf(stdout, " --hellaswag compute HellaSwag score over random tasks from datafile supplied with -f\n"); - fprintf(stdout, " --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %d)\n", params.hellaswag_tasks); + fprintf(stdout, " --hellaswag-tasks N number of tasks to use when computing the HellaSwag score (default: %zu)\n", params.hellaswag_tasks); fprintf(stdout, " --keep N number of tokens to keep from the initial prompt (default: %d, -1 = all)\n", params.n_keep); fprintf(stdout, " --chunks N max number of chunks to process (default: %d, -1 = all)\n", params.n_chunks); if (llama_mlock_supported()) { diff --git a/scripts/sync-ggml.sh b/scripts/sync-ggml.sh index 02ea6ec..3d13e85 100755 --- a/scripts/sync-ggml.sh +++ b/scripts/sync-ggml.sh @@ -10,5 +10,5 @@ cp -rpv ../ggml/src/ggml-metal.m ./ggml-metal.m cp -rpv ../ggml/src/ggml-metal.metal 
./ggml-metal.metal
 cp -rpv ../ggml/include/ggml/ggml.h ./ggml.h
-cp -rpv ../ggml/tests/test-opt.c ./tests/test-opt.c
-cp -rpv ../ggml/tests/test-grad0.c ./tests/test-grad0.c
+cp -rpv ../ggml/tests/test-opt.cpp ./tests/test-opt.cpp
+cp -rpv ../ggml/tests/test-grad0.cpp ./tests/test-grad0.cpp
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 11ec6c7..1a40edb 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -6,10 +6,10 @@ function(llama_add_test source)
     add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN})
 endfunction()
 
-# llama_add_test(test-double-float.c) # SLOW
+# llama_add_test(test-double-float.cpp) # SLOW
 llama_add_test(test-quantize-fns.cpp)
 llama_add_test(test-quantize-perf.cpp)
 llama_add_test(test-sampling.cpp)
 llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin)
-llama_add_test(test-grad0.c) # SLOW
-# llama_add_test(test-opt.c) # SLOW
+llama_add_test(test-grad0.cpp) # SLOW
+# llama_add_test(test-opt.cpp) # SLOW
diff --git a/tests/test-double-float.c b/tests/test-double-float.c
deleted file mode 100644
index 89dafc9..0000000
--- a/tests/test-double-float.c
+++ /dev/null
@@ -1,53 +0,0 @@
-// These tests may take a long time!
-// They are to prove that conversion from double to float of various functions in ggml.c doesn't affect the result.
-// This is done by checking all finite (non-NaN, non-infinite) floats.
-
-#undef NDEBUG
-#include <assert.h>
-#include <immintrin.h>
-#include <math.h>
-#include <stdint.h>
-
-#pragma GCC diagnostic push
-#pragma GCC diagnostic ignored "-Wdouble-promotion"
-
-// ggml.c::quantize_row_q4_0_reference
-inline static uint8_t round_orig(float v0) { return ((int8_t) (round(v0))) + 8; }
-
-// ggml.c::ggml_silu_f32
-inline static float silu_orig(float x) {
-    return x/(1.0 + exp(-x));
-}
-
-#pragma GCC diagnostic pop
-
-// ggml.c::quantize_row_q4_0_reference
-inline static uint8_t round_float(float v0) { return (int8_t)roundf(v0) + 8; }
-
-// ggml.c::ggml_silu_f32
-inline static float silu_float(float x) {
-    return x/(1.0f + expf(-x));
-}
-
-int main(void) {
-    uint32_t x = UINT32_MAX;
-    do {
-        float f = *(float *)&x;
-        assert(!isfinite(f) || (round_orig(f) == round_float(f)));
-    } while (x--);
-
-#ifdef __F16C__
-    // GELU and SILU implementations are used with a FP16 lookup table.
-    // The original and float-only results are not equal for all inputs after converting to FP16.
-    // GELU is an approximation anyway (tanh), not tested here.
-    // For SILU, verify that the results are at least the closest floating point numbers, if the FP16 values don't match.
-    for (x = 0; x <= UINT16_MAX; x++) {
-        float f = _cvtsh_ss(x);
-        const float so = silu_orig(f);
-        const float sf = silu_float(f);
-        assert( (_cvtss_sh(so, 0) == _cvtss_sh(sf, 0))
-             || (nextafterf(so, sf) == sf)
-             || (nextafterf(sf, so) == so));
-    }
-#endif
-}
diff --git a/tests/test-double-float.cpp b/tests/test-double-float.cpp
new file mode 100644
index 0000000..b506f27
--- /dev/null
+++ b/tests/test-double-float.cpp
@@ -0,0 +1,55 @@
+// These tests may take a long time!
+// They are to prove that conversion from double to float of various functions in ggml.c doesn't affect the result.
+// This is done by checking all finite (non-NaN, non-infinite) floats.
+
+#undef NDEBUG
+#include <cassert>
+#include <immintrin.h>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdouble-promotion"
+
+// ggml.c::quantize_row_q4_0_reference
+inline static uint8_t round_orig(float v0) { return ((int8_t) (round(v0))) + 8; }
+
+// ggml.c::ggml_silu_f32
+inline static float silu_orig(float x) {
+    return x/(1.0 + exp(-x));
+}
+
+#pragma GCC diagnostic pop
+
+// ggml.c::quantize_row_q4_0_reference
+inline static uint8_t round_float(float v0) { return (int8_t)roundf(v0) + 8; }
+
+// ggml.c::ggml_silu_f32
+inline static float silu_float(float x) {
+    return x/(1.0f + expf(-x));
+}
+
+int main(void) {
+    uint32_t x = UINT32_MAX;
+    do {
+        float f;
+        memcpy(&f, &x, sizeof(x));
+        assert(!std::isfinite(f) || (round_orig(f) == round_float(f)));
+    } while (x--);
+
+#ifdef __F16C__
+    // GELU and SILU implementations are used with a FP16 lookup table.
+    // The original and float-only results are not equal for all inputs after converting to FP16.
+    // GELU is an approximation anyway (tanh), not tested here.
+    // For SILU, verify that the results are at least the closest floating point numbers, if the FP16 values don't match.
+    for (x = 0; x <= UINT16_MAX; x++) {
+        float f = _cvtsh_ss(x);
+        const float so = silu_orig(f);
+        const float sf = silu_float(f);
+        assert( (_cvtss_sh(so, 0) == _cvtss_sh(sf, 0))
+             || (nextafterf(so, sf) == sf)
+             || (nextafterf(sf, so) == so));
+    }
+#endif
+}
diff --git a/tests/test-grad0.c b/tests/test-grad0.c
deleted file mode 100644
index 6d31221..0000000
--- a/tests/test-grad0.c
+++ /dev/null
@@ -1,1525 +0,0 @@
-#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
-#include "ggml.h"
-
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#if defined(_MSC_VER)
-#pragma warning(disable: 4244 4267) // possible loss of data
-#endif
-
-#if defined(__GNUC__)
-#pragma GCC diagnostic ignored "-Wdouble-promotion"
-#endif
-
-#define MAX_NARGS 3
-
-#undef MIN
-#undef MAX
-#define MIN(a, b) ((a) < (b) ? (a) : (b))
-#define MAX(a, b) ((a) > (b) ? (a) : (b))
-
-#define GGML_SILU_FP16
-
-//
-// logging
-//
-
-#if (GGML_DEBUG >= 1)
-#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG(...)
-#endif
-
-#if (GGML_DEBUG >= 5)
-#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_5(...)
-#endif
-
-#if (GGML_DEBUG >= 10)
-#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_10(...)
-#endif
-
-#define GGML_PRINT(...)
printf(__VA_ARGS__) - -float frand(void) { - return (float)rand()/(float)RAND_MAX; -} - -int irand(int n) { - if (n == 0) return 0; - return rand()%n; -} - -void get_random_dims(int64_t * dims, int ndims) { - dims[0] = dims[1] = dims[2] = dims[3] = 1; - - for (int i = 0; i < ndims; i++) { - dims[i] = 1 + irand(4); - } -} - -struct ggml_tensor * get_random_tensor_f32( - struct ggml_context * ctx0, - int ndims, - int64_t ne[], - float fmin, - float fmax) { - struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne); - - switch (ndims) { - case 1: - for (int i0 = 0; i0 < ne[0]; i0++) { - ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin; - } - break; - case 2: - for (int i1 = 0; i1 < ne[1]; i1++) { - for (int i0 = 0; i0 < ne[0]; i0++) { - ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; - } - } - break; - case 3: - for (int i2 = 0; i2 < ne[2]; i2++) { - for (int i1 = 0; i1 < ne[1]; i1++) { - for (int i0 = 0; i0 < ne[0]; i0++) { - ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; - } - } - } - break; - case 4: - for (int i3 = 0; i3 < ne[3]; i3++) { - for (int i2 = 0; i2 < ne[2]; i2++) { - for (int i1 = 0; i1 < ne[1]; i1++) { - for (int i0 = 0; i0 < ne[0]; i0++) { - ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin; - } - } - } - } - break; - default: - assert(false); - }; - - return result; -} - -struct ggml_tensor * get_random_tensor_f16( - struct ggml_context * ctx0, - int ndims, - int64_t ne[], - float fmin, - float fmax) { - struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne); - - switch (ndims) { - case 1: - for (int i0 = 0; i0 < ne[0]; i0++) { - ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); - } - break; - case 2: - for (int i1 = 0; i1 < ne[1]; i1++) { - for (int i0 = 0; i0 < ne[0]; i0++) { - ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); - } - } - break; - case 3: - for (int i2 = 0; i2 < ne[2]; i2++) { - for (int i1 = 0; i1 < ne[1]; i1++) { - for (int i0 = 0; i0 < ne[0]; i0++) { - ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); - } - } - } - break; - case 4: - for (int i3 = 0; i3 < ne[3]; i3++) { - for (int i2 = 0; i2 < ne[2]; i2++) { - for (int i1 = 0; i1 < ne[1]; i1++) { - for (int i0 = 0; i0 < ne[0]; i0++) { - ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); - } - } - } - } - break; - default: - assert(false); - }; - - return result; -} - -struct ggml_tensor * get_random_tensor_i32( - struct ggml_context * ctx0, - int ndims, - int64_t ne[], - int32_t imin, - int32_t imax) { - struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne); - - switch (ndims) { - case 1: - for (int i0 = 0; i0 < ne[0]; i0++) { - ((int32_t *)result->data)[i0] = irand(imax - imin) + imin; - } - break; - case 2: - for (int i1 = 0; i1 < ne[1]; i1++) { - for (int i0 = 0; i0 < ne[0]; i0++) { - ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin; - } - } - break; - case 3: - for (int i2 = 0; i2 < ne[2]; i2++) { - for (int i1 = 0; i1 < ne[1]; i1++) { - for (int i0 = 0; i0 < ne[0]; i0++) { - ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin; - } - } - } - break; - case 4: - for (int i3 = 0; i3 < ne[3]; i3++) { - for (int i2 = 0; i2 < 
ne[2]; i2++) { - for (int i1 = 0; i1 < ne[1]; i1++) { - for (int i0 = 0; i0 < ne[0]; i0++) { - ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin; - } - } - } - } - break; - default: - assert(false); - }; - - return result; -} - -void print_elements(const char* label, const struct ggml_tensor * t) { - if (!t) { - printf("%s: %s = null\n", __func__, label); - return; - } - const int nelements = ggml_nelements(t); - printf("%s: %s = [", __func__, label); - for (int k = 0; k < nelements; ++k) { - if (k > 0) { printf(", "); } - printf("%.5f", ggml_get_f32_1d(t, k)); - } - printf("] shape: ["); - for (int k = 0; k < t->n_dims; ++k) { - if (k > 0) { printf(", "); } - printf("%d", (int)t->ne[k]); - } - printf("]\n"); - -} - -bool check_gradient( - const char * op_name, - struct ggml_context * ctx0, - struct ggml_tensor * x[], - struct ggml_tensor * f, - int ndims, - int nargs, - float eps, - float max_error_abs, - float max_error_rel) { - - static int n_threads = -1; - if (n_threads < 0) { - n_threads = GGML_DEFAULT_N_THREADS; - - const char *env = getenv("GGML_N_THREADS"); - if (env) { - n_threads = atoi(env); - } - - printf("GGML_N_THREADS = %d\n", n_threads); - } - - struct ggml_cgraph gf = ggml_build_forward (f); - struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false); - - ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); - - ggml_graph_reset (&gf); - ggml_set_f32 (f->grad, 1.0f); - - ggml_graph_compute_with_ctx(ctx0, &gb, n_threads); - - // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot"); - // ggml_graph_dump_dot(&gb, &gf, "test-grad0-backward.dot"); - - for (int i = 0; i < nargs; ++i) { - const int nelements = ggml_nelements(x[i]); - for (int k = 0; k < nelements; ++k) { - // compute gradient using finite differences - const float x0 = ggml_get_f32_1d(x[i], k); - const float xm = x0 - eps; - const float xp = x0 + eps; - ggml_set_f32_1d(x[i], k, xp); - - ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); - - const float f0 = ggml_get_f32_1d(f, 0); - - ggml_set_f32_1d(x[i], k, xm); - - ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); - - const float f1 = ggml_get_f32_1d(f, 0); - const float g0 = (f0 - f1)/(2.0f*eps); - - ggml_set_f32_1d(x[i], k, x0); - - // compute gradient using backward graph - ggml_graph_reset (&gf); - ggml_set_f32 (f->grad, 1.0f); - - ggml_graph_compute_with_ctx(ctx0, &gb, n_threads); - - const float g1 = ggml_get_f32_1d(x[i]->grad, k); - - const float error_abs = fabsf(g0 - g1); - const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0; - - if (error_abs > max_error_abs || error_rel > max_error_rel) { - printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n", - op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel); - //assert(false); - return false; - } - } - } - - return true; -} - -// TODO: clean-up this .. 
-bool check_mat_mul( - const struct ggml_tensor * y, - const struct ggml_tensor * x0, - const struct ggml_tensor * x1) { - float * dst = (float *) y->data; - float * src0 = (float *) x0->data; - float * src1 = (float *) x1->data; - - const int nc = x0->ne[1]; - const int nr = x1->ne[1]; - const int nk = x0->ne[0]; - - GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk); - - GGML_PRINT_DEBUG("x0:\n"); - for (int j = 0; j < x0->ne[1]; ++j) { - for (int i = 0; i < x0->ne[0]; ++i) { - GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]); - } - GGML_PRINT_DEBUG("\n"); - } - GGML_PRINT_DEBUG("\n"); - - GGML_PRINT_DEBUG("x1:\n"); - for (int j = 0; j < x1->ne[1]; ++j) { - for (int i = 0; i < x1->ne[0]; ++i) { - GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]); - } - GGML_PRINT_DEBUG("\n"); - } - GGML_PRINT_DEBUG("\n"); - - GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]); - for (int j = 0; j < y->ne[1]; ++j) { - for (int i = 0; i < y->ne[0]; ++i) { - GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]); - } - GGML_PRINT_DEBUG("\n"); - } - - for (int i = 0; i < nr; ++i) { - for (int j = 0; j < nc; ++j) { - float sum = 0.0f; - - for (int k = 0; k < nk; ++k) { - sum += src0[j*nk + k]*src1[i*nk + k]; - } - - if (fabsf(dst[i*nc + j] - sum) > 1e-5f) { - fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum); - assert(false); - return false; - } - } - } - - return true; -} - -#define NUM_PERMUTATIONS (4*3*2*1) - -int main(int argc, const char ** argv) { - struct ggml_init_params params = { - .mem_size = 128*1024*1024, - .mem_buffer = NULL, - .no_alloc = false, - }; - - int64_t ne[4]; - - int all_permutations[4 * NUM_PERMUTATIONS]; - { - int count = 0; - for (int ax0=0; ax0<4; ++ax0) { - for (int ax1=0; ax1<4; ++ax1) { - if (ax1 == ax0) continue; - for (int ax2=0; ax2<4; ++ax2) { - if (ax2 == ax0) continue; - if (ax2 == ax1) continue; - for (int ax3=0; ax3<4; ++ax3) { - if (ax3 == ax0) continue; - if (ax3 == ax1) continue; - if (ax3 == ax2) continue; - assert(count < NUM_PERMUTATIONS); - all_permutations[count*4+0] = ax0; - all_permutations[count*4+1] = ax1; - all_permutations[count*4+2] = ax2; - all_permutations[count*4+3] = ax3; - ++count; - } - } - } - } - } - - - // original loop: 1000 - int niter = 4; - const char *env = getenv("GGML_NLOOP"); - if (env != NULL) { - niter = atoi(env); - } - if (argc > 1) { - niter = atoi(argv[1]); - } - for (int iter = 0; iter < niter; ++iter) { - printf("test-grad0: iter:%d/%d\n", iter, niter); - struct ggml_context * ctx0 = ggml_init(params); - - get_random_dims(ne, 4); - - struct ggml_tensor * x[MAX_NARGS]; - - // add f32 - { - const int nargs = 2; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); - - check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); - } - } - - // add f16 - { - const int nargs = 2; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); - - check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f); - } - } - - // sub - { - const int nargs = 2; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, 
ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1])); - - check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - // mul - { - const int nargs = 2; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1])); - - check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // div - { - const int nargs = 2; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1])); - - check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f); - } - } - - // sqr - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 2; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0])); - - check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // sqrt - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 2; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0])); - - check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f); - } - } - - // log - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 2; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0])); - - check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f); - } - } - - // sum - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 2; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, x[0]); - - check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - - // sum_rows - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0]))); - - check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); - } - } - - // mean, not yet fully implemented - if(0) - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0])); - - check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - // argmax - if (0) - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0])); - - 
check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - // repeat - { - int64_t ne2[4]; - get_random_dims(ne2, 4); - - ne2[0] = ne[0] * ne2[0]; - ne2[1] = ne[1] * ne2[1]; - ne2[2] = 1; - ne2[3] = 1; - - const int nargs = 1; - for (int ndims = 1; ndims <= 2; ++ndims) { - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1])))); - - check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); - } - } - - // repeat back - { - int64_t ne2[4]; - get_random_dims(ne2, 4); - - ne2[0] = ne[0] * ne2[0]; - ne2[1] = ne[1] * ne2[1]; - ne2[2] = 1; - ne2[3] = 1; - - const int nargs = 1; - for (int ndims = 1; ndims <= 2; ++ndims) { - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0])))); - - check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); - } - } - - // abs (finite differences do not work) - //{ - // const int nargs = 1; - - // for (int ndims = 1; ndims <= 2; ++ndims) { - // for (int i = 0; i < nargs; ++i) { - // x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - // ggml_set_param(ctx0, x[i]); - // } - - // struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0])); - - // check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f); - // } - //} - - // sgn - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0])); - - check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - // neg - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0])); - - check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - // step - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0])); - - check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - // tanh, not yet fully implemented - if(0) - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0])); - - check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - // mul_mat - { - const int nargs = 2; - - for (int ndims = 2; ndims <= 2; ++ndims) { - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - { - int64_t ne2[4]; - get_random_dims(ne2, 4); - ne2[0] = ne[0]; - x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); - } - - ggml_set_param(ctx0, x[0]); - ggml_set_param(ctx0, x[1]); - - struct ggml_tensor * m = 
ggml_mul_mat(ctx0, x[1], x[0]); - struct ggml_tensor * f = ggml_sum(ctx0, m); - - GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims); - - check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - check_mat_mul(m, x[1], x[0]); - } - } - - // elu, not yet fully implemented - if(0) - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0])); - - check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - // relu - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0])); - - check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // gelu, not yet fully implemented - if(0) - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 4; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0])); - - check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); - } - } - - // silu - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 2; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0])); - -#ifdef GGML_SILU_FP16 - // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds. 
- check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY); -#else - check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); -#endif - } - } - - // rms_norm - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 2; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f)); - - check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY); - } - } - - // scale - { - const int nargs = 2; - - int64_t ne2[4]; - ne2[0] = 1; - - for (int ndims = 1; ndims <= 2; ++ndims) { - x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - - ggml_set_param(ctx0, x[0]); - ggml_set_param(ctx0, x[1]); - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], x[1])); - - check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // cpy f32 - { - const int nargs = 2; - - for (int ndims = 1; ndims <= 2; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); - - check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // cpy f16 - { - const int nargs = 2; - - for (int ndims = 1; ndims <= 2; ++ndims) { - for (int i = 0; i < nargs; ++i) { - x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[i]); - } - // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); - - check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); - } - } - - // reshape (1d->nd) - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 2; ++ndims) { - int64_t ne2[4]; - ne2[0] = 1; - ne2[1] = 1; - ne2[2] = 1; - ne2[3] = 1; - for (int i = 0; i < ndims; ++i) { - ne2[0] *= ne[i]; - } - x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); - x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1])); - check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // reshape (nd->1d) - { - const int nargs = 1; - - for (int ndims = 1; ndims <= 2; ++ndims) { - int64_t ne2[4]; - ne2[0] = 1; - ne2[1] = 1; - ne2[2] = 1; - ne2[3] = 1; - for (int i = 0; i < ndims; ++i) { - ne2[0] *= ne[i]; - } - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1])); - check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // acc 1d - { - int64_t ne2[4] = { 1, 1, 1, 1 }; - - const int nargs = 2; - for (int ndims = 1; ndims <= 4; ++ndims) { - - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - get_random_dims(ne2, 1); - while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) { - get_random_dims(ne2, 1); - } - - x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); - ggml_set_param(ctx0, x[1]); - - const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); - 
const int offset = irand(max_offset) * ggml_element_size(x[0]); - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); - - check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // acc 2d - { - int64_t ne2[4] = { 1, 1, 1, 1 }; - int64_t max_offsets[4] = { 0, 0, 0, 0 }; - int64_t offsets[4] = { 0, 0, 0, 0 }; - - const int nargs = 2; - for (int ndims = 2; ndims <= 4; ++ndims) { - - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - get_random_dims(ne2, 2); - while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) { - get_random_dims(ne2, 2); - } - - x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); - ggml_set_param(ctx0, x[1]); - - max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); - max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); - offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; - offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; - const int offset = offsets[0] + offsets[1]; - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); - - check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // acc 3d - { - int64_t ne2[4] = { 1, 1, 1, 1 }; - int64_t max_offsets[4] = { 0, 0, 0, 0 }; - int64_t offsets[4] = { 0, 0, 0, 0 }; - - const int nargs = 2; - for (int ndims = 3; ndims <= 4; ++ndims) { - - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - get_random_dims(ne2, 3); - while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) { - get_random_dims(ne2, 3); - } - - x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f); - ggml_set_param(ctx0, x[1]); - - max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); - max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); - max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]); - offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; - offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; - offsets[2] = irand(max_offsets[2]) * x[0]->nb[2]; - const int offset = offsets[0] + offsets[1] + offsets[2]; - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); - - check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // acc 4d - { - int64_t ne2[4] = { 1, 1, 1, 1 }; - int64_t max_offsets[4] = { 0, 0, 0, 0 }; - int64_t offsets[4] = { 0, 0, 0, 0 }; - - const int nargs = 2; - for (int ndims = 4; ndims <= 4; ++ndims) { - - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - get_random_dims(ne2, 4); - while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) { - get_random_dims(ne2, 4); - } - - x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); - ggml_set_param(ctx0, x[1]); - - max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); - max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); - max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]); - max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]); - offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; - offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; - offsets[2] = irand(max_offsets[2]) * x[0]->nb[2]; - offsets[3] = irand(max_offsets[3]) * x[0]->nb[3]; - const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3]; - - struct ggml_tensor * 
f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset)); - - check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // set_1d - { - int64_t ne2[4]; - - const int nargs = 2; - for (int ndims = 1; ndims <= 4; ++ndims) { - - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - get_random_dims(ne2, 1); - while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) { - get_random_dims(ne2, 1); - } - - x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); - ggml_set_param(ctx0, x[1]); - - const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); - const int offset = irand(max_offset) * ggml_element_size(x[0]); - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset)); - - check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // set_2d - { - int64_t ne2[4]; - int64_t max_offsets[4] = { 0, 0, 0, 0 }; - int64_t offsets[4] = { 0, 0, 0, 0 }; - - const int nargs = 1; - for (int ndims = 2; ndims <= 4; ++ndims) { - - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - ggml_set_param(ctx0, x[0]); - - get_random_dims(ne2, 2); - while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) { - get_random_dims(ne2, 2); - } - - x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); - ggml_set_param(ctx0, x[1]); - - max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); - max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]); - offsets[0] = irand(max_offsets[0]) * x[0]->nb[0]; - offsets[1] = irand(max_offsets[1]) * x[0]->nb[1]; - const int offset = offsets[0] + offsets[1]; - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset)); - - check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // view_1d - { - const int nargs = 1; - for (int ndims = 1; ndims <= 4; ++ndims) { - - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - - ggml_set_param(ctx0, x[0]); - - const int k0 = irand(ggml_nelements(x[0])); - const int k1 = irand(ggml_nelements(x[0])); - const int i0 = MIN(k0, k1); - const int i1 = MAX(k0, k1); - - const int offset = i0 * sizeof(float); - const int nelem = i1 - i0; - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset)); - - check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // view_2d - { - int64_t ne2[4]; - int64_t nb2[4]; - - const int nargs = 1; - for (int ndims = 1; ndims <= 4; ++ndims) { - - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - - get_random_dims(ne2, 2); - while (ne2[0]*ne2[1] > ggml_nelements(x[0])) { - get_random_dims(ne2, 2); - } - const int count = ne2[0]*ne2[1]; - - nb2[0] = sizeof(float); - nb2[1] = nb2[0]*ne2[0]; - - ggml_set_param(ctx0, x[0]); - - const int max_offset = ggml_nelements(x[0]) - count; - const int offset = irand(max_offset+1) * sizeof(float); - - struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset)); - - check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); - } - } - - // view_3d - { - int64_t ne2[4] = {1,1,1,1}; - int64_t nb2[4] = {0,0,0,0}; - - const int nargs = 1; - for (int ndims = 1; ndims <= 4; ++ndims) { - - x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); - - get_random_dims(ne2, 3); - while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) { - get_random_dims(ne2, 3); - } - const int count = 
ne2[0]*ne2[1]*ne2[2];
-
-            nb2[0] = sizeof(float);
-            nb2[1] = nb2[0]*ne2[0];
-            nb2[2] = nb2[1]*ne2[1];
-
-            ggml_set_param(ctx0, x[0]);
-
-            const int max_offset = ggml_nelements(x[0]) - count;
-            const int offset = irand(max_offset+1) * sizeof(float);
-
-            struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));
-
-            check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
-        }
-    }
-
-    // permute
-    {
-        int64_t ne2[4];
-
-        const int nargs = 1;
-        for (int ndims = 1; ndims <= 4; ++ndims)
-        {
-            // ggml_permute will set axes of dimensions below n_dims to 1.
-            // to make ggml_permute work correctly on all axes,
-            // the input tensor needs maximal n_dim of 4.
-            for (int i=0; i<4; ++i) {
diff --git a/tests/test-grad0.cpp b/tests/test-grad0.cpp
new file mode 100644
--- /dev/null
+++ b/tests/test-grad0.cpp
@@ -0,0 +1,1525 @@
+#define _CRT_SECURE_NO_DEPRECATE // Disables ridiculous "unsafe" warnigns on Windows
+#include "ggml.h"
+
+#include <cmath>
+#include <cstdio>
+#include <cstdlib>
+#include <cassert>
+
+#if defined(_MSC_VER)
+#pragma warning(disable: 4244 4267) // possible loss of data
+#endif
+
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdouble-promotion"
+#endif
+
+#define MAX_NARGS 3
+
+#undef MIN
+#undef MAX
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#define MAX(a, b) ((a) > (b) ? (a) : (b))
+
+#define GGML_SILU_FP16
+
+//
+// logging
+//
+
+#if (GGML_DEBUG >= 1)
+#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define GGML_PRINT_DEBUG(...)
+#endif
+
+#if (GGML_DEBUG >= 5)
+#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
+#else
+#define GGML_PRINT_DEBUG_5(...)
+#endif
+
+#if (GGML_DEBUG >= 10)
+#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
+#else
+#define GGML_PRINT_DEBUG_10(...)
+#endif
+
+#define GGML_PRINT(...) printf(__VA_ARGS__)
+
+static float frand(void) {
+    return (float)rand()/(float)RAND_MAX;
+}
+
+static int irand(int n) {
+    if (n == 0) return 0;
+    return rand()%n;
+}
+
+static void get_random_dims(int64_t * dims, int ndims) {
+    dims[0] = dims[1] = dims[2] = dims[3] = 1;
+
+    for (int i = 0; i < ndims; i++) {
+        dims[i] = 1 + irand(4);
+    }
+}
+
+static struct ggml_tensor * get_random_tensor_f32(
+        struct ggml_context * ctx0,
+        int ndims,
+        int64_t ne[],
+        float fmin,
+        float fmax) {
+    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);
+
+    switch (ndims) {
+        case 1:
+            for (int i0 = 0; i0 < ne[0]; i0++) {
+                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
+            }
+            break;
+        case 2:
+            for (int i1 = 0; i1 < ne[1]; i1++) {
+                for (int i0 = 0; i0 < ne[0]; i0++) {
+                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
+                }
+            }
+            break;
+        case 3:
+            for (int i2 = 0; i2 < ne[2]; i2++) {
+                for (int i1 = 0; i1 < ne[1]; i1++) {
+                    for (int i0 = 0; i0 < ne[0]; i0++) {
+                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
+                    }
+                }
+            }
+            break;
+        case 4:
+            for (int i3 = 0; i3 < ne[3]; i3++) {
+                for (int i2 = 0; i2 < ne[2]; i2++) {
+                    for (int i1 = 0; i1 < ne[1]; i1++) {
+                        for (int i0 = 0; i0 < ne[0]; i0++) {
+                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
+                        }
+                    }
+                }
+            }
+            break;
+        default:
+            assert(false);
+    };
+
+    return result;
+}
+
+static struct ggml_tensor * get_random_tensor_f16(
+        struct ggml_context * ctx0,
+        int ndims,
+        int64_t ne[],
+        float fmin,
+        float fmax) {
+    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne);
+
+    switch (ndims) {
+        case 1:
+            for (int i0 = 0; i0 < ne[0]; i0++) {
+                ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
+            }
+            break;
+        case 2:
+            for (int i1 = 0; i1 < ne[1]; i1++) {
+                for (int i0 = 0; i0 < ne[0]; i0++) {
+                    ((ggml_fp16_t
*)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + break; + case 3: + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + break; + case 4: + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); + } + } + } + } + break; + default: + assert(false); + }; + + return result; +} + +static struct ggml_tensor * get_random_tensor_i32( + struct ggml_context * ctx0, + int ndims, + int64_t ne[], + int32_t imin, + int32_t imax) { + struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_I32, ndims, ne); + + switch (ndims) { + case 1: + for (int i0 = 0; i0 < ne[0]; i0++) { + ((int32_t *)result->data)[i0] = irand(imax - imin) + imin; + } + break; + case 2: + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((int32_t *)result->data)[i1*ne[0] + i0] = irand(imax - imin) + imin; + } + } + break; + case 3: + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((int32_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin; + } + } + } + break; + case 4: + for (int i3 = 0; i3 < ne[3]; i3++) { + for (int i2 = 0; i2 < ne[2]; i2++) { + for (int i1 = 0; i1 < ne[1]; i1++) { + for (int i0 = 0; i0 < ne[0]; i0++) { + ((int32_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = irand(imax - imin) + imin; + } + } + } + } + break; + default: + assert(false); + }; + + return result; +} + +static void print_elements(const char* label, const struct ggml_tensor * t) { + if (!t) { + printf("%s: %s = null\n", __func__, label); + return; + } + const int nelements = ggml_nelements(t); + printf("%s: %s = [", __func__, label); + for (int k = 0; k < nelements; ++k) { + if (k > 0) { printf(", "); } + printf("%.5f", ggml_get_f32_1d(t, k)); + } + printf("] shape: ["); + for (int k = 0; k < t->n_dims; ++k) { + if (k > 0) { printf(", "); } + printf("%d", (int)t->ne[k]); + } + printf("]\n"); + +} + +static bool check_gradient( + const char * op_name, + struct ggml_context * ctx0, + struct ggml_tensor * x[], + struct ggml_tensor * f, + int ndims, + int nargs, + float eps, + float max_error_abs, + float max_error_rel) { + + static int n_threads = -1; + if (n_threads < 0) { + n_threads = GGML_DEFAULT_N_THREADS; + + const char *env = getenv("GGML_N_THREADS"); + if (env) { + n_threads = atoi(env); + } + + printf("GGML_N_THREADS = %d\n", n_threads); + } + + struct ggml_cgraph gf = ggml_build_forward (f); + struct ggml_cgraph gb = ggml_build_backward(ctx0, &gf, false); + + ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); + + ggml_graph_reset (&gf); + ggml_set_f32 (f->grad, 1.0f); + + ggml_graph_compute_with_ctx(ctx0, &gb, n_threads); + + // ggml_graph_dump_dot(&gf, NULL, "test-grad0-forward.dot"); + // ggml_graph_dump_dot(&gb, &gf, "test-grad0-backward.dot"); + + for (int i = 0; i < nargs; ++i) { + const int nelements = ggml_nelements(x[i]); + for (int k = 0; k < nelements; ++k) { + // compute gradient using finite differences + const float x0 = ggml_get_f32_1d(x[i], k); + const float xm = x0 - eps; + const float xp = x0 + eps; + 
ggml_set_f32_1d(x[i], k, xp); + + ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); + + const float f0 = ggml_get_f32_1d(f, 0); + + ggml_set_f32_1d(x[i], k, xm); + + ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); + + const float f1 = ggml_get_f32_1d(f, 0); + const float g0 = (f0 - f1)/(2.0f*eps); + + ggml_set_f32_1d(x[i], k, x0); + + // compute gradient using backward graph + ggml_graph_reset (&gf); + ggml_set_f32 (f->grad, 1.0f); + + ggml_graph_compute_with_ctx(ctx0, &gb, n_threads); + + const float g1 = ggml_get_f32_1d(x[i]->grad, k); + + const float error_abs = fabsf(g0 - g1); + const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0; + + if (error_abs > max_error_abs || error_rel > max_error_rel) { + printf("%s: ndims=%d, i=%d, k=%d, x0=%f, xm=%f, xp=%f, f0=%f, f1=%f, g0=%f, g1=%f, eps=%f, error_abs=%f, error_rel=%f\n", + op_name, ndims, i, k, x0, xm, xp, f0, f1, g0, g1, eps, error_abs, error_rel); + //assert(false); + return false; + } + } + } + + return true; +} + +// TODO: clean-up this .. +static bool check_mat_mul( + const struct ggml_tensor * y, + const struct ggml_tensor * x0, + const struct ggml_tensor * x1) { + float * dst = (float *) y->data; + float * src0 = (float *) x0->data; + float * src1 = (float *) x1->data; + + const int nc = x0->ne[1]; + const int nr = x1->ne[1]; + const int nk = x0->ne[0]; + + GGML_PRINT_DEBUG("check_mat_mul: nc=%d, nr=%d, nk=%d\n", nc, nr, nk); + + GGML_PRINT_DEBUG("x0:\n"); + for (int j = 0; j < x0->ne[1]; ++j) { + for (int i = 0; i < x0->ne[0]; ++i) { + GGML_PRINT_DEBUG("%6.3f ", src0[j*nk + i]); + } + GGML_PRINT_DEBUG("\n"); + } + GGML_PRINT_DEBUG("\n"); + + GGML_PRINT_DEBUG("x1:\n"); + for (int j = 0; j < x1->ne[1]; ++j) { + for (int i = 0; i < x1->ne[0]; ++i) { + GGML_PRINT_DEBUG("%6.3f ", src1[j*nk + i]); + } + GGML_PRINT_DEBUG("\n"); + } + GGML_PRINT_DEBUG("\n"); + + GGML_PRINT_DEBUG("y: n_dims = %d, (%lld, %lld)\n", y->n_dims, y->ne[0], y->ne[1]); + for (int j = 0; j < y->ne[1]; ++j) { + for (int i = 0; i < y->ne[0]; ++i) { + GGML_PRINT_DEBUG("%6.3f ", dst[j*nr + i]); + } + GGML_PRINT_DEBUG("\n"); + } + + for (int i = 0; i < nr; ++i) { + for (int j = 0; j < nc; ++j) { + float sum = 0.0f; + + for (int k = 0; k < nk; ++k) { + sum += src0[j*nk + k]*src1[i*nk + k]; + } + + if (fabsf(dst[i*nc + j] - sum) > 1e-5f) { + fprintf(stderr, "check_mat_mul: dst[%d] = %f, sum = %f\n", i*nc + j, dst[i*nc + j], sum); + assert(false); + return false; + } + } + } + + return true; +} + +#define NUM_PERMUTATIONS (4*3*2*1) + +int main(int argc, const char ** argv) { + struct ggml_init_params params = { + /* .mem_size = */ 128*1024*1024, + /* .mem_buffer = */ NULL, + /* .no_alloc = */ false, + }; + + int64_t ne[4]; + + int all_permutations[4 * NUM_PERMUTATIONS]; + { + int count = 0; + for (int ax0=0; ax0<4; ++ax0) { + for (int ax1=0; ax1<4; ++ax1) { + if (ax1 == ax0) continue; + for (int ax2=0; ax2<4; ++ax2) { + if (ax2 == ax0) continue; + if (ax2 == ax1) continue; + for (int ax3=0; ax3<4; ++ax3) { + if (ax3 == ax0) continue; + if (ax3 == ax1) continue; + if (ax3 == ax2) continue; + assert(count < NUM_PERMUTATIONS); + all_permutations[count*4+0] = ax0; + all_permutations[count*4+1] = ax1; + all_permutations[count*4+2] = ax2; + all_permutations[count*4+3] = ax3; + ++count; + } + } + } + } + } + + + // original loop: 1000 + int niter = 4; + const char *env = getenv("GGML_NLOOP"); + if (env != NULL) { + niter = atoi(env); + } + if (argc > 1) { + niter = atoi(argv[1]); + } + for (int iter = 0; iter < niter; ++iter) { + printf("test-grad0: iter:%d/%d\n", 
iter, niter); + struct ggml_context * ctx0 = ggml_init(params); + + get_random_dims(ne, 4); + + struct ggml_tensor * x[MAX_NARGS]; + + // add f32 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); + + check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); + } + } + + // add f16 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); + + check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f); + } + } + + // sub + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sub(ctx0, x[0], x[1])); + + check_gradient("sub", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // mul + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_mul(ctx0, x[0], x[1])); + + check_gradient("mul", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // div + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_div(ctx0, x[0], x[1])); + + check_gradient("div", ctx0, x, f, ndims, nargs, 1e-3f, 1e-1f, 1e-1f); + } + } + + // sqr + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, x[0])); + + check_gradient("sqr", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // sqrt + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqrt(ctx0, x[0])); + + check_gradient("sqrt", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f); + } + } + + // log + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_log(ctx0, x[0])); + + check_gradient("log", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-1f); + } + } + + // sum + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, x[0]); + + check_gradient("sum", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + + // sum_rows + { + const int nargs = 1; + + for (int ndims = 1; ndims 
<= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sum_rows(ctx0, x[0]))); + + check_gradient("sum_rows", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); + } + } + + // mean, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0])); + + check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // argmax + if (0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0])); + + check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // repeat + { + int64_t ne2[4]; + get_random_dims(ne2, 4); + + ne2[0] = ne[0] * ne2[0]; + ne2[1] = ne[1] * ne2[1]; + ne2[2] = 1; + ne2[3] = 1; + + const int nargs = 1; + for (int ndims = 1; ndims <= 2; ++ndims) { + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1])))); + + check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); + } + } + + // repeat back + { + int64_t ne2[4]; + get_random_dims(ne2, 4); + + ne2[0] = ne[0] * ne2[0]; + ne2[1] = ne[1] * ne2[1]; + ne2[2] = 1; + ne2[3] = 1; + + const int nargs = 1; + for (int ndims = 1; ndims <= 2; ++ndims) { + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0])))); + + check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); + } + } + + // abs (finite differences do not work) + //{ + // const int nargs = 1; + + // for (int ndims = 1; ndims <= 2; ++ndims) { + // for (int i = 0; i < nargs; ++i) { + // x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + // ggml_set_param(ctx0, x[i]); + // } + + // struct ggml_tensor * f = ggml_sum(ctx0, ggml_abs(ctx0, x[0])); + + // check_gradient("abs", ctx0, x, f, ndims, nargs, 1e-3f, INFINITY, 1e-3f); + // } + //} + + // sgn + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0])); + + check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // neg + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0])); + + check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // step + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < 
nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0])); + + check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // tanh, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0])); + + check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // mul_mat + { + const int nargs = 2; + + for (int ndims = 2; ndims <= 2; ++ndims) { + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + { + int64_t ne2[4]; + get_random_dims(ne2, 4); + ne2[0] = ne[0]; + x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); + } + + ggml_set_param(ctx0, x[0]); + ggml_set_param(ctx0, x[1]); + + struct ggml_tensor * m = ggml_mul_mat(ctx0, x[1], x[0]); + struct ggml_tensor * f = ggml_sum(ctx0, m); + + GGML_PRINT_DEBUG("testing: mul_mat, [%lld, %lld] (%d) * [%lld, %lld] (%d)\n", x[1]->ne[0], x[1]->ne[1], x[1]->n_dims, x[0]->ne[0], x[0]->ne[1], x[0]->n_dims); + + check_gradient("mul_mat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + check_mat_mul(m, x[1], x[0]); + } + } + + // elu, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0])); + + check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // relu + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0])); + + check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // gelu, not yet fully implemented + if(0) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 4; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0])); + + check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); + } + } + + // silu + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_silu(ctx0, x[0])); + +#ifdef GGML_SILU_FP16 + // due to GGML_SILU_FP16 the finite difference method will be slightly wrong -> increase error bounds. 
+ check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 0.5, INFINITY); +#else + check_gradient("silu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); +#endif + } + } + + // rms_norm + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_rms_norm(ctx0, x[0], 1e-6f)); + + check_gradient("rms_norm", ctx0, x, f, ndims, nargs, 1e-4f, 1.0f, INFINITY); + } + } + + // scale + { + const int nargs = 2; + + int64_t ne2[4]; + ne2[0] = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + + ggml_set_param(ctx0, x[0]); + ggml_set_param(ctx0, x[1]); + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_scale(ctx0, x[0], x[1])); + + check_gradient("scale", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // cpy f32 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); + + check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // cpy f16 + { + const int nargs = 2; + + for (int ndims = 1; ndims <= 2; ++ndims) { + for (int i = 0; i < nargs; ++i) { + x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[i]); + } + // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); + + check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); + } + } + + // reshape (1d->nd) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + int64_t ne2[4]; + ne2[0] = 1; + ne2[1] = 1; + ne2[2] = 1; + ne2[3] = 1; + for (int i = 0; i < ndims; ++i) { + ne2[0] *= ne[i]; + } + x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1])); + check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // reshape (nd->1d) + { + const int nargs = 1; + + for (int ndims = 1; ndims <= 2; ++ndims) { + int64_t ne2[4]; + ne2[0] = 1; + ne2[1] = 1; + ne2[2] = 1; + ne2[3] = 1; + for (int i = 0; i < ndims; ++i) { + ne2[0] *= ne[i]; + } + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + + struct ggml_tensor * f = ggml_sum(ctx0, ggml_reshape(ctx0, x[0], x[1])); + check_gradient("reshape", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); + } + } + + // acc 1d + { + int64_t ne2[4] = { 1, 1, 1, 1 }; + + const int nargs = 2; + for (int ndims = 1; ndims <= 4; ++ndims) { + + x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); + ggml_set_param(ctx0, x[0]); + + get_random_dims(ne2, 1); + while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) { + get_random_dims(ne2, 1); + } + + x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); + ggml_set_param(ctx0, x[1]); + + const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); + 
+        // acc 1d
+        {
+            int64_t ne2[4] = { 1, 1, 1, 1 };
+
+            const int nargs = 2;
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[0]);
+
+                get_random_dims(ne2, 1);
+                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
+                    get_random_dims(ne2, 1);
+                }
+
+                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[1]);
+
+                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
+                const int offset = irand(max_offset) * ggml_element_size(x[0]);
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
+
+                check_gradient("acc 1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
+        // acc 2d
+        {
+            int64_t ne2[4] = { 1, 1, 1, 1 };
+            int64_t max_offsets[4] = { 0, 0, 0, 0 };
+            int64_t offsets[4] = { 0, 0, 0, 0 };
+
+            const int nargs = 2;
+            for (int ndims = 2; ndims <= 4; ++ndims) {
+
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[0]);
+
+                get_random_dims(ne2, 2);
+                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
+                    get_random_dims(ne2, 2);
+                }
+
+                x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[1]);
+
+                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
+                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
+                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
+                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
+                const int offset = offsets[0] + offsets[1];
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
+
+                check_gradient("acc 2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
+        // acc 3d
+        {
+            int64_t ne2[4] = { 1, 1, 1, 1 };
+            int64_t max_offsets[4] = { 0, 0, 0, 0 };
+            int64_t offsets[4] = { 0, 0, 0, 0 };
+
+            const int nargs = 2;
+            for (int ndims = 3; ndims <= 4; ++ndims) {
+
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[0]);
+
+                get_random_dims(ne2, 3);
+                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0]))) {
+                    get_random_dims(ne2, 3);
+                }
+
+                x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[1]);
+
+                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
+                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
+                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
+                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
+                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
+                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
+                const int offset = offsets[0] + offsets[1] + offsets[2];
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
+
+                check_gradient("acc 3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
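The offset arithmetic in these acc tests relies on ggml's tensor layout: ne[] counts elements per dimension while nb[] is the byte stride for stepping one index along that dimension, so irand(max_offsets[d]) * nb[d] is a random, always in-bounds byte offset along axis d. The same arithmetic in isolation, with contiguous f32 strides assumed:

#include <cstdint>
#include <cstdio>

int main() {
    // assumed f32 tensor of shape 4 x 3 (contiguous): nb[0] = 4 bytes, nb[1] = 16 bytes
    const int64_t ne[2] = { 4, 3 };
    const int64_t nb[2] = { sizeof(float), sizeof(float) * ne[0] };

    const int64_t i0 = 2, i1 = 1;                   // element coordinate
    const int64_t offset = i0*nb[0] + i1*nb[1];     // byte offset, as in the acc tests
    printf("byte offset of (%lld, %lld) = %lld\n", (long long)i0, (long long)i1, (long long)offset);
    return 0;
}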
+        // acc 4d
+        {
+            int64_t ne2[4] = { 1, 1, 1, 1 };
+            int64_t max_offsets[4] = { 0, 0, 0, 0 };
+            int64_t offsets[4] = { 0, 0, 0, 0 };
+
+            const int nargs = 2;
+            for (int ndims = 4; ndims <= 4; ++ndims) {
+
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[0]);
+
+                get_random_dims(ne2, 4);
+                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[2] > ne[2]) || (ne2[3] > ne[3]) || (ne2[0]*ne2[1]*ne2[2]*ne2[3] > ggml_nelements(x[0]))) {
+                    get_random_dims(ne2, 4);
+                }
+
+                x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[1]);
+
+                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
+                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
+                max_offsets[2] = MAX(0, x[0]->ne[2] - x[1]->ne[2]);
+                max_offsets[3] = MAX(0, x[0]->ne[3] - x[1]->ne[3]);
+                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
+                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
+                offsets[2] = irand(max_offsets[2]) * x[0]->nb[2];
+                offsets[3] = irand(max_offsets[3]) * x[0]->nb[3];
+                const int offset = offsets[0] + offsets[1] + offsets[2] + offsets[3];
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_acc(ctx0, x[0], x[1], x[0]->nb[1], x[0]->nb[2], x[0]->nb[3], offset));
+
+                check_gradient("acc 4d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
+        // set_1d
+        {
+            int64_t ne2[4];
+
+            const int nargs = 2;
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[0]);
+
+                get_random_dims(ne2, 1);
+                while ((ne2[0] > ne[0]) || (ne2[0] > ggml_nelements(x[0]))) {
+                    get_random_dims(ne2, 1);
+                }
+
+                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[1]);
+
+                const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
+                const int offset = irand(max_offset) * ggml_element_size(x[0]);
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_1d(ctx0, x[0], x[1], offset));
+
+                check_gradient("set_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
+        // set_2d
+        {
+            int64_t ne2[4];
+            int64_t max_offsets[4] = { 0, 0, 0, 0 };
+            int64_t offsets[4] = { 0, 0, 0, 0 };
+
+            const int nargs = 1;
+            for (int ndims = 2; ndims <= 4; ++ndims) {
+
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[0]);
+
+                get_random_dims(ne2, 2);
+                while ((ne2[0] > ne[0]) || (ne2[1] > ne[1]) || (ne2[0]*ne2[1] > ggml_nelements(x[0]))) {
+                    get_random_dims(ne2, 2);
+                }
+
+                x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[1]);
+
+                max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
+                max_offsets[1] = MAX(0, x[0]->ne[1] - x[1]->ne[1]);
+                offsets[0] = irand(max_offsets[0]) * x[0]->nb[0];
+                offsets[1] = irand(max_offsets[1]) * x[0]->nb[1];
+                const int offset = offsets[0] + offsets[1];
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_set_2d(ctx0, x[0], x[1], x[1]->nb[1], offset));
+
+                check_gradient("set_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
+        // view_1d
+        {
+            const int nargs = 1;
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+
+                ggml_set_param(ctx0, x[0]);
+
+                const int k0 = irand(ggml_nelements(x[0]));
+                const int k1 = irand(ggml_nelements(x[0]));
+                const int i0 = MIN(k0, k1);
+                const int i1 = MAX(k0, k1);
+
+                const int offset = i0 * sizeof(float);
+                const int nelem = i1 - i0;
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_1d(ctx0, x[0], nelem, offset));
+
+                check_gradient("view_1d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
+        // view_2d
+        {
+            int64_t ne2[4];
+            int64_t nb2[4];
+
+            const int nargs = 1;
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+
+                get_random_dims(ne2, 2);
+                while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
+                    get_random_dims(ne2, 2);
+                }
+                const int count = ne2[0]*ne2[1];
+
+                nb2[0] = sizeof(float);
+                nb2[1] = nb2[0]*ne2[0];
+
+                ggml_set_param(ctx0, x[0]);
+
+                const int max_offset = ggml_nelements(x[0]) - count;
+                const int offset = irand(max_offset+1) * sizeof(float);
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_2d(ctx0, x[0], ne2[0], ne2[1], nb2[1], offset));
+
+                check_gradient("view_2d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
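The view tests pick a random window over the parent's flat data and only have to keep the window's last element in bounds; that is what the max_offset / irand(max_offset+1) computation above guarantees. The invariant, spelled out with a helper name of my own:

#include <cassert>
#include <cstdint>
#include <cstdio>

// A view of `count` floats starting `offset_bytes` into a buffer of
// `nelements` floats fits iff its last element is still inside the buffer.
static bool view_fits(int64_t nelements, int64_t count, int64_t offset_bytes) {
    assert(offset_bytes % (int64_t)sizeof(float) == 0); // views start on element boundaries
    return offset_bytes/(int64_t)sizeof(float) + count <= nelements;
}

int main() {
    printf("%d %d\n", view_fits(16, 4, 48), view_fits(16, 4, 52)); // 1 0
    return 0;
}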
+        // view_3d
+        {
+            int64_t ne2[4] = {1,1,1,1};
+            int64_t nb2[4] = {0,0,0,0};
+
+            const int nargs = 1;
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+
+                get_random_dims(ne2, 3);
+                while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
+                    get_random_dims(ne2, 3);
+                }
+                const int count = ne2[0]*ne2[1]*ne2[2];
+
+                nb2[0] = sizeof(float);
+                nb2[1] = nb2[0]*ne2[0];
+                nb2[2] = nb2[1]*ne2[1];
+
+                ggml_set_param(ctx0, x[0]);
+
+                const int max_offset = ggml_nelements(x[0]) - count;
+                const int offset = irand(max_offset+1) * sizeof(float);
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_view_3d(ctx0, x[0], ne2[0], ne2[1], ne2[2], nb2[1], nb2[2], offset));
+
+                check_gradient("view_3d", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
+        // permute
+        {
+            int64_t ne2[4];
+
+            const int nargs = 1;
+            for (int ndims = 1; ndims <= 4; ++ndims)
+            {
+                // ggml_permute will set axes of dimensions below n_dims to 1.
+                // to make ggml_permute work correctly on all axes,
+                // the input tensor needs maximal n_dim of 4.
+                for (int i=0; i
-#include <math.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <assert.h>
-
-#define MAX_NARGS 2
-
-#if defined(__GNUC__)
-#pragma GCC diagnostic ignored "-Wdouble-promotion"
-#endif
-
-//
-// logging
-//
-#define GGML_DEBUG 0
-#if (GGML_DEBUG >= 1)
-#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG(...)
-#endif
-
-#if (GGML_DEBUG >= 5)
-#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_5(...)
-#endif
-
-#if (GGML_DEBUG >= 10)
-#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
-#else
-#define GGML_PRINT_DEBUG_10(...)
-#endif
-
-#define GGML_PRINT(...) printf(__VA_ARGS__)
-
-
-float frand(void) {
-    return (float)rand()/(float)RAND_MAX;
-}
-
-int irand(int n) {
-    return rand()%n;
-}
-
-void get_random_dims(int64_t * dims, int ndims) {
-    dims[0] = dims[1] = dims[2] = dims[3] = 1;
-
-    for (int i = 0; i < ndims; i++) {
-        dims[i] = 1 + irand(4);
-    }
-}
-
-void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
-    dims[0] = dims[1] = dims[2] = dims[3] = 1;
-
-    for (int i = 0; i < ndims; i++) {
-        dims[i] = min + irand(max-min);
-    }
-}
-
-
-struct ggml_tensor * get_random_tensor(
-        struct ggml_context * ctx0,
-        int ndims,
-        int64_t ne[],
-        float fmin,
-        float fmax) {
-    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);
-
-    switch (ndims) {
-        case 1:
-            for (int i0 = 0; i0 < ne[0]; i0++) {
-                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
-            }
-            break;
-        case 2:
-            for (int i1 = 0; i1 < ne[1]; i1++) {
-                for (int i0 = 0; i0 < ne[0]; i0++) {
-                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
-                }
-            }
-            break;
-        case 3:
-            for (int i2 = 0; i2 < ne[2]; i2++) {
-                for (int i1 = 0; i1 < ne[1]; i1++) {
-                    for (int i0 = 0; i0 < ne[0]; i0++) {
-                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
-                    }
-                }
-            }
-            break;
-        case 4:
-            for (int i3 = 0; i3 < ne[3]; i3++) {
-                for (int i2 = 0; i2 < ne[2]; i2++) {
-                    for (int i1 = 0; i1 < ne[1]; i1++) {
-                        for (int i0 = 0; i0 < ne[0]; i0++) {
-                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
-                        }
-                    }
-                }
-            }
-            break;
-        default:
-            assert(false);
-    };
-
-    return result;
-}
-
-float get_element(const struct ggml_tensor * t, int idx) {
-    return ((float *)t->data)[idx];
-}
-
-void set_element(struct ggml_tensor * t, int idx, float value) {
-    ((float *)t->data)[idx] = value;
-}
-
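get_random_tensor, shared by the old and new test-opt, fills a flat buffer using the usual row-major formula in which dimension 0 is the fastest-moving index. The same index arithmetic as a single helper:

#include <cstdint>
#include <cstdio>

// flat offset of element (i0, i1, i2, i3) in a tensor with extents ne[4],
// exactly the expression in get_random_tensor's innermost loop
static int64_t flat_index(const int64_t ne[4], int64_t i0, int64_t i1, int64_t i2, int64_t i3) {
    return i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0;
}

int main() {
    const int64_t ne[4] = {4, 3, 2, 1};
    printf("%lld\n", (long long) flat_index(ne, 1, 2, 1, 0)); // 1 + 2*4 + 1*12 = 21
    return 0;
}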
-int main(void) {
-    struct ggml_init_params params = {
-        .mem_size   = 1024*1024*1024,
-        .mem_buffer = NULL,
-        .no_alloc   = false,
-    };
-    struct ggml_context * ctx = ggml_init(params);
-
-    int64_t ne1[4] = {4, 128, 1, 1};
-    int64_t ne2[4] = {4, 256, 1, 1};;
-    int64_t ne3[4] = {128, 256, 1, 1};
-
-    struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1);
-    struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1);
-    ggml_set_param(ctx, a);
-    ggml_set_param(ctx, b);
-
-    struct ggml_tensor * c = get_random_tensor(ctx, 2, ne3, -1, +1);
-
-    struct ggml_tensor * ab = ggml_mul_mat(ctx, a, b);
-    struct ggml_tensor * d = ggml_sub(ctx, c, ab);
-    struct ggml_tensor * e = ggml_sum(ctx, ggml_sqr(ctx, d));
-
-    struct ggml_cgraph ge = ggml_build_forward(e);
-    ggml_graph_reset(&ge);
-
-    ggml_graph_compute_with_ctx(ctx, &ge, /*n_threads*/ 1);
-
-    const float fe = ggml_get_f32_1d(e, 0);
-    printf("%s: e = %.4f\n", __func__, fe);
-
-    struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM);
-
-    ggml_opt(ctx, opt_params, e);
-
-    ggml_graph_reset(&ge);
-
-    ggml_graph_compute_with_ctx(ctx, &ge, /*n_threads*/ 1);
-
-    const float fe_opt = ggml_get_f32_1d(e, 0);
-    printf("%s: original e = %.4f\n", __func__, fe);
-    printf("%s: optimized e = %.4f\n", __func__, fe_opt);
-
-    const bool success = (fe_opt <= fe);
-    assert(success);
-
-    ggml_free(ctx);
-    return success ? 0 : -1;
-}
-// int64_t ne1[4] = {4, 128, 1, 1};
-// int64_t ne2[4] = {4, 256, 1, 1};;
-// int64_t ne3[4] = {128, 256, 1, 1};
-// main: original e = 25890.9375
-// main: optimized e = 10094.7031
-
-// int64_t ne1[4] = {8, 128, 1, 1};
-// int64_t ne2[4] = {8, 256, 1, 1};;
-// int64_t ne3[4] = {128, 256, 1, 1};
-// main: original e = 39429.5078
-// main: optimized e = 9275.8936
-
-// int64_t ne1[4] = {16, 128, 1, 1};
-// int64_t ne2[4] = {16, 256, 1, 1};;
-// int64_t ne3[4] = {128, 256, 1, 1};
-// main: original e = 68371.1328
-// main: optimized e = 7854.4502
-
-
-// int64_t ne1[4] = {32, 128, 1, 1};
-// int64_t ne2[4] = {32, 256, 1, 1};;
-// int64_t ne3[4] = {128, 256, 1, 1};
-// main: original e = 126061.1953
-// main: optimized e = 5451.0166
-
-// int64_t ne1[4] = {4, 1024, 1, 1};
-// int64_t ne2[4] = {4, 2048, 1, 1};;
-// int64_t ne3[4] = {1024, 2048, 1, 1};
-// main: original e = 1620817.8750
-// main: optimized e = 698387.6875
-
-// another run on M1
-// int64_t ne1[4] = {4, 1024, 1, 1};
-// int64_t ne2[4] = {4, 2048, 1, 1};;
-// int64_t ne3[4] = {1024, 2048, 1, 1};
-// main: original e = 1629595.6250
-// main: optimized e = 698169.1250
-
-// int64_t ne1[4] = {32, 1024, 1, 1};
-// int64_t ne2[4] = {32, 2048, 1, 1};;
-// int64_t ne3[4] = {1024, 2048, 1, 1};
-// main: original e = 8146770.5000
-// main: optimized e = 651119.1250
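The commented runs record e before and after optimization for growing problem sizes; the only assertion is fe_opt <= fe, i.e. ADAM must not make the least-squares objective e = sum((c - mul_mat(a, b))^2) worse. A scalar caricature of the same check, using plain gradient descent rather than ggml's optimizer:

#include <cassert>
#include <cstdio>

int main() {
    // minimize e = (c - a*b)^2 over both factors
    float a = 0.5f, b = -0.3f;
    const float c = 2.0f;
    const float e0 = (c - a*b)*(c - a*b);
    for (int step = 0; step < 100; ++step) {
        const float d = c - a*b;
        a += 0.05f * 2.0f*d*b; // -de/da = 2*d*b
        b += 0.05f * 2.0f*d*a; // -de/db = 2*d*a (using the updated a)
    }
    const float e1 = (c - a*b)*(c - a*b);
    printf("original e = %.4f, optimized e = %.4f\n", (double)e0, (double)e1);
    assert(e1 <= e0); // same acceptance criterion as the test
    return 0;
}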
diff --git a/tests/test-opt.cpp b/tests/test-opt.cpp
new file mode 100644
index 0000000..8ab2402
--- /dev/null
+++ b/tests/test-opt.cpp
@@ -0,0 +1,212 @@
+#include "ggml.h"
+
+#include <math.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+
+#define MAX_NARGS 2
+
+#if defined(__GNUC__)
+#pragma GCC diagnostic ignored "-Wdouble-promotion"
+#endif
+
+//
+// logging
+//
+#define GGML_DEBUG 0
+#if (GGML_DEBUG >= 1)
+#define GGML_PRINT_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define GGML_PRINT_DEBUG(...)
+#endif
+
+#if (GGML_DEBUG >= 5)
+#define GGML_PRINT_DEBUG_5(...) printf(__VA_ARGS__)
+#else
+#define GGML_PRINT_DEBUG_5(...)
+#endif
+
+#if (GGML_DEBUG >= 10)
+#define GGML_PRINT_DEBUG_10(...) printf(__VA_ARGS__)
+#else
+#define GGML_PRINT_DEBUG_10(...)
+#endif
+
+#define GGML_PRINT(...) printf(__VA_ARGS__)
+
+
+float frand(void) {
+    return (float)rand()/(float)RAND_MAX;
+}
+
+int irand(int n) {
+    return rand()%n;
+}
+
+void get_random_dims(int64_t * dims, int ndims) {
+    dims[0] = dims[1] = dims[2] = dims[3] = 1;
+
+    for (int i = 0; i < ndims; i++) {
+        dims[i] = 1 + irand(4);
+    }
+}
+
+void get_random_dims_minmax(int64_t * dims, int ndims, int min, int max) {
+    dims[0] = dims[1] = dims[2] = dims[3] = 1;
+
+    for (int i = 0; i < ndims; i++) {
+        dims[i] = min + irand(max-min);
+    }
+}
+
+
+struct ggml_tensor * get_random_tensor(
+        struct ggml_context * ctx0,
+        int ndims,
+        int64_t ne[],
+        float fmin,
+        float fmax) {
+    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F32, ndims, ne);
+
+    switch (ndims) {
+        case 1:
+            for (int i0 = 0; i0 < ne[0]; i0++) {
+                ((float *)result->data)[i0] = frand()*(fmax - fmin) + fmin;
+            }
+            break;
+        case 2:
+            for (int i1 = 0; i1 < ne[1]; i1++) {
+                for (int i0 = 0; i0 < ne[0]; i0++) {
+                    ((float *)result->data)[i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
+                }
+            }
+            break;
+        case 3:
+            for (int i2 = 0; i2 < ne[2]; i2++) {
+                for (int i1 = 0; i1 < ne[1]; i1++) {
+                    for (int i0 = 0; i0 < ne[0]; i0++) {
+                        ((float *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
+                    }
+                }
+            }
+            break;
+        case 4:
+            for (int i3 = 0; i3 < ne[3]; i3++) {
+                for (int i2 = 0; i2 < ne[2]; i2++) {
+                    for (int i1 = 0; i1 < ne[1]; i1++) {
+                        for (int i0 = 0; i0 < ne[0]; i0++) {
+                            ((float *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = frand()*(fmax - fmin) + fmin;
+                        }
+                    }
+                }
+            }
+            break;
+        default:
+            assert(false);
+    };
+
+    return result;
+}
+
+float get_element(const struct ggml_tensor * t, int idx) {
+    return ((float *)t->data)[idx];
+}
+
+void set_element(struct ggml_tensor * t, int idx, float value) {
+    ((float *)t->data)[idx] = value;
+}
+
+int main(void) {
+    struct ggml_init_params params = {
+        /* .mem_size   = */ 1024*1024*1024,
+        /* .mem_buffer = */ NULL,
+        /* .no_alloc   = */ false,
+    };
+
+    struct ggml_context * ctx = ggml_init(params);
+
+    int64_t ne1[4] = {4, 128, 1, 1};
+    int64_t ne2[4] = {4, 256, 1, 1};;
+    int64_t ne3[4] = {128, 256, 1, 1};
+
+    struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1);
+    struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1);
+    ggml_set_param(ctx, a);
+    ggml_set_param(ctx, b);
+
+    struct ggml_tensor * c = get_random_tensor(ctx, 2, ne3, -1, +1);
+
+    struct ggml_tensor * ab = ggml_mul_mat(ctx, a, b);
+    struct ggml_tensor * d = ggml_sub(ctx, c, ab);
+    struct ggml_tensor * e = ggml_sum(ctx, ggml_sqr(ctx, d));
+
+    struct ggml_cgraph ge = ggml_build_forward(e);
+    ggml_graph_reset(&ge);
+
+    ggml_graph_compute_with_ctx(ctx, &ge, /*n_threads*/ 1);
+
+    const float fe = ggml_get_f32_1d(e, 0);
+    printf("%s: e = %.4f\n", __func__, fe);
+
+    struct ggml_opt_params opt_params = ggml_opt_default_params(GGML_OPT_ADAM);
+
+    ggml_opt(ctx, opt_params, e);
+
+    ggml_graph_reset(&ge);
+
+    ggml_graph_compute_with_ctx(ctx, &ge, /*n_threads*/ 1);
+
+    const float fe_opt = ggml_get_f32_1d(e, 0);
+    printf("%s: original e = %.4f\n", __func__, fe);
+    printf("%s: optimized e = %.4f\n", __func__, fe_opt);
+
+    const bool success = (fe_opt <= fe);
+    assert(success);
+
+    ggml_free(ctx);
+    return success ? 0 : -1;
+}
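The /* .mem_size = */ comments are the reason the file had to change at all: C99 designated initializers (.field = value) are not valid C++11 and only arrived in C++ with C++20, so the port falls back to positional aggregate initialization and keeps the field names as comments. Illustrated with a stand-in struct, not ggml's real definition:

#include <cstddef>

struct init_params {      // stand-in with the same field order as ggml_init_params
    size_t mem_size;
    void * mem_buffer;
    bool   no_alloc;
};

int main() {
    // the order must match the declaration exactly -- the comments only document it
    init_params p = {
        /* .mem_size   = */ 1024*1024,
        /* .mem_buffer = */ nullptr,
        /* .no_alloc   = */ false,
    };
    return p.no_alloc ? 1 : 0;
}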
+// int64_t ne1[4] = {4, 128, 1, 1};
+// int64_t ne2[4] = {4, 256, 1, 1};;
+// int64_t ne3[4] = {128, 256, 1, 1};
+// main: original e = 25890.9375
+// main: optimized e = 10094.7031
+
+// int64_t ne1[4] = {8, 128, 1, 1};
+// int64_t ne2[4] = {8, 256, 1, 1};;
+// int64_t ne3[4] = {128, 256, 1, 1};
+// main: original e = 39429.5078
+// main: optimized e = 9275.8936
+
+// int64_t ne1[4] = {16, 128, 1, 1};
+// int64_t ne2[4] = {16, 256, 1, 1};;
+// int64_t ne3[4] = {128, 256, 1, 1};
+// main: original e = 68371.1328
+// main: optimized e = 7854.4502
+
+
+// int64_t ne1[4] = {32, 128, 1, 1};
+// int64_t ne2[4] = {32, 256, 1, 1};;
+// int64_t ne3[4] = {128, 256, 1, 1};
+// main: original e = 126061.1953
+// main: optimized e = 5451.0166
+
+// int64_t ne1[4] = {4, 1024, 1, 1};
+// int64_t ne2[4] = {4, 2048, 1, 1};;
+// int64_t ne3[4] = {1024, 2048, 1, 1};
+// main: original e = 1620817.8750
+// main: optimized e = 698387.6875
+
+// another run on M1
+// int64_t ne1[4] = {4, 1024, 1, 1};
+// int64_t ne2[4] = {4, 2048, 1, 1};;
+// int64_t ne3[4] = {1024, 2048, 1, 1};
+// main: original e = 1629595.6250
+// main: optimized e = 698169.1250
+
+// int64_t ne1[4] = {32, 1024, 1, 1};
+// int64_t ne2[4] = {32, 2048, 1, 1};;
+// int64_t ne3[4] = {1024, 2048, 1, 1};
+// main: original e = 8146770.5000
+// main: optimized e = 651119.1250
--
cgit v1.2.3