aboutsummaryrefslogtreecommitdiff
path: root/tests/test-double-float.cpp
diff options
context:
space:
mode:
author aditya <bluenerd@protonmail.com> 2023-08-10 12:32:35 +0530
committer aditya <bluenerd@protonmail.com> 2023-08-10 12:32:35 +0530
commit a9ff78b3f48dc9f81943c41531c4959ce7e2ae9d (patch)
tree 49ee8c3c9148038f04112802265d928ef1aba428 /tests/test-double-float.cpp
parent 2516af4cd61f509c995b4f78fdf123cba33f3509 (diff)
parent 916a9acdd0a411426690400ebe2bb7ce840a6bba (diff)
resolve merge conflict
Diffstat (limited to 'tests/test-double-float.cpp')
-rw-r--r-- tests/test-double-float.cpp 55
1 files changed, 55 insertions, 0 deletions
diff --git a/tests/test-double-float.cpp b/tests/test-double-float.cpp
new file mode 100644
index 0000000..b506f27
--- /dev/null
+++ b/tests/test-double-float.cpp
@@ -0,0 +1,55 @@
+// These tests may take a long time!
+// They are to prove that conversion from double to float of various functions in ggml.c doesn't affect the result.
+// This is done by checking all finite (non-NaN, non-infinite) floats.
+
+#undef NDEBUG
+#include <cassert>
+#include <immintrin.h>
+#include <cmath>
+#include <cstdint>
+#include <cstring>
+
+#pragma GCC diagnostic push
+#pragma GCC diagnostic ignored "-Wdouble-promotion"
+
+// ggml.c::quantize_row_q4_0_reference -- original form, written with round() rather than roundf(); presumably this resolves to the double-precision round (hence the -Wdouble-promotion suppression around it) -- TODO confirm for the target standard library.
+inline static uint8_t round_orig(float v0) { return ((int8_t) (round(v0))) + 8; } // rounded value is cast to int8_t, offset by 8, then narrowed to uint8_t by the return type
+
+// ggml.c::ggml_silu_f32 -- original form of SiLU, x / (1 + e^(-x)), written with a double literal and exp().
+inline static float silu_orig(float x) {
+ return x/(1.0 + exp(-x)); // 1.0 is a double literal, so the sum and the division are evaluated in double; only the returned value is narrowed to float
+}
+
+#pragma GCC diagnostic pop
+
+// ggml.c::quantize_row_q4_0_reference -- float-only form: same expression as round_orig, but rounding with roundf().
+inline static uint8_t round_float(float v0) { return (int8_t)roundf(v0) + 8; } // main() asserts this equals round_orig(v0) for every finite float
+
+// ggml.c::ggml_silu_f32 -- float-only form of SiLU: uses the float literal 1.0f and expf(), so no double operand appears in the expression.
+inline static float silu_float(float x) {
+ return x/(1.0f + expf(-x));
+}
+
+int main(void) { // exhaustive comparison of the *_orig and *_float helpers; any mismatch aborts through assert (NDEBUG is #undef'd before <cassert> is included)
+ uint32_t x = UINT32_MAX; // bit pattern of the float under test; the loop below counts it down to 0
+ do {
+ float f;
+ memcpy(&f, &x, sizeof(x)); // reinterpret the bytes of x as a float (assumes sizeof(float) == sizeof(uint32_t))
+ assert(!std::isfinite(f) || (round_orig(f) == round_float(f))); // NaN and infinities are skipped; every finite value must round identically
+ } while (x--); // post-decrement test: the body also runs for x == 0, after which the loop ends
+
+#ifdef __F16C__
+ // GELU and SILU implementations are used with a FP16 lookup table.
+ // The original and float-only results are not equal for all inputs after converting to FP16.
+ // GELU is an approximation anyway (tanh), not tested here.
+ // For SILU, verify that the results are at least the closest floating point numbers, if the FP16 values don't match.
+ for (x = 0; x <= UINT16_MAX; x++) { // every 16-bit pattern; x is 32 bits wide, so incrementing past UINT16_MAX ends the loop instead of wrapping
+ float f = _cvtsh_ss(x); // treat x as an FP16 bit pattern and widen it to float
+ const float so = silu_orig(f);
+ const float sf = silu_float(f);
+ assert( (_cvtss_sh(so, 0) == _cvtss_sh(sf, 0)) // accept identical FP16 bits after converting back (second argument 0 is presumably the rounding control -- TODO confirm in the intrinsics guide)
+ || (nextafterf(so, sf) == sf) // otherwise accept sf being the float reached by one nextafterf step from so toward sf
+ || (nextafterf(sf, so) == so)); // or so being one nextafterf step from sf toward so
+ }
+#endif
+}