diff options
author | Stephan Walter <stephan@walter.name> | 2023-03-28 16:48:20 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-28 19:48:20 +0300 |
commit | 436e56193199a1625f8c561069f702e8840a9e08 (patch) | |
tree | 9e7f39e1736ccff5728bb6194f160dfa94cf552d /tests | |
parent | 20e1e84884376b3fb44ffbfd48d478b2934b0b5e (diff) |
all : be more strict about converting float to double (#458)
* Be more strict about converting float to double
* Test equivalence of round, SILU implementations
Test module is commented out in CMakeLists.txt because the tests may
take a long time, depending on how much the compiler optimizes.
* Fix softmax in perplexity.cpp
* all : prefer float over double where appropriate
* perplexity : add <cmath>
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/CMakeLists.txt | 1 | ||||
-rw-r--r-- | tests/test-double-float.c | 53 |
2 files changed, 54 insertions, 0 deletions
// tests/test-double-float.c
//
// Patch context: this commit adds this file (new file, blob 89dafc9) and
// touches tests/CMakeLists.txt (b44d7fe..157d733), where the test is
// registered as a commented-out entry placed just before
// llama_add_test(test-quantize.c):
//
//     # llama_add_test(test-double-float.c) # SLOW
//
// These tests may take a long time!
// They are to prove that conversion from double to float of various functions in ggml.c doesn't affect the result.
// This is done by checking all finite (non-NaN, non-infinite) floats.

#undef NDEBUG
#include <assert.h>
#include <math.h>
#include <stdint.h>
#include <string.h>

#ifdef __F16C__
// Only the F16C-specific SILU check in main() uses these intrinsics
// (_cvtsh_ss / _cvtss_sh), so the header is not pulled in otherwise.
#include <immintrin.h>
#endif

#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wdouble-promotion"

// ggml.c::quantize_row_q4_0_reference
// Double-precision variant: rounds with round(), narrows to int8_t, adds 8.
// The int8_t cast is only defined when the rounded value fits in int8_t.
inline static uint8_t round_orig(float v0) { return ((int8_t) (round(v0))) + 8; }

// ggml.c::ggml_silu_f32
// Double-precision variant of x / (1 + e^-x); the result is narrowed to float.
inline static float silu_orig(float x) {
    return x/(1.0 + exp(-x));
}

// Returns non-zero when round() and roundf() agree on v0 *before* any
// narrowing to an integer type. This comparison is well-defined for every
// finite float, including those far outside the int8_t range.
inline static int round_results_equal(float v0) {
    return round(v0) == roundf(v0);
}

#pragma GCC diagnostic pop

// ggml.c::quantize_row_q4_0_reference
// Float-only variant: rounds with roundf(), narrows to int8_t, adds 8.
// The int8_t cast is only defined when the rounded value fits in int8_t.
inline static uint8_t round_float(float v0) { return (int8_t)roundf(v0) + 8; }

// ggml.c::ggml_silu_f32
// Float-only variant of x / (1 + e^-x).
inline static float silu_float(float x) {
    return x/(1.0f + expf(-x));
}

// Exhaustively compares the double-based and float-only implementations.
// A failed assert aborts the process; returns 0 when every check passes.
int main(void) {
    // Visit every 32-bit pattern, counting down from UINT32_MAX to 0 inclusive.
    uint32_t x = UINT32_MAX;
    do {
        // Reinterpret the bits as a float via memcpy: reading a uint32_t
        // object through a float lvalue would violate strict aliasing.
        float f;
        memcpy(&f, &x, sizeof f);

        if (!isfinite(f)) {
            continue; // in a do/while this jumps to the `x--` condition
        }

        // All finite floats: the rounded values themselves must agree.
        assert(round_results_equal(f));

        // Converting a floating value to int8_t is undefined once the rounded
        // value leaves [-128, 127], so the narrowing helpers are compared only
        // on the open interval (-128.5, 127.5), where both casts are defined.
        if (f > -128.5f && f < 127.5f) {
            assert(round_orig(f) == round_float(f));
        }
    } while (x--);

#ifdef __F16C__
    // GELU and SILU implementations are used with a FP16 lookup table.
    // The original and float-only results are not equal for all inputs after converting to FP16.
    // GELU is an approximation anyway (tanh), not tested here.
    // For SILU, verify that the results are at least the closest floating point numbers, if the FP16 values don't match.
    for (x = 0; x <= UINT16_MAX; x++) {
        float f = _cvtsh_ss((unsigned short) x);
        const float so = silu_orig(f);
        const float sf = silu_float(f);
        assert(   (_cvtss_sh(so, 0) == _cvtss_sh(sf, 0))
               || (nextafterf(so, sf) == sf)
               || (nextafterf(sf, so) == so));
    }
#endif

    return 0;
}