diff options
author | Stephan Walter <stephan@walter.name> | 2023-03-22 17:29:06 +0000 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-03-22 19:29:06 +0200 |
commit | 69c92298a9e36dc2363b3bf50452976ce49487b3 (patch) | |
tree | 685e3fc7582eab3173a87d6efd94b3f92070839d /tests | |
parent | 97940520e8fd49c56bb29b71cc350190b723513f (diff) |
Deduplicate q4 quantization functions (#383)
* Deduplicate q4 quantization functions
* Use const; add basic test
* Re-enable quantization test
* Disable AVX2 flags in CI
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'tests')
-rw-r--r-- | tests/CMakeLists.txt | 13 | ||||
-rw-r--r-- | tests/test-quantize.c | 42 |
2 files changed, 51 insertions, 4 deletions
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 4990c34..6a4170f 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -1,4 +1,9 @@ -set(TEST_TARGET test-tokenizer-0) -add_executable(${TEST_TARGET} ${TEST_TARGET}.cpp) -target_link_libraries(${TEST_TARGET} PRIVATE llama ggml utils) -add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin) +function(llama_add_test source) + get_filename_component(TEST_TARGET ${source} NAME_WE) + add_executable(${TEST_TARGET} ${source}) + target_link_libraries(${TEST_TARGET} PRIVATE llama ggml utils) + add_test(NAME ${TEST_TARGET} COMMAND $<TARGET_FILE:${TEST_TARGET}> ${ARGN}) +endfunction() + +llama_add_test(test-quantize.c) +llama_add_test(test-tokenizer-0.cpp ${CMAKE_CURRENT_SOURCE_DIR}/../models/ggml-vocab.bin) diff --git a/tests/test-quantize.c b/tests/test-quantize.c new file mode 100644 index 0000000..d59ecb8 --- /dev/null +++ b/tests/test-quantize.c @@ -0,0 +1,42 @@ +#include "ggml.h" +#undef NDEBUG +#include <assert.h> +#include <math.h> + +int main(void) { + #define QK 32 + float src[QK]; + uint8_t dst[24]; + int64_t hist[16]; + + for (int i = 0; i < QK; i++) { + src[i] = (float)(i + 1); + } + + size_t size = ggml_quantize_q4_0(src, dst, QK, QK, QK, hist); + assert(size == 20); + float max_result = ((float *)dst)[0]; + float max_expected = src[31] / ((1 << 3) - 1); + assert(max_result == max_expected); + for (int i = 0; i < QK; i++) { + uint8_t q4_result = (i % 2) ? (dst[sizeof(float) + i/2] >> 4) : (dst[sizeof(float) + i/2] & 0xF); + uint8_t q4_expected = roundf(src[i] / max_expected) + 8; + assert(q4_result == q4_expected); + } + + size = ggml_quantize_q4_1(src, dst, QK, QK, QK, hist); + assert(size == 24); + float delta_result = ((float *)dst)[0]; + float delta_expected = (src[31] - src[0]) / ((1 << 4) - 1); + assert(delta_result == delta_expected); + float min_result = ((float *)dst)[1]; + float min_expected = src[0]; + assert(min_result == min_expected); + for (int i = 0; i < QK; i++) { + uint8_t q4_result = (i % 2) ? (dst[sizeof(float)*2 + i/2] >> 4) : (dst[sizeof(float)*2 + i/2] & 0xF); + uint8_t q4_expected = roundf((src[i] - min_expected) / delta_expected); + assert(q4_result == q4_expected); + } + + return 0; +} |