aboutsummaryrefslogtreecommitdiff
path: root/tests
diff options
context:
space:
mode:
authorkatsu560 <118887472+katsu560@users.noreply.github.com>2023-06-27 01:47:02 +0900
committerGitHub <noreply@github.com>2023-06-26 19:47:02 +0300
commita84ab1da8dc6a59a5b67420ae1322f09503ffc72 (patch)
treef4678c05c35cd67c322ff5bff6d5d4ef3c0dae95 /tests
parent5743ca80928d8410754ec64a5673d5c2dd6cfbb7 (diff)
tests : fix quantize perf (#1990)
* fix test quantize perf * avoid the global state
Diffstat (limited to 'tests')
-rw-r--r--tests/test-quantize-perf.cpp71
1 files changed, 59 insertions, 12 deletions
diff --git a/tests/test-quantize-perf.cpp b/tests/test-quantize-perf.cpp
index 6003757..c0e361e 100644
--- a/tests/test-quantize-perf.cpp
+++ b/tests/test-quantize-perf.cpp
@@ -21,6 +21,7 @@
#define QK 32
#define WARMUP 5
#define ITERATIONS 10
+#define MAX_ITERATIONS 100000000
#define L1_SIZE 32*128
#define L2_SIZE 32*2048
@@ -36,9 +37,9 @@ struct quantize_perf_params {
bool op_dequantize_row_q = false;
bool op_quantize_row_q_dot = false;
bool op_vec_dot_q = false;
+ int64_t iterations = ITERATIONS;
};
-
#if defined(__x86_64__) || defined(__i386__)
#include <x86intrin.h>
@@ -75,7 +76,7 @@ void * align_with_offset(void * ptr, int offset) {
return (char *) std::align(MAX_ALIGNMENT, MAX_ALIGNMENT, ptr, dummy_size) + offset;
}
-void benchmark_function(size_t size, size_t q_size, std::function<size_t(void)> function) {
+void benchmark_function(size_t size, size_t q_size, int64_t iterations, std::function<size_t(void)> function) {
int64_t min_time_us = INT64_MAX;
int64_t total_time_us = 0;
int64_t min_time_cycles = INT64_MAX;
@@ -86,7 +87,7 @@ void benchmark_function(size_t size, size_t q_size, std::function<size_t(void)>
}
- for (int i = 0; i < ITERATIONS; i++) {
+ for (int i = 0; i < iterations; i++) {
const int64_t start_time = ggml_time_us();
const int64_t start_cycles = cpu_cycles();
@@ -102,9 +103,38 @@ void benchmark_function(size_t size, size_t q_size, std::function<size_t(void)>
}
printf(" min cycles/%d vals : %9.2f\n", QK, QK * min_time_cycles / (float) size);
- printf(" avg cycles/%d vals : %9.2f\n", QK, QK * total_time_cycles / (float) (size * ITERATIONS));
- printf(" float32 throughput : %9.2f GB/s\n", gigabytes_per_second(4 * size * ITERATIONS, total_time_us));
- printf(" quantized throughput : %9.2f GB/s\n", gigabytes_per_second(q_size * ITERATIONS, total_time_us));
+ printf(" avg cycles/%d vals : %9.2f\n", QK, QK * total_time_cycles / (float) (size * iterations));
+ printf(" float32 throughput : %9.2f GB/s\n", gigabytes_per_second(4 * size * iterations, total_time_us));
+ printf(" quantized throughput : %9.2f GB/s\n", gigabytes_per_second(q_size * iterations, total_time_us));
+}
+
+// Print the command-line help text to stdout.
+//
+// argv: the program's argument vector; only argv[0] is read, as the program
+//       name shown in the "usage:" line.
+//
+// The value in parentheses after each option is its default. The list printed
+// for --type is built at run time: a ggml type is listed only when it has a
+// non-null name and provides both quantize_row_q and dequantize_row_q.
+void usage(char * argv[]) {
+    printf("Benchmark quantization specific functions on synthetic data\n");
+    printf("\n");
+    printf("usage: %s [options]\n", argv[0]);
+    printf("\n");
+    printf("options: (default)\n");
+    printf(" -h, --help show this help message and exit\n");
+    printf(" --size SIZE set test size, divisible by 32 (L1_SIZE:%d)\n", L1_SIZE);
+    printf(" -3 use size as L1, L2, L3 sizes (L1:%d L2:%d L3:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE);
+    printf(" -4 use size as L1, L2, L3, MEM sizes (L1:%d L2:%d L3:%d MEM:%d)\n", L1_SIZE, L2_SIZE, L3_SIZE, MEM_SIZE);
+    printf(" --op OP set test operation as quantize_row_q_reference, quantize_row_q, dequantize_row_q,\n");
+    printf(" quantize_row_q_dot, vec_dot_q (all)\n");
+    printf(" --type TYPE set test type as");
+    for (int i = 0; i < GGML_TYPE_COUNT; i++) {
+        ggml_type type = (ggml_type) i;
+        quantize_fns_t qfns = ggml_internal_get_quantize_fn(type);
+        // Look the name up once; it is both the filter and the text printed.
+        const char * type_name = ggml_type_name(type);
+        if (type_name != nullptr && qfns.quantize_row_q && qfns.dequantize_row_q) {
+            printf(" %s", type_name);
+        }
+    }
+    printf(" (all)\n");
+    printf(" --alignment-offset OFFSET\n");
+    printf(" set alignment offset as OFFSET (0)\n");
+    printf(" -i NUM, --iterations NUM\n");
+    printf(" set test iteration number (%d)\n", ITERATIONS);
 }
int main(int argc, char * argv[]) {
@@ -178,6 +208,21 @@ int main(int argc, char * argv[]) {
break;
}
params.alignment_offset = alignment;
+ } else if ((arg == "-i") || (arg == "--iterations")) {
+ if (++i >= argc) {
+ invalid_param = true;
+ break;
+ }
+ int number = std::stoi(argv[i]);
+ if (number < 0 || number > MAX_ITERATIONS) {
+ fprintf(stderr, "error: iterations must be less than %d\n", MAX_ITERATIONS);
+ invalid_param = true;
+ break;
+ }
+ params.iterations = number;
+ } else if ((arg == "-h") || (arg == "--help")) {
+ usage(argv);
+ return 1;
} else {
fprintf(stderr, "error: unknown argument: %s\n", arg.c_str());
return 1;
@@ -213,6 +258,8 @@ int main(int argc, char * argv[]) {
generate_data(0, largest, test_data1);
generate_data(1, largest, test_data2);
+ int64_t iterations = params.iterations;
+
// Initialize GGML, ensures float conversion tables are initialized
struct ggml_init_params ggml_params = {
@@ -225,7 +272,7 @@ int main(int argc, char * argv[]) {
for (int i = 0; i < GGML_TYPE_COUNT; i++) {
ggml_type type = (ggml_type) i;
quantize_fns_t qfns = ggml_internal_get_quantize_fn(i);
- if (!params.include_types.empty() && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
+ if (!params.include_types.empty() && ggml_type_name(type) && std::find(params.include_types.begin(), params.include_types.end(), ggml_type_name(type)) == params.include_types.end()) {
continue;
}
@@ -241,7 +288,7 @@ int main(int argc, char * argv[]) {
return test_q1[0];
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
- benchmark_function(size, quantized_size, quantize_fn);
+ benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
}
@@ -255,7 +302,7 @@ int main(int argc, char * argv[]) {
return test_q1[0];
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
- benchmark_function(size, quantized_size, quantize_fn);
+ benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
}
@@ -270,7 +317,7 @@ int main(int argc, char * argv[]) {
return test_out[0];
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
- benchmark_function(size, quantized_size, quantize_fn);
+ benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
}
@@ -284,7 +331,7 @@ int main(int argc, char * argv[]) {
return test_q1[0];
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
- benchmark_function(size, quantized_size, quantize_fn);
+ benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
}
@@ -301,7 +348,7 @@ int main(int argc, char * argv[]) {
return result;
};
size_t quantized_size = size / ggml_blck_size(type) * ggml_type_size(type);
- benchmark_function(size, quantized_size, quantize_fn);
+ benchmark_function(size, quantized_size, iterations, quantize_fn);
}
printf("\n");
}