about | summary | refs | log | tree | commit | diff
path: root/utils.cpp
diff options
context:
space:
mode:
author Georgi Gerganov <ggerganov@gmail.com> 2023-03-11 10:47:09 +0200
committer Georgi Gerganov <ggerganov@gmail.com> 2023-03-11 11:28:30 +0200
commit 007a8f6f459c6eb56678fdee4c09219ddb85b640 (patch)
tree 9fc8e018b0acab10f4728f45df7a7527fd0d40cc /utils.cpp
parent 5f2f970d51a04b783799bc92fd1d006408269f26 (diff)
Support all LLaMA models + change Q4_0 quantization storage
Diffstat (limited to 'utils.cpp')
-rw-r--r-- utils.cpp 13
1 files changed, 8 insertions, 5 deletions
diff --git a/utils.cpp b/utils.cpp
index 6bd1fc0..abb3475 100644
--- a/utils.cpp
+++ b/utils.cpp
@@ -448,7 +448,8 @@ gpt_vocab::id llama_sample_top_p(
size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t * hist) {
const int nb = k / qk;
- const size_t row_size = nb*(sizeof(float) + sizeof(uint8_t)*qk/2);
+ const size_t bs = (sizeof(float) + sizeof(uint8_t)*qk/2);
+ const size_t row_size = nb*bs;
assert(k % qk == 0);
@@ -457,8 +458,8 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
char * pdst = (char *) dst;
for (int j = 0; j < n; j += k) {
- float * pd = (float *) (pdst + (j/k)*row_size);
- uint8_t * pb = (uint8_t *) (pd + nb);
+ uint8_t * pd = (uint8_t *) (pdst + (j/k)*row_size + 0*bs);
+ uint8_t * pb = (uint8_t *) (pdst + (j/k)*row_size + 0*bs + sizeof(float));
for (int i = 0; i < nb; i++) {
float amax = 0.0f; // absolute max
@@ -472,7 +473,8 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
const float d = amax / ((1 << 3) - 1);
const float id = d ? 1.0f/d : 0.0f;
- pd[i] = d;
+ *(float *) pd = d;
+ pd += bs;
for (int l = 0; l < qk; l += 2) {
const float v0 = (src[j + i*qk + l + 0])*id;
@@ -490,7 +492,8 @@ size_t ggml_quantize_q4_0(float * src, void * dst, int n, int k, int qk, int64_t
pp[l/2] = vi0 | (vi1 << 4);
}
- memcpy(pb + i*qk/2, pp, sizeof(pp));
+ memcpy(pb, pp, sizeof(pp));
+ pb += bs;
}
}
}