diff options
| author | Stephan Walter <stephan@walter.name> | 2023-04-13 14:59:50 +0000 | 
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-04-13 17:59:50 +0300 | 
| commit | 6232f2d7fd7a22d5eeb62182b2f21fcf01359754 (patch) | |
| tree | 431ada6d84ca8db32acee8a54ec0a888899489e2 | |
| parent | 6c248707f51c8a50f7792e7f7787ec481881db88 (diff) | |
ggml : optimize non-SIMD Q4_0 vector dot product (#703)
| -rw-r--r-- | ggml.c | 12 | 
1 file changed, 7 insertions, 5 deletions
```diff
@@ -2160,18 +2160,20 @@ static void ggml_vec_dot_q4_0(const int n, float * restrict s, const void * rest
         const uint8_t * restrict p0 = x[i].qs;
         const uint8_t * restrict p1 = y[i].qs;
+        int sumi = 0;
         for (int j = 0; j < QK/2; j++) {
             const uint8_t v0 = p0[j];
             const uint8_t v1 = p1[j];
-            const float f0 = d0*((int8_t) (v0 & 0xf) - 8);
-            const float f1 = d0*((int8_t) (v0 >> 4)  - 8);
+            const int8_t i0 = (int8_t) (v0 & 0xf) - 8;
+            const int8_t i1 = (int8_t) (v0 >> 4)  - 8;
-            const float f2 = d1*((int8_t) (v1 & 0xf) - 8);
-            const float f3 = d1*((int8_t) (v1 >> 4)  - 8);
+            const int8_t i2 = (int8_t) (v1 & 0xf) - 8;
+            const int8_t i3 = (int8_t) (v1 >> 4)  - 8;
-            sumf += f0*f2 + f1*f3;
+            sumi += i0*i2 + i1*i3;
         }
+        sumf += d0 * d1 * sumi;
     }
 #endif
```
