ggml : load data into int8x16x4_t using vld4q_s8 on arm64 (#1738)

author: le.chang <cljs118@126.com> 2023-06-09 00:47:56 +0800
committer: GitHub <noreply@github.com> 2023-06-08 19:47:56 +0300
commit: 8432d4d9f716b25133e3ed671d91e21f6f3be867 (patch)
tree: 1cb67f7e93004e9464425ad312015208093edd66 /k_quants.c
parent: 0f291e1f65c1d68201e71ce99c89562a36686b6d (diff)
1 file changed, 6 insertions, 6 deletions
diff --git a/k_quants.c b/k_quants.c
index 4d52449..b3d6dc7 100644
--- a/k_quants.c
+++ b/k_quants.c
@@ -1259,8 +1259,8 @@ void ggml_vec_dot_q3_K_q8_K(const int n, float * restrict s, const void * restri
         for (int j = 0; j < QK_K/128; ++j) {
 
             const uint8x16x2_t q3bits = vld1q_u8_x2(q3); q3 += 32;
-            const int8x16x4_t q8bytes_1 = vld1q_s8_x4(q8); q8 += 64;
-            const int8x16x4_t q8bytes_2 = vld1q_s8_x4(q8); q8 += 64;
+            const int8x16x4_t q8bytes_1 = vld4q_s8(q8); q8 += 64;
+            const int8x16x4_t q8bytes_2 = vld4q_s8(q8); q8 += 64;
 
             q3h.val[0] = vshlq_n_u8(vbicq_u8(m0, qhbits.val[0]), 2);
             q3h.val[1] = vshlq_n_u8(vbicq_u8(m0, qhbits.val[1]), 2);
@@ -1788,7 +1788,7 @@ void ggml_vec_dot_q5_K_q8_K(const int n, float * restrict s, const void * restri
         for (int j = 0; j < QK_K/64; ++j) {
 
             const uint8x16x2_t q5bits = vld1q_u8_x2(q5); q5 += 32;
-            const int8x16x4_t q8bytes = vld1q_s8_x4(q8); q8 += 64;
+            const int8x16x4_t q8bytes = vld4q_s8(q8); q8 += 64;
 
             q5h.val[0] = vshlq_n_u8(vandq_u8(mone, qhbits.val[0]), 4);
             q5h.val[1] = vshlq_n_u8(vandq_u8(mone, qhbits.val[1]), 4);
@@ -2020,8 +2020,8 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
         for (int j = 0; j < QK_K/128; ++j) {
 
             uint8x16x2_t qhbits = vld1q_u8_x2(qh); qh += 32;
-            uint8x16x4_t q6bits = vld1q_u8_x4(q6); q6 += 64;
-            int8x16x4_t q8bytes = vld1q_s8_x4(q8); q8 += 64;
+            uint8x16x4_t q6bits = vld4q_u8(q6); q6 += 64;
+            int8x16x4_t q8bytes = vld4q_s8(q8); q8 += 64;
 
             q6h.val[0] = vshlq_n_u8(vandq_u8(mone, qhbits.val[0]), 4);
             q6h.val[1] = vshlq_n_u8(vandq_u8(mone, qhbits.val[1]), 4);
@@ -2064,7 +2064,7 @@ void ggml_vec_dot_q6_K_q8_K(const int n, float * restrict s, const void * restri
             scale += 2;
 #endif
 
-            q8bytes = vld1q_s8_x4(q8); q8 += 64;
+            q8bytes = vld4q_s8(q8); q8 += 64;
 
             shifted = vshrq_n_u8(qhbits.val[0], 4);
             q6h.val[0] = vshlq_n_u8(vandq_u8(mone, shifted), 4);
author	le.chang <cljs118@126.com>	2023-06-09 00:47:56 +0800
committer	GitHub <noreply@github.com>	2023-06-08 19:47:56 +0300
commit	8432d4d9f716b25133e3ed671d91e21f6f3be867 (patch)
tree	1cb67f7e93004e9464425ad312015208093edd66 /k_quants.c
parent	0f291e1f65c1d68201e71ce99c89562a36686b6d (diff)