diff options
author | Howard Su <howard0su@gmail.com> | 2023-04-14 14:24:52 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-14 09:24:52 +0300 |
commit | c5d70f5c9ea5a8f0f6b0d6aa741455978a1dabfd (patch) | |
tree | bb2a51f4e2ced77b303673d903249215fd5ad69e | |
parent | be87b6ed20a5f7528bf491a83e759a9fc6a24fea (diff) |
ggml : optimize rope function to avoid calling powf in the tight loop (#807)
-rw-r--r-- | ggml.c | 22 |
1 files changed, 12 insertions, 10 deletions
@@ -7507,19 +7507,20 @@ static void ggml_compute_forward_rope_f32( // row index used to determine which thread to use int ir = 0; + const float theta_scale = powf(10000.0, ((float)-2)/n_dims); + for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { const int p = (mode == 0 ? n_past + i2 : i2); for (int64_t i1 = 0; i1 < ne1; i1++) { if (ir++ < ir0) continue; if (ir > ir1) break; - + float theta = (float)p; for (int i0 = 0; i0 < n_dims; i0 += 2) { - const float theta = powf(10000.0, ((float)-i0)/n_dims); - - const float cos_theta = cosf(p*theta); - const float sin_theta = sinf(p*theta); + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + theta *= theta_scale; const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); @@ -7580,19 +7581,20 @@ static void ggml_compute_forward_rope_f16( // row index used to determine which thread to use int ir = 0; + const float theta_scale = powf(10000.0, ((float)-2)/n_dims); + for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { const int p = (mode == 0 ? n_past + i2 : i2); for (int64_t i1 = 0; i1 < ne1; i1++) { if (ir++ < ir0) continue; if (ir > ir1) break; - + float theta = (float)p; for (int i0 = 0; i0 < n_dims; i0 += 2) { - const float theta = powf(10000.0, ((float)-i0)/n_dims); - - const float cos_theta = cosf(p*theta); - const float sin_theta = sinf(p*theta); + const float cos_theta = cosf(theta); + const float sin_theta = sinf(theta); + theta *= theta_scale; const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); |