diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-04-14 13:31:29 +0300 |
---|---|---|
committer | Georgi Gerganov <ggerganov@gmail.com> | 2023-04-14 13:31:29 +0300 |
commit | 1623a6e9b46453bff30afd7d0f6c3fd188499c2f (patch) | |
tree | 0cb98d0748367a38e0c7641910ff687922dfa252 | |
parent | c14e0d2f23e6d1e785255f4da8c253c1b4723659 (diff) |
ggml : minor
-rw-r--r-- | ggml.c | 10 |
1 files changed, 8 insertions, 2 deletions
@@ -7509,7 +7509,7 @@ static void ggml_compute_forward_rope_f32( // row index used to determine which thread to use int ir = 0; - const float theta_scale = powf(10000.0, ((float)-2)/n_dims); + const float theta_scale = powf(10000.0, -2.0f/n_dims); for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { @@ -7517,12 +7517,15 @@ static void ggml_compute_forward_rope_f32( for (int64_t i1 = 0; i1 < ne1; i1++) { if (ir++ < ir0) continue; if (ir > ir1) break; + float theta = (float)p; + for (int i0 = 0; i0 < n_dims; i0 += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); theta *= theta_scale; + const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); float * dst_data = (float *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); @@ -7583,7 +7586,7 @@ static void ggml_compute_forward_rope_f16( // row index used to determine which thread to use int ir = 0; - const float theta_scale = powf(10000.0, ((float)-2)/n_dims); + const float theta_scale = powf(10000.0, -2.0f/n_dims); for (int64_t i3 = 0; i3 < ne3; i3++) { for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { @@ -7591,12 +7594,15 @@ static void ggml_compute_forward_rope_f16( for (int64_t i1 = 0; i1 < ne1; i1++) { if (ir++ < ir0) continue; if (ir > ir1) break; + float theta = (float)p; + for (int i0 = 0; i0 < n_dims; i0 += 2) { const float cos_theta = cosf(theta); const float sin_theta = sinf(theta); theta *= theta_scale; + const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); ggml_fp16_t * dst_data = (ggml_fp16_t *)((char *) dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); |