Diffstat (limited to 'examples/baby-llama/baby-llama.cpp')
-rw-r--r--  examples/baby-llama/baby-llama.cpp  24
1 file changed, 15 insertions(+), 9 deletions(-)
diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp
index 4965881..6fa55b3 100644
--- a/examples/baby-llama/baby-llama.cpp
+++ b/examples/baby-llama/baby-llama.cpp
@@ -8,6 +8,12 @@
#pragma warning(disable: 4244 4267) // possible loss of data
#endif
+#ifdef LLAMA_DEFAULT_RMS_EPS
+static const float rms_norm_eps = LLAMA_DEFAULT_RMS_EPS;
+#else
+static const float rms_norm_eps = 5e-6f;
+#endif
+
float frand() {
return (float)rand()/(float)RAND_MAX;
}
@@ -562,7 +568,7 @@ struct ggml_tensor * forward(
// norm
{
// cur shape [n_embd,N,1,1]
- cur = ggml_rms_norm(ctx0, inpL);
+ cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
// cur = attention_norm*cur
cur = ggml_mul(ctx0,
@@ -685,7 +691,7 @@ struct ggml_tensor * forward(
// norm
{
// cur shape [n_embd,N,1,1]
- cur = ggml_rms_norm(ctx0, inpFF);
+ cur = ggml_rms_norm(ctx0, inpFF, rms_norm_eps);
// cur = ffn_norm*cur
// cur shape [n_embd,N,1,1]
@@ -729,7 +735,7 @@ struct ggml_tensor * forward(
{
// inpL shape [n_embd,N,1,1]
- inpL = ggml_rms_norm(ctx0, inpL);
+ inpL = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
// inpL = norm*inpL
// inpL shape [n_embd,N,1,1]
@@ -817,7 +823,7 @@ struct ggml_tensor * forward_batch(
// norm
{
// cur shape [n_embd,N*n_batch,1,1]
- cur = ggml_rms_norm(ctx0, inpL);
+ cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
assert_shape_2d(cur, n_embd, N*n_batch);
// cur = attention_norm*cur
@@ -981,7 +987,7 @@ struct ggml_tensor * forward_batch(
// norm
{
// cur shape [n_embd,N*n_batch,1,1]
- cur = ggml_rms_norm(ctx0, inpFF);
+ cur = ggml_rms_norm(ctx0, inpFF, rms_norm_eps);
assert_shape_2d(cur, n_embd, N*n_batch);
// cur = ffn_norm*cur
@@ -1034,7 +1040,7 @@ struct ggml_tensor * forward_batch(
{
// inpL shape [n_embd,N*n_batch,1,1]
- inpL = ggml_rms_norm(ctx0, inpL);
+ inpL = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
assert_shape_2d(inpL, n_embd, N*n_batch);
// inpL = norm*inpL
@@ -1104,7 +1110,7 @@ struct ggml_tensor * forward_lora(
// norm
{
// cur shape [n_embd,N,1,1]
- cur = ggml_rms_norm(ctx0, inpL);
+ cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
// cur = attention_norm*cur
cur = ggml_mul(ctx0,
@@ -1251,7 +1257,7 @@ struct ggml_tensor * forward_lora(
// norm
{
// cur shape [n_embd,N,1,1]
- cur = ggml_rms_norm(ctx0, inpFF);
+ cur = ggml_rms_norm(ctx0, inpFF, rms_norm_eps);
// cur = ffn_norm*cur
// cur shape [n_embd,N,1,1]
@@ -1295,7 +1301,7 @@ struct ggml_tensor * forward_lora(
{
// inpL shape [n_embd,N,1,1]
- inpL = ggml_rms_norm(ctx0, inpL);
+ inpL = ggml_rms_norm(ctx0, inpL, rms_norm_eps);
// inpL = norm*inpL
// inpL shape [n_embd,N,1,1]
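
For reference, ggml_rms_norm normalizes each row by its root-mean-square, and the epsilon argument added here keeps that scale factor finite when a row is close to all zeros. Below is a minimal sketch of the computation on a plain float array, under that reading of the operator; the helper name rms_norm_ref is hypothetical and not part of baby-llama.cpp.

#include <cmath>
#include <cstddef>

// Reference RMS norm over one row of n_embd floats. This covers only the
// 1/sqrt(mean(x^2) + eps) scaling; multiplying by the norm weights
// (attention_norm, ffn_norm, norm) is done by the separate ggml_mul calls
// visible in the hunks above.
static void rms_norm_ref(const float * x, float * out, size_t n_embd, float eps) {
    float sum_sq = 0.0f;
    for (size_t i = 0; i < n_embd; ++i) {
        sum_sq += x[i] * x[i];
    }
    const float scale = 1.0f / std::sqrt(sum_sq / (float) n_embd + eps); // eps = rms_norm_eps
    for (size_t i = 0; i < n_embd; ++i) {
        out[i] = x[i] * scale;
    }
}

With the #ifdef block added at the top of the file, the 5e-6f default can presumably be overridden at build time, e.g. by compiling with -DLLAMA_DEFAULT_RMS_EPS=1e-5f.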