From 41c674161fb2459bdf7806d1eebead15bc5d046e Mon Sep 17 00:00:00 2001 From: slaren Date: Mon, 24 Jul 2023 17:57:12 +0200 Subject: make rms_norm_eps a parameter (#2374) * make rms_norm_eps a parameter * add rms_norm_eps to command line * fix baby llama, test-grad0 * use scientific notation for eps param in the help ggml-ci --- examples/baby-llama/baby-llama.cpp | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'examples/baby-llama') diff --git a/examples/baby-llama/baby-llama.cpp b/examples/baby-llama/baby-llama.cpp index 4965881..f9dc0aa 100644 --- a/examples/baby-llama/baby-llama.cpp +++ b/examples/baby-llama/baby-llama.cpp @@ -8,6 +8,8 @@ #pragma warning(disable: 4244 4267) // possible loss of data #endif +static const float rms_norm_eps = 1e-6f; + float frand() { return (float)rand()/(float)RAND_MAX; } @@ -562,7 +564,7 @@ struct ggml_tensor * forward( // norm { // cur shape [n_embd,N,1,1] - cur = ggml_rms_norm(ctx0, inpL); + cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps); // cur = attention_norm*cur cur = ggml_mul(ctx0, @@ -685,7 +687,7 @@ struct ggml_tensor * forward( // norm { // cur shape [n_embd,N,1,1] - cur = ggml_rms_norm(ctx0, inpFF); + cur = ggml_rms_norm(ctx0, inpFF, rms_norm_eps); // cur = ffn_norm*cur // cur shape [n_embd,N,1,1] @@ -729,7 +731,7 @@ struct ggml_tensor * forward( { // inpL shape [n_embd,N,1,1] - inpL = ggml_rms_norm(ctx0, inpL); + inpL = ggml_rms_norm(ctx0, inpL, rms_norm_eps); // inpL = norm*inpL // inpL shape [n_embd,N,1,1] @@ -817,7 +819,7 @@ struct ggml_tensor * forward_batch( // norm { // cur shape [n_embd,N*n_batch,1,1] - cur = ggml_rms_norm(ctx0, inpL); + cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps); assert_shape_2d(cur, n_embd, N*n_batch); // cur = attention_norm*cur @@ -981,7 +983,7 @@ struct ggml_tensor * forward_batch( // norm { // cur shape [n_embd,N*n_batch,1,1] - cur = ggml_rms_norm(ctx0, inpFF); + cur = ggml_rms_norm(ctx0, inpFF, rms_norm_eps); assert_shape_2d(cur, n_embd, N*n_batch); // cur = ffn_norm*cur @@ -1034,7 +1036,7 @@ struct ggml_tensor * forward_batch( { // inpL shape [n_embd,N*n_batch,1,1] - inpL = ggml_rms_norm(ctx0, inpL); + inpL = ggml_rms_norm(ctx0, inpL, rms_norm_eps); assert_shape_2d(inpL, n_embd, N*n_batch); // inpL = norm*inpL @@ -1104,7 +1106,7 @@ struct ggml_tensor * forward_lora( // norm { // cur shape [n_embd,N,1,1] - cur = ggml_rms_norm(ctx0, inpL); + cur = ggml_rms_norm(ctx0, inpL, rms_norm_eps); // cur = attention_norm*cur cur = ggml_mul(ctx0, @@ -1251,7 +1253,7 @@ struct ggml_tensor * forward_lora( // norm { // cur shape [n_embd,N,1,1] - cur = ggml_rms_norm(ctx0, inpFF); + cur = ggml_rms_norm(ctx0, inpFF, rms_norm_eps); // cur = ffn_norm*cur // cur shape [n_embd,N,1,1] @@ -1295,7 +1297,7 @@ struct ggml_tensor * forward_lora( { // inpL shape [n_embd,N,1,1] - inpL = ggml_rms_norm(ctx0, inpL); + inpL = ggml_rms_norm(ctx0, inpL, rms_norm_eps); // inpL = norm*inpL // inpL shape [n_embd,N,1,1] -- cgit v1.2.3