diff options
author | Georgi Gerganov <ggerganov@gmail.com> | 2023-04-21 10:23:36 +0300 |
---|---|---|
committer | Georgi Gerganov <ggerganov@gmail.com> | 2023-04-21 10:24:02 +0300 |
commit | d40fded93e1a533e969768e1e335c15c61c296ce (patch) | |
tree | f2a3e8e145196a1521f3031f7165d92a7069b8fc | |
parent | 2510c1831fac874f32e272f6079f01b5461f3986 (diff) |
llama : fix comment for "output.weight" tensor
-rw-r--r-- | llama.cpp | 4 |
1 files changed, 2 insertions, 2 deletions
@@ -1618,8 +1618,8 @@ static void llama_model_quantize_internal(const std::string & fname_inp, const s
         // quantize only 2D tensors
         quantize &= (tensor.ne.size() == 2);

-        // GG: uncomment this to keep the output layer in FP16
-        //if (tensor.name.rfind("output")) {
+        // uncomment this to keep the output layer in FP16
+        //if (tensor.name == "output.weight") {
         //    quantize = false;
         //}