aboutsummaryrefslogtreecommitdiff
path: root/convert.py
diff options
context:
space:
mode:
author Judd <foldl@users.noreply.github.com> 2023-07-07 00:23:49 +0800
committer GitHub <noreply@github.com> 2023-07-06 19:23:49 +0300
commit 36680f6e40e4440c3ec3385d0b7e5ca8bb6c37f7 (patch)
tree 1878d52dbd76aae450e044208208ff9335ec2e56 /convert.py
parent a17a2683d8fdb899ba497d0c28ccafb28c62efb6 (diff)
convert : update for baichuan (#2081)
1. guess n_layers; 2. relax warnings on context size; 3. add a note that its derivations are also supported. Co-authored-by: Judd <foldl@boxvest.com>
Diffstat (limited to 'convert.py')
-rw-r--r-- convert.py 6
1 files changed, 6 insertions, 0 deletions
diff --git a/convert.py b/convert.py
index 1426927..66509b9 100644
--- a/convert.py
+++ b/convert.py
@@ -154,9 +154,15 @@ class Params:
# try transformer naming first
if "model.layers.0.self_attn.q_proj.weight" in model:
n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model)
+ elif "model.layers.0.self_attn.W_pack.weight" in model: # next: try baichuan naming
+ n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.W_pack.weight" not in model)
else:
n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
+ if n_layer < 1:
+ raise Exception("failed to guess 'n_layer'. This model is unknown or unsupported.\n"
+ "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
+
n_head=n_embd // 128 # guessed
return Params(