diff options
author | Judd <foldl@users.noreply.github.com> | 2023-07-07 00:23:49 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-07-06 19:23:49 +0300 |
commit | 36680f6e40e4440c3ec3385d0b7e5ca8bb6c37f7 (patch) | |
tree | 1878d52dbd76aae450e044208208ff9335ec2e56 /convert.py | |
parent | a17a2683d8fdb899ba497d0c28ccafb28c62efb6 (diff) |
convert : update for baichuan (#2081)
1. guess n_layers;
2. relax warnings on context size;
3. add a note that its derivatives (models derived from it) are also supported.
Co-authored-by: Judd <foldl@boxvest.com>
Diffstat (limited to 'convert.py')
-rw-r--r-- | convert.py | 6 |
1 file changed, 6 insertions, 0 deletions
@@ -154,9 +154,15 @@ class Params:
         # try transformer naming first
         if "model.layers.0.self_attn.q_proj.weight" in model:
             n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.q_proj.weight" not in model)
+        elif "model.layers.0.self_attn.W_pack.weight" in model:   # next: try baichuan naming
+            n_layer=next(i for i in itertools.count() if f"model.layers.{i}.self_attn.W_pack.weight" not in model)
         else:
             n_layer=next(i for i in itertools.count() if f"layers.{i}.attention.wq.weight" not in model)
 
+        if n_layer < 1:
+            raise Exception("failed to guess 'n_layer'. This model is unknown or unsupported.\n"
+                            "Suggestion: provide 'config.json' of the model in the same directory containing model files.")
+
         n_head=n_embd // 128 # guessed
 
         return Params(