about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Qingyou Meng <meng.qingyou@gmail.com> 2023-03-21 03:33:10 +0800
committer: GitHub <noreply@github.com> 2023-03-20 19:33:10 +0000
commit: 6b6d5b5024faaf82019d08cde5e8a9d69c6ca316 (patch)
tree: 7dcf3ce431ea8982a7a6c33e355a48de98d462b8
parent: a791a68b613b162c88a83f5f0225223bc167c762 (diff)
Fixed tokenizer.model not found error when model dir is symlink (#325)
-rw-r--r-- convert-pth-to-ggml.py 9
1 files changed, 8 insertions, 1 deletions
diff --git a/convert-pth-to-ggml.py b/convert-pth-to-ggml.py
index 42f5377..108eb1f 100644
--- a/convert-pth-to-ggml.py
+++ b/convert-pth-to-ggml.py
@@ -17,6 +17,7 @@
# and vocabulary.
#
import argparse
+import os
import sys
import json
import struct
@@ -44,8 +45,14 @@ def get_n_parts(dim):
def load_hparams_and_tokenizer(dir_model):
+ # `dir_model` is something like `models/7B` or `models/7B/`.
+ # "tokenizer.model" is expected under model's parent dir.
+ # When `dir_model` is a symlink, f"{dir_model}/../tokenizer.model" would not be found.
+ # Let's use the model's parent dir directly.
+ model_parent_dir = os.path.dirname(os.path.normpath(dir_model))
+
fname_hparams = f"{dir_model}/params.json"
- fname_tokenizer = f"{dir_model}/../tokenizer.model"
+ fname_tokenizer = f"{model_parent_dir}/tokenizer.model"
with open(fname_hparams, "r") as f:
hparams = json.load(f)