author     David Yang <davidyang6us@gmail.com>    2023-06-27 03:45:32 +0800
committer  GitHub <noreply@github.com>            2023-06-26 22:45:32 +0300
commit     eaa6ca5a61b8c9501df9ebe3d264f45b75a5f8aa (patch)
tree       8850a005978b91d9393bfd60aed7f19d3d97ce6f /examples
parent     aa777abbb73655c4e1e9237b7c0ad66745e8e48c (diff)
ggml : increase max tensor name + clean up compiler warnings in train-text (#1988)
* Clean up compiler warnings in train-text

  Some brackets to disambiguate order of operations

* Increase GGML_MAX_NAME

  Avoiding strncpy danger in train-text-from-scratch and reducing potential future name length issues
Diffstat (limited to 'examples')
-rw-r--r--  examples/train-text-from-scratch/train-text-from-scratch.cpp  23
1 file changed, 6 insertions, 17 deletions
diff --git a/examples/train-text-from-scratch/train-text-from-scratch.cpp b/examples/train-text-from-scratch/train-text-from-scratch.cpp
index 61c829e..5c6fd57 100644
--- a/examples/train-text-from-scratch/train-text-from-scratch.cpp
+++ b/examples/train-text-from-scratch/train-text-from-scratch.cpp
@@ -294,20 +294,9 @@ void init_model(struct my_llama_model * model) {
ggml_set_name(layer.ffn_norm, (layers_i + ".ffn_norm.weight").c_str());
- // 'layers.10.feed_forward.w1.weight' has length of 32.
- // ggml_tensor->name only has 32 characters, but we need one more for the '\0' terminator.
- // ggml_set_name will set the last character to '\0', so we can only store 'layers.10.feed_forward.w1.weigh'.
- // when saving llama compatible model the tensors names will miss a character.
- // ggml_set_name(layer.w1, (layers_i + ".feed_forward.w1.weight").c_str());
- // ggml_set_name(layer.w2, (layers_i + ".feed_forward.w2.weight").c_str());
- // ggml_set_name(layer.w3, (layers_i + ".feed_forward.w3.weight").c_str());
-
- strncpy(layer.w1->name, (layers_i + ".feed_forward.w1.weight").c_str(), sizeof(layer.w1->name));
- strncpy(layer.w2->name, (layers_i + ".feed_forward.w2.weight").c_str(), sizeof(layer.w2->name));
- strncpy(layer.w3->name, (layers_i + ".feed_forward.w3.weight").c_str(), sizeof(layer.w3->name));
- layer.w1->padding[0] = 0;
- layer.w2->padding[0] = 0;
- layer.w3->padding[0] = 0;
+ ggml_format_name(layer.w1, "%s.feed_forward.w1.weight", layers_i.c_str());
+ ggml_format_name(layer.w2, "%s.feed_forward.w2.weight", layers_i.c_str());
+ ggml_format_name(layer.w3, "%s.feed_forward.w3.weight", layers_i.c_str());
}
}
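
For context (illustration only, not part of the patch): the deleted comments note that ggml_tensor->name held only 32 characters, while "layers.10.feed_forward.w1.weight" is exactly 32 characters long, so strncpy left no room for the '\0' terminator and saved models lost the final character. A minimal standalone sketch of that truncation, assuming the old 32-byte limit (the commit's actual fix is raising GGML_MAX_NAME and switching to the printf-style ggml_format_name):

#include <cstdio>
#include <cstring>
#include <string>

int main() {
    const std::string name = "layers.10.feed_forward.w1.weight"; // exactly 32 chars
    char buf[32];                                                // old 32-byte name limit

    // strncpy copies at most sizeof(buf) bytes and does not null-terminate when
    // the source is >= sizeof(buf); forcing a terminator drops the last character.
    std::strncpy(buf, name.c_str(), sizeof(buf));
    buf[sizeof(buf) - 1] = '\0';
    std::printf("truncated: %s\n", buf);   // prints "layers.10.feed_forward.w1.weigh"

    // With a larger limit, printf-style formatting fits the whole name plus the terminator.
    char buf2[64];
    std::snprintf(buf2, sizeof(buf2), "%s.feed_forward.w1.weight", "layers.10");
    std::printf("full name: %s\n", buf2);  // prints "layers.10.feed_forward.w1.weight"
    return 0;
}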
@@ -2368,7 +2357,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
file->write_u32(0);
file->write_u32(0);
file->write_u32(GGML_TYPE_F32);
- file->seek(0-file->tell() & 31, SEEK_CUR);
+ file->seek((0-file->tell()) & 31, SEEK_CUR);
return;
}
const char * name = ggml_get_name(tensor);
@@ -2383,7 +2372,7 @@ void write_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
file->write_u32(tensor->type);
file->write_raw(ne, sizeof(ne[0]) * nd);
file->write_raw(name, name_len);
- file->seek(0-file->tell() & 31, SEEK_CUR);
+ file->seek((0-file->tell()) & 31, SEEK_CUR);
file->write_raw(tensor->data, ggml_nbytes(tensor));
}
@@ -2404,7 +2393,7 @@ void read_tensor(struct llama_file * file, struct ggml_tensor * tensor) {
std::string name = file->read_string(name_len);
GGML_ASSERT(strncmp(ggml_get_name(tensor), name.c_str(), sizeof(tensor->name)-1) == 0);
- file->seek(0-file->tell() & 31, SEEK_CUR);
+ file->seek((0-file->tell()) & 31, SEEK_CUR);
file->read_raw(tensor->data, ggml_nbytes(tensor));
}
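
A note on the bracketed seek expression (illustration only, not code from the repository): (0 - pos) & 31 yields the number of bytes needed to pad pos forward to the next 32-byte boundary, and is 0 when pos is already aligned. Binary minus already binds tighter than & in C++, so the added parentheses do not change the computation; per the commit message they only make the intended grouping explicit and quiet the compiler warning. A small sketch of the padding arithmetic, assuming an unsigned 64-bit file offset:

#include <cstdint>
#include <cstdio>

// Bytes needed to advance pos to the next 32-byte boundary (0 if already aligned),
// using the same expression as the patched seek calls.
static uint64_t pad_to_32(uint64_t pos) {
    return (0 - pos) & 31;
}

int main() {
    for (uint64_t pos : {0ull, 1ull, 31ull, 32ull, 33ull, 100ull}) {
        std::printf("pos=%3llu pad=%2llu aligned_to=%3llu\n",
                    (unsigned long long) pos,
                    (unsigned long long) pad_to_32(pos),
                    (unsigned long long) (pos + pad_to_32(pos)));
    }
    return 0;
}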