author    Georgi Gerganov <ggerganov@gmail.com>    2023-07-23 15:09:47 +0300
committer GitHub <noreply@github.com>              2023-07-23 15:09:47 +0300
commit    e76d630df17e235e6b9ef416c45996765d2e36fb (patch)
tree      15e0e9648f9b0e398b43e888216a73f84098ff3a /examples/main/main.cpp
parent    1d0824b2476e7fda09751a0235c9e571b76d6f2c (diff)
llama : grouped-query attention + LLaMAv2 70B support (#2276)
* CUDA: GQA implementation
* llama : support for GQA and LLaMAv2 70B ggml-ci
* py : fix hparams parsing (if-else blocks) ggml-ci
* py : oh boy .. ggml-ci
* help : fix gqa value for 70B ggml-ci

Co-authored-by: JohannesGaessler <johannesg@5d6.de>
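For context on what this commit adds: in grouped-query attention (GQA), the n_head query heads share a smaller set of n_head_kv key/value heads, so the KV cache shrinks by a factor of n_head/n_head_kv. A minimal standalone sketch of the head-to-KV-head mapping (not the llama.cpp implementation; the 64/8 values are the published LLaMAv2 70B configuration, matching the gqa value of 8 referenced above):

```cpp
#include <cstdio>

int main() {
    const int n_head    = 64; // query heads (LLaMAv2 70B)
    const int n_head_kv = 8;  // shared key/value heads (LLaMAv2 70B, i.e. gqa = 8)
    const int gqa       = n_head / n_head_kv; // query heads per KV head

    // each consecutive group of `gqa` query heads attends to one KV head,
    // so the KV cache stores 8 heads instead of 64
    for (int h = 0; h < n_head; ++h) {
        const int kv_head = h / gqa;
        printf("query head %2d -> kv head %d\n", h, kv_head);
    }
    return 0;
}
```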
Diffstat (limited to 'examples/main/main.cpp')
-rw-r--r--  examples/main/main.cpp  4
1 file changed, 2 insertions, 2 deletions
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 4b4cd1d..3bd8ba2 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -93,8 +93,8 @@ int main(int argc, char ** argv) {
     }
 
     if (params.n_ctx > 2048) {
-        fprintf(stderr, "%s: warning: base model only supports context sizes no greater than 2048 tokens (%d specified);"
-                " you are on your own\n", __func__, params.n_ctx);
+        // TODO: determine the actual max context of the model (e.g. 4096 for LLaMA v2) and use that instead of 2048
+        fprintf(stderr, "%s: warning: base model only supports context sizes no greater than 2048 tokens (%d specified)\n", __func__, params.n_ctx);
     } else if (params.n_ctx < 8) {
         fprintf(stderr, "%s: warning: minimum context size is 8, using minimum size.\n", __func__);
         params.n_ctx = 8;
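The TODO in the hunk suggests deriving the warning threshold from the model itself rather than hard-coding 2048. A hedged sketch of that idea, with a hypothetical model_n_ctx_train helper standing in for reading the trained context length from the model's hyperparameters (not an actual llama.cpp API):

```cpp
#include <cstdio>

// hypothetical helper: trained context length of the loaded model
// (e.g. 4096 for LLaMA v2, 2048 for the original LLaMA)
static int model_n_ctx_train(bool is_llama_v2) {
    return is_llama_v2 ? 4096 : 2048;
}

int main() {
    const int n_ctx     = 8192;                     // user-requested context size
    const int n_ctx_max = model_n_ctx_train(true);  // assume a LLaMA v2 model

    // warn against the model's own limit instead of a fixed 2048
    if (n_ctx > n_ctx_max) {
        fprintf(stderr, "warning: model only supports context sizes no greater than %d tokens (%d specified)\n",
                n_ctx_max, n_ctx);
    }
    return 0;
}
```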