about | summary | refs | log | tree | commit | diff
path: root/examples
diff options
context:
space:
mode:
author: eiery <19350831+eiery@users.noreply.github.com> 2023-04-22 04:27:05 -0400
committer: GitHub <noreply@github.com> 2023-04-22 11:27:05 +0300
commit: 10f19c1121068ce3dab9bece03a8b9caaea2db36 (patch)
tree: 669b6ce043021f01ef8ac91a2e81bfb3df1a9b6a /examples
parent: 7e312f165c5047d6e16680d1eebc83055e95c313 (diff)
llama : have n_batch default to 512 (#1091)
* set default n_batch to 512 when using BLAS
* spacing
* alternate implementation of setting different n_batch for BLAS
* set n_batch to 512 for all cases
Diffstat (limited to 'examples')
-rw-r--r-- examples/common.h 2
1 file changed, 1 insertion, 1 deletion
diff --git a/examples/common.h b/examples/common.h
index cbbc2df..0470368 100644
--- a/examples/common.h
+++ b/examples/common.h
@@ -20,7 +20,7 @@ struct gpt_params {
int32_t repeat_last_n = 64; // last n tokens to penalize
int32_t n_parts = -1; // amount of model parts (-1 = determine from model dimensions)
int32_t n_ctx = 512; // context size
- int32_t n_batch = 8; // batch size for prompt processing
+ int32_t n_batch = 512; // batch size for prompt processing (must be >=32 to use BLAS)
int32_t n_keep = 0; // number of tokens to keep from initial prompt
// sampling parameters