diff options
Diffstat (limited to 'examples/main/main.cpp')
-rw-r--r-- | examples/main/main.cpp | 11 |
1 file changed, 4 insertions, 7 deletions
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 656382f..4b4cd1d 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -139,17 +139,14 @@ int main(int argc, char ** argv) {
                 params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
     }
 
-    // determine the maximum memory usage needed to do inference for the given n_batch and n_predict parameters
+    // determine the maximum memory usage needed to do inference for the given n_batch and n_ctx parameters
     // uncomment the "used_mem" line in llama.cpp to see the results
     if (params.mem_test) {
         {
-            const std::vector<llama_token> tmp(params.n_batch, llama_token_bos());
-            llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
-        }
+            fprintf(stderr, "%s: testing memory usage for n_batch = %d, n_ctx = %d\n", __func__, params.n_batch, params.n_ctx);
 
-        {
-            const std::vector<llama_token> tmp = { 0, };
-            llama_eval(ctx, tmp.data(), tmp.size(), params.n_predict - 1, params.n_threads);
+            const std::vector<llama_token> tmp(params.n_batch, llama_token_bos());
+            llama_eval(ctx, tmp.data(), tmp.size(), params.n_ctx, params.n_threads);
         }
 
         llama_print_timings(ctx);