diff options
Diffstat (limited to 'examples/server/server.cpp')
-rw-r--r-- | examples/server/server.cpp | 48 |
1 files changed, 48 insertions, 0 deletions
diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 9aa7db2..31d8087 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -401,6 +401,10 @@ void server_print_usage(int /*argc*/, char **argv, const gpt_params ¶ms) #ifdef LLAMA_SUPPORTS_GPU_OFFLOAD fprintf(stderr, " -ngl N, --n-gpu-layers N\n"); fprintf(stderr, " number of layers to store in VRAM\n"); + fprintf(stderr, " -ts SPLIT --tensor-split SPLIT\n"); + fprintf(stderr, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n"); + fprintf(stderr, " how to split tensors across multiple GPUs, comma-separated list of proportions, e.g. 3,1\n"); + fprintf(stderr, " -mg i, --main-gpu i the GPU to use for scratch and small tensors\n" ); #endif fprintf(stderr, " -m FNAME, --model FNAME\n"); fprintf(stderr, " model path (default: %s)\n", params.model.c_str()); @@ -504,6 +508,50 @@ bool server_params_parse(int argc, char **argv, server_params &sparams, gpt_para fprintf(stderr, "warning: see main README.md for information on enabling GPU BLAS support\n"); #endif } + else if (arg == "--tensor-split" || arg == "-ts") + { + if (++i >= argc) + { + invalid_param = true; + break; + } +#ifdef GGML_USE_CUBLAS + std::string arg_next = argv[i]; + + // split string by , and / + const std::regex regex{R"([,/]+)"}; + std::sregex_token_iterator it{arg_next.begin(), arg_next.end(), regex, -1}; + std::vector<std::string> split_arg{it, {}}; + GGML_ASSERT(split_arg.size() <= LLAMA_MAX_DEVICES); + + for (size_t i = 0; i < LLAMA_MAX_DEVICES; ++i) + { + if (i < split_arg.size()) + { + params.tensor_split[i] = std::stof(split_arg[i]); + } + else + { + params.tensor_split[i] = 0.0f; + } + } +#else + fprintf(stderr, "WARNING: llama.cpp was compiled without cuBLAS. It is not possible to set a tensor split.\n"); +#endif // GGML_USE_CUBLAS + } + else if (arg == "--main-gpu" || arg == "-mg") + { + if (++i >= argc) + { + invalid_param = true; + break; + } +#ifdef GGML_USE_CUBLAS + params.main_gpu = std::stoi(argv[i]); +#else + fprintf(stderr, "warning: llama.cpp was compiled without cuBLAS. It is not possible to set a main GPU.\n"); +#endif + } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); |