author     Evan Miller <emmiller@gmail.com>            2023-07-10 11:49:56 -0400
committer  GitHub <noreply@github.com>                 2023-07-10 18:49:56 +0300
commit     5656d10599bd756dc0f17284e418e704200b43f3 (patch)
tree       a9aba6c867a268d0bcb90bd9174912774a67ed65 /examples
parent     1d1630996920f889cdc08de26cebf2415958540e (diff)
mpi : add support for distributed inference via MPI (#2099)
* MPI support, first cut
* fix warnings, update README
* fixes
* wrap includes
* PR comments
* Update CMakeLists.txt
* Add GH workflow, fix test
* Add info to README
* mpi : trying to move more MPI stuff into ggml-mpi (WIP) (#2099)
* mpi : add names for layer inputs + prep ggml_mpi_graph_compute()
* mpi : move all MPI logic into ggml-mpi
Not tested yet
* mpi : various fixes - communication now works but results are wrong
* mpi : fix output tensor after MPI compute (still not working)
* mpi : fix inference
* mpi : minor
* Add OpenMPI to GH action
* [mpi] continue-on-error: true
* mpi : fix after master merge
* [mpi] Link MPI C++ libraries to fix OpenMPI
* tests : fix new llama_backend API
* [mpi] use MPI_INT32_T
* mpi : factor out recv / send in functions and reuse
* mpi : extend API to allow usage with outer backends (e.g. Metal)
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
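
The log above sketches the design: inference is split across MPI ranks, with the token/tensor traffic factored into small reusable send/recv helpers that use MPI_INT32_T. As a rough illustration of that communication pattern only (this is not the ggml-mpi API from the commit; the helper names below are hypothetical), a minimal pipeline sketch in C++:

```cpp
// Illustrative sketch of the "factor out recv / send" idea from the log.
// NOT the ggml-mpi API; send_int32/recv_int32 are hypothetical helpers.
#include <mpi.h>
#include <cstdint>
#include <vector>

// Forward an int32 buffer to a neighboring pipeline stage.
static void send_int32(const std::vector<int32_t> & buf, int dst) {
    MPI_Send(buf.data(), (int) buf.size(), MPI_INT32_T, dst, 0, MPI_COMM_WORLD);
}

static void recv_int32(std::vector<int32_t> & buf, int src) {
    MPI_Recv(buf.data(), (int) buf.size(), MPI_INT32_T, src, 0, MPI_COMM_WORLD,
             MPI_STATUS_IGNORE);
}

int main(int argc, char ** argv) {
    MPI_Init(&argc, &argv);

    int rank = 0, size = 1;
    MPI_Comm_rank(MPI_COMM_WORLD, &rank);
    MPI_Comm_size(MPI_COMM_WORLD, &size);

    std::vector<int32_t> tokens(8, 0);

    // Each rank evaluates its slice of the graph, then hands the
    // data to the next rank in the pipeline.
    if (rank > 0)        recv_int32(tokens, rank - 1);
    // ... compute this rank's portion here ...
    if (rank < size - 1) send_int32(tokens, rank + 1);

    MPI_Finalize();
    return 0;
}
```

Built with an MPI compiler wrapper and launched via mpirun with multiple ranks, each rank would own a contiguous slice of the model's layers, which is the general layout this PR targets.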
Diffstat (limited to 'examples')
 examples/embd-input/embd-input-lib.cpp | 2 +-
 examples/embedding/embedding.cpp       | 4 +++-
 examples/main/main.cpp                 | 4 +++-
 examples/perplexity/perplexity.cpp     | 4 +++-
 examples/quantize/quantize.cpp         | 4 +++-
 examples/server/server.cpp             | 4 +++-
 examples/simple/simple.cpp             | 4 +++-
 7 files changed, 19 insertions(+), 7 deletions(-)
diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp
index 5fa4942..2656382 100644
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -34,7 +34,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
     }
     fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
 
-    llama_init_backend(params.numa);
+    llama_backend_init(params.numa);
 
     llama_model * model;
     llama_context * ctx;
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 03e801c..5192d6d 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -35,7 +35,7 @@ int main(int argc, char ** argv) {
         params.prompt = gpt_random_prompt(rng);
     }
 
-    llama_init_backend(params.numa);
+    llama_backend_init(params.numa);
 
     llama_model * model;
     llama_context * ctx;
@@ -93,5 +93,7 @@ int main(int argc, char ** argv) {
     llama_free(ctx);
     llama_free_model(model);
 
+    llama_backend_free();
+
     return 0;
 }
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 0f6391a..07d8fc6 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -105,7 +105,7 @@ int main(int argc, char ** argv) {
         params.prompt = gpt_random_prompt(rng);
     }
 
-    llama_init_backend(params.numa);
+    llama_backend_init(params.numa);
 
     llama_model * model;
     llama_context * ctx;
@@ -671,5 +671,7 @@ int main(int argc, char ** argv) {
     llama_free(ctx);
     llama_free_model(model);
 
+    llama_backend_free();
+
     return 0;
 }
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index fd4b03c..7e120ff 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -147,7 +147,7 @@ int main(int argc, char ** argv) {
         params.prompt = gpt_random_prompt(rng);
     }
 
-    llama_init_backend(params.numa);
+    llama_backend_init(params.numa);
 
     llama_model * model;
     llama_context * ctx;
@@ -172,5 +172,7 @@ int main(int argc, char ** argv) {
     llama_free(ctx);
     llama_free_model(model);
 
+    llama_backend_free();
+
     return 0;
 }
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 1eb0f75..797d2f0 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -180,7 +180,7 @@ int main(int argc, char ** argv) {
         usage(argv[0]);
     }
 
-    llama_init_backend(false);
+    llama_backend_init(false);
 
     // parse command line arguments
     const std::string fname_inp = argv[arg_idx];
@@ -257,5 +257,7 @@ int main(int argc, char ** argv) {
         printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0);
     }
 
+    llama_backend_free();
+
     return 0;
 }
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2cbfc00..296c5d6 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1079,7 +1079,7 @@ int main(int argc, char **argv)
         params.model_alias = params.model;
     }
 
-    llama_init_backend(params.numa);
+    llama_backend_init(params.numa);
 
     LOG_INFO("build info", {{"build", BUILD_NUMBER},
                             {"commit", BUILD_COMMIT}});
@@ -1309,5 +1309,7 @@ int main(int argc, char **argv)
         return 1;
     }
 
+    llama_backend_free();
+
     return 0;
 }
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 2d913ce..aa2c435 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -66,7 +66,7 @@ int main(int argc, char ** argv)
     // Init LLM :
     //---------------------------------
 
-    llama_init_backend(params.numa);
+    llama_backend_init(params.numa);
 
     llama_model * model;
     llama_context * ctx;
@@ -173,6 +173,8 @@ int main(int argc, char ** argv)
 
     llama_free( ctx );
     llama_free_model( model );
 
+    llama_backend_free();
+
     return 0;
 }
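
The common thread across every hunk in examples/ is a uniform API change: llama_init_backend(...) is renamed to llama_backend_init(...), and each example now pairs it with a llama_backend_free() before returning, giving the backend a shutdown hook (in MPI builds, the natural place for MPI finalization). A minimal sketch of the new lifecycle as of this commit, with model and context handling elided:

```cpp
// Sketch of the init/free pairing this commit introduces in every example.
// Only the backend lifecycle is shown; model/context work is elided.
#include "llama.h"

int main() {
    const bool numa = false;   // pass true to enable NUMA optimizations

    llama_backend_init(numa);  // renamed from llama_init_backend(numa)

    // ... llama_load_model_from_file(...), llama_new_context_with_model(...),
    //     inference, llama_free(ctx), llama_free_model(model) ...

    llama_backend_free();      // new: lets the backend shut down cleanly
    return 0;
}
```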