about | summary | refs | log | tree | commit | diff
path: root/examples
diff options
context:
space:
mode:
author: Evan Miller <emmiller@gmail.com> 2023-07-10 11:49:56 -0400
committer: GitHub <noreply@github.com> 2023-07-10 18:49:56 +0300
commit: 5656d10599bd756dc0f17284e418e704200b43f3 (patch)
tree: a9aba6c867a268d0bcb90bd9174912774a67ed65 /examples
parent: 1d1630996920f889cdc08de26cebf2415958540e (diff)
mpi : add support for distributed inference via MPI (#2099)
* MPI support, first cut
* fix warnings, update README
* fixes
* wrap includes
* PR comments
* Update CMakeLists.txt
* Add GH workflow, fix test
* Add info to README
* mpi : trying to move more MPI stuff into ggml-mpi (WIP) (#2099)
* mpi : add names for layer inputs + prep ggml_mpi_graph_compute()
* mpi : move all MPI logic into ggml-mpi
  Not tested yet
* mpi : various fixes - communication now works but results are wrong
* mpi : fix output tensor after MPI compute (still not working)
* mpi : fix inference
* mpi : minor
* Add OpenMPI to GH action
* [mpi] continue-on-error: true
* mpi : fix after master merge
* [mpi] Link MPI C++ libraries to fix OpenMPI
* tests : fix new llama_backend API
* [mpi] use MPI_INT32_T
* mpi : factor out recv / send in functions and reuse
* mpi : extend API to allow usage with outer backends (e.g. Metal)
---------
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'examples')
-rw-r--r-- examples/embd-input/embd-input-lib.cpp | 2
-rw-r--r-- examples/embedding/embedding.cpp | 4
-rw-r--r-- examples/main/main.cpp | 4
-rw-r--r-- examples/perplexity/perplexity.cpp | 4
-rw-r--r-- examples/quantize/quantize.cpp | 4
-rw-r--r-- examples/server/server.cpp | 4
-rw-r--r-- examples/simple/simple.cpp | 4
7 files changed, 19 insertions, 7 deletions
diff --git a/examples/embd-input/embd-input-lib.cpp b/examples/embd-input/embd-input-lib.cpp
index 5fa4942..2656382 100644
--- a/examples/embd-input/embd-input-lib.cpp
+++ b/examples/embd-input/embd-input-lib.cpp
@@ -34,7 +34,7 @@ struct MyModel* create_mymodel(int argc, char ** argv) {
}
fprintf(stderr, "%s: seed = %d\n", __func__, params.seed);
- llama_init_backend(params.numa);
+ llama_backend_init(params.numa);
llama_model * model;
llama_context * ctx;
diff --git a/examples/embedding/embedding.cpp b/examples/embedding/embedding.cpp
index 03e801c..5192d6d 100644
--- a/examples/embedding/embedding.cpp
+++ b/examples/embedding/embedding.cpp
@@ -35,7 +35,7 @@ int main(int argc, char ** argv) {
params.prompt = gpt_random_prompt(rng);
}
- llama_init_backend(params.numa);
+ llama_backend_init(params.numa);
llama_model * model;
llama_context * ctx;
@@ -93,5 +93,7 @@ int main(int argc, char ** argv) {
llama_free(ctx);
llama_free_model(model);
+ llama_backend_free();
+
return 0;
}
diff --git a/examples/main/main.cpp b/examples/main/main.cpp
index 0f6391a..07d8fc6 100644
--- a/examples/main/main.cpp
+++ b/examples/main/main.cpp
@@ -105,7 +105,7 @@ int main(int argc, char ** argv) {
params.prompt = gpt_random_prompt(rng);
}
- llama_init_backend(params.numa);
+ llama_backend_init(params.numa);
llama_model * model;
llama_context * ctx;
@@ -671,5 +671,7 @@ int main(int argc, char ** argv) {
llama_free(ctx);
llama_free_model(model);
+ llama_backend_free();
+
return 0;
}
diff --git a/examples/perplexity/perplexity.cpp b/examples/perplexity/perplexity.cpp
index fd4b03c..7e120ff 100644
--- a/examples/perplexity/perplexity.cpp
+++ b/examples/perplexity/perplexity.cpp
@@ -147,7 +147,7 @@ int main(int argc, char ** argv) {
params.prompt = gpt_random_prompt(rng);
}
- llama_init_backend(params.numa);
+ llama_backend_init(params.numa);
llama_model * model;
llama_context * ctx;
@@ -172,5 +172,7 @@ int main(int argc, char ** argv) {
llama_free(ctx);
llama_free_model(model);
+ llama_backend_free();
+
return 0;
}
diff --git a/examples/quantize/quantize.cpp b/examples/quantize/quantize.cpp
index 1eb0f75..797d2f0 100644
--- a/examples/quantize/quantize.cpp
+++ b/examples/quantize/quantize.cpp
@@ -180,7 +180,7 @@ int main(int argc, char ** argv) {
usage(argv[0]);
}
- llama_init_backend(false);
+ llama_backend_init(false);
// parse command line arguments
const std::string fname_inp = argv[arg_idx];
@@ -257,5 +257,7 @@ int main(int argc, char ** argv) {
printf("%s: total time = %8.2f ms\n", __func__, (t_main_end_us - t_main_start_us)/1000.0);
}
+ llama_backend_free();
+
return 0;
}
diff --git a/examples/server/server.cpp b/examples/server/server.cpp
index 2cbfc00..296c5d6 100644
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1079,7 +1079,7 @@ int main(int argc, char **argv)
params.model_alias = params.model;
}
- llama_init_backend(params.numa);
+ llama_backend_init(params.numa);
LOG_INFO("build info", {{"build", BUILD_NUMBER},
{"commit", BUILD_COMMIT}});
@@ -1309,5 +1309,7 @@ int main(int argc, char **argv)
return 1;
}
+ llama_backend_free();
+
return 0;
}
diff --git a/examples/simple/simple.cpp b/examples/simple/simple.cpp
index 2d913ce..aa2c435 100644
--- a/examples/simple/simple.cpp
+++ b/examples/simple/simple.cpp
@@ -66,7 +66,7 @@ int main(int argc, char ** argv)
// Init LLM :
//---------------------------------
- llama_init_backend(params.numa);
+ llama_backend_init(params.numa);
llama_model * model;
llama_context * ctx;
@@ -173,6 +173,8 @@ int main(int argc, char ** argv)
llama_free( ctx );
llama_free_model( model );
+ llama_backend_free();
+
return 0;
}