about summary refs log tree commit diff
path: root/ggml-mpi.h
diff options
context:
space:
mode:
author    Evan Miller <emmiller@gmail.com>  2023-07-10 11:49:56 -0400
committer GitHub <noreply@github.com>       2023-07-10 18:49:56 +0300
commit    5656d10599bd756dc0f17284e418e704200b43f3 (patch)
tree      a9aba6c867a268d0bcb90bd9174912774a67ed65 /ggml-mpi.h
parent    1d1630996920f889cdc08de26cebf2415958540e (diff)
mpi : add support for distributed inference via MPI (#2099)
* MPI support, first cut * fix warnings, update README * fixes * wrap includes * PR comments * Update CMakeLists.txt * Add GH workflow, fix test * Add info to README * mpi : trying to move more MPI stuff into ggml-mpi (WIP) (#2099) * mpi : add names for layer inputs + prep ggml_mpi_graph_compute() * mpi : move all MPI logic into ggml-mpi Not tested yet * mpi : various fixes - communication now works but results are wrong * mpi : fix output tensor after MPI compute (still not working) * mpi : fix inference * mpi : minor * Add OpenMPI to GH action * [mpi] continue-on-error: true * mpi : fix after master merge * [mpi] Link MPI C++ libraries to fix OpenMPI * tests : fix new llama_backend API * [mpi] use MPI_INT32_T * mpi : factor out recv / send in functions and reuse * mpi : extend API to allow usage with outer backends (e.g. Metal) --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Diffstat (limited to 'ggml-mpi.h')
-rw-r--r--  ggml-mpi.h  39
1 file changed, 39 insertions, 0 deletions
diff --git a/ggml-mpi.h b/ggml-mpi.h
new file mode 100644
index 0000000..eda119d
--- /dev/null
+++ b/ggml-mpi.h
@@ -0,0 +1,39 @@
+#pragma once
+
+// ggml-mpi: distributed inference support for ggml via MPI (llama.cpp #2099).
+// Forward declarations only — pointers suffice here, which keeps this
+// header free of any ggml.h include.
+struct ggml_context;
+struct ggml_tensor;
+struct ggml_cgraph;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+// Opaque per-instance MPI state; the definition lives in ggml-mpi.c.
+struct ggml_mpi_context;
+
+// Process-wide MPI runtime setup / teardown. Presumably wraps
+// MPI_Init / MPI_Finalize — call each exactly once per process.
+void ggml_mpi_backend_init(void);
+void ggml_mpi_backend_free(void);
+
+// Create / destroy an MPI context. The caller owns the returned pointer
+// and must release it with ggml_mpi_free().
+struct ggml_mpi_context * ggml_mpi_init(void);
+void ggml_mpi_free(struct ggml_mpi_context * ctx);
+
+// Rank of the calling process within the context's communicator.
+int ggml_mpi_rank(struct ggml_mpi_context * ctx);
+
+// Synchronize per-evaluation parameters across ranks before an eval.
+// NOTE(review): the in/out pointers suggest values are broadcast from
+// one rank to the others — confirm against ggml-mpi.c.
+void ggml_mpi_eval_init(
+        struct ggml_mpi_context * ctx_mpi,
+                            int * n_tokens,
+                            int * n_past,
+                            int * n_threads);
+
+// Hooks run before / after graph computation, so MPI communication can
+// wrap an outer backend's compute (e.g. Metal) — see commit message:
+// "extend API to allow usage with outer backends".
+void ggml_mpi_graph_compute_pre(
+        struct ggml_mpi_context * ctx_mpi,
+             struct ggml_cgraph * gf,
+                            int   n_layers);
+
+void ggml_mpi_graph_compute_post(
+        struct ggml_mpi_context * ctx_mpi,
+             struct ggml_cgraph * gf,
+                            int   n_layers);
+
+#ifdef __cplusplus
+}
+#endif