From 17366df842e358768c0df7024484fffecfc7865b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?= Date: Tue, 6 Jun 2023 21:33:23 +0200 Subject: Multi GPU support, CUDA refactor, CUDA scratch buffer (#1703) * CUDA multi GPU + scratch ggml_cuda_compute_forward Tensor parallelism ggml_cuda_add ggml_cuda_rms_norm ggml_cuda_silu CUDA scratch buffer --main-gpu CLI option --- ggml.h | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'ggml.h') diff --git a/ggml.h b/ggml.h index d1ba15f..1b26da3 100644 --- a/ggml.h +++ b/ggml.h @@ -256,8 +256,8 @@ extern "C" { enum ggml_backend { GGML_BACKEND_CPU = 0, - GGML_BACKEND_CUDA = 1, - GGML_BACKEND_CL = 2, + GGML_BACKEND_GPU = 10, + GGML_BACKEND_GPU_SPLIT = 20, }; // model file types @@ -387,7 +387,9 @@ extern "C" { char name[GGML_MAX_NAME]; - char padding[16]; + void * extra; // extra things e.g. for ggml-cuda.cu + + char padding[4]; }; static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor); @@ -425,6 +427,25 @@ extern "C" { bool no_alloc; // don't allocate memory for the tensor data }; + + // compute types + enum ggml_task_type { + GGML_TASK_INIT = 0, + GGML_TASK_COMPUTE, + GGML_TASK_FINALIZE, + }; + + struct ggml_compute_params { + enum ggml_task_type type; + + // ith = thread index, nth = number of threads + int ith, nth; + + // work buffer for all threads + size_t wsize; + void * wdata; + }; + // misc GGML_API void ggml_time_init(void); // call this once at the beginning of the program @@ -436,9 +457,10 @@ extern "C" { GGML_API void ggml_print_object (const struct ggml_object * obj); GGML_API void ggml_print_objects(const struct ggml_context * ctx); - GGML_API int64_t ggml_nelements(const struct ggml_tensor * tensor); - GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor); - GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor); + GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor); + GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor); + GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor); + GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split); GGML_API int ggml_blck_size (enum ggml_type type); GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block -- cgit v1.2.3