diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2023-06-06 21:33:23 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-06-06 21:33:23 +0200 |
commit | 17366df842e358768c0df7024484fffecfc7865b (patch) | |
tree | f042c8142311d45f8712db10debf89111b2c7e57 /ggml.h | |
parent | 44f906e8537fcec965e312d621c80556d6aa9bec (diff) |
Multi GPU support, CUDA refactor, CUDA scratch buffer (#1703)
* CUDA multi GPU + scratch
ggml_cuda_compute_forward
Tensor parallelism
ggml_cuda_add
ggml_cuda_rms_norm
ggml_cuda_silu
CUDA scratch buffer
--main-gpu CLI option
Diffstat (limited to 'ggml.h')
-rw-r--r-- | ggml.h | 34 |
1 files changed, 28 insertions, 6 deletions
```diff
@@ -256,8 +256,8 @@ extern "C" {
 
     enum ggml_backend {
         GGML_BACKEND_CPU = 0,
-        GGML_BACKEND_CUDA = 1,
-        GGML_BACKEND_CL = 2,
+        GGML_BACKEND_GPU = 10,
+        GGML_BACKEND_GPU_SPLIT = 20,
     };
 
     // model file types
@@ -387,7 +387,9 @@ extern "C" {
 
         char name[GGML_MAX_NAME];
 
-        char padding[16];
+        void * extra; // extra things e.g. for ggml-cuda.cu
+
+        char padding[4];
     };
 
     static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
@@ -425,6 +427,25 @@ extern "C" {
         bool no_alloc; // don't allocate memory for the tensor data
     };
 
+
+    // compute types
+    enum ggml_task_type {
+        GGML_TASK_INIT = 0,
+        GGML_TASK_COMPUTE,
+        GGML_TASK_FINALIZE,
+    };
+
+    struct ggml_compute_params {
+        enum ggml_task_type type;
+
+        // ith = thread index, nth = number of threads
+        int ith, nth;
+
+        // work buffer for all threads
+        size_t wsize;
+        void * wdata;
+    };
+
     // misc
 
     GGML_API void ggml_time_init(void); // call this once at the beginning of the program
@@ -436,9 +457,10 @@ extern "C" {
     GGML_API void ggml_print_object (const struct ggml_object * obj);
     GGML_API void ggml_print_objects(const struct ggml_context * ctx);
 
-    GGML_API int64_t ggml_nelements(const struct ggml_tensor * tensor);
-    GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
-    GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
+    GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
+    GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
+    GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
+    GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
 
     GGML_API int ggml_blck_size (enum ggml_type type);
     GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
```