author    Johannes Gäßler <johannesg@5d6.de>  2023-06-06 21:33:23 +0200
committer GitHub <noreply@github.com>         2023-06-06 21:33:23 +0200
commit    17366df842e358768c0df7024484fffecfc7865b (patch)
tree      f042c8142311d45f8712db10debf89111b2c7e57 /ggml.h
parent    44f906e8537fcec965e312d621c80556d6aa9bec (diff)
Multi GPU support, CUDA refactor, CUDA scratch buffer (#1703)
* CUDA multi GPU + scratch
* ggml_cuda_compute_forward
* Tensor parallelism
* ggml_cuda_add
* ggml_cuda_rms_norm
* ggml_cuda_silu
* CUDA scratch buffer
* --main-gpu CLI option
Diffstat (limited to 'ggml.h')
-rw-r--r--   ggml.h   34
1 file changed, 28 insertions, 6 deletions
diff --git a/ggml.h b/ggml.h
index d1ba15f..1b26da3 100644
--- a/ggml.h
+++ b/ggml.h
@@ -256,8 +256,8 @@ extern "C" {
enum ggml_backend {
GGML_BACKEND_CPU = 0,
- GGML_BACKEND_CUDA = 1,
- GGML_BACKEND_CL = 2,
+ GGML_BACKEND_GPU = 10,
+ GGML_BACKEND_GPU_SPLIT = 20,
};
// model file types
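
The renamed backend values distinguish a tensor that lives entirely on one GPU from one whose rows are split across several devices. A minimal sketch of how a caller might tag a weight tensor accordingly (not part of this patch; the context pointer and tensor shape are placeholders, and it assumes the tensor's backend field is set by the caller, as llama.cpp does):

#include "ggml.h"

// Tag a 2D weight so the CUDA backend either keeps it on the main GPU or
// distributes its rows across all available GPUs.
static void tag_weight_backend(struct ggml_context * ctx, bool split_across_gpus) {
    struct ggml_tensor * w = ggml_new_tensor_2d(ctx, GGML_TYPE_F16, 4096, 4096);

    w->backend = split_across_gpus ? GGML_BACKEND_GPU_SPLIT   // rows spread over devices
                                   : GGML_BACKEND_GPU;        // whole tensor on --main-gpu
}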
@@ -387,7 +387,9 @@ extern "C" {
char name[GGML_MAX_NAME];
- char padding[16];
+ void * extra; // extra things e.g. for ggml-cuda.cu
+
+ char padding[4];
};
static const size_t GGML_TENSOR_SIZE = sizeof(struct ggml_tensor);
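
The new extra pointer replaces most of the padding and gives a backend somewhere to hang per-tensor bookkeeping; the comment points at ggml-cuda.cu as the intended user. An illustrative sketch only (the structure name and contents below are hypothetical, not the actual ggml-cuda internals):

#include "ggml.h"

// Hypothetical per-tensor record that a GPU backend could attach via extra.
struct gpu_tensor_extra {
    void * device_buffers[8];   // e.g. one device allocation per GPU for split tensors
};

static void attach_extra(struct ggml_tensor * t, struct gpu_tensor_extra * e) {
    t->extra = e;               // backend-specific data now travels with the tensor
}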
@@ -425,6 +427,25 @@ extern "C" {
bool no_alloc; // don't allocate memory for the tensor data
};
+
+ // compute types
+ enum ggml_task_type {
+ GGML_TASK_INIT = 0,
+ GGML_TASK_COMPUTE,
+ GGML_TASK_FINALIZE,
+ };
+
+ struct ggml_compute_params {
+ enum ggml_task_type type;
+
+ // ith = thread index, nth = number of threads
+ int ith, nth;
+
+ // work buffer for all threads
+ size_t wsize;
+ void * wdata;
+ };
+
// misc
GGML_API void ggml_time_init(void); // call this once at the beginning of the program
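
ggml_task_type and ggml_compute_params describe ggml's threaded execution model: each op runs an INIT phase, then COMPUTE on every worker thread (which receives its index ith out of nth threads plus a shared work buffer), then FINALIZE. Exposing them in the public header lets a backend such as ggml-cuda.cu hook into that machinery. A hedged sketch of the usual row-splitting pattern, not copied from ggml.c (assumes contiguous F32 tensors of equal shape):

#include "ggml.h"

static void example_op_f32(const struct ggml_compute_params * params,
                           const struct ggml_tensor * src,
                           struct ggml_tensor * dst) {
    if (params->type == GGML_TASK_INIT || params->type == GGML_TASK_FINALIZE) {
        return; // this example op has no init/finalize work
    }

    const int64_t nr  = ggml_nrows(src);
    const int     ith = params->ith;        // this thread's index
    const int     nth = params->nth;        // total number of threads

    // split the rows into nth contiguous chunks; thread ith handles one chunk
    const int64_t dr  = (nr + nth - 1)/nth;
    const int64_t ir0 = dr*ith;
    const int64_t ir1 = ir0 + dr < nr ? ir0 + dr : nr;

    for (int64_t ir = ir0; ir < ir1; ++ir) {
        const float * x = (const float *)((const char *)src->data + ir*src->nb[1]);
        float       * y = (float       *)((char       *)dst->data + ir*dst->nb[1]);
        for (int64_t i = 0; i < src->ne[0]; ++i) {
            y[i] = x[i]; // placeholder element-wise computation
        }
    }
}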
@@ -436,9 +457,10 @@ extern "C" {
GGML_API void ggml_print_object (const struct ggml_object * obj);
GGML_API void ggml_print_objects(const struct ggml_context * ctx);
- GGML_API int64_t ggml_nelements(const struct ggml_tensor * tensor);
- GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
- GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
+ GGML_API int64_t ggml_nelements (const struct ggml_tensor * tensor);
+ GGML_API int64_t ggml_nrows (const struct ggml_tensor * tensor);
+ GGML_API size_t ggml_nbytes (const struct ggml_tensor * tensor);
+ GGML_API size_t ggml_nbytes_split(const struct ggml_tensor * tensor, int nrows_split);
GGML_API int ggml_blck_size (enum ggml_type type);
GGML_API size_t ggml_type_size (enum ggml_type type); // size in bytes for all elements in a block
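
ggml_nbytes_split complements ggml_nbytes for the multi-GPU path: it presumably reports the byte size of just nrows_split rows of a tensor, which is what a backend needs when allocating one slice of a row-split weight per device. A usage sketch under that assumption (the chunking helper below is hypothetical, not from the patch):

#include "ggml.h"

// Byte size of the slice of rows that device `gpu` out of `n_gpus` would hold
// if the tensor's rows were divided into contiguous chunks.
static size_t split_slice_nbytes(const struct ggml_tensor * t, int n_gpus, int gpu) {
    const int64_t nrows        = ggml_nrows(t);
    const int64_t rows_per_gpu = (nrows + n_gpus - 1)/n_gpus;
    const int64_t row_low      = rows_per_gpu*gpu;
    const int64_t row_high     = row_low + rows_per_gpu < nrows ? row_low + rows_per_gpu : nrows;

    return ggml_nbytes_split(t, (int)(row_high - row_low));
}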