aboutsummaryrefslogtreecommitdiff
path: root/ggml-cuda.h
diff options
context:
space:
mode:
Diffstat (limited to 'ggml-cuda.h')
-rw-r--r--ggml-cuda.h29
1 files changed, 29 insertions, 0 deletions
diff --git a/ggml-cuda.h b/ggml-cuda.h
index be14060..370bbc7 100644
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
@@ -1,7 +1,36 @@
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+
#ifdef __cplusplus
extern "C" {
#endif
+#define CUDA_CHECK(err) \
+ do { \
+ cudaError_t err_ = (err); \
+ if (err_ != cudaSuccess) { \
+ fprintf(stderr, "CUDA error %d at %s:%d: %s\n", err_, __FILE__, __LINE__, \
+ cudaGetErrorString(err_)); \
+ exit(1); \
+ } \
+ } while (0)
+
+#define CUBLAS_CHECK(err) \
+ do { \
+ cublasStatus_t err_ = (err); \
+ if (err_ != CUBLAS_STATUS_SUCCESS) { \
+ fprintf(stderr, "cuBLAS error %d at %s:%d\n", err_, __FILE__, __LINE__); \
+ exit(1); \
+ } \
+ } while (0)
+
+extern cublasHandle_t g_cublasH;
+extern cudaStream_t g_cudaStream;
+
+void ggml_init_cublas(void);
+void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size);
+void ggml_cuda_pool_free(void * ptr, size_t size);
+
void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);