diff options
author | slaren <2141330+slaren@users.noreply.github.com> | 2023-04-21 21:59:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-21 21:59:17 +0200 |
commit | 50cb666b8a2e35a49b08c0f6bc81138c8f6f2ac1 (patch) | |
tree | 80370baa4d8b17d2cb44a134bed6b1a088b1cfc1 /ggml-cuda.h | |
parent | 25d7abbd1f73582b7e0fdc422a936e8541c0780b (diff) |
Improve cuBLAS performance by using a memory pool (#1094)
* Improve cuBLAS performance by using a memory pool
* Move cuda specific definitions to ggml-cuda.h/cu
* Add CXX flags to nvcc
* Change memory pool synchronization mechanism to a spin lock
General code cleanup
Diffstat (limited to 'ggml-cuda.h')
-rw-r--r-- | ggml-cuda.h | 29 |
1 files changed, 29 insertions, 0 deletions
```diff
diff --git a/ggml-cuda.h b/ggml-cuda.h
index be14060..370bbc7 100644
--- a/ggml-cuda.h
+++ b/ggml-cuda.h
@@ -1,7 +1,36 @@
+#include <cublas_v2.h>
+#include <cuda_runtime.h>
+
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+#define CUDA_CHECK(err) \
+    do { \
+        cudaError_t err_ = (err); \
+        if (err_ != cudaSuccess) { \
+            fprintf(stderr, "CUDA error %d at %s:%d: %s\n", err_, __FILE__, __LINE__, \
+                cudaGetErrorString(err_)); \
+            exit(1); \
+        } \
+    } while (0)
+
+#define CUBLAS_CHECK(err) \
+    do { \
+        cublasStatus_t err_ = (err); \
+        if (err_ != CUBLAS_STATUS_SUCCESS) { \
+            fprintf(stderr, "cuBLAS error %d at %s:%d\n", err_, __FILE__, __LINE__); \
+            exit(1); \
+        } \
+    } while (0)
+
+extern cublasHandle_t g_cublasH;
+extern cudaStream_t g_cudaStream;
+
+void ggml_init_cublas(void);
+void * ggml_cuda_pool_malloc(size_t size, size_t * actual_size);
+void ggml_cuda_pool_free(void * ptr, size_t size);
+
 void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
 void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
 void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);
```