diff options
author | slaren <2141330+slaren@users.noreply.github.com> | 2023-04-20 03:14:14 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-20 03:14:14 +0200 |
commit | 02d6988121510c067e06d498a273a351a888f5b9 (patch) | |
tree | 98c6204ad4f3db40bc49595bb7705e8bcd699e5d /ggml-cuda.h | |
parent | 834695fe3a3ed2a962e774c9615e3f7b41d360a8 (diff) |
Improve cuBLAS performance by dequantizing on the GPU (#1065)
Diffstat (limited to 'ggml-cuda.h')
-rw-r--r-- | ggml-cuda.h | 11 |
1 files changed, 11 insertions, 0 deletions
```diff
diff --git a/ggml-cuda.h b/ggml-cuda.h
new file mode 100644
index 0000000..646caaf
--- /dev/null
+++ b/ggml-cuda.h
@@ -0,0 +1,11 @@
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+void dequantize_row_q4_0_cuda(const void * vx, float * y, int k, cudaStream_t stream);
+void dequantize_row_q4_1_cuda(const void * vx, float * y, int k, cudaStream_t stream);
+void dequantize_row_q4_2_cuda(const void * vx, float * y, int k, cudaStream_t stream);
+
+#ifdef __cplusplus
+}
+#endif
```