From 58b367c2d757c0ea12aec672382462b42204c724 Mon Sep 17 00:00:00 2001
From: slaren <2141330+slaren@users.noreply.github.com>
Date: Mon, 1 May 2023 18:11:07 +0200
Subject: cuBLAS: refactor and optimize f16 mat mul performance (#1259)

* cuBLAS: refactor, convert fp16 to fp32 on device

* cuBLAS: use multiple streams, choose smartly between mul_mat_q and mul_mat_f16

* fix build

* cuBLAS: update block_q5_1
---
 ggml.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'ggml.h')

diff --git a/ggml.h b/ggml.h
index d6feacd..ef5a048 100644
--- a/ggml.h
+++ b/ggml.h
@@ -197,6 +197,14 @@
 #define GGML_MAX_OPT 4
 #define GGML_DEFAULT_N_THREADS 4
 
+#define GGML_ASSERT(x) \
+    do { \
+        if (!(x)) { \
+            fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
+            abort(); \
+        } \
+    } while (0)
+
 #ifdef __cplusplus
 extern "C" {
 #endif
@@ -212,6 +220,9 @@ extern "C" {
     GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
     GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
 
+    GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
+    GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
+
     struct ggml_object;
     struct ggml_context;
 
-- 
cgit v1.2.3