aboutsummaryrefslogtreecommitdiff
path: root/ggml.h
diff options
context:
space:
mode:
author: slaren <2141330+slaren@users.noreply.github.com> 2023-05-01 18:11:07 +0200
committer: GitHub <noreply@github.com> 2023-05-01 18:11:07 +0200
commit: 58b367c2d757c0ea12aec672382462b42204c724 (patch)
tree: b2fa89daf71c08788c44e3fb9abf1747ec8ee65d /ggml.h
parent: ea3a0ad6b6b5ca4693b94acd4cb32e2803f66fae (diff)
cuBLAS: refactor and optimize f16 mat mul performance (#1259)
* cuBLAS: refactor, convert fp16 to fp32 on device
* cuBLAS: use multiple streams, choose smartly between mul_mat_q and mul_mat_f16
* fix build
* cuBLAS: update block_q5_1
Diffstat (limited to 'ggml.h')
-rw-r--r-- ggml.h 11
1 file changed, 11 insertions, 0 deletions
diff --git a/ggml.h b/ggml.h
index d6feacd..ef5a048 100644
--- a/ggml.h
+++ b/ggml.h
@@ -197,6 +197,14 @@
#define GGML_MAX_OPT 4
#define GGML_DEFAULT_N_THREADS 4
+#define GGML_ASSERT(x) \
+ do { \
+ if (!(x)) { \
+ fprintf(stderr, "GGML_ASSERT: %s:%d: %s\n", __FILE__, __LINE__, #x); \
+ abort(); \
+ } \
+ } while (0)
+
#ifdef __cplusplus
extern "C" {
#endif
@@ -212,6 +220,9 @@ extern "C" {
GGML_API float ggml_fp16_to_fp32(ggml_fp16_t x);
GGML_API ggml_fp16_t ggml_fp32_to_fp16(float x);
+ GGML_API void ggml_fp16_to_fp32_row(const ggml_fp16_t * x, float * y, size_t n);
+ GGML_API void ggml_fp32_to_fp16_row(const float * x, ggml_fp16_t * y, size_t n);
+
struct ggml_object;
struct ggml_context;