aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author anzz1 <anzz1@live.com> 2023-03-28 22:44:29 +0300
committer GitHub <noreply@github.com> 2023-03-28 22:44:29 +0300
commit 5a5f8b1501fbb34367225544010ddfc306d6d2fe (patch)
tree ab77116dc4b03350ecdf3b9b00b7ff46b0b4b0c5
parent f1217055eaedfc7214be93d98e529cae89830430 (diff)
Enable Fused-Multiply-Add (FMA) and F16C/CVT16 vector extensions on MSVC (#375)
* Enable Fused-Multiply-Add (FMA) instructions on MSVC
  __FMA__ macro does not exist in MSVC
* Enable F16C/CVT16 vector extensions on MSVC
  __F16C__ macro does not exist in MSVC, but is implied with AVX2/AVX512
* MSVC cvt intrinsics
* Add __SSE3__ macro for MSVC too because why not
  even though it's not currently used for anything when AVX is defined
-rw-r--r-- ggml.c 18
1 files changed, 18 insertions, 0 deletions
diff --git a/ggml.c b/ggml.c
index 222d199..efe9316 100644
--- a/ggml.c
+++ b/ggml.c
@@ -79,6 +79,19 @@ static int sched_yield (void) {
typedef void* thread_ret_t;
#endif
+// __FMA__ and __F16C__ are not defined in MSVC, however they are implied with AVX2/AVX512
+#if defined(_MSC_VER) && (defined(__AVX2__) || defined(__AVX512F__))
+#ifndef __FMA__
+#define __FMA__
+#endif
+#ifndef __F16C__
+#define __F16C__
+#endif
+#ifndef __SSE3__
+#define __SSE3__
+#endif
+#endif
+
#ifdef __HAIKU__
#define static_assert(cond, msg) _Static_assert(cond, msg)
#endif
@@ -172,8 +185,13 @@ typedef double ggml_float;
#ifdef __F16C__
+#ifdef _MSC_VER
+#define GGML_COMPUTE_FP16_TO_FP32(x) _mm_cvtss_f32(_mm_cvtph_ps(_mm_cvtsi32_si128(x)))
+#define GGML_COMPUTE_FP32_TO_FP16(x) _mm_extract_epi16(_mm_cvtps_ph(_mm_set_ss(x), 0), 0)
+#else
#define GGML_COMPUTE_FP16_TO_FP32(x) _cvtsh_ss(x)
#define GGML_COMPUTE_FP32_TO_FP16(x) _cvtss_sh(x, 0)
+#endif
#elif defined(__POWER9_VECTOR__)