From 16b9cd193965769089881bb8ec012fccca7b37b6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Johannes=20G=C3=A4=C3=9Fler?=
Date: Mon, 19 Jun 2023 10:23:56 +0200
Subject: Convert vector to f16 for dequantize mul mat vec (#1913)

* Convert vector to f16 for dmmv

* compile option

* Added compilation option description to README

* Changed cmake CUDA_ARCHITECTURES from "OFF" to "native"
---
 Makefile | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'Makefile')

diff --git a/Makefile b/Makefile
index afd06e0..5dd676f 100644
--- a/Makefile
+++ b/Makefile
@@ -169,6 +169,9 @@ ifdef LLAMA_CUDA_DMMV_Y
 else
 	NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
 endif # LLAMA_CUDA_DMMV_Y
+ifdef LLAMA_CUDA_DMMV_F16
+	NVCCFLAGS += -DGGML_CUDA_DMMV_F16
+endif # LLAMA_CUDA_DMMV_F16
 ifdef LLAMA_CUDA_KQUANTS_ITER
 	NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)
 else
-- 
cgit v1.2.3