aboutsummaryrefslogtreecommitdiff
path: root/Makefile
diff options
context:
space:
mode:
authorJohannes Gäßler <johannesg@5d6.de>2023-05-25 23:07:29 +0200
committerGitHub <noreply@github.com>2023-05-26 00:07:29 +0300
commit1fcdcc28b119a6608774d52de905931bd5f8a43d (patch)
treea28504b1f2b0ed7d4b550316c37a9b7e25de889c /Makefile
parentac7876ac20124a15a44fd6317721ff1aa2538806 (diff)
cuda : performance optimizations (#1530)
* xor hack * block y dim * loop unrolling * Fixed cmake LLAMA_CUDA_BY option * Removed hipblas compatibility code * Define GGML_CUDA_DMMV_BLOCK_Y if not defined * Fewer iters, more ops per iter * Renamed DMMV X/Y compilation options
Diffstat (limited to 'Makefile')
-rw-r--r--Makefile12
1 files changed, 11 insertions, 1 deletions
diff --git a/Makefile b/Makefile
index 08e2503..804307b 100644
--- a/Makefile
+++ b/Makefile
@@ -133,9 +133,19 @@ ifdef LLAMA_CUBLAS
OBJS += ggml-cuda.o
NVCC = nvcc
NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native
+ifdef LLAMA_CUDA_DMMV_X
+ NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X)
+else
+ NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
+endif # LLAMA_CUDA_DMMV_X
+ifdef LLAMA_CUDA_DMMV_Y
+ NVCCFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y)
+else
+ NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1
+endif # LLAMA_CUDA_DMMV_Y
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
-endif
+endif # LLAMA_CUBLAS
ifdef LLAMA_CLBLAST
CFLAGS += -DGGML_USE_CLBLAST
CXXFLAGS += -DGGML_USE_CLBLAST