diff options
author | Johannes Gäßler <johannesg@5d6.de> | 2023-05-25 23:07:29 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-26 00:07:29 +0300 |
commit | 1fcdcc28b119a6608774d52de905931bd5f8a43d (patch) | |
tree | a28504b1f2b0ed7d4b550316c37a9b7e25de889c /Makefile | |
parent | ac7876ac20124a15a44fd6317721ff1aa2538806 (diff) |
cuda : performance optimizations (#1530)
* xor hack
* block y dim
* loop unrolling
* Fixed cmake LLAMA_CUDA_BY option
* Removed hipblas compatibility code
* Define GGML_CUDA_DMMV_BLOCK_Y if not defined
* Fewer iters, more ops per iter
* Renamed DMMV X/Y compilation options
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 12 |
1 files changed, 11 insertions, 1 deletions
@@ -133,9 +133,19 @@ ifdef LLAMA_CUBLAS OBJS += ggml-cuda.o NVCC = nvcc NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native +ifdef LLAMA_CUDA_DMMV_X + NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(LLAMA_CUDA_DMMV_X) +else + NVCCFLAGS += -DGGML_CUDA_DMMV_X=32 +endif # LLAMA_CUDA_DMMV_X +ifdef LLAMA_CUDA_DMMV_Y + NVCCFLAGS += -DGGML_CUDA_DMMV_Y=$(LLAMA_CUDA_DMMV_Y) +else + NVCCFLAGS += -DGGML_CUDA_DMMV_Y=1 +endif # LLAMA_CUDA_DMMV_Y ggml-cuda.o: ggml-cuda.cu ggml-cuda.h $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@ -endif +endif # LLAMA_CUBLAS ifdef LLAMA_CLBLAST CFLAGS += -DGGML_USE_CLBLAST CXXFLAGS += -DGGML_USE_CLBLAST |