about | summary | refs | log | tree | commit | diff
path: root/Makefile
diff options
context:
space:
mode:
author: slaren <2141330+slaren@users.noreply.github.com> 2023-04-21 21:59:17 +0200
committer: GitHub <noreply@github.com> 2023-04-21 21:59:17 +0200
commit: 50cb666b8a2e35a49b08c0f6bc81138c8f6f2ac1 (patch)
tree: 80370baa4d8b17d2cb44a134bed6b1a088b1cfc1 /Makefile
parent: 25d7abbd1f73582b7e0fdc422a936e8541c0780b (diff)
Improve cuBLAS performance by using a memory pool (#1094)
* Improve cuBLAS performance by using a memory pool
* Move cuda specific definitions to ggml-cuda.h/cu
* Add CXX flags to nvcc
* Change memory pool synchronization mechanism to a spin lock
General code cleanup
Diffstat (limited to 'Makefile')
-rw-r--r-- Makefile 10
1 files changed, 6 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index f267d08..3b48eec 100644
--- a/Makefile
+++ b/Makefile
@@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS
LDFLAGS += -lopenblas
endif
ifdef LLAMA_CUBLAS
- CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
- LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
- OBJS += ggml-cuda.o
+ CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
+ LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
+ OBJS += ggml-cuda.o
+ NVCC = nvcc
+ NVCCFLAGS = --forward-unknown-to-host-linker -arch=native
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
- nvcc -arch=native -c -o $@ $<
+ $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
endif
ifdef LLAMA_GPROF
CFLAGS += -pg