Improve cuBLAS performance by using a memory pool (#1094)

* Improve cuBLAS performance by using a memory pool
* Move CUDA-specific definitions to ggml-cuda.h/cu
* Add CXX flags to nvcc
* Change memory pool synchronization mechanism to a spin lock
* General code cleanup
author: slaren <2141330+slaren@users.noreply.github.com> 2023-04-21 21:59:17 +0200
committer: GitHub <noreply@github.com> 2023-04-21 21:59:17 +0200
commit: 50cb666b8a2e35a49b08c0f6bc81138c8f6f2ac1 (patch)
tree: 80370baa4d8b17d2cb44a134bed6b1a088b1cfc1 /Makefile
parent: 25d7abbd1f73582b7e0fdc422a936e8541c0780b (diff)
1 files changed, 6 insertions, 4 deletions
diff --git a/Makefile b/Makefile
index f267d08..3b48eec 100644
--- a/Makefile
+++ b/Makefile
@@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS
 	LDFLAGS += -lopenblas
 endif
 ifdef LLAMA_CUBLAS
-	CFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
-	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
-	OBJS	+= ggml-cuda.o
+	CFLAGS    += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
+	LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
+	OBJS      += ggml-cuda.o
+	NVCC      = nvcc
+	NVCCFLAGS = --forward-unknown-to-host-linker -arch=native
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
-	nvcc -arch=native -c -o $@ $<
+	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
 endif
 ifdef LLAMA_GPROF
 	CFLAGS   += -pg
author	slaren <2141330+slaren@users.noreply.github.com>	2023-04-21 21:59:17 +0200
committer	GitHub <noreply@github.com>	2023-04-21 21:59:17 +0200
commit	50cb666b8a2e35a49b08c0f6bc81138c8f6f2ac1 (patch)
tree	80370baa4d8b17d2cb44a134bed6b1a088b1cfc1 /Makefile
parent	25d7abbd1f73582b7e0fdc422a936e8541c0780b (diff)