diff options
author | slaren <2141330+slaren@users.noreply.github.com> | 2023-04-21 21:59:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-21 21:59:17 +0200 |
commit | 50cb666b8a2e35a49b08c0f6bc81138c8f6f2ac1 (patch) | |
tree | 80370baa4d8b17d2cb44a134bed6b1a088b1cfc1 /Makefile | |
parent | 25d7abbd1f73582b7e0fdc422a936e8541c0780b (diff) |
Improve cuBLAS performance by using a memory pool (#1094)
* Improve cuBLAS performance by using a memory pool
* Move cuda specific definitions to ggml-cuda.h/cu
* Add CXX flags to nvcc
* Change memory pool synchronization mechanism to a spin lock
General code cleanup
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 10 |
1 file changed, 6 insertions, 4 deletions
```diff
@@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS
  LDFLAGS += -lopenblas
 endif
 ifdef LLAMA_CUBLAS
- CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
- LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
- OBJS += ggml-cuda.o
+ CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include
+ LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64
+ OBJS += ggml-cuda.o
+ NVCC = nvcc
+ NVCCFLAGS = --forward-unknown-to-host-linker -arch=native
 ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
- nvcc -arch=native -c -o $@ $<
+ $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@
 endif
 ifdef LLAMA_GPROF
  CFLAGS += -pg
```