From 50cb666b8a2e35a49b08c0f6bc81138c8f6f2ac1 Mon Sep 17 00:00:00 2001 From: slaren <2141330+slaren@users.noreply.github.com> Date: Fri, 21 Apr 2023 21:59:17 +0200 Subject: Improve cuBLAS performance by using a memory pool (#1094) * Improve cuBLAS performance by using a memory pool * Move cuda specific definitions to ggml-cuda.h/cu * Add CXX flags to nvcc * Change memory pool synchronization mechanism to a spin lock General code cleanup --- Makefile | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index f267d08..3b48eec 100644 --- a/Makefile +++ b/Makefile @@ -101,11 +101,13 @@ ifdef LLAMA_OPENBLAS LDFLAGS += -lopenblas endif ifdef LLAMA_CUBLAS - CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include - LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 - OBJS += ggml-cuda.o + CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include + LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 + OBJS += ggml-cuda.o + NVCC = nvcc + NVCCFLAGS = --forward-unknown-to-host-linker -arch=native ggml-cuda.o: ggml-cuda.cu ggml-cuda.h - nvcc -arch=native -c -o $@ $< + $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -c $< -o $@ endif ifdef LLAMA_GPROF CFLAGS += -pg -- cgit v1.2.3