diff options
author | slaren <2141330+slaren@users.noreply.github.com> | 2023-04-29 02:04:18 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-29 02:04:18 +0200 |
commit | 7fc50c051ae8a78e9643fdf172d12e20f2dd9b6c (patch) | |
tree | cc017db2f3443a39221ad319ab51df0925012e84 /Makefile | |
parent | b1ee8f59b4101b46999a0995d9a34506f7285466 (diff) |
cuBLAS: use host pinned memory and dequantize while copying (#1207)
* cuBLAS: dequantize simultaneously while copying memory
* cuBLAS: use host pinned memory
* cuBLAS: improve ggml_compute_forward_mul_mat_f16_f32 with pinned memory
* cuBLAS: also pin kv cache
* fix rebase
Diffstat (limited to 'Makefile')
-rw-r--r-- | Makefile | 5 |
1 files changed, 3 insertions, 2 deletions
```diff
@@ -106,6 +106,7 @@ ifdef LLAMA_OPENBLAS
 endif
 ifdef LLAMA_CUBLAS
 	CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
+	CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
 	LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib
 	OBJS += ggml-cuda.o
 	NVCC = nvcc
@@ -164,10 +165,10 @@ $(info )
 # Build library
 #
 
-ggml.o: ggml.c ggml.h
+ggml.o: ggml.c ggml.h ggml-cuda.h
 	$(CC) $(CFLAGS) -c $< -o $@
 
-llama.o: llama.cpp ggml.h llama.h llama_util.h
+llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama_util.h
 	$(CXX) $(CXXFLAGS) -c $< -o $@
 
 common.o: examples/common.cpp examples/common.h
```