From 7296c961d9303010a2b98379f738da2a8a55aa1b Mon Sep 17 00:00:00 2001 From: 0cc4m Date: Fri, 28 Apr 2023 16:57:16 +0200 Subject: ggml : add CLBlast support (#1164) * Allow use of OpenCL GPU-based BLAS using ClBlast instead of OpenBLAS for context processing * Improve ClBlast implementation, avoid recreating buffers, remove redundant transfers * Finish merge of ClBlast support * Move CLBlast implementation to separate file Add buffer reuse code (adapted from slaren's cuda implementation) * Add q4_2 and q4_3 CLBlast support, improve code * Double CLBlast speed by disabling OpenBLAS thread workaround Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com> Co-authored-by: slaren <2141330+slaren@users.noreply.github.com> * Fix device selection env variable names * Fix cast in opencl kernels * Add CLBlast to CMakeLists.txt * Replace buffer pool with static buffers a, b, qb, c Fix compile warnings * Fix typos, use GGML_TYPE defines, improve code * Improve btype dequant kernel selection code, add error if type is unsupported * Improve code quality * Move internal stuff out of header * Use internal enums instead of CLBlast enums * Remove leftover C++ includes and defines * Make event use easier to read Co-authored-by: Henri Vasserman * Use c compiler for opencl files * Simplify code, fix include * First check error, then release event * Make globals static, fix indentation * Rename dequant kernels file to conform with other file names * Fix import cl file name --------- Co-authored-by: Concedo <39025047+LostRuins@users.noreply.github.com> Co-authored-by: slaren <2141330+slaren@users.noreply.github.com> Co-authored-by: Henri Vasserman Co-authored-by: Georgi Gerganov --- Makefile | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index f7c8dbf..0715e85 100644 --- a/Makefile +++ b/Makefile @@ -105,14 +105,21 @@ ifdef LLAMA_OPENBLAS LDFLAGS += -lopenblas endif ifdef LLAMA_CUBLAS - CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include - LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib + CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include + LDFLAGS += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib OBJS += ggml-cuda.o NVCC = nvcc NVCCFLAGS = --forward-unknown-to-host-compiler -arch=native ggml-cuda.o: ggml-cuda.cu ggml-cuda.h $(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@ endif +ifdef LLAMA_CLBLAST + CFLAGS += -DGGML_USE_CLBLAST + LDFLAGS += -lclblast -lOpenCL + OBJS += ggml-opencl.o +ggml-opencl.o: ggml-opencl.c ggml-opencl.h + $(CC) $(CFLAGS) -c $< -o $@ +endif ifdef LLAMA_GPROF CFLAGS += -pg CXXFLAGS += -pg -- cgit v1.2.3