From 02d6988121510c067e06d498a273a351a888f5b9 Mon Sep 17 00:00:00 2001
From: slaren <2141330+slaren@users.noreply.github.com>
Date: Thu, 20 Apr 2023 03:14:14 +0200
Subject: Improve cuBLAS performance by dequantizing on the GPU (#1065)

---
 CMakeLists.txt | 31 +++++++++++++++++++++++++------
 1 file changed, 25 insertions(+), 6 deletions(-)

(limited to 'CMakeLists.txt')

diff --git a/CMakeLists.txt b/CMakeLists.txt
index d7aa051..1f9fdd3 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -110,6 +110,7 @@ if (APPLE AND LLAMA_ACCELERATE)
         message(WARNING "Accelerate framework not found")
     endif()
 endif()
+
 if (LLAMA_OPENBLAS)
     if (LLAMA_STATIC)
         set(BLA_STATIC ON)
@@ -150,6 +151,10 @@ if (LLAMA_CUBLAS)
     if (CUDAToolkit_FOUND)
         message(STATUS "cuBLAS found")
 
+        enable_language(CUDA)
+
+        set(GGML_CUDA_SOURCES ggml-cuda.cu ggml-cuda.h)
+
         add_compile_definitions(GGML_USE_CUBLAS)
 
         if (LLAMA_STATIC)
@@ -241,21 +246,26 @@ elseif (${CMAKE_SYSTEM_PROCESSOR} MATCHES "^(x86_64|i686|AMD64)$")
     message(STATUS "x86 detected")
     if (MSVC)
         if (LLAMA_AVX512)
-            add_compile_options(/arch:AVX512)
+            add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX512>)
+            add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX512>)
             # MSVC has no compile-time flags enabling specific
             # AVX512 extensions, neither it defines the
             # macros corresponding to the extensions.
             # Do it manually.
             if (LLAMA_AVX512_VBMI)
-                add_compile_definitions(__AVX512VBMI__)
+                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VBMI__>)
+                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VBMI__>)
             endif()
             if (LLAMA_AVX512_VNNI)
-                add_compile_definitions(__AVX512VNNI__)
+                add_compile_definitions($<$<COMPILE_LANGUAGE:C>:__AVX512VNNI__>)
+                add_compile_definitions($<$<COMPILE_LANGUAGE:CXX>:__AVX512VNNI__>)
             endif()
         elseif (LLAMA_AVX2)
-            add_compile_options(/arch:AVX2)
+            add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX2>)
+            add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX2>)
         elseif (LLAMA_AVX)
-            add_compile_options(/arch:AVX)
+            add_compile_options($<$<COMPILE_LANGUAGE:C>:/arch:AVX>)
+            add_compile_options($<$<COMPILE_LANGUAGE:CXX>:/arch:AVX>)
         endif()
     else()
         if (LLAMA_F16C)
@@ -292,7 +302,8 @@ endif()
 
 add_library(ggml OBJECT
             ggml.c
-            ggml.h)
+            ggml.h
+            ${GGML_CUDA_SOURCES})
 
 target_include_directories(ggml PUBLIC .)
 target_compile_features(ggml PUBLIC c_std_11) # don't bump
@@ -314,6 +325,14 @@ if (BUILD_SHARED_LIBS)
     target_compile_definitions(llama PRIVATE LLAMA_SHARED LLAMA_BUILD)
 endif()
 
+if (GGML_CUDA_SOURCES)
+    message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
+    set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF)
+    set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
+    set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF)
+endif()
+
+
 #
 # programs, examples and tests
 #
-- 
cgit v1.2.3