author     Johannes Gäßler <johannesg@5d6.de>   2023-07-05 14:19:42 +0200
committer  GitHub <noreply@github.com>          2023-07-05 14:19:42 +0200
commit     924dd22fd3ba93e097f8d19ba5cda919ca2fe2fb (patch)
tree       ca169c258f2d00f7e31c8b743a9f1206280b4d6b /CMakeLists.txt
parent     051c70dcd55709c9cbbfa849af035951fe720433 (diff)
Quantized dot products for CUDA mul mat vec (#2067)
Diffstat (limited to 'CMakeLists.txt')
-rw-r--r--  CMakeLists.txt  13
1 file changed, 10 insertions(+), 3 deletions(-)
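
Note: the hunks below add a LLAMA_CUDA_FORCE_DMMV switch, rename LLAMA_CUDA_DMMV_Y to LLAMA_CUDA_MMV_Y (keeping the old name as a backwards-compatible fallback), and extend the CUDA architecture list for the new quantized mul-mat-vec ("mmvq") kernels. As a rough illustration of how the resulting GGML_CUDA_* compile definitions could be consumed on the CUDA side, here is a minimal sketch; apart from the GGML_CUDA_* macro names, the function and variable names are placeholders, not the actual code in ggml-cuda.cu.

#include <cuda_runtime.h>

// Hypothetical dispatch sketch: pick the quantized (mmvq) or dequantize-based
// (dmmv) mul-mat-vec path depending on the flags defined by the CMake hunks below.

#ifndef GGML_CUDA_MMV_Y
#define GGML_CUDA_MMV_Y 1   // y block size, set via -DLLAMA_CUDA_MMV_Y=...
#endif

static bool use_quantized_mul_mat_vec(int compute_capability) {
#ifdef GGML_CUDA_FORCE_DMMV
    // LLAMA_CUDA_FORCE_DMMV=ON: always fall back to the dequantize + float path.
    (void) compute_capability;
    return false;
#else
    // The integer dot-product intrinsics used by the mmvq kernels need
    // compute capability 6.1 or newer, hence the "52;61" architecture list.
    return compute_capability >= 610;
#endif
}

static dim3 mul_mat_vec_block_nums(int nrows) {
    // GGML_CUDA_MMV_Y rows are processed per block in the y dimension.
    const int block_num_y = (nrows + GGML_CUDA_MMV_Y - 1) / GGML_CUDA_MMV_Y;
    return dim3(1, block_num_y, 1);
}
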
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4ac0f6f..a240454 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,8 +68,9 @@ option(LLAMA_ACCELERATE "llama: enable Accelerate framework
option(LLAMA_BLAS "llama: use BLAS" OFF)
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
+option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
-set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
+set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
option(LLAMA_CUDA_DMMV_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF)
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
@@ -246,8 +247,14 @@ if (LLAMA_CUBLAS)
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
add_compile_definitions(GGML_USE_CUBLAS)
+ if (LLAMA_CUDA_FORCE_DMMV)
+ add_compile_definitions(GGML_CUDA_FORCE_DMMV)
+ endif()
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
- add_compile_definitions(GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
+ add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_MMV_Y})
+ if (DEFINED LLAMA_CUDA_DMMV_Y)
+ add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
+ endif()
if (LLAMA_CUDA_DMMV_F16)
add_compile_definitions(GGML_CUDA_DMMV_F16)
endif()
@@ -263,7 +270,7 @@ if (LLAMA_CUBLAS)
if (LLAMA_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "61") # needed for f16 CUDA intrinsics
else()
- set(CMAKE_CUDA_ARCHITECTURES "52") # lowest CUDA 12 standard
+ set(CMAKE_CUDA_ARCHITECTURES "52;61") # lowest CUDA 12 standard + lowest for integer intrinsics
endif()
endif()
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
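
For context on the "52;61" change above: the quantized mul-mat-vec kernels rely on integer dot-product intrinsics such as __dp4a, which require compute capability 6.1, so 61 is appended to the architecture list while 52 stays as the general baseline. A minimal sketch of the idea, assuming a simple layout of 32 packed int8 values per block; this is illustrative only and not the actual ggml kernel code:

#include <cuda_runtime.h>
#include <cstdint>

// Illustrative only: each __dp4a call multiplies 4 pairs of int8 values and
// accumulates into an int. Real ggml kernels work on quantized blocks with
// scales; this just shows why the "61" architecture is needed.
__device__ int dot_q8_32(const int8_t * x, const int8_t * y) {
    int sum = 0;
#if __CUDA_ARCH__ >= 610
    const int * xi = (const int *) x;    // reinterpret 4 int8 values as one int
    const int * yi = (const int *) y;
    #pragma unroll
    for (int i = 0; i < 8; ++i) {
        sum = __dp4a(xi[i], yi[i], sum); // 4 int8 multiply-adds per call
    }
#else
    // Fallback for older architectures (e.g. the 52 baseline).
    for (int i = 0; i < 32; ++i) {
        sum += x[i] * y[i];
    }
#endif
    return sum;
}
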