about | summary | refs | log | tree | commit | diff
path: root/CMakeLists.txt
diff options
context:
space:
mode:
Diffstat (limited to 'CMakeLists.txt')
-rw-r--r-- CMakeLists.txt 4
1 files changed, 2 insertions, 2 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 1d4e63f..d085bc8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -280,8 +280,8 @@ if (LLAMA_CUBLAS)
# 52 == lowest CUDA 12 standard
# 60 == f16 CUDA intrinsics
# 61 == integer CUDA intrinsics
- # 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster
- if (LLAMA_CUDA_DMMV_F16)
+ # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
+ if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
else()
set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics