aboutsummaryrefslogtreecommitdiff
path: root/CMakeLists.txt
diff options
context:
space:
mode:
Diffstat (limited to 'CMakeLists.txt')
-rw-r--r--CMakeLists.txt16
1 files changed, 10 insertions, 6 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 57678a3..4ecb3d5 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -68,7 +68,7 @@ option(LLAMA_ACCELERATE "llama: enable Accelerate framework
option(LLAMA_BLAS "llama: use BLAS" OFF)
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUBLAS "llama: use CUDA" OFF)
-option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
+#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
set(LLAMA_CUDA_MMQ_Y "64" CACHE STRING "llama: y tile size for mmq CUDA kernels")
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
@@ -253,9 +253,9 @@ if (LLAMA_CUBLAS)
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
add_compile_definitions(GGML_USE_CUBLAS)
- if (LLAMA_CUDA_CUBLAS)
- add_compile_definitions(GGML_CUDA_CUBLAS)
- endif()
+# if (LLAMA_CUDA_CUBLAS)
+# add_compile_definitions(GGML_CUDA_CUBLAS)
+# endif()
add_compile_definitions(GGML_CUDA_MMQ_Y=${LLAMA_CUDA_MMQ_Y})
if (LLAMA_CUDA_FORCE_DMMV)
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
@@ -277,10 +277,14 @@ if (LLAMA_CUBLAS)
endif()
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
+ # 52 == lowest CUDA 12 standard
+ # 60 == f16 CUDA intrinsics
+ # 61 == integer CUDA intrinsics
+ # 70 == (assumed) compute capability at which unrolling a loop in mul_mat_q kernels is faster
if (LLAMA_CUDA_DMMV_F16)
- set(CMAKE_CUDA_ARCHITECTURES "60;61") # needed for f16 CUDA intrinsics
+ set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
else()
- set(CMAKE_CUDA_ARCHITECTURES "52;61") # lowest CUDA 12 standard + lowest for integer intrinsics
+ set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
endif()
endif()
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")