aboutsummaryrefslogtreecommitdiff
path: root/CMakeLists.txt
diff options
context:
space:
mode:
authorJohannes Gäßler <johannesg@5d6.de>2023-06-19 10:23:56 +0200
committerGitHub <noreply@github.com>2023-06-19 10:23:56 +0200
commit16b9cd193965769089881bb8ec012fccca7b37b6 (patch)
tree2ee329793e782f253966fd81f89ea05f5a1a2495 /CMakeLists.txt
parentb24c3049d96557c24782e4d32feaae65f47277af (diff)
Convert vector to f16 for dequantize mul mat vec (#1913)
* Convert vector to f16 for dmmv * compile option * Added compilation option description to README * Changed cmake CUDA_ARCHITECTURES from "OFF" to "native"
Diffstat (limited to 'CMakeLists.txt')
-rw-r--r--CMakeLists.txt10
1 files changed, 7 insertions, 3 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7367719..dc06365 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -70,6 +70,7 @@ set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
set(LLAMA_CUDA_DMMV_Y "1" CACHE STRING "llama: y block size for dmmv CUDA kernels")
+option(LLAMA_CUDA_DMMV_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF)
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
option(LLAMA_METAL "llama: use Metal" OFF)
@@ -238,6 +239,9 @@ if (LLAMA_CUBLAS)
add_compile_definitions(GGML_USE_CUBLAS)
add_compile_definitions(GGML_CUDA_DMMV_X=${LLAMA_CUDA_DMMV_X})
add_compile_definitions(GGML_CUDA_DMMV_Y=${LLAMA_CUDA_DMMV_Y})
+ if (LLAMA_CUDA_DMMV_F16)
+ add_compile_definitions(GGML_CUDA_DMMV_F16)
+ endif()
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
if (LLAMA_STATIC)
@@ -490,13 +494,13 @@ endif()
if (GGML_SOURCES_CUDA)
message(STATUS "GGML CUDA sources found, configuring CUDA architecture")
- set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES OFF)
+ set_property(TARGET ggml PROPERTY CUDA_ARCHITECTURES "native")
set_property(TARGET ggml PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
- set_property(TARGET ggml_static PROPERTY CUDA_ARCHITECTURES OFF)
+ set_property(TARGET ggml_static PROPERTY CUDA_ARCHITECTURES "native")
set_property(TARGET ggml_static PROPERTY CUDA_SELECT_NVCC_ARCH_FLAGS "Auto")
- set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES OFF)
+ set_property(TARGET llama PROPERTY CUDA_ARCHITECTURES "native")
endif()