Diffstat (limited to 'CMakeLists.txt')
-rw-r--r--   CMakeLists.txt   52
1 file changed, 45 insertions, 7 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index d9381da..d085bc8 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -67,11 +67,13 @@ endif()
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
option(LLAMA_BLAS "llama: use BLAS" OFF)
set(LLAMA_BLAS_VENDOR "Generic" CACHE STRING "llama: BLAS library vendor")
-option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
+option(LLAMA_CUBLAS "llama: use CUDA" OFF)
+#option(LLAMA_CUDA_CUBLAS "llama: use cuBLAS for prompt processing" OFF)
+set(LLAMA_CUDA_MMQ_Y "64" CACHE STRING "llama: y tile size for mmq CUDA kernels")
option(LLAMA_CUDA_FORCE_DMMV "llama: use dmmv instead of mmvq CUDA kernels" OFF)
set(LLAMA_CUDA_DMMV_X "32" CACHE STRING "llama: x stride for dmmv CUDA kernels")
set(LLAMA_CUDA_MMV_Y "1" CACHE STRING "llama: y block size for mmv CUDA kernels")
-option(LLAMA_CUDA_DMMV_F16 "llama: use 16 bit floats for dmmv CUDA kernels" OFF)
+option(LLAMA_CUDA_F16 "llama: use 16 bit floats for some calculations" OFF)
set(LLAMA_CUDA_KQUANTS_ITER "2" CACHE STRING "llama: iters./thread per block for Q2_K/Q6_K")
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
option(LLAMA_METAL "llama: use Metal" OFF)
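For reference, a configure invocation exercising the options introduced above could look like the following; the option names come straight from this diff, while the specific values are purely illustrative:

  cmake -B build -DLLAMA_CUBLAS=ON -DLLAMA_CUDA_MMQ_Y=64 -DLLAMA_CUDA_F16=ON
  cmake --build build --config Release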
@@ -251,6 +253,10 @@ if (LLAMA_CUBLAS)
set(GGML_SOURCES_CUDA ggml-cuda.cu ggml-cuda.h)
add_compile_definitions(GGML_USE_CUBLAS)
+# if (LLAMA_CUDA_CUBLAS)
+# add_compile_definitions(GGML_CUDA_CUBLAS)
+# endif()
+ add_compile_definitions(GGML_CUDA_MMQ_Y=${LLAMA_CUDA_MMQ_Y})
if (LLAMA_CUDA_FORCE_DMMV)
add_compile_definitions(GGML_CUDA_FORCE_DMMV)
endif()
@@ -259,8 +265,8 @@ if (LLAMA_CUBLAS)
if (DEFINED LLAMA_CUDA_DMMV_Y)
add_compile_definitions(GGML_CUDA_MMV_Y=${LLAMA_CUDA_DMMV_Y}) # for backwards compatibility
endif()
- if (LLAMA_CUDA_DMMV_F16)
- add_compile_definitions(GGML_CUDA_DMMV_F16)
+ if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
+ add_compile_definitions(GGML_CUDA_F16)
endif()
add_compile_definitions(K_QUANTS_PER_ITERATION=${LLAMA_CUDA_KQUANTS_ITER})
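Note that the old LLAMA_CUDA_DMMV_F16 option is still honored above and simply maps to the new GGML_CUDA_F16 define, so an existing configure line of the following shape should keep working (sketch only):

  cmake -B build -DLLAMA_CUBLAS=ON -DLLAMA_CUDA_DMMV_F16=ON   # effectively the same as -DLLAMA_CUDA_F16=ON here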
@@ -271,10 +277,14 @@ if (LLAMA_CUBLAS)
endif()
if (NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
- if (LLAMA_CUDA_DMMV_F16)
- set(CMAKE_CUDA_ARCHITECTURES "60;61") # needed for f16 CUDA intrinsics
+ # 52 == lowest CUDA 12 standard
+ # 60 == f16 CUDA intrinsics
+ # 61 == integer CUDA intrinsics
+ # 70 == compute capability at which unrolling a loop in mul_mat_q kernels is faster
+ if (LLAMA_CUDA_F16 OR LLAMA_CUDA_DMMV_F16)
+ set(CMAKE_CUDA_ARCHITECTURES "60;61;70") # needed for f16 CUDA intrinsics
else()
- set(CMAKE_CUDA_ARCHITECTURES "52;61") # lowest CUDA 12 standard + lowest for integer intrinsics
+ set(CMAKE_CUDA_ARCHITECTURES "52;61;70") # lowest CUDA 12 standard + lowest for integer intrinsics
endif()
endif()
message(STATUS "Using CUDA architectures: ${CMAKE_CUDA_ARCHITECTURES}")
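Since the architecture list above is only a default (it applies when CMAKE_CUDA_ARCHITECTURES is not already defined), a build for a single known GPU can still override it explicitly; 86 below is just an example compute capability:

  cmake -B build -DLLAMA_CUBLAS=ON -DCMAKE_CUDA_ARCHITECTURES=86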
@@ -357,6 +367,7 @@ if (LLAMA_ALL_WARNINGS)
-Wshadow
-Wstrict-prototypes
-Wpointer-arith
+ -Wmissing-prototypes
)
set(cxx_flags
-Wall
@@ -496,6 +507,8 @@ endif()
add_library(ggml OBJECT
ggml.c
ggml.h
+ ggml-alloc.c
+ ggml-alloc.h
${GGML_SOURCES_CUDA}
${GGML_SOURCES_OPENCL}
${GGML_SOURCES_METAL}
@@ -512,6 +525,7 @@ if (BUILD_SHARED_LIBS)
set_target_properties(ggml PROPERTIES POSITION_INDEPENDENT_CODE ON)
add_library(ggml_shared SHARED $<TARGET_OBJECTS:ggml>)
target_link_libraries(ggml_shared PUBLIC Threads::Threads ${LLAMA_EXTRA_LIBS})
+ install(TARGETS ggml_shared LIBRARY)
endif()
add_library(llama
@@ -533,8 +547,32 @@ if (BUILD_SHARED_LIBS)
if (LLAMA_METAL)
set_target_properties(llama PROPERTIES RESOURCE "${CMAKE_CURRENT_SOURCE_DIR}/ggml-metal.metal")
endif()
+ install(TARGETS llama LIBRARY)
endif()
+include(GNUInstallDirs)
+install(
+ FILES convert.py
+ PERMISSIONS
+ OWNER_READ
+ OWNER_WRITE
+ OWNER_EXECUTE
+ GROUP_READ
+ GROUP_EXECUTE
+ WORLD_READ
+ WORLD_EXECUTE
+ DESTINATION ${CMAKE_INSTALL_BINDIR})
+install(
+ FILES convert-lora-to-ggml.py
+ PERMISSIONS
+ OWNER_READ
+ OWNER_WRITE
+ OWNER_EXECUTE
+ GROUP_READ
+ GROUP_EXECUTE
+ WORLD_READ
+ WORLD_EXECUTE
+ DESTINATION ${CMAKE_INSTALL_BINDIR})
#
# programs, examples and tests
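With the GNUInstallDirs-based install() rules added above, a standard install step should place the conversion scripts (and, for shared builds, the libraries) under the chosen prefix; the prefix and exact library names below are illustrative:

  cmake --install build --prefix /usr/local
  # expected layout with default GNUInstallDirs values:
  #   /usr/local/bin/convert.py
  #   /usr/local/bin/convert-lora-to-ggml.py
  #   /usr/local/lib/libllama.so and libggml_shared.so (only when BUILD_SHARED_LIBS=ON)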