aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgi Gerganov <ggerganov@gmail.com> 2023-05-20 12:03:48 +0300
committer Georgi Gerganov <ggerganov@gmail.com> 2023-05-20 12:03:48 +0300
commit ea600071cb005267e9e8f2629c1e406dd5fde083 (patch)
tree 0a285ebbdd3efa99eb60042631ddd86ae6dedd00
parent 07e9ace0f9da424d82e75df969642522880feb92 (diff)
Revert "feature : add blis and other BLAS implementation support (#1502)"
This reverts commit 07e9ace0f9da424d82e75df969642522880feb92.
-rw-r--r-- BLIS.md 67
-rw-r--r-- CMakeLists.txt 39
-rw-r--r-- Makefile 4
-rw-r--r-- README.md 19
4 files changed, 25 insertions, 104 deletions
diff --git a/BLIS.md b/BLIS.md
deleted file mode 100644
index 9b3c306..0000000
--- a/BLIS.md
+++ /dev/null
@@ -1,67 +0,0 @@
-BLIS Installation Manual
-------------------------
-
-BLIS is a portable software framework for high-performance BLAS-like dense linear algebra libraries. It has received awards and recognition, including the 2023 James H. Wilkinson Prize for Numerical Software and the 2020 SIAM Activity Group on Supercomputing Best Paper Prize. BLIS provides a new BLAS-like API and a compatibility layer for traditional BLAS routine calls. It offers features such as object-based API, typed API, BLAS and CBLAS compatibility layers.
-
-Project URL: https://github.com/flame/blis
-
-### Prepare:
-
-Compile BLIS:
-
-```bash
-git clone https://github.com/flame/blis
-cd blis
-./configure --enable-cblas -t openmp,pthreads auto
-# will install to /usr/local/ by default.
-make -j
-```
-
-Install BLIS:
-
-```bash
-sudo make install
-```
-
-We recommend using OpenMP, since it makes it easier to control which cores are used.
-
-### llama.cpp compilation
-
-Makefile:
-
-```bash
-make LLAMA_BLIS=1 -j
-# make LLAMA_BLIS=1 benchmark-matmult
-```
-
-CMake:
-
-```bash
-mkdir build
-cd build
-cmake -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=FLAME ..
-make -j
-```
-
-### llama.cpp execution
-
-According to the BLIS documentation, we could set the following
-environment variables to modify the behavior of openmp:
-
-```
-export GOMP_CPU_AFFINITY="0-19"
-export BLIS_NUM_THREADS=14
-```
-
-And then run the binaries as normal.
-
-
-### Intel specific issue
-
-Some users may get an error message saying that `libimf.so` cannot be found.
-If so, follow the steps on this [Stack Overflow page](https://stackoverflow.com/questions/70687930/intel-oneapi-2022-libimf-so-no-such-file-or-directory-during-openmpi-compila).
-
-### Reference:
-
-1. https://github.com/flame/blis#getting-started
-2. https://github.com/flame/blis/blob/master/docs/Multithreading.md
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 0876ab9..48e3238 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,8 +65,7 @@ endif()
# 3rd party libs
option(LLAMA_ACCELERATE "llama: enable Accelerate framework" ON)
-option(LLAMA_BLAS "llama: use BLAS" OFF)
-option(LLAMA_BLAS_VENDOR "llama: BLA_VENDOR from https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors" Generic)
+option(LLAMA_OPENBLAS "llama: use OpenBLAS" OFF)
option(LLAMA_CUBLAS "llama: use cuBLAS" OFF)
option(LLAMA_CLBLAST "llama: use CLBlast" OFF)
@@ -146,28 +145,36 @@ if (APPLE AND LLAMA_ACCELERATE)
endif()
endif()
-if (LLAMA_BLAS)
+if (LLAMA_OPENBLAS)
if (LLAMA_STATIC)
set(BLA_STATIC ON)
endif()
- if ($(CMAKE_VERSION) VERSION_GREATER_EQUAL 3.22)
- set(BLA_SIZEOF_INTEGER 8)
- endif()
- set(BLA_VENDOR ${LLAMA_BLAS_VENDOR})
+
+ set(BLA_VENDOR OpenBLAS)
find_package(BLAS)
if (BLAS_FOUND)
- message(STATUS "BLAS found, Libraries: ${BLAS_LIBRARIES}")
+ message(STATUS "OpenBLAS found")
- add_compile_options(${BLAS_LINKER_FLAGS})
add_compile_definitions(GGML_USE_OPENBLAS)
- set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} ${BLAS_LIBRARIES})
-
- message("${BLAS_LIBRARIES}")
- include_directories(${BLAS_INCLUDE_DIRS})
+ add_link_options(${BLAS_LIBRARIES})
+ set(LLAMA_EXTRA_LIBS ${LLAMA_EXTRA_LIBS} openblas)
+
+ # find header file
+ set(OPENBLAS_INCLUDE_SEARCH_PATHS
+ /usr/include
+ /usr/include/openblas
+ /usr/include/openblas-base
+ /usr/local/include
+ /usr/local/include/openblas
+ /usr/local/include/openblas-base
+ /opt/OpenBLAS/include
+ $ENV{OpenBLAS_HOME}
+ $ENV{OpenBLAS_HOME}/include
+ )
+ find_path(OPENBLAS_INC NAMES cblas.h PATHS ${OPENBLAS_INCLUDE_SEARCH_PATHS})
+ add_compile_options(-I${OPENBLAS_INC})
else()
- message(WARNING "BLAS not found, please refer to "
- "https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors"
- " to set correct LLAMA_BLAS_VENDOR")
+ message(WARNING "OpenBLAS not found")
endif()
endif()
diff --git a/Makefile b/Makefile
index cefa0b4..f9ec879 100644
--- a/Makefile
+++ b/Makefile
@@ -122,10 +122,6 @@ ifdef LLAMA_OPENBLAS
LDFLAGS += -lopenblas
endif
endif
-ifdef LLAMA_BLIS
- CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
- LDFLAGS += -lblis -L/usr/local/lib
-endif
ifdef LLAMA_CUBLAS
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
diff --git a/README.md b/README.md
index 102cde4..762f4aa 100644
--- a/README.md
+++ b/README.md
@@ -56,7 +56,7 @@ The main goal of `llama.cpp` is to run the LLaMA model using 4-bit integer quant
- Mixed F16 / F32 precision
- 4-bit, 5-bit and 8-bit integer quantization support
- Runs on the CPU
-- Supports OpenBLAS/Apple BLAS/ARM Performance Lib/ATLAS/BLIS/Intel MKL/NVHPC/ACML/SCSL/SGIMATH and [more](https://cmake.org/cmake/help/latest/module/FindBLAS.html#blas-lapack-vendors) in BLAS
+- OpenBLAS support
- cuBLAS and CLBlast support
The original implementation of `llama.cpp` was [hacked in an evening](https://github.com/ggerganov/llama.cpp/issues/33#issuecomment-1465108022).
@@ -274,25 +274,10 @@ Building the program with BLAS support may lead to some performance improvements
```bash
mkdir build
cd build
- cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=OpenBLAS
+ cmake .. -DLLAMA_OPENBLAS=ON
cmake --build . --config Release
```
-- BLIS
-
- Check [BLIS.md](BLIS.md) for more information.
-
-- Intel MKL
-
- By default, `LLAMA_BLAS_VENDOR` is set to `Generic`, so if you have already sourced the Intel environment script and pass `-DLLAMA_BLAS=ON` to cmake, the MKL version of BLAS will be selected automatically. You may also specify it explicitly:
-
- ```bash
- mkdir build
- cd build
- cmake .. -DLLAMA_BLAS=ON -DLLAMA_BLAS_VENDOR=Intel10_64lp -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
- cmake --build . --config Release
- ```
-
- cuBLAS
This provides BLAS acceleration using the CUDA cores of your Nvidia GPU. Make sure to have the CUDA toolkit installed. You can download it from your Linux distro's package manager or from here: [CUDA Toolkit](https://developer.nvidia.com/cuda-downloads).