aboutsummaryrefslogtreecommitdiff
path: root/Makefile
diff options
context:
space:
mode:
Diffstat (limited to 'Makefile')
-rw-r--r--Makefile33
1 files changed, 27 insertions, 6 deletions
diff --git a/Makefile b/Makefile
index 8e8d426..1f910c3 100644
--- a/Makefile
+++ b/Makefile
@@ -105,6 +105,7 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))
#CFLAGS += -mfma -mf16c -mavx
#CXXFLAGS += -mfma -mf16c -mavx
endif
+
ifneq ($(filter ppc64%,$(UNAME_M)),)
POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
ifneq (,$(findstring POWER9,$(POWER9_M)))
@@ -116,6 +117,7 @@ ifneq ($(filter ppc64%,$(UNAME_M)),)
CXXFLAGS += -std=c++23 -DGGML_BIG_ENDIAN
endif
endif
+
ifndef LLAMA_NO_ACCELERATE
# Mac M1 - include Accelerate framework.
# `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time).
@@ -123,7 +125,8 @@ ifndef LLAMA_NO_ACCELERATE
CFLAGS += -DGGML_USE_ACCELERATE
LDFLAGS += -framework Accelerate
endif
-endif
+endif # LLAMA_NO_ACCELERATE
+
ifdef LLAMA_OPENBLAS
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas -I/usr/include/openblas
ifneq ($(shell grep -e "Arch Linux" -e "ID_LIKE=arch" /etc/os-release 2>/dev/null),)
@@ -131,11 +134,13 @@ ifdef LLAMA_OPENBLAS
else
LDFLAGS += -lopenblas
endif
-endif
+endif # LLAMA_OPENBLAS
+
ifdef LLAMA_BLIS
CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis
LDFLAGS += -lblis -L/usr/local/lib
-endif
+endif # LLAMA_BLIS
+
ifdef LLAMA_CUBLAS
CFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
CXXFLAGS += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include
@@ -156,9 +161,10 @@ endif # LLAMA_CUDA_DMMV_Y
ggml-cuda.o: ggml-cuda.cu ggml-cuda.h
$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@
endif # LLAMA_CUBLAS
+
ifdef LLAMA_CLBLAST
- CFLAGS += -DGGML_USE_CLBLAST
- CXXFLAGS += -DGGML_USE_CLBLAST
+ CFLAGS += -DGGML_USE_CLBLAST
+ CXXFLAGS += -DGGML_USE_CLBLAST
# Mac provides OpenCL as a framework
ifeq ($(UNAME_S),Darwin)
LDFLAGS += -lclblast -framework OpenCL
@@ -166,23 +172,38 @@ ifdef LLAMA_CLBLAST
LDFLAGS += -lclblast -lOpenCL
endif
OBJS += ggml-opencl.o
+
ggml-opencl.o: ggml-opencl.cpp ggml-opencl.h
$(CXX) $(CXXFLAGS) -c $< -o $@
-endif
+endif # LLAMA_CLBLAST
+
+ifdef LLAMA_METAL
+ CFLAGS += -DGGML_USE_METAL -DGGML_METAL_NDEBUG
+ CXXFLAGS += -DGGML_USE_METAL
+ LDFLAGS += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders
+ OBJS += ggml-metal.o
+
+ggml-metal.o: ggml-metal.m ggml-metal.h
+ $(CC) $(CFLAGS) -c $< -o $@
+endif # LLAMA_METAL
+
ifneq ($(filter aarch64%,$(UNAME_M)),)
# Apple M1, M2, etc.
# Raspberry Pi 3, 4, Zero 2 (64-bit)
CFLAGS += -mcpu=native
CXXFLAGS += -mcpu=native
endif
+
ifneq ($(filter armv6%,$(UNAME_M)),)
# Raspberry Pi 1, Zero
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
endif
+
ifneq ($(filter armv7%,$(UNAME_M)),)
# Raspberry Pi 2
CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
endif
+
ifneq ($(filter armv8%,$(UNAME_M)),)
# Raspberry Pi 3, 4, Zero 2 (32-bit)
CFLAGS += -mfp16-format=ieee -mno-unaligned-access