diff options
Diffstat (limited to 'Makefile')
| -rw-r--r-- | Makefile | 170 | 
1 files changed, 122 insertions, 48 deletions
| @@ -1,5 +1,8 @@  # Define the default target now so that it is always the first target -BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple server libembdinput.so embd-input-test +BUILD_TARGETS = main quantize quantize-stats perplexity embedding vdot train-text-from-scratch simple server embd-input-test + +# Binaries only useful for tests +TEST_TARGETS = tests/test-double-float tests/test-grad0 tests/test-opt tests/test-quantize-fns tests/test-quantize-perf tests/test-sampling tests/test-tokenizer-0  default: $(BUILD_TARGETS) @@ -60,7 +63,8 @@ ifdef LLAMA_SERVER_VERBOSE  endif  # warnings -CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith +CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith \ +			-Wmissing-prototypes  CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar  # OS specific @@ -90,6 +94,28 @@ ifeq ($(UNAME_S),Haiku)  	CXXFLAGS += -pthread  endif +# detect Windows +ifneq ($(findstring _NT,$(UNAME_S)),) +	_WIN32 := 1 +endif + +# library name prefix +ifneq ($(_WIN32),1) +	LIB_PRE := lib +endif + +# Dynamic Shared Object extension +ifneq ($(_WIN32),1) +	DSO_EXT := .so +else +	DSO_EXT := .dll +endif + +# Windows Sockets 2 (Winsock) for network-capable apps +ifeq ($(_WIN32),1) +	LWINSOCK2 := -lws2_32 +endif +  ifdef LLAMA_GPROF  	CFLAGS   += -pg  	CXXFLAGS += -pg @@ -102,7 +128,7 @@ endif  # Architecture specific  # TODO: probably these flags need to be tweaked on some architectures  #       feel free to update the Makefile for your architecture and send a pull request or issue -ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686)) +ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))  	# Use all CPU extensions that are available:  	CFLAGS   += -march=native -mtune=native  	CXXFLAGS += -march=native -mtune=native @@ -116,6 +142,28 @@ ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686))  	#CXXFLAGS += -mssse3  endif +ifneq ($(filter aarch64%,$(UNAME_M)),) +	# Apple M1, M2, etc. +	# Raspberry Pi 3, 4, Zero 2 (64-bit) +	CFLAGS   += -mcpu=native +	CXXFLAGS += -mcpu=native +endif + +ifneq ($(filter armv6%,$(UNAME_M)),) +	# Raspberry Pi 1, Zero +	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access +endif + +ifneq ($(filter armv7%,$(UNAME_M)),) +	# Raspberry Pi 2 +	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations +endif + +ifneq ($(filter armv8%,$(UNAME_M)),) +	# Raspberry Pi 3, 4, Zero 2 (32-bit) +	CFLAGS += -mfp16-format=ieee -mno-unaligned-access +endif +  ifneq ($(filter ppc64%,$(UNAME_M)),)  	POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)  	ifneq (,$(findstring POWER9,$(POWER9_M))) @@ -151,14 +199,11 @@ ifdef LLAMA_MPI  	CFLAGS += -DGGML_USE_MPI -Wno-cast-qual  	CXXFLAGS += -DGGML_USE_MPI -Wno-cast-qual  	OBJS     += ggml-mpi.o - -ggml-mpi.o: ggml-mpi.c ggml-mpi.h -	$(CC) $(CFLAGS) -c $< -o $@  endif # LLAMA_MPI  ifdef LLAMA_OPENBLAS -	CFLAGS  += -DGGML_USE_OPENBLAS -I/usr/local/include/openblas -I/usr/include/openblas -	LDFLAGS += -lopenblas +	CFLAGS  += -DGGML_USE_OPENBLAS $(shell pkg-config --cflags openblas) +	LDFLAGS += $(shell pkg-config --libs openblas)  endif # LLAMA_OPENBLAS  ifdef LLAMA_BLIS @@ -171,8 +216,12 @@ ifdef LLAMA_CUBLAS  	CXXFLAGS  += -DGGML_USE_CUBLAS -I/usr/local/cuda/include -I/opt/cuda/include -I$(CUDA_PATH)/targets/x86_64-linux/include  	LDFLAGS   += -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L/usr/local/cuda/lib64 -L/opt/cuda/lib64 -L$(CUDA_PATH)/targets/x86_64-linux/lib  	OBJS      += ggml-cuda.o -	NVCC      = nvcc -	NVCCFLAGS = --forward-unknown-to-host-compiler +	NVCCFLAGS = --forward-unknown-to-host-compiler -use_fast_math +ifdef LLAMA_CUDA_NVCC +	NVCC = $(LLAMA_CUDA_NVCC) +else +	NVCC = nvcc +endif #LLAMA_CUDA_NVCC  ifdef CUDA_DOCKER_ARCH  	NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)  else @@ -193,27 +242,37 @@ else ifdef LLAMA_CUDA_DMMV_Y  else  	NVCCFLAGS += -DGGML_CUDA_MMV_Y=1  endif # LLAMA_CUDA_MMV_Y +ifdef LLAMA_CUDA_F16 +	NVCCFLAGS += -DGGML_CUDA_F16 +endif # LLAMA_CUDA_F16  ifdef LLAMA_CUDA_DMMV_F16 -	NVCCFLAGS += -DGGML_CUDA_DMMV_F16 +	NVCCFLAGS += -DGGML_CUDA_F16  endif # LLAMA_CUDA_DMMV_F16  ifdef LLAMA_CUDA_KQUANTS_ITER  	NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(LLAMA_CUDA_KQUANTS_ITER)  else  	NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2  endif - +#ifdef LLAMA_CUDA_CUBLAS +#	NVCCFLAGS += -DGGML_CUDA_CUBLAS +#endif # LLAMA_CUDA_CUBLAS +ifdef LLAMA_CUDA_CCBIN +	NVCCFLAGS += -ccbin $(LLAMA_CUDA_CCBIN) +endif  ggml-cuda.o: ggml-cuda.cu ggml-cuda.h -	$(NVCC) $(NVCCFLAGS) $(CXXFLAGS) -Wno-pedantic -c $< -o $@ +	$(NVCC) $(NVCCFLAGS) $(subst -Ofast,-O3,$(CXXFLAGS)) -Wno-pedantic -c $< -o $@  endif # LLAMA_CUBLAS  ifdef LLAMA_CLBLAST -	CFLAGS   += -DGGML_USE_CLBLAST -	CXXFLAGS += -DGGML_USE_CLBLAST + +	CFLAGS   += -DGGML_USE_CLBLAST $(shell pkg-config --cflags clblast OpenCL) +	CXXFLAGS += -DGGML_USE_CLBLAST $(shell pkg-config --cflags clblast OpenCL) +  	# Mac provides OpenCL as a framework  	ifeq ($(UNAME_S),Darwin)  		LDFLAGS += -lclblast -framework OpenCL  	else -		LDFLAGS += -lclblast -lOpenCL +		LDFLAGS += $(shell pkg-config --libs clblast OpenCL)  	endif  	OBJS    += ggml-opencl.o @@ -226,32 +285,17 @@ ifdef LLAMA_METAL  	CXXFLAGS += -DGGML_USE_METAL  	LDFLAGS  += -framework Foundation -framework Metal -framework MetalKit -framework MetalPerformanceShaders  	OBJS     += ggml-metal.o +endif # LLAMA_METAL +ifdef LLAMA_METAL  ggml-metal.o: ggml-metal.m ggml-metal.h  	$(CC) $(CFLAGS) -c $< -o $@  endif # LLAMA_METAL -ifneq ($(filter aarch64%,$(UNAME_M)),) -	# Apple M1, M2, etc. -	# Raspberry Pi 3, 4, Zero 2 (64-bit) -	CFLAGS   += -mcpu=native -	CXXFLAGS += -mcpu=native -endif - -ifneq ($(filter armv6%,$(UNAME_M)),) -	# Raspberry Pi 1, Zero -	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -endif - -ifneq ($(filter armv7%,$(UNAME_M)),) -	# Raspberry Pi 2 -	CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations -endif - -ifneq ($(filter armv8%,$(UNAME_M)),) -	# Raspberry Pi 3, 4, Zero 2 (32-bit) -	CFLAGS += -mfp16-format=ieee -mno-unaligned-access -endif +ifdef LLAMA_MPI +ggml-mpi.o: ggml-mpi.c ggml-mpi.h +	$(CC) $(CFLAGS) -c $< -o $@ +endif # LLAMA_MPI  ifdef LLAMA_NO_K_QUANTS  k_quants.o: k_quants.c k_quants.h @@ -280,23 +324,34 @@ $(info )  ggml.o: ggml.c ggml.h ggml-cuda.h  	$(CC)  $(CFLAGS)   -c $< -o $@ -llama.o: llama.cpp ggml.h ggml-cuda.h ggml-metal.h llama.h llama-util.h +ggml-alloc.o: ggml-alloc.c ggml.h ggml-alloc.h +	$(CC)  $(CFLAGS)   -c $< -o $@ + +OBJS += ggml-alloc.o + +llama.o: llama.cpp ggml.h ggml-alloc.h ggml-cuda.h ggml-metal.h llama.h llama-util.h  	$(CXX) $(CXXFLAGS) -c $< -o $@  common.o: examples/common.cpp examples/common.h  	$(CXX) $(CXXFLAGS) -c $< -o $@ +console.o: examples/console.cpp examples/console.h +	$(CXX) $(CXXFLAGS) -c $< -o $@ + +grammar-parser.o: examples/grammar-parser.cpp examples/grammar-parser.h +	$(CXX) $(CXXFLAGS) -c $< -o $@ +  libllama.so: llama.o ggml.o $(OBJS)  	$(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)  clean: -	rm -vf *.o *.so main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test build-info.h +	rm -vf *.o *.so *.dll main quantize quantize-stats perplexity embedding benchmark-matmult save-load-state server simple vdot train-text-from-scratch embd-input-test build-info.h $(TEST_TARGETS)  #  # Examples  # -main: examples/main/main.cpp                                  build-info.h ggml.o llama.o common.o $(OBJS) +main: examples/main/main.cpp                                  build-info.h ggml.o llama.o common.o console.o grammar-parser.o $(OBJS)  	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)  	@echo  	@echo '====  Run ./main -h for help.  ====' @@ -320,15 +375,15 @@ embedding: examples/embedding/embedding.cpp                   build-info.h ggml.  save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS)  	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS) -	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) +server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp examples/server/index.html.hpp examples/server/index.js.hpp examples/server/completion.js.hpp build-info.h ggml.o llama.o common.o grammar-parser.o $(OBJS) +	$(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) $(LWINSOCK2) -libembdinput.so: examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o common.o $(OBJS) +$(LIB_PRE)embdinput$(DSO_EXT): examples/embd-input/embd-input.h examples/embd-input/embd-input-lib.cpp build-info.h ggml.o llama.o common.o $(OBJS)  	$(CXX) --shared $(CXXFLAGS) $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) -embd-input-test: libembdinput.so examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS) -	$(CXX) $(CXXFLAGS) $(filter-out %.so,$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput +embd-input-test: $(LIB_PRE)embdinput$(DSO_EXT) examples/embd-input/embd-input-test.cpp build-info.h ggml.o llama.o common.o $(OBJS) +	$(CXX) $(CXXFLAGS) $(filter-out %$(DSO_EXT),$(filter-out %.h,$(filter-out %.hpp,$^))) -o $@ $(LDFLAGS) -L. -lembdinput  train-text-from-scratch: examples/train-text-from-scratch/train-text-from-scratch.cpp    build-info.h ggml.o llama.o $(OBJS)  	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) @@ -345,6 +400,8 @@ build-info.h: $(wildcard .git/index) scripts/build-info.sh  # Tests  # +tests: $(TEST_TARGETS) +  benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS)  	$(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS)  	./$@ @@ -352,6 +409,23 @@ benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o  vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS)  	$(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) -.PHONY: tests clean -tests: -	bash ./tests/run-tests.sh +tests/test-double-float: tests/test-double-float.cpp build-info.h ggml.o llama.o common.o $(OBJS) +	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) + +tests/test-grad0: tests/test-grad0.cpp build-info.h ggml.o llama.o common.o $(OBJS) +	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) + +tests/test-opt: tests/test-opt.cpp build-info.h ggml.o llama.o common.o $(OBJS) +	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) + +tests/test-quantize-fns: tests/test-quantize-fns.cpp build-info.h ggml.o llama.o common.o $(OBJS) +	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) + +tests/test-quantize-perf: tests/test-quantize-perf.cpp build-info.h ggml.o llama.o common.o $(OBJS) +	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) + +tests/test-sampling: tests/test-sampling.cpp build-info.h ggml.o llama.o common.o $(OBJS) +	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) + +tests/test-tokenizer-0: tests/test-tokenizer-0.cpp build-info.h ggml.o llama.o common.o $(OBJS) +	$(CXX) $(CXXFLAGS) $(filter-out %.txt,$^) -o $@ $(LDFLAGS) | 
