From 5c64a0952ee58b2d742ee84e8e3d43cce5d366db Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Wed, 7 Jun 2023 10:59:52 +0300 Subject: k-quants : allow to optionally disable at compile time (#1734) * k-quants : put behind optional compile flag LLAMA_K_QUANTS * build : enable k-quants by default --- Makefile | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) (limited to 'Makefile') diff --git a/Makefile b/Makefile index 0205f19..3926516 100644 --- a/Makefile +++ b/Makefile @@ -121,6 +121,11 @@ ifneq ($(filter ppc64%,$(UNAME_M)),) endif endif +ifndef LLAMA_NO_K_QUANTS + CFLAGS += -DGGML_USE_K_QUANTS + OBJS += k_quants.o +endif + ifndef LLAMA_NO_ACCELERATE # Mac M1 - include Accelerate framework. # `-framework Accelerate` works on Mac Intel as well, with negliable performance boost (as of the predict time). @@ -140,7 +145,7 @@ ifdef LLAMA_OPENBLAS endif # LLAMA_OPENBLAS ifdef LLAMA_BLIS - CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis + CFLAGS += -DGGML_USE_OPENBLAS -I/usr/local/include/blis -I/usr/include/blis LDFLAGS += -lblis -L/usr/local/lib endif # LLAMA_BLIS @@ -212,6 +217,11 @@ ifneq ($(filter armv8%,$(UNAME_M)),) CFLAGS += -mfp16-format=ieee -mno-unaligned-access endif +ifdef LLAMA_NO_K_QUANTS +k_quants.o: k_quants.c k_quants.h + $(CC) $(CFLAGS) -c $< -o $@ +endif # LLAMA_NO_K_QUANTS + # # Print build information # @@ -231,10 +241,7 @@ $(info ) # Build library # -ggml.o: ggml.c ggml.h ggml-cuda.h ggml-quants-k.h - $(CC) $(CFLAGS) -c $< -o $@ - -ggml-quants-k.o: ggml-quants-k.c ggml-quants-k.h ggml.h ggml-cuda.h +ggml.o: ggml.c ggml.h ggml-cuda.h $(CC) $(CFLAGS) -c $< -o $@ llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h @@ -243,7 +250,7 @@ llama.o: llama.cpp ggml.h ggml-cuda.h llama.h llama-util.h common.o: examples/common.cpp examples/common.h $(CXX) $(CXXFLAGS) -c $< -o $@ -libllama.so: llama.o ggml.o ggml-quants-k.o $(OBJS) +libllama.so: llama.o ggml.o $(OBJS) $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) clean: @@ -253,28 +260,28 @@ clean: # Examples # -main: examples/main/main.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS) +main: examples/main/main.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) @echo @echo '==== Run ./main -h for help. ====' @echo -quantize: examples/quantize/quantize.cpp build-info.h ggml.o ggml-quants-k.o llama.o $(OBJS) +quantize: examples/quantize/quantize.cpp build-info.h ggml.o llama.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o ggml-quants-k.o llama.o $(OBJS) +quantize-stats: examples/quantize-stats/quantize-stats.cpp build-info.h ggml.o llama.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS) +perplexity: examples/perplexity/perplexity.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -embedding: examples/embedding/embedding.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS) +embedding: examples/embedding/embedding.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS) +save-load-state: examples/save-load-state/save-load-state.cpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) -server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o ggml-quants-k.o llama.o common.o $(OBJS) +server: examples/server/server.cpp examples/server/httplib.h examples/server/json.hpp build-info.h ggml.o llama.o common.o $(OBJS) $(CXX) $(CXXFLAGS) -Iexamples/server $(filter-out %.h,$(filter-out %.hpp,$^)) -o $@ $(LDFLAGS) build-info.h: $(wildcard .git/index) scripts/build-info.sh @@ -289,11 +296,11 @@ build-info.h: $(wildcard .git/index) scripts/build-info.sh # Tests # -benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o ggml-quants-k.o $(OBJS) +benchmark-matmult: examples/benchmark/benchmark-matmult.cpp build-info.h ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $(filter-out %.h,$^) -o $@ $(LDFLAGS) ./$@ -vdot: pocs/vdot/vdot.cpp ggml.o ggml-quants-k.o $(OBJS) +vdot: pocs/vdot/vdot.cpp ggml.o $(OBJS) $(CXX) $(CXXFLAGS) $^ -o $@ $(LDFLAGS) .PHONY: tests clean -- cgit v1.2.3