From 214b6a35702a489e3738acd81fad6d46182d3036 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Sat, 29 Apr 2023 18:43:28 +0300
Subject: ggml : adjust mul_mat_f16 work memory (#1226)

* llama : minor - remove explicity int64_t cast

* ggml : reduce memory buffer for F16 mul_mat when not using cuBLAS

* ggml : add asserts to guard for incorrect wsize
---
 Makefile | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

(limited to 'Makefile')

diff --git a/Makefile b/Makefile
index fd695d7..4516e85 100644
--- a/Makefile
+++ b/Makefile
@@ -34,10 +34,15 @@ endif
 #
 
 # keep standard at C11 and C++11
-CFLAGS   = -I.              -O3 -DNDEBUG -std=c11   -fPIC
-CXXFLAGS = -I. -I./examples -O3 -DNDEBUG -std=c++11 -fPIC
+CFLAGS   = -I.              -O3 -std=c11   -fPIC
+CXXFLAGS = -I. -I./examples -O3 -std=c++11 -fPIC
 LDFLAGS  =
 
+ifndef LLAMA_DEBUG
+	CFLAGS   += -DNDEBUG
+	CXXFLAGS += -DNDEBUG
+endif
+
 # warnings
 CFLAGS   += -Wall -Wextra -Wpedantic -Wcast-qual -Wdouble-promotion -Wshadow -Wstrict-prototypes -Wpointer-arith
 CXXFLAGS += -Wall -Wextra -Wpedantic -Wcast-qual -Wno-unused-function -Wno-multichar
-- 
cgit v1.2.3