diff options
| -rwxr-xr-x | alpaca.sh | 2 | ||||
| -rwxr-xr-x | chat.sh | 2 | ||||
| -rwxr-xr-x | examples/chatLLaMa | 2 | ||||
| -rw-r--r-- | main.cpp | 14 | 
4 files changed, 13 insertions, 7 deletions
| @@ -3,4 +3,4 @@  # Temporary script - will be removed in the future  # -./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7 +./main -m ./models/ggml-alpaca-7b-q4.bin --color -f ./prompts/alpaca.txt -ins -b 256 --top_k 10000 --temp 0.2 --repeat_penalty 1 -t 7 @@ -3,4 +3,4 @@  # Temporary script - will be removed in the future  # -./main -m ./models/7B/ggml-model-q4_0.bin -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt +./main -m ./models/7B/ggml-model-q4_0.bin -b 128 -n 256 --repeat_penalty 1.0 --color -i -r "User:" -f prompts/chat-with-bob.txt diff --git a/examples/chatLLaMa b/examples/chatLLaMa index 97ababb..4265d7b 100755 --- a/examples/chatLLaMa +++ b/examples/chatLLaMa @@ -13,7 +13,7 @@ N_PREDICTS="${N_PREDICTS:-2048}"  # Note: you can also override the generation options by specifying them on the command line:  # For example, override the context size by doing: ./chatLLaMa --ctx_size 1024 -GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --repeat_penalty 1.17647}" +GEN_OPTIONS="${GEN_OPTIONS:---ctx_size 2048 --temp 0.7 --top_k 40 --top_p 0.5 --repeat_last_n 256 --batch_size 1024 --repeat_penalty 1.17647}"  # shellcheck disable=SC2086 # Intended splitting of GEN_OPTIONS  ./main $GEN_OPTIONS \ @@ -372,7 +372,7 @@ int main(int argc, char ** argv) {          n_past += embd.size();          embd.clear(); -        if ((int) embd_inp.size() <= input_consumed) { +        if ((int) embd_inp.size() <= input_consumed && !is_interacting) {              // out of user input, sample next token              const float top_k          = params.top_k;              const float top_p          = params.top_p; @@ -451,13 +451,16 @@ int main(int argc, char ** argv) {              }              // Check if each of the reverse prompts appears at the end of the output. -            for (std::string antiprompt : params.antiprompt) { +            for (std::string & antiprompt : params.antiprompt) {                  if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos) {                      is_interacting = true; +                    set_console_state(CONSOLE_STATE_USER_INPUT); +                    fflush(stdout);                      break;                  }              } -            if (is_interacting) { + +            if (n_past > 0 && is_interacting) {                  // potentially set color to indicate we are taking user input                  set_console_state(CONSOLE_STATE_USER_INPUT); @@ -495,7 +498,10 @@ int main(int argc, char ** argv) {                  input_noecho = true; // do not echo this again              } -            is_interacting = false; + +            if (n_past > 0) { +                is_interacting = false; +            }          }          // end of text token | 
