diff options
author | Tomáš Pazdiora <tomas.pazdiora@gmail.com> | 2023-04-14 17:19:17 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-04-14 18:19:17 +0300 |
commit | f4d277ae17247ee51129ef1a9ff74d377cc90b1b (patch) | |
tree | 6473c4a1d5ac2ccfe39f496d6c8c60395b84c10d /examples/main/main.cpp | |
parent | c9a59b70a54e0bc05777df287feaea3dbe0310c4 (diff) |
main : alternative instruct mode (Vicuna support, etc.) (#863)
* Add support for configs, add configurable prefixes / suffixes, deprecate instruct mode, add stop prompt
* Add multiline mode, update text input.
* bugfix
* update implementation
* typos
* Change --multiline implementation to be toggled by EOF.
* bugfix
* default multiline mode
* add more configs
* update formating
* update formatting
* apply suggestions
Diffstat (limited to 'examples/main/main.cpp')
-rw-r--r-- | examples/main/main.cpp | 172 |
1 files changed, 121 insertions, 51 deletions
diff --git a/examples/main/main.cpp b/examples/main/main.cpp index ba153cb..68b4b28 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -30,7 +30,8 @@ static bool is_interacting = false; #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) void sigint_handler(int signo) { set_console_color(con_st, CONSOLE_COLOR_DEFAULT); - printf("\n"); // this also force flush stdout. + fflush(stdout); + fflush(stderr); if (signo == SIGINT) { if (!is_interacting) { is_interacting=true; @@ -89,6 +90,8 @@ int main(int argc, char ** argv) { params.prompt = gpt_random_prompt(rng); } + bool instruct_mode = !params.instruct_prefix.empty() || !params.instruct_suffix.empty(); + // params.prompt = R"(// this function checks if the number n is prime //bool is_prime(int n) {)"; @@ -153,22 +156,20 @@ int main(int argc, char ** argv) { } // number of tokens to keep when resetting context - if (params.n_keep < 0 || params.n_keep > (int)embd_inp.size() || params.instruct) { + if (params.n_keep < 0 || params.n_keep > (int)embd_inp.size()) { params.n_keep = (int)embd_inp.size(); } // prefix & suffix for instruct mode - const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true); - const auto inp_sfx = ::llama_tokenize(ctx, "\n\n### Response:\n\n", false); - - // in instruct mode, we inject a prefix and a suffix to each input by the user - if (params.instruct) { - params.interactive_start = true; - params.antiprompt.push_back("### Instruction:\n\n"); + const auto inp_pfx = ::llama_tokenize(ctx, params.instruct_prefix, params.instruct_prefix_bos); + std::string instruct_suffix = params.instruct_suffix; + if (params.rm_trailing_space_workaround) { + if (instruct_suffix.back() == ' ') { instruct_suffix.pop_back(); } } + const auto inp_sfx = ::llama_tokenize(ctx, instruct_suffix, params.instruct_suffix_bos); // enable interactive mode if reverse prompt or interactive start is specified - if (params.antiprompt.size() != 0 || params.interactive_start) { + if (params.antiprompt.size() != 0 || params.stopprompt.size() != 0 || params.interactive_start) { params.interactive = true; } @@ -210,10 +211,21 @@ int main(int argc, char ** argv) { fprintf(stderr, "Reverse prompt: '%s'\n", antiprompt.c_str()); } } + if (params.stopprompt.size()) { + for (auto stopprompt : params.stopprompt) { + fprintf(stderr, "Stop prompt: '%s'\n", stopprompt.c_str()); + } + } if (!params.input_prefix.empty()) { fprintf(stderr, "Input prefix: '%s'\n", params.input_prefix.c_str()); } + if (!params.instruct_prefix.empty()) { + fprintf(stderr, "Instruct prefix %s: '%s'\n", params.instruct_prefix_bos ? "(with bos token)" : "", params.instruct_prefix.c_str()); + } + if (!params.instruct_suffix.empty()) { + fprintf(stderr, "Instruct suffix %s: '%s'\n", params.instruct_suffix_bos ? "(with bos token)" : "", params.instruct_suffix.c_str()); + } } fprintf(stderr, "sampling: temp = %f, top_k = %d, top_p = %f, repeat_last_n = %i, repeat_penalty = %f\n", params.temp, params.top_k, params.top_p, params.repeat_last_n, params.repeat_penalty); @@ -229,12 +241,29 @@ int main(int argc, char ** argv) { #if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__)) || defined (_WIN32) " - Press Ctrl+C to interject at any time.\n" #endif - " - Press Return to return control to LLaMa.\n" - " - If you want to submit another line, end your input in '\\'.\n\n"); + ); + if (params.multiline_mode) { + fprintf(stderr, " - Press Return to return control to LLaMa.\n" +#if defined (_WIN32) + " - [MULTILINE MODE] Press Ctrl+Z then Return (EOF) to toggle.\n\n"); +#else + " - [MULTILINE MODE] Press Ctrl+D (EOF) to toggle.\n\n"); +#endif + } + else { + fprintf(stderr, " - Press Return to return control to LLaMa.\n" + " - If you want to submit another line, end your input in '\\'.\n\n"); + } is_interacting = params.interactive_start; } - bool is_antiprompt = false; + struct Antiprompt { + bool any = false; + bool trailing_space = false; + size_t len; + bool is_stop_prompt = false; + } antiprompt; + bool input_noecho = false; int n_past = 0; @@ -304,7 +333,7 @@ int main(int argc, char ** argv) { } // replace end of text token with newline token when in interactive mode - if (id == llama_token_eos() && params.interactive && !params.instruct) { + if (id == llama_token_eos() && params.interactive && !instruct_mode) { id = llama_token_newline.front(); if (params.antiprompt.size() != 0) { // tokenize and inject first reverse prompt @@ -350,27 +379,72 @@ int main(int argc, char ** argv) { // check if we should prompt the user for more if (params.interactive && (int) embd_inp.size() <= n_consumed) { - // check for reverse prompt - if (params.antiprompt.size()) { + // check for reverse prompt or stop prompt + if (params.antiprompt.size() || params.stopprompt.size()) { std::string last_output; for (auto id : last_n_tokens) { last_output += llama_token_to_str(ctx, id); } - is_antiprompt = false; + antiprompt.any = false; + antiprompt.is_stop_prompt = false; // Check if each of the reverse prompts appears at the end of the output. - for (std::string & antiprompt : params.antiprompt) { - if (last_output.find(antiprompt.c_str(), last_output.length() - antiprompt.length(), antiprompt.length()) != std::string::npos) { + for (std::string & prompt : params.antiprompt) { + if (params.rm_trailing_space_workaround) { + antiprompt.trailing_space = prompt.back() == ' '; + antiprompt.len = prompt.length() - (antiprompt.trailing_space ? 1 : 0); + } + if (last_output.find(prompt.c_str(), last_output.length() - antiprompt.len, antiprompt.len) != std::string::npos) { is_interacting = true; - is_antiprompt = true; + antiprompt.any = true; set_console_color(con_st, CONSOLE_COLOR_USER_INPUT); fflush(stdout); break; } } + if (!antiprompt.any) { + for (std::string & prompt : params.stopprompt) { + if (params.rm_trailing_space_workaround) { + antiprompt.trailing_space = prompt.back() == ' '; + antiprompt.len = prompt.length() - (antiprompt.trailing_space ? 1 : 0); + } + if (last_output.find(prompt.c_str(), last_output.length() - antiprompt.len, antiprompt.len) != std::string::npos) { + is_interacting = true; + antiprompt.any = true; + antiprompt.is_stop_prompt = true; + set_console_color(con_st, CONSOLE_COLOR_USER_INPUT); + fflush(stdout); + break; + } + } + } } - if (n_past > 0 && is_interacting) { + if (n_past > 0 && is_interacting) + { + std::string buffer; + if (!params.clean_interface && !params.instruct_prefix.empty() && !antiprompt.any) { + // avoid printing again user's new line (TODO: try to revert enter press and print newline) + int i = params.instruct_prefix.front() == '\n' ? 1 : 0; + for (; i < inp_pfx.size(); i++) { + printf("%s", llama_token_to_str(ctx, inp_pfx[i])); + } + fflush(stdout); + } + if (params.rm_trailing_space_workaround) { + // add only if not stopprompt (as stopprompt could be used to pause + // assistant and then continue without input - adding back trailing + // space may mess it up.) + if (!antiprompt.is_stop_prompt && antiprompt.any && antiprompt.trailing_space) { + // add back removed trailing space to buffer(workaround) + buffer += ' '; + if (!params.clean_interface) { + printf("%s", buffer.c_str()); + } + fflush(stdout); + } + } + // potentially set color to indicate we are taking user input set_console_color(con_st, CONSOLE_COLOR_USER_INPUT); @@ -379,49 +453,45 @@ int main(int argc, char ** argv) { signal(SIGINT, sigint_handler); #endif - if (params.instruct) { + if (params.clean_interface) { printf("\n> "); } - std::string buffer; if (!params.input_prefix.empty()) { buffer += params.input_prefix; printf("%s", buffer.c_str()); } - std::string line; - bool another_line = true; - do { -#if defined(_WIN32) - std::wstring wline; - if (!std::getline(std::wcin, wline)) { - // input stream is bad or EOF received - return 0; - } - win32_utf8_encode(wline, line); -#else - if (!std::getline(std::cin, line)) { - // input stream is bad or EOF received - return 0; - } -#endif - if (line.empty() || line.back() != '\\') { - another_line = false; - } else { - line.pop_back(); // Remove the continue character - } - buffer += line + '\n'; // Append the line to the result - } while (another_line); + if (!get_input_text(buffer, params.multiline_mode)) { + // input stream is bad + return 1; + } + if (!antiprompt.is_stop_prompt) { + buffer += "\n"; + } // done taking input, reset color set_console_color(con_st, CONSOLE_COLOR_DEFAULT); + if (!params.clean_interface && !params.instruct_suffix.empty() && !antiprompt.is_stop_prompt) { + // avoid printing again user's new line (TODO: try to revert enter press and print newline) + int i = params.instruct_suffix.front() == '\n' ? 1 : 0; + for (; i < inp_sfx.size(); i++) { + printf("%s", llama_token_to_str(ctx, inp_sfx[i])); + } + // if (remove trailing space workaround) { + // We won't add back removed trailing space here, because assistant continues here, + // and it may mess up it's output (remove trailing space workaround). + // } + fflush(stdout); + } + // Add tokens to embd only if the input buffer is non-empty // Entering a empty line lets the user pass control back if (buffer.length() > 1) { - // instruct mode: insert instruction prefix - if (params.instruct && !is_antiprompt) { + // insert input prefix + if (!params.instruct_prefix.empty() && !antiprompt.any) { n_consumed = embd_inp.size(); embd_inp.insert(embd_inp.end(), inp_pfx.begin(), inp_pfx.end()); } @@ -429,8 +499,8 @@ int main(int argc, char ** argv) { auto line_inp = ::llama_tokenize(ctx, buffer, false); embd_inp.insert(embd_inp.end(), line_inp.begin(), line_inp.end()); - // instruct mode: insert response suffix - if (params.instruct) { + // insert response suffix + if (!params.instruct_suffix.empty() && !antiprompt.is_stop_prompt) { embd_inp.insert(embd_inp.end(), inp_sfx.begin(), inp_sfx.end()); } @@ -447,7 +517,7 @@ int main(int argc, char ** argv) { // end of text token if (!embd.empty() && embd.back() == llama_token_eos()) { - if (params.instruct) { + if (instruct_mode) { is_interacting = true; } else { fprintf(stderr, " [end of text]\n"); |