-rw-r--r--    main.cpp    21
1 file changed, 15 insertions, 6 deletions
@@ -258,6 +258,9 @@ int main(int argc, char ** argv) {
         params.interactive = true;
     }
 
+    // determine newline token
+    auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
+
     fprintf(stderr, "\n");
     fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
     fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
@@ -359,6 +362,16 @@ int main(int argc, char ** argv) {
                 last_n_tokens.push_back(id);
             }
 
+            // replace end of text token with newline token when in interactive mode
+            if (id == llama_token_eos() && params.interactive) {
+                id = llama_token_newline.front();
+                if (params.antiprompt.size() != 0) {
+                    // tokenize and inject first reverse prompt
+                    const auto first_antiprompt = ::llama_tokenize(ctx, params.antiprompt.front(), false);
+                    embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
+                }
+            }
+
             // add it to the context
             embd.push_back(id);
@@ -451,12 +464,8 @@ int main(int argc, char ** argv) {
         // end of text token
         if (embd.back() == llama_token_eos()) {
-            if (params.interactive) {
-                is_interacting = true;
-            } else {
-                fprintf(stderr, " [end of text]\n");
-                break;
-            }
+            fprintf(stderr, " [end of text]\n");
+            break;
        }
 
         // In interactive mode, respect the maximum number of tokens and drop back to user input when reached.
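In summary, this change makes interactive mode treat end of text as a turn boundary rather than a stop condition: when the sampler produces the EOS token, it is swapped for the pre-tokenized newline token, and if a reverse prompt was supplied, its tokens are appended to the pending input so control returns to the user at the next antiprompt. The unconditional "[end of text]" break now only fires in non-interactive runs. Below is a minimal, self-contained sketch of that substitution logic; the token id constants and the tokenize_stub helper are made-up stand-ins for llama.cpp's llama_token_eos() and ::llama_tokenize(), not the real API.

// Sketch of the EOS-to-newline substitution added in the diff above.
// The tokenizer and token ids are stubbed; in llama.cpp they come from
// ::llama_tokenize(ctx, ...) and llama_token_eos().
#include <cstdio>
#include <string>
#include <vector>

using llama_token = int;

static const llama_token TOKEN_EOS     = 2;   // stand-in for llama_token_eos()
static const llama_token TOKEN_NEWLINE = 13;  // stand-in for the tokenized "\n"

// Stub tokenizer: maps each character to a fake token id (illustration only).
static std::vector<llama_token> tokenize_stub(const std::string & text) {
    std::vector<llama_token> out;
    for (char c : text) {
        out.push_back(static_cast<llama_token>(c));
    }
    return out;
}

int main() {
    bool interactive = true;
    std::vector<std::string> antiprompt = { "User:" };

    std::vector<llama_token> embd_inp;  // pending input tokens to be consumed
    std::vector<llama_token> embd;      // tokens queued for the next evaluation

    llama_token id = TOKEN_EOS;         // pretend the sampler just produced EOS

    // Same shape as the inserted block in the diff: in interactive mode, do not
    // stop on EOS; emit a newline instead and queue the first reverse prompt.
    if (id == TOKEN_EOS && interactive) {
        id = TOKEN_NEWLINE;
        if (!antiprompt.empty()) {
            const auto first_antiprompt = tokenize_stub(antiprompt.front());
            embd_inp.insert(embd_inp.end(), first_antiprompt.begin(), first_antiprompt.end());
        }
    }

    // add it to the context, as in the original loop
    embd.push_back(id);

    std::printf("next token id: %d, queued input tokens: %zu\n", id, embd_inp.size());
    return 0;
}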
