From 79b2b266db6b198b5af450982c3cd61120fac951 Mon Sep 17 00:00:00 2001 From: Georgi Gerganov Date: Sat, 25 Mar 2023 21:51:41 +0200 Subject: [PATCH] If n_predict == -1, generate forever --- examples/chat.sh | 2 +- examples/common.cpp | 2 +- examples/main/main.cpp | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/examples/chat.sh b/examples/chat.sh index 97973d056..9a928ef05 100755 --- a/examples/chat.sh +++ b/examples/chat.sh @@ -11,6 +11,6 @@ cd .. # # "--keep 48" is based on the contents of prompts/chat-with-bob.txt # -./main -m ./models/7B/ggml-model-q4_0.bin -c 2048 -b 1024 -n 256 --keep 48 \ +./main -m ./models/7B/ggml-model-q4_0.bin -c 512 -b 1024 -n 256 --keep 48 \ --repeat_penalty 1.0 --color -i \ -r "User:" -f prompts/chat-with-bob.txt diff --git a/examples/common.cpp b/examples/common.cpp index 866a6b063..2ab000f4f 100644 --- a/examples/common.cpp +++ b/examples/common.cpp @@ -204,7 +204,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { fprintf(stderr, " --in-prefix STRING string to prefix user inputs with (default: empty)\n"); fprintf(stderr, " -f FNAME, --file FNAME\n"); fprintf(stderr, " prompt file to start generation.\n"); - fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d)\n", params.n_predict); + fprintf(stderr, " -n N, --n_predict N number of tokens to predict (default: %d, -1 - infinity)\n", params.n_predict); fprintf(stderr, " --top_k N top-k sampling (default: %d)\n", params.top_k); fprintf(stderr, " --top_p N top-p sampling (default: %.1f)\n", params.top_p); fprintf(stderr, " --repeat_last_n N last n tokens to consider for penalize (default: %d)\n", params.repeat_last_n); diff --git a/examples/main/main.cpp b/examples/main/main.cpp index f78936d45..a453743a5 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -199,7 +199,6 @@ int main(int argc, char ** argv) { } params.n_keep = std::min(params.n_keep, (int) embd_inp.size()); - //params.n_predict = std::min(params.n_predict, n_ctx - (int) embd_inp.size()); // prefix & suffix for instruct mode const auto inp_pfx = ::llama_tokenize(ctx, "\n\n### Instruction:\n\n", true); @@ -293,7 +292,7 @@ int main(int argc, char ** argv) { std::vector embd; - while (n_remain > 0 || params.interactive) { + while (n_remain != 0 || params.interactive) { // predict if (embd.size() > 0) { // infinite text generation via context swapping