From 722d33f34ec74c6f7046109f936d0928ffe171bc Mon Sep 17 00:00:00 2001 From: Yann Follet <131855179+YannFollet@users.noreply.github.com> Date: Sun, 14 Jan 2024 00:09:08 +0800 Subject: [PATCH] main : add parameter --no-display-prompt (#4541) * add the parameter --no-display-prompt; combined with --log-disable, only the generated tokens are displayed * remove empty line --------- Co-authored-by: Georgi Gerganov --- common/common.cpp | 6 +++++- common/common.h | 1 + examples/main/main.cpp | 7 ++++++- 3 files changed, 12 insertions(+), 2 deletions(-) diff --git a/common/common.cpp b/common/common.cpp index 322b9f91e..c11006bcb 100644 --- a/common/common.cpp +++ b/common/common.cpp @@ -617,6 +617,8 @@ bool gpt_params_parse_ex(int argc, char ** argv, gpt_params & params) { params.numa = true; } else if (arg == "--verbose-prompt") { params.verbose_prompt = true; + } else if (arg == "--no-display-prompt") { + params.display_prompt = false; } else if (arg == "-r" || arg == "--reverse-prompt") { if (++i >= argc) { invalid_param = true; @@ -936,11 +938,12 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) { printf(" -mg i, --main-gpu i the GPU to use for the model (with split-mode = none),\n"); printf(" or for intermediate results and KV (with split-mode = row) (default: %d)\n", params.main_gpu); #endif + printf(" --verbose-prompt print a verbose prompt before generation (default: %s)\n", params.verbose_prompt ? "true" : "false"); + printf(" --no-display-prompt don't print prompt at generation (default: %s)\n", !params.display_prompt ? 
"true" : "false"); printf(" -gan N, --grp-attn-n N\n"); printf(" group-attention factor (default: %d)\n", params.grp_attn_n); printf(" -gaw N, --grp-attn-w N\n"); printf(" group-attention width (default: %.1f)\n", (double)params.grp_attn_w); - printf(" --verbose-prompt print prompt before generation\n"); printf(" -dkvc, --dump-kv-cache\n"); printf(" verbose print of the KV cache\n"); printf(" -nkvo, --no-kv-offload\n"); @@ -1582,6 +1585,7 @@ void dump_non_result_info_yaml(FILE * stream, const gpt_params & params, const l fprintf(stream, "min_p: %f # default: 0.0\n", sparams.min_p); fprintf(stream, "typical_p: %f # default: 1.0\n", sparams.typical_p); fprintf(stream, "verbose_prompt: %s # default: false\n", params.verbose_prompt ? "true" : "false"); + fprintf(stream, "display_prompt: %s # default: true\n", params.display_prompt ? "true" : "false"); } // diff --git a/common/common.h b/common/common.h index f29be5b5a..096468243 100644 --- a/common/common.h +++ b/common/common.h @@ -126,6 +126,7 @@ struct gpt_params { bool use_mlock = false; // use mlock to keep model in memory bool numa = false; // attempt optimizations that help on some NUMA systems bool verbose_prompt = false; // print prompt tokens before generation + bool display_prompt = true; // print prompt before generation bool infill = false; // use infill mode bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes bool no_kv_offload = false; // disable KV offloading diff --git a/examples/main/main.cpp b/examples/main/main.cpp index c53b29978..58b7f807a 100644 --- a/examples/main/main.cpp +++ b/examples/main/main.cpp @@ -477,6 +477,7 @@ int main(int argc, char ** argv) { bool is_antiprompt = false; bool input_echo = true; + bool display = true; bool need_to_save_session = !path_session.empty() && n_matching_session_tokens < embd_inp.size(); int n_past = 0; @@ -491,6 +492,7 @@ int main(int argc, char ** argv) { // the first thing we will do is to output the prompt, so set color 
accordingly console::set_display(console::prompt); + display = params.display_prompt; std::vector embd; std::vector embd_guidance; @@ -707,7 +709,7 @@ int main(int argc, char ** argv) { } // display text - if (input_echo) { + if (input_echo && display) { for (auto id : embd) { const std::string token_str = llama_token_to_piece(ctx, id); printf("%s", token_str.c_str()); @@ -724,6 +726,7 @@ int main(int argc, char ** argv) { // reset color to default if there is no pending user input if (input_echo && (int) embd_inp.size() == n_consumed) { console::set_display(console::reset); + display = true; } // if not currently processing queued inputs; @@ -796,6 +799,7 @@ int main(int argc, char ** argv) { // color user input only console::set_display(console::user_input); + display = params.display_prompt; std::string line; bool another_line = true; @@ -806,6 +810,7 @@ int main(int argc, char ** argv) { // done taking input, reset color console::set_display(console::reset); + display = true; // Add tokens to embd only if the input buffer is non-empty // Entering a empty line lets the user pass control back