mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-26 03:12:23 +01:00
Disable prompt verbosity by default and add option to enable (#480)
This commit is contained in:
parent
09aecbf628
commit
502a400192
15
main.cpp
15
main.cpp
@ -275,13 +275,16 @@ int main(int argc, char ** argv) {
|
|||||||
// determine newline token
|
// determine newline token
|
||||||
auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
|
auto llama_token_newline = ::llama_tokenize(ctx, "\n", false);
|
||||||
|
|
||||||
fprintf(stderr, "\n");
|
if (params.verbose_prompt) {
|
||||||
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
fprintf(stderr, "\n");
|
||||||
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
|
fprintf(stderr, "%s: prompt: '%s'\n", __func__, params.prompt.c_str());
|
||||||
for (int i = 0; i < (int) embd_inp.size(); i++) {
|
fprintf(stderr, "%s: number of tokens in prompt = %zu\n", __func__, embd_inp.size());
|
||||||
fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
|
for (int i = 0; i < (int) embd_inp.size(); i++) {
|
||||||
|
fprintf(stderr, "%6d -> '%s'\n", embd_inp[i], llama_token_to_str(ctx, embd_inp[i]));
|
||||||
|
}
|
||||||
|
fprintf(stderr, "\n");
|
||||||
}
|
}
|
||||||
fprintf(stderr, "\n");
|
|
||||||
if (params.interactive) {
|
if (params.interactive) {
|
||||||
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
#if defined (__unix__) || (defined (__APPLE__) && defined (__MACH__))
|
||||||
struct sigaction sigint_action;
|
struct sigaction sigint_action;
|
||||||
|
@ -134,6 +134,8 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
|
|||||||
params.use_mlock = true;
|
params.use_mlock = true;
|
||||||
} else if (arg == "--mtest") {
|
} else if (arg == "--mtest") {
|
||||||
params.mem_test = true;
|
params.mem_test = true;
|
||||||
|
} else if (arg == "--verbose_prompt") {
|
||||||
|
params.verbose_prompt = true;
|
||||||
} else if (arg == "-r" || arg == "--reverse-prompt") {
|
} else if (arg == "-r" || arg == "--reverse-prompt") {
|
||||||
if (++i >= argc) {
|
if (++i >= argc) {
|
||||||
invalid_param = true;
|
invalid_param = true;
|
||||||
@ -212,6 +214,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
|
|||||||
fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
|
fprintf(stderr, " --mlock force system to keep model in RAM rather than swapping or compressing\n");
|
||||||
}
|
}
|
||||||
fprintf(stderr, " --mtest compute maximum memory usage\n");
|
fprintf(stderr, " --mtest compute maximum memory usage\n");
|
||||||
|
fprintf(stderr, " --verbose-prompt print prompt before generation\n");
|
||||||
fprintf(stderr, " -m FNAME, --model FNAME\n");
|
fprintf(stderr, " -m FNAME, --model FNAME\n");
|
||||||
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
|
fprintf(stderr, " model path (default: %s)\n", params.model.c_str());
|
||||||
fprintf(stderr, "\n");
|
fprintf(stderr, "\n");
|
||||||
|
1
utils.h
1
utils.h
@ -48,6 +48,7 @@ struct gpt_params {
|
|||||||
bool perplexity = false; // compute perplexity over the prompt
|
bool perplexity = false; // compute perplexity over the prompt
|
||||||
bool use_mlock = false; // use mlock to keep model in memory
|
bool use_mlock = false; // use mlock to keep model in memory
|
||||||
bool mem_test = false; // compute maximum memory usage
|
bool mem_test = false; // compute maximum memory usage
|
||||||
|
bool verbose_prompt = false; // print prompt tokens before generation
|
||||||
};
|
};
|
||||||
|
|
||||||
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
|
bool gpt_params_parse(int argc, char ** argv, gpt_params & params);
|
||||||
|
Loading…
Reference in New Issue
Block a user