From a3c1232c3f475f0a77b9cc5225516ac31c567a06 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov <ggerganov@gmail.com>
Date: Wed, 8 Jan 2025 12:55:36 +0200
Subject: [PATCH] arg : option to exclude arguments from specific examples
 (#11136)

* arg : option to exclude arguments from specific examples

ggml-ci

* readme : remove old args [no ci]
---
 common/arg.cpp            | 17 +++++++++++++----
 common/arg.h              |  3 +++
 examples/server/README.md |  3 ---
 3 files changed, 16 insertions(+), 7 deletions(-)

diff --git a/common/arg.cpp b/common/arg.cpp
index c81b15217..27886b84e 100644
--- a/common/arg.cpp
+++ b/common/arg.cpp
@@ -22,6 +22,11 @@ common_arg & common_arg::set_examples(std::initializer_list<enum llama_example>
     return *this;
 }
 
+common_arg & common_arg::set_excludes(std::initializer_list<enum llama_example> excludes) {
+    this->excludes = std::move(excludes);
+    return *this;
+}
+
 common_arg & common_arg::set_env(const char * env) {
     help = help + "\n(env: " + env + ")";
     this->env = env;
@@ -37,6 +42,10 @@ bool common_arg::in_example(enum llama_example ex) {
     return examples.find(ex) != examples.end();
 }
 
+bool common_arg::is_exclude(enum llama_example ex) {
+    return excludes.find(ex) != excludes.end();
+}
+
 bool common_arg::get_value_from_env(std::string & output) {
     if (env == nullptr) return false;
     char * value = std::getenv(env);
@@ -420,7 +429,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
      * - if both {LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_*,} are set, we will prioritize the LLAMA_EXAMPLE_* matching current example
      */
     auto add_opt = [&](common_arg arg) {
-        if (arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) {
+        if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
             ctx_arg.options.push_back(std::move(arg));
         }
     };
@@ -649,7 +658,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, const std::string & value) {
             params.prompt = value;
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--no-perf"},
         string_format("disable internal libllama performance timings (default: %s)", params.no_perf ? "true" : "false"),
@@ -673,7 +682,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 params.prompt.pop_back();
             }
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"--in-file"}, "FNAME",
         "an input file (repeat to specify multiple files)",
@@ -700,7 +709,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.prompt = ss.str();
             fprintf(stderr, "Read %zu bytes from binary file %s\n", params.prompt.size(), value.c_str());
         }
-    ));
+    ).set_excludes({LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"-e", "--escape"},
         string_format("process escapes sequences (\\n, \\r, \\t, \\', \\\", \\\\) (default: %s)", params.escape ? "true" : "false"),
diff --git a/common/arg.h b/common/arg.h
index a6700d323..49ab8667b 100644
--- a/common/arg.h
+++ b/common/arg.h
@@ -12,6 +12,7 @@
 
 struct common_arg {
     std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
+    std::set<enum llama_example> excludes = {};
     std::vector<const char *> args;
     const char * value_hint   = nullptr; // help text or example for arg value
     const char * value_hint_2 = nullptr; // for second arg value
@@ -53,9 +54,11 @@ struct common_arg {
     ) : args(args), value_hint(value_hint), value_hint_2(value_hint_2), help(help), handler_str_str(handler) {}
 
     common_arg & set_examples(std::initializer_list<enum llama_example> examples);
+    common_arg & set_excludes(std::initializer_list<enum llama_example> excludes);
     common_arg & set_env(const char * env);
     common_arg & set_sparam();
     bool in_example(enum llama_example ex);
+    bool is_exclude(enum llama_example ex);
     bool get_value_from_env(std::string & output);
     bool has_value_from_env();
     std::string to_string();
diff --git a/examples/server/README.md b/examples/server/README.md
index 3ce16945a..1f0a27d96 100644
--- a/examples/server/README.md
+++ b/examples/server/README.md
@@ -45,10 +45,7 @@ The project is under active development, and we are [looking for feedback and co
 | `-ub, --ubatch-size N` | physical maximum batch size (default: 512)<br/>(env: LLAMA_ARG_UBATCH) |
 | `--keep N` | number of tokens to keep from the initial prompt (default: 0, -1 = all) |
 | `-fa, --flash-attn` | enable Flash Attention (default: disabled)<br/>(env: LLAMA_ARG_FLASH_ATTN) |
-| `-p, --prompt PROMPT` | prompt to start generation with |
 | `--no-perf` | disable internal libllama performance timings (default: false)<br/>(env: LLAMA_ARG_NO_PERF) |
-| `-f, --file FNAME` | a file containing the prompt (default: none) |
-| `-bf, --binary-file FNAME` | binary file containing the prompt (default: none) |
 | `-e, --escape` | process escapes sequences (\n, \r, \t, \', \", \\) (default: true) |
 | `--no-escape` | do not process escape sequences |
 | `--rope-scaling {none,linear,yarn}` | RoPE frequency scaling method, defaults to linear unless specified by the model<br/>(env: LLAMA_ARG_ROPE_SCALING_TYPE) |
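
The whole mechanism reduces to the one-line predicate change in add_opt: an option is registered for the current example only if it is listed for that example (or for LLAMA_EXAMPLE_COMMON) and not explicitly excluded from it. The following minimal standalone sketch shows that behavior in isolation; it is not llama.cpp code — the miniature enum, common_arg, and constructor re-declare only what this patch touches — but it compiles on its own and shows why the three prompt flags drop out of the server's option table removed above:

    // Standalone model of the include/exclude filter introduced by this patch.
    // The real declarations live in common/arg.{h,cpp}; this sketch re-creates
    // only the fields and methods the patch touches, plus a tiny constructor.
    #include <cstdio>
    #include <initializer_list>
    #include <set>
    #include <utility>
    #include <vector>

    enum llama_example { LLAMA_EXAMPLE_COMMON, LLAMA_EXAMPLE_MAIN, LLAMA_EXAMPLE_SERVER };

    struct common_arg {
        std::set<enum llama_example> examples = {LLAMA_EXAMPLE_COMMON};
        std::set<enum llama_example> excludes = {};
        std::vector<const char *> args;

        common_arg(std::initializer_list<const char *> args) : args(args) {}

        common_arg & set_excludes(std::initializer_list<enum llama_example> excludes) {
            this->excludes = std::move(excludes);
            return *this;
        }
        bool in_example(enum llama_example ex) { return examples.find(ex) != examples.end(); }
        bool is_exclude(enum llama_example ex) { return excludes.find(ex) != excludes.end(); }
    };

    int main() {
        const enum llama_example ex = LLAMA_EXAMPLE_SERVER; // the example being initialized
        std::vector<common_arg> options;

        // Same predicate as the patched add_opt in common_params_parser_init().
        auto add_opt = [&](common_arg arg) {
            if ((arg.in_example(ex) || arg.in_example(LLAMA_EXAMPLE_COMMON)) && !arg.is_exclude(ex)) {
                options.push_back(std::move(arg));
            }
        };

        add_opt(common_arg({"-p", "--prompt"}).set_excludes({LLAMA_EXAMPLE_SERVER})); // filtered out
        add_opt(common_arg({"-e", "--escape"}));                                      // kept

        for (const auto & o : options) {
            printf("registered: %s\n", o.args[0]);
        }
        return 0;
    }

Compiled as C++11 or later, this prints only "registered: -e": the prompt option is registered for LLAMA_EXAMPLE_COMMON but excluded for LLAMA_EXAMPLE_SERVER, matching the rows removed from examples/server/README.md.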