From 5d7002d4372ebf107cfaf46fcd90df27b204f330 Mon Sep 17 00:00:00 2001 From: minarchist Date: Tue, 2 Jan 2024 04:38:15 -0600 Subject: [PATCH] server : add --override-kv parameter (#4710) * Changes to server to allow metadata override * documentation * flake.nix: expose full scope in legacyPackages * flake.nix: rocm not yet supported on aarch64, so hide the output * flake.nix: expose checks * workflows: nix-ci: init; build flake outputs * workflows: nix-ci: add a job for eval * workflows: weekly `nix flake update` * workflows: nix-flakestry: drop tag filters ...and add a job for flakehub.com * workflows: nix-ci: add a qemu job for jetsons * flake.nix: suggest the binary caches * flake.lock: update to a commit recently cached by nixpkgs-cuda-ci --------- Co-authored-by: John Co-authored-by: Someone Serge --- examples/server/server.cpp | 51 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 51 insertions(+) diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 52d9b9768..b77d3f079 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -2016,6 +2016,10 @@ static void server_print_usage(const char *argv0, const gpt_params ¶ms, printf(" --mmproj MMPROJ_FILE path to a multimodal projector file for LLaVA.\n"); printf(" --log-disable disables logging to a file.\n"); printf("\n"); + printf(" --override-kv KEY=TYPE:VALUE\n"); + printf(" advanced option to override model metadata by key. may be specified multiple times.\n"); + printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n"); + printf("\n"); } static void server_params_parse(int argc, char **argv, server_params &sparams, @@ -2379,6 +2383,49 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, log_set_target(stdout); LOG_INFO("logging to file is disabled.", {}); } + else if (arg == "--override-kv") + { + if (++i >= argc) { + invalid_param = true; + break; + } + char * sep = strchr(argv[i], '='); + if (sep == nullptr || sep - argv[i] >= 128) { + fprintf(stderr, "error: Malformed KV override: %s\n", argv[i]); + invalid_param = true; + break; + } + struct llama_model_kv_override kvo; + std::strncpy(kvo.key, argv[i], sep - argv[i]); + kvo.key[sep - argv[i]] = 0; + sep++; + if (strncmp(sep, "int:", 4) == 0) { + sep += 4; + kvo.tag = LLAMA_KV_OVERRIDE_INT; + kvo.int_value = std::atol(sep); + } else if (strncmp(sep, "float:", 6) == 0) { + sep += 6; + kvo.tag = LLAMA_KV_OVERRIDE_FLOAT; + kvo.float_value = std::atof(sep); + } else if (strncmp(sep, "bool:", 5) == 0) { + sep += 5; + kvo.tag = LLAMA_KV_OVERRIDE_BOOL; + if (std::strcmp(sep, "true") == 0) { + kvo.bool_value = true; + } else if (std::strcmp(sep, "false") == 0) { + kvo.bool_value = false; + } else { + fprintf(stderr, "error: Invalid boolean value for KV override: %s\n", argv[i]); + invalid_param = true; + break; + } + } else { + fprintf(stderr, "error: Invalid type for KV override: %s\n", argv[i]); + invalid_param = true; + break; + } + params.kv_overrides.push_back(kvo); + } else { fprintf(stderr, "error: unknown argument: %s\n", arg.c_str()); @@ -2386,6 +2433,10 @@ static void server_params_parse(int argc, char **argv, server_params &sparams, exit(1); } } + if (!params.kv_overrides.empty()) { + params.kv_overrides.emplace_back(llama_model_kv_override()); + params.kv_overrides.back().key[0] = 0; + } if (invalid_param) {