server : add system_fingerprint to chat/completion (#10917)

* server : add system_fingerprint to chat/completion

* update README
This commit is contained in:
Xuan Son Nguyen 2024-12-23 12:02:44 +01:00 committed by GitHub
parent 86bf31cfe6
commit 485dc01214
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 25 additions and 15 deletions

View File

@ -724,7 +724,8 @@ This endpoint is public (no API key check). By default, it is read-only. To make
}, },
"total_slots": 1, "total_slots": 1,
"model_path": "../models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf", "model_path": "../models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
"chat_template": "..." "chat_template": "...",
"build_info": "b(build number)-(build commit hash)"
} }
``` ```

View File

@ -595,10 +595,11 @@ struct server_task_result_cmpl_final : server_task_result {
std::time_t t = std::time(0); std::time_t t = std::time(0);
json res = json { json res = json {
{"choices", json::array({choice})}, {"choices", json::array({choice})},
{"created", t}, {"created", t},
{"model", oaicompat_model}, {"model", oaicompat_model},
{"object", "chat.completion"}, {"system_fingerprint", build_info},
{"object", "chat.completion"},
{"usage", json { {"usage", json {
{"completion_tokens", n_decoded}, {"completion_tokens", n_decoded},
{"prompt_tokens", n_prompt_tokens}, {"prompt_tokens", n_prompt_tokens},
@ -632,11 +633,12 @@ struct server_task_result_cmpl_final : server_task_result {
}; };
json ret = json { json ret = json {
{"choices", json::array({choice})}, {"choices", json::array({choice})},
{"created", t}, {"created", t},
{"id", oaicompat_cmpl_id}, {"id", oaicompat_cmpl_id},
{"model", oaicompat_model}, {"model", oaicompat_model},
{"object", "chat.completion.chunk"}, {"system_fingerprint", build_info},
{"object", "chat.completion.chunk"},
{"usage", json { {"usage", json {
{"completion_tokens", n_decoded}, {"completion_tokens", n_decoded},
{"prompt_tokens", n_prompt_tokens}, {"prompt_tokens", n_prompt_tokens},
@ -761,11 +763,12 @@ struct server_task_result_cmpl_partial : server_task_result {
} }
json ret = json { json ret = json {
{"choices", choices}, {"choices", choices},
{"created", t}, {"created", t},
{"id", oaicompat_cmpl_id}, {"id", oaicompat_cmpl_id},
{"model", oaicompat_model}, {"model", oaicompat_model},
{"object", "chat.completion.chunk"} {"system_fingerprint", build_info},
{"object", "chat.completion.chunk"}
}; };
if (timings.prompt_n >= 0) { if (timings.prompt_n >= 0) {
@ -3476,6 +3479,7 @@ int main(int argc, char ** argv) {
{ "total_slots", ctx_server.params_base.n_parallel }, { "total_slots", ctx_server.params_base.n_parallel },
{ "model_path", ctx_server.params_base.model }, { "model_path", ctx_server.params_base.model },
{ "chat_template", llama_get_chat_template(ctx_server.model) }, { "chat_template", llama_get_chat_template(ctx_server.model) },
{ "build_info", build_info },
}; };
res_ok(res, data); res_ok(res, data);

View File

@ -31,6 +31,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
}) })
assert res.status_code == 200 assert res.status_code == 200
assert "cmpl" in res.body["id"] # make sure the completion id has the expected format assert "cmpl" in res.body["id"] # make sure the completion id has the expected format
assert res.body["system_fingerprint"].startswith("b")
assert res.body["model"] == model if model is not None else server.model_alias assert res.body["model"] == model if model is not None else server.model_alias
assert res.body["usage"]["prompt_tokens"] == n_prompt assert res.body["usage"]["prompt_tokens"] == n_prompt
assert res.body["usage"]["completion_tokens"] == n_predicted assert res.body["usage"]["completion_tokens"] == n_predicted
@ -63,6 +64,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte
last_cmpl_id = None last_cmpl_id = None
for data in res: for data in res:
choice = data["choices"][0] choice = data["choices"][0]
assert data["system_fingerprint"].startswith("b")
assert "gpt-3.5" in data["model"] # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future assert "gpt-3.5" in data["model"] # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
if last_cmpl_id is None: if last_cmpl_id is None:
last_cmpl_id = data["id"] last_cmpl_id = data["id"]
@ -92,6 +94,7 @@ def test_chat_completion_with_openai_library():
seed=42, seed=42,
temperature=0.8, temperature=0.8,
) )
assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
assert res.choices[0].finish_reason == "length" assert res.choices[0].finish_reason == "length"
assert res.choices[0].message.content is not None assert res.choices[0].message.content is not None
assert match_regex("(Suddenly)+", res.choices[0].message.content) assert match_regex("(Suddenly)+", res.choices[0].message.content)

View File

@ -56,6 +56,8 @@ static T json_value(const json & body, const std::string & key, const T & defaul
} }
} }
// Build identifier string: the letter "b", then LLAMA_BUILD_NUMBER, a dash, and LLAMA_COMMIT.
// The server reports this value as "system_fingerprint" and "build_info" in its JSON responses.
static const std::string build_info =
    "b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT;
// //
// tokenizer and input processing utils // tokenizer and input processing utils
// //