mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-26 14:20:31 +01:00
server : add system_fingerprint to chat/completion (#10917)
* server : add system_fingerprint to chat/completion * update README
This commit is contained in:
parent
86bf31cfe6
commit
485dc01214
@ -724,7 +724,8 @@ This endpoint is public (no API key check). By default, it is read-only. To make
|
||||
},
|
||||
"total_slots": 1,
|
||||
"model_path": "../models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
|
||||
"chat_template": "..."
|
||||
"chat_template": "...",
|
||||
"build_info": "b(build number)-(build commit hash)"
|
||||
}
|
||||
```
|
||||
|
||||
|
@ -595,10 +595,11 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||
std::time_t t = std::time(0);
|
||||
|
||||
json res = json {
|
||||
{"choices", json::array({choice})},
|
||||
{"created", t},
|
||||
{"model", oaicompat_model},
|
||||
{"object", "chat.completion"},
|
||||
{"choices", json::array({choice})},
|
||||
{"created", t},
|
||||
{"model", oaicompat_model},
|
||||
{"system_fingerprint", build_info},
|
||||
{"object", "chat.completion"},
|
||||
{"usage", json {
|
||||
{"completion_tokens", n_decoded},
|
||||
{"prompt_tokens", n_prompt_tokens},
|
||||
@ -632,11 +633,12 @@ struct server_task_result_cmpl_final : server_task_result {
|
||||
};
|
||||
|
||||
json ret = json {
|
||||
{"choices", json::array({choice})},
|
||||
{"created", t},
|
||||
{"id", oaicompat_cmpl_id},
|
||||
{"model", oaicompat_model},
|
||||
{"object", "chat.completion.chunk"},
|
||||
{"choices", json::array({choice})},
|
||||
{"created", t},
|
||||
{"id", oaicompat_cmpl_id},
|
||||
{"model", oaicompat_model},
|
||||
{"system_fingerprint", build_info},
|
||||
{"object", "chat.completion.chunk"},
|
||||
{"usage", json {
|
||||
{"completion_tokens", n_decoded},
|
||||
{"prompt_tokens", n_prompt_tokens},
|
||||
@ -761,11 +763,12 @@ struct server_task_result_cmpl_partial : server_task_result {
|
||||
}
|
||||
|
||||
json ret = json {
|
||||
{"choices", choices},
|
||||
{"created", t},
|
||||
{"id", oaicompat_cmpl_id},
|
||||
{"model", oaicompat_model},
|
||||
{"object", "chat.completion.chunk"}
|
||||
{"choices", choices},
|
||||
{"created", t},
|
||||
{"id", oaicompat_cmpl_id},
|
||||
{"model", oaicompat_model},
|
||||
{"system_fingerprint", build_info},
|
||||
{"object", "chat.completion.chunk"}
|
||||
};
|
||||
|
||||
if (timings.prompt_n >= 0) {
|
||||
@ -3476,6 +3479,7 @@ int main(int argc, char ** argv) {
|
||||
{ "total_slots", ctx_server.params_base.n_parallel },
|
||||
{ "model_path", ctx_server.params_base.model },
|
||||
{ "chat_template", llama_get_chat_template(ctx_server.model) },
|
||||
{ "build_info", build_info },
|
||||
};
|
||||
|
||||
res_ok(res, data);
|
||||
|
@ -31,6 +31,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
|
||||
})
|
||||
assert res.status_code == 200
|
||||
assert "cmpl" in res.body["id"] # make sure the completion id has the expected format
|
||||
assert res.body["system_fingerprint"].startswith("b")
|
||||
assert res.body["model"] == model if model is not None else server.model_alias
|
||||
assert res.body["usage"]["prompt_tokens"] == n_prompt
|
||||
assert res.body["usage"]["completion_tokens"] == n_predicted
|
||||
@ -63,6 +64,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte
|
||||
last_cmpl_id = None
|
||||
for data in res:
|
||||
choice = data["choices"][0]
|
||||
assert data["system_fingerprint"].startswith("b")
|
||||
assert "gpt-3.5" in data["model"] # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
|
||||
if last_cmpl_id is None:
|
||||
last_cmpl_id = data["id"]
|
||||
@ -92,6 +94,7 @@ def test_chat_completion_with_openai_library():
|
||||
seed=42,
|
||||
temperature=0.8,
|
||||
)
|
||||
assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
|
||||
assert res.choices[0].finish_reason == "length"
|
||||
assert res.choices[0].message.content is not None
|
||||
assert match_regex("(Suddenly)+", res.choices[0].message.content)
|
||||
|
@ -56,6 +56,8 @@ static T json_value(const json & body, const std::string & key, const T & defaul
|
||||
}
|
||||
}
|
||||
|
||||
const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
|
||||
|
||||
//
|
||||
// tokenizer and input processing utils
|
||||
//
|
||||
|
Loading…
Reference in New Issue
Block a user