server : add system_fingerprint to chat/completion (#10917)
* server : add system_fingerprint to chat/completion
* update README
Parent: 86bf31cfe6
Commit: 485dc01214
examples/server/README.md:

````diff
@@ -724,7 +724,8 @@ This endpoint is public (no API key check). By default, it is read-only. To make
   },
   "total_slots": 1,
   "model_path": "../models/Meta-Llama-3.1-8B-Instruct-Q4_K_M.gguf",
-  "chat_template": "..."
+  "chat_template": "...",
+  "build_info": "b(build number)-(build commit hash)"
 }
 ```
````
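The new `build_info` field can be read straight off the `/props` endpoint documented above. A minimal sketch, assuming a llama-server running on the default `localhost:8080` and the `requests` package installed:

```python
import requests

# /props is public (no API key check) on a default llama-server instance.
props = requests.get("http://localhost:8080/props").json()

# build_info follows the "b(build number)-(build commit hash)" format shown above.
print(props["build_info"])
```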
examples/server/server.cpp:

```diff
@@ -598,6 +598,7 @@ struct server_task_result_cmpl_final : server_task_result {
             {"choices",            json::array({choice})},
             {"created",            t},
             {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
             {"object",             "chat.completion"},
             {"usage", json {
                 {"completion_tokens", n_decoded},
@@ -636,6 +637,7 @@ struct server_task_result_cmpl_final : server_task_result {
             {"created",            t},
             {"id",                 oaicompat_cmpl_id},
             {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
             {"object",             "chat.completion.chunk"},
             {"usage", json {
                 {"completion_tokens", n_decoded},
@@ -765,6 +767,7 @@ struct server_task_result_cmpl_partial : server_task_result {
             {"created",            t},
             {"id",                 oaicompat_cmpl_id},
             {"model",              oaicompat_model},
+            {"system_fingerprint", build_info},
             {"object",             "chat.completion.chunk"}
         };

@@ -3476,6 +3479,7 @@ int main(int argc, char ** argv) {
                 { "total_slots",   ctx_server.params_base.n_parallel },
                 { "model_path",    ctx_server.params_base.model },
                 { "chat_template", llama_get_chat_template(ctx_server.model) },
+                { "build_info",    build_info },
             };

             res_ok(res, data);
```
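With the fields above in place, every OpenAI-compatible chat response object carries the fingerprint. A minimal client-side sketch (not part of this commit) reading it from a non-streaming completion, assuming the same local server:

```python
import requests

resp = requests.post(
    "http://localhost:8080/v1/chat/completions",
    json={
        "messages": [{"role": "user", "content": "Hello"}],
        "max_tokens": 8,
    },
).json()

# system_fingerprint mirrors build_info, so a reply can be traced back to
# the exact server build that produced it.
print(resp["system_fingerprint"], resp["object"])
```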
examples/server/tests/unit/test_chat_completion.py:

```diff
@@ -31,6 +31,7 @@ def test_chat_completion(model, system_prompt, user_prompt, max_tokens, re_conte
     })
     assert res.status_code == 200
     assert "cmpl" in res.body["id"]  # make sure the completion id has the expected format
+    assert res.body["system_fingerprint"].startswith("b")
     assert res.body["model"] == model if model is not None else server.model_alias
     assert res.body["usage"]["prompt_tokens"] == n_prompt
     assert res.body["usage"]["completion_tokens"] == n_predicted
@@ -63,6 +64,7 @@ def test_chat_completion_stream(system_prompt, user_prompt, max_tokens, re_conte
     last_cmpl_id = None
     for data in res:
         choice = data["choices"][0]
+        assert data["system_fingerprint"].startswith("b")
         assert "gpt-3.5" in data["model"]  # DEFAULT_OAICOMPAT_MODEL, maybe changed in the future
         if last_cmpl_id is None:
             last_cmpl_id = data["id"]
@@ -92,6 +94,7 @@ def test_chat_completion_with_openai_library():
         seed=42,
         temperature=0.8,
     )
+    assert res.system_fingerprint is not None and res.system_fingerprint.startswith("b")
     assert res.choices[0].finish_reason == "length"
     assert res.choices[0].message.content is not None
     assert match_regex("(Suddenly)+", res.choices[0].message.content)
```
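The streaming test checks the field on every chunk; the same pattern from application code with the official `openai` Python package might look like the sketch below (the base URL and dummy API key are assumptions for a local llama-server):

```python
from openai import OpenAI

client = OpenAI(base_url="http://localhost:8080/v1", api_key="sk-no-key-required")

stream = client.chat.completions.create(
    model="gpt-3.5-turbo",  # llama-server serves its loaded model regardless of the name
    messages=[{"role": "user", "content": "Write one short sentence."}],
    max_tokens=16,
    stream=True,
)

for chunk in stream:
    # Every chat.completion.chunk now reports the server build as well.
    assert chunk.system_fingerprint is not None
    assert chunk.system_fingerprint.startswith("b")
```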
examples/server/utils.hpp:

```diff
@@ -56,6 +56,8 @@ static T json_value(const json & body, const std::string & key, const T & defaul
     }
 }

+const static std::string build_info("b" + std::to_string(LLAMA_BUILD_NUMBER) + "-" + LLAMA_COMMIT);
+
 //
 // tokenizer and input processing utils
 //
```