mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-02-04 15:43:53 +01:00
server : add tests
This commit is contained in:
parent
b436edaad9
commit
81611bef72
@ -82,6 +82,37 @@ def test_different_draft_min_draft_max():
|
|||||||
last_content = res.body["content"]
|
last_content = res.body["content"]
|
||||||
|
|
||||||
|
|
||||||
|
def test_slot_ctx_not_exceeded():
    """Check that a long prompt completes successfully with ``n_ctx = 64``.

    Starts the server with ``n_ctx`` set to 64, posts a completion request
    whose prompt is ``"Hello "`` repeated 56 times, and expects an HTTP 200
    response carrying non-empty ``content``.
    """
    global server
    server.n_ctx = 64
    server.start()

    # Greedy sampling (temperature 0, top_k 1) with speculative.p_min at 0.0.
    payload = {
        "prompt": "Hello " * 56,
        "temperature": 0.0,
        "top_k": 1,
        "speculative.p_min": 0.0,
    }
    res = server.make_request("POST", "/completion", data=payload)

    assert res.status_code == 200
    assert len(res.body["content"]) > 0
|
||||||
|
|
||||||
|
|
||||||
|
def test_with_ctx_shift():
    """Check a completion that asks for 64 tokens with ``n_ctx = 64``.

    Starts the server with ``n_ctx`` set to 64, posts a completion request
    whose prompt is ``"Hello "`` repeated 56 times with ``n_predict`` of 64,
    and expects an HTTP 200 response with non-empty ``content``, exactly 64
    predicted tokens, and the ``truncated`` flag set.
    """
    global server
    server.n_ctx = 64
    server.start()
    res = server.make_request("POST", "/completion", data={
        "prompt": "Hello " * 56,
        "temperature": 0.0,
        "top_k": 1,
        "n_predict": 64,
        "speculative.p_min": 0.0,
    })
    assert res.status_code == 200
    assert len(res.body["content"]) > 0
    assert res.body["tokens_predicted"] == 64
    # Identity check against the bool singleton (PEP 8 / E712): a merely
    # truthy value such as 1 must not satisfy this assertion.
    assert res.body["truncated"] is True
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize("n_slots,n_requests", [
|
@pytest.mark.parametrize("n_slots,n_requests", [
|
||||||
(1, 2),
|
(1, 2),
|
||||||
(2, 2),
|
(2, 2),
|
||||||
|
Loading…
Reference in New Issue
Block a user