mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2025-01-08 11:46:53 +01:00
b782e5c7d4
* server : add split model test * add test speculative * add invalid cases
58 lines
1.8 KiB
Python
58 lines
1.8 KiB
Python
import pytest
|
|
from utils import *
|
|
|
|
# Module-level server handle used by every test in this file; built from the
# tinyllama infill preset.
server = ServerPreset.tinyllama_infill()
|
|
|
|
@pytest.fixture(scope="module", autouse=True)
def create_server():
    """Rebind the module-level ``server`` to a new tinyllama infill preset.

    Declared as a module-scoped, autouse fixture, so tests in this module do
    not need to request it explicitly.
    """
    # ``global`` is required here: the name is reassigned, not just read.
    global server
    server = ServerPreset.tinyllama_infill()
|
|
|
|
|
|
def test_infill_without_input_extra():
    """POST to ``/infill`` with only a prefix and suffix (no ``input_extra``).

    Expects HTTP 200 and generated content accepted by ``match_regex`` for the
    expected-word pattern below.
    """
    server.start()

    payload = {
        "prompt": "Complete this",
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n int n_threads = llama_",
        "input_suffix": "}\n",
    }
    response = server.make_request("POST", "/infill", data=payload)

    # Check the status first so a failed request is reported as such rather
    # than as a missing "content" key.
    assert response.status_code == 200
    assert match_regex("(One|day|she|saw|big|scary|bird)+", response.body["content"])
|
|
|
|
|
|
def test_infill_with_input_extra():
    """POST to ``/infill`` with one ``input_extra`` entry (filename + text).

    Expects HTTP 200 and generated content accepted by ``match_regex`` for the
    expected-word pattern below.
    """
    server.start()

    # Extra context chunk sent alongside the prefix/suffix.
    extra_chunk = {
        "filename": "llama.h",
        "text": "LLAMA_API int32_t llama_n_threads();\n",
    }
    payload = {
        "prompt": "Complete this",
        "input_extra": [extra_chunk],
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n int n_threads = llama_",
        "input_suffix": "}\n",
    }
    response = server.make_request("POST", "/infill", data=payload)

    assert response.status_code == 200
    assert match_regex("(cuts|Jimmy|mom|came|into|the|room)+", response.body["content"])
|
|
|
|
|
|
@pytest.mark.parametrize("input_extra", [
    {},
    {"filename": "ok"},
    {"filename": 123},
    {"filename": 123, "text": "abc"},
    {"filename": 123, "text": 456},
])
def test_invalid_input_extra_req(input_extra):
    """POST to ``/infill`` with an ``input_extra`` entry the test expects to be rejected.

    Each parametrized dict is sent as the sole element of the ``input_extra``
    list. The response must have status 400 and an ``"error"`` key in its body.
    """
    server.start()

    payload = {
        "prompt": "Complete this",
        "input_extra": [input_extra],
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n int n_threads = llama_",
        "input_suffix": "}\n",
    }
    response = server.make_request("POST", "/infill", data=payload)

    assert response.status_code == 400
    assert "error" in response.body
|