# llama.cpp/examples/server/tests/unit/test_infill.py

import pytest
from utils import *

# Shared server preset used by the tests in this module; the autouse
# create_server fixture below rebinds it to a fresh preset.
server = ServerPreset.tinyllama_infill()

@pytest.fixture(scope="module", autouse=True)
def create_server():
    """Rebind the module-level ``server`` to a new tinyllama infill preset.

    Declared as a module-scoped, autouse pytest fixture, so no test has to
    request it explicitly.
    """
    global server
    fresh_preset = ServerPreset.tinyllama_infill()
    server = fresh_preset

def test_infill_without_input_extra():
    """POST to /infill with only a prompt, a prefix and a suffix.

    Asserts that the response status is 200 and that ``match_regex``
    accepts the returned ``content`` against the expected word pattern.
    """
    expected_words = "(One|day|she|saw|big|scary|bird)+"
    payload = {
        "prompt": "Complete this",
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n    int n_threads = llama_",
        "input_suffix": "}\n",
    }

    # The module-level server is only read here, so no `global` is needed.
    server.start()
    response = server.make_request("POST", "/infill", data=payload)

    assert response.status_code == 200
    assert match_regex(expected_words, response.body["content"])

def test_infill_with_input_extra():
    """POST to /infill with an additional ``input_extra`` file entry.

    The request carries one extra entry (``llama.h``) alongside the prompt,
    prefix and suffix. Asserts that the response status is 200 and that
    ``match_regex`` accepts the returned ``content`` against the expected
    word pattern.
    """
    expected_words = "(cuts|Jimmy|mom|came|into|the|room)+"
    extra_file = {
        "filename": "llama.h",
        "text": "LLAMA_API int32_t llama_n_threads();\n"
    }
    payload = {
        "prompt": "Complete this",
        "input_extra": [extra_file],
        "input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n    int n_threads = llama_",
        "input_suffix": "}\n",
    }

    # The module-level server is only read here, so no `global` is needed.
    server.start()
    response = server.make_request("POST", "/infill", data=payload)

    assert response.status_code == 200
    assert match_regex(expected_words, response.body["content"])
server : replace behave with pytest (#10416)

* server : replace behave with pytest
* fix test on windows
* misc
* add more tests
* more tests
* styling
* log less, fix embd test
* added all sequential tests
* fix coding style
* fix save slot test
* add parallel completion test
* fix parallel test
* remove feature files
* update test docs
* no cache_prompt for some tests
* add test_cache_vs_nocache_prompt

2024-11-26 16:20:18 +01:00
			`import pytest`
			`from utils import *`

			`server = ServerPreset.tinyllama_infill()`

			`@pytest.fixture(scope="module", autouse=True)`
			`def create_server():`
			`global server`
			`server = ServerPreset.tinyllama_infill()`

			`def test_infill_without_input_extra():`
			`global server`
			`server.start()`
			`res = server.make_request("POST", "/infill", data={`
			`"prompt": "Complete this",`
			`"input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n int n_threads = llama_",`
			`"input_suffix": "}\n",`
			`})`
			`assert res.status_code == 200`
			`assert match_regex("(One\|day\|she\|saw\|big\|scary\|bird)+", res.body["content"])`

			`def test_infill_with_input_extra():`
			`global server`
			`server.start()`
			`res = server.make_request("POST", "/infill", data={`
			`"prompt": "Complete this",`
			`"input_extra": [{`
			`"filename": "llama.h",`
			`"text": "LLAMA_API int32_t llama_n_threads();\n"`
			`}],`
			`"input_prefix": "#include <cstdio>\n#include \"llama.h\"\n\nint main() {\n int n_threads = llama_",`
			`"input_suffix": "}\n",`
			`})`
			`assert res.status_code == 200`
			`assert match_regex("(cuts\|Jimmy\|mom\|came\|into\|the\|room)+", res.body["content"])`