server : test script : add timeout for all requests (#9282)

This commit is contained in:
Xuan Son Nguyen 2024-09-02 22:08:38 +02:00 committed by GitHub
parent f1485161e5
commit 48baa61ecc
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 20 additions and 20 deletions

View File

@ -52,8 +52,8 @@ Feature: Parallel
Then all prompts are predicted with <n_predict> tokens Then all prompts are predicted with <n_predict> tokens
Examples: Examples:
| streaming | n_predict | | streaming | n_predict |
| disabled | 200 | | disabled | 128 |
| enabled | 200 | | enabled | 64 |
Scenario Outline: Multi users OAI completions compatibility no v1 Scenario Outline: Multi users OAI completions compatibility no v1
Given a system prompt You are a writer. Given a system prompt You are a writer.

View File

@ -23,6 +23,8 @@ from prometheus_client import parser
# pyright: reportRedeclaration=false # pyright: reportRedeclaration=false
# Shared timeout configuration handed to the aiohttp.ClientSession(...) calls
# in this test script via their `timeout=` argument.
# NOTE: despite the `_SECONDS` suffix this is an aiohttp.ClientTimeout object,
# not a plain number; `total=600` is the overall limit (in seconds, per
# aiohttp's ClientTimeout) applied to each request made through the session.
DEFAULT_TIMEOUT_SECONDS = aiohttp.ClientTimeout(total=600)
@step("a server listening on {server_fqdn}:{server_port}") @step("a server listening on {server_fqdn}:{server_port}")
def step_server_config(context, server_fqdn: str, server_port: str): def step_server_config(context, server_fqdn: str, server_port: str):
context.server_fqdn = server_fqdn context.server_fqdn = server_fqdn
@ -689,7 +691,7 @@ def step_tokenize_set_add_special(context):
@async_run_until_complete @async_run_until_complete
async def step_tokenize(context): async def step_tokenize(context):
context.tokenized_text = context_text(context) context.tokenized_text = context_text(context)
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
tokenize_args = { tokenize_args = {
"content": context.tokenized_text, "content": context.tokenized_text,
} }
@ -706,7 +708,7 @@ async def step_tokenize(context):
@async_run_until_complete @async_run_until_complete
async def step_detokenize(context): async def step_detokenize(context):
assert len(context.tokens) > 0 assert len(context.tokens) > 0
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/detokenize', async with session.post(f'{context.base_url}/detokenize',
json={ json={
"tokens": context.tokens, "tokens": context.tokens,
@ -735,7 +737,7 @@ def step_strings_for_tokenization(context):
@step('an OPTIONS request is sent from {origin}') @step('an OPTIONS request is sent from {origin}')
@async_run_until_complete @async_run_until_complete
async def step_options_request(context, origin): async def step_options_request(context, origin):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
headers = {'Authorization': f'Bearer {context.user_api_key}', 'Origin': origin} headers = {'Authorization': f'Bearer {context.user_api_key}', 'Origin': origin}
async with session.options(f'{context.base_url}/v1/chat/completions', async with session.options(f'{context.base_url}/v1/chat/completions',
headers=headers) as response: headers=headers) as response:
@ -751,7 +753,7 @@ def step_check_options_header_value(context, cors_header, cors_header_value):
@step('prometheus metrics are exposed') @step('prometheus metrics are exposed')
@async_run_until_complete @async_run_until_complete
async def step_prometheus_metrics_exported(context): async def step_prometheus_metrics_exported(context):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with await session.get(f'{context.base_url}/metrics') as metrics_response: async with await session.get(f'{context.base_url}/metrics') as metrics_response:
assert metrics_response.status == 200 assert metrics_response.status == 200
assert metrics_response.headers['Content-Type'] == "text/plain; version=0.0.4" assert metrics_response.headers['Content-Type'] == "text/plain; version=0.0.4"
@ -824,7 +826,7 @@ async def concurrent_requests(context, f_completion, *args, **kwargs):
@step('the slot {slot_id:d} is saved with filename "{filename}"') @step('the slot {slot_id:d} is saved with filename "{filename}"')
@async_run_until_complete @async_run_until_complete
async def step_save_slot(context, slot_id, filename): async def step_save_slot(context, slot_id, filename):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/slots/{slot_id}?action=save', async with session.post(f'{context.base_url}/slots/{slot_id}?action=save',
json={"filename": filename}, json={"filename": filename},
headers={"Content-Type": "application/json"}) as response: headers={"Content-Type": "application/json"}) as response:
@ -834,7 +836,7 @@ async def step_save_slot(context, slot_id, filename):
@step('the slot {slot_id:d} is restored with filename "{filename}"') @step('the slot {slot_id:d} is restored with filename "{filename}"')
@async_run_until_complete @async_run_until_complete
async def step_restore_slot(context, slot_id, filename): async def step_restore_slot(context, slot_id, filename):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/slots/{slot_id}?action=restore', async with session.post(f'{context.base_url}/slots/{slot_id}?action=restore',
json={"filename": filename}, json={"filename": filename},
headers={"Content-Type": "application/json"}) as response: headers={"Content-Type": "application/json"}) as response:
@ -844,7 +846,7 @@ async def step_restore_slot(context, slot_id, filename):
@step('the slot {slot_id:d} is erased') @step('the slot {slot_id:d} is erased')
@async_run_until_complete @async_run_until_complete
async def step_erase_slot(context, slot_id): async def step_erase_slot(context, slot_id):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/slots/{slot_id}?action=erase', async with session.post(f'{context.base_url}/slots/{slot_id}?action=erase',
headers={"Content-Type": "application/json"}) as response: headers={"Content-Type": "application/json"}) as response:
context.response = response context.response = response
@ -853,7 +855,7 @@ async def step_erase_slot(context, slot_id):
@step('switch {on_or_off} lora adapter {lora_id:d}') @step('switch {on_or_off} lora adapter {lora_id:d}')
@async_run_until_complete @async_run_until_complete
async def toggle_lora_adapter(context, on_or_off: str, lora_id: int): async def toggle_lora_adapter(context, on_or_off: str, lora_id: int):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{context.base_url}/lora-adapters', async with session.post(f'{context.base_url}/lora-adapters',
json=[{'id': lora_id, 'scale': 1 if on_or_off == 'on' else 0}], json=[{'id': lora_id, 'scale': 1 if on_or_off == 'on' else 0}],
headers={"Content-Type": "application/json"}) as response: headers={"Content-Type": "application/json"}) as response:
@ -889,7 +891,7 @@ async def request_completion(prompt,
print(f"Set user_api_key: {user_api_key}") print(f"Set user_api_key: {user_api_key}")
headers['Authorization'] = f'Bearer {user_api_key}' headers['Authorization'] = f'Bearer {user_api_key}'
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{base_url}/completion', async with session.post(f'{base_url}/completion',
json={ json={
"input_prefix": prompt_prefix, "input_prefix": prompt_prefix,
@ -902,8 +904,7 @@ async def request_completion(prompt,
"temperature": temperature if temperature is not None else 0.8, "temperature": temperature if temperature is not None else 0.8,
"n_probs": 2, "n_probs": 2,
}, },
headers=headers, headers=headers) as response:
timeout=3600) as response:
if expect_api_error is None or not expect_api_error: if expect_api_error is None or not expect_api_error:
assert response.status == 200 assert response.status == 200
assert response.headers['Access-Control-Allow-Origin'] == origin assert response.headers['Access-Control-Allow-Origin'] == origin
@ -961,7 +962,7 @@ async def oai_chat_completions(user_prompt,
if async_client: if async_client:
origin = 'llama.cpp' origin = 'llama.cpp'
headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin} headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{base_url}{base_path}', async with session.post(f'{base_url}{base_path}',
json=payload, json=payload,
headers=headers) as response: headers=headers) as response:
@ -1048,7 +1049,7 @@ async def oai_chat_completions(user_prompt,
async def request_embedding(content, seed, base_url=None) -> list[list[float]]: async def request_embedding(content, seed, base_url=None) -> list[list[float]]:
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{base_url}/embedding', async with session.post(f'{base_url}/embedding',
json={ json={
"content": content, "content": content,
@ -1068,14 +1069,13 @@ async def request_oai_embeddings(input, seed,
headers=[] headers=[]
if user_api_key is not None: if user_api_key is not None:
headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin} headers = {'Authorization': f'Bearer {user_api_key}', 'Origin': origin}
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with session.post(f'{base_url}/v1/embeddings', async with session.post(f'{base_url}/v1/embeddings',
json={ json={
"input": input, "input": input,
"model": model, "model": model,
}, },
headers=headers, headers=headers) as response:
timeout=3600) as response:
assert response.status == 200, f"received status code not expected: {response.status}" assert response.status == 200, f"received status code not expected: {response.status}"
assert response.headers['Access-Control-Allow-Origin'] == origin assert response.headers['Access-Control-Allow-Origin'] == origin
assert response.headers['Content-Type'] == "application/json; charset=utf-8" assert response.headers['Content-Type'] == "application/json; charset=utf-8"
@ -1194,7 +1194,7 @@ async def wait_for_slots_status(context,
if 'GITHUB_ACTIONS' in os.environ: if 'GITHUB_ACTIONS' in os.environ:
timeout *= 2 timeout *= 2
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
while True: while True:
async with await session.get(f'{base_url}/slots', params=params) as slots_response: async with await session.get(f'{base_url}/slots', params=params) as slots_response:
status_code = slots_response.status status_code = slots_response.status
@ -1237,7 +1237,7 @@ def assert_embeddings(embeddings):
async def request_slots_status(context, expected_slots): async def request_slots_status(context, expected_slots):
async with aiohttp.ClientSession() as session: async with aiohttp.ClientSession(timeout=DEFAULT_TIMEOUT_SECONDS) as session:
async with await session.get(f'{context.base_url}/slots') as slots_response: async with await session.get(f'{context.base_url}/slots') as slots_response:
assert slots_response.status == 200 assert slots_response.status == 200
slots = await slots_response.json() slots = await slots_response.json()