mirror of
https://github.com/ggerganov/llama.cpp.git
synced 2024-12-23 21:17:54 +01:00
server: tests - slow inference causes timeout on the CI (#5715)
* server: tests - longer inference timeout for CI
This commit is contained in:
parent
8b350356b2
commit
e3965cf35a
@ -266,7 +266,7 @@ static llama_token llama_sampling_sample_impl(
|
||||
// }
|
||||
//}
|
||||
|
||||
LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
|
||||
//LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -699,6 +699,8 @@ async def wait_for_health_status(context,
|
||||
if context.debug:
|
||||
print(f"Starting checking for health for expected_health_status={expected_health_status}")
|
||||
timeout = 3 # seconds
|
||||
if expected_health_status == 'ok':
|
||||
timeout = 10 # CI slow inference
|
||||
interval = 0.5
|
||||
counter = 0
|
||||
async with aiohttp.ClientSession() as session:
|
||||
@ -736,7 +738,7 @@ async def wait_for_health_status(context,
|
||||
if n_completions > 0:
|
||||
return
|
||||
|
||||
assert False, 'timeout exceeded'
|
||||
assert False, f'{expected_health_status} timeout exceeded {counter}s>={timeout}'
|
||||
|
||||
|
||||
def assert_embeddings(embeddings):
|
||||
|
Loading…
Reference in New Issue
Block a user