server: tests - slow inference causes timeout on the CI (#5715)
* server: tests - longer inference timeout for CI
parent 8b350356b2
commit e3965cf35a
@@ -266,7 +266,7 @@ static llama_token llama_sampling_sample_impl(
         // }
         //}
 
-        LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
+        //LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
     }
 }
 
@@ -699,6 +699,8 @@ async def wait_for_health_status(context,
     if context.debug:
         print(f"Starting checking for health for expected_health_status={expected_health_status}")
     timeout = 3 # seconds
+    if expected_health_status == 'ok':
+        timeout = 10 # CI slow inference
     interval = 0.5
     counter = 0
     async with aiohttp.ClientSession() as session:
@@ -736,7 +738,7 @@ async def wait_for_health_status(context,
                 if n_completions > 0:
                     return
 
-    assert False, 'timeout exceeded'
+    assert False, f'{expected_health_status} timeout exceeded {counter}s>={timeout}'
 
 
 def assert_embeddings(embeddings):
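For readers who want to see the patched step in one piece: below is a minimal, self-contained sketch of the health-polling pattern this commit tunes, giving the 'ok' case the longer 10 s budget for slow CI inference. The function name, URL handling, and loop structure here are illustrative assumptions, not the exact contents of the test step file.

# Sketch only: an assumed stand-alone version of the health wait used by the
# server tests. It polls GET {base_url}/health until the reported status
# matches, with a longer timeout when waiting for 'ok' (slow CI inference).
import asyncio

import aiohttp


async def wait_for_health_status_sketch(base_url, expected_health_status, debug=False):
    if debug:
        print(f"Starting checking for health for expected_health_status={expected_health_status}")
    timeout = 3  # seconds
    if expected_health_status == 'ok':
        timeout = 10  # CI slow inference
    interval = 0.5
    counter = 0
    async with aiohttp.ClientSession() as session:
        while counter < timeout:
            async with session.get(f'{base_url}/health') as response:
                if response.status == 200:
                    health = await response.json()
                    if health.get('status') == expected_health_status:
                        return
            await asyncio.sleep(interval)
            counter += interval
    # Same style of message as the improved assertion in this commit
    assert False, f'{expected_health_status} timeout exceeded {counter}s>={timeout}'

In a local run one would await this from an async test step, e.g. await wait_for_health_status_sketch('http://localhost:8080', 'ok'); the port is an assumption, not taken from the diff.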