Mirror of https://github.com/ggerganov/llama.cpp.git, synced 2024-12-24 13:28:50 +01:00
server: tests - slow inference causes timeout on the CI (#5715)
* server: tests - longer inference timeout for CI
parent 8b350356b2
commit e3965cf35a
@@ -266,7 +266,7 @@ static llama_token llama_sampling_sample_impl(
             //    }
             //}
 
-            LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
+            //LOG("sampled token: %5d: '%s'\n", id, llama_token_to_piece(ctx_main, id).c_str());
         }
     }
 
@@ -699,6 +699,8 @@ async def wait_for_health_status(context,
     if context.debug:
         print(f"Starting checking for health for expected_health_status={expected_health_status}")
     timeout = 3 # seconds
+    if expected_health_status == 'ok':
+        timeout = 10 # CI slow inference
     interval = 0.5
     counter = 0
     async with aiohttp.ClientSession() as session:
@@ -736,7 +738,7 @@ async def wait_for_health_status(context,
                 if n_completions > 0:
                     return
 
-    assert False, 'timeout exceeded'
+    assert False, f'{expected_health_status} timeout exceeded {counter}s>={timeout}'
 
 
 def assert_embeddings(embeddings):
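Taken together, the visible fragments of wait_for_health_status describe a straightforward polling loop: query the server's health endpoint every interval seconds, return once the expected status is observed, and fail with the assert once counter reaches timeout, which this commit raises to 10 s when 'ok' is expected. Below is a minimal sketch of that pattern, not the repository's implementation: the base_url argument, the '/health' route, and the {"status": ...} response shape are assumptions, and the real helper takes a context argument plus other parameters not shown here.

# Minimal sketch of the polling pattern suggested by the diff context.
# Assumed (not verbatim source): base_url, the '/health' route, and a
# JSON response of the form {"status": "..."}.
import asyncio
import aiohttp

async def wait_for_health_status(base_url, expected_health_status, debug=False):
    if debug:
        print(f"Starting checking for health for expected_health_status={expected_health_status}")
    timeout = 3 # seconds
    if expected_health_status == 'ok':
        timeout = 10 # CI slow inference
    interval = 0.5
    counter = 0
    async with aiohttp.ClientSession() as session:
        while counter < timeout:
            async with session.get(f'{base_url}/health') as response:
                health = await response.json()
                if health.get('status') == expected_health_status:
                    return
            await asyncio.sleep(interval)
            counter += interval
    # Same shape as the assertion message introduced by this commit.
    assert False, f'{expected_health_status} timeout exceeded {counter}s>={timeout}'

Under this structure the patch is purely about budget: CI runners with slow inference need more than 3 s before the first 'ok' health report, so the added branch buys them extra polling iterations before the assert fires.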