From 9e0ecfb697d297355e43c20559d29bcc71beb0c3 Mon Sep 17 00:00:00 2001 From: Xuan Son Nguyen Date: Mon, 4 Nov 2024 16:33:29 +0100 Subject: [PATCH] server : clarify /slots endpoint, add is_processing (#10162) * server : clarify /slots endpoint, add is_processing * fix tests --- examples/server/README.md | 11 +++++------ examples/server/server.cpp | 16 ++++++++-------- examples/server/tests/features/steps/steps.py | 10 +++++----- 3 files changed, 18 insertions(+), 19 deletions(-) diff --git a/examples/server/README.md b/examples/server/README.md index 1629e456b..15f95db1e 100644 --- a/examples/server/README.md +++ b/examples/server/README.md @@ -692,7 +692,10 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte ### GET `/slots`: Returns the current slots processing state -This endpoint can be disabled with `--no-slots` +> [!WARNING] +> This endpoint is intended for debugging and may be modified in future versions. For security reasons, we strongly advise against enabling it in production environments. + +This endpoint is disabled by default and can be enabled with `--slots` If query param `?fail_on_no_slot=1` is set, this endpoint will respond with status code 503 if there is no available slots. @@ -709,6 +712,7 @@ Example: "grammar": "", "id": 0, "ignore_eos": false, + "is_processing": false, "logit_bias": [], "min_p": 0.05000000074505806, "mirostat": 0, @@ -741,7 +745,6 @@ Example: "temperature" ], "seed": 42, - "state": 1, "stop": [ "\n" ], @@ -755,10 +758,6 @@ Example: ] ``` -Possible values for `slot[i].state` are: -- `0`: SLOT_STATE_IDLE -- `1`: SLOT_STATE_PROCESSING - ### GET `/metrics`: Prometheus compatible metrics exporter This endpoint is only accessible if `--metrics` is set. diff --git a/examples/server/server.cpp b/examples/server/server.cpp index 8531a784d..f0b89b22c 100644 --- a/examples/server/server.cpp +++ b/examples/server/server.cpp @@ -1566,11 +1566,11 @@ struct server_context { for (server_slot & slot : slots) { json slot_data = get_formated_generation(slot); - slot_data["id"] = slot.id; - slot_data["id_task"] = slot.id_task; - slot_data["state"] = slot.state; - slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens); - slot_data["next_token"] = { + slot_data["id"] = slot.id; + slot_data["id_task"] = slot.id_task; + slot_data["is_processing"] = slot.is_processing(); + slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens); + slot_data["next_token"] = { {"has_next_token", slot.has_next_token}, {"has_new_line", slot.has_new_line}, {"n_remain", slot.n_remaining}, @@ -1581,10 +1581,10 @@ struct server_context { {"stopping_word", slot.stopping_word}, }; - if (slot_data["state"] == SLOT_STATE_IDLE) { - n_idle_slots++; - } else { + if (slot.is_processing()) { n_processing_slots++; + } else { + n_idle_slots++; } slots_data.push_back(slot_data); diff --git a/examples/server/tests/features/steps/steps.py b/examples/server/tests/features/steps/steps.py index 2e418d8aa..687b163f4 100644 --- a/examples/server/tests/features/steps/steps.py +++ b/examples/server/tests/features/steps/steps.py @@ -260,13 +260,13 @@ async def step_wait_for_server_status(context, expecting_status: Literal['health async def step_all_slots_status(context, expected_slot_status_string: Literal['idle', 'busy'] | str): match expected_slot_status_string: case 'idle': - expected_slot_status = 0 + expected_slot_status = False case 'busy': - expected_slot_status = 1 + expected_slot_status = True case _: assert False, "unknown status" - expected_slots = [{'id': slot_id, 'state': expected_slot_status} + expected_slots = [{'id': slot_id, 'is_processing': expected_slot_status} for slot_id in range(context.n_slots)] await request_slots_status(context, expected_slots) @@ -1354,8 +1354,8 @@ async def wait_for_slots_status(context, if status_code == 503 and status_code == expected_http_status_code: return if status_code == 200 and status_code == expected_http_status_code: - n_slots_idle = sum(1 if slot["state"] == 0 else 0 for slot in slots) - n_slots_processing = sum(1 if slot["state"] != 0 else 0 for slot in slots) + n_slots_idle = sum(1 if not slot["is_processing"] else 0 for slot in slots) + n_slots_processing = sum(1 if slot["is_processing"] else 0 for slot in slots) if ((slots_idle is None or slots_idle == n_slots_idle) and (slots_processing is None or slots_processing == n_slots_processing)): return