Skip to content

Commit 2cbc3b3

Browse files
ngxson authored and arthw committed
server : clarify /slots endpoint, add is_processing (ggml-org#10162)
* server : clarify /slots endpoint, add is_processing * fix tests
1 parent e4a831f commit 2cbc3b3

File tree

3 files changed

+18
-19
lines changed

3 files changed

+18
-19
lines changed

examples/server/README.md

+5-6
Original file line numberDiff line numberDiff line change
@@ -692,7 +692,10 @@ Given a ChatML-formatted json description in `messages`, it returns the predicte
692692

693693
### GET `/slots`: Returns the current slots processing state
694694

695-
This endpoint can be disabled with `--no-slots`
695+
> [!WARNING]
696+
> This endpoint is intended for debugging and may be modified in future versions. For security reasons, we strongly advise against enabling it in production environments.
697+
698+
This endpoint is disabled by default and can be enabled with `--slots`
696699

697700
If query param `?fail_on_no_slot=1` is set, this endpoint will respond with status code 503 if there are no available slots.
698701

@@ -709,6 +712,7 @@ Example:
709712
"grammar": "",
710713
"id": 0,
711714
"ignore_eos": false,
715+
"is_processing": false,
712716
"logit_bias": [],
713717
"min_p": 0.05000000074505806,
714718
"mirostat": 0,
@@ -741,7 +745,6 @@ Example:
741745
"temperature"
742746
],
743747
"seed": 42,
744-
"state": 1,
745748
"stop": [
746749
"\n"
747750
],
@@ -755,10 +758,6 @@ Example:
755758
]
756759
```
757760

758-
Possible values for `slot[i].state` are:
759-
- `0`: SLOT_STATE_IDLE
760-
- `1`: SLOT_STATE_PROCESSING
761-
762761
### GET `/metrics`: Prometheus compatible metrics exporter
763762

764763
This endpoint is only accessible if `--metrics` is set.

examples/server/server.cpp

+8-8
Original file line numberDiff line numberDiff line change
@@ -1566,11 +1566,11 @@ struct server_context {
15661566

15671567
for (server_slot & slot : slots) {
15681568
json slot_data = get_formated_generation(slot);
1569-
slot_data["id"] = slot.id;
1570-
slot_data["id_task"] = slot.id_task;
1571-
slot_data["state"] = slot.state;
1572-
slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens);
1573-
slot_data["next_token"] = {
1569+
slot_data["id"] = slot.id;
1570+
slot_data["id_task"] = slot.id_task;
1571+
slot_data["is_processing"] = slot.is_processing();
1572+
slot_data["prompt"] = common_detokenize(ctx, slot.prompt_tokens);
1573+
slot_data["next_token"] = {
15741574
{"has_next_token", slot.has_next_token},
15751575
{"has_new_line", slot.has_new_line},
15761576
{"n_remain", slot.n_remaining},
@@ -1581,10 +1581,10 @@ struct server_context {
15811581
{"stopping_word", slot.stopping_word},
15821582
};
15831583

1584-
if (slot_data["state"] == SLOT_STATE_IDLE) {
1585-
n_idle_slots++;
1586-
} else {
1584+
if (slot.is_processing()) {
15871585
n_processing_slots++;
1586+
} else {
1587+
n_idle_slots++;
15881588
}
15891589

15901590
slots_data.push_back(slot_data);

examples/server/tests/features/steps/steps.py

+5-5
Original file line numberDiff line numberDiff line change
@@ -260,13 +260,13 @@ async def step_wait_for_server_status(context, expecting_status: Literal['health
260260
async def step_all_slots_status(context, expected_slot_status_string: Literal['idle', 'busy'] | str):
261261
match expected_slot_status_string:
262262
case 'idle':
263-
expected_slot_status = 0
263+
expected_slot_status = False
264264
case 'busy':
265-
expected_slot_status = 1
265+
expected_slot_status = True
266266
case _:
267267
assert False, "unknown status"
268268

269-
expected_slots = [{'id': slot_id, 'state': expected_slot_status}
269+
expected_slots = [{'id': slot_id, 'is_processing': expected_slot_status}
270270
for slot_id in range(context.n_slots)]
271271
await request_slots_status(context, expected_slots)
272272

@@ -1354,8 +1354,8 @@ async def wait_for_slots_status(context,
13541354
if status_code == 503 and status_code == expected_http_status_code:
13551355
return
13561356
if status_code == 200 and status_code == expected_http_status_code:
1357-
n_slots_idle = sum(1 if slot["state"] == 0 else 0 for slot in slots)
1358-
n_slots_processing = sum(1 if slot["state"] != 0 else 0 for slot in slots)
1357+
n_slots_idle = sum(1 if not slot["is_processing"] else 0 for slot in slots)
1358+
n_slots_processing = sum(1 if slot["is_processing"] else 0 for slot in slots)
13591359
if ((slots_idle is None or slots_idle == n_slots_idle)
13601360
and (slots_processing is None or slots_processing == n_slots_processing)):
13611361
return

0 commit comments

Comments
 (0)