Skip to content

Commit 7ec5feb

Browse files
committed
fix: attempt to fix hanging test
Signed-off-by: alec-flowers <aflowers@nvidia.com>
1 parent 7fe1145 commit 7ec5feb

File tree

2 files changed

+11
-7
lines changed

2 files changed

+11
-7
lines changed

tests/v1/engine/test_engine_core_client.py

+5 -2
Original file line number | Diff line number | Diff line change
@@ -8,6 +8,7 @@
88

99
import psutil
1010
import pytest
11+
import zmq
1112
from transformers import AutoTokenizer
1213

1314
from vllm import SamplingParams
@@ -250,7 +251,6 @@ async def test_engine_core_client_asyncio(monkeypatch: pytest.MonkeyPatch):
250251
assert str(e_info.value) == "Call to echo method failed: help!"
251252

252253

253-
@create_new_process_for_each_test()
254254
@pytest.mark.parametrize(
255255
"multiprocessing_mode,publisher_config",
256256
[(True, "tcp"), (False, "inproc")],
@@ -332,8 +332,11 @@ def test_kv_cache_events(
332332
assert event.token_ids == custom_tokens, (
333333
"Token ids should be the same as the custom tokens")
334334
finally:
335-
subscriber.close()
336335
client.shutdown()
336+
subscriber.close()
337+
# TODO hack to try and fix CI hang
338+
ctx = zmq.Context.instance()
339+
ctx.term()
337340
return
338341

339342

vllm/distributed/kv_events.py

+6 -5
Original file line number | Diff line number | Diff line change
@@ -119,7 +119,7 @@ def __init__(
119119
topic: str = "",
120120
) -> None:
121121
# Storage
122-
self._event_queue = Queue[EventBatch](maxsize=max_queue_size)
122+
self._event_queue = Queue[Optional[EventBatch]](maxsize=max_queue_size)
123123
self._buffer = deque[tuple[int, bytes]](maxlen=buffer_steps)
124124

125125
# ZMQ sockets
@@ -151,9 +151,9 @@ def publish(self, events: EventBatch) -> None:
151151
def shutdown(self) -> None:
152152
"""Stop the publisher thread and clean up resources."""
153153
self._running = False
154+
self._event_queue.put_nowait(None)
154155

155156
start = time.time()
156-
157157
pending_items = True
158158
while pending_items and (time.time() - start < self.SHUTDOWN_TIMEOUT):
159159
pending_items = not self._event_queue.empty()
@@ -177,8 +177,7 @@ def shutdown(self) -> None:
177177
if self._replay is not None:
178178
self._replay.close(linger=0)
179179
finally:
180-
# Do not terminate context; other sockets may use it
181-
pass
180+
pass # Do not terminate context; other sockets may use it
182181

183182
def _socket_setup(self) -> None:
184183
"""Initialize sockets
@@ -211,7 +210,7 @@ def _publisher_thread(self) -> None:
211210

212211
assert self._pub is not None # narrows type for mypy
213212

214-
while self._running or not self._event_queue.empty():
213+
while self._running or self._event_queue.qsize() > 0:
215214
# --- replay (non-critical) ---------------------------------
216215
if self._replay is not None and self._replay.poll(0):
217216
try:
@@ -222,6 +221,8 @@ def _publisher_thread(self) -> None:
222221
# --- main queue (critical) ---------------------------------
223222
try:
224223
event = self._event_queue.get(timeout=0.1)
224+
if event is None:
225+
break # Sentinel received, exit thread
225226
except queue.Empty:
226227
continue
227228

0 commit comments

Comments
 (0)