From 98b6112c44a72bdcad365bd4ca78c4e16749b02f Mon Sep 17 00:00:00 2001 From: Nick Hill Date: Thu, 17 Apr 2025 15:42:08 -0700 Subject: [PATCH] [V1] Avoid socket errors during shutdown when requests are in-flight Signed-off-by: Nick Hill --- vllm/v1/engine/core.py | 2 +- vllm/v1/engine/core_client.py | 10 ++++++---- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index 9c4036efd05..242bdf8a703 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -384,7 +384,7 @@ def signal_handler(signum, frame): except SystemExit: logger.debug("EngineCore exiting.") - + raise except Exception as e: if engine_core is None: logger.exception("EngineCore failed to start.") diff --git a/vllm/v1/engine/core_client.py b/vllm/v1/engine/core_client.py index f54b3546f06..57cee51c138 100644 --- a/vllm/v1/engine/core_client.py +++ b/vllm/v1/engine/core_client.py @@ -312,6 +312,7 @@ class BackgroundResources: def __call__(self): """Clean up background resources.""" + self.engine_dead = True for core_engine in self.core_engines: core_engine.close() @@ -564,7 +565,7 @@ def add_request(self, request: EngineCoreRequest) -> None: self._send_input(EngineCoreRequestType.ADD, request) def abort_requests(self, request_ids: list[str]) -> None: - if len(request_ids) > 0: + if request_ids and not self.resources.engine_dead: self._send_input(EngineCoreRequestType.ABORT, request_ids) def profile(self, is_start: bool = True) -> None: @@ -735,7 +736,7 @@ async def add_request_async(self, request: EngineCoreRequest) -> None: self._ensure_output_queue_task() async def abort_requests_async(self, request_ids: list[str]) -> None: - if len(request_ids) > 0: + if request_ids and not self.resources.engine_dead: await self._send_input(EngineCoreRequestType.ABORT, request_ids) async def profile_async(self, is_start: bool = True) -> None: @@ -895,5 +896,6 @@ async def abort_requests_async(self, request_ids: list[str]) -> None: async def
_abort_requests(self, request_ids: list[str], engine: CoreEngine) -> None: - await self._send_input(EngineCoreRequestType.ABORT, request_ids, - engine) + if not self.resources.engine_dead: + await self._send_input(EngineCoreRequestType.ABORT, request_ids, + engine)