add destroy() method to so backend

shen-shanshan · shen-shanshan · commit 845f11e063b5 · 2025-04-24T01:24:00.000Z
Signed-off-by: shen-shanshan &lt;467638484@qq.com&gt;
diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py
@@ -252,6 +252,10 @@ def shutdown(self):
         if self.model_executor:
             self.model_executor.shutdown()
 
+        so_backend = self.structured_output_manager.backend
+        if so_backend is not None:
+            so_backend.destroy()
+
     def profile(self, is_start: bool = True):
         self.model_executor.profile(is_start)
 
diff --git a/vllm/v1/structured_output/backend_guidance.py b/vllm/v1/structured_output/backend_guidance.py
@@ -110,6 +110,9 @@ def allocate_token_bitmask(self, max_num_seqs: int):
         return llguidance_torch.allocate_token_bitmask(
             max_num_seqs, self.ll_tokenizer.vocab_size)
 
+    def destroy(self):
+        pass
+
 
 @dataclass
 class GuidanceGrammar(StructuredOutputGrammar):
diff --git a/vllm/v1/structured_output/backend_types.py b/vllm/v1/structured_output/backend_types.py
@@ -87,3 +87,9 @@ def allocate_token_bitmask(self, max_num_seqs: int):
             max_num_seqs (int): The maximum number of sequences for which
               to allocate the bitmask.
         """
+
+    @abstractmethod
+    def destroy(self):
+        """
+        Clear objects in the backend to avoid nanobind leaked.
+        """
diff --git a/vllm/v1/structured_output/backend_xgrammar.py b/vllm/v1/structured_output/backend_xgrammar.py
@@ -126,6 +126,9 @@ def compile_grammar(self, request_type: StructuredOutputOptions,
     def allocate_token_bitmask(self, max_num_seqs: int):
         return xgr.allocate_token_bitmask(max_num_seqs, self.vocab_size)
 
+    def destroy(self):
+        del self.compiler
+
 
 @dataclass
 class XgrammarGrammar(StructuredOutputGrammar):