Skip to content

Commit f9d0ddf

Browse files
committed
move xgrammar related utils to backend_xgrammar.py
Signed-off-by: shen-shanshan <467638484@qq.com>
1 parent b590adf commit f9d0ddf

File tree

3 files changed

+119
-125
lines changed

3 files changed

+119
-125
lines changed

vllm/v1/engine/processor.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,8 @@
2222
from vllm.v1.engine.mm_input_cache import MirroredProcessingCache
2323
from vllm.v1.structured_output.backend_guidance import (
2424
validate_guidance_grammar)
25-
from vllm.v1.structured_output.utils import (
26-
validate_structured_output_request_xgrammar)
25+
from vllm.v1.structured_output.backend_xgrammar import (
26+
validate_xgrammar_grammar)
2727

2828

2929
class Processor:
@@ -165,15 +165,15 @@ def _validate_structured_output(self, params: SamplingParams) -> None:
165165
# Request content validation
166166
if engine_level_backend.startswith("xgrammar"):
167167
# xgrammar with no fallback
168-
validate_structured_output_request_xgrammar(params)
168+
validate_xgrammar_grammar(params)
169169
params.guided_decoding.backend = engine_level_backend
170170
elif engine_level_backend == "auto":
171171
# "auto" is an opt-in to opinionated behavior where we try to
172172
# choose a backend based on request contents. This is not the
173173
# default as it is less predictable and subject to change
174174
# between releases as feature support changes.
175175
try:
176-
validate_structured_output_request_xgrammar(params)
176+
validate_xgrammar_grammar(params)
177177
params.guided_decoding.backend = "xgrammar"
178178
except ValueError:
179179
# The request includes some jsonschema feature(s) that

vllm/v1/structured_output/backend_xgrammar.py

Lines changed: 115 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,19 +1,24 @@
11
# SPDX-License-Identifier: Apache-2.0
22

3+
import json
34
from dataclasses import dataclass, field
4-
from typing import TYPE_CHECKING
5+
from typing import TYPE_CHECKING, Any
56

67
import torch
78

89
import vllm.envs
910
from vllm.config import VllmConfig
1011
from vllm.logger import init_logger
12+
from vllm.sampling_params import SamplingParams
1113
from vllm.transformers_utils.tokenizer_group import init_tokenizer_from_configs
1214
from vllm.transformers_utils.tokenizers.mistral import MistralTokenizer
1315
from vllm.utils import LazyLoader
1416
from vllm.v1.structured_output.backend_types import (StructuredOutputBackend,
1517
StructuredOutputGrammar,
1618
StructuredOutputOptions)
19+
from vllm.v1.structured_output.utils import (choice_as_grammar,
20+
convert_lark_to_ebnf,
21+
grammar_is_likely_lark)
1722

1823
if TYPE_CHECKING:
1924
import xgrammar as xgr
@@ -156,3 +161,112 @@ def is_terminated(self) -> bool:
156161
def reset(self):
157162
self.num_processed_tokens = 0
158163
self.matcher.reset()
164+
165+
166+
def has_xgrammar_unsupported_json_features(schema: dict[str, Any]) -> bool:
    """Report whether a JSON schema uses keywords xgrammar cannot handle.

    The scan is purely structural: the keyword checks are applied to the
    top-level dict and to every dict reachable through dict values or through
    the direct items of list values. That includes dicts that merely map
    property names to sub-schemas.

    Args:
        schema: The decoded JSON schema. Anything that is not a dict is
            treated as containing no unsupported features.

    Returns:
        True as soon as any visited dict matches one of the rules below,
        otherwise False.
    """
    # Each rule pairs the "type" values it applies to with the keywords that
    # are rejected for those types. Matching uses equality (tuple membership)
    # so an unhashable "type" value such as a list is simply a non-match.
    typed_rules = (
        (("integer", "number"),
         ("minimum", "maximum", "exclusiveMinimum", "exclusiveMaximum",
          "multipleOf")),
        (("array", ),
         ("uniqueItems", "contains", "minContains", "maxContains",
          "minItems", "maxItems")),
        (("string", ), ("format", )),
        (("object", ),
         ("minProperties", "maxProperties", "propertyNames",
          "patternProperties")),
    )

    def is_unsupported(node: Any) -> bool:
        if not isinstance(node, dict):
            return False

        # "pattern" is rejected regardless of the declared type.
        if "pattern" in node:
            return True

        declared_type = node.get("type")
        for type_names, keywords in typed_rules:
            if declared_type in type_names and any(keyword in node
                                                   for keyword in keywords):
                return True

        # Descend into nested dicts and into the direct items of lists.
        for child in node.values():
            if isinstance(child, dict):
                candidates = (child, )
            elif isinstance(child, list):
                candidates = child
            else:
                continue
            if any(is_unsupported(candidate) for candidate in candidates):
                return True

        return False

    return is_unsupported(schema)
214+
215+
216+
def validate_xgrammar_grammar(sampling_params: SamplingParams) -> None:
    """Validate that a guided decoding request can be served by xgrammar.

    The checks run in the order regex, choice, JSON schema, grammar. Two of
    them rewrite ``sampling_params.guided_decoding`` in place:

    * ``choice`` is converted to an EBNF grammar that is stored in
      ``grammar``, and ``choice`` is reset to ``None``.
    * A ``grammar`` that looks like Lark is replaced by its EBNF conversion.

    Args:
        sampling_params: Sampling parameters of the request. Nothing is
            checked when ``guided_decoding`` is ``None``.

    Raises:
        ValueError: If the regex, the choices, or the grammar cannot be
            turned into an xgrammar grammar, if the JSON schema string is not
            valid JSON, if the JSON schema uses features flagged by
            ``has_xgrammar_unsupported_json_features``, or if a Lark grammar
            cannot be converted to EBNF.
    """
    if sampling_params.guided_decoding is None:
        return

    gd_params = sampling_params.guided_decoding

    if gd_params.regex:
        try:
            xgr.Grammar.from_regex(gd_params.regex)
        except Exception as err:
            raise ValueError("Failed to transform regex into a grammar: "
                             f"{err}") from err
        # NOTE(review): unlike the branches below, this one does not return,
        # so the remaining checks still run -- confirm this is intended.

    if gd_params.choice:
        choice_grammar = choice_as_grammar(gd_params.choice)
        try:
            xgr.Grammar.from_ebnf(choice_grammar)
        except Exception as err:
            # The f-prefix is required so the underlying error is included in
            # the message instead of the literal text "{err}".
            raise ValueError("Failed to transform choices into a grammar: "
                             f"{err}") from err
        # From here on the request is handled as a plain EBNF grammar.
        gd_params.choice = None
        gd_params.grammar = choice_grammar
        return

    if gd_params.json:
        if isinstance(gd_params.json, str):
            try:
                schema = json.loads(gd_params.json)
            except json.JSONDecodeError as e:
                raise ValueError("Invalid JSON grammar specification.") from e
        else:
            schema = gd_params.json

        if has_xgrammar_unsupported_json_features(schema):
            raise ValueError("The provided JSON schema contains features not "
                             "supported by xgrammar.")
        return

    if gd_params.grammar:
        if grammar_is_likely_lark(gd_params.grammar):
            # xgrammar supports EBNF grammars only
            try:
                gd_params.grammar = convert_lark_to_ebnf(gd_params.grammar)
            except ValueError as e:
                raise ValueError(
                    "Failed to convert the grammar from Lark to EBNF. ") from e

        # Test parsing EBNF grammar, possibly already converted from Lark
        try:
            # parse the grammar, but we aren't compiling it.
            xgr.Grammar.from_ebnf(gd_params.grammar)
        except Exception as e:
            raise ValueError("Invalid grammar specification.") from e

vllm/v1/structured_output/utils.py

Lines changed: 0 additions & 120 deletions
Original file line numberDiff line numberDiff line change
@@ -2,67 +2,7 @@
22

33
from __future__ import annotations
44

5-
import json
65
import re
7-
from typing import TYPE_CHECKING, Any
8-
9-
from vllm.sampling_params import SamplingParams
10-
from vllm.utils import LazyLoader
11-
12-
if TYPE_CHECKING:
13-
import xgrammar as xgr
14-
else:
15-
xgr = LazyLoader("xgr", globals(), "xgrammar")
16-
17-
18-
def has_xgrammar_unsupported_json_features(schema: dict[str, Any]) -> bool:
19-
"""Check if JSON schema contains features unsupported by xgrammar."""
20-
21-
def check_object(obj: dict[str, Any]) -> bool:
22-
if not isinstance(obj, dict):
23-
return False
24-
25-
# Check for pattern restrictions
26-
if "pattern" in obj:
27-
return True
28-
29-
# Check for numeric ranges
30-
if obj.get("type") in ("integer", "number") and any(
31-
key in obj
32-
for key in ("minimum", "maximum", "exclusiveMinimum",
33-
"exclusiveMaximum", "multipleOf")):
34-
return True
35-
36-
# Check for array unsupported keywords
37-
if obj.get("type") == "array" and any(
38-
key in obj
39-
for key in ("uniqueItems", "contains", "minContains",
40-
"maxContains", "minItems", "maxItems")):
41-
return True
42-
43-
# Unsupported keywords for strings
44-
if obj.get("type") == "string" and "format" in obj:
45-
return True
46-
47-
# Unsupported keywords for objects
48-
if obj.get("type") == "object" and any(
49-
key in obj for key in ("minProperties", "maxProperties",
50-
"propertyNames", "patternProperties")):
51-
return True
52-
53-
# Recursively check all nested objects and arrays
54-
for value in obj.values():
55-
if isinstance(value, dict):
56-
if check_object(value):
57-
return True
58-
elif isinstance(value, list):
59-
for item in value:
60-
if isinstance(item, dict) and check_object(item):
61-
return True
62-
63-
return False
64-
65-
return check_object(schema)
666

677

688
def grammar_is_likely_lark(grammar_str: str) -> bool:
@@ -232,63 +172,3 @@ def escape_ebnf_string(s: str) -> str:
232172
escaped_choices = (escape_ebnf_string(c) for c in choice)
233173
grammar = ('root ::= ' + ' | '.join(f'"{c}"' for c in escaped_choices))
234174
return grammar
235-
236-
237-
def validate_structured_output_request_xgrammar(
238-
sampling_params: SamplingParams) -> None:
239-
"""Validate that the request is supported by structured output.
240-
241-
Raises ValueError if the request is not supported.
242-
"""
243-
if sampling_params.guided_decoding is None:
244-
return
245-
246-
gd_params = sampling_params.guided_decoding
247-
248-
if gd_params.regex:
249-
try:
250-
xgr.Grammar.from_regex(gd_params.regex)
251-
except Exception as err:
252-
raise ValueError("Failed to transform regex into a grammar: "
253-
f"{err}") from err
254-
255-
if gd_params.choice:
256-
choice_grammar = choice_as_grammar(gd_params.choice)
257-
try:
258-
xgr.Grammar.from_ebnf(choice_grammar)
259-
except Exception as err:
260-
raise ValueError("Failed to transform choices into a grammar: "
261-
"{err}") from err
262-
gd_params.choice = None
263-
gd_params.grammar = choice_grammar
264-
return
265-
266-
if gd_params.json:
267-
if isinstance(gd_params.json, str):
268-
try:
269-
schema = json.loads(gd_params.json)
270-
except json.JSONDecodeError as e:
271-
raise ValueError("Invalid JSON grammar specification.") from e
272-
else:
273-
schema = gd_params.json
274-
275-
if has_xgrammar_unsupported_json_features(schema):
276-
raise ValueError("The provided JSON schema contains features not "
277-
"supported by xgrammar.")
278-
return
279-
280-
if gd_params.grammar:
281-
if grammar_is_likely_lark(gd_params.grammar):
282-
# xgrammar supports EBNF grammars only
283-
try:
284-
gd_params.grammar = convert_lark_to_ebnf(gd_params.grammar)
285-
except ValueError as e:
286-
raise ValueError(
287-
"Failed to convert the grammar from Lark to EBNF. ") from e
288-
289-
# Test parsing EBNF grammar, possibly already converted from Lark
290-
try:
291-
# parse the grammar, but we aren't compiling it.
292-
xgr.Grammar.from_ebnf(gd_params.grammar)
293-
except Exception as e:
294-
raise ValueError("Invalid grammar specification.") from e

0 commit comments

Comments
 (0)