1
1
# SPDX-License-Identifier: Apache-2.0
2
+ """
3
+ To run this example, you need to start the vLLM server:
4
+
5
+ ```bash
6
+ vllm serve Qwen/Qwen2.5-3B-Instruct
7
+ ```
8
+ """
2
9
3
10
from enum import Enum
4
11
5
12
from openai import BadRequestError , OpenAI
6
13
from pydantic import BaseModel
7
14
8
# Guided decoding by Choice (list of possible options)
def guided_choice_completion(client: OpenAI, model: str):
    """Classify a sentiment with the answer constrained to a fixed choice set.

    vLLM's ``guided_choice`` extra-body option guarantees the model's reply
    is exactly one of the listed strings ("positive" or "negative").
    Returns the generated message content.
    """
    sentiment_request = {
        "role": "user",
        "content": "Classify this sentiment: vLLM is wonderful!",
    }
    response = client.chat.completions.create(
        model=model,
        messages=[sentiment_request],
        extra_body={"guided_choice": ["positive", "negative"]},
    )
    return response.choices[0].message.content
23
28
24
29
# Guided decoding by Regex
def guided_regex_completion(client: OpenAI, model: str):
    """Generate an email address whose shape is constrained by a regex.

    ``guided_regex`` forces the output to match ``\\w+@\\w+\\.com\\n``; the
    trailing newline is also listed in ``stop`` so generation ends there.
    Returns the generated message content.
    """
    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
              "End in .com and new line. Example result:"
              "alan.turing@enigma.com\n")

    email_request = {
        "role": "user",
        "content": prompt,
    }
    response = client.chat.completions.create(
        model=model,
        messages=[email_request],
        extra_body={
            "guided_regex": r"\w+@\w+\.com\n",
            "stop": ["\n"],
        },
    )
    return response.choices[0].message.content
41
47
42
48
43
49
# Guided decoding by JSON using Pydantic schema
@@ -54,66 +60,100 @@ class CarDescription(BaseModel):
54
60
car_type : CarType
55
61
56
62
57
def guided_json_completion(client: OpenAI, model: str):
    """Generate JSON conforming to the ``CarDescription`` Pydantic schema.

    The schema produced by ``model_json_schema()`` is passed through
    ``guided_json`` so the server constrains decoding to valid instances.
    Returns the generated message content (a JSON string).
    """
    car_schema = CarDescription.model_json_schema()

    prompt = ("Generate a JSON with the brand, model and car_type of"
              "the most iconic car from the 90's")
    response = client.chat.completions.create(
        model=model,
        messages=[{
            "role": "user",
            "content": prompt,
        }],
        extra_body={"guided_json": car_schema},
    )
    return response.choices[0].message.content
# Guided decoding by Grammar
def guided_grammar_completion(client: OpenAI, model: str):
    """Generate text constrained by an EBNF-style grammar.

    A tiny SQL-like grammar is sent via ``guided_grammar`` so the decoded
    output is always a well-formed SELECT statement from that grammar.
    Returns the generated message content.
    """
    simplified_sql_grammar = """
        root ::= select_statement

        select_statement ::= "SELECT " column " from " table " where " condition

        column ::= "col_1 " | "col_2 "

        table ::= "table_1 " | "table_2 "

        condition ::= column "= " number

        number ::= "1 " | "2 "
    """

    prompt = ("Generate an SQL query to show the 'username' and 'email'"
              "from the 'users' table.")
    sql_request = {
        "role": "user",
        "content": prompt,
    }
    response = client.chat.completions.create(
        model=model,
        messages=[sql_request],
        extra_body={"guided_grammar": simplified_sql_grammar},
    )
    return response.choices[0].message.content
106
+
107
+
108
# Extra backend options
def extra_backend_options_completion(client: OpenAI, model: str):
    """Demonstrate backend-specific guided-decoding options.

    Requests the xgrammar backend with ``no-fallback``: if xgrammar cannot
    handle the constraint, the server answers with HTTP 400 instead of
    silently switching backends. On success returns the generated content;
    on the (expected) ``BadRequestError`` it prints the error and returns
    ``None``.
    """
    prompt = ("Generate an email address for Alan Turing, who works in Enigma."
              "End in .com and new line. Example result:"
              "alan.turing@enigma.com\n")

    try:
        # The no-fallback option forces vLLM to use xgrammar, so when it fails
        # you get a 400 with the reason why
        response = client.chat.completions.create(
            model=model,
            messages=[{
                "role": "user",
                "content": prompt,
            }],
            extra_body={
                "guided_regex": r"\w+@\w+\.com\n",
                "stop": ["\n"],
                "guided_decoding_backend": "xgrammar:no-fallback"
            },
        )
        return response.choices[0].message.content
    except BadRequestError as e:
        print("This error is expected:", e)
133
+
134
def main():
    """Run every guided-decoding example against a local vLLM server."""
    client = OpenAI(
        # Assumes `vllm serve` is listening on the default local port.
        base_url="http://localhost:8000/v1",
        api_key="-",
    )

    model = "Qwen/Qwen2.5-3B-Instruct"

    # (heading, example) pairs, printed in order.
    examples = (
        ("Guided Choice Completion:", guided_choice_completion),
        ("\nGuided Regex Completion:", guided_regex_completion),
        ("\nGuided JSON Completion:", guided_json_completion),
        ("\nGuided Grammar Completion:", guided_grammar_completion),
        ("\nExtra Backend Options Completion:",
         extra_backend_options_completion),
    )
    for heading, example in examples:
        print(heading)
        print(example(client, model))


if __name__ == "__main__":
    main()
0 commit comments