
Commit 24f1e18

reidliu41 authored and committed

[doc] add streamlit integration (vllm-project#17522)

Signed-off-by: reidliu41 <reid201711@gmail.com>
Co-authored-by: reidliu41 <reid201711@gmail.com>
1 parent ee0eb6b commit 24f1e18

File tree

4 files changed: +228 −0 lines

docs/source/deployment/frameworks/index.md

Lines changed: 1 addition & 0 deletions
```diff
@@ -12,5 +12,6 @@ lws
 modal
 open-webui
 skypilot
+streamlit
 triton
 :::
```
docs/source/deployment/frameworks/streamlit.md

Lines changed: 42 additions & 0 deletions
(deployment-streamlit)=

# Streamlit

[Streamlit](https://github.com/streamlit/streamlit) lets you transform Python scripts into interactive web apps in minutes instead of weeks. Build dashboards, generate reports, or create chat apps.

It can be quickly integrated with vLLM as a backend API server, enabling powerful LLM inference via API calls.
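Under the hood, any OpenAI-compatible client can talk to the server; the Streamlit app described below is just such a client. A minimal sketch, assuming the default `vllm serve` endpoint at `http://localhost:8000/v1` and the placeholder API key `EMPTY` (both are the defaults the example script uses):

```python
# Minimal sketch: querying a local vLLM server through the OpenAI client.
# Assumes the default `vllm serve` endpoint; adjust base_url for remote servers.
from openai import OpenAI

client = OpenAI(api_key="EMPTY", base_url="http://localhost:8000/v1")

# vLLM exposes the served model via the standard /models route
model = client.models.list().data[0].id

completion = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "Hello!"}],
)
print(completion.choices[0].message.content)
```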
## Prerequisites

- Set up the vLLM environment

## Deploy

- Start the vLLM server with a supported chat completion model, e.g.:

```console
vllm serve qwen/Qwen1.5-0.5B-Chat
```

- Install streamlit and openai:

```console
pip install streamlit openai
```

- Use the script: <gh-file:examples/online_serving/streamlit_openai_chatbot_webserver.py>

- Start the Streamlit web UI and start chatting:

```console
streamlit run streamlit_openai_chatbot_webserver.py

# or specify VLLM_API_BASE or VLLM_API_KEY
VLLM_API_BASE="http://vllm-server-host:vllm-server-port/v1" streamlit run streamlit_openai_chatbot_webserver.py

# start in debug mode to view more details
streamlit run streamlit_openai_chatbot_webserver.py --logger.level=debug
```
:::{image} /assets/deployment/streamlit-chat.png
:::
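The example script below builds on Streamlit's chat primitives (`st.chat_input`, `st.chat_message`) and `st.session_state` for history. A minimal sketch of that pattern, assuming Streamlit ≥ 1.24, which introduced the chat elements; the echo reply is a hypothetical stand-in for the LLM call:

```python
# Minimal sketch of the Streamlit chat pattern the example script builds on.
# Assumes streamlit >= 1.24; the echo reply is illustrative only.
import streamlit as st

if "messages" not in st.session_state:
    # History must live in session_state to survive reruns
    st.session_state.messages = []

# Streamlit re-runs the whole script on every interaction, so replay history first
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

if prompt := st.chat_input("Type your message here..."):
    st.session_state.messages.append({"role": "user", "content": prompt})
    with st.chat_message("user"):
        st.write(prompt)
    with st.chat_message("assistant"):
        reply = f"Echo: {prompt}"  # a real app would call the LLM here
        st.write(reply)
    st.session_state.messages.append({"role": "assistant", "content": reply})
```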
examples/online_serving/streamlit_openai_chatbot_webserver.py

Lines changed: 185 additions & 0 deletions
```python
# SPDX-License-Identifier: Apache-2.0
"""
vLLM Chat Assistant - A Streamlit Web Interface

A streamlined chat interface that quickly integrates
with the vLLM API server.

Features:
- Multiple chat session management
- Streaming response display
- Configurable API endpoint
- Real-time chat history

Requirements:
    pip install streamlit openai

Usage:
    # Start the app with default settings
    streamlit run streamlit_openai_chatbot_webserver.py

    # Start with custom vLLM API endpoint
    VLLM_API_BASE="http://your-server:8000/v1" \
        streamlit run streamlit_openai_chatbot_webserver.py

    # Enable debug mode
    streamlit run streamlit_openai_chatbot_webserver.py \
        --logger.level=debug
"""
import os
from datetime import datetime

import streamlit as st
from openai import OpenAI

# Get command line arguments from environment variables
openai_api_key = os.getenv('VLLM_API_KEY', "EMPTY")
openai_api_base = os.getenv('VLLM_API_BASE', "http://localhost:8000/v1")

# Initialize session states for managing chat sessions
if "sessions" not in st.session_state:
    st.session_state.sessions = {}

if "current_session" not in st.session_state:
    st.session_state.current_session = None

if "messages" not in st.session_state:
    st.session_state.messages = []

if "active_session" not in st.session_state:
    st.session_state.active_session = None

# Initialize session state for API base URL
if "api_base_url" not in st.session_state:
    st.session_state.api_base_url = openai_api_base


def create_new_chat_session():
    """Create a new chat session with timestamp as ID"""
    session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.session_state.sessions[session_id] = []
    st.session_state.current_session = session_id
    st.session_state.active_session = session_id
    st.session_state.messages = []


def switch_to_chat_session(session_id):
    """Switch to a different chat session"""
    st.session_state.current_session = session_id
    st.session_state.active_session = session_id
    st.session_state.messages = st.session_state.sessions[session_id]


def get_llm_response(messages, model):
    """Get streaming response from the LLM.

    Args:
        messages: List of message dictionaries
        model: Name of the model

    Returns:
        Streaming response object or error message string
    """
    try:
        response = client.chat.completions.create(model=model,
                                                  messages=messages,
                                                  stream=True)
        return response
    except Exception as e:
        st.error(f"Error details: {str(e)}")
        return f"Error: {str(e)}"


# Sidebar - API Settings first
st.sidebar.title("API Settings")
new_api_base = st.sidebar.text_input("API Base URL:",
                                     value=st.session_state.api_base_url)
if new_api_base != st.session_state.api_base_url:
    st.session_state.api_base_url = new_api_base
    st.rerun()

st.sidebar.divider()

# Sidebar - Session Management
st.sidebar.title("Chat Sessions")
if st.sidebar.button("New Session"):
    create_new_chat_session()

# Display all sessions in reverse chronological order
for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
    # Mark the active session with a pinned button
    if session_id == st.session_state.active_session:
        st.sidebar.button(f"📍 {session_id}",
                          key=session_id,
                          type="primary",
                          on_click=switch_to_chat_session,
                          args=(session_id, ))
    else:
        st.sidebar.button(f"Session {session_id}",
                          key=session_id,
                          on_click=switch_to_chat_session,
                          args=(session_id, ))

# Main interface
st.title("vLLM Chat Assistant")

# Initialize OpenAI client with API settings
client = OpenAI(api_key=openai_api_key, base_url=st.session_state.api_base_url)

# Get and display the current model id
models = client.models.list()
model = models.data[0].id
st.markdown(f"**Model**: {model}")

# Initialize first session if none exists
if st.session_state.current_session is None:
    create_new_chat_session()
    st.session_state.active_session = st.session_state.current_session

# Display chat history for the current session
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

# Handle user input and generate the LLM response
if prompt := st.chat_input("Type your message here..."):
    # Save user message to the session
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.session_state.sessions[
        st.session_state.current_session] = st.session_state.messages

    # Display user message
    with st.chat_message("user"):
        st.write(prompt)

    # Prepare messages for the LLM
    messages_for_llm = [{
        "role": m["role"],
        "content": m["content"]
    } for m in st.session_state.messages]

    # Generate and display the LLM response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        # Get streaming response from the LLM
        response = get_llm_response(messages_for_llm, model)
        if isinstance(response, str):
            message_placeholder.markdown(response)
            full_response = response
        else:
            for chunk in response:
                if hasattr(chunk.choices[0].delta, "content"):
                    content = chunk.choices[0].delta.content
                    if content:
                        full_response += content
                        message_placeholder.markdown(full_response + "▌")

            message_placeholder.markdown(full_response)

    # Save the LLM response to session history
    st.session_state.messages.append({
        "role": "assistant",
        "content": full_response
    })
```
