Commit 4129d94

author reidliu41 committed
[doc] add streamlit integration
Signed-off-by: reidliu41 <reid201711@gmail.com>
1 parent 26bc4bb commit 4129d94

File tree

3 files changed (+202, -0 lines)


docs/source/deployment/frameworks/index.md

+1 line

```diff
@@ -12,5 +12,6 @@ lws
 modal
 open-webui
 skypilot
+streamlit
 triton
 :::
```
docs/source/deployment/frameworks/streamlit.md (new file, +201 lines)

(deployment-streamlit)=

# Streamlit

[Streamlit](https://github.com/streamlit/streamlit) lets you transform Python scripts into interactive web apps in minutes, instead of weeks. Build dashboards, generate reports, or create chat apps.

Streamlit can be quickly integrated with vLLM as the backend API server, enabling LLM inference through vLLM's OpenAI-compatible API.

## Prerequisites

- Set up the vLLM environment (for example via `pip`, as sketched below)
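
If you are starting from scratch, installing vLLM from PyPI is typically sufficient; a minimal sketch, assuming a Linux machine with a supported GPU (see the vLLM installation guide for other platforms):

```console
pip install vllm
```
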
## Deploy

- Start the vLLM server with a supported chat completion model, e.g.

```console
vllm serve Qwen/Qwen1.5-0.5B-Chat
```
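
You can optionally verify that the server is up before building the UI; vLLM's OpenAI-compatible endpoint lists the served models (assuming the default port 8000):

```console
curl http://localhost:8000/v1/models
```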

- Install streamlit and openai:

```console
pip install streamlit openai
```

- Create a Python file (`streamlit-ui-app.py`) and copy the code below into it:

```python
import streamlit as st
from openai import OpenAI
from datetime import datetime
import os

# Read API settings from environment variables
openai_api_key = os.getenv('VLLM_API_KEY', "EMPTY")
openai_api_base = os.getenv('VLLM_API_BASE', "http://localhost:8000/v1")

# Initialize session states for managing chat sessions
if "sessions" not in st.session_state:
    st.session_state.sessions = {}

if "current_session" not in st.session_state:
    st.session_state.current_session = None

if "messages" not in st.session_state:
    st.session_state.messages = []

if "active_session" not in st.session_state:
    st.session_state.active_session = None

# Initialize session state for API base URL
if "api_base_url" not in st.session_state:
    st.session_state.api_base_url = openai_api_base


def create_new_chat_session():
    """Create a new chat session with timestamp as ID"""
    session_id = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    st.session_state.sessions[session_id] = []
    st.session_state.current_session = session_id
    st.session_state.active_session = session_id
    st.session_state.messages = []


def switch_to_chat_session(session_id):
    """Switch to a different chat session"""
    st.session_state.current_session = session_id
    st.session_state.active_session = session_id
    st.session_state.messages = st.session_state.sessions[session_id]


def get_llm_response(messages, model):
    """Get streaming response from the LLM

    Args:
        messages: List of message dictionaries
        model: Name of the model

    Returns:
        Streaming response object or error message string
    """
    try:
        response = client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True
        )
        return response
    except Exception as e:
        st.error(f"Error details: {str(e)}")
        return f"Error: {str(e)}"


# Sidebar - API Settings first
st.sidebar.title("API Settings")
new_api_base = st.sidebar.text_input("API Base URL:", value=st.session_state.api_base_url)
if new_api_base != st.session_state.api_base_url:
    st.session_state.api_base_url = new_api_base
    st.rerun()

st.sidebar.divider()

# Sidebar - Session Management
st.sidebar.title("Chat Sessions")
if st.sidebar.button("New Session"):
    create_new_chat_session()

# Display all sessions in reverse chronological order
for session_id in sorted(st.session_state.sessions.keys(), reverse=True):
    # Mark the active session with a pinned button
    if session_id == st.session_state.active_session:
        st.sidebar.button(
            f"📍 {session_id}",
            key=session_id,
            type="primary",
            on_click=switch_to_chat_session,
            args=(session_id,)
        )
    else:
        st.sidebar.button(
            f"Session {session_id}",
            key=session_id,
            on_click=switch_to_chat_session,
            args=(session_id,)
        )

# Main interface
st.title("vLLM Chat Assistant")

# Initialize OpenAI client with API settings
client = OpenAI(
    api_key=openai_api_key,
    base_url=st.session_state.api_base_url
)

# Get and display current model id
models = client.models.list()
model = models.data[0].id
st.markdown(f"**Model**: {model}")

# Initialize first session if none exists
if st.session_state.current_session is None:
    create_new_chat_session()
    st.session_state.active_session = st.session_state.current_session

# Display chat history for current session
for message in st.session_state.messages:
    with st.chat_message(message["role"]):
        st.write(message["content"])

# Handle user input and generate llm response
if prompt := st.chat_input("Type your message here..."):
    # Save user message to session
    st.session_state.messages.append({"role": "user", "content": prompt})
    st.session_state.sessions[st.session_state.current_session] = st.session_state.messages

    # Display user message
    with st.chat_message("user"):
        st.write(prompt)

    # Prepare messages for llm
    messages_for_llm = [
        {"role": m["role"], "content": m["content"]}
        for m in st.session_state.messages
    ]

    # Generate and display llm response
    with st.chat_message("assistant"):
        message_placeholder = st.empty()
        full_response = ""

        # Get streaming response from llm
        response = get_llm_response(messages_for_llm, model)
        if isinstance(response, str):
            message_placeholder.markdown(response)
            full_response = response
        else:
            for chunk in response:
                if hasattr(chunk.choices[0].delta, "content"):
                    content = chunk.choices[0].delta.content
                    if content:
                        full_response += content
                        message_placeholder.markdown(full_response + "▌")

            message_placeholder.markdown(full_response)

        # Save llm response to session history
        st.session_state.messages.append({"role": "assistant", "content": full_response})
```
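
If the UI shows an error, it can help to exercise the same streaming call outside Streamlit first. Below is a minimal standalone sketch (a hypothetical helper script, not part of this commit) that reuses the app's environment-variable defaults:

```python
# check_stream.py - standalone sanity check for the vLLM backend
# (hypothetical helper, not part of the documented app)
import os

from openai import OpenAI

client = OpenAI(
    api_key=os.getenv("VLLM_API_KEY", "EMPTY"),
    base_url=os.getenv("VLLM_API_BASE", "http://localhost:8000/v1"),
)

# Pick the first served model, exactly as the Streamlit app does
model = client.models.list().data[0].id

# Stream a single completion and print tokens as they arrive
stream = client.chat.completions.create(
    model=model,
    messages=[{"role": "user", "content": "Say hello in one sentence."}],
    stream=True,
)
for chunk in stream:
    if chunk.choices and chunk.choices[0].delta.content:
        print(chunk.choices[0].delta.content, end="", flush=True)
print()
```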

- Start the Streamlit web UI and start chatting:

```console
streamlit run streamlit-ui-app.py

# or specify VLLM_API_BASE and/or VLLM_API_KEY
VLLM_API_BASE="http://vllm-server-host:vllm-server-port/v1" streamlit run streamlit-ui-app.py

# start in debug mode to view more details
streamlit run streamlit-ui-app.py --logger.level=debug
```
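
If Streamlit runs on a remote or headless machine, its standard server flags can be combined with the same environment variables; for example:

```console
# bind to all interfaces and skip opening a local browser (standard Streamlit flags)
VLLM_API_BASE="http://localhost:8000/v1" streamlit run streamlit-ui-app.py --server.address 0.0.0.0 --server.headless true
```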

:::{image} /assets/deployment/streamlit-chat.png
:::
