Commit edb5286

robertgshaw2-redhat authored and simon-mo committed

[BugFix] Fix Memory Leak (#17567)

Signed-off-by: rshaw@neuralmagic.com <robertgshaw2@gmail.com>

1 parent ba41cc9 · commit edb5286

File tree

2 files changed: +81 -1 lines changed

tests/v1/core/test_scheduler.py (+77)
@@ -1165,3 +1165,80 @@ def test_kv_connector_handles_preemption():
     # All memory should be freed since nothing is running.
     assert scheduler.kv_cache_manager.block_pool.get_num_free_blocks() \
         == NUM_BLOCKS - 1
+
+
+def make_output(scheduler: Scheduler):
+    return ModelRunnerOutput(
+        req_ids=[req.request_id for req in scheduler.running],
+        req_id_to_index={
+            req.request_id: i
+            for i, req in enumerate(scheduler.running)
+        },
+        sampled_token_ids=[[1000]] * len(scheduler.running),
+        spec_token_ids=None,
+        logprobs=None,
+        prompt_logprobs_dict={},
+    )
+
+
+def assert_scheduler_empty(scheduler: Scheduler):
+    """Confirm the scheduler is "empty" - i.e. no leaks."""
+    # Scheduler Metadata.
+    assert len(scheduler.requests) == 0
+    assert len(scheduler.waiting) == 0
+    assert len(scheduler.running) == 0
+    assert len(scheduler.finished_req_ids) == 0
+    assert len(scheduler._cached_reqs_data) == 0
+
+    # EncoderCacheManager.
+    assert len(scheduler.encoder_cache_manager.freed) == 0
+    assert len(scheduler.encoder_cache_manager.cached) == 0
+
+    # KVCache Manager.
+    assert len(scheduler.kv_cache_manager.req_to_blocks) == 0
+    assert len(scheduler.kv_cache_manager.req_to_block_hashes) == 0
+    assert len(scheduler.kv_cache_manager.num_cached_block) == 0
+    num_free_blocks = (
+        scheduler.kv_cache_manager.block_pool.free_block_queue.num_free_blocks)
+    assert num_free_blocks == (
+        scheduler.kv_cache_manager.block_pool.num_gpu_blocks - 1)
+
+    # NOTE(rob): just the ref count on blocks will be 0. The hash
+    # value, etc will remain since we lazily evict for prefix cache.
+    for block in scheduler.kv_cache_manager.block_pool.blocks:
+        assert block.ref_cnt == 0
+        # assert block._block_hash is None
+    # assert (
+    #     len(scheduler.kv_cache_manager.block_pool.cached_block_hash_to_block
+    #         ) == 0)
+
+
+def test_memory_leak():
+    """Test that we do not have a memory leak."""
+
+    scheduler = create_scheduler(enable_prefix_caching=True)
+
+    NUM_REQUESTS = 5
+    NUM_TOKENS = 10
+    MAX_TOKENS = 10
+    requests = create_requests(num_requests=NUM_REQUESTS,
+                               num_tokens=NUM_TOKENS,
+                               max_tokens=MAX_TOKENS)
+
+    # Add each request.
+    for request in requests:
+        scheduler.add_request(request)
+    scheduler_output = scheduler.schedule()
+    model_runner_output = make_output(scheduler)
+    scheduler.update_from_output(scheduler_output, model_runner_output)
+
+    # Iterate until done.
+    while True:
+        scheduler_output = scheduler.schedule()
+        if len(scheduler.running) == 0:
+            break
+        model_runner_output = make_output(scheduler)
+        scheduler.update_from_output(scheduler_output, model_runner_output)
+
+    # Confirm no memory leak.
+    assert_scheduler_empty(scheduler)
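The new test drives the full request lifecycle: it queues five 10-token requests, feeds synthetic ModelRunnerOutput results back through update_from_output() until nothing is left running, then asserts that every internal scheduler structure has drained. The num_gpu_blocks - 1 bound accounts for the one block the pool keeps reserved, and block hashes are allowed to persist because prefix-cache eviction is lazy, per the NOTE in the diff. Assuming a standard vLLM development checkout, the test should run in isolation; a hedged example invocation:

    pytest tests/v1/core/test_scheduler.py::test_memory_leak -q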

vllm/v1/core/sched/scheduler.py (+4 -1)
@@ -739,7 +739,10 @@ def update_from_output(
 
         # Return the cached request data to the queue so they can be reused.
         for req_data in scheduler_output.scheduled_cached_reqs:
-            self._cached_reqs_data[req_data.req_id].append(req_data)
+            # NOTE(rob): since we free stopped reqs above, adding stopped reqs
+            # to _cached_reqs_data will cause a memory leak.
+            if req_data.req_id not in self.finished_req_ids:
+                self._cached_reqs_data[req_data.req_id].append(req_data)
 
         self.running = new_running
         engine_core_outputs = EngineCoreOutputs(
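For readers outside the codebase, here is a minimal, self-contained sketch of the leak pattern this one-line guard fixes. The names mirror the diff, but the module-level state and the recycle() helper are hypothetical simplifications, not vLLM's actual classes: a defaultdict keyed by request id silently re-creates an entry whenever data for an already-freed request is appended, so the mapping grows by one list per completed request.

    from collections import defaultdict

    # Hypothetical stand-ins for the scheduler state touched by the diff.
    _cached_reqs_data: dict[str, list[object]] = defaultdict(list)
    finished_req_ids: set[str] = set()

    def recycle(req_id: str, req_data: object) -> None:
        """Return req_data to the per-request free list for reuse."""
        # Buggy version: an unconditional append. For a request that just
        # finished (whose entry was already freed), defaultdict re-creates
        # the key, so the dict grows without bound:
        #     _cached_reqs_data[req_id].append(req_data)

        # Fixed version, mirroring the diff: skip requests that stopped
        # this step, since their cached data must not be resurrected.
        if req_id not in finished_req_ids:
            _cached_reqs_data[req_id].append(req_data)

    # Demonstration: without the guard, len(_cached_reqs_data) would climb
    # by one for every finished request id passed through recycle().
    finished_req_ids.add("req-0")
    recycle("req-0", object())
    assert "req-0" not in _cached_reqs_data  # no leaked entry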
