@@ -1808,7 +1808,6 @@ def validate_stats(results,
1808
1808
assert results
1809
1809
assert len (results ) == max_tokens if pytorch_backend else max_tokens + 1
1810
1810
for iter , result in enumerate (results ):
1811
- print (result )
1812
1811
ifbStats = result ["inflightBatchingStats" ]
1813
1812
expected_num_scheduled = 1 if (iter < max_tokens ) else 0
1814
1813
assert ifbStats ["numScheduledRequests" ] == expected_num_scheduled
@@ -1906,13 +1905,11 @@ def llm_get_stats_test_harness(tp_size: int = 1,
1906
1905
1907
1906
1908
1907
@pytest .mark .parametrize ("return_context_logits" , [True , False ])
1909
- @pytest .mark .parametrize ("pytorch_backend" , [True , False ])
1910
1908
@pytest .mark .parametrize ("enable_iter_req_stats" , [True , False ])
1911
- def test_llm_get_stats (return_context_logits , pytorch_backend ,
1912
- enable_iter_req_stats ):
1909
+ def test_llm_get_stats (return_context_logits , enable_iter_req_stats ):
1913
1910
llm_get_stats_test_harness (tp_size = 1 ,
1914
1911
return_context_logits = return_context_logits ,
1915
- pytorch_backend = pytorch_backend ,
1912
+ pytorch_backend = False ,
1916
1913
enable_iter_req_stats = enable_iter_req_stats )
1917
1914
1918
1915
@@ -1977,8 +1974,9 @@ async def task1():
1977
1974
results .append (stats )
1978
1975
1979
1976
assert results
1980
- validate_stats (results , pytorch_backend , max_tokens ,
1981
- enable_iter_req_stats )
1977
+ if not use_overlap :
1978
+ validate_stats (results , pytorch_backend , max_tokens ,
1979
+ enable_iter_req_stats )
1982
1980
1983
1981
async def main ():
1984
1982
for i in range (2 ): # test recurrent usage
@@ -1988,14 +1986,12 @@ async def main():
1988
1986
1989
1987
1990
1988
@pytest .mark .parametrize ("return_context_logits" , [True , False ])
1991
- @pytest .mark .parametrize ("pytorch_backend" , [True , False ])
1992
1989
@pytest .mark .parametrize ("enable_iter_req_stats" , [True , False ])
1993
- def test_llm_get_stats_async (return_context_logits , pytorch_backend ,
1994
- enable_iter_req_stats ):
1990
+ def test_llm_get_stats_async (return_context_logits , enable_iter_req_stats ):
1995
1991
llm_get_stats_async_test_harness (
1996
1992
tp_size = 1 ,
1997
1993
return_context_logits = return_context_logits ,
1998
- pytorch_backend = pytorch_backend ,
1994
+ pytorch_backend = False ,
1999
1995
enable_iter_req_stats = enable_iter_req_stats )
2000
1996
2001
1997
0 commit comments