@@ -50,7 +50,6 @@ def test_incremental_detokenization(request_output_kind: RequestOutputKind,
     # Make N requests.
     requests = [
         EngineCoreRequest(request_id=f"request-{idx}",
-                          prompt=prompt,
                           prompt_token_ids=prompt_tokens,
                           arrival_time=0,
                           mm_inputs=None,
@@ -64,14 +63,13 @@ def test_incremental_detokenization(request_output_kind: RequestOutputKind,
                               output_kind=request_output_kind,
                               stop=[],
                               include_stop_str_in_output=False,
-                          )) for idx, (prompt, prompt_tokens) in enumerate(
-                              zip(dummy_test_vectors.prompt_strings,
-                                  dummy_test_vectors.prompt_tokens))
+                          ))
+        for idx, prompt_tokens in enumerate(dummy_test_vectors.prompt_tokens)
     ]
 
     # Add requests to the detokenizer.
-    for request in requests:
-        output_processor.add_request(request)
+    for request, prompt in zip(requests, dummy_test_vectors.prompt_strings):
+        output_processor.add_request(request, prompt)
 
     gen_strings = {}
     gen_tokens = {}
@@ -398,7 +396,6 @@ def test_logprobs_processor(request_output_kind: RequestOutputKind,
     ]
     requests = [
         EngineCoreRequest(request_id=request_id_list[idx],
-                          prompt=prompt,
                           prompt_token_ids=prompt_tokens,
                           arrival_time=0,
                           mm_inputs=None,
@@ -414,14 +411,13 @@ def test_logprobs_processor(request_output_kind: RequestOutputKind,
                               include_stop_str_in_output=False,
                               logprobs=num_sample_logprobs,
                               prompt_logprobs=num_prompt_logprobs,
-                          )) for idx, (prompt, prompt_tokens) in enumerate(
-                              zip(dummy_test_vectors.prompt_strings,
-                                  dummy_test_vectors.prompt_tokens))
+                          ))
+        for idx, prompt_tokens in enumerate(dummy_test_vectors.prompt_tokens)
     ]
 
     # Add requests to the detokenizer.
-    for request in requests:
-        output_processor.add_request(request)
+    for request, prompt in zip(requests, dummy_test_vectors.prompt_strings):
+        output_processor.add_request(request, prompt)
 
     gen_tokens = {}
     gen_logprobs = {}
@@ -562,7 +558,6 @@ def test_stop_token(include_stop_str_in_output: bool,
     request_id = "request-0"
     request = EngineCoreRequest(
         request_id=request_id,
-        prompt=prompt_string,
         prompt_token_ids=prompt_tokens,
         arrival_time=0,
         mm_inputs=None,
@@ -583,7 +578,7 @@ def test_stop_token(include_stop_str_in_output: bool,
     ))
 
     # Add request to the detokenizer.
-    output_processor.add_request(request)
+    output_processor.add_request(request, prompt_string)
 
     # Loop over engine core steps; run output processor
     gen_string = ""
@@ -659,7 +654,6 @@ def test_stop_string(include_stop_str_in_output: bool,
     requests = [
         EngineCoreRequest(
             request_id=request_id_list[idx],
-            prompt=prompt,
             prompt_token_ids=prompt_tokens,
             arrival_time=0,
             mm_inputs=None,
@@ -675,14 +669,13 @@ def test_stop_string(include_stop_str_in_output: bool,
             include_stop_str_in_output=include_stop_str_in_output,
             logprobs=num_sample_logprobs,
             prompt_logprobs=None,
-        )) for idx, (prompt, prompt_tokens) in enumerate(
-            zip(dummy_test_vectors.prompt_strings,
-                dummy_test_vectors.prompt_tokens))
+        ))
+        for idx, prompt_tokens in enumerate(dummy_test_vectors.prompt_tokens)
     ]
 
     # Add requests to the detokenizer.
-    for request in requests:
-        output_processor.add_request(request)
+    for request, prompt in zip(requests, dummy_test_vectors.prompt_strings):
+        output_processor.add_request(request, prompt)
 
     gen_strings = {}
     gen_tokens = {}
@@ -774,7 +767,6 @@ def test_iteration_stats(dummy_test_vectors):
     requests = [
         EngineCoreRequest(
             request_id=f"request-{idx}",
-            prompt=prompt,
             prompt_token_ids=prompt_tokens,
             arrival_time=0,
             mm_inputs=None,
@@ -783,15 +775,13 @@ def test_iteration_stats(dummy_test_vectors):
             eos_token_id=None,
             lora_request=None,
             sampling_params=SamplingParams(),
-        ) for idx, (prompt, prompt_tokens) in enumerate(
-            zip(dummy_test_vectors.prompt_strings,
-                dummy_test_vectors.prompt_tokens))
+        ) for idx, prompt_tokens in enumerate(dummy_test_vectors.prompt_tokens)
     ]
 
     # Add all requests except one to the OutputProcessor.
     num_active = len(dummy_test_vectors.generation_tokens) - 1
     for request in requests[:num_active]:
-        output_processor.add_request(request)
+        output_processor.add_request(request, None)
     inactive_request = requests[num_active]
 
     # First iteration has 2 prefills.
@@ -817,7 +807,7 @@ def test_iteration_stats(dummy_test_vectors):
     assert iteration_stats.num_generation_tokens == num_active
 
     # Add a new request - prefill and 2 decodes in this step.
-    output_processor.add_request(inactive_request)
+    output_processor.add_request(inactive_request, None)
     num_active += 1
     outputs = engine_core.get_outputs()[:num_active]
     iteration_stats = IterationStats()
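Taken together, the hunks apply one refactor: the prompt string is dropped from EngineCoreRequest, and the tests instead hand it to OutputProcessor.add_request as a separate argument (zipped from dummy_test_vectors.prompt_strings, or None where no string is needed). Below is a minimal sketch of that call pattern; the stand-in classes and the add_request signature are assumptions inferred only from the call sites in this diff, not the actual vLLM source.

    from typing import Optional

    # Hypothetical stand-in: the real EngineCoreRequest lives in vLLM's v1
    # engine; after this change it carries token IDs but no prompt string.
    class EngineCoreRequest:
        def __init__(self, request_id: str, prompt_token_ids: list[int]):
            self.request_id = request_id
            self.prompt_token_ids = prompt_token_ids

    class OutputProcessor:
        def __init__(self) -> None:
            self._prompts: dict[str, Optional[str]] = {}

        # Assumed post-change signature, inferred from the diff's call sites:
        # the prompt string arrives separately and may be None.
        def add_request(self, request: EngineCoreRequest,
                        prompt: Optional[str] = None) -> None:
            self._prompts[request.request_id] = prompt

    # Usage mirroring the updated tests: requests are zipped with their
    # prompt strings at registration time rather than carrying them inside.
    requests = [EngineCoreRequest(f"request-{i}", [101, 102]) for i in range(2)]
    prompt_strings = ["Hello", "World"]
    processor = OutputProcessor()
    for request, prompt in zip(requests, prompt_strings):
        processor.add_request(request, prompt)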