File tree 2 files changed +9
-5
lines changed
examples/offline_inference
2 files changed +9
-5
lines changed Original file line number Diff line number Diff line change @@ -118,8 +118,8 @@ def main():
118
118
acceptance_counts [step ] += count
119
119
120
120
print ("-" * 50 )
121
- print (f"mean acceptance length: \
122
- { sum (acceptance_counts ) / acceptance_counts [0 ]:.2f} " )
121
+ print (f"mean acceptance length (including bonus tokens) : \
122
+ { 1 + ( sum (acceptance_counts ) / acceptance_counts [0 ]) :.2f} " )
123
123
print ("-" * 50 )
124
124
125
125
# print acceptance at each token position
Original file line number Diff line number Diff line change @@ -73,7 +73,9 @@ def log(self, log_fn=logger.info):
73
73
74
74
draft_acceptance_rate = (num_accepted_tokens / num_draft_tokens *
75
75
100 if num_draft_tokens > 0 else float ("nan" ))
76
- mean_acceptance_length = (num_accepted_tokens / num_drafts )
76
+
77
+ # Conventionally, mean acceptance length includes the bonus token
78
+ mean_acceptance_length = 1 + (num_accepted_tokens / num_drafts )
77
79
78
80
pos_matrix = np .array (self .accepted_tokens_per_pos_lists )
79
81
acceptance_rates = np .sum (pos_matrix , axis = 0 ) / num_drafts
@@ -103,10 +105,12 @@ class SpecDecodingProm:
103
105
rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
104
106
rate(vllm:spec_decode_num_draft_tokens_total[$interval])
105
107
106
- The mean acceptance length can be calculated using:
108
+ The mean acceptance length (conventionally including bonus tokens)
109
+ can be calculated using:
107
110
111
+ 1 + (
108
112
rate(vllm:spec_decode_num_accepted_tokens_total[$interval]) /
109
- rate(vllm:spec_decode_num_drafts[$interval])
113
+ rate(vllm:spec_decode_num_drafts[$interval]))
110
114
111
115
A per-position acceptance rate vector can be computed using
112
116
You can’t perform that action at this time.
0 commit comments