@@ -190,3 +190,85 @@ macro_rules! reallyhard2 { () => (r"\w+\s+Holmes") }
190
190
191
191
// Benchmark `reallyhard2_1K`: hands `bench_match!` the `reallyhard2!()` pattern
// (r"\w+\s+Holmes") together with the text returned by
// `get_text(TXT_1K, reallyhard2_suffix())`.
// NOTE(review): `bench_match!`, `get_text`, `TXT_1K` and `reallyhard2_suffix`
// are defined outside this view -- confirm their contracts at the definitions.
bench_match ! ( reallyhard2_1K, reallyhard2!( ) ,
192
192
get_text( TXT_1K , reallyhard2_suffix( ) ) ) ;
193
+
194
+
195
+ //
196
+ // Benchmarks to justify the short-haystack NFA fallthrough optimization
197
+ // implemented by `read_captures_at` in regex/src/exec.rs. See GitHub issue
198
+ // #348.
199
+ //
200
+ // The procedure used to try to determine the right hardcoded cutoff
201
+ // for the short-haystack optimization in issue #348 is as follows.
202
+ //
203
+ // ```
204
+ // > cd bench
205
+ // > cargo bench --features re-rust short_hay | tee dfa-nfa.res
206
+ // > # modify the `MatchType::Dfa` branch in exec.rs:read_captures_at
207
+ // > # to just execute the NFA
208
+ // > cargo bench --features re-rust short_hay | tee nfa-only.res
209
+ // > cargo benchcmp dfa-nfa.res nfa-only.res
210
+ // ```
211
+ //
212
+ // The expected result is that short inputs will go faster under
213
+ // the nfa-only mode, but at some turnover point the dfa-nfa mode
214
+ // will start to win again. Unfortunately, that is not what happened.
215
+ // Instead there was no noticeable change in the bench results, so
216
+ // I've opted to just do the more conservative anchor optimization.
217
+ //
218
+ bench_captures ! ( short_haystack_1x,
219
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
220
+ String :: from( "aaaabbbbccccbbbdddd" ) ) ;
221
+ bench_captures ! ( short_haystack_2x,
222
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
223
+ format!( "{}bbbbccccbbb{}" ,
224
+ repeat( "aaaa" ) . take( 2 ) . collect:: <String >( ) ,
225
+ repeat( "dddd" ) . take( 2 ) . collect:: <String >( ) ,
226
+ ) ) ;
227
+ bench_captures ! ( short_haystack_3x,
228
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
229
+ format!( "{}bbbbccccbbb{}" ,
230
+ repeat( "aaaa" ) . take( 3 ) . collect:: <String >( ) ,
231
+ repeat( "dddd" ) . take( 3 ) . collect:: <String >( ) ,
232
+ ) ) ;
233
+ bench_captures ! ( short_haystack_4x,
234
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
235
+ format!( "{}bbbbccccbbb{}" ,
236
+ repeat( "aaaa" ) . take( 4 ) . collect:: <String >( ) ,
237
+ repeat( "dddd" ) . take( 4 ) . collect:: <String >( ) ,
238
+ ) ) ;
239
+ bench_captures ! ( short_haystack_10x,
240
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
241
+ format!( "{}bbbbccccbbb{}" ,
242
+ repeat( "aaaa" ) . take( 10 ) . collect:: <String >( ) ,
243
+ repeat( "dddd" ) . take( 10 ) . collect:: <String >( ) ,
244
+ ) ) ;
245
+ bench_captures ! ( short_haystack_100x,
246
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
247
+ format!( "{}bbbbccccbbb{}" ,
248
+ repeat( "aaaa" ) . take( 100 ) . collect:: <String >( ) ,
249
+ repeat( "dddd" ) . take( 100 ) . collect:: <String >( ) ,
250
+ ) ) ;
251
+ bench_captures ! ( short_haystack_1000x,
252
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
253
+ format!( "{}bbbbccccbbb{}" ,
254
+ repeat( "aaaa" ) . take( 1000 ) . collect:: <String >( ) ,
255
+ repeat( "dddd" ) . take( 1000 ) . collect:: <String >( ) ,
256
+ ) ) ;
257
+ bench_captures ! ( short_haystack_10000x,
258
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
259
+ format!( "{}bbbbccccbbb{}" ,
260
+ repeat( "aaaa" ) . take( 10000 ) . collect:: <String >( ) ,
261
+ repeat( "dddd" ) . take( 10000 ) . collect:: <String >( ) ,
262
+ ) ) ;
263
+ bench_captures ! ( short_haystack_100000x,
264
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
265
+ format!( "{}bbbbccccbbb{}" ,
266
+ repeat( "aaaa" ) . take( 100000 ) . collect:: <String >( ) ,
267
+ repeat( "dddd" ) . take( 100000 ) . collect:: <String >( ) ,
268
+ ) ) ;
269
+ bench_captures ! ( short_haystack_1000000x,
270
+ Regex :: new( r"(bbbb)cccc(bbb)" ) . unwrap( ) , 2 ,
271
+ format!( "{}bbbbccccbbb{}" ,
272
+ repeat( "aaaa" ) . take( 1000000 ) . collect:: <String >( ) ,
273
+ repeat( "dddd" ) . take( 1000000 ) . collect:: <String >( ) ,
274
+ ) ) ;
0 commit comments