Skip to content

Commit 7eefc5d

Browse files
committed
automata/meta: revert broadening of reverse suffix optimization
This reverts commit 8a8d599 and includes a regression test, as well as a tweak to a log message.

Essentially, the broadening was improper. We have to be careful when dealing with suffixes as opposed to prefixes. Namely, my logic previously was that the broadening was okay because we were already doing it for the reverse inner optimization. But the reverse inner optimization works with prefixes, not suffixes. So the comparison wasn't quite correct.

This goes back to only applying the reverse suffix optimization when there is a non-empty single common suffix.

Fixes #1110
Ref astral-sh/ruff#7980
1 parent e7bd19d commit 7eefc5d

File tree

2 files changed

+41
-13
lines changed

2 files changed

+41
-13
lines changed

regex-automata/src/meta/strategy.rs

+26-13
Original file line numberDiff line numberDiff line change
@@ -1167,21 +1167,34 @@ impl ReverseSuffix {
11671167
return Err(core);
11681168
}
11691169
let kind = core.info.config().get_match_kind();
1170-
let suffixseq = crate::util::prefilter::suffixes(kind, hirs);
1171-
let Some(suffixes) = suffixseq.literals() else {
1172-
debug!(
1173-
"skipping reverse suffix optimization because \
1174-
the extract suffix sequence is not finite",
1175-
);
1176-
return Err(core);
1170+
let suffixes = crate::util::prefilter::suffixes(kind, hirs);
1171+
let lcs = match suffixes.longest_common_suffix() {
1172+
None => {
1173+
debug!(
1174+
"skipping reverse suffix optimization because \
1175+
a longest common suffix could not be found",
1176+
);
1177+
return Err(core);
1178+
}
1179+
Some(lcs) if lcs.is_empty() => {
1180+
debug!(
1181+
"skipping reverse suffix optimization because \
1182+
the longest common suffix is the empty string",
1183+
);
1184+
return Err(core);
1185+
}
1186+
Some(lcs) => lcs,
11771187
};
1178-
let Some(pre) = Prefilter::new(kind, suffixes) else {
1179-
debug!(
1180-
"skipping reverse suffix optimization because \
1188+
let pre = match Prefilter::new(kind, &[lcs]) {
1189+
Some(pre) => pre,
1190+
None => {
1191+
debug!(
1192+
"skipping reverse suffix optimization because \
11811193
a prefilter could not be constructed from the \
11821194
longest common suffix",
1183-
);
1184-
return Err(core);
1195+
);
1196+
return Err(core);
1197+
}
11851198
};
11861199
if !pre.is_fast() {
11871200
debug!(
@@ -1268,7 +1281,7 @@ impl ReverseSuffix {
12681281
e.try_search_half_rev_limited(&input, min_start)
12691282
} else if let Some(e) = self.core.hybrid.get(&input) {
12701283
trace!(
1271-
"using lazy DFA for reverse inner search at {:?}, \
1284+
"using lazy DFA for reverse suffix search at {:?}, \
12721285
but will be stopped at {} to avoid quadratic behavior",
12731286
input.get_span(),
12741287
min_start,

testdata/regression.toml

+15
Original file line numberDiff line numberDiff line change
@@ -813,3 +813,18 @@ name = "hir-optimization-out-of-order-class"
813813
regex = '^[[:alnum:]./-]+$'
814814
haystack = "a-b"
815815
matches = [[0, 3]]
816+
817+
# This is a regression test for an improper reverse suffix optimization. This
818+
# occurred when I "broadened" the applicability of the optimization to include
819+
# multiple possible literal suffixes instead of only sticking to a non-empty
820+
# longest common suffix. It turns out that, at least given how the reverse
821+
# suffix optimization works, we need to stick to the longest common suffix for
822+
# now.
823+
#
824+
# See: https://github.com/rust-lang/regex/issues/1110
825+
# See also: https://github.com/astral-sh/ruff/pull/7980
826+
[[test]]
827+
name = 'improper-reverse-suffix-optimization'
828+
regex = '(\\N\{[^}]+})|([{}])'
829+
haystack = 'hiya \N{snowman} bye'
830+
matches = [[[5, 16], [5, 16], []]]

0 commit comments

Comments
 (0)