Skip to content

Commit bd5f2b4

Browse files
committed
syntax: add is_literal and is_alternation_literal
This adds a couple new methods on HIR expressions for determining whether they are literals or not. This is useful for determining whether to apply optimizations such as Aho-Corasick without re-analyzing the syntax.
1 parent 53395bf commit bd5f2b4

File tree

2 files changed

+100
-0
lines changed

2 files changed

+100
-0
lines changed

regex-syntax/src/hir/mod.rs

+55
Original file line number | Diff line number | Diff line change
@@ -227,6 +227,8 @@ impl Hir {
227227
info.set_any_anchored_start(false);
228228
info.set_any_anchored_end(false);
229229
info.set_match_empty(true);
230+
info.set_literal(true);
231+
info.set_alternation_literal(true);
230232
Hir {
231233
kind: HirKind::Empty,
232234
info: info,
@@ -253,6 +255,8 @@ impl Hir {
253255
info.set_any_anchored_start(false);
254256
info.set_any_anchored_end(false);
255257
info.set_match_empty(false);
258+
info.set_literal(true);
259+
info.set_alternation_literal(true);
256260
Hir {
257261
kind: HirKind::Literal(lit),
258262
info: info,
@@ -271,6 +275,8 @@ impl Hir {
271275
info.set_any_anchored_start(false);
272276
info.set_any_anchored_end(false);
273277
info.set_match_empty(false);
278+
info.set_literal(false);
279+
info.set_alternation_literal(false);
274280
Hir {
275281
kind: HirKind::Class(class),
276282
info: info,
@@ -289,6 +295,8 @@ impl Hir {
289295
info.set_any_anchored_start(false);
290296
info.set_any_anchored_end(false);
291297
info.set_match_empty(true);
298+
info.set_literal(false);
299+
info.set_alternation_literal(false);
292300
if let Anchor::StartText = anchor {
293301
info.set_anchored_start(true);
294302
info.set_line_anchored_start(true);
@@ -322,6 +330,8 @@ impl Hir {
322330
info.set_line_anchored_end(false);
323331
info.set_any_anchored_start(false);
324332
info.set_any_anchored_end(false);
333+
info.set_literal(false);
334+
info.set_alternation_literal(false);
325335
// A negated word boundary matches the empty string, but a normal
326336
// word boundary does not!
327337
info.set_match_empty(word_boundary.is_negated());
@@ -357,6 +367,8 @@ impl Hir {
357367
info.set_any_anchored_start(rep.hir.is_any_anchored_start());
358368
info.set_any_anchored_end(rep.hir.is_any_anchored_end());
359369
info.set_match_empty(rep.is_match_empty() || rep.hir.is_match_empty());
370+
info.set_literal(false);
371+
info.set_alternation_literal(false);
360372
Hir {
361373
kind: HirKind::Repetition(rep),
362374
info: info,
@@ -375,6 +387,8 @@ impl Hir {
375387
info.set_any_anchored_start(group.hir.is_any_anchored_start());
376388
info.set_any_anchored_end(group.hir.is_any_anchored_end());
377389
info.set_match_empty(group.hir.is_match_empty());
390+
info.set_literal(false);
391+
info.set_alternation_literal(false);
378392
Hir {
379393
kind: HirKind::Group(group),
380394
info: info,
@@ -395,6 +409,8 @@ impl Hir {
395409
info.set_any_anchored_start(false);
396410
info.set_any_anchored_end(false);
397411
info.set_match_empty(true);
412+
info.set_literal(true);
413+
info.set_alternation_literal(true);
398414

399415
// Some attributes require analyzing all sub-expressions.
400416
for e in &exprs {
@@ -416,6 +432,14 @@ impl Hir {
416432

417433
let x = info.is_match_empty() && e.is_match_empty();
418434
info.set_match_empty(x);
435+
436+
let x = info.is_literal() && e.is_literal();
437+
info.set_literal(x);
438+
439+
let x =
440+
info.is_alternation_literal()
441+
&& e.is_alternation_literal();
442+
info.set_alternation_literal(x);
419443
}
420444
// Anchored attributes require something slightly more
421445
// sophisticated. Normally, WLOG, to determine whether an
@@ -488,6 +512,8 @@ impl Hir {
488512
info.set_any_anchored_start(false);
489513
info.set_any_anchored_end(false);
490514
info.set_match_empty(false);
515+
info.set_literal(false);
516+
info.set_alternation_literal(true);
491517

492518
// Some attributes require analyzing all sub-expressions.
493519
for e in &exprs {
@@ -523,6 +549,11 @@ impl Hir {
523549

524550
let x = info.is_match_empty() || e.is_match_empty();
525551
info.set_match_empty(x);
552+
553+
let x =
554+
info.is_alternation_literal()
555+
&& e.is_literal();
556+
info.set_alternation_literal(x);
526557
}
527558
Hir {
528559
kind: HirKind::Alternation(exprs),
@@ -655,6 +686,28 @@ impl Hir {
655686
pub fn is_match_empty(&self) -> bool {
656687
self.info.is_match_empty()
657688
}
689+
690+
/// Return true if and only if this HIR is a simple literal. This is only
691+
/// true when this HIR expression is either itself a `Literal` or a
692+
/// concatenation of only `Literal`s.
693+
///
694+
/// For example, `f` and `foo` are literals, but `f+`, `(foo)`, `foo()`
695+
/// are not (even though they contain sub-expressions that are literals).
696+
pub fn is_literal(&self) -> bool {
697+
self.info.is_literal()
698+
}
699+
700+
/// Return true if and only if this HIR is either a simple literal or an
701+
/// alternation of simple literals. This is only
702+
/// true when this HIR expression is either itself a `Literal` or a
703+
/// concatenation of only `Literal`s or an alternation of only `Literal`s.
704+
///
705+
/// For example, `f`, `foo`, `a|b|c`, and `foo|bar|baz` are alternation
706+
/// literals, but `f+`, `(foo)`, `foo()`
707+
/// are not (even though they contain sub-expressions that are literals).
708+
pub fn is_alternation_literal(&self) -> bool {
709+
self.info.is_alternation_literal()
710+
}
658711
}
659712

660713
impl HirKind {
@@ -1415,6 +1468,8 @@ impl HirInfo {
14151468
define_bool!(6, is_any_anchored_start, set_any_anchored_start);
14161469
define_bool!(7, is_any_anchored_end, set_any_anchored_end);
14171470
define_bool!(8, is_match_empty, set_match_empty);
1471+
define_bool!(9, is_literal, set_literal);
1472+
define_bool!(10, is_alternation_literal, set_alternation_literal);
14181473
}
14191474

14201475
#[cfg(test)]

regex-syntax/src/hir/translate.rs

+45
Original file line numberDiff line numberDiff line change
@@ -2589,4 +2589,49 @@ mod tests {
25892589
assert!(!t(r"\b").is_match_empty());
25902590
assert!(!t(r"(?-u)\b").is_match_empty());
25912591
}
2592+
2593+
#[test]
2594+
fn analysis_is_literal() {
2595+
// Positive examples.
2596+
assert!(t(r"").is_literal());
2597+
assert!(t(r"a").is_literal());
2598+
assert!(t(r"ab").is_literal());
2599+
assert!(t(r"abc").is_literal());
2600+
assert!(t(r"(?m)abc").is_literal());
2601+
2602+
// Negative examples.
2603+
assert!(!t(r"^").is_literal());
2604+
assert!(!t(r"a|b").is_literal());
2605+
assert!(!t(r"(a)").is_literal());
2606+
assert!(!t(r"a+").is_literal());
2607+
assert!(!t(r"foo(a)").is_literal());
2608+
assert!(!t(r"(a)foo").is_literal());
2609+
assert!(!t(r"[a]").is_literal());
2610+
}
2611+
2612+
#[test]
2613+
fn analysis_is_alternation_literal() {
2614+
// Positive examples.
2615+
assert!(t(r"").is_alternation_literal());
2616+
assert!(t(r"a").is_alternation_literal());
2617+
assert!(t(r"ab").is_alternation_literal());
2618+
assert!(t(r"abc").is_alternation_literal());
2619+
assert!(t(r"(?m)abc").is_alternation_literal());
2620+
assert!(t(r"a|b").is_alternation_literal());
2621+
assert!(t(r"a|b|c").is_alternation_literal());
2622+
assert!(t(r"foo|bar").is_alternation_literal());
2623+
assert!(t(r"foo|bar|baz").is_alternation_literal());
2624+
2625+
// Negative examples.
2626+
assert!(!t(r"^").is_alternation_literal());
2627+
assert!(!t(r"(a)").is_alternation_literal());
2628+
assert!(!t(r"a+").is_alternation_literal());
2629+
assert!(!t(r"foo(a)").is_alternation_literal());
2630+
assert!(!t(r"(a)foo").is_alternation_literal());
2631+
assert!(!t(r"[a]").is_alternation_literal());
2632+
assert!(!t(r"[a]|b").is_alternation_literal());
2633+
assert!(!t(r"a|[b]").is_alternation_literal());
2634+
assert!(!t(r"(a)|b").is_alternation_literal());
2635+
assert!(!t(r"a|(b)").is_alternation_literal());
2636+
}
25922637
}

0 commit comments

Comments
 (0)