@@ -4,6 +4,7 @@ import type {
4
4
EdgeAssertion ,
5
5
Element ,
6
6
LookaroundAssertion ,
7
+ Node ,
7
8
WordBoundaryAssertion ,
8
9
} from "@eslint-community/regexpp/ast"
9
10
import type { RegExpContext } from "../utils"
@@ -23,10 +24,106 @@ import {
23
24
isPotentiallyEmpty ,
24
25
isZeroLength ,
25
26
FirstConsumedChars ,
27
+ invertMatchingDirection ,
26
28
} from "regexp-ast-analysis"
27
29
import { mention } from "../utils/mention"
28
30
import { assertNever } from "../utils/util"
29
31
32
+ function containsAssertion ( n : Node ) : boolean {
33
+ return hasSomeDescendant ( n , ( d ) => d . type === "Assertion" )
34
+ }
35
+
36
+ /**
37
+ * Returns whether the given lookaround asserts exactly one character in the given direction.
38
+ */
39
+ function isSingleCharacterAssertion (
40
+ assertion : Assertion ,
41
+ direction : MatchingDirection ,
42
+ flags : ReadonlyFlags ,
43
+ ) : boolean {
44
+ switch ( assertion . kind ) {
45
+ case "word" :
46
+ // \b and \B assert one character in BOTH directions
47
+ return false
48
+
49
+ case "start" :
50
+ return direction === "rtl"
51
+ case "end" :
52
+ return direction === "ltr"
53
+
54
+ default :
55
+ break
56
+ }
57
+
58
+ if ( getMatchingDirectionFromAssertionKind ( assertion . kind ) !== direction ) {
59
+ return false
60
+ }
61
+
62
+ return assertion . alternatives . every ( ( alt ) => {
63
+ if ( ! containsAssertion ( alt ) ) {
64
+ // if we don't contains assertions, then we can only need to check
65
+ // the length range of the consumed characters
66
+ const range = getLengthRange ( alt , flags )
67
+ return range . min === 1 && range . max === 1
68
+ }
69
+
70
+ // now it gets tricky
71
+ // e.g. (?=(?=[a-z])\w(?<=[a-g])) is a single character assertion
72
+
73
+ let consumed = false
74
+ let asserted = false
75
+ const elements =
76
+ direction === "ltr" ? alt . elements : [ ...alt . elements ] . reverse ( )
77
+ for ( const e of elements ) {
78
+ if ( ! consumed ) {
79
+ // before we consumed our first (and only) character, we can
80
+ // have single character assertions
81
+
82
+ if (
83
+ e . type === "Assertion" &&
84
+ isSingleCharacterAssertion ( e , direction , flags )
85
+ ) {
86
+ asserted = true
87
+ continue
88
+ }
89
+
90
+ if ( containsAssertion ( e ) ) {
91
+ // too complex to reason about, so we just give up
92
+ return false
93
+ }
94
+
95
+ const range = getLengthRange ( e , flags )
96
+ if ( range . max === 0 ) {
97
+ // we haven't consumed anything, so onto the next one
98
+ continue
99
+ } else if ( range . min === 1 && range . max === 1 ) {
100
+ // finally, a character
101
+ consumed = true
102
+ } else {
103
+ // it's not exactly a single character
104
+ return false
105
+ }
106
+ } else {
107
+ // since we already consumed our first (and only) character, we
108
+ // can at most have assertions in the direction of the
109
+ // first character
110
+
111
+ const otherDir = invertMatchingDirection ( direction )
112
+ if (
113
+ e . type === "Assertion" &&
114
+ isSingleCharacterAssertion ( e , otherDir , flags )
115
+ ) {
116
+ continue
117
+ }
118
+
119
+ return false
120
+ }
121
+ }
122
+
123
+ return consumed || asserted
124
+ } )
125
+ }
126
+
30
127
/**
31
128
* Combines 2 look chars such that the result is equivalent to 2 adjacent
32
129
* assertions `(?=a)(?=b)`.
@@ -356,40 +453,31 @@ export default createRule("no-useless-assertions", {
356
453
)
357
454
}
358
455
359
- if ( after . edge ) {
360
- return
361
- }
362
-
363
- // accept is harder because that can't generally be decided by the first character
364
-
365
- // if this contains another assertion then that might reject. It's out of our control
456
+ // We can only decide the accept case for exact single-character assertions.
457
+ // We want the character after the assertion to be a subset of the asserted characters. For this to be
458
+ // correct, the set of assertion characters needs to be exact. We also have to consider edges. Edges
459
+ // can be thought of as a special character, so the same subset requirement applies.
460
+ const edgeSubset = firstOf . edge || ! after . edge
366
461
if (
367
- ! hasSomeDescendant (
462
+ firstOf . exact &&
463
+ edgeSubset &&
464
+ after . char . isSubsetOf ( firstOf . char ) &&
465
+ isSingleCharacterAssertion (
368
466
assertion ,
369
- ( d ) => d !== assertion && d . type === "Assertion" ,
467
+ getMatchingDirectionFromAssertionKind ( assertion . kind ) ,
468
+ flags ,
370
469
)
371
470
) {
372
- const range = getLengthRange ( assertion . alternatives , flags )
373
- // we only check the first character, so it's only correct if the assertion requires only one
374
- // character
375
- if ( range . max === 1 ) {
376
- // require exactness
377
- if (
378
- firstOf . exact &&
379
- after . char . isSubsetOf ( firstOf . char )
380
- ) {
381
- report (
382
- assertion ,
383
- assertion . negate
384
- ? "alwaysForNegativeLookaround"
385
- : "alwaysForLookaround" ,
386
- {
387
- kind : assertion . kind ,
388
- acceptOrReject : accept ,
389
- } ,
390
- )
391
- }
392
- }
471
+ report (
472
+ assertion ,
473
+ assertion . negate
474
+ ? "alwaysForNegativeLookaround"
475
+ : "alwaysForLookaround" ,
476
+ {
477
+ kind : assertion . kind ,
478
+ acceptOrReject : accept ,
479
+ } ,
480
+ )
393
481
}
394
482
}
395
483
0 commit comments