Skip to content

Commit 882beb3

Browse files
Improve detection for regexp/no-useless-assertion (#663)
* Improve detection for `regexp/no-useless-assertion` * Create odd-oranges-matter.md
1 parent 3956097 commit 882beb3

File tree

3 files changed

+144
-30
lines changed

3 files changed

+144
-30
lines changed

.changeset/odd-oranges-matter.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"eslint-plugin-regexp": minor
3+
---
4+
5+
Improve detection of useless assertions for `regexp/no-useless-assertion`

lib/rules/no-useless-assertions.ts

Lines changed: 118 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import type {
44
EdgeAssertion,
55
Element,
66
LookaroundAssertion,
7+
Node,
78
WordBoundaryAssertion,
89
} from "@eslint-community/regexpp/ast"
910
import type { RegExpContext } from "../utils"
@@ -23,10 +24,106 @@ import {
2324
isPotentiallyEmpty,
2425
isZeroLength,
2526
FirstConsumedChars,
27+
invertMatchingDirection,
2628
} from "regexp-ast-analysis"
2729
import { mention } from "../utils/mention"
2830
import { assertNever } from "../utils/util"
2931

32+
function containsAssertion(n: Node): boolean {
33+
return hasSomeDescendant(n, (d) => d.type === "Assertion")
34+
}
35+
36+
/**
37+
* Returns whether the given lookaround asserts exactly one character in the given direction.
38+
*/
39+
function isSingleCharacterAssertion(
40+
assertion: Assertion,
41+
direction: MatchingDirection,
42+
flags: ReadonlyFlags,
43+
): boolean {
44+
switch (assertion.kind) {
45+
case "word":
46+
// \b and \B assert one character in BOTH directions
47+
return false
48+
49+
case "start":
50+
return direction === "rtl"
51+
case "end":
52+
return direction === "ltr"
53+
54+
default:
55+
break
56+
}
57+
58+
if (getMatchingDirectionFromAssertionKind(assertion.kind) !== direction) {
59+
return false
60+
}
61+
62+
return assertion.alternatives.every((alt) => {
63+
if (!containsAssertion(alt)) {
64+
// if we don't contains assertions, then we can only need to check
65+
// the length range of the consumed characters
66+
const range = getLengthRange(alt, flags)
67+
return range.min === 1 && range.max === 1
68+
}
69+
70+
// now it gets tricky
71+
// e.g. (?=(?=[a-z])\w(?<=[a-g])) is a single character assertion
72+
73+
let consumed = false
74+
let asserted = false
75+
const elements =
76+
direction === "ltr" ? alt.elements : [...alt.elements].reverse()
77+
for (const e of elements) {
78+
if (!consumed) {
79+
// before we consumed our first (and only) character, we can
80+
// have single character assertions
81+
82+
if (
83+
e.type === "Assertion" &&
84+
isSingleCharacterAssertion(e, direction, flags)
85+
) {
86+
asserted = true
87+
continue
88+
}
89+
90+
if (containsAssertion(e)) {
91+
// too complex to reason about, so we just give up
92+
return false
93+
}
94+
95+
const range = getLengthRange(e, flags)
96+
if (range.max === 0) {
97+
// we haven't consumed anything, so onto the next one
98+
continue
99+
} else if (range.min === 1 && range.max === 1) {
100+
// finally, a character
101+
consumed = true
102+
} else {
103+
// it's not exactly a single character
104+
return false
105+
}
106+
} else {
107+
// since we already consumed our first (and only) character, we
108+
// can at most have assertions in the direction of the
109+
// first character
110+
111+
const otherDir = invertMatchingDirection(direction)
112+
if (
113+
e.type === "Assertion" &&
114+
isSingleCharacterAssertion(e, otherDir, flags)
115+
) {
116+
continue
117+
}
118+
119+
return false
120+
}
121+
}
122+
123+
return consumed || asserted
124+
})
125+
}
126+
30127
/**
31128
* Combines 2 look chars such that the result is equivalent to 2 adjacent
32129
* assertions `(?=a)(?=b)`.
@@ -356,40 +453,31 @@ export default createRule("no-useless-assertions", {
356453
)
357454
}
358455

359-
if (after.edge) {
360-
return
361-
}
362-
363-
// accept is harder because that can't generally be decided by the first character
364-
365-
// if this contains another assertion then that might reject. It's out of our control
456+
// We can only decide the accept case for exact single-character assertions.
457+
// We want the character after the assertion to be a subset of the asserted characters. For this to be
458+
// correct, the set of assertion characters needs to be exact. We also have to consider edges. Edges
459+
// can be thought of as a special character, so the same subset requirement applies.
460+
const edgeSubset = firstOf.edge || !after.edge
366461
if (
367-
!hasSomeDescendant(
462+
firstOf.exact &&
463+
edgeSubset &&
464+
after.char.isSubsetOf(firstOf.char) &&
465+
isSingleCharacterAssertion(
368466
assertion,
369-
(d) => d !== assertion && d.type === "Assertion",
467+
getMatchingDirectionFromAssertionKind(assertion.kind),
468+
flags,
370469
)
371470
) {
372-
const range = getLengthRange(assertion.alternatives, flags)
373-
// we only check the first character, so it's only correct if the assertion requires only one
374-
// character
375-
if (range.max === 1) {
376-
// require exactness
377-
if (
378-
firstOf.exact &&
379-
after.char.isSubsetOf(firstOf.char)
380-
) {
381-
report(
382-
assertion,
383-
assertion.negate
384-
? "alwaysForNegativeLookaround"
385-
: "alwaysForLookaround",
386-
{
387-
kind: assertion.kind,
388-
acceptOrReject: accept,
389-
},
390-
)
391-
}
392-
}
471+
report(
472+
assertion,
473+
assertion.negate
474+
? "alwaysForNegativeLookaround"
475+
: "alwaysForLookaround",
476+
{
477+
kind: assertion.kind,
478+
acceptOrReject: accept,
479+
},
480+
)
393481
}
394482
}
395483

tests/lib/rules/no-useless-assertions.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -433,5 +433,26 @@ tester.run("no-useless-assertions", rule as any, {
433433
"'\\B' will always accept because it is preceded by a non-word character and followed by a non-word character.",
434434
],
435435
},
436+
437+
{
438+
code: String.raw`/(?!$)$/`,
439+
errors: ["The negative lookahead '(?!$)' will always reject."],
440+
},
441+
{
442+
code: String.raw`/(?=a|$)a/`,
443+
errors: ["The lookahead '(?=a|$)' will always accept."],
444+
},
445+
{
446+
code: String.raw`/(?=(?=[a-f])(?=a|A)[\w%])a/`,
447+
errors: [
448+
"The lookahead '(?=(?=[a-f])(?=a|A)[\\w%])' will always accept.",
449+
],
450+
},
451+
{
452+
code: String.raw`/(?=(?=[a-f])(?=[aA])\w(?<=[aA])(?<=[a-f]))a/`,
453+
errors: [
454+
"The lookahead '(?=(?=[a-f])(?=[aA])\\w(?<=[aA])(?<=[a-f]))' will always accept.",
455+
],
456+
},
436457
],
437458
})

0 commit comments

Comments
 (0)