Add regexp/no-useless-set-operand rule (#625)

RunDevelopment · ota-meshi · web-flow · commit f75cbbae29be · 2023-10-02T08:16:26.000+09:00
* Add `regexp/no-useless-set-operand` rule

* Added docs

* Create nervous-yaks-destroy.md

* Apply suggestions from code review

Co-authored-by: Yosuke Ota <otameshiyo23@gmail.com>

* npm run update

---------

Co-authored-by: Yosuke Ota <otameshiyo23@gmail.com>
diff --git a/.changeset/nervous-yaks-destroy.md b/.changeset/nervous-yaks-destroy.md
@@ -0,0 +1,5 @@
+---
+"eslint-plugin-regexp": major
+---
+
+Add `regexp/no-useless-set-operand` rule
diff --git a/README.md b/README.md
@@ -155,6 +155,7 @@ The `plugin:regexp/all` config enables all rules. It's meant for testing, not fo
 | [no-useless-lazy](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-lazy.html)                                             | disallow unnecessarily non-greedy quantifiers                                              | ✅  |    | 🔧 |    |
 | [no-useless-quantifier](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-quantifier.html)                                 | disallow quantifiers that can be removed                                                   | ✅  |    | 🔧 | 💡 |
 | [no-useless-range](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-range.html)                                           | disallow unnecessary character ranges                                                      | ✅  |    | 🔧 |    |
+| [no-useless-set-operand](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-set-operand.html)                               | disallow unnecessary elements in expression character classes                              | ✅  |    | 🔧 |    |
 | [no-useless-two-nums-quantifier](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-useless-two-nums-quantifier.html)               | disallow unnecessary `{n,m}` quantifier                                                    | ✅  |    | 🔧 |    |
 | [no-zero-quantifier](https://ota-meshi.github.io/eslint-plugin-regexp/rules/no-zero-quantifier.html)                                       | disallow quantifiers with a maximum of zero                                                | ✅  |    |    | 💡 |
 | [optimal-lookaround-quantifier](https://ota-meshi.github.io/eslint-plugin-regexp/rules/optimal-lookaround-quantifier.html)                 | disallow the alternatives of lookarounds that end with a non-constant quantifier           |    | ✅  |    | 💡 |
diff --git a/docs/rules/index.md b/docs/rules/index.md
@@ -62,6 +62,7 @@ sidebarDepth: 0
 | [no-useless-lazy](no-useless-lazy.md)                                             | disallow unnecessarily non-greedy quantifiers                                              | ✅  |    | 🔧 |    |
 | [no-useless-quantifier](no-useless-quantifier.md)                                 | disallow quantifiers that can be removed                                                   | ✅  |    | 🔧 | 💡 |
 | [no-useless-range](no-useless-range.md)                                           | disallow unnecessary character ranges                                                      | ✅  |    | 🔧 |    |
+| [no-useless-set-operand](no-useless-set-operand.md)                               | disallow unnecessary elements in expression character classes                              | ✅  |    | 🔧 |    |
 | [no-useless-two-nums-quantifier](no-useless-two-nums-quantifier.md)               | disallow unnecessary `{n,m}` quantifier                                                    | ✅  |    | 🔧 |    |
 | [no-zero-quantifier](no-zero-quantifier.md)                                       | disallow quantifiers with a maximum of zero                                                | ✅  |    |    | 💡 |
 | [optimal-lookaround-quantifier](optimal-lookaround-quantifier.md)                 | disallow the alternatives of lookarounds that end with a non-constant quantifier           |    | ✅  |    | 💡 |
diff --git a/docs/rules/no-useless-set-operand.md b/docs/rules/no-useless-set-operand.md
@@ -0,0 +1,56 @@
+---
+pageClass: "rule-details"
+sidebarDepth: 0
+title: "regexp/no-useless-set-operand"
+description: "disallow unnecessary elements in expression character classes"
+---
+# regexp/no-useless-set-operand
+
+💼 This rule is enabled in the ✅ `plugin:regexp/recommended` config.
+
+🔧 This rule is automatically fixable by the [`--fix` CLI option](https://eslint.org/docs/latest/user-guide/command-line-interface#--fix).
+
+<!-- end auto-generated rule header -->
+
+> disallow unnecessary elements in expression character classes
+
+## :book: Rule Details
+
+The `v` flag added set operations for character classes, e.g. `[\w&&\D]` and `[\w--\d]`, but there are no limitations on what operands can be used. This rule reports any unnecessary operands.
+
+<eslint-code-block fix>
+
+```js
+/* eslint regexp/no-useless-set-operand: "error" */
+
+/* ✓ GOOD */
+foo = /[\w--\d]/v
+foo = /[\w--[\d_]]/v
+
+/* ✗ BAD */
+foo = /[\w--[\d$]]/v
+foo = /[\w&&\d]/v
+foo = /[\w&&\s]/v
+foo = /[\w&&[\d\s]]/v
+foo = /[\w&&[^\d\s]]/v
+foo = /[\w--\s]/v
+foo = /[\d--\w]/v
+foo = /[\w--[\d\s]]/v
+foo = /[\w--[^\d\s]]/v
+
+```
+
+</eslint-code-block>
+
+## :wrench: Options
+
+Nothing.
+
+## :rocket: Version
+
+:exclamation: <badge text="This rule has not been released yet." vertical="middle" type="error"> ***This rule has not been released yet.*** </badge>
+
+## :mag: Implementation
+
+- [Rule source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/lib/rules/no-useless-set-operand.ts)
+- [Test source](https://github.com/ota-meshi/eslint-plugin-regexp/blob/master/tests/lib/rules/no-useless-set-operand.ts)
diff --git a/lib/configs/recommended.ts b/lib/configs/recommended.ts
@@ -50,6 +50,7 @@ export const rules = {
     "regexp/no-useless-non-capturing-group": "error",
     "regexp/no-useless-quantifier": "error",
     "regexp/no-useless-range": "error",
+    "regexp/no-useless-set-operand": "error",
     "regexp/no-useless-two-nums-quantifier": "error",
     "regexp/no-zero-quantifier": "error",
     "regexp/optimal-lookaround-quantifier": "warn",
diff --git a/lib/rules/no-useless-set-operand.ts b/lib/rules/no-useless-set-operand.ts
@@ -0,0 +1,240 @@
+import type { RegExpVisitor } from "@eslint-community/regexpp/visitor"
+import type {
+    CharacterClassElement,
+    ClassSetOperand,
+    ExpressionCharacterClass,
+    Node,
+    StringAlternative,
+} from "@eslint-community/regexpp/ast"
+import type { RegExpContext } from "../utils"
+import { createRule, defineRegexpVisitor } from "../utils"
+import { toUnicodeSet } from "regexp-ast-analysis"
+
+type FlatElement = CharacterClassElement | StringAlternative
+
+/**
+ * Collects the parts of a set operand that are candidates for individual
+ * removal.
+ *
+ * - A string disjunction yields its alternatives.
+ * - A character class yields its direct elements, where nested string
+ *   disjunctions contribute their alternatives and nested character classes
+ *   contribute their own direct elements (only when not negated) followed by
+ *   the nested class itself.
+ * - Every other kind of operand yields an empty list.
+ */
+function getFlatElements(
+    node: ClassSetOperand | ExpressionCharacterClass["expression"],
+): readonly FlatElement[] {
+    switch (node.type) {
+        case "ClassStringDisjunction":
+            return node.alternatives
+        case "CharacterClass": {
+            const flat: FlatElement[] = []
+            for (const child of node.elements) {
+                if (child.type === "ClassStringDisjunction") {
+                    flat.push(...child.alternatives)
+                    continue
+                }
+                if (child.type === "CharacterClass" && !child.negate) {
+                    // only non-negated nested classes are looked into
+                    flat.push(...child.elements)
+                }
+                flat.push(child)
+            }
+            return flat
+        }
+        default:
+            return []
+    }
+}
+
+/**
+ * Returns the raw text of `root` with the text of its descendant `e` cut out.
+ *
+ * For a string alternative, the cut is widened so the remaining disjunction
+ * stays well-formed: the whole disjunction is cut when `e` is its only
+ * alternative, otherwise one neighbouring `|` is cut together with `e`.
+ */
+function removeDescendant(root: Node, e: FlatElement): string {
+    let cutStart = e.start
+    let cutEnd = e.end
+
+    if (e.type === "StringAlternative") {
+        const disjunction = e.parent
+        const { alternatives } = disjunction
+        if (alternatives.length === 1) {
+            // sole alternative: the whole string disjunction has to go
+            cutStart = disjunction.start
+            cutEnd = disjunction.end
+        } else if (alternatives.at(-1) === e) {
+            // last alternative: also drop the `|` in front of it
+            cutStart -= 1
+        } else {
+            // any other alternative: also drop the `|` right after it
+            cutEnd += 1
+        }
+    }
+
+    // node offsets are absolute, `root.raw` is relative to `root.start`
+    const offset = root.start
+    const head = root.raw.slice(0, cutStart - offset)
+    const tail = root.raw.slice(cutEnd - offset)
+    return head + tail
+}
+
+/**
+ * Rule `no-useless-set-operand`.
+ *
+ * Reports operands (or parts of operands) of class intersections (`&&`) and
+ * class subtractions (`--`) that do not influence the result of the set
+ * operation, and offers an autofix for each report. The rule is a no-op for
+ * patterns without the `v` flag.
+ */
+export default createRule("no-useless-set-operand", {
+    meta: {
+        docs: {
+            description:
+                "disallow unnecessary elements in expression character classes",
+            category: "Best Practices",
+            recommended: true,
+        },
+        schema: [],
+        messages: {
+            intersectionDisjoint:
+                "'{{left}}' and '{{right}}' are disjoint, so the result of the intersection is always going to be the empty set.",
+            intersectionSubset:
+                "'{{sub}}' is a subset of '{{super}}', so the result of the intersection is always going to be '{{sub}}'.",
+            intersectionRemove:
+                "'{{expr}}' can be removed without changing the result of the intersection.",
+            subtractionDisjoint:
+                "'{{left}}' and '{{right}}' are disjoint, so the subtraction doesn't do anything.",
+            subtractionSubset:
+                "'{{left}}' is a subset of '{{right}}', so the result of the subtraction is always going to be the empty set.",
+            subtractionRemove:
+                "'{{expr}}' can be removed without changing the result of the subtraction.",
+        },
+        fixable: "code",
+        type: "suggestion",
+    },
+    create(context) {
+        /**
+         * Creates the regexp visitor for a single regular expression.
+         */
+        function createVisitor(
+            regexpContext: RegExpContext,
+        ): RegExpVisitor.Handlers {
+            const { node, flags, getRegexpLocation, fixReplaceNode } =
+                regexpContext
+
+            if (!flags.unicodeSets) {
+                // set operations are only available with the `v` flag
+                return {}
+            }
+
+            /**
+             * Creates a fix for an expression whose result is always the
+             * empty set.
+             *
+             * When the expression is the direct content of an expression
+             * character class, the whole class is replaced (`[]`, or `[^]`
+             * when the class is negated). Otherwise only the expression
+             * itself is replaced with `[]`.
+             */
+            function fixRemoveExpression(
+                expr: ExpressionCharacterClass["expression"],
+            ) {
+                if (expr.parent.type === "ExpressionCharacterClass") {
+                    const cc = expr.parent
+                    return fixReplaceNode(cc, cc.negate ? "[^]" : "[]")
+                }
+                return fixReplaceNode(expr, "[]")
+            }
+
+            return {
+                onClassIntersectionEnter(iNode) {
+                    const leftSet = toUnicodeSet(iNode.left, flags)
+                    const rightSet = toUnicodeSet(iNode.right, flags)
+
+                    // disjoint operands: the intersection is always empty
+                    if (leftSet.isDisjointWith(rightSet)) {
+                        context.report({
+                            node,
+                            loc: getRegexpLocation(iNode),
+                            messageId: "intersectionDisjoint",
+                            data: {
+                                left: iNode.left.raw,
+                                right: iNode.right.raw,
+                            },
+                            fix: fixRemoveExpression(iNode),
+                        })
+                        return
+                    }
+
+                    // one operand contains the other: the intersection is
+                    // just the smaller operand
+                    if (leftSet.isSubsetOf(rightSet)) {
+                        context.report({
+                            node,
+                            loc: getRegexpLocation(iNode),
+                            messageId: "intersectionSubset",
+                            data: {
+                                sub: iNode.left.raw,
+                                super: iNode.right.raw,
+                            },
+                            fix: fixReplaceNode(iNode, iNode.left.raw),
+                        })
+                        return
+                    }
+                    if (rightSet.isSubsetOf(leftSet)) {
+                        context.report({
+                            node,
+                            loc: getRegexpLocation(iNode),
+                            messageId: "intersectionSubset",
+                            data: {
+                                sub: iNode.right.raw,
+                                super: iNode.left.raw,
+                            },
+                            fix: fixReplaceNode(iNode, iNode.right.raw),
+                        })
+                        return
+                    }
+
+                    // elements of one operand that are disjoint with the
+                    // other operand never contribute to the intersection
+                    const toRemoveRight = getFlatElements(iNode.right).filter(
+                        (e) => leftSet.isDisjointWith(toUnicodeSet(e, flags)),
+                    )
+                    const toRemoveLeft = getFlatElements(iNode.left).filter(
+                        (e) => rightSet.isDisjointWith(toUnicodeSet(e, flags)),
+                    )
+                    for (const e of [...toRemoveRight, ...toRemoveLeft]) {
+                        context.report({
+                            node,
+                            loc: getRegexpLocation(e),
+                            // this is an intersection, so use the
+                            // intersection-specific message
+                            messageId: "intersectionRemove",
+                            data: {
+                                expr: e.raw,
+                            },
+                            fix: fixReplaceNode(
+                                iNode,
+                                removeDescendant(iNode, e),
+                            ),
+                        })
+                    }
+                },
+                onClassSubtractionEnter(sNode) {
+                    const leftSet = toUnicodeSet(sNode.left, flags)
+                    const rightSet = toUnicodeSet(sNode.right, flags)
+
+                    // disjoint operands: nothing is subtracted, keep the left
+                    if (leftSet.isDisjointWith(rightSet)) {
+                        context.report({
+                            node,
+                            loc: getRegexpLocation(sNode),
+                            messageId: "subtractionDisjoint",
+                            data: {
+                                left: sNode.left.raw,
+                                right: sNode.right.raw,
+                            },
+                            fix: fixReplaceNode(sNode, sNode.left.raw),
+                        })
+                        return
+                    }
+
+                    // everything on the left is subtracted: always empty
+                    if (leftSet.isSubsetOf(rightSet)) {
+                        context.report({
+                            node,
+                            loc: getRegexpLocation(sNode),
+                            messageId: "subtractionSubset",
+                            data: {
+                                left: sNode.left.raw,
+                                right: sNode.right.raw,
+                            },
+                            fix: fixRemoveExpression(sNode),
+                        })
+                        return
+                    }
+
+                    // elements of the right operand that are disjoint with
+                    // the left operand subtract nothing
+                    const toRemove = getFlatElements(sNode.right).filter((e) =>
+                        leftSet.isDisjointWith(toUnicodeSet(e, flags)),
+                    )
+                    for (const e of toRemove) {
+                        context.report({
+                            node,
+                            loc: getRegexpLocation(e),
+                            messageId: "subtractionRemove",
+                            data: {
+                                expr: e.raw,
+                            },
+                            fix: fixReplaceNode(
+                                sNode,
+                                removeDescendant(sNode, e),
+                            ),
+                        })
+                    }
+                },
+            }
+        }
+
+        return defineRegexpVisitor(context, {
+            createVisitor,
+        })
+    },
+})
diff --git a/lib/utils/rules.ts b/lib/utils/rules.ts
@@ -47,6 +47,7 @@ import noUselessNonCapturingGroup from "../rules/no-useless-non-capturing-group"
 import noUselessNonGreedy from "../rules/no-useless-non-greedy"
 import noUselessQuantifier from "../rules/no-useless-quantifier"
 import noUselessRange from "../rules/no-useless-range"
+import noUselessSetOperand from "../rules/no-useless-set-operand"
 import noUselessTwoNumsQuantifier from "../rules/no-useless-two-nums-quantifier"
 import noZeroQuantifier from "../rules/no-zero-quantifier"
 import optimalLookaroundQuantifier from "../rules/optimal-lookaround-quantifier"
@@ -130,6 +131,7 @@ export const rules = [
     noUselessNonGreedy,
     noUselessQuantifier,
     noUselessRange,
+    noUselessSetOperand,
     noUselessTwoNumsQuantifier,
     noZeroQuantifier,
     optimalLookaroundQuantifier,
diff --git a/tests/lib/rules/no-useless-set-operand.ts b/tests/lib/rules/no-useless-set-operand.ts

-Original file line number
+Diff line change
@@ @@ -0,0 +1,5 @@ @@
 +---
 +"eslint-plugin-regexp": major
 +---
++
 +Add `regexp/no-useless-set-operand` rule