Skip to content

Commit 714cf99

Browse files
committed
fix(lint/useRegexLiterals): handle useless escapes
1 parent 2f2b007 commit 714cf99

File tree

5 files changed

+221
-26
lines changed

5 files changed

+221
-26
lines changed

.changeset/use-regex-literals-5693.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
---
2+
"@biomejs/biome": patch
3+
---
4+
5+
Fixed [#5693](https://github.com/biomejs/biome/issues/5693): [useRegexLiterals](https://biomejs.dev/linter/rules/use-regex-literals/) now correctly handles useless escaped characters in string literals.

crates/biome_js_analyze/src/lint/complexity/use_regex_literals.rs

Lines changed: 70 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ declare_lint_rule! {
5757
pub struct UseRegexLiteralsState {
5858
pattern: StaticValue,
5959
flags: Option<StaticValue>,
60+
/// The kind of string `pattern` was extracted from: a string literal, an untagged template, or a `String.raw` template.
61+
string_kind: StringKind,
6062
}
6163

6264
impl Rule for UseRegexLiterals {
@@ -81,7 +83,7 @@ impl Rule for UseRegexLiterals {
8183
let mut args = args.iter();
8284

8385
let pattern = args.next()?;
84-
let pattern = extract_valid_pattern(pattern, model)?;
86+
let (pattern, string_kind) = extract_valid_pattern(pattern, model)?;
8587

8688
let flags = match args.next() {
8789
Some(flags) => {
@@ -90,7 +92,11 @@ impl Rule for UseRegexLiterals {
9092
}
9193
None => None,
9294
};
93-
Some(UseRegexLiteralsState { pattern, flags })
95+
Some(UseRegexLiteralsState {
96+
pattern,
97+
flags,
98+
string_kind,
99+
})
94100
}
95101

96102
fn diagnostic(ctx: &RuleContext<Self>, _state: &Self::State) -> Option<RuleDiagnostic> {
@@ -117,6 +123,7 @@ impl Rule for UseRegexLiterals {
117123
.flags
118124
.as_ref()
119125
.unwrap_or(&StaticValue::EmptyString(TextRange::empty(0.into()))),
126+
state.string_kind,
120127
);
121128
let token = JsSyntaxToken::new_detached(JsSyntaxKind::JS_REGEX_LITERAL, &regex, [], []);
122129
let next = AnyJsExpression::AnyJsLiteralExpression(AnyJsLiteralExpression::from(
@@ -137,14 +144,22 @@ impl Rule for UseRegexLiterals {
137144
}
138145
}
139146

147+
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
148+
pub enum StringKind {
149+
Literal,
150+
Template,
151+
RawTemplate,
152+
}
153+
140154
fn extract_valid_pattern(
141155
pattern: Result<AnyJsCallArgument, SyntaxError>,
142156
model: &SemanticModel,
143-
) -> Option<StaticValue> {
157+
) -> Option<(StaticValue, StringKind)> {
144158
let Ok(AnyJsCallArgument::AnyJsExpression(expr)) = pattern else {
145159
return None;
146160
};
147-
if let Some(template_expr) = expr.as_js_template_expression() {
161+
let string_kind = if let Some(template_expr) = expr.as_js_template_expression() {
162+
// Only accept String.raw or untagged template strings.
148163
if let Some(tag) = template_expr.tag() {
149164
let (object, member) = match tag.omit_parentheses() {
150165
AnyJsExpression::JsStaticMemberExpression(expr) => {
@@ -163,18 +178,23 @@ fn extract_valid_pattern(
163178
if model.binding(&reference).is_some() || name.text() != "String" || member != "raw" {
164179
return None;
165180
}
166-
};
181+
StringKind::RawTemplate
182+
} else {
183+
StringKind::Template
184+
}
185+
} else {
186+
StringKind::Literal
167187
};
168188

169189
let pattern = expr.omit_parentheses().as_static_value()?;
170190
// A regex cannot contain a repetition without a quantifier.
171191
if matches!(pattern.as_string_constant()?, "*" | "+" | "?") {
172192
return None;
173193
}
174-
Some(pattern)
194+
Some((pattern, string_kind))
175195
}
176196

177-
fn create_regex(pattern: &str, flags: &StaticValue) -> String {
197+
fn create_regex(pattern: &str, flags: &StaticValue, string_kind: StringKind) -> String {
178198
let flags = flags.text();
179199
let mut pattern_bytes = pattern.bytes().enumerate();
180200
let mut last_copied_inmdex = 0;
@@ -188,10 +208,48 @@ fn create_regex(pattern: &str, flags: &StaticValue) -> String {
188208
new_pattern.push_str(r"\n");
189209
last_copied_inmdex = index + 1;
190210
}
191-
b'\\' => {
192-
if matches!(pattern_bytes.next(), Some((_, b'\\'))) {
193-
new_pattern.push_str(&pattern[last_copied_inmdex..index]);
194-
last_copied_inmdex = index + 1;
211+
b'\\' if string_kind != StringKind::RawTemplate => {
212+
match pattern_bytes.next() {
213+
Some((_, b'\\')) => {
214+
// turn `\\` into `\`
215+
new_pattern.push_str(&pattern[last_copied_inmdex..index]);
216+
last_copied_inmdex = index + 1;
217+
}
218+
Some((
219+
_,
220+
b'/' | b'0'..b'7' | b'f' | b'n' | b'r' | b't' | b'u' | b'v' | b'x',
221+
)) => {
222+
// Keep escape sequence valid in both strings and regexes.
223+
}
224+
Some((_, b'b')) => {
225+
// Backspace escapes are not valid in regexes.
226+
new_pattern.push_str(&pattern[last_copied_inmdex..index]);
227+
// This could be represented more compactly with `\cH`.
228+
// However `\x08` is more familiar.
229+
new_pattern.push_str(r"\x08");
230+
last_copied_inmdex = index + 2;
231+
}
232+
Some((_, b'\n')) if string_kind == StringKind::Literal => {
233+
// String literal split over several lines.
234+
new_pattern.push_str(&pattern[last_copied_inmdex..index]);
235+
last_copied_inmdex = index + 2;
236+
}
237+
Some((_, b'\r')) if string_kind == StringKind::Literal => {
238+
if matches!(pattern_bytes.next(), Some((_, b'\n'))) {
239+
// String literal split over several lines.
240+
new_pattern.push_str(&pattern[last_copied_inmdex..index]);
241+
last_copied_inmdex = index + 3;
242+
} else {
243+
// Ignore `\` and preserve `\r`.
244+
new_pattern.push_str(&pattern[last_copied_inmdex..index]);
245+
last_copied_inmdex = index + 1;
246+
}
247+
}
248+
_ => {
249+
// Useless escaped characters in strings are invalid escapes in regexes.
250+
new_pattern.push_str(&pattern[last_copied_inmdex..index]);
251+
last_copied_inmdex = index + 1;
252+
}
195253
}
196254
}
197255
// Convert slash to "\/" to avoid parsing error in autofix.
@@ -204,6 +262,7 @@ fn create_regex(pattern: &str, flags: &StaticValue) -> String {
204262
}
205263
}
206264
if pattern.is_empty() {
265+
// Emit `/(?:)/` instead of the invalid regex `//`.
207266
new_pattern.push_str("(?:)");
208267
} else {
209268
new_pattern.push_str(&pattern[last_copied_inmdex..]);

crates/biome_js_analyze/tests/specs/complexity/useRegexLiterals/invalid.js

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,15 @@
22
new RegExp("\/pattern$");
33

44
new RegExp("\🙂pattern");
5+
6+
// https://github.com/biomejs/biome/issues/5693#issuecomment-2816096167
7+
new RegExp(`a\*b`);
8+
9+
// The backspace escape is not supported in regexes.
10+
new RegExp("\b");
11+
12+
new RegExp("a\
13+
b");
14+
15+
new RegExp(`a
16+
b`);

crates/biome_js_analyze/tests/specs/complexity/useRegexLiterals/invalid.js.snap

Lines changed: 130 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,18 @@ new RegExp("\/pattern$");
99
1010
new RegExp("\🙂pattern");
1111
12+
// https://github.com/biomejs/biome/issues/5693#issuecomment-2816096167
13+
new RegExp(`a\*b`);
14+
15+
// The backspace escape is not supported in regexes.
16+
new RegExp("\b");
17+
18+
new RegExp("a\
19+
b");
20+
21+
new RegExp(`a
22+
b`);
23+
1224
```
1325

1426
# Diagnostics
@@ -27,11 +39,11 @@ invalid.js:2:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━━
2739
2840
i Safe fix: Use a literal notation instead.
2941
30-
1 1 │ // https://github.com/biomejs/biome/issues/5487
31-
2 │ - new·RegExp("\/pattern$");
32-
2 │ + /\/pattern$/;
33-
3 3 │
34-
4 4 │ new RegExp("\🙂pattern");
42+
1 1 │ // https://github.com/biomejs/biome/issues/5487
43+
2 │ - new·RegExp("\/pattern$");
44+
2 │ + /\/pattern$/;
45+
3 3 │
46+
4 4 │ new RegExp("\🙂pattern");
3547
3648
3749
```
@@ -46,16 +58,124 @@ invalid.js:4:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━━
4658
> 4 │ new RegExp("\🙂pattern");
4759
│ ^^^^^^^^^^^^^^^^^^^^^^^^
4860
5 │
61+
6 │ // https://github.com/biomejs/biome/issues/5693#issuecomment-2816096167
62+
63+
i Regular expression literals avoid some escaping required in a string literal, and are easier to analyze statically.
64+
65+
i Safe fix: Use a literal notation instead.
66+
67+
2 2 │ new RegExp("\/pattern$");
68+
3 3 │
69+
4 │ - new·RegExp("\🙂pattern");
70+
4 │ + /🙂pattern/;
71+
5 5 │
72+
6 6 │ // https://github.com/biomejs/biome/issues/5693#issuecomment-2816096167
73+
74+
75+
```
76+
77+
```
78+
invalid.js:7:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
79+
80+
× Use a regular expression literal instead of the RegExp constructor.
81+
82+
6 │ // https://github.com/biomejs/biome/issues/5693#issuecomment-2816096167
83+
> 7 │ new RegExp(`a\*b`);
84+
│ ^^^^^^^^^^^^^^^^^^
85+
8 │
86+
9 │ // The backspace escape is not supported in regexes.
87+
88+
i Regular expression literals avoid some escaping required in a string literal, and are easier to analyze statically.
89+
90+
i Safe fix: Use a literal notation instead.
91+
92+
5 5 │
93+
6 6 │ // https://github.com/biomejs/biome/issues/5693#issuecomment-2816096167
94+
7 │ - new·RegExp(`a\*b`);
95+
7 │ + /a*b/;
96+
8 8 │
97+
9 9 │ // The backspace escape is not supported in regexes.
98+
99+
100+
```
101+
102+
```
103+
invalid.js:10:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
104+
105+
× Use a regular expression literal instead of the RegExp constructor.
106+
107+
9 │ // The backspace escape is not supported in regexes.
108+
> 10 │ new RegExp("\b");
109+
│ ^^^^^^^^^^^^^^^^
110+
11 │
111+
12 │ new RegExp("a\
112+
113+
i Regular expression literals avoid some escaping required in a string literal, and are easier to analyze statically.
114+
115+
i Safe fix: Use a literal notation instead.
116+
117+
8 8 │
118+
9 9 │ // The backspace escape is not supported in regexes.
119+
10 │ - new·RegExp("\b");
120+
10 │ + /\x08/;
121+
11 11 │
122+
12 12 │ new RegExp("a\
123+
124+
125+
```
126+
127+
```
128+
invalid.js:12:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
129+
130+
× Use a regular expression literal instead of the RegExp constructor.
131+
132+
10 │ new RegExp("\b");
133+
11 │
134+
> 12 │ new RegExp("a\
135+
│ ^^^^^^^^^^^^^^
136+
> 13 │ b");
137+
│ ^^^
138+
14 │
139+
15 │ new RegExp(`a
140+
141+
i Regular expression literals avoid some escaping required in a string literal, and are easier to analyze statically.
142+
143+
i Safe fix: Use a literal notation instead.
144+
145+
10 10new RegExp("\b");
146+
11 11
147+
12- new·RegExp("a\
148+
13 │ - b");
149+
12+ /ab/;
150+
14 13
151+
15 14new RegExp(`a
152+
153+
154+
```
155+
156+
```
157+
invalid.js:15:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
158+
159+
× Use a regular expression literal instead of the RegExp constructor.
160+
161+
13 │ b");
162+
14 │
163+
> 15 │ new RegExp(`a
164+
^^^^^^^^^^^^^
165+
> 16b`);
166+
│ ^^^
167+
17 │
49168
50169
i Regular expression literals avoid some escaping required in a string literal, and are easier to analyze statically.
51170
52171
i Safe fix: Use a literal notation instead.
53172
54-
2 2 │ new RegExp("\/pattern$");
55-
3 3 │
56-
4 │ - new·RegExp("\🙂pattern");
57-
4 │ + /\🙂pattern/;
58-
5 5 │
173+
13 13 │ b");
174+
14 14 │
175+
15 │ - new·RegExp(`a
176+
16- b`);
177+
15 │ + /a\nb/;
178+
17 16 │
59179
60180
61181
```

crates/biome_js_analyze/tests/specs/complexity/useRegexLiterals/invalid.jsonc.snap

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,6 @@
11
---
22
source: crates/biome_js_analyze/tests/spec_tests.rs
33
expression: invalid.jsonc
4-
snapshot_kind: text
54
---
65
# Input
76
```cjs
@@ -582,7 +581,7 @@ invalid.jsonc:1:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━
582581
i Safe fix: Use a literal notation instead.
583582
584583
- new·RegExp(String.raw`\\d``g`);
585-
+ /\d/g;
584+
+ /\\d/g;
586585
587586
588587
```
@@ -606,7 +605,7 @@ invalid.jsonc:1:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━
606605
i Safe fix: Use a literal notation instead.
607606
608607
- new·RegExp(String['raw']`\\d``g`);
609-
+ /\d/g;
608+
+ /\\d/g;
610609
611610
612611
```
@@ -630,7 +629,7 @@ invalid.jsonc:1:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━
630629
i Safe fix: Use a literal notation instead.
631630
632631
- new·RegExp(String["raw"]`\\d``g`);
633-
+ /\d/g;
632+
+ /\\d/g;
634633
635634
636635
```
@@ -3732,7 +3731,7 @@ invalid.jsonc:1:1 lint/complexity/useRegexLiterals FIXABLE ━━━━━━
37323731
i Safe fix: Use a literal notation instead.
37333732
37343733
- new·RegExp("\.")
3735-
+ /\./
3734+
+ /./
37363735
37373736
37383737
```

0 commit comments

Comments
 (0)