Skip to content

Don't recover lifetimes/labels containing emojis as character literals #108031

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Feb 18, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions compiler/rustc_errors/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,8 @@ pub enum StashKey {
/// When an invalid lifetime e.g. `'2` should be reinterpreted
/// as a char literal in the parser
LifetimeIsChar,
/// When an invalid lifetime e.g. `'🐱` contains emoji.
LifetimeContainsEmoji,
/// Maybe there was a typo where a comma was forgotten before
/// FRU syntax
MaybeFruTypo,
Expand Down
43 changes: 33 additions & 10 deletions compiler/rustc_lexer/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ pub enum TokenKind {
Literal { kind: LiteralKind, suffix_start: u32 },

/// "'a"
Lifetime { starts_with_number: bool },
Lifetime { starts_with_number: bool, contains_emoji: bool },

// One-char tokens:
/// ";"
Expand Down Expand Up @@ -630,7 +630,13 @@ impl Cursor<'_> {
// If the first symbol is valid for identifier, it can be a lifetime.
// Also check if it's a number for a better error reporting (so '0 will
// be reported as invalid lifetime and not as unterminated char literal).
is_id_start(self.first()) || self.first().is_digit(10)
// We also have to account for potential `'🐱` emojis to avoid reporting
// it as an unterminated char literal.
is_id_start(self.first())
|| self.first().is_digit(10)
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
// 5.0, but Unicode is already newer than this.
|| unic_emoji_char::is_emoji(self.first())
};

if !can_be_a_lifetime {
Expand All @@ -643,16 +649,33 @@ impl Cursor<'_> {
return Literal { kind, suffix_start };
}

// Either a lifetime or a character literal with
// length greater than 1.
// Either a lifetime or a character literal.

let starts_with_number = self.first().is_digit(10);
let mut contains_emoji = false;

// Skip the literal contents.
// First symbol can be a number (which isn't a valid identifier start),
// so skip it without any checks.
self.bump();
self.eat_while(is_id_continue);
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
// 5.0, but Unicode is already newer than this.
if unic_emoji_char::is_emoji(self.first()) {
contains_emoji = true;
} else {
// Skip the literal contents.
// First symbol can be a number (which isn't a valid identifier start),
// so skip it without any checks.
self.bump();
}
self.eat_while(|c| {
if is_id_continue(c) {
true
// FIXME(#108019): `unic-emoji-char` seems to have data tables only up to Unicode
// 5.0, but Unicode is already newer than this.
} else if unic_emoji_char::is_emoji(c) {
contains_emoji = true;
true
} else {
false
}
});

// Check if after skipping literal contents we've met a closing
// single quote (which means that user attempted to create a
Expand All @@ -662,7 +685,7 @@ impl Cursor<'_> {
let kind = Char { terminated: true };
Literal { kind, suffix_start: self.pos_within_token() }
} else {
Lifetime { starts_with_number }
Lifetime { starts_with_number, contains_emoji }
}
}

Expand Down
2 changes: 1 addition & 1 deletion compiler/rustc_lexer/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -235,7 +235,7 @@ fn lifetime() {
check_lexing(
"'abc",
expect![[r#"
Token { kind: Lifetime { starts_with_number: false }, len: 4 }
Token { kind: Lifetime { starts_with_number: false, contains_emoji: false }, len: 4 }
"#]],
);
}
Expand Down
9 changes: 7 additions & 2 deletions compiler/rustc_parse/src/lexer/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -200,16 +200,21 @@ impl<'a> StringReader<'a> {
};
token::Literal(token::Lit { kind, symbol, suffix })
}
rustc_lexer::TokenKind::Lifetime { starts_with_number } => {
rustc_lexer::TokenKind::Lifetime { starts_with_number, contains_emoji } => {
// Include the leading `'` in the real identifier, for macro
// expansion purposes. See #12512 for the gory details of why
// this is necessary.
let lifetime_name = self.str_from(start);
if starts_with_number {
let span = self.mk_sp(start, self.pos);
let mut diag = self.sess.struct_err("lifetimes cannot start with a number");
let mut diag = self.sess.struct_err("lifetimes or labels cannot start with a number");
diag.set_span(span);
diag.stash(span, StashKey::LifetimeIsChar);
} else if contains_emoji {
let span = self.mk_sp(start, self.pos);
let mut diag = self.sess.struct_err("lifetimes or labels cannot contain emojis");
diag.set_span(span);
diag.stash(span, StashKey::LifetimeContainsEmoji);
}
let ident = Symbol::intern(lifetime_name);
token::Lifetime(ident)
Expand Down
45 changes: 45 additions & 0 deletions tests/ui/lexer/issue-108019-bad-emoji-recovery.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
#![allow(unused_labels)]

// FIXME(#108019): outdated Unicode table
// fn foo() {
// '🥺 loop {
// break
// }
// }

fn bar() { // case: the label is a lone emoji and the `:` after it is missing
'🐱 loop {
//~^ ERROR labeled expression must be followed by `:`
//~| ERROR lifetimes or labels cannot contain emojis
break
}
}

fn qux() { // case: the emoji follows an ordinary identifier character (`a`); `:` is still missing
'a🐱 loop {
//~^ ERROR labeled expression must be followed by `:`
//~| ERROR lifetimes or labels cannot contain emojis
break
}
}

fn quux() { // case: digit-led label with an emoji; the "start with a number" error is the one expected here
'1🐱 loop {
//~^ ERROR labeled expression must be followed by `:`
//~| ERROR lifetimes or labels cannot start with a number
break
}
}

fn x<'🐱>() -> &'🐱 () {
//~^ ERROR lifetimes or labels cannot contain emojis
//~| ERROR lifetimes or labels cannot contain emojis
&() // the two expected errors are for the two `'🐱` lifetimes in the signature, not this body
}

fn y() { // case: the `:` is present, so only the emoji error is expected
'a🐱: loop {}
//~^ ERROR lifetimes or labels cannot contain emojis
}

fn main() {} // intentionally empty: the functions above exist only for their expected diagnostics
86 changes: 86 additions & 0 deletions tests/ui/lexer/issue-108019-bad-emoji-recovery.stderr
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
error: labeled expression must be followed by `:`
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
|
LL | '🐱 loop {
| ^--- help: add `:` after the label
| |
| _____the label
| |
LL | |
LL | |
LL | | break
LL | | }
| |_____^
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them

error: labeled expression must be followed by `:`
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
|
LL | 'a🐱 loop {
| ^---- help: add `:` after the label
| |
| _____the label
| |
LL | |
LL | |
LL | | break
LL | | }
| |_____^
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them

error: labeled expression must be followed by `:`
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
|
LL | '1🐱 loop {
| ^---- help: add `:` after the label
| |
| _____the label
| |
LL | |
LL | |
LL | | break
LL | | }
| |_____^
|
= note: labels are used before loops and blocks, allowing e.g., `break 'label` to them

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:11:5
|
LL | '🐱 loop {
| ^^^

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:19:5
|
LL | 'a🐱 loop {
| ^^^^

error: lifetimes or labels cannot start with a number
--> $DIR/issue-108019-bad-emoji-recovery.rs:27:5
|
LL | '1🐱 loop {
| ^^^^

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:6
|
LL | fn x<'🐱>() -> &'🐱 () {
| ^^^

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:34:16
|
LL | fn x<'🐱>() -> &'🐱 () {
| ^^^

error: lifetimes or labels cannot contain emojis
--> $DIR/issue-108019-bad-emoji-recovery.rs:41:5
|
LL | 'a🐱: loop {}
| ^^^^

error: aborting due to 9 previous errors

4 changes: 2 additions & 2 deletions tests/ui/parser/numeric-lifetime.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
struct S<'1> { s: &'1 usize }
//~^ ERROR lifetimes cannot start with a number
//~| ERROR lifetimes cannot start with a number
//~^ ERROR lifetimes or labels cannot start with a number
//~| ERROR lifetimes or labels cannot start with a number
fn main() {
// verify that the parse error doesn't stop type checking
let x: usize = "";
Expand Down
4 changes: 2 additions & 2 deletions tests/ui/parser/numeric-lifetime.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@ LL | let x: usize = "";
| |
| expected due to this

error: lifetimes cannot start with a number
error: lifetimes or labels cannot start with a number
--> $DIR/numeric-lifetime.rs:1:10
|
LL | struct S<'1> { s: &'1 usize }
| ^^

error: lifetimes cannot start with a number
error: lifetimes or labels cannot start with a number
--> $DIR/numeric-lifetime.rs:1:20
|
LL | struct S<'1> { s: &'1 usize }
Expand Down