Skip to content

Commit d6414f5

Browse files
authored
Rollup merge of #69376 - petrochenkov:bumpwith, r=Centril
parser: Cleanup `Parser::bump_with` and its uses. Follow-up to #69006. r? @Centril
2 parents bdd275d + 4356d18 commit d6414f5

File tree

2 files changed

+84
-127
lines changed

2 files changed

+84
-127
lines changed

src/librustc_parse/parser/mod.rs

+51-127
Original file line numberDiff line numberDiff line change
@@ -601,141 +601,76 @@ impl<'a> Parser<'a> {
601601
)
602602
}
603603

604-
/// Expects and consumes a `+`. if `+=` is seen, replaces it with a `=`
605-
/// and continues. If a `+` is not seen, returns `false`.
606-
///
607-
/// This is used when token-splitting `+=` into `+`.
608-
/// See issue #47856 for an example of when this may occur.
609-
fn eat_plus(&mut self) -> bool {
610-
self.expected_tokens.push(TokenType::Token(token::BinOp(token::Plus)));
611-
match self.token.kind {
612-
token::BinOp(token::Plus) => {
613-
self.bump();
604+
/// Eats the expected token if it's present, possibly breaking
605+
/// compound tokens like multi-character operators in the process.
606+
/// Returns `true` if the token was eaten.
607+
fn break_and_eat(&mut self, expected: TokenKind) -> bool {
608+
if self.token.kind == expected {
609+
self.bump();
610+
return true;
611+
}
612+
match self.token.kind.break_two_token_op() {
613+
Some((first, second)) if first == expected => {
614+
let first_span = self.sess.source_map().start_point(self.token.span);
615+
let second_span = self.token.span.with_lo(first_span.hi());
616+
self.set_token(Token::new(first, first_span));
617+
self.bump_with(Token::new(second, second_span));
614618
true
615619
}
616-
token::BinOpEq(token::Plus) => {
617-
let start_point = self.sess.source_map().start_point(self.token.span);
618-
self.bump_with(token::Eq, self.token.span.with_lo(start_point.hi()));
619-
true
620+
_ => {
621+
self.expected_tokens.push(TokenType::Token(expected));
622+
false
620623
}
621-
_ => false,
622624
}
623625
}
624626

625-
/// Expects and consumes an `&`. If `&&` is seen, replaces it with a single
626-
/// `&` and continues. If an `&` is not seen, signals an error.
627+
/// Eats `+`, possibly breaking tokens like `+=` in the process.
628+
fn eat_plus(&mut self) -> bool {
629+
self.break_and_eat(token::BinOp(token::Plus))
630+
}
631+
632+
/// Eats `&`, possibly breaking tokens like `&&` in the process.
633+
/// Signals an error if `&` was not eaten.
627634
fn expect_and(&mut self) -> PResult<'a, ()> {
628-
self.expected_tokens.push(TokenType::Token(token::BinOp(token::And)));
629-
match self.token.kind {
630-
token::BinOp(token::And) => {
631-
self.bump();
632-
Ok(())
633-
}
634-
token::AndAnd => {
635-
let start_point = self.sess.source_map().start_point(self.token.span);
636-
Ok(self
637-
.bump_with(token::BinOp(token::And), self.token.span.with_lo(start_point.hi())))
638-
}
639-
_ => self.unexpected(),
640-
}
635+
if self.break_and_eat(token::BinOp(token::And)) { Ok(()) } else { self.unexpected() }
641636
}
642637

643-
/// Expects and consumes an `|`. If `||` is seen, replaces it with a single
644-
/// `|` and continues. If an `|` is not seen, signals an error.
638+
/// Eats `|`, possibly breaking tokens like `||` in the process.
639+
/// Signals an error if `|` was not eaten.
645640
fn expect_or(&mut self) -> PResult<'a, ()> {
646-
self.expected_tokens.push(TokenType::Token(token::BinOp(token::Or)));
647-
match self.token.kind {
648-
token::BinOp(token::Or) => {
649-
self.bump();
650-
Ok(())
651-
}
652-
token::OrOr => {
653-
let start_point = self.sess.source_map().start_point(self.token.span);
654-
Ok(self
655-
.bump_with(token::BinOp(token::Or), self.token.span.with_lo(start_point.hi())))
656-
}
657-
_ => self.unexpected(),
658-
}
641+
if self.break_and_eat(token::BinOp(token::Or)) { Ok(()) } else { self.unexpected() }
659642
}
660643

661-
/// Attempts to consume a `<`. If `<<` is seen, replaces it with a single
662-
/// `<` and continue. If `<-` is seen, replaces it with a single `<`
663-
/// and continue. If a `<` is not seen, returns false.
664-
///
665-
/// This is meant to be used when parsing generics on a path to get the
666-
/// starting token.
644+
/// Eats `<`, possibly breaking tokens like `<<` in the process.
667645
fn eat_lt(&mut self) -> bool {
668-
self.expected_tokens.push(TokenType::Token(token::Lt));
669-
let ate = match self.token.kind {
670-
token::Lt => {
671-
self.bump();
672-
true
673-
}
674-
token::BinOp(token::Shl) => {
675-
let start_point = self.sess.source_map().start_point(self.token.span);
676-
self.bump_with(token::Lt, self.token.span.with_lo(start_point.hi()));
677-
true
678-
}
679-
token::LArrow => {
680-
let start_point = self.sess.source_map().start_point(self.token.span);
681-
self.bump_with(
682-
token::BinOp(token::Minus),
683-
self.token.span.with_lo(start_point.hi()),
684-
);
685-
true
686-
}
687-
_ => false,
688-
};
689-
646+
let ate = self.break_and_eat(token::Lt);
690647
if ate {
691648
// See doc comment for `unmatched_angle_bracket_count`.
692649
self.unmatched_angle_bracket_count += 1;
693650
self.max_angle_bracket_count += 1;
694651
debug!("eat_lt: (increment) count={:?}", self.unmatched_angle_bracket_count);
695652
}
696-
697653
ate
698654
}
699655

656+
/// Eats `<`, possibly breaking tokens like `<<` in the process.
657+
/// Signals an error if `<` was not eaten.
700658
fn expect_lt(&mut self) -> PResult<'a, ()> {
701-
if !self.eat_lt() { self.unexpected() } else { Ok(()) }
659+
if self.eat_lt() { Ok(()) } else { self.unexpected() }
702660
}
703661

704-
/// Expects and consumes a single `>` token. if a `>>` is seen, replaces it
705-
/// with a single `>` and continues. If a `>` is not seen, signals an error.
662+
/// Eats `>`, possibly breaking tokens like `>>` in the process.
663+
/// Signals an error if `>` was not eaten.
706664
fn expect_gt(&mut self) -> PResult<'a, ()> {
707-
self.expected_tokens.push(TokenType::Token(token::Gt));
708-
let ate = match self.token.kind {
709-
token::Gt => {
710-
self.bump();
711-
Some(())
712-
}
713-
token::BinOp(token::Shr) => {
714-
let start_point = self.sess.source_map().start_point(self.token.span);
715-
Some(self.bump_with(token::Gt, self.token.span.with_lo(start_point.hi())))
716-
}
717-
token::BinOpEq(token::Shr) => {
718-
let start_point = self.sess.source_map().start_point(self.token.span);
719-
Some(self.bump_with(token::Ge, self.token.span.with_lo(start_point.hi())))
720-
}
721-
token::Ge => {
722-
let start_point = self.sess.source_map().start_point(self.token.span);
723-
Some(self.bump_with(token::Eq, self.token.span.with_lo(start_point.hi())))
724-
}
725-
_ => None,
726-
};
727-
728-
match ate {
729-
Some(_) => {
730-
// See doc comment for `unmatched_angle_bracket_count`.
731-
if self.unmatched_angle_bracket_count > 0 {
732-
self.unmatched_angle_bracket_count -= 1;
733-
debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
734-
}
735-
736-
Ok(())
665+
if self.break_and_eat(token::Gt) {
666+
// See doc comment for `unmatched_angle_bracket_count`.
667+
if self.unmatched_angle_bracket_count > 0 {
668+
self.unmatched_angle_bracket_count -= 1;
669+
debug!("expect_gt: (decrement) count={:?}", self.unmatched_angle_bracket_count);
737670
}
738-
None => self.unexpected(),
671+
Ok(())
672+
} else {
673+
self.unexpected()
739674
}
740675
}
741676

@@ -903,41 +838,30 @@ impl<'a> Parser<'a> {
903838
}
904839
}
905840

906-
/// Advance the parser by one token.
907-
pub fn bump(&mut self) {
841+
/// Advance the parser by one token, using the provided token as the next one.
842+
fn bump_with(&mut self, next_token: Token) {
843+
// Bumping after EOF is a bad sign, usually an infinite loop.
908844
if self.prev_token.kind == TokenKind::Eof {
909-
// Bumping after EOF is a bad sign, usually an infinite loop.
910845
let msg = "attempted to bump the parser past EOF (may be stuck in a loop)";
911846
self.span_bug(self.token.span, msg);
912847
}
913848

914849
// Update the current and previous tokens.
915850
self.prev_token = self.token.take();
916851
self.unnormalized_prev_token = self.unnormalized_token.take();
917-
let next_token = self.next_tok(self.unnormalized_prev_token.span);
918852
self.set_token(next_token);
919853

920854
// Update fields derived from the previous token.
921855
self.prev_span = self.unnormalized_prev_token.span;
922856

857+
// Diagnostics.
923858
self.expected_tokens.clear();
924859
}
925860

926-
/// Advances the parser using provided token as a next one. Use this when
927-
/// consuming a part of a token. For example a single `<` from `<<`.
928-
/// FIXME: this function sets the previous token data to some semi-nonsensical values
929-
/// which kind of work because they are currently used in very limited ways in practice.
930-
/// Correct token kinds and spans need to be calculated instead.
931-
fn bump_with(&mut self, next: TokenKind, span: Span) {
932-
// Update the current and previous tokens.
933-
self.prev_token = self.token.take();
934-
self.unnormalized_prev_token = self.unnormalized_token.take();
935-
self.set_token(Token::new(next, span));
936-
937-
// Update fields derived from the previous token.
938-
self.prev_span = self.unnormalized_prev_token.span.with_hi(span.lo());
939-
940-
self.expected_tokens.clear();
861+
/// Advance the parser by one token.
862+
pub fn bump(&mut self) {
863+
let next_token = self.next_tok(self.unnormalized_token.span);
864+
self.bump_with(next_token);
941865
}
942866

943867
/// Look-ahead `dist` tokens of `self.token` and get access to that token there.

src/libsyntax/token.rs

+33
Original file line numberDiff line numberDiff line change
@@ -270,6 +270,39 @@ impl TokenKind {
270270
Literal(Lit::new(kind, symbol, suffix))
271271
}
272272

273+
// An approximation to proc-macro-style single-character operators used by the rustc parser.
274+
// If the operator token can be broken into two tokens, the first of which is single-character,
275+
// then this function performs that operation, otherwise it returns `None`.
276+
pub fn break_two_token_op(&self) -> Option<(TokenKind, TokenKind)> {
277+
Some(match *self {
278+
Le => (Lt, Eq),
279+
EqEq => (Eq, Eq),
280+
Ne => (Not, Eq),
281+
Ge => (Gt, Eq),
282+
AndAnd => (BinOp(And), BinOp(And)),
283+
OrOr => (BinOp(Or), BinOp(Or)),
284+
BinOp(Shl) => (Lt, Lt),
285+
BinOp(Shr) => (Gt, Gt),
286+
BinOpEq(Plus) => (BinOp(Plus), Eq),
287+
BinOpEq(Minus) => (BinOp(Minus), Eq),
288+
BinOpEq(Star) => (BinOp(Star), Eq),
289+
BinOpEq(Slash) => (BinOp(Slash), Eq),
290+
BinOpEq(Percent) => (BinOp(Percent), Eq),
291+
BinOpEq(Caret) => (BinOp(Caret), Eq),
292+
BinOpEq(And) => (BinOp(And), Eq),
293+
BinOpEq(Or) => (BinOp(Or), Eq),
294+
BinOpEq(Shl) => (Lt, Le),
295+
BinOpEq(Shr) => (Gt, Ge),
296+
DotDot => (Dot, Dot),
297+
DotDotDot => (Dot, DotDot),
298+
ModSep => (Colon, Colon),
299+
RArrow => (BinOp(Minus), Gt),
300+
LArrow => (Lt, BinOp(Minus)),
301+
FatArrow => (Eq, Gt),
302+
_ => return None,
303+
})
304+
}
305+
273306
/// Returns tokens that are likely to be typed accidentally instead of the current token.
274307
/// Enables better error recovery when the wrong token is found.
275308
pub fn similar_tokens(&self) -> Option<Vec<TokenKind>> {

0 commit comments

Comments
 (0)