Skip to content

Commit 2af0218

Browse files
committed
Recursively expand nonterminals
1 parent 7a4c186 commit 2af0218

File tree

2 files changed

+95
-16
lines changed

2 files changed

+95
-16
lines changed

src/librustc_ast/tokenstream.rs

-2
Original file line numberDiff line numberDiff line change
@@ -290,8 +290,6 @@ impl TokenStream {
290290
t1.next().is_none() && t2.next().is_none()
291291
}
292292

293-
294-
295293
pub fn map_enumerated<F: FnMut(usize, TokenTree) -> TokenTree>(self, mut f: F) -> TokenStream {
296294
TokenStream(Lrc::new(
297295
self.0

src/librustc_parse/lib.rs

+95-14
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,20 @@
77
#![feature(or_patterns)]
88

99
use rustc_ast::ast;
10-
use rustc_ast::token::{self, Nonterminal, Token, TokenKind, DelimToken};
11-
use rustc_ast::tokenstream::{self, TokenStream, TokenTree};
10+
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
11+
use rustc_ast::tokenstream::{self, IsJoint, TokenStream, TokenTree};
1212
use rustc_ast_pretty::pprust;
1313
use rustc_data_structures::sync::Lrc;
1414
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
1515
use rustc_session::parse::ParseSess;
16-
use rustc_span::{FileName, SourceFile, Span};
1716
use rustc_span::symbol::kw;
17+
use rustc_span::{FileName, SourceFile, Span};
1818

19+
use std::mem;
1920
use std::path::Path;
2021
use std::str;
21-
use std::mem;
2222

23-
use log::info;
23+
use log::{debug, info};
2424

2525
pub const MACRO_ARGUMENTS: Option<&'static str> = Some("macro arguments");
2626

@@ -302,7 +302,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
302302
// modifications, including adding/removing typically non-semantic
303303
// tokens such as extra braces and commas, don't happen.
304304
if let Some(tokens) = tokens {
305-
if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real) {
305+
if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real, sess) {
306306
return tokens;
307307
}
308308
info!(
@@ -381,7 +381,11 @@ fn prepend_attrs(
381381
//
382382
// This is otherwise the same as `eq_unspanned`, only recursing with a
383383
// different method.
384-
pub fn tokenstream_probably_equal_for_proc_macro(first: &TokenStream, other: &TokenStream) -> bool {
384+
pub fn tokenstream_probably_equal_for_proc_macro(
385+
first: &TokenStream,
386+
other: &TokenStream,
387+
sess: &ParseSess,
388+
) -> bool {
385389
// When checking for `probably_eq`, we ignore certain tokens that aren't
386390
// preserved in the AST. Because they are not preserved, the pretty
387391
// printer arbitrarily adds or removes them when printing as token
@@ -409,10 +413,83 @@ pub fn tokenstream_probably_equal_for_proc_macro(first: &TokenStream, other: &To
409413
true
410414
}
411415

412-
let mut t1 = first.trees().filter(semantic_tree);
413-
let mut t2 = other.trees().filter(semantic_tree);
416+
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
417+
//
418+
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
419+
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
420+
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
421+
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
422+
// when determining if two `TokenStream`s are 'probably equal'.
423+
//
424+
// Therefore, we use `break_two_token_op` to convert all tokens
425+
// to the 'unglued' form (if it exists). This ensures that two
426+
// `TokenStream`s which differ only in how their tokens are glued
427+
// will be considered 'probably equal', which allows us to keep spans.
428+
//
429+
// This is important when the original `TokenStream` contained
430+
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
431+
// will be omitted when we pretty-print, which can cause the original
432+
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
433+
// leading to some tokens being 'glued' together in one stream but not
434+
// the other. See #68489 for more details.
435+
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
436+
// In almost all cases, we should have either zero or one levels
437+
// of 'unglueing'. However, in some unusual cases, we may need
438+
// to iterate breaking tokens multiple times. For example:
439+
// '[BinOpEq(Shr)] -> [Gt, Ge] -> [Gt, Gt, Eq]'
440+
let mut token_trees: SmallVec<[_; 2]>;
441+
if let TokenTree::Token(token) = &tree {
442+
let mut out = SmallVec::<[_; 2]>::new();
443+
out.push(token.clone());
444+
// Iterate to fixpoint:
445+
// * We start off with 'out' containing our initial token, and `temp` empty
446+
// * If we are able to break any tokens in `out`, then `out` will have
447+
// at least one more element than 'temp', so we will try to break tokens
448+
// again.
449+
// * If we cannot break any tokens in 'out', we are done
450+
loop {
451+
let mut temp = SmallVec::<[_; 2]>::new();
452+
let mut changed = false;
453+
454+
for token in out.into_iter() {
455+
if let Some((first, second)) = token.kind.break_two_token_op() {
456+
temp.push(Token::new(first, DUMMY_SP));
457+
temp.push(Token::new(second, DUMMY_SP));
458+
changed = true;
459+
} else {
460+
temp.push(token);
461+
}
462+
}
463+
out = temp;
464+
if !changed {
465+
break;
466+
}
467+
}
468+
token_trees = out.into_iter().map(|t| TokenTree::Token(t)).collect();
469+
if token_trees.len() != 1 {
470+
debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
471+
}
472+
} else {
473+
token_trees = SmallVec::new();
474+
token_trees.push(tree);
475+
}
476+
token_trees.into_iter()
477+
}
478+
479+
let expand_nt = |tree: TokenTree| {
480+
if let TokenTree::Token(Token { kind: TokenKind::Interpolated(nt), span }) = &tree {
481+
nt_to_tokenstream(nt, sess, *span).into_trees()
482+
} else {
483+
TokenStream::new(vec![(tree, IsJoint::NonJoint)]).into_trees()
484+
}
485+
};
486+
487+
// Break tokens after we expand any nonterminals, so that we break tokens
488+
// that are produced as a result of nonterminal expansion.
489+
let mut t1 = first.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
490+
let mut t2 = other.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
414491
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
415-
if !tokentree_probably_equal_for_proc_macro(&t1, &t2) {
492+
if !tokentree_probably_equal_for_proc_macro(&t1, &t2, sess) {
416493
return false;
417494
}
418495
}
@@ -471,25 +548,29 @@ crate fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bo
471548
b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
472549
}
473550

474-
(&Interpolated(_), &Interpolated(_)) => false,
551+
// Expanded by `tokenstream_probably_equal_for_proc_macro`
552+
(&Interpolated(_), &Interpolated(_)) => unreachable!(),
475553

476554
_ => panic!("forgot to add a token?"),
477555
}
478556
}
479557

480-
481558
// See comments in `Nonterminal::to_tokenstream` for why we care about
482559
// *probably* equal here rather than actual equality
483560
//
484561
// This is otherwise the same as `eq_unspanned`, only recursing with a
485562
// different method.
486-
pub fn tokentree_probably_equal_for_proc_macro(first: &TokenTree, other: &TokenTree) -> bool {
563+
pub fn tokentree_probably_equal_for_proc_macro(
564+
first: &TokenTree,
565+
other: &TokenTree,
566+
sess: &ParseSess,
567+
) -> bool {
487568
match (first, other) {
488569
(TokenTree::Token(token), TokenTree::Token(token2)) => {
489570
token_probably_equal_for_proc_macro(token, token2)
490571
}
491572
(TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
492-
delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2)
573+
delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2, sess)
493574
}
494575
_ => false,
495576
}

0 commit comments

Comments
 (0)