Skip to content

Commit 2af0218

Browse files
committed
Recursively expand nonterminals
1 parent 7a4c186 commit 2af0218

File tree

2 files changed

+95
-16
lines changed

2 files changed

+95
-16
lines changed

src/librustc_ast/tokenstream.rs

-2
Original file line numberDiff line numberDiff line change
@@ -290,8 +290,6 @@ impl TokenStream {
290290
t1.next().is_none() && t2.next().is_none()
291291
}
292292

293-
294-
295293
pub fn map_enumerated<F: FnMut(usize, TokenTree) -> TokenTree>(self, mut f: F) -> TokenStream {
296294
TokenStream(Lrc::new(
297295
self.0

src/librustc_parse/lib.rs

+95-14
Original file line numberDiff line numberDiff line change
@@ -7,20 +7,20 @@
77
#![feature(or_patterns)]
88

99
use rustc_ast::ast;
10-
use rustc_ast::token::{self, Nonterminal, Token, TokenKind, DelimToken};
11-
use rustc_ast::tokenstream::{self, TokenStream, TokenTree};
10+
use rustc_ast::token::{self, DelimToken, Nonterminal, Token, TokenKind};
11+
use rustc_ast::tokenstream::{self, IsJoint, TokenStream, TokenTree};
1212
use rustc_ast_pretty::pprust;
1313
use rustc_data_structures::sync::Lrc;
1414
use rustc_errors::{Diagnostic, FatalError, Level, PResult};
1515
use rustc_session::parse::ParseSess;
16-
use rustc_span::{FileName, SourceFile, Span};
1716
use rustc_span::symbol::kw;
17+
use rustc_span::{FileName, SourceFile, Span};
1818

19+
use std::mem;
1920
use std::path::Path;
2021
use std::str;
21-
use std::mem;
2222

23-
use log::info;
23+
use log::{debug, info};
2424

2525
pub const MACRO_ARGUMENTS: Option<&'static str> = Some("macro arguments");
2626

@@ -302,7 +302,7 @@ pub fn nt_to_tokenstream(nt: &Nonterminal, sess: &ParseSess, span: Span) -> Toke
302302
// modifications, including adding/removing typically non-semantic
303303
// tokens such as extra braces and commas, don't happen.
304304
if let Some(tokens) = tokens {
305-
if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real) {
305+
if tokenstream_probably_equal_for_proc_macro(&tokens, &tokens_for_real, sess) {
306306
return tokens;
307307
}
308308
info!(
@@ -381,7 +381,11 @@ fn prepend_attrs(
381381
//
382382
// This is otherwise the same as `eq_unspanned`, only recursing with a
383383
// different method.
384-
pub fn tokenstream_probably_equal_for_proc_macro(first: &TokenStream, other: &TokenStream) -> bool {
384+
pub fn tokenstream_probably_equal_for_proc_macro(
385+
first: &TokenStream,
386+
other: &TokenStream,
387+
sess: &ParseSess,
388+
) -> bool {
385389
// When checking for `probably_eq`, we ignore certain tokens that aren't
386390
// preserved in the AST. Because they are not preserved, the pretty
387391
// printer arbitrarily adds or removes them when printing as token
@@ -409,10 +413,83 @@ pub fn tokenstream_probably_equal_for_proc_macro(first: &TokenStream, other: &To
409413
true
410414
}
411415

412-
let mut t1 = first.trees().filter(semantic_tree);
413-
let mut t2 = other.trees().filter(semantic_tree);
416+
// When comparing two `TokenStream`s, we ignore the `IsJoint` information.
417+
//
418+
// However, `rustc_parse::lexer::tokentrees::TokenStreamBuilder` will
419+
// use `Token.glue` on adjacent tokens with the proper `IsJoint`.
420+
// Since we are ignoring `IsJoint`, a 'glued' token (e.g. `BinOp(Shr)`)
421+
// and its 'split'/'unglued' components (e.g. `Gt, Gt`) are equivalent
422+
// when determining if two `TokenStream`s are 'probably equal'.
423+
//
424+
// Therefore, we use `break_two_token_op` to convert all tokens
425+
// to the 'unglued' form (if it exists). This ensures that two
426+
// `TokenStream`s which differ only in how their tokens are glued
427+
// will be considered 'probably equal', which allows us to keep spans.
428+
//
429+
// This is important when the original `TokenStream` contained
430+
// extra spaces (e.g. `f :: < Vec < _ > > ( ) ;`). These extra spaces
431+
// will be omitted when we pretty-print, which can cause the original
432+
// and reparsed `TokenStream`s to differ in the assignment of `IsJoint`,
433+
// leading to some tokens being 'glued' together in one stream but not
434+
// the other. See #68489 for more details.
435+
fn break_tokens(tree: TokenTree) -> impl Iterator<Item = TokenTree> {
436+
// In almost all cases, we should have either zero or one levels
437+
// of 'unglueing'. However, in some unusual cases, we may need
438+
// to iterate breaking tokens multiple times. For example:
439+
// '[BinOpEq(Shr)] -> [Gt, Ge] -> [Gt, Gt, Eq]'
440+
let mut token_trees: SmallVec<[_; 2]>;
441+
if let TokenTree::Token(token) = &tree {
442+
let mut out = SmallVec::<[_; 2]>::new();
443+
out.push(token.clone());
444+
// Iterate to fixpoint:
445+
// * We start off with 'out' containing our initial token, and `temp` empty
446+
// * If we are able to break any tokens in `out`, then `out` will have
447+
// at least one more element than 'temp', so we will try to break tokens
448+
// again.
449+
// * If we cannot break any tokens in 'out', we are done
450+
loop {
451+
let mut temp = SmallVec::<[_; 2]>::new();
452+
let mut changed = false;
453+
454+
for token in out.into_iter() {
455+
if let Some((first, second)) = token.kind.break_two_token_op() {
456+
temp.push(Token::new(first, DUMMY_SP));
457+
temp.push(Token::new(second, DUMMY_SP));
458+
changed = true;
459+
} else {
460+
temp.push(token);
461+
}
462+
}
463+
out = temp;
464+
if !changed {
465+
break;
466+
}
467+
}
468+
token_trees = out.into_iter().map(|t| TokenTree::Token(t)).collect();
469+
if token_trees.len() != 1 {
470+
debug!("break_tokens: broke {:?} to {:?}", tree, token_trees);
471+
}
472+
} else {
473+
token_trees = SmallVec::new();
474+
token_trees.push(tree);
475+
}
476+
token_trees.into_iter()
477+
}
478+
479+
let expand_nt = |tree: TokenTree| {
480+
if let TokenTree::Token(Token { kind: TokenKind::Interpolated(nt), span }) = &tree {
481+
nt_to_tokenstream(nt, sess, *span).into_trees()
482+
} else {
483+
TokenStream::new(vec![(tree, IsJoint::NonJoint)]).into_trees()
484+
}
485+
};
486+
487+
// Break tokens after we expand any nonterminals, so that we break tokens
488+
// that are produced as a result of nonterminal expansion.
489+
let mut t1 = first.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
490+
let mut t2 = other.trees().filter(semantic_tree).flat_map(expand_nt).flat_map(break_tokens);
414491
for (t1, t2) in t1.by_ref().zip(t2.by_ref()) {
415-
if !tokentree_probably_equal_for_proc_macro(&t1, &t2) {
492+
if !tokentree_probably_equal_for_proc_macro(&t1, &t2, sess) {
416493
return false;
417494
}
418495
}
@@ -471,25 +548,29 @@ crate fn token_probably_equal_for_proc_macro(first: &Token, other: &Token) -> bo
471548
b == d && (a == c || a == kw::DollarCrate || c == kw::DollarCrate)
472549
}
473550

474-
(&Interpolated(_), &Interpolated(_)) => false,
551+
// Expanded by `tokenstream_probably_equal_for_proc_macro`
552+
(&Interpolated(_), &Interpolated(_)) => unreachable!(),
475553

476554
_ => panic!("forgot to add a token?"),
477555
}
478556
}
479557

480-
481558
// See comments in `Nonterminal::to_tokenstream` for why we care about
482559
// *probably* equal here rather than actual equality
483560
//
484561
// This is otherwise the same as `eq_unspanned`, only recursing with a
485562
// different method.
486-
pub fn tokentree_probably_equal_for_proc_macro(first: &TokenTree, other: &TokenTree) -> bool {
563+
pub fn tokentree_probably_equal_for_proc_macro(
564+
first: &TokenTree,
565+
other: &TokenTree,
566+
sess: &ParseSess,
567+
) -> bool {
487568
match (first, other) {
488569
(TokenTree::Token(token), TokenTree::Token(token2)) => {
489570
token_probably_equal_for_proc_macro(token, token2)
490571
}
491572
(TokenTree::Delimited(_, delim, tts), TokenTree::Delimited(_, delim2, tts2)) => {
492-
delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2)
573+
delim == delim2 && tokenstream_probably_equal_for_proc_macro(&tts, &tts2, sess)
493574
}
494575
_ => false,
495576
}

0 commit comments

Comments
 (0)