From 3d61c2c4212f86d76000c43707d2ea773bb293be Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Sat, 11 Jul 2020 02:03:08 +0000
Subject: [PATCH 1/4] Move block comment helpers to their own module

---
 compiler/rustc_ast/src/util/comments.rs       | 62 +-----------------
 .../src/util/comments/block_comment.rs        | 64 +++++++++++++++++++
 2 files changed, 66 insertions(+), 60 deletions(-)
 create mode 100644 compiler/rustc_ast/src/util/comments/block_comment.rs
diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs
index e97c8cc4562f6..c1d46bd81c519 100644
--- a/compiler/rustc_ast/src/util/comments.rs
+++ b/compiler/rustc_ast/src/util/comments.rs
@@ -1,6 +1,7 @@
 use rustc_span::source_map::SourceMap;
 use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol};
 
+mod block_comment;
 #[cfg(test)]
 mod tests;
 
@@ -26,66 +27,7 @@ pub struct Comment {
 /// Makes a doc string more presentable to users.
 /// Used by rustdoc and perhaps other tools, but not by rustc.
 pub fn beautify_doc_string(data: Symbol) -> String {
-    /// remove whitespace-only lines from the start/end of lines
-    fn vertical_trim(lines: Vec<String>) -> Vec<String> {
-        let mut i = 0;
-        let mut j = lines.len();
-        // first line of all-stars should be omitted
-        if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
-            i += 1;
-        }
-
-        while i < j && lines[i].trim().is_empty() {
-            i += 1;
-        }
-        // like the first, a last line of all stars should be omitted
-        if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') {
-            j -= 1;
-        }
-
-        while j > i && lines[j - 1].trim().is_empty() {
-            j -= 1;
-        }
-
-        lines[i..j].to_vec()
-    }
-
-    /// remove a "[ \t]*\*" block from each line, if possible
-    fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
-        let mut i = usize::MAX;
-        let mut can_trim = true;
-        let mut first = true;
-
-        for line in &lines {
-            for (j, c) in line.chars().enumerate() {
-                if j > i || !"* \t".contains(c) {
-                    can_trim = false;
-                    break;
-                }
-                if c == '*' {
-                    if first {
-                        i = j;
-                        first = false;
-                    } else if i != j {
-                        can_trim = false;
-                    }
-                    break;
-                }
-            }
-            if i >= line.len() {
-                can_trim = false;
-            }
-            if !can_trim {
-                break;
-            }
-        }
-
-        if can_trim {
-            lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect()
-        } else {
-            lines
-        }
-    }
+    use block_comment::{horizontal_trim, vertical_trim};
 
     let data = data.as_str();
     if data.contains('\n') {
diff --git a/compiler/rustc_ast/src/util/comments/block_comment.rs b/compiler/rustc_ast/src/util/comments/block_comment.rs
new file mode 100644
index 0000000000000..dbb74f90d4087
--- /dev/null
+++ b/compiler/rustc_ast/src/util/comments/block_comment.rs
@@ -0,0 +1,64 @@
+/*!
+ * Block comment helpers.
+ */
+
+/// remove whitespace-only lines from the start/end of lines
+pub fn vertical_trim(lines: Vec<String>) -> Vec<String> {
+    let mut i = 0;
+    let mut j = lines.len();
+    // first line of all-stars should be omitted
+    if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
+        i += 1;
+    }
+
+    while i < j && lines[i].trim().is_empty() {
+        i += 1;
+    }
+    // like the first, a last line of all stars should be omitted
+    if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') {
+        j -= 1;
+    }
+
+    while j > i && lines[j - 1].trim().is_empty() {
+        j -= 1;
+    }
+
+    lines[i..j].to_vec()
+}
+
+/// remove a "[ \t]*\*" block from each line, if possible
+pub fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
+    let mut i = usize::MAX;
+    let mut can_trim = true;
+    let mut first = true;
+
+    for line in &lines {
+        for (j, c) in line.chars().enumerate() {
+            if j > i || !"* \t".contains(c) {
+                can_trim = false;
+                break;
+            }
+            if c == '*' {
+                if first {
+                    i = j;
+                    first = false;
+                } else if i != j {
+                    can_trim = false;
+                }
+                break;
+            }
+        }
+        if i >= line.len() {
+            can_trim = false;
+        }
+        if !can_trim {
+            break;
+        }
+    }
+
+    if can_trim {
+        lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect()
+    } else {
+        lines
+    }
+}

From e82761cb2333464a54d0a698f6abf5f54f024343 Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Sat, 11 Jul 2020 02:03:08 +0000
Subject: [PATCH 2/4] Modernize doc helper functions

---
 compiler/rustc_ast/src/util/comments.rs       |  10 +-
 .../src/util/comments/block_comment.rs        | 137 ++++++++++------
 .../src/util/comments/block_comment/tests.rs  | 146 ++++++++++++++++++
 3 files changed, 242 insertions(+), 51 deletions(-)
 create mode 100644 compiler/rustc_ast/src/util/comments/block_comment/tests.rs

diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs
index c1d46bd81c519..15d042bfb6690 100644
--- a/compiler/rustc_ast/src/util/comments.rs
+++ b/compiler/rustc_ast/src/util/comments.rs
@@ -31,10 +31,12 @@ pub fn beautify_doc_string(data: Symbol) -> String {
 
     let data = data.as_str();
     if data.contains('\n') {
-        let lines = data.lines().map(|s| s.to_string()).collect::<Vec<String>>();
-        let lines = vertical_trim(lines);
-        let lines = horizontal_trim(lines);
-        lines.join("\n")
+        let lines = data.lines().collect::<Vec<&str>>();
+        let lines = vertical_trim(&lines);
+        match horizontal_trim(lines) {
+            Some(lines) => lines.join("\n"),
+            None => lines.join("\n"),
+        }
     } else {
         data.to_string()
     }
diff --git a/compiler/rustc_ast/src/util/comments/block_comment.rs b/compiler/rustc_ast/src/util/comments/block_comment.rs
index dbb74f90d4087..4aea8338d7224 100644
--- a/compiler/rustc_ast/src/util/comments/block_comment.rs
+++ b/compiler/rustc_ast/src/util/comments/block_comment.rs
@@ -2,63 +2,106 @@
  * Block comment helpers.
  */
 
-/// remove whitespace-only lines from the start/end of lines
-pub fn vertical_trim(lines: Vec<String>) -> Vec<String> {
-    let mut i = 0;
-    let mut j = lines.len();
-    // first line of all-stars should be omitted
-    if !lines.is_empty() && lines[0].chars().all(|c| c == '*') {
-        i += 1;
+#[cfg(test)]
+mod tests;
+
+/********************************************************
+ * Skip lines based on the following rules:
+ *
+ * * Skip first line of all stars ("*").
+ * * Skip consecutive empty lines top-bottom.
+ * * Skip consecutive empty lines bottom-top.
+ * * Skip the last line if it matches the regex "^ ?\**$".
+ *******************************************************/
+pub fn vertical_trim<'arr, 'row: 'arr>(lines: &'arr [&'row str]) -> &'arr [&'row str] {
+    let mut region = lines;
+    if let [first, tail @ ..] = region {
+        // Skip first line of all-stars.
+        if first.bytes().all(|c| c == b'*') {
+            region = tail;
+        }
     }
 
-    while i < j && lines[i].trim().is_empty() {
-        i += 1;
+    // Skip consecutive empty lines.
+    loop {
+        match region {
+            [first, tail @ ..] if first.trim().is_empty() => region = tail,
+            _ => break,
+        }
     }
-    // like the first, a last line of all stars should be omitted
-    if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') {
-        j -= 1;
+
+    // Skip the last line if it matches the regex "^ ?\**$".
+    if let [head @ .., last] = region {
+        let s = match last.as_bytes() {
+            [b' ', tail @ ..] => tail,
+            all => all,
+        };
+        if s.iter().all(|&c| c == b'*') {
+            region = head;
+        }
     }
 
-    while j > i && lines[j - 1].trim().is_empty() {
-        j -= 1;
+    // Skip consecutive empty lines from last line backward.
+    loop {
+        match region {
+            [head @ .., last] if last.trim().is_empty() => region = head,
+            _ => break,
+        }
     }
 
-    lines[i..j].to_vec()
+    region
 }
 
-/// remove a "[ \t]*\*" block from each line, if possible
-pub fn horizontal_trim(lines: Vec<String>) -> Vec<String> {
-    let mut i = usize::MAX;
-    let mut can_trim = true;
-    let mut first = true;
+/// Trims the "[ \t]*\*" prefix of the first line from every line: all or nothing.
+///
+/// For example,
+/// ```text
+///   * one two three four five ...
+///   * one two three four five ...
+///   * one two three four five ...
+/// ```
+/// will be trimmed to
+/// ```text
+///  one two three four five ...
+///  one two three four five ...
+///  one two three four five ...
+/// ```
+pub fn horizontal_trim<'arr, 'row: 'arr>(lines: &'arr [&'row str]) -> Option<Vec<&'row str>> {
+    let prefix = match lines {
+        [first, ..] => get_prefix(first)?,
+        _ => return None,
+    };
 
-    for line in &lines {
-        for (j, c) in line.chars().enumerate() {
-            if j > i || !"* \t".contains(c) {
-                can_trim = false;
-                break;
-            }
-            if c == '*' {
-                if first {
-                    i = j;
-                    first = false;
-                } else if i != j {
-                    can_trim = false;
-                }
-                break;
-            }
-        }
-        if i >= line.len() {
-            can_trim = false;
-        }
-        if !can_trim {
-            break;
-        }
+    if lines.iter().any(|l| !l.starts_with(prefix)) {
+        return None;
     }
 
-    if can_trim {
-        lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect()
-    } else {
-        lines
-    }
+    let lines = lines
+        .iter()
+        // SAFETY: every line starts with `prefix` (checked above), so the range is valid.
+        .map(|l| unsafe { l.get_unchecked(prefix.len()..) })
+        .collect();
+    Some(lines)
+}
+
+/// Gets the leading prefix of `s` matching "[ \t]*\*", or `None` if there is none.
+fn get_prefix(s: &str) -> Option<&str> {
+    let mut bytes = s.as_bytes();
+    let dst: *const u8 = loop {
+        match bytes {
+            [b' ' | b'\t', end @ ..] => bytes = end,
+            [b'*', end @ ..] => break end.as_ptr(),
+            _ => return None,
+        }
+    };
+    let prefix = unsafe {
+        // SAFETY: Two invariants are upheld.
+        // * the length of `prefix` is the difference of two pointers into the same str `s`.
+        // * the lifetime of `prefix` is the same as that of the argument `s`.
+        let src: *const u8 = s.as_ptr();
+        let len = dst as usize - src as usize;
+        let slice = std::slice::from_raw_parts(src, len);
+        std::str::from_utf8_unchecked(slice)
+    };
+    Some(prefix)
 }
diff --git a/compiler/rustc_ast/src/util/comments/block_comment/tests.rs b/compiler/rustc_ast/src/util/comments/block_comment/tests.rs
new file mode 100644
index 0000000000000..c2fd368246d23
--- /dev/null
+++ b/compiler/rustc_ast/src/util/comments/block_comment/tests.rs
@@ -0,0 +1,146 @@
+use super::*;
+
+// Check that vertical_trim trims the first and last lines.
+#[test]
+fn trim_vertically_first_or_line() {
+    // Accepted cases
+
+    let inp = &["*********************************", "* This is a module to do foo job."];
+    let out = &["* This is a module to do foo job."];
+    assert_eq!(vertical_trim(inp), out);
+
+    let inp = &["* This is a module to do foo job.", "*********************************"];
+    let out = &["* This is a module to do foo job."];
+    assert_eq!(vertical_trim(inp), out);
+
+    let inp = &[
+        "*********************************",
+        "* This is a module to do foo job.",
+        "*********************************",
+    ];
+    let out = &["* This is a module to do foo job."];
+    assert_eq!(vertical_trim(inp), out);
+
+    let inp = &[
+        "***********************",
+        "* This is a module to do foo job.",
+        "*********************************",
+    ];
+    let out = &["* This is a module to do foo job."];
+    assert_eq!(vertical_trim(inp), out);
+
+    let inp = &[
+        "**************************",
+        " * one two three four five six seven",
+        " ****************",
+    ];
+    let out = &[" * one two three four five six seven"];
+    assert_eq!(vertical_trim(inp), out);
+
+    let inp = &["", " * one two three four five", " "];
+    let out = &[" * one two three four five"];
+    assert_eq!(vertical_trim(inp), out);
+
+    // Non-accepted cases
+
+    let inp = &["\t  *********************** \t", "* This is a module to do foo job."];
+    let out = &["\t  *********************** \t", "* This is a module to do foo job."];
+    assert_eq!(vertical_trim(inp), out);
+
+    // More than one space indentation.
+    let inp = &[
+        "******************************",
+        "  * This is a module to do foo job.",
+        "  **************",
+    ];
+    let out = &["  * This is a module to do foo job.", "  **************"];
+    assert_eq!(vertical_trim(inp), out);
+}
+
+// Trim consecutive empty lines from the top. Stop at the first non-empty line.
+#[test]
+fn trim_vertically_empty_lines_forward() {
+    let inp = &["    ", "    \t    \t  ", " * One two three four five six seven eight nine ten."];
+    let out = &[" * One two three four five six seven eight nine ten."];
+    assert_eq!(vertical_trim(inp), out);
+
+    let inp = &[
+        "    ",
+        " * One two three four five six seven eight nine ten.",
+        "    \t    \t  ",
+        " * One two three four five six seven eight nine ten.",
+    ];
+    let out = &[
+        " * One two three four five six seven eight nine ten.",
+        "    \t    \t  ",
+        " * One two three four five six seven eight nine ten.",
+    ];
+    assert_eq!(vertical_trim(inp), out);
+}
+
+// Trim consecutive empty lines from the bottom. Stop at the first non-empty line.
+#[test]
+fn trim_vertically_empty_lines_backward() {
+    let inp = &[" * One two three four five six seven eight nine ten.", "    ", "    \t    \t  "];
+    let out = &[" * One two three four five six seven eight nine ten."];
+    assert_eq!(vertical_trim(inp), out);
+
+    let inp = &[
+        " * One two three four five six seven eight nine ten.",
+        "    ",
+        " * One two three four five six seven eight nine ten.",
+        "    \t    \t  ",
+    ];
+    let out = &[
+        " * One two three four five six seven eight nine ten.",
+        "    ",
+        " * One two three four five six seven eight nine ten.",
+    ];
+    assert_eq!(vertical_trim(inp), out);
+}
+
+// Test for any panic from wrong indexing.
+#[test]
+fn trim_vertically_empty() {
+    let inp = &[""];
+    let out: &[&str] = &[];
+    assert_eq!(vertical_trim(inp), out);
+
+    let inp: &[&str] = &[];
+    let out: &[&str] = &[];
+    assert_eq!(vertical_trim(inp), out);
+}
+
+#[test]
+fn trim_horizontally() {
+    let inp = &[
+        " \t\t * one two three",
+        " \t\t * four fix six seven *",
+        " \t\t * forty two ",
+        " \t\t ** sixty nine",
+    ];
+    let out: &[&str] = &[" one two three", " four fix six seven *", " forty two ", "* sixty nine"];
+    assert_eq!(horizontal_trim(inp).as_deref(), Some(out));
+
+    // Test that we handle empty collection and collection with one item.
+    assert_eq!(horizontal_trim(&[]).as_deref(), None);
+    assert_eq!(horizontal_trim(&[""]).as_deref(), None);
+
+    // Non-accepted: "\t" is not treated as equal to " ".
+
+    let inp = &[
+        " \t * one two three",
+        "     * four fix six seven *",
+        " \t * forty two ",
+        " \t ** sixty nine",
+    ];
+    assert_eq!(horizontal_trim(inp).as_deref(), None);
+}
+
+#[test]
+fn test_get_prefix() {
+    assert_eq!(get_prefix(" \t **"), Some(" \t *"));
+    assert_eq!(get_prefix("*"), Some("*"));
+    assert_eq!(get_prefix(" \t ^*"), None);
+    assert_eq!(get_prefix("   "), None);
+}

From 535c726ed5b4f00ef3e59e48ea1ccfdf2a536f6a Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Thu, 3 Sep 2020 11:58:30 +0000
Subject: [PATCH 3/4] Use slice pattern for doc comments in lexer

---
 compiler/rustc_lexer/src/cursor.rs |  2 +-
 compiler/rustc_lexer/src/lib.rs    | 16 +++++++++-------
 2 files changed, 10 insertions(+), 8 deletions(-)

diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs
index c0045d3f79be1..9010240e3eb0d 100644
--- a/compiler/rustc_lexer/src/cursor.rs
+++ b/compiler/rustc_lexer/src/cursor.rs
@@ -66,7 +66,7 @@ impl<'a> Cursor<'a> {
     }
 
     /// Returns a `Chars` iterator over the remaining characters.
-    fn chars(&self) -> Chars<'a> {
+    pub(crate) fn chars(&self) -> Chars<'a> {
         self.chars.clone()
     }
 
diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs
index 44999bbe85713..a6f3c2ccc5d59 100644
--- a/compiler/rustc_lexer/src/lib.rs
+++ b/compiler/rustc_lexer/src/lib.rs
@@ -422,11 +422,12 @@ impl Cursor<'_> {
         debug_assert!(self.prev() == '/' && self.first() == '/');
         self.bump();
 
-        let doc_style = match self.first() {
+        let doc_style = match self.chars().as_str().as_bytes() {
             // `//!` is an inner line doc comment.
-            '!' => Some(DocStyle::Inner),
+            [b'!', ..] => Some(DocStyle::Inner),
             // `////` (more than 3 slashes) is not considered a doc comment.
-            '/' if self.second() != '/' => Some(DocStyle::Outer),
+            [b'/', b'/', ..] => None,
+            [b'/', ..] => Some(DocStyle::Outer),
             _ => None,
         };
 
@@ -438,12 +439,13 @@ impl Cursor<'_> {
         debug_assert!(self.prev() == '/' && self.first() == '*');
         self.bump();
 
-        let doc_style = match self.first() {
+        let doc_style = match self.chars().as_str().as_bytes() {
             // `/*!` is an inner block doc comment.
-            '!' => Some(DocStyle::Inner),
+            [b'!', ..] => Some(DocStyle::Inner),
             // `/***` (more than 2 stars) is not considered a doc comment.
             // `/**/` is not considered a doc comment.
-            '*' if !matches!(self.second(), '*' | '/') => Some(DocStyle::Outer),
+            [b'*', b'*' | b'/', ..] => None,
+            [b'*', ..] => Some(DocStyle::Outer),
             _ => None,
         };
 
@@ -464,7 +466,7 @@ impl Cursor<'_> {
                         break;
                     }
                 }
-                _ => (),
+                _ => {}
             }
         }
 

From 92521d11eb0b0d2f52c6a77c34e2c56b5622bc49 Mon Sep 17 00:00:00 2001
From: Lzu Tao <taolzu@gmail.com>
Date: Thu, 3 Sep 2020 11:59:00 +0000
Subject: [PATCH 4/4] Make it clear about content_end

---
 compiler/rustc_parse/src/lexer/mod.rs | 20 ++++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs
index 034442b798b29..8f6b2b3bf7387 100644
--- a/compiler/rustc_parse/src/lexer/mod.rs
+++ b/compiler/rustc_parse/src/lexer/mod.rs
@@ -172,17 +172,17 @@ impl<'a> StringReader<'a> {
                         )
                         .emit();
                     FatalError.raise();
+                } else {
+                    // Skip non-doc comments
+                    let doc_style = doc_style?;
+
+                    // Opening delimiter of the length 3 and closing delimiter of the length 2
+                    // are not included into the symbol.
+                    let content_start = start + BytePos(3);
+                    let content_end = self.pos - BytePos(2);
+                    let content = self.str_from_to(content_start, content_end);
+                    self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
                 }
-
-                // Skip non-doc comments
-                let doc_style = doc_style?;
-
-                // Opening delimiter of the length 3 and closing delimiter of the length 2
-                // are not included into the symbol.
-                let content_start = start + BytePos(3);
-                let content_end = self.pos - BytePos(if terminated { 2 } else { 0 });
-                let content = self.str_from_to(content_start, content_end);
-                self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style)
             }
             rustc_lexer::TokenKind::Whitespace => return None,
             rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {