From 3d61c2c4212f86d76000c43707d2ea773bb293be Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Sat, 11 Jul 2020 02:03:08 +0000 Subject: [PATCH 1/4] Move block comment helps to its own module --- compiler/rustc_ast/src/util/comments.rs | 62 +----------------- .../src/util/comments/block_comment.rs | 64 +++++++++++++++++++ 2 files changed, 66 insertions(+), 60 deletions(-) create mode 100644 compiler/rustc_ast/src/util/comments/block_comment.rs diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs index e97c8cc4562f6..c1d46bd81c519 100644 --- a/compiler/rustc_ast/src/util/comments.rs +++ b/compiler/rustc_ast/src/util/comments.rs @@ -1,6 +1,7 @@ use rustc_span::source_map::SourceMap; use rustc_span::{BytePos, CharPos, FileName, Pos, Symbol}; +mod block_comment; #[cfg(test)] mod tests; @@ -26,66 +27,7 @@ pub struct Comment { /// Makes a doc string more presentable to users. /// Used by rustdoc and perhaps other tools, but not by rustc. pub fn beautify_doc_string(data: Symbol) -> String { - /// remove whitespace-only lines from the start/end of lines - fn vertical_trim(lines: Vec) -> Vec { - let mut i = 0; - let mut j = lines.len(); - // first line of all-stars should be omitted - if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { - i += 1; - } - - while i < j && lines[i].trim().is_empty() { - i += 1; - } - // like the first, a last line of all stars should be omitted - if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') { - j -= 1; - } - - while j > i && lines[j - 1].trim().is_empty() { - j -= 1; - } - - lines[i..j].to_vec() - } - - /// remove a "[ \t]*\*" block from each line, if possible - fn horizontal_trim(lines: Vec) -> Vec { - let mut i = usize::MAX; - let mut can_trim = true; - let mut first = true; - - for line in &lines { - for (j, c) in line.chars().enumerate() { - if j > i || !"* \t".contains(c) { - can_trim = false; - break; - } - if c == '*' { - if first { - i = j; - first = false; - } else if i != j { - can_trim = false; - } - break; - } - } - if i >= line.len() { - can_trim = false; - } - if !can_trim { - break; - } - } - - if can_trim { - lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect() - } else { - lines - } - } + use block_comment::{horizontal_trim, vertical_trim}; let data = data.as_str(); if data.contains('\n') { diff --git a/compiler/rustc_ast/src/util/comments/block_comment.rs b/compiler/rustc_ast/src/util/comments/block_comment.rs new file mode 100644 index 0000000000000..dbb74f90d4087 --- /dev/null +++ b/compiler/rustc_ast/src/util/comments/block_comment.rs @@ -0,0 +1,64 @@ +/*! + * Block comment helpers. + */ + +/// remove whitespace-only lines from the start/end of lines +pub fn vertical_trim(lines: Vec) -> Vec { + let mut i = 0; + let mut j = lines.len(); + // first line of all-stars should be omitted + if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { + i += 1; + } + + while i < j && lines[i].trim().is_empty() { + i += 1; + } + // like the first, a last line of all stars should be omitted + if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') { + j -= 1; + } + + while j > i && lines[j - 1].trim().is_empty() { + j -= 1; + } + + lines[i..j].to_vec() +} + +/// remove a "[ \t]*\*" block from each line, if possible +pub fn horizontal_trim(lines: Vec) -> Vec { + let mut i = usize::MAX; + let mut can_trim = true; + let mut first = true; + + for line in &lines { + for (j, c) in line.chars().enumerate() { + if j > i || !"* \t".contains(c) { + can_trim = false; + break; + } + if c == '*' { + if first { + i = j; + first = false; + } else if i != j { + can_trim = false; + } + break; + } + } + if i >= line.len() { + can_trim = false; + } + if !can_trim { + break; + } + } + + if can_trim { + lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect() + } else { + lines + } +} From e82761cb2333464a54d0a698f6abf5f54f024343 Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Sat, 11 Jul 2020 02:03:08 +0000 Subject: [PATCH 2/4] Modernize doc helper functions --- compiler/rustc_ast/src/util/comments.rs | 10 +- .../src/util/comments/block_comment.rs | 137 ++++++++++------ .../src/util/comments/block_comment/tests.rs | 146 ++++++++++++++++++ 3 files changed, 242 insertions(+), 51 deletions(-) create mode 100644 compiler/rustc_ast/src/util/comments/block_comment/tests.rs diff --git a/compiler/rustc_ast/src/util/comments.rs b/compiler/rustc_ast/src/util/comments.rs index c1d46bd81c519..15d042bfb6690 100644 --- a/compiler/rustc_ast/src/util/comments.rs +++ b/compiler/rustc_ast/src/util/comments.rs @@ -31,10 +31,12 @@ pub fn beautify_doc_string(data: Symbol) -> String { let data = data.as_str(); if data.contains('\n') { - let lines = data.lines().map(|s| s.to_string()).collect::>(); - let lines = vertical_trim(lines); - let lines = horizontal_trim(lines); - lines.join("\n") + let lines = data.lines().collect::>(); + let lines = vertical_trim(&lines); + match horizontal_trim(lines) { + Some(lines) => lines.join("\n"), + None => lines.join("\n"), + } } else { data.to_string() } diff --git a/compiler/rustc_ast/src/util/comments/block_comment.rs b/compiler/rustc_ast/src/util/comments/block_comment.rs index dbb74f90d4087..4aea8338d7224 100644 --- a/compiler/rustc_ast/src/util/comments/block_comment.rs +++ b/compiler/rustc_ast/src/util/comments/block_comment.rs @@ -2,63 +2,106 @@ * Block comment helpers. */ -/// remove whitespace-only lines from the start/end of lines -pub fn vertical_trim(lines: Vec) -> Vec { - let mut i = 0; - let mut j = lines.len(); - // first line of all-stars should be omitted - if !lines.is_empty() && lines[0].chars().all(|c| c == '*') { - i += 1; +#[cfg(test)] +mod tests; + +/******************************************************** + * Skip lines based on the following rules: + * + * * Skip first line of all stars ("*"). + * * Skip consecutive empty lines top-bottom. + * * Skip consecutive empty lines bottom-top. + * * Skip last line contains pattern "^ ?\**$" in regex. + *******************************************************/ +pub fn vertical_trim<'arr, 'row: 'arr>(lines: &'arr [&'row str]) -> &'arr [&'row str] { + let mut region = lines; + if let [first, tail @ ..] = region { + // Skip first line of all-stars. + if first.bytes().all(|c| c == b'*') { + region = tail; + } } - while i < j && lines[i].trim().is_empty() { - i += 1; + // Skip consecutive empty lines. + loop { + match region { + [first, tail @ ..] if first.trim().is_empty() => region = tail, + _ => break, + } } - // like the first, a last line of all stars should be omitted - if j > i && lines[j - 1].chars().skip(1).all(|c| c == '*') { - j -= 1; + + // Skip last line contains pattern "^ ?*\**" in regex. + if let [head @ .., last] = region { + let s = match last.as_bytes() { + [b' ', tail @ ..] => tail, + all => all, + }; + if s.iter().all(|&c| c == b'*') { + region = head; + } } - while j > i && lines[j - 1].trim().is_empty() { - j -= 1; + // Skip consecutive empty lines from last line backward. + loop { + match region { + [head @ .., last] if last.trim().is_empty() => region = head, + _ => break, + } } - lines[i..j].to_vec() + region } -/// remove a "[ \t]*\*" block from each line, if possible -pub fn horizontal_trim(lines: Vec) -> Vec { - let mut i = usize::MAX; - let mut can_trim = true; - let mut first = true; +/// Trim all "\s*\*" prefix from comment: all or nothing. +/// +/// For example, +/// ```text +/// * one two three four five ... +/// * one two three four five ... +/// * one two three four five ... +/// ``` +/// will be trimmed to +/// ```text +/// one two three four five ... +/// one two three four five ... +/// one two three four five ... +/// ``` +pub fn horizontal_trim<'arr, 'row: 'arr>(lines: &'arr [&'row str]) -> Option> { + let prefix = match lines { + [first, ..] => get_prefix(first)?, + _ => return None, + }; - for line in &lines { - for (j, c) in line.chars().enumerate() { - if j > i || !"* \t".contains(c) { - can_trim = false; - break; - } - if c == '*' { - if first { - i = j; - first = false; - } else if i != j { - can_trim = false; - } - break; - } - } - if i >= line.len() { - can_trim = false; - } - if !can_trim { - break; - } + if lines.iter().any(|l| !l.starts_with(prefix)) { + return None; } - if can_trim { - lines.iter().map(|line| (&line[i + 1..line.len()]).to_string()).collect() - } else { - lines - } + let lines = lines + .iter() + // SAFETY: All lines have been checked if it starts with prefix + .map(|l| unsafe { l.get_unchecked(prefix.len()..) }) + .collect(); + Some(lines) +} + +/// Get the prefix with pattern "\s*\*" of input `s`. +fn get_prefix(s: &str) -> Option<&str> { + let mut bytes = s.as_bytes(); + let dst: *const u8 = loop { + match bytes { + [b' ' | b'\t', end @ ..] => bytes = end, + [b'*', end @ ..] => break end.as_ptr(), + _ => return None, + } + }; + let prefix = unsafe { + // SAFETY: Two invariants are followed. + // * length of `prefix` is the diff of two pointer from the same str `s`. + // * lifetime of `prefix` is the same as argument `s`. + let src: *const u8 = s.as_ptr(); + let len = dst as usize - src as usize; + let slice = std::slice::from_raw_parts(src, len); + std::str::from_utf8_unchecked(slice) + }; + Some(prefix) } diff --git a/compiler/rustc_ast/src/util/comments/block_comment/tests.rs b/compiler/rustc_ast/src/util/comments/block_comment/tests.rs new file mode 100644 index 0000000000000..c2fd368246d23 --- /dev/null +++ b/compiler/rustc_ast/src/util/comments/block_comment/tests.rs @@ -0,0 +1,146 @@ +use super::*; + +// If vertical_trim trim first and last line. +#[test] +fn trim_vertically_first_or_line() { + // Accepted cases + + let inp = &["*********************************", "* This is a module to do foo job."]; + let out = &["* This is a module to do foo job."]; + assert_eq!(vertical_trim(inp), out); + + let inp = &["* This is a module to do foo job.", "*********************************"]; + let out = &["* This is a module to do foo job."]; + assert_eq!(vertical_trim(inp), out); + + let inp = &[ + "*********************************", + "* This is a module to do foo job.", + "*********************************", + ]; + let out = &["* This is a module to do foo job."]; + assert_eq!(vertical_trim(inp), out); + + let inp = &[ + "***********************", + "* This is a module to do foo job.", + "*********************************", + ]; + let out = &["* This is a module to do foo job."]; + assert_eq!(vertical_trim(inp), out); + + let inp = &[ + "**************************", + " * one two three four five six seven", + " ****************", + ]; + let out = &[" * one two three four five six seven"]; + assert_eq!(vertical_trim(inp), out); + + let inp = &["", " * one two three four five", " "]; + let out = &[" * one two three four five"]; + assert_eq!(vertical_trim(inp), out); + + // Non-accepted cases + + let inp = &["\t *********************** \t", "* This is a module to do foo job."]; + let out = &["\t *********************** \t", "* This is a module to do foo job."]; + assert_eq!(vertical_trim(inp), out); + + // More than one space indentation. + let inp = &[ + "******************************", + " * This is a module to do foo job.", + " **************", + ]; + let out = &[" * This is a module to do foo job.", " **************"]; + assert_eq!(vertical_trim(inp), out); +} + +// Trim consecutive empty lines. Break if meet a non-empty line. +#[test] +fn trim_vertically_empty_lines_forward() { + let inp = &[" ", " \t \t ", " * One two three four five six seven eight nine ten."]; + let out = &[" * One two three four five six seven eight nine ten."]; + assert_eq!(vertical_trim(inp), out); + + let inp = &[ + " ", + " * One two three four five six seven eight nine ten.", + " \t \t ", + " * One two three four five six seven eight nine ten.", + ]; + let out = &[ + " * One two three four five six seven eight nine ten.", + " \t \t ", + " * One two three four five six seven eight nine ten.", + ]; + assert_eq!(vertical_trim(inp), out); +} + +// Trim consecutive empty lines bottom-top. Break if meet a non-empty line. +#[test] +fn trim_vertically_empty_lines_backward() { + let inp = &[" * One two three four five six seven eight nine ten.", " ", " \t \t "]; + let out = &[" * One two three four five six seven eight nine ten."]; + assert_eq!(vertical_trim(inp), out); + + let inp = &[ + " * One two three four five six seven eight nine ten.", + " ", + " * One two three four five six seven eight nine ten.", + " \t \t ", + ]; + let out = &[ + " * One two three four five six seven eight nine ten.", + " ", + " * One two three four five six seven eight nine ten.", + ]; + assert_eq!(vertical_trim(inp), out); +} + +// Test for any panic from wrong indexing. +#[test] +fn trim_vertically_empty() { + let inp = &[""]; + let out: &[&str] = &[]; + assert_eq!(vertical_trim(inp), out); + + let inp: &[&str] = &[]; + let out: &[&str] = &[]; + assert_eq!(vertical_trim(inp), out); +} + +#[test] +fn trim_horizontally() { + let inp = &[ + " \t\t * one two three", + " \t\t * four fix six seven *", + " \t\t * forty two ", + " \t\t ** sixty nine", + ]; + let out: &[&str] = &[" one two three", " four fix six seven *", " forty two ", "* sixty nine"]; + assert_eq!(horizontal_trim(inp).as_deref(), Some(out)); + + // Test that we handle empty collection and collection with one item. + assert_eq!(horizontal_trim(&[]).as_deref(), None); + assert_eq!(horizontal_trim(&[""]).as_deref(), None); + + // Non-accepted: "\t" will not equal to " " + + let inp = &[ + " \t * one two three", + " * four fix six seven *", + " \t * forty two ", + " \t ** sixty nine", + ]; + assert_eq!(horizontal_trim(inp).as_deref(), None); +} + +#[test] +fn test_get_prefix() { + assert_eq!(get_prefix(" \t **"), Some(" \t *")); + assert_eq!(get_prefix("*"), Some("*")); + assert_eq!(get_prefix(" \t ^*"), None); + assert_eq!(get_prefix(" "), None); +} From 535c726ed5b4f00ef3e59e48ea1ccfdf2a536f6a Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Thu, 3 Sep 2020 11:58:30 +0000 Subject: [PATCH 3/4] Use slice pattern for doc comments in lexer --- compiler/rustc_lexer/src/cursor.rs | 2 +- compiler/rustc_lexer/src/lib.rs | 16 +++++++++------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/compiler/rustc_lexer/src/cursor.rs b/compiler/rustc_lexer/src/cursor.rs index c0045d3f79be1..9010240e3eb0d 100644 --- a/compiler/rustc_lexer/src/cursor.rs +++ b/compiler/rustc_lexer/src/cursor.rs @@ -66,7 +66,7 @@ impl<'a> Cursor<'a> { } /// Returns a `Chars` iterator over the remaining characters. - fn chars(&self) -> Chars<'a> { + pub(crate) fn chars(&self) -> Chars<'a> { self.chars.clone() } diff --git a/compiler/rustc_lexer/src/lib.rs b/compiler/rustc_lexer/src/lib.rs index 44999bbe85713..a6f3c2ccc5d59 100644 --- a/compiler/rustc_lexer/src/lib.rs +++ b/compiler/rustc_lexer/src/lib.rs @@ -422,11 +422,12 @@ impl Cursor<'_> { debug_assert!(self.prev() == '/' && self.first() == '/'); self.bump(); - let doc_style = match self.first() { + let doc_style = match self.chars().as_str().as_bytes() { // `//!` is an inner line doc comment. - '!' => Some(DocStyle::Inner), + [b'!', ..] => Some(DocStyle::Inner), // `////` (more than 3 slashes) is not considered a doc comment. - '/' if self.second() != '/' => Some(DocStyle::Outer), + [b'/', b'/', ..] => None, + [b'/', ..] => Some(DocStyle::Outer), _ => None, }; @@ -438,12 +439,13 @@ impl Cursor<'_> { debug_assert!(self.prev() == '/' && self.first() == '*'); self.bump(); - let doc_style = match self.first() { + let doc_style = match self.chars().as_str().as_bytes() { // `/*!` is an inner block doc comment. - '!' => Some(DocStyle::Inner), + [b'!', ..] => Some(DocStyle::Inner), // `/***` (more than 2 stars) is not considered a doc comment. // `/**/` is not considered a doc comment. - '*' if !matches!(self.second(), '*' | '/') => Some(DocStyle::Outer), + [b'*', b'*' | b'/', ..] => None, + [b'*', ..] => Some(DocStyle::Outer), _ => None, }; @@ -464,7 +466,7 @@ impl Cursor<'_> { break; } } - _ => (), + _ => {} } } From 92521d11eb0b0d2f52c6a77c34e2c56b5622bc49 Mon Sep 17 00:00:00 2001 From: Lzu Tao Date: Thu, 3 Sep 2020 11:59:00 +0000 Subject: [PATCH 4/4] Make it clear about content_end --- compiler/rustc_parse/src/lexer/mod.rs | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/compiler/rustc_parse/src/lexer/mod.rs b/compiler/rustc_parse/src/lexer/mod.rs index 034442b798b29..8f6b2b3bf7387 100644 --- a/compiler/rustc_parse/src/lexer/mod.rs +++ b/compiler/rustc_parse/src/lexer/mod.rs @@ -172,17 +172,17 @@ impl<'a> StringReader<'a> { ) .emit(); FatalError.raise(); + } else { + // Skip non-doc comments + let doc_style = doc_style?; + + // Opening delimiter of the length 3 and closing delimiter of the length 2 + // are not included into the symbol. + let content_start = start + BytePos(3); + let content_end = self.pos - BytePos(2); + let content = self.str_from_to(content_start, content_end); + self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) } - - // Skip non-doc comments - let doc_style = doc_style?; - - // Opening delimiter of the length 3 and closing delimiter of the length 2 - // are not included into the symbol. - let content_start = start + BytePos(3); - let content_end = self.pos - BytePos(if terminated { 2 } else { 0 }); - let content = self.str_from_to(content_start, content_end); - self.cook_doc_comment(content_start, content, CommentKind::Block, doc_style) } rustc_lexer::TokenKind::Whitespace => return None, rustc_lexer::TokenKind::Ident | rustc_lexer::TokenKind::RawIdent => {