Skip to content

Commit 8232109

Browse files
authored
Rollup merge of #80244 - jyn514:spans, r=bugadani
Cleanup markdown span handling 1. Get rid of `locate()` in markdown handling This function was unfortunate for several reasons: - It used `unsafe` because it wanted to tell whether a string came from the same *allocation* as another, not just whether it was a textual match. - It recalculated spans even though they were already available from pulldown - It sometimes *failed* to calculate the span, which meant it was always possible for the span to be `None`, even though in practice that should never happen. This has several cleanups: - Make the span required - Pass through the span from pulldown in the `HeadingLinks` and `Footnotes` iterators - Only add iterator bounds on the `impl Iterator`, not on `new` and the struct itself. 2. Remove unnecessary scope in `markdown_links` I recommend reading a single commit at a time. cc ``@bugadani`` - this will conflict with #77859, I'll try to make sure that gets merged first.
2 parents c4b34ee + 60d5567 commit 8232109

File tree

2 files changed

+94
-103
lines changed

2 files changed

+94
-103
lines changed

src/librustdoc/html/markdown.rs

+66-69
Original file line numberDiff line numberDiff line change
@@ -447,61 +447,61 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for LinkReplacer<'a, I> {
447447
}
448448

449449
/// Make headings links with anchor IDs and build up TOC.
450-
struct HeadingLinks<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> {
450+
struct HeadingLinks<'a, 'b, 'ids, I> {
451451
inner: I,
452452
toc: Option<&'b mut TocBuilder>,
453-
buf: VecDeque<Event<'a>>,
453+
buf: VecDeque<(Event<'a>, Range<usize>)>,
454454
id_map: &'ids mut IdMap,
455455
}
456456

457-
impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> HeadingLinks<'a, 'b, 'ids, I> {
457+
impl<'a, 'b, 'ids, I> HeadingLinks<'a, 'b, 'ids, I> {
458458
fn new(iter: I, toc: Option<&'b mut TocBuilder>, ids: &'ids mut IdMap) -> Self {
459459
HeadingLinks { inner: iter, toc, buf: VecDeque::new(), id_map: ids }
460460
}
461461
}
462462

463-
impl<'a, 'b, 'ids, I: Iterator<Item = Event<'a>>> Iterator for HeadingLinks<'a, 'b, 'ids, I> {
464-
type Item = Event<'a>;
463+
impl<'a, 'b, 'ids, I: Iterator<Item = (Event<'a>, Range<usize>)>> Iterator
464+
for HeadingLinks<'a, 'b, 'ids, I>
465+
{
466+
type Item = (Event<'a>, Range<usize>);
465467

466468
fn next(&mut self) -> Option<Self::Item> {
467469
if let Some(e) = self.buf.pop_front() {
468470
return Some(e);
469471
}
470472

471473
let event = self.inner.next();
472-
if let Some(Event::Start(Tag::Heading(level))) = event {
474+
if let Some((Event::Start(Tag::Heading(level)), _)) = event {
473475
let mut id = String::new();
474476
for event in &mut self.inner {
475-
match &event {
477+
match &event.0 {
476478
Event::End(Tag::Heading(..)) => break,
479+
Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
477480
Event::Text(text) | Event::Code(text) => {
478481
id.extend(text.chars().filter_map(slugify));
482+
self.buf.push_back(event);
479483
}
480-
_ => {}
481-
}
482-
match event {
483-
Event::Start(Tag::Link(_, _, _)) | Event::End(Tag::Link(..)) => {}
484-
event => self.buf.push_back(event),
484+
_ => self.buf.push_back(event),
485485
}
486486
}
487487
let id = self.id_map.derive(id);
488488

489489
if let Some(ref mut builder) = self.toc {
490490
let mut html_header = String::new();
491-
html::push_html(&mut html_header, self.buf.iter().cloned());
491+
html::push_html(&mut html_header, self.buf.iter().map(|(ev, _)| ev.clone()));
492492
let sec = builder.push(level as u32, html_header, id.clone());
493-
self.buf.push_front(Event::Html(format!("{} ", sec).into()));
493+
self.buf.push_front((Event::Html(format!("{} ", sec).into()), 0..0));
494494
}
495495

496-
self.buf.push_back(Event::Html(format!("</a></h{}>", level).into()));
496+
self.buf.push_back((Event::Html(format!("</a></h{}>", level).into()), 0..0));
497497

498498
let start_tags = format!(
499499
"<h{level} id=\"{id}\" class=\"section-header\">\
500500
<a href=\"#{id}\">",
501501
id = id,
502502
level = level
503503
);
504-
return Some(Event::Html(start_tags.into()));
504+
return Some((Event::Html(start_tags.into()), 0..0));
505505
}
506506
event
507507
}
@@ -575,39 +575,40 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for SummaryLine<'a, I> {
575575

576576
/// Moves all footnote definitions to the end and add back links to the
577577
/// references.
578-
struct Footnotes<'a, I: Iterator<Item = Event<'a>>> {
578+
struct Footnotes<'a, I> {
579579
inner: I,
580580
footnotes: FxHashMap<String, (Vec<Event<'a>>, u16)>,
581581
}
582582

583-
impl<'a, I: Iterator<Item = Event<'a>>> Footnotes<'a, I> {
583+
impl<'a, I> Footnotes<'a, I> {
584584
fn new(iter: I) -> Self {
585585
Footnotes { inner: iter, footnotes: FxHashMap::default() }
586586
}
587+
587588
fn get_entry(&mut self, key: &str) -> &mut (Vec<Event<'a>>, u16) {
588589
let new_id = self.footnotes.keys().count() + 1;
589590
let key = key.to_owned();
590591
self.footnotes.entry(key).or_insert((Vec::new(), new_id as u16))
591592
}
592593
}
593594

594-
impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
595-
type Item = Event<'a>;
595+
impl<'a, I: Iterator<Item = (Event<'a>, Range<usize>)>> Iterator for Footnotes<'a, I> {
596+
type Item = (Event<'a>, Range<usize>);
596597

597598
fn next(&mut self) -> Option<Self::Item> {
598599
loop {
599600
match self.inner.next() {
600-
Some(Event::FootnoteReference(ref reference)) => {
601+
Some((Event::FootnoteReference(ref reference), range)) => {
601602
let entry = self.get_entry(&reference);
602603
let reference = format!(
603604
"<sup id=\"fnref{0}\"><a href=\"#fn{0}\">{0}</a></sup>",
604605
(*entry).1
605606
);
606-
return Some(Event::Html(reference.into()));
607+
return Some((Event::Html(reference.into()), range));
607608
}
608-
Some(Event::Start(Tag::FootnoteDefinition(def))) => {
609+
Some((Event::Start(Tag::FootnoteDefinition(def)), _)) => {
609610
let mut content = Vec::new();
610-
for event in &mut self.inner {
611+
for (event, _) in &mut self.inner {
611612
if let Event::End(Tag::FootnoteDefinition(..)) = event {
612613
break;
613614
}
@@ -638,7 +639,7 @@ impl<'a, I: Iterator<Item = Event<'a>>> Iterator for Footnotes<'a, I> {
638639
ret.push_str("</li>");
639640
}
640641
ret.push_str("</ol></div>");
641-
return Some(Event::Html(ret.into()));
642+
return Some((Event::Html(ret.into()), 0..0));
642643
} else {
643644
return None;
644645
}
@@ -946,13 +947,14 @@ impl Markdown<'_> {
946947
};
947948

948949
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut replacer));
950+
let p = p.into_offset_iter();
949951

950952
let mut s = String::with_capacity(md.len() * 3 / 2);
951953

952954
let p = HeadingLinks::new(p, None, &mut ids);
953-
let p = LinkReplacer::new(p, links);
954-
let p = CodeBlocks::new(p, codes, edition, playground);
955955
let p = Footnotes::new(p);
956+
let p = LinkReplacer::new(p.map(|(ev, _)| ev), links);
957+
let p = CodeBlocks::new(p, codes, edition, playground);
956958
html::push_html(&mut s, p);
957959

958960
s
@@ -963,16 +965,16 @@ impl MarkdownWithToc<'_> {
963965
crate fn into_string(self) -> String {
964966
let MarkdownWithToc(md, mut ids, codes, edition, playground) = self;
965967

966-
let p = Parser::new_ext(md, opts());
968+
let p = Parser::new_ext(md, opts()).into_offset_iter();
967969

968970
let mut s = String::with_capacity(md.len() * 3 / 2);
969971

970972
let mut toc = TocBuilder::new();
971973

972974
{
973975
let p = HeadingLinks::new(p, Some(&mut toc), &mut ids);
974-
let p = CodeBlocks::new(p, codes, edition, playground);
975976
let p = Footnotes::new(p);
977+
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
976978
html::push_html(&mut s, p);
977979
}
978980

@@ -988,19 +990,19 @@ impl MarkdownHtml<'_> {
988990
if md.is_empty() {
989991
return String::new();
990992
}
991-
let p = Parser::new_ext(md, opts());
993+
let p = Parser::new_ext(md, opts()).into_offset_iter();
992994

993995
// Treat inline HTML as plain text.
994-
let p = p.map(|event| match event {
995-
Event::Html(text) => Event::Text(text),
996+
let p = p.map(|event| match event.0 {
997+
Event::Html(text) => (Event::Text(text), event.1),
996998
_ => event,
997999
});
9981000

9991001
let mut s = String::with_capacity(md.len() * 3 / 2);
10001002

10011003
let p = HeadingLinks::new(p, None, &mut ids);
1002-
let p = CodeBlocks::new(p, codes, edition, playground);
10031004
let p = Footnotes::new(p);
1005+
let p = CodeBlocks::new(p.map(|(ev, _)| ev), codes, edition, playground);
10041006
html::push_html(&mut s, p);
10051007

10061008
s
@@ -1153,50 +1155,45 @@ crate fn plain_text_summary(md: &str) -> String {
11531155
s
11541156
}
11551157

1156-
crate fn markdown_links(md: &str) -> Vec<(String, Option<Range<usize>>)> {
1158+
crate fn markdown_links(md: &str) -> Vec<(String, Range<usize>)> {
11571159
if md.is_empty() {
11581160
return vec![];
11591161
}
11601162

11611163
let mut links = vec![];
1164+
// Used to avoid mutable borrow issues in the `push` closure
1165+
// Probably it would be more efficient to use a `RefCell` but it doesn't seem worth the churn.
11621166
let mut shortcut_links = vec![];
11631167

1164-
{
1165-
let locate = |s: &str| unsafe {
1166-
let s_start = s.as_ptr();
1167-
let s_end = s_start.add(s.len());
1168-
let md_start = md.as_ptr();
1169-
let md_end = md_start.add(md.len());
1170-
if md_start <= s_start && s_end <= md_end {
1171-
let start = s_start.offset_from(md_start) as usize;
1172-
let end = s_end.offset_from(md_start) as usize;
1173-
Some(start..end)
1174-
} else {
1175-
None
1176-
}
1177-
};
1178-
1179-
let mut push = |link: BrokenLink<'_>| {
1180-
// FIXME: use `link.span` instead of `locate`
1181-
// (doing it now includes the `[]` as well as the text)
1182-
shortcut_links.push((link.reference.to_owned(), locate(link.reference)));
1183-
None
1184-
};
1185-
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push));
1186-
1187-
// There's no need to thread an IdMap through to here because
1188-
// the IDs generated aren't going to be emitted anywhere.
1189-
let mut ids = IdMap::new();
1190-
let iter = Footnotes::new(HeadingLinks::new(p, None, &mut ids));
1191-
1192-
for ev in iter {
1193-
if let Event::Start(Tag::Link(_, dest, _)) = ev {
1194-
debug!("found link: {}", dest);
1195-
links.push(match dest {
1196-
CowStr::Borrowed(s) => (s.to_owned(), locate(s)),
1197-
s @ (CowStr::Boxed(..) | CowStr::Inlined(..)) => (s.into_string(), None),
1198-
});
1168+
let span_for_link = |link: &str, span: Range<usize>| {
1169+
// Pulldown includes the `[]` as well as the URL. Only highlight the relevant span.
1170+
// NOTE: uses `rfind` in case the title and url are the same: `[Ok][Ok]`
1171+
match md[span.clone()].rfind(link) {
1172+
Some(start) => {
1173+
let start = span.start + start;
1174+
start..start + link.len()
11991175
}
1176+
// This can happen for things other than intra-doc links, like `#1` expanded to `https://github.com/rust-lang/rust/issues/1`.
1177+
None => span,
1178+
}
1179+
};
1180+
let mut push = |link: BrokenLink<'_>| {
1181+
let span = span_for_link(link.reference, link.span);
1182+
shortcut_links.push((link.reference.to_owned(), span));
1183+
None
1184+
};
1185+
let p = Parser::new_with_broken_link_callback(md, opts(), Some(&mut push));
1186+
1187+
// There's no need to thread an IdMap through to here because
1188+
// the IDs generated aren't going to be emitted anywhere.
1189+
let mut ids = IdMap::new();
1190+
let iter = Footnotes::new(HeadingLinks::new(p.into_offset_iter(), None, &mut ids));
1191+
1192+
for ev in iter {
1193+
if let Event::Start(Tag::Link(_, dest, _)) = ev.0 {
1194+
debug!("found link: {}", dest);
1195+
let span = span_for_link(&dest, ev.1);
1196+
links.push((dest.into_string(), span));
12001197
}
12011198
}
12021199

0 commit comments

Comments
 (0)