|
| 1 | +//! Detects markdown syntax that's different between pulldown-cmark |
| 2 | +//! 0.9 and 0.10. |
| 3 | +
|
| 4 | +use crate::clean::Item; |
| 5 | +use crate::core::DocContext; |
| 6 | +use crate::html::markdown::main_body_opts; |
| 7 | +use pulldown_cmark as cmarko; |
| 8 | +use pulldown_cmark_new as cmarkn; |
| 9 | +use rustc_resolve::rustdoc::source_span_for_markdown_range; |
| 10 | + |
| 11 | +pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) { |
| 12 | + let tcx = cx.tcx; |
| 13 | + let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else { |
| 14 | + // If non-local, no need to check anything. |
| 15 | + return; |
| 16 | + }; |
| 17 | + |
| 18 | + let dox = item.doc_value(); |
| 19 | + if dox.is_empty() { |
| 20 | + return; |
| 21 | + } |
| 22 | + |
| 23 | + let link_names = item.link_names(&cx.cache); |
| 24 | + let mut replacer_old = |broken_link: cmarko::BrokenLink<'_>| { |
| 25 | + link_names |
| 26 | + .iter() |
| 27 | + .find(|link| *link.original_text == *broken_link.reference) |
| 28 | + .map(|link| ((*link.href).into(), (*link.new_text).into())) |
| 29 | + }; |
| 30 | + let parser_old = cmarko::Parser::new_with_broken_link_callback( |
| 31 | + &dox, |
| 32 | + main_body_opts(), |
| 33 | + Some(&mut replacer_old) |
| 34 | + ).into_offset_iter() |
| 35 | + // Not worth cleaning up minor "distinctions without difference" in the AST. |
| 36 | + // Text events get chopped up differently between versions. |
| 37 | + // <html> and `code` mistakes are usually covered by unescaped_backticks and html_tags lints. |
| 38 | + .filter(|(event, _event_range)| !matches!(event, cmarko::Event::Code(_) | cmarko::Event::Text(_) | cmarko::Event::Html(_))); |
| 39 | + |
| 40 | + pub fn main_body_opts_new() -> cmarkn::Options { |
| 41 | + cmarkn::Options::ENABLE_TABLES |
| 42 | + | cmarkn::Options::ENABLE_FOOTNOTES |
| 43 | + | cmarkn::Options::ENABLE_STRIKETHROUGH |
| 44 | + | cmarkn::Options::ENABLE_TASKLISTS |
| 45 | + | cmarkn::Options::ENABLE_SMART_PUNCTUATION |
| 46 | + } |
| 47 | + let mut replacer_new = |broken_link: cmarkn::BrokenLink<'_>| { |
| 48 | + link_names |
| 49 | + .iter() |
| 50 | + .find(|link| *link.original_text == *broken_link.reference) |
| 51 | + .map(|link| ((*link.href).into(), (*link.new_text).into())) |
| 52 | + }; |
| 53 | + let parser_new = cmarkn::Parser::new_with_broken_link_callback( |
| 54 | + &dox, |
| 55 | + main_body_opts_new(), |
| 56 | + Some(&mut replacer_new) |
| 57 | + ).into_offset_iter() |
| 58 | + .filter(|(event, _event_range)| !matches!(event, cmarkn::Event::Code(_) | cmarkn::Event::Text(_) | cmarkn::Event::Html(_) | cmarkn::Event::InlineHtml(_))); |
| 59 | + |
| 60 | + let mut reported_an_error = false; |
| 61 | + for ((event_old, event_range_old), (event_new, event_range_new)) in parser_old.zip(parser_new) { |
| 62 | + match (event_old, event_new) { |
| 63 | + | (cmarko::Event::Start(cmarko::Tag::Emphasis), cmarkn::Event::Start(cmarkn::Tag::Emphasis)) |
| 64 | + | (cmarko::Event::Start(cmarko::Tag::Strong), cmarkn::Event::Start(cmarkn::Tag::Strong)) |
| 65 | + | (cmarko::Event::Start(cmarko::Tag::Strikethrough), cmarkn::Event::Start(cmarkn::Tag::Strikethrough)) |
| 66 | + | (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::Start(cmarkn::Tag::Link { .. })) |
| 67 | + | (cmarko::Event::Start(cmarko::Tag::Image(..)), cmarkn::Event::Start(cmarkn::Tag::Image { .. })) |
| 68 | + | (cmarko::Event::End(cmarko::Tag::Emphasis), cmarkn::Event::End(cmarkn::TagEnd::Emphasis)) |
| 69 | + | (cmarko::Event::End(cmarko::Tag::Strong), cmarkn::Event::End(cmarkn::TagEnd::Strong)) |
| 70 | + | (cmarko::Event::End(cmarko::Tag::Strikethrough), cmarkn::Event::End(cmarkn::TagEnd::Strikethrough)) |
| 71 | + | (cmarko::Event::End(cmarko::Tag::Link(..)), cmarkn::Event::End(cmarkn::TagEnd::Link)) |
| 72 | + | (cmarko::Event::End(cmarko::Tag::Image(..)), cmarkn::Event::End(cmarkn::TagEnd::Image)) |
| 73 | + | (cmarko::Event::FootnoteReference(..), cmarkn::Event::FootnoteReference(..)) |
| 74 | + | (cmarko::Event::SoftBreak, cmarkn::Event::SoftBreak) |
| 75 | + | (cmarko::Event::HardBreak, cmarkn::Event::HardBreak) |
| 76 | + if event_range_old == event_range_new => { |
| 77 | + // Matching tags. Do nothing. |
| 78 | + } |
| 79 | + | (cmarko::Event::Start(cmarko::Tag::Paragraph), cmarkn::Event::Start(cmarkn::Tag::Paragraph)) |
| 80 | + | (cmarko::Event::Start(cmarko::Tag::Heading(..)), cmarkn::Event::Start(cmarkn::Tag::Heading { .. })) |
| 81 | + | (cmarko::Event::Start(cmarko::Tag::BlockQuote), cmarkn::Event::Start(cmarkn::Tag::BlockQuote)) |
| 82 | + | (cmarko::Event::Start(cmarko::Tag::CodeBlock(..)), cmarkn::Event::Start(cmarkn::Tag::CodeBlock(..))) |
| 83 | + | (cmarko::Event::Start(cmarko::Tag::List(..)), cmarkn::Event::Start(cmarkn::Tag::List(..))) |
| 84 | + | (cmarko::Event::Start(cmarko::Tag::Item), cmarkn::Event::Start(cmarkn::Tag::Item)) |
| 85 | + | (cmarko::Event::Start(cmarko::Tag::FootnoteDefinition(..)), cmarkn::Event::Start(cmarkn::Tag::FootnoteDefinition(..))) |
| 86 | + | (cmarko::Event::Start(cmarko::Tag::Table(..)), cmarkn::Event::Start(cmarkn::Tag::Table(..))) |
| 87 | + | (cmarko::Event::Start(cmarko::Tag::TableHead), cmarkn::Event::Start(cmarkn::Tag::TableHead)) |
| 88 | + | (cmarko::Event::Start(cmarko::Tag::TableRow), cmarkn::Event::Start(cmarkn::Tag::TableRow)) |
| 89 | + | (cmarko::Event::Start(cmarko::Tag::TableCell), cmarkn::Event::Start(cmarkn::Tag::TableCell)) |
| 90 | + | (cmarko::Event::End(cmarko::Tag::Paragraph), cmarkn::Event::End(cmarkn::TagEnd::Paragraph)) |
| 91 | + | (cmarko::Event::End(cmarko::Tag::Heading(..)), cmarkn::Event::End(cmarkn::TagEnd::Heading(_))) |
| 92 | + | (cmarko::Event::End(cmarko::Tag::BlockQuote), cmarkn::Event::End(cmarkn::TagEnd::BlockQuote)) |
| 93 | + | (cmarko::Event::End(cmarko::Tag::CodeBlock(..)), cmarkn::Event::End(cmarkn::TagEnd::CodeBlock)) |
| 94 | + | (cmarko::Event::End(cmarko::Tag::List(..)), cmarkn::Event::End(cmarkn::TagEnd::List(_))) |
| 95 | + | (cmarko::Event::End(cmarko::Tag::Item), cmarkn::Event::End(cmarkn::TagEnd::Item)) |
| 96 | + | (cmarko::Event::End(cmarko::Tag::FootnoteDefinition(..)), cmarkn::Event::End(cmarkn::TagEnd::FootnoteDefinition)) |
| 97 | + | (cmarko::Event::End(cmarko::Tag::Table(..)), cmarkn::Event::End(cmarkn::TagEnd::Table)) |
| 98 | + | (cmarko::Event::End(cmarko::Tag::TableHead), cmarkn::Event::End(cmarkn::TagEnd::TableHead)) |
| 99 | + | (cmarko::Event::End(cmarko::Tag::TableRow), cmarkn::Event::End(cmarkn::TagEnd::TableRow)) |
| 100 | + | (cmarko::Event::End(cmarko::Tag::TableCell), cmarkn::Event::End(cmarkn::TagEnd::TableCell)) |
| 101 | + => { |
| 102 | + // Matching tags. Do nothing. |
| 103 | + // |
| 104 | + // Parsers sometimes differ in what they consider the "range of an event," |
| 105 | + // even though the event is really the same. Inlines are pretty consistent, |
| 106 | + // but stuff like list items? Not really. |
| 107 | + // |
| 108 | + // Mismatched block elements will usually nest differently, so ignoring it |
| 109 | + // works good enough. |
| 110 | + } |
| 111 | + // If we've already reported an error on the start tag, don't bother on the end tag. |
| 112 | + (cmarko::Event::End(_), _) | (_, cmarkn::Event::End(_)) if reported_an_error => {} |
| 113 | + // Non-matching inline. |
| 114 | + | (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::FootnoteReference(..)) |
| 115 | + | (cmarko::Event::Start(cmarko::Tag::Image(..)), cmarkn::Event::FootnoteReference(..)) |
| 116 | + | (cmarko::Event::FootnoteReference(..), cmarkn::Event::Start(cmarkn::Tag::Link { .. })) |
| 117 | + | (cmarko::Event::FootnoteReference(..), cmarkn::Event::Start(cmarkn::Tag::Image { .. })) if event_range_old == event_range_new => { |
| 118 | + reported_an_error = true; |
| 119 | + // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, |
| 120 | + // use the span of the entire attribute as a fallback. |
| 121 | + let span = source_span_for_markdown_range( |
| 122 | + tcx, |
| 123 | + &dox, |
| 124 | + &event_range_old, |
| 125 | + &item.attrs.doc_strings, |
| 126 | + ).unwrap_or_else(|| item.attr_span(tcx)); |
| 127 | + tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, "unportable markdown", |lint| { |
| 128 | + lint.help(format!("syntax ambiguous between footnote and link")); |
| 129 | + }); |
| 130 | + } |
| 131 | + // Non-matching results. |
| 132 | + (event_old, event_new) => { |
| 133 | + reported_an_error = true; |
| 134 | + let (range, range_other, desc, desc_other, tag, tag_other) = if event_range_old.end - event_range_old.start < event_range_new.end - event_range_new.start { |
| 135 | + (event_range_old, event_range_new, "old", "new", format!("{event_old:?}"), format!("{event_new:?}")) |
| 136 | + } else { |
| 137 | + (event_range_new, event_range_old, "new", "old", format!("{event_new:?}"), format!("{event_old:?}")) |
| 138 | + }; |
| 139 | + let (range, tag_other) = if range_other.start <= range.start && range_other.end <= range.end { |
| 140 | + (range_other.start..range.end, tag_other) |
| 141 | + } else { |
| 142 | + (range, format!("nothing")) |
| 143 | + }; |
| 144 | + // If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute, |
| 145 | + // use the span of the entire attribute as a fallback. |
| 146 | + let span = source_span_for_markdown_range( |
| 147 | + tcx, |
| 148 | + &dox, |
| 149 | + &range, |
| 150 | + &item.attrs.doc_strings, |
| 151 | + ).unwrap_or_else(|| item.attr_span(tcx)); |
| 152 | + tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, "unportable markdown", |lint| { |
| 153 | + lint.help(format!("{desc} parser sees {tag}, {desc_other} sees {tag_other}")); |
| 154 | + }); |
| 155 | + } |
| 156 | + } |
| 157 | + } |
| 158 | +} |
0 commit comments