Skip to content

Commit 3d931ab

Browse files
committed
rustdoc: check parsing diffs between pulldown-cmark 0.9.6 and 0.10
This commit is not meant to be merged as-is. It's meant to run in Crater, so that we can estimate the impact of bumping to the new version of the markdown parser.
1 parent 829308e commit 3d931ab

File tree

7 files changed

+287
-0
lines changed

7 files changed

+287
-0
lines changed

Cargo.lock

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4704,6 +4704,7 @@ dependencies = [
47044704
"itertools",
47054705
"minifier",
47064706
"once_cell",
4707+
"pulldown-cmark 0.10.0",
47074708
"regex",
47084709
"rustdoc-json-types",
47094710
"serde",

src/librustdoc/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ itertools = "0.11"
1313
indexmap = "2"
1414
minifier = "0.3.0"
1515
once_cell = "1.10.0"
16+
pulldown-cmark-new = { version = "0.10", package = "pulldown-cmark", default-features = false }
1617
regex = "1"
1718
rustdoc-json-types = { path = "../rustdoc-json-types" }
1819
serde_json = "1.0"

src/librustdoc/lint.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -196,6 +196,14 @@ declare_rustdoc_lint! {
196196
"detects redundant explicit links in doc comments"
197197
}
198198

199+
declare_rustdoc_lint! {
200+
/// This compatibility lint checks for Markdown syntax that works in the old engine but not
201+
/// the new one.
202+
UNPORTABLE_MARKDOWN,
203+
Deny,
204+
"detects markdown that is interpreted differently in different parser"
205+
}
206+
199207
pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
200208
vec![
201209
BROKEN_INTRA_DOC_LINKS,
@@ -209,6 +217,7 @@ pub(crate) static RUSTDOC_LINTS: Lazy<Vec<&'static Lint>> = Lazy::new(|| {
209217
MISSING_CRATE_LEVEL_DOCS,
210218
UNESCAPED_BACKTICKS,
211219
REDUNDANT_EXPLICIT_LINKS,
220+
UNPORTABLE_MARKDOWN,
212221
]
213222
});
214223

src/librustdoc/passes/lint.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ mod check_code_block_syntax;
66
mod html_tags;
77
mod redundant_explicit_links;
88
mod unescaped_backticks;
9+
mod unportable_markdown;
910

1011
use super::Pass;
1112
use crate::clean::*;
@@ -31,6 +32,7 @@ impl<'a, 'tcx> DocVisitor for Linter<'a, 'tcx> {
3132
html_tags::visit_item(self.cx, item);
3233
unescaped_backticks::visit_item(self.cx, item);
3334
redundant_explicit_links::visit_item(self.cx, item);
35+
unportable_markdown::visit_item(self.cx, item);
3436

3537
self.visit_item_recur(item)
3638
}
Lines changed: 158 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,158 @@
1+
//! Detects markdown syntax that's different between pulldown-cmark
2+
//! 0.9 and 0.10.
3+
4+
use crate::clean::Item;
5+
use crate::core::DocContext;
6+
use crate::html::markdown::main_body_opts;
7+
use pulldown_cmark as cmarko;
8+
use pulldown_cmark_new as cmarkn;
9+
use rustc_resolve::rustdoc::source_span_for_markdown_range;
10+
11+
pub(crate) fn visit_item(cx: &DocContext<'_>, item: &Item) {
12+
let tcx = cx.tcx;
13+
let Some(hir_id) = DocContext::as_local_hir_id(tcx, item.item_id) else {
14+
// If non-local, no need to check anything.
15+
return;
16+
};
17+
18+
let dox = item.doc_value();
19+
if dox.is_empty() {
20+
return;
21+
}
22+
23+
let link_names = item.link_names(&cx.cache);
24+
let mut replacer_old = |broken_link: cmarko::BrokenLink<'_>| {
25+
link_names
26+
.iter()
27+
.find(|link| *link.original_text == *broken_link.reference)
28+
.map(|link| ((*link.href).into(), (*link.new_text).into()))
29+
};
30+
let parser_old = cmarko::Parser::new_with_broken_link_callback(
31+
&dox,
32+
main_body_opts(),
33+
Some(&mut replacer_old)
34+
).into_offset_iter()
35+
// Not worth cleaning up minor "distinctions without difference" in the AST.
36+
// Text events get chopped up differently between versions.
37+
// <html> and `code` mistakes are usually covered by unescaped_backticks and html_tags lints.
38+
.filter(|(event, _event_range)| !matches!(event, cmarko::Event::Code(_) | cmarko::Event::Text(_) | cmarko::Event::Html(_)));
39+
40+
pub fn main_body_opts_new() -> cmarkn::Options {
41+
cmarkn::Options::ENABLE_TABLES
42+
| cmarkn::Options::ENABLE_FOOTNOTES
43+
| cmarkn::Options::ENABLE_STRIKETHROUGH
44+
| cmarkn::Options::ENABLE_TASKLISTS
45+
| cmarkn::Options::ENABLE_SMART_PUNCTUATION
46+
}
47+
let mut replacer_new = |broken_link: cmarkn::BrokenLink<'_>| {
48+
link_names
49+
.iter()
50+
.find(|link| *link.original_text == *broken_link.reference)
51+
.map(|link| ((*link.href).into(), (*link.new_text).into()))
52+
};
53+
let parser_new = cmarkn::Parser::new_with_broken_link_callback(
54+
&dox,
55+
main_body_opts_new(),
56+
Some(&mut replacer_new)
57+
).into_offset_iter()
58+
.filter(|(event, _event_range)| !matches!(event, cmarkn::Event::Code(_) | cmarkn::Event::Text(_) | cmarkn::Event::Html(_) | cmarkn::Event::InlineHtml(_)));
59+
60+
let mut reported_an_error = false;
61+
for ((event_old, event_range_old), (event_new, event_range_new)) in parser_old.zip(parser_new) {
62+
match (event_old, event_new) {
63+
| (cmarko::Event::Start(cmarko::Tag::Emphasis), cmarkn::Event::Start(cmarkn::Tag::Emphasis))
64+
| (cmarko::Event::Start(cmarko::Tag::Strong), cmarkn::Event::Start(cmarkn::Tag::Strong))
65+
| (cmarko::Event::Start(cmarko::Tag::Strikethrough), cmarkn::Event::Start(cmarkn::Tag::Strikethrough))
66+
| (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::Start(cmarkn::Tag::Link { .. }))
67+
| (cmarko::Event::Start(cmarko::Tag::Image(..)), cmarkn::Event::Start(cmarkn::Tag::Image { .. }))
68+
| (cmarko::Event::End(cmarko::Tag::Emphasis), cmarkn::Event::End(cmarkn::TagEnd::Emphasis))
69+
| (cmarko::Event::End(cmarko::Tag::Strong), cmarkn::Event::End(cmarkn::TagEnd::Strong))
70+
| (cmarko::Event::End(cmarko::Tag::Strikethrough), cmarkn::Event::End(cmarkn::TagEnd::Strikethrough))
71+
| (cmarko::Event::End(cmarko::Tag::Link(..)), cmarkn::Event::End(cmarkn::TagEnd::Link))
72+
| (cmarko::Event::End(cmarko::Tag::Image(..)), cmarkn::Event::End(cmarkn::TagEnd::Image))
73+
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::FootnoteReference(..))
74+
| (cmarko::Event::SoftBreak, cmarkn::Event::SoftBreak)
75+
| (cmarko::Event::HardBreak, cmarkn::Event::HardBreak)
76+
if event_range_old == event_range_new => {
77+
// Matching tags. Do nothing.
78+
}
79+
| (cmarko::Event::Start(cmarko::Tag::Paragraph), cmarkn::Event::Start(cmarkn::Tag::Paragraph))
80+
| (cmarko::Event::Start(cmarko::Tag::Heading(..)), cmarkn::Event::Start(cmarkn::Tag::Heading { .. }))
81+
| (cmarko::Event::Start(cmarko::Tag::BlockQuote), cmarkn::Event::Start(cmarkn::Tag::BlockQuote))
82+
| (cmarko::Event::Start(cmarko::Tag::CodeBlock(..)), cmarkn::Event::Start(cmarkn::Tag::CodeBlock(..)))
83+
| (cmarko::Event::Start(cmarko::Tag::List(..)), cmarkn::Event::Start(cmarkn::Tag::List(..)))
84+
| (cmarko::Event::Start(cmarko::Tag::Item), cmarkn::Event::Start(cmarkn::Tag::Item))
85+
| (cmarko::Event::Start(cmarko::Tag::FootnoteDefinition(..)), cmarkn::Event::Start(cmarkn::Tag::FootnoteDefinition(..)))
86+
| (cmarko::Event::Start(cmarko::Tag::Table(..)), cmarkn::Event::Start(cmarkn::Tag::Table(..)))
87+
| (cmarko::Event::Start(cmarko::Tag::TableHead), cmarkn::Event::Start(cmarkn::Tag::TableHead))
88+
| (cmarko::Event::Start(cmarko::Tag::TableRow), cmarkn::Event::Start(cmarkn::Tag::TableRow))
89+
| (cmarko::Event::Start(cmarko::Tag::TableCell), cmarkn::Event::Start(cmarkn::Tag::TableCell))
90+
| (cmarko::Event::End(cmarko::Tag::Paragraph), cmarkn::Event::End(cmarkn::TagEnd::Paragraph))
91+
| (cmarko::Event::End(cmarko::Tag::Heading(..)), cmarkn::Event::End(cmarkn::TagEnd::Heading(_)))
92+
| (cmarko::Event::End(cmarko::Tag::BlockQuote), cmarkn::Event::End(cmarkn::TagEnd::BlockQuote))
93+
| (cmarko::Event::End(cmarko::Tag::CodeBlock(..)), cmarkn::Event::End(cmarkn::TagEnd::CodeBlock))
94+
| (cmarko::Event::End(cmarko::Tag::List(..)), cmarkn::Event::End(cmarkn::TagEnd::List(_)))
95+
| (cmarko::Event::End(cmarko::Tag::Item), cmarkn::Event::End(cmarkn::TagEnd::Item))
96+
| (cmarko::Event::End(cmarko::Tag::FootnoteDefinition(..)), cmarkn::Event::End(cmarkn::TagEnd::FootnoteDefinition))
97+
| (cmarko::Event::End(cmarko::Tag::Table(..)), cmarkn::Event::End(cmarkn::TagEnd::Table))
98+
| (cmarko::Event::End(cmarko::Tag::TableHead), cmarkn::Event::End(cmarkn::TagEnd::TableHead))
99+
| (cmarko::Event::End(cmarko::Tag::TableRow), cmarkn::Event::End(cmarkn::TagEnd::TableRow))
100+
| (cmarko::Event::End(cmarko::Tag::TableCell), cmarkn::Event::End(cmarkn::TagEnd::TableCell))
101+
=> {
102+
// Matching tags. Do nothing.
103+
//
104+
// Parsers sometimes differ in what they consider the "range of an event,"
105+
// even though the event is really the same. Inlines are pretty consistent,
106+
// but stuff like list items? Not really.
107+
//
108+
// Mismatched block elements will usually nest differently, so ignoring it
109+
// works good enough.
110+
}
111+
// If we've already reported an error on the start tag, don't bother on the end tag.
112+
(cmarko::Event::End(_), _) | (_, cmarkn::Event::End(_)) if reported_an_error => {}
113+
// Non-matching inline.
114+
| (cmarko::Event::Start(cmarko::Tag::Link(..)), cmarkn::Event::FootnoteReference(..))
115+
| (cmarko::Event::Start(cmarko::Tag::Image(..)), cmarkn::Event::FootnoteReference(..))
116+
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::Start(cmarkn::Tag::Link { .. }))
117+
| (cmarko::Event::FootnoteReference(..), cmarkn::Event::Start(cmarkn::Tag::Image { .. })) if event_range_old == event_range_new => {
118+
reported_an_error = true;
119+
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
120+
// use the span of the entire attribute as a fallback.
121+
let span = source_span_for_markdown_range(
122+
tcx,
123+
&dox,
124+
&event_range_old,
125+
&item.attrs.doc_strings,
126+
).unwrap_or_else(|| item.attr_span(tcx));
127+
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, "unportable markdown", |lint| {
128+
lint.help(format!("syntax ambiguous between footnote and link"));
129+
});
130+
}
131+
// Non-matching results.
132+
(event_old, event_new) => {
133+
reported_an_error = true;
134+
let (range, range_other, desc, desc_other, tag, tag_other) = if event_range_old.end - event_range_old.start < event_range_new.end - event_range_new.start {
135+
(event_range_old, event_range_new, "old", "new", format!("{event_old:?}"), format!("{event_new:?}"))
136+
} else {
137+
(event_range_new, event_range_old, "new", "old", format!("{event_new:?}"), format!("{event_old:?}"))
138+
};
139+
let (range, tag_other) = if range_other.start <= range.start && range_other.end <= range.end {
140+
(range_other.start..range.end, tag_other)
141+
} else {
142+
(range, format!("nothing"))
143+
};
144+
// If we can't get a span of the backtick, because it is in a `#[doc = ""]` attribute,
145+
// use the span of the entire attribute as a fallback.
146+
let span = source_span_for_markdown_range(
147+
tcx,
148+
&dox,
149+
&range,
150+
&item.attrs.doc_strings,
151+
).unwrap_or_else(|| item.attr_span(tcx));
152+
tcx.node_span_lint(crate::lint::UNPORTABLE_MARKDOWN, hir_id, span, "unportable markdown", |lint| {
153+
lint.help(format!("{desc} parser sees {tag}, {desc_other} sees {tag_other}"));
154+
});
155+
}
156+
}
157+
}
158+
}
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
// https://internals.rust-lang.org/t/proposal-migrate-the-syntax-of-rustdoc-markdown-footnotes-to-be-compatible-with-the-syntax-used-in-github/18929
2+
//
3+
// A series of test cases for CommonMark corner cases that pulldown-cmark 0.10 fixes.
4+
5+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/654>
6+
///
7+
/// Test footnote [^foot].
8+
///
9+
/// [^foot]: This is nested within the footnote now, but didn't used to be.
10+
//~^ ERROR unportable markdown
11+
///
12+
/// This is a multi-paragraph footnote.
13+
pub struct GfmFootnotes;
14+
15+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/750>
16+
///
17+
/// test [^]
18+
//~^ ERROR unportable markdown
19+
///
20+
/// [^]: test2
21+
pub struct FootnoteEmptyName;
22+
23+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/829>
24+
///
25+
/// - _t
26+
/// # test
27+
//~^ ERROR unportable markdown
28+
/// t_
29+
pub struct NestingCornerCase;
30+
31+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/650>
32+
///
33+
/// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
34+
//~^ ERROR unportable markdown
35+
//~| ERROR unportable markdown
36+
pub struct Emphasis1;
37+
38+
/// <https://github.com/pulldown-cmark/pulldown-cmark/pull/732>
39+
///
40+
/// |
41+
//~^ ERROR unportable markdown
42+
//~| ERROR unportable markdown
43+
/// |
44+
pub struct NotEnoughTable;
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
error: unportable markdown
2+
--> $DIR/unportable-markdown.rs:9:5
3+
|
4+
LL | /// [^foot]: This is nested within the footnote now, but didn't used to be.
5+
| _____^
6+
LL | |
7+
LL | | ///
8+
LL | | /// This is a multi-paragraph footnote.
9+
| |___________________________________________^
10+
|
11+
= help: new parser sees Start(Paragraph), old sees End(FootnoteDefinition(Borrowed("foot")))
12+
= note: `#[deny(rustdoc::unportable_markdown)]` on by default
13+
14+
error: unportable markdown
15+
--> $DIR/unportable-markdown.rs:17:10
16+
|
17+
LL | /// test [^]
18+
| ^^^
19+
|
20+
= help: new parser sees Start(Link { link_type: Shortcut, dest_url: Borrowed("test2"), title: Borrowed(""), id: Borrowed("^") }), old sees nothing
21+
22+
error: unportable markdown
23+
--> $DIR/unportable-markdown.rs:26:7
24+
|
25+
LL | /// # test
26+
| _______^
27+
LL | |
28+
LL | | /// t_
29+
| |____^
30+
|
31+
= help: new parser sees Start(Heading { level: H1, id: None, classes: [], attrs: [] }), old sees nothing
32+
33+
error: unportable markdown
34+
--> $DIR/unportable-markdown.rs:33:40
35+
|
36+
LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
37+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^
38+
|
39+
= help: old parser sees Start(Emphasis), new sees nothing
40+
41+
error: unportable markdown
42+
--> $DIR/unportable-markdown.rs:33:41
43+
|
44+
LL | /// *~~__emphasis strike strong__~~* ~~*__strike emphasis strong__*~~
45+
| ^^^^^^^^^^^^^^^^^^^^^^^^^^
46+
|
47+
= help: old parser sees Start(Strong), new sees nothing
48+
49+
error: unportable markdown
50+
--> $DIR/unportable-markdown.rs:40:5
51+
|
52+
LL | /// |
53+
| _____^
54+
LL | |
55+
LL | |
56+
| |____^
57+
|
58+
= help: new parser sees Start(Paragraph), old sees Start(Table([]))
59+
60+
error: unportable markdown
61+
--> $DIR/unportable-markdown.rs:40:5
62+
|
63+
LL | /// |
64+
| _____^
65+
LL | |
66+
LL | |
67+
| |___^
68+
|
69+
= help: new parser sees SoftBreak, old sees Start(TableHead)
70+
71+
error: aborting due to 7 previous errors
72+

0 commit comments

Comments
 (0)