-
Notifications
You must be signed in to change notification settings - Fork 13.4k
Add new tool to check HTML #84480
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add new tool to check HTML #84480
Changes from all commits
2cedb7c
63862e1
48c3f99
c50d9cb
82feb9c
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,12 @@ | ||
[package] | ||
name = "html-checker" | ||
version = "0.1.0" | ||
authors = ["Guillaume Gomez <[email protected]>"] | ||
edition = "2018" | ||
|
||
[[bin]] | ||
name = "html-checker" | ||
path = "main.rs" | ||
|
||
[dependencies] | ||
walkdir = "2" |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,93 @@ | ||
use std::env; | ||
use std::path::Path; | ||
use std::process::{Command, Output}; | ||
|
||
/// Runs the external `tidy` command on one HTML file and returns how many
/// diagnostic lines it reported (0 when the file is considered clean).
///
/// Diagnostics that are not muted are echoed to stderr, preceded by a header
/// naming the file and the exit code of `tidy`.
///
/// # Panics
///
/// Panics if the `tidy` executable cannot be spawned.
fn check_html_file(file: &Path) -> usize {
    let to_mute = &[
        // "disabled" on <link> or "autocomplete" on <select> emit this warning
        "PROPRIETARY_ATTRIBUTE",
        // It complains when multiple elements in the same page link to the same anchor for
        // some reason...
        "ANCHOR_NOT_UNIQUE",
        // If a <span> contains only HTML elements and no text, it complains about it.
        "TRIM_EMPTY_ELEMENT",
        // FIXME: the three next warnings are about <pre> elements which are not supposed to
        // contain HTML. The solution here would be to replace them with a <div>
        "MISSING_ENDTAG_BEFORE",
        "INSERTING_TAG",
        "DISCARDING_UNEXPECTED",
    ];
    let to_mute_s = to_mute.join(",");
    let mut command = Command::new("tidy");
    command
        .arg("-errors")
        .arg("-quiet")
        // Per the tidy documentation, `--mute-id yes` prefixes each message with its ID,
        // which is the name to add to `to_mute` when another warning has to be silenced.
        .arg("--mute-id")
        .arg("yes")
        .arg("--mute")
        .arg(&to_mute_s)
        .arg(file);

    let Output { status, stderr, .. } = command.output().expect("failed to run tidy command");
    if status.success() {
        return 0;
    }

    // `tidy` may echo bytes from the checked document; a lossy conversion keeps the report
    // readable instead of aborting the whole run on invalid UTF-8.
    let stderr = String::from_utf8_lossy(&stderr);

    // A non-zero exit code other than 2 with nothing on stderr means every remaining
    // diagnostic was muted (tidy documents 1 as "warnings" and 2 as "errors").
    if stderr.is_empty() && status.code() != Some(2) {
        return 0;
    }

    eprintln!(
        "=> Errors for `{}` (error code: {}) <=",
        file.display(),
        status.code().unwrap_or(-1)
    );
    eprintln!("{}", stderr);
    // Exit code 2 with an empty stderr must still be counted as a failure.
    stderr.lines().count().max(1)
}
|
||
/// Names of the top-level entries of the doc folder whose HTML files get checked.
///
/// This is an allowlist rather than a list of exclusions: the generated books are numerous,
/// new ones can be added at any time, and not all of them have "book" in their name.
const DOCS_TO_CHECK: &[&str] = &[
    "alloc",
    "core",
    "proc_macro",
    "implementors",
    "src",
    "std",
    "test",
];
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Rather than only checking specific crates, could you instead filter out the books you don't want to check? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There are A LOT of different ones and I can't know for sure which one will be added. I initially filtered out but it was a huge mess. We don't generate that many crates in the end so I preferred going this way. EDIT: and some of them don't have "book" in their name, making things funnier, like "rustdoc". :3 |
||
|
||
// Returns the number of files read and the number of errors. | ||
fn find_all_html_files(dir: &Path) -> (usize, usize) { | ||
let mut files_read = 0; | ||
let mut errors = 0; | ||
|
||
for entry in walkdir::WalkDir::new(dir).into_iter().filter_entry(|e| { | ||
e.depth() != 1 | ||
|| e.file_name() | ||
.to_str() | ||
.map(|s| DOCS_TO_CHECK.into_iter().any(|d| *d == s)) | ||
.unwrap_or(false) | ||
}) { | ||
let entry = entry.expect("failed to read file"); | ||
if !entry.file_type().is_file() { | ||
continue; | ||
} | ||
let entry = entry.path(); | ||
if entry.extension().and_then(|s| s.to_str()) == Some("html") { | ||
errors += check_html_file(&entry); | ||
files_read += 1; | ||
} | ||
} | ||
(files_read, errors) | ||
} | ||
|
||
fn main() -> Result<(), String> { | ||
let args = env::args().collect::<Vec<_>>(); | ||
if args.len() != 2 { | ||
return Err(format!("Usage: {} <doc folder>", args[0])); | ||
} | ||
|
||
println!("Running HTML checker..."); | ||
|
||
let (files_read, errors) = find_all_html_files(&Path::new(&args[1])); | ||
println!("Done! Read {} files...", files_read); | ||
if errors > 0 { | ||
Err(format!("HTML check failed: {} errors", errors)) | ||
} else { | ||
println!("No error found!"); | ||
Ok(()) | ||
} | ||
} |
Uh oh!
There was an error while loading. Please reload this page.