From dca3299e26a6bc2674bf31d54e9abb2340b15bf2 Mon Sep 17 00:00:00 2001 From: Ashe Connor Date: Tue, 17 Oct 2017 16:57:30 +1100 Subject: [PATCH 1/6] Record readme filename from cargo transmit --- .../2017-10-17-052814_add_readme_file_to_crates/down.sql | 1 + .../2017-10-17-052814_add_readme_file_to_crates/up.sql | 1 + src/bin/render-readmes.rs | 6 +++--- src/krate/mod.rs | 7 ++++++- src/render.rs | 7 ++++++- src/schema.rs | 6 ++++++ src/tests/all.rs | 2 ++ src/tests/krate.rs | 1 + src/upload.rs | 1 + 9 files changed, 27 insertions(+), 5 deletions(-) create mode 100644 migrations/2017-10-17-052814_add_readme_file_to_crates/down.sql create mode 100644 migrations/2017-10-17-052814_add_readme_file_to_crates/up.sql diff --git a/migrations/2017-10-17-052814_add_readme_file_to_crates/down.sql b/migrations/2017-10-17-052814_add_readme_file_to_crates/down.sql new file mode 100644 index 00000000000..d1b78b569c3 --- /dev/null +++ b/migrations/2017-10-17-052814_add_readme_file_to_crates/down.sql @@ -0,0 +1 @@ +ALTER TABLE crates DROP COLUMN readme_file; diff --git a/migrations/2017-10-17-052814_add_readme_file_to_crates/up.sql b/migrations/2017-10-17-052814_add_readme_file_to_crates/up.sql new file mode 100644 index 00000000000..36ad1e5fd13 --- /dev/null +++ b/migrations/2017-10-17-052814_add_readme_file_to_crates/up.sql @@ -0,0 +1 @@ +ALTER TABLE crates ADD COLUMN readme_file VARCHAR; diff --git a/src/bin/render-readmes.rs b/src/bin/render-readmes.rs index 6c4d58aa685..da25ce5a517 100644 --- a/src/bin/render-readmes.rs +++ b/src/bin/render-readmes.rs @@ -1,5 +1,5 @@ // Iterates over every crate versions ever uploaded and (re-)renders their -// readme using the Markdown renderer from the cargo_registry crate. +// readme using the readme renderer from the cargo_registry crate. // // Warning: this can take a lot of time. 
@@ -34,7 +34,7 @@ use url::Url; use cargo_registry::{Config, Version}; use cargo_registry::schema::*; -use cargo_registry::render::markdown_to_html; +use cargo_registry::render::readme_to_html; const DEFAULT_PAGE_SIZE: usize = 25; const USAGE: &'static str = " @@ -255,7 +255,7 @@ fn get_readme(config: &Config, version: &Version, krate_name: &str) -> Option, pub documentation: Option, pub readme: Option, + pub readme_file: Option, pub license: Option, pub repository: Option, pub max_upload_size: Option, @@ -82,6 +83,7 @@ type AllColumns = ( crates::homepage, crates::documentation, crates::readme, + crates::readme_file, crates::license, crates::repository, crates::max_upload_size, @@ -97,6 +99,7 @@ pub const ALL_COLUMNS: AllColumns = ( crates::homepage, crates::documentation, crates::readme, + crates::readme_file, crates::license, crates::repository, crates::max_upload_size, @@ -146,6 +149,7 @@ pub struct NewCrate<'a> { pub homepage: Option<&'a str>, pub documentation: Option<&'a str>, pub readme: Option<&'a str>, + pub readme_file: Option<&'a str>, pub repository: Option<&'a str>, pub max_upload_size: Option, pub license: Option<&'a str>, @@ -970,6 +974,7 @@ pub fn new(req: &mut Request) -> CargoResult { homepage: new_crate.homepage.as_ref().map(|s| &**s), documentation: new_crate.documentation.as_ref().map(|s| &**s), readme: new_crate.readme.as_ref().map(|s| &**s), + readme_file: new_crate.readme_file.as_ref().map(|s| &**s), repository: new_crate.repository.as_ref().map(|s| &**s), license: new_crate.license.as_ref().map(|s| &**s), max_upload_size: None, @@ -1028,7 +1033,7 @@ pub fn new(req: &mut Request) -> CargoResult { // Render the README for this crate let readme = match new_crate.readme.as_ref() { - Some(readme) => Some(render::markdown_to_html(&**readme)?), + Some(readme) => Some(render::readme_to_html(&**readme, new_crate.readme_file.as_ref().map_or("README.md", |s| &**s))?), None => None, }; diff --git a/src/render.rs b/src/render.rs index 
bdc5456ebc1..41efd231aa1 100644 --- a/src/render.rs +++ b/src/render.rs @@ -141,11 +141,16 @@ impl<'a> Default for MarkdownRenderer<'a> { /// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!"; /// let rendered = markdown_to_html(text)?; /// ``` -pub fn markdown_to_html(text: &str) -> CargoResult { +fn markdown_to_html(text: &str) -> CargoResult { let renderer = MarkdownRenderer::new(); renderer.to_html(text) } +pub fn readme_to_html(text: &str, filename: &str) -> CargoResult { + // Passthrough for now. + markdown_to_html(text) +} + #[cfg(test)] mod tests { use super::*; diff --git a/src/schema.rs b/src/schema.rs index 4dd4ccc40fc..b1b0eda06b8 100644 --- a/src/schema.rs +++ b/src/schema.rs @@ -279,6 +279,12 @@ table! { /// /// (Automatically generated by Diesel.) readme -> Nullable, + /// The `readme_file` column of the `crates` table. + /// + /// Its SQL type is `Nullable`. + /// + /// (Automatically generated by Diesel.) + readme_file -> Nullable, /// The `textsearchable_index_col` column of the `crates` table. /// /// Its SQL type is `TsVector`. 
diff --git a/src/tests/all.rs b/src/tests/all.rs index 5da065f5033..975e1e95d68 100644 --- a/src/tests/all.rs +++ b/src/tests/all.rs @@ -480,6 +480,7 @@ fn krate(name: &str) -> Crate { homepage: None, description: None, readme: None, + readme_file: None, license: None, repository: None, max_upload_size: None, @@ -648,6 +649,7 @@ fn new_req_body( homepage: krate.homepage, documentation: krate.documentation, readme: krate.readme, + readme_file: krate.readme_file, keywords: Some(u::KeywordList(kws)), categories: Some(u::CategoryList(cats)), license: Some("MIT".to_string()), diff --git a/src/tests/krate.rs b/src/tests/krate.rs index e602ba0c2fb..a78369fa3e2 100644 --- a/src/tests/krate.rs +++ b/src/tests/krate.rs @@ -75,6 +75,7 @@ fn new_crate(name: &str) -> u::NewCrate { homepage: None, documentation: None, readme: None, + readme_file: None, keywords: None, categories: None, license: Some("MIT".to_string()), diff --git a/src/upload.rs b/src/upload.rs index 068b4b479f0..f712089f08f 100644 --- a/src/upload.rs +++ b/src/upload.rs @@ -23,6 +23,7 @@ pub struct NewCrate { pub homepage: Option, pub documentation: Option, pub readme: Option, + pub readme_file: Option, pub keywords: Option, pub categories: Option, pub license: Option, From 12c4762bf4c8d3498393bd5e7881eb945e40fc31 Mon Sep 17 00:00:00 2001 From: Ashe Connor Date: Tue, 17 Oct 2017 17:39:43 +1100 Subject: [PATCH 2/6] Render non-Markdown READMEs as plain text --- Cargo.lock | 7 +++++++ Cargo.toml | 1 + src/krate/mod.rs | 5 ++++- src/lib.rs | 1 + src/render.rs | 40 ++++++++++++++++++++++++++++++++++++++-- 5 files changed, 51 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 0628831d766..432e01a3898 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -151,6 +151,7 @@ dependencies = [ "futures 0.1.16 (registry+https://github.com/rust-lang/crates.io-index)", "git2 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)", "hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)", + 
"htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)", "hyper 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)", "hyper-tls 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", "itertools 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -739,6 +740,11 @@ dependencies = [ "syn 0.11.11 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "htmlescape" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "httparse" version = "1.2.3" @@ -1896,6 +1902,7 @@ dependencies = [ "checksum git2 0.6.8 (registry+https://github.com/rust-lang/crates.io-index)" = "0c1c0203d653f4140241da0c1375a404f0a397249ec818cd2076c6280c50f6fa" "checksum hex 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d6a22814455d41612f41161581c2883c0c6a1c41852729b17d5ed88f01e153aa" "checksum html5ever 0.19.0 (registry+https://github.com/rust-lang/crates.io-index)" = "6ba0806f17ce2ea657c67cd28d03941166638c05153fb644aac6d5156b3033d0" +"checksum htmlescape 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" "checksum httparse 1.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "af2f2dd97457e8fb1ae7c5a420db346af389926e36f43768b96f101546b04a07" "checksum hyper 0.11.2 (registry+https://github.com/rust-lang/crates.io-index)" = "641abc3e3fcf0de41165595f801376e01106bca1fd876dda937730e477ca004c" "checksum hyper-tls 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "9c81fa95203e2a6087242c38691a0210f23e9f3f8f944350bd676522132e2985" diff --git a/Cargo.toml b/Cargo.toml index 39546582f5f..ea0d8ddb69f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -42,6 +42,7 @@ oauth2 = "0.3" log = "0.3" env_logger = "0.4" hex = "0.2" +htmlescape = "0.3.1" license-exprs = "^1.3" dotenv = "0.10.0" toml = "0.4" diff --git a/src/krate/mod.rs b/src/krate/mod.rs index 
a1b5e6685c1..857c433882d 100644 --- a/src/krate/mod.rs +++ b/src/krate/mod.rs @@ -1033,7 +1033,10 @@ pub fn new(req: &mut Request) -> CargoResult { // Render the README for this crate let readme = match new_crate.readme.as_ref() { - Some(readme) => Some(render::readme_to_html(&**readme, new_crate.readme_file.as_ref().map_or("README.md", |s| &**s))?), + Some(readme) => Some(render::readme_to_html( + &**readme, + new_crate.readme_file.as_ref().map_or("README.md", |s| &**s), + )?), None => None, }; diff --git a/src/lib.rs b/src/lib.rs index c8dbcd37104..437e164bbdf 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -22,6 +22,7 @@ extern crate dotenv; extern crate flate2; extern crate git2; extern crate hex; +extern crate htmlescape; extern crate lettre; extern crate license_exprs; #[macro_use] diff --git a/src/render.rs b/src/render.rs index 41efd231aa1..9da5d12fe8f 100644 --- a/src/render.rs +++ b/src/render.rs @@ -1,5 +1,6 @@ use ammonia::Ammonia; use comrak; +use htmlescape::encode_minimal; use util::CargoResult; @@ -146,9 +147,24 @@ fn markdown_to_html(text: &str) -> CargoResult { renderer.to_html(text) } +static MARKDOWN_EXTENSIONS: [&'static str; 7] = [ + ".md", + ".markdown", + ".mdown", + ".mdwn", + ".mkd", + ".mkdn", + ".mkdown", +]; + pub fn readme_to_html(text: &str, filename: &str) -> CargoResult { - // Passthrough for now. - markdown_to_html(text) + for e in MARKDOWN_EXTENSIONS.iter() { + if filename.to_lowercase().ends_with(e) { + return markdown_to_html(text); + } + } + + Ok(encode_minimal(text).replace("\n", "
\n")) } #[cfg(test)] @@ -223,4 +239,24 @@ mod tests { let result = markdown_to_html(text).unwrap(); assert_eq!(result, "

Hello World!

\n"); } + + #[test] + fn readme_to_html_renders_markdown() { + for f in &["readme.md", "README.MARKDOWN", "whatever.mkd"] { + assert_eq!( + readme_to_html("*lobster*", f).unwrap(), + "

lobster

\n" + ); + } + } + + #[test] + fn readme_to_html_renders_other_things() { + for f in &["readme", "readem.org", "blah.adoc"] { + assert_eq!( + readme_to_html("\n\nis my friend\n", f).unwrap(), + "<script>lobster</script>
\n
\nis my friend
\n" + ); + } + } } From 633a214100bf1ef222d499cb7dc91b7ece90d13f Mon Sep 17 00:00:00 2001 From: Ashe Connor Date: Tue, 17 Oct 2017 17:50:29 +1100 Subject: [PATCH 3/6] Don't let this exit the container --- app/styles/crate.scss | 1 + 1 file changed, 1 insertion(+) diff --git a/app/styles/crate.scss b/app/styles/crate.scss index 79e6fa3e1b8..dcf8a182532 100644 --- a/app/styles/crate.scss +++ b/app/styles/crate.scss @@ -305,6 +305,7 @@ } .crate-readme { line-height: 1.5; + overflow-wrap: break-word; img { max-width: 100%; From 0b87650ba20e2a56e5fe76a7aa6bb2cdaa2c17fd Mon Sep 17 00:00:00 2001 From: Ashe Connor Date: Tue, 17 Oct 2017 18:55:58 +1100 Subject: [PATCH 4/6] clippy! --- src/render.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/render.rs b/src/render.rs index 9da5d12fe8f..0e7732162cf 100644 --- a/src/render.rs +++ b/src/render.rs @@ -158,7 +158,7 @@ static MARKDOWN_EXTENSIONS: [&'static str; 7] = [ ]; pub fn readme_to_html(text: &str, filename: &str) -> CargoResult { - for e in MARKDOWN_EXTENSIONS.iter() { + for e in &MARKDOWN_EXTENSIONS { if filename.to_lowercase().ends_with(e) { return markdown_to_html(text); } From c00faea03292b1b100623a88f4f751b2bfe691a7 Mon Sep 17 00:00:00 2001 From: Ashe Connor Date: Tue, 17 Oct 2017 21:48:27 +1100 Subject: [PATCH 5/6] Cleanup based on @kureuil's comments --- src/render.rs | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/render.rs b/src/render.rs index 0e7732162cf..52bc8ea45df 100644 --- a/src/render.rs +++ b/src/render.rs @@ -6,13 +6,13 @@ use util::CargoResult; /// Context for markdown to HTML rendering. #[allow(missing_debug_implementations)] -pub struct MarkdownRenderer<'a> { +struct MarkdownRenderer<'a> { html_sanitizer: Ammonia<'a>, } impl<'a> MarkdownRenderer<'a> { /// Creates a new renderer instance. 
- pub fn new() -> MarkdownRenderer<'a> { + fn new() -> MarkdownRenderer<'a> { let tags = [ "a", "b", @@ -109,7 +109,7 @@ impl<'a> MarkdownRenderer<'a> { } /// Renders the given markdown to HTML using the current settings. - pub fn to_html(&self, text: &str) -> CargoResult { + fn to_html(&self, text: &str) -> CargoResult { let options = comrak::ComrakOptions { ext_autolink: true, ext_strikethrough: true, @@ -129,24 +129,13 @@ impl<'a> Default for MarkdownRenderer<'a> { } } -/// Renders a markdown text to sanitized HTML. -/// -/// The returned text should not contain any harmful HTML tag or attribute (such as iframe, -/// onclick, onmouseover, etc.). -/// -/// # Examples -/// -/// ``` -/// use render::markdown_to_html; -/// -/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!"; -/// let rendered = markdown_to_html(text)?; -/// ``` +/// Renders Markdown text to sanitized HTML. fn markdown_to_html(text: &str) -> CargoResult { let renderer = MarkdownRenderer::new(); renderer.to_html(text) } +/// Any readme with a filename ending in one of these extensions will be rendered as Markdown. static MARKDOWN_EXTENSIONS: [&'static str; 7] = [ ".md", ".markdown", @@ -157,6 +146,20 @@ static MARKDOWN_EXTENSIONS: [&'static str; 7] = [ ".mkdown", ]; +/// Renders a readme to sanitized HTML. An appropriate rendering method is chosen depending +/// on the extension of the supplied filename. +/// +/// The returned text should not contain any harmful HTML tag or attribute (such as iframe, +/// onclick, onmouseover, etc.). 
+/// +/// # Examples +/// +/// ``` +/// use render::readme_to_html; +/// +/// let text = "[Rust](https://rust-lang.org/) is an awesome *systems programming* language!"; +/// let rendered = readme_to_html(text, "README.md")?; +/// ``` pub fn readme_to_html(text: &str, filename: &str) -> CargoResult { for e in &MARKDOWN_EXTENSIONS { if filename.to_lowercase().ends_with(e) { From 0dde304cadd06ad4a773bb846936f087bccbe85e Mon Sep 17 00:00:00 2001 From: Ashe Connor Date: Thu, 19 Oct 2017 14:19:28 +1100 Subject: [PATCH 6/6] No extension = assume Markdown. --- src/render.rs | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/src/render.rs b/src/render.rs index 52bc8ea45df..93086d80c40 100644 --- a/src/render.rs +++ b/src/render.rs @@ -136,6 +136,7 @@ fn markdown_to_html(text: &str) -> CargoResult { } /// Any readme with a filename ending in one of these extensions will be rendered as Markdown. +/// Note we also render a readme as Markdown if _no_ extension is on the filename. static MARKDOWN_EXTENSIONS: [&'static str; 7] = [ ".md", ".markdown", @@ -161,10 +162,10 @@ static MARKDOWN_EXTENSIONS: [&'static str; 7] = [ /// let rendered = readme_to_html(text, "README.md")?; /// ``` pub fn readme_to_html(text: &str, filename: &str) -> CargoResult { - for e in &MARKDOWN_EXTENSIONS { - if filename.to_lowercase().ends_with(e) { - return markdown_to_html(text); - } + let filename = filename.to_lowercase(); + + if !filename.contains('.') || MARKDOWN_EXTENSIONS.iter().any(|e| filename.ends_with(e)) { + return markdown_to_html(text); } Ok(encode_minimal(text).replace("\n", "
\n")) @@ -245,7 +246,7 @@ mod tests { #[test] fn readme_to_html_renders_markdown() { - for f in &["readme.md", "README.MARKDOWN", "whatever.mkd"] { + for f in &["README", "readme.md", "README.MARKDOWN", "whatever.mkd"] { assert_eq!( readme_to_html("*lobster*", f).unwrap(), "

lobster

\n" @@ -255,7 +256,7 @@ mod tests { #[test] fn readme_to_html_renders_other_things() { - for f in &["readme", "readem.org", "blah.adoc"] { + for f in &["readme.exe", "readem.org", "blah.adoc"] { assert_eq!( readme_to_html("\n\nis my friend\n", f).unwrap(), "<script>lobster</script>
\n
\nis my friend
\n"