From 659b937e445b0673ca31a3b3c2c6a18138b6975d Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Wed, 7 Jun 2023 14:49:30 +0200 Subject: [PATCH 1/2] Fetch readme from source archive if available for crate details page --- src/storage/mod.rs | 2 + src/test/fakes.rs | 23 +++++++++ src/web/crate_details.rs | 104 ++++++++++++++++++++++++++++++++++++--- src/web/source.rs | 14 +++++- 4 files changed, 133 insertions(+), 10 deletions(-) diff --git a/src/storage/mod.rs b/src/storage/mod.rs index c99989ac7..a103c0bd3 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -13,6 +13,7 @@ use crate::web::metrics::RenderingTimesRecorder; use crate::{db::Pool, Config, InstanceMetrics}; use anyhow::{anyhow, ensure}; use chrono::{DateTime, Utc}; +use fn_error_context::context; use path_slash::PathExt; use std::io::BufReader; use std::num::NonZeroU64; @@ -199,6 +200,7 @@ impl Storage { }) } + #[context("fetching {path} from {name} {version} (archive: {archive_storage})")] pub(crate) fn fetch_source_file( &self, name: &str, diff --git a/src/test/fakes.rs b/src/test/fakes.rs index 5d2583972..e2e52c554 100644 --- a/src/test/fakes.rs +++ b/src/test/fakes.rs @@ -214,6 +214,12 @@ impl<'a> FakeRelease<'a> { self.source_file("README.md", content.as_bytes()) } + /// NOTE: this should be markdown. It will be rendered as HTML when served. + pub(crate) fn readme_only_database(mut self, content: &'a str) -> Self { + self.readme = Some(content); + self + } + pub(crate) fn add_owner(mut self, owner: CrateOwner) -> Self { self.registry_crate_data.owners.push(owner); self @@ -347,6 +353,23 @@ impl<'a> FakeRelease<'a> { debug!("before upload source"); let source_tmp = create_temp_dir(); store_files_into(&self.source_files, source_tmp.path())?; + + if !self + .source_files + .iter() + .any(|&(path, _)| path == "Cargo.toml") + { + let MetadataPackage { name, version, .. } = &package; + let content = format!( + r#" + [package] + name = "{name}" + version = "{version}" + "# + ); + store_files_into(&[("Cargo.toml", content.as_bytes())], source_tmp.path())?; + } + let (source_meta, algs) = upload_files(FileKind::Sources, source_tmp.path())?; debug!("added source files {}", source_meta); diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index 5a3dbe367..b68ed177a 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -9,8 +9,9 @@ use crate::{ encode_url_path, error::{AxumNope, AxumResult}, }, + Storage, }; -use anyhow::anyhow; +use anyhow::{bail, Context, Result}; use axum::{ extract::{Extension, Path}, response::{IntoResponse, Response as AxumResponse}, @@ -91,6 +92,34 @@ pub struct Release { pub target_name: String, } +#[fn_error_context::context("fetching readme for {name} {version}")] +fn fetch_readme_from_source( + storage: &Storage, + name: &str, + version: &str, + archive_storage: bool, +) -> anyhow::Result { + let manifest = storage.fetch_source_file(name, version, "Cargo.toml", archive_storage)?; + let manifest = String::from_utf8(manifest.content) + .context("parsing Cargo.toml")? + .parse::() + .context("parsing Cargo.toml")?; + let paths = match manifest.get("package").and_then(|p| p.get("readme")) { + Some(toml::Value::Boolean(true)) => vec!["README.md"], + Some(toml::Value::Boolean(false)) => vec![], + Some(toml::Value::String(path)) => vec![path.as_ref()], + _ => vec!["README.md", "README.txt", "README"], + }; + for path in &paths { + if let Ok(readme) = storage.fetch_source_file(name, version, path, archive_storage) { + let readme = String::from_utf8(readme.content) + .with_context(|| format!("parsing {path} content"))?; + return Ok(readme); + } + } + bail!("couldn't find readme in stored source, checked {paths:?}") +} + impl CrateDetails { pub fn new( conn: &mut impl GenericClient, @@ -237,6 +266,13 @@ impl CrateDetails { Ok(Some(crate_details)) } + fn enrich_readme(&mut self, storage: &Storage) -> Result<()> { + let readme = + fetch_readme_from_source(storage, &self.name, &self.version, self.archive_storage)?; + self.readme = Some(readme); + Ok(()) + } + /// Returns the latest non-yanked, non-prerelease release of this crate (or latest /// yanked/prereleased if that is all that exist). pub fn latest_release(&self) -> &Release { @@ -270,7 +306,9 @@ pub(crate) fn releases_for_crate( .into_iter() .filter_map(|row| { let version: String = row.get("version"); - match semver::Version::parse(&version) { + match semver::Version::parse(&version).with_context(|| { + format!("invalid semver in database for crate {crate_id}: {version}") + }) { Ok(semversion) => Some(Release { id: row.get("id"), version: semversion, @@ -281,9 +319,7 @@ pub(crate) fn releases_for_crate( target_name: row.get("target_name"), }), Err(err) => { - report_error(&anyhow!(err).context(format!( - "invalid semver in database for crate {crate_id}: {version}" - ))); + report_error(&err); None } } @@ -310,9 +346,10 @@ pub(crate) struct CrateDetailHandlerParams { version: Option, } -#[tracing::instrument] +#[tracing::instrument(skip(pool, storage))] pub(crate) async fn crate_details_handler( Path(params): Path, + Extension(storage): Extension>, Extension(pool): Extension, Extension(repository_stats_updater): Extension>, ) -> AxumResult { @@ -352,13 +389,19 @@ pub(crate) async fn crate_details_handler( let details = spawn_blocking(move || { let mut conn = pool.get()?; - CrateDetails::new( + let mut details = CrateDetails::new( &mut *conn, ¶ms.name, &version, &version_or_latest, Some(&repository_stats_updater), - ) + )?; + if let Some(ref mut details) = details { + if let Err(e) = details.enrich_readme(&storage) { + tracing::debug!("{e:?}") + } + } + Ok(details) }) .await? .ok_or(AxumNope::VersionNotFound)?; @@ -1111,4 +1154,49 @@ mod tests { Ok(()) }); } + + #[test] + fn readme() { + wrapper(|env| { + env.fake_release() + .name("dummy") + .version("0.1.0") + .readme_only_database("database readme") + .create()?; + + env.fake_release() + .name("dummy") + .version("0.2.0") + .readme_only_database("database readme") + .source_file("README.md", b"storage readme") + .create()?; + + env.fake_release() + .name("dummy") + .version("0.3.0") + .source_file("README.md", b"storage readme") + .create()?; + + env.fake_release() + .name("dummy") + .version("0.4.0") + .readme_only_database("database readme") + .source_file("MEREAD", b"storage meread") + .source_file("Cargo.toml", br#"package.readme = "MEREAD""#) + .create()?; + + let check_readme = |path, content| { + let resp = env.frontend().get(path).send().unwrap(); + let body = String::from_utf8(resp.bytes().unwrap().to_vec()).unwrap(); + assert!(body.contains(content)); + }; + + check_readme("/crate/dummy/0.1.0", "database readme"); + check_readme("/crate/dummy/0.2.0", "storage readme"); + check_readme("/crate/dummy/0.3.0", "storage readme"); + check_readme("/crate/dummy/0.4.0", "storage meread"); + + Ok(()) + }); + } } diff --git a/src/web/source.rs b/src/web/source.rs index a90064feb..26eff321d 100644 --- a/src/web/source.rs +++ b/src/web/source.rs @@ -619,6 +619,7 @@ mod tests { env.fake_release() .name("fake") .version("0.1.0") + .source_file("Cargo.toml", b"") .source_file("config.json", b"{}") .create()?; @@ -637,7 +638,10 @@ mod tests { assert!(text.starts_with(r#""#)); // file list doesn't show "../" - assert_eq!(get_file_list_links(&text), vec!["./config.json"]); + assert_eq!( + get_file_list_links(&text), + vec!["./Cargo.toml", "./config.json"] + ); Ok(()) }); @@ -649,6 +653,7 @@ mod tests { env.fake_release() .name("fake") .version("0.1.0") + .source_file("Cargo.toml", b"some_random_content") .source_file("folder1/some_filename.rs", b"some_random_content") .source_file("folder2/another_filename.rs", b"some_random_content") .source_file("root_filename.rs", b"some_random_content") @@ -665,7 +670,12 @@ mod tests { assert_eq!( get_file_list_links(&response.text()?), - vec!["./folder1/", "./folder2/", "./root_filename.rs"] + vec![ + "./folder1/", + "./folder2/", + "./Cargo.toml", + "./root_filename.rs" + ] ); Ok(()) }); From fae24882f23a895b12d947e025d832e637145edc Mon Sep 17 00:00:00 2001 From: Wim Looman Date: Tue, 1 Aug 2023 13:35:31 +0200 Subject: [PATCH 2/2] Improve handling errors during readme fetch A missing readme is ignored, other errors are reported to sentry --- src/web/crate_details.rs | 85 +++++++++++++++++++++------------------- 1 file changed, 44 insertions(+), 41 deletions(-) diff --git a/src/web/crate_details.rs b/src/web/crate_details.rs index b68ed177a..fb1cb0a61 100644 --- a/src/web/crate_details.rs +++ b/src/web/crate_details.rs @@ -4,6 +4,7 @@ use crate::{ db::Pool, impl_axum_webpage, repositories::RepositoryStatsUpdater, + storage::PathNotFoundError, web::{ cache::CachePolicy, encode_url_path, @@ -11,7 +12,7 @@ use crate::{ }, Storage, }; -use anyhow::{bail, Context, Result}; +use anyhow::{Context, Result}; use axum::{ extract::{Extension, Path}, response::{IntoResponse, Response as AxumResponse}, @@ -92,34 +93,6 @@ pub struct Release { pub target_name: String, } -#[fn_error_context::context("fetching readme for {name} {version}")] -fn fetch_readme_from_source( - storage: &Storage, - name: &str, - version: &str, - archive_storage: bool, -) -> anyhow::Result { - let manifest = storage.fetch_source_file(name, version, "Cargo.toml", archive_storage)?; - let manifest = String::from_utf8(manifest.content) - .context("parsing Cargo.toml")? - .parse::() - .context("parsing Cargo.toml")?; - let paths = match manifest.get("package").and_then(|p| p.get("readme")) { - Some(toml::Value::Boolean(true)) => vec!["README.md"], - Some(toml::Value::Boolean(false)) => vec![], - Some(toml::Value::String(path)) => vec![path.as_ref()], - _ => vec!["README.md", "README.txt", "README"], - }; - for path in &paths { - if let Ok(readme) = storage.fetch_source_file(name, version, path, archive_storage) { - let readme = String::from_utf8(readme.content) - .with_context(|| format!("parsing {path} content"))?; - return Ok(readme); - } - } - bail!("couldn't find readme in stored source, checked {paths:?}") -} - impl CrateDetails { pub fn new( conn: &mut impl GenericClient, @@ -266,11 +239,40 @@ impl CrateDetails { Ok(Some(crate_details)) } - fn enrich_readme(&mut self, storage: &Storage) -> Result<()> { - let readme = - fetch_readme_from_source(storage, &self.name, &self.version, self.archive_storage)?; - self.readme = Some(readme); - Ok(()) + #[fn_error_context::context("fetching readme for {} {}", self.name, self.version)] + fn fetch_readme(&self, storage: &Storage) -> anyhow::Result> { + let manifest = storage.fetch_source_file( + &self.name, + &self.version, + "Cargo.toml", + self.archive_storage, + )?; + let manifest = String::from_utf8(manifest.content) + .context("parsing Cargo.toml")? + .parse::() + .context("parsing Cargo.toml")?; + let paths = match manifest.get("package").and_then(|p| p.get("readme")) { + Some(toml::Value::Boolean(true)) => vec!["README.md"], + Some(toml::Value::Boolean(false)) => vec![], + Some(toml::Value::String(path)) => vec![path.as_ref()], + _ => vec!["README.md", "README.txt", "README"], + }; + for path in &paths { + match storage.fetch_source_file(&self.name, &self.version, path, self.archive_storage) { + Ok(readme) => { + let readme = String::from_utf8(readme.content) + .with_context(|| format!("parsing {path} content"))?; + return Ok(Some(readme)); + } + Err(err) if err.is::() => { + continue; + } + Err(err) => { + return Err(err); + } + } + } + Ok(None) } /// Returns the latest non-yanked, non-prerelease release of this crate (or latest @@ -395,16 +397,17 @@ pub(crate) async fn crate_details_handler( &version, &version_or_latest, Some(&repository_stats_updater), - )?; - if let Some(ref mut details) = details { - if let Err(e) = details.enrich_readme(&storage) { - tracing::debug!("{e:?}") - } + )? + .ok_or(AxumNope::VersionNotFound)?; + + match details.fetch_readme(&storage) { + Ok(readme) => details.readme = readme.or(details.readme), + Err(e) => report_error(&e), } + Ok(details) }) - .await? - .ok_or(AxumNope::VersionNotFound)?; + .await?; let mut res = CrateDetailsPage { details }.into_response(); res.extensions_mut()