Skip to content

Do unconditional redirects for downloads when the db is broken #3564

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Apr 27, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ sha2 = "0.9"
swirl = { git = "https://github.com/sgrif/swirl.git", rev = "e87cf37" }
tar = "0.4.16"
tempfile = "3"
tokio = { version = "1", features = ["net", "signal", "io-std", "io-util", "rt-multi-thread"]}
tokio = { version = "1.5.0", features = ["net", "signal", "io-std", "io-util", "rt-multi-thread", "macros"]}
toml = "0.5"
tracing = "0.1"
tracing-subscriber = "0.2"
Expand All @@ -92,7 +92,7 @@ claim = "0.5"
conduit-test = "0.9.0-alpha.4"
hyper-tls = "0.5"
lazy_static = "1.0"
tokio = "1"
tokio = "1.5.0"
tower-service = "0.3.0"

[build-dependencies]
Expand Down
60 changes: 34 additions & 26 deletions src/app.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
//! Application-wide components in a struct accessible from each request

use crate::{db, Config, Env};
use crate::db::{ConnectionConfig, DieselPool};
use crate::{Config, Env};
use std::{sync::Arc, time::Duration};

use crate::downloads_counter::DownloadsCounter;
Expand All @@ -18,10 +19,10 @@ use scheduled_thread_pool::ScheduledThreadPool;
#[allow(missing_debug_implementations)]
pub struct App {
/// The primary database connection pool
pub primary_database: db::DieselPool,
pub primary_database: DieselPool,

/// The read-only replica database connection pool
pub read_only_replica_database: Option<db::DieselPool>,
pub read_only_replica_database: Option<DieselPool>,

/// GitHub API client
pub github: GitHubClient,
Expand Down Expand Up @@ -103,38 +104,45 @@ impl App {
_ => 30,
};

let primary_db_connection_config = db::ConnectionConfig {
statement_timeout: db_connection_timeout,
read_only: config.db_primary_config.read_only_mode,
};

let thread_pool = Arc::new(ScheduledThreadPool::new(db_helper_threads));

let primary_db_config = r2d2::Pool::builder()
.max_size(db_pool_size)
.min_idle(db_min_idle)
.connection_timeout(Duration::from_secs(db_connection_timeout))
.connection_customizer(Box::new(primary_db_connection_config))
.thread_pool(thread_pool.clone());

let primary_database =
db::diesel_pool(&config.db_primary_config.url, config.env, primary_db_config);

let replica_database = if let Some(url) = config.db_replica_config.as_ref().map(|c| &c.url)
{
let replica_db_connection_config = db::ConnectionConfig {
let primary_database = if config.use_test_database_pool {
DieselPool::new_test(&config.db_primary_config.url)
} else {
let primary_db_connection_config = ConnectionConfig {
statement_timeout: db_connection_timeout,
read_only: true,
read_only: config.db_primary_config.read_only_mode,
};

let replica_db_config = r2d2::Pool::builder()
let primary_db_config = r2d2::Pool::builder()
.max_size(db_pool_size)
.min_idle(db_min_idle)
.connection_timeout(Duration::from_secs(db_connection_timeout))
.connection_customizer(Box::new(replica_db_connection_config))
.thread_pool(thread_pool);
.connection_customizer(Box::new(primary_db_connection_config))
.thread_pool(thread_pool.clone());

DieselPool::new(&config.db_primary_config.url, primary_db_config)
};

Some(db::diesel_pool(&url, config.env, replica_db_config))
let replica_database = if let Some(url) = config.db_replica_config.as_ref().map(|c| &c.url)
{
if config.use_test_database_pool {
Some(DieselPool::new_test(url))
} else {
let replica_db_connection_config = ConnectionConfig {
statement_timeout: db_connection_timeout,
read_only: true,
};

let replica_db_config = r2d2::Pool::builder()
.max_size(db_pool_size)
.min_idle(db_min_idle)
.connection_timeout(Duration::from_secs(db_connection_timeout))
.connection_customizer(Box::new(replica_db_connection_config))
.thread_pool(thread_pool);

Some(DieselPool::new(&url, replica_db_config))
}
} else {
None
};
Expand Down
3 changes: 1 addition & 2 deletions src/background_jobs.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@ use reqwest::blocking::Client;
use std::panic::AssertUnwindSafe;
use std::sync::{Arc, Mutex, MutexGuard, PoisonError};

use diesel::r2d2::PoolError;
use swirl::PerformError;

use crate::db::{DieselPool, DieselPooledConn};
use crate::db::{DieselPool, DieselPooledConn, PoolError};
use crate::git::Repository;
use crate::uploaders::Uploader;

Expand Down
2 changes: 2 additions & 0 deletions src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub struct Config {
pub downloads_persist_interval_ms: usize,
pub ownership_invitations_expiration_days: u64,
pub metrics_authorization_token: Option<String>,
pub use_test_database_pool: bool,
}

#[derive(Debug)]
Expand Down Expand Up @@ -211,6 +212,7 @@ impl Default for Config {
.unwrap_or(60_000), // 1 minute
ownership_invitations_expiration_days: 30,
metrics_authorization_token: dotenv::var("METRICS_AUTHORIZATION_TOKEN").ok(),
use_test_database_pool: false,
}
}
}
Expand Down
93 changes: 62 additions & 31 deletions src/controllers/version/downloads.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,53 +2,84 @@
//!
//! Crate level functionality is located in `krate::downloads`.

use super::{extract_crate_name_and_semver, version_and_crate};
use crate::controllers::prelude::*;

use chrono::{Duration, NaiveDate, Utc};

use crate::db::PoolError;
use crate::models::{Crate, VersionDownload};
use crate::schema::*;
use crate::views::EncodableVersionDownload;

use super::{extract_crate_name_and_semver, version_and_crate};
use chrono::{Duration, NaiveDate, Utc};

/// Handles the `GET /crates/:crate_id/:version/download` route.
/// This returns a URL to the location where the crate is stored.
pub fn download(req: &mut dyn RequestExt) -> EndpointResult {
let app = req.app().clone();
let recorder = req.timing_recorder();

let crate_name = &req.params()["crate_id"];
let version = &req.params()["version"];

let (version_id, canonical_crate_name): (_, String) = {
use self::versions::dsl::*;

let conn = recorder.record("get_conn", || req.db_conn())?;

// Returns the crate name as stored in the database, or an error if we could
// not load the version ID from the database.
recorder.record("get_version", || {
versions
.inner_join(crates::table)
.select((id, crates::name))
.filter(Crate::with_name(crate_name))
.filter(num.eq(version))
.first(&*conn)
})?
};

// The increment does not happen instantly, but it's deferred to be executed in a batch
// along with other downloads. See crate::downloads_counter for the implementation.
req.app().downloads_counter.increment(version_id);
let mut crate_name = req.params()["crate_id"].clone();
let version = req.params()["version"].as_str();

let mut log_metadata = None;
match recorder.record("get_conn", || req.db_conn()) {
Ok(conn) => {
use self::versions::dsl::*;

// Returns the crate name as stored in the database, or an error if we could
// not load the version ID from the database.
let (version_id, canonical_crate_name) = recorder.record("get_version", || {
versions
.inner_join(crates::table)
.select((id, crates::name))
.filter(Crate::with_name(&crate_name))
.filter(num.eq(version))
.first::<(i32, String)>(&*conn)
})?;

if canonical_crate_name != crate_name {
app.instance_metrics
.downloads_non_canonical_crate_name_total
.inc();
log_metadata = Some(("bot", "dl"));
}
crate_name = canonical_crate_name;

// The increment does not happen instantly, but it's deferred to be executed in a batch
// along with other downloads. See crate::downloads_counter for the implementation.
app.downloads_counter.increment(version_id);
}
Err(PoolError::UnhealthyPool) => {
// The download endpoint is the most critical route in the whole crates.io application,
// as it's relied upon by users and automations to download crates. Keeping it working
// is the most important thing for us.
//
// The endpoint relies on the database to fetch the canonical crate name (with the
// right capitalization and hyphenation), but that's only needed to serve clients who
// don't call the endpoint with the crate's canonical name.
//
// Thankfully Cargo always uses the right name when calling the endpoint, and we can
// keep it working during a full database outage by unconditionally redirecting without
// checking whether the crate exists or the rigth name is used. Non-Cargo clients might
// get a 404 response instead of a 500, but that's worth it.
//
// Without a working database we also can't count downloads, but that's also less
// critical than keeping Cargo downloads operational.

app.instance_metrics
.downloads_unconditional_redirects_total
.inc();
log_metadata = Some(("unconditional_redirect", "true"));
}
Err(err) => return Err(err.into()),
}

let redirect_url = req
.app()
.config
.uploader
.crate_location(&canonical_crate_name, version);
.crate_location(&crate_name, &*version);

if &canonical_crate_name != crate_name {
req.log_metadata("bot", "dl");
if let Some((key, value)) = log_metadata {
req.log_metadata(key, value);
}

if req.wants_json() {
Expand Down
Loading