Skip to content

Commit 91f3a12

Browse files
committed
allow downloads to happen with a broken database
1 parent f129b01 commit 91f3a12

File tree

11 files changed

+353
-49
lines changed

11 files changed

+353
-49
lines changed

Cargo.lock

Lines changed: 12 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,7 +81,7 @@ sha2 = "0.9"
8181
swirl = { git = "https://github.com/sgrif/swirl.git", rev = "e87cf37" }
8282
tar = "0.4.16"
8383
tempfile = "3"
84-
tokio = { version = "1", features = ["net", "signal", "io-std", "io-util", "rt-multi-thread"]}
84+
tokio = { version = "1.5.0", features = ["net", "signal", "io-std", "io-util", "rt-multi-thread", "macros"]}
8585
toml = "0.5"
8686
tracing = "0.1"
8787
tracing-subscriber = "0.2"
@@ -92,7 +92,7 @@ claim = "0.5"
9292
conduit-test = "0.9.0-alpha.4"
9393
hyper-tls = "0.5"
9494
lazy_static = "1.0"
95-
tokio = "1"
95+
tokio = "1.5.0"
9696
tower-service = "0.3.0"
9797

9898
[build-dependencies]

src/background_jobs.rs

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,10 +2,9 @@ use reqwest::blocking::Client;
22
use std::panic::AssertUnwindSafe;
33
use std::sync::{Arc, Mutex, MutexGuard, PoisonError};
44

5-
use diesel::r2d2::PoolError;
65
use swirl::PerformError;
76

8-
use crate::db::{DieselPool, DieselPooledConn};
7+
use crate::db::{DieselPool, DieselPooledConn, PoolError};
98
use crate::git::Repository;
109
use crate::uploaders::Uploader;
1110

src/controllers/version/downloads.rs

Lines changed: 58 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -2,53 +2,80 @@
22
//!
33
//! Crate level functionality is located in `krate::downloads`.
44
5+
use super::{extract_crate_name_and_semver, version_and_crate};
56
use crate::controllers::prelude::*;
6-
7-
use chrono::{Duration, NaiveDate, Utc};
8-
7+
use crate::db::PoolError;
98
use crate::models::{Crate, VersionDownload};
109
use crate::schema::*;
1110
use crate::views::EncodableVersionDownload;
12-
13-
use super::{extract_crate_name_and_semver, version_and_crate};
11+
use chrono::{Duration, NaiveDate, Utc};
1412

1513
/// Handles the `GET /crates/:crate_id/:version/download` route.
1614
/// This returns a URL to the location where the crate is stored.
1715
pub fn download(req: &mut dyn RequestExt) -> EndpointResult {
16+
let app = req.app().clone();
1817
let recorder = req.timing_recorder();
1918

20-
let crate_name = &req.params()["crate_id"];
21-
let version = &req.params()["version"];
22-
23-
let (version_id, canonical_crate_name): (_, String) = {
24-
use self::versions::dsl::*;
25-
26-
let conn = recorder.record("get_conn", || req.db_conn())?;
27-
28-
// Returns the crate name as stored in the database, or an error if we could
29-
// not load the version ID from the database.
30-
recorder.record("get_version", || {
31-
versions
32-
.inner_join(crates::table)
33-
.select((id, crates::name))
34-
.filter(Crate::with_name(crate_name))
35-
.filter(num.eq(version))
36-
.first(&*conn)
37-
})?
38-
};
39-
40-
// The increment does not happen instantly, but it's deferred to be executed in a batch
41-
// along with other downloads. See crate::downloads_counter for the implementation.
42-
req.app().downloads_counter.increment(version_id);
19+
let mut crate_name = req.params()["crate_id"].clone();
20+
let version = req.params()["version"].as_str();
21+
22+
let mut log_metadata = None;
23+
match recorder.record("get_conn", || req.db_conn()) {
24+
Ok(conn) => {
25+
use self::versions::dsl::*;
26+
27+
// Returns the crate name as stored in the database, or an error if we could
28+
// not load the version ID from the database.
29+
let (version_id, canonical_crate_name) = recorder.record("get_version", || {
30+
versions
31+
.inner_join(crates::table)
32+
.select((id, crates::name))
33+
.filter(Crate::with_name(&crate_name))
34+
.filter(num.eq(version))
35+
.first::<(i32, String)>(&*conn)
36+
})?;
37+
38+
if canonical_crate_name != crate_name {
39+
app.instance_metrics.downloads_non_canonical_crate_name_total.inc();
40+
log_metadata = Some(("bot", "dl"));
41+
}
42+
crate_name = canonical_crate_name;
43+
44+
// The increment does not happen instantly, but it's deferred to be executed in a batch
45+
// along with other downloads. See crate::downloads_counter for the implementation.
46+
app.downloads_counter.increment(version_id);
47+
}
48+
Err(PoolError::UnhealthyPool) => {
49+
// The download endpoint is the most critical route in the whole crates.io application,
50+
// as it's relied upon by users and automations to download crates. Keeping it working
51+
// is the most important thing for us.
52+
//
53+
// The endpoint relies on the database to fetch the canonical crate name (with the
54+
// right capitalization and hyphenation), but that's only needed to serve clients who
55+
// don't call the endpoint with the crate's canonical name.
56+
//
57+
// Thankfully Cargo always uses the right name when calling the endpoint, and we can
58+
// keep it working during a full database outage by unconditionally redirecting without
59+
// checking whether the crate exists or the rigth name is used. Non-Cargo clients might
60+
// get a 404 response instead of a 500, but that's worth it.
61+
//
62+
// Without a working database we also can't count downloads, but that's also less
63+
// critical than keeping Cargo downloads operational.
64+
65+
app.instance_metrics.downloads_unconditional_redirects_total.inc();
66+
log_metadata = Some(("unconditional_redirect", "true"));
67+
}
68+
Err(err) => return Err(err.into()),
69+
}
4370

4471
let redirect_url = req
4572
.app()
4673
.config
4774
.uploader
48-
.crate_location(&canonical_crate_name, version);
75+
.crate_location(&crate_name, &*version);
4976

50-
if &canonical_crate_name != crate_name {
51-
req.log_metadata("bot", "dl");
77+
if let Some((key, value)) = log_metadata {
78+
req.log_metadata(key, value);
5279
}
5380

5481
if req.wants_json() {

src/db.rs

Lines changed: 50 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@ use conduit::RequestExt;
22
use diesel::prelude::*;
33
use diesel::r2d2::{self, ConnectionManager, CustomizeConnection};
44
use parking_lot::{ReentrantMutex, ReentrantMutexGuard};
5-
use std::ops::Deref;
65
use std::sync::Arc;
6+
use std::{ops::Deref, time::Duration};
77
use url::Url;
88

99
use crate::middleware::app::RequestApp;
@@ -32,9 +32,17 @@ impl DieselPool {
3232
DieselPool::Test(Arc::new(ReentrantMutex::new(conn)))
3333
}
3434

35-
pub fn get(&self) -> Result<DieselPooledConn<'_>, r2d2::PoolError> {
35+
pub fn get(&self) -> Result<DieselPooledConn<'_>, PoolError> {
3636
match self {
37-
DieselPool::Pool(pool) => Ok(DieselPooledConn::Pool(pool.get()?)),
37+
DieselPool::Pool(pool) => {
38+
if let Some(conn) = pool.try_get() {
39+
Ok(DieselPooledConn::Pool(conn))
40+
} else if !self.is_healthy() {
41+
Err(PoolError::UnhealthyPool)
42+
} else {
43+
Ok(DieselPooledConn::Pool(pool.get().map_err(PoolError::R2D2)?))
44+
}
45+
}
3846
DieselPool::Test(conn) => Ok(DieselPooledConn::Test(conn.lock())),
3947
}
4048
}
@@ -54,6 +62,21 @@ impl DieselPool {
5462
},
5563
}
5664
}
65+
66+
pub fn wait_until_healthy(&self, timeout: Duration) -> Result<(), PoolError> {
67+
match self {
68+
DieselPool::Pool(pool) => match pool.get_timeout(timeout) {
69+
Ok(_) => Ok(()),
70+
Err(_) if !self.is_healthy() => Err(PoolError::UnhealthyPool),
71+
Err(err) => Err(PoolError::R2D2(err)),
72+
},
73+
DieselPool::Test(_) => Ok(()),
74+
}
75+
}
76+
77+
fn is_healthy(&self) -> bool {
78+
self.state().connections > 0
79+
}
5780
}
5881

5982
#[derive(Debug, Copy, Clone)]
@@ -96,23 +119,23 @@ pub fn connection_url(url: &str) -> String {
96119

97120
pub trait RequestTransaction {
98121
/// Obtain a read/write database connection from the primary pool
99-
fn db_conn(&self) -> Result<DieselPooledConn<'_>, r2d2::PoolError>;
122+
fn db_conn(&self) -> Result<DieselPooledConn<'_>, PoolError>;
100123

101124
/// Obtain a readonly database connection from the replica pool
102125
///
103126
/// If there is no replica pool, the primary pool is used instead.
104-
fn db_read_only(&self) -> Result<DieselPooledConn<'_>, r2d2::PoolError>;
127+
fn db_read_only(&self) -> Result<DieselPooledConn<'_>, PoolError>;
105128
}
106129

107130
impl<T: RequestExt + ?Sized> RequestTransaction for T {
108-
fn db_conn(&self) -> Result<DieselPooledConn<'_>, r2d2::PoolError> {
109-
self.app().primary_database.get().map_err(Into::into)
131+
fn db_conn(&self) -> Result<DieselPooledConn<'_>, PoolError> {
132+
self.app().primary_database.get()
110133
}
111134

112-
fn db_read_only(&self) -> Result<DieselPooledConn<'_>, r2d2::PoolError> {
135+
fn db_read_only(&self) -> Result<DieselPooledConn<'_>, PoolError> {
113136
match &self.app().read_only_replica_database {
114-
Some(pool) => pool.get().map_err(Into::into),
115-
None => self.app().primary_database.get().map_err(Into::into),
137+
Some(pool) => pool.get(),
138+
None => self.app().primary_database.get(),
116139
}
117140
}
118141
}
@@ -148,3 +171,20 @@ pub(crate) fn test_conn() -> PgConnection {
148171
conn.begin_test_transaction().unwrap();
149172
conn
150173
}
174+
175+
#[derive(Debug)]
176+
pub enum PoolError {
177+
R2D2(r2d2::PoolError),
178+
UnhealthyPool,
179+
}
180+
181+
impl std::error::Error for PoolError {}
182+
183+
impl std::fmt::Display for PoolError {
184+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
185+
match self {
186+
PoolError::R2D2(err) => write!(f, "{}", err),
187+
PoolError::UnhealthyPool => write!(f, "unhealthy database pool"),
188+
}
189+
}
190+
}

src/metrics/instance.rs

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,11 @@ metrics! {
3232
pub requests_total: IntCounter,
3333
/// Number of requests currently being processed
3434
pub requests_in_flight: IntGauge,
35+
36+
/// Number of download requests that were served with an unconditional redirect.
37+
pub downloads_unconditional_redirects_total: IntCounter,
38+
/// Number of download requests with a non-canonical crate name.
39+
pub downloads_non_canonical_crate_name_total: IntCounter,
3540
}
3641

3742
// All instance metrics will be prefixed with this namespace.

src/tests/all.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ mod categories;
3737
mod category;
3838
mod dump_db;
3939
mod git;
40+
mod unhealthy_database;
4041
mod keyword;
4142
mod krate;
4243
mod metrics;

src/tests/unhealthy_database.rs

Lines changed: 66 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
use crate::{
2+
builders::CrateBuilder,
3+
util::{MockAnonymousUser, RequestHelper, TestApp},
4+
};
5+
use std::time::Duration;
6+
7+
#[test]
8+
fn download_crate_with_broken_networking_primary_database() {
9+
let (app, anon, _, owner) = TestApp::init().with_slow_real_db_pool().with_token();
10+
app.db(|conn| {
11+
CrateBuilder::new("crate_name", owner.as_model().user_id)
12+
.version("1.0.0")
13+
.expect_build(conn)
14+
});
15+
16+
// When the database connection is healthy downloads are redirected with the proper
17+
// capitalization, and missing crates or versions return a 404.
18+
19+
assert_checked_redirects(&anon);
20+
21+
// After networking breaks, preventing new database connections, the download endpoint should
22+
// do an unconditional redirect to the CDN, without checking whether the crate exists or what
23+
// the exact capitalization of crate name is.
24+
25+
app.db_chaosproxy().break_networking();
26+
assert_unconditional_redirects(&anon);
27+
28+
// After restoring the network and waiting for the database pool to get healthy again redirects
29+
// should be checked again.
30+
31+
app.db_chaosproxy().restore_networking();
32+
app.as_inner()
33+
.primary_database
34+
.wait_until_healthy(Duration::from_millis(500))
35+
.expect("the database did not return healthy");
36+
37+
assert_checked_redirects(&anon);
38+
}
39+
40+
fn assert_checked_redirects(anon: &MockAnonymousUser) {
41+
anon.get::<()>("/api/v1/crates/crate_name/1.0.0/download")
42+
.assert_redirect_ends_with("/crate_name/crate_name-1.0.0.crate");
43+
44+
anon.get::<()>("/api/v1/crates/Crate-Name/1.0.0/download")
45+
.assert_redirect_ends_with("/crate_name/crate_name-1.0.0.crate");
46+
47+
anon.get::<()>("/api/v1/crates/crate_name/2.0.0/download")
48+
.assert_not_found();
49+
50+
anon.get::<()>("/api/v1/crates/awesome-project/1.0.0/download")
51+
.assert_not_found();
52+
}
53+
54+
fn assert_unconditional_redirects(anon: &MockAnonymousUser) {
55+
anon.get::<()>("/api/v1/crates/crate_name/1.0.0/download")
56+
.assert_redirect_ends_with("/crate_name/crate_name-1.0.0.crate");
57+
58+
anon.get::<()>("/api/v1/crates/Crate-Name/1.0.0/download")
59+
.assert_redirect_ends_with("/Crate-Name/Crate-Name-1.0.0.crate");
60+
61+
anon.get::<()>("/api/v1/crates/crate_name/2.0.0/download")
62+
.assert_redirect_ends_with("/crate_name/crate_name-2.0.0.crate");
63+
64+
anon.get::<()>("/api/v1/crates/awesome-project/1.0.0/download")
65+
.assert_redirect_ends_with("/awesome-project/awesome-project-1.0.0.crate");
66+
}

src/tests/util.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ use std::collections::HashMap;
3636
mod response;
3737
mod test_app;
3838
mod fresh_schema;
39+
mod chaosproxy;
3940

4041
pub use response::Response;
4142
pub use test_app::TestApp;

0 commit comments

Comments
 (0)