diff --git a/benches/compression.rs b/benches/compression.rs
index 8b4133d62..f7d99fda9 100644
--- a/benches/compression.rs
+++ b/benches/compression.rs
@@ -1,17 +1,19 @@
-use cratesfyi::storage::{compress, decompress};
+use cratesfyi::storage::{compress, decompress, CompressionAlgorithm};
 use criterion::{black_box, criterion_group, criterion_main, Criterion};
 
+const ALGORITHM: CompressionAlgorithm = CompressionAlgorithm::Zstd;
+
 pub fn criterion_benchmark(c: &mut Criterion) {
     // this isn't a great benchmark because it only tests on one file
     // ideally we would build a whole crate and compress each file, taking the average
     let html = std::fs::read_to_string("benches/struct.CaptureMatches.html").unwrap();
     let html_slice = html.as_bytes();
     c.bench_function("compress regex html", |b| {
-        b.iter(|| compress(black_box(html_slice)))
+        b.iter(|| compress(black_box(html_slice), ALGORITHM))
     });
-    let (compressed, alg) = compress(html_slice).unwrap();
+    let compressed = compress(html_slice, ALGORITHM).unwrap();
     c.bench_function("decompress regex html", |b| {
-        b.iter(|| decompress(black_box(compressed.as_slice()), alg))
+        b.iter(|| decompress(black_box(compressed.as_slice()), ALGORITHM, std::usize::MAX))
     });
 }
diff --git a/src/bin/cratesfyi.rs b/src/bin/cratesfyi.rs
index 90cba2cad..313ce6bd7 100644
--- a/src/bin/cratesfyi.rs
+++ b/src/bin/cratesfyi.rs
@@ -1,17 +1,19 @@
 use std::env;
 use std::path::PathBuf;
+use std::sync::Arc;
 
 use cratesfyi::db::{self, add_path_into_database, connect_db};
 use cratesfyi::utils::{add_crate_to_queue, remove_crate_priority, set_crate_priority};
 use cratesfyi::Server;
 use cratesfyi::{DocBuilder, DocBuilderOptions, RustwideBuilder};
+use failure::Error;
 use structopt::StructOpt;
 
-pub fn main() {
+pub fn main() -> Result<(), Error> {
     let _ = dotenv::dotenv();
     logger_init();
 
-    CommandLine::from_args().handle_args();
+    CommandLine::from_args().handle_args()
 }
 
 fn logger_init() {
@@ -79,19 +81,23 @@ enum CommandLine {
 }
 
 impl CommandLine {
-    pub fn handle_args(self) {
+    pub fn handle_args(self) -> Result<(), Error> {
+        let config = Arc::new(cratesfyi::Config::from_env()?);
+
         match self {
             Self::Build(build) => build.handle_args(),
             Self::StartWebServer {
                 socket_addr,
                 reload_templates,
             } => {
-                Server::start(Some(&socket_addr), reload_templates);
+                Server::start(Some(&socket_addr), reload_templates, config);
             }
-            Self::Daemon { foreground } => cratesfyi::utils::start_daemon(!foreground),
+            Self::Daemon { foreground } => cratesfyi::utils::start_daemon(!foreground, config),
             Self::Database { subcommand } => subcommand.handle_args(),
             Self::Queue { subcommand } => subcommand.handle_args(),
         }
+
+        Ok(())
     }
 }
diff --git a/src/config.rs b/src/config.rs
new file mode 100644
index 000000000..ff3a87d67
--- /dev/null
+++ b/src/config.rs
@@ -0,0 +1,37 @@
+use failure::{bail, Error, Fail, ResultExt};
+use std::env::VarError;
+use std::str::FromStr;
+use std::sync::Arc;
+
+#[derive(Debug)]
+pub struct Config {
+    pub(crate) max_file_size: usize,
+    pub(crate) max_file_size_html: usize,
+}
+
+impl Config {
+    pub fn from_env() -> Result<Self, Error> {
+        Ok(Self {
+            max_file_size: env("DOCSRS_MAX_FILE_SIZE", 50 * 1024 * 1024)?,
+            max_file_size_html: env("DOCSRS_MAX_FILE_SIZE_HTML", 5 * 1024 * 1024)?,
+        })
+    }
+}
+
+impl iron::typemap::Key for Config {
+    type Value = Arc<Config>;
+}
+
+fn env<T>(var: &str, default: T) -> Result<T, Error>
+where
+    T: FromStr,
+    T::Err: Fail,
+{
+    match std::env::var(var) {
+        Ok(content) => Ok(content
+            .parse::<T>()
+            .with_context(|_| format!("failed to parse configuration variable {}", var))?),
+        Err(VarError::NotPresent) => Ok(default),
+        Err(VarError::NotUnicode(_)) => bail!("configuration variable {} is not UTF-8", var),
+    }
+}
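The `env` helper above drives the whole configuration story. A minimal standalone sketch of its behavior (not the PR's code; `env_or_default` is an illustrative name, and unlike the real helper it doesn't reject non-UTF-8 values):

```rust
use std::str::FromStr;

// Parse a configuration variable into any FromStr type, falling back to a
// default when the variable is unset.
fn env_or_default<T: FromStr>(var: &str, default: T) -> Result<T, T::Err> {
    match std::env::var(var) {
        Ok(raw) => raw.parse::<T>(),
        Err(_) => Ok(default),
    }
}

fn main() {
    std::env::set_var("DOCSRS_MAX_FILE_SIZE", "1048576");
    let max: usize = env_or_default("DOCSRS_MAX_FILE_SIZE", 50 * 1024 * 1024).unwrap();
    assert_eq!(max, 1024 * 1024);
}
```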
diff --git a/src/db/file.rs b/src/db/file.rs
index 02bc5fc26..b1d694af2 100644
--- a/src/db/file.rs
+++ b/src/db/file.rs
@@ -13,8 +13,8 @@ use std::path::{Path, PathBuf};
 
 pub(crate) use crate::storage::Blob;
 
-pub(crate) fn get_path(conn: &Connection, path: &str) -> Result<Blob> {
-    Storage::new(conn).get(path)
+pub(crate) fn get_path(conn: &Connection, path: &str, max_size: usize) -> Result<Blob> {
+    Storage::new(conn).get(path, max_size)
 }
 
 /// Store all files in a directory and return [[mimetype, filename]] as Json
diff --git a/src/error.rs b/src/error.rs
index 075ae5ea1..c2526c752 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -5,3 +5,14 @@ use std::result::Result as StdResult;
 pub(crate) use failure::Error;
 
 pub type Result<T> = StdResult<T, Error>;
+
+#[derive(Debug, Copy, Clone)]
+pub(crate) struct SizeLimitReached;
+
+impl std::error::Error for SizeLimitReached {}
+
+impl std::fmt::Display for SizeLimitReached {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "the size limit for the buffer was reached")
+    }
+}
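`SizeLimitReached` is always wrapped in a `std::io::Error` (so it can flow through `Write` implementations), which `failure::Error` then wraps again; that is why the tests throughout this patch need two downcasts to detect it. A self-contained sketch of the pattern, re-declaring the type locally since the real one is crate-private:

```rust
use std::io::{Error as IoError, ErrorKind};

#[derive(Debug)]
struct SizeLimitReached;

impl std::error::Error for SizeLimitReached {}

impl std::fmt::Display for SizeLimitReached {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "the size limit for the buffer was reached")
    }
}

fn main() {
    // An io::Error wrapping SizeLimitReached, like SizedBuffer::write produces.
    let outer = failure::Error::from(IoError::new(ErrorKind::Other, SizeLimitReached));

    // First downcast to io::Error, then reach the inner source via get_ref().
    assert!(outer
        .downcast_ref::<IoError>()
        .and_then(|io| io.get_ref())
        .and_then(|err| err.downcast_ref::<SizeLimitReached>())
        .is_some());
}
```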
diff --git a/src/lib.rs b/src/lib.rs
index 2a5769307..905798f0c 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -2,11 +2,13 @@
 //! documentation of crates for the Rust Programming Language.
 #![allow(clippy::cognitive_complexity)]
 
+pub use self::config::Config;
 pub use self::docbuilder::options::DocBuilderOptions;
 pub use self::docbuilder::DocBuilder;
 pub use self::docbuilder::RustwideBuilder;
 pub use self::web::Server;
 
+mod config;
 pub mod db;
 mod docbuilder;
 mod error;
diff --git a/src/storage/database.rs b/src/storage/database.rs
index 89bbbf3b6..7569e6e15 100644
--- a/src/storage/database.rs
+++ b/src/storage/database.rs
@@ -16,20 +16,38 @@ impl<'a> DatabaseBackend<'a> {
         Self { conn }
     }
 
-    pub(super) fn get(&self, path: &str) -> Result<Blob> {
+    pub(super) fn get(&self, path: &str, max_size: usize) -> Result<Blob> {
         use std::convert::TryInto;
 
+        // The maximum size for a BYTEA (the type used for `content`) is 1GB, so this cast is safe:
+        // https://www.postgresql.org/message-id/162867790712200946i7ba8eb92v908ac595c0c35aee%40mail.gmail.com
+        let max_size = max_size.min(std::i32::MAX as usize) as i32;
+
+        // The size limit is checked at the database level, to avoid receiving data altogether if
+        // the limit is exceeded.
         let rows = self.conn.query(
-            "SELECT path, mime, date_updated, content, compression
+            "SELECT
+                path, mime, date_updated, compression,
+                (CASE WHEN LENGTH(content) <= $2 THEN content ELSE NULL END) AS content,
+                (LENGTH(content) > $2) AS is_too_big
             FROM files
             WHERE path = $1;",
-            &[&path],
+            &[&path, &(max_size)],
         )?;
 
         if rows.is_empty() {
             Err(PathNotFoundError.into())
         } else {
             let row = rows.get(0);
+
+            if row.get("is_too_big") {
+                return Err(std::io::Error::new(
+                    std::io::ErrorKind::Other,
+                    crate::error::SizeLimitReached,
+                )
+                .into());
+            }
+
             let compression = row.get::<_, Option<i32>>("compression").map(|i| {
                 i.try_into()
                     .expect("invalid compression algorithm stored in database")
@@ -91,17 +109,17 @@ mod tests {
                     content: "Hello world!".bytes().collect(),
                     compression: None,
                 },
-                backend.get("dir/foo.txt")?
+                backend.get("dir/foo.txt", std::usize::MAX)?
             );
 
             // Test that other files are not returned
             assert!(backend
-                .get("dir/bar.txt")
+                .get("dir/bar.txt", std::usize::MAX)
                 .unwrap_err()
                 .downcast_ref::<PathNotFoundError>()
                 .is_some());
             assert!(backend
-                .get("foo.txt")
+                .get("foo.txt", std::usize::MAX)
                 .unwrap_err()
                 .downcast_ref::<PathNotFoundError>()
                 .is_some());
@@ -109,4 +127,51 @@ mod tests {
             Ok(())
         });
     }
+
+    #[test]
+    fn test_get_too_big() {
+        const MAX_SIZE: usize = 1024;
+
+        crate::test::wrapper(|env| {
+            let conn = env.db().conn();
+            let backend = DatabaseBackend::new(&conn);
+
+            let small_blob = Blob {
+                path: "small-blob.bin".into(),
+                mime: "text/plain".into(),
+                date_updated: Utc::now(),
+                content: vec![0; MAX_SIZE],
+                compression: None,
+            };
+            let big_blob = Blob {
+                path: "big-blob.bin".into(),
+                mime: "text/plain".into(),
+                date_updated: Utc::now(),
+                content: vec![0; MAX_SIZE * 2],
+                compression: None,
+            };
+
+            let transaction = conn.transaction()?;
+            backend
+                .store_batch(std::slice::from_ref(&small_blob), &transaction)
+                .unwrap();
+            backend
+                .store_batch(std::slice::from_ref(&big_blob), &transaction)
+                .unwrap();
+            transaction.commit()?;
+
+            let blob = backend.get("small-blob.bin", MAX_SIZE).unwrap();
+            assert_eq!(blob.content.len(), small_blob.content.len());
+
+            assert!(backend
+                .get("big-blob.bin", MAX_SIZE)
+                .unwrap_err()
+                .downcast_ref::<std::io::Error>()
+                .and_then(|io| io.get_ref())
+                .and_then(|err| err.downcast_ref::<crate::error::SizeLimitReached>())
+                .is_some());
+
+            Ok(())
+        });
+    }
 }
diff --git a/src/storage/mod.rs b/src/storage/mod.rs
index 09e3a8690..cb42a0450 100644
--- a/src/storage/mod.rs
+++ b/src/storage/mod.rs
@@ -14,6 +14,7 @@ use std::io::Read;
 use std::path::{Path, PathBuf};
 
 const MAX_CONCURRENT_UPLOADS: usize = 1000;
+const DEFAULT_COMPRESSION: CompressionAlgorithm = CompressionAlgorithm::Zstd;
 
 pub type CompressionAlgorithms = HashSet<CompressionAlgorithm>;
 
@@ -24,6 +25,11 @@ macro_rules! enum_id {
         $($variant = $discriminant,)*
     }
 
+    impl $name {
+        #[cfg(test)]
+        const AVAILABLE: &'static [Self] = &[$(Self::$variant,)*];
+    }
+
     impl fmt::Display for CompressionAlgorithm {
         fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
             match self {
@@ -117,13 +123,13 @@ impl<'a> Storage<'a> {
             DatabaseBackend::new(conn).into()
         }
     }
-    pub(crate) fn get(&self, path: &str) -> Result<Blob> {
+    pub(crate) fn get(&self, path: &str, max_size: usize) -> Result<Blob> {
         let mut blob = match self {
-            Self::Database(db) => db.get(path),
-            Self::S3(s3) => s3.get(path),
+            Self::Database(db) => db.get(path, max_size),
+            Self::S3(s3) => s3.get(path, max_size),
         }?;
         if let Some(alg) = blob.compression {
-            blob.content = decompress(blob.content.as_slice(), alg)?;
+            blob.content = decompress(blob.content.as_slice(), alg, max_size)?;
             blob.compression = None;
         }
         Ok(blob)
@@ -163,7 +169,8 @@ impl<'a> Storage<'a> {
                     .map(|file| (file_path, file))
             })
            .map(|(file_path, file)| -> Result<_, Error> {
-                let (content, alg) = compress(file)?;
+                let alg = DEFAULT_COMPRESSION;
+                let content = compress(file, alg)?;
                 let bucket_path = Path::new(prefix).join(&file_path);
 
                 #[cfg(windows)] // On windows, we need to normalize \\ to / so the route logic works
@@ -201,15 +208,25 @@ impl<'a> Storage<'a> {
 }
 
 // public for benchmarking
-pub fn compress(content: impl Read) -> Result<(Vec<u8>, CompressionAlgorithm), Error> {
-    let data = zstd::encode_all(content, 9)?;
-    Ok((data, CompressionAlgorithm::Zstd))
+pub fn compress(content: impl Read, algorithm: CompressionAlgorithm) -> Result<Vec<u8>, Error> {
+    match algorithm {
+        CompressionAlgorithm::Zstd => Ok(zstd::encode_all(content, 9)?),
+    }
 }
 
-pub fn decompress(content: impl Read, algorithm: CompressionAlgorithm) -> Result<Vec<u8>, Error> {
+pub fn decompress(
+    content: impl Read,
+    algorithm: CompressionAlgorithm,
+    max_size: usize,
+) -> Result<Vec<u8>, Error> {
+    // The sized buffer prevents a malicious file from decompressing to many times its
+    // compressed size.
+    let mut buffer = crate::utils::sized_buffer::SizedBuffer::new(max_size);
+
     match algorithm {
-        CompressionAlgorithm::Zstd => zstd::decode_all(content).map_err(Into::into),
+        CompressionAlgorithm::Zstd => zstd::stream::copy_decode(content, &mut buffer)?,
     }
+
+    Ok(buffer.into_inner())
 }
 
 fn detect_mime(file_path: &Path) -> Result<&'static str, Error> {
@@ -282,7 +299,7 @@ mod test {
             let name = Path::new(&blob.path);
             assert!(stored_files.contains_key(name));
 
-            let actual = backend.get(&blob.path).unwrap();
+            let actual = backend.get(&blob.path, std::usize::MAX).unwrap();
 
             assert_blob_eq(blob, &actual);
         }
@@ -324,12 +341,12 @@ mod test {
             "text/rust"
         );
 
-        let file = backend.get("rustdoc/Cargo.toml").unwrap();
+        let file = backend.get("rustdoc/Cargo.toml", std::usize::MAX).unwrap();
         assert_eq!(file.content, b"data");
         assert_eq!(file.mime, "text/toml");
         assert_eq!(file.path, "rustdoc/Cargo.toml");
 
-        let file = backend.get("rustdoc/src/main.rs").unwrap();
+        let file = backend.get("rustdoc/src/main.rs", std::usize::MAX).unwrap();
         assert_eq!(file.content, b"data");
         assert_eq!(file.mime, "text/rust");
         assert_eq!(file.path, "rustdoc/src/main.rs");
@@ -341,7 +358,8 @@ mod test {
     fn test_batched_uploads() {
         let uploads: Vec<_> = (0..=MAX_CONCURRENT_UPLOADS + 1)
             .map(|i| {
-                let (content, alg) = compress("fn main() {}".as_bytes()).unwrap();
+                let alg = DEFAULT_COMPRESSION;
+                let content = compress("fn main() {}".as_bytes(), alg).unwrap();
                 Blob {
                     mime: "text/rust".into(),
                     content,
@@ -369,16 +387,60 @@ mod test {
     #[test]
     fn test_compression() {
         let orig = "fn main() {}";
-        let (data, alg) = compress(orig.as_bytes()).unwrap();
-        let blob = Blob {
-            mime: "text/rust".into(),
-            content: data.clone(),
-            path: "main.rs".into(),
-            date_updated: Utc::now(),
-            compression: Some(alg),
-        };
-        test_roundtrip(std::slice::from_ref(&blob));
-        assert_eq!(decompress(data.as_slice(), alg).unwrap(), orig.as_bytes());
+
+        for alg in CompressionAlgorithm::AVAILABLE {
+            println!("testing algorithm {}", alg);
+
+            let data = compress(orig.as_bytes(), *alg).unwrap();
+            let blob = Blob {
+                mime: "text/rust".into(),
+                content: data.clone(),
+                path: "main.rs".into(),
+                date_updated: Utc::now(),
+                compression: Some(*alg),
+            };
+            test_roundtrip(std::slice::from_ref(&blob));
+            assert_eq!(
+                decompress(data.as_slice(), *alg, std::usize::MAX).unwrap(),
+                orig.as_bytes()
+            );
+        }
+    }
+
+    #[test]
+    fn test_decompression_too_big() {
+        const MAX_SIZE: usize = 1024;
+
+        let small = &[b'A'; MAX_SIZE / 2] as &[u8];
+        let exact = &[b'A'; MAX_SIZE] as &[u8];
+        let big = &[b'A'; MAX_SIZE * 2] as &[u8];
+
+        for &alg in CompressionAlgorithm::AVAILABLE {
+            let compressed_small = compress(small, alg).unwrap();
+            let compressed_exact = compress(exact, alg).unwrap();
+            let compressed_big = compress(big, alg).unwrap();
+
+            // Ensure decompressing within the limit works.
+            assert_eq!(
+                small.len(),
+                decompress(compressed_small.as_slice(), alg, MAX_SIZE)
+                    .unwrap()
+                    .len()
+            );
+            assert_eq!(
+                exact.len(),
+                decompress(compressed_exact.as_slice(), alg, MAX_SIZE)
+                    .unwrap()
+                    .len()
+            );
+
+            // Ensure decompressing a file over the limit returns a SizeLimitReached error.
+            let err = decompress(compressed_big.as_slice(), alg, MAX_SIZE).unwrap_err();
+            assert!(err
+                .downcast_ref::<std::io::Error>()
+                .and_then(|io| io.get_ref())
+                .and_then(|err| err.downcast_ref::<crate::error::SizeLimitReached>())
+                .is_some());
+        }
     }
 
     #[test]
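From a caller's perspective the reworked public API looks roughly like this (a sketch; the limits are arbitrary):

```rust
use cratesfyi::storage::{compress, decompress, CompressionAlgorithm};

fn main() -> Result<(), failure::Error> {
    let alg = CompressionAlgorithm::Zstd;
    let original = b"fn main() {}" as &[u8];

    // The algorithm is now picked by the caller instead of being returned.
    let compressed = compress(original, alg)?;

    // Decompression succeeds while the output fits within the limit...
    assert_eq!(decompress(compressed.as_slice(), alg, 1024)?, original);

    // ...and fails once the decompressed output would exceed it.
    assert!(decompress(compressed.as_slice(), alg, 4).is_err());

    Ok(())
}
```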
diff --git a/src/storage/s3.rs b/src/storage/s3.rs
index 073f0f566..54d550a56 100644
--- a/src/storage/s3.rs
+++ b/src/storage/s3.rs
@@ -7,7 +7,6 @@ use rusoto_core::region::Region;
 use rusoto_credential::DefaultCredentialsProvider;
 use rusoto_s3::{GetObjectRequest, PutObjectRequest, S3Client, S3};
 use std::convert::TryInto;
-use std::io::Read;
 use tokio::runtime::Runtime;
 
 #[cfg(test)]
@@ -32,7 +31,7 @@ impl<'a> S3Backend<'a> {
         }
     }
 
-    pub(super) fn get(&self, path: &str) -> Result<Blob> {
+    pub(super) fn get(&self, path: &str, max_size: usize) -> Result<Blob> {
         let res = self
             .client
             .get_object(GetObjectRequest {
@@ -42,13 +41,15 @@ impl<'a> S3Backend<'a> {
             })
             .sync()?;
 
-        let mut b = res.body.unwrap().into_blocking_read();
-        let mut content = Vec::with_capacity(
+        let mut content = crate::utils::sized_buffer::SizedBuffer::new(max_size);
+        content.reserve(
             res.content_length
                 .and_then(|l| l.try_into().ok())
                 .unwrap_or(0),
         );
-        b.read_to_end(&mut content).unwrap();
+
+        let mut body = res.body.unwrap().into_blocking_read();
+        std::io::copy(&mut body, &mut content)?;
 
         let date_updated = parse_timespec(&res.last_modified.unwrap())?;
         let compression = res.content_encoding.and_then(|s| s.parse().ok());
@@ -57,7 +58,7 @@ impl<'a> S3Backend<'a> {
             path: path.into(),
             mime: res.content_type.unwrap(),
             date_updated,
-            content,
+            content: content.into_inner(),
             compression,
         })
     }
@@ -190,6 +191,47 @@ pub(crate) mod tests {
         });
     }
 
+    #[test]
+    fn test_get_too_big() {
+        const MAX_SIZE: usize = 1024;
+
+        wrapper(|env| {
+            let small_blob = Blob {
+                path: "small-blob.bin".into(),
+                mime: "text/plain".into(),
+                date_updated: Utc::now(),
+                content: vec![0; MAX_SIZE],
+                compression: None,
+            };
+            let big_blob = Blob {
+                path: "big-blob.bin".into(),
+                mime: "text/plain".into(),
+                date_updated: Utc::now(),
+                content: vec![0; MAX_SIZE * 2],
+                compression: None,
+            };
+
+            let s3 = env.s3();
+            s3.upload(slice::from_ref(&small_blob)).unwrap();
+            s3.upload(slice::from_ref(&big_blob)).unwrap();
+
+            s3.with_client(|client| {
+                let blob = client.get("small-blob.bin", MAX_SIZE).unwrap();
+                assert_eq!(blob.content.len(), small_blob.content.len());
+
+                assert!(client
+                    .get("big-blob.bin", MAX_SIZE)
+                    .unwrap_err()
+                    .downcast_ref::<std::io::Error>()
+                    .and_then(|io| io.get_ref())
+                    .and_then(|err| err.downcast_ref::<crate::error::SizeLimitReached>())
+                    .is_some());
+            });
+
+            Ok(())
+        })
+    }
+
     #[test]
     fn test_store() {
         wrapper(|env| {
@@ -220,6 +262,7 @@ pub(crate) mod tests {
             Ok(())
         })
     }
+
     // NOTE: trying to upload a file ending with `/` will behave differently in test and prod.
     // NOTE: On s3, it will succeed and create a file called `/`.
     // NOTE: On min.io, it will fail with 'Object name contains unsupported characters.'
diff --git a/src/storage/s3/test.rs b/src/storage/s3/test.rs
index 937f20127..be915379f 100644
--- a/src/storage/s3/test.rs
+++ b/src/storage/s3/test.rs
@@ -30,7 +30,7 @@ impl TestS3 {
         use rusoto_core::RusotoError;
         use rusoto_s3::GetObjectError;
 
-        let err = self.0.borrow().get(path).unwrap_err();
+        let err = self.0.borrow().get(path, std::usize::MAX).unwrap_err();
         match err
             .downcast_ref::<RusotoError<GetObjectError>>()
             .expect("wanted GetObject")
@@ -41,9 +41,13 @@ impl TestS3 {
         };
     }
     pub(crate) fn assert_blob(&self, blob: &Blob, path: &str) {
-        let actual = self.0.borrow().get(path).unwrap();
+        let actual = self.0.borrow().get(path, std::usize::MAX).unwrap();
         assert_blob_eq(blob, &actual);
     }
+
+    pub(crate) fn with_client(&self, f: impl FnOnce(&mut S3Backend<'static>)) {
+        f(&mut self.0.borrow_mut())
+    }
 }
 
 impl Drop for TestS3 {
diff --git a/src/test/mod.rs b/src/test/mod.rs
index d57ef200a..d001db967 100644
--- a/src/test/mod.rs
+++ b/src/test/mod.rs
@@ -2,6 +2,7 @@ mod fakes;
 
 use crate::storage::s3::TestS3;
 use crate::web::Server;
+use crate::Config;
 use failure::Error;
 use log::error;
 use once_cell::unsync::OnceCell;
@@ -93,6 +94,7 @@ pub(crate) fn assert_redirect(
 }
 
 pub(crate) struct TestEnvironment {
+    config: OnceCell<Arc<Config>>,
     db: OnceCell<TestDatabase>,
     frontend: OnceCell<TestFrontend>,
     s3: OnceCell<TestS3>,
@@ -107,6 +109,7 @@ impl TestEnvironment {
     fn new() -> Self {
         init_logger();
         Self {
+            config: OnceCell::new(),
             db: OnceCell::new(),
             frontend: OnceCell::new(),
             s3: OnceCell::new(),
@@ -119,13 +122,33 @@ impl TestEnvironment {
         }
     }
 
+    fn base_config(&self) -> Config {
+        Config::from_env().expect("failed to get base config")
+    }
+
+    pub(crate) fn override_config(&self, f: impl FnOnce(&mut Config)) {
+        let mut config = self.base_config();
+        f(&mut config);
+
+        if self.config.set(Arc::new(config)).is_err() {
+            panic!("can't call override_config after the configuration is accessed!");
+        }
+    }
+
+    pub(crate) fn config(&self) -> Arc<Config> {
+        self.config
+            .get_or_init(|| Arc::new(self.base_config()))
+            .clone()
+    }
+
     pub(crate) fn db(&self) -> &TestDatabase {
         self.db
             .get_or_init(|| TestDatabase::new().expect("failed to initialize the db"))
     }
 
     pub(crate) fn frontend(&self) -> &TestFrontend {
-        self.frontend.get_or_init(|| TestFrontend::new(self.db()))
+        self.frontend
+            .get_or_init(|| TestFrontend::new(self.db(), self.config()))
     }
 
     pub(crate) fn s3(&self) -> &TestS3 {
@@ -187,9 +210,9 @@ pub(crate) struct TestFrontend {
 }
 
 impl TestFrontend {
-    fn new(db: &TestDatabase) -> Self {
+    fn new(db: &TestDatabase, config: Arc<Config>) -> Self {
         Self {
-            server: Server::start_test(db.conn.clone()),
+            server: Server::start_test(db.conn.clone(), config),
             client: Client::new(),
         }
     }
diff --git a/src/utils/daemon.rs b/src/utils/daemon.rs
index 9f8c79617..b3fba957c 100644
--- a/src/utils/daemon.rs
+++ b/src/utils/daemon.rs
@@ -5,19 +5,20 @@
 use crate::{
     docbuilder::RustwideBuilder,
     utils::{github_updater, pubsubhubbub, update_release_activity},
-    DocBuilder, DocBuilderOptions,
+    Config, DocBuilder, DocBuilderOptions,
 };
 use chrono::{Timelike, Utc};
 use log::{debug, error, info, warn};
 use std::panic::{catch_unwind, AssertUnwindSafe};
 use std::path::PathBuf;
+use std::sync::Arc;
 use std::time::Duration;
 use std::{env, thread};
 
 #[cfg(not(target_os = "windows"))]
 use ::{libc::fork, std::fs::File, std::io::Write, std::process::exit};
 
-pub fn start_daemon(background: bool) {
+pub fn start_daemon(background: bool, config: Arc<Config>) {
     const CRATE_VARIABLES: [&str; 3] = [
         "CRATESFYI_PREFIX",
         "CRATESFYI_GITHUB_USERNAME",
@@ -249,7 +250,7 @@ pub fn start_daemon(background: bool, config: Arc<Config>) {
 
     // at least start web server
     info!("Starting web server");
-    crate::Server::start(None, false);
+    crate::Server::start(None, false, config);
 }
 
 fn opts() -> DocBuilderOptions {
diff --git a/src/utils/mod.rs b/src/utils/mod.rs
index 423eb9d54..69b666c26 100644
--- a/src/utils/mod.rs
+++ b/src/utils/mod.rs
@@ -23,3 +23,4 @@ mod pubsubhubbub;
 mod queue;
 mod release_activity_updater;
 mod rustc_version;
+pub(crate) mod sized_buffer;
diff --git a/src/utils/sized_buffer.rs b/src/utils/sized_buffer.rs
new file mode 100644
index 000000000..9c94efe3b
--- /dev/null
+++ b/src/utils/sized_buffer.rs
@@ -0,0 +1,72 @@
+use std::io::{Error as IoError, ErrorKind, Write};
+
+pub(crate) struct SizedBuffer {
+    inner: Vec<u8>,
+    limit: usize,
+}
+
+impl SizedBuffer {
+    pub(crate) fn new(limit: usize) -> Self {
+        SizedBuffer {
+            inner: Vec::new(),
+            limit,
+        }
+    }
+
+    pub(crate) fn reserve(&mut self, amount: usize) {
+        if self.inner.len() + amount > self.limit {
+            self.inner.reserve_exact(self.limit - self.inner.len());
+        } else {
+            self.inner.reserve(amount);
+        }
+    }
+
+    pub(crate) fn into_inner(self) -> Vec<u8> {
+        self.inner
+    }
+}
+
+impl Write for SizedBuffer {
+    fn write(&mut self, buf: &[u8]) -> Result<usize, IoError> {
+        if self.inner.len() + buf.len() > self.limit {
+            Err(IoError::new(
+                ErrorKind::Other,
+                crate::error::SizeLimitReached,
+            ))
+        } else {
+            self.inner.write(buf)
+        }
+    }
+
+    fn flush(&mut self) -> Result<(), IoError> {
+        self.inner.flush()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_sized_buffer() {
+        let mut buffer = SizedBuffer::new(1024);
+
+        // Add two chunks of 500 bytes
+        assert_eq!(500, buffer.write(&[0; 500]).unwrap());
+        assert_eq!(500, buffer.write(&[0; 500]).unwrap());
+
+        // Ensure adding a third chunk fails
+        let error = buffer.write(&[0; 500]).unwrap_err();
+        assert!(error
+            .get_ref()
+            .unwrap()
+            .is::<crate::error::SizeLimitReached>());
+
+        // Ensure the whole third chunk was discarded
+        assert_eq!(1000, buffer.inner.len());
+
+        // Ensure it's possible to reach the limit
+        assert_eq!(24, buffer.write(&[0; 24]).unwrap());
+        assert_eq!(1024, buffer.inner.len());
+    }
+}
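The buffer is designed to slot into `std::io::copy`, which is how both `decompress` and the S3 backend consume it. A simplified, self-contained re-statement of the idea (`CappedVec` is illustrative, not the crate's type):

```rust
use std::io::{Error, ErrorKind, Write};

// Refuse any write that would push the buffer past its limit, so io::copy
// stops with an error instead of growing without bound.
struct CappedVec {
    inner: Vec<u8>,
    limit: usize,
}

impl Write for CappedVec {
    fn write(&mut self, buf: &[u8]) -> Result<usize, Error> {
        if self.inner.len() + buf.len() > self.limit {
            return Err(Error::new(ErrorKind::Other, "size limit reached"));
        }
        self.inner.write(buf)
    }

    fn flush(&mut self) -> Result<(), Error> {
        self.inner.flush()
    }
}

fn main() {
    // A source twice the size of the limit: the copy must fail.
    let mut source: &[u8] = &[0; 2048];
    let mut capped = CappedVec { inner: Vec::new(), limit: 1024 };
    assert!(std::io::copy(&mut source, &mut capped).is_err());
}
```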
diff --git a/src/web/file.rs b/src/web/file.rs
index 7ea0457fe..a2abbaeff 100644
--- a/src/web/file.rs
+++ b/src/web/file.rs
@@ -1,16 +1,23 @@
 //! Database based file handler
 
 use super::pool::Pool;
-use crate::{db, error::Result};
+use crate::{db, error::Result, Config};
 use iron::{status, Handler, IronError, IronResult, Request, Response};
 use postgres::Connection;
 
+#[derive(Debug)]
 pub(crate) struct File(pub(crate) db::file::Blob);
 
 impl File {
     /// Gets file from database
-    pub fn from_path(conn: &Connection, path: &str) -> Result<File> {
-        Ok(File(db::file::get_path(conn, path)?))
+    pub fn from_path(conn: &Connection, path: &str, config: &Config) -> Result<File> {
+        let max_size = if path.ends_with(".html") {
+            config.max_file_size_html
+        } else {
+            config.max_file_size
+        };
+
+        Ok(File(db::file::get_path(conn, path, max_size)?))
     }
 
     /// Consumes File and creates a iron response
@@ -52,7 +59,8 @@ impl Handler for DatabaseFileHandler {
     fn handle(&self, req: &mut Request) -> IronResult<Response> {
         let path = req.url.path().join("/");
         let conn = extension!(req, Pool).get()?;
-        if let Ok(file) = File::from_path(&conn, &path) {
+        let config = extension!(req, Config);
+        if let Ok(file) = File::from_path(&conn, &path, &config) {
             Ok(file.serve())
         } else {
             Err(IronError::new(
@@ -80,6 +88,7 @@ mod tests {
         let mut file = File::from_path(
             &*db.conn(),
             "rustdoc/fake-package/1.0.0/fake-package/index.html",
+            &env.config(),
         )
         .unwrap();
         file.0.date_updated = now;
@@ -93,4 +102,59 @@ mod tests {
             Ok(())
         });
     }
+
+    #[test]
+    fn test_max_size() {
+        const MAX_SIZE: usize = 1024;
+        const MAX_HTML_SIZE: usize = 128;
+
+        wrapper(|env| {
+            env.override_config(|config| {
+                config.max_file_size = MAX_SIZE;
+                config.max_file_size_html = MAX_HTML_SIZE;
+            });
+
+            let db = env.db();
+
+            db.fake_release()
+                .name("dummy")
+                .version("0.1.0")
+                .rustdoc_file("small.html", &[b'A'; MAX_HTML_SIZE / 2] as &[u8])
+                .rustdoc_file("exact.html", &[b'A'; MAX_HTML_SIZE] as &[u8])
+                .rustdoc_file("big.html", &[b'A'; MAX_HTML_SIZE * 2] as &[u8])
+                .rustdoc_file("small.js", &[b'A'; MAX_SIZE / 2] as &[u8])
+                .rustdoc_file("exact.js", &[b'A'; MAX_SIZE] as &[u8])
+                .rustdoc_file("big.js", &[b'A'; MAX_SIZE * 2] as &[u8])
+                .create()?;
+
+            let file = |path| {
+                File::from_path(
+                    &db.conn(),
+                    &format!("rustdoc/dummy/0.1.0/{}", path),
+                    &env.config(),
+                )
+            };
+            let assert_len = |len, path| {
+                assert_eq!(len, file(path).unwrap().0.content.len());
+            };
+            let assert_too_big = |path| {
+                assert!(file(path)
+                    .unwrap_err()
+                    .downcast_ref::<std::io::Error>()
+                    .and_then(|io| io.get_ref())
+                    .and_then(|err| err.downcast_ref::<crate::error::SizeLimitReached>())
+                    .is_some())
+            };
+
+            assert_len(MAX_HTML_SIZE / 2, "small.html");
+            assert_len(MAX_HTML_SIZE, "exact.html");
+            assert_len(MAX_SIZE / 2, "small.js");
+            assert_len(MAX_SIZE, "exact.js");
+
+            assert_too_big("big.html");
+            assert_too_big("big.js");
+
+            Ok(())
+        })
+    }
 }
diff --git a/src/web/mod.rs b/src/web/mod.rs
index e31dae5cd..66c5a5188 100644
--- a/src/web/mod.rs
+++ b/src/web/mod.rs
@@ -56,21 +56,23 @@ mod sitemap;
 mod source;
 
 use self::pool::Pool;
+use crate::config::Config;
 use chrono::{DateTime, Utc};
 use handlebars_iron::{DirectorySource, HandlebarsEngine, SourceError};
 use iron::headers::{CacheControl, CacheDirective, ContentType, Expires, HttpDate};
 use iron::modifiers::Redirect;
 use iron::prelude::*;
-use iron::{self, status, Handler, Listening, Url};
+use iron::{self, status, BeforeMiddleware, Handler, Listening, Url};
 use postgres::Connection;
 use router::NoRoute;
 use semver::{Version, VersionReq};
 use staticfile::Static;
 use std::net::SocketAddr;
+use std::sync::Arc;
 use std::{env, fmt, path::PathBuf, time::Duration};
 
 #[cfg(test)]
-use std::sync::{Arc, Mutex};
+use std::sync::Mutex;
 
 /// Duration of static files for staticfile and DatabaseFileHandler (in seconds)
 const STATIC_FILE_CACHE_DURATION: u64 = 60 * 60 * 24 * 30 * 12; // 12 months
@@ -97,25 +99,28 @@ struct CratesfyiHandler {
     router_handler: Box<dyn Handler>,
     database_file_handler: Box<dyn Handler>,
     static_handler: Box<dyn Handler>,
-    pool: Pool,
+    inject_extensions: InjectExtensions,
 }
 
 impl CratesfyiHandler {
-    fn chain<H: Handler>(pool: Pool, base: H) -> Chain {
+    fn chain<H: Handler>(inject_extensions: InjectExtensions, base: H) -> Chain {
         let hbse = handlebars_engine().expect("Failed to load handlebar templates");
 
         let mut chain = Chain::new(base);
-        chain.link_before(pool);
+        chain.link_before(inject_extensions);
         chain.link_after(hbse);
         chain
     }
 
-    fn new(pool: Pool) -> CratesfyiHandler {
+    fn new(pool: Pool, config: Arc<Config>) -> CratesfyiHandler {
+        let inject_extensions = InjectExtensions { pool, config };
+
         let routes = routes::build_routes();
         let blacklisted_prefixes = routes.page_prefixes();
 
-        let shared_resources = Self::chain(pool.clone(), rustdoc::SharedResourceHandler);
-        let router_chain = Self::chain(pool.clone(), routes.iron_router());
+        let shared_resources =
+            Self::chain(inject_extensions.clone(), rustdoc::SharedResourceHandler);
+        let router_chain = Self::chain(inject_extensions.clone(), routes.iron_router());
         let prefix = PathBuf::from(
             env::var("CRATESFYI_PREFIX")
                 .expect("the CRATESFYI_PREFIX environment variable is not set"),
@@ -132,7 +137,7 @@ impl CratesfyiHandler {
                 Box::new(file::DatabaseFileHandler),
             )),
             static_handler: Box::new(static_handler),
-            pool,
+            inject_extensions,
         }
     }
 }
@@ -201,11 +206,26 @@ impl Handler for CratesfyiHandler {
                     debug!("Path not found: {}; {}", DebugPath(&req.url), e.error);
                 }
 
-                Self::chain(self.pool.clone(), err).handle(req)
+                Self::chain(self.inject_extensions.clone(), err).handle(req)
             })
     }
 }
 
+#[derive(Debug, Clone)]
+struct InjectExtensions {
+    pool: Pool,
+    config: Arc<Config>,
+}
+
+impl BeforeMiddleware for InjectExtensions {
+    fn before(&self, req: &mut Request) -> IronResult<()> {
+        req.extensions.insert::<Pool>(self.pool.clone());
+        req.extensions.insert::<Config>(self.config.clone());
+
+        Ok(())
+    }
+}
+
 struct MatchVersion {
     /// Represents the crate name that was found when attempting to load a crate release.
     ///
@@ -363,24 +383,24 @@ pub struct Server {
 }
 
 impl Server {
-    pub fn start(addr: Option<&str>, reload_templates: bool) -> Self {
+    pub fn start(addr: Option<&str>, reload_templates: bool, config: Arc<Config>) -> Self {
         // Initialize templates
         let _: &page::TemplateData = &*page::TEMPLATE_DATA;
         if reload_templates {
             page::TemplateData::start_template_reloading();
         }
 
-        let server = Self::start_inner(addr.unwrap_or(DEFAULT_BIND), Pool::new());
+        let server = Self::start_inner(addr.unwrap_or(DEFAULT_BIND), Pool::new(), config);
         info!("Running docs.rs web server on http://{}", server.addr());
         server
     }
 
     #[cfg(test)]
-    pub(crate) fn start_test(conn: Arc<Mutex<Connection>>) -> Self {
-        Self::start_inner("127.0.0.1:0", Pool::new_simple(conn.clone()))
+    pub(crate) fn start_test(conn: Arc<Mutex<Connection>>, config: Arc<Config>) -> Self {
+        Self::start_inner("127.0.0.1:0", Pool::new_simple(conn.clone()), config)
     }
 
-    fn start_inner(addr: &str, pool: Pool) -> Self {
+    fn start_inner(addr: &str, pool: Pool, config: Arc<Config>) -> Self {
         // poke all the metrics counters to instantiate and register them
         metrics::TOTAL_BUILDS.inc_by(0);
         metrics::SUCCESSFUL_BUILDS.inc_by(0);
@@ -389,7 +409,7 @@ impl Server {
         metrics::UPLOADED_FILES_TOTAL.inc_by(0);
         metrics::FAILED_DB_CONNECTIONS.inc_by(0);
 
-        let cratesfyi = CratesfyiHandler::new(pool);
+        let cratesfyi = CratesfyiHandler::new(pool, config);
         let inner = Iron::new(cratesfyi)
             .http(addr)
             .unwrap_or_else(|_| panic!("Failed to bind to socket on {}", addr));
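`InjectExtensions` takes over the `BeforeMiddleware` role that `Pool` gives up in the next file: a single middleware clones each piece of shared state into the request's typemap, and handlers read it back with `extension!`. The pattern in isolation, against the iron 0.6 API and with a placeholder `State` type:

```rust
use iron::{BeforeMiddleware, IronResult, Request};
use std::sync::Arc;

#[derive(Debug)]
struct State; // stands in for shared state such as Pool or Config

impl iron::typemap::Key for State {
    type Value = Arc<State>;
}

struct Inject(Arc<State>);

impl BeforeMiddleware for Inject {
    fn before(&self, req: &mut Request) -> IronResult<()> {
        // Runs before every handler; handlers retrieve the value with
        // req.extensions.get::<State>().
        req.extensions.insert::<State>(self.0.clone());
        Ok(())
    }
}
```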
diff --git a/src/web/pool.rs b/src/web/pool.rs
index 2fe15a393..1c827f6bb 100644
--- a/src/web/pool.rs
+++ b/src/web/pool.rs
@@ -1,5 +1,5 @@
 use crate::db::create_pool;
-use iron::{status::Status, typemap, BeforeMiddleware, IronError, IronResult, Request};
+use iron::{status::Status, typemap, IronError, IronResult};
 use postgres::Connection;
 use std::marker::PhantomData;
 
@@ -66,14 +66,6 @@ impl typemap::Key for Pool {
     type Value = Pool;
 }
 
-impl BeforeMiddleware for Pool {
-    fn before(&self, req: &mut Request) -> IronResult<()> {
-        req.extensions.insert::<Pool>(self.clone());
-
-        Ok(())
-    }
-}
-
 pub(crate) enum DerefConnection<'a> {
     Connection(
         r2d2::PooledConnection<r2d2_postgres::PostgresConnectionManager>,
diff --git a/src/web/rustdoc.rs b/src/web/rustdoc.rs
index 62dcb916b..36302e2d0 100644
--- a/src/web/rustdoc.rs
+++ b/src/web/rustdoc.rs
@@ -8,6 +8,7 @@ use super::pool::Pool;
 use super::redirect_base;
 use super::{match_version, MatchSemver};
 use crate::utils;
+use crate::Config;
 use iron::headers::{CacheControl, CacheDirective, Expires, HttpDate};
 use iron::modifiers::Redirect;
 use iron::prelude::*;
@@ -134,10 +135,12 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult<Response> {
             // this URL is actually from a crate-internal path, serve it there instead
             return rustdoc_html_server_handler(req);
         } else {
+            let conn = extension!(req, Pool).get()?;
+            let config = extension!(req, Config);
+
             let path = req.url.path();
             let path = path.join("/");
-            let conn = extension!(req, Pool).get()?;
-            match File::from_path(&conn, &path) {
+            match File::from_path(&conn, &path, &config) {
                 Ok(f) => return Ok(f.serve()),
                 Err(..) => return Err(IronError::new(Nope::ResourceNotFound, status::NotFound)),
             }
@@ -156,6 +159,8 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult<Response> {
     }
 
     let router = extension!(req, Router);
+    let conn = extension!(req, Pool).get()?;
+
     // this handler should never called without crate pattern
     let crate_name = cexpect!(router.find("crate"));
     let mut crate_name = percent_decode(crate_name.as_bytes())
@@ -165,8 +170,6 @@ pub fn rustdoc_redirector_handler(req: &mut Request) -> IronResult<Response> {
     let req_version = router.find("version");
     let mut target = router.find("target");
 
-    let conn = extension!(req, Pool).get()?;
-
     // it doesn't matter if the version that was given was exact or not, since we're redirecting
     // anyway
     let (version, id) = match match_version(&conn, &crate_name, req_version) {
@@ -222,6 +225,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult<Response> {
     );
 
     let conn = extension!(req, Pool).get()?;
+    let config = extension!(req, Config);
     let mut req_path = req.url.path();
 
     // Remove the name and version from the path
@@ -294,14 +298,14 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult<Response> {
     }
 
     // Attempt to load the file from the database
-    let file = if let Ok(file) = File::from_path(&conn, &path) {
+    let file = if let Ok(file) = File::from_path(&conn, &path, &config) {
         file
     } else {
         // If it fails, we try again with /index.html at the end
         path.push_str("/index.html");
         req_path.push("index.html");
 
-        File::from_path(&conn, &path)
+        File::from_path(&conn, &path, &config)
             .map_err(|_| IronError::new(Nope::ResourceNotFound, status::NotFound))?
     };
@@ -342,7 +346,7 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult<Response> {
                 "/{}/{}/{}",
                 name,
                 latest_version,
-                path_for_version(&latest_path, &crate_details.doc_targets, &conn)
+                path_for_version(&latest_path, &crate_details.doc_targets, &conn, &config)
             )
         } else {
             format!("/crate/{}/{}", name, latest_version)
@@ -396,9 +400,14 @@ pub fn rustdoc_html_server_handler(req: &mut Request) -> IronResult<Response> {
 /// `rustdoc/crate/version[/platform]/module/[kind.name.html|index.html]`
 ///
 /// Returns a path that can be appended to `/crate/version/` to create a complete URL.
-fn path_for_version(req_path: &[&str], known_platforms: &[String], conn: &Connection) -> String {
+fn path_for_version(
+    req_path: &[&str],
+    known_platforms: &[String],
+    conn: &Connection,
+    config: &Config,
+) -> String {
     // Simple case: page exists in the latest version, so just change the version number
-    if File::from_path(&conn, &req_path.join("/")).is_ok() {
+    if File::from_path(&conn, &req_path.join("/"), config).is_ok() {
         // NOTE: this adds 'index.html' if it wasn't there before
         return req_path[3..].join("/");
     }
@@ -430,6 +439,7 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult<Response> {
     let version = cexpect!(router.find("version"));
 
     let conn = extension!(req, Pool).get()?;
+    let config = extension!(req, Config);
     let base = redirect_base(req);
 
     let crate_details = cexpect!(CrateDetails::new(&conn, &name, &version));
@@ -453,7 +463,7 @@ pub fn target_redirect_handler(req: &mut Request) -> IronResult<Response> {
         file_path
     };
 
-    let path = path_for_version(&file_path, &crate_details.doc_targets, &conn);
+    let path = path_for_version(&file_path, &crate_details.doc_targets, &conn, &config);
     let url = format!(
         "{base}/{name}/{version}/{path}",
         base = base,
@@ -551,8 +561,9 @@ impl Handler for SharedResourceHandler {
         let suffix = filename.split('.').last().unwrap(); // unwrap is fine: split always works
         if ["js", "css", "woff", "svg"].contains(&suffix) {
             let conn = extension!(req, Pool).get()?;
+            let config = extension!(req, Config);
 
-            if let Ok(file) = File::from_path(&conn, filename) {
+            if let Ok(file) = File::from_path(&conn, filename, &config) {
                 return Ok(file.serve());
             }
         }
diff --git a/src/web/source.rs b/src/web/source.rs
index 4d448f19d..ba8d47657 100644
--- a/src/web/source.rs
+++ b/src/web/source.rs
@@ -4,6 +4,7 @@ use super::file::File as DbFile;
 use super::page::Page;
 use super::pool::Pool;
 use super::MetaData;
+use crate::Config;
 use iron::prelude::*;
 use postgres::Connection;
 use router::Router;
@@ -211,11 +212,12 @@ pub fn source_browser_handler(req: &mut Request) -> IronResult<Response> {
     };
 
     let conn = extension!(req, Pool).get()?;
+    let config = extension!(req, Config);
 
     // try to get actual file first
     // skip if request is a directory
     let file = if !file_path.ends_with('/') {
-        DbFile::from_path(&conn, &file_path).ok()
+        DbFile::from_path(&conn, &file_path, &config).ok()
     } else {
         None
     };