diff --git a/README.md b/README.md index 9420c72177..527f04cbba 100644 --- a/README.md +++ b/README.md @@ -56,6 +56,8 @@ Use the [Conventional Commits](https://www.conventionalcommits.org) format for y In the PR description, please include a summary of the changes and any relevant context. Also, please make sure to update the package versions following the [Semantic Versioning](https://semver.org/) rules. +See also: [Code guidelines](doc/code-guidelines.md) + ### Releases The repository has several CI workflows that automatically release new versions of the various components when a new Github release is published. diff --git a/apps/hermes/server/Cargo.toml b/apps/hermes/server/Cargo.toml index 94a92b7e71..5b532a58ef 100644 --- a/apps/hermes/server/Cargo.toml +++ b/apps/hermes/server/Cargo.toml @@ -74,3 +74,46 @@ panic = 'abort' [profile.dev] panic = 'abort' + + +[lints.rust] +unsafe_code = "deny" + +[lints.clippy] +# See [Rust code guidelines](../../doc/rust-code-guidelines.md) + +wildcard_dependencies = "deny" + +collapsible_if = "allow" +collapsible_else_if = "allow" + +allow_attributes_without_reason = "warn" + +# Panics +expect_used = "warn" +fallible_impl_from = "warn" +indexing_slicing = "warn" +panic = "warn" +panic_in_result_fn = "warn" +string_slice = "warn" +todo = "warn" +unchecked_duration_subtraction = "warn" +unreachable = "warn" +unwrap_in_result = "warn" +unwrap_used = "warn" + +# Correctness +cast_lossless = "warn" +cast_possible_truncation = "warn" +cast_possible_wrap = "warn" +cast_sign_loss = "warn" +collection_is_never_read = "warn" +match_wild_err_arm = "warn" +path_buf_push_overwrite = "warn" +read_zero_byte_vec = "warn" +same_name_method = "warn" +suspicious_operation_groupings = "warn" +suspicious_xor_used_as_pow = "warn" +unused_self = "warn" +used_underscore_binding = "warn" +while_float = "warn" diff --git a/apps/hermes/server/build.rs b/apps/hermes/server/build.rs index 939e5a80e0..a42a1de20b 100644 --- a/apps/hermes/server/build.rs +++ b/apps/hermes/server/build.rs @@ -13,6 +13,7 @@ fn main() { // Build the wormhole and google protobufs using Rust's prost_build crate. // The generated Rust code is placed in the OUT_DIR (env var set by cargo). // `network/wormhole.rs` then includes the generated code into the source while compilation is happening. + #[allow(clippy::expect_used, reason = "failing at build time is fine")] tonic_build::configure() .build_server(false) .compile( diff --git a/apps/hermes/server/src/api.rs b/apps/hermes/server/src/api.rs index 03365e8b5e..7a9807de17 100644 --- a/apps/hermes/server/src/api.rs +++ b/apps/hermes/server/src/api.rs @@ -140,7 +140,7 @@ where // Initialize Axum Router. Note the type here is a `Router` due to the use of the // `with_state` method which replaces `Body` with `State` in the type signature. let app = Router::new(); - #[allow(deprecated)] + #[allow(deprecated, reason = "serving deprecated API endpoints")] let app = app .merge(SwaggerUi::new("/docs").url("/docs/openapi.json", ApiDoc::openapi())) .route("/", get(rest::index)) diff --git a/apps/hermes/server/src/api/rest.rs b/apps/hermes/server/src/api/rest.rs index f4d4faac90..3fb1208e95 100644 --- a/apps/hermes/server/src/api/rest.rs +++ b/apps/hermes/server/src/api/rest.rs @@ -119,6 +119,7 @@ where } } #[cfg(test)] +#[allow(clippy::unwrap_used, reason = "tests")] mod tests { use { super::*, diff --git a/apps/hermes/server/src/api/rest/get_vaa_ccip.rs b/apps/hermes/server/src/api/rest/get_vaa_ccip.rs index d4af86d67d..858eb71945 100644 --- a/apps/hermes/server/src/api/rest/get_vaa_ccip.rs +++ b/apps/hermes/server/src/api/rest/get_vaa_ccip.rs @@ -51,15 +51,16 @@ pub async fn get_vaa_ccip( where S: Aggregates, { + let data: [u8; 40] = *params.data; let price_id: PriceIdentifier = PriceIdentifier::new( - params.data[0..32] + data[0..32] .try_into() .map_err(|_| RestError::InvalidCCIPInput)?, ); validate_price_ids(&state, &[price_id], false).await?; let publish_time = UnixTimestamp::from_be_bytes( - params.data[32..40] + data[32..40] .try_into() .map_err(|_| RestError::InvalidCCIPInput)?, ); diff --git a/apps/hermes/server/src/api/types.rs b/apps/hermes/server/src/api/types.rs index aa430773b5..9e788f359e 100644 --- a/apps/hermes/server/src/api/types.rs +++ b/apps/hermes/server/src/api/types.rs @@ -396,6 +396,7 @@ impl Display for AssetType { } #[cfg(test)] +#[allow(clippy::unwrap_used, reason = "tests")] mod tests { use super::*; diff --git a/apps/hermes/server/src/config.rs b/apps/hermes/server/src/config.rs index 248fd29e90..e4c21bc919 100644 --- a/apps/hermes/server/src/config.rs +++ b/apps/hermes/server/src/config.rs @@ -14,7 +14,10 @@ mod wormhole; #[command(author = crate_authors!())] #[command(about = crate_description!())] #[command(version = crate_version!())] -#[allow(clippy::large_enum_variant)] +#[allow( + clippy::large_enum_variant, + reason = "performance is not a concern for config" +)] pub enum Options { /// Run the Hermes Price Service. Run(RunOptions), diff --git a/apps/hermes/server/src/config/cache.rs b/apps/hermes/server/src/config/cache.rs index cfa89f0732..3b2756318f 100644 --- a/apps/hermes/server/src/config/cache.rs +++ b/apps/hermes/server/src/config/cache.rs @@ -14,5 +14,5 @@ pub struct Options { #[arg(long = "cache-size-slots")] #[arg(env = "CACHE_SIZE_SLOTS")] #[arg(default_value = "1600")] - pub size_slots: u64, + pub size_slots: usize, } diff --git a/apps/hermes/server/src/main.rs b/apps/hermes/server/src/main.rs index 6103b9f860..85b4d104c3 100644 --- a/apps/hermes/server/src/main.rs +++ b/apps/hermes/server/src/main.rs @@ -1,5 +1,5 @@ use { - anyhow::Result, + anyhow::{Context, Result}, clap::{CommandFactory, Parser}, futures::future::join_all, lazy_static::lazy_static, @@ -53,9 +53,13 @@ async fn init() -> Result<()> { // Listen for Ctrl+C so we can set the exit flag and wait for a graceful shutdown. spawn(async move { tracing::info!("Registered shutdown signal handler..."); - tokio::signal::ctrl_c().await.unwrap(); - tracing::info!("Shut down signal received, waiting for tasks..."); - let _ = EXIT.send(true); + match tokio::signal::ctrl_c().await { + Ok(()) => { + tracing::info!("Shut down signal received, waiting for tasks..."); + let _ = EXIT.send(true); + } + Err(err) => tracing::warn!("failed to register shutdown signal handler: {err}"), + } }); // Spawn all worker tasks, and wait for all to complete (which will happen if a shutdown @@ -83,8 +87,8 @@ async fn init() -> Result<()> { let defaults = arg .get_default_values() .iter() - .map(|v| v.to_str().unwrap()) - .collect::>() + .map(|v| v.to_str().context("non-utf8 default arg value")) + .collect::>>()? .join(","); println!( diff --git a/apps/hermes/server/src/network/wormhole.rs b/apps/hermes/server/src/network/wormhole.rs index 5691af4478..623a23d276 100644 --- a/apps/hermes/server/src/network/wormhole.rs +++ b/apps/hermes/server/src/network/wormhole.rs @@ -43,7 +43,10 @@ impl std::fmt::Display for GuardianSet { /// BridgeData extracted from wormhole bridge account, due to no API. #[derive(borsh::BorshDeserialize)] -#[allow(dead_code)] +#[allow( + dead_code, + reason = "we have to deserialize all fields but we don't use all of them" +)] pub struct BridgeData { pub guardian_set_index: u32, pub last_lamports: u64, @@ -52,7 +55,10 @@ pub struct BridgeData { /// BridgeConfig extracted from wormhole bridge account, due to no API. #[derive(borsh::BorshDeserialize)] -#[allow(dead_code)] +#[allow( + dead_code, + reason = "we have to deserialize all fields but we don't use all of them" +)] pub struct BridgeConfig { pub guardian_set_expiration_time: u32, pub fee: u64, @@ -75,7 +81,11 @@ pub struct GuardianSetData { /// /// The following module structure must match the protobuf definitions, so that the generated code /// can correctly reference modules from each other. -#[allow(clippy::enum_variant_names)] +#[allow( + clippy::enum_variant_names, + clippy::allow_attributes_without_reason, + reason = "generated code" +)] mod proto { pub mod node { pub mod v1 { diff --git a/apps/hermes/server/src/serde.rs b/apps/hermes/server/src/serde.rs index d119d1d251..e6c807fafd 100644 --- a/apps/hermes/server/src/serde.rs +++ b/apps/hermes/server/src/serde.rs @@ -17,13 +17,16 @@ pub mod hex { R: FromHex, ::Error: std::fmt::Display, { - let s: String = Deserialize::deserialize(d)?; - let p = s.starts_with("0x") || s.starts_with("0X"); - let s = if p { &s[2..] } else { &s[..] }; - hex::serde::deserialize(s.into_deserializer()) + let full: String = Deserialize::deserialize(d)?; + let hex = full + .strip_prefix("0x") + .or_else(|| full.strip_prefix("0X")) + .unwrap_or(&full); + hex::serde::deserialize(hex.into_deserializer()) } #[cfg(test)] + #[allow(clippy::unwrap_used, reason = "tests")] mod tests { use serde::Deserialize; diff --git a/apps/hermes/server/src/state.rs b/apps/hermes/server/src/state.rs index d03fec6ca4..a5ebeee020 100644 --- a/apps/hermes/server/src/state.rs +++ b/apps/hermes/server/src/state.rs @@ -56,7 +56,7 @@ struct State { pub fn new( update_tx: Sender, - cache_size: u64, + cache_size: usize, benchmarks_endpoint: Option, readiness_staleness_threshold: Duration, readiness_max_allowed_slot_lag: Slot, @@ -87,7 +87,7 @@ pub mod test { }; pub async fn setup_state( - cache_size: u64, + cache_size: usize, ) -> (Arc, Receiver) { let (update_tx, update_rx) = tokio::sync::broadcast::channel(1000); let state = super::new(update_tx, cache_size, None, Duration::from_secs(30), 10); diff --git a/apps/hermes/server/src/state/aggregate.rs b/apps/hermes/server/src/state/aggregate.rs index 9202aaee2a..237b0c0fa5 100644 --- a/apps/hermes/server/src/state/aggregate.rs +++ b/apps/hermes/server/src/state/aggregate.rs @@ -1,3 +1,4 @@ +use anyhow::Context; use log::warn; #[cfg(test)] use mock_instant::{SystemTime, UNIX_EPOCH}; @@ -164,8 +165,9 @@ pub struct AccumulatorMessages { } impl AccumulatorMessages { + #[allow(clippy::cast_possible_truncation, reason = "intended truncation")] pub fn ring_index(&self) -> u32 { - (self.slot % self.ring_size as u64) as u32 + (self.slot % u64::from(self.ring_size)) as u32 } } @@ -492,7 +494,12 @@ where let state_data = self.into().data.read().await; let price_feeds_metadata = PriceFeedMeta::retrieve_price_feeds_metadata(self) .await - .unwrap(); + .unwrap_or_else(|err| { + tracing::error!( + "unexpected failure of PriceFeedMeta::retrieve_price_feeds_metadata(self): {err}" + ); + Vec::new() + }); let current_time = SystemTime::now(); @@ -529,8 +536,8 @@ where latest_completed_unix_timestamp: state_data.latest_completed_update_time.and_then( |t| { t.duration_since(UNIX_EPOCH) - .map(|d| d.as_secs() as i64) .ok() + .and_then(|d| d.as_secs().try_into().ok()) }, ), price_feeds_metadata_len: price_feeds_metadata.len(), @@ -547,7 +554,11 @@ fn build_message_states( let wormhole_merkle_message_states_proofs = construct_message_states_proofs(&accumulator_messages, &wormhole_merkle_state)?; - let current_time: UnixTimestamp = SystemTime::now().duration_since(UNIX_EPOCH)?.as_secs() as _; + let current_time: UnixTimestamp = SystemTime::now() + .duration_since(UNIX_EPOCH)? + .as_secs() + .try_into() + .context("timestamp overflow")?; accumulator_messages .raw_messages @@ -663,7 +674,12 @@ where )); } else { // Use the publish time from the first end message - end_messages[0].message.publish_time() - window_seconds as i64 + end_messages + .first() + .context("no messages found")? + .message + .publish_time() + - i64::try_from(window_seconds).context("window size overflow")? }; let start_time = RequestTime::FirstAfter(start_timestamp); @@ -807,6 +823,12 @@ fn calculate_twap(start_message: &TwapMessage, end_message: &TwapMessage) -> Res } #[cfg(test)] +#[allow( + clippy::unwrap_used, + clippy::indexing_slicing, + clippy::cast_possible_wrap, + reason = "tests" +)] mod test { use { super::*, @@ -892,11 +914,11 @@ mod test { ) -> PriceFeedMessage { PriceFeedMessage { feed_id: [seed; 32], - price: seed as _, - conf: seed as _, + price: seed.into(), + conf: seed.into(), exponent: 0, - ema_conf: seed as _, - ema_price: seed as _, + ema_conf: seed.into(), + ema_price: seed.into(), publish_time, prev_publish_time, } @@ -1239,7 +1261,7 @@ mod test { PriceIdentifier::new([100; 32]), PriceIdentifier::new([200; 32]) ], - RequestTime::FirstAfter(slot as i64), + RequestTime::FirstAfter(slot.into()), ) .await .is_err()); @@ -1631,6 +1653,7 @@ mod test { } } #[cfg(test)] +#[allow(clippy::unwrap_used, reason = "tests")] /// Unit tests for the core TWAP calculation logic in `calculate_twap` mod calculate_twap_unit_tests { use super::*; diff --git a/apps/hermes/server/src/state/aggregate/metrics.rs b/apps/hermes/server/src/state/aggregate/metrics.rs index aba2464168..1612675abd 100644 --- a/apps/hermes/server/src/state/aggregate/metrics.rs +++ b/apps/hermes/server/src/state/aggregate/metrics.rs @@ -106,7 +106,12 @@ impl Metrics { // Clear out old slots while self.first_observed_time_of_slot.len() > MAX_SLOT_OBSERVATIONS { - let oldest_slot = *self.first_observed_time_of_slot.keys().next().unwrap(); + #[allow(clippy::expect_used, reason = "len checked above")] + let oldest_slot = *self + .first_observed_time_of_slot + .keys() + .next() + .expect("first_observed_time_of_slot is empty"); self.first_observed_time_of_slot.remove(&oldest_slot); } } diff --git a/apps/hermes/server/src/state/aggregate/wormhole_merkle.rs b/apps/hermes/server/src/state/aggregate/wormhole_merkle.rs index cbe40a1d5a..5355b270fb 100644 --- a/apps/hermes/server/src/state/aggregate/wormhole_merkle.rs +++ b/apps/hermes/server/src/state/aggregate/wormhole_merkle.rs @@ -142,6 +142,13 @@ pub fn construct_update_data(mut messages: Vec) -> Re } #[cfg(test)] +#[allow( + clippy::unwrap_used, + clippy::cast_possible_wrap, + clippy::panic, + clippy::indexing_slicing, + reason = "tests" +)] mod test { use { super::*, @@ -175,6 +182,7 @@ mod test { } #[test] + fn test_construct_update_data_works_on_mixed_slot_and_big_size() { let mut messages = vec![]; diff --git a/apps/hermes/server/src/state/benchmarks.rs b/apps/hermes/server/src/state/benchmarks.rs index 015668914d..b83a6de792 100644 --- a/apps/hermes/server/src/state/benchmarks.rs +++ b/apps/hermes/server/src/state/benchmarks.rs @@ -6,7 +6,7 @@ use { State, }, crate::api::types::PriceUpdate, - anyhow::Result, + anyhow::{Context, Result}, base64::{engine::general_purpose::STANDARD as base64_standard_engine, Engine as _}, pyth_sdk::PriceIdentifier, reqwest::Url, @@ -89,7 +89,7 @@ where .as_ref() .ok_or_else(|| anyhow::anyhow!("Benchmarks endpoint is not set"))? .join(&format!("/v1/updates/price/{}", publish_time)) - .unwrap(); + .context("failed to construct price endpoint")?; let mut request = reqwest::Client::new() .get(endpoint) diff --git a/apps/hermes/server/src/state/cache.rs b/apps/hermes/server/src/state/cache.rs index 8a0c1162af..748615c2be 100644 --- a/apps/hermes/server/src/state/cache.rs +++ b/apps/hermes/server/src/state/cache.rs @@ -74,8 +74,8 @@ impl MessageState { } #[derive(Clone, Copy)] -#[allow(dead_code)] pub enum MessageStateFilter { + #[allow(dead_code, reason = "can be useful later")] All, Only(MessageType), } @@ -94,11 +94,11 @@ pub struct CacheState { accumulator_messages_cache: AccumulatorMessagesCache, wormhole_merkle_state_cache: WormholeMerkleStateCache, message_cache: MessageCache, - cache_size: u64, + cache_size: usize, } impl CacheState { - pub fn new(size: u64) -> Self { + pub fn new(size: usize) -> Self { Self { accumulator_messages_cache: Arc::new(RwLock::new(BTreeMap::new())), wormhole_merkle_state_cache: Arc::new(RwLock::new(BTreeMap::new())), @@ -164,7 +164,7 @@ where cache.insert(time, message_state); // Remove the earliest message states if the cache size is exceeded - while cache.len() > self.into().cache_size as usize { + while cache.len() > self.into().cache_size { cache.pop_first(); } } @@ -238,7 +238,7 @@ where // Messages don't exist, store them cache.insert(slot, accumulator_messages); - while cache.len() > self.into().cache_size as usize { + while cache.len() > self.into().cache_size { cache.pop_first(); } Ok(true) @@ -264,7 +264,7 @@ where // State doesn't exist, store it cache.insert(slot, wormhole_merkle_state); - while cache.len() > self.into().cache_size as usize { + while cache.len() > self.into().cache_size { cache.pop_first(); } Ok(true) @@ -344,6 +344,7 @@ async fn retrieve_message_state( } #[cfg(test)] +#[allow(clippy::unwrap_used, reason = "tests")] mod test { use { super::*, diff --git a/apps/hermes/server/src/state/metrics.rs b/apps/hermes/server/src/state/metrics.rs index fd4dedfa13..5f23abaf61 100644 --- a/apps/hermes/server/src/state/metrics.rs +++ b/apps/hermes/server/src/state/metrics.rs @@ -51,7 +51,10 @@ where async fn encode(&self) -> String { let registry = self.into().registry.read().await; let mut buffer = String::new(); - encode(&mut buffer, ®istry).unwrap(); + if let Err(err) = encode(&mut buffer, ®istry) { + tracing::error!("failed to encode metrics: {err}"); + return String::new(); + } buffer } } diff --git a/apps/hermes/server/src/state/wormhole.rs b/apps/hermes/server/src/state/wormhole.rs index f57d2b5431..845e695d28 100644 --- a/apps/hermes/server/src/state/wormhole.rs +++ b/apps/hermes/server/src/state/wormhole.rs @@ -4,7 +4,7 @@ use { State, }, crate::network::wormhole::GuardianSet, - anyhow::{anyhow, ensure, Result}, + anyhow::{anyhow, ensure, Context, Result}, chrono::DateTime, pythnet_sdk::{ wire::v1::{WormholeMessage, WormholePayload}, @@ -104,8 +104,8 @@ where let vaa = serde_wormhole::from_slice::>(&vaa_bytes)?; // Log VAA Processing. - let vaa_timestamp = DateTime::from_timestamp(vaa.timestamp as i64, 0) - .ok_or(anyhow!("Failed to parse VAA Tiestamp"))? + let vaa_timestamp = DateTime::from_timestamp(vaa.timestamp.into(), 0) + .context("Failed to parse VAA Timestamp")? .format("%Y-%m-%dT%H:%M:%S.%fZ") .to_string(); @@ -219,7 +219,7 @@ fn verify_vaa<'a>( // The address is the last 20 bytes of the Keccak256 hash of the public key let address: [u8; 32] = Keccak256::new_with_prefix(&pubkey[1..]).finalize().into(); - let address: [u8; 20] = address[address.len() - 20..].try_into()?; + let address: [u8; 20] = address[32 - 20..].try_into()?; // Confirm the recovered address matches an address in the guardian set. if guardian_set.keys.get(signer_id) == Some(&address) { diff --git a/doc/code-guidelines.md b/doc/code-guidelines.md new file mode 100644 index 0000000000..819a92c21c --- /dev/null +++ b/doc/code-guidelines.md @@ -0,0 +1,243 @@ +# Code Guidelines + +# Language specific + +[Rust code guidelines](rust-code-guidelines.md) + +# Make your services as resilient to errors as possible + +- Perform more benchmarking/add benchmarking tests through our codebase. Currently +there are portions of the codebase that we have unknown performance for that +may become more important as we scale. Most languages have benchmark test +capability, rust has it built in for example. +- Implement error recovery even for unlikely cases. Think about how the service can continue to work after different failures. +- The service should continue to work (if possible) if its dependencies are unavailable or broken. +- Avoid the possibility of leaving the service in a state where it no longer able to start or work properly. The service should be able to recover from things like invalid files or unexpected database state. If that is not possible, provide clear error messages that explain what should be done to fix it. +- Minimize the number of dependencies required for a service to start. +- It should be possible to run multiple instances of each service at the same time. + +# Set up essential tooling + +- Use strongest lint settings. It is better to have at minimum pedantic warnings +on all projects. Good examples of bad settings: allowing `any` globally in +typescript, ignoring integer clippy type warnings in Rust, etc. +- Add extensive logging, metrics and tracing capability early, much of our code is missing +metrics, good log handling, or ability to do introspection on code that has +failed in retrospect. Good example: hermes launch. + +# Keep the code readable and maintainable + +- Make heavy use of types to define behaviour. In general introducing a type can be +thought of as introducing a unit test. For example: + + ```rust + struct PositiveTime(i64); + + impl TryFrom for PositiveTime { + type Err = (); + fn try_from(n: i64) -> Result { + if n < 0 { + return Err(()); + } + return Ok(Self(n)); + } + } + + ``` + + This can be thought of reducing the valid range of i64 to one we prefer + (given that i64 is the native Linux time type but often we do not want these) + that we can enforce a compile-time. The benefit in types over unit tests is + simply use-at-site of a type ensure behaviour everywhere and reducing the + amount of unwanted behaviour in a codebase. + + Currently we do not try hard enough to isolate behaviours through types. + +- Avoid monolithic event handlers, and avoid state handling in logic. Some +stateful code in our repos mixes the logic handling with the state handle +code which produces very long, hard to reason about code which ends up as +a rather large inline state machine: + + Good: + + ```tsx + function handleEvent(e, state) { + switch(e.type) { + case Event.Websocket: handleWebsocketEvent(e, state.websockets); + case Event.PythNet: handlePythnetEvent(e, state.pyth_handle); + case ... + } + } + + ``` + + Bad: + + ```tsx + function handleEvent(e) { + // Many inlined state tracking vars. Not much better than globals. + var latstPythNetupdateTime = DateTime.now(); + var clientsWaiting = {}; + var ... + + switch(e.type) { + // lots of inline handling + } + } + + ``` + +- Avoid catch-all modules, I.E: `types/`, `utils/` +- Favor Immutability and Idempotency. Both are a huge source of reducing logic bugs. +- State should whenever possible flow top-down, I.E: create at entry point and +flow to other components. Global state should be avoided and no state should be +hidden in separate modules. + + Good: + + ```tsx + // main.ts + function main() { + const db = db.init(); + initDb(db); + } + + ``` + + Bad: + + ```tsx + // main.ts + const { db } = require('db'); + function() { + initDb(); // Databaes not passed, implies global use. + } + + ``` + +- For types/functions that are only used once, keep them close to the +definition. If they are re-used, try and lift them only up to a common +parent, in the following example types/functions only lift as far +as they are useful: + + Example File Hierarchy: + + ``` + lib/routes.rs:validateUserId() + lib/routes/user.rs:type RequestUser + lib/routes/user/register.rs:generateRandomUsername() + + ``` + + Good: + + ```tsx + // Definition only applies to this function, keep locality. + type FeedResponse = { + id: FeedId, + feed: Feed, + }; + + // Note the distinction between FeedResponse/Feed for DDD. + function getFeed(id: FeedId, db: Db): FeedResponse { + let feed: Feed = db.execute(FEED_QUERY, [id]); + return { id, feed: feed, } + } + + ``` + + Bad: + + ```tsx + import { FeedResponse } from 'types'; + function getFeed(id: FeedId, db: Db): FeedResponse { + let feed = db.execute(FEED_QUERY, [id]); + return { id, feed: feed, } + } + + ``` + +- Map functionality into submodules when a module defines a category of handlers. +This help emphasise where code re-use should happen, for example: + + Good: + + ``` + src/routes/user/register.ts + src/routes/user/login.ts + src/routes/user/add_role.ts + src/routes/index.ts + + ``` + + Bad: + + ```tsx + // src/index.ts + function register() { ... } + function login() { ... } + function addRole() { ... } + function index() { ... } + + ``` + + Not only does this make large unwieldy files but it encourages things like + `types/` catch alls, or unnecessary sharing of functionality. For example + imagine a `usernameAsBase58` function thrown into this file, that then + looks useful within an unrelated to users function, it can be tempting to + abuse the utility function or move it to a vague catch-all location. Focus + on clear, API boundaries even within our own codebase. + +- When possible use layered architecture (onion/hexagonal/domain driven design) where +we separate API processing, business logic, and data logic. The benefit of this +is it defines API layers within the application itself: + + Good: + + ```tsx + // web/user/register.ts + import { registerUser, User } from 'api/user/register.ts'; + + // Note locality: one place use functions stay near, no utils/ + function verifyUsername( ... + function verifyPassword( ... + + // Locality again. + type RegisterRequest = { + ... + }; + + function register(req: RegisterRequest): void { + // Validation Logic Only + verifyUsername(req.username); + verifyPassword(req.password); + + // Business Logic Separate + registerUser({ + username: req.username, + password: req.password, + }); + } + + ``` + + ```tsx + // api/user/register.ts + import { storeUser, DbUser } from 'db/user'; + + function registerUser(user: User) { + const user = fetchByUsername(user.username); + if (user) { + throw "User Exists; + } + + // Note again that the type used here differs from User (DbUser) which + // prevents code breakage (such as if the schema is updated but the + // code is not. + storeUser({ + username: user.username, + password: hash(user.password), + }); + } + + ``` diff --git a/doc/rust-code-guidelines.md b/doc/rust-code-guidelines.md new file mode 100644 index 0000000000..0dfe48ce4d --- /dev/null +++ b/doc/rust-code-guidelines.md @@ -0,0 +1,516 @@ +# Rust code guidelines + +Note: general [Code Guidelines](code-guidelines.md) also apply. + +# Tooling + +**Rust toolchain version:** always pin a specific version using `rust-toolchain` or `rust-toolchain.toml` file. Use stable versions unless nightly is absolutely required. Periodically update the pinned version when possible. + +`Cargo.lock` should be checked in to git and used in CI and for building packages/binaries. Versions of other tools used in CI should be pinned too. + +**Formatting:** use `rustfmt` with default configuration. Use `taplo` with default configuration for `Cargo.toml` files. + +**Linting: use `clippy` with the following configuration:** + +- (expand to see clippy configuration) + + ```toml + [lints.rust] + unsafe_code = "deny" + + [lints.clippy] + wildcard_dependencies = "deny" + + collapsible_if = "allow" + collapsible_else_if = "allow" + + allow_attributes_without_reason = "warn" + + # Panics + expect_used = "warn" + fallible_impl_from = "warn" + indexing_slicing = "warn" + panic = "warn" + panic_in_result_fn = "warn" + string_slice = "warn" + todo = "warn" + unchecked_duration_subtraction = "warn" + unreachable = "warn" + unwrap_in_result = "warn" + unwrap_used = "warn" + + # Correctness + cast_lossless = "warn" + cast_possible_truncation = "warn" + cast_possible_wrap = "warn" + cast_sign_loss = "warn" + collection_is_never_read = "warn" + match_wild_err_arm = "warn" + path_buf_push_overwrite = "warn" + read_zero_byte_vec = "warn" + same_name_method = "warn" + suspicious_operation_groupings = "warn" + suspicious_xor_used_as_pow = "warn" + unused_self = "warn" + used_underscore_binding = "warn" + while_float = "warn" + ``` + + +The recommendations on this page should help with dealing with these lints. + +Refer also to [Clippy lints documentation](https://rust-lang.github.io/rust-clippy/master/index.html) for more information about the lints. + +If the lint is a false positive, put `#[allow(lint_name, reason = "...")]` on the relevant line or block and and specify the reason why the code is correct. + +Many of the lints (e.g. most of the panic-related lints) can be allowed globally for tests and other non-production code. + +# Essential crates + +- `tracing` for logging and tracing +- `opentelemetry` for metrics +- `anyhow` for error handling in services +- `thiserror` for error handling in libraries +- `backoff` for retrying (take note of the distinction between transient and permanent errors). +- `axum` and `utoipa` for API server implementation +- `reqwest` for HTTP client +- `chrono` for date and time manipulation +- `config` for flexible config loading +- `humantime` for printing and parsing duration +- `tokio` for async runtime +- `itertools` for extra operations on iterators +- `derive_more` and `strum` for more derives +- `proptest`, `mry` for testing +- `criterion` for benchmarking + +# Avoiding panics + +Panics are unrecoverable errors that unwind the current thread. If a panic occurs in the main thread, the whole program may terminate. If a panic occurs in a task spawned on an async runtime, that task will terminate. + +Panics are dangerous because they can arise from concise code constructions (such as slice indexing with `a[n]`) and have vast effects on the program. Minimize the possibility of panics with the help of the relevant `clippy` lints. Handle the unexpected case properly no matter how unlikely it seems. + +**Common sources of panics:** + +| Source of panic | Non-panicking alternatives | +| --- | --- | +| `.unwrap()`, `.expect(...)`, +`panic!()`, `assert*!()` | `Result`-based handling using `anyhow` crate: +`.context()?` , `.with_context()?` , +`bail!()`, `ensure!()`. | +| `unimplemented!()`, `todo!()`, `unreachable!()` | β€” | +| Indexing out of bounds on slices, strings, `Vec`, `VecDeque`: `&arr[x]` , `&arr[x..y]` | `.get()`, `.get_mut()` | +| Indexing with a range if min > max: `&arr[to..from]` | `.get()`, `.get_mut()` | +| Indexing a `HashMap` or `BTreeMap` on a non-existing key: `&map[key]` | `.get()`, `.get_mut()`, `.entry()` | +| Indexing a non-ASCII string not at char boundaries: `&"😒😒😒"[0..1]` | `.get()`, `.get_mut()` | +| `Vec` methods: `.insert()`, `.remove()`, `.drain()`, `.split_off()` and many more | There are checked alternatives for some but not all of them. | +| Division by zero: `a / b`, `a % b` | `.checked_div()`, `.checked_rem()`, `/ NonZero*` | + +Think about the cases that could cause your code to panic. Try to rewrite the code in a way that avoids a possible panic. + +Here are some tips to solve `clippy` warnings and avoid panics: + +- **Use `Result` type and return the error to the caller.** Use `.context()` or `.with_context()` from `anyhow` crate to add relevant information to the error. Use `.ok_or_else()` or `.context()` to convert `Option` to `Result`. + + β›” Bad: + + ```rust + pub fn best_bid(response: &Response) -> String { + response.bids[0].clone() + } + ``` + + βœ… Good: + + ```rust + pub fn best_bid(response: &Response) -> anyhow::Result { + Ok(response.bids.first().context("expected 1 item in bids, got 0")?.clone()) + } + ``` + +- **If propagation with `?` doesn’t work because of error type, convert the error type with `.map_err()`.** + + βœ… Good (`binary_search` returns `Err(index)` instead of a well-formed error, so we create our own error value): + + ```rust + items + .binary_search(&needle) + .map_err(|_| anyhow!("item not found: {:?}", needle))?; + ``` + + βœ… Good (`?` can’t convert from `Box` to `anyhow::Error`, but there is a function that converts it): + + ```rust + let info = message.info().map_err(anyhow::Error::from_boxed)?; + ``` + +- **If the error is in a non-Result function inside an iterator chain (e.g. `.map()`) or a combinator (e.g. `.unwrap_or_else()`), consider rewriting it as a plain `for` loop or `match`/`if let` to allow error propagation.** (If you’re determined to make iterators work, `fallible-iterator` crate may also be useful.) + + β›” Bad (unwraps the error just because `?` doesn’t work): + + ```rust + fn check_files(paths: &[&Path]) -> anyhow::Result<()> { + let good_paths: Vec<&Path> = paths + .iter() + .copied() + .filter(|path| fs::read_to_string(path).unwrap().contains("magic")) + .collect(); + //... + } + ``` + + βœ… Good: + + ```rust + fn check_files(paths: &[&Path]) -> anyhow::Result<()> { + let mut good_paths = Vec::new(); + for path in paths { + if fs::read_to_string(path)?.contains("magic") { + good_paths.push(path); + } + } + //... + } + ``` + +- **Log the error and return early or skip an item.** + + β›” Bad (panics if we add too many publishers): + + ```rust + let publisher_count = u16::try_from(prices.len()) + .expect("too many publishers"); + ``` + + βœ… Good: + + ```rust + let Ok(publisher_count) = u16::try_from(prices.len()) else { + error!("too many publishers ({})", prices.len()); + return Default::default(); + }; + ``` + + β›” Bad (terminates on error) (and yes, [it can fail](https://www.greyblake.com/blog/when-serde-json-to-string-fails/)): + + ```rust + loop { + //... + yield Ok( + serde_json::to_string(&response).expect("should not fail") + "\n" + ); + } + ``` + + βœ… Good (`return` instead of `continue` could also be reasonable): + + ```rust + loop { + //... + match serde_json::to_string(&response) { + Ok(json) => { + yield Ok(json + "\n"); + } + Err(err) => { + error!("json serialization error for {:?}: {}", response, err); + continue; + } + } + } + ``` + +- **Supply a sensible default:** + + β›” Bad: + + ```rust + let interval_us: u64 = snapshot_interval + .as_micros() + .try_into() + .expect("snapshot_interval overflow"); + ``` + + βœ… Better: + + ```rust + let interval_us = + u64::try_from(snapshot_interval.as_micros()).unwrap_or_else(|_| { + error!( + "invalid snapshot_interval in config: {:?}, defaulting to 1 min", + snapshot_interval + ); + 60_000_000 // 1 min + }); + ``` + +- **Avoid checking a condition and then unwrapping.** Instead, combine the check and access to the value using `match`, `if let`, and combinators such as `map_or`, `some_or`, etc. + + 🟑 Not recommended: + + ```rust + if !values.is_empty() { + process_one(&values[0]); + } + ``` + + βœ… Good: + + ```rust + if let Some(first_value) = values.first() { + process_one(first_value); + } + ``` + + 🟑 Not recommended: + + ```rust + .filter(|price| { + median_price.is_none() || price < &median_price.expect("should not fail") + }) + ``` + + βœ… Good: + + ```rust + .filter(|price| median_price.is_none_or(|median_price| price < &median_price)) + ``` + + 🟑 Not recommended: + + ```rust + if data.len() < header_len { + bail!("data too short"); + } + let header = &data[..header_len]; + let payload = &data[header_len..]; + ``` + + βœ… Good: + + ```rust + let (header, payload) = data + .split_at_checked(header_len) + .context("data too short")?; + ``` + +- **Avoid the panic by introducing a constant or move the panic inside the constant initialization.** Panicking in const initializers is safe because it happens at compile time. + + 🟑 Not recommended: + + ```rust + price.unwrap_or(Price::new(PRICE_FEED_EPS).expect("should never fail")) + ``` + + βœ… Good: + + ```rust + #[allow(clippy::unwrap_used, reason = "safe in const")] + const MIN_POSITIVE_PRICE: Price = + Price(NonZeroI64::new(PRICE_FEED_EPS).unwrap()); + ``` + +- **If it’s not possible to refactor the code, allow the lint and specify the reason why the code cannot fail. Prefer `.expect()` over `.unwrap()` and specify the failure in the argument.** This is reasonable if the failure is truly impossible or if a failure would be so critical that the service cannot continue working. + + 🟑 Not recommended: + + ```rust + Some(prices[prices.len() / 2]) + ``` + + βœ… Good: + + ```rust + #[allow( + clippy::indexing_slicing, + reason = "prices are not empty, prices.len() / 2 < prices.len()" + )] + Some(prices[prices.len() / 2]) + ``` + + 🟑 Not recommended: + + ```rust + let expiry_time = expiry_times.get(self.active_index).unwrap(); + ``` + + βœ… Better: + + ```rust + #[allow( + clippy::expect_used, + reason = "we only assign valid indexes to `self.active_index`" + )] + let expiry_time = expiry_times + .get(self.active_index) + .expect("invalid active index"); + ``` + + +# Avoid unchecked arithmetic operations + +Be aware that some basic arithmetic operators (`+`, `-`, `*`, unary negation with `-`) and some functions (`.abs()`, `.pow()`, `.next_multiple_of()`, etc.) can overflow. These operators and functions will panic only in debug mode (more specifically, when debug assertions are enabled). In release mode, they will quietly produce an invalid value. For example, `200u8 + 150u8` evaluates to 94 in release mode. Be especially careful when subtracting unsigned integers because even small values can produce an overflow: `2u32 - 4u32` evaluates to 4294967294. + +Consider using an alternative function that produces a more reasonable value: + +- Use `.checked_*()` functions that return `None` in case of overflow. +- Use `.saturating_*()` functions that will cap on MIN or MAX value. + +Use `.wrapping_*()` functions if you expect the value to wrap around. + +β›” Bad: + +```rust +charge_payment(amount + fee); +``` + +βœ… Good: + +```rust +let total = amount + .checked_add(fee) + .with_context(|| { + format!("total amount overflow: amount = {amount}, fee = {fee}") + })?; +charge_payment(total); +``` + + + +# Avoid implicit wrapping and truncation with `as` + +Limit the use of `as` keyword for converting values between numeric types. While `as` is often the most convenient option, it’s quite bad at expressing intent. `as` can do conversions with different semantics. In the case of integers, it can do both **lossless conversions** (e.g. from `u8` to `u32`; from `u32` to `i64`) and **lossy conversions** (e.g. `258_u32 as u8` evaluates to 2; `200_u8 as i8` evaluates to -56). + +- Always use `.into()` and `T::from()` instead of `as` for lossless conversions. This makes the intent more clear. +- Only use `as` for lossy conversions when you specifically intend to perform a lossy conversion and are aware of how it affects the value. Add an `#[allow]` attribute and specify the reason. +- See if you can choose a more suitable type to avoid the conversion entirely. +- When a lossless conversion is necessary but it’s not possible using `From` and `Into`, use `.try_into()` or `T::try_from()` instead of `as` and handle the error case appropriately. +- When correctness of the resulting value is not super important (e.g. it’s used for metrics) and the failure is unlikely, you can use `.shrink()` or `T::shrink_from()` provided by `truncate-integer` crate. These functions perform a saturating conversion, i.e. they return a min or max value when the value cannot be represented in the new type. In most cases it’s better than what `as` would produce. + +β›” Bad (if `count` is negative, it will attempt a huge allocation which can crash the process): + +```rust +let count: i32 = /*...*/; +vec.resize(count as usize); + +``` + +βœ… Good: + +```rust +vec.resize( + count + .try_into() + .with_context(|| format!("invalid count: {count}"))? +); +``` + +β›” Bad (truncates on overflow, producing an invalid value): + +```rust +ABC_DURATION.record(started_at.elapsed().as_micros() as u64, &[]); +``` + +βœ… Better (saturates to max value, using `truncate-integer` crate): + +```rust +ABC_DURATION.record(started_at.elapsed().as_micros().shrink(), &[]); +``` + +# Error handling + +End-user applications and services can use a single error type throughout the code. We use `anyhow` for that. Fine-grained error types are often not beneficial in these applications because most of the time you just catch and log the error at a certain level. + +Libraries, on the other hand, often need to return a detailed error to the caller. This is where fine-grained error types come in handy. You should make sure that they are well formed: + +- Error types should implement `Debug`, `Display` and `Error`. When implementing `Error`, the `source()` method should be properly implemented. The easiest way to do these things is the `thiserror` crate and the `#[error]` and `#[source]` attributes it provides. +- If the location of the error can be ambiguous, consider adding a backtrace to it. For custom error types you have to do it manually by storing `std::backtrace::Backtrace` inside the error. + +Other recommendations: + +- The error message should contain relevant context that can help understand the issue. + + β›” Bad: + + ```rust + let timestamp = args.timestamp.try_into()?; + ``` + + βœ… Good (using `anyhow::Context`): + + ```rust + let timestamp = args + .timestamp + .try_into() + .with_context(|| { + format!("invalid timestamp received from Hermes: {:?}", args.timestamp) + })?; + ``` + + β›” Bad: + + ```rust + let exponent = config.feeds.get(feed_id).context("missing feed")?.exponent; + ``` + + βœ… Good: + + ```rust + let exponent = config + .feeds + .get(feed_id) + .with_context(|| { + format!("missing feed {:?} in config", feed_id) + })? + .exponent; + ``` + +- When wrapping another error or converting error type, preserve the error source instead of converting it to `String`. Especially avoid calling `to_string()` on `anyhow::Error` or formatting it with `{}` because it erases context and backtrace information. + + β›” Bad (loses information about source errors and backtrace): + + ```rust + if let Err(err) = parse(data) { + bail!("parsing failed: {err}"); + } + // OR + parse(data).map_err(|err| format!("parsing failed: {err}"))?; + ``` + + β›” Better but still bad (preserves source errors and backtrace, but the error will have two backtraces now): + + ```rust + if let Err(err) = parse(data) { + bail!("parsing failed for {data:?}: {err:?}"); + } + ``` + + βœ… Good (returns an error with proper source and backtrace): + + ```rust + parse(data).with_context(|| format!("failed to parse {data:?}"))?; + ``` + + β›” Bad (loses information about source errors and backtrace): + + ```rust + warn!(%err, ?data, "parsing failed"); + // OR + warn!("parsing failed: {}", err); + ``` + + βœ… Better (returns full information about the error in a structured way): + + ```rust + warn!(?err, ?data, "parsing failed"); + ``` + + +# Other recommendations + +- Avoid writing unsafe code. Unsafe code is hard to get right and is only needed for really low level stuff. +- Prefer default requirements (e.g. `time = "0.1.12"`) when specifying dependencies and to allow semver-compatible upgrades. Avoid using `<=` requirements because they break semver. Never use `*` requirement. +- Avoid using macros if the same result can be achieved without a macro.