From a4daa63a90f46e3c6e36efe5e0743eab09f6f12b Mon Sep 17 00:00:00 2001 From: Tyson Nottingham Date: Wed, 16 Dec 2020 19:03:31 -0800 Subject: [PATCH 1/3] rustc_serialize: specialize opaque encoding of some u8 sequences --- compiler/rustc_macros/src/serialize.rs | 4 ++-- .../src/ty/query/on_disk_cache.rs | 10 ++++++++ .../rustc_serialize/src/collection_impls.rs | 24 +++++-------------- compiler/rustc_serialize/src/lib.rs | 1 + compiler/rustc_serialize/src/opaque.rs | 12 ++++++++++ compiler/rustc_serialize/src/serialize.rs | 2 +- 6 files changed, 32 insertions(+), 21 deletions(-) diff --git a/compiler/rustc_macros/src/serialize.rs b/compiler/rustc_macros/src/serialize.rs index dbeb3c755044f..72bd4804e98c0 100644 --- a/compiler/rustc_macros/src/serialize.rs +++ b/compiler/rustc_macros/src/serialize.rs @@ -203,7 +203,7 @@ fn encodable_body( #field_name, #field_idx, |__encoder| - ::rustc_serialize::Encodable::encode(#bind_ident, __encoder), + ::rustc_serialize::Encodable::<#encoder_ty>::encode(#bind_ident, __encoder), ) { ::std::result::Result::Ok(()) => (), ::std::result::Result::Err(__err) @@ -237,7 +237,7 @@ fn encodable_body( __encoder, #field_idx, |__encoder| - ::rustc_serialize::Encodable::encode(#bind_ident, __encoder), + ::rustc_serialize::Encodable::<#encoder_ty>::encode(#bind_ident, __encoder), ) { ::std::result::Result::Ok(()) => (), ::std::result::Result::Err(__err) diff --git a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs index 8a1165bbd647a..4aa4ff72eb29d 100644 --- a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs @@ -1149,6 +1149,16 @@ where } } +// This ensures that the `Encodable<opaque::Encoder>::encode` specialization for byte slices +// is used when a `CacheEncoder` having an `opaque::Encoder` is passed to `Encodable::encode`. 
+// Unfortunately, we have to manually opt into specializations this way, given how `CacheEncoder` +// and the encoding traits currently work. +impl<'a, 'tcx> Encodable<CacheEncoder<'a, 'tcx, opaque::Encoder>> for [u8] { + fn encode(&self, e: &mut CacheEncoder<'a, 'tcx, opaque::Encoder>) -> opaque::EncodeResult { + self.encode(e.encoder) + } +} + // An integer that will always encode to 8 bytes. struct IntEncodedWithFixedSize(u64); diff --git a/compiler/rustc_serialize/src/collection_impls.rs b/compiler/rustc_serialize/src/collection_impls.rs index 3d274cb01507b..57082da29f2eb 100644 --- a/compiler/rustc_serialize/src/collection_impls.rs +++ b/compiler/rustc_serialize/src/collection_impls.rs @@ -11,12 +11,8 @@ use smallvec::{Array, SmallVec}; impl<S: Encoder, A: Array<Item: Encodable<S>>> Encodable<S> for SmallVec<A> { fn encode(&self, s: &mut S) -> Result<(), S::Error> { - s.emit_seq(self.len(), |s| { - for (i, e) in self.iter().enumerate() { - s.emit_seq_elt(i, |s| e.encode(s))?; - } - Ok(()) - }) + let slice: &[A::Item] = self; + slice.encode(s) } } @@ -292,12 +288,8 @@ where impl<E: Encoder, T: Encodable<E>> Encodable<E> for Rc<[T]> { fn encode(&self, s: &mut E) -> Result<(), E::Error> { - s.emit_seq(self.len(), |s| { - for (index, e) in self.iter().enumerate() { - s.emit_seq_elt(index, |s| e.encode(s))?; - } - Ok(()) - }) + let slice: &[T] = self; + slice.encode(s) } } @@ -315,12 +307,8 @@ impl<D: Decoder, T: Decodable<D>> Decodable<D> for Rc<[T]> { impl<E: Encoder, T: Encodable<E>> Encodable<E> for Arc<[T]> { fn encode(&self, s: &mut E) -> Result<(), E::Error> { - s.emit_seq(self.len(), |s| { - for (index, e) in self.iter().enumerate() { - s.emit_seq_elt(index, |s| e.encode(s))?; - } - Ok(()) - }) + let slice: &[T] = self; + slice.encode(s) } } diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs index ac1cdc6ad45f2..0e7974afff377 100644 --- a/compiler/rustc_serialize/src/lib.rs +++ b/compiler/rustc_serialize/src/lib.rs @@ -14,6 +14,7 @@ Core encoding and decoding interfaces. 
#![feature(nll)] #![feature(associated_type_bounds)] #![cfg_attr(bootstrap, feature(min_const_generics))] +#![feature(min_specialization)] #![cfg_attr(test, feature(test))] #![allow(rustc::internal)] diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 8b79c93e7605b..a41b01f453ee3 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -316,3 +316,15 @@ impl<'a> serialize::Decoder for Decoder<'a> { err.to_string() } } + +// Specialize encoding byte slices. The default implementation for slices encodes and emits each +// element individually. This isn't necessary for `u8` slices encoded with an `opaque::Encoder`, +// because each `u8` is emitted as-is. Therefore, we can use a more efficient implementation. This +// specialization applies to encoding `Vec<u8>`s, etc., since they call `encode` on their slices. +impl serialize::Encodable<Encoder> for [u8] { + fn encode(&self, e: &mut Encoder) -> EncodeResult { + serialize::Encoder::emit_usize(e, self.len())?; + e.emit_raw_bytes(self); + Ok(()) + } +} diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs index aa305f3c7fc3f..ee8ab0e9e4056 100644 --- a/compiler/rustc_serialize/src/serialize.rs +++ b/compiler/rustc_serialize/src/serialize.rs @@ -527,7 +527,7 @@ impl<D: Decoder, T: Decodable<D>> Decodable<D> for Rc<T> { } impl<S: Encoder, T: Encodable<S>> Encodable<S> for [T] { - fn encode(&self, s: &mut S) -> Result<(), S::Error> { + default fn encode(&self, s: &mut S) -> Result<(), S::Error> { s.emit_seq(self.len(), |s| { for (i, e) in self.iter().enumerate() { s.emit_seq_elt(i, |s| e.encode(s))? 
From 7c6274d464d729faa9bab45086df847d5374431b Mon Sep 17 00:00:00 2001 From: Tyson Nottingham Date: Wed, 16 Dec 2020 19:46:19 -0800 Subject: [PATCH 2/3] rustc_serialize: have read_raw_bytes take MaybeUninit<u8> slice --- .../rustc_data_structures/src/fingerprint.rs | 4 ++-- compiler/rustc_serialize/src/opaque.rs | 16 +++++++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/compiler/rustc_data_structures/src/fingerprint.rs b/compiler/rustc_data_structures/src/fingerprint.rs index 01efcaf6f448d..8afe94ac8dba8 100644 --- a/compiler/rustc_data_structures/src/fingerprint.rs +++ b/compiler/rustc_data_structures/src/fingerprint.rs @@ -4,7 +4,7 @@ use rustc_serialize::{ Decodable, Encodable, }; use std::hash::{Hash, Hasher}; -use std::mem; +use std::mem::{self, MaybeUninit}; #[derive(Eq, PartialEq, Ord, PartialOrd, Debug, Clone, Copy)] pub struct Fingerprint(u64, u64); @@ -61,7 +61,7 @@ impl Fingerprint { } pub fn decode_opaque(decoder: &mut opaque::Decoder<'_>) -> Result<Fingerprint, String> { - let mut bytes = [0; 16]; + let mut bytes: [MaybeUninit<u8>; 16] = MaybeUninit::uninit_array(); decoder.read_raw_bytes(&mut bytes)?; diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index a41b01f453ee3..5ef1c7241deba 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -1,6 +1,8 @@ use crate::leb128::{self, read_signed_leb128, write_signed_leb128}; use crate::serialize; use std::borrow::Cow; +use std::mem::MaybeUninit; +use std::ptr; // ----------------------------------------------------------------------------- // Encoder @@ -179,11 +181,19 @@ impl<'a> Decoder<'a> { } #[inline] - pub fn read_raw_bytes(&mut self, s: &mut [u8]) -> Result<(), String> { + pub fn read_raw_bytes(&mut self, s: &mut [MaybeUninit<u8>]) -> Result<(), String> { let start = self.position; let end = start + s.len(); - - s.copy_from_slice(&self.data[start..end]); + assert!(end <= self.data.len()); + + // SAFETY: Both `src` and 
`dst` point to at least `s.len()` elements: + // `src` points to at least `s.len()` elements by above assert, and + // `dst` points to `s.len()` elements by derivation from `s`. + unsafe { + let src = self.data.as_ptr().add(start); + let dst = s.as_mut_ptr() as *mut u8; + ptr::copy_nonoverlapping(src, dst, s.len()); + } self.position = end; From be79f493fb310b3a6b01ceada32713813bb12a91 Mon Sep 17 00:00:00 2001 From: Tyson Nottingham Date: Wed, 16 Dec 2020 21:03:45 -0800 Subject: [PATCH 3/3] rustc_serialize: specialize opaque decoding of some u8 sequences --- .../src/ty/query/on_disk_cache.rs | 9 ++++++ .../rustc_serialize/src/collection_impls.rs | 18 +++--------- compiler/rustc_serialize/src/lib.rs | 1 + compiler/rustc_serialize/src/opaque.rs | 29 ++++++++++++++++--- compiler/rustc_serialize/src/serialize.rs | 11 ++----- 5 files changed, 42 insertions(+), 26 deletions(-) diff --git a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs index 4aa4ff72eb29d..9b40c9a7ed88a 100644 --- a/compiler/rustc_middle/src/ty/query/on_disk_cache.rs +++ b/compiler/rustc_middle/src/ty/query/on_disk_cache.rs @@ -807,6 +807,15 @@ impl<'a, 'tcx> TyDecoder<'tcx> for CacheDecoder<'a, 'tcx> { crate::implement_ty_decoder!(CacheDecoder<'a, 'tcx>); +// This ensures that the `Decodable<opaque::Decoder>::decode` specialization for `Vec<u8>` is used +// when a `CacheDecoder` is passed to `Decodable::decode`. Unfortunately, we have to manually opt +// into specializations this way, given how `CacheDecoder` and the decoding traits currently work. 
+impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for Vec<u8> { + fn decode(d: &mut CacheDecoder<'a, 'tcx>) -> Result<Self, String> { + Decodable::decode(&mut d.opaque) + } +} + impl<'a, 'tcx> Decodable<CacheDecoder<'a, 'tcx>> for SyntaxContext { fn decode(decoder: &mut CacheDecoder<'a, 'tcx>) -> Result<Self, String> { let syntax_contexts = decoder.syntax_contexts; diff --git a/compiler/rustc_serialize/src/collection_impls.rs b/compiler/rustc_serialize/src/collection_impls.rs index 57082da29f2eb..ae6d27e037b2d 100644 --- a/compiler/rustc_serialize/src/collection_impls.rs +++ b/compiler/rustc_serialize/src/collection_impls.rs @@ -295,13 +295,8 @@ impl<E: Encoder, T: Encodable<E>> Encodable<E> for Rc<[T]> { impl<D: Decoder, T: Decodable<D>> Decodable<D> for Rc<[T]> { fn decode(d: &mut D) -> Result<Rc<[T]>, D::Error> { - d.read_seq(|d, len| { - let mut vec = Vec::with_capacity(len); - for index in 0..len { - vec.push(d.read_seq_elt(index, |d| Decodable::decode(d))?); - } - Ok(vec.into()) - }) + let vec: Vec<T> = Decodable::decode(d)?; + Ok(vec.into()) } } @@ -314,12 +309,7 @@ impl<E: Encoder, T: Encodable<E>> Encodable<E> for Arc<[T]> { impl<D: Decoder, T: Decodable<D>> Decodable<D> for Arc<[T]> { fn decode(d: &mut D) -> Result<Arc<[T]>, D::Error> { - d.read_seq(|d, len| { - let mut vec = Vec::with_capacity(len); - for index in 0..len { - vec.push(d.read_seq_elt(index, |d| Decodable::decode(d))?); - } - Ok(vec.into()) - }) + let vec: Vec<T> = Decodable::decode(d)?; + Ok(vec.into()) } } diff --git a/compiler/rustc_serialize/src/lib.rs b/compiler/rustc_serialize/src/lib.rs index 0e7974afff377..f58ed14d9971e 100644 --- a/compiler/rustc_serialize/src/lib.rs +++ b/compiler/rustc_serialize/src/lib.rs @@ -15,6 +15,7 @@ Core encoding and decoding interfaces. 
#![feature(associated_type_bounds)] #![cfg_attr(bootstrap, feature(min_const_generics))] #![feature(min_specialization)] +#![feature(vec_spare_capacity)] #![cfg_attr(test, feature(test))] #![allow(rustc::internal)] diff --git a/compiler/rustc_serialize/src/opaque.rs b/compiler/rustc_serialize/src/opaque.rs index 5ef1c7241deba..673742df7f0dc 100644 --- a/compiler/rustc_serialize/src/opaque.rs +++ b/compiler/rustc_serialize/src/opaque.rs @@ -327,10 +327,13 @@ impl<'a> serialize::Decoder for Decoder<'a> { } } -// Specialize encoding byte slices. The default implementation for slices encodes and emits each -// element individually. This isn't necessary for `u8` slices encoded with an `opaque::Encoder`, -// because each `u8` is emitted as-is. Therefore, we can use a more efficient implementation. This -// specialization applies to encoding `Vec<u8>`s, etc., since they call `encode` on their slices. +// Specializations for contiguous byte sequences follow. The default implementations for slices +// encode and decode each element individually. This isn't necessary for `u8` slices when using +// opaque encoders and decoders, because each `u8` is unchanged by encoding and decoding. +// Therefore, we can use more efficient implementations that process the entire sequence at once. + +// Specialize encoding byte slices. This specialization also applies to encoding `Vec<u8>`s, etc., +// since the default implementations call `encode` on their slices internally. impl serialize::Encodable<Encoder> for [u8] { fn encode(&self, e: &mut Encoder) -> EncodeResult { serialize::Encoder::emit_usize(e, self.len())?; @@ -338,3 +341,21 @@ impl serialize::Encodable<Encoder> for [u8] { Ok(()) } } + +// Specialize decoding `Vec<u8>`. This specialization also applies to decoding `Box<[u8]>`s, etc., +// since the default implementations call `decode` to produce a `Vec<u8>` internally. 
+impl<'a> serialize::Decodable<Decoder<'a>> for Vec<u8> { + fn decode(d: &mut Decoder<'a>) -> Result<Self, String> { + let len = serialize::Decoder::read_usize(d)?; + + let mut v = Vec::with_capacity(len); + let buf = &mut v.spare_capacity_mut()[..len]; + d.read_raw_bytes(buf)?; + + unsafe { + v.set_len(len); + } + + Ok(v) + } +} diff --git a/compiler/rustc_serialize/src/serialize.rs b/compiler/rustc_serialize/src/serialize.rs index ee8ab0e9e4056..47aad5b88c622 100644 --- a/compiler/rustc_serialize/src/serialize.rs +++ b/compiler/rustc_serialize/src/serialize.rs @@ -545,7 +545,7 @@ impl<S: Encoder, T: Encodable<S>> Encodable<S> for Vec<T> { } impl<D: Decoder, T: Decodable<D>> Decodable<D> for Vec<T> { - fn decode(d: &mut D) -> Result<Vec<T>, D::Error> { + default fn decode(d: &mut D) -> Result<Vec<T>, D::Error> { d.read_seq(|d, len| { let mut v = Vec::with_capacity(len); for i in 0..len { @@ -591,13 +591,8 @@ where [T]: ToOwned<Owned = Vec<T>>, { fn decode(d: &mut D) -> Result<Cow<'static, [T]>, D::Error> { - d.read_seq(|d, len| { - let mut v = Vec::with_capacity(len); - for i in 0..len { - v.push(d.read_seq_elt(i, |d| Decodable::decode(d))?); - } - Ok(Cow::Owned(v)) - }) + let v: Vec<T> = Decodable::decode(d)?; + Ok(Cow::Owned(v)) } }