diff --git a/src/libstd/ffi/c_str.rs b/src/libstd/ffi/c_str.rs index 65f4e0cafe09e..ddccd44a76001 100644 --- a/src/libstd/ffi/c_str.rs +++ b/src/libstd/ffi/c_str.rs @@ -3,6 +3,7 @@ use crate::borrow::{Cow, Borrow}; use crate::cmp::Ordering; use crate::error::Error; use crate::fmt::{self, Write}; +use crate::hash::{Hasher, Hash}; use crate::io; use crate::mem; use crate::memchr; @@ -134,10 +135,10 @@ pub struct CString { /// in each pair are borrowed references; the latter are owned /// strings. /// -/// Note that this structure is **not** `repr(C)` and is not recommended to be -/// placed in the signatures of FFI functions. Instead, safe wrappers of FFI -/// functions may leverage the unsafe [`from_ptr`] constructor to provide a safe -/// interface to other consumers. +/// Note that this structure is `repr(C)` and can be +/// placed in the signatures of FFI functions. A `&CStr` is equivalent to `*const c_char`, +/// but offers a richer API. If you want to handle null pointers, too, using an +/// `Option<&CStr>` will work. /// /// # Examples /// @@ -193,20 +194,21 @@ pub struct CString { /// [`String`]: ../string/struct.String.html /// [`CString`]: struct.CString.html /// [`from_ptr`]: #method.from_ptr -#[derive(Hash)] #[stable(feature = "rust1", since = "1.0.0")] -// FIXME: -// `fn from` in `impl From<&CStr> for Box` current implementation relies -// on `CStr` being layout-compatible with `[u8]`. -// When attribute privacy is implemented, `CStr` should be annotated as `#[repr(transparent)]`. -// Anyway, `CStr` representation and layout are considered implementation detail, are -// not documented and must not be relied upon. -pub struct CStr { - // FIXME: this should not be represented with a DST slice but rather with - // just a raw `c_char` along with some form of marker to make - // this an unsized type. Essentially `sizeof(&CStr)` should be the - // same as `sizeof(&c_char)` but `CStr` should be an unsized type. - inner: [c_char] +pub struct CStr(CStrExtern); + +extern "C" { + // HACK: workaround for https://github.com/rust-lang/rust/issues/46665 + type CStrExtern; +} + +#[stable(feature = "rust1", since = "1.0.0")] +impl Hash for CStr { + fn hash(&self, state: &mut H) { + // We also hash a trailing zero to make `CStr` and `CString` produce the same hash + // for the same string. + self.to_bytes_with_nul().hash(state); + } } /// An error indicating that an interior nul byte was found. @@ -598,7 +600,11 @@ impl CString { /// ``` #[stable(feature = "into_boxed_c_str", since = "1.20.0")] pub fn into_boxed_c_str(self) -> Box { - unsafe { Box::from_raw(Box::into_raw(self.into_inner()) as *mut CStr) } + let ptr: *mut [u8] = Box::into_raw(self.into_inner()); + // SAFETY: Casting away the length information is fine as `CStr`'s length computation works + // by counting the elements up to the first null. Since this `CString` must be null + // terminated, the `CStr` can properly compute its length + unsafe { Box::from_raw(ptr as *mut CStr) } } /// Bypass "move out of struct which implements [`Drop`] trait" restriction. @@ -933,22 +939,16 @@ impl fmt::Display for IntoStringError { impl CStr { /// Wraps a raw C string with a safe C string wrapper. + /// This operation is a zero cost conversion. /// /// This function will wrap the provided `ptr` with a `CStr` wrapper, which /// allows inspection and interoperation of non-owned C strings. This method /// is unsafe for a number of reasons: /// - /// * There is no guarantee to the validity of `ptr`. - /// * The returned lifetime is not guaranteed to be the actual lifetime of - /// `ptr`. - /// * There is no guarantee that the memory pointed to by `ptr` contains a - /// valid nul terminator byte at the end of the string. - /// * It is not guaranteed that the memory pointed by `ptr` won't change - /// before the `CStr` has been destroyed. - /// - /// > **Note**: This operation is intended to be a 0-cost cast but it is - /// > currently implemented with an up-front calculation of the length of - /// > the string. This is not guaranteed to always be the case. + /// * The pointer must be non-null, + /// * The returned lifetime must be actual lifetime of the memory that `ptr` points to, + /// * The memory pointed to by `ptr` must have a nul terminator byte within the allocation, + /// * The memory pointed by `ptr` must not be modified before the `CStr` has been destroyed. /// /// # Examples /// @@ -969,9 +969,7 @@ impl CStr { /// ``` #[stable(feature = "rust1", since = "1.0.0")] pub unsafe fn from_ptr<'a>(ptr: *const c_char) -> &'a CStr { - let len = sys::strlen(ptr); - let ptr = ptr as *const u8; - CStr::from_bytes_with_nul_unchecked(slice::from_raw_parts(ptr, len as usize + 1)) + &*(ptr as *const CStr) } /// Creates a C string wrapper from a byte slice. @@ -1024,7 +1022,7 @@ impl CStr { /// /// This function will cast the provided `bytes` to a `CStr` wrapper without /// performing any sanity checks. The provided slice **must** be nul-terminated - /// and not contain any interior nul bytes. + /// and its length will be truncated to its first interior nul (if any). /// /// # Examples /// @@ -1094,7 +1092,7 @@ impl CStr { #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub const fn as_ptr(&self) -> *const c_char { - self.inner.as_ptr() + self as *const CStr as *const c_char } /// Converts this C string to a byte slice. @@ -1102,9 +1100,9 @@ impl CStr { /// The returned slice will **not** contain the trailing nul terminator that this C /// string has. /// - /// > **Note**: This method is currently implemented as a constant-time - /// > cast, but it is planned to alter its definition in the future to - /// > perform the length calculation whenever this method is called. + /// > **Note**: This method performs the length calculation whenever this method is called. + /// > Calculating the length of the C string requires iterating through all bytes of the + /// > C string and can thus be an expensive operation. /// /// # Examples /// @@ -1126,9 +1124,9 @@ impl CStr { /// This function is the equivalent of [`to_bytes`] except that it will retain /// the trailing nul terminator instead of chopping it off. /// - /// > **Note**: This method is currently implemented as a 0-cost cast, but - /// > it is planned to alter its definition in the future to perform the - /// > length calculation whenever this method is called. + /// > **Note**: This method performs the length calculation whenever this method is called. + /// > Calculating the length of the C string requires iterating through all bytes of the + /// > C string and can thus be an expensive operation. /// /// [`to_bytes`]: #method.to_bytes /// @@ -1143,7 +1141,13 @@ impl CStr { #[inline] #[stable(feature = "rust1", since = "1.0.0")] pub fn to_bytes_with_nul(&self) -> &[u8] { - unsafe { &*(&self.inner as *const [c_char] as *const [u8]) } + // SAFETY: safe references to `CStr` can only exist if they point to a memory location + // that is terminated by a `\0`. + unsafe { + let len = sys::strlen(self as *const CStr as *const c_char); + let ptr = self as *const CStr as *const u8; + slice::from_raw_parts(ptr, len as usize + 1) + } } /// Yields a [`&str`] slice if the `CStr` contains valid UTF-8. @@ -1152,10 +1156,8 @@ impl CStr { /// function will return the corresponding [`&str`] slice. Otherwise, /// it will return an error with details of where UTF-8 validation failed. /// - /// > **Note**: This method is currently implemented to check for validity - /// > after a constant-time cast, but it is planned to alter its definition - /// > in the future to perform the length calculation in addition to the - /// > UTF-8 check whenever this method is called. + /// > **Note**: This method performs the length calculation in + /// > addition to the UTF-8 check whenever this method is called. /// /// [`&str`]: ../primitive.str.html /// @@ -1169,9 +1171,7 @@ impl CStr { /// ``` #[stable(feature = "cstr_to_str", since = "1.4.0")] pub fn to_str(&self) -> Result<&str, str::Utf8Error> { - // N.B., when `CStr` is changed to perform the length check in `.to_bytes()` - // instead of in `from_ptr()`, it may be worth considering if this should - // be rewritten to do the UTF-8 check inline with the length calculation + // FIXME: this should be rewritten to do the UTF-8 check inline with the length calculation // instead of doing it afterwards. str::from_utf8(self.to_bytes()) } @@ -1185,10 +1185,8 @@ impl CStr { /// [`U+FFFD REPLACEMENT CHARACTER`][U+FFFD] and return a /// [`Cow`]`::`[`Owned`]`(`[`String`]`)` with the result. /// - /// > **Note**: This method is currently implemented to check for validity - /// > after a constant-time cast, but it is planned to alter its definition - /// > in the future to perform the length calculation in addition to the - /// > UTF-8 check whenever this method is called. + /// > **Note**: This method performs the length calculation in + /// > addition to the UTF-8 check whenever this method is called. /// /// [`Cow`]: ../borrow/enum.Cow.html /// [`Borrowed`]: ../borrow/enum.Cow.html#variant.Borrowed @@ -1244,8 +1242,9 @@ impl CStr { /// ``` #[stable(feature = "into_boxed_c_str", since = "1.20.0")] pub fn into_c_string(self: Box) -> CString { - let raw = Box::into_raw(self) as *mut [u8]; - CString { inner: unsafe { Box::from_raw(raw) } } + unsafe { + CString::from_raw(Box::into_raw(self) as *mut CStr as *mut c_char) + } } } diff --git a/src/libstd/lib.rs b/src/libstd/lib.rs index c3882bacf87eb..bdb15be751a1a 100644 --- a/src/libstd/lib.rs +++ b/src/libstd/lib.rs @@ -263,6 +263,7 @@ #![feature(exact_size_is_empty)] #![feature(exhaustive_patterns)] #![feature(external_doc)] +#![feature(extern_types)] #![feature(fn_traits)] #![feature(format_args_nl)] #![feature(generator_trait)]