@@ -367,7 +367,8 @@ impl AsciiChar {
367
367
/// and `Some(AsciiChar::from_ascii_unchecked(128))` might be `None`.
368
368
#[ inline]
369
369
pub unsafe fn from_ascii_unchecked ( ch : u8 ) -> Self {
370
- ch. to_ascii_char_unchecked ( )
370
+ // SAFETY: Caller guarantees `ch` is within bounds of ascii.
371
+ unsafe { ch. to_ascii_char_unchecked ( ) }
371
372
}
372
373
373
374
/// Converts an ASCII character into a `u8`.
@@ -628,12 +629,20 @@ impl AsciiChar {
628
629
/// assert_eq!(AsciiChar::new('p').as_printable_char(), 'p');
629
630
/// ```
630
631
pub fn as_printable_char ( self ) -> char {
631
- unsafe {
632
- match self as u8 {
633
- b' ' ..=b'~' => self . as_char ( ) ,
634
- 127 => '␡' ,
635
- _ => char:: from_u32_unchecked ( self as u32 + '␀' as u32 ) ,
636
- }
632
+ match self as u8 {
633
+ // Non printable characters
634
+ // SAFETY: From codepoint 0x2400 ('␀') to 0x241f (`␟`), there are characters representing
635
+ // the unprintable characters from 0x0 to 0x1f, ordered correctly.
636
+ // As `b` is guaranteed to be within 0x0 to 0x1f, the conversion represents a
637
+ // valid character.
638
+ b @ 0x0 ..=0x1f => unsafe { char:: from_u32_unchecked ( u32:: from ( '␀' ) + u32:: from ( b) ) } ,
639
+
640
+ // 0x7f (delete) has it's own character at codepoint 0x2420, not 0x247f, so it is special
641
+ // cased to return it's character
642
+ 0x7f => '␡' ,
643
+
644
+ // All other characters are printable, and per function contract use `Self::as_char`
645
+ _ => self . as_char ( ) ,
637
646
}
638
647
}
639
648
@@ -781,17 +790,27 @@ impl Error for ToAsciiCharError {
781
790
782
791
/// Convert `char`, `u8` and other character types to `AsciiChar`.
783
792
pub trait ToAsciiChar {
784
- /// Convert to `AsciiChar` without checking that it is an ASCII character.
785
- unsafe fn to_ascii_char_unchecked ( self ) -> AsciiChar ;
786
793
/// Convert to `AsciiChar`.
787
794
fn to_ascii_char ( self ) -> Result < AsciiChar , ToAsciiCharError > ;
795
+
796
+ /// Convert to `AsciiChar` without checking that it is an ASCII character.
797
+ ///
798
+ /// # Safety
799
+ /// Calling this function with a value outside of the ascii range, `0x0` to `0x7f` inclusive,
800
+ /// is undefined behavior.
801
+ // TODO: Make sure this is the contract we want to express in this function.
802
+ // It is ambigous if numbers such as `0xffffff20_u32` are valid ascii characters,
803
+ // as this function returns `Ascii::Space` due to the cast to `u8`, even though
804
+ // `to_ascii_char` returns `Err()`.
805
+ unsafe fn to_ascii_char_unchecked ( self ) -> AsciiChar ;
788
806
}
789
807
790
808
impl ToAsciiChar for AsciiChar {
791
809
#[ inline]
792
810
fn to_ascii_char ( self ) -> Result < AsciiChar , ToAsciiCharError > {
793
811
Ok ( self )
794
812
}
813
+
795
814
#[ inline]
796
815
unsafe fn to_ascii_char_unchecked ( self ) -> AsciiChar {
797
816
self
@@ -805,44 +824,56 @@ impl ToAsciiChar for u8 {
805
824
}
806
825
#[ inline]
807
826
unsafe fn to_ascii_char_unchecked ( self ) -> AsciiChar {
808
- mem:: transmute ( self )
827
+ // SAFETY: Caller guarantees `self` is within bounds of the enum
828
+ // variants, so this cast successfully produces a valid ascii
829
+ // variant
830
+ unsafe { mem:: transmute :: < u8 , AsciiChar > ( self ) }
809
831
}
810
832
}
811
833
834
+ // Note: Casts to `u8` here does not cause problems, as the negative
835
+ // range is mapped outside of ascii bounds.
812
836
impl ToAsciiChar for i8 {
813
837
#[ inline]
814
838
fn to_ascii_char ( self ) -> Result < AsciiChar , ToAsciiCharError > {
815
- ( self as u32 ) . to_ascii_char ( )
839
+ u32 :: from ( self as u8 ) . to_ascii_char ( )
816
840
}
817
841
#[ inline]
818
842
unsafe fn to_ascii_char_unchecked ( self ) -> AsciiChar {
819
- mem:: transmute ( self )
843
+ // SAFETY: Caller guarantees `self` is within bounds of the enum
844
+ // variants, so this cast successfully produces a valid ascii
845
+ // variant
846
+ unsafe { mem:: transmute :: < u8 , AsciiChar > ( self as u8 ) }
820
847
}
821
848
}
822
849
823
850
impl ToAsciiChar for char {
824
851
#[ inline]
825
852
fn to_ascii_char ( self ) -> Result < AsciiChar , ToAsciiCharError > {
826
- ( self as u32 ) . to_ascii_char ( )
853
+ u32 :: from ( self ) . to_ascii_char ( )
827
854
}
828
855
#[ inline]
829
856
unsafe fn to_ascii_char_unchecked ( self ) -> AsciiChar {
830
- ( self as u32 ) . to_ascii_char_unchecked ( )
857
+ // SAFETY: Caller guarantees we're within ascii range.
858
+ unsafe { u32:: from ( self ) . to_ascii_char_unchecked ( ) }
831
859
}
832
860
}
833
861
834
862
impl ToAsciiChar for u32 {
835
863
fn to_ascii_char ( self ) -> Result < AsciiChar , ToAsciiCharError > {
836
- unsafe {
837
- match self {
838
- 0 ..=127 => Ok ( self . to_ascii_char_unchecked ( ) ) ,
839
- _ => Err ( ToAsciiCharError ( ( ) ) ) ,
840
- }
864
+ match self {
865
+ // SAFETY: We're within the valid ascii range in this branch.
866
+ 0x0 ..=0x7f => Ok ( unsafe { self . to_ascii_char_unchecked ( ) } ) ,
867
+ _ => Err ( ToAsciiCharError ( ( ) ) ) ,
841
868
}
842
869
}
870
+
843
871
#[ inline]
844
872
unsafe fn to_ascii_char_unchecked ( self ) -> AsciiChar {
845
- ( self as u8 ) . to_ascii_char_unchecked ( )
873
+ // Note: This cast discards the top bytes, this may cause problems, see
874
+ // the TODO on this method's documentation in the trait.
875
+ // SAFETY: Caller guarantees we're within ascii range.
876
+ unsafe { ( self as u8 ) . to_ascii_char_unchecked ( ) }
846
877
}
847
878
}
848
879
@@ -852,7 +883,10 @@ impl ToAsciiChar for u16 {
852
883
}
853
884
#[ inline]
854
885
unsafe fn to_ascii_char_unchecked ( self ) -> AsciiChar {
855
- ( self as u8 ) . to_ascii_char_unchecked ( )
886
+ // Note: This cast discards the top bytes, this may cause problems, see
887
+ // the TODO on this method's documentation in the trait.
888
+ // SAFETY: Caller guarantees we're within ascii range.
889
+ unsafe { ( self as u8 ) . to_ascii_char_unchecked ( ) }
856
890
}
857
891
}
858
892
0 commit comments