From d6b42c2463ddfc7d16b9199f7725e704c94c2acf Mon Sep 17 00:00:00 2001
From: root <root@localhost>
Date: Thu, 17 Jul 2014 18:03:34 +0200
Subject: [PATCH 1/6] str: Add better tests for string slice's Chars iterator

Test using for ch s.chars() { black_box(ch) } to have a test that should
force the iterator to run its full decoding computations.
---
 src/libcollections/str.rs | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index 85921f1176a3..f1bb3b408041 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -2240,16 +2240,26 @@ mod tests {
 #[cfg(test)]
 mod bench {
     use test::Bencher;
+    use test::black_box;
     use super::*;
+    use std::option::{None, Some};
     use std::iter::{Iterator, DoubleEndedIterator};
     use std::collections::Collection;
 
     #[bench]
     fn char_iterator(b: &mut Bencher) {
         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
-        let len = s.char_len();
 
-        b.iter(|| assert_eq!(s.chars().count(), len));
+        b.iter(|| s.chars().count());
+    }
+
+    #[bench]
+    fn char_iterator_for(b: &mut Bencher) {
+        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
+
+        b.iter(|| {
+            for ch in s.chars() { black_box(ch) }
+        });
     }
 
     #[bench]
@@ -2260,17 +2270,24 @@ mod bench {
         Mary had a little lamb, Little lamb
         Mary had a little lamb, Little lamb
         Mary had a little lamb, Little lamb";
-        let len = s.char_len();
 
-        b.iter(|| assert_eq!(s.chars().count(), len));
+        b.iter(|| s.chars().count());
     }
 
     #[bench]
     fn char_iterator_rev(b: &mut Bencher) {
         let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
-        let len = s.char_len();
 
-        b.iter(|| assert_eq!(s.chars().rev().count(), len));
+        b.iter(|| s.chars().rev().count());
+    }
+
+    #[bench]
+    fn char_iterator_rev_for(b: &mut Bencher) {
+        let s = "ศไทย中华Việt Nam; Mary had a little lamb, Little lamb";
+
+        b.iter(|| {
+            for ch in s.chars().rev() { black_box(ch) }
+        });
     }
 
     #[bench]

From 42357d772b8a3a1ce4395deeac0a5cf1f66e951d Mon Sep 17 00:00:00 2001
From: root <root@localhost>
Date: Thu, 17 Jul 2014 19:34:07 +0200
Subject: [PATCH 2/6] core::str: Implement Chars iterator using slice::Items

Re-use the vector iterator to implement the chars iterator.

The iterator uses our guarantee that the string contains valid UTF-8,
but its only unsafe code is transmuting the decoded u32 into char.
---
 src/libcore/str.rs | 158 ++++++++++++++++++++++++++++++++-------------
 1 file changed, 114 insertions(+), 44 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index aa2050dacf1a..9711d2e3bcc1 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -97,47 +97,121 @@ impl<'a> CharEq for &'a [char] {
 Section: Iterators
 */
 
-/// External iterator for a string's characters.
-/// Use with the `std::iter` module.
+/// Iterator for the char (representing *Unicode Scalar Values*) of a string
+///
+/// Created with the method `.chars()`.
 #[deriving(Clone)]
 pub struct Chars<'a> {
-    /// The slice remaining to be iterated
-    string: &'a str,
+    iter: slice::Items<'a, u8>
+}
+
+// Return the initial codepoint accumulator for the first byte.
+// The first byte is special, only want bottom 5 bits for width 2, 4 bits
+// for width 3, and 3 bits for width 4
+macro_rules! utf8_first_byte(
+    ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
+)
+
+// return the value of $ch updated with continuation byte $byte
+macro_rules! utf8_acc_cont_byte(
+    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
+)
+
+macro_rules! utf8_is_cont_byte(
+    ($byte:expr) => (($byte & 192u8) == 128)
+)
+
+#[inline]
+fn unwrap_or_0(opt: Option<&u8>) -> u8 {
+    match opt {
+        Some(&byte) => byte,
+        None => 0,
+    }
 }
 
 impl<'a> Iterator<char> for Chars<'a> {
     #[inline]
     fn next(&mut self) -> Option<char> {
-        // Decode the next codepoint, then update
-        // the slice to be just the remaining part
-        if self.string.len() != 0 {
-            let CharRange {ch, next} = self.string.char_range_at(0);
+        // Decode UTF-8, using the valid UTF-8 invariant
+        #[inline]
+        fn decode_multibyte<'a>(x: u8, it: &mut slice::Items<'a, u8>) -> char {
+            // NOTE: Performance is very sensitive to the exact formulation here
+            // Decode from a byte combination out of: [[[x y] z] w]
+            let cont_mask = 0x3F; // continuation byte mask
+            let init = utf8_first_byte!(x, 2);
+            let y = unwrap_or_0(it.next());
+            let mut ch = utf8_acc_cont_byte!(init, y);
+            if x >= 0xE0 {
+                /* [[x y z] w] case */
+                let z = unwrap_or_0(it.next());
+
+                let y_z = (((y & cont_mask) as u32) << 6) | (z & cont_mask) as u32;
+                ch = init << 12 | y_z;
+                if x >= 0xF0 {
+                    /* [x y z w] case */
+                    let w = unwrap_or_0(it.next());
+                    ch = (init & 7) << 18 | y_z << 6 | (w & cont_mask) as u32;
+                }
+            }
             unsafe {
-                self.string = raw::slice_unchecked(self.string, next, self.string.len());
+                mem::transmute(ch)
+            }
+        }
+
+        match self.iter.next() {
+            None => None,
+            Some(&next_byte) => {
+                if next_byte < 128 {
+                    Some(next_byte as char)
+                } else {
+                    Some(decode_multibyte(next_byte, &mut self.iter))
+                }
             }
-            Some(ch)
-        } else {
-            None
         }
     }
 
     #[inline]
     fn size_hint(&self) -> (uint, Option<uint>) {
-        (self.string.len().saturating_add(3)/4, Some(self.string.len()))
+        let (len, _) = self.iter.size_hint();
+        (len.saturating_add(3) / 4, Some(len))
     }
 }
 
 impl<'a> DoubleEndedIterator<char> for Chars<'a> {
     #[inline]
     fn next_back(&mut self) -> Option<char> {
-        if self.string.len() != 0 {
-            let CharRange {ch, next} = self.string.char_range_at_reverse(self.string.len());
+        #[inline]
+        fn decode_multibyte_back<'a>(w: u8, it: &mut slice::Items<'a, u8>) -> char {
+            // Decode from a byte combination out of: [x [y [z w]]]
+            let mut ch;
+            let z = unwrap_or_0(it.next_back());
+            ch = utf8_first_byte!(z, 2);
+            if utf8_is_cont_byte!(z) {
+                let y = unwrap_or_0(it.next_back());
+                ch = utf8_first_byte!(y, 3);
+                if utf8_is_cont_byte!(y) {
+                    let x = unwrap_or_0(it.next_back());
+                    ch = utf8_first_byte!(x, 4);
+                    ch = utf8_acc_cont_byte!(ch, y);
+                }
+                ch = utf8_acc_cont_byte!(ch, z);
+            }
+            ch = utf8_acc_cont_byte!(ch, w);
+
             unsafe {
-                self.string = raw::slice_unchecked(self.string, 0, next);
+                mem::transmute(ch)
+            }
+        }
+
+        match self.iter.next_back() {
+            None => None,
+            Some(&back_byte) => {
+                if back_byte < 128 {
+                    Some(back_byte as char)
+                } else {
+                    Some(decode_multibyte_back(back_byte, &mut self.iter))
+                }
             }
-            Some(ch)
-        } else {
-            None
         }
     }
 }
@@ -146,18 +220,23 @@ impl<'a> DoubleEndedIterator<char> for Chars<'a> {
 /// Use with the `std::iter` module.
 #[deriving(Clone)]
 pub struct CharOffsets<'a> {
-    /// The original string to be iterated
-    string: &'a str,
+    front: uint,
+    back: uint,
     iter: Chars<'a>,
 }
 
 impl<'a> Iterator<(uint, char)> for CharOffsets<'a> {
     #[inline]
     fn next(&mut self) -> Option<(uint, char)> {
-        // Compute the byte offset by using the pointer offset between
-        // the original string slice and the iterator's remaining part
-        let offset = self.iter.string.as_ptr() as uint - self.string.as_ptr() as uint;
-        self.iter.next().map(|ch| (offset, ch))
+        match self.iter.next() {
+            None => None,
+            Some(ch) => {
+                let index = self.front;
+                let (len, _) = self.iter.iter.size_hint();
+                self.front += self.back - self.front - len;
+                Some((index, ch))
+            }
+        }
     }
 
     #[inline]
@@ -169,11 +248,14 @@ impl<'a> Iterator<(uint, char)> for CharOffsets<'a> {
 impl<'a> DoubleEndedIterator<(uint, char)> for CharOffsets<'a> {
     #[inline]
     fn next_back(&mut self) -> Option<(uint, char)> {
-        self.iter.next_back().map(|ch| {
-            let offset = self.iter.string.len() +
-                    self.iter.string.as_ptr() as uint - self.string.as_ptr() as uint;
-            (offset, ch)
-        })
+        match self.iter.next_back() {
+            None => None,
+            Some(ch) => {
+                let (len, _) = self.iter.iter.size_hint();
+                self.back -= self.back - self.front - len;
+                Some((self.back, ch))
+            }
+        }
     }
 }
 
@@ -880,18 +962,6 @@ pub struct CharRange {
     pub next: uint,
 }
 
-// Return the initial codepoint accumulator for the first byte.
-// The first byte is special, only want bottom 5 bits for width 2, 4 bits
-// for width 3, and 3 bits for width 4
-macro_rules! utf8_first_byte(
-    ($byte:expr, $width:expr) => (($byte & (0x7F >> $width)) as u32)
-)
-
-// return the value of $ch updated with continuation byte $byte
-macro_rules! utf8_acc_cont_byte(
-    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
-)
-
 static TAG_CONT_U8: u8 = 128u8;
 
 /// Unsafe operations
@@ -1608,7 +1678,7 @@ impl<'a> StrSlice<'a> for &'a str {
 
     #[inline]
     fn chars(&self) -> Chars<'a> {
-        Chars{string: *self}
+        Chars{iter: self.as_bytes().iter()}
     }
 
     #[inline]
@@ -1618,7 +1688,7 @@ impl<'a> StrSlice<'a> for &'a str {
 
     #[inline]
     fn char_indices(&self) -> CharOffsets<'a> {
-        CharOffsets{string: *self, iter: self.chars()}
+        CharOffsets{front: 0, back: self.len(), iter: self.chars()}
     }
 
     #[inline]

From 9e59c7626381eff9ee9110091b68a079b8a5b7d8 Mon Sep 17 00:00:00 2001
From: root <root@localhost>
Date: Fri, 18 Jul 2014 00:59:09 +0200
Subject: [PATCH 3/6] Add more tests for str Chars iterator

Test iterating (decoding) every codepoint.
---
 src/libcollections/str.rs | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/src/libcollections/str.rs b/src/libcollections/str.rs
index f1bb3b408041..b0c8f17eecc7 100644
--- a/src/libcollections/str.rs
+++ b/src/libcollections/str.rs
@@ -808,6 +808,7 @@ impl OwnedStr for String {
 #[cfg(test)]
 mod tests {
     use std::iter::AdditiveIterator;
+    use std::iter::range;
     use std::default::Default;
     use std::char::Char;
     use std::clone::Clone;
@@ -1610,6 +1611,30 @@ mod tests {
         assert_eq!(pos, v.len());
     }
 
+    #[test]
+    fn test_chars_decoding() {
+        let mut bytes = [0u8, ..4];
+        for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
+            let len = c.encode_utf8(bytes);
+            let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
+            if Some(c) != s.chars().next() {
+                fail!("character {:x}={} does not decode correctly", c as u32, c);
+            }
+        }
+    }
+
+    #[test]
+    fn test_chars_rev_decoding() {
+        let mut bytes = [0u8, ..4];
+        for c in range(0u32, 0x110000).filter_map(|c| ::core::char::from_u32(c)) {
+            let len = c.encode_utf8(bytes);
+            let s = ::core::str::from_utf8(bytes.slice_to(len)).unwrap();
+            if Some(c) != s.chars().rev().next() {
+                fail!("character {:x}={} does not decode correctly", c as u32, c);
+            }
+        }
+    }
+
     #[test]
     fn test_iterator_clone() {
         let s = "ศไทย中华Việt Nam";

From bbb299ad9840d02c52eefbd9989b5b18b51a7b8d Mon Sep 17 00:00:00 2001
From: root <root@localhost>
Date: Fri, 18 Jul 2014 00:59:49 +0200
Subject: [PATCH 4/6] Clarify str Chars iterator implementation

Thanks to comments from @huonw, clarify decoding details and use
statics for important constants for UTF-8 decoding. Convert some magic
numbers scattered in the same file to use the statics too.
---
 src/libcore/str.rs | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 9711d2e3bcc1..f8ff4cfb1d9a 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -114,11 +114,11 @@ macro_rules! utf8_first_byte(
 
 // return the value of $ch updated with continuation byte $byte
 macro_rules! utf8_acc_cont_byte(
-    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & 63u8) as u32)
+    ($ch:expr, $byte:expr) => (($ch << 6) | ($byte & CONT_MASK) as u32)
 )
 
 macro_rules! utf8_is_cont_byte(
-    ($byte:expr) => (($byte & 192u8) == 128)
+    ($byte:expr) => (($byte & !CONT_MASK) == TAG_CONT_U8)
 )
 
 #[inline]
@@ -137,20 +137,20 @@ impl<'a> Iterator<char> for Chars<'a> {
         fn decode_multibyte<'a>(x: u8, it: &mut slice::Items<'a, u8>) -> char {
             // NOTE: Performance is very sensitive to the exact formulation here
             // Decode from a byte combination out of: [[[x y] z] w]
-            let cont_mask = 0x3F; // continuation byte mask
             let init = utf8_first_byte!(x, 2);
             let y = unwrap_or_0(it.next());
             let mut ch = utf8_acc_cont_byte!(init, y);
             if x >= 0xE0 {
-                /* [[x y z] w] case */
+                /* [[x y z] w] case
+                 * 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid */
                 let z = unwrap_or_0(it.next());
-
-                let y_z = (((y & cont_mask) as u32) << 6) | (z & cont_mask) as u32;
+                let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
                 ch = init << 12 | y_z;
                 if x >= 0xF0 {
-                    /* [x y z w] case */
+                    /* [x y z w] case
+                     * use only the lower 3 bits of `init` */
                     let w = unwrap_or_0(it.next());
-                    ch = (init & 7) << 18 | y_z << 6 | (w & cont_mask) as u32;
+                    ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
                 }
             }
             unsafe {
@@ -754,9 +754,9 @@ fn run_utf8_validation_iterator(iter: &mut slice::Items<u8>) -> bool {
             // UTF8-4      = %xF0 %x90-BF 2( UTF8-tail ) / %xF1-F3 3( UTF8-tail ) /
             //               %xF4 %x80-8F 2( UTF8-tail )
             match w {
-                2 => if second & 192 != TAG_CONT_U8 {err!()},
+                2 => if second & !CONT_MASK != TAG_CONT_U8 {err!()},
                 3 => {
-                    match (first, second, next!() & 192) {
+                    match (first, second, next!() & !CONT_MASK) {
                         (0xE0        , 0xA0 .. 0xBF, TAG_CONT_U8) |
                         (0xE1 .. 0xEC, 0x80 .. 0xBF, TAG_CONT_U8) |
                         (0xED        , 0x80 .. 0x9F, TAG_CONT_U8) |
@@ -765,7 +765,7 @@ fn run_utf8_validation_iterator(iter: &mut slice::Items<u8>) -> bool {
                     }
                 }
                 4 => {
-                    match (first, second, next!() & 192, next!() & 192) {
+                    match (first, second, next!() & !CONT_MASK, next!() & !CONT_MASK) {
                         (0xF0        , 0x90 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                         (0xF1 .. 0xF3, 0x80 .. 0xBF, TAG_CONT_U8, TAG_CONT_U8) |
                         (0xF4        , 0x80 .. 0x8F, TAG_CONT_U8, TAG_CONT_U8) => {}
@@ -962,7 +962,10 @@ pub struct CharRange {
     pub next: uint,
 }
 
-static TAG_CONT_U8: u8 = 128u8;
+/// Mask of the value bits of a continuation byte
+static CONT_MASK: u8 = 0b0011_1111u8;
+/// Value of the tag bits (tag mask is !CONT_MASK) of a continuation byte
+static TAG_CONT_U8: u8 = 0b1000_0000u8;
 
 /// Unsafe operations
 pub mod raw {
@@ -1898,7 +1901,7 @@ impl<'a> StrSlice<'a> for &'a str {
         // Multibyte case is a fn to allow char_range_at_reverse to inline cleanly
         fn multibyte_char_range_at_reverse(s: &str, mut i: uint) -> CharRange {
             // while there is a previous byte == 10......
-            while i > 0 && s.as_bytes()[i] & 192u8 == TAG_CONT_U8 {
+            while i > 0 && s.as_bytes()[i] & !CONT_MASK == TAG_CONT_U8 {
                 i -= 1u;
             }
 

From 45921648699a42fa1d257f6a54d2dbe9e46b0e20 Mon Sep 17 00:00:00 2001
From: root <root@localhost>
Date: Sat, 19 Jul 2014 00:02:30 +0200
Subject: [PATCH 5/6] Write multibyte case for str Chars iterator in-line

Thanks to comments from @alexcrichton, write the next/next_back function
bodies without nested functions in a more top-to-bottom flow style.

Also improve comment style and motivate the unsafe blocks with comments.
---
 src/libcore/str.rs | 107 ++++++++++++++++++++-------------------------
 1 file changed, 48 insertions(+), 59 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index f8ff4cfb1d9a..293c0118af1e 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -133,40 +133,35 @@ impl<'a> Iterator<char> for Chars<'a> {
     #[inline]
     fn next(&mut self) -> Option<char> {
         // Decode UTF-8, using the valid UTF-8 invariant
-        #[inline]
-        fn decode_multibyte<'a>(x: u8, it: &mut slice::Items<'a, u8>) -> char {
-            // NOTE: Performance is very sensitive to the exact formulation here
-            // Decode from a byte combination out of: [[[x y] z] w]
-            let init = utf8_first_byte!(x, 2);
-            let y = unwrap_or_0(it.next());
-            let mut ch = utf8_acc_cont_byte!(init, y);
-            if x >= 0xE0 {
-                /* [[x y z] w] case
-                 * 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid */
-                let z = unwrap_or_0(it.next());
-                let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
-                ch = init << 12 | y_z;
-                if x >= 0xF0 {
-                    /* [x y z w] case
-                     * use only the lower 3 bits of `init` */
-                    let w = unwrap_or_0(it.next());
-                    ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
-                }
-            }
-            unsafe {
-                mem::transmute(ch)
+        let x = match self.iter.next() {
+            None => return None,
+            Some(&next_byte) if next_byte < 128 => return Some(next_byte as char),
+            Some(&next_byte) => next_byte,
+        };
+
+        // Multibyte case follows
+        // Decode from a byte combination out of: [[[x y] z] w]
+        // NOTE: Performance is sensitive to the exact formulation here
+        let init = utf8_first_byte!(x, 2);
+        let y = unwrap_or_0(self.iter.next());
+        let mut ch = utf8_acc_cont_byte!(init, y);
+        if x >= 0xE0 {
+            // [[x y z] w] case
+            // 5th bit in 0xE0 .. 0xEF is always clear, so `init` is still valid
+            let z = unwrap_or_0(self.iter.next());
+            let y_z = utf8_acc_cont_byte!((y & CONT_MASK) as u32, z);
+            ch = init << 12 | y_z;
+            if x >= 0xF0 {
+                // [x y z w] case
+                // use only the lower 3 bits of `init`
+                let w = unwrap_or_0(self.iter.next());
+                ch = (init & 7) << 18 | utf8_acc_cont_byte!(y_z, w);
             }
         }
 
-        match self.iter.next() {
-            None => None,
-            Some(&next_byte) => {
-                if next_byte < 128 {
-                    Some(next_byte as char)
-                } else {
-                    Some(decode_multibyte(next_byte, &mut self.iter))
-                }
-            }
+        // str invariant says `ch` is a valid Unicode Scalar Value
+        unsafe {
+            Some(mem::transmute(ch))
         }
     }
 
@@ -180,38 +175,32 @@ impl<'a> Iterator<char> for Chars<'a> {
 impl<'a> DoubleEndedIterator<char> for Chars<'a> {
     #[inline]
     fn next_back(&mut self) -> Option<char> {
-        #[inline]
-        fn decode_multibyte_back<'a>(w: u8, it: &mut slice::Items<'a, u8>) -> char {
-            // Decode from a byte combination out of: [x [y [z w]]]
-            let mut ch;
-            let z = unwrap_or_0(it.next_back());
-            ch = utf8_first_byte!(z, 2);
-            if utf8_is_cont_byte!(z) {
-                let y = unwrap_or_0(it.next_back());
-                ch = utf8_first_byte!(y, 3);
-                if utf8_is_cont_byte!(y) {
-                    let x = unwrap_or_0(it.next_back());
-                    ch = utf8_first_byte!(x, 4);
-                    ch = utf8_acc_cont_byte!(ch, y);
-                }
-                ch = utf8_acc_cont_byte!(ch, z);
-            }
-            ch = utf8_acc_cont_byte!(ch, w);
+        let w = match self.iter.next_back() {
+            None => return None,
+            Some(&back_byte) if back_byte < 128 => return Some(back_byte as char),
+            Some(&back_byte) => back_byte,
+        };
 
-            unsafe {
-                mem::transmute(ch)
+        // Multibyte case follows
+        // Decode from a byte combination out of: [x [y [z w]]]
+        let mut ch;
+        let z = unwrap_or_0(self.iter.next_back());
+        ch = utf8_first_byte!(z, 2);
+        if utf8_is_cont_byte!(z) {
+            let y = unwrap_or_0(self.iter.next_back());
+            ch = utf8_first_byte!(y, 3);
+            if utf8_is_cont_byte!(y) {
+                let x = unwrap_or_0(self.iter.next_back());
+                ch = utf8_first_byte!(x, 4);
+                ch = utf8_acc_cont_byte!(ch, y);
             }
+            ch = utf8_acc_cont_byte!(ch, z);
         }
+        ch = utf8_acc_cont_byte!(ch, w);
 
-        match self.iter.next_back() {
-            None => None,
-            Some(&back_byte) => {
-                if back_byte < 128 {
-                    Some(back_byte as char)
-                } else {
-                    Some(decode_multibyte_back(back_byte, &mut self.iter))
-                }
-            }
+        // str invariant says `ch` is a valid Unicode Scalar Value
+        unsafe {
+            Some(mem::transmute(ch))
         }
     }
 }

From c5e0736c2434fbed6b7f249b42b336f0e27804a6 Mon Sep 17 00:00:00 2001
From: root <root@localhost>
Date: Sat, 19 Jul 2014 15:39:02 +0200
Subject: [PATCH 6/6] Simplify str CharOffsets iterator

Only one uint is needed to keep track of the offset from the original
full string.
---
 src/libcore/str.rs | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/libcore/str.rs b/src/libcore/str.rs
index 293c0118af1e..c6aff9c8bdac 100644
--- a/src/libcore/str.rs
+++ b/src/libcore/str.rs
@@ -209,20 +209,20 @@ impl<'a> DoubleEndedIterator<char> for Chars<'a> {
 /// Use with the `std::iter` module.
 #[deriving(Clone)]
 pub struct CharOffsets<'a> {
-    front: uint,
-    back: uint,
+    front_offset: uint,
     iter: Chars<'a>,
 }
 
 impl<'a> Iterator<(uint, char)> for CharOffsets<'a> {
     #[inline]
     fn next(&mut self) -> Option<(uint, char)> {
+        let (pre_len, _) = self.iter.iter.size_hint();
         match self.iter.next() {
             None => None,
             Some(ch) => {
-                let index = self.front;
+                let index = self.front_offset;
                 let (len, _) = self.iter.iter.size_hint();
-                self.front += self.back - self.front - len;
+                self.front_offset += pre_len - len;
                 Some((index, ch))
             }
         }
@@ -241,8 +241,8 @@ impl<'a> DoubleEndedIterator<(uint, char)> for CharOffsets<'a> {
             None => None,
             Some(ch) => {
                 let (len, _) = self.iter.iter.size_hint();
-                self.back -= self.back - self.front - len;
-                Some((self.back, ch))
+                let index = self.front_offset + len;
+                Some((index, ch))
             }
         }
     }
@@ -1680,7 +1680,7 @@ impl<'a> StrSlice<'a> for &'a str {
 
     #[inline]
     fn char_indices(&self) -> CharOffsets<'a> {
-        CharOffsets{front: 0, back: self.len(), iter: self.chars()}
+        CharOffsets{front_offset: 0, iter: self.chars()}
     }
 
     #[inline]