Skip to content

Commit aa88da6

Browse files
committed
std: Tweak some unstable features of str
This commit clarifies some of the unstable features in the `str` module by moving them out of the blanket `core` and `collections` features.

The following methods were moved to the `str_char` feature which generally encompasses decoding specific characters from a `str` and dealing with the result. It is unclear if any of these methods need to be stabilized for 1.0 and the most conservative route for now is to continue providing them but to leave them as unstable under a more specific name.

* `is_char_boundary`
* `char_at`
* `char_range_at`
* `char_at_reverse`
* `char_range_at_reverse`
* `slice_shift_char`

The following methods were moved into the generic `unicode` feature as they are specifically enabled by the `unicode` crate itself.

* `nfd_chars`
* `nfkd_chars`
* `nfc_chars`
* `graphemes`
* `grapheme_indices`
* `width`
1 parent c64d671 commit aa88da6

File tree

16 files changed

+156
-92
lines changed

16 files changed

+156
-92
lines changed

src/compiletest/runtest.rs

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1052,22 +1052,22 @@ fn scan_char(haystack: &str, needle: char, idx: &mut uint) -> bool {
10521052
if *idx >= haystack.len() {
10531053
return false;
10541054
}
1055-
let range = haystack.char_range_at(*idx);
1056-
if range.ch != needle {
1055+
let ch = haystack.char_at(*idx);
1056+
if ch != needle {
10571057
return false;
10581058
}
1059-
*idx = range.next;
1059+
*idx += ch.len_utf8();
10601060
return true;
10611061
}
10621062

10631063
fn scan_integer(haystack: &str, idx: &mut uint) -> bool {
10641064
let mut i = *idx;
10651065
while i < haystack.len() {
1066-
let range = haystack.char_range_at(i);
1067-
if range.ch < '0' || '9' < range.ch {
1066+
let ch = haystack.char_at(i);
1067+
if ch < '0' || '9' < ch {
10681068
break;
10691069
}
1070-
i = range.next;
1070+
i += ch.len_utf8();
10711071
}
10721072
if i == *idx {
10731073
return false;
@@ -1083,9 +1083,9 @@ fn scan_string(haystack: &str, needle: &str, idx: &mut uint) -> bool {
10831083
if haystack_i >= haystack.len() {
10841084
return false;
10851085
}
1086-
let range = haystack.char_range_at(haystack_i);
1087-
haystack_i = range.next;
1088-
if !scan_char(needle, range.ch, &mut needle_i) {
1086+
let ch = haystack.char_at(haystack_i);
1087+
haystack_i += ch.len_utf8();
1088+
if !scan_char(needle, ch, &mut needle_i) {
10891089
return false;
10901090
}
10911091
}

src/libcollections/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@
3535
#![feature(unique)]
3636
#![feature(unsafe_no_drop_flag)]
3737
#![feature(step_by)]
38+
#![feature(str_char)]
3839
#![cfg_attr(test, feature(rand, rustc_private, test))]
3940
#![cfg_attr(test, allow(deprecated))] // rand
4041

src/libcollections/str.rs

Lines changed: 102 additions & 56 deletions
Large diffs are not rendered by default.

src/libcollections/string.rs

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ use unicode::str as unicode_str;
2929
use unicode::str::Utf16Item;
3030

3131
use borrow::{Cow, IntoCow};
32-
use str::{self, CharRange, FromStr, Utf8Error};
32+
use str::{self, FromStr, Utf8Error};
3333
use vec::{DerefVec, Vec, as_vec};
3434

3535
/// A growable string stored as a UTF-8 encoded buffer.
@@ -561,9 +561,9 @@ impl String {
561561
return None
562562
}
563563

564-
let CharRange {ch, next} = self.char_range_at_reverse(len);
564+
let ch = self.char_at_reverse(len);
565565
unsafe {
566-
self.vec.set_len(next);
566+
self.vec.set_len(len - ch.len_utf8());
567567
}
568568
Some(ch)
569569
}
@@ -595,7 +595,8 @@ impl String {
595595
let len = self.len();
596596
assert!(idx <= len);
597597

598-
let CharRange { ch, next } = self.char_range_at(idx);
598+
let ch = self.char_at(idx);
599+
let next = idx + ch.len_utf8();
599600
unsafe {
600601
ptr::copy(self.vec.as_mut_ptr().offset(idx as isize),
601602
self.vec.as_ptr().offset(next as isize),

src/libcore/str/mod.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818

1919
use self::OldSearcher::{TwoWay, TwoWayLong};
2020

21+
use char::CharExt;
2122
use clone::Clone;
2223
use cmp::{self, Eq};
2324
use default::Default;
@@ -1112,8 +1113,10 @@ static UTF8_CHAR_WIDTH: [u8; 256] = [
11121113
/// the next `char` in a string. This can be used as a data structure
11131114
/// for iterating over the UTF-8 bytes of a string.
11141115
#[derive(Copy)]
1115-
#[unstable(feature = "core",
1116-
reason = "naming is uncertain with container conventions")]
1116+
#[unstable(feature = "str_char",
1117+
reason = "existence of this struct is uncertain as it is frequently \
1118+
able to be replaced with char.len_utf8() and/or \
1119+
char/char_indices iterators")]
11171120
pub struct CharRange {
11181121
/// Current `char`
11191122
pub ch: char,
@@ -1646,8 +1649,8 @@ impl StrExt for str {
16461649
if self.is_empty() {
16471650
None
16481651
} else {
1649-
let CharRange {ch, next} = self.char_range_at(0);
1650-
let next_s = unsafe { self.slice_unchecked(next, self.len()) };
1652+
let ch = self.char_at(0);
1653+
let next_s = unsafe { self.slice_unchecked(ch.len_utf8(), self.len()) };
16511654
Some((ch, next_s))
16521655
}
16531656
}

src/libgetopts/lib.rs

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -92,11 +92,10 @@
9292
html_playground_url = "http://play.rust-lang.org/")]
9393

9494
#![deny(missing_docs)]
95-
#![feature(collections)]
9695
#![feature(int_uint)]
9796
#![feature(staged_api)]
98-
#![feature(core)]
9997
#![feature(str_words)]
98+
#![feature(str_char)]
10099
#![cfg_attr(test, feature(rustc_private))]
101100

102101
#[cfg(test)] #[macro_use] extern crate log;
@@ -620,8 +619,8 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result {
620619
let mut j = 1;
621620
names = Vec::new();
622621
while j < curlen {
623-
let range = cur.char_range_at(j);
624-
let opt = Short(range.ch);
622+
let ch = cur.char_at(j);
623+
let opt = Short(ch);
625624

626625
/* In a series of potential options (eg. -aheJ), if we
627626
see one which takes an argument, we assume all
@@ -642,12 +641,13 @@ pub fn getopts(args: &[String], optgrps: &[OptGroup]) -> Result {
642641
No => false
643642
};
644643

645-
if arg_follows && range.next < curlen {
646-
i_arg = Some((&cur[range.next..curlen]).to_string());
644+
let next = j + ch.len_utf8();
645+
if arg_follows && next < curlen {
646+
i_arg = Some((&cur[next..curlen]).to_string());
647647
break;
648648
}
649649

650-
j = range.next;
650+
j = next;
651651
}
652652
}
653653
let mut name_pos = 0;

src/librustc/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,7 @@
4242
#![feature(io)]
4343
#![feature(path_ext)]
4444
#![feature(str_words)]
45+
#![feature(str_char)]
4546
#![cfg_attr(test, feature(test))]
4647

4748
extern crate arena;

src/librustc_driver/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#![feature(exit_status)]
3939
#![feature(io)]
4040
#![feature(set_stdio)]
41+
#![feature(unicode)]
4142

4243
extern crate arena;
4344
extern crate flate;

src/librustc_lint/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@
4141
#![feature(unsafe_destructor)]
4242
#![feature(staged_api)]
4343
#![feature(std_misc)]
44+
#![feature(str_char)]
4445
#![cfg_attr(test, feature(test))]
4546

4647
extern crate syntax;

src/libserialize/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ Core encoding and decoding interfaces.
3737
#![feature(staged_api)]
3838
#![feature(std_misc)]
3939
#![feature(unicode)]
40+
#![feature(str_char)]
4041
#![cfg_attr(test, feature(test))]
4142

4243
// test harness access

src/libstd/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,7 @@
127127
#![feature(int_uint)]
128128
#![feature(unique)]
129129
#![feature(allow_internal_unstable)]
130+
#![feature(str_char)]
130131
#![cfg_attr(test, feature(test, rustc_private))]
131132

132133
// Don't link to std. We are std.

src/libsyntax/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#![feature(std_misc)]
3939
#![feature(unicode)]
4040
#![feature(path_ext)]
41+
#![feature(str_char)]
4142

4243
extern crate arena;
4344
extern crate fmt_macros;

src/libsyntax/parse/lexer/comments.rs

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@ use parse::lexer;
2020
use print::pprust;
2121

2222
use std::io::Read;
23-
use std::str;
2423
use std::usize;
2524

2625
#[derive(Clone, Copy, PartialEq)]
@@ -210,11 +209,11 @@ fn all_whitespace(s: &str, col: CharPos) -> Option<usize> {
210209
let mut col = col.to_usize();
211210
let mut cursor: usize = 0;
212211
while col > 0 && cursor < len {
213-
let r: str::CharRange = s.char_range_at(cursor);
214-
if !r.ch.is_whitespace() {
212+
let ch = s.char_at(cursor);
213+
if !ch.is_whitespace() {
215214
return None;
216215
}
217-
cursor = r.next;
216+
cursor += ch.len_utf8();
218217
col -= 1;
219218
}
220219
return Some(cursor);

src/libsyntax/parse/lexer/mod.rs

Lines changed: 9 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@ use std::fmt;
2222
use std::mem::replace;
2323
use std::num;
2424
use std::rc::Rc;
25-
use std::str;
2625

2726
pub use ext::tt::transcribe::{TtReader, new_tt_reader, new_tt_reader_with_doc_flag};
2827

@@ -291,7 +290,8 @@ impl<'a> StringReader<'a> {
291290
s: &'b str, errmsg: &'b str) -> Cow<'b, str> {
292291
let mut i = 0;
293292
while i < s.len() {
294-
let str::CharRange { ch, next } = s.char_range_at(i);
293+
let ch = s.char_at(i);
294+
let next = i + ch.len_utf8();
295295
if ch == '\r' {
296296
if next < s.len() && s.char_at(next) == '\n' {
297297
return translate_crlf_(self, start, s, errmsg, i).into_cow();
@@ -309,7 +309,8 @@ impl<'a> StringReader<'a> {
309309
let mut buf = String::with_capacity(s.len());
310310
let mut j = 0;
311311
while i < s.len() {
312-
let str::CharRange { ch, next } = s.char_range_at(i);
312+
let ch = s.char_at(i);
313+
let next = i + ch.len_utf8();
313314
if ch == '\r' {
314315
if j < i { buf.push_str(&s[j..i]); }
315316
j = next;
@@ -335,10 +336,11 @@ impl<'a> StringReader<'a> {
335336
if current_byte_offset < self.source_text.len() {
336337
assert!(self.curr.is_some());
337338
let last_char = self.curr.unwrap();
338-
let next = self.source_text.char_range_at(current_byte_offset);
339-
let byte_offset_diff = next.next - current_byte_offset;
339+
let ch = self.source_text.char_at(current_byte_offset);
340+
let next = current_byte_offset + ch.len_utf8();
341+
let byte_offset_diff = next - current_byte_offset;
340342
self.pos = self.pos + Pos::from_usize(byte_offset_diff);
341-
self.curr = Some(next.ch);
343+
self.curr = Some(ch);
342344
self.col = self.col + CharPos(1);
343345
if last_char == '\n' {
344346
self.filemap.next_line(self.last_pos);
@@ -370,7 +372,7 @@ impl<'a> StringReader<'a> {
370372
let offset = self.byte_offset(self.pos).to_usize();
371373
let s = &self.source_text[..];
372374
if offset >= s.len() { return None }
373-
let str::CharRange { next, .. } = s.char_range_at(offset);
375+
let next = offset + s.char_at(offset).len_utf8();
374376
if next < s.len() {
375377
Some(s.char_at(next))
376378
} else {

src/libterm/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,7 @@
6060
#![feature(rustc_private)]
6161
#![feature(staged_api)]
6262
#![feature(std_misc)]
63+
#![feature(str_char)]
6364
#![feature(path_ext)]
6465
#![cfg_attr(windows, feature(libc))]
6566

src/libunicode/u_str.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -244,7 +244,7 @@ impl<'a> Iterator for Graphemes<'a> {
244244
}
245245

246246
self.cat = if take_curr {
247-
idx = self.string.char_range_at(idx).next;
247+
idx = idx + len_utf8(self.string.char_at(idx));
248248
None
249249
} else {
250250
Some(cat)
@@ -256,6 +256,11 @@ impl<'a> Iterator for Graphemes<'a> {
256256
}
257257
}
258258

259+
#[cfg(stage0)]
260+
fn len_utf8(c: char) -> usize { UCharExt::len_utf8(c) }
261+
#[cfg(not(stage0))]
262+
fn len_utf8(c: char) -> usize { c.len_utf8() }
263+
259264
impl<'a> DoubleEndedIterator for Graphemes<'a> {
260265
#[inline]
261266
fn next_back(&mut self) -> Option<&'a str> {

0 commit comments

Comments
 (0)