diff --git a/src/compiletest/header.rs b/src/compiletest/header.rs index a039aaf56835a..5a35c56c075a7 100644 --- a/src/compiletest/header.rs +++ b/src/compiletest/header.rs @@ -142,7 +142,8 @@ fn parse_check_line(line: ~str) -> Option<~str> { fn parse_exec_env(line: ~str) -> Option<(~str, ~str)> { do parse_name_value_directive(line, ~"exec-env").map |nv| { // nv is either FOO or FOO=BAR - let strs = str::splitn_char(*nv, '=', 1u); + let mut strs = ~[]; + for str::each_splitn_char(*nv, '=', 1u) |s| { strs.push(s.to_owned()); } match strs.len() { 1u => (strs[0], ~""), 2u => (strs[0], strs[1]), diff --git a/src/compiletest/runtest.rs b/src/compiletest/runtest.rs index f17e9ffe548bd..ed66aea4f0ce7 100644 --- a/src/compiletest/runtest.rs +++ b/src/compiletest/runtest.rs @@ -267,7 +267,7 @@ fn run_debuginfo_test(config: config, props: TestProps, testfile: &Path) { // check if each line in props.check_lines appears in the // output (in order) let mut i = 0u; - for str::lines_each(ProcRes.stdout) |line| { + for str::each_line(ProcRes.stdout) |line| { if props.check_lines[i].trim() == line.trim() { i += 1u; } @@ -297,7 +297,7 @@ fn check_error_patterns(props: TestProps, let mut next_err_idx = 0u; let mut next_err_pat = props.error_patterns[next_err_idx]; let mut done = false; - for str::lines_each(ProcRes.stderr) |line| { + for str::each_line(ProcRes.stderr) |line| { if str::contains(line, next_err_pat) { debug!("found error pattern %s", next_err_pat); next_err_idx += 1u; @@ -347,7 +347,7 @@ fn check_expected_errors(expected_errors: ~[errors::ExpectedError], // filename:line1:col1: line2:col2: *warning:* msg // where line1:col1: is the starting point, line2:col2: // is the ending point, and * represents ANSI color codes. 
- for str::lines_each(ProcRes.stderr) |line| { + for str::each_line(ProcRes.stderr) |line| { let mut was_expected = false; for vec::eachi(expected_errors) |i, ee| { if !found_flags[i] { @@ -596,8 +596,12 @@ fn split_maybe_args(argstr: Option<~str>) -> ~[~str] { } match argstr { - Some(s) => rm_whitespace(str::split_char(s, ' ')), - None => ~[] + Some(s) => { + let mut ss = ~[]; + for str::each_split_char(s, ' ') |s| { ss.push(s.to_owned()) } + rm_whitespace(ss) + } + None => ~[] } } diff --git a/src/libcore/num/strconv.rs b/src/libcore/num/strconv.rs index 26f0582bfb2ba..ce6c015c13168 100644 --- a/src/libcore/num/strconv.rs +++ b/src/libcore/num/strconv.rs @@ -130,6 +130,13 @@ impl_NumStrConv_Integer!(u16) impl_NumStrConv_Integer!(u32) impl_NumStrConv_Integer!(u64) + +// Special value strings as [u8] consts. +static inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8]; +static positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, 'n' as u8, 'f' as u8]; +static negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, 'n' as u8, 'f' as u8]; +static nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8]; + /** * Converts a number to its string representation as a byte vector. 
* This is meant to be a common base implementation for all numeric string @@ -479,15 +486,15 @@ pub fn from_str_bytes_common+ } if special { - if buf == str::inf_buf || buf == str::positive_inf_buf { + if buf == inf_buf || buf == positive_inf_buf { return NumStrConv::inf(); - } else if buf == str::negative_inf_buf { + } else if buf == negative_inf_buf { if negative { return NumStrConv::neg_inf(); } else { return None; } - } else if buf == str::nan_buf { + } else if buf == nan_buf { return NumStrConv::NaN(); } } diff --git a/src/libcore/os.rs b/src/libcore/os.rs index 3c2dbf7ea15bc..9aa00e8e4576c 100644 --- a/src/libcore/os.rs +++ b/src/libcore/os.rs @@ -218,7 +218,8 @@ pub fn env() -> ~[(~str,~str)] { fn env_convert(input: ~[~str]) -> ~[(~str, ~str)] { let mut pairs = ~[]; for input.each |p| { - let vs = str::splitn_char(*p, '=', 1); + let mut vs = ~[]; + for str::each_splitn_char(*p, '=', 1) |s| { vs.push(s.to_owned()) } debug!("splitting: len: %u", vs.len()); fail_unless!(vs.len() == 2); diff --git a/src/libcore/path.rs b/src/libcore/path.rs index 76aaf14d4ac6c..3d06809a4523e 100644 --- a/src/libcore/path.rs +++ b/src/libcore/path.rs @@ -381,7 +381,8 @@ impl ToStr for PosixPath { impl GenericPath for PosixPath { fn from_str(s: &str) -> PosixPath { - let mut components = str::split_nonempty(s, |c| c == '/'); + let mut components = ~[]; + for str::each_split_nonempty(s, |c| c == '/') |s| { components.push(s.to_owned()) } let is_absolute = (s.len() != 0 && s[0] == '/' as u8); return PosixPath { is_absolute: is_absolute, components: components } @@ -504,9 +505,10 @@ impl GenericPath for PosixPath { fn push_many(&self, cs: &[~str]) -> PosixPath { let mut v = copy self.components; for cs.each |e| { - let mut ss = str::split_nonempty( - *e, - |c| windows::is_sep(c as u8)); + let mut ss = ~[]; + for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| { + ss.push(s.to_owned()) + } unsafe { v.push_all_move(ss); } } PosixPath { is_absolute: self.is_absolute, @@ 
-515,7 +517,10 @@ impl GenericPath for PosixPath { fn push(&self, s: &str) -> PosixPath { let mut v = copy self.components; - let mut ss = str::split_nonempty(s, |c| windows::is_sep(c as u8)); + let mut ss = ~[]; + for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| { + ss.push(s.to_owned()) + } unsafe { v.push_all_move(ss); } PosixPath { components: v, ..copy *self } } @@ -590,8 +595,10 @@ impl GenericPath for WindowsPath { } } - let mut components = - str::split_nonempty(rest, |c| windows::is_sep(c as u8)); + let mut components = ~[]; + for str::each_split_nonempty(rest, |c| windows::is_sep(c as u8)) |s| { + components.push(s.to_owned()) + } let is_absolute = (rest.len() != 0 && windows::is_sep(rest[0])); return WindowsPath { host: host, device: device, @@ -759,9 +766,10 @@ impl GenericPath for WindowsPath { fn push_many(&self, cs: &[~str]) -> WindowsPath { let mut v = copy self.components; for cs.each |e| { - let mut ss = str::split_nonempty( - *e, - |c| windows::is_sep(c as u8)); + let mut ss = ~[]; + for str::each_split_nonempty(*e, |c| windows::is_sep(c as u8)) |s| { + ss.push(s.to_owned()) + } unsafe { v.push_all_move(ss); } } // tedious, but as-is, we can't use ..self @@ -775,7 +783,10 @@ impl GenericPath for WindowsPath { fn push(&self, s: &str) -> WindowsPath { let mut v = copy self.components; - let mut ss = str::split_nonempty(s, |c| windows::is_sep(c as u8)); + let mut ss = ~[]; + for str::each_split_nonempty(s, |c| windows::is_sep(c as u8)) |s| { + ss.push(s.to_owned()) + } unsafe { v.push_all_move(ss); } return WindowsPath { components: v, ..copy *self } } diff --git a/src/libcore/rand.rs b/src/libcore/rand.rs index 3085269f692ef..afa4ea66ca677 100644 --- a/src/libcore/rand.rs +++ b/src/libcore/rand.rs @@ -327,7 +327,9 @@ impl RngUtil for @Rng { */ fn gen_char_from(&self, chars: &str) -> char { fail_unless!(!chars.is_empty()); - self.choose(str::chars(chars)) + let mut cs = ~[]; + for str::each_char(chars) |c| { cs.push(c) } + 
self.choose(cs) } /// Return a random bool diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 92358c6a5e97b..0f393dee59779 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -56,15 +56,15 @@ pub fn from_slice(s: &str) -> ~str { impl ToStr for ~str { #[inline(always)] - fn to_str(&self) -> ~str { copy *self } + fn to_str(&self) -> ~str { from_slice(*self) } } impl ToStr for &'self str { #[inline(always)] - fn to_str(&self) -> ~str { ::str::from_slice(*self) } + fn to_str(&self) -> ~str { from_slice(*self) } } impl ToStr for @str { #[inline(always)] - fn to_str(&self) -> ~str { ::str::from_slice(*self) } + fn to_str(&self) -> ~str { from_slice(*self) } } /** @@ -186,6 +186,7 @@ pub fn push_str_no_overallocate(lhs: &mut ~str, rhs: &str) { raw::set_len(lhs, llen + rlen); } } + /// Appends a string slice to the back of a string #[inline(always)] pub fn push_str(lhs: &mut ~str, rhs: &str) { @@ -214,7 +215,6 @@ pub fn append(lhs: ~str, rhs: &str) -> ~str { v } - /// Concatenate a vector of strings pub fn concat(v: &[~str]) -> ~str { let mut s: ~str = ~""; @@ -383,7 +383,7 @@ Section: Transforming strings */ /** - * Converts a string to a vector of bytes + * Converts a string to a unique vector of bytes * * The result vector is not null-terminated. */ @@ -403,14 +403,11 @@ pub fn byte_slice(s: &str, f: &fn(v: &[u8]) -> T) -> T { } } -/// Convert a string to a vector of characters -pub fn chars(s: &str) -> ~[char] { - let mut buf = ~[], i = 0; - let len = len(s); - while i < len { - let CharRange {ch, next} = char_range_at(s, i); - unsafe { buf.push(ch); } - i = next; +/// Convert a string to a unique vector of characters +pub fn to_chars(s: &str) -> ~[char] { + let mut buf = ~[]; + for each_char(s) |c| { + buf.push(c); } buf } @@ -418,7 +415,7 @@ pub fn chars(s: &str) -> ~[char] { /** * Take a substring of another. 
* - * Returns a string containing `n` characters starting at byte offset + * Returns a slice pointing at `n` characters starting from byte offset * `begin`. */ pub fn substr(s: &'a str, begin: uint, n: uint) -> &'a str { @@ -437,105 +434,88 @@ pub fn slice(s: &'a str, begin: uint, end: uint) -> &'a str { unsafe { raw::slice_bytes(s, begin, end) } } -/// Splits a string into substrings at each occurrence of a given -/// character. -pub fn split_char(s: &str, sep: char) -> ~[~str] { - split_char_inner(s, sep, len(s), true, true) +/// Splits a string into substrings at each occurrence of a given character +pub fn each_split_char(s: &'a str, sep: char, it: &fn(&'a str) -> bool) { + each_split_char_inner(s, sep, len(s), true, true, it) +} + +/// Like `each_split_char`, but a trailing empty string is omitted +pub fn each_split_char_no_trailing(s: &'a str, sep: char, it: &fn(&'a str) -> bool) { + each_split_char_inner(s, sep, len(s), true, false, it) } /** * Splits a string into substrings at each occurrence of a given * character up to 'count' times. * - * The byte must be a valid UTF-8/ASCII byte + * The character must be a valid UTF-8/ASCII character */ -pub fn splitn_char(s: &str, sep: char, count: uint) -> ~[~str] { - split_char_inner(s, sep, count, true, true) +pub fn each_splitn_char(s: &'a str, sep: char, count: uint, it: &fn(&'a str) -> bool) { + each_split_char_inner(s, sep, count, true, true, it) } -/// Like `split_char`, but omits empty strings from the returned vector -pub fn split_char_nonempty(s: &str, sep: char) -> ~[~str] { - split_char_inner(s, sep, len(s), false, false) +/// Like `each_split_char`, but omits empty strings +pub fn each_split_char_nonempty(s: &'a str, sep: char, it: &fn(&'a str) -> bool) { + each_split_char_inner(s, sep, len(s), false, false, it) } -/** - * Like `split_char`, but a trailing empty string is omitted - * (e.g. 
`split_char_no_trailing("A B ",' ') == ~[~"A",~"B"]`) - */ -pub fn split_char_no_trailing(s: &str, sep: char) -> ~[~str] { - split_char_inner(s, sep, len(s), true, false) -} - -fn split_char_inner(s: &str, sep: char, count: uint, allow_empty: bool, - allow_trailing_empty: bool) -> ~[~str] { +fn each_split_char_inner(s: &'a str, sep: char, count: uint, allow_empty: bool, + allow_trailing_empty: bool, it: &fn(&'a str) -> bool) { if sep < 128u as char { let b = sep as u8, l = len(s); - let mut result = ~[], done = 0u; + let mut done = 0u; let mut i = 0u, start = 0u; while i < l && done < count { if s[i] == b { if allow_empty || start < i { - unsafe { - result.push(raw::slice_bytes_unique(s, start, i)); - } + if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { return; } } start = i + 1u; done += 1u; } i += 1u; } - // only push a non-empty trailing substring + // only slice a non-empty trailing substring if allow_trailing_empty || start < l { - unsafe { result.push(raw::slice_bytes_unique(s, start, l) ) }; + if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return; } } - result } else { - split_inner(s, |cur| cur == sep, count, allow_empty, allow_trailing_empty) + each_split_inner(s, |cur| cur == sep, count, allow_empty, allow_trailing_empty, it) } } - /// Splits a string into substrings using a character function -pub fn split(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] { - split_inner(s, sepfn, len(s), true, true) +pub fn each_split(s: &'a str, sepfn: &fn(char) -> bool, it: &fn(&'a str) -> bool) { + each_split_inner(s, sepfn, len(s), true, true, it) +} + +/// Like `each_split`, but a trailing empty string is omitted +pub fn each_split_no_trailing(s: &'a str, sepfn: &fn(char) -> bool, it: &fn(&'a str) -> bool) { + each_split_inner(s, sepfn, len(s), true, false, it) } /** * Splits a string into substrings using a character function, cutting at * most `count` times. 
*/ -pub fn splitn(s: &str, - sepfn: &fn(char) -> bool, - count: uint) - -> ~[~str] { - split_inner(s, sepfn, count, true, true) -} - -/// Like `split`, but omits empty strings from the returned vector -pub fn split_nonempty(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] { - split_inner(s, sepfn, len(s), false, false) +pub fn each_splitn(s: &'a str, sepfn: &fn(char) -> bool, count: uint, it: &fn(&'a str) -> bool) { + each_split_inner(s, sepfn, count, true, true, it) } - -/** - * Like `split`, but a trailing empty string is omitted - * (e.g. `split_no_trailing("A B ",' ') == ~[~"A",~"B"]`) - */ -pub fn split_no_trailing(s: &str, sepfn: &fn(char) -> bool) -> ~[~str] { - split_inner(s, sepfn, len(s), true, false) +/// Like `each_split`, but omits empty strings +pub fn each_split_nonempty(s: &'a str, sepfn: &fn(char) -> bool, it: &fn(&'a str) -> bool) { + each_split_inner(s, sepfn, len(s), false, false, it) } -fn split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint, - allow_empty: bool, allow_trailing_empty: bool) -> ~[~str] { +fn each_split_inner(s: &'a str, sepfn: &fn(cc: char) -> bool, count: uint, + allow_empty: bool, allow_trailing_empty: bool, it: &fn(&'a str) -> bool) { let l = len(s); - let mut result = ~[], i = 0u, start = 0u, done = 0u; + let mut i = 0u, start = 0u, done = 0u; while i < l && done < count { let CharRange {ch, next} = char_range_at(s, i); if sepfn(ch) { if allow_empty || start < i { - unsafe { - result.push(raw::slice_bytes_unique(s, start, i)); - } + if !it( unsafe{ raw::slice_bytes(s, start, i) } ) { return; } } start = next; done += 1u; @@ -543,15 +523,12 @@ fn split_inner(s: &str, sepfn: &fn(cc: char) -> bool, count: uint, i = next; } if allow_trailing_empty || start < l { - unsafe { - result.push(raw::slice_bytes_unique(s, start, l)); - } + if !it( unsafe{ raw::slice_bytes(s, start, l) } ) { return; } } - result } // See Issue #1932 for why this is a naive search -fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { +fn 
iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) { let sep_len = len(sep), l = len(s); fail_unless!(sep_len > 0u); let mut i = 0u, match_start = 0u, match_i = 0u; @@ -562,7 +539,7 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { match_i += 1u; // Found a match if match_i == sep_len { - f(match_start, i + 1u); + if !f(match_start, i + 1u) { return; } match_i = 0u; } i += 1u; @@ -578,10 +555,10 @@ fn iter_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { } } -fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { +fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint) -> bool) { let mut last_end = 0u; - do iter_matches(s, sep) |from, to| { - f(last_end, from); + for iter_matches(s, sep) |from, to| { + if !f(last_end, from) { return; } last_end = to; } f(last_end, len(s)); @@ -593,25 +570,23 @@ fn iter_between_matches(s: &'a str, sep: &'b str, f: &fn(uint, uint)) { * # Example * * ~~~ - * fail_unless!(["", "XXX", "YYY", ""] == split_str(".XXX.YYY.", ".")) + * let mut v = ~[]; + * for each_split_str(".XXX.YYY.", ".") |subs| { v.push(subs); } + * fail_unless!(v == ["", "XXX", "YYY", ""]); * ~~~ */ -pub fn split_str(s: &'a str, sep: &'b str) -> ~[~str] { - let mut result = ~[]; - do iter_between_matches(s, sep) |from, to| { - unsafe { result.push(raw::slice_bytes_unique(s, from, to)); } +pub fn each_split_str(s: &'a str, sep: &'b str, it: &fn(&'a str) -> bool) { + for iter_between_matches(s, sep) |from, to| { + if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; } } - result } -pub fn split_str_nonempty(s: &'a str, sep: &'b str) -> ~[~str] { - let mut result = ~[]; - do iter_between_matches(s, sep) |from, to| { +pub fn each_split_str_nonempty(s: &'a str, sep: &'b str, it: &fn(&'a str) -> bool) { + for iter_between_matches(s, sep) |from, to| { if to > from { - unsafe { result.push(raw::slice_bytes_unique(s, from, to)); } + if !it( unsafe { raw::slice_bytes(s, from, to) } ) { return; } } } - 
result } /// Levenshtein Distance between two strings @@ -649,65 +624,99 @@ pub fn levdistance(s: &str, t: &str) -> uint { } /** - * Splits a string into a vector of the substrings separated by LF ('\n'). + * Splits a string into substrings separated by LF ('\n'). */ -pub fn lines(s: &str) -> ~[~str] { - split_char_no_trailing(s, '\n') +pub fn each_line(s: &'a str, it: &fn(&'a str) -> bool) { + each_split_char_no_trailing(s, '\n', it) } /** - * Splits a string into a vector of the substrings separated by LF ('\n') + * Splits a string into substrings separated by LF ('\n') * and/or CR LF ("\r\n") */ -pub fn lines_any(s: &str) -> ~[~str] { - vec::map(lines(s), |s| { - let l = len(*s); - let mut cp = copy *s; +pub fn each_line_any(s: &'a str, it: &fn(&'a str) -> bool) { + for each_line(s) |s| { + let l = s.len(); if l > 0u && s[l - 1u] == '\r' as u8 { - unsafe { raw::set_len(&mut cp, l - 1u); } + if !it( unsafe { raw::slice_bytes(s, 0, l - 1) } ) { return; } + } else { + if !it( s ) { return; } } - cp - }) + } } -/// Splits a string into a vector of the substrings separated by whitespace -pub fn words(s: &str) -> ~[~str] { - split_nonempty(s, char::is_whitespace) +/// Splits a string into substrings separated by whitespace +pub fn each_word(s: &'a str, it: &fn(&'a str) -> bool) { + each_split_nonempty(s, char::is_whitespace, it) } -/** Split a string into a vector of substrings, - * each of which is less than a limit +/** Splits a string into substrings with possibly internal whitespace, + * each of them at most `lim` bytes long. The substrings have leading and trailing + * whitespace removed, and are only cut at whitespace boundaries. + * + * #Failure: + * + * Fails during iteration if the string contains a non-whitespace + * sequence longer than the limit. 
*/ -pub fn split_within(ss: &str, lim: uint) -> ~[~str] { - let words = str::words(ss); +pub fn each_split_within(ss: &'a str, lim: uint, it: &fn(&'a str) -> bool) { + // Just for fun, let's write this as an state machine: - // empty? - if words == ~[] { return ~[]; } + enum SplitWithinState { + A, // leading whitespace, initial state + B, // words + C, // internal and trailing whitespace + } + enum Whitespace { + Ws, // current char is whitespace + Cr // current char is not whitespace + } + enum LengthLimit { + UnderLim, // current char makes current substring still fit in limit + OverLim // current char makes current substring no longer fit in limit + } - let mut rows : ~[~str] = ~[]; - let mut row : ~str = ~""; + let mut slice_start = 0; + let mut last_start = 0; + let mut last_end = 0; + let mut state = A; - for words.each |wptr| { - let word = copy *wptr; + let mut cont = true; + let slice: &fn() = || { cont = it(slice(ss, slice_start, last_end)) }; - // if adding this word to the row would go over the limit, - // then start a new row - if row.len() + word.len() + 1 > lim { - rows.push(copy row); // save previous row - row = word; // start a new one - } else { - if row.len() > 0 { row += ~" " } // separate words - row += word; // append to this row - } - } + let machine: &fn(uint, char) -> bool = |i, c| { + let whitespace = if char::is_whitespace(c) { Ws } else { Cr }; + let limit = if (i - slice_start + 1) <= lim { UnderLim } else { OverLim }; - // save the last row - if row != ~"" { rows.push(row); } + state = match (state, whitespace, limit) { + (A, Ws, _) => { A } + (A, Cr, _) => { slice_start = i; last_start = i; B } - rows -} + (B, Cr, UnderLim) => { B } + (B, Cr, OverLim) if (i - last_start + 1) > lim + => { fail!(~"word longer than limit!") } + (B, Cr, OverLim) => { slice(); slice_start = last_start; B } + (B, Ws, UnderLim) => { last_end = i; C } + (B, Ws, OverLim) => { last_end = i; slice(); A } + + (C, Cr, UnderLim) => { last_start = i; B } + (C, Cr, 
OverLim) => { slice(); slice_start = i; last_start = i; last_end = i; B } + (C, Ws, OverLim) => { slice(); A } + (C, Ws, UnderLim) => { C } + }; + + cont + }; + str::each_chari(ss, machine); + // Let the automaton 'run out' by supplying trailing whitespace + let mut fake_i = ss.len(); + while cont && match state { B | C => true, A => false } { + machine(fake_i, ' '); + fake_i += 1; + } +} /// Convert a string to lowercase. ASCII only pub fn to_lower(s: &str) -> ~str { @@ -738,7 +747,7 @@ pub fn to_upper(s: &str) -> ~str { */ pub fn replace(s: &str, from: &str, to: &str) -> ~str { let mut result = ~"", first = true; - do iter_between_matches(s, from) |start, end| { + for iter_between_matches(s, from) |start, end| { if first { first = false; } else { @@ -997,10 +1006,16 @@ pub fn eachi_reverse(s: &str, it: &fn(uint, u8) -> bool) { } } -/// Iterates over the chars in a string +/// Iterate over each char of a string, without allocating #[inline(always)] pub fn each_char(s: &str, it: &fn(char) -> bool) { - each_chari(s, |_i, c| it(c)) + let mut i = 0; + let len = len(s); + while i < len { + let CharRange {ch, next} = char_range_at(s, i); + if !it(ch) { return; } + i = next; + } } /// Iterates over the chars in a string, with indices @@ -1038,32 +1053,6 @@ pub fn each_chari_reverse(s: &str, it: &fn(uint, char) -> bool) { } } -/// Apply a function to each substring after splitting by character -pub fn split_char_each(ss: &str, cc: char, ff: &fn(v: &str) -> bool) { - vec::each(split_char(ss, cc), |s| ff(*s)) -} - -/** - * Apply a function to each substring after splitting by character, up to - * `count` times - */ -pub fn splitn_char_each(ss: &str, sep: char, count: uint, - ff: &fn(v: &str) -> bool) { - vec::each(splitn_char(ss, sep, count), |s| ff(*s)) -} - -/// Apply a function to each word -pub fn words_each(ss: &str, ff: &fn(v: &str) -> bool) { - vec::each(words(ss), |s| ff(*s)) -} - -/** - * Apply a function to each line (by '\n') - */ -pub fn lines_each(ss: &str, ff: 
&fn(v: &str) -> bool) { - vec::each(lines(ss), |s| ff(*s)) -} - /* Section: Searching */ @@ -1209,8 +1198,7 @@ pub fn rfind_char_from(s: &str, c: char, start: uint) -> Option { * or equal to `len(s)`. `start` must be the index of a character boundary, * as defined by `is_char_boundary`. */ -pub fn rfind_char_between(s: &str, c: char, start: uint, end: uint) - -> Option { +pub fn rfind_char_between(s: &str, c: char, start: uint, end: uint) -> Option { if c < 128u as char { fail_unless!(start >= end); fail_unless!(start <= len(s)); @@ -1291,11 +1279,7 @@ pub fn find_from(s: &str, start: uint, f: &fn(char) * or equal to `len(s)`. `start` must be the index of a character * boundary, as defined by `is_char_boundary`. */ -pub fn find_between(s: &str, - start: uint, - end: uint, - f: &fn(char) -> bool) - -> Option { +pub fn find_between(s: &str, start: uint, end: uint, f: &fn(char) -> bool) -> Option { fail_unless!(start <= end); fail_unless!(end <= len(s)); fail_unless!(is_char_boundary(s, start)); @@ -1346,8 +1330,7 @@ pub fn rfind(s: &str, f: &fn(char) -> bool) -> Option { * `start` must be less than or equal to `len(s)', `start` must be the * index of a character boundary, as defined by `is_char_boundary` */ -pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool) - -> Option { +pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool) -> Option { rfind_between(s, start, 0u, f) } @@ -1373,9 +1356,7 @@ pub fn rfind_from(s: &str, start: uint, f: &fn(char) -> bool) * than or equal to `len(s)`. 
`start` must be the index of a character * boundary, as defined by `is_char_boundary` */ -pub fn rfind_between(s: &str, start: uint, end: uint, - f: &fn(char) -> bool) - -> Option { +pub fn rfind_between(s: &str, start: uint, end: uint, f: &fn(char) -> bool) -> Option { fail_unless!(start >= end); fail_unless!(start <= len(s)); fail_unless!(is_char_boundary(s, start)); @@ -1431,8 +1412,7 @@ pub fn find_str(haystack: &'a str, needle: &'b str) -> Option { * * `start` must be less than or equal to `len(s)` */ -pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint) - -> Option { +pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint) -> Option { find_str_between(haystack, needle, start, len(haystack)) } @@ -1456,9 +1436,8 @@ pub fn find_str_from(haystack: &'a str, needle: &'b str, start: uint) * `start` must be less than or equal to `end` and `end` must be less than * or equal to `len(s)`. */ -pub fn find_str_between(haystack: &'a str, needle: &'b str, start: uint, - end:uint) - -> Option { +pub fn find_str_between(haystack: &'a str, needle: &'b str, start: uint, end:uint) + -> Option { // See Issue #1932 for why this is a naive search fail_unless!(end <= len(haystack)); let needle_len = len(needle); @@ -1661,7 +1640,6 @@ pub fn utf16_chars(v: &[u16], f: &fn(char)) { } } - pub fn from_utf16(v: &[u16]) -> ~str { let mut buf = ~""; unsafe { @@ -1919,14 +1897,6 @@ static tag_five_b: uint = 248u; static max_five_b: uint = 67108864u; static tag_six_b: uint = 252u; -// Constants used for converting strs to floats -pub static inf_buf: [u8*3] = ['i' as u8, 'n' as u8, 'f' as u8]; -pub static positive_inf_buf: [u8*4] = ['+' as u8, 'i' as u8, - 'n' as u8, 'f' as u8]; -pub static negative_inf_buf: [u8*4] = ['-' as u8, 'i' as u8, - 'n' as u8, 'f' as u8]; -pub static nan_buf: [u8*3] = ['N' as u8, 'a' as u8, 'N' as u8]; - /** * Work with the byte buffer of a string. 
* @@ -1986,7 +1956,6 @@ pub fn as_c_str(s: &str, f: &fn(*libc::c_char) -> T) -> T { } } - /** * Work with the byte buffer and length of a slice. * @@ -2271,7 +2240,7 @@ pub mod traits { #[cfg(test)] pub mod traits {} -pub trait StrSlice { +pub trait StrSlice<'self> { fn all(&self, it: &fn(char) -> bool) -> bool; fn any(&self, it: &fn(char) -> bool) -> bool; fn contains(&self, needle: &'a str) -> bool; @@ -2291,9 +2260,9 @@ pub trait StrSlice { fn len(&self) -> uint; fn char_len(&self) -> uint; fn slice(&self, begin: uint, end: uint) -> &'self str; - fn split(&self, sepfn: &fn(char) -> bool) -> ~[~str]; - fn split_char(&self, sep: char) -> ~[~str]; - fn split_str(&self, sep: &'a str) -> ~[~str]; + fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool); + fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool); + fn each_split_str(&self, sep: &'a str, it: &fn(&'self str) -> bool); fn starts_with(&self, needle: &'a str) -> bool; fn substr(&self, begin: uint, n: uint) -> &'self str; fn to_lower(&self) -> ~str; @@ -2314,7 +2283,7 @@ pub trait StrSlice { } /// Extension methods for strings -impl StrSlice for &'self str { +impl StrSlice<'self> for &'self str { /** * Return true if a predicate matches all characters or if the string * contains no characters @@ -2413,20 +2382,24 @@ impl StrSlice for &'self str { } /// Splits a string into substrings using a character function #[inline] - fn split(&self, sepfn: &fn(char) -> bool) -> ~[~str] { - split(*self, sepfn) + fn each_split(&self, sepfn: &fn(char) -> bool, it: &fn(&'self str) -> bool) { + each_split(*self, sepfn, it) } /** * Splits a string into substrings at each occurrence of a given character */ #[inline] - fn split_char(&self, sep: char) -> ~[~str] { split_char(*self, sep) } + fn each_split_char(&self, sep: char, it: &fn(&'self str) -> bool) { + each_split_char(*self, sep, it) + } /** * Splits a string into a vector of the substrings separated by a given * string */ #[inline] - fn 
split_str(&self, sep: &'a str) -> ~[~str] { split_str(*self, sep) } + fn each_split_str(&self, sep: &'a str, it: &fn(&'self str) -> bool) { + each_split_str(*self, sep, it) + } /// Returns true if one string starts with another #[inline] fn starts_with(&self, needle: &'a str) -> bool { @@ -2519,7 +2492,7 @@ impl OwnedStr for ~str { impl Clone for ~str { #[inline(always)] fn clone(&self) -> ~str { - self.to_str() // hilarious + from_slice(*self) } } @@ -2613,7 +2586,8 @@ mod tests { fn test_split_char() { fn t(s: &str, c: char, u: &[~str]) { debug!(~"split_byte: " + s); - let v = split_char(s, c); + let mut v = ~[]; + for each_split_char(s, c) |s| { v.push(s.to_owned()) } debug!("split_byte to: %?", v); fail_unless!(vec::all2(v, u, |a,b| a == b)); } @@ -2621,28 +2595,31 @@ mod tests { t(~".hello.there", '.', ~[~"", ~"hello", ~"there"]); t(~"...hello.there.", '.', ~[~"", ~"", ~"", ~"hello", ~"there", ~""]); - fail_unless!(~[~"", ~"", ~"", ~"hello", ~"there", ~""] - == split_char(~"...hello.there.", '.')); - - fail_unless!(~[~""] == split_char(~"", 'z')); - fail_unless!(~[~"",~""] == split_char(~"z", 'z')); - fail_unless!(~[~"ok"] == split_char(~"ok", 'z')); + t(~"", 'z', ~[~""]); + t(~"z", 'z', ~[~"",~""]); + t(~"ok", 'z', ~[~"ok"]); } #[test] fn test_split_char_2() { + fn t(s: &str, c: char, u: &[~str]) { + debug!(~"split_byte: " + s); + let mut v = ~[]; + for each_split_char(s, c) |s| { v.push(s.to_owned()) } + debug!("split_byte to: %?", v); + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中华", ~"iệt Nam"] - == split_char(data, 'V')); - fail_unless!(~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"] - == split_char(data, 'ท')); + t(data, 'V', ~[~"ประเทศไทย中华", ~"iệt Nam"]); + t(data, 'ท', ~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]); } #[test] fn test_splitn_char() { fn t(s: &str, c: char, n: uint, u: &[~str]) { debug!(~"splitn_byte: " + s); - let v = splitn_char(s, c, n); + let mut v = ~[]; + for each_splitn_char(s, c, n) |s| 
{ v.push(s.to_owned()) } debug!("split_byte to: %?", v); debug!("comparing vs. %?", u); fail_unless!(vec::all2(v, u, |a,b| a == b)); @@ -2654,46 +2631,56 @@ mod tests { t(~".hello.there", '.', 0u, ~[~".hello.there"]); t(~".hello.there", '.', 1u, ~[~"", ~"hello.there"]); t(~"...hello.there.", '.', 3u, ~[~"", ~"", ~"", ~"hello.there."]); - t(~"...hello.there.", '.', 5u, - ~[~"", ~"", ~"", ~"hello", ~"there", ~""]); + t(~"...hello.there.", '.', 5u, ~[~"", ~"", ~"", ~"hello", ~"there", ~""]); - fail_unless!(~[~""] == splitn_char(~"", 'z', 5u)); - fail_unless!(~[~"",~""] == splitn_char(~"z", 'z', 5u)); - fail_unless!(~[~"ok"] == splitn_char(~"ok", 'z', 5u)); - fail_unless!(~[~"z"] == splitn_char(~"z", 'z', 0u)); - fail_unless!(~[~"w.x.y"] == splitn_char(~"w.x.y", '.', 0u)); - fail_unless!(~[~"w",~"x.y"] == splitn_char(~"w.x.y", '.', 1u)); + t(~"", 'z', 5u, ~[~""]); + t(~"z", 'z', 5u, ~[~"",~""]); + t(~"ok", 'z', 5u, ~[~"ok"]); + t(~"z", 'z', 0u, ~[~"z"]); + t(~"w.x.y", '.', 0u, ~[~"w.x.y"]); + t(~"w.x.y", '.', 1u, ~[~"w",~"x.y"]); } #[test] fn test_splitn_char_2 () { - let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中", ~"Việt Nam"] - == splitn_char(data, '华', 1u)); + fn t(s: &str, c: char, n: uint, u: &[~str]) { + debug!(~"splitn_byte: " + s); + let mut v = ~[]; + for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } + debug!("split_byte to: %?", v); + debug!("comparing vs. 
%?", u); + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } - fail_unless!(~[~"", ~"", ~"XXX", ~"YYYzWWWz"] - == splitn_char(~"zzXXXzYYYzWWWz", 'z', 3u)); - fail_unless!(~[~"",~""] == splitn_char(~"z", 'z', 5u)); - fail_unless!(~[~""] == splitn_char(~"", 'z', 5u)); - fail_unless!(~[~"ok"] == splitn_char(~"ok", 'z', 5u)); + t(~"ประเทศไทย中华Việt Nam", '华', 1u, ~[~"ประเทศไทย中", ~"Việt Nam"]); + t(~"zzXXXzYYYzWWWz", 'z', 3u, ~[~"", ~"", ~"XXX", ~"YYYzWWWz"]); + t(~"z", 'z', 5u, ~[~"",~""]); + t(~"", 'z', 5u, ~[~""]); + t(~"ok", 'z', 5u, ~[~"ok"]); } #[test] fn test_splitn_char_3() { + fn t(s: &str, c: char, n: uint, u: &[~str]) { + debug!(~"splitn_byte: " + s); + let mut v = ~[]; + for each_splitn_char(s, c, n) |s| { v.push(s.to_owned()) } + debug!("split_byte to: %?", v); + debug!("comparing vs. %?", u); + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中华", ~"iệt Nam"] - == splitn_char(data, 'V', 1u)); - fail_unless!(~[~"ประเ", ~"ศไทย中华Việt Nam"] - == splitn_char(data, 'ท', 1u)); - + t(data, 'V', 1u, ~[~"ประเทศไทย中华", ~"iệt Nam"]); + t(data, 'ท', 1u, ~[~"ประเ", ~"ศไทย中华Việt Nam"]); } #[test] fn test_split_char_no_trailing() { - fn t(s: &str, c: char, u: &[~str]) { + fn t(s: &str, c: char, u: &[~str]) { debug!(~"split_byte: " + s); - let v = split_char_no_trailing(s, c); + let mut v = ~[]; + for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) } debug!("split_byte to: %?", v); fail_unless!(vec::all2(v, u, |a,b| a == b)); } @@ -2701,91 +2688,80 @@ mod tests { t(~".hello.there", '.', ~[~"", ~"hello", ~"there"]); t(~"...hello.there.", '.', ~[~"", ~"", ~"", ~"hello", ~"there"]); - fail_unless!(~[~"", ~"", ~"", ~"hello", ~"there"] - == split_char_no_trailing(~"...hello.there.", '.')); - - fail_unless!(~[] == split_char_no_trailing(~"", 'z')); - fail_unless!(~[~""] == split_char_no_trailing(~"z", 'z')); - fail_unless!(~[~"ok"] == split_char_no_trailing(~"ok", 'z')); + t(~"...hello.there.", '.', 
~[~"", ~"", ~"", ~"hello", ~"there"]); + t(~"", 'z', ~[]); + t(~"z", 'z', ~[~""]); + t(~"ok", 'z', ~[~"ok"]); } #[test] fn test_split_char_no_trailing_2() { + fn t(s: &str, c: char, u: &[~str]) { + debug!(~"split_byte: " + s); + let mut v = ~[]; + for each_split_char_no_trailing(s, c) |s| { v.push(s.to_owned()) } + debug!("split_byte to: %?", v); + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中华", ~"iệt Nam"] - == split_char_no_trailing(data, 'V')); - fail_unless!(~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"] - == split_char_no_trailing(data, 'ท')); + t(data, 'V', ~[~"ประเทศไทย中华", ~"iệt Nam"]); + t(data, 'ท', ~[~"ประเ", ~"ศไ", ~"ย中华Việt Nam"]); } #[test] fn test_split_str() { - fn t(s: &str, sep: &'a str, i: int, k: &str) { - fn borrow(x: &'a str) -> &'a str { x } - let v = split_str(s, sep); - fail_unless!(borrow(v[i]) == k); + fn t(s: &str, sep: &'a str, u: &[~str]) { + let mut v = ~[]; + for each_split_str(s, sep) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); } - - t(~"--1233345--", ~"12345", 0, ~"--1233345--"); - t(~"abc::hello::there", ~"::", 0, ~"abc"); - t(~"abc::hello::there", ~"::", 1, ~"hello"); - t(~"abc::hello::there", ~"::", 2, ~"there"); - t(~"::hello::there", ~"::", 0, ~""); - t(~"hello::there::", ~"::", 2, ~""); - t(~"::hello::there::", ~"::", 3, ~""); - - let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย", ~"Việt Nam"] - == split_str (data, ~"中华")); - - fail_unless!(~[~"", ~"XXX", ~"YYY", ~""] - == split_str(~"zzXXXzzYYYzz", ~"zz")); - - fail_unless!(~[~"zz", ~"zYYYz"] == split_str(~"zzXXXzYYYz", ~"XXX")); - - - fail_unless!(~[~"", ~"XXX", ~"YYY", ~""] == - split_str(~".XXX.YYY.", ~".")); - fail_unless!(~[~""] == split_str(~"", ~".")); - fail_unless!(~[~"",~""] == split_str(~"zz", ~"zz")); - fail_unless!(~[~"ok"] == split_str(~"ok", ~"z")); - fail_unless!(~[~"",~"z"] == split_str(~"zzz", ~"zz")); - fail_unless!(~[~"",~"",~"z"] == split_str(~"zzzzz", 
~"zz")); + t(~"--1233345--", ~"12345", ~[~"--1233345--"]); + t(~"abc::hello::there", ~"::", ~[~"abc", ~"hello", ~"there"]); + t(~"::hello::there", ~"::", ~[~"", ~"hello", ~"there"]); + t(~"hello::there::", ~"::", ~[~"hello", ~"there", ~""]); + t(~"::hello::there::", ~"::", ~[~"", ~"hello", ~"there", ~""]); + t(~"ประเทศไทย中华Việt Nam", ~"中华", ~[~"ประเทศไทย", ~"Việt Nam"]); + t(~"zzXXXzzYYYzz", ~"zz", ~[~"", ~"XXX", ~"YYY", ~""]); + t(~"zzXXXzYYYz", ~"XXX", ~[~"zz", ~"zYYYz"]); + t(~".XXX.YYY.", ~".", ~[~"", ~"XXX", ~"YYY", ~""]); + t(~"", ~".", ~[~""]); + t(~"zz", ~"zz", ~[~"",~""]); + t(~"ok", ~"z", ~[~"ok"]); + t(~"zzz", ~"zz", ~[~"",~"z"]); + t(~"zzzzz", ~"zz", ~[~"",~"",~"z"]); } #[test] fn test_split() { - let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中", ~"Việt Nam"] - == split (data, |cc| cc == '华')); - - fail_unless!(~[~"", ~"", ~"XXX", ~"YYY", ~""] - == split(~"zzXXXzYYYz", char::is_lowercase)); - - fail_unless!(~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"] - == split(~"zzXXXzYYYz", char::is_uppercase)); + fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) { + let mut v = ~[]; + for each_split(s, sepf) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } - fail_unless!(~[~"",~""] == split(~"z", |cc| cc == 'z')); - fail_unless!(~[~""] == split(~"", |cc| cc == 'z')); - fail_unless!(~[~"ok"] == split(~"ok", |cc| cc == 'z')); + t(~"ประเทศไทย中华Việt Nam", |cc| cc == '华', ~[~"ประเทศไทย中", ~"Việt Nam"]); + t(~"zzXXXzYYYz", char::is_lowercase, ~[~"", ~"", ~"XXX", ~"YYY", ~""]); + t(~"zzXXXzYYYz", char::is_uppercase, ~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]); + t(~"z", |cc| cc == 'z', ~[~"",~""]); + t(~"", |cc| cc == 'z', ~[~""]); + t(~"ok", |cc| cc == 'z', ~[~"ok"]); } #[test] fn test_split_no_trailing() { - let data = ~"ประเทศไทย中华Việt Nam"; - fail_unless!(~[~"ประเทศไทย中", ~"Việt Nam"] - == split_no_trailing (data, |cc| cc == '华')); - - fail_unless!(~[~"", ~"", ~"XXX", ~"YYY"] - == split_no_trailing(~"zzXXXzYYYz", 
char::is_lowercase)); - - fail_unless!(~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"] - == split_no_trailing(~"zzXXXzYYYz", char::is_uppercase)); + fn t(s: &str, sepf: &fn(char) -> bool, u: &[~str]) { + let mut v = ~[]; + for each_split_no_trailing(s, sepf) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } - fail_unless!(~[~""] == split_no_trailing(~"z", |cc| cc == 'z')); - fail_unless!(~[] == split_no_trailing(~"", |cc| cc == 'z')); - fail_unless!(~[~"ok"] == split_no_trailing(~"ok", |cc| cc == 'z')); + t(~"ประเทศไทย中华Việt Nam", |cc| cc == '华', ~[~"ประเทศไทย中", ~"Việt Nam"]); + t(~"zzXXXzYYYz", char::is_lowercase, ~[~"", ~"", ~"XXX", ~"YYY"]); + t(~"zzXXXzYYYz", char::is_uppercase, ~[~"zz", ~"", ~"", ~"z", ~"", ~"", ~"z"]); + t(~"z", |cc| cc == 'z', ~[~""]); + t(~"", |cc| cc == 'z', ~[]); + t(~"ok", |cc| cc == 'z', ~[~"ok"]); } #[test] @@ -2793,49 +2769,50 @@ mod tests { let lf = ~"\nMary had a little lamb\nLittle lamb\n"; let crlf = ~"\r\nMary had a little lamb\r\nLittle lamb\r\n"; - fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb"] - == lines(lf)); - - fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb"] - == lines_any(lf)); - - fail_unless!(~[~"\r", ~"Mary had a little lamb\r", - ~"Little lamb\r"] - == lines(crlf)); - - fail_unless!(~[~"", ~"Mary had a little lamb", ~"Little lamb"] - == lines_any(crlf)); + fn t(s: &str, f: &fn(&str, &fn(&str) -> bool), u: &[~str]) { + let mut v = ~[]; + for f(s) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } - fail_unless!(~[] == lines (~"")); - fail_unless!(~[] == lines_any(~"")); - fail_unless!(~[~""] == lines (~"\n")); - fail_unless!(~[~""] == lines_any(~"\n")); - fail_unless!(~[~"banana"] == lines (~"banana")); - fail_unless!(~[~"banana"] == lines_any(~"banana")); + t(lf, each_line ,~[~"", ~"Mary had a little lamb", ~"Little lamb"]); + t(lf, each_line_any, ~[~"", ~"Mary had a little lamb", ~"Little lamb"]); + t(crlf, each_line, ~[~"\r", ~"Mary 
had a little lamb\r", ~"Little lamb\r"]); + t(crlf, each_line_any, ~[~"", ~"Mary had a little lamb", ~"Little lamb"]); + t(~"", each_line, ~[]); + t(~"", each_line_any, ~[]); + t(~"\n", each_line, ~[~""]); + t(~"\n", each_line_any, ~[~""]); + t(~"banana", each_line, ~[~"banana"]); + t(~"banana", each_line_any, ~[~"banana"]); } #[test] fn test_words () { + fn t(s: &str, f: &fn(&str, &fn(&str) -> bool), u: &[~str]) { + let mut v = ~[]; + for f(s) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } let data = ~"\nMary had a little lamb\nLittle lamb\n"; - fail_unless!(~[ - ~"Mary",~"had",~"a",~"little",~"lamb",~"Little",~"lamb"] - == words(data)); - fail_unless!(~[~"ok"] == words(~"ok")); - fail_unless!(~[] == words(~"")); + t(data, each_word, ~[~"Mary",~"had",~"a",~"little",~"lamb",~"Little",~"lamb"]); + t(~"ok", each_word, ~[~"ok"]); + t(~"", each_word, ~[]); } #[test] fn test_split_within() { - fail_unless!(split_within(~"", 0) == ~[]); - fail_unless!(split_within(~"", 15) == ~[]); - fail_unless!(split_within(~"hello", 15) == ~[~"hello"]); - - let data = ~"\nMary had a little lamb\nLittle lamb\n"; - error!("~~~~ %?", split_within(data, 15)); - fail_unless!(split_within(data, 15) == ~[~"Mary had a", - ~"little lamb", - ~"Little lamb"]); + fn t(s: &str, i: uint, u: &[~str]) { + let mut v = ~[]; + for each_split_within(s, i) |s| { v.push(s.to_owned()) } + fail_unless!(vec::all2(v, u, |a,b| a == b)); + } + t(~"", 0, ~[]); + t(~"", 15, ~[]); + t(~"hello", 15, ~[~"hello"]); + t(~"\nMary had a little lamb\nLittle lamb\n", 15, + ~[~"Mary had a", ~"little lamb", ~"Little lamb"]); } #[test] @@ -3365,7 +3342,7 @@ mod tests { let mut ii = 0; - for split_char_each(data, ' ') |xx| { + for each_split_char(data, ' ') |xx| { match ii { 0 => fail_unless!("\nMary" == xx), 1 => fail_unless!("had" == xx), @@ -3383,7 +3360,7 @@ mod tests { let mut ii = 0; - for splitn_char_each(data, ' ', 2u) |xx| { + for each_splitn_char(data, ' ', 2u) |xx| { match ii { 0 => 
fail_unless!("\nMary" == xx), 1 => fail_unless!("had" == xx), @@ -3400,7 +3377,7 @@ mod tests { let mut ii = 0; - for words_each(data) |ww| { + for each_word(data) |ww| { match ii { 0 => fail_unless!("Mary" == ww), 1 => fail_unless!("had" == ww), @@ -3411,7 +3388,7 @@ mod tests { ii += 1; } - words_each(~"", |_x| fail!()); // should not fail + each_word(~"", |_x| fail!()); // should not fail } #[test] @@ -3420,7 +3397,7 @@ mod tests { let mut ii = 0; - for lines_each(lf) |x| { + for each_line(lf) |x| { match ii { 0 => fail_unless!("" == x), 1 => fail_unless!("Mary had a little lamb" == x), @@ -3464,7 +3441,7 @@ mod tests { let ss = ~"ศไทย中华Việt Nam"; fail_unless!(~['ศ','ไ','ท','ย','中','华','V','i','ệ','t',' ','N','a', 'm'] - == chars(ss)); + == to_chars(ss)); } #[test] diff --git a/src/librust/rust.rc b/src/librust/rust.rc index e590586abbb39..ffd7669c2d2ee 100644 --- a/src/librust/rust.rc +++ b/src/librust/rust.rc @@ -9,7 +9,7 @@ // except according to those terms. // rust - central access to other rust tools -// FIXME #2238 Make commands run and test emit proper file endings on winds +// FIXME #2238 Make commands run and test emit proper file endings on windows // FIXME #2238 Make run only accept source that emits an executable #[link(name = "rust", @@ -29,10 +29,12 @@ enum ValidUsage { } impl ValidUsage { - fn is_valid(&self) -> bool { match *self { - Valid => true, - Invalid => false - }} + fn is_valid(&self) -> bool { + match *self { + Valid => true, + Invalid => false + } + } } enum Action { @@ -128,7 +130,9 @@ fn cmd_help(args: &[~str]) -> ValidUsage { match command.usage_full { UsgStr(msg) => io::println(fmt!("%s\n", msg)), UsgExec(commandline) => { - let words = str::words(commandline); + let mut words = ~[]; + for str::each_word(commandline) |word| { words.push(word.to_owned()) } + let words = words; let (prog, args) = (words.head(), words.tail()); run::run_program(*prog, args); } @@ -184,7 +188,9 @@ fn do_command(command: &Command, args: &[~str]) -> 
ValidUsage { match command.action { Call(f) => f(args), Exec(commandline) => { - let words = str::words(commandline); + let mut words = ~[]; + for str::each_word(commandline) |word| { words.push(word.to_owned()) } + let words = words; let (prog, prog_args) = (words.head(), words.tail()); let exitstatus = run::run_program( *prog, diff --git a/src/librustc/metadata/cstore.rs b/src/librustc/metadata/cstore.rs index 581ad5336de0f..018a365f37f52 100644 --- a/src/librustc/metadata/cstore.rs +++ b/src/librustc/metadata/cstore.rs @@ -120,7 +120,9 @@ pub fn get_used_libraries(cstore: @mut CStore) -> ~[~str] { } pub fn add_used_link_args(cstore: @mut CStore, args: &str) { - cstore.used_link_args.push_all(args.split_char(' ')); + for args.each_split_char(' ') |s| { + cstore.used_link_args.push(s.to_owned()); + } } pub fn get_used_link_args(cstore: @mut CStore) -> ~[~str] { diff --git a/src/librustc/middle/resolve.rs b/src/librustc/middle/resolve.rs index 00883b28b0493..079110e67f511 100644 --- a/src/librustc/middle/resolve.rs +++ b/src/librustc/middle/resolve.rs @@ -76,7 +76,7 @@ use syntax::visit::{visit_mod, visit_ty, vt}; use syntax::opt_vec::OptVec; use core::option::{Some, get, is_some, is_none}; -use core::str::{connect, split_str}; +use core::str::{connect, each_split_str}; use core::hashmap::linear::LinearMap; use std::oldmap::HashMap; @@ -1696,7 +1696,8 @@ pub impl Resolver { entry: %s (%?)", path_string, def_like); - let mut pieces = split_str(path_string, ~"::"); + let mut pieces = ~[]; + for each_split_str(path_string, "::") |s| { pieces.push(s.to_owned()) } let final_ident_str = pieces.pop(); let final_ident = self.session.ident_of(final_ident_str); diff --git a/src/librustdoc/desc_to_brief_pass.rs b/src/librustdoc/desc_to_brief_pass.rs index 957b94d18f532..012a56c5b720a 100644 --- a/src/librustdoc/desc_to_brief_pass.rs +++ b/src/librustdoc/desc_to_brief_pass.rs @@ -183,7 +183,8 @@ fn first_sentence_(s: &str) -> ~str { } fn paragraphs(s: &str) -> ~[~str] { - let 
lines = str::lines_any(s); + let mut lines = ~[]; + for str::each_line_any(s) |line| { lines.push(line.to_owned()); } let mut whitespace_lines = 0; let mut accum = ~""; let paras = do vec::foldl(~[], lines) |paras, line| { diff --git a/src/librustdoc/markdown_pass.rs b/src/librustdoc/markdown_pass.rs index 73f3aa53c250d..1e39373d9a5be 100644 --- a/src/librustdoc/markdown_pass.rs +++ b/src/librustdoc/markdown_pass.rs @@ -534,9 +534,11 @@ fn write_sig(ctxt: &Ctxt, sig: Option<~str>) { } fn code_block_indent(s: ~str) -> ~str { - let lines = str::lines_any(s); - let indented = vec::map(lines, |line| fmt!(" %s", *line) ); - str::connect(indented, ~"\n") + let mut indented = ~[]; + for str::each_line_any(s) |line| { + indented.push(fmt!(" %s", line)); + } + str::connect(indented, "\n") } #[test] diff --git a/src/librustdoc/sectionalize_pass.rs b/src/librustdoc/sectionalize_pass.rs index 8b058048ff4ac..33003a59611d4 100644 --- a/src/librustdoc/sectionalize_pass.rs +++ b/src/librustdoc/sectionalize_pass.rs @@ -104,8 +104,8 @@ fn sectionalize(desc: Option<~str>) -> (Option<~str>, ~[doc::Section]) { if desc.is_none() { return (None, ~[]); } - - let lines = str::lines((copy desc).get()); + let mut lines = ~[]; + for str::each_line_any(*desc.get_ref()) |line| { lines.push(line.to_owned()); } let mut new_desc = None::<~str>; let mut current_section = None; diff --git a/src/librustdoc/unindent_pass.rs b/src/librustdoc/unindent_pass.rs index ecd72950468e1..6207e2252e40b 100644 --- a/src/librustdoc/unindent_pass.rs +++ b/src/librustdoc/unindent_pass.rs @@ -33,7 +33,8 @@ pub fn mk_pass() -> Pass { } fn unindent(s: &str) -> ~str { - let lines = str::lines_any(s); + let mut lines = ~[]; + for str::each_line_any(s) |line| { lines.push(line.to_owned()); } let mut saw_first_line = false; let mut saw_second_line = false; let min_indent = do vec::foldl(uint::max_value, lines) diff --git a/src/librusti/rusti.rc b/src/librusti/rusti.rc index e04cc9e389840..ddde66157bb44 100644 --- 
a/src/librusti/rusti.rc +++ b/src/librusti/rusti.rc @@ -337,7 +337,8 @@ fn run_line(repl: &mut Repl, in: @io::Reader, out: @io::Writer, line: ~str) -> Option { if line.starts_with(~":") { let full = line.substr(1, line.len() - 1); - let split = str::words(full); + let mut split = ~[]; + for str::each_word(full) |word| { split.push(word.to_owned()) } let len = split.len(); if len > 0 { diff --git a/src/librustpkg/rustpkg.rc b/src/librustpkg/rustpkg.rc index 2032969fbca6c..35698bb235a38 100644 --- a/src/librustpkg/rustpkg.rc +++ b/src/librustpkg/rustpkg.rc @@ -270,14 +270,11 @@ impl Ctx { fn sep_name_vers(in: ~str) -> (Option<~str>, Option<~str>) { let mut name = None; let mut vers = None; - let parts = str::split_char(in, '@'); - if parts.len() >= 1 { - name = Some(parts[0]); - - if parts.len() >= 2 { - vers = Some(parts[1]); - } + for str::each_split_char(in, '@') |s| { + if name.is_none() { name = Some(s.to_owned()); } + else if vers.is_none() { vers = Some(s.to_owned()); } + else { break; } } (name, vers) @@ -733,8 +730,12 @@ impl Ctx { for package.bins.each |&bin| { let path = Path(bin); - let name = str::split_char(path.file_path().to_str(), '-')[0]; - let out = bin_dir.push(name); + let mut name = None; + for str::each_split_char(path.file_path().to_str(), '-') |s| { + name = Some(s.to_owned()); + break; + } + let out = bin_dir.push(name.unwrap()); util::link_exe(&path, &out); util::note(fmt!("linked %s", out.to_str())); @@ -847,8 +848,12 @@ impl Ctx { for package.bins.each |&bin| { let path = Path(bin); - let name = str::split_char(path.file_path().to_str(), '-')[0]; - let out = bin_dir.push(name); + let mut name = None; + for str::each_split_char(path.file_path().to_str(), '-') |s| { + name = Some(s.to_owned()); + break; + } + let out = bin_dir.push(name.unwrap()); if os::path_exists(&out) { if os::remove_file(&out) { diff --git a/src/librustpkg/util.rs b/src/librustpkg/util.rs index 8e5d7e95ae17a..58a286a1854ee 100644 --- a/src/librustpkg/util.rs +++ 
b/src/librustpkg/util.rs @@ -44,10 +44,10 @@ pub fn is_cmd(cmd: ~str) -> bool { } pub fn parse_name(id: ~str) -> result::Result<~str, ~str> { - let parts = str::split_char(id, '.'); + let mut last_part = None; - for parts.each |&part| { - for str::chars(part).each |&char| { + for str::each_split_char(id, '.') |part| { + for str::each_char(part) |char| { if char::is_whitespace(char) { return result::Err( ~"could not parse id: contains whitespace"); @@ -56,9 +56,11 @@ pub fn parse_name(id: ~str) -> result::Result<~str, ~str> { ~"could not parse id: should be all lowercase"); } } + last_part = Some(part.to_owned()); } + if last_part.is_none() { return result::Err(~"could not parse id: is empty"); } - result::Ok(copy *parts.last()) + result::Ok(last_part.unwrap()) } struct ListenerFn { diff --git a/src/libstd/base64.rs b/src/libstd/base64.rs index ff026324404ab..02858de9b347f 100644 --- a/src/libstd/base64.rs +++ b/src/libstd/base64.rs @@ -16,12 +16,16 @@ pub trait ToBase64 { fn to_base64(&self) -> ~str; } +static CHARS: [char * 64] = [ + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '+', '/' +]; + impl ToBase64 for &'self [u8] { fn to_base64(&self) -> ~str { - let chars = str::chars( - ~"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/" - ); - let mut s = ~""; unsafe { let len = self.len(); @@ -35,10 +39,10 @@ impl ToBase64 for &'self [u8] { (self[i + 2u] as uint); // This 24-bit number gets separated into four 6-bit numbers. 
- str::push_char(&mut s, chars[(n >> 18u) & 63u]); - str::push_char(&mut s, chars[(n >> 12u) & 63u]); - str::push_char(&mut s, chars[(n >> 6u) & 63u]); - str::push_char(&mut s, chars[n & 63u]); + str::push_char(&mut s, CHARS[(n >> 18u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 12u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 6u) & 63u]); + str::push_char(&mut s, CHARS[n & 63u]); i += 3u; } @@ -49,17 +53,17 @@ impl ToBase64 for &'self [u8] { 0 => (), 1 => { let n = (self[i] as uint) << 16u; - str::push_char(&mut s, chars[(n >> 18u) & 63u]); - str::push_char(&mut s, chars[(n >> 12u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 18u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 12u) & 63u]); str::push_char(&mut s, '='); str::push_char(&mut s, '='); } 2 => { let n = (self[i] as uint) << 16u | (self[i + 1u] as uint) << 8u; - str::push_char(&mut s, chars[(n >> 18u) & 63u]); - str::push_char(&mut s, chars[(n >> 12u) & 63u]); - str::push_char(&mut s, chars[(n >> 6u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 18u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 12u) & 63u]); + str::push_char(&mut s, CHARS[(n >> 6u) & 63u]); str::push_char(&mut s, '='); } _ => fail!(~"Algebra is broken, please alert the math police") diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs index de8a8f343816c..ae783fb9b697d 100644 --- a/src/libstd/getopts.rs +++ b/src/libstd/getopts.rs @@ -244,7 +244,8 @@ pub fn getopts(args: &[~str], opts: &[Opt]) -> Result { let mut i_arg = None; if cur[1] == '-' as u8 { let tail = str::slice(cur, 2, curlen).to_owned(); - let tail_eq = str::splitn_char(tail, '=', 1); + let mut tail_eq = ~[]; + for str::each_splitn_char(tail, '=', 1) |s| { tail_eq.push(s.to_owned()) } if tail_eq.len() <= 1 { names = ~[Long(tail)]; } else { @@ -601,7 +602,7 @@ pub mod groups { row += match short_name.len() { 0 => ~"", 1 => ~"-" + short_name + " ", - _ => fail!(~"the short name should only be 1 char long"), + _ => fail!(~"the short name should only be 1 ascii char 
long"), }; // long option @@ -617,6 +618,7 @@ pub mod groups { Maybe => ~"[" + hint + ~"]", }; + // FIXME: #5516 // here we just need to indent the start of the description let rowlen = row.len(); row += if rowlen < 24 { @@ -625,8 +627,22 @@ pub mod groups { desc_sep }; + // Normalize desc to contain words separated by one space character + let mut desc_normalized_whitespace = ~""; + for str::each_word(desc) |word| { + desc_normalized_whitespace.push_str(word); + desc_normalized_whitespace.push_char(' '); + } + + // FIXME: #5516 + let mut desc_rows = ~[]; + for str::each_split_within(desc_normalized_whitespace, 54) |substr| { + desc_rows.push(substr.to_owned()); + } + + // FIXME: #5516 // wrapped description - row += str::connect(str::split_within(desc, 54), desc_sep); + row += str::connect(desc_rows, desc_sep); row }); diff --git a/src/libstd/json.rs b/src/libstd/json.rs index a9b9b2977cded..f39e406bc0060 100644 --- a/src/libstd/json.rs +++ b/src/libstd/json.rs @@ -806,7 +806,8 @@ impl serialize::Decoder for Decoder<'self> { } fn read_char(&self) -> char { - let v = str::chars(self.read_owned_str()); + let mut v = ~[]; + for str::each_char(self.read_owned_str()) |c| { v.push(c) } if v.len() != 1 { fail!(~"string must have one character") } v[0] } diff --git a/src/libstd/net_ip.rs b/src/libstd/net_ip.rs index 15593571b43ca..4d82d35cc3206 100644 --- a/src/libstd/net_ip.rs +++ b/src/libstd/net_ip.rs @@ -197,7 +197,9 @@ pub mod v4 { } } pub fn parse_to_ipv4_rep(ip: &str) -> result::Result { - let parts = vec::map(str::split_char(ip, '.'), |s| { + let mut parts = ~[]; + for str::each_split_char(ip, '.') |s| { parts.push(s.to_owned()) } + let parts = vec::map(parts, |s| { match uint::from_str(*s) { Some(n) if n <= 255 => n, _ => 256 diff --git a/src/libstd/net_url.rs b/src/libstd/net_url.rs index 21b60584635fc..9caab11d6434e 100644 --- a/src/libstd/net_url.rs +++ b/src/libstd/net_url.rs @@ -344,8 +344,8 @@ fn userinfo_to_str(userinfo: &UserInfo) -> ~str { fn 
query_from_str(rawquery: &str) -> Query { let mut query: Query = ~[]; if str::len(rawquery) != 0 { - for str::split_char(rawquery, '&').each |p| { - let (k, v) = split_char_first(*p, '='); + for str::each_split_char(rawquery, '&') |p| { + let (k, v) = split_char_first(p, '='); // FIXME(#3722): unsafe only because decode_inner does (string) IO unsafe {query.push((decode_component(k), decode_component(v)));} }; diff --git a/src/libsyntax/parse/comments.rs b/src/libsyntax/parse/comments.rs index 3f8a5588c7163..1b6b25db38ad6 100644 --- a/src/libsyntax/parse/comments.rs +++ b/src/libsyntax/parse/comments.rs @@ -99,7 +99,8 @@ pub fn strip_doc_comment_decoration(comment: &str) -> ~str { } return do lines.map |line| { - let chars = str::chars(*line); + let mut chars = ~[]; + for str::each_char(*line) |c| { chars.push(c) } if i > chars.len() { ~"" } else { @@ -116,7 +117,10 @@ pub fn strip_doc_comment_decoration(comment: &str) -> ~str { } if comment.starts_with(~"/*") { - let lines = str::lines_any(comment.slice(3u, comment.len() - 2u).to_owned()); + let mut lines = ~[]; + for str::each_line_any(comment.slice(3u, comment.len() - 2u)) |line| { + lines.push(line.to_owned()) + } let lines = vertical_trim(lines); let lines = block_trim(lines, ~"\t ", None); let lines = block_trim(lines, ~"*", Some(1u)); diff --git a/src/test/bench/sudoku.rs b/src/test/bench/sudoku.rs index 4964cea28ad91..dcc5fe5875522 100644 --- a/src/test/bench/sudoku.rs +++ b/src/test/bench/sudoku.rs @@ -68,7 +68,8 @@ pub impl Sudoku { let mut g = vec::from_fn(10u, { |_i| ~[0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8, 0u8] }); while !reader.eof() { let line = reader.read_line(); - let comps = str::split_char(line.trim(), ','); + let mut comps = ~[]; + for str::each_split_char(line.trim(), ',') |s| { comps.push(s.to_owned()) } if vec::len(comps) == 3u { let row = uint::from_str(comps[0]).get() as u8; let col = uint::from_str(comps[1]).get() as u8; diff --git a/src/test/run-pass/utf8_chars.rs 
b/src/test/run-pass/utf8_chars.rs index cfbb73981592f..247fd2d712a16 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -17,8 +17,8 @@ pub fn main() { fail_unless!((str::len(s) == 10u)); fail_unless!((str::char_len(s) == 4u)); - fail_unless!((vec::len(str::chars(s)) == 4u)); - fail_unless!((str::from_chars(str::chars(s)) == s)); + fail_unless!((vec::len(str::to_chars(s)) == 4u)); + fail_unless!((str::from_chars(str::to_chars(s)) == s)); fail_unless!((str::char_at(s, 0u) == 'e')); fail_unless!((str::char_at(s, 1u) == 'é'));