diff --git a/src/cargo/cargo.rs b/src/cargo/cargo.rs index b8bea7967c93e..57a63052ee80c 100644 --- a/src/cargo/cargo.rs +++ b/src/cargo/cargo.rs @@ -174,10 +174,10 @@ fn print(s: str) { } fn rest(s: str, start: uint) -> str { - if (start >= str::char_len(s)) { + if (start >= str::len(s)) { "" } else { - str::slice(s, start, str::char_len(s)) + str::slice(s, start, str::len(s)) } } @@ -654,7 +654,7 @@ fn cmd_install(c: cargo) unsafe { alt str::index(uuid, '/') { option::some(idx) { let source = str::slice(uuid, 0u, idx); - uuid = str::slice(uuid, idx + 1u, str::char_len(uuid)); + uuid = str::slice(uuid, idx + 1u, str::len(uuid)); install_uuid_specific(c, wd, source, uuid); } option::none { @@ -666,7 +666,7 @@ fn cmd_install(c: cargo) unsafe { alt str::index(name, '/') { option::some(idx) { let source = str::slice(name, 0u, idx); - name = str::slice(name, idx + 1u, str::char_len(name)); + name = str::slice(name, idx + 1u, str::len(name)); install_named_specific(c, wd, source, name); } option::none { diff --git a/src/comp/back/link.rs b/src/comp/back/link.rs index 5fa7841c97246..adb9f31f13ecf 100644 --- a/src/comp/back/link.rs +++ b/src/comp/back/link.rs @@ -396,7 +396,7 @@ fn build_link_meta(sess: session, c: ast::crate, output: str, metas: provided_metas, dep_hashes: [str]) -> str { fn len_and_str(s: str) -> str { - ret #fmt["%u_%s", str::byte_len(s), s]; + ret #fmt["%u_%s", str::len_bytes(s), s]; } fn len_and_str_lit(l: ast::lit) -> str { @@ -521,7 +521,7 @@ fn mangle(ss: path) -> str { for s in ss { alt s { path_name(s) | path_mod(s) { - n += #fmt["%u%s", str::byte_len(s), s]; + n += #fmt["%u%s", str::len_bytes(s), s]; } } } n += "E"; // End name-sequence. @@ -573,7 +573,7 @@ fn link_binary(sess: session, config.os == session::os_freebsd) && str::find(filename, "lib") == 0 { ret str::unsafe::slice_bytes(filename, 3u, - str::byte_len(filename)); + str::len_bytes(filename)); } else { ret filename; } }; fn rmext(filename: str) -> str { diff --git a/src/comp/driver/diagnostic.rs b/src/comp/driver/diagnostic.rs index 427bac77197d5..556d3c74ef3b3 100644 --- a/src/comp/driver/diagnostic.rs +++ b/src/comp/driver/diagnostic.rs @@ -210,7 +210,7 @@ fn highlight_lines(cm: codemap::codemap, sp: span, if elided { let last_line = display_lines[vec::len(display_lines) - 1u]; let s = #fmt["%s:%u ", fm.name, last_line + 1u]; - let indent = str::char_len(s); + let indent = str::len(s); let out = ""; while indent > 0u { out += " "; indent -= 1u; } out += "...\n"; @@ -228,7 +228,7 @@ fn highlight_lines(cm: codemap::codemap, sp: span, while num > 0u { num /= 10u; digits += 1u; } // indent past |name:## | and the 0-offset column location - let left = str::char_len(fm.name) + digits + lo.col + 3u; + let left = str::len(fm.name) + digits + lo.col + 3u; let s = ""; while left > 0u { str::push_char(s, ' '); left -= 1u; } diff --git a/src/comp/driver/rustc.rs b/src/comp/driver/rustc.rs index dfed52b801351..0239f3921de74 100644 --- a/src/comp/driver/rustc.rs +++ b/src/comp/driver/rustc.rs @@ -13,7 +13,7 @@ import rustc::driver::diagnostic; fn version(argv0: str) { let vers = "unknown version"; let env_vers = #env["CFG_VERSION"]; - if str::byte_len(env_vers) != 0u { vers = env_vers; } + if str::len_bytes(env_vers) != 0u { vers = env_vers; } io::stdout().write_str(#fmt["%s %s\n", argv0, vers]); io::stdout().write_str(#fmt["host: %s\n", host_triple()]); } diff --git a/src/comp/middle/debuginfo.rs b/src/comp/middle/debuginfo.rs index 3419b6379e30c..79af3f05d5f51 100644 --- a/src/comp/middle/debuginfo.rs +++ b/src/comp/middle/debuginfo.rs @@ -48,7 +48,7 @@ const DW_ATE_unsigned_char: int = 0x08; fn llstr(s: str) -> ValueRef { str::as_buf(s, {|sbuf| - llvm::LLVMMDString(sbuf, str::byte_len(s) as ctypes::c_uint) + llvm::LLVMMDString(sbuf, str::len_bytes(s) as ctypes::c_uint) }) } fn lltag(lltag: int) -> ValueRef { @@ -167,8 +167,8 @@ fn create_compile_unit(cx: @crate_ctxt, full_path: str) let work_dir = cx.sess.working_dir; let file_path = if str::starts_with(full_path, work_dir) { - str::unsafe::slice_bytes(full_path, str::byte_len(work_dir), - str::byte_len(full_path)) + str::unsafe::slice_bytes(full_path, str::len_bytes(work_dir), + str::len_bytes(full_path)) } else { full_path }; diff --git a/src/comp/middle/trans/common.rs b/src/comp/middle/trans/common.rs index 9f1b7ef7dbd3c..c8965bd3f60e5 100644 --- a/src/comp/middle/trans/common.rs +++ b/src/comp/middle/trans/common.rs @@ -767,7 +767,7 @@ fn C_u8(i: uint) -> ValueRef { ret C_integral(T_i8(), i as u64, False); } // our boxed-and-length-annotated strings. fn C_cstr(cx: @crate_ctxt, s: str) -> ValueRef { let sc = str::as_buf(s) {|buf| - llvm::LLVMConstString(buf, str::byte_len(s) as unsigned, False) + llvm::LLVMConstString(buf, str::len_bytes(s) as unsigned, False) }; let g = str::as_buf(cx.names("str"), @@ -781,7 +781,7 @@ fn C_cstr(cx: @crate_ctxt, s: str) -> ValueRef { // Returns a Plain Old LLVM String: fn C_postr(s: str) -> ValueRef { ret str::as_buf(s) {|buf| - llvm::LLVMConstString(buf, str::byte_len(s) as unsigned, False) + llvm::LLVMConstString(buf, str::len_bytes(s) as unsigned, False) }; } diff --git a/src/comp/middle/trans/tvec.rs b/src/comp/middle/trans/tvec.rs index 5b6a20e3ce19c..a3700ca423e49 100644 --- a/src/comp/middle/trans/tvec.rs +++ b/src/comp/middle/trans/tvec.rs @@ -130,7 +130,7 @@ fn trans_vec(bcx: @block_ctxt, args: [@ast::expr], id: ast::node_id, } fn trans_str(bcx: @block_ctxt, s: str, dest: dest) -> @block_ctxt { - let veclen = str::byte_len(s) + 1u; // +1 for \0 + let veclen = str::len_bytes(s) + 1u; // +1 for \0 let {bcx: bcx, val: sptr, _} = alloc(bcx, ty::mk_str(bcx_tcx(bcx)), veclen); diff --git a/src/comp/syntax/codemap.rs b/src/comp/syntax/codemap.rs index cb2590e195188..d1b60916e1951 100644 --- a/src/comp/syntax/codemap.rs +++ b/src/comp/syntax/codemap.rs @@ -135,7 +135,7 @@ fn lookup_byte_offset(cm: codemap::codemap, chpos: uint) let {fm,line} = lookup_line(cm,chpos,lookup); let line_offset = fm.lines[line].byte - fm.start_pos.byte; let col = chpos - fm.lines[line].ch; - let col_offset = str::byte_len_range(*fm.src, line_offset, col); + let col_offset = str::substr_len_bytes(*fm.src, line_offset, col); ret {fm: fm, pos: line_offset + col_offset}; } diff --git a/src/comp/syntax/ext/qquote.rs b/src/comp/syntax/ext/qquote.rs index 39bd34785ba0d..ba66cb78a15c3 100644 --- a/src/comp/syntax/ext/qquote.rs +++ b/src/comp/syntax/ext/qquote.rs @@ -202,7 +202,7 @@ fn expand_qquote if (j < g_len && i == cx.gather[j].lo) { assert ch == '$'; let repl = #fmt("$%u ", j); - state = skip(str::char_len(repl)); + state = skip(str::len(repl)); str2 += repl; } alt state { diff --git a/src/comp/syntax/parse/lexer.rs b/src/comp/syntax/parse/lexer.rs index 42cc5400b0c3a..300d0a66f1db2 100644 --- a/src/comp/syntax/parse/lexer.rs +++ b/src/comp/syntax/parse/lexer.rs @@ -58,7 +58,7 @@ fn new_reader(cm: codemap::codemap, itr: @interner::interner) -> reader { let r = @{cm: cm, span_diagnostic: span_diagnostic, - src: filemap.src, len: str::byte_len(*filemap.src), + src: filemap.src, len: str::len_bytes(*filemap.src), mutable col: 0u, mutable pos: 0u, mutable curr: -1 as char, mutable chpos: filemap.start_pos.ch, mutable strs: [], filemap: filemap, interner: itr}; @@ -149,15 +149,15 @@ fn scan_exponent(rdr: reader) -> option { let c = rdr.curr; let rslt = ""; if c == 'e' || c == 'E' { - str::push_byte(rslt, c as u8); + str::push_char(rslt, c); rdr.bump(); c = rdr.curr; if c == '-' || c == '+' { - str::push_byte(rslt, c as u8); + str::push_char(rslt, c); rdr.bump(); } let exponent = scan_digits(rdr, 10u); - if str::byte_len(exponent) > 0u { + if str::len_bytes(exponent) > 0u { ret some(rslt + exponent); } else { rdr.fatal("scan_exponent: bad fp literal"); } } else { ret none::; } @@ -170,7 +170,7 @@ fn scan_digits(rdr: reader, radix: uint) -> str { if c == '_' { rdr.bump(); cont; } alt char::maybe_digit(c) { some(d) if (d as uint) < radix { - str::push_byte(rslt, c as u8); + str::push_char(rslt, c); rdr.bump(); } _ { break; } @@ -220,7 +220,7 @@ fn scan_number(c: char, rdr: reader) -> token::token { tp = if signed { either::left(ast::ty_i64) } else { either::right(ast::ty_u64) }; } - if str::byte_len(num_str) == 0u { + if str::len_bytes(num_str) == 0u { rdr.fatal("no valid digits found for number"); } let parsed = u64::from_str(num_str, base as u64); @@ -267,7 +267,7 @@ fn scan_number(c: char, rdr: reader) -> token::token { ret token::LIT_FLOAT(interner::intern(*rdr.interner, num_str), ast::ty_f); } else { - if str::byte_len(num_str) == 0u { + if str::len_bytes(num_str) == 0u { rdr.fatal("no valid digits found for number"); } let parsed = u64::from_str(num_str, base as u64); @@ -472,11 +472,11 @@ fn next_token_inner(rdr: reader) -> token::token { let escaped = rdr.curr; rdr.bump(); alt escaped { - 'n' { str::push_byte(accum_str, '\n' as u8); } - 'r' { str::push_byte(accum_str, '\r' as u8); } - 't' { str::push_byte(accum_str, '\t' as u8); } - '\\' { str::push_byte(accum_str, '\\' as u8); } - '"' { str::push_byte(accum_str, '"' as u8); } + 'n' { str::push_char(accum_str, '\n'); } + 'r' { str::push_char(accum_str, '\r'); } + 't' { str::push_char(accum_str, '\t'); } + '\\' { str::push_char(accum_str, '\\'); } + '"' { str::push_char(accum_str, '"'); } '\n' { consume_whitespace(rdr); } 'x' { str::push_char(accum_str, scan_numeric_escape(rdr, 2u)); @@ -604,8 +604,8 @@ fn trim_whitespace_prefix_and_push_line(&lines: [str], s: str, col: uint) unsafe { let s1; if all_whitespace(s, 0u, col) { - if col < str::byte_len(s) { - s1 = str::unsafe::slice_bytes(s, col, str::byte_len(s)); + if col < str::len_bytes(s) { + s1 = str::unsafe::slice_bytes(s, col, str::len_bytes(s)); } else { s1 = ""; } } else { s1 = s; } log(debug, "pushing line: " + s1); @@ -645,7 +645,7 @@ fn read_block_comment(rdr: reader, code_to_the_left: bool) -> cmnt { } } } - if str::byte_len(curr_line) != 0u { + if str::len_bytes(curr_line) != 0u { trim_whitespace_prefix_and_push_line(lines, curr_line, col); } let style = if code_to_the_left { trailing } else { isolated }; diff --git a/src/comp/syntax/parse/token.rs b/src/comp/syntax/parse/token.rs index 14f7c055743a2..60949f7793cbe 100644 --- a/src/comp/syntax/parse/token.rs +++ b/src/comp/syntax/parse/token.rs @@ -139,7 +139,7 @@ fn to_str(r: reader, t: token) -> str { // FIXME: escape. let tmp = "'"; str::push_char(tmp, c as char); - str::push_byte(tmp, '\'' as u8); + str::push_char(tmp, '\''); ret tmp; } LIT_INT(i, t) { diff --git a/src/comp/syntax/print/pp.rs b/src/comp/syntax/print/pp.rs index d970f9eb23b3a..3ee3131429929 100644 --- a/src/comp/syntax/print/pp.rs +++ b/src/comp/syntax/print/pp.rs @@ -491,7 +491,7 @@ fn end(p: printer) { p.pretty_print(END); } fn eof(p: printer) { p.pretty_print(EOF); } fn word(p: printer, wrd: str) { - p.pretty_print(STRING(wrd, str::char_len(wrd) as int)); + p.pretty_print(STRING(wrd, str::len(wrd) as int)); } fn huge_word(p: printer, wrd: str) { diff --git a/src/comp/syntax/print/pprust.rs b/src/comp/syntax/print/pprust.rs index 9ed73b17d9da0..047653cc4c98f 100644 --- a/src/comp/syntax/print/pprust.rs +++ b/src/comp/syntax/print/pprust.rs @@ -201,7 +201,7 @@ fn head(s: ps, w: str) { // outer-box is consistent cbox(s, indent_unit); // head-box is inconsistent - ibox(s, str::char_len(w) + 1u); + ibox(s, str::len(w) + 1u); // keyword that starts the head word_nbsp(s, w); } @@ -1458,7 +1458,7 @@ fn print_ty_fn(s: ps, opt_proto: option, popen(s); fn print_arg(s: ps, input: ast::arg) { print_arg_mode(s, input.mode); - if str::byte_len(input.ident) > 0u { + if str::len_bytes(input.ident) > 0u { word_space(s, input.ident + ":"); } print_type(s, input.ty); @@ -1640,7 +1640,7 @@ fn print_string(s: ps, st: str) { fn escape_str(st: str, to_escape: char) -> str { let out: str = ""; - let len = str::byte_len(st); + let len = str::len_bytes(st); let i = 0u; while i < len { alt st[i] as char { diff --git a/src/comp/util/ppaux.rs b/src/comp/util/ppaux.rs index 5af07e1650e5f..03bbeb0d53894 100644 --- a/src/comp/util/ppaux.rs +++ b/src/comp/util/ppaux.rs @@ -132,7 +132,7 @@ fn ty_to_str(cx: ctxt, typ: t) -> str { fn ty_to_short_str(cx: ctxt, typ: t) -> str unsafe { let s = encoder::encoded_ty(cx, typ); - if str::byte_len(s) >= 32u { s = str::unsafe::slice_bytes(s, 0u, 32u); } + if str::len_bytes(s) >= 32u { s = str::unsafe::slice_bytes(s, 0u, 32u); } ret s; } diff --git a/src/compiletest/errors.rs b/src/compiletest/errors.rs index 3703badf52793..9d81b6f00e209 100644 --- a/src/compiletest/errors.rs +++ b/src/compiletest/errors.rs @@ -26,12 +26,12 @@ fn parse_expected(line_num: uint, line: str) -> [expected_error] unsafe { let error_tag = "//!"; let idx0 = str::find(line, error_tag); if idx0 < 0 { ret []; } - let idx = (idx0 as uint) + str::byte_len(error_tag); + let idx = (idx0 as uint) + str::len_bytes(error_tag); // "//!^^^ kind msg" denotes a message expected // three lines above current line: let adjust_line = 0u; - let len = str::byte_len(line); + let len = str::len_bytes(line); while idx < len && line[idx] == ('^' as u8) { adjust_line += 1u; idx += 1u; diff --git a/src/compiletest/header.rs b/src/compiletest/header.rs index e09861141c4af..f9c7a7a66e05b 100644 --- a/src/compiletest/header.rs +++ b/src/compiletest/header.rs @@ -109,8 +109,8 @@ fn parse_name_value_directive(line: str, if str::find(line, keycolon) >= 0 { let colon = str::find(line, keycolon) as uint; let value = - str::unsafe::slice_bytes(line, colon + str::byte_len(keycolon), - str::byte_len(line)); + str::unsafe::slice_bytes(line, colon + str::len_bytes(keycolon), + str::len_bytes(line)); #debug("%s: %s", directive, value); option::some(value) } else { option::none } diff --git a/src/fuzzer/fuzzer.rs b/src/fuzzer/fuzzer.rs index 9790ec02ff45b..f6d5f72c367f1 100644 --- a/src/fuzzer/fuzzer.rs +++ b/src/fuzzer/fuzzer.rs @@ -285,7 +285,7 @@ fn check_variants_T( fn last_part(filename: str) -> str { let ix = option::get(str::rindex(filename, '/')); - str::slice(filename, ix + 1u, str::char_len(filename) - 3u) + str::slice(filename, ix + 1u, str::len(filename) - 3u) } enum happiness { passed, cleanly_rejected(str), known_bug(str), failed(str), } @@ -333,7 +333,7 @@ fn removeDirIfExists(filename: str) { fn check_running(exe_filename: str) -> happiness { let p = std::run::program_output("/Users/jruderman/scripts/timed_run_rust_program.py", [exe_filename]); let comb = p.out + "\n" + p.err; - if str::byte_len(comb) > 1u { + if str::len_bytes(comb) > 1u { log(error, "comb comb comb: " + comb); } diff --git a/src/libcore/extfmt.rs b/src/libcore/extfmt.rs index 0638394e29bde..30676e34de831 100644 --- a/src/libcore/extfmt.rs +++ b/src/libcore/extfmt.rs @@ -82,10 +82,10 @@ mod ct { fn parse_fmt_string(s: str, error: error_fn) -> [piece] unsafe { let pieces: [piece] = []; - let lim = str::byte_len(s); + let lim = str::len_bytes(s); let buf = ""; fn flush_buf(buf: str, &pieces: [piece]) -> str { - if str::byte_len(buf) > 0u { + if str::len_bytes(buf) > 0u { let piece = piece_string(buf); pieces += [piece]; } @@ -325,7 +325,7 @@ mod rt { alt cv.precision { count_implied { s } count_is(max) { - if max as uint < str::char_len(s) { + if max as uint < str::len(s) { str::substr(s, 0u, max as uint) } else { s } } @@ -368,7 +368,7 @@ mod rt { "" } else { let s = uint::to_str(num, radix); - let len = str::char_len(s); + let len = str::len(s); if len < prec { let diff = prec - len; let pad = str_init_elt(diff, '0'); @@ -400,7 +400,7 @@ mod rt { uwidth = width as uint; } } - let strlen = str::char_len(s); + let strlen = str::len(s); if uwidth <= strlen { ret s; } let padchar = ' '; let diff = uwidth - strlen; @@ -433,12 +433,12 @@ mod rt { // zeros. It may make sense to convert zero padding to a precision // instead. - if signed && zero_padding && str::byte_len(s) > 0u { + if signed && zero_padding && str::len_bytes(s) > 0u { let head = s[0]; if head == '+' as u8 || head == '-' as u8 || head == ' ' as u8 { let headstr = str::from_bytes([head]); // FIXME: not UTF-8 safe - let bytelen = str::byte_len(s); + let bytelen = str::len_bytes(s); let numpart = str::unsafe::slice_bytes(s, 1u, bytelen); ret headstr + padstr + numpart; } diff --git a/src/libcore/float.rs b/src/libcore/float.rs index f8a0e5ea7cf85..5b9f1ce363655 100644 --- a/src/libcore/float.rs +++ b/src/libcore/float.rs @@ -128,7 +128,7 @@ fn from_str(num: str) -> float { let pos = 0u; //Current byte position in the string. //Used to walk the string in O(n). - let len = str::byte_len(num); //Length of the string, in bytes. + let len = str::len_bytes(num); //Length of the string, in bytes. if len == 0u { ret 0.; } let total = 0f; //Accumulated result diff --git a/src/libcore/str.rs b/src/libcore/str.rs index 1f1f5a22ac21c..5d39fb72b7e95 100644 --- a/src/libcore/str.rs +++ b/src/libcore/str.rs @@ -26,10 +26,6 @@ export pop_char, shift_char, unshift_char, - push_byte, - //push_bytes, - pop_byte, - shift_byte, trim_left, trim_right, trim, @@ -83,14 +79,14 @@ export is_empty, is_not_empty, is_whitespace, - byte_len, - char_len, + len_bytes, + len_chars, len, // Misc // FIXME: perhaps some more of this section shouldn't be exported? is_utf8, - char_len_range, - byte_len_range, + substr_len_bytes, + substr_len_chars, utf8_char_width, char_range_at, char_at, @@ -137,7 +133,7 @@ fn from_byte(uu: u8) -> str { from_bytes([uu]) } -fn push_utf8_bytes(&s: str, ch: char) { +fn push_utf8_bytes(&s: str, ch: char) unsafe { let code = ch as uint; let bytes = if code < max_one_b { @@ -168,7 +164,7 @@ fn push_utf8_bytes(&s: str, ch: char) { (code >> 6u & 63u | tag_cont) as u8, (code & 63u | tag_cont) as u8] }; - push_bytes(s, bytes); + unsafe::push_bytes(s, bytes); } /* @@ -275,7 +271,7 @@ Failure: If the string does not contain any characters. */ fn pop_char(&s: str) -> char unsafe { - let end = byte_len(s); + let end = len_bytes(s); let {ch:ch, prev:end} = char_range_at_reverse(s, end); s = unsafe::slice_bytes(s, 0u, end); ret ch; @@ -292,7 +288,7 @@ If the string does not contain any characters. */ fn shift_char(&s: str) -> char unsafe { let r = char_range_at(s, 0u); - s = unsafe::slice_bytes(s, r.next, byte_len(s)); + s = unsafe::slice_bytes(s, r.next, len_bytes(s)); ret r.ch; } @@ -303,58 +299,6 @@ Prepend a char to a string */ fn unshift_char(&s: str, ch: char) { s = from_char(ch) + s; } -/* -Function: push_byte - -Appends a byte to a string. - -This function is not unicode-safe. -*/ -fn push_byte(&s: str, b: u8) { rustrt::rust_str_push(s, b); } - -/* -Function: push_bytes - -Appends a vector of bytes to a string. - -This function is not unicode-safe. -*/ -fn push_bytes(&s: str, bytes: [u8]) { - for byte in bytes { rustrt::rust_str_push(s, byte); } -} - -/* -Function: pop_byte - -Removes the last byte from a string and returns it. - -This function is not unicode-safe. -FIXME: move to unsafe? -*/ -fn pop_byte(&s: str) -> u8 unsafe { - let len = byte_len(s); - assert (len > 0u); - let b = s[len - 1u]; - s = unsafe::slice_bytes(s, 0u, len - 1u); - ret b; -} - -/* -Function: shift_byte - -Removes the first byte from a string and returns it. - -This function is not unicode-safe. -FIXME: move to unsafe? -*/ -fn shift_byte(&s: str) -> u8 unsafe { - let len = byte_len(s); - assert (len > 0u); - let b = s[0]; - s = unsafe::slice_bytes(s, 1u, len); - ret b; -} - /* Function: trim_left @@ -423,7 +367,7 @@ Convert a string to a vector of characters fn chars(s: str) -> [char] { let buf: [char] = []; let i = 0u; - let len = byte_len(s); + let len = len_bytes(s); while i < len { let cur = char_range_at(s, i); buf += [cur.ch]; @@ -499,7 +443,7 @@ fn splitn_byte(ss: str, sep: u8, count: uint) -> [str] unsafe { assert u8::is_ascii(sep); let vv = []; - let start = 0u, current = 0u, len = byte_len(ss); + let start = 0u, current = 0u, len = len_bytes(ss); let splits_done = 0u; while splits_done < count && current < len { @@ -529,7 +473,7 @@ fn split_str(ss: str, sep: str) -> [str] unsafe { // unsafe is justified: we are splitting // UTF-8 with UTF-8, so the results will be OK - let sep_len = str::byte_len(sep); + let sep_len = len_bytes(sep); assert sep_len > 0u; let vv = []; let start = 0u, start_match = 0u, current = 0u, matching = 0u; @@ -581,7 +525,7 @@ fn split(ss: str, sepfn: fn(cc: char)->bool) -> [str] { } }); - if char_len(accum) >= 0u || ends_with_sep { + if len(accum) >= 0u || ends_with_sep { vv += [accum]; } @@ -592,8 +536,6 @@ fn split(ss: str, sepfn: fn(cc: char)->bool) -> [str] { Function: split_char Splits a string into a vector of the substrings separated by a given character - -FIXME: also add splitn_char */ fn split_char(ss: str, cc: char) -> [str] { split(ss, {|kk| kk == cc}) @@ -608,7 +550,7 @@ up to `count` times fn splitn_char(ss: str, sep: char, count: uint) -> [str] unsafe { let vv = []; - let start = 0u, current = 0u, len = byte_len(ss); + let start = 0u, current = 0u, len = len_bytes(ss); let splits_done = 0u; while splits_done < count && current < len { @@ -655,7 +597,7 @@ separated by whitespace */ fn words(ss: str) -> [str] { ret vec::filter( split(ss, {|cc| char::is_whitespace(cc)}), - {|w| 0u < str::char_len(w)}); + {|w| 0u < str::len(w)}); } /* @@ -665,7 +607,7 @@ Create a vector of substrings of size `nn` */ fn windowed(nn: uint, ss: str) -> [str] { let ww = []; - let len = str::char_len(ss); + let len = str::len(ss); assert 1u <= nn; @@ -714,19 +656,21 @@ Returns: The original string with all occurances of `from` replaced with `to` */ fn replace(s: str, from: str, to: str) : is_not_empty(from) -> str unsafe { - if byte_len(s) == 0u { + if len_bytes(s) == 0u { ret ""; } else if starts_with(s, from) { - ret to + replace(unsafe::slice_bytes(s, byte_len(from), byte_len(s)), + ret to + replace( + unsafe::slice_bytes(s, len_bytes(from), len_bytes(s)), from, to); } else { let idx = find(s, from); if idx == -1 { ret s; } - ret slice(s, 0u, idx as uint) + to + - replace(slice(s, idx as uint + char_len(from), char_len(s)), - from, to); + let before = unsafe::slice_bytes(s, 0u, idx as uint); + let after = unsafe::slice_bytes(s, idx as uint + len_bytes(from), + len_bytes(s)); + ret before + to + replace(after, from, to); } } @@ -784,7 +728,7 @@ Return true if a predicate matches all characters or if the string contains no characters */ fn all(s: str, it: fn(char) -> bool) -> bool{ - ret substr_all(s, 0u, byte_len(s), it); + ret substr_all(s, 0u, len_bytes(s), it); } /* @@ -804,7 +748,7 @@ Apply a function to each character */ fn map(ss: str, ff: fn(char) -> char) -> str { let result = ""; - reserve(result, byte_len(ss)); + reserve(result, len_bytes(ss)); chars_iter(ss, {|cc| str::push_char(result, ff(cc)); @@ -820,7 +764,7 @@ Iterate over the bytes in a string */ fn bytes_iter(ss: str, it: fn(u8)) { let pos = 0u; - let len = byte_len(ss); + let len = len_bytes(ss); while (pos < len) { it(ss[pos]); @@ -834,7 +778,7 @@ Function: chars_iter Iterate over the characters in a string */ fn chars_iter(s: str, it: fn(char)) { - let pos = 0u, len = byte_len(s); + let pos = 0u, len = len_bytes(s); while (pos < len) { let {ch, next} = char_range_at(s, pos); pos = next; @@ -891,7 +835,7 @@ Section: Searching fn index(ss: str, cc: char) -> option { let bii = 0u; let cii = 0u; - let len = byte_len(ss); + let len = len_bytes(ss); while bii < len { let {ch, next} = char_range_at(ss, bii); @@ -913,8 +857,8 @@ fn index(ss: str, cc: char) -> option { // Returns the index of the first matching char // (as option some/none) fn rindex(ss: str, cc: char) -> option { - let bii = byte_len(ss); - let cii = char_len(ss); + let bii = len_bytes(ss); + let cii = len(ss); while bii > 0u { let {ch, prev} = char_range_at_reverse(ss, bii); cii -= 1u; @@ -945,11 +889,11 @@ Returns: The index of the first occurance of `needle`, or -1 if not found. -FIXME: UTF-8 +FIXME: return an option instead */ fn find(haystack: str, needle: str) -> int { - let haystack_len: int = byte_len(haystack) as int; - let needle_len: int = byte_len(needle) as int; + let haystack_len: int = len_bytes(haystack) as int; + let needle_len: int = len_bytes(needle) as int; if needle_len == 0 { ret 0; } fn match_at(haystack: str, needle: str, i: int) -> bool { let j: int = i; @@ -989,8 +933,8 @@ haystack - The string to look in needle - The string to look for */ fn starts_with(haystack: str, needle: str) -> bool { - let haystack_len: uint = byte_len(haystack); - let needle_len: uint = byte_len(needle); + let haystack_len: uint = len(haystack); + let needle_len: uint = len(needle); if needle_len == 0u { ret true; } if needle_len > haystack_len { ret false; } ret eq(substr(haystack, 0u, needle_len), needle); @@ -1005,8 +949,8 @@ haystack - The string to look in needle - The string to look for */ fn ends_with(haystack: str, needle: str) -> bool { - let haystack_len: uint = char_len(haystack); - let needle_len: uint = char_len(needle); + let haystack_len: uint = len(haystack); + let needle_len: uint = len(needle); ret if needle_len == 0u { true } else if needle_len > haystack_len { @@ -1027,7 +971,7 @@ Function: is_ascii Determines if a string contains only ASCII characters */ fn is_ascii(s: str) -> bool { - let i: uint = byte_len(s); + let i: uint = len_bytes(s); while i > 0u { i -= 1u; if !u8::is_ascii(s[i]) { ret false; } } ret true; } @@ -1055,14 +999,11 @@ fn is_whitespace(s: str) -> bool { ret all(s, char::is_whitespace); } -/* -Function: byte_len - -Returns the length in bytes of a string -FIXME: rename to 'len_bytes' -*/ -pure fn byte_len(s: str) -> uint unsafe { +// Function: len_bytes +// +// Returns the string length in bytes +pure fn len_bytes(s: str) -> uint unsafe { as_bytes(s) { |v| let vlen = vec::len(v); // There should always be a null terminator @@ -1071,17 +1012,16 @@ pure fn byte_len(s: str) -> uint unsafe { } } -/* -Function: char_len - -Count the number of unicode characters in a string - -FIXME: rename to 'len_chars' -*/ -fn char_len(s: str) -> uint { - ret char_len_range(s, 0u, byte_len(s)); +// Function: len +// +// String length or size in characters. +// (Synonym: len_chars) +fn len(s: str) -> uint { + substr_len_chars(s, 0u, len_bytes(s)) } +fn len_chars(s: str) -> uint { len(s) } + /* Section: Misc */ @@ -1109,7 +1049,7 @@ fn is_utf8(v: [u8]) -> bool { } /* -Function: char_len_range +Function: substr_len_chars As char_len but for a slice of a string @@ -1126,10 +1066,8 @@ Safety note: - This function does not check whether the substring is valid. - This function fails if `byte_offset` or `byte_len` do not represent valid positions inside `s` - -FIXME: rename to 'substr_len_chars' */ -fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint { +fn substr_len_chars(s: str, byte_start: uint, byte_len: uint) -> uint { let i = byte_start; let byte_stop = i + byte_len; let len = 0u; @@ -1143,7 +1081,7 @@ fn char_len_range(s: str, byte_start: uint, byte_len: uint) -> uint { } /* -Function: byte_len_range +Function: substr_len_bytes As byte_len but for a substring @@ -1163,7 +1101,7 @@ valid positions in `s` FIXME: rename to 'substr_len_bytes' */ -fn byte_len_range(s: str, byte_offset: uint, char_len: uint) -> uint { +fn substr_len_bytes(s: str, byte_offset: uint, char_len: uint) -> uint { let i = byte_offset; let chars = 0u; while chars < char_len { @@ -1204,7 +1142,7 @@ This function can be used to iterate over the unicode characters of a string. Example: > let s = "中华Việt Nam"; > let i = 0u; -> while i < str::byte_len(s) { +> while i < str::len_bytes(s) { > let {ch, next} = str::char_range_at(s, i); > std::io::println(#fmt("%u: %c",i,ch)); > i = next; @@ -1409,7 +1347,11 @@ mod unsafe { from_bytes, from_byte, slice_bytes, - slice_bytes_safe_range; + slice_bytes_safe_range, + push_byte, + push_bytes, // note: wasn't exported + pop_byte, + shift_byte; // Function: unsafe::from_bytes // @@ -1442,7 +1384,7 @@ mod unsafe { unsafe fn slice_bytes(s: str, begin: uint, end: uint) -> str unsafe { // FIXME: Typestate precondition assert (begin <= end); - assert (end <= byte_len(s)); + assert (end <= len_bytes(s)); let v = as_bytes(s) { |v| vec::slice(v, begin, end) }; v += [0u8]; @@ -1459,9 +1401,46 @@ mod unsafe { unsafe fn slice_bytes_safe_range(s: str, begin: uint, end: uint) : uint::le(begin, end) -> str { // would need some magic to make this a precondition - assert (end <= byte_len(s)); + assert (end <= len_bytes(s)); ret slice_bytes(s, begin, end); } + + // Function: push_byte + // + // Appends a byte to a string. (Not UTF-8 safe). + unsafe fn push_byte(&s: str, b: u8) { + rustrt::rust_str_push(s, b); + } + + // Function: push_bytes + // + // Appends a vector of bytes to a string. (Not UTF-8 safe). + unsafe fn push_bytes(&s: str, bytes: [u8]) { + for byte in bytes { rustrt::rust_str_push(s, byte); } + } + + // Function: pop_byte + // + // Removes the last byte from a string and returns it. (Not UTF-8 safe). + unsafe fn pop_byte(&s: str) -> u8 unsafe { + let len = len_bytes(s); + assert (len > 0u); + let b = s[len - 1u]; + s = unsafe::slice_bytes(s, 0u, len - 1u); + ret b; + } + + // Function: shift_byte + // + // Removes the first byte from a string and returns it. (Not UTF-8 safe). + unsafe fn shift_byte(&s: str) -> u8 unsafe { + let len = len_bytes(s); + assert (len > 0u); + let b = s[0]; + s = unsafe::slice_bytes(s, 1u, len); + ret b; + } + } @@ -1484,14 +1463,23 @@ mod tests { } #[test] - fn test_bytes_len() { - assert (byte_len("") == 0u); - assert (byte_len("hello world") == 11u); - assert (byte_len("\x63") == 1u); - assert (byte_len("\xa2") == 2u); - assert (byte_len("\u03c0") == 2u); - assert (byte_len("\u2620") == 3u); - assert (byte_len("\U0001d11e") == 4u); + fn test_len() { + assert (len_bytes("") == 0u); + assert (len_bytes("hello world") == 11u); + assert (len_bytes("\x63") == 1u); + assert (len_bytes("\xa2") == 2u); + assert (len_bytes("\u03c0") == 2u); + assert (len_bytes("\u2620") == 3u); + assert (len_bytes("\U0001d11e") == 4u); + + assert (len("") == 0u); + assert (len("hello world") == 11u); + assert (len("\x63") == 1u); + assert (len("\xa2") == 1u); + assert (len("\u03c0") == 1u); + assert (len("\u2620") == 1u); + assert (len("\U0001d11e") == 1u); + assert (len("ประเทศไทย中华Việt Nam") == 19u); } #[test] @@ -1721,12 +1709,19 @@ mod tests { t("this is a simple", "", 0); t("this is a simple", "simple", 10); t("this", "simple", -1); + + // FIXME: return option position instead + let data = "ประเทศไทย中华Việt Nam"; + assert (find(data, "ประเ") == 0); + assert (find(data, "ะเ") == 6); // byte position + assert (find(data, "中华") == 27); // byte position + assert (find(data, "ไท华") == -1); } #[test] fn test_substr() { fn t(a: str, b: str, start: int) { - assert (eq(substr(a, start as uint, byte_len(b)), b)); + assert (eq(substr(a, start as uint, len_bytes(b)), b)); } t("hello", "llo", 2); t("hello", "el", 1); @@ -1838,6 +1833,49 @@ mod tests { assert (replace(" test test ", test, "") == " "); } + #[test] + fn test_replace_2a() { + let data = "ประเทศไทย中华"; + let repl = "دولة الكويت"; + + let a = "ประเ"; + let A = "دولة الكويتทศไทย中华"; + check is_not_empty(a); + assert (replace(data, a, repl) == A); + } + + #[test] + fn test_replace_2b() { + let data = "ประเทศไทย中华"; + let repl = "دولة الكويت"; + + let b = "ะเ"; + let B = "ปรدولة الكويتทศไทย中华"; + check is_not_empty(b); + assert (replace(data, b, repl) == B); + } + + #[test] + fn test_replace_2c() { + let data = "ประเทศไทย中华"; + let repl = "دولة الكويت"; + + let c = "中华"; + let C = "ประเทศไทยدولة الكويت"; + check is_not_empty(c); + assert (replace(data, c, repl) == C); + } + + #[test] + fn test_replace_2d() { + let data = "ประเทศไทย中华"; + let repl = "دولة الكويت"; + + let d = "ไท华"; + check is_not_empty(d); + assert (replace(data, d, repl) == data); + } + #[test] fn test_slice() { assert (eq("ab", slice("abc", 0u, 2u))); @@ -1914,17 +1952,17 @@ mod tests { } #[test] - fn test_shift_byte() { + fn test_shift_byte() unsafe { let s = "ABC"; - let b = shift_byte(s); + let b = unsafe::shift_byte(s); assert (s == "BC"); assert (b == 65u8); } #[test] - fn test_pop_byte() { + fn test_pop_byte() unsafe { let s = "ABC"; - let b = pop_byte(s); + let b = unsafe::pop_byte(s); assert (s == "AB"); assert (b == 67u8); } @@ -2016,7 +2054,7 @@ mod tests { let v: [u8] = bytes(s1); let s2: str = from_bytes(v); let i: uint = 0u; - let n1: uint = byte_len(s1); + let n1: uint = len_bytes(s1); let n2: uint = vec::len::(v); assert (n1 == n2); while i < n1 { @@ -2038,6 +2076,12 @@ mod tests { assert contains("", ""); assert !contains("abcde", "def"); assert !contains("", "a"); + + let data = "ประเทศไทย中华Việt Nam"; + assert contains(data, "ประเ"); + assert contains(data, "ะเ"); + assert contains(data, "中华"); + assert !contains(data, "ไท华"); } #[test] diff --git a/src/libcore/u64.rs b/src/libcore/u64.rs index d60c2d54b9e25..e169b6192dbe5 100644 --- a/src/libcore/u64.rs +++ b/src/libcore/u64.rs @@ -118,11 +118,11 @@ Function: from_str Parse a string as an unsigned integer. */ fn from_str(buf: str, radix: u64) -> u64 { - if str::byte_len(buf) == 0u { + if str::len_bytes(buf) == 0u { #error("parse_buf(): buf is empty"); fail; } - let i = str::byte_len(buf) - 1u; + let i = str::len_bytes(buf) - 1u; let power = 1u64, n = 0u64; while true { let digit = char::to_digit(buf[i] as char) as u64; diff --git a/src/libcore/uint.rs b/src/libcore/uint.rs index d2f7229a6f0dc..22b2a95f3cbdf 100644 --- a/src/libcore/uint.rs +++ b/src/libcore/uint.rs @@ -256,7 +256,7 @@ fn to_str(num: uint, radix: uint) -> str { n /= radix; } let s1: str = ""; - let len: uint = str::byte_len(s); + let len: uint = str::len_bytes(s); while len != 0u { len -= 1u; s1 += str::from_byte(s[len]); } ret s1; } diff --git a/src/libstd/fs.rs b/src/libstd/fs.rs index de4789fdd3996..947942e20d86d 100644 --- a/src/libstd/fs.rs +++ b/src/libstd/fs.rs @@ -45,7 +45,7 @@ fn splitDirnameBasename (pp: path) -> {dirname: str, basename: str} { } ret {dirname: str::slice(pp, 0u, ii), - basename: str::slice(pp, ii + 1u, str::char_len(pp))}; + basename: str::slice(pp, ii + 1u, str::len(pp))}; } /* @@ -89,14 +89,14 @@ any leading path separator on `post`, and returns the concatenation of the two with a single path separator between them. */ -fn connect(pre: path, post: path) -> path { +fn connect(pre: path, post: path) -> path unsafe { let pre_ = pre; let post_ = post; let sep = os_fs::path_sep as u8; - let pre_len = str::byte_len(pre); - let post_len = str::byte_len(post); - if pre_len > 1u && pre[pre_len-1u] == sep { str::pop_byte(pre_); } - if post_len > 1u && post[0] == sep { str::shift_byte(post_); } + let pre_len = str::len_bytes(pre); + let post_len = str::len_bytes(post); + if pre_len > 1u && pre[pre_len-1u] == sep { str::unsafe::pop_byte(pre_); } + if post_len > 1u && post[0] == sep { str::unsafe::shift_byte(post_); } ret pre_ + path_sep() + post_; } @@ -171,7 +171,7 @@ Lists the contents of a directory. */ fn list_dir(p: path) -> [str] { let p = p; - let pl = str::byte_len(p); + let pl = str::len_bytes(p); if pl == 0u || p[pl - 1u] as char != os_fs::path_sep { p += path_sep(); } let full_paths: [str] = []; for filename: str in os_fs::list_dir(p) { @@ -337,7 +337,7 @@ fn normalize(p: path) -> path { let s = reabsolute(p, s); let s = reterminate(p, s); - let s = if str::byte_len(s) == 0u { + let s = if str::len_bytes(s) == 0u { "." } else { s @@ -404,7 +404,7 @@ fn normalize(p: path) -> path { } fn reterminate(orig: path, new: path) -> path { - let last = orig[str::byte_len(orig) - 1u]; + let last = orig[str::len_bytes(orig) - 1u]; if last == os_fs::path_sep as u8 || last == os_fs::path_sep as u8 { ret new + path_sep(); diff --git a/src/libstd/getopts.rs b/src/libstd/getopts.rs index 8288501defc31..00187aee01f8f 100644 --- a/src/libstd/getopts.rs +++ b/src/libstd/getopts.rs @@ -79,7 +79,7 @@ A description of a possible option type opt = {name: name, hasarg: hasarg, occur: occur}; fn mkname(nm: str) -> name { - ret if str::char_len(nm) == 1u { + ret if str::len(nm) == 1u { short(str::char_at(nm, 0u)) } else { long(nm) }; } @@ -141,7 +141,7 @@ of matches and a vector of free strings. type match = {opts: [opt], vals: [mutable [optval]], free: [str]}; fn is_arg(arg: str) -> bool { - ret str::byte_len(arg) > 1u && arg[0] == '-' as u8; + ret str::len_bytes(arg) > 1u && arg[0] == '-' as u8; } fn name_str(nm: name) -> str { @@ -218,7 +218,7 @@ fn getopts(args: [str], opts: [opt]) -> result unsafe { let i = 0u; while i < l { let cur = args[i]; - let curlen = str::byte_len(cur); + let curlen = str::len_bytes(cur); if !is_arg(cur) { free += [cur]; } else if str::eq(cur, "--") { diff --git a/src/libstd/json.rs b/src/libstd/json.rs index bb1d0fb64b6b2..127cd93952e78 100644 --- a/src/libstd/json.rs +++ b/src/libstd/json.rs @@ -70,13 +70,13 @@ fn to_str(j: json) -> str { } fn rest(s: str) -> str { - assert(str::char_len(s) >= 1u); - str::slice(s, 1u, str::char_len(s)) + assert(str::len(s) >= 1u); + str::slice(s, 1u, str::len(s)) } fn from_str_str(s: str) -> (option, str) { let pos = 0u; - let len = str::byte_len(s); + let len = str::len_bytes(s); let escape = false; let res = ""; @@ -99,7 +99,7 @@ fn from_str_str(s: str) -> (option, str) { cont; } else if (c == '"') { ret (some(string(res)), - str::slice(s, pos, str::char_len(s))); + str::slice(s, pos, str::len(s))); } res = res + str::from_char(c); } @@ -172,7 +172,7 @@ fn from_str_dict(s: str) -> (option, str) { fn from_str_float(s: str) -> (option, str) { let pos = 0u; - let len = str::byte_len(s); + let len = str::len_bytes(s); let res = 0f; let neg = 1f; @@ -200,12 +200,12 @@ fn from_str_float(s: str) -> (option, str) { } '.' { break; } _ { ret (some(num(neg * res)), - str::slice(s, opos, str::char_len(s))); } + str::slice(s, opos, str::len(s))); } } } if pos == len { - ret (some(num(neg * res)), str::slice(s, pos, str::char_len(s))); + ret (some(num(neg * res)), str::slice(s, pos, str::len(s))); } let dec = 1f; @@ -220,17 +220,17 @@ fn from_str_float(s: str) -> (option, str) { res += (((c as int) - ('0' as int)) as float) * dec; } _ { ret (some(num(neg * res)), - str::slice(s, opos, str::char_len(s))); } + str::slice(s, opos, str::len(s))); } } } - ret (some(num(neg * res)), str::slice(s, pos, str::char_len(s))); + ret (some(num(neg * res)), str::slice(s, pos, str::len(s))); } fn from_str_bool(s: str) -> (option, str) { if (str::starts_with(s, "true")) { - (some(boolean(true)), str::slice(s, 4u, str::char_len(s))) + (some(boolean(true)), str::slice(s, 4u, str::len(s))) } else if (str::starts_with(s, "false")) { - (some(boolean(false)), str::slice(s, 5u, str::char_len(s))) + (some(boolean(false)), str::slice(s, 5u, str::len(s))) } else { (none, s) } @@ -238,7 +238,7 @@ fn from_str_bool(s: str) -> (option, str) { fn from_str_null(s: str) -> (option, str) { if (str::starts_with(s, "null")) { - (some(null), str::slice(s, 4u, str::char_len(s))) + (some(null), str::slice(s, 4u, str::len(s))) } else { (none, s) } diff --git a/src/libstd/rand.rs b/src/libstd/rand.rs index c288189216d2b..35525b70710ac 100644 --- a/src/libstd/rand.rs +++ b/src/libstd/rand.rs @@ -77,7 +77,7 @@ fn mk_rng() -> rng { let i = 0u; while (i < len) { let n = rustrt::rand_next(**self) as uint % - str::char_len(charset); + str::len(charset); s = s + str::from_char(str::char_at(charset, n)); i += 1u; } @@ -130,8 +130,8 @@ mod tests { log(debug, r.gen_str(10u)); log(debug, r.gen_str(10u)); log(debug, r.gen_str(10u)); - assert(str::char_len(r.gen_str(10u)) == 10u); - assert(str::char_len(r.gen_str(16u)) == 16u); + assert(str::len(r.gen_str(10u)) == 10u); + assert(str::len(r.gen_str(16u)) == 16u); } } diff --git a/src/libstd/rope.rs b/src/libstd/rope.rs index b586f114bce6c..07ee837a9b918 100644 --- a/src/libstd/rope.rs +++ b/src/libstd/rope.rs @@ -65,7 +65,7 @@ Performance notes: - the function runs in linear time. */ fn of_str(str: @str) -> rope { - ret of_substr(str, 0u, str::byte_len(*str)); + ret of_substr(str, 0u, str::len_bytes(*str)); } /* @@ -93,7 +93,7 @@ Safety notes: */ fn of_substr(str: @str, byte_offset: uint, byte_len: uint) -> rope { if byte_len == 0u { ret node::empty; } - if byte_offset + byte_len > str::byte_len(*str) { fail; } + if byte_offset + byte_len > str::len_bytes(*str) { fail; } ret node::content(node::of_substr(str, byte_offset, byte_len)); } @@ -540,6 +540,7 @@ pure fn char_len(rope: rope) -> uint { Returns: The number of bytes in the rope Performance note: Constant time. + FIXME: char or byte? */ pure fn byte_len(rope: rope) -> uint { alt(rope) { @@ -720,7 +721,7 @@ mod node { the length of `str`. */ fn of_str(str: @str) -> @node { - ret of_substr(str, 0u, str::byte_len(*str)); + ret of_substr(str, 0u, str::len_bytes(*str)); } /* @@ -742,7 +743,7 @@ mod node { */ fn of_substr(str: @str, byte_start: uint, byte_len: uint) -> @node { ret of_substr_unsafer(str, byte_start, byte_len, - str::char_len_range(*str, byte_start, byte_len)); + str::substr_len_chars(*str, byte_start, byte_len)); } /* @@ -767,7 +768,7 @@ mod node { */ fn of_substr_unsafer(str: @str, byte_start: uint, byte_len: uint, char_len: uint) -> @node { - assert(byte_start + byte_len <= str::byte_len(*str)); + assert(byte_start + byte_len <= str::len_bytes(*str)); let candidate = @leaf({ byte_offset: byte_start, byte_len: byte_len, @@ -794,7 +795,7 @@ mod node { if i == 0u { first_leaf_char_len } else { hint_max_leaf_char_len }; let chunk_byte_len = - str::byte_len_range(*str, offset, chunk_char_len); + str::substr_len_bytes(*str, offset, chunk_char_len); nodes[i] = @leaf({ byte_offset: offset, byte_len: chunk_byte_len, @@ -997,7 +998,7 @@ mod node { alt(*node) { node::leaf(x) { let char_len = - str::char_len_range(*x.content, byte_offset, byte_len); + str::substr_len_chars(*x.content, byte_offset, byte_len); ret @leaf({byte_offset: byte_offset, byte_len: byte_len, char_len: char_len, @@ -1058,9 +1059,9 @@ mod node { ret node; } let byte_offset = - str::byte_len_range(*x.content, 0u, char_offset); + str::substr_len_bytes(*x.content, 0u, char_offset); let byte_len = - str::byte_len_range(*x.content, byte_offset, char_len); + str::substr_len_bytes(*x.content, byte_offset, char_len); ret @leaf({byte_offset: byte_offset, byte_len: byte_len, char_len: char_len, @@ -1372,7 +1373,7 @@ mod tests { let sample = @"0123456789ABCDE"; let r = of_str(sample); - assert char_len(r) == str::char_len(*sample); + assert char_len(r) == str::len(*sample); assert rope_to_string(r) == *sample; } @@ -1383,11 +1384,11 @@ mod tests { while i < 10 { *buf = *buf + *buf; i+=1;} let sample = @*buf; let r = of_str(sample); - assert char_len(r) == str::char_len(*sample); + assert char_len(r) == str::len(*sample); assert rope_to_string(r) == *sample; let string_iter = 0u; - let string_len = str::byte_len(*sample); + let string_len = str::len_bytes(*sample); let rope_iter = iterator::char::start(r); let equal = true; let pos = 0u; @@ -1426,7 +1427,7 @@ mod tests { } } - assert len == str::char_len(*sample); + assert len == str::len(*sample); } #[test] diff --git a/src/libstd/sha1.rs b/src/libstd/sha1.rs index 242bf61db135f..c28d67a7526cd 100644 --- a/src/libstd/sha1.rs +++ b/src/libstd/sha1.rs @@ -368,7 +368,7 @@ mod tests { // Test that it works when accepting the message in pieces for t: test in tests { - let len = str::byte_len(t.input); + let len = str::len_bytes(t.input); let left = len; while left > 0u { let take = (left + 1u) / 2u; diff --git a/src/rustdoc/desc_to_brief_pass.rs b/src/rustdoc/desc_to_brief_pass.rs index 89b2b5d97bce7..4be1b43d50eae 100644 --- a/src/rustdoc/desc_to_brief_pass.rs +++ b/src/rustdoc/desc_to_brief_pass.rs @@ -240,7 +240,7 @@ fn parse_desc(desc: str) -> (option, option) { if check vec::is_not_empty(paras) { let maybe_brief = vec::head(paras); - if str::char_len(maybe_brief) <= max_brief_len { + if str::len(maybe_brief) <= max_brief_len { let desc_paras = vec::tail(paras); let desc = if vec::is_not_empty(desc_paras) { some(str::connect(desc_paras, "\n\n")) diff --git a/src/rustdoc/unindent_pass.rs b/src/rustdoc/unindent_pass.rs index 9c43ce4486414..b52316ccb15b9 100644 --- a/src/rustdoc/unindent_pass.rs +++ b/src/rustdoc/unindent_pass.rs @@ -67,8 +67,8 @@ fn unindent(s: str) -> str { if str::is_whitespace(line) { line } else { - assert str::byte_len(line) >= min_indent; - str::slice(line, min_indent, str::char_len(line)) + assert str::len_bytes(line) >= min_indent; + str::slice(line, min_indent, str::len(line)) } }; str::connect(unindented, "\n") diff --git a/src/test/bench/99bob-iter.rs b/src/test/bench/99bob-iter.rs index 02d55cf64d370..0e338ae5c52fa 100644 --- a/src/test/bench/99bob-iter.rs +++ b/src/test/bench/99bob-iter.rs @@ -22,7 +22,7 @@ fn b8() -> str { ret "Go to the store and buy some more, # of beer on the wall."; } -fn sub(t: str, n: int) -> str { +fn sub(t: str, n: int) -> str unsafe { let b: str = ""; let i: uint = 0u; let ns: str; @@ -31,8 +31,9 @@ fn sub(t: str, n: int) -> str { 1 { ns = "1 bottle"; } _ { ns = int::to_str(n, 10u) + " bottles"; } } - while i < str::byte_len(t) { - if t[i] == '#' as u8 { b += ns; } else { str::push_byte(b, t[i]); } + while i < str::len_bytes(t) { + if t[i] == '#' as u8 { b += ns; } + else { str::unsafe::push_byte(b, t[i]); } i += 1u; } ret b; diff --git a/src/test/bench/99bob-simple.rs b/src/test/bench/99bob-simple.rs index 6d8a17cc93218..351f5463d5291 100644 --- a/src/test/bench/99bob-simple.rs +++ b/src/test/bench/99bob-simple.rs @@ -22,7 +22,7 @@ fn b8() -> str { ret "Go to the store and buy some more, # of beer on the wall."; } -fn sub(t: str, n: int) -> str { +fn sub(t: str, n: int) -> str unsafe { let b: str = ""; let i: uint = 0u; let ns: str; @@ -31,8 +31,9 @@ fn sub(t: str, n: int) -> str { 1 { ns = "1 bottle"; } _ { ns = int::to_str(n, 10u) + " bottles"; } } - while i < str::byte_len(t) { - if t[i] == '#' as u8 { b += ns; } else { str::push_byte(b, t[i]); } + while i < str::len_bytes(t) { + if t[i] == '#' as u8 { b += ns; } + else { str::unsafe::push_byte(b, t[i]); } i += 1u; } ret b; diff --git a/src/test/bench/shootout-fasta.rs b/src/test/bench/shootout-fasta.rs index 7145c0fc1054b..ef82f0742de16 100644 --- a/src/test/bench/shootout-fasta.rs +++ b/src/test/bench/shootout-fasta.rs @@ -47,28 +47,28 @@ fn make_random_fasta(id: str, desc: str, genelist: [aminoacids], n: int) { let rng = @{mutable last: std::rand::mk_rng().next()}; let op: str = ""; uint::range(0u, n as uint) {|_i| - str::push_byte(op, select_random(myrandom_next(rng, 100u32), - genelist) as u8); - if str::byte_len(op) >= LINE_LENGTH() { + str::push_char(op, select_random(myrandom_next(rng, 100u32), + genelist)); + if str::len_bytes(op) >= LINE_LENGTH() { log(debug, op); op = ""; } } - if str::byte_len(op) > 0u { log(debug, op); } + if str::len_bytes(op) > 0u { log(debug, op); } } -fn make_repeat_fasta(id: str, desc: str, s: str, n: int) { +fn make_repeat_fasta(id: str, desc: str, s: str, n: int) unsafe { log(debug, ">" + id + " " + desc); let op: str = ""; - let sl: uint = str::byte_len(s); + let sl: uint = str::len_bytes(s); uint::range(0u, n as uint) {|i| - str::push_byte(op, s[i % sl]); - if str::byte_len(op) >= LINE_LENGTH() { + str::unsafe::push_byte(op, s[i % sl]); + if str::len_bytes(op) >= LINE_LENGTH() { log(debug, op); op = ""; } } - if str::byte_len(op) > 0u { log(debug, op); } + if str::len_bytes(op) > 0u { log(debug, op); } } fn acid(ch: char, prob: u32) -> aminoacids { ret {ch: ch, prob: prob}; } diff --git a/src/test/run-pass/bind-native-fn.rs b/src/test/run-pass/bind-native-fn.rs index 2984f97813b66..88b0fbf4b3553 100644 --- a/src/test/run-pass/bind-native-fn.rs +++ b/src/test/run-pass/bind-native-fn.rs @@ -13,7 +13,7 @@ native mod libc { fn main() { let s = "hello world\n"; let b = str::bytes(s); - let l = str::byte_len(s); + let l = str::len_bytes(s); let b8 = unsafe { vec::unsafe::to_ptr(b) }; libc::write(0i32, b8, l); let a = bind libc::write(0i32, _, _); diff --git a/src/test/run-pass/string-self-append.rs b/src/test/run-pass/string-self-append.rs index 30ea8e0efc437..260b0194a0c94 100644 --- a/src/test/run-pass/string-self-append.rs +++ b/src/test/run-pass/string-self-append.rs @@ -7,8 +7,8 @@ fn main() { let i = 20; let expected_len = 1u; while i > 0 { - log(error, str::byte_len(a)); - assert (str::byte_len(a) == expected_len); + log(error, str::len_bytes(a)); + assert (str::len_bytes(a) == expected_len); a += a; i -= 1; expected_len *= 2u; diff --git a/src/test/run-pass/utf8_chars.rs b/src/test/run-pass/utf8_chars.rs index 7b4e9bf6d1c4d..cd591866c2488 100644 --- a/src/test/run-pass/utf8_chars.rs +++ b/src/test/run-pass/utf8_chars.rs @@ -7,8 +7,8 @@ fn main() { let chs: [char] = ['e', 'é', '€', 0x10000 as char]; let s: str = str::from_chars(chs); - assert (str::byte_len(s) == 10u); - assert (str::char_len(s) == 4u); + assert (str::len_bytes(s) == 10u); + assert (str::len(s) == 4u); assert (vec::len::(str::chars(s)) == 4u); assert (str::eq(str::from_chars(str::chars(s)), s)); assert (str::char_at(s, 0u) == 'e');