Skip to content

IterBytes: Delimit sequences &[A] and ~str when hashing #8545

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account-related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Aug 18, 2013
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 23 additions & 1 deletion src/libstd/hash.rs
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,14 @@ mod tests {

use uint;

// Newtype around a borrowed byte slice, used by the tests in this module.
// Hash just the bytes of the slice, without length prefix
struct Bytes<'self>(&'self [u8]);
impl<'self> IterBytes for Bytes<'self> {
// Hands the wrapped slice to `f` in a single call and returns whatever `f`
// returns. `_lsb0` is unused: the bytes are passed through as stored.
fn iter_bytes(&self, _lsb0: bool, f: &fn(&[u8]) -> bool) -> bool {
f(**self)
}
}

#[test]
fn test_siphash() {
let vecs : [[u8, ..8], ..64] = [
Expand Down Expand Up @@ -496,7 +504,7 @@ mod tests {
while t < 64 {
debug!("siphash test %?", t);
let vec = u8to64_le!(vecs[t], 0);
let out = buf.hash_keyed(k0, k1);
let out = Bytes(buf.as_slice()).hash_keyed(k0, k1);
debug!("got %?, expected %?", out, vec);
assert_eq!(vec, out);

Expand Down Expand Up @@ -587,4 +595,18 @@ mod tests {
// Positive and negative zero must hash to the same value.
fn test_float_hashes_of_zero() {
assert_eq!(0.0.hash(), (-0.0).hash());
}

// Checks that tuples whose members concatenate to the same raw bytes do not
// hash alike.
#[test]
fn test_hash_no_concat_alias() {
// Three string pairs that all concatenate to "aabb".
let s = ("aa", "bb");
let t = ("aabb", "");
let u = ("a", "abb");

// Two byte-slice triples that both flatten to [1, 0, 0, 0].
let v = (&[1u8], &[0u8, 0], &[0u8]);
let w = (&[1u8, 0, 0, 0], &[], &[]);

// The triples themselves are unequal ...
assert!(v != w);
// ... and the differently split tuples are expected to hash differently too.
assert!(s.hash() != t.hash() && s.hash() != u.hash());
assert!(v.hash() != w.hash());
}
}
2 changes: 0 additions & 2 deletions src/libstd/str/ascii.rs
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,6 @@ static ASCII_UPPER_MAP: &'static [u8] = &[
#[cfg(test)]
mod tests {
use super::*;
use to_bytes::ToBytes;
use str::from_char;

macro_rules! v2ascii (
Expand Down Expand Up @@ -445,7 +444,6 @@ mod tests {

#[test]
fn test_ascii_to_bytes() {
assert_eq!(v2ascii!(~[40, 32, 59]).to_bytes(false), ~[40u8, 32u8, 59u8]);
assert_eq!(v2ascii!(~[40, 32, 59]).into_bytes(), ~[40u8, 32u8, 59u8]);
}

Expand Down
124 changes: 66 additions & 58 deletions src/libstd/to_bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,37 +15,43 @@ The `ToBytes` and `IterBytes` traits
*/

use cast;
use container::Container;
use io;
use io::Writer;
use iterator::Iterator;
use option::{None, Option, Some};
use str::StrSlice;
use vec::ImmutableVector;
use str::{Str, StrSlice};
use vec::{Vector, ImmutableVector};

/// Callback handed to `IterBytes::iter_bytes`. It is called with each chunk
/// of bytes and returns a `bool`; the implementations in this file chain
/// those results with `&&`, so a `false` result stops later chunks from
/// being emitted.
pub type Cb<'self> = &'self fn(buf: &[u8]) -> bool;

/**
* A trait to implement in order to make a type hashable;
* This works in combination with the trait `Hash::Hash`, and
* may in the future be merged with that trait or otherwise
* modified when default methods and trait inheritance are
* completed.
*/
///
/// A trait to implement in order to make a type hashable.
/// This works in combination with the trait `std::hash::Hash`, and
/// may in the future be merged with that trait or otherwise
/// modified when default methods and trait inheritance are
/// completed.
///
/// IterBytes should be implemented so that the extent of the
/// produced byte stream can be discovered, given the original
/// type.
/// For example, the IterBytes implementation for vectors emits
/// its length first, and enums should emit their discriminant.
///
pub trait IterBytes {
/**
* Call the provided callback `f` one or more times with
* byte-slices that should be used when computing a hash
* value or otherwise "flattening" the structure into
* a sequence of bytes. The `lsb0` parameter conveys
* whether the caller is asking for little-endian bytes
* (`true`) or big-endian (`false`); this should only be
* relevant in implementations that represent a single
* multi-byte datum such as a 32 bit integer or 64 bit
* floating-point value. It can be safely ignored for
* larger structured types as they are usually processed
* left-to-right in declaration order, regardless of
* underlying memory endianness.
*/
/// Call the provided callback `f` one or more times with
/// byte-slices that should be used when computing a hash
/// value or otherwise "flattening" the structure into
/// a sequence of bytes. The `lsb0` parameter conveys
/// whether the caller is asking for little-endian bytes
/// (`true`) or big-endian (`false`); this should only be
/// relevant in implementations that represent a single
/// multi-byte datum such as a 32 bit integer or 64 bit
/// floating-point value. It can be safely ignored for
/// larger structured types as they are usually processed
/// left-to-right in declaration order, regardless of
/// underlying memory endianness.
///
/// The implementations visible in this file chain their calls to `f`
/// with `&&`, so once a call yields `false` nothing further is emitted
/// by that chain.
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool;
}

Expand Down Expand Up @@ -224,74 +230,76 @@ impl IterBytes for f64 {
// Hashes a borrowed slice as its length followed by every element in order.
// The trait documentation asks implementations to make the extent of their
// output discoverable; emitting the length first is how slices do that.
impl<'self,A:IterBytes> IterBytes for &'self [A] {
#[inline]
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
// Length prefix first; if it yields `false`, the elements are skipped.
self.len().iter_bytes(lsb0, |b| f(b)) &&
self.iter().advance(|elt| elt.iter_bytes(lsb0, |b| f(b)))
}
}

// Hashes a pair by emitting the first member's bytes, then the second's.
impl<A:IterBytes,B:IterBytes> IterBytes for (A,B) {
#[inline]
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
match *self {
// Inside each closure the parameter `b` is the byte chunk and shadows
// the tuple binding `b`, which is used only as the method receiver.
(ref a, ref b) => { a.iter_bytes(lsb0, |b| f(b)) &&
b.iter_bytes(lsb0, |b| f(b)) }
}
}
}

impl<A:IterBytes,B:IterBytes,C:IterBytes> IterBytes for (A,B,C) {
#[inline]
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
match *self {
(ref a, ref b, ref c) => {
a.iter_bytes(lsb0, |b| f(b)) &&
b.iter_bytes(lsb0, |b| f(b)) &&
c.iter_bytes(lsb0, |b| f(b))
}
impl<A: IterBytes> IterBytes for (A, ) {
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
match *self {
(ref a, ) => a.iter_bytes(lsb0, |b| f(b))
}
}
}
}

// Move this to vec, probably.
// Returns the slice it is given, unchanged. The `~[A]` and `@[A]` impls in
// this file call it as `borrow(*self)` before invoking the slice
// implementation of `iter_bytes`.
fn borrow<'x,A>(a: &'x [A]) -> &'x [A] {
a
}
// Generates an `IterBytes` impl for a tuple whose element types are the given
// identifiers. Each identifier serves both as the generic type parameter and
// as the name of the `ref` binding for the matching tuple field.
macro_rules! iter_bytes_tuple(
($($A:ident),+) => (
impl<$($A: IterBytes),+> IterBytes for ($($A),+) {
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
match *self {
($(ref $A),+) => {
// One `iter_bytes` call per field, joined with `&&` in field order.
$(
$A .iter_bytes(lsb0, |b| f(b))
)&&+
}
}
}
}
)
)

// Instantiate the impl for tuples of two through eight elements.
iter_bytes_tuple!(A, B)
iter_bytes_tuple!(A, B, C)
iter_bytes_tuple!(A, B, C, D)
iter_bytes_tuple!(A, B, C, D, E)
iter_bytes_tuple!(A, B, C, D, E, F)
iter_bytes_tuple!(A, B, C, D, E, F, G)
iter_bytes_tuple!(A, B, C, D, E, F, G, H)

impl<A:IterBytes> IterBytes for ~[A] {
#[inline]
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
borrow(*self).iter_bytes(lsb0, f)
self.as_slice().iter_bytes(lsb0, f)
}
}

impl<A:IterBytes> IterBytes for @[A] {
#[inline]
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
borrow(*self).iter_bytes(lsb0, f)
self.as_slice().iter_bytes(lsb0, f)
}
}

impl<'self> IterBytes for &'self str {
#[inline]
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
f(self.as_bytes())
// Terminate the string with a byte that does not appear in UTF-8
f(self.as_bytes()) && f([0xFF])
}
}

impl IterBytes for ~str {
#[inline]
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
// this should possibly include the null terminator, but that
// breaks .find_equiv on hashmaps.
f(self.as_bytes())
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
self.as_slice().iter_bytes(lsb0, f)
}
}

impl IterBytes for @str {
#[inline]
fn iter_bytes(&self, _lsb0: bool, f: Cb) -> bool {
// this should possibly include the null terminator, but that
// breaks .find_equiv on hashmaps.
f(self.as_bytes())
fn iter_bytes(&self, lsb0: bool, f: Cb) -> bool {
self.as_slice().iter_bytes(lsb0, f)
}
}

Expand Down