Skip to content

Commit f6b2ef8

Browse files
committed
Apply feedback for Group/BitMask
1 parent 17ab35d commit f6b2ef8

File tree

3 files changed

+45
-18
lines changed

3 files changed

+45
-18
lines changed

src/raw/bitmask.rs

Lines changed: 19 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
use super::imp::{BitMaskWord, BITMASK_MASK, BITMASK_SHIFT};
1+
use super::imp::{BitMaskWord, BITMASK_MASK, BITMASK_STRIDE};
22
#[cfg(feature = "nightly")]
33
use core::intrinsics;
44

@@ -7,6 +7,12 @@ use core::intrinsics;
77
///
88
/// The bit mask is arranged so that low-order bits represent lower memory
99
/// addresses for group match results.
10+
///
11+
/// For implementation reasons, the bits in the set may be sparsely packed, so
12+
/// that there is only one bit-per-byte used (the high bit, 7). If this is the
13+
/// case, `BITMASK_STRIDE` will be 8 to indicate a divide-by-8 should be
14+
/// performed on counts/indices to normalize this difference. `BITMASK_MASK` is
15+
/// similarly a mask of all the actually-used bits.
1016
#[derive(Copy, Clone)]
1117
pub struct BitMask(pub BitMaskWord);
1218

@@ -24,7 +30,7 @@ impl BitMask {
2430
pub fn remove_lowest_bit(self) -> BitMask {
2531
BitMask(self.0 & (self.0 - 1))
2632
}
27-
/// Returns whether the `BitMask` has at least one set bits.
33+
/// Returns whether the `BitMask` has at least one set bit.
2834
#[inline]
2935
pub fn any_bit_set(self) -> bool {
3036
self.0 != 0
@@ -36,7 +42,7 @@ impl BitMask {
3642
if self.0 == 0 {
3743
None
3844
} else {
39-
Some(self.trailing_zeros())
45+
Some(unsafe { self.lowest_set_bit_nonzero() })
4046
}
4147
}
4248

@@ -45,7 +51,7 @@ impl BitMask {
4551
#[inline]
4652
#[cfg(feature = "nightly")]
4753
pub unsafe fn lowest_set_bit_nonzero(self) -> usize {
48-
intrinsics::cttz_nonzero(self.0) as usize >> BITMASK_SHIFT
54+
intrinsics::cttz_nonzero(self.0) as usize / BITMASK_STRIDE
4955
}
5056
#[cfg(not(feature = "nightly"))]
5157
pub unsafe fn lowest_set_bit_nonzero(self) -> usize {
@@ -55,21 +61,22 @@ impl BitMask {
5561
/// Returns the number of trailing zeroes in the `BitMask`.
5662
#[inline]
5763
pub fn trailing_zeros(self) -> usize {
58-
// ARM doesn't have a CTZ instruction, and instead uses RBIT + CLZ.
59-
// However older ARM versions (pre-ARMv7) don't have RBIT and need to
60-
// emulate it instead. Since we only have 1 bit set in each byte we can
61-
// use REV + CLZ instead.
62-
if cfg!(target_arch = "arm") && BITMASK_SHIFT >= 3 {
63-
self.0.swap_bytes().leading_zeros() as usize >> BITMASK_SHIFT
64+
// ARM doesn't have a trailing_zeroes instruction, and instead uses
65+
// reverse_bits (RBIT) + leading_zeroes (CLZ). However older ARM
66+
// versions (pre-ARMv7) don't have RBIT and need to emulate it
67+
// instead. Since we only have 1 bit set in each byte on ARM, we can
68+
// use swap_bytes (REV) + leading_zeroes instead.
69+
if cfg!(target_arch = "arm") && BITMASK_STRIDE % 8 == 0 {
70+
self.0.swap_bytes().leading_zeros() as usize / BITMASK_STRIDE
6471
} else {
65-
self.0.trailing_zeros() as usize >> BITMASK_SHIFT
72+
self.0.trailing_zeros() as usize / BITMASK_STRIDE
6673
}
6774
}
6875

6976
/// Returns the number of leading zeroes in the `BitMask`.
7077
#[inline]
7178
pub fn leading_zeros(self) -> usize {
72-
self.0.leading_zeros() as usize >> BITMASK_SHIFT
79+
self.0.leading_zeros() as usize / BITMASK_STRIDE
7380
}
7481
}
7582

src/raw/generic.rs

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,8 +19,9 @@ type GroupWord = u64;
1919
type GroupWord = u32;
2020

2121
pub type BitMaskWord = GroupWord;
22-
pub const BITMASK_SHIFT: u32 = 3;
23-
pub const BITMASK_MASK: GroupWord = 0x8080808080808080u64 as GroupWord;
22+
pub const BITMASK_STRIDE: usize = 8;
23+
// We only care about the highest bit of each byte for the mask.
24+
pub const BITMASK_MASK: BitMaskWord = 0x8080_8080_8080_8080u64 as GroupWord;
2425

2526
/// Helper function to replicate a byte across a `GroupWord`.
2627
#[inline]
@@ -107,13 +108,17 @@ impl Group {
107108
/// `EMPTY`.
108109
#[inline]
109110
pub fn match_empty(&self) -> BitMask {
111+
// If the high bit is set, then the byte must be either:
112+
// 1111_1111 (EMPTY) or 1000_0000 (DELETED).
113+
// So we can just check if the top two bits are 1 by ANDing them.
110114
BitMask((self.0 & (self.0 << 1) & repeat(0x80)).to_le())
111115
}
112116

113117
/// Returns a `BitMask` indicating all bytes in the group which are
114-
/// `EMPTY` pr `DELETED`.
118+
/// `EMPTY` or `DELETED`.
115119
#[inline]
116120
pub fn match_empty_or_deleted(&self) -> BitMask {
121+
// A byte is EMPTY or DELETED iff the high bit is set
117122
BitMask((self.0 & repeat(0x80)).to_le())
118123
}
119124

@@ -123,6 +128,13 @@ impl Group {
123128
/// - `FULL => DELETED`
124129
#[inline]
125130
pub fn convert_special_to_empty_and_full_to_deleted(&self) -> Group {
131+
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
132+
// and high_bit = 0 (FULL) to 1000_0000
133+
//
134+
// Here's this logic expanded to concrete values:
135+
// let full = 1000_0000 (true) or 0000_0000 (false)
136+
// !1000_0000 + 1 = 0111_1111 + 1 = 1000_0000 (no carry)
137+
// !0000_0000 + 0 = 1111_1111 + 0 = 1111_1111 (no carry)
126138
let full = !self.0 & repeat(0x80);
127139
Group(!full + (full >> 7))
128140
}

src/raw/sse2.rs

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -8,8 +8,8 @@ use core::arch::x86;
88
use core::arch::x86_64 as x86;
99

1010
pub type BitMaskWord = u16;
11-
pub const BITMASK_SHIFT: u32 = 0;
12-
pub const BITMASK_MASK: u16 = 0xffff;
11+
pub const BITMASK_STRIDE: usize = 1;
12+
pub const BITMASK_MASK: BitMaskWord = 0xffff;
1313

1414
/// Abstraction over a group of control bytes which can be scanned in
1515
/// parallel.
@@ -78,9 +78,10 @@ impl Group {
7878
}
7979

8080
/// Returns a `BitMask` indicating all bytes in the group which are
81-
/// `EMPTY` pr `DELETED`.
81+
/// `EMPTY` or `DELETED`.
8282
#[inline]
8383
pub fn match_empty_or_deleted(&self) -> BitMask {
84+
// A byte is EMPTY or DELETED iff the high bit is set
8485
unsafe { BitMask(x86::_mm_movemask_epi8(self.0) as u16) }
8586
}
8687

@@ -90,6 +91,13 @@ impl Group {
9091
/// - `FULL => DELETED`
9192
#[inline]
9293
pub fn convert_special_to_empty_and_full_to_deleted(&self) -> Group {
94+
// Map high_bit = 1 (EMPTY or DELETED) to 1111_1111
95+
// and high_bit = 0 (FULL) to 1000_0000
96+
//
97+
// Here's this logic expanded to concrete values:
98+
// let special = 0 > byte = 1111_1111 (true) or 0000_0000 (false)
99+
// 1111_1111 | 1000_0000 = 1111_1111
100+
// 0000_0000 | 1000_0000 = 1000_0000
93101
unsafe {
94102
let zero = x86::_mm_setzero_si128();
95103
let special = x86::_mm_cmpgt_epi8(zero, self.0);

0 commit comments

Comments
 (0)