Skip to content

Commit e465f4d

Browse files
Tony Sifkarovski and gnzlbg
Tony Sifkarovski
authored and committed
Add unchecked methods, fix _mm_extract_epi* return types (rust-lang#223)
* Adds extract_unchecked + replace_unchecked + len (rust-lang#222)
* [x86] Fixes the return types + uses extract_unchecked for:
  * _mm_extract_epi8
  * _mm_extract_epi16
  * _mm256_extract_epi8
  * _mm256_extract_epi16
* Minor changes to the other extract_epi* intrinsics for style consistency

These should now zero-extend the extracted int and behave appropriately. An old typo makes these a bit confusing; see this LLVM issue.
1 parent e5134c1 commit e465f4d

File tree

5 files changed

+78
-37
lines changed

5 files changed

+78
-37
lines changed

coresimd/src/macros.rs

Lines changed: 21 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,11 @@ macro_rules! define_impl {
3030
$name($($elname),*)
3131
}
3232

33+
#[inline(always)]
34+
pub fn len() -> i32 {
35+
$nelems
36+
}
37+
3338
#[inline(always)]
3439
pub fn splat(value: $elemty) -> $name {
3540
$name($({
@@ -42,13 +47,27 @@ macro_rules! define_impl {
4247
#[inline(always)]
4348
pub fn extract(self, idx: u32) -> $elemty {
4449
assert!(idx < $nelems);
45-
unsafe { simd_extract(self, idx) }
50+
unsafe { self.extract_unchecked(idx) }
51+
}
52+
53+
#[inline(always)]
54+
pub unsafe fn extract_unchecked(self, idx: u32) -> $elemty {
55+
simd_extract(self, idx)
4656
}
4757

4858
#[inline(always)]
4959
pub fn replace(self, idx: u32, val: $elemty) -> $name {
5060
assert!(idx < $nelems);
51-
unsafe { simd_insert(self, idx, val) }
61+
unsafe { self.replace_unchecked(idx, val) }
62+
}
63+
64+
#[inline(always)]
65+
pub unsafe fn replace_unchecked(
66+
self,
67+
idx: u32,
68+
val: $elemty,
69+
) -> $name {
70+
simd_insert(self, idx, val)
5271
}
5372

5473
#[inline(always)]

coresimd/src/x86/i586/avx.rs

Lines changed: 33 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -863,36 +863,44 @@ pub unsafe fn _mm256_extractf128_si256(a: __m256i, imm8: i32) -> __m128i {
863863
__m128i::from(dst)
864864
}
865865

866-
/// Extract an 8-bit integer from `a`, selected with `imm8`.
866+
/// Extract an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit
867+
/// integer containing the zero-extended integer data.
868+
/// See: https://reviews.llvm.org/D20468
867869
#[inline(always)]
868870
#[target_feature = "+avx"]
869871
// This intrinsic has no corresponding instruction.
870872
pub unsafe fn _mm256_extract_epi8(a: i8x32, imm8: i32) -> i32 {
871-
a.extract(imm8 as u32 & 31) as i32
873+
let imm8 = (imm8 & 31) as u32;
874+
(a.extract_unchecked(imm8) as i32) & 0xFF
872875
}
873876

874-
/// Extract a 16-bit integer from `a`, selected with `imm8`.
877+
/// Extract a 16-bit integer from `a`, selected with `imm8`. Returns a 32-bit
878+
/// integer containing the zero-extended integer data.
879+
/// See: https://reviews.llvm.org/D20468
875880
#[inline(always)]
876881
#[target_feature = "+avx"]
877882
// This intrinsic has no corresponding instruction.
878883
pub unsafe fn _mm256_extract_epi16(a: i16x16, imm8: i32) -> i32 {
879-
a.extract(imm8 as u32 & 15) as i32
884+
let imm8 = (imm8 & 15) as u32;
885+
(a.extract_unchecked(imm8) as i32) & 0xFFFF
880886
}
881887

882888
/// Extract a 32-bit integer from `a`, selected with `imm8`.
883889
#[inline(always)]
884890
#[target_feature = "+avx"]
885891
// This intrinsic has no corresponding instruction.
886892
pub unsafe fn _mm256_extract_epi32(a: i32x8, imm8: i32) -> i32 {
887-
a.extract(imm8 as u32 & 7) as i32
893+
let imm8 = (imm8 & 7) as u32;
894+
a.extract_unchecked(imm8)
888895
}
889896

890897
/// Extract a 64-bit integer from `a`, selected with `imm8`.
891898
#[inline(always)]
892899
#[target_feature = "+avx"]
893900
// This intrinsic has no corresponding instruction.
894-
pub unsafe fn _mm256_extract_epi64(a: i64x4, imm8: i32) -> i32 {
895-
a.extract(imm8 as u32 & 3) as i32
901+
pub unsafe fn _mm256_extract_epi64(a: i64x4, imm8: i32) -> i64 {
902+
let imm8 = (imm8 & 3) as u32;
903+
a.extract_unchecked(imm8)
896904
}
897905

898906
/// Zero the contents of all XMM or YMM registers.
@@ -3142,28 +3150,34 @@ mod tests {
31423150
unsafe fn _mm256_extract_epi8() {
31433151
#[cfg_attr(rustfmt, rustfmt_skip)]
31443152
let a = i8x32::new(
3145-
1, 2, 3, 4, 5, 6, 7, 8,
3146-
9, 10, 11, 12, 13, 14, 15, 16,
3147-
17, 18, 19, 20, 21, 22, 23, 24,
3148-
25, 26, 27, 28, 29, 30, 31, 32,
3153+
-1, 1, 2, 3, 4, 5, 6, 7,
3154+
8, 9, 10, 11, 12, 13, 14, 15,
3155+
16, 17, 18, 19, 20, 21, 22, 23,
3156+
24, 25, 26, 27, 28, 29, 30, 31
31493157
);
3150-
let r = avx::_mm256_extract_epi8(a, 0);
3151-
assert_eq!(r, 1);
3158+
let r1 = avx::_mm256_extract_epi8(a, 0);
3159+
let r2 = avx::_mm256_extract_epi8(a, 35);
3160+
assert_eq!(r1, 0xFF);
3161+
assert_eq!(r2, 3);
31523162
}
31533163

31543164
#[simd_test = "avx"]
31553165
unsafe fn _mm256_extract_epi16() {
31563166
let a =
3157-
i16x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3158-
let r = avx::_mm256_extract_epi16(a, 0);
3159-
assert_eq!(r, 0);
3167+
i16x16::new(-1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
3168+
let r1 = avx::_mm256_extract_epi16(a, 0);
3169+
let r2 = avx::_mm256_extract_epi16(a, 19);
3170+
assert_eq!(r1, 0xFFFF);
3171+
assert_eq!(r2, 3);
31603172
}
31613173

31623174
#[simd_test = "avx"]
31633175
unsafe fn _mm256_extract_epi32() {
3164-
let a = i32x8::new(1, 2, 3, 4, 5, 6, 7, 8);
3165-
let r = avx::_mm256_extract_epi32(a, 0);
3166-
assert_eq!(r, 1);
3176+
let a = i32x8::new(-1, 1, 2, 3, 4, 5, 6, 7);
3177+
let r1 = avx::_mm256_extract_epi32(a, 0);
3178+
let r2 = avx::_mm256_extract_epi32(a, 11);
3179+
assert_eq!(r1, -1);
3180+
assert_eq!(r2, 3);
31673181
}
31683182

31693183
#[simd_test = "avx"]

coresimd/src/x86/i586/sse2.rs

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -971,7 +971,8 @@ pub unsafe fn _mm_packus_epi16(a: i16x8, b: i16x8) -> u8x16 {
971971
#[target_feature = "+sse2"]
972972
#[cfg_attr(test, assert_instr(pextrw, imm8 = 9))]
973973
pub unsafe fn _mm_extract_epi16(a: i16x8, imm8: i32) -> i32 {
974-
a.extract(imm8 as u32 & 0b111) as i32
974+
let imm8 = (imm8 & 7) as u32;
975+
(a.extract_unchecked(imm8) as i32) & 0xFFFF
975976
}
976977

977978
/// Return a new vector where the `imm8` element of `a` is replaced with `i`.
@@ -3122,9 +3123,11 @@ mod tests {
31223123

31233124
#[simd_test = "sse2"]
31243125
unsafe fn _mm_extract_epi16() {
3125-
let a = i16x8::new(0, 1, 2, 3, 4, 5, 6, 7);
3126-
let r = sse2::_mm_extract_epi16(a, 5);
3127-
assert_eq!(r, 5);
3126+
let a = i16x8::new(-1, 1, 2, 3, 4, 5, 6, 7);
3127+
let r1 = sse2::_mm_extract_epi16(a, 0);
3128+
let r2 = sse2::_mm_extract_epi16(a, 11);
3129+
assert_eq!(r1, 0xFFFF);
3130+
assert_eq!(r2, 3);
31283131
}
31293132

31303133
#[simd_test = "sse2"]

coresimd/src/x86/i586/sse41.rs

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -120,21 +120,25 @@ pub unsafe fn _mm_extract_ps(a: f32x4, imm8: u8) -> i32 {
120120
mem::transmute(a.extract(imm8 as u32 & 0b11))
121121
}
122122

123-
/// Extract an 8-bit integer from `a` selected with `imm8`
123+
/// Extract an 8-bit integer from `a`, selected with `imm8`. Returns a 32-bit
124+
/// integer containing the zero-extended integer data.
125+
/// See: https://reviews.llvm.org/D20468
124126
#[inline(always)]
125127
#[target_feature = "+sse4.1"]
126128
#[cfg_attr(test, assert_instr(pextrb, imm8 = 0))]
127-
pub unsafe fn _mm_extract_epi8(a: i8x16, imm8: u8) -> i8 {
128-
a.extract((imm8 & 0b1111) as u32)
129+
pub unsafe fn _mm_extract_epi8(a: i8x16, imm8: i32) -> i32 {
130+
let imm8 = (imm8 & 15) as u32;
131+
(a.extract_unchecked(imm8) as i32) & 0xFF
129132
}
130133

131134
/// Extract a 32-bit integer from `a` selected with `imm8`
132135
#[inline(always)]
133136
#[target_feature = "+sse4.1"]
134137
// TODO: Add test for Windows
135138
#[cfg_attr(all(test, not(windows)), assert_instr(pextrd, imm8 = 1))]
136-
pub unsafe fn _mm_extract_epi32(a: i32x4, imm8: u8) -> i32 {
137-
a.extract((imm8 & 0b11) as u32)
139+
pub unsafe fn _mm_extract_epi32(a: i32x4, imm8: i32) -> i32 {
140+
let imm8 = (imm8 & 3) as u32;
141+
a.extract_unchecked(imm8) as i32
138142
}
139143

140144
/// Select a single value in `a` to store at some position in `b`,
@@ -844,11 +848,11 @@ mod tests {
844848
#[simd_test = "sse4.1"]
845849
unsafe fn _mm_extract_epi8() {
846850
let a =
847-
i8x16::new(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
848-
let r = sse41::_mm_extract_epi8(a, 1);
849-
assert_eq!(r, 1);
850-
let r = sse41::_mm_extract_epi8(a, 17);
851-
assert_eq!(r, 1);
851+
i8x16::new(-1, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
852+
let r1 = sse41::_mm_extract_epi8(a, 0);
853+
let r2 = sse41::_mm_extract_epi8(a, 19);
854+
assert_eq!(r1, 0xFF);
855+
assert_eq!(r2, 3);
852856
}
853857

854858
#[simd_test = "sse4.1"]

coresimd/src/x86/i686/sse41.rs

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ extern "C" {
2424
// On x86 this emits 2 pextrd instructions
2525
#[cfg_attr(all(test, not(windows), target_arch = "x86"),
2626
assert_instr(pextrd, imm8 = 1))]
27-
pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: u8) -> i64 {
28-
a.extract((imm8 & 0b1) as u32)
27+
pub unsafe fn _mm_extract_epi64(a: i64x2, imm8: i32) -> i64 {
28+
let imm8 = (imm8 & 1) as u32;
29+
a.extract_unchecked(imm8)
2930
}
3031

3132
/// Return a copy of `a` with the 64-bit integer from `i` inserted at a

0 commit comments

Comments
 (0)