Skip to content

Commit e422a8a

Browse files
gwenn authored and
alexcrichton committed
ssse3 (rust-lang#68)
* SSSE3: _mm_abs_epi16, _mm_abs_epi32, _mm_hadd_epi16 * SSSE3: _mm_hadds_epi16 * SSSE3: assert_instr * SSSE3: _mm_hadd_epi32 * SSSE3: _mm_hsub_epi16 * SSSE3: _mm_hsubs_epi16 * SSSE3: _mm_hsub_epi32 * SSSE3: _mm_maddubs_epi16 * SSSE3: _mm_mulhrs_epi16 * SSSE3: _mm_sign_epi8 * SSSE3: _mm_sign_epi32 * SSSE3: _mm_sign_epi32 * SSSE3: Fix assert_instr
1 parent 0024ba9 commit e422a8a

File tree

1 file changed

+280
-0
lines changed

1 file changed

+280
-0
lines changed

src/x86/ssse3.rs

Lines changed: 280 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,24 @@ pub unsafe fn _mm_abs_epi8(a: i8x16) -> u8x16 {
1212
pabsb128(a)
1313
}
1414

15+
/// Compute the absolute value of each of the packed 16-bit signed integers
16+
/// in `a` and return the results as packed unsigned 16-bit integers.
17+
#[inline(always)]
18+
#[target_feature = "+ssse3"]
19+
#[cfg_attr(test, assert_instr(pabsw))]
20+
pub unsafe fn _mm_abs_epi16(a: i16x8) -> u16x8 {
21+
pabsw128(a)
22+
}
23+
24+
/// Compute the absolute value of each of the packed 32-bit signed integers
25+
/// in `a` and return the results as packed unsigned 32-bit integers.
26+
#[inline(always)]
27+
#[target_feature = "+ssse3"]
28+
#[cfg_attr(test, assert_instr(pabsd))]
29+
pub unsafe fn _mm_abs_epi32(a: i32x4) -> u32x4 {
30+
pabsd128(a)
31+
}
32+
1533
/// Shuffle bytes from `a` according to the content of `b`.
1634
///
1735
/// The last 4 bits of each byte of `b` are used as addresses
@@ -43,13 +61,164 @@ pub unsafe fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
4361
pshufb128(a, b)
4462
}
4563

64+
/// Horizontally add the adjacent pairs of values contained in 2 packed
65+
/// 128-bit vectors of [8 x i16].
///
/// The four pair sums taken from `a` fill the first four lanes of the result
/// and the four pair sums taken from `b` fill the last four lanes.
66+
#[inline(always)]
67+
#[target_feature = "+ssse3"]
68+
#[cfg_attr(test, assert_instr(phaddw))]
69+
pub unsafe fn _mm_hadd_epi16(a: i16x8, b: i16x8) -> i16x8 {
70+
phaddw128(a, b)
71+
}
72+
73+
/// Horizontally add the adjacent pairs of values contained in 2 packed
74+
/// 128-bit vectors of [8 x i16], with signed saturation: sums greater than
75+
/// 7FFFh (32767) become 7FFFh and sums less than 8000h (-32768) become 8000h.
///
/// The four pair sums taken from `a` fill the first four lanes of the result
/// and the four pair sums taken from `b` fill the last four lanes.
76+
#[inline(always)]
77+
#[target_feature = "+ssse3"]
78+
#[cfg_attr(test, assert_instr(phaddsw))]
79+
pub unsafe fn _mm_hadds_epi16(a: i16x8, b: i16x8) -> i16x8 {
80+
phaddsw128(a, b)
81+
}
82+
83+
/// Horizontally add the adjacent pairs of values contained in 2 packed
84+
/// 128-bit vectors of [4 x i32].
///
/// The two pair sums taken from `a` fill the first two lanes of the result
/// and the two pair sums taken from `b` fill the last two lanes.
85+
#[inline(always)]
86+
#[target_feature = "+ssse3"]
87+
#[cfg_attr(test, assert_instr(phaddd))]
88+
pub unsafe fn _mm_hadd_epi32(a: i32x4, b: i32x4) -> i32x4 {
89+
phaddd128(a, b)
90+
}
91+
92+
/// Horizontally subtract the adjacent pairs of values contained in 2
93+
/// packed 128-bit vectors of [8 x i16].
///
/// Within each pair the second value is subtracted from the first. The four
/// differences taken from `a` fill the first four lanes of the result and
/// the four differences taken from `b` fill the last four lanes.
94+
#[inline(always)]
95+
#[target_feature = "+ssse3"]
96+
#[cfg_attr(test, assert_instr(phsubw))]
97+
pub unsafe fn _mm_hsub_epi16(a: i16x8, b: i16x8) -> i16x8 {
98+
phsubw128(a, b)
99+
}
100+
101+
/// Horizontally subtract the adjacent pairs of values contained in 2
102+
/// packed 128-bit vectors of [8 x i16], with signed saturation: differences
103+
/// greater than 7FFFh (32767) become 7FFFh and differences less than
104+
/// 8000h (-32768) become 8000h.
///
/// Within each pair the second value is subtracted from the first. The four
/// differences taken from `a` fill the first four lanes of the result and
/// the four differences taken from `b` fill the last four lanes.
105+
#[inline(always)]
106+
#[target_feature = "+ssse3"]
107+
#[cfg_attr(test, assert_instr(phsubsw))]
108+
pub unsafe fn _mm_hsubs_epi16(a: i16x8, b: i16x8) -> i16x8 {
109+
phsubsw128(a, b)
110+
}
111+
112+
/// Horizontally subtract the adjacent pairs of values contained in 2
113+
/// packed 128-bit vectors of [4 x i32].
///
/// Within each pair the second value is subtracted from the first. The two
/// differences taken from `a` fill the first two lanes of the result and
/// the two differences taken from `b` fill the last two lanes.
114+
#[inline(always)]
115+
#[target_feature = "+ssse3"]
116+
#[cfg_attr(test, assert_instr(phsubd))]
117+
pub unsafe fn _mm_hsub_epi32(a: i32x4, b: i32x4) -> i32x4 {
118+
phsubd128(a, b)
119+
}
120+
121+
/// Multiply each packed 8-bit unsigned integer in `a` by the corresponding
122+
/// packed 8-bit signed integer in `b`, add each pair of contiguous products
123+
/// with signed saturation, and return the eight resulting 16-bit sums.
124+
///
125+
/// For example, the first result lane is `a[0] * b[0] + a[1] * b[1]`.
126+
#[inline(always)]
127+
#[target_feature = "+ssse3"]
128+
#[cfg_attr(test, assert_instr(pmaddubsw))]
129+
pub unsafe fn _mm_maddubs_epi16(a: u8x16, b: i8x16) -> i16x8 {
130+
pmaddubsw128(a, b)
131+
}
132+
133+
/// Multiply the packed 16-bit signed integers in `a` and `b`, truncate each
134+
/// 32-bit product to the 18 most significant bits by right-shifting, round
135+
/// the truncated value by adding 1, and return bits [16:1] of each result.
///
/// Equivalently, each lane is `(((a * b) >> 14) + 1) >> 1` computed on the
/// 32-bit product.
136+
#[inline(always)]
137+
#[target_feature = "+ssse3"]
138+
#[cfg_attr(test, assert_instr(pmulhrsw))]
139+
pub unsafe fn _mm_mulhrs_epi16(a: i16x8, b: i16x8) -> i16x8 {
140+
pmulhrsw128(a, b)
141+
}
142+
143+
/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
144+
/// integer in `b` is negative, and return the results.
145+
/// Elements in the result are zeroed out when the corresponding element in
146+
/// `b` is zero, and are copied from `a` unchanged when it is positive.
147+
#[inline(always)]
148+
#[target_feature = "+ssse3"]
149+
#[cfg_attr(test, assert_instr(psignb))]
150+
pub unsafe fn _mm_sign_epi8(a: i8x16, b: i8x16) -> i8x16 {
151+
psignb128(a, b)
152+
}
153+
154+
/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
155+
/// integer in `b` is negative, and return the results.
156+
/// Elements in the result are zeroed out when the corresponding element in
157+
/// `b` is zero, and are copied from `a` unchanged when it is positive.
158+
#[inline(always)]
159+
#[target_feature = "+ssse3"]
160+
#[cfg_attr(test, assert_instr(psignw))]
161+
pub unsafe fn _mm_sign_epi16(a: i16x8, b: i16x8) -> i16x8 {
162+
psignw128(a, b)
163+
}
164+
165+
/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
166+
/// integer in `b` is negative, and return the results.
167+
/// Elements in the result are zeroed out when the corresponding element in
168+
/// `b` is zero, and are copied from `a` unchanged when it is positive.
169+
#[inline(always)]
170+
#[target_feature = "+ssse3"]
171+
#[cfg_attr(test, assert_instr(psignd))]
172+
pub unsafe fn _mm_sign_epi32(a: i32x4, b: i32x4) -> i32x4 {
173+
psignd128(a, b)
174+
}
46175

47176
// Declarations of the LLVM SSSE3 intrinsics that the public `_mm_*` wrappers
// in this file forward to; each function is bound to its intrinsic through
// `link_name`.
// NOTE(review): `improper_ctypes` is presumably allowed because the lint does
// not consider the SIMD vector types FFI-safe — confirm.
#[allow(improper_ctypes)]
48177
extern {
49178
#[link_name = "llvm.x86.ssse3.pabs.b.128"]
50179
fn pabsb128(a: i8x16) -> u8x16;
180+
181+
#[link_name = "llvm.x86.ssse3.pabs.w.128"]
182+
fn pabsw128(a: i16x8) -> u16x8;
183+
184+
#[link_name = "llvm.x86.ssse3.pabs.d.128"]
185+
fn pabsd128(a: i32x4) -> u32x4;
186+
51187
#[link_name = "llvm.x86.ssse3.pshuf.b.128"]
52188
fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
189+
190+
#[link_name = "llvm.x86.ssse3.phadd.w.128"]
191+
fn phaddw128(a: i16x8, b: i16x8) -> i16x8;
192+
193+
#[link_name = "llvm.x86.ssse3.phadd.sw.128"]
194+
fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
195+
196+
#[link_name = "llvm.x86.ssse3.phadd.d.128"]
197+
fn phaddd128(a: i32x4, b: i32x4) -> i32x4;
198+
199+
#[link_name = "llvm.x86.ssse3.phsub.w.128"]
200+
fn phsubw128(a: i16x8, b: i16x8) -> i16x8;
201+
202+
#[link_name = "llvm.x86.ssse3.phsub.sw.128"]
203+
fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
204+
205+
#[link_name = "llvm.x86.ssse3.phsub.d.128"]
206+
fn phsubd128(a: i32x4, b: i32x4) -> i32x4;
207+
208+
#[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
209+
fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
210+
211+
#[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
212+
fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
213+
214+
#[link_name = "llvm.x86.ssse3.psign.b.128"]
215+
fn psignb128(a: i8x16, b: i8x16) -> i8x16;
216+
217+
#[link_name = "llvm.x86.ssse3.psign.w.128"]
218+
fn psignw128(a: i16x8, b: i16x8) -> i16x8;
219+
220+
#[link_name = "llvm.x86.ssse3.psign.d.128"]
221+
fn psignd128(a: i32x4, b: i32x4) -> i32x4;
53222
}
54223

55224
#[cfg(test)]
@@ -65,6 +234,18 @@ mod tests {
65234
assert_eq!(r, u8x16::splat(5));
66235
}
67236

237+
// Every input lane is -5, so every result lane must be 5.
#[simd_test = "ssse3"]
238+
unsafe fn _mm_abs_epi16() {
239+
let r = ssse3::_mm_abs_epi16(i16x8::splat(-5));
240+
assert_eq!(r, u16x8::splat(5));
241+
}
242+
243+
// Every input lane is -5, so every result lane must be 5.
#[simd_test = "ssse3"]
244+
unsafe fn _mm_abs_epi32() {
245+
let r = ssse3::_mm_abs_epi32(i32x4::splat(-5));
246+
assert_eq!(r, u32x4::splat(5));
247+
}
248+
68249
#[simd_test = "ssse3"]
69250
unsafe fn _mm_shuffle_epi8() {
70251
let a = u8x16::new(
@@ -88,4 +269,103 @@ mod tests {
88269
let r = ssse3::_mm_shuffle_epi8(a, b);
89270
assert_eq!(r, expected);
90271
}
272+
273+
// `expected` is the four adjacent-pair sums of `a` followed by the four
// adjacent-pair sums of `b`.
#[simd_test = "ssse3"]
274+
unsafe fn _mm_hadd_epi16() {
275+
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
276+
let b = i16x8::new(4, 128, 4, 3, 24, 12, 6, 19);
277+
let expected = i16x8::new(3, 7, 11, 15, 132, 7, 36, 25);
278+
let r = ssse3::_mm_hadd_epi16(a, b);
279+
assert_eq!(r, expected);
280+
}
281+
282+
// The last two pairs of `b` (32767 + 1 and -32768 + -1) overflow i16, so the
// result must saturate to 32767 and -32768 respectively.
#[simd_test = "ssse3"]
283+
unsafe fn _mm_hadds_epi16() {
284+
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
285+
let b = i16x8::new(4, 128, 4, 3, 32767, 1, -32768, -1);
286+
let expected = i16x8::new(3, 7, 11, 15, 132, 7, 32767, -32768);
287+
let r = ssse3::_mm_hadds_epi16(a, b);
288+
assert_eq!(r, expected);
289+
}
290+
291+
// `expected` is the two adjacent-pair sums of `a` followed by the two
// adjacent-pair sums of `b`.
#[simd_test = "ssse3"]
292+
unsafe fn _mm_hadd_epi32() {
293+
let a = i32x4::new(1, 2, 3, 4);
294+
let b = i32x4::new(4, 128, 4, 3);
295+
let expected = i32x4::new(3, 7, 132, 7);
296+
let r = ssse3::_mm_hadd_epi32(a, b);
297+
assert_eq!(r, expected);
298+
}
299+
300+
// `expected` is first-minus-second for each adjacent pair of `a`, followed
// by the same for each adjacent pair of `b`.
#[simd_test = "ssse3"]
301+
unsafe fn _mm_hsub_epi16() {
302+
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
303+
let b = i16x8::new(4, 128, 4, 3, 24, 12, 6, 19);
304+
let expected = i16x8::new(-1, -1, -1, -1, -124, 1, 12, -13);
305+
let r = ssse3::_mm_hsub_epi16(a, b);
306+
assert_eq!(r, expected);
307+
}
308+
309+
// The last two pairs of `b` (32767 - -1 and -32768 - 1) overflow i16, so the
// result must saturate to 32767 and -32768 respectively.
#[simd_test = "ssse3"]
310+
unsafe fn _mm_hsubs_epi16() {
311+
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
312+
let b = i16x8::new(4, 128, 4, 3, 32767, -1, -32768, 1);
313+
let expected = i16x8::new(-1, -1, -1, -1, -124, 1, 32767, -32768);
314+
let r = ssse3::_mm_hsubs_epi16(a, b);
315+
assert_eq!(r, expected);
316+
}
317+
318+
// `expected` is first-minus-second for each adjacent pair of `a`, followed
// by the same for each adjacent pair of `b`.
#[simd_test = "ssse3"]
319+
unsafe fn _mm_hsub_epi32() {
320+
let a = i32x4::new(1, 2, 3, 4);
321+
let b = i32x4::new(4, 128, 4, 3);
322+
let expected = i32x4::new(-1, -1, -124, 1);
323+
let r = ssse3::_mm_hsub_epi32(a, b);
324+
assert_eq!(r, expected);
325+
}
326+
327+
// Each expected lane is the sum of two adjacent byte products, e.g. the
// first lane is 1 * 4 + 2 * 63 = 130. No lane here is large enough to
// saturate.
#[simd_test = "ssse3"]
328+
unsafe fn _mm_maddubs_epi16() {
329+
let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
330+
let b = i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
331+
let expected = i16x8::new(130, 24, 192, 194, 158, 175, 66, 120);
332+
let r = ssse3::_mm_maddubs_epi16(a, b);
333+
assert_eq!(r, expected);
334+
}
335+
336+
// Expected lanes follow (((a * b) >> 14) + 1) >> 1: small products round to
// 0, while 5 * 32767 gives 5 and 7 * -32768 gives -7.
#[simd_test = "ssse3"]
337+
unsafe fn _mm_mulhrs_epi16() {
338+
let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
339+
let b = i16x8::new(4, 128, 4, 3, 32767, -1, -32768, 1);
340+
let expected = i16x8::new(0, 0, 0, 0, 5, 0, -7, 0);
341+
let r = ssse3::_mm_mulhrs_epi16(a, b);
342+
assert_eq!(r, expected);
343+
}
344+
345+
// `b` mixes positive, negative and (in the last lane) zero values, so the
// expected lanes cover the unchanged, negated and zeroed cases.
#[simd_test = "ssse3"]
346+
unsafe fn _mm_sign_epi8() {
347+
let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16);
348+
let b = i8x16::new(4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 4, 1, -8, 0);
349+
let expected = i8x16::new(1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0);
350+
let r = ssse3::_mm_sign_epi8(a, b);
351+
assert_eq!(r, expected);
352+
}
353+
354+
// `b` mixes positive, zero (third lane) and negative values, so the expected
// lanes cover the unchanged, zeroed and negated cases.
#[simd_test = "ssse3"]
355+
unsafe fn _mm_sign_epi16() {
356+
let a = i16x8::new(1, 2, 3, 4, -5, -6, 7, 8);
357+
let b = i16x8::new(4, 128, 0, 3, 1, -1, -2, 1);
358+
let expected = i16x8::new(1, 2, 0, 4, -5, 6, -7, 8);
359+
let r = ssse3::_mm_sign_epi16(a, b);
360+
assert_eq!(r, expected);
361+
}
362+
363+
// `b` holds positive, negative and (in the last lane) zero values, so the
// expected lanes cover the unchanged, negated and zeroed cases.
#[simd_test = "ssse3"]
364+
unsafe fn _mm_sign_epi32() {
365+
let a = i32x4::new(-1, 2, 3, 4);
366+
let b = i32x4::new(1, -1, 1, 0);
367+
let expected = i32x4::new(-1, -2, 3, 0);
368+
let r = ssse3::_mm_sign_epi32(a, b);
369+
assert_eq!(r, expected);
370+
}
91371
}

0 commit comments

Comments
 (0)