ssse3 (rust-lang#68)

gwenn · alexcrichton · commit e422a8abf0a8 · 2017-09-28T14:10:40.000-05:00
* SSSE3: _mm_abs_epi16, _mm_abs_epi32, _mm_hadd_epi16

* SSSE3: _mm_hadds_epi16

* SSSE3: assert_instr

* SSSE3: _mm_hadd_epi32

* SSSE3: _mm_hsub_epi16

* SSSE3: _mm_hsubs_epi16

* SSSE3: _mm_hsub_epi32

* SSSE3: _mm_maddubs_epi16

* SSSE3: _mm_mulhrs_epi16

* SSSE3: _mm_sign_epi8

* SSSE3: _mm_sign_epi32

* SSSE3: _mm_sign_epi32

* SSSE3: Fix assert_instr
diff --git a/src/x86/ssse3.rs b/src/x86/ssse3.rs
@@ -12,6 +12,24 @@ pub unsafe fn _mm_abs_epi8(a: i8x16) -> u8x16 {
     pabsb128(a)
 }
 
+/// Compute the absolute value of each of the packed 16-bit signed integers in `a` and
+/// return the 16-bit unsigned integer
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(pabsw))]
+pub unsafe fn _mm_abs_epi16(a: i16x8) -> u16x8 {
+    pabsw128(a)
+}
+
+/// Compute the absolute value of each of the packed 32-bit signed integers in `a` and
+/// return the 32-bit unsigned integer
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(pabsd))]
+pub unsafe fn _mm_abs_epi32(a: i32x4) -> u32x4 {
+    pabsd128(a)
+}
+
 /// Shuffle bytes from `a` according to the content of `b`.
 ///
 /// The last 4 bits of each byte of `b` are used as addresses
@@ -43,13 +61,164 @@ pub unsafe fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
     pshufb128(a, b)
 }
 
+/// Horizontally add the adjacent pairs of values contained in 2 packed
+/// 128-bit vectors of [8 x i16].
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(phaddw))]
+pub unsafe fn _mm_hadd_epi16(a: i16x8, b: i16x8) -> i16x8 {
+    phaddw128(a, b)
+}
+
+/// Horizontally add the adjacent pairs of values contained in 2 packed
+/// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
+/// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(phaddsw))]
+pub unsafe fn _mm_hadds_epi16(a: i16x8, b: i16x8) -> i16x8 {
+    phaddsw128(a, b)
+}
+
+/// Horizontally add the adjacent pairs of values contained in 2 packed
+/// 128-bit vectors of [4 x i32].
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(phaddd))]
+pub unsafe fn _mm_hadd_epi32(a: i32x4, b: i32x4) -> i32x4 {
+    phaddd128(a, b)
+}
+
+/// Horizontally subtract the adjacent pairs of values contained in 2
+/// packed 128-bit vectors of [8 x i16].
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(phsubw))]
+pub unsafe fn _mm_hsub_epi16(a: i16x8, b: i16x8) -> i16x8 {
+    phsubw128(a, b)
+}
+
+/// Horizontally subtract the adjacent pairs of values contained in 2
+/// packed 128-bit vectors of [8 x i16]. Positive differences greater than
+/// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
+/// saturated to 8000h.
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(phsubsw))]
+pub unsafe fn _mm_hsubs_epi16(a: i16x8, b: i16x8) -> i16x8 {
+    phsubsw128(a, b)
+}
+
+/// Horizontally subtract the adjacent pairs of values contained in 2
+/// packed 128-bit vectors of [4 x i32].
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(phsubd))]
+pub unsafe fn _mm_hsub_epi32(a: i32x4, b: i32x4) -> i32x4 {
+    phsubd128(a, b)
+}
+
+/// Multiply corresponding pairs of packed 8-bit unsigned integer
+/// values contained in the first source operand and packed 8-bit signed
+/// integer values contained in the second source operand, add pairs of
+/// contiguous products with signed saturation, and writes the 16-bit sums to
+/// the corresponding bits in the destination.
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(pmaddubsw))]
+pub unsafe fn _mm_maddubs_epi16(a: u8x16, b: i8x16) -> i16x8 {
+    pmaddubsw128(a, b)
+}
+
+/// Multiply packed 16-bit signed integer values, truncate the 32-bit
+/// product to the 18 most significant bits by right-shifting, round the
+/// truncated value by adding 1, and write bits [16:1] to the destination.
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(pmulhrsw))]
+pub unsafe fn _mm_mulhrs_epi16(a: i16x8, b: i16x8) -> i16x8 {
+    pmulhrsw128(a, b)
+}
+
+/// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
+/// integer in `b` is negative, and return the result.
+/// Elements in result are zeroed out when the corresponding element in `b`
+/// is zero.
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(psignb))]
+pub unsafe fn _mm_sign_epi8(a: i8x16, b: i8x16) -> i8x16 {
+    psignb128(a, b)
+}
+
+/// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
+/// integer in `b` is negative, and return the results.
+/// Elements in result are zeroed out when the corresponding element in `b`
+/// is zero.
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(psignw))]
+pub unsafe fn _mm_sign_epi16(a: i16x8, b: i16x8) -> i16x8 {
+    psignw128(a, b)
+}
+
+/// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
+/// integer in `b` is negative, and return the results.
+/// Element in result are zeroed out when the corresponding element in `b`
+/// is zero.
+#[inline(always)]
+#[target_feature = "+ssse3"]
+#[cfg_attr(test, assert_instr(psignd))]
+pub unsafe fn _mm_sign_epi32(a: i32x4, b: i32x4) -> i32x4 {
+    psignd128(a, b)
+}
 
 #[allow(improper_ctypes)]
 extern {
     #[link_name = "llvm.x86.ssse3.pabs.b.128"]
     fn pabsb128(a: i8x16) -> u8x16;
+
+    #[link_name = "llvm.x86.ssse3.pabs.w.128"]
+    fn pabsw128(a: i16x8) -> u16x8;
+
+    #[link_name = "llvm.x86.ssse3.pabs.d.128"]
+    fn pabsd128(a: i32x4) -> u32x4;
+
     #[link_name = "llvm.x86.ssse3.pshuf.b.128"]
     fn pshufb128(a: u8x16, b: u8x16) -> u8x16;
+
+    #[link_name = "llvm.x86.ssse3.phadd.w.128"]
+    fn phaddw128(a: i16x8, b: i16x8) -> i16x8;
+
+    #[link_name = "llvm.x86.ssse3.phadd.sw.128"]
+    fn phaddsw128(a: i16x8, b: i16x8) -> i16x8;
+
+    #[link_name = "llvm.x86.ssse3.phadd.d.128"]
+    fn phaddd128(a: i32x4, b: i32x4) -> i32x4;
+
+    #[link_name = "llvm.x86.ssse3.phsub.w.128"]
+    fn phsubw128(a: i16x8, b: i16x8) -> i16x8;
+
+    #[link_name = "llvm.x86.ssse3.phsub.sw.128"]
+    fn phsubsw128(a: i16x8, b: i16x8) -> i16x8;
+
+    #[link_name = "llvm.x86.ssse3.phsub.d.128"]
+    fn phsubd128(a: i32x4, b: i32x4) -> i32x4;
+
+    #[link_name = "llvm.x86.ssse3.pmadd.ub.sw.128"]
+    fn pmaddubsw128(a: u8x16, b: i8x16) -> i16x8;
+
+    #[link_name = "llvm.x86.ssse3.pmul.hr.sw.128"]
+    fn pmulhrsw128(a: i16x8, b: i16x8) -> i16x8;
+
+    #[link_name = "llvm.x86.ssse3.psign.b.128"]
+    fn psignb128(a: i8x16, b: i8x16) -> i8x16;
+
+    #[link_name = "llvm.x86.ssse3.psign.w.128"]
+    fn psignw128(a: i16x8, b: i16x8) -> i16x8;
+
+    #[link_name = "llvm.x86.ssse3.psign.d.128"]
+    fn psignd128(a: i32x4, b: i32x4) -> i32x4;
 }
 
 #[cfg(test)]
@@ -65,6 +234,18 @@ mod tests {
         assert_eq!(r, u8x16::splat(5));
     }
 
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_abs_epi16() {
+        let r = ssse3::_mm_abs_epi16(i16x8::splat(-5));
+        assert_eq!(r, u16x8::splat(5));
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_abs_epi32() {
+        let r = ssse3::_mm_abs_epi32(i32x4::splat(-5));
+        assert_eq!(r, u32x4::splat(5));
+    }
+
     #[simd_test = "ssse3"]
     unsafe fn _mm_shuffle_epi8() {
         let a = u8x16::new(
@@ -88,4 +269,103 @@ mod tests {
         let r = ssse3::_mm_shuffle_epi8(a, b);
         assert_eq!(r, expected);
     }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_hadd_epi16() {
+        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = i16x8::new(4, 128, 4, 3, 24, 12, 6, 19);
+        let expected = i16x8::new(3, 7, 11, 15, 132, 7, 36, 25);
+        let r = ssse3::_mm_hadd_epi16(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_hadds_epi16() {
+        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = i16x8::new(4, 128, 4, 3, 32767, 1, -32768, -1);
+        let expected = i16x8::new(3, 7, 11, 15, 132, 7, 32767, -32768);
+        let r = ssse3::_mm_hadds_epi16(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_hadd_epi32() {
+        let a = i32x4::new(1, 2, 3, 4);
+        let b = i32x4::new(4, 128, 4, 3);
+        let expected = i32x4::new(3, 7, 132, 7);
+        let r = ssse3::_mm_hadd_epi32(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_hsub_epi16() {
+        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = i16x8::new(4, 128, 4, 3, 24, 12, 6, 19);
+        let expected = i16x8::new(-1, -1, -1, -1, -124, 1, 12, -13);
+        let r = ssse3::_mm_hsub_epi16(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_hsubs_epi16() {
+        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = i16x8::new(4, 128, 4, 3, 32767, -1, -32768, 1);
+        let expected = i16x8::new(-1, -1, -1, -1, -124, 1, 32767, -32768);
+        let r = ssse3::_mm_hsubs_epi16(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_hsub_epi32() {
+        let a = i32x4::new(1, 2, 3, 4);
+        let b = i32x4::new(4, 128, 4, 3);
+        let expected = i32x4::new(-1, -1, -124, 1);
+        let r = ssse3::_mm_hsub_epi32(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_maddubs_epi16() {
+        let a = u8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16);
+        let b = i8x16::new(4, 63, 4, 3, 24, 12, 6, 19, 12, 5, 5, 10, 4, 1, 8, 0);
+        let expected = i16x8::new(130, 24, 192, 194, 158, 175, 66, 120);
+        let r = ssse3::_mm_maddubs_epi16(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_mulhrs_epi16() {
+        let a = i16x8::new(1, 2, 3, 4, 5, 6, 7, 8);
+        let b = i16x8::new(4, 128, 4, 3, 32767, -1, -32768, 1);
+        let expected = i16x8::new(0, 0, 0, 0, 5, 0, -7, 0);
+        let r = ssse3::_mm_mulhrs_epi16(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_sign_epi8() {
+        let a = i8x16::new(1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, -14, -15, 16);
+        let b = i8x16::new(4, 63, -4, 3, 24, 12, -6, -19, 12, 5, -5, 10, 4, 1, -8, 0);
+        let expected = i8x16::new(1, 2, -3, 4, 5, 6, -7, -8, 9, 10, -11, 12, 13, -14, 15, 0);
+        let r = ssse3::_mm_sign_epi8(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_sign_epi16() {
+        let a = i16x8::new(1, 2, 3, 4, -5, -6, 7, 8);
+        let b = i16x8::new(4, 128, 0, 3, 1, -1, -2, 1);
+        let expected = i16x8::new(1, 2, 0, 4, -5, 6, -7, 8);
+        let r = ssse3::_mm_sign_epi16(a, b);
+        assert_eq!(r, expected);
+    }
+
+    #[simd_test = "ssse3"]
+    unsafe fn _mm_sign_epi32() {
+        let a = i32x4::new(-1, 2, 3, 4);
+        let b = i32x4::new(1, -1, 1, 0);
+        let expected = i32x4::new(-1, -2, 3, 0);
+        let r = ssse3::_mm_sign_epi32(a, b);
+        assert_eq!(r, expected);
+    }
 }