@@ -12,6 +12,24 @@ pub unsafe fn _mm_abs_epi8(a: i8x16) -> u8x16 {
12
12
pabsb128 ( a)
13
13
}
14
14
15
+ /// Compute the absolute value of each of the packed 16-bit signed integers in `a` and
16
+ /// return the 16-bit unsigned integer
17
+ #[ inline( always) ]
18
+ #[ target_feature = "+ssse3" ]
19
+ #[ cfg_attr( test, assert_instr( pabsw) ) ]
20
+ pub unsafe fn _mm_abs_epi16 ( a : i16x8 ) -> u16x8 {
21
+ pabsw128 ( a)
22
+ }
23
+
24
+ /// Compute the absolute value of each of the packed 32-bit signed integers in `a` and
25
+ /// return the 32-bit unsigned integer
26
+ #[ inline( always) ]
27
+ #[ target_feature = "+ssse3" ]
28
+ #[ cfg_attr( test, assert_instr( pabsd) ) ]
29
+ pub unsafe fn _mm_abs_epi32 ( a : i32x4 ) -> u32x4 {
30
+ pabsd128 ( a)
31
+ }
32
+
15
33
/// Shuffle bytes from `a` according to the content of `b`.
16
34
///
17
35
/// The last 4 bits of each byte of `b` are used as addresses
@@ -43,13 +61,164 @@ pub unsafe fn _mm_shuffle_epi8(a: u8x16, b: u8x16) -> u8x16 {
43
61
pshufb128 ( a, b)
44
62
}
45
63
64
+ /// Horizontally add the adjacent pairs of values contained in 2 packed
65
+ /// 128-bit vectors of [8 x i16].
66
+ #[ inline( always) ]
67
+ #[ target_feature = "+ssse3" ]
68
+ #[ cfg_attr( test, assert_instr( phaddw) ) ]
69
+ pub unsafe fn _mm_hadd_epi16 ( a : i16x8 , b : i16x8 ) -> i16x8 {
70
+ phaddw128 ( a, b)
71
+ }
72
+
73
+ /// Horizontally add the adjacent pairs of values contained in 2 packed
74
+ /// 128-bit vectors of [8 x i16]. Positive sums greater than 7FFFh are
75
+ /// saturated to 7FFFh. Negative sums less than 8000h are saturated to 8000h.
76
+ #[ inline( always) ]
77
+ #[ target_feature = "+ssse3" ]
78
+ #[ cfg_attr( test, assert_instr( phaddsw) ) ]
79
+ pub unsafe fn _mm_hadds_epi16 ( a : i16x8 , b : i16x8 ) -> i16x8 {
80
+ phaddsw128 ( a, b)
81
+ }
82
+
83
+ /// Horizontally add the adjacent pairs of values contained in 2 packed
84
+ /// 128-bit vectors of [4 x i32].
85
+ #[ inline( always) ]
86
+ #[ target_feature = "+ssse3" ]
87
+ #[ cfg_attr( test, assert_instr( phaddd) ) ]
88
+ pub unsafe fn _mm_hadd_epi32 ( a : i32x4 , b : i32x4 ) -> i32x4 {
89
+ phaddd128 ( a, b)
90
+ }
91
+
92
+ /// Horizontally subtract the adjacent pairs of values contained in 2
93
+ /// packed 128-bit vectors of [8 x i16].
94
+ #[ inline( always) ]
95
+ #[ target_feature = "+ssse3" ]
96
+ #[ cfg_attr( test, assert_instr( phsubw) ) ]
97
+ pub unsafe fn _mm_hsub_epi16 ( a : i16x8 , b : i16x8 ) -> i16x8 {
98
+ phsubw128 ( a, b)
99
+ }
100
+
101
+ /// Horizontally subtract the adjacent pairs of values contained in 2
102
+ /// packed 128-bit vectors of [8 x i16]. Positive differences greater than
103
+ /// 7FFFh are saturated to 7FFFh. Negative differences less than 8000h are
104
+ /// saturated to 8000h.
105
+ #[ inline( always) ]
106
+ #[ target_feature = "+ssse3" ]
107
+ #[ cfg_attr( test, assert_instr( phsubsw) ) ]
108
+ pub unsafe fn _mm_hsubs_epi16 ( a : i16x8 , b : i16x8 ) -> i16x8 {
109
+ phsubsw128 ( a, b)
110
+ }
111
+
112
+ /// Horizontally subtract the adjacent pairs of values contained in 2
113
+ /// packed 128-bit vectors of [4 x i32].
114
+ #[ inline( always) ]
115
+ #[ target_feature = "+ssse3" ]
116
+ #[ cfg_attr( test, assert_instr( phsubd) ) ]
117
+ pub unsafe fn _mm_hsub_epi32 ( a : i32x4 , b : i32x4 ) -> i32x4 {
118
+ phsubd128 ( a, b)
119
+ }
120
+
121
+ /// Multiply corresponding pairs of packed 8-bit unsigned integer
122
+ /// values contained in the first source operand and packed 8-bit signed
123
+ /// integer values contained in the second source operand, add pairs of
124
+ /// contiguous products with signed saturation, and writes the 16-bit sums to
125
+ /// the corresponding bits in the destination.
126
+ #[ inline( always) ]
127
+ #[ target_feature = "+ssse3" ]
128
+ #[ cfg_attr( test, assert_instr( pmaddubsw) ) ]
129
+ pub unsafe fn _mm_maddubs_epi16 ( a : u8x16 , b : i8x16 ) -> i16x8 {
130
+ pmaddubsw128 ( a, b)
131
+ }
132
+
133
+ /// Multiply packed 16-bit signed integer values, truncate the 32-bit
134
+ /// product to the 18 most significant bits by right-shifting, round the
135
+ /// truncated value by adding 1, and write bits [16:1] to the destination.
136
+ #[ inline( always) ]
137
+ #[ target_feature = "+ssse3" ]
138
+ #[ cfg_attr( test, assert_instr( pmulhrsw) ) ]
139
+ pub unsafe fn _mm_mulhrs_epi16 ( a : i16x8 , b : i16x8 ) -> i16x8 {
140
+ pmulhrsw128 ( a, b)
141
+ }
142
+
143
+ /// Negate packed 8-bit integers in `a` when the corresponding signed 8-bit
144
+ /// integer in `b` is negative, and return the result.
145
+ /// Elements in result are zeroed out when the corresponding element in `b`
146
+ /// is zero.
147
+ #[ inline( always) ]
148
+ #[ target_feature = "+ssse3" ]
149
+ #[ cfg_attr( test, assert_instr( psignb) ) ]
150
+ pub unsafe fn _mm_sign_epi8 ( a : i8x16 , b : i8x16 ) -> i8x16 {
151
+ psignb128 ( a, b)
152
+ }
153
+
154
+ /// Negate packed 16-bit integers in `a` when the corresponding signed 16-bit
155
+ /// integer in `b` is negative, and return the results.
156
+ /// Elements in result are zeroed out when the corresponding element in `b`
157
+ /// is zero.
158
+ #[ inline( always) ]
159
+ #[ target_feature = "+ssse3" ]
160
+ #[ cfg_attr( test, assert_instr( psignw) ) ]
161
+ pub unsafe fn _mm_sign_epi16 ( a : i16x8 , b : i16x8 ) -> i16x8 {
162
+ psignw128 ( a, b)
163
+ }
164
+
165
+ /// Negate packed 32-bit integers in `a` when the corresponding signed 32-bit
166
+ /// integer in `b` is negative, and return the results.
167
+ /// Element in result are zeroed out when the corresponding element in `b`
168
+ /// is zero.
169
+ #[ inline( always) ]
170
+ #[ target_feature = "+ssse3" ]
171
+ #[ cfg_attr( test, assert_instr( psignd) ) ]
172
+ pub unsafe fn _mm_sign_epi32 ( a : i32x4 , b : i32x4 ) -> i32x4 {
173
+ psignd128 ( a, b)
174
+ }
46
175
47
176
#[ allow( improper_ctypes) ]
48
177
extern {
49
178
#[ link_name = "llvm.x86.ssse3.pabs.b.128" ]
50
179
fn pabsb128 ( a : i8x16 ) -> u8x16 ;
180
+
181
+ #[ link_name = "llvm.x86.ssse3.pabs.w.128" ]
182
+ fn pabsw128 ( a : i16x8 ) -> u16x8 ;
183
+
184
+ #[ link_name = "llvm.x86.ssse3.pabs.d.128" ]
185
+ fn pabsd128 ( a : i32x4 ) -> u32x4 ;
186
+
51
187
#[ link_name = "llvm.x86.ssse3.pshuf.b.128" ]
52
188
fn pshufb128 ( a : u8x16 , b : u8x16 ) -> u8x16 ;
189
+
190
+ #[ link_name = "llvm.x86.ssse3.phadd.w.128" ]
191
+ fn phaddw128 ( a : i16x8 , b : i16x8 ) -> i16x8 ;
192
+
193
+ #[ link_name = "llvm.x86.ssse3.phadd.sw.128" ]
194
+ fn phaddsw128 ( a : i16x8 , b : i16x8 ) -> i16x8 ;
195
+
196
+ #[ link_name = "llvm.x86.ssse3.phadd.d.128" ]
197
+ fn phaddd128 ( a : i32x4 , b : i32x4 ) -> i32x4 ;
198
+
199
+ #[ link_name = "llvm.x86.ssse3.phsub.w.128" ]
200
+ fn phsubw128 ( a : i16x8 , b : i16x8 ) -> i16x8 ;
201
+
202
+ #[ link_name = "llvm.x86.ssse3.phsub.sw.128" ]
203
+ fn phsubsw128 ( a : i16x8 , b : i16x8 ) -> i16x8 ;
204
+
205
+ #[ link_name = "llvm.x86.ssse3.phsub.d.128" ]
206
+ fn phsubd128 ( a : i32x4 , b : i32x4 ) -> i32x4 ;
207
+
208
+ #[ link_name = "llvm.x86.ssse3.pmadd.ub.sw.128" ]
209
+ fn pmaddubsw128 ( a : u8x16 , b : i8x16 ) -> i16x8 ;
210
+
211
+ #[ link_name = "llvm.x86.ssse3.pmul.hr.sw.128" ]
212
+ fn pmulhrsw128 ( a : i16x8 , b : i16x8 ) -> i16x8 ;
213
+
214
+ #[ link_name = "llvm.x86.ssse3.psign.b.128" ]
215
+ fn psignb128 ( a : i8x16 , b : i8x16 ) -> i8x16 ;
216
+
217
+ #[ link_name = "llvm.x86.ssse3.psign.w.128" ]
218
+ fn psignw128 ( a : i16x8 , b : i16x8 ) -> i16x8 ;
219
+
220
+ #[ link_name = "llvm.x86.ssse3.psign.d.128" ]
221
+ fn psignd128 ( a : i32x4 , b : i32x4 ) -> i32x4 ;
53
222
}
54
223
55
224
#[ cfg( test) ]
@@ -65,6 +234,18 @@ mod tests {
65
234
assert_eq ! ( r, u8x16:: splat( 5 ) ) ;
66
235
}
67
236
237
+ #[ simd_test = "ssse3" ]
238
+ unsafe fn _mm_abs_epi16 ( ) {
239
+ let r = ssse3:: _mm_abs_epi16 ( i16x8:: splat ( -5 ) ) ;
240
+ assert_eq ! ( r, u16x8:: splat( 5 ) ) ;
241
+ }
242
+
243
+ #[ simd_test = "ssse3" ]
244
+ unsafe fn _mm_abs_epi32 ( ) {
245
+ let r = ssse3:: _mm_abs_epi32 ( i32x4:: splat ( -5 ) ) ;
246
+ assert_eq ! ( r, u32x4:: splat( 5 ) ) ;
247
+ }
248
+
68
249
#[ simd_test = "ssse3" ]
69
250
unsafe fn _mm_shuffle_epi8 ( ) {
70
251
let a = u8x16:: new (
@@ -88,4 +269,103 @@ mod tests {
88
269
let r = ssse3:: _mm_shuffle_epi8 ( a, b) ;
89
270
assert_eq ! ( r, expected) ;
90
271
}
272
+
273
+ #[ simd_test = "ssse3" ]
274
+ unsafe fn _mm_hadd_epi16 ( ) {
275
+ let a = i16x8:: new ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ) ;
276
+ let b = i16x8:: new ( 4 , 128 , 4 , 3 , 24 , 12 , 6 , 19 ) ;
277
+ let expected = i16x8:: new ( 3 , 7 , 11 , 15 , 132 , 7 , 36 , 25 ) ;
278
+ let r = ssse3:: _mm_hadd_epi16 ( a, b) ;
279
+ assert_eq ! ( r, expected) ;
280
+ }
281
+
282
+ #[ simd_test = "ssse3" ]
283
+ unsafe fn _mm_hadds_epi16 ( ) {
284
+ let a = i16x8:: new ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ) ;
285
+ let b = i16x8:: new ( 4 , 128 , 4 , 3 , 32767 , 1 , -32768 , -1 ) ;
286
+ let expected = i16x8:: new ( 3 , 7 , 11 , 15 , 132 , 7 , 32767 , -32768 ) ;
287
+ let r = ssse3:: _mm_hadds_epi16 ( a, b) ;
288
+ assert_eq ! ( r, expected) ;
289
+ }
290
+
291
+ #[ simd_test = "ssse3" ]
292
+ unsafe fn _mm_hadd_epi32 ( ) {
293
+ let a = i32x4:: new ( 1 , 2 , 3 , 4 ) ;
294
+ let b = i32x4:: new ( 4 , 128 , 4 , 3 ) ;
295
+ let expected = i32x4:: new ( 3 , 7 , 132 , 7 ) ;
296
+ let r = ssse3:: _mm_hadd_epi32 ( a, b) ;
297
+ assert_eq ! ( r, expected) ;
298
+ }
299
+
300
+ #[ simd_test = "ssse3" ]
301
+ unsafe fn _mm_hsub_epi16 ( ) {
302
+ let a = i16x8:: new ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ) ;
303
+ let b = i16x8:: new ( 4 , 128 , 4 , 3 , 24 , 12 , 6 , 19 ) ;
304
+ let expected = i16x8:: new ( -1 , -1 , -1 , -1 , -124 , 1 , 12 , -13 ) ;
305
+ let r = ssse3:: _mm_hsub_epi16 ( a, b) ;
306
+ assert_eq ! ( r, expected) ;
307
+ }
308
+
309
+ #[ simd_test = "ssse3" ]
310
+ unsafe fn _mm_hsubs_epi16 ( ) {
311
+ let a = i16x8:: new ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ) ;
312
+ let b = i16x8:: new ( 4 , 128 , 4 , 3 , 32767 , -1 , -32768 , 1 ) ;
313
+ let expected = i16x8:: new ( -1 , -1 , -1 , -1 , -124 , 1 , 32767 , -32768 ) ;
314
+ let r = ssse3:: _mm_hsubs_epi16 ( a, b) ;
315
+ assert_eq ! ( r, expected) ;
316
+ }
317
+
318
+ #[ simd_test = "ssse3" ]
319
+ unsafe fn _mm_hsub_epi32 ( ) {
320
+ let a = i32x4:: new ( 1 , 2 , 3 , 4 ) ;
321
+ let b = i32x4:: new ( 4 , 128 , 4 , 3 ) ;
322
+ let expected = i32x4:: new ( -1 , -1 , -124 , 1 ) ;
323
+ let r = ssse3:: _mm_hsub_epi32 ( a, b) ;
324
+ assert_eq ! ( r, expected) ;
325
+ }
326
+
327
+ #[ simd_test = "ssse3" ]
328
+ unsafe fn _mm_maddubs_epi16 ( ) {
329
+ let a = u8x16:: new ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , 14 , 15 , 16 ) ;
330
+ let b = i8x16:: new ( 4 , 63 , 4 , 3 , 24 , 12 , 6 , 19 , 12 , 5 , 5 , 10 , 4 , 1 , 8 , 0 ) ;
331
+ let expected = i16x8:: new ( 130 , 24 , 192 , 194 , 158 , 175 , 66 , 120 ) ;
332
+ let r = ssse3:: _mm_maddubs_epi16 ( a, b) ;
333
+ assert_eq ! ( r, expected) ;
334
+ }
335
+
336
+ #[ simd_test = "ssse3" ]
337
+ unsafe fn _mm_mulhrs_epi16 ( ) {
338
+ let a = i16x8:: new ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 ) ;
339
+ let b = i16x8:: new ( 4 , 128 , 4 , 3 , 32767 , -1 , -32768 , 1 ) ;
340
+ let expected = i16x8:: new ( 0 , 0 , 0 , 0 , 5 , 0 , -7 , 0 ) ;
341
+ let r = ssse3:: _mm_mulhrs_epi16 ( a, b) ;
342
+ assert_eq ! ( r, expected) ;
343
+ }
344
+
345
+ #[ simd_test = "ssse3" ]
346
+ unsafe fn _mm_sign_epi8 ( ) {
347
+ let a = i8x16:: new ( 1 , 2 , 3 , 4 , 5 , 6 , 7 , 8 , 9 , 10 , 11 , 12 , 13 , -14 , -15 , 16 ) ;
348
+ let b = i8x16:: new ( 4 , 63 , -4 , 3 , 24 , 12 , -6 , -19 , 12 , 5 , -5 , 10 , 4 , 1 , -8 , 0 ) ;
349
+ let expected = i8x16:: new ( 1 , 2 , -3 , 4 , 5 , 6 , -7 , -8 , 9 , 10 , -11 , 12 , 13 , -14 , 15 , 0 ) ;
350
+ let r = ssse3:: _mm_sign_epi8 ( a, b) ;
351
+ assert_eq ! ( r, expected) ;
352
+ }
353
+
354
+ #[ simd_test = "ssse3" ]
355
+ unsafe fn _mm_sign_epi16 ( ) {
356
+ let a = i16x8:: new ( 1 , 2 , 3 , 4 , -5 , -6 , 7 , 8 ) ;
357
+ let b = i16x8:: new ( 4 , 128 , 0 , 3 , 1 , -1 , -2 , 1 ) ;
358
+ let expected = i16x8:: new ( 1 , 2 , 0 , 4 , -5 , 6 , -7 , 8 ) ;
359
+ let r = ssse3:: _mm_sign_epi16 ( a, b) ;
360
+ assert_eq ! ( r, expected) ;
361
+ }
362
+
363
+ #[ simd_test = "ssse3" ]
364
+ unsafe fn _mm_sign_epi32 ( ) {
365
+ let a = i32x4:: new ( -1 , 2 , 3 , 4 ) ;
366
+ let b = i32x4:: new ( 1 , -1 , 1 , 0 ) ;
367
+ let expected = i32x4:: new ( -1 , -2 , 3 , 0 ) ;
368
+ let r = ssse3:: _mm_sign_epi32 ( a, b) ;
369
+ assert_eq ! ( r, expected) ;
370
+ }
91
371
}
0 commit comments