Skip to content

Commit 531b928

Browse files
committed
Auto merge of #32140 - ruud-v-a:avx-intrinsics, r=alexcrichton
Add AVX broadcast and conversion intrinsics

This adds the following intrinsics:

* `_mm256_broadcast_pd`
* `_mm256_broadcast_ps`
* `_mm256_cvtepi32_pd`
* `_mm256_cvtepi32_ps`
* `_mm256_cvtpd_epi32`
* `_mm256_cvtpd_ps`
* `_mm256_cvtps_epi32`
* `_mm256_cvtps_pd`
* `_mm256_cvttpd_epi32`
* `_mm256_cvttps_epi32`

The "avx" codegen feature must be enabled to use these.
2 parents a2c56de + c306853 commit 531b928

File tree

2 files changed

+113
-0
lines changed
  • src

2 files changed

+113
-0
lines changed

src/etc/platform-intrinsics/x86/avx.json

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,69 @@
88
"ret": "f(32-64)",
99
"args": ["0", "0"]
1010
},
11+
{
12+
"intrinsic": "256_broadcast_{0.data_type}",
13+
"width": [256],
14+
"llvm": "vbroadcastf128.{0.data_type}.256",
15+
"ret": "f(32-64)",
16+
"args": ["s8SPc"]
17+
},
18+
{
19+
"intrinsic": "256_cvtepi32_pd",
20+
"width": [256],
21+
"llvm": "cvtdq2.pd.256",
22+
"ret": "f64",
23+
"args": ["s32h"]
24+
},
25+
{
26+
"intrinsic": "256_cvtepi32_ps",
27+
"width": [256],
28+
"llvm": "cvtdq2.ps.256",
29+
"ret": "f32",
30+
"args": ["s32"]
31+
},
32+
{
33+
"intrinsic": "256_cvtpd_epi32",
34+
"width": [256],
35+
"llvm": "cvt.pd2dq.256",
36+
"ret": "s32h",
37+
"args": ["f64"]
38+
},
39+
{
40+
"intrinsic": "256_cvtpd_ps",
41+
"width": [256],
42+
"llvm": "cvt.pd2.ps.256",
43+
"ret": "f32h",
44+
"args": ["f64"]
45+
},
46+
{
47+
"intrinsic": "256_cvtps_epi32",
48+
"width": [256],
49+
"llvm": "cvt.ps2dq.256",
50+
"ret": "s32",
51+
"args": ["f32"]
52+
},
53+
{
54+
"intrinsic": "256_cvtps_pd",
55+
"width": [256],
56+
"llvm": "cvt.ps2.pd.256",
57+
"ret": "f64",
58+
"args": ["f32h"]
59+
},
60+
{
61+
"intrinsic": "256_cvttpd_epi32",
62+
"width": [256],
63+
"llvm": "cvtt.pd2dq.256",
64+
"ret": "s32h",
65+
"args": ["f64"]
66+
},
67+
{
68+
"intrinsic": "256_cvttps_epi32",
69+
"width": [256],
70+
"llvm": "cvtt.ps2dq.256",
71+
"ret": "s32",
72+
"args": ["f32"]
73+
},
1174
{
1275
"intrinsic": "256_dp_ps",
1376
"width": [256],

src/librustc_platform_intrinsics/x86.rs

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -498,6 +498,56 @@ pub fn find<'tcx>(_tcx: &TyCtxt<'tcx>, name: &str) -> Option<Intrinsic> {
498498
output: v(f(64), 4),
499499
definition: Named("llvm.x86.avx.addsub.pd.256")
500500
},
501+
"256_broadcast_ps" => Intrinsic {
502+
inputs: vec![p(true, i(8), None)],
503+
output: v(f(32), 8),
504+
definition: Named("llvm.x86.avx.vbroadcastf128.ps.256")
505+
},
506+
"256_broadcast_pd" => Intrinsic {
507+
inputs: vec![p(true, i(8), None)],
508+
output: v(f(64), 4),
509+
definition: Named("llvm.x86.avx.vbroadcastf128.pd.256")
510+
},
511+
"256_cvtepi32_pd" => Intrinsic {
512+
inputs: vec![v(i(32), 4)],
513+
output: v(f(64), 4),
514+
definition: Named("llvm.x86.avx.cvtdq2.pd.256")
515+
},
516+
"256_cvtepi32_ps" => Intrinsic {
517+
inputs: vec![v(i(32), 8)],
518+
output: v(f(32), 8),
519+
definition: Named("llvm.x86.avx.cvtdq2.ps.256")
520+
},
521+
"256_cvtpd_epi32" => Intrinsic {
522+
inputs: vec![v(f(64), 4)],
523+
output: v(i(32), 4),
524+
definition: Named("llvm.x86.avx.cvt.pd2dq.256")
525+
},
526+
"256_cvtpd_ps" => Intrinsic {
527+
inputs: vec![v(f(64), 4)],
528+
output: v(f(32), 4),
529+
definition: Named("llvm.x86.avx.cvt.pd2.ps.256")
530+
},
531+
"256_cvtps_epi32" => Intrinsic {
532+
inputs: vec![v(f(32), 8)],
533+
output: v(i(32), 8),
534+
definition: Named("llvm.x86.avx.cvt.ps2dq.256")
535+
},
536+
"256_cvtps_pd" => Intrinsic {
537+
inputs: vec![v(f(32), 4)],
538+
output: v(f(64), 4),
539+
definition: Named("llvm.x86.avx.cvt.ps2.pd.256")
540+
},
541+
"256_cvttpd_epi32" => Intrinsic {
542+
inputs: vec![v(f(64), 4)],
543+
output: v(i(32), 4),
544+
definition: Named("llvm.x86.avx.cvtt.pd2dq.256")
545+
},
546+
"256_cvttps_epi32" => Intrinsic {
547+
inputs: vec![v(f(32), 8)],
548+
output: v(i(32), 8),
549+
definition: Named("llvm.x86.avx.cvtt.ps2dq.256")
550+
},
501551
"256_dp_ps" => Intrinsic {
502552
inputs: vec![v(f(32), 8), v(f(32), 8), i_(32, 8)],
503553
output: v(f(32), 8),

0 commit comments

Comments
 (0)