Skip to content

Commit 87c7730

Browse files
committed
[PowerPC] Exploit VSX rounding instrs for rint
Exploit the native VSX rounding instructions, x(v|s)r(d|s)pic, which round using the current rounding mode. According to the C standard library, rint may raise the INEXACT exception while nearbyint won't. Reviewed By: lkail Differential Revision: https://reviews.llvm.org/D72685
1 parent 536456a commit 87c7730

File tree

6 files changed

+186
-333
lines changed

6 files changed

+186
-333
lines changed

llvm/lib/Target/PowerPC/PPCISelLowering.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -799,12 +799,16 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM,
799799
setOperationAction(ISD::FCEIL, MVT::v2f64, Legal);
800800
setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal);
801801
setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal);
802+
setOperationAction(ISD::FRINT, MVT::v2f64, Legal);
802803
setOperationAction(ISD::FROUND, MVT::v2f64, Legal);
803804
setOperationAction(ISD::FROUND, MVT::f64, Legal);
805+
setOperationAction(ISD::FRINT, MVT::f64, Legal);
804806

805807
setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal);
808+
setOperationAction(ISD::FRINT, MVT::v4f32, Legal);
806809
setOperationAction(ISD::FROUND, MVT::v4f32, Legal);
807810
setOperationAction(ISD::FROUND, MVT::f32, Legal);
811+
setOperationAction(ISD::FRINT, MVT::f32, Legal);
808812

809813
setOperationAction(ISD::MUL, MVT::v2f64, Legal);
810814
setOperationAction(ISD::FMA, MVT::v2f64, Legal);

llvm/lib/Target/PowerPC/PPCInstrVSX.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2563,6 +2563,14 @@ def : Pat<(f32 (fceil f32:$S)),
25632563
def : Pat<(f32 (ftrunc f32:$S)),
25642564
(f32 (COPY_TO_REGCLASS (XSRDPIZ
25652565
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2566+
def : Pat<(f32 (frint f32:$S)),
2567+
(f32 (COPY_TO_REGCLASS (XSRDPIC
2568+
(COPY_TO_REGCLASS $S, VSFRC)), VSSRC))>;
2569+
def : Pat<(v4f32 (frint v4f32:$S)), (v4f32 (XVRSPIC $S))>;
2570+
2571+
// Rounding for double precision.
2572+
def : Pat<(f64 (frint f64:$S)), (f64 (XSRDPIC $S))>;
2573+
def : Pat<(v2f64 (frint v2f64:$S)), (v2f64 (XVRDPIC $S))>;
25662574
}
25672575

25682576
// Materialize a zero-vector of long long

llvm/test/CodeGen/PowerPC/mi-peephole-splat.ll

Lines changed: 8 additions & 92 deletions
Original file line numberDiff line numberDiff line change
@@ -11,118 +11,34 @@
1111
define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind {
1212
; CHECK-LE-LABEL: splat_swap:
1313
; CHECK-LE: # %bb.0:
14-
; CHECK-LE-NEXT: mflr 0
15-
; CHECK-LE-NEXT: std 0, 16(1)
16-
; CHECK-LE-NEXT: stdu 1, -80(1)
17-
; CHECK-LE-NEXT: li 3, 64
18-
; CHECK-LE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
19-
; CHECK-LE-NEXT: xvadddp 63, 34, 35
20-
; CHECK-LE-NEXT: xxlor 1, 63, 63
21-
; CHECK-LE-NEXT: bl rint
22-
; CHECK-LE-NEXT: nop
23-
; CHECK-LE-NEXT: xxswapd 0, 63
24-
; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
25-
; CHECK-LE-NEXT: li 3, 48
26-
; CHECK-LE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
27-
; CHECK-LE-NEXT: fmr 1, 0
28-
; CHECK-LE-NEXT: bl rint
29-
; CHECK-LE-NEXT: nop
30-
; CHECK-LE-NEXT: li 3, 48
31-
; CHECK-LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
32-
; CHECK-LE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
33-
; CHECK-LE-NEXT: li 3, 64
34-
; CHECK-LE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
35-
; CHECK-LE-NEXT: xxmrghd 0, 0, 1
14+
; CHECK-LE-NEXT: xvadddp 0, 34, 35
15+
; CHECK-LE-NEXT: xvrdpic 0, 0
3616
; CHECK-LE-NEXT: xxswapd 1, 0
3717
; CHECK-LE-NEXT: xssubdp 1, 1, 0
38-
; CHECK-LE-NEXT: addi 1, 1, 80
39-
; CHECK-LE-NEXT: ld 0, 16(1)
40-
; CHECK-LE-NEXT: mtlr 0
4118
; CHECK-LE-NEXT: blr
4219
;
4320
; CHECK-BE-LABEL: splat_swap:
4421
; CHECK-BE: # %bb.0:
45-
; CHECK-BE-NEXT: mflr 0
46-
; CHECK-BE-NEXT: std 0, 16(1)
47-
; CHECK-BE-NEXT: stdu 1, -160(1)
48-
; CHECK-BE-NEXT: li 3, 144
49-
; CHECK-BE-NEXT: stxvd2x 63, 1, 3 # 16-byte Folded Spill
50-
; CHECK-BE-NEXT: xvadddp 63, 34, 35
51-
; CHECK-BE-NEXT: xxlor 1, 63, 63
52-
; CHECK-BE-NEXT: bl rint
53-
; CHECK-BE-NEXT: nop
54-
; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
55-
; CHECK-BE-NEXT: li 3, 128
56-
; CHECK-BE-NEXT: stxvd2x 1, 1, 3 # 16-byte Folded Spill
57-
; CHECK-BE-NEXT: xxswapd 1, 63
58-
; CHECK-BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
59-
; CHECK-BE-NEXT: bl rint
60-
; CHECK-BE-NEXT: nop
61-
; CHECK-BE-NEXT: li 3, 128
62-
; CHECK-BE-NEXT: lxvd2x 0, 1, 3 # 16-byte Folded Reload
63-
; CHECK-BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
64-
; CHECK-BE-NEXT: li 3, 144
65-
; CHECK-BE-NEXT: lxvd2x 63, 1, 3 # 16-byte Folded Reload
66-
; CHECK-BE-NEXT: xxmrghd 0, 0, 1
22+
; CHECK-BE-NEXT: xvadddp 0, 34, 35
23+
; CHECK-BE-NEXT: xvrdpic 0, 0
6724
; CHECK-BE-NEXT: xxswapd 1, 0
6825
; CHECK-BE-NEXT: xssubdp 1, 0, 1
69-
; CHECK-BE-NEXT: addi 1, 1, 160
70-
; CHECK-BE-NEXT: ld 0, 16(1)
71-
; CHECK-BE-NEXT: mtlr 0
7226
; CHECK-BE-NEXT: blr
7327
;
7428
; CHECK-P9LE-LABEL: splat_swap:
7529
; CHECK-P9LE: # %bb.0:
76-
; CHECK-P9LE-NEXT: mflr 0
77-
; CHECK-P9LE-NEXT: std 0, 16(1)
78-
; CHECK-P9LE-NEXT: stdu 1, -64(1)
79-
; CHECK-P9LE-NEXT: stxv 63, 48(1) # 16-byte Folded Spill
80-
; CHECK-P9LE-NEXT: xvadddp 63, 34, 35
81-
; CHECK-P9LE-NEXT: xscpsgndp 1, 63, 63
82-
; CHECK-P9LE-NEXT: bl rint
83-
; CHECK-P9LE-NEXT: nop
84-
; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
85-
; CHECK-P9LE-NEXT: stxv 1, 32(1) # 16-byte Folded Spill
86-
; CHECK-P9LE-NEXT: xxswapd 1, 63
87-
; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
88-
; CHECK-P9LE-NEXT: bl rint
89-
; CHECK-P9LE-NEXT: nop
90-
; CHECK-P9LE-NEXT: lxv 0, 32(1) # 16-byte Folded Reload
91-
; CHECK-P9LE-NEXT: # kill: def $f1 killed $f1 def $vsl1
92-
; CHECK-P9LE-NEXT: xxmrghd 0, 0, 1
93-
; CHECK-P9LE-NEXT: lxv 63, 48(1) # 16-byte Folded Reload
30+
; CHECK-P9LE-NEXT: xvadddp 0, 34, 35
31+
; CHECK-P9LE-NEXT: xvrdpic 0, 0
9432
; CHECK-P9LE-NEXT: xxswapd 1, 0
9533
; CHECK-P9LE-NEXT: xssubdp 1, 1, 0
96-
; CHECK-P9LE-NEXT: addi 1, 1, 64
97-
; CHECK-P9LE-NEXT: ld 0, 16(1)
98-
; CHECK-P9LE-NEXT: mtlr 0
9934
; CHECK-P9LE-NEXT: blr
10035
;
10136
; CHECK-P9BE-LABEL: splat_swap:
10237
; CHECK-P9BE: # %bb.0:
103-
; CHECK-P9BE-NEXT: mflr 0
104-
; CHECK-P9BE-NEXT: std 0, 16(1)
105-
; CHECK-P9BE-NEXT: stdu 1, -144(1)
106-
; CHECK-P9BE-NEXT: stxv 63, 128(1) # 16-byte Folded Spill
107-
; CHECK-P9BE-NEXT: xvadddp 63, 34, 35
108-
; CHECK-P9BE-NEXT: xscpsgndp 1, 63, 63
109-
; CHECK-P9BE-NEXT: bl rint
110-
; CHECK-P9BE-NEXT: nop
111-
; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
112-
; CHECK-P9BE-NEXT: stxv 1, 112(1) # 16-byte Folded Spill
113-
; CHECK-P9BE-NEXT: xxswapd 1, 63
114-
; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
115-
; CHECK-P9BE-NEXT: bl rint
116-
; CHECK-P9BE-NEXT: nop
117-
; CHECK-P9BE-NEXT: lxv 0, 112(1) # 16-byte Folded Reload
118-
; CHECK-P9BE-NEXT: # kill: def $f1 killed $f1 def $vsl1
119-
; CHECK-P9BE-NEXT: xxmrghd 0, 0, 1
120-
; CHECK-P9BE-NEXT: lxv 63, 128(1) # 16-byte Folded Reload
38+
; CHECK-P9BE-NEXT: xvadddp 0, 34, 35
39+
; CHECK-P9BE-NEXT: xvrdpic 0, 0
12140
; CHECK-P9BE-NEXT: xxswapd 1, 0
12241
; CHECK-P9BE-NEXT: xssubdp 1, 0, 1
123-
; CHECK-P9BE-NEXT: addi 1, 1, 144
124-
; CHECK-P9BE-NEXT: ld 0, 16(1)
125-
; CHECK-P9BE-NEXT: mtlr 0
12642
; CHECK-P9BE-NEXT: blr
12743
%added = fadd <2 x double> %x, %y
12844
%call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone

llvm/test/CodeGen/PowerPC/scalar-rounding-ops.ll

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -559,3 +559,47 @@ entry:
559559
}
560560

561561
declare float @llvm.ceil.f32(float)
562+
563+
define dso_local double @test_rint(double %d) local_unnamed_addr {
564+
; BE-LABEL: test_rint:
565+
; BE: # %bb.0: # %entry
566+
; BE-NEXT: xsrdpic f1, f1
567+
; BE-NEXT: blr
568+
;
569+
; CHECK-LABEL: test_rint:
570+
; CHECK: # %bb.0: # %entry
571+
; CHECK-NEXT: xsrdpic f1, f1
572+
; CHECK-NEXT: blr
573+
;
574+
; FAST-LABEL: test_rint:
575+
; FAST: # %bb.0: # %entry
576+
; FAST-NEXT: xsrdpic f1, f1
577+
; FAST-NEXT: blr
578+
entry:
579+
%0 = tail call double @llvm.rint.f64(double %d)
580+
ret double %0
581+
}
582+
583+
declare double @llvm.rint.f64(double)
584+
585+
define dso_local float @test_rintf(float %f) local_unnamed_addr {
586+
; BE-LABEL: test_rintf:
587+
; BE: # %bb.0: # %entry
588+
; BE-NEXT: xsrdpic f1, f1
589+
; BE-NEXT: blr
590+
;
591+
; CHECK-LABEL: test_rintf:
592+
; CHECK: # %bb.0: # %entry
593+
; CHECK-NEXT: xsrdpic f1, f1
594+
; CHECK-NEXT: blr
595+
;
596+
; FAST-LABEL: test_rintf:
597+
; FAST: # %bb.0: # %entry
598+
; FAST-NEXT: xsrdpic f1, f1
599+
; FAST-NEXT: blr
600+
entry:
601+
%0 = tail call float @llvm.rint.f32(float %f)
602+
ret float %0
603+
}
604+
605+
declare float @llvm.rint.f32(float)

0 commit comments

Comments
 (0)