Skip to content

Commit b9a6fb6

Browse files
committed
[ARM] VBIT/VBIF support added.
Vector bitwise selects are matched by the pseudo VBSP instruction and expanded to VBSL/VBIT/VBIF after register allocation, depending on the operand registers, to minimize extra copies.
1 parent 15d058f commit b9a6fb6

19 files changed

+553
-451
lines changed

llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp

Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1860,6 +1860,66 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
18601860
default:
18611861
return false;
18621862

1863+
case ARM::VBSPd:
1864+
case ARM::VBSPq: {
1865+
Register DstReg = MI.getOperand(0).getReg();
1866+
if (DstReg == MI.getOperand(3).getReg()) {
1867+
// Expand to VBIT
1868+
unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
1869+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
1870+
.add(MI.getOperand(0))
1871+
.add(MI.getOperand(3))
1872+
.add(MI.getOperand(2))
1873+
.add(MI.getOperand(1))
1874+
.addImm(MI.getOperand(4).getImm())
1875+
.add(MI.getOperand(5));
1876+
} else if (DstReg == MI.getOperand(2).getReg()) {
1877+
// Expand to VBIF
1878+
unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
1879+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
1880+
.add(MI.getOperand(0))
1881+
.add(MI.getOperand(2))
1882+
.add(MI.getOperand(3))
1883+
.add(MI.getOperand(1))
1884+
.addImm(MI.getOperand(4).getImm())
1885+
.add(MI.getOperand(5));
1886+
} else {
1887+
// Expand to VBSL
1888+
unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
1889+
if (DstReg == MI.getOperand(1).getReg()) {
1890+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
1891+
.add(MI.getOperand(0))
1892+
.add(MI.getOperand(1))
1893+
.add(MI.getOperand(2))
1894+
.add(MI.getOperand(3))
1895+
.addImm(MI.getOperand(4).getImm())
1896+
.add(MI.getOperand(5));
1897+
} else {
1898+
// Use move to satisfy constraints
1899+
unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
1900+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
1901+
.addReg(DstReg,
1902+
RegState::Define |
1903+
getRenamableRegState(MI.getOperand(0).isRenamable()))
1904+
.add(MI.getOperand(1))
1905+
.add(MI.getOperand(1))
1906+
.addImm(MI.getOperand(4).getImm())
1907+
.add(MI.getOperand(5));
1908+
BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
1909+
.add(MI.getOperand(0))
1910+
.addReg(DstReg,
1911+
RegState::Kill |
1912+
getRenamableRegState(MI.getOperand(0).isRenamable()))
1913+
.add(MI.getOperand(2))
1914+
.add(MI.getOperand(3))
1915+
.addImm(MI.getOperand(4).getImm())
1916+
.add(MI.getOperand(5));
1917+
}
1918+
}
1919+
MI.eraseFromParent();
1920+
return true;
1921+
}
1922+
18631923
case ARM::TCRETURNdi:
18641924
case ARM::TCRETURNri: {
18651925
MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();

llvm/lib/Target/ARM/ARMISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1755,7 +1755,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
17551755
case ARMISD::BFI: return "ARMISD::BFI";
17561756
case ARMISD::VORRIMM: return "ARMISD::VORRIMM";
17571757
case ARMISD::VBICIMM: return "ARMISD::VBICIMM";
1758-
case ARMISD::VBSL: return "ARMISD::VBSL";
1758+
case ARMISD::VBSP: return "ARMISD::VBSP";
17591759
case ARMISD::MEMCPY: return "ARMISD::MEMCPY";
17601760
case ARMISD::VLD1DUP: return "ARMISD::VLD1DUP";
17611761
case ARMISD::VLD2DUP: return "ARMISD::VLD2DUP";
@@ -13153,7 +13153,7 @@ static SDValue PerformORCombine(SDNode *N,
1315313153
// Canonicalize the vector type to make instruction selection
1315413154
// simpler.
1315513155
EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32;
13156-
SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT,
13156+
SDValue Result = DAG.getNode(ARMISD::VBSP, dl, CanonicalVT,
1315713157
N0->getOperand(1),
1315813158
N0->getOperand(0),
1315913159
N1->getOperand(0));

llvm/lib/Target/ARM/ARMISelLowering.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -271,8 +271,8 @@ class VectorType;
271271
// Vector AND with NOT of immediate
272272
VBICIMM,
273273

274-
// Vector bitwise select
275-
VBSL,
274+
// Pseudo vector bitwise select
275+
VBSP,
276276

277277
// Pseudo-instruction representing a memory copy using ldm/stm
278278
// instructions.

llvm/lib/Target/ARM/ARMInstrNEON.td

Lines changed: 49 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -509,7 +509,7 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
509509
def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
510510
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
511511

512-
def NEONvbsl : SDNode<"ARMISD::VBSL",
512+
def NEONvbsp : SDNode<"ARMISD::VBSP",
513513
SDTypeProfile<1, 3, [SDTCisVec<0>,
514514
SDTCisSameAs<0, 1>,
515515
SDTCisSameAs<0, 2>,
@@ -4526,9 +4526,9 @@ let Predicates = [HasNEON, HasV8_1a] in {
45264526
(SubReg_i16_lane imm:$lane)))>;
45274527
def : Pat<(v4i32 (saddsat
45284528
(v4i32 QPR:$src1),
4529-
(v4i32 (int_arm_neon_vqrdmulh
4529+
(v4i32 (int_arm_neon_vqrdmulh
45304530
(v4i32 QPR:$src2),
4531-
(v4i32 (ARMvduplane (v4i32 QPR:$src3),
4531+
(v4i32 (ARMvduplane (v4i32 QPR:$src3),
45324532
imm:$lane)))))),
45334533
(v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
45344534
(v4i32 QPR:$src2),
@@ -4579,17 +4579,17 @@ let Predicates = [HasNEON, HasV8_1a] in {
45794579
(v2i32 DPR:$Vn),
45804580
(v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
45814581
imm:$lane)))))),
4582-
(v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4582+
(v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
45834583
imm:$lane))>;
45844584
def : Pat<(v8i16 (ssubsat
45854585
(v8i16 QPR:$src1),
45864586
(v8i16 (int_arm_neon_vqrdmulh
45874587
(v8i16 QPR:$src2),
4588-
(v8i16 (ARMvduplane (v8i16 QPR:$src3),
4588+
(v8i16 (ARMvduplane (v8i16 QPR:$src3),
45894589
imm:$lane)))))),
45904590
(v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
45914591
(v8i16 QPR:$src2),
4592-
(v4i16 (EXTRACT_SUBREG
4592+
(v4i16 (EXTRACT_SUBREG
45934593
QPR:$src3,
45944594
(DSubReg_i16_reg imm:$lane))),
45954595
(SubReg_i16_lane imm:$lane)))>;
@@ -4601,7 +4601,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
46014601
imm:$lane)))))),
46024602
(v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
46034603
(v4i32 QPR:$src2),
4604-
(v2i32 (EXTRACT_SUBREG
4604+
(v2i32 (EXTRACT_SUBREG
46054605
QPR:$src3,
46064606
(DSubReg_i32_reg imm:$lane))),
46074607
(SubReg_i32_lane imm:$lane)))>;
@@ -5442,74 +5442,86 @@ def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
54425442
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
54435443
}
54445444

5445-
// VBSL : Vector Bitwise Select
5446-
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5447-
(ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5448-
N3RegFrm, IIC_VCNTiD,
5449-
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5450-
[(set DPR:$Vd,
5451-
(v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5445+
// The TwoAddress pass will not go looking for equivalent operations
5446+
// with different register constraints; it just inserts copies.
5447+
// That is why the pseudo VBSP is implemented. It is expanded later into
5448+
// VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
5449+
def VBSPd
5450+
: PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5451+
IIC_VBINiD, "",
5452+
[(set DPR:$Vd,
5453+
(v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
54525454
let Predicates = [HasNEON] in {
54535455
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
54545456
(v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5455-
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5457+
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
54565458
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
54575459
(v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5458-
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5460+
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
54595461
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
54605462
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5461-
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5463+
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
54625464
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
54635465
(v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5464-
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5466+
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
54655467
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
54665468
(v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5467-
(VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5469+
(VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
54685470

54695471
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
54705472
(and DPR:$Vm, (vnotd DPR:$Vd)))),
5471-
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5473+
(VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
54725474

54735475
def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
54745476
(and DPR:$Vm, (vnotd DPR:$Vd)))),
5475-
(VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5477+
(VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
54765478
}
54775479

5478-
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5479-
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5480-
N3RegFrm, IIC_VCNTiQ,
5481-
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5482-
[(set QPR:$Vd,
5483-
(v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5484-
5480+
def VBSPq
5481+
: PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5482+
IIC_VBINiQ, "",
5483+
[(set QPR:$Vd,
5484+
(v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
54855485
let Predicates = [HasNEON] in {
54865486
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
54875487
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5488-
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5488+
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
54895489
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
54905490
(v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5491-
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5491+
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
54925492
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
54935493
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5494-
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5494+
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
54955495
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
54965496
(v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5497-
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5497+
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
54985498
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
54995499
(v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5500-
(VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5500+
(VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
55015501

55025502
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
55035503
(and QPR:$Vm, (vnotq QPR:$Vd)))),
5504-
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5504+
(VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
55055505
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
55065506
(and QPR:$Vm, (vnotq QPR:$Vd)))),
5507-
(VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5507+
(VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
55085508
}
55095509

5510+
// VBSL : Vector Bitwise Select
5511+
def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5512+
(ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5513+
N3RegFrm, IIC_VBINiD,
5514+
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5515+
[]>;
5516+
5517+
def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5518+
(ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5519+
N3RegFrm, IIC_VBINiQ,
5520+
"vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5521+
[]>;
5522+
55105523
// VBIF : Vector Bitwise Insert if False
55115524
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
5512-
// FIXME: This instruction's encoding MAY NOT BE correct.
55135525
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
55145526
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
55155527
N3RegFrm, IIC_VBINiD,
@@ -5523,7 +5535,6 @@ def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
55235535

55245536
// VBIT : Vector Bitwise Insert if True
55255537
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
5526-
// FIXME: This instruction's encoding MAY NOT BE correct.
55275538
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
55285539
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
55295540
N3RegFrm, IIC_VBINiD,
@@ -5535,10 +5546,6 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
55355546
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
55365547
[]>;
55375548

5538-
// VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
5539-
// for equivalent operations with different register constraints; it just
5540-
// inserts copies.
5541-
55425549
// Vector Absolute Differences.
55435550

55445551
// VABD : Vector Absolute Difference
@@ -7953,7 +7960,7 @@ let Predicates = [HasNEON,IsLE] in {
79537960
(VLD1LNd16 addrmode6:$addr,
79547961
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
79557962
}
7956-
// The following patterns are basically a copy of the patterns above,
7963+
// The following patterns are basically a copy of the patterns above,
79577964
// however with an additional VREV16d instruction to convert data
79587965
// loaded by VLD1LN into proper vector format in big endian mode.
79597966
let Predicates = [HasNEON,IsBE] in {

llvm/lib/Target/ARM/ARMScheduleA57.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1201,7 +1201,7 @@ def : InstRW<[A57Write_5cyc_1V], (instregex
12011201
// --- 3.16 ASIMD Miscellaneous Instructions ---
12021202

12031203
// ASIMD bitwise insert
1204-
def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL")>;
1204+
def : InstRW<[A57Write_3cyc_1V], (instregex "VBIF", "VBIT", "VBSL", "VBSP")>;
12051205

12061206
// ASIMD count
12071207
def : InstRW<[A57Write_3cyc_1V], (instregex "VCLS", "VCLZ", "VCNT")>;

llvm/lib/Target/ARM/ARMScheduleR52.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -787,8 +787,8 @@ def : InstRW<[R52Write2FPALU_F3, R52Read_F2, R52Read_F2], (instregex "(VAND|VBIC
787787
def : InstRW<[R52WriteFPALU_F3, R52Read_F2], (instregex "VBICi(v4i16|v2i32)")>;
788788
def : InstRW<[R52Write2FPALU_F3, R52Read_F2], (instregex "VBICi(v8i16|v4i32)")>;
789789

790-
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)d")>;
791-
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL)q")>;
790+
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)d")>;
791+
def : InstRW<[R52Write2FPALU_F3, R52Read_F1, R52Read_F2, R52Read_F2], (instregex "(VBIF|VBIT|VBSL|VBSP)q")>;
792792

793793
def : InstRW<[R52WriteFPALU_F3, R52Read_F1, R52Read_F1],
794794
(instregex "(VCEQ|VCGE|VCGT|VCLE|VCLT|VCLZ|VCMP|VCMPE|VCNT)")>;

llvm/lib/Target/ARM/ARMScheduleSwift.td

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -558,8 +558,8 @@ let SchedModel = SwiftModel in {
558558
(instregex "VADDv", "VSUBv", "VNEG(s|f|v)", "VADDL", "VSUBL",
559559
"VADDW", "VSUBW", "VHADD", "VHSUB", "VRHADD", "VPADDi",
560560
"VPADDL", "VAND", "VBIC", "VEOR", "VORN", "VORR", "VTST",
561-
"VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF",
562-
"VBIT", "VBSL", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
561+
"VSHL", "VSHR(s|u)", "VSHLL", "VQSHL(s|u)", "VBIF", "VBIT",
562+
"VBSL", "VBSP", "VSLI", "VSRI", "VCLS", "VCLZ", "VCNT")>;
563563

564564
def : InstRW<[SwiftWriteP1TwoCycle],
565565
(instregex "VEXT", "VREV16", "VREV32", "VREV64")>;

llvm/test/CodeGen/ARM/fcopysign.ll

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,11 @@ define float @test1(float %x, float %y) nounwind {
1212
;
1313
; HARD-LABEL: test1:
1414
; HARD: @ %bb.0: @ %entry
15-
; HARD-NEXT: vmov.f32 s4, s1
15+
; HARD-NEXT: vmov.f32 s2, s1
1616
; HARD-NEXT: @ kill: def $s0 killed $s0 def $d0
17-
; HARD-NEXT: vmov.i32 d1, #0x80000000
18-
; HARD-NEXT: vbsl d1, d2, d0
19-
; HARD-NEXT: vmov.f32 s0, s2
17+
; HARD-NEXT: vmov.i32 d16, #0x80000000
18+
; HARD-NEXT: vbit d0, d1, d16
19+
; HARD-NEXT: @ kill: def $s0 killed $s0 killed $d0
2020
; HARD-NEXT: bx lr
2121
entry:
2222

@@ -35,8 +35,7 @@ define double @test2(double %x, double %y) nounwind {
3535
; HARD: @ %bb.0: @ %entry
3636
; HARD-NEXT: vmov.i32 d16, #0x80000000
3737
; HARD-NEXT: vshl.i64 d16, d16, #32
38-
; HARD-NEXT: vbsl d16, d1, d0
39-
; HARD-NEXT: vorr d0, d16, d16
38+
; HARD-NEXT: vbit d0, d1, d16
4039
; HARD-NEXT: bx lr
4140
entry:
4241

@@ -53,15 +52,16 @@ define double @test3(double %x, double %y, double %z) nounwind {
5352
; SOFT-NEXT: vmov.i32 d17, #0x80000000
5453
; SOFT-NEXT: vshl.i64 d17, d17, #32
5554
; SOFT-NEXT: vldr d18, [sp]
56-
; SOFT-NEXT: vbsl d17, d18, d16
57-
; SOFT-NEXT: vmov r0, r1, d17
55+
; SOFT-NEXT: vbit d16, d18, d17
56+
; SOFT-NEXT: vmov r0, r1, d16
5857
; SOFT-NEXT: bx lr
5958
;
6059
; HARD-LABEL: test3:
6160
; HARD: @ %bb.0: @ %entry
6261
; HARD-NEXT: vmul.f64 d16, d0, d1
6362
; HARD-NEXT: vmov.i32 d17, #0x80000000
64-
; HARD-NEXT: vshl.i64 d0, d17, #32
63+
; HARD-NEXT: vshl.i64 d17, d17, #32
64+
; HARD-NEXT: vorr d0, d17, d17
6565
; HARD-NEXT: vbsl d0, d2, d16
6666
; HARD-NEXT: bx lr
6767
entry:
@@ -81,8 +81,8 @@ define float @test4() nounwind {
8181
; SOFT-NEXT: vmov.i32 d17, #0x80000000
8282
; SOFT-NEXT: vshr.u64 d16, d16, #32
8383
; SOFT-NEXT: vmov.f32 d18, #5.000000e-01
84-
; SOFT-NEXT: vbsl d17, d16, d18
85-
; SOFT-NEXT: vadd.f32 d0, d0, d17
84+
; SOFT-NEXT: vbif d16, d18, d17
85+
; SOFT-NEXT: vadd.f32 d0, d0, d16
8686
; SOFT-NEXT: vmov r0, s0
8787
; SOFT-NEXT: pop {lr}
8888
;
@@ -93,10 +93,10 @@ define float @test4() nounwind {
9393
; HARD-NEXT: bl bar
9494
; HARD-NEXT: vmov d16, r0, r1
9595
; HARD-NEXT: vcvt.f32.f64 s0, d16
96-
; HARD-NEXT: vmov.i32 d1, #0x80000000
96+
; HARD-NEXT: vmov.i32 d17, #0x80000000
9797
; HARD-NEXT: vshr.u64 d16, d16, #32
98-
; HARD-NEXT: vmov.f32 s4, #5.000000e-01
99-
; HARD-NEXT: vbsl d1, d16, d2
98+
; HARD-NEXT: vmov.f32 s2, #5.000000e-01
99+
; HARD-NEXT: vbit d1, d16, d17
100100
; HARD-NEXT: vadd.f32 s0, s0, s2
101101
; HARD-NEXT: pop {r11, pc}
102102
entry:

0 commit comments

Comments
 (0)