@@ -509,7 +509,7 @@ def NEONvqrshrnsuImm : SDNode<"ARMISD::VQRSHRNsuIMM", SDTARMVSHXIMM>;
509
509
def NEONvsliImm : SDNode<"ARMISD::VSLIIMM", SDTARMVSHINSIMM>;
510
510
def NEONvsriImm : SDNode<"ARMISD::VSRIIMM", SDTARMVSHINSIMM>;
511
511
512
- def NEONvbsl : SDNode<"ARMISD::VBSL ",
512
+ def NEONvbsp : SDNode<"ARMISD::VBSP ",
513
513
SDTypeProfile<1, 3, [SDTCisVec<0>,
514
514
SDTCisSameAs<0, 1>,
515
515
SDTCisSameAs<0, 2>,
@@ -4526,9 +4526,9 @@ let Predicates = [HasNEON, HasV8_1a] in {
4526
4526
(SubReg_i16_lane imm:$lane)))>;
4527
4527
def : Pat<(v4i32 (saddsat
4528
4528
(v4i32 QPR:$src1),
4529
- (v4i32 (int_arm_neon_vqrdmulh
4529
+ (v4i32 (int_arm_neon_vqrdmulh
4530
4530
(v4i32 QPR:$src2),
4531
- (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4531
+ (v4i32 (ARMvduplane (v4i32 QPR:$src3),
4532
4532
imm:$lane)))))),
4533
4533
(v4i32 (VQRDMLAHslv4i32 (v4i32 QPR:$src1),
4534
4534
(v4i32 QPR:$src2),
@@ -4579,17 +4579,17 @@ let Predicates = [HasNEON, HasV8_1a] in {
4579
4579
(v2i32 DPR:$Vn),
4580
4580
(v2i32 (ARMvduplane (v2i32 DPR_VFP2:$Vm),
4581
4581
imm:$lane)))))),
4582
- (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4582
+ (v2i32 (VQRDMLSHslv2i32 DPR:$src1, DPR:$Vn, DPR_VFP2:$Vm,
4583
4583
imm:$lane))>;
4584
4584
def : Pat<(v8i16 (ssubsat
4585
4585
(v8i16 QPR:$src1),
4586
4586
(v8i16 (int_arm_neon_vqrdmulh
4587
4587
(v8i16 QPR:$src2),
4588
- (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4588
+ (v8i16 (ARMvduplane (v8i16 QPR:$src3),
4589
4589
imm:$lane)))))),
4590
4590
(v8i16 (VQRDMLSHslv8i16 (v8i16 QPR:$src1),
4591
4591
(v8i16 QPR:$src2),
4592
- (v4i16 (EXTRACT_SUBREG
4592
+ (v4i16 (EXTRACT_SUBREG
4593
4593
QPR:$src3,
4594
4594
(DSubReg_i16_reg imm:$lane))),
4595
4595
(SubReg_i16_lane imm:$lane)))>;
@@ -4601,7 +4601,7 @@ let Predicates = [HasNEON, HasV8_1a] in {
4601
4601
imm:$lane)))))),
4602
4602
(v4i32 (VQRDMLSHslv4i32 (v4i32 QPR:$src1),
4603
4603
(v4i32 QPR:$src2),
4604
- (v2i32 (EXTRACT_SUBREG
4604
+ (v2i32 (EXTRACT_SUBREG
4605
4605
QPR:$src3,
4606
4606
(DSubReg_i32_reg imm:$lane))),
4607
4607
(SubReg_i32_lane imm:$lane)))>;
@@ -5442,74 +5442,86 @@ def : Pat<(v2i32 (vnotd DPR:$src)), (VMVNd DPR:$src)>;
5442
5442
def : Pat<(v4i32 (vnotq QPR:$src)), (VMVNq QPR:$src)>;
5443
5443
}
5444
5444
5445
- // VBSL : Vector Bitwise Select
5446
- def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5447
- (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5448
- N3RegFrm, IIC_VCNTiD,
5449
- "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5450
- [(set DPR:$Vd,
5451
- (v2i32 (NEONvbsl DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5445
+ // The TwoAddress pass will not go looking for equivalent operations
5446
+ // with different register constraints; it just inserts copies.
5447
+ // That is why the pseudo VBSP is implemented. It is expanded later into
5448
+ // VBIT/VBIF/VBSL taking into account register constraints to avoid copies.
5449
+ def VBSPd
5450
+ : PseudoNeonI<(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5451
+ IIC_VBINiD, "",
5452
+ [(set DPR:$Vd,
5453
+ (v2i32 (NEONvbsp DPR:$src1, DPR:$Vn, DPR:$Vm)))]>;
5452
5454
let Predicates = [HasNEON] in {
5453
5455
def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 DPR:$src1),
5454
5456
(v8i8 DPR:$Vn), (v8i8 DPR:$Vm))),
5455
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5457
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5456
5458
def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 DPR:$src1),
5457
5459
(v4i16 DPR:$Vn), (v4i16 DPR:$Vm))),
5458
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5460
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5459
5461
def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 DPR:$src1),
5460
5462
(v2i32 DPR:$Vn), (v2i32 DPR:$Vm))),
5461
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5463
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5462
5464
def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 DPR:$src1),
5463
5465
(v2f32 DPR:$Vn), (v2f32 DPR:$Vm))),
5464
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5466
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5465
5467
def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 DPR:$src1),
5466
5468
(v1i64 DPR:$Vn), (v1i64 DPR:$Vm))),
5467
- (VBSLd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5469
+ (VBSPd DPR:$src1, DPR:$Vn, DPR:$Vm)>;
5468
5470
5469
5471
def : Pat<(v2i32 (or (and DPR:$Vn, DPR:$Vd),
5470
5472
(and DPR:$Vm, (vnotd DPR:$Vd)))),
5471
- (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5473
+ (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5472
5474
5473
5475
def : Pat<(v1i64 (or (and DPR:$Vn, DPR:$Vd),
5474
5476
(and DPR:$Vm, (vnotd DPR:$Vd)))),
5475
- (VBSLd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5477
+ (VBSPd DPR:$Vd, DPR:$Vn, DPR:$Vm)>;
5476
5478
}
5477
5479
5478
- def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5479
- (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5480
- N3RegFrm, IIC_VCNTiQ,
5481
- "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5482
- [(set QPR:$Vd,
5483
- (v4i32 (NEONvbsl QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5484
-
5480
+ def VBSPq
5481
+ : PseudoNeonI<(outs QPR:$Vd), (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5482
+ IIC_VBINiQ, "",
5483
+ [(set QPR:$Vd,
5484
+ (v4i32 (NEONvbsp QPR:$src1, QPR:$Vn, QPR:$Vm)))]>;
5485
5485
let Predicates = [HasNEON] in {
5486
5486
def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 QPR:$src1),
5487
5487
(v16i8 QPR:$Vn), (v16i8 QPR:$Vm))),
5488
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5488
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5489
5489
def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 QPR:$src1),
5490
5490
(v8i16 QPR:$Vn), (v8i16 QPR:$Vm))),
5491
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5491
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5492
5492
def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 QPR:$src1),
5493
5493
(v4i32 QPR:$Vn), (v4i32 QPR:$Vm))),
5494
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5494
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5495
5495
def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 QPR:$src1),
5496
5496
(v4f32 QPR:$Vn), (v4f32 QPR:$Vm))),
5497
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5497
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5498
5498
def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 QPR:$src1),
5499
5499
(v2i64 QPR:$Vn), (v2i64 QPR:$Vm))),
5500
- (VBSLq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5500
+ (VBSPq QPR:$src1, QPR:$Vn, QPR:$Vm)>;
5501
5501
5502
5502
def : Pat<(v4i32 (or (and QPR:$Vn, QPR:$Vd),
5503
5503
(and QPR:$Vm, (vnotq QPR:$Vd)))),
5504
- (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5504
+ (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5505
5505
def : Pat<(v2i64 (or (and QPR:$Vn, QPR:$Vd),
5506
5506
(and QPR:$Vm, (vnotq QPR:$Vd)))),
5507
- (VBSLq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5507
+ (VBSPq QPR:$Vd, QPR:$Vn, QPR:$Vm)>;
5508
5508
}
5509
5509
5510
+ // VBSL : Vector Bitwise Select
5511
+ def VBSLd : N3VX<1, 0, 0b01, 0b0001, 0, 1, (outs DPR:$Vd),
5512
+ (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5513
+ N3RegFrm, IIC_VBINiD,
5514
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5515
+ []>;
5516
+
5517
+ def VBSLq : N3VX<1, 0, 0b01, 0b0001, 1, 1, (outs QPR:$Vd),
5518
+ (ins QPR:$src1, QPR:$Vn, QPR:$Vm),
5519
+ N3RegFrm, IIC_VBINiQ,
5520
+ "vbsl", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5521
+ []>;
5522
+
5510
5523
// VBIF : Vector Bitwise Insert if False
5511
5524
// like VBSL but with: "vbif $dst, $src3, $src1", "$src2 = $dst",
5512
- // FIXME: This instruction's encoding MAY NOT BE correct.
5513
5525
def VBIFd : N3VX<1, 0, 0b11, 0b0001, 0, 1,
5514
5526
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5515
5527
N3RegFrm, IIC_VBINiD,
@@ -5523,7 +5535,6 @@ def VBIFq : N3VX<1, 0, 0b11, 0b0001, 1, 1,
5523
5535
5524
5536
// VBIT : Vector Bitwise Insert if True
5525
5537
// like VBSL but with: "vbit $dst, $src2, $src1", "$src3 = $dst",
5526
- // FIXME: This instruction's encoding MAY NOT BE correct.
5527
5538
def VBITd : N3VX<1, 0, 0b10, 0b0001, 0, 1,
5528
5539
(outs DPR:$Vd), (ins DPR:$src1, DPR:$Vn, DPR:$Vm),
5529
5540
N3RegFrm, IIC_VBINiD,
@@ -5535,10 +5546,6 @@ def VBITq : N3VX<1, 0, 0b10, 0b0001, 1, 1,
5535
5546
"vbit", "$Vd, $Vn, $Vm", "$src1 = $Vd",
5536
5547
[]>;
5537
5548
5538
- // VBIT/VBIF are not yet implemented. The TwoAddress pass will not go looking
5539
- // for equivalent operations with different register constraints; it just
5540
- // inserts copies.
5541
-
5542
5549
// Vector Absolute Differences.
5543
5550
5544
5551
// VABD : Vector Absolute Difference
@@ -7953,7 +7960,7 @@ let Predicates = [HasNEON,IsLE] in {
7953
7960
(VLD1LNd16 addrmode6:$addr,
7954
7961
(f64 (IMPLICIT_DEF)), (i32 0))), dsub_0)), dsub_0))>;
7955
7962
}
7956
- // The following patterns are basically a copy of the patterns above,
7963
+ // The following patterns are basically a copy of the patterns above,
7957
7964
// however with an additional VREV16d instruction to convert data
7958
7965
// loaded by VLD1LN into proper vector format in big endian mode.
7959
7966
let Predicates = [HasNEON,IsBE] in {
0 commit comments