Skip to content

Commit ba56c87

Browse files
committed
[WIP][DAG] Add legalization handling for ABDS/ABDU
Still WIP, but I wanted to give other teams some visibility. Always match ABD patterns pre-legalization, and use TargetLowering::expandABD to expand them again during legalization.
1 parent c631131 commit ba56c87

20 files changed

+888
-967
lines changed

llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4140,13 +4140,13 @@ SDValue DAGCombiner::visitSUB(SDNode *N) {
41404140
}
41414141

41424142
// smax(a,b) - smin(a,b) --> abds(a,b)
4143-
if (hasOperation(ISD::ABDS, VT) &&
4143+
if ((!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
41444144
sd_match(N0, m_SMax(m_Value(A), m_Value(B))) &&
41454145
sd_match(N1, m_SMin(m_Specific(A), m_Specific(B))))
41464146
return DAG.getNode(ISD::ABDS, DL, VT, A, B);
41474147

41484148
// umax(a,b) - umin(a,b) --> abdu(a,b)
4149-
if (hasOperation(ISD::ABDU, VT) &&
4149+
if ((!LegalOperations || hasOperation(ISD::ABDU, VT)) &&
41504150
sd_match(N0, m_UMax(m_Value(A), m_Value(B))) &&
41514151
sd_match(N1, m_UMin(m_Specific(A), m_Specific(B))))
41524152
return DAG.getNode(ISD::ABDU, DL, VT, A, B);
@@ -10942,7 +10942,8 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
1094210942
(Opc0 != ISD::ZERO_EXTEND && Opc0 != ISD::SIGN_EXTEND &&
1094310943
Opc0 != ISD::SIGN_EXTEND_INREG)) {
1094410944
// fold (abs (sub nsw x, y)) -> abds(x, y)
10945-
if (AbsOp1->getFlags().hasNoSignedWrap() && hasOperation(ISD::ABDS, VT) &&
10945+
if (AbsOp1->getFlags().hasNoSignedWrap() &&
10946+
(!LegalOperations || hasOperation(ISD::ABDS, VT)) &&
1094610947
TLI.preferABDSToABSWithNSW(VT)) {
1094710948
SDValue ABD = DAG.getNode(ISD::ABDS, DL, VT, Op0, Op1);
1094810949
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
@@ -10964,7 +10965,8 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
1096410965
// fold abs(zext(x) - zext(y)) -> zext(abdu(x, y))
1096510966
EVT MaxVT = VT0.bitsGT(VT1) ? VT0 : VT1;
1096610967
if ((VT0 == MaxVT || Op0->hasOneUse()) &&
10967-
(VT1 == MaxVT || Op1->hasOneUse()) && hasOperation(ABDOpcode, MaxVT)) {
10968+
(VT1 == MaxVT || Op1->hasOneUse()) &&
10969+
(!LegalOperations || hasOperation(ABDOpcode, MaxVT))) {
1096810970
SDValue ABD = DAG.getNode(ABDOpcode, DL, MaxVT,
1096910971
DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op0),
1097010972
DAG.getNode(ISD::TRUNCATE, DL, MaxVT, Op1));
@@ -10974,7 +10976,7 @@ SDValue DAGCombiner::foldABSToABD(SDNode *N, const SDLoc &DL) {
1097410976

1097510977
// fold abs(sext(x) - sext(y)) -> abds(sext(x), sext(y))
1097610978
// fold abs(zext(x) - zext(y)) -> abdu(zext(x), zext(y))
10977-
if (hasOperation(ABDOpcode, VT)) {
10979+
if (!LegalOperations || hasOperation(ABDOpcode, VT)) {
1097810980
SDValue ABD = DAG.getNode(ABDOpcode, DL, VT, Op0, Op1);
1097910981
return DAG.getZExtOrTrunc(ABD, DL, SrcVT);
1098010982
}
@@ -12346,7 +12348,7 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
1234612348
N1.getOperand(1) == N2.getOperand(0)) {
1234712349
bool IsSigned = isSignedIntSetCC(CC);
1234812350
unsigned ABDOpc = IsSigned ? ISD::ABDS : ISD::ABDU;
12349-
if (hasOperation(ABDOpc, VT)) {
12351+
if (!LegalOperations || hasOperation(ABDOpc, VT)) {
1235012352
switch (CC) {
1235112353
case ISD::SETGT:
1235212354
case ISD::SETGE:

llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,13 +188,15 @@ void DAGTypeLegalizer::PromoteIntegerResult(SDNode *N, unsigned ResNo) {
188188
case ISD::VP_SUB:
189189
case ISD::VP_MUL: Res = PromoteIntRes_SimpleIntBinOp(N); break;
190190

191+
case ISD::ABDS:
191192
case ISD::VP_SMIN:
192193
case ISD::VP_SMAX:
193194
case ISD::SDIV:
194195
case ISD::SREM:
195196
case ISD::VP_SDIV:
196197
case ISD::VP_SREM: Res = PromoteIntRes_SExtIntBinOp(N); break;
197198

199+
case ISD::ABDU:
198200
case ISD::VP_UMIN:
199201
case ISD::VP_UMAX:
200202
case ISD::UDIV:
@@ -2703,6 +2705,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) {
27032705
case ISD::PARITY: ExpandIntRes_PARITY(N, Lo, Hi); break;
27042706
case ISD::Constant: ExpandIntRes_Constant(N, Lo, Hi); break;
27052707
case ISD::ABS: ExpandIntRes_ABS(N, Lo, Hi); break;
2708+
case ISD::ABDS:
2709+
case ISD::ABDU: ExpandIntRes_ABD(N, Lo, Hi); break;
27062710
case ISD::CTLZ_ZERO_UNDEF:
27072711
case ISD::CTLZ: ExpandIntRes_CTLZ(N, Lo, Hi); break;
27082712
case ISD::CTPOP: ExpandIntRes_CTPOP(N, Lo, Hi); break;
@@ -3749,6 +3753,11 @@ void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N,
37493753
Hi = DAG.getConstant(0, dl, NVT);
37503754
}
37513755

3756+
// Expands the integer result of an ABDS/ABDU node (the two opcodes that
// ExpandIntegerResult dispatches here). The node is first rewritten by
// TLI.expandABD, and the rewritten value is then handed to SplitInteger,
// which fills in the Lo and Hi output halves.
void DAGTypeLegalizer::ExpandIntRes_ABD(SDNode *N, SDValue &Lo, SDValue &Hi) {
3757+
SDValue Result = TLI.expandABD(N, DAG);
3758+
SplitInteger(Result, Lo, Hi);
3759+
}
3760+
37523761
void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N,
37533762
SDValue &Lo, SDValue &Hi) {
37543763
SDLoc dl(N);

llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -443,6 +443,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer {
443443
void ExpandIntRes_AssertZext (SDNode *N, SDValue &Lo, SDValue &Hi);
444444
void ExpandIntRes_Constant (SDNode *N, SDValue &Lo, SDValue &Hi);
445445
void ExpandIntRes_ABS (SDNode *N, SDValue &Lo, SDValue &Hi);
446+
void ExpandIntRes_ABD (SDNode *N, SDValue &Lo, SDValue &Hi);
446447
void ExpandIntRes_CTLZ (SDNode *N, SDValue &Lo, SDValue &Hi);
447448
void ExpandIntRes_CTPOP (SDNode *N, SDValue &Lo, SDValue &Hi);
448449
void ExpandIntRes_CTTZ (SDNode *N, SDValue &Lo, SDValue &Hi);

llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -136,6 +136,8 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
136136
case ISD::FMINIMUM:
137137
case ISD::FMAXIMUM:
138138
case ISD::FLDEXP:
139+
case ISD::ABDS:
140+
case ISD::ABDU:
139141
case ISD::SMIN:
140142
case ISD::SMAX:
141143
case ISD::UMIN:
@@ -1171,6 +1173,8 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
11711173
case ISD::MUL: case ISD::VP_MUL:
11721174
case ISD::MULHS:
11731175
case ISD::MULHU:
1176+
case ISD::ABDS:
1177+
case ISD::ABDU:
11741178
case ISD::FADD: case ISD::VP_FADD:
11751179
case ISD::FSUB: case ISD::VP_FSUB:
11761180
case ISD::FMUL: case ISD::VP_FMUL:
@@ -4235,6 +4239,8 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
42354239
case ISD::MUL: case ISD::VP_MUL:
42364240
case ISD::MULHS:
42374241
case ISD::MULHU:
4242+
case ISD::ABDS:
4243+
case ISD::ABDU:
42384244
case ISD::OR: case ISD::VP_OR:
42394245
case ISD::SUB: case ISD::VP_SUB:
42404246
case ISD::XOR: case ISD::VP_XOR:

llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3477,6 +3477,16 @@ KnownBits SelectionDAG::computeKnownBits(SDValue Op, const APInt &DemandedElts,
34773477
Known = computeKnownBits(Op.getOperand(1), DemandedElts, Depth + 1);
34783478
Known2 = computeKnownBits(Op.getOperand(0), DemandedElts, Depth + 1);
34793479
Known = KnownBits::abds(Known, Known2);
3480+
// If the difference has more than 1 sign bit, then it's guaranteed to be
3481+
// positive.
3482+
unsigned SignBits =
3483+
ComputeNumSignBits(Op.getOperand(1), DemandedElts, Depth + 1);
3484+
if (SignBits == 1)
3485+
break;
3486+
SignBits = std::min(SignBits, ComputeNumSignBits(Op.getOperand(0),
3487+
DemandedElts, Depth + 1));
3488+
if (SignBits > 1)
3489+
Known.Zero.setHighBits(SignBits - 1);
34803490
break;
34813491
}
34823492
case ISD::UMUL_LOHI: {
@@ -6940,6 +6950,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, const SDLoc &DL, EVT VT,
69406950
assert(VT.isInteger() && "This operator does not apply to FP types!");
69416951
assert(N1.getValueType() == N2.getValueType() &&
69426952
N1.getValueType() == VT && "Binary operator types must match!");
6953+
if (VT.isVector() && VT.getVectorElementType() == MVT::i1)
6954+
return getNode(ISD::XOR, DL, VT, N1, N2);
69436955
break;
69446956
case ISD::SMIN:
69456957
case ISD::UMAX:

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9226,6 +9226,15 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
92269226
DAG.getNode(ISD::USUBSAT, dl, VT, LHS, RHS),
92279227
DAG.getNode(ISD::USUBSAT, dl, VT, RHS, LHS));
92289228

9229+
// If the subtract doesn't overflow then just use abs(sub())
9230+
// NOTE: don't use frozen operands for value tracking.
9231+
if (DAG.willNotOverflowSub(IsSigned, N->getOperand(0), N->getOperand(1)))
9232+
return DAG.getNode(ISD::ABS, dl, VT,
9233+
DAG.getNode(ISD::SUB, dl, VT, LHS, RHS));
9234+
if (DAG.willNotOverflowSub(IsSigned, N->getOperand(1), N->getOperand(0)))
9235+
return DAG.getNode(ISD::ABS, dl, VT,
9236+
DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9237+
92299238
EVT CCVT = getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
92309239
ISD::CondCode CC = IsSigned ? ISD::CondCode::SETGT : ISD::CondCode::SETUGT;
92319240
SDValue Cmp = DAG.getSetCC(dl, CCVT, LHS, RHS, CC);
@@ -9239,10 +9248,15 @@ SDValue TargetLowering::expandABD(SDNode *N, SelectionDAG &DAG) const {
92399248
return DAG.getNode(ISD::SUB, dl, VT, Cmp, Xor);
92409249
}
92419250

9251+
// FIXME: Should really try to split the vector in case it's legal on a
9252+
// subvector.
9253+
if (VT.isVector() && !isOperationLegalOrCustom(ISD::VSELECT, VT))
9254+
return DAG.UnrollVectorOp(N);
9255+
92429256
// abds(lhs, rhs) -> select(sgt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
92439257
// abdu(lhs, rhs) -> select(ugt(lhs,rhs), sub(lhs,rhs), sub(rhs,lhs))
9244-
return DAG.getSelect(dl, VT, Cmp, DAG.getNode(ISD::SUB, dl, VT, LHS, RHS),
9245-
DAG.getNode(ISD::SUB, dl, VT, RHS, LHS));
9258+
SDValue Diff = DAG.getNode(ISD::SUB, dl, VT, LHS, RHS);
9259+
return DAG.getSelect(dl, VT, Cmp, Diff, DAG.getNegative(Diff, dl, VT));
92469260
}
92479261

92489262
SDValue TargetLowering::expandBSWAP(SDNode *N, SelectionDAG &DAG) const {

llvm/test/CodeGen/AArch64/arm64-csel.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@ define i32@foo5(i32 %a, i32 %b) nounwind ssp {
6565
; CHECK-LABEL: foo5:
6666
; CHECK: // %bb.0: // %entry
6767
; CHECK-NEXT: subs w8, w0, w1
68-
; CHECK-NEXT: cneg w0, w8, mi
68+
; CHECK-NEXT: cneg w0, w8, le
6969
; CHECK-NEXT: ret
7070
entry:
7171
%sub = sub nsw i32 %a, %b
@@ -98,7 +98,7 @@ define i32 @foo7(i32 %a, i32 %b) nounwind {
9898
; CHECK-LABEL: foo7:
9999
; CHECK: // %bb.0: // %entry
100100
; CHECK-NEXT: subs w8, w0, w1
101-
; CHECK-NEXT: cneg w9, w8, mi
101+
; CHECK-NEXT: cneg w9, w8, le
102102
; CHECK-NEXT: cmn w8, #1
103103
; CHECK-NEXT: csel w10, w9, w0, lt
104104
; CHECK-NEXT: cmp w8, #0

llvm/test/CodeGen/AArch64/arm64-vabs.ll

Lines changed: 7 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1799,30 +1799,13 @@ define <2 x i64> @uabd_i32(<2 x i32> %a, <2 x i32> %b) {
17991799
define <2 x i128> @uabd_i64(<2 x i64> %a, <2 x i64> %b) {
18001800
; CHECK-LABEL: uabd_i64:
18011801
; CHECK: // %bb.0:
1802-
; CHECK-NEXT: mov.d x8, v0[1]
1803-
; CHECK-NEXT: mov.d x9, v1[1]
1804-
; CHECK-NEXT: fmov x10, d0
1805-
; CHECK-NEXT: fmov x11, d1
1806-
; CHECK-NEXT: asr x12, x10, #63
1807-
; CHECK-NEXT: asr x13, x11, #63
1808-
; CHECK-NEXT: subs x10, x10, x11
1809-
; CHECK-NEXT: asr x11, x8, #63
1810-
; CHECK-NEXT: asr x14, x9, #63
1811-
; CHECK-NEXT: sbc x12, x12, x13
1812-
; CHECK-NEXT: subs x8, x8, x9
1813-
; CHECK-NEXT: sbc x9, x11, x14
1814-
; CHECK-NEXT: asr x13, x12, #63
1815-
; CHECK-NEXT: asr x11, x9, #63
1816-
; CHECK-NEXT: eor x10, x10, x13
1817-
; CHECK-NEXT: eor x8, x8, x11
1818-
; CHECK-NEXT: eor x9, x9, x11
1819-
; CHECK-NEXT: subs x2, x8, x11
1820-
; CHECK-NEXT: eor x8, x12, x13
1821-
; CHECK-NEXT: sbc x3, x9, x11
1822-
; CHECK-NEXT: subs x9, x10, x13
1823-
; CHECK-NEXT: fmov d0, x9
1824-
; CHECK-NEXT: sbc x1, x8, x13
1825-
; CHECK-NEXT: mov.d v0[1], x1
1802+
; CHECK-NEXT: cmgt.2d v2, v0, v1
1803+
; CHECK-NEXT: sub.2d v0, v0, v1
1804+
; CHECK-NEXT: mov x1, xzr
1805+
; CHECK-NEXT: mov x3, xzr
1806+
; CHECK-NEXT: eor.16b v0, v0, v2
1807+
; CHECK-NEXT: sub.2d v0, v2, v0
1808+
; CHECK-NEXT: mov.d x2, v0[1]
18261809
; CHECK-NEXT: fmov x0, d0
18271810
; CHECK-NEXT: ret
18281811
%aext = sext <2 x i64> %a to <2 x i128>

llvm/test/CodeGen/AArch64/neon-abd.ll

Lines changed: 21 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,10 @@ define <4 x i16> @sabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
4949
define <4 x i16> @sabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
5050
; CHECK-LABEL: sabd_4h_promoted_ops:
5151
; CHECK: // %bb.0:
52-
; CHECK-NEXT: shl v0.4h, v0.4h, #8
5352
; CHECK-NEXT: shl v1.4h, v1.4h, #8
54-
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
53+
; CHECK-NEXT: shl v0.4h, v0.4h, #8
5554
; CHECK-NEXT: sshr v1.4h, v1.4h, #8
55+
; CHECK-NEXT: sshr v0.4h, v0.4h, #8
5656
; CHECK-NEXT: sabd v0.4h, v0.4h, v1.4h
5757
; CHECK-NEXT: ret
5858
%a.sext = sext <4 x i8> %a to <4 x i16>
@@ -103,10 +103,10 @@ define <2 x i32> @sabd_2s(<2 x i32> %a, <2 x i32> %b) #0 {
103103
define <2 x i32> @sabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
104104
; CHECK-LABEL: sabd_2s_promoted_ops:
105105
; CHECK: // %bb.0:
106-
; CHECK-NEXT: shl v0.2s, v0.2s, #16
107106
; CHECK-NEXT: shl v1.2s, v1.2s, #16
108-
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
107+
; CHECK-NEXT: shl v0.2s, v0.2s, #16
109108
; CHECK-NEXT: sshr v1.2s, v1.2s, #16
109+
; CHECK-NEXT: sshr v0.2s, v0.2s, #16
110110
; CHECK-NEXT: sabd v0.2s, v0.2s, v1.2s
111111
; CHECK-NEXT: ret
112112
%a.sext = sext <2 x i16> %a to <2 x i32>
@@ -144,27 +144,10 @@ define <4 x i32> @sabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
144144
define <2 x i64> @sabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
145145
; CHECK-LABEL: sabd_2d:
146146
; CHECK: // %bb.0:
147-
; CHECK-NEXT: mov x8, v0.d[1]
148-
; CHECK-NEXT: mov x9, v1.d[1]
149-
; CHECK-NEXT: fmov x10, d0
150-
; CHECK-NEXT: fmov x12, d1
151-
; CHECK-NEXT: asr x14, x10, #63
152-
; CHECK-NEXT: asr x11, x8, #63
153-
; CHECK-NEXT: asr x13, x9, #63
154-
; CHECK-NEXT: asr x15, x12, #63
155-
; CHECK-NEXT: subs x8, x8, x9
156-
; CHECK-NEXT: sbc x9, x11, x13
157-
; CHECK-NEXT: subs x10, x10, x12
158-
; CHECK-NEXT: sbc x11, x14, x15
159-
; CHECK-NEXT: asr x9, x9, #63
160-
; CHECK-NEXT: asr x11, x11, #63
161-
; CHECK-NEXT: eor x8, x8, x9
162-
; CHECK-NEXT: eor x10, x10, x11
163-
; CHECK-NEXT: sub x8, x8, x9
164-
; CHECK-NEXT: sub x10, x10, x11
165-
; CHECK-NEXT: fmov d1, x8
166-
; CHECK-NEXT: fmov d0, x10
167-
; CHECK-NEXT: mov v0.d[1], v1.d[0]
147+
; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
148+
; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
149+
; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b
150+
; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
168151
; CHECK-NEXT: ret
169152
%a.sext = sext <2 x i64> %a to <2 x i128>
170153
%b.sext = sext <2 x i64> %b to <2 x i128>
@@ -232,8 +215,8 @@ define <4 x i16> @uabd_4h(<4 x i16> %a, <4 x i16> %b) #0 {
232215
define <4 x i16> @uabd_4h_promoted_ops(<4 x i8> %a, <4 x i8> %b) #0 {
233216
; CHECK-LABEL: uabd_4h_promoted_ops:
234217
; CHECK: // %bb.0:
235-
; CHECK-NEXT: bic v0.4h, #255, lsl #8
236218
; CHECK-NEXT: bic v1.4h, #255, lsl #8
219+
; CHECK-NEXT: bic v0.4h, #255, lsl #8
237220
; CHECK-NEXT: uabd v0.4h, v0.4h, v1.4h
238221
; CHECK-NEXT: ret
239222
%a.zext = zext <4 x i8> %a to <4 x i16>
@@ -285,8 +268,8 @@ define <2 x i32> @uabd_2s_promoted_ops(<2 x i16> %a, <2 x i16> %b) #0 {
285268
; CHECK-LABEL: uabd_2s_promoted_ops:
286269
; CHECK: // %bb.0:
287270
; CHECK-NEXT: movi d2, #0x00ffff0000ffff
288-
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
289271
; CHECK-NEXT: and v1.8b, v1.8b, v2.8b
272+
; CHECK-NEXT: and v0.8b, v0.8b, v2.8b
290273
; CHECK-NEXT: uabd v0.2s, v0.2s, v1.2s
291274
; CHECK-NEXT: ret
292275
%a.zext = zext <2 x i16> %a to <2 x i32>
@@ -324,23 +307,9 @@ define <4 x i32> @uabd_4s_promoted_ops(<4 x i16> %a, <4 x i16> %b) #0 {
324307
define <2 x i64> @uabd_2d(<2 x i64> %a, <2 x i64> %b) #0 {
325308
; CHECK-LABEL: uabd_2d:
326309
; CHECK: // %bb.0:
327-
; CHECK-NEXT: mov x8, v0.d[1]
328-
; CHECK-NEXT: mov x9, v1.d[1]
329-
; CHECK-NEXT: fmov x10, d0
330-
; CHECK-NEXT: fmov x11, d1
331-
; CHECK-NEXT: subs x8, x8, x9
332-
; CHECK-NEXT: ngc x9, xzr
333-
; CHECK-NEXT: subs x10, x10, x11
334-
; CHECK-NEXT: ngc x11, xzr
335-
; CHECK-NEXT: asr x9, x9, #63
336-
; CHECK-NEXT: asr x11, x11, #63
337-
; CHECK-NEXT: eor x8, x8, x9
338-
; CHECK-NEXT: eor x10, x10, x11
339-
; CHECK-NEXT: sub x8, x8, x9
340-
; CHECK-NEXT: sub x10, x10, x11
341-
; CHECK-NEXT: fmov d1, x8
342-
; CHECK-NEXT: fmov d0, x10
343-
; CHECK-NEXT: mov v0.d[1], v1.d[0]
310+
; CHECK-NEXT: uqsub v2.2d, v1.2d, v0.2d
311+
; CHECK-NEXT: uqsub v0.2d, v0.2d, v1.2d
312+
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
344313
; CHECK-NEXT: ret
345314
%a.zext = zext <2 x i64> %a to <2 x i128>
346315
%b.zext = zext <2 x i64> %b to <2 x i128>
@@ -439,8 +408,10 @@ define <4 x i32> @sabd_v4i32_nsw(<4 x i32> %a, <4 x i32> %b) #0 {
439408
define <2 x i64> @sabd_v2i64_nsw(<2 x i64> %a, <2 x i64> %b) #0 {
440409
; CHECK-LABEL: sabd_v2i64_nsw:
441410
; CHECK: // %bb.0:
411+
; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
442412
; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
443-
; CHECK-NEXT: abs v0.2d, v0.2d
413+
; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b
414+
; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
444415
; CHECK-NEXT: ret
445416
%sub = sub nsw <2 x i64> %a, %b
446417
%abs = call <2 x i64> @llvm.abs.v2i64(<2 x i64> %sub, i1 true)
@@ -484,9 +455,8 @@ define <2 x i64> @smaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
484455
; CHECK-LABEL: smaxmin_v2i64:
485456
; CHECK: // %bb.0:
486457
; CHECK-NEXT: cmgt v2.2d, v0.2d, v1.2d
487-
; CHECK-NEXT: cmgt v3.2d, v1.2d, v0.2d
488-
; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
489-
; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b
458+
; CHECK-NEXT: sub v0.2d, v0.2d, v1.2d
459+
; CHECK-NEXT: eor v0.16b, v0.16b, v2.16b
490460
; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
491461
; CHECK-NEXT: ret
492462
%a = tail call <2 x i64> @llvm.smax.v2i64(<2 x i64> %0, <2 x i64> %1)
@@ -531,11 +501,9 @@ define <4 x i32> @umaxmin_v4i32(<4 x i32> %0, <4 x i32> %1) {
531501
define <2 x i64> @umaxmin_v2i64(<2 x i64> %0, <2 x i64> %1) {
532502
; CHECK-LABEL: umaxmin_v2i64:
533503
; CHECK: // %bb.0:
534-
; CHECK-NEXT: cmhi v2.2d, v0.2d, v1.2d
535-
; CHECK-NEXT: cmhi v3.2d, v1.2d, v0.2d
536-
; CHECK-NEXT: bsl v2.16b, v0.16b, v1.16b
537-
; CHECK-NEXT: bif v0.16b, v1.16b, v3.16b
538-
; CHECK-NEXT: sub v0.2d, v2.2d, v0.2d
504+
; CHECK-NEXT: uqsub v2.2d, v1.2d, v0.2d
505+
; CHECK-NEXT: uqsub v0.2d, v0.2d, v1.2d
506+
; CHECK-NEXT: orr v0.16b, v0.16b, v2.16b
539507
; CHECK-NEXT: ret
540508
%a = tail call <2 x i64> @llvm.umax.v2i64(<2 x i64> %0, <2 x i64> %1)
541509
%b = tail call <2 x i64> @llvm.umin.v2i64(<2 x i64> %0, <2 x i64> %1)

0 commit comments

Comments
 (0)