-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[AArch64] Eliminate Common SUBS by Reassociating Non-Constants #123344
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
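@llvm/pr-subscribers-backend-aarch64 Author: Marius Kamp (mskamp) Changes: Commit 1eed469 added logic to reassociate a (add (add x y) -c) operand to a CSEL instruction with a comparison involving x and c (or a similar constant) in order to obtain a common (SUBS x c) instruction. This commit extends this logic to non-constants. In this way, we also reassociate a (sub (add x y) z) operand of a CSEL instruction to (add (sub x z) y) if the CSEL compares x and z, for example. Alive proof: https://alive2.llvm.org/ce/z/SEVpR Full diff: https://github.com/llvm/llvm-project/pull/123344.diff 3 Files Affected: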
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index d4a114c275fb76..9a43f510b8ff42 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24899,16 +24899,36 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
SDValue SubsNode = N->getOperand(3);
if (SubsNode.getOpcode() != AArch64ISD::SUBS || !SubsNode.hasOneUse())
return SDValue();
- auto *CmpOpConst = dyn_cast<ConstantSDNode>(SubsNode.getOperand(1));
- if (!CmpOpConst)
- return SDValue();
+ SDValue CmpOpToMatch = SubsNode.getOperand(1);
SDValue CmpOpOther = SubsNode.getOperand(0);
EVT VT = N->getValueType(0);
+ unsigned ExpectedOpcode;
+ std::function<bool(SDValue)> CheckOp;
+ std::function<SDValue()> BuildSubsOp;
+ auto *CmpOpConst = dyn_cast<ConstantSDNode>(CmpOpToMatch);
+ if (CmpOpConst) {
+ ExpectedOpcode = ISD::ADD;
+ CheckOp = [&](SDValue Op) {
+ auto *AddOpConst = dyn_cast<ConstantSDNode>(Op);
+ return AddOpConst &&
+ AddOpConst->getAPIntValue() == -CmpOpConst->getAPIntValue();
+ };
+ BuildSubsOp = [&] {
+ return DAG.getConstant(CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
+ CmpOpConst->getValueType(0));
+ };
+ } else {
+ ExpectedOpcode = ISD::SUB;
+ CheckOp = [&](SDValue Op) { return Op == CmpOpToMatch; };
+ BuildSubsOp = [&] { return CmpOpToMatch; };
+ }
+
// Get the operand that can be reassociated with the SUBS instruction.
- auto GetReassociationOp = [&](SDValue Op, APInt ExpectedConst) {
- if (Op.getOpcode() != ISD::ADD)
+ auto GetReassociationOp = [&](SDValue Op,
+ std::function<bool(SDValue)> CheckOp) {
+ if (Op.getOpcode() != ExpectedOpcode)
return SDValue();
if (Op.getOperand(0).getOpcode() != ISD::ADD ||
!Op.getOperand(0).hasOneUse())
@@ -24919,24 +24939,23 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
std::swap(X, Y);
if (X != CmpOpOther)
return SDValue();
- auto *AddOpConst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
- if (!AddOpConst || AddOpConst->getAPIntValue() != ExpectedConst)
+ if (!CheckOp(Op.getOperand(1)))
return SDValue();
return Y;
};
// Try the reassociation using the given constant and condition code.
- auto Fold = [&](APInt NewCmpConst, AArch64CC::CondCode NewCC) {
- APInt ExpectedConst = -NewCmpConst;
- SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedConst);
- SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedConst);
+ auto Fold = [&](AArch64CC::CondCode NewCC,
+ std::function<bool(SDValue)> CheckOp,
+ std::function<SDValue()> BuildSubsOp) {
+ SDValue TReassocOp = GetReassociationOp(N->getOperand(0), CheckOp);
+ SDValue FReassocOp = GetReassociationOp(N->getOperand(1), CheckOp);
if (!TReassocOp && !FReassocOp)
return SDValue();
- SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
- DAG.getVTList(VT, MVT_CC), CmpOpOther,
- DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
- CmpOpConst->getValueType(0)));
+ SDValue NewCmp =
+ DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
+ DAG.getVTList(VT, MVT_CC), CmpOpOther, BuildSubsOp());
auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) {
if (!ReassocOp)
@@ -24958,9 +24977,19 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
// First, try to eliminate the compare instruction by searching for a
// subtraction with the same constant.
- if (SDValue R = Fold(CmpOpConst->getAPIntValue(), CC))
+ if (SDValue R = Fold(CC, CheckOp, BuildSubsOp))
return R;
+ if (!CmpOpConst) {
+ // Try again with the operands of the SUBS instruction and the condition
+ // swapped. Due to canonicalization, this only helps for non-constant
+ // operands of the SUBS instruction.
+ std::swap(CmpOpToMatch, CmpOpOther);
+ if (SDValue R = Fold(getSwappedCondition(CC), CheckOp, BuildSubsOp))
+ return R;
+ return SDValue();
+ }
+
if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero())
return SDValue();
@@ -24972,7 +25001,15 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
// them here but check for them nevertheless to be on the safe side.
auto CheckedFold = [&](bool Check, APInt NewCmpConst,
AArch64CC::CondCode NewCC) {
- return Check ? Fold(NewCmpConst, NewCC) : SDValue();
+ auto CheckOp = [=](SDValue Op) {
+ auto *AddOpConst = dyn_cast<ConstantSDNode>(Op);
+ return AddOpConst && AddOpConst->getAPIntValue() == -NewCmpConst;
+ };
+ auto BuildSubsOp = [&, CmpOpConst] {
+ return DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
+ CmpOpConst->getValueType(0));
+ };
+ return Check ? Fold(NewCC, CheckOp, BuildSubsOp) : SDValue();
};
switch (CC) {
case AArch64CC::EQ:
diff --git a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
index b8d323649feaa8..9671fa3b3d92fa 100644
--- a/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
+++ b/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h
@@ -306,6 +306,36 @@ inline static CondCode getInvertedCondCode(CondCode Code) {
return static_cast<CondCode>(static_cast<unsigned>(Code) ^ 0x1);
}
+/// getSwappedCondition - assume the flags are set by MI(a,b), return
+/// the condition code if we modify the instructions such that flags are
+/// set by MI(b,a).
+inline static CondCode getSwappedCondition(CondCode CC) {
+ switch (CC) {
+ default:
+ return AL;
+ case EQ:
+ return EQ;
+ case NE:
+ return NE;
+ case HS:
+ return LS;
+ case LO:
+ return HI;
+ case HI:
+ return LO;
+ case LS:
+ return HS;
+ case GE:
+ return LE;
+ case LT:
+ return GT;
+ case GT:
+ return LT;
+ case LE:
+ return GE;
+ }
+}
+
/// Given a condition code, return NZCV flags that would satisfy that condition.
/// The flag bits are in the format expected by the ccmp instructions.
/// Note that many different flag settings can satisfy a given condition code,
diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
index d8904cc6e35e34..42b1656fa78725 100644
--- a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
+++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
@@ -335,6 +335,286 @@ define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) {
ret i32 %ret
}
+define i32 @test_eq_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x1, %x2
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ne_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ne_nonconst_sub_add_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %x1, %x2
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ule_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ule_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ls
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ugt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ugt_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_uge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_uge_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, hs
+; CHECK-NEXT: ret
+ %cmp = icmp uge i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_slt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_slt_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sle_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sle_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, le
+; CHECK-NEXT: ret
+ %cmp = icmp sle i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sgt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sgt_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sge_nonconst_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ge
+; CHECK-NEXT: ret
+ %cmp = icmp sge i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_eq_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, eq
+; CHECK-NEXT: ret
+ %cmp = icmp eq i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ne_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ne_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ne
+; CHECK-NEXT: ret
+ %cmp = icmp ne i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ult_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, hi
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ule_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ule_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, hs
+; CHECK-NEXT: ret
+ %cmp = icmp ule i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_ugt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ugt_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ugt i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_uge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_uge_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ls
+; CHECK-NEXT: ret
+ %cmp = icmp uge i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_slt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_slt_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, gt
+; CHECK-NEXT: ret
+ %cmp = icmp slt i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sle_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sle_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, ge
+; CHECK-NEXT: ret
+ %cmp = icmp sle i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sgt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sgt_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, lt
+; CHECK-NEXT: ret
+ %cmp = icmp sgt i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+define i32 @test_sge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sge_nonconst_sub_add_comm_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: subs w8, w1, w2
+; CHECK-NEXT: add w8, w8, w0
+; CHECK-NEXT: csel w0, wzr, w8, le
+; CHECK-NEXT: ret
+ %cmp = icmp sge i32 %x2, %x1
+ %add = add nuw i32 %x0, %x1
+ %sub = sub i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
; Negative test
define i32 @test_eq0_multi_use_cmp_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_eq0_multi_use_cmp_i32:
@@ -421,22 +701,6 @@ define i32 @test_ugtsmax_sub_add_i32(i32 %x0, i32 %x1) {
ret i32 %ret
}
-; Negative test
-define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
-; CHECK-LABEL: test_ult_nonconst_i32:
-; CHECK: // %bb.0:
-; CHECK-NEXT: add w8, w0, w1
-; CHECK-NEXT: cmp w1, w2
-; CHECK-NEXT: sub w8, w8, w2
-; CHECK-NEXT: csel w0, wzr, w8, lo
-; CHECK-NEXT: ret
- %cmp = icmp ult i32 %x1, %x2
- %add = add i32 %x0, %x1
- %sub = sub i32 %add, %x2
- %ret = select i1 %cmp, i32 0, i32 %sub
- ret i32 %ret
-}
-
; Negative test
define i32 @test_eq_const_mismatch_i32(i32 %x0, i32 %x1) {
; CHECK-LABEL: test_eq_const_mismatch_i32:
@@ -771,3 +1035,51 @@ define i32 @test_eq0_bitwidth_mismatch_2(i32 %x0, i64 %x1) {
%ret = select i1 %cmp, i32 0, i32 %sub
ret i32 %ret
}
+
+; Negative test
+define i32 @test_ult_nonconst_op_mismatch_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_op_mismatch_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: add w8, w8, w2
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = add i32 %add, %x2
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_nonconst_unrelated_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: test_ult_nonconst_unrelated_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w1, w2
+; CHECK-NEXT: sub w8, w8, w3
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x1, %x2
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x3
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_nonconst_unrelated_2_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: test_ult_nonconst_unrelated_2_i32:
+; CHECK: // %bb.0:
+; CHECK-NEXT: add w8, w0, w1
+; CHECK-NEXT: cmp w2, w1
+; CHECK-NEXT: sub w8, w8, w3
+; CHECK-NEXT: csel w0, wzr, w8, lo
+; CHECK-NEXT: ret
+ %cmp = icmp ult i32 %x2, %x1
+ %add = add i32 %x0, %x1
+ %sub = sub i32 %add, %x3
+ %ret = select i1 %cmp, i32 0, i32 %sub
+ ret i32 %ret
+}
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I've pushed #121412 if you would like to rebase. Thanks.
AddOpConst->getAPIntValue() == -CmpOpConst->getAPIntValue();
};
BuildSubsOp = [&] {
return DAG.getConstant(CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Constants are OK to create if it helps remove the function pointers, they are simpler than other instructions (and feel like the lesser of two evils) :)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good. In this way, we can get rid of at least one std::function.
786fff8
to
f2222fe
Compare
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks - it might be possible to remove the other lambda too by creating the negative constant, I'm not sure if that is worth it or not.
LGTM. Thanks for the follow-up. Let me know if you are happy for this version to be submitted.
f2222fe
to
211b197
Compare
Commit 1eed469 added logic to reassociate a (add (add x y) -c) operand to a CSEL instruction with a comparison involving x and c (or a similar constant) in order to obtain a common (SUBS x c) instruction. This commit extends this logic to non-constants. In this way, we also reassociate a (sub (add x y) z) operand of a CSEL instruction to (add (sub x z) y) if the CSEL compares x and z, for example. Alive proof: https://alive2.llvm.org/ce/z/SEVpRm
211b197
to
e302dd6
Compare
Good suggestion. I've adjusted the code accordingly to get rid of the remaining lambda.
In case the latest changes are fine, I'd consider this ready.
Commit 1eed469 added logic to
reassociate a (add (add x y) -c) operand to a CSEL instruction with a
comparison involving x and c (or a similar constant) in order to obtain
a common (SUBS x c) instruction.
This commit extends this logic to non-constants. In this way, we also
reassociate a (sub (add x y) z) operand of a CSEL instruction to
(add (sub x z) y) if the CSEL compares x and z, for example.
Alive proof: https://alive2.llvm.org/ce/z/SEVpR