diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index e4715018d84ca..93bb97933760d 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -24898,16 +24898,31 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
   SDValue SubsNode = N->getOperand(3);
   if (SubsNode.getOpcode() != AArch64ISD::SUBS || !SubsNode.hasOneUse())
     return SDValue();
-  auto *CmpOpConst = dyn_cast<ConstantSDNode>(SubsNode.getOperand(1));
-  if (!CmpOpConst)
-    return SDValue();
+  SDValue CmpOpToMatch = SubsNode.getOperand(1);
   SDValue CmpOpOther = SubsNode.getOperand(0);
   EVT VT = N->getValueType(0);
 
+  unsigned ExpectedOpcode;
+  SDValue ExpectedOp;
+  SDValue SubsOp;
+  auto *CmpOpConst = dyn_cast<ConstantSDNode>(CmpOpToMatch);
+  if (CmpOpConst) {
+    ExpectedOpcode = ISD::ADD;
+    ExpectedOp =
+        DAG.getConstant(-CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
+                        CmpOpConst->getValueType(0));
+    SubsOp = DAG.getConstant(CmpOpConst->getAPIntValue(), SDLoc(CmpOpConst),
+                             CmpOpConst->getValueType(0));
+  } else {
+    ExpectedOpcode = ISD::SUB;
+    ExpectedOp = CmpOpToMatch;
+    SubsOp = CmpOpToMatch;
+  }
+
   // Get the operand that can be reassociated with the SUBS instruction.
-  auto GetReassociationOp = [&](SDValue Op, APInt ExpectedConst) {
-    if (Op.getOpcode() != ISD::ADD)
+  auto GetReassociationOp = [&](SDValue Op, SDValue ExpectedOp) {
+    if (Op.getOpcode() != ExpectedOpcode)
       return SDValue();
     if (Op.getOperand(0).getOpcode() != ISD::ADD ||
         !Op.getOperand(0).hasOneUse())
       return SDValue();
@@ -24918,24 +24933,21 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
       std::swap(X, Y);
     if (X != CmpOpOther)
       return SDValue();
-    auto *AddOpConst = dyn_cast<ConstantSDNode>(Op.getOperand(1));
-    if (!AddOpConst || AddOpConst->getAPIntValue() != ExpectedConst)
+    if (ExpectedOp != Op.getOperand(1))
       return SDValue();
     return Y;
   };
 
   // Try the reassociation using the given constant and condition code.
-  auto Fold = [&](APInt NewCmpConst, AArch64CC::CondCode NewCC) {
-    APInt ExpectedConst = -NewCmpConst;
-    SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedConst);
-    SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedConst);
+  auto Fold = [&](AArch64CC::CondCode NewCC, SDValue ExpectedOp,
+                  SDValue SubsOp) {
+    SDValue TReassocOp = GetReassociationOp(N->getOperand(0), ExpectedOp);
+    SDValue FReassocOp = GetReassociationOp(N->getOperand(1), ExpectedOp);
     if (!TReassocOp && !FReassocOp)
       return SDValue();
 
     SDValue NewCmp = DAG.getNode(AArch64ISD::SUBS, SDLoc(SubsNode),
-                                 DAG.getVTList(VT, MVT_CC), CmpOpOther,
-                                 DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
-                                                 CmpOpConst->getValueType(0)));
+                                 DAG.getVTList(VT, MVT_CC), CmpOpOther, SubsOp);
 
     auto Reassociate = [&](SDValue ReassocOp, unsigned OpNum) {
       if (!ReassocOp)
@@ -24957,9 +24969,19 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
 
   // First, try to eliminate the compare instruction by searching for a
   // subtraction with the same constant.
-  if (SDValue R = Fold(CmpOpConst->getAPIntValue(), CC))
+  if (SDValue R = Fold(CC, ExpectedOp, SubsOp))
     return R;
 
+  if (!CmpOpConst) {
+    // Try again with the operands of the SUBS instruction and the condition
+    // swapped. Due to canonicalization, this only helps for non-constant
+    // operands of the SUBS instruction.
+    std::swap(CmpOpToMatch, CmpOpOther);
+    if (SDValue R = Fold(getSwappedCondition(CC), CmpOpToMatch, CmpOpToMatch))
+      return R;
+    return SDValue();
+  }
+
   if ((CC == AArch64CC::EQ || CC == AArch64CC::NE) && !CmpOpConst->isZero())
     return SDValue();
 
@@ -24971,7 +24993,11 @@ static SDValue reassociateCSELOperandsForCSE(SDNode *N, SelectionDAG &DAG) {
   // them here but check for them nevertheless to be on the safe side.
   auto CheckedFold = [&](bool Check, APInt NewCmpConst,
                         AArch64CC::CondCode NewCC) {
-    return Check ? Fold(NewCmpConst, NewCC) : SDValue();
+    auto ExpectedOp = DAG.getConstant(-NewCmpConst, SDLoc(CmpOpConst),
+                                      CmpOpConst->getValueType(0));
+    auto SubsOp = DAG.getConstant(NewCmpConst, SDLoc(CmpOpConst),
+                                  CmpOpConst->getValueType(0));
+    return Check ? Fold(NewCC, ExpectedOp, SubsOp) : SDValue();
   };
   switch (CC) {
   case AArch64CC::EQ:
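
The new fallback path above calls getSwappedCondition(CC), a helper that is not part of this diff. To be correct, it must map each condition code to the one that holds with the comparison operands exchanged, which is exactly the mapping pinned down by the *_comm_* tests in the updated test file below (ult becomes hi, ule becomes hs, slt becomes gt, and so on). What follows is a minimal sketch of what such a helper presumably looks like; the name matches the call site, but the signature and the mapping are reconstructed from the tests, not taken from the patch:

// Hypothetical sketch: return the condition code CC' such that (a CC b)
// is equivalent to (b CC' a). EQ/NE are symmetric under operand swap;
// the ordered comparisons flip direction but keep their signedness.
static AArch64CC::CondCode getSwappedCondition(AArch64CC::CondCode CC) {
  switch (CC) {
  case AArch64CC::EQ: return AArch64CC::EQ;
  case AArch64CC::NE: return AArch64CC::NE;
  case AArch64CC::LO: return AArch64CC::HI; // unsigned <   ->  unsigned >
  case AArch64CC::LS: return AArch64CC::HS; // unsigned <=  ->  unsigned >=
  case AArch64CC::HI: return AArch64CC::LO; // unsigned >   ->  unsigned <
  case AArch64CC::HS: return AArch64CC::LS; // unsigned >=  ->  unsigned <=
  case AArch64CC::LT: return AArch64CC::GT; // signed <     ->  signed >
  case AArch64CC::LE: return AArch64CC::GE; // signed <=    ->  signed >=
  case AArch64CC::GT: return AArch64CC::LT; // signed >     ->  signed <
  case AArch64CC::GE: return AArch64CC::LE; // signed >=    ->  signed <=
  default:
    return AArch64CC::Invalid; // flag-only codes have no swapped form
  }
}
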
diff --git a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
index d8904cc6e35e3..e745326323329 100644
--- a/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
+++ b/llvm/test/CodeGen/AArch64/csel-cmp-cse.ll
@@ -335,6 +335,300 @@ define i32 @test_eq0_multi_use_sub_i32(i32 %x0, i32 %x1) {
   ret i32 %ret
 }
 
+define i32 @test_eq_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, eq
+; CHECK-NEXT:    ret
+  %cmp = icmp eq i32 %x1, %x2
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ne_nonconst_sub_add_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ne_nonconst_sub_add_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, ne
+; CHECK-NEXT:    ret
+  %cmp = icmp ne i32 %x1, %x2
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ule_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ule_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, ls
+; CHECK-NEXT:    ret
+  %cmp = icmp ule i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ugt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ugt_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, hi
+; CHECK-NEXT:    ret
+  %cmp = icmp ugt i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_uge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_uge_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, hs
+; CHECK-NEXT:    ret
+  %cmp = icmp uge i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_slt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_slt_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, lt
+; CHECK-NEXT:    ret
+  %cmp = icmp slt i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sle_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sle_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, le
+; CHECK-NEXT:    ret
+  %cmp = icmp sle i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sgt_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sgt_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, gt
+; CHECK-NEXT:    ret
+  %cmp = icmp sgt i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sge_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sge_nonconst_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, ge
+; CHECK-NEXT:    ret
+  %cmp = icmp sge i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i64 @test_ult_nonconst_i64(i64 %x0, i64 %x1, i64 %x2) {
+; CHECK-LABEL: test_ult_nonconst_i64:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs x8, x1, x2
+; CHECK-NEXT:    add x8, x8, x0
+; CHECK-NEXT:    csel x0, xzr, x8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i64 %x1, %x2
+  %add = add i64 %x0, %x1
+  %sub = sub i64 %add, %x2
+  %ret = select i1 %cmp, i64 0, i64 %sub
+  ret i64 %ret
+}
+
+define i32 @test_eq_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, eq
+; CHECK-NEXT:    ret
+  %cmp = icmp eq i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ne_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ne_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, ne
+; CHECK-NEXT:    ret
+  %cmp = icmp ne i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ult_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, hi
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ule_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ule_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, hs
+; CHECK-NEXT:    ret
+  %cmp = icmp ule i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_ugt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ugt_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ugt i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_uge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_uge_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, ls
+; CHECK-NEXT:    ret
+  %cmp = icmp uge i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_slt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_slt_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, gt
+; CHECK-NEXT:    ret
+  %cmp = icmp slt i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sle_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sle_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, ge
+; CHECK-NEXT:    ret
+  %cmp = icmp sle i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sgt_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sgt_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, lt
+; CHECK-NEXT:    ret
+  %cmp = icmp sgt i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+define i32 @test_sge_nonconst_sub_add_comm_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_sge_nonconst_sub_add_comm_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    subs w8, w1, w2
+; CHECK-NEXT:    add w8, w8, w0
+; CHECK-NEXT:    csel w0, wzr, w8, le
+; CHECK-NEXT:    ret
+  %cmp = icmp sge i32 %x2, %x1
+  %add = add nuw i32 %x0, %x1
+  %sub = sub i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
 ; Negative test
 define i32 @test_eq0_multi_use_cmp_i32(i32 %x0, i32 %x1) {
 ; CHECK-LABEL: test_eq0_multi_use_cmp_i32:
@@ -421,22 +715,6 @@ define i32 @test_ugtsmax_sub_add_i32(i32 %x0, i32 %x1) {
   ret i32 %ret
 }
 
-; Negative test
-define i32 @test_ult_nonconst_i32(i32 %x0, i32 %x1, i32 %x2) {
-; CHECK-LABEL: test_ult_nonconst_i32:
-; CHECK:       // %bb.0:
-; CHECK-NEXT:    add w8, w0, w1
-; CHECK-NEXT:    cmp w1, w2
-; CHECK-NEXT:    sub w8, w8, w2
-; CHECK-NEXT:    csel w0, wzr, w8, lo
-; CHECK-NEXT:    ret
-  %cmp = icmp ult i32 %x1, %x2
-  %add = add i32 %x0, %x1
-  %sub = sub i32 %add, %x2
-  %ret = select i1 %cmp, i32 0, i32 %sub
-  ret i32 %ret
-}
-
 ; Negative test
 define i32 @test_eq_const_mismatch_i32(i32 %x0, i32 %x1) {
 ; CHECK-LABEL: test_eq_const_mismatch_i32:
@@ -629,6 +907,40 @@ define i16 @test_eq0_sub_add_i16(i16 %x0, i16 %x1) {
   ret i16 %ret
 }
 
+; Negative test
+define i8 @test_eq_nonconst_sub_add_i8(i8 %x0, i8 %x1, i8 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_i8:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w1, #0xff
+; CHECK-NEXT:    add w9, w0, w1
+; CHECK-NEXT:    sub w9, w9, w2
+; CHECK-NEXT:    cmp w8, w2, uxtb
+; CHECK-NEXT:    csel w0, wzr, w9, eq
+; CHECK-NEXT:    ret
+  %cmp = icmp eq i8 %x1, %x2
+  %add = add nuw i8 %x0, %x1
+  %sub = sub i8 %add, %x2
+  %ret = select i1 %cmp, i8 0, i8 %sub
+  ret i8 %ret
+}
+
+; Negative test
+define i16 @test_eq_nonconst_sub_add_i16(i16 %x0, i16 %x1, i16 %x2) {
+; CHECK-LABEL: test_eq_nonconst_sub_add_i16:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    and w8, w1, #0xffff
+; CHECK-NEXT:    add w9, w0, w1
+; CHECK-NEXT:    sub w9, w9, w2
+; CHECK-NEXT:    cmp w8, w2, uxth
+; CHECK-NEXT:    csel w0, wzr, w9, eq
+; CHECK-NEXT:    ret
+  %cmp = icmp eq i16 %x1, %x2
+  %add = add nuw i16 %x0, %x1
+  %sub = sub i16 %add, %x2
+  %ret = select i1 %cmp, i16 0, i16 %sub
+  ret i16 %ret
+}
+
 ; Negative test
 define i32 @test_ule_unsigned_overflow(i32 %x0, i32 %x1) {
 ; CHECK-LABEL: test_ule_unsigned_overflow:
@@ -771,3 +1083,51 @@ define i32 @test_eq0_bitwidth_mismatch_2(i32 %x0, i64 %x1) {
   %ret = select i1 %cmp, i32 0, i32 %sub
   ret i32 %ret
 }
+
+; Negative test
+define i32 @test_ult_nonconst_op_mismatch_i32(i32 %x0, i32 %x1, i32 %x2) {
+; CHECK-LABEL: test_ult_nonconst_op_mismatch_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    add w8, w8, w2
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = add i32 %add, %x2
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_nonconst_unrelated_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: test_ult_nonconst_unrelated_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w1, w2
+; CHECK-NEXT:    sub w8, w8, w3
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x1, %x2
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x3
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
+
+; Negative test
+define i32 @test_ult_nonconst_unrelated_2_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
+; CHECK-LABEL: test_ult_nonconst_unrelated_2_i32:
+; CHECK:       // %bb.0:
+; CHECK-NEXT:    add w8, w0, w1
+; CHECK-NEXT:    cmp w2, w1
+; CHECK-NEXT:    sub w8, w8, w3
+; CHECK-NEXT:    csel w0, wzr, w8, lo
+; CHECK-NEXT:    ret
+  %cmp = icmp ult i32 %x2, %x1
+  %add = add i32 %x0, %x1
+  %sub = sub i32 %add, %x3
+  %ret = select i1 %cmp, i32 0, i32 %sub
+  ret i32 %ret
+}
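
The rewrite behind all of the nonconst tests is plain reassociation: (x0 + x1) - x2 == (x1 - x2) + x0, so once the subtraction is rewritten that way, the flag-setting SUBS that implements the comparison also produces x1 - x2 and the separate cmp/sub pair collapses into it. The old and new expectations for test_ult_nonconst_i32 in this diff show the payoff directly:

  ; before this patch (removed negative test):
  add  w8, w0, w1
  cmp  w1, w2
  sub  w8, w8, w2
  csel w0, wzr, w8, lo

  ; after this patch (new positive test):
  subs w8, w1, w2
  add  w8, w8, w0
  csel w0, wzr, w8, lo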