Skip to content

Commit 949bbdc

Browse files
authored
[InstCombine] Fold Minimum over Trailing/Leading Bits Counts (#90402)
The new transformation folds `umin(cttz(x), c)` to `cttz(x | (1 << c))` and `umin(ctlz(x), c)` to `ctlz(x | ((1 << (bitwidth - 1)) >> c))`. The transformation is only implemented for constant `c` so that it does not increase the number of instructions. The idea of the transformation is to set the bit at position `c` in the operand, counted from the lowest bit (for `cttz`) or from the highest bit (for `ctlz`). In this way, the `cttz` or `ctlz` instruction always returns at most `c`. Alive2 proofs: https://alive2.llvm.org/ce/z/y8Hdb8. Fixes #90000.
1 parent b22adf0 commit 949bbdc

File tree

2 files changed

+429
-0
lines changed

2 files changed

+429
-0
lines changed

llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1456,6 +1456,43 @@ static Value *simplifyReductionOperand(Value *Arg, bool CanReorderLanes) {
14561456
return UsedIndices.all() ? V : nullptr;
14571457
}
14581458

1459+
/// Fold an unsigned minimum of trailing or leading zero bits counts:
1460+
/// umin(cttz(CtOp, ZeroUndef), ConstOp) --> cttz(CtOp | (1 << ConstOp))
1461+
/// umin(ctlz(CtOp, ZeroUndef), ConstOp) --> ctlz(CtOp | (SignedMin
1462+
/// >> ConstOp))
1463+
template <Intrinsic::ID IntrID>
1464+
static Value *
1465+
foldMinimumOverTrailingOrLeadingZeroCount(Value *I0, Value *I1,
1466+
const DataLayout &DL,
1467+
InstCombiner::BuilderTy &Builder) {
1468+
static_assert(IntrID == Intrinsic::cttz || IntrID == Intrinsic::ctlz,
1469+
"This helper only supports cttz and ctlz intrinsics");
1470+
1471+
Value *CtOp;
1472+
Value *ZeroUndef;
1473+
if (!match(I0,
1474+
m_OneUse(m_Intrinsic<IntrID>(m_Value(CtOp), m_Value(ZeroUndef)))))
1475+
return nullptr;
1476+
1477+
unsigned BitWidth = I1->getType()->getScalarSizeInBits();
1478+
auto LessBitWidth = [BitWidth](auto &C) { return C.ult(BitWidth); };
1479+
if (!match(I1, m_CheckedInt(LessBitWidth)))
1480+
// We have a constant >= BitWidth (which can be handled by CVP)
1481+
// or a non-splat vector with elements < and >= BitWidth
1482+
return nullptr;
1483+
1484+
Type *Ty = I1->getType();
1485+
Constant *NewConst = ConstantFoldBinaryOpOperands(
1486+
IntrID == Intrinsic::cttz ? Instruction::Shl : Instruction::LShr,
1487+
IntrID == Intrinsic::cttz
1488+
? ConstantInt::get(Ty, 1)
1489+
: ConstantInt::get(Ty, APInt::getSignedMinValue(BitWidth)),
1490+
cast<Constant>(I1), DL);
1491+
return Builder.CreateBinaryIntrinsic(
1492+
IntrID, Builder.CreateOr(CtOp, NewConst),
1493+
ConstantInt::getTrue(ZeroUndef->getType()));
1494+
}
1495+
14591496
/// CallInst simplification. This mostly only handles folding of intrinsic
14601497
/// instructions. For normal calls, it allows visitCallBase to do the heavy
14611498
/// lifting.
@@ -1661,6 +1698,16 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) {
16611698
Value *Cmp = Builder.CreateICmpNE(I0, Zero);
16621699
return CastInst::Create(Instruction::ZExt, Cmp, II->getType());
16631700
}
1701+
// umin(cttz(x), const) --> cttz(x | (1 << const))
1702+
if (Value *FoldedCttz =
1703+
foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::cttz>(
1704+
I0, I1, DL, Builder))
1705+
return replaceInstUsesWith(*II, FoldedCttz);
1706+
// umin(ctlz(x), const) --> ctlz(x | (SignedMin >> const))
1707+
if (Value *FoldedCtlz =
1708+
foldMinimumOverTrailingOrLeadingZeroCount<Intrinsic::ctlz>(
1709+
I0, I1, DL, Builder))
1710+
return replaceInstUsesWith(*II, FoldedCtlz);
16641711
[[fallthrough]];
16651712
}
16661713
case Intrinsic::umax: {

0 commit comments

Comments
 (0)