Skip to content

Commit 51aa741

Browse files
[X86][GFNI] Fix style and logic for CTLZ vXi8 lowering
1 parent e789fab commit 51aa741

File tree

2 files changed

+152
-172
lines changed

2 files changed

+152
-172
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 6 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -28988,7 +28988,7 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
2898828988
return LowerVectorCTLZ_AVX512CDI(Op, DAG, Subtarget);
2898928989

2899028990
// Decompose 256-bit ops into smaller 128-bit ops.
28991-
if (VT.is256BitVector() && !Subtarget.hasInt256())
28991+
if (VT.is256BitVector() && !Subtarget.hasInt256())
2899228992
return splitVectorIntUnary(Op, DAG, DL);
2899328993

2899428994
// Decompose 512-bit ops into smaller 256-bit ops.
@@ -28998,6 +28998,7 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
2899828998
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
2899928999
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
2900029000
}
29001+
2900129002
static SDValue LowerVectorCTLZ_GFNI(SDValue Op, const SDLoc &DL,
2900229003
SelectionDAG &DAG,
2900329004
const X86Subtarget &Subtarget) {
@@ -29007,20 +29008,13 @@ static SDValue LowerVectorCTLZ_GFNI(SDValue Op, const SDLoc &DL,
2900729008
assert(VT.isVector() && VT.getVectorElementType() == MVT::i8 &&
2900829009
"Expected vXi8 input for GFNI-based CTLZ lowering");
2900929010

29010-
// Step 1: Bit-reverse input
2901129011
SDValue Reversed = DAG.getNode(ISD::BITREVERSE, DL, VT, Input);
2901229012

29013-
// Step 2: Add 0xFF
29014-
SDValue AddVec = DAG.getAllOnesConstant(DL, VT);
29015-
SDValue Summed = DAG.getNode(ISD::ADD, DL, VT, Reversed, AddVec);
29016-
29017-
// Step 3: Not(Summed)
29018-
SDValue NotSummed = DAG.getNOT(DL, Summed, VT);
29019-
29020-
// Step 4: AND with Reversed
29021-
SDValue Filtered = DAG.getNode(ISD::AND, DL, VT, NotSummed, Reversed);
29013+
SDValue Zero = DAG.getConstant(0, DL, MVT::i8);
29014+
SDValue ZeroVec = DAG.getSplatBuildVector(VT, DL, Zero);
29015+
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, ZeroVec, Reversed);
29016+
SDValue Filtered = DAG.getNode(ISD::AND, DL, VT, Reversed, Neg);
2902229017

29023-
// Step 5: Apply CTTZ LUT using GF2P8AFFINEQB
2902429018
MVT VT64 = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
2902529019
SDValue CTTZConst = DAG.getConstant(0xAACCF0FF00000000ULL, DL, VT64);
2902629020
SDValue CTTZMatrix = DAG.getBitcast(VT, CTTZConst);

0 commit comments

Comments (0)