@@ -28988,7 +28988,7 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
28988
28988
return LowerVectorCTLZ_AVX512CDI(Op, DAG, Subtarget);
28989
28989
28990
28990
// Decompose 256-bit ops into smaller 128-bit ops.
28991
- if (VT.is256BitVector() && !Subtarget.hasInt256())
28991
+ if (VT.is256BitVector() && !Subtarget.hasInt256())
28992
28992
return splitVectorIntUnary(Op, DAG, DL);
28993
28993
28994
28994
// Decompose 512-bit ops into smaller 256-bit ops.
@@ -28998,6 +28998,7 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL,
28998
28998
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB");
28999
28999
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG);
29000
29000
}
29001
+
29001
29002
static SDValue LowerVectorCTLZ_GFNI(SDValue Op, const SDLoc &DL,
29002
29003
SelectionDAG &DAG,
29003
29004
const X86Subtarget &Subtarget) {
@@ -29007,20 +29008,13 @@ static SDValue LowerVectorCTLZ_GFNI(SDValue Op, const SDLoc &DL,
29007
29008
assert(VT.isVector() && VT.getVectorElementType() == MVT::i8 &&
29008
29009
"Expected vXi8 input for GFNI-based CTLZ lowering");
29009
29010
29010
- // Step 1: Bit-reverse input
29011
29011
SDValue Reversed = DAG.getNode(ISD::BITREVERSE, DL, VT, Input);
29012
29012
29013
- // Step 2: Add 0xFF
29014
- SDValue AddVec = DAG.getAllOnesConstant(DL, VT);
29015
- SDValue Summed = DAG.getNode(ISD::ADD, DL, VT, Reversed, AddVec);
29016
-
29017
- // Step 3: Not(Summed)
29018
- SDValue NotSummed = DAG.getNOT(DL, Summed, VT);
29019
-
29020
- // Step 4: AND with Reversed
29021
- SDValue Filtered = DAG.getNode(ISD::AND, DL, VT, NotSummed, Reversed);
29013
+ SDValue Zero = DAG.getConstant(0, DL, MVT::i8);
29014
+ SDValue ZeroVec = DAG.getSplatBuildVector(VT, DL, Zero);
29015
+ SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, ZeroVec, Reversed);
29016
+ SDValue Filtered = DAG.getNode(ISD::AND, DL, VT, Reversed, Neg);
29022
29017
29023
- // Step 5: Apply CTTZ LUT using GF2P8AFFINEQB
29024
29018
MVT VT64 = MVT::getVectorVT(MVT::i64, VT.getSizeInBits() / 64);
29025
29019
SDValue CTTZConst = DAG.getConstant(0xAACCF0FF00000000ULL, DL, VT64);
29026
29020
SDValue CTTZMatrix = DAG.getBitcast(VT, CTTZConst);
0 commit comments