-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[X86] Use GFNI for LZCNT vXi8 ops #141888
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-x86 Author: 黃國庭 (houngkoungting) ChangesThis PULL REQUEST implements vXi8 ctlz lowering for X86 using GFNI instructions as suggested in #140729 Patch is 514.47 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/141888.diff 3 Files Affected:
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 99a82cab384aa..7918a8e72adf6 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -170,14 +170,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) {
static const struct {
const RTLIB::Libcall Op;
- const char * const Name;
+ const char *const Name;
const CallingConv::ID CC;
} LibraryCalls[] = {
- { RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall },
- { RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall },
- { RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall },
- { RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall },
- { RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall },
+ {RTLIB::SDIV_I64, "_alldiv", CallingConv::X86_StdCall},
+ {RTLIB::UDIV_I64, "_aulldiv", CallingConv::X86_StdCall},
+ {RTLIB::SREM_I64, "_allrem", CallingConv::X86_StdCall},
+ {RTLIB::UREM_I64, "_aullrem", CallingConv::X86_StdCall},
+ {RTLIB::MUL_I64, "_allmul", CallingConv::X86_StdCall},
};
for (const auto &LC : LibraryCalls) {
@@ -210,10 +210,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// We don't accept any truncstore of integer registers.
setTruncStoreAction(MVT::i64, MVT::i32, Expand);
setTruncStoreAction(MVT::i64, MVT::i16, Expand);
- setTruncStoreAction(MVT::i64, MVT::i8 , Expand);
+ setTruncStoreAction(MVT::i64, MVT::i8, Expand);
setTruncStoreAction(MVT::i32, MVT::i16, Expand);
- setTruncStoreAction(MVT::i32, MVT::i8 , Expand);
- setTruncStoreAction(MVT::i16, MVT::i8, Expand);
+ setTruncStoreAction(MVT::i32, MVT::i8, Expand);
+ setTruncStoreAction(MVT::i16, MVT::i8, Expand);
setTruncStoreAction(MVT::f64, MVT::f32, Expand);
@@ -225,106 +225,106 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// Integer absolute.
if (Subtarget.canUseCMOV()) {
- setOperationAction(ISD::ABS , MVT::i16 , Custom);
- setOperationAction(ISD::ABS , MVT::i32 , Custom);
+ setOperationAction(ISD::ABS, MVT::i16, Custom);
+ setOperationAction(ISD::ABS, MVT::i32, Custom);
if (Subtarget.is64Bit())
- setOperationAction(ISD::ABS , MVT::i64 , Custom);
+ setOperationAction(ISD::ABS, MVT::i64, Custom);
}
// Absolute difference.
for (auto Op : {ISD::ABDS, ISD::ABDU}) {
- setOperationAction(Op , MVT::i8 , Custom);
- setOperationAction(Op , MVT::i16 , Custom);
- setOperationAction(Op , MVT::i32 , Custom);
+ setOperationAction(Op, MVT::i8, Custom);
+ setOperationAction(Op, MVT::i16, Custom);
+ setOperationAction(Op, MVT::i32, Custom);
if (Subtarget.is64Bit())
- setOperationAction(Op , MVT::i64 , Custom);
+ setOperationAction(Op, MVT::i64, Custom);
}
// Signed saturation subtraction.
- setOperationAction(ISD::SSUBSAT , MVT::i8 , Custom);
- setOperationAction(ISD::SSUBSAT , MVT::i16 , Custom);
- setOperationAction(ISD::SSUBSAT , MVT::i32 , Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i8, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i16, Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i32, Custom);
if (Subtarget.is64Bit())
- setOperationAction(ISD::SSUBSAT , MVT::i64 , Custom);
+ setOperationAction(ISD::SSUBSAT, MVT::i64, Custom);
// Funnel shifts.
for (auto ShiftOp : {ISD::FSHL, ISD::FSHR}) {
// For slow shld targets we only lower for code size.
LegalizeAction ShiftDoubleAction = Subtarget.isSHLDSlow() ? Custom : Legal;
- setOperationAction(ShiftOp , MVT::i8 , Custom);
- setOperationAction(ShiftOp , MVT::i16 , Custom);
- setOperationAction(ShiftOp , MVT::i32 , ShiftDoubleAction);
+ setOperationAction(ShiftOp, MVT::i8, Custom);
+ setOperationAction(ShiftOp, MVT::i16, Custom);
+ setOperationAction(ShiftOp, MVT::i32, ShiftDoubleAction);
if (Subtarget.is64Bit())
- setOperationAction(ShiftOp , MVT::i64 , ShiftDoubleAction);
+ setOperationAction(ShiftOp, MVT::i64, ShiftDoubleAction);
}
if (!Subtarget.useSoftFloat()) {
// Promote all UINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have this
// operation.
- setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i8, Promote);
- setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Promote);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i16, Promote);
// We have an algorithm for SSE2, and we turn this into a 64-bit
// FILD or VCVTUSI2SS/SD for other targets.
- setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i32, Custom);
// We have an algorithm for SSE2->double, and we turn this into a
// 64-bit FILD followed by conditional FADD for other targets.
- setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_UINT_TO_FP, MVT::i64, Custom);
// Promote i8 SINT_TO_FP to larger SINT_TO_FP's, as X86 doesn't have
// this operation.
- setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i8, Promote);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i8, Promote);
// SSE has no i16 to fp conversion, only i32. We promote in the handler
// to allow f80 to use i16 and f64 to use i16 with sse1 only
- setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i16, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i16, Custom);
// f32 and f64 cases are Legal with SSE1/SSE2, f80 case is not
- setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i32, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
- setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
+ setOperationAction(ISD::SINT_TO_FP, MVT::i64, Custom);
setOperationAction(ISD::STRICT_SINT_TO_FP, MVT::i64, Custom);
// Promote i8 FP_TO_SINT to larger FP_TO_SINTS's, as X86 doesn't have
// this operation.
- setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i8, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
- setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
+ setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i16, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i16, Custom);
- setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i32, Custom);
// In 32-bit mode these are custom lowered. In 64-bit mode F32 and F64
// are Legal, f80 is custom lowered.
- setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_SINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_SINT, MVT::i64, Custom);
// Handle FP_TO_UINT by promoting the destination to a larger signed
// conversion.
- setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i8, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
- setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
+ setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i8, Promote);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i16, Promote);
// FIXME: This doesn't generate invalid exception when it should. PR44019.
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i16, Promote);
- setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i32, Custom);
- setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i64, Custom);
setOperationAction(ISD::STRICT_FP_TO_UINT, MVT::i64, Custom);
- setOperationAction(ISD::LRINT, MVT::f32, Custom);
- setOperationAction(ISD::LRINT, MVT::f64, Custom);
- setOperationAction(ISD::LLRINT, MVT::f32, Custom);
- setOperationAction(ISD::LLRINT, MVT::f64, Custom);
+ setOperationAction(ISD::LRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LRINT, MVT::f64, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f32, Custom);
+ setOperationAction(ISD::LLRINT, MVT::f64, Custom);
if (!Subtarget.is64Bit()) {
- setOperationAction(ISD::LRINT, MVT::i64, Custom);
+ setOperationAction(ISD::LRINT, MVT::i64, Custom);
setOperationAction(ISD::LLRINT, MVT::i64, Custom);
}
}
@@ -332,7 +332,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasSSE2()) {
// Custom lowering for saturating float to int conversions.
// We handle promotion to larger result types manually.
- for (MVT VT : { MVT::i8, MVT::i16, MVT::i32 }) {
+ for (MVT VT : {MVT::i8, MVT::i16, MVT::i32}) {
setOperationAction(ISD::FP_TO_UINT_SAT, VT, Custom);
setOperationAction(ISD::FP_TO_SINT_SAT, VT, Custom);
}
@@ -367,17 +367,17 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// TODO: when we have SSE, these could be more efficient, by using movd/movq.
if (!Subtarget.hasSSE2()) {
- setOperationAction(ISD::BITCAST , MVT::f32 , Expand);
- setOperationAction(ISD::BITCAST , MVT::i32 , Expand);
+ setOperationAction(ISD::BITCAST, MVT::f32, Expand);
+ setOperationAction(ISD::BITCAST, MVT::i32, Expand);
setOperationAction(ISD::FCANONICALIZE, MVT::f32, Custom);
setOperationAction(ISD::FCANONICALIZE, MVT::f64, Custom);
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::BITCAST , MVT::f64 , Expand);
+ setOperationAction(ISD::BITCAST, MVT::f64, Expand);
// Without SSE, i64->f64 goes through memory.
- setOperationAction(ISD::BITCAST , MVT::i64 , Expand);
+ setOperationAction(ISD::BITCAST, MVT::i64, Expand);
}
} else if (!Subtarget.is64Bit())
- setOperationAction(ISD::BITCAST , MVT::i64 , Custom);
+ setOperationAction(ISD::BITCAST, MVT::i64, Custom);
// Scalar integer divide and remainder are lowered to use operations that
// produce two results, to match the available instructions. This exposes
@@ -389,7 +389,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// (low) operations are left as Legal, as there are single-result
// instructions for this in x86. Using the two-result multiply instructions
// when both high and low results are needed must be arranged by dagcombine.
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
setOperationAction(ISD::MULHS, VT, Expand);
setOperationAction(ISD::MULHU, VT, Expand);
setOperationAction(ISD::SDIV, VT, Expand);
@@ -398,47 +398,47 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setOperationAction(ISD::UREM, VT, Expand);
}
- setOperationAction(ISD::BR_JT , MVT::Other, Expand);
- setOperationAction(ISD::BRCOND , MVT::Other, Custom);
- for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128,
- MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
- setOperationAction(ISD::BR_CC, VT, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BRCOND, MVT::Other, Custom);
+ for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128, MVT::i8, MVT::i16,
+ MVT::i32, MVT::i64}) {
+ setOperationAction(ISD::BR_CC, VT, Expand);
setOperationAction(ISD::SELECT_CC, VT, Expand);
}
if (Subtarget.is64Bit())
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i32, Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16 , Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8 , Legal);
- setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1 , Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i8, Legal);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
- setOperationAction(ISD::FREM , MVT::f32 , Expand);
- setOperationAction(ISD::FREM , MVT::f64 , Expand);
- setOperationAction(ISD::FREM , MVT::f80 , Expand);
- setOperationAction(ISD::FREM , MVT::f128 , Expand);
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::FREM, MVT::f64, Expand);
+ setOperationAction(ISD::FREM, MVT::f80, Expand);
+ setOperationAction(ISD::FREM, MVT::f128, Expand);
if (!Subtarget.useSoftFloat() && Subtarget.hasX87()) {
- setOperationAction(ISD::GET_ROUNDING , MVT::i32 , Custom);
- setOperationAction(ISD::SET_ROUNDING , MVT::Other, Custom);
- setOperationAction(ISD::GET_FPENV_MEM , MVT::Other, Custom);
- setOperationAction(ISD::SET_FPENV_MEM , MVT::Other, Custom);
- setOperationAction(ISD::RESET_FPENV , MVT::Other, Custom);
+ setOperationAction(ISD::GET_ROUNDING, MVT::i32, Custom);
+ setOperationAction(ISD::SET_ROUNDING, MVT::Other, Custom);
+ setOperationAction(ISD::GET_FPENV_MEM, MVT::Other, Custom);
+ setOperationAction(ISD::SET_FPENV_MEM, MVT::Other, Custom);
+ setOperationAction(ISD::RESET_FPENV, MVT::Other, Custom);
}
// Promote the i8 variants and force them on up to i32 which has a shorter
// encoding.
- setOperationPromotedToType(ISD::CTTZ , MVT::i8 , MVT::i32);
- setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ, MVT::i8, MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i8, MVT::i32);
// Promoted i16. tzcntw has a false dependency on Intel CPUs. For BSF, we emit
// a REP prefix to encode it as TZCNT for modern CPUs so it makes sense to
// promote that too.
- setOperationPromotedToType(ISD::CTTZ , MVT::i16 , MVT::i32);
- setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16 , MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ, MVT::i16, MVT::i32);
+ setOperationPromotedToType(ISD::CTTZ_ZERO_UNDEF, MVT::i16, MVT::i32);
if (!Subtarget.hasBMI()) {
- setOperationAction(ISD::CTTZ , MVT::i32 , Custom);
- setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32 , Legal);
+ setOperationAction(ISD::CTTZ, MVT::i32, Custom);
+ setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i32, Legal);
if (Subtarget.is64Bit()) {
- setOperationAction(ISD::CTTZ , MVT::i64 , Custom);
+ setOperationAction(ISD::CTTZ, MVT::i64, Custom);
setOperationAction(ISD::CTTZ_ZERO_UNDEF, MVT::i64, Legal);
}
}
@@ -446,13 +446,13 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
if (Subtarget.hasLZCNT()) {
// When promoting the i8 variants, force them to i32 for a shorter
// encoding.
- setOperationPromotedToType(ISD::CTLZ , MVT::i8 , MVT::i32);
- setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8 , MVT::i32);
+ setOperationPromotedToType(ISD::CTLZ, MVT::i8, MVT::i32);
+ setOperationPromotedToType(ISD::CTLZ_ZERO_UNDEF, MVT::i8, MVT::i32);
} else {
for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
- setOperationAction(ISD::CTLZ , VT, Custom);
+ setOperationAction(ISD::CTLZ, VT, Custom);
setOperationAction(ISD::CTLZ_ZERO_UNDEF, VT, Custom);
}
}
@@ -497,36 +497,36 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
// on the dest that popcntl hasn't had since Cannon Lake.
setOperationPromotedToType(ISD::CTPOP, MVT::i16, MVT::i32);
} else {
- setOperationAction(ISD::CTPOP , MVT::i8 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i16 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i32 , Custom);
- setOperationAction(ISD::CTPOP , MVT::i64 , Custom);
+ setOperationAction(ISD::CTPOP, MVT::i8, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i16, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i32, Custom);
+ setOperationAction(ISD::CTPOP, MVT::i64, Custom);
}
- setOperationAction(ISD::READCYCLECOUNTER , MVT::i64 , Custom);
+ setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom);
if (!Subtarget.hasMOVBE())
- setOperationAction(ISD::BSWAP , MVT::i16 , Expand);
+ setOperationAction(ISD::BSWAP, MVT::i16, Expand);
// X86 wants to expand cmov itself.
- for (auto VT : { MVT::f32, MVT::f64, MVT::f80, MVT::f128 }) {
+ for (auto VT : {MVT::f32, MVT::f64, MVT::f80, MVT::f128}) {
setOperationAction(ISD::SELECT, VT, Custom);
setOperationAction(ISD::SETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCC, VT, Custom);
setOperationAction(ISD::STRICT_FSETCCS, VT, Custom);
}
- for (auto VT : { MVT::i8, MVT::i16, MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i8, MVT::i16, MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SELECT, VT, Custom);
- setOperationAction(ISD::SETCC, VT, Custom);
+ setOperationAction(ISD::SETCC, VT, Custom);
}
// Custom action for SELECT MMX and expand action for SELECT_CC MMX
setOperationAction(ISD::SELECT, MVT::x86mmx, Custom);
setOperationAction(ISD::SELECT_CC, MVT::x86mmx, Expand);
- setOperationAction(ISD::EH_RETURN , MVT::Other, Custom);
+ setOperationAction(ISD::EH_RETURN, MVT::Other, Custom);
// NOTE: EH_SJLJ_SETJMP/_LONGJMP are not recommended, since
// LLVM/Clang supports zero-cost DWARF and SEH exception handling.
setOperationAction(ISD::EH_SJLJ_SETJMP, MVT::i32, Custom);
@@ -536,19 +536,19 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
setLibcallName(RTLIB::UNWIND_RESUME, "_Unwind_SjLj_Resume");
// Darwin ABI issue.
- for (auto VT : { MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
- setOperationAction(ISD::ConstantPool , VT, Custom);
- setOperationAction(ISD::JumpTable , VT, Custom);
- setOperationAction(ISD::GlobalAddress , VT, Custom);
+ setOperationAction(ISD::ConstantPool, VT, Custom);
+ setOperationAction(ISD::JumpTable, VT, Custom);
+ setOperationAction(ISD::GlobalAddress, VT, Custom);
setOperationAction(ISD::GlobalTLSAddress, VT, Custom);
- setOperationAction(ISD::ExternalSymbol , VT, Custom);
- setOperationAction(ISD::BlockAddress , VT, Custom);
+ setOperationAction(ISD::ExternalSymbol, VT, Custom);
+ setOperationAction(ISD::BlockAddress, VT, Custom);
}
// 64-bit shl, sra, srl (iff 32-bit x86)
- for (auto VT : { MVT::i32, MVT::i64 }) {
+ for (auto VT : {MVT::i32, MVT::i64}) {
if (VT == MVT::i64 && !Subtarget.is64Bit())
continue;
setOperationAction(ISD::SHL_PARTS, VT, Custom);
@@ -557,12 +557,12 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
}
if (Subtarget.hasSSEPrefetch())
- setOperationAction(ISD::PREFETCH , MVT::Other, Custom);
+ setOperationAction(ISD::PREFETCH, MVT::Other, Custom);
- setOperationAction(ISD::ATOMIC_FENCE , MVT::Other, Custom);...
[truncated]
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Please regenerate the following test files (they should have reported errors when you ran ninja check-llvm-codegen-x86
).
codegen-no-uselist-constantdata.ll
ctlz-gfni.ll
urem-seteq-illegal-types.ll
vec_insert-5.ll
vector-shuffle-combining.ll
llvm/test/CodeGen/X86/ctlz-gfni.ll
Outdated
@@ -0,0 +1,16 @@ | |||
; RUN: llc -mtriple=x86_64 -mattr=+gfni,+ssse3 < %s | FileCheck %s |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
theres no need for this new test file - the existing test coverage in gfni-lzcnt.ll should be enough
@@ -170,14 +170,14 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, | |||
if (Subtarget.isTargetWindowsMSVC() || Subtarget.isTargetWindowsItanium()) { | |||
static const struct { | |||
const RTLIB::Libcall Op; | |||
const char * const Name; | |||
const char *const Name; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
don't clang-format code that you haven't touched - this has made reviewing your patch incredibly difficult.
@RKSimon I've updated the latest version . Let me know if it looks good now, thanks!(ninja check-llvm is pass) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for looking at this!
Take a look at https://zig.godbolt.org/z/4GfbWajPT - the bit twiddling can be simplified to avoid the ANDNOT
@@ -28998,6 +28998,35 @@ static SDValue LowerVectorCTLZ(SDValue Op, const SDLoc &DL, | |||
assert(Subtarget.hasSSSE3() && "Expected SSSE3 support for PSHUFB"); | |||
return LowerVectorCTLZInRegLUT(Op, DL, Subtarget, DAG); | |||
} | |||
static SDValue LowerVectorCTLZ_GFNI(SDValue Op, SelectionDAG &DAG, |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
(style) newline between functions
HI @RKSimon , Thanks for the feedback. Some issues (include whitespace...) are still not fully addressed — I will work on them and submit an updated it tomorrow. |
586be06
to
51aa741
Compare
|
||
SDValue Zero = DAG.getConstant(0, DL, MVT::i8); | ||
SDValue ZeroVec = DAG.getSplatBuildVector(VT, DL, Zero); | ||
SDValue Neg = DAG.getNode(ISD::SUB, DL, VT, ZeroVec, Reversed); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SDValue Neg = DAG.getNegative(Reversed, DL, VT);
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
@houngkoungting Can you use DAG.getNegative and remove the Zero/ZeroVec?
Hi @RKSimon, |
This PULL REQUEST implements vXi8 ctlz lowering for X86 using GFNI instructions
Fixes #140729