Skip to content

Commit a82d27a

Browse files
e-kud authored and phoebewang committed
[X86] Support llvm.{min,max}imum.f{16,32,64}
Addresses #53353
Reviewed By: RKSimon, pengfei
Differential Revision: https://reviews.llvm.org/D145634
1 parent 62f1d91 commit a82d27a

File tree

8 files changed: +1632 additions, -57 deletions (lines changed)

llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp

Lines changed: 3 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7994,7 +7994,9 @@ SDValue TargetLowering::expandFMINNUM_FMAXNUM(SDNode *Node,
79947994
// If the target has FMINIMUM/FMAXIMUM but not FMINNUM/FMAXNUM use that
79957995
// instead if there are no NaNs and there can't be an incompatible zero
79967996
// compare: at least one operand isn't +/-0, or there are no signed-zeros.
7997-
if (Node->getFlags().hasNoNaNs() &&
7997+
if ((Node->getFlags().hasNoNaNs() ||
7998+
(DAG.isKnownNeverNaN(Node->getOperand(0)) &&
7999+
DAG.isKnownNeverNaN(Node->getOperand(1)))) &&
79988000
(Node->getFlags().hasNoSignedZeros() ||
79998001
DAG.isKnownNeverZeroFloat(Node->getOperand(0)) ||
80008002
DAG.isKnownNeverZeroFloat(Node->getOperand(1)))) {

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 131 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1002,6 +1002,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
10021002
addRegisterClass(MVT::v4f32, Subtarget.hasVLX() ? &X86::VR128XRegClass
10031003
: &X86::VR128RegClass);
10041004

1005+
setOperationAction(ISD::FMAXIMUM, MVT::f32, Custom);
1006+
setOperationAction(ISD::FMINIMUM, MVT::f32, Custom);
1007+
10051008
setOperationAction(ISD::FNEG, MVT::v4f32, Custom);
10061009
setOperationAction(ISD::FABS, MVT::v4f32, Custom);
10071010
setOperationAction(ISD::FCOPYSIGN, MVT::v4f32, Custom);
@@ -1038,6 +1041,9 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
10381041
addRegisterClass(MVT::v2i64, Subtarget.hasVLX() ? &X86::VR128XRegClass
10391042
: &X86::VR128RegClass);
10401043

1044+
setOperationAction(ISD::FMAXIMUM, MVT::f64, Custom);
1045+
setOperationAction(ISD::FMINIMUM, MVT::f64, Custom);
1046+
10411047
for (auto VT : { MVT::v2i8, MVT::v4i8, MVT::v8i8,
10421048
MVT::v2i16, MVT::v4i16, MVT::v2i32 }) {
10431049
setOperationAction(ISD::SDIV, VT, Custom);
@@ -2124,6 +2130,8 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM,
21242130
setOperationAction(ISD::STRICT_FROUNDEVEN, MVT::f16, Legal);
21252131
setOperationAction(ISD::FP_ROUND, MVT::f16, Custom);
21262132
setOperationAction(ISD::STRICT_FP_ROUND, MVT::f16, Custom);
2133+
setOperationAction(ISD::FMAXIMUM, MVT::f16, Custom);
2134+
setOperationAction(ISD::FMINIMUM, MVT::f16, Custom);
21272135
setOperationAction(ISD::FP_EXTEND, MVT::f32, Legal);
21282136
setOperationAction(ISD::STRICT_FP_EXTEND, MVT::f32, Legal);
21292137

@@ -30217,6 +30225,126 @@ static SDValue LowerMINMAX(SDValue Op, const X86Subtarget &Subtarget,
3021730225
return SDValue();
3021830226
}
3021930227

30228+
static SDValue LowerFMINIMUM_FMAXIMUM(SDValue Op, const X86Subtarget &Subtarget,
30229+
SelectionDAG &DAG) {
30230+
assert((Op.getOpcode() == ISD::FMAXIMUM || Op.getOpcode() == ISD::FMINIMUM) &&
30231+
"Expected FMAXIMUM or FMINIMUM opcode");
30232+
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
30233+
EVT VT = Op.getValueType();
30234+
SDValue X = Op.getOperand(0);
30235+
SDValue Y = Op.getOperand(1);
30236+
SDLoc DL(Op);
30237+
uint64_t SizeInBits = VT.getFixedSizeInBits();
30238+
APInt PreferredZero = APInt::getZero(SizeInBits);
30239+
EVT IVT = MVT::getIntegerVT(SizeInBits);
30240+
X86ISD::NodeType MinMaxOp;
30241+
if (Op.getOpcode() == ISD::FMAXIMUM) {
30242+
MinMaxOp = X86ISD::FMAX;
30243+
} else {
30244+
PreferredZero.setSignBit();
30245+
MinMaxOp = X86ISD::FMIN;
30246+
}
30247+
EVT SetCCType =
30248+
TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
30249+
30250+
// The tables below show the expected result of Max in cases of NaN and
30251+
// signed zeros.
30252+
//
30253+
//                    Y                            Y
30254+
//                Num   xNaN                   +0     -0
30255+
//             ---------------              ---------------
30256+
//        Num  |  Max | qNaN |         +0   |  +0  |  +0  |
30257+
//   X         ---------------    X         ---------------
30258+
//        xNaN | qNaN | qNaN |         -0   |  +0  |  -0  |
30259+
//             ---------------              ---------------
30260+
//
30261+
// It is achieved by means of FMAX/FMIN with preliminary checks and operand
30262+
// reordering.
30263+
//
30264+
// We check if any of operands is NaN and return NaN. Then we check if any of
30265+
// operands is zero or negative zero (for fmaximum and fminimum respectively)
30266+
// to ensure the correct zero is returned.
30267+
auto IsPreferredZero = [PreferredZero](SDValue Op) {
30268+
Op = peekThroughBitcasts(Op);
30269+
if (auto *CstOp = dyn_cast<ConstantFPSDNode>(Op))
30270+
return CstOp->getValueAPF().bitcastToAPInt() == PreferredZero;
30271+
if (auto *CstOp = dyn_cast<ConstantSDNode>(Op))
30272+
return CstOp->getAPIntValue() == PreferredZero;
30273+
return false;
30274+
};
30275+
30276+
SDValue MinMax;
30277+
bool IsXNeverNaN = DAG.isKnownNeverNaN(X);
30278+
bool IsYNeverNaN = DAG.isKnownNeverNaN(Y);
30279+
if (DAG.getTarget().Options.NoSignedZerosFPMath ||
30280+
Op->getFlags().hasNoSignedZeros() || IsPreferredZero(Y) ||
30281+
DAG.isKnownNeverZeroFloat(X)) {
30282+
MinMax = DAG.getNode(MinMaxOp, DL, VT, X, Y, Op->getFlags());
30283+
} else if (IsPreferredZero(X) || DAG.isKnownNeverZeroFloat(Y)) {
30284+
MinMax = DAG.getNode(MinMaxOp, DL, VT, Y, X, Op->getFlags());
30285+
} else if ((VT == MVT::f16 || Subtarget.hasDQI()) &&
30286+
(Op->getFlags().hasNoNaNs() || IsXNeverNaN || IsYNeverNaN)) {
30287+
if (IsXNeverNaN)
30288+
std::swap(X, Y);
30289+
// VFPCLASSS consumes a vector type. So provide a minimal one corresponded
30290+
// xmm register.
30291+
MVT VectorType = MVT::getVectorVT(VT.getSimpleVT(), 128 / SizeInBits);
30292+
SDValue VX = DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VectorType, X);
30293+
// Bits of classes:
30294+
// Bits  Imm8[0] Imm8[1] Imm8[2] Imm8[3] Imm8[4]  Imm8[5]  Imm8[6] Imm8[7]
30295+
// Class    QNAN PosZero NegZero  PosINF  NegINF Denormal Negative    SNAN
30296+
SDValue Imm = DAG.getTargetConstant(MinMaxOp == X86ISD::FMAX ? 0b11 : 0b101,
30297+
DL, MVT::i32);
30298+
SDValue IsNanZero = DAG.getNode(X86ISD::VFPCLASSS, DL, MVT::v1i1, VX, Imm);
30299+
SDValue Ins = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, MVT::v8i1,
30300+
DAG.getConstant(0, DL, MVT::v8i1), IsNanZero,
30301+
DAG.getIntPtrConstant(0, DL));
30302+
SDValue NeedSwap = DAG.getBitcast(MVT::i8, Ins);
30303+
SDValue NewX = DAG.getSelect(DL, VT, NeedSwap, Y, X);
30304+
SDValue NewY = DAG.getSelect(DL, VT, NeedSwap, X, Y);
30305+
return DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
30306+
} else {
30307+
SDValue IsXZero;
30308+
if (Subtarget.is64Bit() || VT != MVT::f64) {
30309+
SDValue XInt = DAG.getNode(ISD::BITCAST, DL, IVT, X);
30310+
SDValue ZeroCst = DAG.getConstant(PreferredZero, DL, IVT);
30311+
IsXZero = DAG.getSetCC(DL, SetCCType, XInt, ZeroCst, ISD::SETEQ);
30312+
} else {
30313+
assert(VT == MVT::f64);
30314+
SDValue Ins = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, MVT::v2f64,
30315+
DAG.getConstantFP(0, DL, MVT::v2f64), X,
30316+
DAG.getIntPtrConstant(0, DL));
30317+
SDValue VX = DAG.getNode(ISD::BITCAST, DL, MVT::v4f32, Ins);
30318+
SDValue Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VX,
30319+
DAG.getIntPtrConstant(0, DL));
30320+
Lo = DAG.getBitcast(MVT::i32, Lo);
30321+
SDValue Hi = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VX,
30322+
DAG.getIntPtrConstant(1, DL));
30323+
Hi = DAG.getBitcast(MVT::i32, Hi);
30324+
PreferredZero = APInt::getZero(SizeInBits / 2);
30325+
if (MinMaxOp == X86ISD::FMIN)
30326+
PreferredZero.setSignBit();
30327+
IsXZero = DAG.getNode(ISD::XOR, DL, MVT::i32, Hi,
30328+
DAG.getConstant(PreferredZero, DL, MVT::i32));
30329+
IsXZero = DAG.getNode(ISD::OR, DL, MVT::i32, Lo, IsXZero);
30330+
IsXZero = DAG.getSetCC(DL, SetCCType, IsXZero,
30331+
DAG.getConstant(0, DL, MVT::i32), ISD::SETEQ);
30332+
}
30333+
SDValue NewX = DAG.getSelect(DL, VT, IsXZero, Y, X);
30334+
SDValue NewY = DAG.getSelect(DL, VT, IsXZero, X, Y);
30335+
MinMax = DAG.getNode(MinMaxOp, DL, VT, NewX, NewY, Op->getFlags());
30336+
}
30337+
30338+
if (Op->getFlags().hasNoNaNs() || (IsXNeverNaN && IsYNeverNaN))
30339+
return MinMax;
30340+
30341+
APFloat NaNValue = APFloat::getNaN(DAG.EVTToAPFloatSemantics(VT));
30342+
SDValue IsNaN = DAG.getSetCC(DL, SetCCType, IsXNeverNaN ? Y : X,
30343+
IsYNeverNaN ? X : Y, ISD::SETUO);
30344+
return DAG.getSelect(DL, VT, IsNaN, DAG.getConstantFP(NaNValue, DL, VT),
30345+
MinMax);
30346+
}
30347+
3022030348
static SDValue LowerABD(SDValue Op, const X86Subtarget &Subtarget,
3022130349
SelectionDAG &DAG) {
3022230350
MVT VT = Op.getSimpleValueType();
@@ -33969,6 +34097,9 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const {
3396934097
case ISD::SMIN:
3397034098
case ISD::UMAX:
3397134099
case ISD::UMIN: return LowerMINMAX(Op, Subtarget, DAG);
34100+
case ISD::FMINIMUM:
34101+
case ISD::FMAXIMUM:
34102+
return LowerFMINIMUM_FMAXIMUM(Op, Subtarget, DAG);
3397234103
case ISD::ABS: return LowerABS(Op, Subtarget, DAG);
3397334104
case ISD::ABDS:
3397434105
case ISD::ABDU: return LowerABD(Op, Subtarget, DAG);

llvm/test/Analysis/CostModel/X86/intrinsic-cost-kinds.ll

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -206,23 +206,23 @@ define void @constrained_fadd(float %a, <16 x float> %va) {
206206

207207
define void @fmaximum(float %a, float %b, <16 x float> %va, <16 x float> %vb) {
208208
; THRU-LABEL: 'fmaximum'
209-
; THRU-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
210-
; THRU-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
209+
; THRU-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
210+
; THRU-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
211211
; THRU-NEXT: Cost Model: Found an estimated cost of 0 for instruction: ret void
212212
;
213213
; LATE-LABEL: 'fmaximum'
214-
; LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
215-
; LATE-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
214+
; LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
215+
; LATE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
216216
; LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
217217
;
218218
; SIZE-LABEL: 'fmaximum'
219-
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
220-
; SIZE-NEXT: Cost Model: Found an estimated cost of 52 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
219+
; SIZE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
220+
; SIZE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
221221
; SIZE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
222222
;
223223
; SIZE_LATE-LABEL: 'fmaximum'
224-
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 10 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
225-
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 196 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
224+
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 2 for instruction: %s = call float @llvm.maximum.f32(float %a, float %b)
225+
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 68 for instruction: %v = call <16 x float> @llvm.maximum.v16f32(<16 x float> %va, <16 x float> %vb)
226226
; SIZE_LATE-NEXT: Cost Model: Found an estimated cost of 1 for instruction: ret void
227227
;
228228
%s = call float @llvm.maximum.f32(float %a, float %b)

0 commit comments

Comments
 (0)