Skip to content

Commit c4badd2

Browse files
arsenm
authored and
memfrob
committed
AMDGPU: Add feature for fast f32 denormals
1 parent 89f8edb commit c4badd2

File tree

3 files changed

+12
-4
lines changed

3 files changed

+12
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPU.td

Lines changed: 10 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,12 @@ def FeatureFastFMAF32 : SubtargetFeature<"fast-fmaf",
3333
"Assuming f32 fma is at least as fast as mul + add"
3434
>;
3535

36+
def FeatureFastDenormalF32 : SubtargetFeature<"fast-denormal-f32",
37+
"FastDenormalF32",
38+
"true",
39+
"Enabling denormals does not cause f32 instructions to run at f64 rates"
40+
>;
41+
3642
def FeatureMIMG_R128 : SubtargetFeature<"mimg-r128",
3743
"MIMG_R128",
3844
"true",
@@ -632,7 +638,7 @@ def FeatureVolcanicIslands : GCNSubtargetFeatureGeneration<"VOLCANIC_ISLANDS",
632638
FeatureScalarStores, FeatureInv2PiInlineImm,
633639
FeatureSDWA, FeatureSDWAOutModsVOPC, FeatureSDWAMac, FeatureDPP,
634640
FeatureIntClamp, FeatureTrigReducedRange, FeatureDoesNotSupportSRAMECC,
635-
FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts
641+
FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts, FeatureFastDenormalF32
636642
]
637643
>;
638644

@@ -647,8 +653,8 @@ def FeatureGFX9 : GCNSubtargetFeatureGeneration<"GFX9",
647653
FeatureSDWA, FeatureSDWAOmod, FeatureSDWAScalar, FeatureSDWASdst,
648654
FeatureFlatInstOffsets, FeatureFlatGlobalInsts, FeatureFlatScratchInsts,
649655
FeatureAddNoCarryInsts, FeatureGFX8Insts, FeatureGFX7GFX8GFX9Insts,
650-
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16
651-
]
656+
FeatureScalarFlatScratchInsts, FeatureScalarAtomics, FeatureR128A16,
657+
FeatureFastDenormalF32]
652658
>;
653659

654660
def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
@@ -665,7 +671,7 @@ def FeatureGFX10 : GCNSubtargetFeatureGeneration<"GFX10",
665671
FeatureNoSdstCMPX, FeatureVscnt, FeatureRegisterBanking,
666672
FeatureVOP3Literal, FeatureDPP8,
667673
FeatureNoDataDepHazard, FeaturePkFmacF16Inst, FeatureDoesNotSupportSRAMECC,
668-
FeatureGFX10A16
674+
FeatureGFX10A16, FeatureFastDenormalF32
669675
]
670676
>;
671677

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.cpp

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -178,6 +178,7 @@ GCNSubtarget::GCNSubtarget(const Triple &TT, StringRef GPU, StringRef FS,
178178
MaxPrivateElementSize(0),
179179

180180
FastFMAF32(false),
181+
FastDenormalF32(false),
181182
HalfRate64Ops(false),
182183

183184
FlatForGlobal(false),

llvm/lib/Target/AMDGPU/AMDGPUSubtarget.h

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -291,6 +291,7 @@ class GCNSubtarget : public AMDGPUGenSubtargetInfo,
291291

292292
// Possibly statically set by tablegen, but may want to be overridden.
293293
bool FastFMAF32;
294+
bool FastDenormalF32;
294295
bool HalfRate64Ops;
295296

296297
// Dynamically set bits that enable features.

0 commit comments

Comments
 (0)