Skip to content

Commit ba52f06

Browse files
authored
[AMDGPU] CodeGen for GFX12 S_WAIT_* instructions (#77438)
Update SIMemoryLegalizer and SIInsertWaitcnts to use separate wait instructions per counter (e.g. S_WAIT_LOADCNT) and split VMCNT into separate LOADCNT, SAMPLECNT and BVHCNT counters.
1 parent 9ca3693 commit ba52f06

File tree

109 files changed

+5939
-3912
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

109 files changed

+5939
-3912
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1242,7 +1242,8 @@ bool GCNHazardRecognizer::fixSMEMtoVectorWriteHazards(MachineInstr *MI) {
1242 1242   case AMDGPU::S_WAITCNT: {
1243 1243   const int64_t Imm = MI.getOperand(0).getImm();
1244 1244   AMDGPU::Waitcnt Decoded = AMDGPU::decodeWaitcnt(IV, Imm);
1245      - return (Decoded.LgkmCnt == 0);
     1245 + // DsCnt corresponds to LGKMCnt here.
     1246 + return (Decoded.DsCnt == 0);
1246 1247   }
1247 1248   default:
1248 1249   // SOPP instructions cannot mitigate the hazard.

llvm/lib/Target/AMDGPU/GCNSubtarget.h

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1200,6 +1200,10 @@ class GCNSubtarget final : public AMDGPUGenSubtargetInfo,
1200 1200
1201 1201   bool hasRestrictedSOffset() const { return HasRestrictedSOffset; }
1202 1202
     1203 + /// \returns true if the target uses LOADcnt/SAMPLEcnt/BVHcnt, DScnt/KMcnt
     1204 + /// and STOREcnt rather than VMcnt, LGKMcnt and VScnt respectively.
     1205 + bool hasExtendedWaitCounts() const { return getGeneration() >= GFX12; }
     1206 +
1203 1207   /// Return the maximum number of waves per SIMD for kernels using \p SGPRs
1204 1208   /// SGPRs
1205 1209   unsigned getOccupancyWithNumSGPRs(unsigned SGPRs) const;

0 commit comments

Comments
 (0)