Skip to content

Commit 149ed9d

Browse files
AMDGPU: update GFX11 wmma hazards (#76143)
One V_NOP or unrelated VALU instruction in between is required for correctness when matrix A or B of the current WMMA instruction overlaps with matrix D of the previous WMMA instruction. The remaining cases of WMMA operand overlap are handled by the hardware and do not require handling in the hazard recognizer. The hardware may stall in cases where:
- matrix C of the current WMMA instruction overlaps with matrix D of the previous WMMA instruction;
- a VALU instruction reads matrix D of the previous WMMA instruction;
- matrix A, B, or C of a WMMA instruction reads the result of a previous VALU instruction.
1 parent 91ddcba commit 149ed9d

File tree

2 files changed

+35
-36
lines changed

2 files changed

+35
-36
lines changed

llvm/lib/Target/AMDGPU/GCNHazardRecognizer.cpp

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1726,8 +1726,8 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
17261726
if (!SIInstrInfo::isWMMA(I))
17271727
return false;
17281728

1729-
// Src0 or Src1 of the current wmma instruction overlaps with the dest of
1730-
// the previous wmma.
1729+
// Src0(matrix A) or Src1(matrix B) of the current wmma instruction overlaps
1730+
// with the dest(matrix D) of the previous wmma.
17311731
const Register CurSrc0Reg =
17321732
TII->getNamedOperand(*MI, AMDGPU::OpName::src0)->getReg();
17331733
const Register CurSrc1Reg =
@@ -1741,25 +1741,6 @@ bool GCNHazardRecognizer::fixWMMAHazards(MachineInstr *MI) {
17411741
return true;
17421742
}
17431743

1744-
// Src2 of the current wmma instruction overlaps with the dest of the
1745-
// previous wmma.
1746-
const MachineOperand *Src2 =
1747-
TII->getNamedOperand(*MI, AMDGPU::OpName::src2);
1748-
const Register CurSrc2Reg = Src2->isReg() ? Src2->getReg() : Register();
1749-
1750-
if (CurSrc2Reg != AMDGPU::NoRegister &&
1751-
TRI->regsOverlap(PrevDstReg, CurSrc2Reg)) {
1752-
1753-
const MachineOperand *Src2Mods =
1754-
TII->getNamedOperand(*MI, AMDGPU::OpName::src2_modifiers);
1755-
const bool NoSrc2Mods =
1756-
(Src2Mods->getImm() & (SISrcMods::NEG | SISrcMods::NEG_HI)) == 0;
1757-
// Exception: there is no hazard if the wmma instructions are of the same
1758-
// type and there is no input modifier on src2 of the current instruction.
1759-
return !(NoSrc2Mods && (TII->pseudoToMCOpcode(I.getOpcode()) ==
1760-
TII->pseudoToMCOpcode(MI->getOpcode())));
1761-
}
1762-
17631744
return false;
17641745
};
17651746

0 commit comments

Comments
 (0)