Skip to content

Commit 5cb6b15

Browse files
committed
[AMDGPU] Constrain use LiveMask by the operand's LaneMask for RP calculation.
For speculative RP queries, recede may calculate inaccurate masks for subreg uses. Previously, the calculation would consider every lane of the used register that is live at the position of the MI in the LIS. This also picks up lanes of any subregs which are live at the instruction but not used by it. By constraining the live mask against the getSubRegIndexLaneMask of the operand's subreg, we are sure not to pick up these extra lanes. For current clients of recede, this is not an issue, because 1. the current clients do not violate the program order in the LIS, and 2. the change to RP is based on the difference between the previous mask and the new mask. Since current clients are not exposed to this issue, this patch is effectively NFC (no functional change) for them. Co-authored-by: Valery Pykhtin [email protected] Change-Id: Iaed80271226b2587297e6fb78fe081afec1a9275
1 parent 7e31eaa commit 5cb6b15

File tree

2 files changed

+40
-25
lines changed

2 files changed

+40
-25
lines changed

llvm/lib/Target/AMDGPU/GCNRegPressure.cpp

Lines changed: 31 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -259,33 +259,40 @@ static void
259259
collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
260260
const MachineInstr &MI, const LiveIntervals &LIS,
261261
const MachineRegisterInfo &MRI) {
262-
SlotIndex InstrSI;
262+
263+
auto &TRI = *MRI.getTargetRegisterInfo();
263264
for (const auto &MO : MI.operands()) {
264265
if (!MO.isReg() || !MO.getReg().isVirtual())
265266
continue;
266267
if (!MO.isUse() || !MO.readsReg())
267268
continue;
268269

269270
Register Reg = MO.getReg();
270-
if (llvm::any_of(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
271-
return RM.RegUnit == Reg;
272-
}))
273-
continue;
271+
auto I = llvm::find_if(RegMaskPairs, [Reg](const RegisterMaskPair &RM) {
272+
return RM.RegUnit == Reg;
273+
});
274+
275+
auto &P = I == RegMaskPairs.end()
276+
? RegMaskPairs.emplace_back(Reg, LaneBitmask::getNone())
277+
: *I;
274278

275-
LaneBitmask UseMask;
276-
auto &LI = LIS.getInterval(Reg);
279+
P.LaneMask |= MO.getSubReg() ? TRI.getSubRegIndexLaneMask(MO.getSubReg())
280+
: MRI.getMaxLaneMaskForVReg(Reg);
281+
}
282+
283+
SlotIndex InstrSI;
284+
for (auto &P : RegMaskPairs) {
285+
auto &LI = LIS.getInterval(P.RegUnit);
277286
if (!LI.hasSubRanges())
278-
UseMask = MRI.getMaxLaneMaskForVReg(Reg);
279-
else {
280-
// For a tentative schedule LIS isn't updated yet but livemask should
281-
// remain the same on any schedule. Subreg defs can be reordered but they
282-
// all must dominate uses anyway.
283-
if (!InstrSI)
284-
InstrSI = LIS.getInstructionIndex(*MO.getParent()).getBaseIndex();
285-
UseMask = getLiveLaneMask(LI, InstrSI, MRI);
286-
}
287+
continue;
288+
289+
// For a tentative schedule LIS isn't updated yet but livemask should
290+
// remain the same on any schedule. Subreg defs can be reordered but they
291+
// all must dominate uses anyway.
292+
if (!InstrSI)
293+
InstrSI = LIS.getInstructionIndex(MI).getBaseIndex();
287294

288-
RegMaskPairs.emplace_back(Reg, UseMask);
295+
P.LaneMask = getLiveLaneMask(LI, InstrSI, MRI, P.LaneMask);
289296
}
290297
}
291298

@@ -294,22 +301,25 @@ collectVirtualRegUses(SmallVectorImpl<RegisterMaskPair> &RegMaskPairs,
294301

295302
LaneBitmask llvm::getLiveLaneMask(unsigned Reg, SlotIndex SI,
296303
const LiveIntervals &LIS,
297-
const MachineRegisterInfo &MRI) {
298-
return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI);
304+
const MachineRegisterInfo &MRI,
305+
LaneBitmask LaneMaskFilter) {
306+
return getLiveLaneMask(LIS.getInterval(Reg), SI, MRI, LaneMaskFilter);
299307
}
300308

301309
LaneBitmask llvm::getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
302-
const MachineRegisterInfo &MRI) {
310+
const MachineRegisterInfo &MRI,
311+
LaneBitmask LaneMaskFilter) {
303312
LaneBitmask LiveMask;
304313
if (LI.hasSubRanges()) {
305314
for (const auto &S : LI.subranges())
306-
if (S.liveAt(SI)) {
315+
if ((S.LaneMask & LaneMaskFilter).any() && S.liveAt(SI)) {
307316
LiveMask |= S.LaneMask;
308317
assert(LiveMask == (LiveMask & MRI.getMaxLaneMaskForVReg(LI.reg())));
309318
}
310319
} else if (LI.liveAt(SI)) {
311320
LiveMask = MRI.getMaxLaneMaskForVReg(LI.reg());
312321
}
322+
LiveMask &= LaneMaskFilter;
313323
return LiveMask;
314324
}
315325

llvm/lib/Target/AMDGPU/GCNRegPressure.h

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -265,13 +265,18 @@ class GCNDownwardRPTracker : public GCNRPTracker {
265265
const LiveRegSet *LiveRegsCopy = nullptr);
266266
};
267267

268-
LaneBitmask getLiveLaneMask(unsigned Reg,
269-
SlotIndex SI,
268+
/// \returns the LaneMask of live lanes of \p Reg at position \p SI. Only the
269+
/// active lanes of \p LaneMaskFilter will be set in the return value. This is
270+
/// used, for example, to limit the live lanes to a specific subreg when
271+
/// calculating use masks.
272+
LaneBitmask getLiveLaneMask(unsigned Reg, SlotIndex SI,
270273
const LiveIntervals &LIS,
271-
const MachineRegisterInfo &MRI);
274+
const MachineRegisterInfo &MRI,
275+
LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
272276

273277
LaneBitmask getLiveLaneMask(const LiveInterval &LI, SlotIndex SI,
274-
const MachineRegisterInfo &MRI);
278+
const MachineRegisterInfo &MRI,
279+
LaneBitmask LaneMaskFilter = LaneBitmask::getAll());
275280

276281
GCNRPTracker::LiveRegSet getLiveRegs(SlotIndex SI, const LiveIntervals &LIS,
277282
const MachineRegisterInfo &MRI);

0 commit comments

Comments
 (0)