Skip to content

Commit c46109d

Browse files
Revert "AMDGPU/GlobalISelDivergenceLowering: select divergent i1 phis" (llvm#79274)
Reverts llvm#78482
1 parent 9dddb3d commit c46109d

21 files changed

+259
-824
lines changed

llvm/include/llvm/CodeGen/MachineRegisterInfo.h

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -752,17 +752,6 @@ class MachineRegisterInfo {
752752
Register createVirtualRegister(const TargetRegisterClass *RegClass,
753753
StringRef Name = "");
754754

755-
/// All available attributes a virtual register can have.
756-
struct RegisterAttributes {
757-
const RegClassOrRegBank *RCOrRB;
758-
LLT Ty;
759-
};
760-
761-
/// createVirtualRegister - Create and return a new virtual register in the
762-
/// function with the specified register attributes.
763-
Register createVirtualRegister(RegisterAttributes RegAttr,
764-
StringRef Name = "");
765-
766755
/// Create and return a new virtual register in the function with the same
767756
/// attributes as the given register.
768757
Register cloneVirtualRegister(Register VReg, StringRef Name = "");

llvm/include/llvm/CodeGen/MachineUniformityAnalysis.h

Lines changed: 0 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -32,25 +32,6 @@ MachineUniformityInfo computeMachineUniformityInfo(
3232
MachineFunction &F, const MachineCycleInfo &cycleInfo,
3333
const MachineDomTree &domTree, bool HasBranchDivergence);
3434

35-
/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
36-
class MachineUniformityAnalysisPass : public MachineFunctionPass {
37-
MachineUniformityInfo UI;
38-
39-
public:
40-
static char ID;
41-
42-
MachineUniformityAnalysisPass();
43-
44-
MachineUniformityInfo &getUniformityInfo() { return UI; }
45-
const MachineUniformityInfo &getUniformityInfo() const { return UI; }
46-
47-
bool runOnMachineFunction(MachineFunction &F) override;
48-
void getAnalysisUsage(AnalysisUsage &AU) const override;
49-
void print(raw_ostream &OS, const Module *M = nullptr) const override;
50-
51-
// TODO: verify analysis
52-
};
53-
5435
} // namespace llvm
5536

5637
#endif // LLVM_CODEGEN_MACHINEUNIFORMITYANALYSIS_H

llvm/lib/CodeGen/MachineRegisterInfo.cpp

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -167,17 +167,6 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass,
167167
return Reg;
168168
}
169169

170-
/// createVirtualRegister - Create and return a new virtual register in the
171-
/// function with the specified register attributes.
172-
Register MachineRegisterInfo::createVirtualRegister(RegisterAttributes RegAttr,
173-
StringRef Name) {
174-
Register Reg = createIncompleteVirtualRegister(Name);
175-
VRegInfo[Reg].first = *RegAttr.RCOrRB;
176-
setType(Reg, RegAttr.Ty);
177-
noteNewVirtualRegister(Reg);
178-
return Reg;
179-
}
180-
181170
Register MachineRegisterInfo::cloneVirtualRegister(Register VReg,
182171
StringRef Name) {
183172
Register Reg = createIncompleteVirtualRegister(Name);

llvm/lib/CodeGen/MachineUniformityAnalysis.cpp

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,25 @@ MachineUniformityInfo llvm::computeMachineUniformityInfo(
165165

166166
namespace {
167167

168+
/// Legacy analysis pass which computes a \ref MachineUniformityInfo.
169+
class MachineUniformityAnalysisPass : public MachineFunctionPass {
170+
MachineUniformityInfo UI;
171+
172+
public:
173+
static char ID;
174+
175+
MachineUniformityAnalysisPass();
176+
177+
MachineUniformityInfo &getUniformityInfo() { return UI; }
178+
const MachineUniformityInfo &getUniformityInfo() const { return UI; }
179+
180+
bool runOnMachineFunction(MachineFunction &F) override;
181+
void getAnalysisUsage(AnalysisUsage &AU) const override;
182+
void print(raw_ostream &OS, const Module *M = nullptr) const override;
183+
184+
// TODO: verify analysis
185+
};
186+
168187
class MachineUniformityInfoPrinterPass : public MachineFunctionPass {
169188
public:
170189
static char ID;

llvm/lib/Target/AMDGPU/AMDGPUGlobalISelDivergenceLowering.cpp

Lines changed: 1 addition & 144 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,7 @@
1616
//===----------------------------------------------------------------------===//
1717

1818
#include "AMDGPU.h"
19-
#include "SILowerI1Copies.h"
20-
#include "llvm/CodeGen/GlobalISel/MachineIRBuilder.h"
2119
#include "llvm/CodeGen/MachineFunctionPass.h"
22-
#include "llvm/CodeGen/MachineUniformityAnalysis.h"
23-
#include "llvm/InitializePasses.h"
2420

2521
#define DEBUG_TYPE "amdgpu-global-isel-divergence-lowering"
2622

@@ -46,146 +42,14 @@ class AMDGPUGlobalISelDivergenceLowering : public MachineFunctionPass {
4642

4743
void getAnalysisUsage(AnalysisUsage &AU) const override {
4844
AU.setPreservesCFG();
49-
AU.addRequired<MachineDominatorTree>();
50-
AU.addRequired<MachinePostDominatorTree>();
51-
AU.addRequired<MachineUniformityAnalysisPass>();
5245
MachineFunctionPass::getAnalysisUsage(AU);
5346
}
5447
};
5548

56-
class DivergenceLoweringHelper : public PhiLoweringHelper {
57-
public:
58-
DivergenceLoweringHelper(MachineFunction *MF, MachineDominatorTree *DT,
59-
MachinePostDominatorTree *PDT,
60-
MachineUniformityInfo *MUI);
61-
62-
private:
63-
MachineUniformityInfo *MUI = nullptr;
64-
MachineIRBuilder B;
65-
Register buildRegCopyToLaneMask(Register Reg);
66-
67-
public:
68-
void markAsLaneMask(Register DstReg) const override;
69-
void getCandidatesForLowering(
70-
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const override;
71-
void collectIncomingValuesFromPhi(
72-
const MachineInstr *MI,
73-
SmallVectorImpl<Incoming> &Incomings) const override;
74-
void replaceDstReg(Register NewReg, Register OldReg,
75-
MachineBasicBlock *MBB) override;
76-
void buildMergeLaneMasks(MachineBasicBlock &MBB,
77-
MachineBasicBlock::iterator I, const DebugLoc &DL,
78-
Register DstReg, Register PrevReg,
79-
Register CurReg) override;
80-
void constrainAsLaneMask(Incoming &In) override;
81-
};
82-
83-
DivergenceLoweringHelper::DivergenceLoweringHelper(
84-
MachineFunction *MF, MachineDominatorTree *DT,
85-
MachinePostDominatorTree *PDT, MachineUniformityInfo *MUI)
86-
: PhiLoweringHelper(MF, DT, PDT), MUI(MUI), B(*MF) {}
87-
88-
// _(s1) -> SReg_32/64(s1)
89-
void DivergenceLoweringHelper::markAsLaneMask(Register DstReg) const {
90-
assert(MRI->getType(DstReg) == LLT::scalar(1));
91-
92-
if (MRI->getRegClassOrNull(DstReg)) {
93-
if (MRI->constrainRegClass(DstReg, ST->getBoolRC()))
94-
return;
95-
llvm_unreachable("Failed to constrain register class");
96-
}
97-
98-
MRI->setRegClass(DstReg, ST->getBoolRC());
99-
}
100-
101-
void DivergenceLoweringHelper::getCandidatesForLowering(
102-
SmallVectorImpl<MachineInstr *> &Vreg1Phis) const {
103-
LLT S1 = LLT::scalar(1);
104-
105-
// Add divergent i1 phis to the list
106-
for (MachineBasicBlock &MBB : *MF) {
107-
for (MachineInstr &MI : MBB.phis()) {
108-
Register Dst = MI.getOperand(0).getReg();
109-
if (MRI->getType(Dst) == S1 && MUI->isDivergent(Dst))
110-
Vreg1Phis.push_back(&MI);
111-
}
112-
}
113-
}
114-
115-
void DivergenceLoweringHelper::collectIncomingValuesFromPhi(
116-
const MachineInstr *MI, SmallVectorImpl<Incoming> &Incomings) const {
117-
for (unsigned i = 1; i < MI->getNumOperands(); i += 2) {
118-
Incomings.emplace_back(MI->getOperand(i).getReg(),
119-
MI->getOperand(i + 1).getMBB(), Register());
120-
}
121-
}
122-
123-
void DivergenceLoweringHelper::replaceDstReg(Register NewReg, Register OldReg,
124-
MachineBasicBlock *MBB) {
125-
BuildMI(*MBB, MBB->getFirstNonPHI(), {}, TII->get(AMDGPU::COPY), OldReg)
126-
.addReg(NewReg);
127-
}
128-
129-
// Copy Reg to new lane mask register, insert a copy after instruction that
130-
// defines Reg while skipping phis if needed.
131-
Register DivergenceLoweringHelper::buildRegCopyToLaneMask(Register Reg) {
132-
Register LaneMask = createLaneMaskReg(MRI, LaneMaskRegAttrs);
133-
MachineInstr *Instr = MRI->getVRegDef(Reg);
134-
MachineBasicBlock *MBB = Instr->getParent();
135-
B.setInsertPt(*MBB, MBB->SkipPHIsAndLabels(std::next(Instr->getIterator())));
136-
B.buildCopy(LaneMask, Reg);
137-
return LaneMask;
138-
}
139-
140-
// bb.previous
141-
// %PrevReg = ...
142-
//
143-
// bb.current
144-
// %CurReg = ...
145-
//
146-
// %DstReg - not defined
147-
//
148-
// -> (wave32 example, new registers have sreg_32 reg class and S1 LLT)
149-
//
150-
// bb.previous
151-
// %PrevReg = ...
152-
// %PrevRegCopy:sreg_32(s1) = COPY %PrevReg
153-
//
154-
// bb.current
155-
// %CurReg = ...
156-
// %CurRegCopy:sreg_32(s1) = COPY %CurReg
157-
// ...
158-
// %PrevMaskedReg:sreg_32(s1) = ANDN2 %PrevRegCopy, ExecReg - active lanes to 0
159-
// %CurMaskedReg:sreg_32(s1) = AND %ExecReg, CurRegCopy - inactive lanes to 0
160-
// %DstReg:sreg_32(s1) = OR %PrevMaskedReg, CurMaskedReg
161-
//
162-
// DstReg = for active lanes rewrite bit in PrevReg with bit from CurReg
163-
void DivergenceLoweringHelper::buildMergeLaneMasks(
164-
MachineBasicBlock &MBB, MachineBasicBlock::iterator I, const DebugLoc &DL,
165-
Register DstReg, Register PrevReg, Register CurReg) {
166-
// DstReg = (PrevReg & !EXEC) | (CurReg & EXEC)
167-
// TODO: check if inputs are constants or results of a compare.
168-
169-
Register PrevRegCopy = buildRegCopyToLaneMask(PrevReg);
170-
Register CurRegCopy = buildRegCopyToLaneMask(CurReg);
171-
Register PrevMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
172-
Register CurMaskedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
173-
174-
B.setInsertPt(MBB, I);
175-
B.buildInstr(AndN2Op, {PrevMaskedReg}, {PrevRegCopy, ExecReg});
176-
B.buildInstr(AndOp, {CurMaskedReg}, {ExecReg, CurRegCopy});
177-
B.buildInstr(OrOp, {DstReg}, {PrevMaskedReg, CurMaskedReg});
178-
}
179-
180-
void DivergenceLoweringHelper::constrainAsLaneMask(Incoming &In) { return; }
181-
18249
} // End anonymous namespace.
18350

18451
INITIALIZE_PASS_BEGIN(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
18552
"AMDGPU GlobalISel divergence lowering", false, false)
186-
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
187-
INITIALIZE_PASS_DEPENDENCY(MachinePostDominatorTree)
188-
INITIALIZE_PASS_DEPENDENCY(MachineUniformityAnalysisPass)
18953
INITIALIZE_PASS_END(AMDGPUGlobalISelDivergenceLowering, DEBUG_TYPE,
19054
"AMDGPU GlobalISel divergence lowering", false, false)
19155

@@ -200,12 +64,5 @@ FunctionPass *llvm::createAMDGPUGlobalISelDivergenceLoweringPass() {
20064

20165
bool AMDGPUGlobalISelDivergenceLowering::runOnMachineFunction(
20266
MachineFunction &MF) {
203-
MachineDominatorTree &DT = getAnalysis<MachineDominatorTree>();
204-
MachinePostDominatorTree &PDT = getAnalysis<MachinePostDominatorTree>();
205-
MachineUniformityInfo &MUI =
206-
getAnalysis<MachineUniformityAnalysisPass>().getUniformityInfo();
207-
208-
DivergenceLoweringHelper Helper(&MF, &DT, &PDT, &MUI);
209-
210-
return Helper.lowerPhis();
67+
return false;
21168
}

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -210,7 +210,6 @@ bool AMDGPUInstructionSelector::selectCOPY(MachineInstr &I) const {
210210
bool AMDGPUInstructionSelector::selectPHI(MachineInstr &I) const {
211211
const Register DefReg = I.getOperand(0).getReg();
212212
const LLT DefTy = MRI->getType(DefReg);
213-
214213
if (DefTy == LLT::scalar(1)) {
215214
if (!AllowRiskySelect) {
216215
LLVM_DEBUG(dbgs() << "Skipping risky boolean phi\n");
@@ -3553,6 +3552,8 @@ bool AMDGPUInstructionSelector::selectStackRestore(MachineInstr &MI) const {
35533552
}
35543553

35553554
bool AMDGPUInstructionSelector::select(MachineInstr &I) {
3555+
if (I.isPHI())
3556+
return selectPHI(I);
35563557

35573558
if (!I.isPreISelOpcode()) {
35583559
if (I.isCopy())
@@ -3695,8 +3696,6 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
36953696
return selectWaveAddress(I);
36963697
case AMDGPU::G_STACKRESTORE:
36973698
return selectStackRestore(I);
3698-
case AMDGPU::G_PHI:
3699-
return selectPHI(I);
37003699
default:
37013700
return selectImpl(I, *CoverageInfo);
37023701
}

llvm/lib/Target/AMDGPU/SILowerI1Copies.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,9 @@
3131

3232
using namespace llvm;
3333

34-
static Register
35-
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
36-
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
34+
static Register insertUndefLaneMask(MachineBasicBlock *MBB,
35+
MachineRegisterInfo *MRI,
36+
Register LaneMaskRegAttrs);
3737

3838
namespace {
3939

@@ -78,7 +78,7 @@ class Vreg1LoweringHelper : public PhiLoweringHelper {
7878
MachineBasicBlock::iterator I, const DebugLoc &DL,
7979
Register DstReg, Register PrevReg,
8080
Register CurReg) override;
81-
void constrainAsLaneMask(Incoming &In) override;
81+
void constrainIncomingRegisterTakenAsIs(Incoming &In) override;
8282

8383
bool lowerCopiesFromI1();
8484
bool lowerCopiesToI1();
@@ -304,8 +304,7 @@ class LoopFinder {
304304
/// blocks, so that the SSA updater doesn't have to search all the way to the
305305
/// function entry.
306306
void addLoopEntries(unsigned LoopLevel, MachineSSAUpdater &SSAUpdater,
307-
MachineRegisterInfo &MRI,
308-
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs,
307+
MachineRegisterInfo &MRI, Register LaneMaskRegAttrs,
309308
ArrayRef<Incoming> Incomings = {}) {
310309
assert(LoopLevel < CommonDominators.size());
311310

@@ -412,15 +411,14 @@ FunctionPass *llvm::createSILowerI1CopiesPass() {
412411
return new SILowerI1Copies();
413412
}
414413

415-
Register llvm::createLaneMaskReg(
416-
MachineRegisterInfo *MRI,
417-
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
418-
return MRI->createVirtualRegister(LaneMaskRegAttrs);
414+
Register llvm::createLaneMaskReg(MachineRegisterInfo *MRI,
415+
Register LaneMaskRegAttrs) {
416+
return MRI->cloneVirtualRegister(LaneMaskRegAttrs);
419417
}
420418

421-
static Register
422-
insertUndefLaneMask(MachineBasicBlock *MBB, MachineRegisterInfo *MRI,
423-
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs) {
419+
static Register insertUndefLaneMask(MachineBasicBlock *MBB,
420+
MachineRegisterInfo *MRI,
421+
Register LaneMaskRegAttrs) {
424422
MachineFunction &MF = *MBB->getParent();
425423
const GCNSubtarget &ST = MF.getSubtarget<GCNSubtarget>();
426424
const SIInstrInfo *TII = ST.getInstrInfo();
@@ -621,7 +619,7 @@ bool PhiLoweringHelper::lowerPhis() {
621619
for (auto &Incoming : Incomings) {
622620
MachineBasicBlock &IMBB = *Incoming.Block;
623621
if (PIA.isSource(IMBB)) {
624-
constrainAsLaneMask(Incoming);
622+
constrainIncomingRegisterTakenAsIs(Incoming);
625623
SSAUpdater.AddAvailableValue(&IMBB, Incoming.Reg);
626624
} else {
627625
Incoming.UpdatedReg = createLaneMaskReg(MRI, LaneMaskRegAttrs);
@@ -913,4 +911,6 @@ void Vreg1LoweringHelper::buildMergeLaneMasks(MachineBasicBlock &MBB,
913911
}
914912
}
915913

916-
void Vreg1LoweringHelper::constrainAsLaneMask(Incoming &In) {}
914+
void Vreg1LoweringHelper::constrainIncomingRegisterTakenAsIs(Incoming &In) {
915+
return;
916+
}

llvm/lib/Target/AMDGPU/SILowerI1Copies.h

Lines changed: 4 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,9 +31,7 @@ struct Incoming {
3131
: Reg(Reg), Block(Block), UpdatedReg(UpdatedReg) {}
3232
};
3333

34-
Register
35-
createLaneMaskReg(MachineRegisterInfo *MRI,
36-
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs);
34+
Register createLaneMaskReg(MachineRegisterInfo *MRI, Register LaneMaskRegAttrs);
3735

3836
class PhiLoweringHelper {
3937
public:
@@ -49,7 +47,7 @@ class PhiLoweringHelper {
4947
MachineRegisterInfo *MRI = nullptr;
5048
const GCNSubtarget *ST = nullptr;
5149
const SIInstrInfo *TII = nullptr;
52-
MachineRegisterInfo::RegisterAttributes LaneMaskRegAttrs;
50+
Register LaneMaskRegAttrs;
5351

5452
#ifndef NDEBUG
5553
DenseSet<Register> PhiRegisters;
@@ -70,8 +68,7 @@ class PhiLoweringHelper {
7068
getSaluInsertionAtEnd(MachineBasicBlock &MBB) const;
7169

7270
void initializeLaneMaskRegisterAttributes(Register LaneMask) {
73-
LaneMaskRegAttrs.RCOrRB = &MRI->getRegClassOrRegBank(LaneMask);
74-
LaneMaskRegAttrs.Ty = MRI->getType(LaneMask);
71+
LaneMaskRegAttrs = LaneMask;
7572
}
7673

7774
bool isLaneMaskReg(Register Reg) const {
@@ -94,7 +91,7 @@ class PhiLoweringHelper {
9491
MachineBasicBlock::iterator I,
9592
const DebugLoc &DL, Register DstReg,
9693
Register PrevReg, Register CurReg) = 0;
97-
virtual void constrainAsLaneMask(Incoming &In) = 0;
94+
virtual void constrainIncomingRegisterTakenAsIs(Incoming &In) = 0;
9895
};
9996

10097
} // end namespace llvm

0 commit comments

Comments
 (0)