-
Notifications
You must be signed in to change notification settings - Fork 13.6k
Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" #123632
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Reland "RegisterCoalescer: Add implicit-def of super register when coalescing SUBREG_TO_REG" #123632
Conversation
@llvm/pr-subscribers-llvm-globalisel @llvm/pr-subscribers-backend-x86 Author: Sander de Smalen (sdesmalen-arm) Changes: This PR aims to reland work done by @arsenm which was previously reverted. This PR cherry-picks the original commit (0e46b49), and adds another
Original patch by @arsenm. Patch is 164.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123632.diff 23 Files Affected:
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 8313927dd2aa11..86dd044adbf7ff 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -306,7 +306,11 @@ namespace {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
+ ///
+ /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG
+ /// SrcReg. This introduces an implicit-def of DstReg on coalesced users.
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
+ bool IsSubregToReg);
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -1430,6 +1434,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
+ LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
@@ -1443,6 +1448,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
+ if (MO.isDef() && MO.getReg().isVirtual() &&
+ MRI->shouldTrackSubRegLiveness(DstReg))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
@@ -1485,14 +1493,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());
- // We're only expecting another def of the main output, so the range
- // should get updated with the regular output range.
- //
- // FIXME: The range updating below probably needs updating to look at
- // the super register if subranges are tracked.
- assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
- "subrange update for implicit-def of super register may not be "
- "properly handled");
+ // If lanemasks need to be tracked, compile the lanemask of the NewMI
+ // implicit def operands to avoid subranges for the super-regs from
+ // being removed by code later on in this function.
+ if (MRI->shouldTrackSubRegLiveness(DstReg))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
}
@@ -1516,7 +1521,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MRI->setRegClass(DstReg, NewRC);
// Update machine operands and add flags.
- updateRegDefsUses(DstReg, DstReg, DstIdx);
+ updateRegDefsUses(DstReg, DstReg, DstIdx, false);
NewMI.getOperand(0).setSubReg(NewIdx);
// updateRegDefUses can add an "undef" flag to the definition, since
// it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
@@ -1592,7 +1597,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
- if ((SR.LaneMask & DstMask).none()) {
+ if ((SR.LaneMask & DstMask).none() &&
+ (SR.LaneMask & NewMIImplicitOpsMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1857,7 +1863,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
- unsigned SubIdx) {
+ unsigned SubIdx, bool IsSubregToReg) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
@@ -1877,6 +1883,14 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
}
}
+ // If DstInt already has a subrange for the unused lanes, then we shouldn't
+ // create duplicate subranges when we update the interval for unused lanes.
+ LaneBitmask DefinedLanes;
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ for (LiveInterval::SubRange &SR : DstInt->subranges())
+ DefinedLanes |= SR.LaneMask;
+ }
+
SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator
I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
@@ -1900,6 +1914,8 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
+ bool FullDef = true;
+
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);
@@ -1907,8 +1923,10 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// Adjust <undef> flags in case of sub-register joins. We don't want to
// turn a full def into a read-modify-write sub-register def and vice
// versa.
- if (SubIdx && MO.isDef())
+ if (SubIdx && MO.isDef()) {
MO.setIsUndef(!Reads);
+ FullDef = false;
+ }
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
@@ -1941,6 +1959,38 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
MO.substVirtReg(DstReg, SubIdx, *TRI);
}
+ if (IsSubregToReg && !FullDef) {
+ // If the coalesced instruction doesn't fully define the register, we need
+ // to preserve the original super register liveness for SUBREG_TO_REG.
+ //
+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
+ // but it introduces liveness for other subregisters. Downstream users may
+ // have been relying on those bits, so we need to ensure their liveness is
+ // captured with a def of other lanes.
+
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ assert(DstInt->hasSubRanges() &&
+ "SUBREG_TO_REG should have resulted in subrange");
+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = DstMask & ~UsedLanes & ~DefinedLanes;
+ if ((UnusedLanes).any()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
+ DefinedLanes |= UnusedLanes;
+ }
+ } else if (DstIsPhys) {
+ // Ensure we have a computed liverange for all regunits,
+ // as this is required by the scheduler/regpressure tracker,
+ // see: https://github.com/llvm/llvm-project/issues/76416
+ for (MCRegUnit Unit : TRI->regunits(DstReg))
+ LIS->getRegUnit(Unit);
+ }
+
+ MachineInstrBuilder MIB(*MF, UseMI);
+ MIB.addReg(DstReg, RegState::ImplicitDefine);
+ }
+
LLVM_DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugInstr())
@@ -2142,6 +2192,8 @@ bool RegisterCoalescer::joinCopy(
});
}
+ const bool IsSubregToReg = CopyMI->isSubregToReg();
+
ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
@@ -2211,9 +2263,12 @@ bool RegisterCoalescer::joinCopy(
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx())
- updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
- updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+ if (CP.getDstIdx()) {
+ assert(!IsSubregToReg && "can this happen?");
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false);
+ }
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
+ IsSubregToReg);
// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index c6819ff39ed33e..39446105c455a6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -12,7 +12,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -46,13 +46,13 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0, $x2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew)
+ ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0, $x9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -91,7 +91,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -243,7 +243,7 @@ define i32 @fetch_and_nand(ptr %p) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w8, 2, pcsections !0
; CHECK-NEXT: $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -295,7 +295,7 @@ define i32 @fetch_and_or(ptr %p) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
@@ -726,8 +726,8 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -752,7 +752,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -775,8 +775,8 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -799,8 +799,8 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -823,8 +823,8 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -847,8 +847,8 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -871,10 +871,10 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -897,10 +897,10 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsecti...
[truncated]
|
@llvm/pr-subscribers-backend-powerpc Author: Sander de Smalen (sdesmalen-arm) Changes: This PR aims to reland work done by @arsenm which was previously reverted. This PR cherry-picks the original commit (0e46b49), and adds another
Original patch by @arsenm. Patch is 164.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/123632.diff 23 Files Affected:
diff --git a/llvm/lib/CodeGen/RegisterCoalescer.cpp b/llvm/lib/CodeGen/RegisterCoalescer.cpp
index 8313927dd2aa11..86dd044adbf7ff 100644
--- a/llvm/lib/CodeGen/RegisterCoalescer.cpp
+++ b/llvm/lib/CodeGen/RegisterCoalescer.cpp
@@ -306,7 +306,11 @@ namespace {
/// number if it is not zero. If DstReg is a physical register and the
/// existing subregister number of the def / use being updated is not zero,
/// make sure to set it to the correct physical subregister.
- void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx);
+ ///
+ /// If \p IsSubregToReg, we are coalescing a DstReg = SUBREG_TO_REG
+ /// SrcReg. This introduces an implicit-def of DstReg on coalesced users.
+ void updateRegDefsUses(Register SrcReg, Register DstReg, unsigned SubIdx,
+ bool IsSubregToReg);
/// If the given machine operand reads only undefined lanes add an undef
/// flag.
@@ -1430,6 +1434,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
// CopyMI may have implicit operands, save them so that we can transfer them
// over to the newly materialized instruction after CopyMI is removed.
+ LaneBitmask NewMIImplicitOpsMask;
SmallVector<MachineOperand, 4> ImplicitOps;
ImplicitOps.reserve(CopyMI->getNumOperands() -
CopyMI->getDesc().getNumOperands());
@@ -1443,6 +1448,9 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
(MO.getSubReg() == 0 && MO.getReg() == DstOperand.getReg())) &&
"unexpected implicit virtual register def");
ImplicitOps.push_back(MO);
+ if (MO.isDef() && MO.getReg().isVirtual() &&
+ MRI->shouldTrackSubRegLiveness(DstReg))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
@@ -1485,14 +1493,11 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
} else {
assert(MO.getReg() == NewMI.getOperand(0).getReg());
- // We're only expecting another def of the main output, so the range
- // should get updated with the regular output range.
- //
- // FIXME: The range updating below probably needs updating to look at
- // the super register if subranges are tracked.
- assert(!MRI->shouldTrackSubRegLiveness(DstReg) &&
- "subrange update for implicit-def of super register may not be "
- "properly handled");
+ // If lanemasks need to be tracked, compile the lanemask of the NewMI
+ // implicit def operands to avoid subranges for the super-regs from
+ // being removed by code later on in this function.
+ if (MRI->shouldTrackSubRegLiveness(DstReg))
+ NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg());
}
}
}
@@ -1516,7 +1521,7 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
MRI->setRegClass(DstReg, NewRC);
// Update machine operands and add flags.
- updateRegDefsUses(DstReg, DstReg, DstIdx);
+ updateRegDefsUses(DstReg, DstReg, DstIdx, false);
NewMI.getOperand(0).setSubReg(NewIdx);
// updateRegDefUses can add an "undef" flag to the definition, since
// it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
@@ -1592,7 +1597,8 @@ bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
VNInfo::Allocator &Alloc = LIS->getVNInfoAllocator();
for (LiveInterval::SubRange &SR : DstInt.subranges()) {
- if ((SR.LaneMask & DstMask).none()) {
+ if ((SR.LaneMask & DstMask).none() &&
+ (SR.LaneMask & NewMIImplicitOpsMask).none()) {
LLVM_DEBUG(dbgs()
<< "Removing undefined SubRange "
<< PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
@@ -1857,7 +1863,7 @@ void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
}
void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
- unsigned SubIdx) {
+ unsigned SubIdx, bool IsSubregToReg) {
bool DstIsPhys = DstReg.isPhysical();
LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
@@ -1877,6 +1883,14 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
}
}
+ // If DstInt already has a subrange for the unused lanes, then we shouldn't
+ // create duplicate subranges when we update the interval for unused lanes.
+ LaneBitmask DefinedLanes;
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ for (LiveInterval::SubRange &SR : DstInt->subranges())
+ DefinedLanes |= SR.LaneMask;
+ }
+
SmallPtrSet<MachineInstr*, 8> Visited;
for (MachineRegisterInfo::reg_instr_iterator
I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
@@ -1900,6 +1914,8 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
if (DstInt && !Reads && SubIdx && !UseMI->isDebugInstr())
Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
+ bool FullDef = true;
+
// Replace SrcReg with DstReg in all UseMI operands.
for (unsigned Op : Ops) {
MachineOperand &MO = UseMI->getOperand(Op);
@@ -1907,8 +1923,10 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
// Adjust <undef> flags in case of sub-register joins. We don't want to
// turn a full def into a read-modify-write sub-register def and vice
// versa.
- if (SubIdx && MO.isDef())
+ if (SubIdx && MO.isDef()) {
MO.setIsUndef(!Reads);
+ FullDef = false;
+ }
// A subreg use of a partially undef (super) register may be a complete
// undef use now and then has to be marked that way.
@@ -1941,6 +1959,38 @@ void RegisterCoalescer::updateRegDefsUses(Register SrcReg, Register DstReg,
MO.substVirtReg(DstReg, SubIdx, *TRI);
}
+ if (IsSubregToReg && !FullDef) {
+ // If the coalesced instruction doesn't fully define the register, we need
+ // to preserve the original super register liveness for SUBREG_TO_REG.
+ //
+ // We pretended SUBREG_TO_REG was a regular copy for coalescing purposes,
+ // but it introduces liveness for other subregisters. Downstream users may
+ // have been relying on those bits, so we need to ensure their liveness is
+ // captured with a def of other lanes.
+
+ if (DstInt && MRI->shouldTrackSubRegLiveness(DstReg)) {
+ assert(DstInt->hasSubRanges() &&
+ "SUBREG_TO_REG should have resulted in subrange");
+ LaneBitmask DstMask = MRI->getMaxLaneMaskForVReg(DstInt->reg());
+ LaneBitmask UsedLanes = TRI->getSubRegIndexLaneMask(SubIdx);
+ LaneBitmask UnusedLanes = DstMask & ~UsedLanes & ~DefinedLanes;
+ if ((UnusedLanes).any()) {
+ BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
+ DstInt->createSubRangeFrom(Allocator, UnusedLanes, *DstInt);
+ DefinedLanes |= UnusedLanes;
+ }
+ } else if (DstIsPhys) {
+ // Ensure we have a computed liverange for all regunits,
+ // as this is required by the scheduler/regpressure tracker,
+ // see: https://github.com/llvm/llvm-project/issues/76416
+ for (MCRegUnit Unit : TRI->regunits(DstReg))
+ LIS->getRegUnit(Unit);
+ }
+
+ MachineInstrBuilder MIB(*MF, UseMI);
+ MIB.addReg(DstReg, RegState::ImplicitDefine);
+ }
+
LLVM_DEBUG({
dbgs() << "\t\tupdated: ";
if (!UseMI->isDebugInstr())
@@ -2142,6 +2192,8 @@ bool RegisterCoalescer::joinCopy(
});
}
+ const bool IsSubregToReg = CopyMI->isSubregToReg();
+
ShrinkMask = LaneBitmask::getNone();
ShrinkMainRange = false;
@@ -2211,9 +2263,12 @@ bool RegisterCoalescer::joinCopy(
// Rewrite all SrcReg operands to DstReg.
// Also update DstReg operands to include DstIdx if it is set.
- if (CP.getDstIdx())
- updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
- updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
+ if (CP.getDstIdx()) {
+ assert(!IsSubregToReg && "can this happen?");
+ updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx(), false);
+ }
+ updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx(),
+ IsSubregToReg);
// Shrink subregister ranges if necessary.
if (ShrinkMask.any()) {
diff --git a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
index c6819ff39ed33e..39446105c455a6 100644
--- a/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
+++ b/llvm/test/CodeGen/AArch64/GlobalISel/arm64-pcsections.ll
@@ -12,7 +12,7 @@ define i32 @val_compare_and_swap(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -46,13 +46,13 @@ define i32 @val_compare_and_swap_from_load(ptr %p, i32 %cmp, ptr %pnew) {
; CHECK-NEXT: successors: %bb.1(0x80000000)
; CHECK-NEXT: liveins: $w1, $x0, $x2
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def $x9, pcsections !0 :: (load (s32) from %ir.pnew)
+ ; CHECK-NEXT: renamable $w9 = LDRWui killed renamable $x2, 0, implicit-def renamable $x9, pcsections !0 :: (load (s32) from %ir.pnew)
; CHECK-NEXT: {{ $}}
; CHECK-NEXT: bb.1.cmpxchg.start:
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0, $x9
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -91,7 +91,7 @@ define i32 @val_compare_and_swap_rel(ptr %p, i32 %cmp, i32 %new) {
; CHECK-NEXT: successors: %bb.2(0x7c000000), %bb.3(0x04000000)
; CHECK-NEXT: liveins: $w1, $w2, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $wzr = SUBSWrs renamable $w8, renamable $w1, 0, implicit-def $nzcv, pcsections !0
; CHECK-NEXT: Bcc 1, %bb.3, implicit killed $nzcv, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -243,7 +243,7 @@ define i32 @fetch_and_nand(ptr %p) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: renamable $w9 = ANDWri renamable $w8, 2, pcsections !0
; CHECK-NEXT: $w9 = ORNWrs $wzr, killed renamable $w9, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRW killed renamable $w9, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
@@ -295,7 +295,7 @@ define i32 @fetch_and_or(ptr %p) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w9, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
+ ; CHECK-NEXT: renamable $w8 = LDAXRW renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s32) from %ir.p)
; CHECK-NEXT: $w10 = ORRWrs renamable $w8, renamable $w9, 0, pcsections !0
; CHECK-NEXT: early-clobber renamable $w11 = STLXRW killed renamable $w10, renamable $x0, pcsections !0 :: (volatile store (s32) into %ir.p)
; CHECK-NEXT: CBNZW killed renamable $w11, %bb.1, pcsections !0
@@ -726,8 +726,8 @@ define i8 @atomicrmw_add_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ADDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -752,7 +752,7 @@ define i8 @atomicrmw_xchg_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $x0, $x1
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: early-clobber renamable $w9 = STXRB renamable $w1, renamable $x0, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w9, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -775,8 +775,8 @@ define i8 @atomicrmw_sub_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = SUBWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -799,8 +799,8 @@ define i8 @atomicrmw_and_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ANDWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -823,8 +823,8 @@ define i8 @atomicrmw_or_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = ORRWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -847,8 +847,8 @@ define i8 @atomicrmw_xor_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
- ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: $w9 = EORWrs renamable $w8, renamable $w1, 0, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -871,10 +871,10 @@ define i8 @atomicrmw_min_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDAXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 11, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsections !0
; CHECK-NEXT: {{ $}}
@@ -897,10 +897,10 @@ define i8 @atomicrmw_max_i8(ptr %ptr, i8 %rhs) {
; CHECK-NEXT: successors: %bb.1(0x7c000000), %bb.2(0x04000000)
; CHECK-NEXT: liveins: $w1, $x0
; CHECK-NEXT: {{ $}}
- ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
+ ; CHECK-NEXT: renamable $w8 = LDXRB renamable $x0, implicit-def renamable $x8, pcsections !0 :: (volatile load (s8) from %ir.ptr)
; CHECK-NEXT: renamable $w9 = SBFMWri renamable $w8, 0, 7, pcsections !0
; CHECK-NEXT: dead $wzr = SUBSWrx killed renamable $w9, renamable $w1, 32, implicit-def $nzcv, pcsections !0
- ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def $x9, pcsections !0
+ ; CHECK-NEXT: renamable $w9 = CSELWr renamable $w8, renamable $w1, 12, implicit killed $nzcv, implicit-def renamable $x9, pcsections !0
; CHECK-NEXT: early-clobber renamable $w10 = STLXRB renamable $w9, renamable $x0, implicit killed $x9, pcsections !0 :: (volatile store (s8) into %ir.ptr)
; CHECK-NEXT: CBNZW killed renamable $w10, %bb.1, pcsecti...
[truncated]
|
✅ With the latest revision this PR passed the undef deprecator. |
Changes made compared to 0e46b49:
* The code in `updateRegDefsUses` now updates subranges when subreg-liveness-tracking is enabled.
* When adding an implicit-def operand for the super-register, the code in `reMaterializeTrivialDef` which tries to remove undefined subranges should now take into account that the lanes from the super-reg are no longer undefined.
* Any operand that defines a register with a sub-reg must set FullDef to false, because it only defines part of the register.
* Ensures that live-ranges are computed for all regunits of a physical reg that's added as implicit-def, as needed for subsequent passes, see the conversation on: llvm#76416
3332f15
to
f5266f5
Compare
Can you add the reported regression test? |
if (MO.isDef() && MO.getReg().isVirtual() && | ||
MRI->shouldTrackSubRegLiveness(DstReg)) | ||
NewMIImplicitOpsMask |= MRI->getMaxLaneMaskForVReg(MO.getReg()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Why is this mixing MO.getReg() and DstReg?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Good catch, that was a mistake; there is no requirement for the implicit-def of `MO.getReg()` to match `DstReg`.
Adds reproducer to show previous regressions are gone. Also remove code that calculates live ranges for physregs, as I don't have any tests that cover this case, and the previous reproducers don't trigger this code. This suggests to me that the code in llvm#121734 may be sufficient to fix this issue.
llvm/test/CodeGen/X86/pr76416.ll
Outdated
call void asm sideeffect "", "N{dx},~{dirflag},~{fpsr},~{flags}"(i32 poison) nounwind | ||
%2 = load ptr, ptr @load_p, align 8 | ||
%regs = getelementptr inbounds { [4 x i8] }, ptr %2, i32 0, i32 0 | ||
%3 = load i32, ptr %i, align 4 |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Use named values in tests
llvm/test/CodeGen/X86/pr76416.ll
Outdated
; Related reproducer as reported on https://github.com/llvm/llvm-project/commit/0e46b49de43349f8cbb2a7d4c6badef6d16e31ae#commitcomment-136147998 | ||
; | ||
|
||
define void @f(i1 %cmp.not.i.i.i) { |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think this one is the same as llvm/test/CodeGen/X86/coalescer-add-implicit-def-subreg-to-reg-regression.ll
When the RegisterCoalescer adds an implicit-def when coalescing a SUBREG_TO_REG (llvm#123632), this causes issues when removing other COPY nodes by commuting the instruction because it doesn't take the implicit-def into consideration. This PR fixes that.
When the RegisterCoalescer adds an implicit-def when coalescing a SUBREG_TO_REG (#123632), this causes issues when removing other COPY nodes by commuting the instruction because it doesn't take the implicit-def into consideration. This PR fixes that.
This PR aims to reland work done by @arsenm which was previously reverted
due to some tangentially related scheduler issues as discussed on #76416.
This PR cherry-picks the original commit (0e46b49), and adds another
patch on top with the following changes:
* The code in `updateRegDefsUses` now updates subranges when subreg-liveness-tracking is enabled.
* When adding an implicit-def operand for the super-register, the code in `reMaterializeTrivialDef` which tries to remove undefined subranges should now take into account that the lanes from the super-reg are no longer undefined.
Original patch by @arsenm.