
[AArch64] Initial compiler support for SVE unwind on Windows. #138609

Open · wants to merge 6 commits into main
26 changes: 26 additions & 0 deletions llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp
@@ -3272,6 +3272,32 @@ void AArch64AsmPrinter::emitInstruction(const MachineInstr *MI) {
-MI->getOperand(2).getImm());
return;

case AArch64::SEH_AllocZ:
assert(MI->getOperand(0).getImm() >= 0 &&
"AllocZ SEH opcode offset must be non-negative");
assert(MI->getOperand(0).getImm() <= 255 &&
"AllocZ SEH opcode offset must fit into 8 bits");
TS->emitARM64WinCFIAllocZ(MI->getOperand(0).getImm());
return;

case AArch64::SEH_SaveZReg:
assert(MI->getOperand(1).getImm() >= 0 &&
"SaveZReg SEH opcode offset must be non-negative");
assert(MI->getOperand(1).getImm() <= 255 &&
"SaveZReg SEH opcode offset must fit into 8 bits");
TS->emitARM64WinCFISaveZReg(MI->getOperand(0).getImm(),
MI->getOperand(1).getImm());
return;

case AArch64::SEH_SavePReg:
assert(MI->getOperand(1).getImm() >= 0 &&
"SavePReg SEH opcode offset must be non-negative");
assert(MI->getOperand(1).getImm() <= 255 &&
"SavePReg SEH opcode offset must fit into 8 bits");
TS->emitARM64WinCFISavePReg(MI->getOperand(0).getImm(),
MI->getOperand(1).getImm());
return;

case AArch64::BLR:
case AArch64::BR: {
recordIfImportCall(MI);
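The three emitARM64WinCFI* calls above go through the AArch64 target streamer; the hook declarations themselves are not part of this excerpt. A minimal sketch of what they would look like, with signatures inferred from the call sites (assumed, following the existing emitARM64WinCFISaveReg-style hooks):

// Sketch only: assumed declarations on AArch64TargetStreamer, inferred from
// the call sites in AArch64AsmPrinter::emitInstruction above.
class AArch64TargetStreamer /* : public MCTargetStreamer */ {
public:
  // SEH_AllocZ: scalable stack allocation; the AsmPrinter guarantees
  // 0 <= Offset <= 255 (the unwind encoding's 8-bit field).
  virtual void emitARM64WinCFIAllocZ(int Offset) {}
  // SEH_SaveZReg / SEH_SavePReg: record a Z or P register save at Offset.
  virtual void emitARM64WinCFISaveZReg(unsigned Reg, int Offset) {}
  virtual void emitARM64WinCFISavePReg(unsigned Reg, int Offset) {}
};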
8 changes: 8 additions & 0 deletions llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -606,6 +606,9 @@ def CSR_Win_AArch64_Arm64EC_Thunk : CalleeSavedRegs<(add (sequence "Q%u", 6, 15)
def CSR_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
X25, X26, X27, X28, LR, FP,
(sequence "Q%u", 8, 23))>;
def CSR_Win_AArch64_AAVPCS : CalleeSavedRegs<(add X19, X20, X21, X22, X23, X24,
X25, X26, X27, X28, FP, LR,
(sequence "Q%u", 8, 23))>;

// Functions taking SVE arguments or returning an SVE type
// must (additionally) preserve full Z8-Z23 and predicate registers P4-P15
@@ -619,6 +622,11 @@ def CSR_Darwin_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "Z%u", 8, 23),
LR, FP, X19, X20, X21, X22,
X23, X24, X25, X26, X27, X28)>;

def CSR_Win_AArch64_SVE_AAPCS : CalleeSavedRegs<(add (sequence "P%u", 4, 11),
(sequence "Z%u", 8, 23),
X19, X20, X21, X22, X23, X24,
X25, X26, X27, X28, FP, LR)>;

// SME ABI support routines such as __arm_tpidr2_save/restore preserve most registers.
def CSR_AArch64_SME_ABI_Support_Routines_PreserveMost_From_X0
: CalleeSavedRegs<(add (sequence "Z%u", 0, 31),
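TableGen expands each CalleeSavedRegs definition into a *_SaveList array plus a *_RegMask, so these additions provide CSR_Win_AArch64_AAVPCS_SaveList and CSR_Win_AArch64_SVE_AAPCS_SaveList (and the matching masks). The code that selects them is not shown in this excerpt; a plausible sketch of the wiring in AArch64RegisterInfo::getCalleeSavedRegs (assumed, not confirmed by this diff):

// Assumed selection logic; the actual hook-up lives outside this excerpt.
const MCPhysReg *
AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
  const AArch64Subtarget &ST = MF->getSubtarget<AArch64Subtarget>();
  if (MF->getFunction().getCallingConv() ==
      CallingConv::AArch64_SVE_VectorCall)
    return ST.isTargetWindows() ? CSR_Win_AArch64_SVE_AAPCS_SaveList
                                : CSR_AArch64_SVE_AAPCS_SaveList;
  // ... other calling conventions unchanged ...
  return CSR_AArch64_AAPCS_SaveList;
}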
178 changes: 158 additions & 20 deletions llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -44,6 +44,10 @@
// | |
// |-----------------------------------|
// | |
// | (Win64 only) callee-saved SVE reg |
// | |
// |-----------------------------------|
// | |
// | callee-saved gpr registers | <--.
// | | | On Darwin platforms these
// |- - - - - - - - - - - - - - - - - -| | callee saves are swapped,
@@ -1200,7 +1204,25 @@ static MachineBasicBlock::iterator InsertSEH(MachineBasicBlock::iterator MBBI,

switch (Opc) {
default:
llvm_unreachable("No SEH Opcode for this instruction");
report_fatal_error("No SEH Opcode for this instruction");
case AArch64::STR_ZXI:
case AArch64::LDR_ZXI: {
unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SaveZReg))
.addImm(Reg0)
.addImm(Imm)
.setMIFlag(Flag);
break;
}
case AArch64::STR_PXI:
case AArch64::LDR_PXI: {
unsigned Reg0 = RegInfo->getSEHRegNum(MBBI->getOperand(0).getReg());
MIB = BuildMI(MF, DL, TII.get(AArch64::SEH_SavePReg))
.addImm(Reg0)
.addImm(Imm)
.setMIFlag(Flag);
break;
}
case AArch64::LDPDpost:
Imm = -Imm;
[[fallthrough]];
@@ -1592,6 +1614,9 @@ static bool IsSVECalleeSave(MachineBasicBlock::iterator I) {
case AArch64::CMPNE_PPzZI_B:
return I->getFlag(MachineInstr::FrameSetup) ||
I->getFlag(MachineInstr::FrameDestroy);
case AArch64::SEH_SavePReg:
case AArch64::SEH_SaveZReg:
return true;
}
}

@@ -1874,12 +1899,48 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);

// Windows unwind can't represent the required stack adjustments if we have
// both SVE callee-saves and dynamic stack allocations, and the frame
// pointer is before the SVE spills. The allocation of the frame pointer
// must be the last instruction in the prologue so the unwinder can restore
// the stack pointer correctly. (And there isn't any unwind opcode for
// `addvl sp, x29, -17`.)
//
// Because of this, we do spills in the opposite order on Windows: first SVE,
// then GPRs. The main side-effect of this is that it makes accessing
// parameters passed on the stack more expensive.
//
// We could consider rearranging the spills for simpler cases.
bool FPAfterSVECalleeSaves =
Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
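// As a rough sketch (illustrative only, not the exact emitted sequence), the
// Windows prologue shape with SVE callee saves becomes:
//   1. allocate the fixed-object area plus the SVE callee-save area
//   2. store the Z/P callee saves
//   3. store the GPR callee saves, folding their allocation into the first
//      store (SP pre-decrement)
//   4. establish the frame pointer as the last prologue step,
// so the unwinder can always recover SP from FP.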

auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
// All of the remaining stack allocations are for locals.
AFI->setLocalStackSize(NumBytes - PrologueSaveSize);
bool CombineSPBump = shouldCombineCSRLocalStackBump(MF, NumBytes);
bool HomPrologEpilog = homogeneousPrologEpilog(MF);
if (CombineSPBump) {
if (FPAfterSVECalleeSaves) {
// If we're doing SVE saves first, we need to immediately allocate space
// for fixed objects, then space for the SVE callee saves.
//
// Windows unwind requires that the scalable size is a multiple of 16;
// that's handled when the callee-saved size is computed.
auto SaveSize =
StackOffset::getScalable(AFI->getSVECalleeSavedStackSize()) +
StackOffset::getFixed(FixedObject);
allocateStackSpace(MBB, MBBI, 0, SaveSize, NeedsWinCFI, &HasWinCFI,
/*EmitCFI=*/false, StackOffset{},
/*FollowupAllocs=*/true);
NumBytes -= FixedObject;

// Now allocate space for the GPR callee saves.
while (MBBI != End && IsSVECalleeSave(MBBI))
++MBBI;
MBBI = convertCalleeSaveRestoreToSPPrePostIncDec(
MBB, MBBI, DL, TII, -AFI->getCalleeSavedStackSize(), NeedsWinCFI,
&HasWinCFI, EmitAsyncCFI);
NumBytes -= AFI->getCalleeSavedStackSize();
} else if (CombineSPBump) {
assert(!SVEStackSize && "Cannot combine SP bump with SVE");
emitFrameOffset(MBB, MBBI, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(-NumBytes), TII,
@@ -1982,6 +2043,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
: 0;

if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
if (AFI->getSVECalleeSavedStackSize())
report_fatal_error(
"SVE callee saves not yet supported with stack probing");
uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
if (NeedsWinCFI) {
HasWinCFI = true;
@@ -2116,9 +2180,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
<< "\n");
// Find callee save instructions in frame.
CalleeSavesBegin = MBBI;
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
++MBBI;
if (!FPAfterSVECalleeSaves) {
assert(IsSVECalleeSave(CalleeSavesBegin) && "Unexpected instruction");
while (IsSVECalleeSave(MBBI) && MBBI != MBB.getFirstTerminator())
++MBBI;
}
CalleeSavesEnd = MBBI;

SVECalleeSavesSize = StackOffset::getScalable(CalleeSavedSize);
@@ -2129,9 +2195,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
StackOffset CFAOffset =
StackOffset::getFixed((int64_t)MFI.getStackSize() - NumBytes);
StackOffset LocalsSize = SVELocalsSize + StackOffset::getFixed(NumBytes);
allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || LocalsSize);
if (!FPAfterSVECalleeSaves) {
allocateStackSpace(MBB, CalleeSavesBegin, 0, SVECalleeSavesSize, false,
nullptr, EmitAsyncCFI && !HasFP, CFAOffset,
MFI.hasVarSizedObjects() || LocalsSize);
}
CFAOffset += SVECalleeSavesSize;

if (EmitAsyncCFI)
@@ -2303,10 +2371,16 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
assert(AfterCSRPopSize == 0);
return;
}

bool FPAfterSVECalleeSaves =
Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();

bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
bool CombineAfterCSRBump = false;
if (!CombineSPBump && PrologueSaveSize != 0) {
if (FPAfterSVECalleeSaves) {
AfterCSRPopSize = FixedObject;
} else if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
AArch64InstrInfo::isSEHInstruction(*Pop))
@@ -2339,7 +2413,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
while (LastPopI != Begin) {
--LastPopI;
if (!LastPopI->getFlag(MachineInstr::FrameDestroy) ||
IsSVECalleeSave(LastPopI)) {
(!FPAfterSVECalleeSaves && IsSVECalleeSave(LastPopI))) {
++LastPopI;
break;
} else if (CombineSPBump)
@@ -2415,6 +2489,9 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
StackOffset DeallocateBefore = {}, DeallocateAfter = SVEStackSize;
MachineBasicBlock::iterator RestoreBegin = LastPopI, RestoreEnd = LastPopI;
if (int64_t CalleeSavedSize = AFI->getSVECalleeSavedStackSize()) {
if (FPAfterSVECalleeSaves)
RestoreEnd = MBB.getFirstTerminator();

RestoreBegin = std::prev(RestoreEnd);
while (RestoreBegin != MBB.begin() &&
IsSVECalleeSave(std::prev(RestoreBegin)))
@@ -2430,7 +2507,31 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
}

// Deallocate the SVE area.
if (SVEStackSize) {
if (FPAfterSVECalleeSaves) {
// If the callee-save area is before FP, restoring the FP implicitly
// deallocates non-callee-save SVE allocations. Otherwise, deallocate
// them explicitly.
if (!AFI->isStackRealigned() && !MFI.hasVarSizedObjects()) {
emitFrameOffset(MBB, LastPopI, DL, AArch64::SP, AArch64::SP,
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI);
}

// Deallocate callee-save non-SVE registers.
emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

// Deallocate fixed objects.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(FixedObject), TII,
MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);

// Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
NeedsWinCFI, &HasWinCFI);
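// Placement summary (illustrative): the SVE-local deallocation above is
// emitted only when SP is not later restored from FP; the GPR callee-save
// deallocation is inserted before the SVE register reloads (at RestoreBegin),
// and the fixed-object and SVE callee-save deallocations land after them
// (at RestoreEnd), just before the return.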
} else if (SVEStackSize) {
// If we have stack realignment or variable sized objects on the stack,
// restore the stack pointer from the frame pointer prior to SVE CSR
// restoration.
Expand All @@ -2450,20 +2551,20 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
emitFrameOffset(
MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(NumBytes), TII, MachineInstr::FrameDestroy,
false, false, nullptr, EmitCFI && !hasFP(MF),
false, NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
SVEStackSize + StackOffset::getFixed(NumBytes + PrologueSaveSize));
NumBytes = 0;
}

emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
DeallocateBefore, TII, MachineInstr::FrameDestroy, false,
false, nullptr, EmitCFI && !hasFP(MF),
NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
SVEStackSize +
StackOffset::getFixed(NumBytes + PrologueSaveSize));

emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
false, nullptr, EmitCFI && !hasFP(MF),
NeedsWinCFI, &HasWinCFI, EmitCFI && !hasFP(MF),
DeallocateAfter +
StackOffset::getFixed(NumBytes + PrologueSaveSize));
}
@@ -2587,7 +2688,12 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
return StackOffset::getFixed(ObjectOffset - getOffsetOfLocalArea());

const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
bool FPAfterSVECalleeSaves =
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();
if (MFI.getStackID(FI) == TargetStackID::ScalableVector) {
if (FPAfterSVECalleeSaves &&
-ObjectOffset <= (int64_t)AFI->getSVECalleeSavedStackSize())
return StackOffset::get(0, ObjectOffset);

Review comment (nit) — suggested change:
- return StackOffset::get(0, ObjectOffset);
+ return StackOffset::getScalable(ObjectOffset);
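For context on the suggestion: StackOffset keeps separate fixed and scalable components, so StackOffset::get(0, ObjectOffset) and StackOffset::getScalable(ObjectOffset) denote the same value; the suggested spelling only makes the zero fixed part explicit. A minimal sketch (assuming LLVM's StackOffset from llvm/Support/TypeSize.h):

#include "llvm/Support/TypeSize.h"
#include <cassert>
#include <cstdint>

// Both spellings build the same offset: zero fixed bytes plus Off scalable
// bytes, as the component accessors used elsewhere in this patch show.
void checkSpellings(int64_t Off) {
  llvm::StackOffset A = llvm::StackOffset::get(/*Fixed=*/0, /*Scalable=*/Off);
  llvm::StackOffset B = llvm::StackOffset::getScalable(Off);
  assert(A.getFixed() == B.getFixed() && A.getScalable() == B.getScalable());
}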

return StackOffset::get(-((int64_t)AFI->getCalleeSavedStackSize()),
ObjectOffset);
}
@@ -2597,8 +2703,12 @@ AArch64FrameLowering::getFrameIndexReferenceFromSP(const MachineFunction &MF,
!IsFixed && ObjectOffset >= -((int)AFI->getCalleeSavedStackSize(MFI));

StackOffset ScalableOffset = {};
if (!IsFixed && !IsCSR)
if (!IsFixed && !IsCSR) {
ScalableOffset = -SVEStackSize;
} else if (FPAfterSVECalleeSaves && IsCSR) {
ScalableOffset =
-StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
}

return StackOffset::getFixed(ObjectOffset) + ScalableOffset;
}
@@ -2736,13 +2846,21 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
"In the presence of dynamic stack pointer realignment, "
"non-argument/CSR objects cannot be accessed through the frame pointer");

bool FPAfterSVECalleeSaves =
isTargetWindows(MF) && AFI->getSVECalleeSavedStackSize();

if (isSVE) {
assert(-ObjectOffset > (int64_t)AFI->getSVECalleeSavedStackSize() &&
"Math isn't correct for CSRs with FPAfterSVECalleeSaves");
StackOffset FPOffset =
StackOffset::get(-AFI->getCalleeSaveBaseToFrameRecordOffset(), ObjectOffset);
StackOffset SPOffset =
SVEStackSize +
StackOffset::get(MFI.getStackSize() - AFI->getCalleeSavedStackSize(),
ObjectOffset);
if (FPAfterSVECalleeSaves) {
FPOffset += StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
}
// Always use the FP for SVE spills if available and beneficial.
if (hasFP(MF) && (SPOffset.getFixed() ||
FPOffset.getScalable() < SPOffset.getScalable() ||
@@ -2757,10 +2875,28 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
}

StackOffset ScalableOffset = {};
if (UseFP && !(isFixed || isCSR))
ScalableOffset = -SVEStackSize;
if (!UseFP && (isFixed || isCSR))
ScalableOffset = SVEStackSize;
if (FPAfterSVECalleeSaves) {
// In this stack layout, the FP is in between the callee saves and other
// SVE allocations.
StackOffset SVECalleeSavedStack =
StackOffset::getScalable(AFI->getSVECalleeSavedStackSize());
if (UseFP) {
if (isFixed)
ScalableOffset = SVECalleeSavedStack;
else if (!isCSR)
ScalableOffset = SVECalleeSavedStack - SVEStackSize;
} else {
if (isFixed)
ScalableOffset = SVEStackSize;
else if (isCSR)
ScalableOffset = SVEStackSize - SVECalleeSavedStack;
}
} else {
if (UseFP && !(isFixed || isCSR))
ScalableOffset = -SVEStackSize;
if (!UseFP && (isFixed || isCSR))
ScalableOffset = SVEStackSize;
}
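// Worked example (illustrative numbers): suppose the SVE callee-save area is
// 2*VL bytes and the whole SVE area is 5*VL (so 3*VL of SVE locals), with the
// FP sitting between the two as described above. Then:
//  - a fixed (argument) object addressed via FP needs +2*VL of scalable
//    adjustment (stepping over the SVE callee saves),
//  - a non-CSR local addressed via FP needs 2*VL - 5*VL = -3*VL (stepping
//    down past the SVE locals),
//  - addressed via SP, a fixed object needs the full +5*VL and a CSR object
//    needs 5*VL - 2*VL = +3*VL.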

if (UseFP) {
FrameReg = RegInfo->getFrameRegister(MF);
@@ -2934,7 +3070,9 @@ static void computeCalleeSaveRegisterPairs(
RegInc = -1;
FirstReg = Count - 1;
}
int ScalableByteOffset = AFI->getSVECalleeSavedStackSize();
bool FPAfterSVECalleeSaves = IsWindows && AFI->getSVECalleeSavedStackSize();
int ScalableByteOffset =
FPAfterSVECalleeSaves ? 0 : AFI->getSVECalleeSavedStackSize();
bool NeedGapToAlignStack = AFI->hasCalleeSaveStackFreeSpace();
Register LastReg = 0;
