Skip to content

Commit 3fb8c5b

Browse files
authored
[X86] Fix invalid instructions on x32 with large stack frames (#124041)
`X86FrameLowering::emitSPUpdate()` assumes that 64-bit targets use a 64-bit stack pointer, but that's not true on x32. When checking the stack pointer size, we need to look at `Uses64BitFramePtr` rather than `Is64Bit`. This avoids generating invalid instructions like `add esp, rcx`. For impossibly-large stack frames (4 GiB or larger with a 32-bit stack pointer), we were also generating invalid instructions like `mov eax, 5000000000`. The inline stack probe code already had a check for that situation; I've moved the check into `emitSPUpdate()`, so any attempt to allocate a 4 GiB stack frame with a 32-bit stack pointer will now trap rather than adjusting ESP by the wrong amount. This also fixes the "can't have 32-bit 16GB stack frame" assertion, which used to be triggerable by user code but is now correct. To help catch situations like this in the future, I've added `-verify-machineinstrs` to the stack clash tests that generate large stack frames. This fixes the expensive-checks buildbot failure caused by #113219.
1 parent 220004d commit 3fb8c5b

File tree

3 files changed

+19
-16
lines changed

3 files changed

+19
-16
lines changed

llvm/lib/Target/X86/X86FrameLowering.cpp

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -253,17 +253,19 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
253253
// Rather than emit a long series of instructions for large offsets,
254254
// load the offset into a register and do one sub/add
255255
unsigned Reg = 0;
256-
unsigned Rax = (unsigned)(Is64Bit ? X86::RAX : X86::EAX);
256+
unsigned Rax = (unsigned)(Uses64BitFramePtr ? X86::RAX : X86::EAX);
257257

258258
if (isSub && !isEAXLiveIn(MBB))
259259
Reg = Rax;
260260
else
261-
Reg = TRI->findDeadCallerSavedReg(MBB, MBBI);
261+
Reg = getX86SubSuperRegister(TRI->findDeadCallerSavedReg(MBB, MBBI),
262+
Uses64BitFramePtr ? 64 : 32);
262263

263-
unsigned AddSubRROpc =
264-
isSub ? getSUBrrOpcode(Is64Bit) : getADDrrOpcode(Is64Bit);
264+
unsigned AddSubRROpc = isSub ? getSUBrrOpcode(Uses64BitFramePtr)
265+
: getADDrrOpcode(Uses64BitFramePtr);
265266
if (Reg) {
266-
BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Reg)
267+
BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)),
268+
Reg)
267269
.addImm(Offset)
268270
.setMIFlag(Flag);
269271
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(AddSubRROpc), StackPtr)
@@ -279,7 +281,7 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
279281
// addq %rsp, %rax
280282
// xchg %rax, (%rsp)
281283
// movq (%rsp), %rsp
282-
assert(Is64Bit && "can't have 32-bit 16GB stack frame");
284+
assert(Uses64BitFramePtr && "can't have 32-bit 16GB stack frame");
283285
BuildMI(MBB, MBBI, DL, TII.get(X86::PUSH64r))
284286
.addReg(Rax, RegState::Kill)
285287
.setMIFlag(Flag);
@@ -289,7 +291,8 @@ void X86FrameLowering::emitSPUpdate(MachineBasicBlock &MBB,
289291
Offset = -(Offset - SlotSize);
290292
else
291293
Offset = Offset + SlotSize;
292-
BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Is64Bit, Offset)), Rax)
294+
BuildMI(MBB, MBBI, DL, TII.get(getMOVriOpcode(Uses64BitFramePtr, Offset)),
295+
Rax)
293296
.addImm(Offset)
294297
.setMIFlag(Flag);
295298
MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(X86::ADD64rr), Rax)

llvm/test/CodeGen/X86/stack-clash-extra-huge.ll

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
2-
; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s
3-
; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s
4-
; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s
2+
; RUN: llc -mtriple=x86_64-linux-android -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-X64 %s
3+
; RUN: llc -mtriple=i686-linux-android -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-X86 %s
4+
; RUN: llc -mtriple=x86_64-linux-gnux32 -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-X32 %s
55

66
define i32 @foo() local_unnamed_addr #0 {
77
; CHECK-X64-LABEL: foo:
@@ -66,8 +66,8 @@ define i32 @foo() local_unnamed_addr #0 {
6666
; CHECK-X32-NEXT: movl $1, 264(%esp)
6767
; CHECK-X32-NEXT: movl $1, 28664(%esp)
6868
; CHECK-X32-NEXT: movl -128(%esp), %eax
69-
; CHECK-X32-NEXT: movabsq $4799999880, %rcx # imm = 0x11E1A2F88
70-
; CHECK-X32-NEXT: addq %rcx, %esp
69+
; CHECK-X32-NEXT: movl $4799999880, %ecx # imm = 0x11E1A2F88
70+
; CHECK-X32-NEXT: addl %ecx, %esp
7171
; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
7272
; CHECK-X32-NEXT: retq
7373
%a = alloca i32, i64 1200000000, align 16

llvm/test/CodeGen/X86/stack-clash-huge.ll

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --no_x86_scrub_sp
2-
; RUN: llc -mtriple=x86_64-linux-android < %s | FileCheck -check-prefix=CHECK-X64 %s
3-
; RUN: llc -mtriple=i686-linux-android < %s | FileCheck -check-prefix=CHECK-X86 %s
4-
; RUN: llc -mtriple=x86_64-linux-gnux32 < %s | FileCheck -check-prefix=CHECK-X32 %s
2+
; RUN: llc -mtriple=x86_64-linux-android -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-X64 %s
3+
; RUN: llc -mtriple=i686-linux-android -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-X86 %s
4+
; RUN: llc -mtriple=x86_64-linux-gnux32 -verify-machineinstrs < %s | FileCheck -check-prefix=CHECK-X32 %s
55

66
define i32 @foo() local_unnamed_addr #0 {
77
; CHECK-X64-LABEL: foo:
@@ -69,7 +69,7 @@ define i32 @foo() local_unnamed_addr #0 {
6969
; CHECK-X32-NEXT: movl $1, 28664(%esp)
7070
; CHECK-X32-NEXT: movl -128(%esp), %eax
7171
; CHECK-X32-NEXT: movl $2399999880, %ecx # imm = 0x8F0D1788
72-
; CHECK-X32-NEXT: addq %rcx, %esp
72+
; CHECK-X32-NEXT: addl %ecx, %esp
7373
; CHECK-X32-NEXT: .cfi_def_cfa_offset 8
7474
; CHECK-X32-NEXT: retq
7575
%a = alloca i32, i64 600000000, align 16

0 commit comments

Comments
 (0)