[RISCV][GISel] Add FP calling convention support #69138

Merged: 2 commits, Oct 25, 2023
91 changes: 91 additions & 0 deletions llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -94,11 +94,60 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {

void assignValueToReg(Register ValVReg, Register PhysReg,
const CCValAssign &VA) override {
// If we're passing an f32 value into an i64, anyextend before copying.
if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
Review comment (Collaborator):
It looks like this bit of code could be handled by setting LocInfo to CCValAssign::AExt and letting it fall through instead? Not sure what else that would imply, so take it with a bucket of salt.

Reply (@topperc, author, Oct 25, 2023):
I think so too, but it would require moving some code in SelectionDAG if we fix the original VA. Or we would need to modify the VA and pass the modified version on.

ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(64), ValVReg).getReg(0);

Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
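For reference, here is a minimal sketch of the alternative floated in the review thread above: rebuild the assignment with LocInfo set to CCValAssign::AExt so the generic extendRegister emits the G_ANYEXT itself. The helper name is hypothetical and this is an illustration only, not what the patch does; whether a locally rebuilt VA can safely be passed on is exactly the open question in that thread.

// --- sketch, not part of the patch: a hypothetical alternative body ---
void assignValueToRegViaAExt(Register ValVReg, Register PhysReg,
                             const CCValAssign &VA) {
  CCValAssign LocVA = VA;
  // Rebuild the assignment with LocInfo == AExt instead of special-casing it.
  if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32 && VA.isRegLoc())
    LocVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(), VA.getLocReg(),
                                VA.getLocVT(), CCValAssign::AExt);
  // With AExt set, the generic extendRegister builds the G_ANYEXT to i64;
  // when LocVT and ValVT have the same size it returns ValVReg unchanged.
  Register ExtReg = extendRegister(ValVReg, LocVA);
  MIRBuilder.buildCopy(PhysReg, ExtReg);
  MIB.addUse(PhysReg, RegState::Implicit);
}
// --- end sketch ---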

unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) override {
assert(VAs.size() >= 2 && "Expected at least 2 VAs.");
const CCValAssign &VALo = VAs[0];
Review comment (Collaborator):
Add an assert on VAs.size() being 2.

Reply (Collaborator, author):
The size isn't == 2; it is >= 2. The caller has no idea how many entries this handler needs, so it keeps calling slice on an ArrayRef of all the VAs to drop the ones that have been consumed. I'll add the assert for >= 2.

const CCValAssign &VAHi = VAs[1];

assert(VAHi.needsCustom() && "Value doesn't need custom handling");
assert(VALo.getValNo() == VAHi.getValNo() &&
"Values belong to different arguments");

assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 &&
VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 &&
"unexpected custom value");

Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
MRI.createGenericVirtualRegister(LLT::scalar(32))};
MIRBuilder.buildUnmerge(NewRegs, Arg.Regs[0]);

if (VAHi.isMemLoc()) {
LLT MemTy(VAHi.getLocVT());

MachinePointerInfo MPO;
Register StackAddr = getStackAddress(
MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]);

assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO,
const_cast<CCValAssign &>(VAHi));
}

auto assignFunc = [=]() {
assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo);
if (VAHi.isRegLoc())
assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi);
};

if (Thunk) {
*Thunk = assignFunc;
return 1;
}

assignFunc();
return 1;
}
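To make the ">= 2" discussion above concrete, the generic CallLowering assignment loop hands each custom-handled value the remaining CCValAssigns via ArrayRef::slice and then skips however many entries the handler reports it consumed. The following is a rough paraphrase of that caller from memory, not a verbatim quote; the local names are the generic loop's.

// --- sketch: rough paraphrase of the generic caller in CallLowering ---
if (VA.needsCustom()) {
  std::function<void()> Thunk;
  // Hand the handler everything from the current VA onward; it decides how
  // many entries it needs and returns that count.
  unsigned NumArgRegs = Handler.assignCustomValue(
      Args[i], ArrayRef(ArgLocs).slice(j), &Thunk);
  if (Thunk)
    DelayedOutgoingRegAssignments.emplace_back(Thunk);
  if (!NumArgRegs)
    return false;
  j += NumArgRegs - 1; // skip the extra VAs this handler consumed
  continue;
}
// --- end sketch ---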

private:
MachineInstrBuilder MIB;

@@ -170,6 +219,44 @@ struct RISCVIncomingValueHandler : public CallLowering::IncomingValueHandler {
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}

unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) override {
assert(VAs.size() >= 2 && "Expected at least 2 VAs.");
const CCValAssign &VALo = VAs[0];
Review comment (Collaborator):
Same assert.

const CCValAssign &VAHi = VAs[1];

assert(VAHi.needsCustom() && "Value doesn't need custom handling");
assert(VALo.getValNo() == VAHi.getValNo() &&
"Values belong to different arguments");

assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 &&
VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 &&
"unexpected custom value");

Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
MRI.createGenericVirtualRegister(LLT::scalar(32))};

if (VAHi.isMemLoc()) {
LLT MemTy(VAHi.getLocVT());

MachinePointerInfo MPO;
Register StackAddr = getStackAddress(
MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]);

assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO,
const_cast<CCValAssign &>(VAHi));
}

assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo);
if (VAHi.isRegLoc())
assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi);

MIRBuilder.buildMergeLikeInstr(Arg.Regs[0], NewRegs);

return 1;
}

/// How the physical register gets marked varies between formal
/// parameters (it's a basic-block live-in), and a call instruction
/// (it's an implicit-def of the BL).
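As a reminder of what that comment refers to, the two incoming-value subclasses typically override a markPhysRegUsed hook along the lines below. The class and member names are assumed from the surrounding file rather than shown in this hunk, so treat this as a sketch, not part of the patch.

// --- sketch: assumed shape of the overrides, which sit outside this hunk ---
// Formal arguments: the physical register is live into the entry block.
void RISCVFormalArgHandler::markPhysRegUsed(MCRegister PhysReg) {
  MIRBuilder.getMRI()->addLiveIn(PhysReg);
  MIRBuilder.getMBB().addLiveIn(PhysReg);
}

// Call returns: the physical register is an implicit def of the call.
void RISCVCallReturnHandler::markPhysRegUsed(MCRegister PhysReg) {
  MIB.addDef(PhysReg, RegState::Implicit);
}
// --- end sketch ---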
@@ -212,6 +299,8 @@ static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget) {
// supported yet.
if (T->isIntegerTy())
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
if (T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
return true;
return false;
@@ -223,6 +312,8 @@ static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget) {
// supported yet.
if (T->isIntegerTy())
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
if (T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
return true;

@@ -0,0 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator \
; RUN: -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f \
; RUN: -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s

; This file contains tests that should have identical output for the ilp32
; and ilp32f ABIs.

; Check that on RV32 ilp32[f], double is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.

define i32 @callee_double_in_regs(i32 %a, double %b) nounwind {
; RV32I-LABEL: name: callee_double_in_regs
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: liveins: $x10, $x11, $x12
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; RV32I-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[MV]](s64)
; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[FPTOSI]]
; RV32I-NEXT: $x10 = COPY [[ADD]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%b_fptosi = fptosi double %b to i32
%1 = add i32 %a, %b_fptosi
ret i32 %1
}

define i32 @caller_double_in_regs() nounwind {
; RV32I-LABEL: name: caller_double_in_regs
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
; RV32I-NEXT: $x10 = COPY [[C]](s32)
; RV32I-NEXT: $x11 = COPY [[UV]](s32)
; RV32I-NEXT: $x12 = COPY [[UV1]](s32)
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_double_in_regs, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit-def $x10
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: $x10 = COPY [[COPY]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = call i32 @callee_double_in_regs(i32 1, double 2.0)
ret i32 %1
}

define double @callee_small_scalar_ret() nounwind {
; RV32I-LABEL: name: callee_small_scalar_ret
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
ret double 1.0
}

define i64 @caller_small_scalar_ret() nounwind {
; RV32I-LABEL: name: caller_small_scalar_ret
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_small_scalar_ret, implicit-def $x1, implicit-def $x10, implicit-def $x11
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
%1 = call double @callee_small_scalar_ret()
%2 = bitcast double %1 to i64
ret i64 %2
}
121 changes: 121 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32.ll
@@ -0,0 +1,121 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator \
; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=RV32I %s

; Any tests that would have identical output for some combination of the ilp32*
; ABIs belong in calling-conv-*-common.ll. This file contains tests that will
; have different output across those ABIs, i.e. where some arguments would be
; passed according to the floating-point ABI.

define i32 @callee_float_in_regs(i32 %a, float %b) nounwind {
; RV32I-LABEL: name: callee_float_in_regs
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: liveins: $x10, $x11
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32I-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY1]](s32)
; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[FPTOSI]]
; RV32I-NEXT: $x10 = COPY [[ADD]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%b_fptosi = fptosi float %b to i32
%1 = add i32 %a, %b_fptosi
ret i32 %1
}

define i32 @caller_float_in_regs() nounwind {
; RV32I-LABEL: name: caller_float_in_regs
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
; RV32I-NEXT: $x10 = COPY [[C]](s32)
; RV32I-NEXT: $x11 = COPY [[C1]](s32)
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_float_in_regs, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: $x10 = COPY [[COPY]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = call i32 @callee_float_in_regs(i32 1, float 2.0)
ret i32 %1
}

define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) nounwind {
; RV32I-LABEL: name: callee_float_on_stack
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
; RV32I-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
; RV32I-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV3]](s64)
; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[LOAD]]
; RV32I-NEXT: $x10 = COPY [[ADD]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = trunc i64 %d to i32
%2 = bitcast float %e to i32
%3 = add i32 %1, %2
ret i32 %3
}

define i32 @caller_float_on_stack() nounwind {
; RV32I-LABEL: name: caller_float_on_stack
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; RV32I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; RV32I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; RV32I-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
; RV32I-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; RV32I-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; RV32I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s32)
; RV32I-NEXT: G_STORE [[C4]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
; RV32I-NEXT: $x12 = COPY [[UV2]](s32)
; RV32I-NEXT: $x13 = COPY [[UV3]](s32)
; RV32I-NEXT: $x14 = COPY [[UV4]](s32)
; RV32I-NEXT: $x15 = COPY [[UV5]](s32)
; RV32I-NEXT: $x16 = COPY [[UV6]](s32)
; RV32I-NEXT: $x17 = COPY [[UV7]](s32)
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_float_on_stack, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: $x10 = COPY [[COPY1]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0)
ret i32 %1
}

define float @callee_tiny_scalar_ret() nounwind {
; RV32I-LABEL: name: callee_tiny_scalar_ret
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; RV32I-NEXT: $x10 = COPY [[C]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
ret float 1.0
}

define i32 @caller_tiny_scalar_ret() nounwind {
; RV32I-LABEL: name: caller_tiny_scalar_ret
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_tiny_scalar_ret, implicit-def $x1, implicit-def $x10
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: $x10 = COPY [[COPY]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = call float @callee_tiny_scalar_ret()
%2 = bitcast float %1 to i32
ret i32 %2
}