-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[RISCV][GISel] Add FP calling convention support #69138
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -94,11 +94,60 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler { | |
|
||
void assignValueToReg(Register ValVReg, Register PhysReg, | ||
const CCValAssign &VA) override { | ||
// If we're passing an f32 value into an i64, anyextend before copying. | ||
if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32) | ||
ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(64), ValVReg).getReg(0); | ||
|
||
Register ExtReg = extendRegister(ValVReg, VA); | ||
MIRBuilder.buildCopy(PhysReg, ExtReg); | ||
MIB.addUse(PhysReg, RegState::Implicit); | ||
} | ||
|
||
unsigned assignCustomValue(CallLowering::ArgInfo &Arg, | ||
ArrayRef<CCValAssign> VAs, | ||
std::function<void()> *Thunk) override { | ||
assert(VAs.size() >= 2 && "Expected at least 2 VAs."); | ||
const CCValAssign &VALo = VAs[0]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Add an assert on VAs.size() being 2. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The size isn't ==2. It is >=2. The caller has no idea how many entries this handler needs. It's continuously calling |
||
const CCValAssign &VAHi = VAs[1]; | ||
|
||
assert(VAHi.needsCustom() && "Value doesn't need custom handling"); | ||
assert(VALo.getValNo() == VAHi.getValNo() && | ||
"Values belong to different arguments"); | ||
|
||
assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 && | ||
VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 && | ||
"unexpected custom value"); | ||
|
||
Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), | ||
MRI.createGenericVirtualRegister(LLT::scalar(32))}; | ||
MIRBuilder.buildUnmerge(NewRegs, Arg.Regs[0]); | ||
|
||
if (VAHi.isMemLoc()) { | ||
LLT MemTy(VAHi.getLocVT()); | ||
|
||
MachinePointerInfo MPO; | ||
Register StackAddr = getStackAddress( | ||
MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]); | ||
|
||
assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO, | ||
const_cast<CCValAssign &>(VAHi)); | ||
} | ||
|
||
auto assignFunc = [=]() { | ||
assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo); | ||
if (VAHi.isRegLoc()) | ||
assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi); | ||
}; | ||
|
||
if (Thunk) { | ||
*Thunk = assignFunc; | ||
return 1; | ||
} | ||
|
||
assignFunc(); | ||
return 1; | ||
} | ||
|
||
private: | ||
MachineInstrBuilder MIB; | ||
|
||
|
@@ -170,6 +219,44 @@ struct RISCVIncomingValueHandler : public CallLowering::IncomingValueHandler { | |
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA); | ||
} | ||
|
||
unsigned assignCustomValue(CallLowering::ArgInfo &Arg, | ||
ArrayRef<CCValAssign> VAs, | ||
std::function<void()> *Thunk) override { | ||
assert(VAs.size() >= 2 && "Expected at least 2 VAs."); | ||
const CCValAssign &VALo = VAs[0]; | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Same assert. |
||
const CCValAssign &VAHi = VAs[1]; | ||
|
||
assert(VAHi.needsCustom() && "Value doesn't need custom handling"); | ||
assert(VALo.getValNo() == VAHi.getValNo() && | ||
"Values belong to different arguments"); | ||
|
||
assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 && | ||
VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 && | ||
"unexpected custom value"); | ||
|
||
Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)), | ||
MRI.createGenericVirtualRegister(LLT::scalar(32))}; | ||
|
||
if (VAHi.isMemLoc()) { | ||
LLT MemTy(VAHi.getLocVT()); | ||
|
||
MachinePointerInfo MPO; | ||
Register StackAddr = getStackAddress( | ||
MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]); | ||
|
||
assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO, | ||
const_cast<CCValAssign &>(VAHi)); | ||
} | ||
|
||
assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo); | ||
if (VAHi.isRegLoc()) | ||
assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi); | ||
|
||
MIRBuilder.buildMergeLikeInstr(Arg.Regs[0], NewRegs); | ||
|
||
return 1; | ||
} | ||
|
||
/// How the physical register gets marked varies between formal | ||
/// parameters (it's a basic-block live-in), and a call instruction | ||
/// (it's an implicit-def of the BL). | ||
|
@@ -212,6 +299,8 @@ static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget) { | |
// supported yet. | ||
if (T->isIntegerTy()) | ||
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2; | ||
if (T->isFloatTy() || T->isDoubleTy()) | ||
return true; | ||
if (T->isPointerTy()) | ||
return true; | ||
return false; | ||
|
@@ -223,6 +312,8 @@ static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget) { | |
// supported yet. | ||
if (T->isIntegerTy()) | ||
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2; | ||
if (T->isFloatTy() || T->isDoubleTy()) | ||
return true; | ||
if (T->isPointerTy()) | ||
return true; | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,75 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 | ||
; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator \ | ||
; RUN: -verify-machineinstrs < %s \ | ||
; RUN: | FileCheck -check-prefix=RV32I %s | ||
; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f \ | ||
; RUN: -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \ | ||
; RUN: | FileCheck -check-prefix=RV32I %s | ||
|
||
; This file contains tests that should have identical output for the ilp32, | ||
; and ilp32f. | ||
|
||
; Check that on RV32 ilp32[f], double is passed in a pair of registers. Unlike | ||
; the convention for varargs, this need not be an aligned pair. | ||
|
||
define i32 @callee_double_in_regs(i32 %a, double %b) nounwind { | ||
; RV32I-LABEL: name: callee_double_in_regs | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: liveins: $x10, $x11, $x12 | ||
; RV32I-NEXT: {{ $}} | ||
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 | ||
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 | ||
; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 | ||
; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32) | ||
; RV32I-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[MV]](s64) | ||
; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[FPTOSI]] | ||
; RV32I-NEXT: $x10 = COPY [[ADD]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10 | ||
%b_fptosi = fptosi double %b to i32 | ||
%1 = add i32 %a, %b_fptosi | ||
ret i32 %1 | ||
} | ||
|
||
define i32 @caller_double_in_regs() nounwind { | ||
; RV32I-LABEL: name: caller_double_in_regs | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 | ||
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00 | ||
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) | ||
; RV32I-NEXT: $x10 = COPY [[C]](s32) | ||
; RV32I-NEXT: $x11 = COPY [[UV]](s32) | ||
; RV32I-NEXT: $x12 = COPY [[UV1]](s32) | ||
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_double_in_regs, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit-def $x10 | ||
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 | ||
; RV32I-NEXT: $x10 = COPY [[COPY]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10 | ||
%1 = call i32 @callee_double_in_regs(i32 1, double 2.0) | ||
ret i32 %1 | ||
} | ||
|
||
define double @callee_small_scalar_ret() nounwind { | ||
; RV32I-LABEL: name: callee_small_scalar_ret | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00 | ||
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) | ||
; RV32I-NEXT: $x10 = COPY [[UV]](s32) | ||
; RV32I-NEXT: $x11 = COPY [[UV1]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11 | ||
ret double 1.0 | ||
} | ||
|
||
define i64 @caller_small_scalar_ret() nounwind { | ||
; RV32I-LABEL: name: caller_small_scalar_ret | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_small_scalar_ret, implicit-def $x1, implicit-def $x10, implicit-def $x11 | ||
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 | ||
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 | ||
; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) | ||
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64) | ||
; RV32I-NEXT: $x10 = COPY [[UV]](s32) | ||
; RV32I-NEXT: $x11 = COPY [[UV1]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11 | ||
%1 = call double @callee_small_scalar_ret() | ||
%2 = bitcast double %1 to i64 | ||
ret i64 %2 | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3 | ||
; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator \ | ||
; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=RV32I %s | ||
|
||
; Any tests that would have identical output for some combination of the ilp32* | ||
; ABIs belong in calling-conv-*-common.ll. This file contains tests that will | ||
; have different output across those ABIs. i.e. where some arguments would be | ||
; passed according to the floating point ABI. | ||
|
||
define i32 @callee_float_in_regs(i32 %a, float %b) nounwind { | ||
; RV32I-LABEL: name: callee_float_in_regs | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: liveins: $x10, $x11 | ||
; RV32I-NEXT: {{ $}} | ||
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 | ||
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 | ||
; RV32I-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY1]](s32) | ||
; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[FPTOSI]] | ||
; RV32I-NEXT: $x10 = COPY [[ADD]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10 | ||
%b_fptosi = fptosi float %b to i32 | ||
%1 = add i32 %a, %b_fptosi | ||
ret i32 %1 | ||
} | ||
|
||
define i32 @caller_float_in_regs() nounwind { | ||
; RV32I-LABEL: name: caller_float_in_regs | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1 | ||
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00 | ||
; RV32I-NEXT: $x10 = COPY [[C]](s32) | ||
; RV32I-NEXT: $x11 = COPY [[C1]](s32) | ||
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_float_in_regs, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10 | ||
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 | ||
; RV32I-NEXT: $x10 = COPY [[COPY]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10 | ||
%1 = call i32 @callee_float_in_regs(i32 1, float 2.0) | ||
ret i32 %1 | ||
} | ||
|
||
define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) nounwind { | ||
; RV32I-LABEL: name: callee_float_on_stack | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17 | ||
; RV32I-NEXT: {{ $}} | ||
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 | ||
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11 | ||
; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32) | ||
; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12 | ||
; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13 | ||
; RV32I-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32) | ||
; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14 | ||
; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15 | ||
; RV32I-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32) | ||
; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16 | ||
; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17 | ||
; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32) | ||
; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0 | ||
; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16) | ||
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV3]](s64) | ||
; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[LOAD]] | ||
; RV32I-NEXT: $x10 = COPY [[ADD]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10 | ||
%1 = trunc i64 %d to i32 | ||
%2 = bitcast float %e to i32 | ||
%3 = add i32 %1, %2 | ||
ret i32 %3 | ||
} | ||
|
||
define i32 @caller_float_on_stack() nounwind { | ||
; RV32I-LABEL: name: caller_float_on_stack | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1 | ||
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2 | ||
; RV32I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3 | ||
; RV32I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4 | ||
; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00 | ||
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64) | ||
; RV32I-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64) | ||
; RV32I-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64) | ||
; RV32I-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64) | ||
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2 | ||
; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 | ||
; RV32I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s32) | ||
; RV32I-NEXT: G_STORE [[C4]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16) | ||
; RV32I-NEXT: $x10 = COPY [[UV]](s32) | ||
; RV32I-NEXT: $x11 = COPY [[UV1]](s32) | ||
; RV32I-NEXT: $x12 = COPY [[UV2]](s32) | ||
; RV32I-NEXT: $x13 = COPY [[UV3]](s32) | ||
; RV32I-NEXT: $x14 = COPY [[UV4]](s32) | ||
; RV32I-NEXT: $x15 = COPY [[UV5]](s32) | ||
; RV32I-NEXT: $x16 = COPY [[UV6]](s32) | ||
; RV32I-NEXT: $x17 = COPY [[UV7]](s32) | ||
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_float_on_stack, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10 | ||
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10 | ||
; RV32I-NEXT: $x10 = COPY [[COPY1]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10 | ||
%1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0) | ||
ret i32 %1 | ||
} | ||
|
||
define float @callee_tiny_scalar_ret() nounwind { | ||
; RV32I-LABEL: name: callee_tiny_scalar_ret | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00 | ||
; RV32I-NEXT: $x10 = COPY [[C]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10 | ||
ret float 1.0 | ||
} | ||
|
||
define i32 @caller_tiny_scalar_ret() nounwind { | ||
; RV32I-LABEL: name: caller_tiny_scalar_ret | ||
; RV32I: bb.1 (%ir-block.0): | ||
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_tiny_scalar_ret, implicit-def $x1, implicit-def $x10 | ||
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10 | ||
; RV32I-NEXT: $x10 = COPY [[COPY]](s32) | ||
; RV32I-NEXT: PseudoRET implicit $x10 | ||
%1 = call float @callee_tiny_scalar_ret() | ||
%2 = bitcast float %1 to i32 | ||
ret i32 %2 | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It looks like maybe this bit of code could be handled by setting LocInfo to CCValAssign::AExt, and letting it fallthrough instead? Not sure about what else that means though, so take with bucket of salt.
Uh oh!
There was an error while loading. Please reload this page.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think so too, but it would require moving some code in SelectionDAG if we fix the original VA. Or we need to modify the VA and pass the modified version on.