[RISCV][GISel] Add FP calling convention support #69138

Merged: 2 commits, Oct 25, 2023
91 changes: 91 additions & 0 deletions llvm/lib/Target/RISCV/GISel/RISCVCallLowering.cpp
@@ -94,11 +94,60 @@ struct RISCVOutgoingValueHandler : public CallLowering::OutgoingValueHandler {

void assignValueToReg(Register ValVReg, Register PhysReg,
const CCValAssign &VA) override {
// If we're passing an f32 value into an i64, anyextend before copying.
if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32)
Review comment (Collaborator):
It looks like this bit of code could be handled by setting LocInfo to CCValAssign::AExt and letting it fall through instead? Not sure what else that would imply, so take it with a bucket of salt.

Reply (@topperc, author, Oct 25, 2023):
I think so too, but it would require moving some code in SelectionDAG if we fix the original VA. Or we would need to modify the VA and pass the modified version on.

ValVReg = MIRBuilder.buildAnyExt(LLT::scalar(64), ValVReg).getReg(0);

Register ExtReg = extendRegister(ValVReg, VA);
MIRBuilder.buildCopy(PhysReg, ExtReg);
MIB.addUse(PhysReg, RegState::Implicit);
}
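For reference, here is a minimal sketch of the alternative floated in the review thread above: rebuild the assignment with LocInfo set to CCValAssign::AExt so the generic extendRegister emits the G_ANYEXT itself. The helper name is hypothetical and this is an illustration only, not what the patch does; whether a locally rebuilt VA can safely be passed on is exactly the open question in that thread.

// --- sketch, not part of the patch: a hypothetical alternative body ---
void assignValueToRegViaAExt(Register ValVReg, Register PhysReg,
                             const CCValAssign &VA) {
  CCValAssign LocVA = VA;
  // Rebuild the assignment with LocInfo == AExt instead of special-casing it.
  if (VA.getLocVT() == MVT::i64 && VA.getValVT() == MVT::f32 && VA.isRegLoc())
    LocVA = CCValAssign::getReg(VA.getValNo(), VA.getValVT(), VA.getLocReg(),
                                VA.getLocVT(), CCValAssign::AExt);
  // With AExt set, the generic extendRegister builds the G_ANYEXT to i64;
  // when LocVT and ValVT have the same size it returns ValVReg unchanged.
  Register ExtReg = extendRegister(ValVReg, LocVA);
  MIRBuilder.buildCopy(PhysReg, ExtReg);
  MIB.addUse(PhysReg, RegState::Implicit);
}
// --- end sketch ---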

unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) override {
assert(VAs.size() >= 2 && "Expected at least 2 VAs.");
const CCValAssign &VALo = VAs[0];
Review comment (Collaborator):
Add an assert on VAs.size() being 2.

Reply (Collaborator, author):
The size isn't == 2; it is >= 2. The caller has no idea how many entries this handler needs, so it keeps calling slice on an ArrayRef of all the VAs to drop the ones that have been consumed. I'll add the assert for >= 2.

const CCValAssign &VAHi = VAs[1];

assert(VAHi.needsCustom() && "Value doesn't need custom handling");
assert(VALo.getValNo() == VAHi.getValNo() &&
"Values belong to different arguments");

assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 &&
VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 &&
"unexpected custom value");

Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
MRI.createGenericVirtualRegister(LLT::scalar(32))};
MIRBuilder.buildUnmerge(NewRegs, Arg.Regs[0]);

if (VAHi.isMemLoc()) {
LLT MemTy(VAHi.getLocVT());

MachinePointerInfo MPO;
Register StackAddr = getStackAddress(
MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]);

assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO,
const_cast<CCValAssign &>(VAHi));
}

auto assignFunc = [=]() {
assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo);
if (VAHi.isRegLoc())
assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi);
};

if (Thunk) {
*Thunk = assignFunc;
return 1;
}

assignFunc();
return 1;
}
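To make the ">= 2" discussion above concrete, the generic CallLowering assignment loop hands each custom-handled value the remaining CCValAssigns via ArrayRef::slice and then skips however many entries the handler reports it consumed. The following is a rough paraphrase of that caller from memory, not a verbatim quote; the local names are the generic loop's.

// --- sketch: rough paraphrase of the generic caller in CallLowering ---
if (VA.needsCustom()) {
  std::function<void()> Thunk;
  // Hand the handler everything from the current VA onward; it decides how
  // many entries it needs and returns that count.
  unsigned NumArgRegs = Handler.assignCustomValue(
      Args[i], ArrayRef(ArgLocs).slice(j), &Thunk);
  if (Thunk)
    DelayedOutgoingRegAssignments.emplace_back(Thunk);
  if (!NumArgRegs)
    return false;
  j += NumArgRegs - 1; // skip the extra VAs this handler consumed
  continue;
}
// --- end sketch ---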

private:
MachineInstrBuilder MIB;

@@ -170,6 +219,44 @@ struct RISCVIncomingValueHandler : public CallLowering::IncomingValueHandler {
IncomingValueHandler::assignValueToReg(ValVReg, PhysReg, VA);
}

unsigned assignCustomValue(CallLowering::ArgInfo &Arg,
ArrayRef<CCValAssign> VAs,
std::function<void()> *Thunk) override {
assert(VAs.size() >= 2 && "Expected at least 2 VAs.");
const CCValAssign &VALo = VAs[0];
Review comment (Collaborator):
Same assert.

const CCValAssign &VAHi = VAs[1];

assert(VAHi.needsCustom() && "Value doesn't need custom handling");
assert(VALo.getValNo() == VAHi.getValNo() &&
"Values belong to different arguments");

assert(VALo.getLocVT() == MVT::i32 && VAHi.getLocVT() == MVT::i32 &&
VALo.getValVT() == MVT::f64 && VAHi.getValVT() == MVT::f64 &&
"unexpected custom value");

Register NewRegs[] = {MRI.createGenericVirtualRegister(LLT::scalar(32)),
MRI.createGenericVirtualRegister(LLT::scalar(32))};

if (VAHi.isMemLoc()) {
LLT MemTy(VAHi.getLocVT());

MachinePointerInfo MPO;
Register StackAddr = getStackAddress(
MemTy.getSizeInBytes(), VAHi.getLocMemOffset(), MPO, Arg.Flags[0]);

assignValueToAddress(NewRegs[1], StackAddr, MemTy, MPO,
const_cast<CCValAssign &>(VAHi));
}

assignValueToReg(NewRegs[0], VALo.getLocReg(), VALo);
if (VAHi.isRegLoc())
assignValueToReg(NewRegs[1], VAHi.getLocReg(), VAHi);

MIRBuilder.buildMergeLikeInstr(Arg.Regs[0], NewRegs);

return 1;
}

/// How the physical register gets marked varies between formal
/// parameters (it's a basic-block live-in), and a call instruction
/// (it's an implicit-def of the BL).
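As a reminder of what that comment refers to, the two incoming-value subclasses typically override a markPhysRegUsed hook along the lines below. The class and member names are assumed from the surrounding file rather than shown in this hunk, so treat this as a sketch, not part of the patch.

// --- sketch: assumed shape of the overrides, which sit outside this hunk ---
// Formal arguments: the physical register is live into the entry block.
void RISCVFormalArgHandler::markPhysRegUsed(MCRegister PhysReg) {
  MIRBuilder.getMRI()->addLiveIn(PhysReg);
  MIRBuilder.getMBB().addLiveIn(PhysReg);
}

// Call returns: the physical register is an implicit def of the call.
void RISCVCallReturnHandler::markPhysRegUsed(MCRegister PhysReg) {
  MIB.addDef(PhysReg, RegState::Implicit);
}
// --- end sketch ---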
@@ -212,6 +299,8 @@ static bool isSupportedArgumentType(Type *T, const RISCVSubtarget &Subtarget) {
// supported yet.
if (T->isIntegerTy())
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
if (T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
return true;
return false;
@@ -223,6 +312,8 @@ static bool isSupportedReturnType(Type *T, const RISCVSubtarget &Subtarget) {
// supported yet.
if (T->isIntegerTy())
return T->getIntegerBitWidth() <= Subtarget.getXLen() * 2;
if (T->isFloatTy() || T->isDoubleTy())
return true;
if (T->isPointerTy())
return true;

@@ -0,0 +1,75 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator \
; RUN: -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv32 -mattr=+f -target-abi ilp32f \
; RUN: -global-isel -stop-after=irtranslator -verify-machineinstrs < %s \
; RUN: | FileCheck -check-prefix=RV32I %s

; This file contains tests that should have identical output for the ilp32
; and ilp32f ABIs.

; Check that on RV32 ilp32[f], double is passed in a pair of registers. Unlike
; the convention for varargs, this need not be an aligned pair.

define i32 @callee_double_in_regs(i32 %a, double %b) nounwind {
; RV32I-LABEL: name: callee_double_in_regs
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: liveins: $x10, $x11, $x12
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY1]](s32), [[COPY2]](s32)
; RV32I-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[MV]](s64)
; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[FPTOSI]]
; RV32I-NEXT: $x10 = COPY [[ADD]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%b_fptosi = fptosi double %b to i32
%1 = add i32 %a, %b_fptosi
ret i32 %1
}

define i32 @caller_double_in_regs() nounwind {
; RV32I-LABEL: name: caller_double_in_regs
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_FCONSTANT double 2.000000e+00
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
; RV32I-NEXT: $x10 = COPY [[C]](s32)
; RV32I-NEXT: $x11 = COPY [[UV]](s32)
; RV32I-NEXT: $x12 = COPY [[UV1]](s32)
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_double_in_regs, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit-def $x10
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: $x10 = COPY [[COPY]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = call i32 @callee_double_in_regs(i32 1, double 2.0)
ret i32 %1
}

define double @callee_small_scalar_ret() nounwind {
; RV32I-LABEL: name: callee_small_scalar_ret
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s64) = G_FCONSTANT double 1.000000e+00
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
ret double 1.0
}

define i64 @caller_small_scalar_ret() nounwind {
; RV32I-LABEL: name: caller_small_scalar_ret
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_small_scalar_ret, implicit-def $x1, implicit-def $x10, implicit-def $x11
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[MV]](s64)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
; RV32I-NEXT: PseudoRET implicit $x10, implicit $x11
%1 = call double @callee_small_scalar_ret()
%2 = bitcast double %1 to i64
ret i64 %2
}
121 changes: 121 additions & 0 deletions llvm/test/CodeGen/RISCV/GlobalISel/irtranslator/calling-conv-ilp32.ll
@@ -0,0 +1,121 @@
; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py UTC_ARGS: --version 3
; RUN: llc -mtriple=riscv32 -global-isel -stop-after=irtranslator \
; RUN: -verify-machineinstrs < %s | FileCheck -check-prefix=RV32I %s

; Any tests that would have identical output for some combination of the ilp32*
; ABIs belong in calling-conv-*-common.ll. This file contains tests that will
; have different output across those ABIs, i.e. where some arguments would be
; passed according to the floating-point ABI.

define i32 @callee_float_in_regs(i32 %a, float %b) nounwind {
; RV32I-LABEL: name: callee_float_in_regs
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: liveins: $x10, $x11
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32I-NEXT: [[FPTOSI:%[0-9]+]]:_(s32) = G_FPTOSI [[COPY1]](s32)
; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[COPY]], [[FPTOSI]]
; RV32I-NEXT: $x10 = COPY [[ADD]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%b_fptosi = fptosi float %b to i32
%1 = add i32 %a, %b_fptosi
ret i32 %1
}

define i32 @caller_float_in_regs() nounwind {
; RV32I-LABEL: name: caller_float_in_regs
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 1
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s32) = G_FCONSTANT float 2.000000e+00
; RV32I-NEXT: $x10 = COPY [[C]](s32)
; RV32I-NEXT: $x11 = COPY [[C1]](s32)
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_float_in_regs, implicit-def $x1, implicit $x10, implicit $x11, implicit-def $x10
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: $x10 = COPY [[COPY]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = call i32 @callee_float_in_regs(i32 1, float 2.0)
ret i32 %1
}

define i32 @callee_float_on_stack(i64 %a, i64 %b, i64 %c, i64 %d, float %e) nounwind {
; RV32I-LABEL: name: callee_float_on_stack
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: liveins: $x10, $x11, $x12, $x13, $x14, $x15, $x16, $x17
; RV32I-NEXT: {{ $}}
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x11
; RV32I-NEXT: [[MV:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY]](s32), [[COPY1]](s32)
; RV32I-NEXT: [[COPY2:%[0-9]+]]:_(s32) = COPY $x12
; RV32I-NEXT: [[COPY3:%[0-9]+]]:_(s32) = COPY $x13
; RV32I-NEXT: [[MV1:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY2]](s32), [[COPY3]](s32)
; RV32I-NEXT: [[COPY4:%[0-9]+]]:_(s32) = COPY $x14
; RV32I-NEXT: [[COPY5:%[0-9]+]]:_(s32) = COPY $x15
; RV32I-NEXT: [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY4]](s32), [[COPY5]](s32)
; RV32I-NEXT: [[COPY6:%[0-9]+]]:_(s32) = COPY $x16
; RV32I-NEXT: [[COPY7:%[0-9]+]]:_(s32) = COPY $x17
; RV32I-NEXT: [[MV3:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY6]](s32), [[COPY7]](s32)
; RV32I-NEXT: [[FRAME_INDEX:%[0-9]+]]:_(p0) = G_FRAME_INDEX %fixed-stack.0
; RV32I-NEXT: [[LOAD:%[0-9]+]]:_(s32) = G_LOAD [[FRAME_INDEX]](p0) :: (load (s32) from %fixed-stack.0, align 16)
; RV32I-NEXT: [[TRUNC:%[0-9]+]]:_(s32) = G_TRUNC [[MV3]](s64)
; RV32I-NEXT: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[TRUNC]], [[LOAD]]
; RV32I-NEXT: $x10 = COPY [[ADD]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = trunc i64 %d to i32
%2 = bitcast float %e to i32
%3 = add i32 %1, %2
ret i32 %3
}

define i32 @caller_float_on_stack() nounwind {
; RV32I-LABEL: name: caller_float_on_stack
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s64) = G_CONSTANT i64 1
; RV32I-NEXT: [[C1:%[0-9]+]]:_(s64) = G_CONSTANT i64 2
; RV32I-NEXT: [[C2:%[0-9]+]]:_(s64) = G_CONSTANT i64 3
; RV32I-NEXT: [[C3:%[0-9]+]]:_(s64) = G_CONSTANT i64 4
; RV32I-NEXT: [[C4:%[0-9]+]]:_(s32) = G_FCONSTANT float 5.000000e+00
; RV32I-NEXT: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C]](s64)
; RV32I-NEXT: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C1]](s64)
; RV32I-NEXT: [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C2]](s64)
; RV32I-NEXT: [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[C3]](s64)
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(p0) = COPY $x2
; RV32I-NEXT: [[C5:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
; RV32I-NEXT: [[PTR_ADD:%[0-9]+]]:_(p0) = G_PTR_ADD [[COPY]], [[C5]](s32)
; RV32I-NEXT: G_STORE [[C4]](s32), [[PTR_ADD]](p0) :: (store (s32) into stack, align 16)
; RV32I-NEXT: $x10 = COPY [[UV]](s32)
; RV32I-NEXT: $x11 = COPY [[UV1]](s32)
; RV32I-NEXT: $x12 = COPY [[UV2]](s32)
; RV32I-NEXT: $x13 = COPY [[UV3]](s32)
; RV32I-NEXT: $x14 = COPY [[UV4]](s32)
; RV32I-NEXT: $x15 = COPY [[UV5]](s32)
; RV32I-NEXT: $x16 = COPY [[UV6]](s32)
; RV32I-NEXT: $x17 = COPY [[UV7]](s32)
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_float_on_stack, implicit-def $x1, implicit $x10, implicit $x11, implicit $x12, implicit $x13, implicit $x14, implicit $x15, implicit $x16, implicit $x17, implicit-def $x10
; RV32I-NEXT: [[COPY1:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: $x10 = COPY [[COPY1]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = call i32 @callee_float_on_stack(i64 1, i64 2, i64 3, i64 4, float 5.0)
ret i32 %1
}

define float @callee_tiny_scalar_ret() nounwind {
; RV32I-LABEL: name: callee_tiny_scalar_ret
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: [[C:%[0-9]+]]:_(s32) = G_FCONSTANT float 1.000000e+00
; RV32I-NEXT: $x10 = COPY [[C]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
ret float 1.0
}

define i32 @caller_tiny_scalar_ret() nounwind {
; RV32I-LABEL: name: caller_tiny_scalar_ret
; RV32I: bb.1 (%ir-block.0):
; RV32I-NEXT: PseudoCALL target-flags(riscv-call) @callee_tiny_scalar_ret, implicit-def $x1, implicit-def $x10
; RV32I-NEXT: [[COPY:%[0-9]+]]:_(s32) = COPY $x10
; RV32I-NEXT: $x10 = COPY [[COPY]](s32)
; RV32I-NEXT: PseudoRET implicit $x10
%1 = call float @callee_tiny_scalar_ret()
%2 = bitcast float %1 to i32
ret i32 %2
}