Skip to content

Commit 6662fe3

Browse files
[X86] Add missing vNbf16 handling in X86CallingConv.td file (#127102)
Lack of these entries caused clang to crash on the following code: ```c __m256bh fun(__m256bh arg) { return arg; } __m256bh run() { __m256bh arg= {0}; fun(arg); } ``` It caused the FastISel to fail since it handled the call lowering basing on the X86CallingConv table. Curiously, if FastISel fails somewhere down the line and selectionDAGISel fallbacks, the crash does not occur. Following code _does not_ crash: ```c __m256bh fun(__m256bh arg) { return arg; } __m256bh run() { __m256bh arg= {0}; return fun(arg); } ``` This is puzzling to me. Obviously, if FastISel fails then compiler fallbacks to something else to lower these calls -- but since the X86callingConv table _doesn't_ have entries for vNbf16 how does this other thing manage not to crash? It has to use some other mechanism, one which doesn't use the table. This rises following questions: - how is this lowering accomplished without, presumably, using the CallingConv entries? - why is the table not used? I mean this points to some logic duplication (fastISel way vs. the other bug-free way) - How to properly test this? There is a test for vNbf16 values, but it also must not be using the FastISel path? This duplication of logic makes it hard to test this, since we don't have direct control whether the FastISel path or the other one is used. Nonetheless, this PR fixes the crash, though I didn't create a test for it, since I am unsure yet how it should look like. I would like to learn how the working non-FastISel mechanism works; I tried looking for it, but didn't yet manage to find anything
1 parent 715edd7 commit 6662fe3

File tree

2 files changed

+1186
-24
lines changed

2 files changed

+1186
-24
lines changed

llvm/lib/Target/X86/X86CallingConv.td

Lines changed: 24 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -267,19 +267,19 @@ def RetCC_X86Common : CallingConv<[
267267
// Vector types are returned in XMM0 and XMM1, when they fit. XMM2 and XMM3
268268
// can only be used by ABI non-compliant code. If the target doesn't have XMM
269269
// registers, it won't have vector types.
270-
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
270+
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
271271
CCAssignToReg<[XMM0,XMM1,XMM2,XMM3]>>,
272272

273273
// 256-bit vectors are returned in YMM0 and XMM1, when they fit. YMM2 and YMM3
274274
// can only be used by ABI non-compliant code. This vector type is only
275275
// supported while using the AVX target feature.
276-
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
276+
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
277277
CCAssignToReg<[YMM0,YMM1,YMM2,YMM3]>>,
278278

279279
// 512-bit vectors are returned in ZMM0 and ZMM1, when they fit. ZMM2 and ZMM3
280280
// can only be used by ABI non-compliant code. This vector type is only
281281
// supported while using the AVX-512 target feature.
282-
CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
282+
CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
283283
CCAssignToReg<[ZMM0,ZMM1,ZMM2,ZMM3]>>,
284284

285285
// Long double types are always returned in FP0 (even with SSE),
@@ -565,7 +565,7 @@ def CC_X86_64_C : CallingConv<[
565565
CCIfType<[v64i1], CCPromoteToType<v64i8>>,
566566

567567
// The first 8 FP/Vector arguments are passed in XMM registers.
568-
CCIfType<[f16, f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
568+
CCIfType<[f16, f32, f64, f128, v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
569569
CCIfSubtarget<"hasSSE1()",
570570
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7]>>>,
571571

@@ -574,13 +574,13 @@ def CC_X86_64_C : CallingConv<[
574574
// FIXME: This isn't precisely correct; the x86-64 ABI document says that
575575
// fixed arguments to vararg functions are supposed to be passed in
576576
// registers. Actually modeling that would be a lot of work, though.
577-
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
577+
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
578578
CCIfSubtarget<"hasAVX()",
579579
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3,
580580
YMM4, YMM5, YMM6, YMM7]>>>>,
581581

582582
// The first 8 512-bit vector arguments are passed in ZMM registers.
583-
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
583+
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
584584
CCIfSubtarget<"hasAVX512()",
585585
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3, ZMM4, ZMM5, ZMM6, ZMM7]>>>>,
586586

@@ -593,14 +593,14 @@ def CC_X86_64_C : CallingConv<[
593593
CCIfType<[f80, f128], CCAssignToStack<0, 0>>,
594594

595595
// Vectors get 16-byte stack slots that are 16-byte aligned.
596-
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], CCAssignToStack<16, 16>>,
596+
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64], CCAssignToStack<16, 16>>,
597597

598598
// 256-bit vectors get 32-byte stack slots that are 32-byte aligned.
599-
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
599+
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
600600
CCAssignToStack<32, 32>>,
601601

602602
// 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
603-
CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
603+
CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
604604
CCAssignToStack<64, 64>>
605605
]>;
606606

@@ -631,13 +631,13 @@ def CC_X86_Win64_C : CallingConv<[
631631
CCIfCFGuardTarget<CCAssignToReg<[RAX]>>,
632632

633633
// 128 bit vectors are passed by pointer
634-
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64], CCPassIndirect<i64>>,
634+
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64], CCPassIndirect<i64>>,
635635

636636
// 256 bit vectors are passed by pointer
637-
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64], CCPassIndirect<i64>>,
637+
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64], CCPassIndirect<i64>>,
638638

639639
// 512 bit vectors are passed by pointer
640-
CCIfType<[v64i8, v32i16, v16i32, v32f16, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
640+
CCIfType<[v64i8, v32i16, v16i32, v32f16, v32bf16, v16f32, v8f64, v8i64], CCPassIndirect<i64>>,
641641

642642
// Long doubles are passed by pointer
643643
CCIfType<[f80], CCPassIndirect<i64>>,
@@ -734,48 +734,48 @@ def CC_X86_64_AnyReg : CallingConv<[
734734
/// values are spilled on the stack.
735735
def CC_X86_32_Vector_Common : CallingConv<[
736736
// Other SSE vectors get 16-byte stack slots that are 16-byte aligned.
737-
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
737+
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
738738
CCAssignToStack<16, 16>>,
739739

740740
// 256-bit AVX vectors get 32-byte stack slots that are 32-byte aligned.
741-
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
741+
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
742742
CCAssignToStack<32, 32>>,
743743

744744
// 512-bit AVX 512-bit vectors get 64-byte stack slots that are 64-byte aligned.
745-
CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
745+
CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
746746
CCAssignToStack<64, 64>>
747747
]>;
748748

749749
/// CC_X86_Win32_Vector - In X86 Win32 calling conventions, extra vector
750750
/// values are spilled on the stack.
751751
def CC_X86_Win32_Vector : CallingConv<[
752752
// Other SSE vectors get 16-byte stack slots that are 4-byte aligned.
753-
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
753+
CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
754754
CCAssignToStack<16, 4>>,
755755

756756
// 256-bit AVX vectors get 32-byte stack slots that are 4-byte aligned.
757-
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
757+
CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
758758
CCAssignToStack<32, 4>>,
759759

760760
// 512-bit AVX 512-bit vectors get 64-byte stack slots that are 4-byte aligned.
761-
CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
761+
CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
762762
CCAssignToStack<64, 4>>
763763
]>;
764764

765765
// CC_X86_32_Vector_Standard - The first 3 vector arguments are passed in
766766
// vector registers
767767
def CC_X86_32_Vector_Standard : CallingConv<[
768768
// SSE vector arguments are passed in XMM registers.
769-
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
769+
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
770770
CCAssignToReg<[XMM0, XMM1, XMM2]>>>,
771771

772772
// AVX 256-bit vector arguments are passed in YMM registers.
773-
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
773+
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
774774
CCIfSubtarget<"hasAVX()",
775775
CCAssignToReg<[YMM0, YMM1, YMM2]>>>>,
776776

777777
// AVX 512-bit vector arguments are passed in ZMM registers.
778-
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
778+
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
779779
CCAssignToReg<[ZMM0, ZMM1, ZMM2]>>>,
780780

781781
CCIfIsVarArgOnWin<CCDelegateTo<CC_X86_Win32_Vector>>,
@@ -786,16 +786,16 @@ def CC_X86_32_Vector_Standard : CallingConv<[
786786
// vector registers.
787787
def CC_X86_32_Vector_Darwin : CallingConv<[
788788
// SSE vector arguments are passed in XMM registers.
789-
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v4f32, v2f64],
789+
CCIfNotVarArg<CCIfType<[v16i8, v8i16, v4i32, v2i64, v8f16, v8bf16, v4f32, v2f64],
790790
CCAssignToReg<[XMM0, XMM1, XMM2, XMM3]>>>,
791791

792792
// AVX 256-bit vector arguments are passed in YMM registers.
793-
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v8f32, v4f64],
793+
CCIfNotVarArg<CCIfType<[v32i8, v16i16, v8i32, v4i64, v16f16, v16bf16, v8f32, v4f64],
794794
CCIfSubtarget<"hasAVX()",
795795
CCAssignToReg<[YMM0, YMM1, YMM2, YMM3]>>>>,
796796

797797
// AVX 512-bit vector arguments are passed in ZMM registers.
798-
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v16f32, v8f64],
798+
CCIfNotVarArg<CCIfType<[v64i8, v32i16, v16i32, v8i64, v32f16, v32bf16, v16f32, v8f64],
799799
CCAssignToReg<[ZMM0, ZMM1, ZMM2, ZMM3]>>>,
800800

801801
CCDelegateTo<CC_X86_32_Vector_Common>

0 commit comments

Comments
 (0)