Skip to content

Commit 0021644

Browse files
committed
[SveEmitter] Add builtins for FP conversions
This adds the flag IsOverloadCvt, which tells CGBuiltin to use the result type and the type of the last operand as the overloaded types for the LLVM IR intrinsic. This also adds the flag IsFPConvert, which is needed to avoid converting the predicate of the operation from svbool_t to a predicate with fewer lanes, as the LLVM IR intrinsics use <vscale x 16 x i1> as the predicate type. Reviewers: SjoerdMeijer, efriedma Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78239
1 parent 871388e commit 0021644

File tree

9 files changed

+1133
-5
lines changed

9 files changed

+1133
-5
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -237,6 +237,8 @@ namespace clang {
237237
bool isOverloadWhile() const { return Flags & IsOverloadWhile; }
238238
// True when none of the bits covered by OverloadKindMask are set in Flags.
bool isOverloadDefault() const { return !(Flags & OverloadKindMask); }
239239
bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; }
240+
// True when the IsOverloadCvt bit is set in Flags.
bool isOverloadCvt() const { return Flags & IsOverloadCvt; }
241+
// True when the IsFPConvert bit is set in Flags.
bool isFPConvert() const { return Flags & IsFPConvert; }
240242

241243
uint64_t getBits() const { return Flags; }
242244
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }

clang/include/clang/Basic/arm_sve.td

Lines changed: 99 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -170,10 +170,10 @@ def IsZExtReturn : FlagType<0x00080000>; // Return value is sign-ex
170170
def IsOverloadNone : FlagType<0x00100000>; // Intrinsic does not take any overloaded types.
171171
def IsOverloadWhile : FlagType<0x00200000>; // Use {default type, typeof(operand1)} as overloaded types.
172172
def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type), typeof(operand0)} as overloaded types.
173+
def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
173174
def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
174-
// : :
175-
// : :
176-
def IsByteIndexed : FlagType<0x02000000>;
175+
def IsByteIndexed : FlagType<0x01000000>;
176+
// FP conversion builtin: CodeGen casts its predicate operands to <vscale x 16 x i1> rather than to the default type.
def IsFPConvert : FlagType<0x02000000>;
177177

178178
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
179179
class ImmCheckType<int val> {
@@ -553,6 +553,102 @@ def SVTMAD : SInst<"svtmad[_{d}]", "dddi", "hfd", MergeNone, "aarch64_sve_ftma
553553
def SVMLA_LANE : SInst<"svmla_lane[_{d}]", "ddddi", "hfd", MergeNone, "aarch64_sve_fmla_lane", [], [ImmCheck<3, ImmCheckLaneIndex, 2>]>;
554554
def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch64_sve_fcmla_lane", [], [ImmCheck<3, ImmCheckLaneIndexCompRotate, 2>,
555555
ImmCheck<4, ImmCheckComplexRotAll90>]>;
556+
////////////////////////////////////////////////////////////////////////////////
557+
// Floating-point conversions
558+
559+
multiclass SInstCvtMXZ<
560+
string name, string m_types, string xz_types, string types,
561+
string intrinsic, list<FlagType> flags = [IsFPConvert, IsOverloadNone]> {
562+
def _M : SInst<name, m_types, types, MergeOp1, intrinsic, flags>;
563+
def _X : SInst<name, xz_types, types, MergeAnyExp, intrinsic, flags>;
564+
def _Z : SInst<name, xz_types, types, MergeZeroExp, intrinsic, flags>;
565+
}
566+
567+
multiclass SInstCvtMX<string name, string m_types, string xz_types,
568+
string types, string intrinsic,
569+
list<FlagType> flags = [IsFPConvert, IsOverloadNone]> {
570+
def _M : SInst<name, m_types, types, MergeOp1, intrinsic, flags>;
571+
def _X : SInst<name, xz_types, types, MergeAnyExp, intrinsic, flags>;
572+
}
573+
574+
// svcvt_s##_f16
575+
defm SVFCVTZS_S16_F16 : SInstCvtMXZ<"svcvt_s16[_f16]", "ddPO", "dPO", "s", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
576+
defm SVFCVTZS_S32_F16 : SInstCvtMXZ<"svcvt_s32[_f16]", "ddPO", "dPO", "i", "aarch64_sve_fcvtzs_i32f16">;
577+
defm SVFCVTZS_S64_F16 : SInstCvtMXZ<"svcvt_s64[_f16]", "ddPO", "dPO", "l", "aarch64_sve_fcvtzs_i64f16">;
578+
579+
// svcvt_s##_f32
580+
defm SVFCVTZS_S32_F32 : SInstCvtMXZ<"svcvt_s32[_f32]", "ddPM", "dPM", "i", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
581+
defm SVFCVTZS_S64_F32 : SInstCvtMXZ<"svcvt_s64[_f32]", "ddPM", "dPM", "l", "aarch64_sve_fcvtzs_i64f32">;
582+
583+
// svcvt_s##_f64
584+
defm SVFCVTZS_S32_F64 : SInstCvtMXZ<"svcvt_s32[_f64]", "ddPN", "dPN", "i", "aarch64_sve_fcvtzs_i32f64">;
585+
defm SVFCVTZS_S64_F64 : SInstCvtMXZ<"svcvt_s64[_f64]", "ddPN", "dPN", "l", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
586+
587+
// svcvt_u##_f16
588+
defm SVFCVTZU_U16_F16 : SInstCvtMXZ<"svcvt_u16[_f16]", "ddPO", "dPO", "Us", "aarch64_sve_fcvtzu", [IsOverloadCvt]>;
589+
defm SVFCVTZU_U32_F16 : SInstCvtMXZ<"svcvt_u32[_f16]", "ddPO", "dPO", "Ui", "aarch64_sve_fcvtzu_i32f16">;
590+
defm SVFCVTZU_U64_F16 : SInstCvtMXZ<"svcvt_u64[_f16]", "ddPO", "dPO", "Ul", "aarch64_sve_fcvtzu_i64f16">;
591+
592+
// svcvt_u##_f32
593+
defm SVFCVTZU_U32_F32 : SInstCvtMXZ<"svcvt_u32[_f32]", "ddPM", "dPM", "Ui", "aarch64_sve_fcvtzu", [IsOverloadCvt]>;
594+
defm SVFCVTZU_U64_F32 : SInstCvtMXZ<"svcvt_u64[_f32]", "ddPM", "dPM", "Ul", "aarch64_sve_fcvtzu_i64f32">;
595+
596+
// svcvt_u##_f64
597+
defm SVFCVTZU_U32_F64 : SInstCvtMXZ<"svcvt_u32[_f64]", "ddPN", "dPN", "Ui", "aarch64_sve_fcvtzu_i32f64">;
598+
defm SVFCVTZU_U64_F64 : SInstCvtMXZ<"svcvt_u64[_f64]", "ddPN", "dPN", "Ul", "aarch64_sve_fcvtzu", [IsOverloadCvt]>;
599+
600+
// svcvt_f16_s##
601+
defm SVFCVTZS_F16_S16 : SInstCvtMXZ<"svcvt_f16[_s16]", "OOPd", "OPd", "s", "aarch64_sve_scvtf", [IsOverloadCvt]>;
602+
defm SVFCVTZS_F16_S32 : SInstCvtMXZ<"svcvt_f16[_s32]", "OOPd", "OPd", "i", "aarch64_sve_scvtf_f16i32">;
603+
defm SVFCVTZS_F16_S64 : SInstCvtMXZ<"svcvt_f16[_s64]", "OOPd", "OPd", "l", "aarch64_sve_scvtf_f16i64">;
604+
605+
// svcvt_f32_s##
606+
defm SVFCVTZS_F32_S32 : SInstCvtMXZ<"svcvt_f32[_s32]", "MMPd", "MPd", "i", "aarch64_sve_scvtf", [IsOverloadCvt]>;
607+
defm SVFCVTZS_F32_S64 : SInstCvtMXZ<"svcvt_f32[_s64]", "MMPd", "MPd", "l", "aarch64_sve_scvtf_f32i64">;
608+
609+
// svcvt_f64_s##
610+
defm SVFCVTZS_F64_S32 : SInstCvtMXZ<"svcvt_f64[_s32]", "NNPd", "NPd", "i", "aarch64_sve_scvtf_f64i32">;
611+
defm SVFCVTZS_F64_S64 : SInstCvtMXZ<"svcvt_f64[_s64]", "NNPd", "NPd", "l", "aarch64_sve_scvtf", [IsOverloadCvt]>;
612+
613+
// svcvt_f16_u##
614+
defm SVFCVTZU_F16_U16 : SInstCvtMXZ<"svcvt_f16[_u16]", "OOPd", "OPd", "Us", "aarch64_sve_ucvtf", [IsOverloadCvt]>;
615+
defm SVFCVTZU_F16_U32 : SInstCvtMXZ<"svcvt_f16[_u32]", "OOPd", "OPd", "Ui", "aarch64_sve_ucvtf_f16i32">;
616+
defm SVFCVTZU_F16_U64 : SInstCvtMXZ<"svcvt_f16[_u64]", "OOPd", "OPd", "Ul", "aarch64_sve_ucvtf_f16i64">;
617+
618+
// svcvt_f32_u##
619+
defm SVFCVTZU_F32_U32 : SInstCvtMXZ<"svcvt_f32[_u32]", "MMPd", "MPd", "Ui", "aarch64_sve_ucvtf", [IsOverloadCvt]>;
620+
defm SVFCVTZU_F32_U64 : SInstCvtMXZ<"svcvt_f32[_u64]", "MMPd", "MPd", "Ul", "aarch64_sve_ucvtf_f32i64">;
621+
622+
// svcvt_f64_u##
623+
defm SVFCVTZU_F64_U32 : SInstCvtMXZ<"svcvt_f64[_u32]", "NNPd", "NPd", "Ui", "aarch64_sve_ucvtf_f64i32">;
624+
defm SVFCVTZU_F64_U64 : SInstCvtMXZ<"svcvt_f64[_u64]", "NNPd", "NPd", "Ul", "aarch64_sve_ucvtf", [IsOverloadCvt]>;
625+
626+
// svcvt_f16_f##
627+
defm SVFCVT_F16_F32 : SInstCvtMXZ<"svcvt_f16[_f32]", "OOPd", "OPd", "f", "aarch64_sve_fcvt_f16f32">;
628+
defm SVFCVT_F16_F64 : SInstCvtMXZ<"svcvt_f16[_f64]", "OOPd", "OPd", "d", "aarch64_sve_fcvt_f16f64">;
629+
630+
// svcvt_f32_f##
631+
defm SVFCVT_F32_F16 : SInstCvtMXZ<"svcvt_f32[_f16]", "MMPd", "MPd", "h", "aarch64_sve_fcvt_f32f16">;
632+
defm SVFCVT_F32_F64 : SInstCvtMXZ<"svcvt_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvt_f32f64">;
633+
634+
// svcvt_f64_f##
635+
defm SVFCVT_F64_F16 : SInstCvtMXZ<"svcvt_f64[_f16]", "NNPd", "NPd", "h", "aarch64_sve_fcvt_f64f16">;
636+
defm SVFCVT_F64_F32 : SInstCvtMXZ<"svcvt_f64[_f32]", "NNPd", "NPd", "f", "aarch64_sve_fcvt_f64f32">;
637+
638+
let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {
639+
defm SVCVTLT_F32 : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
640+
defm SVCVTLT_F64 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
641+
642+
defm SVCVTX_F32 : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">;
643+
644+
def SVCVTNT_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32">;
645+
def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64">;
646+
// SVCVTNT_X : Implemented as macro by SveEmitter.cpp
647+
648+
def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64">;
649+
// SVCVTXNT_X_F32 : Implemented as macro by SveEmitter.cpp
650+
651+
}
556652

557653
def SVCADD_M : SInst<"svcadd[_{d}]", "dPddi", "hfd", MergeOp1, "aarch64_sve_fcadd", [], [ImmCheck<3, ImmCheckComplexRot90_270>]>;
558654
def SVCMLA_M : SInst<"svcmla[_{d}]", "dPdddi", "hfd", MergeOp1, "aarch64_sve_fcmla", [], [ImmCheck<4, ImmCheckComplexRotAll90>]>;

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7811,6 +7811,9 @@ CodeGenFunction::getSVEOverloadTypes(SVETypeFlags TypeFlags,
78117811
if (TypeFlags.isOverloadWhileRW())
78127812
return {getSVEPredType(TypeFlags), Ops[0]->getType()};
78137813

7814+
if (TypeFlags.isOverloadCvt())
7815+
return {Ops[0]->getType(), Ops.back()->getType()};
7816+
78147817
assert(TypeFlags.isOverloadDefault() && "Unexpected value for overloads");
78157818
return {DefaultType};
78167819
}
@@ -7865,8 +7868,18 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
78657868
// Predicates must match the main datatype.
78667869
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
78677870
if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
7868-
if (PredTy->getElementType()->isIntegerTy(1))
7869-
Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
7871+
if (PredTy->getElementType()->isIntegerTy(1)) {
7872+
// The special case for `isFPConvert` is because the predicates of the
7873+
// ACLE IR intrinsics for FP converts are always of type <vscale x 16 x i1>.
7874+
// This special-case will be removed in a follow-up patch that updates
7875+
// the FP conversion intrinsics with predicates that match the
7876+
// default type.
7877+
llvm::VectorType *NewPredTy =
7878+
TypeFlags.isFPConvert()
7879+
? llvm::VectorType::get(Builder.getInt1Ty(), {16, true})
7880+
: getSVEType(TypeFlags);
7881+
Ops[i] = EmitSVEPredicateCast(Ops[i], NewPredTy);
7882+
}
78707883

78717884
// Splat scalar operand to vector (intrinsics with _n infix)
78727885
if (TypeFlags.hasSplatOperand()) {

0 commit comments

Comments
 (0)