Skip to content

Commit a5e0389

Browse files
committed
[AArch64] Define ACLE FP conversion intrinsics with more specific predicate.
This patch changes the FP conversion intrinsics to take a predicate that matches the number of lanes for the vector with the widest element type as opposed to using <vscale x 16 x i1>. For example: ```<vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f16(<vscale x 4 x float>, <vscale x 4 x i1>, <vscale x 8 x half>)``` now uses <vscale x 4 x i1> instead of <vscale x 16 x i1> And similar for: ```<vscale x 4 x float> @llvm.aarch64.sve.fcvt.f32f64(<vscale x 4 x float>, <vscale x 2 x i1>, <vscale x 2 x double>)``` where the predicate now matches the wider type, so <vscale x 2 x i1>. Reviewers: efriedma, SjoerdMeijer, paulwalker-arm, rengolin Reviewed By: efriedma Tags: #clang Differential Revision: https://reviews.llvm.org/D78402
1 parent 0021644 commit a5e0389

File tree

15 files changed

+350
-271
lines changed

15 files changed

+350
-271
lines changed

clang/include/clang/Basic/TargetBuiltins.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,6 @@ namespace clang {
238238
bool isOverloadDefault() const { return !(Flags & OverloadKindMask); }
239239
bool isOverloadWhileRW() const { return Flags & IsOverloadWhileRW; }
240240
bool isOverloadCvt() const { return Flags & IsOverloadCvt; }
241-
bool isFPConvert() const { return Flags & IsFPConvert; }
242241

243242
uint64_t getBits() const { return Flags; }
244243
bool isFlagSet(uint64_t Flag) const { return Flags & Flag; }

clang/include/clang/Basic/arm_sve.td

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -74,10 +74,13 @@
7474
// l: int64_t
7575
// m: uint32_t
7676
// n: uint64_t
77-
//
78-
// I: Predicate Pattern (sv_pattern)
7977

80-
// l: int64_t
78+
// t: svint32_t
79+
// z: svuint32_t
80+
// g: svuint64_t
81+
// O: svfloat16_t
82+
// M: svfloat32_t
83+
// N: svfloat64_t
8184

8285
// A: pointer to int8_t
8386
// B: pointer to int16_t
@@ -173,7 +176,6 @@ def IsOverloadWhileRW : FlagType<0x00400000>; // Use {pred(default type)
173176
def IsOverloadCvt : FlagType<0x00800000>; // Use {typeof(operand0), typeof(last operand)} as overloaded types.
174177
def OverloadKindMask : FlagType<0x00E00000>; // When the masked values are all '0', the default type is used as overload type.
175178
def IsByteIndexed : FlagType<0x01000000>;
176-
def IsFPConvert : FlagType<0x02000000>;
177179

178180
// These must be kept in sync with the flags in include/clang/Basic/TargetBuiltins.h
179181
class ImmCheckType<int val> {
@@ -558,15 +560,15 @@ def SVCMLA_LANE : SInst<"svcmla_lane[_{d}]", "ddddii", "hf", MergeNone, "aarch6
558560

559561
multiclass SInstCvtMXZ<
560562
string name, string m_types, string xz_types, string types,
561-
string intrinsic, list<FlagType> flags = [IsFPConvert, IsOverloadNone]> {
563+
string intrinsic, list<FlagType> flags = [IsOverloadNone]> {
562564
def _M : SInst<name, m_types, types, MergeOp1, intrinsic, flags>;
563565
def _X : SInst<name, xz_types, types, MergeAnyExp, intrinsic, flags>;
564566
def _Z : SInst<name, xz_types, types, MergeZeroExp, intrinsic, flags>;
565567
}
566568

567569
multiclass SInstCvtMX<string name, string m_types, string xz_types,
568570
string types, string intrinsic,
569-
list<FlagType> flags = [IsFPConvert, IsOverloadNone]> {
571+
list<FlagType> flags = [IsOverloadNone]> {
570572
def _M : SInst<name, m_types, types, MergeOp1, intrinsic, flags>;
571573
def _X : SInst<name, xz_types, types, MergeAnyExp, intrinsic, flags>;
572574
}
@@ -581,7 +583,7 @@ defm SVFCVTZS_S32_F32 : SInstCvtMXZ<"svcvt_s32[_f32]", "ddPM", "dPM", "i", "aar
581583
defm SVFCVTZS_S64_F32 : SInstCvtMXZ<"svcvt_s64[_f32]", "ddPM", "dPM", "l", "aarch64_sve_fcvtzs_i64f32">;
582584

583585
// svcvt_s##_f64
584-
defm SVFCVTZS_S32_F64 : SInstCvtMXZ<"svcvt_s32[_f64]", "ddPN", "dPN", "i", "aarch64_sve_fcvtzs_i32f64">;
586+
defm SVFCVTZS_S32_F64 : SInstCvtMXZ<"svcvt_s32[_f64]", "ttPd", "tPd", "d", "aarch64_sve_fcvtzs_i32f64">;
585587
defm SVFCVTZS_S64_F64 : SInstCvtMXZ<"svcvt_s64[_f64]", "ddPN", "dPN", "l", "aarch64_sve_fcvtzs", [IsOverloadCvt]>;
586588

587589
// svcvt_u##_f16
@@ -594,7 +596,7 @@ defm SVFCVTZU_U32_F32 : SInstCvtMXZ<"svcvt_u32[_f32]", "ddPM", "dPM", "Ui", "aar
594596
defm SVFCVTZU_U64_F32 : SInstCvtMXZ<"svcvt_u64[_f32]", "ddPM", "dPM", "Ul", "aarch64_sve_fcvtzu_i64f32">;
595597

596598
// svcvt_u##_f64
597-
defm SVFCVTZU_U32_F64 : SInstCvtMXZ<"svcvt_u32[_f64]", "ddPN", "dPN", "Ui", "aarch64_sve_fcvtzu_i32f64">;
599+
defm SVFCVTZU_U32_F64 : SInstCvtMXZ<"svcvt_u32[_f64]", "zzPd", "zPd", "d", "aarch64_sve_fcvtzu_i32f64">;
598600
defm SVFCVTZU_U64_F64 : SInstCvtMXZ<"svcvt_u64[_f64]", "ddPN", "dPN", "Ul", "aarch64_sve_fcvtzu", [IsOverloadCvt]>;
599601

600602
// svcvt_f16_s##
@@ -607,7 +609,7 @@ defm SVFCVTZS_F32_S32 : SInstCvtMXZ<"svcvt_f32[_s32]", "MMPd", "MPd", "i", "aar
607609
defm SVFCVTZS_F32_S64 : SInstCvtMXZ<"svcvt_f32[_s64]", "MMPd", "MPd", "l", "aarch64_sve_scvtf_f32i64">;
608610

609611
// svcvt_f64_s##
610-
defm SVFCVTZS_F64_S32 : SInstCvtMXZ<"svcvt_f64[_s32]", "NNPd", "NPd", "i", "aarch64_sve_scvtf_f64i32">;
612+
defm SVFCVTZS_F64_S32 : SInstCvtMXZ<"svcvt_f64[_s32]", "ddPt", "dPt", "d", "aarch64_sve_scvtf_f64i32">;
611613
defm SVFCVTZS_F64_S64 : SInstCvtMXZ<"svcvt_f64[_s64]", "NNPd", "NPd", "l", "aarch64_sve_scvtf", [IsOverloadCvt]>;
612614

613615
// svcvt_f16_u##
@@ -620,32 +622,32 @@ defm SVFCVTZU_F32_U32 : SInstCvtMXZ<"svcvt_f32[_u32]", "MMPd", "MPd", "Ui", "aar
620622
defm SVFCVTZU_F32_U64 : SInstCvtMXZ<"svcvt_f32[_u64]", "MMPd", "MPd", "Ul", "aarch64_sve_ucvtf_f32i64">;
621623

622624
// svcvt_f64_u##
623-
defm SVFCVTZU_F64_U32 : SInstCvtMXZ<"svcvt_f64[_u32]", "NNPd", "NPd", "Ui", "aarch64_sve_ucvtf_f64i32">;
625+
defm SVFCVTZU_F64_U32 : SInstCvtMXZ<"svcvt_f64[_u32]", "ddPz", "dPz", "d", "aarch64_sve_ucvtf_f64i32">;
624626
defm SVFCVTZU_F64_U64 : SInstCvtMXZ<"svcvt_f64[_u64]", "NNPd", "NPd", "Ul", "aarch64_sve_ucvtf", [IsOverloadCvt]>;
625627

626628
// svcvt_f16_f##
627629
defm SVFCVT_F16_F32 : SInstCvtMXZ<"svcvt_f16[_f32]", "OOPd", "OPd", "f", "aarch64_sve_fcvt_f16f32">;
628630
defm SVFCVT_F16_F64 : SInstCvtMXZ<"svcvt_f16[_f64]", "OOPd", "OPd", "d", "aarch64_sve_fcvt_f16f64">;
629631

630632
// svcvt_f32_f##
631-
defm SVFCVT_F32_F16 : SInstCvtMXZ<"svcvt_f32[_f16]", "MMPd", "MPd", "h", "aarch64_sve_fcvt_f32f16">;
633+
defm SVFCVT_F32_F16 : SInstCvtMXZ<"svcvt_f32[_f16]", "ddPO", "dPO", "f", "aarch64_sve_fcvt_f32f16">;
632634
defm SVFCVT_F32_F64 : SInstCvtMXZ<"svcvt_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvt_f32f64">;
633635

634636
// svcvt_f64_f##
635-
defm SVFCVT_F64_F16 : SInstCvtMXZ<"svcvt_f64[_f16]", "NNPd", "NPd", "h", "aarch64_sve_fcvt_f64f16">;
636-
defm SVFCVT_F64_F32 : SInstCvtMXZ<"svcvt_f64[_f32]", "NNPd", "NPd", "f", "aarch64_sve_fcvt_f64f32">;
637+
defm SVFCVT_F64_F16 : SInstCvtMXZ<"svcvt_f64[_f16]", "ddPO", "dPO", "d", "aarch64_sve_fcvt_f64f16">;
638+
defm SVFCVT_F64_F32 : SInstCvtMXZ<"svcvt_f64[_f32]", "ddPM", "dPM", "d", "aarch64_sve_fcvt_f64f32">;
637639

638640
let ArchGuard = "defined(__ARM_FEATURE_SVE2)" in {
639641
defm SVCVTLT_F32 : SInstCvtMX<"svcvtlt_f32[_f16]", "ddPh", "dPh", "f", "aarch64_sve_fcvtlt_f32f16">;
640642
defm SVCVTLT_F64 : SInstCvtMX<"svcvtlt_f64[_f32]", "ddPh", "dPh", "d", "aarch64_sve_fcvtlt_f64f32">;
641643

642644
defm SVCVTX_F32 : SInstCvtMXZ<"svcvtx_f32[_f64]", "MMPd", "MPd", "d", "aarch64_sve_fcvtx_f32f64">;
643645

644-
def SVCVTNT_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32">;
645-
def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64">;
646+
def SVCVTNT_F32 : SInst<"svcvtnt_f16[_f32]", "hhPd", "f", MergeOp1, "aarch64_sve_fcvtnt_f16f32", [IsOverloadNone]>;
647+
def SVCVTNT_F64 : SInst<"svcvtnt_f32[_f64]", "hhPd", "d", MergeOp1, "aarch64_sve_fcvtnt_f32f64", [IsOverloadNone]>;
646648
// SVCVTNT_X : Implemented as macro by SveEmitter.cpp
647649

648-
def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64">;
650+
def SVCVTXNT_F32 : SInst<"svcvtxnt_f32[_f64]", "MMPd", "d", MergeOp1, "aarch64_sve_fcvtxnt_f32f64", [IsOverloadNone]>;
649651
// SVCVTXNT_X_F32 : Implemented as macro by SveEmitter.cpp
650652

651653
}

clang/lib/CodeGen/CGBuiltin.cpp

Lines changed: 2 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -7868,18 +7868,8 @@ Value *CodeGenFunction::EmitAArch64SVEBuiltinExpr(unsigned BuiltinID,
78687868
// Predicates must match the main datatype.
78697869
for (unsigned i = 0, e = Ops.size(); i != e; ++i)
78707870
if (auto PredTy = dyn_cast<llvm::VectorType>(Ops[i]->getType()))
7871-
if (PredTy->getElementType()->isIntegerTy(1)) {
7872-
// The special case for `isFPConvert` is because the predicates of the
7873-
// ACLE IR intrinsics for FP converts are always of type <vscale x 16 x i1>.
7874-
// This special-case will be removed in a follow-up patch that updates
7875-
// the FP conversion intrinsics with predicates that match the
7876-
// default type.
7877-
llvm::VectorType *NewPredTy =
7878-
TypeFlags.isFPConvert()
7879-
? llvm::VectorType::get(Builder.getInt1Ty(), {16, true})
7880-
: getSVEType(TypeFlags);
7881-
Ops[i] = EmitSVEPredicateCast(Ops[i], NewPredTy);
7882-
}
7871+
if (PredTy->getElementType()->isIntegerTy(1))
7872+
Ops[i] = EmitSVEPredicateCast(Ops[i], getSVEType(TypeFlags));
78837873

78847874
// Splat scalar operand to vector (intrinsics with _n infix)
78857875
if (TypeFlags.hasSplatOperand()) {

0 commit comments

Comments
 (0)