Skip to content

Commit cdc247e

Browse files
davemgreen authored and tstellar committed
[AArch64] Don't generate neon integer complex numbers with +sve2. NFC (llvm#79829)
The condition for allowing integer complex number support could also allow neon fixed length complex numbers if +sve2 was specified. This tightens the condition to only allow integer complex number support for scalable vectors. We could generalize this in the future to generate SVE intrinsics for fixed-length vectors, but for the moment this opts for the simpler fix. (cherry picked from commit 9520773)
1 parent 66fed52 commit cdc247e

File tree

2 files changed

+29
-2
lines changed

2 files changed

+29
-2
lines changed

llvm/lib/Target/AArch64/AArch64ISelLowering.cpp

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -26915,7 +26915,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
2691526915
return false;
2691626916

2691726917
// If the vector is scalable, SVE is enabled, implying support for complex
26918-
// numbers. Otherwirse, we need to ensure complex number support is avaialble
26918+
// numbers. Otherwise, we need to ensure complex number support is available
2691926919
if (!VTy->isScalableTy() && !Subtarget->hasComplxNum())
2692026920
return false;
2692126921

@@ -26931,7 +26931,7 @@ bool AArch64TargetLowering::isComplexDeinterleavingOperationSupported(
2693126931
!llvm::isPowerOf2_32(VTyWidth))
2693226932
return false;
2693326933

26934-
if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2()) {
26934+
if (ScalarTy->isIntegerTy() && Subtarget->hasSVE2() && VTy->isScalableTy()) {
2693526935
unsigned ScalarWidth = ScalarTy->getScalarSizeInBits();
2693626936
return 8 <= ScalarWidth && ScalarWidth <= 64;
2693726937
}

llvm/test/CodeGen/AArch64/complex-deinterleaving-f16-add.ll

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,7 @@
11
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
22
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16 -o - | FileCheck %s
33
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve -o - | FileCheck %s
4+
; RUN: llc < %s --mattr=+complxnum,+neon,+fullfp16,+sve2 -o - | FileCheck %s
45

56
target triple = "aarch64"
67

@@ -158,6 +159,32 @@ entry:
158159
ret <16 x half> %interleaved.vec
159160
}
160161

162+
163+
; Expected not to transform as it is integer
164+
define <16 x i16> @complex_add_v16i16(<16 x i16> %a, <16 x i16> %b) {
165+
; CHECK-LABEL: complex_add_v16i16:
166+
; CHECK: // %bb.0: // %entry
167+
; CHECK-NEXT: uzp1 v4.8h, v2.8h, v3.8h
168+
; CHECK-NEXT: uzp1 v5.8h, v0.8h, v1.8h
169+
; CHECK-NEXT: uzp2 v0.8h, v0.8h, v1.8h
170+
; CHECK-NEXT: uzp2 v1.8h, v2.8h, v3.8h
171+
; CHECK-NEXT: sub v2.8h, v4.8h, v0.8h
172+
; CHECK-NEXT: add v1.8h, v1.8h, v5.8h
173+
; CHECK-NEXT: zip1 v0.8h, v2.8h, v1.8h
174+
; CHECK-NEXT: zip2 v1.8h, v2.8h, v1.8h
175+
; CHECK-NEXT: ret
176+
entry:
177+
%a.real = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
178+
%a.imag = shufflevector <16 x i16> %a, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
179+
%b.real = shufflevector <16 x i16> %b, <16 x i16> zeroinitializer, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
180+
%b.imag = shufflevector <16 x i16> %b, <16 x i16> zeroinitializer, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
181+
%0 = sub <8 x i16> %b.real, %a.imag
182+
%1 = add <8 x i16> %b.imag, %a.real
183+
%interleaved.vec = shufflevector <8 x i16> %0, <8 x i16> %1, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
184+
ret <16 x i16> %interleaved.vec
185+
}
186+
187+
161188
declare { <2 x half>, <2 x half> } @llvm.experimental.vector.deinterleave2.v4f16(<4 x half>)
162189
declare <4 x half> @llvm.experimental.vector.interleave2.v4f16(<2 x half>, <2 x half>)
163190

0 commit comments

Comments
 (0)