Commit b799cc3

[RISCV] Add lowering for @llvm.experimental.vector.compress (#113291)
This intrinsic was introduced by #92289; until now it was simply expanded for RISC-V. This patch adds custom lowering for the intrinsic and maps it directly to the `vcompress` instruction. Fixes #113242.
1 parent 0cb80c4 commit b799cc3
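
For context, @llvm.experimental.vector.compress takes a source vector, a mask, and a passthru, and packs the elements whose mask bit is set to the front of the result; the remaining lanes are taken from the passthru (or are undefined if the passthru is undef). The lowering added here covers both scalable and fixed-length vectors. As a minimal sketch (a hypothetical example, not one of this commit's tests), a scalable-vector call like the following should now select to a single vcompress.vm under the same llc flags as the fixed-length tests below, instead of the generic expansion:

; Hypothetical example (assumed function name, not from the patch); with
; -mattr=+v the new lowering should emit one vcompress.vm for this call.
define <vscale x 4 x i32> @compress_nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i1> %mask) {
  %ret = call <vscale x 4 x i32> @llvm.experimental.vector.compress.nxv4i32(<vscale x 4 x i32> %v, <vscale x 4 x i1> %mask, <vscale x 4 x i32> undef)
  ret <vscale x 4 x i32> %ret
}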

File tree

5 files changed, +1429 -0 lines changed

llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 40 additions & 0 deletions
@@ -930,6 +930,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                              VT, Custom);
         }
       }
+
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
     }
 
     for (MVT VT : VecTupleVTs) {
@@ -1051,6 +1053,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                           ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                           ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                          VT, Custom);
+
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
     };
 
     // Sets common extload/truncstore actions on RVV floating-point vector
@@ -1306,6 +1310,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
             {ISD::CTLZ, ISD::CTLZ_ZERO_UNDEF, ISD::CTTZ_ZERO_UNDEF}, VT,
             Custom);
       }
+
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
     }
 
     for (MVT VT : MVT::fp_fixedlen_vector_valuetypes()) {
@@ -1434,6 +1440,8 @@ RISCVTargetLowering::RISCVTargetLowering(const TargetMachine &TM,
                            ISD::STRICT_FCEIL, ISD::STRICT_FFLOOR, ISD::STRICT_FROUND,
                            ISD::STRICT_FROUNDEVEN, ISD::STRICT_FNEARBYINT},
                           VT, Custom);
+
+      setOperationAction(ISD::VECTOR_COMPRESS, VT, Custom);
     }
 
     // Custom-legalize bitcasts from fixed-length vectors to scalar types.
@@ -7082,6 +7090,8 @@ SDValue RISCVTargetLowering::LowerOperation(SDValue Op,
   case ISD::MSTORE:
   case ISD::VP_STORE:
     return lowerMaskedStore(Op, DAG);
+  case ISD::VECTOR_COMPRESS:
+    return lowerVectorCompress(Op, DAG);
   case ISD::SELECT_CC: {
     // This occurs because we custom legalize SETGT and SETUGT for setcc. That
     // causes LegalizeDAG to think we need to custom legalize select_cc. Expand
@@ -11225,6 +11235,36 @@ SDValue RISCVTargetLowering::lowerMaskedStore(SDValue Op,
                      DAG.getVTList(MVT::Other), Ops, MemVT, MMO);
 }
 
+SDValue RISCVTargetLowering::lowerVectorCompress(SDValue Op,
+                                                 SelectionDAG &DAG) const {
+  SDLoc DL(Op);
+  SDValue Val = Op.getOperand(0);
+  SDValue Mask = Op.getOperand(1);
+  SDValue Passthru = Op.getOperand(2);
+
+  MVT VT = Val.getSimpleValueType();
+  MVT XLenVT = Subtarget.getXLenVT();
+  MVT ContainerVT = VT;
+  if (VT.isFixedLengthVector()) {
+    ContainerVT = getContainerForFixedLengthVector(VT);
+    MVT MaskVT = getMaskTypeFor(ContainerVT);
+    Val = convertToScalableVector(ContainerVT, Val, DAG, Subtarget);
+    Mask = convertToScalableVector(MaskVT, Mask, DAG, Subtarget);
+    Passthru = convertToScalableVector(ContainerVT, Passthru, DAG, Subtarget);
+  }
+
+  SDValue VL = getDefaultVLOps(VT, ContainerVT, DL, DAG, Subtarget).second;
+  SDValue Res =
+      DAG.getNode(ISD::INTRINSIC_WO_CHAIN, DL, ContainerVT,
+                  DAG.getConstant(Intrinsic::riscv_vcompress, DL, XLenVT),
+                  Passthru, Val, Mask, VL);
+
+  if (VT.isFixedLengthVector())
+    Res = convertFromScalableVector(VT, Res, DAG, Subtarget);
+
+  return Res;
+}
+
 SDValue
 RISCVTargetLowering::lowerFixedLengthVectorSetccToRVV(SDValue Op,
                                                       SelectionDAG &DAG) const {

llvm/lib/Target/RISCV/RISCVISelLowering.h

Lines changed: 1 addition & 0 deletions
@@ -965,6 +965,7 @@ class RISCVTargetLowering : public TargetLowering {
   SDValue lowerABS(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedLoad(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerMaskedStore(SDValue Op, SelectionDAG &DAG) const;
+  SDValue lowerVectorCompress(SDValue Op, SelectionDAG &DAG) const;
   SDValue lowerFixedLengthVectorFCOPYSIGNToRVV(SDValue Op,
                                                SelectionDAG &DAG) const;
   SDValue lowerMaskedGather(SDValue Op, SelectionDAG &DAG) const;

Lines changed: 255 additions & 0 deletions
@@ -0,0 +1,255 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
+; RUN: llc -mtriple=riscv32 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+; RUN: llc -mtriple=riscv64 -mattr=+m,+v,+f,+d,+zfh,+zvfh -verify-machineinstrs < %s | FileCheck %s
+
+define <1 x half> @vector_compress_v1f16(<1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+  %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> undef)
+  ret <1 x half> %ret
+}
+
+define <1 x half> @vector_compress_v1f16_passthru(<1 x half> %passthru, <1 x half> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e16, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+  %ret = call <1 x half> @llvm.experimental.vector.compress.v1f16(<1 x half> %v, <1 x i1> %mask, <1 x half> %passthru)
+  ret <1 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16(<2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+  %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> undef)
+  ret <2 x half> %ret
+}
+
+define <2 x half> @vector_compress_v2f16_passthru(<2 x half> %passthru, <2 x half> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e16, mf4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+  %ret = call <2 x half> @llvm.experimental.vector.compress.v2f16(<2 x half> %v, <2 x i1> %mask, <2 x half> %passthru)
+  ret <2 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16(<4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+  %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> undef)
+  ret <4 x half> %ret
+}
+
+define <4 x half> @vector_compress_v4f16_passthru(<4 x half> %passthru, <4 x half> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e16, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+  %ret = call <4 x half> @llvm.experimental.vector.compress.v4f16(<4 x half> %v, <4 x i1> %mask, <4 x half> %passthru)
+  ret <4 x half> %ret
+}
+
+define <8 x half> @vector_compress_v8f16(<8 x half> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f16:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+  %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> undef)
+  ret <8 x half> %ret
+}
+
+define <8 x half> @vector_compress_v8f16_passthru(<8 x half> %passthru, <8 x half> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f16_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e16, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+  %ret = call <8 x half> @llvm.experimental.vector.compress.v8f16(<8 x half> %v, <8 x i1> %mask, <8 x half> %passthru)
+  ret <8 x half> %ret
+}
+
+define <1 x float> @vector_compress_v1f32(<1 x float> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+  %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> undef)
+  ret <1 x float> %ret
+}
+
+define <1 x float> @vector_compress_v1f32_passthru(<1 x float> %passthru, <1 x float> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e32, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+  %ret = call <1 x float> @llvm.experimental.vector.compress.v1f32(<1 x float> %v, <1 x i1> %mask, <1 x float> %passthru)
+  ret <1 x float> %ret
+}
+
+define <2 x float> @vector_compress_v2f32(<2 x float> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv1r.v v8, v9
+; CHECK-NEXT: ret
+  %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> undef)
+  ret <2 x float> %ret
+}
+
+define <2 x float> @vector_compress_v2f32_passthru(<2 x float> %passthru, <2 x float> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e32, mf2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+  %ret = call <2 x float> @llvm.experimental.vector.compress.v2f32(<2 x float> %v, <2 x i1> %mask, <2 x float> %passthru)
+  ret <2 x float> %ret
+}
+
+define <4 x float> @vector_compress_v4f32(<4 x float> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+  %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> undef)
+  ret <4 x float> %ret
+}
+
+define <4 x float> @vector_compress_v4f32_passthru(<4 x float> %passthru, <4 x float> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e32, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+  %ret = call <4 x float> @llvm.experimental.vector.compress.v4f32(<4 x float> %v, <4 x i1> %mask, <4 x float> %passthru)
+  ret <4 x float> %ret
+}
+
+define <8 x float> @vector_compress_v8f32(<8 x float> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f32:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+  %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> undef)
+  ret <8 x float> %ret
+}
+
+define <8 x float> @vector_compress_v8f32_passthru(<8 x float> %passthru, <8 x float> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f32_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e32, m2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v10, v0
+; CHECK-NEXT: ret
+  %ret = call <8 x float> @llvm.experimental.vector.compress.v8f32(<8 x float> %v, <8 x i1> %mask, <8 x float> %passthru)
+  ret <8 x float> %ret
+}
+
+define <1 x double> @vector_compress_v1f64(<1 x double> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+  %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> undef)
+  ret <1 x double> %ret
+}
+
+define <1 x double> @vector_compress_v1f64_passthru(<1 x double> %passthru, <1 x double> %v, <1 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v1f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 1, e64, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+  %ret = call <1 x double> @llvm.experimental.vector.compress.v1f64(<1 x double> %v, <1 x i1> %mask, <1 x double> %passthru)
+  ret <1 x double> %ret
+}
+
+define <2 x double> @vector_compress_v2f64(<2 x double> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, ta, ma
+; CHECK-NEXT: vcompress.vm v9, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v9
+; CHECK-NEXT: ret
+  %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> undef)
+  ret <2 x double> %ret
+}
+
+define <2 x double> @vector_compress_v2f64_passthru(<2 x double> %passthru, <2 x double> %v, <2 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v2f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 2, e64, m1, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v9, v0
+; CHECK-NEXT: ret
+  %ret = call <2 x double> @llvm.experimental.vector.compress.v2f64(<2 x double> %v, <2 x i1> %mask, <2 x double> %passthru)
+  ret <2 x double> %ret
+}
+
+define <4 x double> @vector_compress_v4f64(<4 x double> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, ta, ma
+; CHECK-NEXT: vcompress.vm v10, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v10
+; CHECK-NEXT: ret
+  %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> undef)
+  ret <4 x double> %ret
+}
+
+define <4 x double> @vector_compress_v4f64_passthru(<4 x double> %passthru, <4 x double> %v, <4 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v4f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 4, e64, m2, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v10, v0
+; CHECK-NEXT: ret
+  %ret = call <4 x double> @llvm.experimental.vector.compress.v4f64(<4 x double> %v, <4 x i1> %mask, <4 x double> %passthru)
+  ret <4 x double> %ret
+}
+
+define <8 x double> @vector_compress_v8f64(<8 x double> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f64:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, ta, ma
+; CHECK-NEXT: vcompress.vm v12, v8, v0
+; CHECK-NEXT: vmv.v.v v8, v12
+; CHECK-NEXT: ret
+  %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> undef)
+  ret <8 x double> %ret
+}
+
+define <8 x double> @vector_compress_v8f64_passthru(<8 x double> %passthru, <8 x double> %v, <8 x i1> %mask) {
+; CHECK-LABEL: vector_compress_v8f64_passthru:
+; CHECK: # %bb.0:
+; CHECK-NEXT: vsetivli zero, 8, e64, m4, tu, ma
+; CHECK-NEXT: vcompress.vm v8, v12, v0
+; CHECK-NEXT: ret
+  %ret = call <8 x double> @llvm.experimental.vector.compress.v8f64(<8 x double> %v, <8 x i1> %mask, <8 x double> %passthru)
+  ret <8 x double> %ret
+}
