
Commit 51d5b65

[RISCV] Handle scalable ops with < EEW / 2 narrow types in combineBinOp_VLToVWBinOp_VL (#84158)
We can remove the restriction that the narrow type needs to be exactly EEW / 2 for scalable ISD::{ADD,SUB,MUL} nodes. This allows us to perform the combine even if we can't fully fold the extend into the widening op. VP intrinsics already do this, since they are lowered to _VL nodes which don't have this restriction.

The "exactly EEW / 2" narrow-type restriction also prevented us from emitting V{S,Z}EXT_VL nodes with i1 element types, which crash when we try to select them because no other legal type is double the size of i1; see the `i1_zext` test case added in this PR. To preserve that behaviour, this patch adds an explicit check for i1 narrow types instead.
1 parent 06d2452 commit 51d5b65
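
To make the effect concrete, here is a minimal sketch of the kind of scalable input the combine now handles. It is illustrative only: the function name is made up and not taken from the patch or its tests. The i8 sources are a quarter of the i32 destination element width, i.e. narrower than EEW / 2, so the extends can only be partially folded, e.g. into a vsext.vf2 feeding a widening vwadd.vv.

; Illustrative sketch only (hypothetical function name): i8 sources
; sign-extended to i32, i.e. narrower than EEW / 2.
define <vscale x 2 x i32> @sketch_sext_i8_i32_add(<vscale x 2 x i8> %x,
                                                  <vscale x 2 x i8> %y) {
  %xe = sext <vscale x 2 x i8> %x to <vscale x 2 x i32>
  %ye = sext <vscale x 2 x i8> %y to <vscale x 2 x i32>
  ; With this change the extends are still combined into the add, even though
  ; they cannot be folded away completely.
  %r = add <vscale x 2 x i32> %xe, %ye
  ret <vscale x 2 x i32> %r
}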

File tree

5 files changed: +631 −602 lines changed


llvm/lib/Target/RISCV/RISCVISelLowering.cpp

Lines changed: 2 additions & 6 deletions
@@ -13715,12 +13715,8 @@ struct NodeExtensionHelper {

       SDValue NarrowElt = OrigOperand.getOperand(0);
       MVT NarrowVT = NarrowElt.getSimpleValueType();
-
-      unsigned ScalarBits = VT.getScalarSizeInBits();
-      unsigned NarrowScalarBits = NarrowVT.getScalarSizeInBits();
-
-      // Ensure the extension's semantic is equivalent to rvv vzext or vsext.
-      if (ScalarBits != NarrowScalarBits * 2)
+      // i1 types are legal but we can't select V{S,Z}EXT_VLs with them.
+      if (NarrowVT.getVectorElementType() == MVT::i1)
         break;

       SupportsZExt = Opc == ISD::ZERO_EXTEND;
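
The new i1 check above keeps the combine away from inputs whose narrow type is an i1 mask vector, since folding their extend would require a V{S,Z}EXT_VL with an i1 source, which cannot be selected. A hypothetical shape of such an input is sketched below; this is not the `i1_zext` test case added by the PR, just an illustration with a made-up function name.

; Hypothetical sketch: zero-extending an i1 mask vector straight to i32.
; Folding this extend would need a V{S,Z}EXT_VL from i1, which crashes at
; instruction selection because no other legal type is double the size of i1,
; so the combine now skips it.
define <vscale x 2 x i32> @sketch_i1_zext_add(<vscale x 2 x i1> %m,
                                              <vscale x 2 x i32> %v) {
  %e = zext <vscale x 2 x i1> %m to <vscale x 2 x i32>
  %r = add <vscale x 2 x i32> %e, %v
  ret <vscale x 2 x i32> %r
}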

llvm/test/CodeGen/RISCV/rvv/vscale-vw-web-simplification.ll

Lines changed: 20 additions & 18 deletions
@@ -283,18 +283,19 @@ define <vscale x 2 x i32> @vwop_vscale_sext_i8i32_multiple_users(ptr %x, ptr %y,
 ;
 ; FOLDING-LABEL: vwop_vscale_sext_i8i32_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
 ; FOLDING-NEXT:    vle8.v v8, (a0)
 ; FOLDING-NEXT:    vle8.v v9, (a1)
 ; FOLDING-NEXT:    vle8.v v10, (a2)
-; FOLDING-NEXT:    vsext.vf4 v11, v8
-; FOLDING-NEXT:    vsext.vf4 v8, v9
-; FOLDING-NEXT:    vsext.vf4 v9, v10
-; FOLDING-NEXT:    vmul.vv v8, v11, v8
-; FOLDING-NEXT:    vadd.vv v10, v11, v9
-; FOLDING-NEXT:    vsub.vv v9, v11, v9
-; FOLDING-NEXT:    vor.vv v8, v8, v10
-; FOLDING-NEXT:    vor.vv v8, v8, v9
+; FOLDING-NEXT:    vsext.vf2 v11, v8
+; FOLDING-NEXT:    vsext.vf2 v8, v9
+; FOLDING-NEXT:    vsext.vf2 v9, v10
+; FOLDING-NEXT:    vwmul.vv v10, v11, v8
+; FOLDING-NEXT:    vwadd.vv v8, v11, v9
+; FOLDING-NEXT:    vwsub.vv v12, v11, v9
+; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vor.vv v8, v10, v8
+; FOLDING-NEXT:    vor.vv v8, v8, v12
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i8>, ptr %x
   %b = load <vscale x 2 x i8>, ptr %y
@@ -563,18 +564,19 @@ define <vscale x 2 x i32> @vwop_vscale_zext_i8i32_multiple_users(ptr %x, ptr %y,
 ;
 ; FOLDING-LABEL: vwop_vscale_zext_i8i32_multiple_users:
 ; FOLDING:       # %bb.0:
-; FOLDING-NEXT:    vsetvli a3, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vsetvli a3, zero, e16, mf2, ta, ma
 ; FOLDING-NEXT:    vle8.v v8, (a0)
 ; FOLDING-NEXT:    vle8.v v9, (a1)
 ; FOLDING-NEXT:    vle8.v v10, (a2)
-; FOLDING-NEXT:    vzext.vf4 v11, v8
-; FOLDING-NEXT:    vzext.vf4 v8, v9
-; FOLDING-NEXT:    vzext.vf4 v9, v10
-; FOLDING-NEXT:    vmul.vv v8, v11, v8
-; FOLDING-NEXT:    vadd.vv v10, v11, v9
-; FOLDING-NEXT:    vsub.vv v9, v11, v9
-; FOLDING-NEXT:    vor.vv v8, v8, v10
-; FOLDING-NEXT:    vor.vv v8, v8, v9
+; FOLDING-NEXT:    vzext.vf2 v11, v8
+; FOLDING-NEXT:    vzext.vf2 v8, v9
+; FOLDING-NEXT:    vzext.vf2 v9, v10
+; FOLDING-NEXT:    vwmulu.vv v10, v11, v8
+; FOLDING-NEXT:    vwaddu.vv v8, v11, v9
+; FOLDING-NEXT:    vwsubu.vv v12, v11, v9
+; FOLDING-NEXT:    vsetvli zero, zero, e32, m1, ta, ma
+; FOLDING-NEXT:    vor.vv v8, v10, v8
+; FOLDING-NEXT:    vor.vv v8, v8, v12
 ; FOLDING-NEXT:    ret
   %a = load <vscale x 2 x i8>, ptr %x
   %b = load <vscale x 2 x i8>, ptr %y
