diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
index 98c25bc93a8a2..28cc136d76ffc 100644
--- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
+++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp
@@ -19462,6 +19462,11 @@ void RISCVTargetLowering::computeKnownBitsForTargetNode(const SDValue Op,
     Known = Known.intersectWith(Known2);
     break;
   }
+  case RISCVISD::VCPOP_VL: {
+    KnownBits Known2 = DAG.computeKnownBits(Op.getOperand(2), Depth + 1);
+    Known.Zero.setBitsFrom(Known2.countMaxActiveBits());
+    break;
+  }
   case RISCVISD::CZERO_EQZ:
   case RISCVISD::CZERO_NEZ:
     Known = DAG.computeKnownBits(Op.getOperand(0), Depth + 1);
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-compute-known-bits.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-compute-known-bits.ll
new file mode 100644
index 0000000000000..7c569da9291db
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vcpop-compute-known-bits.ll
@@ -0,0 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,RV64
+
+define i32 @test(<8 x i1> %mask) {
+; CHECK-LABEL: test:
+; CHECK:       # %bb.0:
+; CHECK-NEXT:    vsetivli zero, 8, e8, mf2, ta, ma
+; CHECK-NEXT:    vcpop.m a0, v0
+; CHECK-NEXT:    ret
+  %1 = bitcast <8 x i1> %mask to i8
+  %2 = call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 %1)
+  %3 = zext nneg i8 %2 to i32
+  ret i32 %3
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; RV32: {{.*}}
+; RV64: {{.*}}
diff --git a/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll b/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll
new file mode 100644
index 0000000000000..16c4ade7fa9cb
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/rvv/vcpop-shl-zext-opt.ll
@@ -0,0 +1,198 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=riscv32 -mattr=+v | FileCheck %s --check-prefixes=CHECK,RV32
+; RUN: llc < %s -mtriple=riscv64 -mattr=+v | FileCheck %s --check-prefixes=CHECK,RV64
+
+define dso_local void @test_store1(ptr nocapture noundef writeonly %dst, ptr nocapture noundef readonly %src, i32 noundef signext %c, i32 noundef signext %n) {
+; RV32-LABEL: test_store1:
+; RV32:       # %bb.0: # %entry
+; RV32-NEXT:    blez a3, .LBB0_6
+; RV32-NEXT:  # %bb.1: # %for.body.preheader
+; RV32-NEXT:    li a4, 8
+; RV32-NEXT:    bltu a3, a4, .LBB0_7
+; RV32-NEXT:  # %bb.2: # %for.body.preheader
+; RV32-NEXT:    sub a4, a0, a1
+; RV32-NEXT:    sltu a5, a0, a1
+; RV32-NEXT:    neg a5, a5
+; RV32-NEXT:    sltiu a4, a4, 32
+; RV32-NEXT:    seqz a5, a5
+; RV32-NEXT:    and a4, a5, a4
+; RV32-NEXT:    bnez a4, .LBB0_7
+; RV32-NEXT:  # %bb.3: # %vector.ph
+; RV32-NEXT:    lui a5, 524288
+; RV32-NEXT:    addi a5, a5, -8
+; RV32-NEXT:    and a5, a3, a5
+; RV32-NEXT:    li a7, 0
+; RV32-NEXT:    li a6, 0
+; RV32-NEXT:  .LBB0_4: # %vector.body
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    slli t0, a7, 2
+; RV32-NEXT:    addi t1, a7, 8
+; RV32-NEXT:    add t0, a1, t0
+; RV32-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV32-NEXT:    vle32.v v8, (t0)
+; RV32-NEXT:    sltu a7, t1, a7
+; RV32-NEXT:    xor t0, t1, a5
+; RV32-NEXT:    add a6, a6, a7
+; RV32-NEXT:    vmslt.vx v10, v8, a2
+; RV32-NEXT:    vcompress.vm v12, v8, v10
+; RV32-NEXT:    vcpop.m a7, v10
+; RV32-NEXT:    vsetvli zero, a7, e32, m2, ta, ma
+; RV32-NEXT:    vse32.v v12, (a0)
+; RV32-NEXT:    slli a7, a7, 2
+; RV32-NEXT:    or t0, t0, a6
+; RV32-NEXT:    add a0, a0, a7
+; RV32-NEXT:    mv a7, t1
+; RV32-NEXT:    bnez t0, .LBB0_4
+; RV32-NEXT:  # %bb.5: # %middle.block
+; RV32-NEXT:    bne a5, a3, .LBB0_9
+; RV32-NEXT:  .LBB0_6: # %for.cond.cleanup
+; RV32-NEXT:    ret
+; RV32-NEXT:  .LBB0_7:
+; RV32-NEXT:    li a5, 0
+; RV32-NEXT:    li a4, 0
+; RV32-NEXT:    j .LBB0_9
+; RV32-NEXT:  .LBB0_8: # %for.inc
+; RV32-NEXT:    # in Loop: Header=BB0_9 Depth=1
+; RV32-NEXT:    addi a5, a5, 1
+; RV32-NEXT:    seqz a6, a5
+; RV32-NEXT:    add a4, a4, a6
+; RV32-NEXT:    xor a6, a5, a3
+; RV32-NEXT:    or a6, a6, a4
+; RV32-NEXT:    beqz a6, .LBB0_6
+; RV32-NEXT:  .LBB0_9: # %for.body
+; RV32-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV32-NEXT:    slli a6, a5, 2
+; RV32-NEXT:    add a6, a1, a6
+; RV32-NEXT:    lw a6, 0(a6)
+; RV32-NEXT:    bge a6, a2, .LBB0_8
+; RV32-NEXT:  # %bb.10: # %if.then
+; RV32-NEXT:    # in Loop: Header=BB0_9 Depth=1
+; RV32-NEXT:    addi a7, a0, 4
+; RV32-NEXT:    sw a6, 0(a0)
+; RV32-NEXT:    mv a0, a7
+; RV32-NEXT:    j .LBB0_8
+;
+; RV64-LABEL: test_store1:
+; RV64:       # %bb.0: # %entry
+; RV64-NEXT:    blez a3, .LBB0_6
+; RV64-NEXT:  # %bb.1: # %for.body.preheader
+; RV64-NEXT:    li a5, 8
+; RV64-NEXT:    li a4, 0
+; RV64-NEXT:    bltu a3, a5, .LBB0_7
+; RV64-NEXT:  # %bb.2: # %for.body.preheader
+; RV64-NEXT:    sub a5, a0, a1
+; RV64-NEXT:    li a6, 31
+; RV64-NEXT:    bgeu a6, a5, .LBB0_7
+; RV64-NEXT:  # %bb.3: # %vector.ph
+; RV64-NEXT:    lui a4, 524288
+; RV64-NEXT:    addiw a4, a4, -8
+; RV64-NEXT:    and a4, a3, a4
+; RV64-NEXT:    slli a5, a4, 2
+; RV64-NEXT:    add a5, a5, a1
+; RV64-NEXT:    mv a6, a1
+; RV64-NEXT:  .LBB0_4: # %vector.body
+; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-NEXT:    vsetivli zero, 8, e32, m2, ta, ma
+; RV64-NEXT:    vle32.v v8, (a6)
+; RV64-NEXT:    addi a6, a6, 32
+; RV64-NEXT:    vmslt.vx v10, v8, a2
+; RV64-NEXT:    vcompress.vm v12, v8, v10
+; RV64-NEXT:    vcpop.m a7, v10
+; RV64-NEXT:    vsetvli zero, a7, e32, m2, ta, ma
+; RV64-NEXT:    vse32.v v12, (a0)
+; RV64-NEXT:    slli a7, a7, 2
+; RV64-NEXT:    add a0, a0, a7
+; RV64-NEXT:    bne a6, a5, .LBB0_4
+; RV64-NEXT:  # %bb.5: # %middle.block
+; RV64-NEXT:    bne a4, a3, .LBB0_7
+; RV64-NEXT:  .LBB0_6: # %for.cond.cleanup
+; RV64-NEXT:    ret
+; RV64-NEXT:  .LBB0_7: # %for.body.preheader13
+; RV64-NEXT:    slli a4, a4, 2
+; RV64-NEXT:    slli a5, a3, 2
+; RV64-NEXT:    add a3, a1, a4
+; RV64-NEXT:    add a1, a1, a5
+; RV64-NEXT:    j .LBB0_9
+; RV64-NEXT:  .LBB0_8: # %for.inc
+; RV64-NEXT:    # in Loop: Header=BB0_9 Depth=1
+; RV64-NEXT:    addi a3, a3, 4
+; RV64-NEXT:    beq a3, a1, .LBB0_6
+; RV64-NEXT:  .LBB0_9: # %for.body
+; RV64-NEXT:    # =>This Inner Loop Header: Depth=1
+; RV64-NEXT:    lw a4, 0(a3)
+; RV64-NEXT:    bge a4, a2, .LBB0_8
+; RV64-NEXT:  # %bb.10: # %if.then
+; RV64-NEXT:    # in Loop: Header=BB0_9 Depth=1
+; RV64-NEXT:    addi a5, a0, 4
+; RV64-NEXT:    sw a4, 0(a0)
+; RV64-NEXT:    mv a0, a5
+; RV64-NEXT:    j .LBB0_8
+entry:
+  %cmp8 = icmp sgt i32 %n, 0
+  br i1 %cmp8, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:                               ; preds = %entry
+  %dst11 = ptrtoint ptr %dst to i64
+  %src12 = ptrtoint ptr %src to i64
+  %wide.trip.count = zext nneg i32 %n to i64
+  %min.iters.check = icmp ult i32 %n, 8
+  %0 = sub i64 %dst11, %src12
+  %diff.check = icmp ult i64 %0, 32
+  %or.cond = or i1 %min.iters.check, %diff.check
+  br i1 %or.cond, label %for.body.preheader13, label %vector.ph
+
+for.body.preheader13:                             ; preds = %middle.block, %for.body.preheader
+  %indvars.iv.ph = phi i64 [ 0, %for.body.preheader ], [ %n.vec, %middle.block ]
+  %dst.addr.09.ph = phi ptr [ %dst, %for.body.preheader ], [ %monotonic.add, %middle.block ]
+  br label %for.body
+
+vector.ph:                                        ; preds = %for.body.preheader
+  %n.vec = and i64 %wide.trip.count, 2147483640
+  %broadcast.splatinsert = insertelement <8 x i32> poison, i32 %c, i64 0
+  %broadcast.splat = shufflevector <8 x i32> %broadcast.splatinsert, <8 x i32> poison, <8 x i32> zeroinitializer
+  br label %vector.body
+
+vector.body:                                      ; preds = %vector.body, %vector.ph
+  %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+  %monotonic.iv = phi ptr [ %dst, %vector.ph ], [ %monotonic.add, %vector.body ]
+  %1 = getelementptr inbounds i32, ptr %src, i64 %index
+  %wide.load = load <8 x i32>, ptr %1, align 4
+  %2 = icmp slt <8 x i32> %wide.load, %broadcast.splat
+  tail call void @llvm.masked.compressstore.v8i32(<8 x i32> %wide.load, ptr align 4 %monotonic.iv, <8 x i1> %2)
+  %3 = bitcast <8 x i1> %2 to i8
+  %4 = tail call range(i8 0, 9) i8 @llvm.ctpop.i8(i8 %3)
+  %5 = shl nuw nsw i8 %4, 2
+  %6 = zext nneg i8 %5 to i64
+  %monotonic.add = getelementptr inbounds i8, ptr %monotonic.iv, i64 %6
+  %index.next = add nuw i64 %index, 8
+  %7 = icmp eq i64 %index.next, %n.vec
+  br i1 %7, label %middle.block, label %vector.body
+
+middle.block:                                     ; preds = %vector.body
+  %cmp.n = icmp eq i64 %n.vec, %wide.trip.count
+  br i1 %cmp.n, label %for.cond.cleanup, label %for.body.preheader13
+
+for.cond.cleanup:                                 ; preds = %for.inc, %middle.block, %entry
+  ret void
+
+for.body:                                         ; preds = %for.body.preheader13, %for.inc
+  %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ %indvars.iv.ph, %for.body.preheader13 ]
+  %dst.addr.09 = phi ptr [ %dst.addr.1, %for.inc ], [ %dst.addr.09.ph, %for.body.preheader13 ]
+  %arrayidx = getelementptr inbounds i32, ptr %src, i64 %indvars.iv
+  %8 = load i32, ptr %arrayidx, align 4
+  %cmp1 = icmp slt i32 %8, %c
+  br i1 %cmp1, label %if.then, label %for.inc
+
+if.then:                                          ; preds = %for.body
+  %incdec.ptr = getelementptr inbounds i8, ptr %dst.addr.09, i64 4
+  store i32 %8, ptr %dst.addr.09, align 4
+  br label %for.inc
+
+for.inc:                                          ; preds = %for.body, %if.then
+  %dst.addr.1 = phi ptr [ %incdec.ptr, %if.then ], [ %dst.addr.09, %for.body ]
+  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+  %exitcond.not = icmp eq i64 %indvars.iv.next, %wide.trip.count
+  br i1 %exitcond.not, label %for.cond.cleanup, label %for.body
+}
+;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
+; CHECK: {{.*}}
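
A note on the known-bits reasoning in the RISCVISelLowering.cpp hunk above: vcpop.m returns the number of set mask bits among the first VL elements, so its result can never exceed VL (operand 2 of RISCVISD::VCPOP_VL). If VL fits in N bits, every result bit from position N upward is known zero, which is what Known.Zero.setBitsFrom(Known2.countMaxActiveBits()) records. The standalone C++ sketch below is illustrative only, not part of the patch; bitsNeeded is a hypothetical stand-in for KnownBits::countMaxActiveBits() when all that is known is an upper bound on the value.

// Standalone sketch of the bound the patch encodes (see assumptions above).
#include <cassert>
#include <cstdint>

// Hypothetical helper: number of bits needed to represent a value known to
// be at most maxVL (mirrors what countMaxActiveBits() reports for it).
static unsigned bitsNeeded(uint64_t maxVL) {
  unsigned n = 0;
  while (n < 64 && (maxVL >> n))
    ++n;
  return n;
}

int main() {
  // With the fixed VL of 8 used by both tests, popcount <= 8, and 8 needs
  // 4 bits, so bits [4, 63] of the vcpop.m result are known zero.
  assert(bitsNeeded(8) == 4);
  // Knowing those high bits are zero lets the combiner treat the later
  // "zext nneg i8 ... to i32" as a no-op, which is why the checks in
  // vcpop-compute-known-bits.ll contain no extension instruction.
  return 0;
}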