diff --git a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp index e7c13f180f64e..151a7821f835d 100644 --- a/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp +++ b/llvm/lib/Target/RISCV/RISCVInsertVSETVLI.cpp @@ -404,7 +404,9 @@ DemandedFields getDemanded(const MachineInstr &MI, const RISCVSubtarget *ST) { if (RISCVII::hasSEWOp(TSFlags)) { Res.demandVTYPE(); if (RISCVII::hasVLOp(TSFlags)) - Res.demandVL(); + if (const MachineOperand &VLOp = MI.getOperand(getVLOpNum(MI)); + !VLOp.isReg() || !VLOp.isUndef()) + Res.demandVL(); // Behavior is independent of mask policy. if (!RISCVII::usesMaskPolicy(TSFlags)) @@ -524,7 +526,6 @@ class VSETVLIInfo { AVLIsReg, AVLIsImm, AVLIsVLMAX, - AVLIsIgnored, Unknown, } State = Uninitialized; @@ -564,12 +565,9 @@ class VSETVLIInfo { void setAVLVLMAX() { State = AVLIsVLMAX; } - void setAVLIgnored() { State = AVLIsIgnored; } - bool hasAVLImm() const { return State == AVLIsImm; } bool hasAVLReg() const { return State == AVLIsReg; } bool hasAVLVLMAX() const { return State == AVLIsVLMAX; } - bool hasAVLIgnored() const { return State == AVLIsIgnored; } Register getAVLReg() const { assert(hasAVLReg() && AVLRegDef.DefReg.isVirtual()); return AVLRegDef.DefReg; @@ -600,8 +598,6 @@ class VSETVLIInfo { setAVLRegDef(Info.getAVLVNInfo(), Info.getAVLReg()); else if (Info.hasAVLVLMAX()) setAVLVLMAX(); - else if (Info.hasAVLIgnored()) - setAVLIgnored(); else { assert(Info.hasAVLImm()); setAVLImm(Info.getAVLImm()); @@ -622,8 +618,6 @@ class VSETVLIInfo { } if (hasAVLVLMAX()) return true; - if (hasAVLIgnored()) - return false; return false; } @@ -645,9 +639,6 @@ class VSETVLIInfo { if (hasAVLVLMAX()) return Other.hasAVLVLMAX() && hasSameVLMAX(Other); - if (hasAVLIgnored()) - return Other.hasAVLIgnored(); - return false; } @@ -821,8 +812,6 @@ class VSETVLIInfo { OS << "AVLImm=" << (unsigned)AVLImm; if (hasAVLVLMAX()) OS << "AVLVLMAX"; - if (hasAVLIgnored()) - OS << "AVLIgnored"; OS << ", " << "VLMul=" << (unsigned)VLMul << ", " << "SEW=" << (unsigned)SEW << ", " @@ -938,7 +927,8 @@ RISCVInsertVSETVLI::getInfoForVSETVLI(const MachineInstr &MI) const { if (AVLReg == RISCV::X0) NewInfo.setAVLVLMAX(); else if (MI.getOperand(1).isUndef()) - NewInfo.setAVLIgnored(); + // Otherwise use an AVL of 1 to avoid depending on previous vl. + NewInfo.setAVLImm(1); else { VNInfo *VNI = getVNInfoFromReg(AVLReg, MI, LIS); NewInfo.setAVLRegDef(VNI, AVLReg); @@ -1014,17 +1004,17 @@ RISCVInsertVSETVLI::computeInfoForInstr(const MachineInstr &MI) const { else InstrInfo.setAVLImm(Imm); } else if (VLOp.isUndef()) { - InstrInfo.setAVLIgnored(); + // Otherwise use an AVL of 1 to avoid depending on previous vl. + InstrInfo.setAVLImm(1); } else { VNInfo *VNI = getVNInfoFromReg(VLOp.getReg(), MI, LIS); InstrInfo.setAVLRegDef(VNI, VLOp.getReg()); } } else { assert(isScalarExtractInstr(MI)); - // TODO: If we are more clever about x0,x0 insertion then we should be able - // to deduce that the VL is ignored based off of DemandedFields, and remove - // the AVLIsIgnored state. Then we can just use an arbitrary immediate AVL. - InstrInfo.setAVLIgnored(); + // Pick a random value for state tracking purposes, will be ignored via + // the demanded fields mechanism + InstrInfo.setAVLImm(1); } #ifndef NDEBUG if (std::optional EEW = getEEWForLoadStore(MI)) { @@ -1104,28 +1094,6 @@ void RISCVInsertVSETVLI::insertVSETVLI(MachineBasicBlock &MBB, return; } - if (Info.hasAVLIgnored()) { - // We can only use x0, x0 if there's no chance of the vtype change causing - // the previous vl to become invalid. - if (PrevInfo.isValid() && !PrevInfo.isUnknown() && - Info.hasSameVLMAX(PrevInfo)) { - auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addReg(RISCV::X0, RegState::Kill) - .addImm(Info.encodeVTYPE()) - .addReg(RISCV::VL, RegState::Implicit); - LIS->InsertMachineInstrInMaps(*MI); - return; - } - // Otherwise use an AVL of 1 to avoid depending on previous vl. - auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETIVLI)) - .addReg(RISCV::X0, RegState::Define | RegState::Dead) - .addImm(1) - .addImm(Info.encodeVTYPE()); - LIS->InsertMachineInstrInMaps(*MI); - return; - } - if (Info.hasAVLVLMAX()) { Register DestReg = MRI->createVirtualRegister(&RISCV::GPRRegClass); auto MI = BuildMI(MBB, InsertPt, DL, TII->get(RISCV::PseudoVSETVLIX0)) @@ -1534,11 +1502,6 @@ void RISCVInsertVSETVLI::doPRE(MachineBasicBlock &MBB) { return; } - // If the AVL isn't used in its predecessors then bail, since we have no AVL - // to insert a vsetvli with. - if (AvailableInfo.hasAVLIgnored()) - return; - // Model the effect of changing the input state of the block MBB to // AvailableInfo. We're looking for two issues here; one legality, // one profitability. diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll index 48e820243c957..8b31166e313de 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-fp.ll @@ -44,15 +44,16 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask) ; RV32-NEXT: ret ; RV32-NEXT: .LBB1_3: # %cond.load ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a1, a1, 2 ; RV32-NEXT: beqz a1, .LBB1_2 ; RV32-NEXT: .LBB1_4: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -69,15 +70,16 @@ define <2 x half> @expandload_v2f16(ptr %base, <2 x half> %src0, <2 x i1> %mask) ; RV64-NEXT: ret ; RV64-NEXT: .LBB1_3: # %cond.load ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a1, a1, 2 ; RV64-NEXT: beqz a1, .LBB1_2 ; RV64-NEXT: .LBB1_4: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x half> @llvm.masked.expandload.v2f16(ptr align 2 %base, <2 x i1> %mask, <2 x half> %src0) @@ -105,15 +107,16 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask) ; RV32-NEXT: ret ; RV32-NEXT: .LBB2_5: # %cond.load ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB2_2 ; RV32-NEXT: .LBB2_6: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 4 @@ -152,15 +155,16 @@ define <4 x half> @expandload_v4f16(ptr %base, <4 x half> %src0, <4 x i1> %mask) ; RV64-NEXT: ret ; RV64-NEXT: .LBB2_5: # %cond.load ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB2_2 ; RV64-NEXT: .LBB2_6: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 4 @@ -216,15 +220,16 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask) ; RV32-NEXT: ret ; RV32-NEXT: .LBB3_9: # %cond.load ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB3_2 ; RV32-NEXT: .LBB3_10: # %cond.load1 ; RV32-NEXT: flh fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 2 ; RV32-NEXT: andi a2, a1, 4 @@ -307,15 +312,16 @@ define <8 x half> @expandload_v8f16(ptr %base, <8 x half> %src0, <8 x i1> %mask) ; RV64-NEXT: ret ; RV64-NEXT: .LBB3_9: # %cond.load ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB3_2 ; RV64-NEXT: .LBB3_10: # %cond.load1 ; RV64-NEXT: flh fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 2 ; RV64-NEXT: andi a2, a1, 4 @@ -412,15 +418,16 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas ; RV32-NEXT: ret ; RV32-NEXT: .LBB5_3: # %cond.load ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a1, a1, 2 ; RV32-NEXT: beqz a1, .LBB5_2 ; RV32-NEXT: .LBB5_4: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -437,15 +444,16 @@ define <2 x float> @expandload_v2f32(ptr %base, <2 x float> %src0, <2 x i1> %mas ; RV64-NEXT: ret ; RV64-NEXT: .LBB5_3: # %cond.load ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a1, a1, 2 ; RV64-NEXT: beqz a1, .LBB5_2 ; RV64-NEXT: .LBB5_4: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x float> @llvm.masked.expandload.v2f32(ptr align 4 %base, <2 x i1> %mask, <2 x float> %src0) @@ -473,15 +481,16 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas ; RV32-NEXT: ret ; RV32-NEXT: .LBB6_5: # %cond.load ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB6_2 ; RV32-NEXT: .LBB6_6: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 4 @@ -520,15 +529,16 @@ define <4 x float> @expandload_v4f32(ptr %base, <4 x float> %src0, <4 x i1> %mas ; RV64-NEXT: ret ; RV64-NEXT: .LBB6_5: # %cond.load ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB6_2 ; RV64-NEXT: .LBB6_6: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 4 @@ -584,15 +594,16 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas ; RV32-NEXT: ret ; RV32-NEXT: .LBB7_9: # %cond.load ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB7_2 ; RV32-NEXT: .LBB7_10: # %cond.load1 ; RV32-NEXT: flw fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV32-NEXT: vfmv.s.f v10, fa5 +; RV32-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v10, 1 ; RV32-NEXT: addi a0, a0, 4 ; RV32-NEXT: andi a2, a1, 4 @@ -675,15 +686,16 @@ define <8 x float> @expandload_v8f32(ptr %base, <8 x float> %src0, <8 x i1> %mas ; RV64-NEXT: ret ; RV64-NEXT: .LBB7_9: # %cond.load ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB7_2 ; RV64-NEXT: .LBB7_10: # %cond.load1 ; RV64-NEXT: flw fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64-NEXT: vfmv.s.f v10, fa5 +; RV64-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 4 ; RV64-NEXT: andi a2, a1, 4 @@ -780,15 +792,16 @@ define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m ; RV32-NEXT: ret ; RV32-NEXT: .LBB9_3: # %cond.load ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m8, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a1, a1, 2 ; RV32-NEXT: beqz a1, .LBB9_2 ; RV32-NEXT: .LBB9_4: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vfmv.s.f v9, fa5 +; RV32-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV32-NEXT: vslideup.vi v8, v9, 1 ; RV32-NEXT: ret ; @@ -805,15 +818,16 @@ define <2 x double> @expandload_v2f64(ptr %base, <2 x double> %src0, <2 x i1> %m ; RV64-NEXT: ret ; RV64-NEXT: .LBB9_3: # %cond.load ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a1, a1, 2 ; RV64-NEXT: beqz a1, .LBB9_2 ; RV64-NEXT: .LBB9_4: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vfmv.s.f v9, fa5 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x double> @llvm.masked.expandload.v2f64(ptr align 8 %base, <2 x i1> %mask, <2 x double> %src0) @@ -841,15 +855,16 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m ; RV32-NEXT: ret ; RV32-NEXT: .LBB10_5: # %cond.load ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB10_2 ; RV32-NEXT: .LBB10_6: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vfmv.s.f v10, fa5 +; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v10, 1 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 4 @@ -888,15 +903,16 @@ define <4 x double> @expandload_v4f64(ptr %base, <4 x double> %src0, <4 x i1> %m ; RV64-NEXT: ret ; RV64-NEXT: .LBB10_5: # %cond.load ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB10_2 ; RV64-NEXT: .LBB10_6: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vfmv.s.f v10, fa5 +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 @@ -952,15 +968,16 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m ; RV32-NEXT: ret ; RV32-NEXT: .LBB11_9: # %cond.load ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 8, e64, m8, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV32-NEXT: vfmv.s.f v8, fa5 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 2 ; RV32-NEXT: beqz a2, .LBB11_2 ; RV32-NEXT: .LBB11_10: # %cond.load1 ; RV32-NEXT: fld fa5, 0(a0) -; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV32-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV32-NEXT: vfmv.s.f v12, fa5 +; RV32-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV32-NEXT: vslideup.vi v8, v12, 1 ; RV32-NEXT: addi a0, a0, 8 ; RV32-NEXT: andi a2, a1, 4 @@ -1043,15 +1060,16 @@ define <8 x double> @expandload_v8f64(ptr %base, <8 x double> %src0, <8 x i1> %m ; RV64-NEXT: ret ; RV64-NEXT: .LBB11_9: # %cond.load ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 8, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vfmv.s.f v8, fa5 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB11_2 ; RV64-NEXT: .LBB11_10: # %cond.load1 ; RV64-NEXT: fld fa5, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vfmv.s.f v12, fa5 +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v12, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll index d6aca55fbde59..5bf8b07efc1da 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-expandload-int.ll @@ -33,15 +33,16 @@ define <2 x i8> @expandload_v2i8(ptr %base, <2 x i8> %src0, <2 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB1_3: # %cond.load ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a1, a1, 2 ; CHECK-NEXT: beqz a1, .LBB1_2 ; CHECK-NEXT: .LBB1_4: # %cond.load1 ; CHECK-NEXT: lbu a0, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vsetivli zero, 2, e8, mf8, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i8> @llvm.masked.expandload.v2i8(ptr %base, <2 x i1> %mask, <2 x i8> %src0) @@ -69,15 +70,16 @@ define <4 x i8> @expandload_v4i8(ptr %base, <4 x i8> %src0, <4 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB2_5: # %cond.load ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB2_2 ; CHECK-NEXT: .LBB2_6: # %cond.load1 ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 4 @@ -133,15 +135,16 @@ define <8 x i8> @expandload_v8i8(ptr %base, <8 x i8> %src0, <8 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB3_9: # %cond.load ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB3_2 ; CHECK-NEXT: .LBB3_10: # %cond.load1 ; CHECK-NEXT: lbu a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e8, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 1 ; CHECK-NEXT: andi a2, a1, 4 @@ -227,15 +230,16 @@ define <2 x i16> @expandload_v2i16(ptr %base, <2 x i16> %src0, <2 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB5_3: # %cond.load ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a1, a1, 2 ; CHECK-NEXT: beqz a1, .LBB5_2 ; CHECK-NEXT: .LBB5_4: # %cond.load1 ; CHECK-NEXT: lh a0, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vsetivli zero, 2, e16, mf4, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i16> @llvm.masked.expandload.v2i16(ptr align 2 %base, <2 x i1> %mask, <2 x i16> %src0) @@ -263,15 +267,16 @@ define <4 x i16> @expandload_v4i16(ptr %base, <4 x i16> %src0, <4 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB6_5: # %cond.load ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB6_2 ; CHECK-NEXT: .LBB6_6: # %cond.load1 ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 4 @@ -327,15 +332,16 @@ define <8 x i16> @expandload_v8i16(ptr %base, <8 x i16> %src0, <8 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB7_9: # %cond.load ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB7_2 ; CHECK-NEXT: .LBB7_10: # %cond.load1 ; CHECK-NEXT: lh a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 2 ; CHECK-NEXT: andi a2, a1, 4 @@ -421,15 +427,16 @@ define <2 x i32> @expandload_v2i32(ptr %base, <2 x i32> %src0, <2 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB9_3: # %cond.load ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a1, a1, 2 ; CHECK-NEXT: beqz a1, .LBB9_2 ; CHECK-NEXT: .LBB9_4: # %cond.load1 ; CHECK-NEXT: lw a0, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a0 +; CHECK-NEXT: vsetivli zero, 2, e32, mf2, ta, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: ret %res = call <2 x i32> @llvm.masked.expandload.v2i32(ptr align 4 %base, <2 x i1> %mask, <2 x i32> %src0) @@ -457,15 +464,16 @@ define <4 x i32> @expandload_v4i32(ptr %base, <4 x i32> %src0, <4 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB10_5: # %cond.load ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB10_2 ; CHECK-NEXT: .LBB10_6: # %cond.load1 ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv.s.x v9, a2 +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v9, 1 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 4 @@ -521,15 +529,16 @@ define <8 x i32> @expandload_v8i32(ptr %base, <8 x i32> %src0, <8 x i1> %mask) { ; CHECK-NEXT: ret ; CHECK-NEXT: .LBB11_9: # %cond.load ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; CHECK-NEXT: vmv.s.x v8, a2 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 2 ; CHECK-NEXT: beqz a2, .LBB11_2 ; CHECK-NEXT: .LBB11_10: # %cond.load1 ; CHECK-NEXT: lw a2, 0(a0) -; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; CHECK-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; CHECK-NEXT: vmv.s.x v10, a2 +; CHECK-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; CHECK-NEXT: vslideup.vi v8, v10, 1 ; CHECK-NEXT: addi a0, a0, 4 ; CHECK-NEXT: andi a2, a1, 4 @@ -664,15 +673,16 @@ define <2 x i64> @expandload_v2i64(ptr %base, <2 x i64> %src0, <2 x i1> %mask) { ; RV64-NEXT: ret ; RV64-NEXT: .LBB13_3: # %cond.load ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a1, a1, 2 ; RV64-NEXT: beqz a1, .LBB13_2 ; RV64-NEXT: .LBB13_4: # %cond.load1 ; RV64-NEXT: ld a0, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v9, a0 +; RV64-NEXT: vsetivli zero, 2, e64, m1, ta, ma ; RV64-NEXT: vslideup.vi v8, v9, 1 ; RV64-NEXT: ret %res = call <2 x i64> @llvm.masked.expandload.v2i64(ptr align 8 %base, <2 x i1> %mask, <2 x i64> %src0) @@ -758,15 +768,16 @@ define <4 x i64> @expandload_v4i64(ptr %base, <4 x i64> %src0, <4 x i1> %mask) { ; RV64-NEXT: ret ; RV64-NEXT: .LBB14_5: # %cond.load ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 4, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB14_2 ; RV64-NEXT: .LBB14_6: # %cond.load1 ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v10, a2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v10, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 @@ -936,15 +947,16 @@ define <8 x i64> @expandload_v8i64(ptr %base, <8 x i64> %src0, <8 x i1> %mask) { ; RV64-NEXT: ret ; RV64-NEXT: .LBB15_9: # %cond.load ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 8, e64, m8, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-NEXT: vmv.s.x v8, a2 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 2 ; RV64-NEXT: beqz a2, .LBB15_2 ; RV64-NEXT: .LBB15_10: # %cond.load1 ; RV64-NEXT: ld a2, 0(a0) -; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma +; RV64-NEXT: vsetvli zero, zero, e64, m8, ta, ma ; RV64-NEXT: vmv.s.x v12, a2 +; RV64-NEXT: vsetivli zero, 2, e64, m1, tu, ma ; RV64-NEXT: vslideup.vi v8, v12, 1 ; RV64-NEXT: addi a0, a0, 8 ; RV64-NEXT: andi a2, a1, 4 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll index 9463267d0b0e6..2d3865ba4533d 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-llrint.ll @@ -26,7 +26,7 @@ define <1 x i64> @llrint_v1i64_v1f32(<1 x float> %x) { ; RV64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-NEXT: vfmv.f.s fa5, v8 ; RV64-NEXT: fcvt.l.s a0, fa5 -; RV64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-NEXT: vmv.s.x v8, a0 ; RV64-NEXT: ret %a = call <1 x i64> @llvm.llrint.v1i64.v1f32(<1 x float> %x) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll index 9b0944e7e2f72..de47d8572017b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-lrint.ll @@ -28,7 +28,7 @@ define <1 x iXLen> @lrint_v1f32(<1 x float> %x) { ; RV64-i64-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-i64-NEXT: vfmv.f.s fa5, v8 ; RV64-i64-NEXT: fcvt.l.s a0, fa5 -; RV64-i64-NEXT: vsetivli zero, 1, e64, m1, ta, ma +; RV64-i64-NEXT: vsetvli zero, zero, e64, m1, ta, ma ; RV64-i64-NEXT: vmv.s.x v8, a0 ; RV64-i64-NEXT: ret %a = call <1 x iXLen> @llvm.lrint.v1iXLen.v1f32(<1 x float> %x) @@ -609,7 +609,6 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { ; RV32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV32-NEXT: vfmv.f.s fa5, v8 ; RV32-NEXT: fcvt.w.d a0, fa5 -; RV32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV32-NEXT: vmv.s.x v8, a0 ; RV32-NEXT: ret ; @@ -618,7 +617,6 @@ define <1 x iXLen> @lrint_v1f64(<1 x double> %x) { ; RV64-i32-NEXT: vsetivli zero, 1, e64, m1, ta, ma ; RV64-i32-NEXT: vfmv.f.s fa5, v8 ; RV64-i32-NEXT: fcvt.l.d a0, fa5 -; RV64-i32-NEXT: vsetivli zero, 1, e32, mf2, ta, ma ; RV64-i32-NEXT: vmv.s.x v8, a0 ; RV64-i32-NEXT: ret ; diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll index 69341981288b9..a4f9eeb59cd5b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-gather.ll @@ -83,14 +83,15 @@ define <2 x i8> @mgather_v2i8(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i8> %passthru) ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB1_3: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB1_2 ; RV64ZVE32F-NEXT: .LBB1_4: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i8> @llvm.masked.gather.v2i8.v2p0(<2 x ptr> %ptrs, i32 1, <2 x i1> %m, <2 x i8> %passthru) @@ -130,15 +131,16 @@ define <2 x i16> @mgather_v2i8_sextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB2_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB2_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB2_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB2_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -183,15 +185,16 @@ define <2 x i16> @mgather_v2i8_zextload_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB3_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB3_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB3_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB3_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma @@ -236,15 +239,16 @@ define <2 x i32> @mgather_v2i8_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB4_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB4_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB4_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB4_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -289,15 +293,16 @@ define <2 x i32> @mgather_v2i8_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB5_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB5_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB5_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB5_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -349,15 +354,16 @@ define <2 x i64> @mgather_v2i8_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB6_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB6_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB6_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB6_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -410,15 +416,16 @@ define <2 x i64> @mgather_v2i8_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 x ; RV64ZVE32F-NEXT: beqz a3, .LBB7_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lbu a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB7_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB7_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lbu a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB7_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma @@ -470,15 +477,16 @@ define <4 x i8> @mgather_v4i8(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB8_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_2 ; RV64ZVE32F-NEXT: .LBB8_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB8_3 @@ -602,15 +610,16 @@ define <8 x i8> @mgather_v8i8(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i8> %passthru) ; RV64ZVE32F-NEXT: .LBB11_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_2 ; RV64ZVE32F-NEXT: .LBB11_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB11_3 @@ -694,7 +703,7 @@ define <8 x i8> @mgather_baseidx_v8i8(ptr %base, <8 x i8> %idxs, <8 x i1> %m, <8 ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e8, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, m1, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB12_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -874,14 +883,15 @@ define <2 x i16> @mgather_v2i16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> %passthr ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB14_3: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB14_2 ; RV64ZVE32F-NEXT: .LBB14_4: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i16> @llvm.masked.gather.v2i16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x i16> %passthru) @@ -921,15 +931,16 @@ define <2 x i32> @mgather_v2i16_sextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB15_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB15_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB15_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB15_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -974,15 +985,16 @@ define <2 x i32> @mgather_v2i16_zextload_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB16_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB16_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB16_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB16_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma @@ -1034,15 +1046,16 @@ define <2 x i64> @mgather_v2i16_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB17_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB17_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB17_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB17_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -1097,15 +1110,16 @@ define <2 x i64> @mgather_v2i16_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB18_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lh a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB18_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB18_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lh a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB18_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e16, mf2, ta, ma @@ -1159,15 +1173,16 @@ define <4 x i16> @mgather_v4i16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i16> %passthr ; RV64ZVE32F-NEXT: .LBB19_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_2 ; RV64ZVE32F-NEXT: .LBB19_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB19_3 @@ -1291,15 +1306,16 @@ define <8 x i16> @mgather_v8i16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i16> %passthr ; RV64ZVE32F-NEXT: .LBB22_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_2 ; RV64ZVE32F-NEXT: .LBB22_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB22_3 @@ -1386,7 +1402,7 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB23_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -1470,8 +1486,9 @@ define <8 x i16> @mgather_baseidx_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB23_8 @@ -1537,7 +1554,7 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB24_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -1621,8 +1638,9 @@ define <8 x i16> @mgather_baseidx_sext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB24_8 @@ -1688,7 +1706,7 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 ; RV64ZVE32F-NEXT: .LBB25_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -1777,8 +1795,9 @@ define <8 x i16> @mgather_baseidx_zext_v8i8_v8i16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lh a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB25_8 @@ -1841,7 +1860,7 @@ define <8 x i16> @mgather_baseidx_v8i16(ptr %base, <8 x i16> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB26_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -2032,14 +2051,15 @@ define <2 x i32> @mgather_v2i32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i32> %passthr ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB28_3: # %cond.load ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB28_2 ; RV64ZVE32F-NEXT: .LBB28_4: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x i32> @llvm.masked.gather.v2i32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x i32> %passthru) @@ -2088,15 +2108,16 @@ define <2 x i64> @mgather_v2i32_sextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB29_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB29_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB29_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB29_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -2147,15 +2168,16 @@ define <2 x i64> @mgather_v2i32_zextload_v2i64(<2 x ptr> %ptrs, <2 x i1> %m, <2 ; RV64ZVE32F-NEXT: beqz a3, .LBB30_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load ; RV64ZVE32F-NEXT: lw a0, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a0 ; RV64ZVE32F-NEXT: .LBB30_2: # %else ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB30_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 ; RV64ZVE32F-NEXT: lw a0, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a0 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: .LBB30_4: # %else2 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma @@ -2209,15 +2231,16 @@ define <4 x i32> @mgather_v4i32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB31_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_2 ; RV64ZVE32F-NEXT: .LBB31_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v9, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB31_3 @@ -2340,15 +2363,16 @@ define <8 x i32> @mgather_v8i32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x i32> %passthr ; RV64ZVE32F-NEXT: .LBB34_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_2 ; RV64ZVE32F-NEXT: .LBB34_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB34_3 @@ -2434,7 +2458,7 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB35_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -2518,8 +2542,9 @@ define <8 x i32> @mgather_baseidx_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 x i1> ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB35_8 @@ -2584,7 +2609,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB36_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -2668,8 +2693,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB36_8 @@ -2737,7 +2763,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB37_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -2826,8 +2852,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i8_v8i32(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB37_8 @@ -2896,7 +2923,7 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB38_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -2980,8 +3007,9 @@ define <8 x i32> @mgather_baseidx_v8i16_v8i32(ptr %base, <8 x i16> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB38_8 @@ -3047,7 +3075,7 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB39_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -3131,8 +3159,9 @@ define <8 x i32> @mgather_baseidx_sext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lw a2, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a2 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB39_8 @@ -3201,7 +3230,7 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a3 ; RV64ZVE32F-NEXT: .LBB40_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 @@ -3290,8 +3319,9 @@ define <8 x i32> @mgather_baseidx_zext_v8i16_v8i32(ptr %base, <8 x i16> %idxs, < ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: lw a3, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vmv.s.x v8, a3 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB40_8 @@ -3354,7 +3384,7 @@ define <8 x i32> @mgather_baseidx_v8i32(ptr %base, <8 x i32> %idxs, <8 x i1> %m, ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB41_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -6915,14 +6945,15 @@ define <2 x half> @mgather_v2f16(<2 x ptr> %ptrs, <2 x i1> %m, <2 x half> %passt ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB59_3: # %cond.load ; RV64ZVE32F-NEXT: flh fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB59_2 ; RV64ZVE32F-NEXT: .LBB59_4: # %cond.load1 ; RV64ZVE32F-NEXT: flh fa5, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x half> @llvm.masked.gather.v2f16.v2p0(<2 x ptr> %ptrs, i32 2, <2 x i1> %m, <2 x half> %passthru) @@ -6966,15 +6997,16 @@ define <4 x half> @mgather_v4f16(<4 x ptr> %ptrs, <4 x i1> %m, <4 x half> %passt ; RV64ZVE32F-NEXT: .LBB60_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_2 ; RV64ZVE32F-NEXT: .LBB60_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, mf2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB60_3 @@ -7098,15 +7130,16 @@ define <8 x half> @mgather_v8f16(<8 x ptr> %ptrs, <8 x i1> %m, <8 x half> %passt ; RV64ZVE32F-NEXT: .LBB63_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_2 ; RV64ZVE32F-NEXT: .LBB63_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB63_3 @@ -7193,7 +7226,7 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: .LBB64_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -7277,8 +7310,9 @@ define <8 x half> @mgather_baseidx_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 x i1 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB64_8 @@ -7344,7 +7378,7 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: .LBB65_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -7428,8 +7462,9 @@ define <8 x half> @mgather_baseidx_sext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB65_8 @@ -7495,7 +7530,7 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 ; RV64ZVE32F-NEXT: .LBB66_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -7584,8 +7619,9 @@ define <8 x half> @mgather_baseidx_zext_v8i8_v8f16(ptr %base, <8 x i8> %idxs, <8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flh fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e16, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v9, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB66_8 @@ -7648,7 +7684,7 @@ define <8 x half> @mgather_baseidx_v8f16(ptr %base, <8 x i16> %idxs, <8 x i1> %m ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB67_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 8, e16, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 1 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -7839,14 +7875,15 @@ define <2 x float> @mgather_v2f32(<2 x ptr> %ptrs, <2 x i1> %m, <2 x float> %pas ; RV64ZVE32F-NEXT: ret ; RV64ZVE32F-NEXT: .LBB69_3: # %cond.load ; RV64ZVE32F-NEXT: flw fa5, 0(a0) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a2, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB69_2 ; RV64ZVE32F-NEXT: .LBB69_4: # %cond.load1 ; RV64ZVE32F-NEXT: flw fa5, 0(a1) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: ret %v = call <2 x float> @llvm.masked.gather.v2f32.v2p0(<2 x ptr> %ptrs, i32 4, <2 x i1> %m, <2 x float> %passthru) @@ -7890,15 +7927,16 @@ define <4 x float> @mgather_v4f32(<4 x ptr> %ptrs, <4 x i1> %m, <4 x float> %pas ; RV64ZVE32F-NEXT: .LBB70_5: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 4, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_2 ; RV64ZVE32F-NEXT: .LBB70_6: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v9, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v9, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB70_3 @@ -8021,15 +8059,16 @@ define <8 x float> @mgather_v8f32(<8 x ptr> %ptrs, <8 x i1> %m, <8 x float> %pas ; RV64ZVE32F-NEXT: .LBB73_9: # %cond.load ; RV64ZVE32F-NEXT: ld a2, 0(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_2 ; RV64ZVE32F-NEXT: .LBB73_10: # %cond.load1 ; RV64ZVE32F-NEXT: ld a2, 8(a0) ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 2, e32, m1, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v8, v10, 1 ; RV64ZVE32F-NEXT: andi a2, a1, 4 ; RV64ZVE32F-NEXT: beqz a2, .LBB73_3 @@ -8115,7 +8154,7 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB74_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8199,8 +8238,9 @@ define <8 x float> @mgather_baseidx_v8i8_v8f32(ptr %base, <8 x i8> %idxs, <8 x i ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB74_8 @@ -8265,7 +8305,7 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB75_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8349,8 +8389,9 @@ define <8 x float> @mgather_baseidx_sext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB75_8 @@ -8418,7 +8459,7 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB76_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8507,8 +8548,9 @@ define <8 x float> @mgather_baseidx_zext_v8i8_v8f32(ptr %base, <8 x i8> %idxs, < ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB76_8 @@ -8577,7 +8619,7 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB77_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8661,8 +8703,9 @@ define <8 x float> @mgather_baseidx_v8i16_v8f32(ptr %base, <8 x i16> %idxs, <8 x ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB77_8 @@ -8728,7 +8771,7 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB78_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 @@ -8812,8 +8855,9 @@ define <8 x float> @mgather_baseidx_sext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: flw fa5, 0(a2) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a2, a1, 32 ; RV64ZVE32F-NEXT: bnez a2, .LBB78_8 @@ -8882,7 +8926,7 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vfmv.s.f v10, fa5 ; RV64ZVE32F-NEXT: .LBB79_2: # %else ; RV64ZVE32F-NEXT: andi a3, a2, 2 @@ -8971,8 +9015,9 @@ define <8 x float> @mgather_baseidx_zext_v8i16_v8f32(ptr %base, <8 x i16> %idxs, ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 ; RV64ZVE32F-NEXT: flw fa5, 0(a3) -; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vfmv.s.f v8, fa5 +; RV64ZVE32F-NEXT: vsetivli zero, 5, e32, m2, tu, ma ; RV64ZVE32F-NEXT: vslideup.vi v10, v8, 4 ; RV64ZVE32F-NEXT: andi a3, a2, 32 ; RV64ZVE32F-NEXT: bnez a3, .LBB79_8 @@ -9035,7 +9080,7 @@ define <8 x float> @mgather_baseidx_v8f32(ptr %base, <8 x i32> %idxs, <8 x i1> % ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB80_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 8, e32, m4, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m4, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 @@ -11858,7 +11903,7 @@ define <16 x i8> @mgather_baseidx_v16i8(ptr %base, <16 x i8> %idxs, <16 x i1> %m ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB97_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetivli zero, 16, e8, mf2, tu, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf2, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) @@ -12113,18 +12158,16 @@ define <32 x i8> @mgather_baseidx_v32i8(ptr %base, <32 x i8> %idxs, <32 x i1> %m ; RV64ZVE32F-NEXT: andi a2, a1, 1 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_2 ; RV64ZVE32F-NEXT: # %bb.1: # %cond.load -; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.x.s a2, v8 ; RV64ZVE32F-NEXT: add a2, a0, a2 ; RV64ZVE32F-NEXT: lbu a2, 0(a2) -; RV64ZVE32F-NEXT: li a3, 32 -; RV64ZVE32F-NEXT: vsetvli zero, a3, e8, mf4, tu, ma ; RV64ZVE32F-NEXT: vmv.s.x v10, a2 ; RV64ZVE32F-NEXT: .LBB98_2: # %else ; RV64ZVE32F-NEXT: andi a2, a1, 2 ; RV64ZVE32F-NEXT: beqz a2, .LBB98_4 ; RV64ZVE32F-NEXT: # %bb.3: # %cond.load1 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e8, mf4, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e8, mf4, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 1 ; RV64ZVE32F-NEXT: vmv.x.s a2, v12 ; RV64ZVE32F-NEXT: add a2, a0, a2 diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll index e6852c1b57510..1dd74a7c9dd1b 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-masked-scatter.ll @@ -2387,7 +2387,7 @@ define void @mscatter_baseidx_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i16> % ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) @@ -2525,7 +2525,7 @@ define void @mscatter_baseidx_sext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) @@ -2671,7 +2671,7 @@ define void @mscatter_baseidx_zext_v8i16_v8i32(<8 x i32> %val, ptr %base, <8 x i ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) @@ -7507,7 +7507,7 @@ define void @mscatter_baseidx_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x i16> ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) @@ -7645,7 +7645,7 @@ define void @mscatter_baseidx_sext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: vmv.x.s a2, v11 ; RV64ZVE32F-NEXT: slli a2, a2, 2 ; RV64ZVE32F-NEXT: add a2, a0, a2 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a2) @@ -7791,7 +7791,7 @@ define void @mscatter_baseidx_zext_v8i16_v8f32(<8 x float> %val, ptr %base, <8 x ; RV64ZVE32F-NEXT: and a3, a3, a1 ; RV64ZVE32F-NEXT: slli a3, a3, 2 ; RV64ZVE32F-NEXT: add a3, a0, a3 -; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m2, ta, ma +; RV64ZVE32F-NEXT: vsetvli zero, zero, e32, m2, ta, ma ; RV64ZVE32F-NEXT: vslidedown.vi v12, v8, 4 ; RV64ZVE32F-NEXT: vsetivli zero, 1, e32, m1, ta, ma ; RV64ZVE32F-NEXT: vse32.v v12, (a3) diff --git a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll index 805b548b0cd18..eba3bd9d86dbb 100644 --- a/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll +++ b/llvm/test/CodeGen/RISCV/rvv/fixed-vectors-unaligned.ll @@ -94,7 +94,7 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV32-SLOW-NEXT: lbu a1, 0(a1) ; RV32-SLOW-NEXT: slli a2, a2, 8 ; RV32-SLOW-NEXT: or a1, a2, a1 -; RV32-SLOW-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV32-SLOW-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV32-SLOW-NEXT: vmv.s.x v9, a1 ; RV32-SLOW-NEXT: andi a0, a0, 2 ; RV32-SLOW-NEXT: beqz a0, .LBB4_2 @@ -131,7 +131,7 @@ define <2 x i16> @mgather_v2i16_align1(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i16> % ; RV64-SLOW-NEXT: lbu a1, 0(a1) ; RV64-SLOW-NEXT: slli a2, a2, 8 ; RV64-SLOW-NEXT: or a1, a2, a1 -; RV64-SLOW-NEXT: vsetivli zero, 2, e16, m2, tu, ma +; RV64-SLOW-NEXT: vsetvli zero, zero, e16, m2, tu, ma ; RV64-SLOW-NEXT: vmv.s.x v9, a1 ; RV64-SLOW-NEXT: andi a0, a0, 2 ; RV64-SLOW-NEXT: beqz a0, .LBB4_2 @@ -217,7 +217,7 @@ define <2 x i64> @mgather_v2i64_align4(<2 x ptr> %ptrs, <2 x i1> %m, <2 x i64> % ; RV64-SLOW-NEXT: vmv1r.v v8, v9 ; RV64-SLOW-NEXT: ret ; RV64-SLOW-NEXT: .LBB5_3: # %cond.load -; RV64-SLOW-NEXT: vsetivli zero, 2, e64, m8, tu, ma +; RV64-SLOW-NEXT: vsetvli zero, zero, e64, m8, tu, ma ; RV64-SLOW-NEXT: vmv.x.s a1, v8 ; RV64-SLOW-NEXT: lwu a2, 4(a1) ; RV64-SLOW-NEXT: lwu a1, 0(a1) diff --git a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir index 295d4c57a1be5..4091d1711b584 100644 --- a/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir +++ b/llvm/test/CodeGen/RISCV/rvv/vsetvli-insert-crossbb.mir @@ -976,12 +976,12 @@ body: | ; CHECK: bb.0: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} + ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: PseudoBR %bb.1 ; CHECK-NEXT: {{ $}} ; CHECK-NEXT: bb.1: ; CHECK-NEXT: successors: %bb.1(0x80000000) ; CHECK-NEXT: {{ $}} - ; CHECK-NEXT: dead $x0 = PseudoVSETIVLI 1, 216 /* e64, m1, ta, ma */, implicit-def $vl, implicit-def $vtype ; CHECK-NEXT: dead %x:gpr = PseudoVMV_X_S undef $noreg, 6 /* e64 */, implicit $vtype ; CHECK-NEXT: PseudoBR %bb.1 bb.0: