diff --git a/llvm/lib/Target/RISCV/CMakeLists.txt b/llvm/lib/Target/RISCV/CMakeLists.txt
index 9b23a5ab521c8..5d1ea50eba494 100644
--- a/llvm/lib/Target/RISCV/CMakeLists.txt
+++ b/llvm/lib/Target/RISCV/CMakeLists.txt
@@ -37,6 +37,7 @@ add_llvm_target(RISCVCodeGen
   RISCVMakeCompressible.cpp
   RISCVExpandAtomicPseudoInsts.cpp
   RISCVExpandPseudoInsts.cpp
+  RISCVFoldMemOffset.cpp
   RISCVFrameLowering.cpp
   RISCVGatherScatterLowering.cpp
   RISCVIndirectBranchTracking.cpp
diff --git a/llvm/lib/Target/RISCV/RISCV.h b/llvm/lib/Target/RISCV/RISCV.h
index 851eea1352852..641e2eb4094f9 100644
--- a/llvm/lib/Target/RISCV/RISCV.h
+++ b/llvm/lib/Target/RISCV/RISCV.h
@@ -52,6 +52,9 @@ void initializeRISCVVectorPeepholePass(PassRegistry &);
 FunctionPass *createRISCVOptWInstrsPass();
 void initializeRISCVOptWInstrsPass(PassRegistry &);
+FunctionPass *createRISCVFoldMemOffsetPass();
+void initializeRISCVFoldMemOffsetPass(PassRegistry &);
+
 FunctionPass *createRISCVMergeBaseOffsetOptPass();
 void initializeRISCVMergeBaseOffsetOptPass(PassRegistry &);
diff --git a/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp b/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp
new file mode 100644
index 0000000000000..989e9d859d64f
--- /dev/null
+++ b/llvm/lib/Target/RISCV/RISCVFoldMemOffset.cpp
@@ -0,0 +1,282 @@
+//===- RISCVFoldMemOffset.cpp - Fold ADDI into memory offsets ------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===---------------------------------------------------------------------===//
+//
+// Look for ADDIs that can be removed by folding their immediate into later
+// load/store addresses. There may be other arithmetic instructions between the
+// ADDI and the loads/stores that we need to reassociate through. If the final
+// result of the arithmetic is only used by load/store addresses, we can fold
+// the offset into all of the loads/stores as long as it doesn't create an
+// offset that is too large.
+//
+//===---------------------------------------------------------------------===//
+
+#include "RISCV.h"
+#include "RISCVSubtarget.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include <queue>
+
+using namespace llvm;
+
+#define DEBUG_TYPE "riscv-fold-mem-offset"
+#define RISCV_FOLD_MEM_OFFSET_NAME "RISC-V Fold Memory Offset"
+
+namespace {
+
+class RISCVFoldMemOffset : public MachineFunctionPass {
+public:
+  static char ID;
+
+  RISCVFoldMemOffset() : MachineFunctionPass(ID) {}
+
+  bool runOnMachineFunction(MachineFunction &MF) override;
+
+  bool foldOffset(Register OrigReg, int64_t InitialOffset,
+                  const MachineRegisterInfo &MRI,
+                  DenseMap<MachineInstr *, int64_t> &FoldableInstrs);
+
+  void getAnalysisUsage(AnalysisUsage &AU) const override {
+    AU.setPreservesCFG();
+    MachineFunctionPass::getAnalysisUsage(AU);
+  }
+
+  StringRef getPassName() const override { return RISCV_FOLD_MEM_OFFSET_NAME; }
+};
+
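+// For illustration (a sketch, not code from this patch; the virtual register
+// numbers are made up), a sequence such as
+//
+//   %1 = ADDI %0, 8
+//   %2 = SH3ADD %idx, %1
+//   %3 = LW %2, 400
+//
+// can be rewritten as
+//
+//   %2 = SH3ADD %idx, %0
+//   %3 = LW %2, 408
+//
+// because the ADDI feeds the unshifted operand of the SH3ADD, so its
+// immediate reaches the load unscaled and the combined offset of 408 still
+// fits in the 12-bit signed offset field.
+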
+// Wrapper class around a std::optional<int64_t> to allow accumulation.
+class FoldableOffset {
+  std::optional<int64_t> Offset;
+
+public:
+  bool hasValue() const { return Offset.has_value(); }
+  int64_t getValue() const { return *Offset; }
+
+  FoldableOffset &operator=(int64_t RHS) {
+    Offset = RHS;
+    return *this;
+  }
+
+  FoldableOffset &operator+=(int64_t RHS) {
+    if (!Offset)
+      Offset = 0;
+    Offset = (uint64_t)*Offset + (uint64_t)RHS;
+    return *this;
+  }
+
+  int64_t operator*() { return *Offset; }
+};
+
+} // end anonymous namespace
+
+char RISCVFoldMemOffset::ID = 0;
+INITIALIZE_PASS(RISCVFoldMemOffset, DEBUG_TYPE, RISCV_FOLD_MEM_OFFSET_NAME,
+                false, false)
+
+FunctionPass *llvm::createRISCVFoldMemOffsetPass() {
+  return new RISCVFoldMemOffset();
+}
+
+// Walk forward from the ADDI looking for arithmetic instructions we can
+// analyze or memory instructions that use it as part of their address
+// calculation. For each arithmetic instruction, we look up how the offset
+// contributes to the value in that register and use that information to
+// calculate its contribution to the output of this instruction.
+// Only addition and left shift are supported.
+// FIXME: Add multiplication by constant. The constant will be in a register.
+bool RISCVFoldMemOffset::foldOffset(
+    Register OrigReg, int64_t InitialOffset, const MachineRegisterInfo &MRI,
+    DenseMap<MachineInstr *, int64_t> &FoldableInstrs) {
+  // Map to hold how much the offset contributes to the value of this register.
+  DenseMap<Register, int64_t> RegToOffsetMap;
+
+  // Insert root offset into the map.
+  RegToOffsetMap[OrigReg] = InitialOffset;
+
+  std::queue<Register> Worklist;
+  Worklist.push(OrigReg);
+
+  while (!Worklist.empty()) {
+    Register Reg = Worklist.front();
+    Worklist.pop();
+
+    if (!Reg.isVirtual())
+      return false;
+
+    for (auto &User : MRI.use_nodbg_instructions(Reg)) {
+      FoldableOffset Offset;
+
+      switch (User.getOpcode()) {
+      default:
+        return false;
+      case RISCV::ADD:
+        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
+            I != RegToOffsetMap.end())
+          Offset = I->second;
+        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
+            I != RegToOffsetMap.end())
+          Offset += I->second;
+        break;
+      case RISCV::SH1ADD:
+        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
+            I != RegToOffsetMap.end())
+          Offset = (uint64_t)I->second << 1;
+        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
+            I != RegToOffsetMap.end())
+          Offset += I->second;
+        break;
+      case RISCV::SH2ADD:
+        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
+            I != RegToOffsetMap.end())
+          Offset = (uint64_t)I->second << 2;
+        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
+            I != RegToOffsetMap.end())
+          Offset += I->second;
+        break;
+      case RISCV::SH3ADD:
+        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
+            I != RegToOffsetMap.end())
+          Offset = (uint64_t)I->second << 3;
+        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
+            I != RegToOffsetMap.end())
+          Offset += I->second;
+        break;
+      case RISCV::ADD_UW:
+      case RISCV::SH1ADD_UW:
+      case RISCV::SH2ADD_UW:
+      case RISCV::SH3ADD_UW:
+        // Don't fold through the zero extended input.
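+        // Folding there would change the value that gets zero extended:
+        // zext32(x + c) is not zext32(x) + c in general, so the offset
+        // cannot be compensated for after the extension.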
+        if (User.getOperand(1).getReg() == Reg)
+          return false;
+        if (auto I = RegToOffsetMap.find(User.getOperand(2).getReg());
+            I != RegToOffsetMap.end())
+          Offset = I->second;
+        break;
+      case RISCV::SLLI: {
+        unsigned ShAmt = User.getOperand(2).getImm();
+        if (auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
+            I != RegToOffsetMap.end())
+          Offset = (uint64_t)I->second << ShAmt;
+        break;
+      }
+      case RISCV::LB:
+      case RISCV::LBU:
+      case RISCV::SB:
+      case RISCV::LH:
+      case RISCV::LH_INX:
+      case RISCV::LHU:
+      case RISCV::FLH:
+      case RISCV::SH:
+      case RISCV::SH_INX:
+      case RISCV::FSH:
+      case RISCV::LW:
+      case RISCV::LW_INX:
+      case RISCV::LWU:
+      case RISCV::FLW:
+      case RISCV::SW:
+      case RISCV::SW_INX:
+      case RISCV::FSW:
+      case RISCV::LD:
+      case RISCV::FLD:
+      case RISCV::SD:
+      case RISCV::FSD: {
+        // Can't fold into store value.
+        if (User.getOperand(0).getReg() == Reg)
+          return false;
+
+        // Existing offset must be immediate.
+        if (!User.getOperand(2).isImm())
+          return false;
+
+        // Require at least one operation between the ADDI and the load/store.
+        // We have other optimizations that should handle the simple case.
+        if (User.getOperand(1).getReg() == OrigReg)
+          return false;
+
+        auto I = RegToOffsetMap.find(User.getOperand(1).getReg());
+        if (I == RegToOffsetMap.end())
+          return false;
+
+        int64_t LocalOffset = User.getOperand(2).getImm();
+        assert(isInt<12>(LocalOffset));
+        int64_t CombinedOffset = (uint64_t)LocalOffset + (uint64_t)I->second;
+        if (!isInt<12>(CombinedOffset))
+          return false;
+
+        FoldableInstrs[&User] = CombinedOffset;
+        continue;
+      }
+      }
+
+      // If we reach here, we should have an accumulated offset.
+      assert(Offset.hasValue() && "Expected an offset");
+
+      // If the offset is new or changed, add the destination register to the
+      // work list.
+      int64_t OffsetVal = Offset.getValue();
+      auto P =
+          RegToOffsetMap.try_emplace(User.getOperand(0).getReg(), OffsetVal);
+      if (P.second) {
+        Worklist.push(User.getOperand(0).getReg());
+      } else if (P.first->second != OffsetVal) {
+        P.first->second = OffsetVal;
+        Worklist.push(User.getOperand(0).getReg());
+      }
+    }
+  }
+
+  return true;
+}
+
+bool RISCVFoldMemOffset::runOnMachineFunction(MachineFunction &MF) {
+  if (skipFunction(MF.getFunction()))
+    return false;
+
+  // This optimization may increase size by preventing compression.
+  if (MF.getFunction().hasOptSize())
+    return false;
+
+  MachineRegisterInfo &MRI = MF.getRegInfo();
+
+  bool MadeChange = false;
+  for (MachineBasicBlock &MBB : MF) {
+    for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
+      // FIXME: We can support ADDIW from an LUI+ADDIW pair if the result is
+      // equivalent to LUI+ADDI.
+      if (MI.getOpcode() != RISCV::ADDI)
+        continue;
+
+      // We only want to optimize register ADDIs.
+      if (!MI.getOperand(1).isReg() || !MI.getOperand(2).isImm())
+        continue;
+
+      // Ignore 'li'.
+      if (MI.getOperand(1).getReg() == RISCV::X0)
+        continue;
+
+      int64_t Offset = MI.getOperand(2).getImm();
+      assert(isInt<12>(Offset));
+
+      DenseMap<MachineInstr *, int64_t> FoldableInstrs;
+
+      if (!foldOffset(MI.getOperand(0).getReg(), Offset, MRI, FoldableInstrs))
+        continue;
+
+      if (FoldableInstrs.empty())
+        continue;
+
+      // We can fold this ADDI.
+      // Rewrite all the instructions.
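+      // foldOffset only succeeds when every transitive user of the ADDI's
+      // result is either arithmetic it understands or a memory access that
+      // was added to FoldableInstrs, so removing the immediate here is
+      // compensated for by the adjusted offsets below.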
+      for (auto [MemMI, NewOffset] : FoldableInstrs)
+        MemMI->getOperand(2).setImm(NewOffset);
+
+      MRI.replaceRegWith(MI.getOperand(0).getReg(), MI.getOperand(1).getReg());
+      MI.eraseFromParent();
+      MadeChange = true;
+    }
+  }
+
+  return MadeChange;
+}
diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
index 167dbb53c5950..89e017807363b 100644
--- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
+++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp
@@ -133,6 +133,7 @@ extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
   initializeRISCVPostRAExpandPseudoPass(*PR);
   initializeRISCVMergeBaseOffsetOptPass(*PR);
   initializeRISCVOptWInstrsPass(*PR);
+  initializeRISCVFoldMemOffsetPass(*PR);
   initializeRISCVPreRAExpandPseudoPass(*PR);
   initializeRISCVExpandPseudoPass(*PR);
   initializeRISCVVectorPeepholePass(*PR);
@@ -590,6 +591,7 @@ void RISCVPassConfig::addMachineSSAOptimization() {
   addPass(createRISCVVectorPeepholePass());
   // TODO: Move this to pre regalloc
   addPass(createRISCVVMV0EliminationPass());
+  addPass(createRISCVFoldMemOffsetPass());
   TargetPassConfig::addMachineSSAOptimization();
diff --git a/llvm/test/CodeGen/RISCV/O3-pipeline.ll b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
index 2646dfeca4eb6..194223eee69eb 100644
--- a/llvm/test/CodeGen/RISCV/O3-pipeline.ll
+++ b/llvm/test/CodeGen/RISCV/O3-pipeline.ll
@@ -98,6 +98,7 @@
 ; CHECK-NEXT: Finalize ISel and expand pseudo-instructions
 ; CHECK-NEXT: RISC-V Vector Peephole Optimization
 ; CHECK-NEXT: RISC-V VMV0 Elimination
+; CHECK-NEXT: RISC-V Fold Memory Offset
 ; CHECK-NEXT: Lazy Machine Block Frequency Analysis
 ; CHECK-NEXT: Early Tail Duplication
 ; CHECK-NEXT: Optimize machine instruction PHIs
diff --git a/llvm/test/CodeGen/RISCV/fold-mem-offset.ll b/llvm/test/CodeGen/RISCV/fold-mem-offset.ll
new file mode 100644
index 0000000000000..b12fa509b0bea
--- /dev/null
+++ b/llvm/test/CodeGen/RISCV/fold-mem-offset.ll
@@ -0,0 +1,733 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 | FileCheck %s --check-prefixes=CHECK,RV32I
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 | FileCheck %s --check-prefixes=CHECK,RV64I
+; RUN: sed 's/iXLen/i32/g' %s | llc -mtriple=riscv32 -mattr=+zba | FileCheck %s --check-prefixes=ZBA,RV32ZBA
+; RUN: sed 's/iXLen/i64/g' %s | llc -mtriple=riscv64 -mattr=+zba | FileCheck %s --check-prefixes=ZBA,RV64ZBA
+
+define i64 @test_sh3add(ptr %p, iXLen %x, iXLen %y) {
+; RV32I-LABEL: test_sh3add:
+; RV32I: # %bb.0: # %entry
+; RV32I-NEXT: slli a1, a1, 3
+; RV32I-NEXT: slli a2, a2, 3
+; RV32I-NEXT: add a1, a1, a0
+; RV32I-NEXT: add a0, a0, a2
+; RV32I-NEXT: lw a2, 480(a1)
+; RV32I-NEXT: lw a1, 484(a1)
+; RV32I-NEXT: lw a3, 400(a0)
+; RV32I-NEXT: lw a0, 404(a0)
+; RV32I-NEXT: add a1, a0, a1
+; RV32I-NEXT: add a0, a3, a2
+; RV32I-NEXT: sltu a2, a0, a3
+; RV32I-NEXT: add a1, a1, a2
+; RV32I-NEXT: ret
+;
+; RV64I-LABEL: test_sh3add:
+; RV64I: # %bb.0: # %entry
+; RV64I-NEXT: slli a1, a1, 3
+; RV64I-NEXT: slli a2, a2, 3
+; RV64I-NEXT: add a1, a1, a0
+; RV64I-NEXT: add a0, a0, a2
+; RV64I-NEXT: ld a1, 480(a1)
+; RV64I-NEXT: ld a0, 400(a0)
+; RV64I-NEXT: add a0, a0, a1
+; RV64I-NEXT: ret
+;
+; RV32ZBA-LABEL: test_sh3add:
+; RV32ZBA: # %bb.0: # %entry
+; RV32ZBA-NEXT: sh3add a1, a1, a0
+; RV32ZBA-NEXT: sh3add a0, a2, a0
+; RV32ZBA-NEXT: lw a2, 480(a1)
+; RV32ZBA-NEXT: lw a1, 484(a1)
+; RV32ZBA-NEXT: lw a3, 400(a0)
+; RV32ZBA-NEXT: lw a0, 404(a0)
+; RV32ZBA-NEXT: add a1, a0, a1
+; RV32ZBA-NEXT:
add a0, a3, a2 +; RV32ZBA-NEXT: sltu a2, a0, a3 +; RV32ZBA-NEXT: add a1, a1, a2 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh3add: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh3add a1, a1, a0 +; RV64ZBA-NEXT: sh3add a0, a2, a0 +; RV64ZBA-NEXT: ld a1, 480(a1) +; RV64ZBA-NEXT: ld a0, 400(a0) +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %b = getelementptr inbounds nuw i8, ptr %p, i64 400 + %add = add iXLen %x, 10 + %arrayidx = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, iXLen %add + %0 = load i64, ptr %arrayidx, align 8 + %arrayidx2 = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, iXLen %y + %1 = load i64, ptr %arrayidx2, align 8 + %add3 = add nsw i64 %1, %0 + ret i64 %add3 +} + +define signext i32 @test_sh2add(ptr %p, iXLen %x, iXLen %y) { +; RV32I-LABEL: test_sh2add: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: slli a2, a2, 2 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lw a1, 1200(a1) +; RV32I-NEXT: lw a0, 1240(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh2add: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: lw a1, 1200(a1) +; RV64I-NEXT: lw a0, 1240(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh2add: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh2add a1, a1, a0 +; RV32ZBA-NEXT: sh2add a0, a2, a0 +; RV32ZBA-NEXT: lw a1, 1200(a1) +; RV32ZBA-NEXT: lw a0, 1240(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh2add: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh2add a1, a1, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a1, 1200(a1) +; RV64ZBA-NEXT: lw a0, 1240(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %c = getelementptr inbounds nuw i8, ptr %p, i64 1200 + %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %x + %0 = load i32, ptr %arrayidx, align 4 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %add + %1 = load i32, ptr %arrayidx2, align 4 + %add3 = add nsw i32 %1, %0 + ret i32 %add3 +} + +define signext i16 @test_sh1add(ptr %p, iXLen %x, iXLen %y) { +; RV32I-LABEL: test_sh1add: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: slli a2, a2, 1 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lh a1, 1600(a1) +; RV32I-NEXT: lh a0, 1620(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh1add: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: slli a2, a2, 1 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: lh a1, 1600(a1) +; RV64I-NEXT: lh a0, 1620(a0) +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh1add: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh1add a1, a1, a0 +; RV32ZBA-NEXT: sh1add a0, a2, a0 +; RV32ZBA-NEXT: lh a1, 1600(a1) +; RV32ZBA-NEXT: lh a0, 1620(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: slli a0, a0, 16 +; RV32ZBA-NEXT: srai a0, a0, 16 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh1add: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh1add a1, a1, a0 +; RV64ZBA-NEXT: sh1add a0, a2, a0 +; RV64ZBA-NEXT: lh a1, 1600(a1) +; RV64ZBA-NEXT: lh a0, 1620(a0) +; 
RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: slli a0, a0, 48 +; RV64ZBA-NEXT: srai a0, a0, 48 +; RV64ZBA-NEXT: ret +entry: + %d = getelementptr inbounds nuw i8, ptr %p, i64 1600 + %arrayidx = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, iXLen %x + %0 = load i16, ptr %arrayidx, align 2 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, iXLen %add + %1 = load i16, ptr %arrayidx2, align 2 + %add4 = add i16 %1, %0 + ret i16 %add4 +} + +define zeroext i8 @test_add(ptr %p, iXLen %x, iXLen %y) { +; CHECK-LABEL: test_add: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: lbu a1, 1800(a1) +; CHECK-NEXT: lbu a0, 1810(a0) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: ret +; +; ZBA-LABEL: test_add: +; ZBA: # %bb.0: # %entry +; ZBA-NEXT: add a1, a0, a1 +; ZBA-NEXT: add a0, a2, a0 +; ZBA-NEXT: lbu a1, 1800(a1) +; ZBA-NEXT: lbu a0, 1810(a0) +; ZBA-NEXT: add a0, a0, a1 +; ZBA-NEXT: andi a0, a0, 255 +; ZBA-NEXT: ret +entry: + %e = getelementptr inbounds nuw i8, ptr %p, i64 1800 + %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %x + %0 = load i8, ptr %arrayidx, align 1 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %add + %1 = load i8, ptr %arrayidx2, align 1 + %add4 = add i8 %1, %0 + ret i8 %add4 +} + +define i64 @test_sh3add_uw(ptr %p, i32 signext %x, i32 signext %y) { +; RV32I-LABEL: test_sh3add_uw: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 3 +; RV32I-NEXT: slli a2, a2, 3 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: lw a2, 400(a1) +; RV32I-NEXT: lw a1, 404(a1) +; RV32I-NEXT: lw a3, 400(a0) +; RV32I-NEXT: lw a0, 404(a0) +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a3, a2 +; RV32I-NEXT: sltu a2, a0, a3 +; RV32I-NEXT: add a1, a1, a2 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh3add_uw: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: srli a1, a1, 29 +; RV64I-NEXT: srli a2, a2, 29 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: ld a1, 400(a1) +; RV64I-NEXT: ld a0, 400(a0) +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh3add_uw: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh3add a1, a1, a0 +; RV32ZBA-NEXT: sh3add a0, a2, a0 +; RV32ZBA-NEXT: lw a2, 400(a1) +; RV32ZBA-NEXT: lw a1, 404(a1) +; RV32ZBA-NEXT: lw a3, 400(a0) +; RV32ZBA-NEXT: lw a0, 404(a0) +; RV32ZBA-NEXT: add a1, a0, a1 +; RV32ZBA-NEXT: add a0, a3, a2 +; RV32ZBA-NEXT: sltu a2, a0, a3 +; RV32ZBA-NEXT: add a1, a1, a2 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh3add_uw: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh3add.uw a1, a1, a0 +; RV64ZBA-NEXT: sh3add.uw a0, a2, a0 +; RV64ZBA-NEXT: ld a1, 400(a1) +; RV64ZBA-NEXT: ld a0, 400(a0) +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %b = getelementptr inbounds nuw i8, ptr %p, i64 400 + %idxprom = zext i32 %x to i64 + %arrayidx = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, i64 %idxprom + %0 = load i64, ptr %arrayidx, align 8 + %idxprom2 = zext i32 %y to i64 + %arrayidx3 = getelementptr inbounds nuw [100 x i64], ptr %b, i64 0, i64 %idxprom2 + %1 = load i64, ptr %arrayidx3, align 8 + %add4 = add nsw i64 %1, %0 + ret i64 %add4 +} + +define signext i32 @test_sh2add_uw(ptr %p, i32 signext %x, i32 signext %y) { +; RV32I-LABEL: test_sh2add_uw: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 
2 +; RV32I-NEXT: slli a2, a2, 2 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: lw a1, 1200(a1) +; RV32I-NEXT: lw a0, 1200(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh2add_uw: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: srli a1, a1, 30 +; RV64I-NEXT: srli a2, a2, 30 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lw a1, 1200(a1) +; RV64I-NEXT: lw a0, 1200(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh2add_uw: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh2add a1, a1, a0 +; RV32ZBA-NEXT: sh2add a0, a2, a0 +; RV32ZBA-NEXT: lw a1, 1200(a1) +; RV32ZBA-NEXT: lw a0, 1200(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh2add_uw: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh2add.uw a1, a1, a0 +; RV64ZBA-NEXT: sh2add.uw a0, a2, a0 +; RV64ZBA-NEXT: lw a1, 1200(a1) +; RV64ZBA-NEXT: lw a0, 1200(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %c = getelementptr inbounds nuw i8, ptr %p, i64 1200 + %idxprom = zext i32 %x to i64 + %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, i64 %idxprom + %0 = load i32, ptr %arrayidx, align 4 + %idxprom2 = zext i32 %y to i64 + %arrayidx3 = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, i64 %idxprom2 + %1 = load i32, ptr %arrayidx3, align 4 + %add4 = add nsw i32 %1, %0 + ret i32 %add4 +} + +define signext i16 @test_sh1add_uw(ptr %p, i32 signext %x, i32 signext %y) { +; RV32I-LABEL: test_sh1add_uw: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: slli a2, a2, 1 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lh a1, 1600(a1) +; RV32I-NEXT: lh a0, 1620(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: slli a0, a0, 16 +; RV32I-NEXT: srai a0, a0, 16 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_sh1add_uw: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: addi a2, a2, 10 +; RV64I-NEXT: srli a1, a1, 31 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: srli a2, a2, 31 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lh a1, 1600(a1) +; RV64I-NEXT: lh a0, 1600(a0) +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: slli a0, a0, 48 +; RV64I-NEXT: srai a0, a0, 48 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_sh1add_uw: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh1add a1, a1, a0 +; RV32ZBA-NEXT: sh1add a0, a2, a0 +; RV32ZBA-NEXT: lh a1, 1600(a1) +; RV32ZBA-NEXT: lh a0, 1620(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: slli a0, a0, 16 +; RV32ZBA-NEXT: srai a0, a0, 16 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_sh1add_uw: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh1add.uw a1, a1, a0 +; RV64ZBA-NEXT: addi a2, a2, 10 +; RV64ZBA-NEXT: sh1add.uw a0, a2, a0 +; RV64ZBA-NEXT: lh a1, 1600(a1) +; RV64ZBA-NEXT: lh a0, 1600(a0) +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: slli a0, a0, 48 +; RV64ZBA-NEXT: srai a0, a0, 48 +; RV64ZBA-NEXT: ret +entry: + %d = getelementptr inbounds nuw i8, ptr %p, i64 1600 + %idxprom = zext i32 %x to i64 + %arrayidx = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, i64 %idxprom + %0 = load i16, ptr %arrayidx, align 2 + %add = add i32 %y, 10 + %idxprom2 = zext i32 %add to i64 + %arrayidx3 = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, i64 %idxprom2 + %1 = load i16, ptr %arrayidx3, align 2 + %add5 = add i16 %1, %0 + ret i16 %add5 +} + +define zeroext i8 @test_add_uw(ptr 
%p, i32 signext %x, i32 signext %y) { +; RV32I-LABEL: test_add_uw: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a0, a2 +; RV32I-NEXT: lbu a1, 1800(a1) +; RV32I-NEXT: lbu a0, 1800(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: andi a0, a0, 255 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_add_uw: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 32 +; RV64I-NEXT: slli a2, a2, 32 +; RV64I-NEXT: srli a1, a1, 32 +; RV64I-NEXT: srli a2, a2, 32 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a0, a2 +; RV64I-NEXT: lbu a1, 1800(a1) +; RV64I-NEXT: lbu a0, 1800(a0) +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: andi a0, a0, 255 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_add_uw: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: add a1, a0, a1 +; RV32ZBA-NEXT: add a0, a0, a2 +; RV32ZBA-NEXT: lbu a1, 1800(a1) +; RV32ZBA-NEXT: lbu a0, 1800(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: andi a0, a0, 255 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_add_uw: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: add.uw a1, a1, a0 +; RV64ZBA-NEXT: add.uw a0, a2, a0 +; RV64ZBA-NEXT: lbu a1, 1800(a1) +; RV64ZBA-NEXT: lbu a0, 1800(a0) +; RV64ZBA-NEXT: add a0, a0, a1 +; RV64ZBA-NEXT: andi a0, a0, 255 +; RV64ZBA-NEXT: ret +entry: + %e = getelementptr inbounds nuw i8, ptr %p, i64 1800 + %idxprom = zext i32 %x to i64 + %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, i64 %idxprom + %0 = load i8, ptr %arrayidx, align 1 + %idxprom2 = zext i32 %y to i64 + %arrayidx3 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, i64 %idxprom2 + %1 = load i8, ptr %arrayidx3, align 1 + %add5 = add i8 %1, %0 + ret i8 %add5 +} + +; The addi is part of the index and used with 2 different scales. +define signext i32 @test_scaled_index_addi(ptr %p, iXLen %x) { +; RV32I-LABEL: test_scaled_index_addi: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a2, a1, 2 +; RV32I-NEXT: slli a1, a1, 1 +; RV32I-NEXT: add a2, a0, a2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw a1, 1196(a2) +; RV32I-NEXT: lh a0, 1598(a0) +; RV32I-NEXT: add a0, a1, a0 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_scaled_index_addi: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a2, a1, 2 +; RV64I-NEXT: slli a1, a1, 1 +; RV64I-NEXT: add a2, a0, a2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lw a1, 1196(a2) +; RV64I-NEXT: lh a0, 1598(a0) +; RV64I-NEXT: addw a0, a1, a0 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_scaled_index_addi: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh2add a2, a1, a0 +; RV32ZBA-NEXT: sh1add a0, a1, a0 +; RV32ZBA-NEXT: lw a1, 1196(a2) +; RV32ZBA-NEXT: lh a0, 1598(a0) +; RV32ZBA-NEXT: add a0, a1, a0 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_scaled_index_addi: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh2add a2, a1, a0 +; RV64ZBA-NEXT: sh1add a0, a1, a0 +; RV64ZBA-NEXT: lw a1, 1196(a2) +; RV64ZBA-NEXT: lh a0, 1598(a0) +; RV64ZBA-NEXT: addw a0, a1, a0 +; RV64ZBA-NEXT: ret +entry: + %c = getelementptr inbounds nuw i8, ptr %p, i64 1200 + %sub = add iXLen %x, -1 + %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %sub + %0 = load i32, ptr %arrayidx, align 4 + %d = getelementptr inbounds nuw i8, ptr %p, i64 1600 + %arrayidx2 = getelementptr inbounds nuw [100 x i16], ptr %d, i64 0, iXLen %sub + %1 = load i16, ptr %arrayidx2, align 2 + %conv = sext i16 %1 to i32 + %add = add nsw i32 %0, %conv + ret i32 %add +} + +; Offset is a pair of addis. We can fold one of them. 
+define signext i32 @test_medium_offset(ptr %p, iXLen %x, iXLen %y) { +; RV32I-LABEL: test_medium_offset: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: addi a0, a0, 2047 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: slli a2, a2, 2 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lw a1, 753(a1) +; RV32I-NEXT: lw a0, 793(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_medium_offset: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: addi a0, a0, 2047 +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: lw a1, 753(a1) +; RV64I-NEXT: lw a0, 793(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_medium_offset: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: addi a0, a0, 2047 +; RV32ZBA-NEXT: sh2add a1, a1, a0 +; RV32ZBA-NEXT: sh2add a0, a2, a0 +; RV32ZBA-NEXT: lw a1, 753(a1) +; RV32ZBA-NEXT: lw a0, 793(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_medium_offset: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: addi a0, a0, 2047 +; RV64ZBA-NEXT: sh2add a1, a1, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a1, 753(a1) +; RV64ZBA-NEXT: lw a0, 793(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %f = getelementptr inbounds nuw i8, ptr %p, i64 2800 + %arrayidx = getelementptr inbounds nuw [1000 x i32], ptr %f, i64 0, iXLen %x + %0 = load i32, ptr %arrayidx, align 4 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [1000 x i32], ptr %f, i64 0, iXLen %add + %1 = load i32, ptr %arrayidx2, align 4 + %add3 = add nsw i32 %1, %0 + ret i32 %add3 +} + +; Offset is a lui+addiw. We can't fold this on RV64. +define signext i32 @test_large_offset(ptr %p, iXLen %x, iXLen %y) { +; RV32I-LABEL: test_large_offset: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: lui a3, 2 +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: slli a2, a2, 2 +; RV32I-NEXT: add a0, a0, a3 +; RV32I-NEXT: add a1, a0, a1 +; RV32I-NEXT: add a0, a2, a0 +; RV32I-NEXT: lw a1, -1392(a1) +; RV32I-NEXT: lw a0, -1352(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_large_offset: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: lui a3, 2 +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: slli a2, a2, 2 +; RV64I-NEXT: addiw a3, a3, -1392 +; RV64I-NEXT: add a0, a0, a3 +; RV64I-NEXT: add a1, a0, a1 +; RV64I-NEXT: add a0, a2, a0 +; RV64I-NEXT: lw a1, 0(a1) +; RV64I-NEXT: lw a0, 40(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_large_offset: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: li a3, 1700 +; RV32ZBA-NEXT: sh2add a0, a3, a0 +; RV32ZBA-NEXT: sh2add a1, a1, a0 +; RV32ZBA-NEXT: sh2add a0, a2, a0 +; RV32ZBA-NEXT: lw a1, 0(a1) +; RV32ZBA-NEXT: lw a0, 40(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_large_offset: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: li a3, 1700 +; RV64ZBA-NEXT: sh2add a0, a3, a0 +; RV64ZBA-NEXT: sh2add a1, a1, a0 +; RV64ZBA-NEXT: sh2add a0, a2, a0 +; RV64ZBA-NEXT: lw a1, 0(a1) +; RV64ZBA-NEXT: lw a0, 40(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %g = getelementptr inbounds nuw i8, ptr %p, i64 6800 + %arrayidx = getelementptr inbounds nuw [200 x i32], ptr %g, i64 0, iXLen %x + %0 = load i32, ptr %arrayidx, align 4 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [200 x i32], ptr %g, i64 0, iXLen %add + %1 = load i32, ptr %arrayidx2, align 4 + %add3 = add nsw i32 %1, %0 + ret 
i32 %add3 +} + +; After folding we can CSE the sh2add +define signext i32 @test_cse(ptr %p, iXLen %x) { +; RV32I-LABEL: test_cse: +; RV32I: # %bb.0: # %entry +; RV32I-NEXT: slli a1, a1, 2 +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: lw a1, 1200(a0) +; RV32I-NEXT: addi a0, a0, 2047 +; RV32I-NEXT: lw a0, 753(a0) +; RV32I-NEXT: add a0, a0, a1 +; RV32I-NEXT: ret +; +; RV64I-LABEL: test_cse: +; RV64I: # %bb.0: # %entry +; RV64I-NEXT: slli a1, a1, 2 +; RV64I-NEXT: add a0, a0, a1 +; RV64I-NEXT: lw a1, 1200(a0) +; RV64I-NEXT: addi a0, a0, 2047 +; RV64I-NEXT: lw a0, 753(a0) +; RV64I-NEXT: addw a0, a0, a1 +; RV64I-NEXT: ret +; +; RV32ZBA-LABEL: test_cse: +; RV32ZBA: # %bb.0: # %entry +; RV32ZBA-NEXT: sh2add a0, a1, a0 +; RV32ZBA-NEXT: lw a1, 1200(a0) +; RV32ZBA-NEXT: addi a0, a0, 2047 +; RV32ZBA-NEXT: lw a0, 753(a0) +; RV32ZBA-NEXT: add a0, a0, a1 +; RV32ZBA-NEXT: ret +; +; RV64ZBA-LABEL: test_cse: +; RV64ZBA: # %bb.0: # %entry +; RV64ZBA-NEXT: sh2add a0, a1, a0 +; RV64ZBA-NEXT: lw a1, 1200(a0) +; RV64ZBA-NEXT: addi a0, a0, 2047 +; RV64ZBA-NEXT: lw a0, 753(a0) +; RV64ZBA-NEXT: addw a0, a0, a1 +; RV64ZBA-NEXT: ret +entry: + %c = getelementptr inbounds nuw i8, ptr %p, i64 1200 + %arrayidx = getelementptr inbounds nuw [100 x i32], ptr %c, i64 0, iXLen %x + %0 = load i32, ptr %arrayidx, align 4 + %f = getelementptr inbounds nuw i8, ptr %p, i64 2800 + %arrayidx1 = getelementptr inbounds nuw [1000 x i32], ptr %f, i64 0, iXLen %x + %1 = load i32, ptr %arrayidx1, align 4 + %add = add nsw i32 %1, %0 + ret i32 %add +} + +define zeroext i8 @test_optsize(ptr %p, iXLen %x, iXLen %y) optsize { +; CHECK-LABEL: test_optsize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a0, a0, 1800 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: lbu a1, 0(a1) +; CHECK-NEXT: lbu a0, 10(a0) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: ret +; +; ZBA-LABEL: test_optsize: +; ZBA: # %bb.0: # %entry +; ZBA-NEXT: addi a0, a0, 1800 +; ZBA-NEXT: add a1, a0, a1 +; ZBA-NEXT: add a0, a2, a0 +; ZBA-NEXT: lbu a1, 0(a1) +; ZBA-NEXT: lbu a0, 10(a0) +; ZBA-NEXT: add a0, a0, a1 +; ZBA-NEXT: andi a0, a0, 255 +; ZBA-NEXT: ret +entry: + %e = getelementptr inbounds nuw i8, ptr %p, i64 1800 + %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %x + %0 = load i8, ptr %arrayidx, align 1 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %add + %1 = load i8, ptr %arrayidx2, align 1 + %add4 = add i8 %1, %0 + ret i8 %add4 +} + +define zeroext i8 @test_minsize(ptr %p, iXLen %x, iXLen %y) minsize { +; CHECK-LABEL: test_minsize: +; CHECK: # %bb.0: # %entry +; CHECK-NEXT: addi a0, a0, 1800 +; CHECK-NEXT: add a1, a0, a1 +; CHECK-NEXT: add a0, a2, a0 +; CHECK-NEXT: lbu a1, 0(a1) +; CHECK-NEXT: lbu a0, 10(a0) +; CHECK-NEXT: add a0, a0, a1 +; CHECK-NEXT: andi a0, a0, 255 +; CHECK-NEXT: ret +; +; ZBA-LABEL: test_minsize: +; ZBA: # %bb.0: # %entry +; ZBA-NEXT: addi a0, a0, 1800 +; ZBA-NEXT: add a1, a0, a1 +; ZBA-NEXT: add a0, a2, a0 +; ZBA-NEXT: lbu a1, 0(a1) +; ZBA-NEXT: lbu a0, 10(a0) +; ZBA-NEXT: add a0, a0, a1 +; ZBA-NEXT: andi a0, a0, 255 +; ZBA-NEXT: ret +entry: + %e = getelementptr inbounds nuw i8, ptr %p, i64 1800 + %arrayidx = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %x + %0 = load i8, ptr %arrayidx, align 1 + %add = add iXLen %y, 10 + %arrayidx2 = getelementptr inbounds nuw [1000 x i8], ptr %e, i64 0, iXLen %add + %1 = load i8, ptr %arrayidx2, align 1 + %add4 = add i8 %1, %0 + ret i8 %add4 +} diff --git 
a/llvm/test/CodeGen/RISCV/split-offsets.ll b/llvm/test/CodeGen/RISCV/split-offsets.ll index 6d14c0d76a45c..8b80f0140a88a 100644 --- a/llvm/test/CodeGen/RISCV/split-offsets.ll +++ b/llvm/test/CodeGen/RISCV/split-offsets.ll @@ -14,14 +14,13 @@ define void @test1(ptr %sp, ptr %t, i32 %n) { ; RV32I-NEXT: lui a2, 20 ; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: li a3, 2 -; RV32I-NEXT: addi a2, a2, -1920 ; RV32I-NEXT: add a1, a1, a2 ; RV32I-NEXT: add a0, a0, a2 ; RV32I-NEXT: li a2, 1 -; RV32I-NEXT: sw a3, 0(a0) -; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a2, 0(a1) -; RV32I-NEXT: sw a3, 4(a1) +; RV32I-NEXT: sw a3, -1920(a0) +; RV32I-NEXT: sw a2, -1916(a0) +; RV32I-NEXT: sw a2, -1920(a1) +; RV32I-NEXT: sw a3, -1916(a1) ; RV32I-NEXT: ret ; ; RV64I-LABEL: test1: @@ -57,7 +56,6 @@ define void @test2(ptr %sp, ptr %t, i32 %n) { ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: li a3, 0 ; RV32I-NEXT: lui a4, 20 -; RV32I-NEXT: addi a4, a4, -1920 ; RV32I-NEXT: add a1, a1, a4 ; RV32I-NEXT: lw a0, 0(a0) ; RV32I-NEXT: add a0, a0, a4 @@ -65,10 +63,10 @@ define void @test2(ptr %sp, ptr %t, i32 %n) { ; RV32I-NEXT: .LBB1_1: # %while_body ; RV32I-NEXT: # =>This Inner Loop Header: Depth=1 ; RV32I-NEXT: addi a4, a3, 1 -; RV32I-NEXT: sw a4, 0(a0) -; RV32I-NEXT: sw a3, 4(a0) -; RV32I-NEXT: sw a4, 0(a1) -; RV32I-NEXT: sw a3, 4(a1) +; RV32I-NEXT: sw a4, -1920(a0) +; RV32I-NEXT: sw a3, -1916(a0) +; RV32I-NEXT: sw a4, -1920(a1) +; RV32I-NEXT: sw a3, -1916(a1) ; RV32I-NEXT: mv a3, a4 ; RV32I-NEXT: blt a4, a2, .LBB1_1 ; RV32I-NEXT: .LBB1_2: # %while_end @@ -126,11 +124,10 @@ define void @test3(ptr %t) { ; RV32I: # %bb.0: # %entry ; RV32I-NEXT: lui a1, 20 ; RV32I-NEXT: li a2, 2 -; RV32I-NEXT: addi a1, a1, -1920 ; RV32I-NEXT: add a0, a0, a1 ; RV32I-NEXT: li a1, 3 -; RV32I-NEXT: sw a2, 4(a0) -; RV32I-NEXT: sw a1, 8(a0) +; RV32I-NEXT: sw a2, -1916(a0) +; RV32I-NEXT: sw a1, -1912(a0) ; RV32I-NEXT: ret ; ; RV64I-LABEL: test3: diff --git a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll index f6b7f97f6525c..0708838223cf3 100644 --- a/llvm/test/CodeGen/RISCV/xtheadmemidx.ll +++ b/llvm/test/CodeGen/RISCV/xtheadmemidx.ll @@ -1136,10 +1136,9 @@ define i64 @lrd_large_offset(ptr %a, i64 %b) { ; RV32XTHEADMEMIDX-NEXT: slli a1, a1, 3 ; RV32XTHEADMEMIDX-NEXT: add a0, a1, a0 ; RV32XTHEADMEMIDX-NEXT: lui a1, 23 -; RV32XTHEADMEMIDX-NEXT: addi a1, a1, 1792 ; RV32XTHEADMEMIDX-NEXT: add a1, a0, a1 -; RV32XTHEADMEMIDX-NEXT: lw a0, 0(a1) -; RV32XTHEADMEMIDX-NEXT: lw a1, 4(a1) +; RV32XTHEADMEMIDX-NEXT: lw a0, 1792(a1) +; RV32XTHEADMEMIDX-NEXT: lw a1, 1796(a1) ; RV32XTHEADMEMIDX-NEXT: ret ; ; RV64XTHEADMEMIDX-LABEL: lrd_large_offset: