Skip to content

[LoongArch] Add generation support for preld instruction #118436

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jan 20, 2025

Conversation

zhaoqi5
Copy link
Contributor

@zhaoqi5 zhaoqi5 commented Dec 3, 2024

The `preld` instruction prefetches one cache line of data from memory into the cache ahead of its use.

This commit allows it to be generated automatically.

The `preld` instruction prefetches one cache line of data
from memory into the cache ahead of its use.

This commit allows it to be generated automatically.
@llvmbot
Copy link
Member

llvmbot commented Dec 3, 2024

@llvm/pr-subscribers-backend-loongarch

Author: ZhaoQi (zhaoqi5)

Changes

The `preld` instruction prefetches one cache line of data from memory into the cache ahead of its use.

This commit allows it to be generated automatically.


Full diff: https://github.com/llvm/llvm-project/pull/118436.diff

5 Files Affected:

  • (modified) llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp (+22)
  • (modified) llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h (+1)
  • (modified) llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp (+2)
  • (modified) llvm/lib/Target/LoongArch/LoongArchInstrInfo.td (+9)
  • (added) llvm/test/CodeGen/LoongArch/preld.ll (+67)
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
index d330f953556018..099ce54cead140 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.cpp
@@ -245,6 +245,28 @@ bool LoongArchDAGToDAGISel::selectNonFIBaseAddr(SDValue Addr, SDValue &Base) {
   return true;
 }
 
+bool LoongArchDAGToDAGISel::SelectAddrRegImm(SDValue Addr, SDValue &Base,
+                                             SDValue &Offset) {
+  SDLoc DL(Addr);
+  MVT VT = Addr.getSimpleValueType();
+
+  // Base + constant address: fold the constant only if it fits in simm12.
+  if (CurDAG->isBaseWithConstantOffset(Addr)) {
+    int64_t Imm = cast<ConstantSDNode>(Addr.getOperand(1))->getSExtValue();
+    if (isInt<12>(Imm)) {
+      Base = Addr.getOperand(0);
+      Offset = CurDAG->getTargetConstant(SignExtend64<12>(Imm), DL, VT);
+      return true;
+    }
+  }
+
+  // Otherwise use Addr itself as the base address with a constant 0 offset;
+  // this also covers base + constant where the constant is not a simm12.
+  Base = Addr;
+  Offset = CurDAG->getTargetConstant(0, DL, VT);
+  return true; // Every address is accepted, so this selector never fails.
+}
+
 bool LoongArchDAGToDAGISel::selectShiftMask(SDValue N, unsigned ShiftWidth,
                                             SDValue &ShAmt) {
   // Shift instructions on LoongArch only read the lower 5 or 6 bits of the
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
index 363b4f0ca7cf06..46c286bdb4eb77 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
+++ b/llvm/lib/Target/LoongArch/LoongArchISelDAGToDAG.h
@@ -43,6 +43,7 @@ class LoongArchDAGToDAGISel : public SelectionDAGISel {
   bool SelectBaseAddr(SDValue Addr, SDValue &Base);
   bool SelectAddrConstant(SDValue Addr, SDValue &Base, SDValue &Offset);
   bool selectNonFIBaseAddr(SDValue Addr, SDValue &Base);
+  bool SelectAddrRegImm(SDValue Addr, SDValue &Base, SDValue &Offset);
 
   bool selectShiftMask(SDValue N, unsigned ShiftWidth, SDValue &ShAmt);
   bool selectShiftMaskGRLen(SDValue N, SDValue &ShAmt) {
diff --git a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
index 16bceacfaa222c..a92142a3930f0f 100644
--- a/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
+++ b/llvm/lib/Target/LoongArch/LoongArchISelLowering.cpp
@@ -99,6 +99,8 @@ LoongArchTargetLowering::LoongArchTargetLowering(const TargetMachine &TM,
   setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom);
   setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom);
 
+  setOperationAction(ISD::PREFETCH, MVT::Other, Legal);
+
   // Expand bitreverse.i16 with native-width bitrev and shift for now, before
   // we get to know which of sll and revb.2h is faster.
   setOperationAction(ISD::BITREVERSE, MVT::i8, Custom);
diff --git a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
index 6134daf2fbe630..5eda3bc7225e05 100644
--- a/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
+++ b/llvm/lib/Target/LoongArch/LoongArchInstrInfo.td
@@ -521,6 +521,7 @@ def HI16ForAddu16idAddiPair: SDNodeXForm<imm, [{
 def BaseAddr : ComplexPattern<iPTR, 1, "SelectBaseAddr">;
 def AddrConstant : ComplexPattern<iPTR, 2, "SelectAddrConstant">;
 def NonFIBaseAddr : ComplexPattern<iPTR, 1, "selectNonFIBaseAddr">;
+def AddrRegImm : ComplexPattern<iPTR, 2, "SelectAddrRegImm">;
 
 def fma_nsz : PatFrag<(ops node:$fj, node:$fk, node:$fa),
                       (fma node:$fj, node:$fk, node:$fa), [{
@@ -2009,6 +2010,14 @@ class PseudoMaskedAMMinMax
 def PseudoMaskedAtomicLoadMax32 : PseudoMaskedAMMinMax;
 def PseudoMaskedAtomicLoadMin32 : PseudoMaskedAMMinMax;
 
+// Data prefetch
+
+// TODO: Add support for the preldx instruction.
+def : Pat<(prefetch (AddrRegImm GPR:$rj, simm12:$imm12), (i32 0), timm, (i32 1)),
+          (PRELD 0, GPR:$rj, simm12:$imm12)>; // data prefetch for loads
+def : Pat<(prefetch (AddrRegImm GPR:$rj, simm12:$imm12), (i32 1), timm, (i32 1)),
+          (PRELD 8, GPR:$rj, simm12:$imm12)>; // data prefetch for stores
+
 /// Compare and exchange
 
 class PseudoCmpXchg
diff --git a/llvm/test/CodeGen/LoongArch/preld.ll b/llvm/test/CodeGen/LoongArch/preld.ll
new file mode 100644
index 00000000000000..18057ac871f753
--- /dev/null
+++ b/llvm/test/CodeGen/LoongArch/preld.ll
@@ -0,0 +1,67 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc --mtriple=loongarch32 < %s | FileCheck %s --check-prefix=LA32
+; RUN: llc --mtriple=loongarch64 < %s | FileCheck %s --check-prefix=LA64
+
+declare void @llvm.prefetch(ptr, i32, i32, i32)
+
+define void @load_prefetch_no_offset(ptr %a) {
+; LA32-LABEL: load_prefetch_no_offset:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    preld 0, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_prefetch_no_offset:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    preld 0, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  call void @llvm.prefetch(ptr %a, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @store_prefetch_no_offset(ptr %a) {
+; LA32-LABEL: store_prefetch_no_offset:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    preld 8, $a0, 0
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_prefetch_no_offset:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    preld 8, $a0, 0
+; LA64-NEXT:    ret
+entry:
+  call void @llvm.prefetch(ptr %a, i32 1, i32 3, i32 1)
+  ret void
+}
+
+define void @load_prefetch_with_offset(ptr %a) {
+; LA32-LABEL: load_prefetch_with_offset:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    preld 0, $a0, 200
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: load_prefetch_with_offset:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    preld 0, $a0, 200
+; LA64-NEXT:    ret
+entry:
+  %addr = getelementptr i8, ptr %a, i64 200
+  call void @llvm.prefetch(ptr %addr, i32 0, i32 3, i32 1)
+  ret void
+}
+
+define void @store_prefetch_with_offset(ptr %a) {
+; LA32-LABEL: store_prefetch_with_offset:
+; LA32:       # %bb.0: # %entry
+; LA32-NEXT:    preld 8, $a0, 200
+; LA32-NEXT:    ret
+;
+; LA64-LABEL: store_prefetch_with_offset:
+; LA64:       # %bb.0: # %entry
+; LA64-NEXT:    preld 8, $a0, 200
+; LA64-NEXT:    ret
+entry:
+  %addr = getelementptr i8, ptr %a, i64 200
+  call void @llvm.prefetch(ptr %addr, i32 1, i32 3, i32 1)
+  ret void
+}

@zhaoqi5 zhaoqi5 merged commit 84220ec into llvm:main Jan 20, 2025
8 checks passed
@zhaoqi5 zhaoqi5 deleted the lower-preld-instruction branch January 20, 2025 08:11
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants