Skip to content

Commit 9bb69c1

Browse files
authored
[RISCV] Enable LoopDataPrefetch pass (#66201)
This allows us to benefit from data prefetching when the `Zicbop` extension is supported. Tuning information for data prefetching is added to `RISCVTuneInfo`.
1 parent a0710e1 commit 9bb69c1

File tree

6 files changed

+93
-3
lines changed

6 files changed

+93
-3
lines changed

llvm/lib/Target/RISCV/RISCVProcessors.td

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,20 @@
1313
class RISCVTuneInfo {
1414
bits<8> PrefFunctionAlignment = 1;
1515
bits<8> PrefLoopAlignment = 1;
16+
17+
// Information needed by LoopDataPrefetch.
18+
bits<16> CacheLineSize = 0;
19+
bits<16> PrefetchDistance = 0;
20+
bits<16> MinPrefetchStride = 1;
21+
bits<32> MaxPrefetchIterationsAhead = -1;
1622
}
1723

1824
def RISCVTuneInfoTable : GenericTable {
1925
let FilterClass = "RISCVTuneInfo";
2026
let CppTypeName = "RISCVTuneInfo";
21-
let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment"];
27+
let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment",
28+
"CacheLineSize", "PrefetchDistance",
29+
"MinPrefetchStride", "MaxPrefetchIterationsAhead"];
2230
}
2331

2432
def getRISCVTuneInfo : SearchIndex {

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,12 @@ struct RISCVTuneInfo {
3838
const char *Name;
3939
uint8_t PrefFunctionAlignment;
4040
uint8_t PrefLoopAlignment;
41+
42+
// Information needed by LoopDataPrefetch.
43+
uint16_t CacheLineSize;
44+
uint16_t PrefetchDistance;
45+
uint16_t MinPrefetchStride;
46+
unsigned MaxPrefetchIterationsAhead;
4147
};
4248

4349
#define GET_RISCVTuneInfoTable_DECL
@@ -248,6 +254,22 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
248254
&Mutations) const override;
249255

250256
bool useAA() const override;
257+
258+
unsigned getCacheLineSize() const override {
259+
return TuneInfo->CacheLineSize;
260+
};
261+
unsigned getPrefetchDistance() const override {
262+
return TuneInfo->PrefetchDistance;
263+
};
264+
unsigned getMinPrefetchStride(unsigned NumMemAccesses,
265+
unsigned NumStridedMemAccesses,
266+
unsigned NumPrefetches,
267+
bool HasCall) const override {
268+
return TuneInfo->MinPrefetchStride;
269+
};
270+
unsigned getMaxPrefetchIterationsAhead() const override {
271+
return TuneInfo->MaxPrefetchIterationsAhead;
272+
};
251273
};
252274
} // End llvm namespace
253275

llvm/lib/Target/RISCV/RISCVTargetMachine.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@
3434
#include "llvm/Support/FormattedStream.h"
3535
#include "llvm/Target/TargetOptions.h"
3636
#include "llvm/Transforms/IPO.h"
37+
#include "llvm/Transforms/Scalar.h"
3738
#include <optional>
3839
using namespace llvm;
3940

@@ -83,6 +84,11 @@ static cl::opt<bool>
8384
cl::desc("Enable sinking and folding of instruction copies"),
8485
cl::init(false), cl::Hidden);
8586

87+
static cl::opt<bool>
88+
EnableLoopDataPrefetch("riscv-enable-loop-data-prefetch", cl::Hidden,
89+
cl::desc("Enable the loop data prefetch pass"),
90+
cl::init(true));
91+
8692
extern "C" LLVM_EXTERNAL_VISIBILITY void LLVMInitializeRISCVTarget() {
8793
RegisterTargetMachine<RISCVTargetMachine> X(getTheRISCV32Target());
8894
RegisterTargetMachine<RISCVTargetMachine> Y(getTheRISCV64Target());
@@ -310,6 +316,9 @@ void RISCVPassConfig::addIRPasses() {
310316
addPass(createAtomicExpandPass());
311317

312318
if (getOptLevel() != CodeGenOptLevel::None) {
319+
if (EnableLoopDataPrefetch)
320+
addPass(createLoopDataPrefetchPass());
321+
313322
addPass(createRISCVGatherScatterLoweringPass());
314323
addPass(createInterleavedAccessPass());
315324
addPass(createRISCVCodeGenPreparePass());

llvm/test/CodeGen/RISCV/O3-pipeline.ll

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,10 @@
1212
; CHECK-NEXT: Target Pass Configuration
1313
; CHECK-NEXT: Machine Module Information
1414
; CHECK-NEXT: Target Transform Information
15-
; CHECK-NEXT: Type-Based Alias Analysis
16-
; CHECK-NEXT: Scoped NoAlias Alias Analysis
1715
; CHECK-NEXT: Assumption Cache Tracker
1816
; CHECK-NEXT: Profile summary info
17+
; CHECK-NEXT: Type-Based Alias Analysis
18+
; CHECK-NEXT: Scoped NoAlias Alias Analysis
1919
; CHECK-NEXT: Create Garbage Collector Module Metadata
2020
; CHECK-NEXT: Machine Branch Probability Analysis
2121
; CHECK-NEXT: Default Regalloc Eviction Advisor
@@ -28,6 +28,12 @@
2828
; CHECK-NEXT: Expand Atomic instructions
2929
; CHECK-NEXT: Dominator Tree Construction
3030
; CHECK-NEXT: Natural Loop Information
31+
; CHECK-NEXT: Canonicalize natural loops
32+
; CHECK-NEXT: Lazy Branch Probability Analysis
33+
; CHECK-NEXT: Lazy Block Frequency Analysis
34+
; CHECK-NEXT: Optimization Remark Emitter
35+
; CHECK-NEXT: Scalar Evolution Analysis
36+
; CHECK-NEXT: Loop Data Prefetch
3137
; CHECK-NEXT: RISC-V gather/scatter lowering
3238
; CHECK-NEXT: Interleaved Access Pass
3339
; CHECK-NEXT: RISC-V CodeGenPrepare
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 3
2+
; RUN: opt -mtriple=riscv64 -cache-line-size=64 -prefetch-distance=64 \
3+
; RUN: -passes=loop-data-prefetch -S < %s | FileCheck %s
4+
5+
define void @foo(ptr nocapture %a, ptr nocapture readonly %b) {
6+
; CHECK-LABEL: define void @foo(
7+
; CHECK-SAME: ptr nocapture [[A:%.*]], ptr nocapture readonly [[B:%.*]]) {
8+
; CHECK-NEXT: entry:
9+
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
10+
; CHECK: for.body:
11+
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
12+
; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i64 [[INDVARS_IV]], 3
13+
; CHECK-NEXT: [[TMP1:%.*]] = add i64 [[TMP0]], 64
14+
; CHECK-NEXT: [[SCEVGEP:%.*]] = getelementptr i8, ptr [[B]], i64 [[TMP1]]
15+
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, ptr [[B]], i64 [[INDVARS_IV]]
16+
; CHECK-NEXT: call void @llvm.prefetch.p0(ptr [[SCEVGEP]], i32 0, i32 3, i32 1)
17+
; CHECK-NEXT: [[TMP2:%.*]] = load double, ptr [[ARRAYIDX]], align 8
18+
; CHECK-NEXT: [[ADD:%.*]] = fadd double [[TMP2]], 1.000000e+00
19+
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds double, ptr [[A]], i64 [[INDVARS_IV]]
20+
; CHECK-NEXT: store double [[ADD]], ptr [[ARRAYIDX2]], align 8
21+
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
22+
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 1600
23+
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END:%.*]], label [[FOR_BODY]]
24+
; CHECK: for.end:
25+
; CHECK-NEXT: ret void
26+
;
27+
entry:
28+
br label %for.body
29+
30+
for.body: ; preds = %for.body, %entry
31+
%indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
32+
%arrayidx = getelementptr inbounds double, ptr %b, i64 %indvars.iv
33+
%0 = load double, ptr %arrayidx, align 8
34+
%add = fadd double %0, 1.000000e+00
35+
%arrayidx2 = getelementptr inbounds double, ptr %a, i64 %indvars.iv
36+
store double %add, ptr %arrayidx2, align 8
37+
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
38+
%exitcond = icmp eq i64 %indvars.iv.next, 1600
39+
br i1 %exitcond, label %for.end, label %for.body
40+
41+
for.end: ; preds = %for.body
42+
ret void
43+
}
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
if not "RISCV" in config.root.targets:
2+
config.unsupported = True

0 commit comments

Comments
 (0)