Skip to content

Commit 27b6080

Browse files
authored
[RISCV] Increase default tail duplication threshold to 6 at -O3 (#98873)
This mirrors what AArch64 already does. Raising the threshold to 6 will increase code size, but it will also reduce the number of unconditional branches, which can benefit CPUs with wide fetch/issue units. The value 6 is debatable; an alternative would be to set it to `SchedModel.IssueWidth`.
1 parent 430b90f commit 27b6080

File tree

5 files changed

+98
-1
lines changed

5 files changed

+98
-1
lines changed

llvm/lib/Target/RISCV/RISCVInstrInfo.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3763,6 +3763,12 @@ RISCVInstrInfo::getSerializableMachineMemOperandTargetFlags() const {
37633763
return ArrayRef(TargetFlags);
37643764
}
37653765

3766+
// Returns the tail-duplication size to use for the given optimization level.
// At CodeGenOptLevel::Aggressive or higher the value comes from
// STI.getTailDupAggressiveThreshold(), so it can be tuned per processor;
// at every lower level the fixed value 2 is returned.
unsigned RISCVInstrInfo::getTailDuplicateSize(CodeGenOptLevel OptLevel) const {
3767+
return OptLevel >= CodeGenOptLevel::Aggressive
3768+
? STI.getTailDupAggressiveThreshold()
3769+
: 2;
3770+
}
3771+
37663772
// Returns true if this is the sext.w pattern, addiw rd, rs1, 0.
37673773
bool RISCV::isSEXT_W(const MachineInstr &MI) {
37683774
return MI.getOpcode() == RISCV::ADDIW && MI.getOperand(1).isReg() &&

llvm/lib/Target/RISCV/RISCVInstrInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -288,6 +288,8 @@ class RISCVInstrInfo : public RISCVGenInstrInfo {
288288
ArrayRef<std::pair<MachineMemOperand::Flags, const char *>>
289289
getSerializableMachineMemOperandTargetFlags() const override;
290290

291+
unsigned getTailDuplicateSize(CodeGenOptLevel OptLevel) const override;
292+
291293
unsigned getUndefInitOpcode(unsigned RegClassID) const override {
292294
switch (RegClassID) {
293295
case RISCV::VRRegClassID:

llvm/lib/Target/RISCV/RISCVProcessors.td

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,9 @@ class RISCVTuneInfo {
2121
bits<32> MaxPrefetchIterationsAhead = -1;
2222

2323
bits<32> MinimumJumpTableEntries = 5;
24+
25+
// Tail duplication threshold at -O3.
26+
bits<32> TailDupAggressiveThreshold = 6;
2427
}
2528

2629
def RISCVTuneInfoTable : GenericTable {
@@ -29,7 +32,7 @@ def RISCVTuneInfoTable : GenericTable {
2932
let Fields = ["Name", "PrefFunctionAlignment", "PrefLoopAlignment",
3033
"CacheLineSize", "PrefetchDistance",
3134
"MinPrefetchStride", "MaxPrefetchIterationsAhead",
32-
"MinimumJumpTableEntries"];
35+
"MinimumJumpTableEntries", "TailDupAggressiveThreshold"];
3336
}
3437

3538
def getRISCVTuneInfo : SearchIndex {

llvm/lib/Target/RISCV/RISCVSubtarget.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ struct RISCVTuneInfo {
5050
unsigned MaxPrefetchIterationsAhead;
5151

5252
unsigned MinimumJumpTableEntries;
53+
54+
// Tail duplication threshold at -O3.
55+
unsigned TailDupAggressiveThreshold;
5356
};
5457

5558
#define GET_RISCVTuneInfoTable_DECL
@@ -300,6 +303,10 @@ class RISCVSubtarget : public RISCVGenSubtargetInfo {
300303

301304
unsigned getMinimumJumpTableEntries() const;
302305

306+
// Returns the TailDupAggressiveThreshold field of the current TuneInfo entry
// (described on RISCVTuneInfo as the tail duplication threshold at -O3).
unsigned getTailDupAggressiveThreshold() const {
307+
return TuneInfo->TailDupAggressiveThreshold;
308+
}
309+
303310
bool supportsInitUndef() const override { return hasVInstructions(); }
304311
};
305312
} // End llvm namespace
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv64 -mattr=+m -O2 < %s | FileCheck %s --check-prefix=CHECK-O2
3+
; RUN: llc -mtriple=riscv64 -mattr=+m -O3 < %s | FileCheck %s --check-prefix=CHECK-O3
4+
5+
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-size=2 < %s | FileCheck %s --check-prefix=CHECK-O2
6+
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=2 < %s | FileCheck %s --check-prefix=CHECK-O2
7+
; RUN: llc -mtriple=riscv64 -mattr=+m -tail-dup-placement-threshold=6 < %s | FileCheck %s --check-prefix=CHECK-O3
8+
9+
@a = external global i32
10+
@b = external global i32
11+
@c = external global i32
12+
13+
declare i32 @foo(i32)
14+
15+
; Diamond-shaped CFG: %if.then and %if.else both branch to the shared tail
; %if.end. The CHECK-O2 output keeps a single copy of the tail (reached via
; "j .LBB0_3"), while the CHECK-O3 output duplicates the tail into both
; predecessors so each ends in its own "tail foo".
define i32 @test(i32 %n) {
16+
; CHECK-O2-LABEL: test:
17+
; CHECK-O2: # %bb.0: # %entry
18+
; CHECK-O2-NEXT: sext.w a1, a0
19+
; CHECK-O2-NEXT: blez a1, .LBB0_2
20+
; CHECK-O2-NEXT: # %bb.1: # %if.then
21+
; CHECK-O2-NEXT: lui a1, %hi(a)
22+
; CHECK-O2-NEXT: lw a1, %lo(a)(a1)
23+
; CHECK-O2-NEXT: mul a0, a1, a0
24+
; CHECK-O2-NEXT: j .LBB0_3
25+
; CHECK-O2-NEXT: .LBB0_2: # %if.else
26+
; CHECK-O2-NEXT: lui a1, %hi(b)
27+
; CHECK-O2-NEXT: lw a1, %lo(b)(a1)
28+
; CHECK-O2-NEXT: divw a0, a1, a0
29+
; CHECK-O2-NEXT: .LBB0_3: # %if.end
30+
; CHECK-O2-NEXT: lui a1, %hi(c)
31+
; CHECK-O2-NEXT: lw a1, %lo(c)(a1)
32+
; CHECK-O2-NEXT: addi a0, a0, -1
33+
; CHECK-O2-NEXT: mulw a0, a0, a1
34+
; CHECK-O2-NEXT: tail foo
35+
;
36+
; CHECK-O3-LABEL: test:
37+
; CHECK-O3: # %bb.0: # %entry
38+
; CHECK-O3-NEXT: sext.w a1, a0
39+
; CHECK-O3-NEXT: blez a1, .LBB0_2
40+
; CHECK-O3-NEXT: # %bb.1: # %if.then
41+
; CHECK-O3-NEXT: lui a1, %hi(a)
42+
; CHECK-O3-NEXT: lw a1, %lo(a)(a1)
43+
; CHECK-O3-NEXT: mul a0, a1, a0
44+
; CHECK-O3-NEXT: lui a1, %hi(c)
45+
; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
46+
; CHECK-O3-NEXT: addi a0, a0, -1
47+
; CHECK-O3-NEXT: mulw a0, a0, a1
48+
; CHECK-O3-NEXT: tail foo
49+
; CHECK-O3-NEXT: .LBB0_2: # %if.else
50+
; CHECK-O3-NEXT: lui a1, %hi(b)
51+
; CHECK-O3-NEXT: lw a1, %lo(b)(a1)
52+
; CHECK-O3-NEXT: divw a0, a1, a0
53+
; CHECK-O3-NEXT: lui a1, %hi(c)
54+
; CHECK-O3-NEXT: lw a1, %lo(c)(a1)
55+
; CHECK-O3-NEXT: addi a0, a0, -1
56+
; CHECK-O3-NEXT: mulw a0, a0, a1
57+
; CHECK-O3-NEXT: tail foo
58+
entry:
59+
%cmp = icmp sgt i32 %n, 0
60+
br i1 %cmp, label %if.then, label %if.else
61+
62+
if.then:
63+
%va = load i32, ptr @a
64+
%mul = mul nsw i32 %va, %n
65+
br label %if.end
66+
67+
if.else:
68+
%vb = load i32, ptr @b
69+
%div = sdiv i32 %vb, %n
70+
br label %if.end
71+
72+
if.end:
73+
%phi = phi i32 [ %mul, %if.then ], [ %div, %if.else ]
74+
%vc = load i32, ptr @c
75+
%add = add nsw i32 %phi, -1
76+
%arg = mul i32 %add, %vc
77+
%ret = tail call i32 @foo(i32 %arg)
78+
ret i32 %ret
79+
}

0 commit comments

Comments
 (0)