Skip to content

Commit 0f66915

Browse files
[GlobalMerge] Add MinSize feature to the GlobalMerge Pass. (#93686)
This patch adds a feature that prevents the GlobalMerge pass from considering globals smaller than a minimum size in bytes for merging. MinSize is determined in one of three ways, in priority order: 1. If global-merge-min-data-size is explicitly set, that value is used. 2. Otherwise, if the SmallDataLimit module flag is set and non-zero, SmallDataLimit + 1 is used. 3. Otherwise, 0 is used, which means globals of all sizes are considered for merging. We found that this feature allowed us to keep the benefit of the GlobalMerge pass while eliminating some merging that was not beneficial. It also allowed us to enable the GlobalMerge pass by default on RISC-V in our downstream, because it led to improvements on multiple benchmark suites. I plan to post a separate patch proposing to enable the pass by default on RISC-V, but I do not want that discussion to be mixed into the discussion of adding this feature, so I am keeping the patches separate.
1 parent 2f1229e commit 0f66915

File tree

5 files changed

+160
-1
lines changed

5 files changed

+160
-1
lines changed

llvm/include/llvm/CodeGen/GlobalMerge.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ struct GlobalMergeOptions {
2121
// functions), see the code that passes in the offset in the ARM backend
2222
// for more information.
2323
unsigned MaxOffset = 0;
24+
// The minimum size in bytes of each global that should be considered in merging.
25+
unsigned MinSize = 0;
2426
bool GroupByUse = true;
2527
bool IgnoreSingleUse = true;
2628
bool MergeConst = false;

llvm/lib/CodeGen/GlobalMerge.cpp

Lines changed: 21 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,6 +134,12 @@ static cl::opt<cl::boolOrDefault>
134134
EnableGlobalMergeOnExternal("global-merge-on-external", cl::Hidden,
135135
cl::desc("Enable global merge pass on external linkage"));
136136

137+
static cl::opt<unsigned>
138+
GlobalMergeMinDataSize("global-merge-min-data-size",
139+
cl::desc("The minimum size in bytes of each global "
140+
"that should considered in merging."),
141+
cl::init(0), cl::Hidden);
142+
137143
STATISTIC(NumMerged, "Number of globals merged");
138144

139145
namespace {
@@ -198,6 +204,19 @@ class GlobalMerge : public FunctionPass {
198204
}
199205

200206
bool doInitialization(Module &M) override {
207+
auto GetSmallDataLimit = [](Module &M) -> std::optional<uint64_t> {
208+
Metadata *SDL = M.getModuleFlag("SmallDataLimit");
209+
if (!SDL)
210+
return std::nullopt;
211+
return mdconst::extract<ConstantInt>(SDL)->getZExtValue();
212+
};
213+
if (GlobalMergeMinDataSize.getNumOccurrences())
214+
Opt.MinSize = GlobalMergeMinDataSize;
215+
else if (auto SDL = GetSmallDataLimit(M); SDL && *SDL > 0)
216+
Opt.MinSize = *SDL + 1;
217+
else
218+
Opt.MinSize = 0;
219+
201220
GlobalMergeImpl P(TM, Opt);
202221
return P.run(M);
203222
}
@@ -670,7 +689,8 @@ bool GlobalMergeImpl::run(Module &M) {
670689
continue;
671690

672691
Type *Ty = GV.getValueType();
673-
if (DL.getTypeAllocSize(Ty) < Opt.MaxOffset) {
692+
TypeSize AllocSize = DL.getTypeAllocSize(Ty);
693+
if (AllocSize < Opt.MaxOffset && AllocSize >= Opt.MinSize) {
674694
if (TM &&
675695
TargetLoweringObjectFile::getKindForGlobal(&GV, *TM).isBSS())
676696
BSSGlobals[{AddressSpace, Section}].push_back(&GV);
Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv64 -riscv-enable-global-merge -verify-machineinstrs < %s \
3+
; RUN: | FileCheck %s -check-prefix=SMALL-DATA
4+
; RUN: llc -mtriple=riscv64 -riscv-enable-global-merge -global-merge-min-data-size=0 \
5+
; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=MINSIZE
6+
7+
@ig1 = internal global i32 0, align 4
8+
@ig2 = internal global i32 0, align 4
9+
10+
@eg1 = dso_local global i32 0, align 4
11+
@eg2 = dso_local global i32 0, align 4
12+
13+
; This test shows that GlobalDataMinSize is set to SmallDataLimit + 1 when
14+
; SmallDataLimit module flag is set as non-zero, and that global-merge-min-data-size
15+
; overrides the small data limit.
16+
17+
define void @f1(i32 %a) nounwind {
18+
; SMALL-DATA-LABEL: f1:
19+
; SMALL-DATA: # %bb.0:
20+
; SMALL-DATA-NEXT: lui a1, %hi(ig1)
21+
; SMALL-DATA-NEXT: sw a0, %lo(ig1)(a1)
22+
; SMALL-DATA-NEXT: lui a1, %hi(ig2)
23+
; SMALL-DATA-NEXT: sw a0, %lo(ig2)(a1)
24+
; SMALL-DATA-NEXT: lui a1, %hi(eg1)
25+
; SMALL-DATA-NEXT: sw a0, %lo(eg1)(a1)
26+
; SMALL-DATA-NEXT: lui a1, %hi(eg2)
27+
; SMALL-DATA-NEXT: sw a0, %lo(eg2)(a1)
28+
; SMALL-DATA-NEXT: ret
29+
;
30+
; MINSIZE-LABEL: f1:
31+
; MINSIZE: # %bb.0:
32+
; MINSIZE-NEXT: lui a1, %hi(.L_MergedGlobals)
33+
; MINSIZE-NEXT: sw a0, %lo(.L_MergedGlobals)(a1)
34+
; MINSIZE-NEXT: addi a1, a1, %lo(.L_MergedGlobals)
35+
; MINSIZE-NEXT: sw a0, 4(a1)
36+
; MINSIZE-NEXT: sw a0, 8(a1)
37+
; MINSIZE-NEXT: sw a0, 12(a1)
38+
; MINSIZE-NEXT: ret
39+
store i32 %a, ptr @ig1, align 4
40+
store i32 %a, ptr @ig2, align 4
41+
store i32 %a, ptr @eg1, align 4
42+
store i32 %a, ptr @eg2, align 4
43+
ret void
44+
}
45+
46+
47+
!llvm.module.flags = !{!0}
48+
!0 = !{i32 8, !"SmallDataLimit", i32 8}
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge -verify-machineinstrs < %s \
3+
; RUN: | FileCheck %s -check-prefix=SMALL-DATA
4+
; RUN: llc -mtriple=riscv64 -riscv-enable-global-merge -global-merge-min-data-size=5 \
5+
; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=MINSIZE
6+
7+
@ig1 = internal global i32 0, align 4
8+
@ig2 = internal global i32 0, align 4
9+
10+
@eg1 = dso_local global i32 0, align 4
11+
@eg2 = dso_local global i32 0, align 4
12+
13+
14+
; This test shows that GlobalDataMinSize is set to 0 when SmallDataLimit module
15+
; flag is set to zero, and that the global-merge-min-data-size option overrides
16+
; the small data limit.
17+
18+
define void @f1(i32 %a) nounwind {
19+
; SMALL-DATA-LABEL: f1:
20+
; SMALL-DATA: # %bb.0:
21+
; SMALL-DATA-NEXT: lui a1, %hi(.L_MergedGlobals)
22+
; SMALL-DATA-NEXT: sw a0, %lo(.L_MergedGlobals)(a1)
23+
; SMALL-DATA-NEXT: addi a1, a1, %lo(.L_MergedGlobals)
24+
; SMALL-DATA-NEXT: sw a0, 4(a1)
25+
; SMALL-DATA-NEXT: sw a0, 8(a1)
26+
; SMALL-DATA-NEXT: sw a0, 12(a1)
27+
; SMALL-DATA-NEXT: ret
28+
;
29+
; MINSIZE-LABEL: f1:
30+
; MINSIZE: # %bb.0:
31+
; MINSIZE-NEXT: lui a1, %hi(ig1)
32+
; MINSIZE-NEXT: sw a0, %lo(ig1)(a1)
33+
; MINSIZE-NEXT: lui a1, %hi(ig2)
34+
; MINSIZE-NEXT: sw a0, %lo(ig2)(a1)
35+
; MINSIZE-NEXT: lui a1, %hi(eg1)
36+
; MINSIZE-NEXT: sw a0, %lo(eg1)(a1)
37+
; MINSIZE-NEXT: lui a1, %hi(eg2)
38+
; MINSIZE-NEXT: sw a0, %lo(eg2)(a1)
39+
; MINSIZE-NEXT: ret
40+
store i32 %a, ptr @ig1, align 4
41+
store i32 %a, ptr @ig2, align 4
42+
store i32 %a, ptr @eg1, align 4
43+
store i32 %a, ptr @eg2, align 4
44+
ret void
45+
}
46+
47+
48+
!llvm.module.flags = !{!0}
49+
!0 = !{i32 8, !"SmallDataLimit", i32 0}
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2+
; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge -verify-machineinstrs < %s \
3+
; RUN: | FileCheck %s -check-prefix=RV32
4+
; RUN: llc -mtriple=riscv32 -riscv-enable-global-merge -global-merge-min-data-size=5 \
5+
; RUN: -verify-machineinstrs < %s | FileCheck %s -check-prefix=RV32-MINSIZE
6+
7+
@ig1 = internal global i32 0, align 4
8+
@ig2 = internal global i32 0, align 4
9+
10+
@eg1 = dso_local global i32 0, align 4
11+
@eg2 = dso_local global i32 0, align 4
12+
13+
define void @f1(i32 %a) nounwind {
14+
; RV32-LABEL: f1:
15+
; RV32: # %bb.0:
16+
; RV32-NEXT: lui a1, %hi(.L_MergedGlobals)
17+
; RV32-NEXT: sw a0, %lo(.L_MergedGlobals)(a1)
18+
; RV32-NEXT: addi a1, a1, %lo(.L_MergedGlobals)
19+
; RV32-NEXT: sw a0, 4(a1)
20+
; RV32-NEXT: sw a0, 8(a1)
21+
; RV32-NEXT: sw a0, 12(a1)
22+
; RV32-NEXT: ret
23+
;
24+
; RV32-MINSIZE-LABEL: f1:
25+
; RV32-MINSIZE: # %bb.0:
26+
; RV32-MINSIZE-NEXT: lui a1, %hi(ig1)
27+
; RV32-MINSIZE-NEXT: sw a0, %lo(ig1)(a1)
28+
; RV32-MINSIZE-NEXT: lui a1, %hi(ig2)
29+
; RV32-MINSIZE-NEXT: sw a0, %lo(ig2)(a1)
30+
; RV32-MINSIZE-NEXT: lui a1, %hi(eg1)
31+
; RV32-MINSIZE-NEXT: sw a0, %lo(eg1)(a1)
32+
; RV32-MINSIZE-NEXT: lui a1, %hi(eg2)
33+
; RV32-MINSIZE-NEXT: sw a0, %lo(eg2)(a1)
34+
; RV32-MINSIZE-NEXT: ret
35+
store i32 %a, ptr @ig1, align 4
36+
store i32 %a, ptr @ig2, align 4
37+
store i32 %a, ptr @eg1, align 4
38+
store i32 %a, ptr @eg2, align 4
39+
ret void
40+
}

0 commit comments

Comments
 (0)