Skip to content

Commit 0515e21

Browse files
committed
[GlobalMergeFunctions] Skip No Params
1 parent 93a4244 commit 0515e21

File tree

4 files changed

+85
-23
lines changed

4 files changed

+85
-23
lines changed

llvm/lib/CGData/StableFunctionMap.cpp

Lines changed: 33 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414
//===----------------------------------------------------------------------===//
1515

1616
#include "llvm/CGData/StableFunctionMap.h"
17+
#include "llvm/ADT/SmallSet.h"
1718
#include "llvm/Support/CommandLine.h"
1819
#include "llvm/Support/Debug.h"
1920

@@ -35,21 +36,30 @@ static cl::opt<unsigned> GlobalMergingMaxParams(
3536
cl::desc(
3637
"The maximum number of parameters allowed when merging functions."),
3738
cl::init(std::numeric_limits<unsigned>::max()), cl::Hidden);
38-
static cl::opt<unsigned> GlobalMergingParamOverhead(
39+
static cl::opt<bool> GlobalMergingSkipNoParams(
40+
"global-merging-skip-no-params",
41+
cl::desc("Skip merging functions with no parameters."), cl::init(false),
42+
cl::Hidden);
43+
static cl::opt<double> GlobalMergingInstOverhead(
44+
"global-merging-inst-overhead",
45+
cl::desc("The overhead cost associated with each instruction when lowering "
46+
"to machine instruction."),
47+
cl::init(1.0), cl::Hidden);
48+
static cl::opt<double> GlobalMergingParamOverhead(
3949
"global-merging-param-overhead",
4050
cl::desc("The overhead cost associated with each parameter when merging "
4151
"functions."),
42-
cl::init(2), cl::Hidden);
43-
static cl::opt<unsigned>
52+
cl::init(2.0), cl::Hidden);
53+
static cl::opt<double>
4454
GlobalMergingCallOverhead("global-merging-call-overhead",
4555
cl::desc("The overhead cost associated with each "
4656
"function call when merging functions."),
47-
cl::init(1), cl::Hidden);
48-
static cl::opt<unsigned> GlobalMergingExtraThreshold(
57+
cl::init(1.0), cl::Hidden);
58+
static cl::opt<double> GlobalMergingExtraThreshold(
4959
"global-merging-extra-threshold",
5060
cl::desc("An additional cost threshold that must be exceeded for merging "
5161
"to be considered beneficial."),
52-
cl::init(0), cl::Hidden);
62+
cl::init(0.0), cl::Hidden);
5363

5464
unsigned StableFunctionMap::getIdOrCreateForName(StringRef Name) {
5565
auto It = NameToId.find(Name);
@@ -159,22 +169,28 @@ static bool isProfitable(
159169
unsigned InstCount = SFS[0]->InstCount;
160170
if (InstCount < GlobalMergingMinInstrs)
161171
return false;
162-
163-
unsigned ParamCount = SFS[0]->IndexOperandHashMap->size();
164-
if (ParamCount > GlobalMergingMaxParams)
165-
return false;
166-
167-
unsigned Benefit = InstCount * (StableFunctionCount - 1);
168-
unsigned Cost =
169-
(GlobalMergingParamOverhead * ParamCount + GlobalMergingCallOverhead) *
170-
StableFunctionCount +
171-
GlobalMergingExtraThreshold;
172+
double Benefit =
173+
InstCount * (StableFunctionCount - 1) * GlobalMergingInstOverhead;
174+
175+
double Cost = 0.0;
176+
SmallSet<stable_hash, 8> UniqueHashVals;
177+
for (auto &SF : SFS) {
178+
UniqueHashVals.clear();
179+
for (auto &[IndexPair, Hash] : *SF->IndexOperandHashMap)
180+
UniqueHashVals.insert(Hash);
181+
unsigned ParamCount = UniqueHashVals.size();
182+
if (ParamCount > GlobalMergingMaxParams)
183+
return false;
184+
if (GlobalMergingSkipNoParams && ParamCount == 0)
185+
return false;
186+
Cost += ParamCount * GlobalMergingParamOverhead + GlobalMergingCallOverhead;
187+
}
188+
Cost += GlobalMergingExtraThreshold;
172189

173190
bool Result = Benefit > Cost;
174191
LLVM_DEBUG(dbgs() << "isProfitable: Hash = " << SFS[0]->Hash << ", "
175192
<< "StableFunctionCount = " << StableFunctionCount
176193
<< ", InstCount = " << InstCount
177-
<< ", ParamCount = " << ParamCount
178194
<< ", Benefit = " << Benefit << ", Cost = " << Cost
179195
<< ", Result = " << (Result ? "true" : "false") << "\n");
180196
return Result;

llvm/lib/CodeGen/GlobalMergeFunctions.cpp

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -405,12 +405,13 @@ static ParamLocsVecTy computeParamInfo(
405405
}
406406

407407
ParamLocsVecTy ParamLocsVec;
408-
for (auto &[HashSeq, Locs] : HashSeqToLocs) {
408+
for (auto &[HashSeq, Locs] : HashSeqToLocs)
409409
ParamLocsVec.push_back(std::move(Locs));
410-
llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) {
411-
return L[0] < R[0];
412-
});
413-
}
410+
411+
llvm::sort(ParamLocsVec, [&](const ParamLocs &L, const ParamLocs &R) {
412+
return L[0] < R[0];
413+
});
414+
414415
return ParamLocsVec;
415416
}
416417

llvm/test/ThinLTO/AArch64/cgdata-merge-local.ll renamed to llvm/test/CodeGen/Generic/cgdata-merge-local.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
; while parameterizing a difference in their global variables, g1 and g2.
33
; To achieve this, we create two instances of the global merging function, f1.Tgm and f2.Tgm,
44
; which are tail-called from thunks f1 and f2 respectively.
5-
; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (IFC).
5+
; These identical functions, f1.Tgm and f2.Tgm, will be folded by the linker via Identical Code Folding (ICF).
66

77
; RUN: opt -S --passes=global-merge-func %s | FileCheck %s
88

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
; This test verifies whether two identical functions, f1 and f2, can be merged
2+
; locally using the global merge function.
3+
; The functions, f1.Tgm and f2.Tgm, will be folded by the linker through
4+
; Identical Code Folding (ICF).
5+
; While identical functions can already be folded by the linker, creating this
6+
; canonical form can be beneficial in downstream passes. This merging process
7+
; can be controlled by the -global-merging-skip-no-params option.
8+
9+
; RUN: llc -enable-global-merge-func=true -global-merging-skip-no-params=false < %s | FileCheck %s --check-prefix=MERGE
10+
; RUN: llc -enable-global-merge-func=true -global-merging-skip-no-params=true < %s | FileCheck %s --check-prefix=NOMERGE
11+
12+
; MERGE: _f1.Tgm
13+
; MERGE: _f2.Tgm
14+
15+
; NOMERGE-NOT: _f1.Tgm
16+
; NOMERGE-NOT: _f2.Tgm
17+
18+
target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
19+
target triple = "arm64-unknown-ios12.0.0"
20+
21+
@g = external local_unnamed_addr global [0 x i32], align 4
22+
@g1 = external global i32, align 4
23+
@g2 = external global i32, align 4
24+
25+
define i32 @f1(i32 %a) {
26+
entry:
27+
%idxprom = sext i32 %a to i64
28+
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
29+
%0 = load i32, i32* %arrayidx, align 4
30+
%1 = load volatile i32, i32* @g1, align 4
31+
%mul = mul nsw i32 %1, %0
32+
%add = add nsw i32 %mul, 1
33+
ret i32 %add
34+
}
35+
36+
define i32 @f2(i32 %a) {
37+
entry:
38+
%idxprom = sext i32 %a to i64
39+
%arrayidx = getelementptr inbounds [0 x i32], [0 x i32]* @g, i64 0, i64 %idxprom
40+
%0 = load i32, i32* %arrayidx, align 4
41+
%1 = load volatile i32, i32* @g1, align 4
42+
%mul = mul nsw i32 %1, %0
43+
%add = add nsw i32 %mul, 1
44+
ret i32 %add
45+
}

0 commit comments

Comments
 (0)