Skip to content

Commit 12b539e

Browse files
committed
Address comments from tschuett
1 parent ded5771 commit 12b539e

File tree

4 files changed

+51
-51
lines changed

4 files changed

+51
-51
lines changed

llvm/include/llvm/Passes/CodeGenPassBuilder.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,6 @@
7474
#include "llvm/Target/CGPassBuilderOption.h"
7575
#include "llvm/Target/TargetMachine.h"
7676
#include "llvm/Transforms/CFGuard.h"
77-
#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
7877
#include "llvm/Transforms/Scalar/ConstantHoisting.h"
7978
#include "llvm/Transforms/Scalar/LoopPassManager.h"
8079
#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"

llvm/include/llvm/Transforms/IPO.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,9 @@ enum class PassSummaryAction {
5555
Export, ///< Export information to summary.
5656
};
5757

58+
/// createGlobalMergeFuncPass - This pass generates merged instances by
59+
/// parameterizing distinct constants across similar functions, utilizing stable
60+
/// function hash information.
5861
Pass *createGlobalMergeFuncPass();
5962

6063
} // End llvm namespace

llvm/include/llvm/Transforms/IPO/GlobalMergeFunctions.h

Lines changed: 23 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,29 @@
55
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
66
//
77
//===----------------------------------------------------------------------===//
8-
///
9-
/// This file defines global merge functions pass and related data structure.
10-
///
8+
//
9+
// This pass defines the implementation of a function merging mechanism
10+
// that utilizes a stable function hash to track differences in constants and
11+
// identify potential merge candidates. The process involves two rounds:
12+
// 1. The first round collects stable function hashes and identifies merge
13+
// candidates with matching hashes. It also computes the set of parameters
14+
// that point to different constants during the stable function merge.
15+
// 2. The second round leverages this collected global function information to
16+
// optimistically create a merged function in each module context, ensuring
17+
// correct transformation.
18+
// Similar to the global outliner, this approach uses the linker's deduplication
19+
// (ICF) to fold identical merged functions, thereby reducing the final binary
20+
// size. The work is inspired by the concepts discussed in the following paper:
21+
// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
22+
//
1123
//===----------------------------------------------------------------------===//
1224

13-
#ifndef PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
14-
#define PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
25+
#ifndef LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
26+
#define LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H
1527

16-
#include "llvm/ADT/DenseMap.h"
17-
#include "llvm/ADT/StableHashing.h"
18-
#include "llvm/ADT/StringRef.h"
1928
#include "llvm/CGData/StableFunctionMap.h"
20-
#include "llvm/IR/Instructions.h"
2129
#include "llvm/IR/Module.h"
2230
#include "llvm/Pass.h"
23-
#include <map>
24-
#include <mutex>
2531

2632
enum class HashFunctionMode {
2733
Local,
@@ -36,15 +42,10 @@ namespace llvm {
3642
using ParamLocs = SmallVector<IndexPair, 4>;
3743
// A vector of parameters
3844
using ParamLocsVecTy = SmallVector<ParamLocs, 8>;
39-
// A map of stable hash to a vector of stable functions
40-
41-
/// GlobalMergeFunc finds functions which only differ by constants in
42-
/// certain instructions, e.g. resulting from specialized functions of layout
43-
/// compatible types.
44-
/// Unlike PikaMergeFunc that directly compares IRs, this uses stable function
45-
/// hash to find the merge candidate. Similar to the global outliner, we can run
46-
/// codegen twice to collect function merge candidate in the first round, and
47-
/// merge functions globally in the second round.
45+
46+
/// GlobalMergeFunc is a ModulePass that implements a function merging mechanism
47+
/// using stable function hashes. It identifies and merges functions with
48+
/// matching hashes across modules to optimize binary size.
4849
class GlobalMergeFunc : public ModulePass {
4950
HashFunctionMode MergerMode = HashFunctionMode::Local;
5051

@@ -69,9 +70,9 @@ class GlobalMergeFunc : public ModulePass {
6970
/// Emit LocalFunctionMap into __llvm_merge section.
7071
void emitFunctionMap(Module &M);
7172

72-
/// Merge functions in the module using the global function map.
73+
/// Merge functions in the module using the given function map.
7374
bool merge(Module &M, const StableFunctionMap *FunctionMap);
7475
};
7576

7677
} // end namespace llvm
77-
#endif // PIKA_TRANSFORMS_UTILS_GLOBALMERGEFUNCTIONS_H
78+
#endif // LLVM_TRANSFORMS_IPO_GLOBALMERGEFUNCTIONS_H

llvm/lib/Transforms/IPO/GlobalMergeFunctions.cpp

Lines changed: 25 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -6,26 +6,26 @@
66
//
77
//===----------------------------------------------------------------------===//
88
//
9-
// TODO: This implements a function merge using function hash while tracking
10-
// differences in Constants. This uses stable function hash to find potential
11-
// merge candidates. The first codegen round collects stable function hashes,
12-
// and determines the merge candidates that match the stable function hashes.
13-
// The set of parameters pointing to different Constants are also computed
14-
// during the stable function merge. The second codegen round uses this global
15-
// function info to optimistically create a merged function in each module
16-
// context to guarantee correct transformation. Similar to the global outliner,
17-
// the linker's deduplication (ICF) folds the identical merged functions to save
18-
// the final binary size.
9+
// This pass defines the implementation of a function merging mechanism
10+
// that utilizes a stable function hash to track differences in constants and
11+
// create potential merge candidates. The process involves two rounds:
12+
// 1. The first round collects stable function hashes and identifies merge
13+
// candidates with matching hashes. It also computes the set of parameters
14+
// that point to different constants during the stable function merge.
15+
// 2. The second round leverages this collected global function information to
16+
// optimistically create a merged function in each module context, ensuring
17+
// correct transformation.
18+
// Similar to the global outliner, this approach uses the linker's deduplication
19+
// (ICF) to fold identical merged functions, thereby reducing the final binary
20+
// size. The work is inspired by the concepts discussed in the following paper:
21+
// https://dl.acm.org/doi/pdf/10.1145/3652032.3657575.
1922
//
2023
//===----------------------------------------------------------------------===//
2124

2225
#include "llvm/Transforms/IPO/GlobalMergeFunctions.h"
2326
#include "llvm/ADT/Statistic.h"
2427
#include "llvm/Analysis/ModuleSummaryAnalysis.h"
2528
#include "llvm/CGData/CodeGenData.h"
26-
#include "llvm/CGData/StableFunctionMap.h"
27-
#include "llvm/CodeGen/MachineStableHash.h"
28-
#include "llvm/CodeGen/Passes.h"
2929
#include "llvm/IR/IRBuilder.h"
3030
#include "llvm/IR/StructuralHash.h"
3131
#include "llvm/InitializePasses.h"
@@ -59,7 +59,7 @@ STATISTIC(NumAnalyzedModues, "Number of modules that are analyzed");
5959
STATISTIC(NumAnalyzedFunctions, "Number of functions that are analyzed");
6060
STATISTIC(NumEligibleFunctions, "Number of functions that are eligible");
6161

62-
/// Returns true if the \opIdx operand of \p CI is the callee operand.
62+
/// Returns true if the \OpIdx operand of \p CI is the callee operand.
6363
static bool isCalleeOperand(const CallBase *CI, unsigned OpIdx) {
6464
return &CI->getCalledOperandUse() == &CI->getOperandUse(OpIdx);
6565
}
@@ -123,22 +123,19 @@ bool isEligibleFunction(Function *F) {
123123
if (F->hasFnAttribute(llvm::Attribute::NoMerge))
124124
return false;
125125

126-
if (F->hasAvailableExternallyLinkage()) {
126+
if (F->hasAvailableExternallyLinkage())
127127
return false;
128-
}
129128

130-
if (F->getFunctionType()->isVarArg()) {
129+
if (F->getFunctionType()->isVarArg())
131130
return false;
132-
}
133131

134132
if (F->getCallingConv() == CallingConv::SwiftTail)
135133
return false;
136134

137-
// if function contains callsites with musttail, if we merge
135+
// If function contains callsites with musttail, if we merge
138136
// it, the merged function will have the musttail callsite, but
139137
// the number of parameters can change, thus the parameter count
140138
// of the callsite will mismatch with the function itself.
141-
// if (IgnoreMusttailFunction) {
142139
for (const BasicBlock &BB : *F) {
143140
for (const Instruction &I : BB) {
144141
const auto *CB = dyn_cast<CallBase>(&I);
@@ -180,7 +177,6 @@ static bool ignoreOp(const Instruction *I, unsigned OpIdx) {
180177
return true;
181178
}
182179

183-
// copy from merge functions.cpp
184180
static Value *createCast(IRBuilder<> &Builder, Value *V, Type *DestTy) {
185181
Type *SrcTy = V->getType();
186182
if (SrcTy->isStructTy()) {
@@ -229,7 +225,8 @@ void GlobalMergeFunc::analyze(Module &M) {
229225

230226
auto FI = llvm::StructuralHashWithDifferences(Func, ignoreOp);
231227

232-
// Convert the map to a vector for a serialization-friendly format.
228+
// Convert the operand map to a vector for a serialization-friendly
229+
// format.
233230
IndexOperandHashVecType IndexOperandHashes;
234231
for (auto &Pair : *FI.IndexOperandHashMap)
235232
IndexOperandHashes.emplace_back(Pair);
@@ -539,7 +536,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
539536
// This module check is not strictly necessary as the functions can move
540537
// around. We just want to avoid merging functions from different
541538
// modules than the first one in the functon map, as they may not end up
542-
// with not being ICFed.
539+
// with not being ICFed by the linker.
543540
if (MergedModId != *FunctionMap->getNameForId(SF->ModuleNameId)) {
544541
++NumMismatchedModuleIdGlobalMergeFunction;
545542
continue;
@@ -560,12 +557,12 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
560557
dbgs() << "[GlobalMergeFunc] Merging function count " << FuncMergeInfoSize
561558
<< " in " << ModId << "\n";
562559
});
560+
563561
for (auto &FMI : FuncMergeInfos) {
564562
Changed = true;
565563

566564
// We've already validated all locations of constant operands pointed by
567-
// the parameters. Just use the first one to bookkeep the original
568-
// constants for each parameter
565+
// the parameters. Populate parameters pointing to the original constants.
569566
SmallVector<Constant *> Params;
570567
SmallVector<Type *> ParamTypes;
571568
for (auto &ParamLocs : ParamLocsVec) {
@@ -577,8 +574,7 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
577574
ParamTypes.push_back(Opnd->getType());
578575
}
579576

580-
// Create a merged function derived from the first function in the current
581-
// module context.
577+
// Create a merged function derived from the current function.
582578
Function *MergedFunc =
583579
createMergedFunction(FMI, ParamTypes, ParamLocsVec);
584580

@@ -589,7 +585,8 @@ bool GlobalMergeFunc::merge(Module &M, const StableFunctionMap *FunctionMap) {
589585
MergedFunc->dump();
590586
});
591587

592-
// Create a thunk to the merged function.
588+
// Transform the current function into a thunk that calls the merged
589+
// function.
593590
createThunk(FMI, Params, MergedFunc);
594591
LLVM_DEBUG({
595592
dbgs() << "[GlobalMergeFunc] Thunk generated: \n";

0 commit comments

Comments
 (0)