Skip to content

Commit ce1b653

Browse files
committed
[Utils] Identity map global debug info on first use in CloneFunction*
Summary: To avoid cloning 'global' debug info, CloneFunction implementation used to eagerly identity map a known subset of global debug into into ValueMap's MD map. In larger modules with meaningful volume of debug info this gets very expensive. By passing such global metadata via an IdentityMD set for the ValueMapper to map on first use, we get several benefits: 1. Mapping metadata is not cheap, particularly because of tracking. When cloning a Function we identity map lots of global module-level metadata to avoid cloning it, while only a fraction of it is actually used by the function. Mapping on first use is a lot faster for modules with meaningful amount of debug info. 2. Eagerly identity mapping metadata makes it harder to cache module-level data (e.g. a set of metadata nodes in a \a DICompileUnit). With this patch we can cache certain module-level metadata calculations to speed things up further. Anecdata from compiling a sample cpp file with full debug info shows that this moderately speeds up CoroSplitPass which is one of the heavier users of cloning: | | Baseline | IdentityMD set | |-----------------+----------+----------------| | CoroSplitPass | 306ms | 221ms | | CoroCloner | 101ms | 72ms | |-----------------+----------+----------------| | Speed up | 1x | 1.4x | Test Plan: ninja check-llvm-unit ninja check-llvm stack-info: PR: #118627, branch: users/artempyanykh/fast-coro-upstream/8
1 parent 51e786e commit ce1b653

File tree

4 files changed

+103
-61
lines changed

4 files changed

+103
-61
lines changed

llvm/include/llvm/Transforms/Utils/Cloning.h

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -192,7 +192,8 @@ void CloneFunctionAttributesInto(Function *NewFunc, const Function *OldFunc,
192192
void CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
193193
ValueToValueMapTy &VMap, RemapFlags RemapFlag,
194194
ValueMapTypeRemapper *TypeMapper = nullptr,
195-
ValueMaterializer *Materializer = nullptr);
195+
ValueMaterializer *Materializer = nullptr,
196+
const MetadataSetTy *IdentityMD = nullptr);
196197

197198
/// Clone OldFunc's body into NewFunc.
198199
void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
@@ -201,7 +202,8 @@ void CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
201202
const char *NameSuffix = "",
202203
ClonedCodeInfo *CodeInfo = nullptr,
203204
ValueMapTypeRemapper *TypeMapper = nullptr,
204-
ValueMaterializer *Materializer = nullptr);
205+
ValueMaterializer *Materializer = nullptr,
206+
const MetadataSetTy *IdentityMD = nullptr);
205207

206208
void CloneAndPruneIntoFromInst(Function *NewFunc, const Function *OldFunc,
207209
const Instruction *StartingInst,
@@ -241,13 +243,12 @@ DISubprogram *CollectDebugInfoForCloning(const Function &F,
241243
CloneFunctionChangeType Changes,
242244
DebugInfoFinder &DIFinder);
243245

244-
/// Build a map of debug info to use during Metadata cloning.
245-
/// Returns true if cloning would need module level changes and false if there
246-
/// would only be local changes.
247-
bool BuildDebugInfoMDMap(DenseMap<const Metadata *, TrackingMDRef> &MD,
248-
CloneFunctionChangeType Changes,
249-
DebugInfoFinder &DIFinder,
250-
DISubprogram *SPClonedWithinModule);
246+
/// Based on \p Changes and \p DIFinder populate \p MD with debug info that
247+
/// needs to be identity mapped during Metadata cloning.
248+
void FindDebugInfoToIdentityMap(MetadataSetTy &MD,
249+
CloneFunctionChangeType Changes,
250+
DebugInfoFinder &DIFinder,
251+
DISubprogram *SPClonedWithinModule);
251252

252253
/// This class captures the data input to the InlineFunction call, and records
253254
/// the auxiliary results produced by it.

llvm/include/llvm/Transforms/Utils/ValueMapper.h

Lines changed: 49 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#define LLVM_TRANSFORMS_UTILS_VALUEMAPPER_H
1616

1717
#include "llvm/ADT/ArrayRef.h"
18+
#include "llvm/ADT/SmallPtrSet.h"
1819
#include "llvm/ADT/simple_ilist.h"
1920
#include "llvm/IR/ValueHandle.h"
2021
#include "llvm/IR/ValueMap.h"
@@ -35,6 +36,7 @@ class Value;
3536

3637
using ValueToValueMapTy = ValueMap<const Value *, WeakTrackingVH>;
3738
using DbgRecordIterator = simple_ilist<DbgRecord>::iterator;
39+
using MetadataSetTy = SmallPtrSet<const Metadata *, 16>;
3840

3941
/// This is a class that can be implemented by clients to remap types when
4042
/// cloning constants and instructions.
@@ -136,6 +138,18 @@ inline RemapFlags operator|(RemapFlags LHS, RemapFlags RHS) {
136138
/// alternate \a ValueToValueMapTy and \a ValueMaterializer and returns a ID to
137139
/// pass into the schedule*() functions.
138140
///
141+
/// NOTE: \c IdentityMD is used by CloneFunction* to directly specify metadata
142+
/// that should be identity mapped (and hence not cloned). The metadata will be
143+
/// identity mapped in \c VM on first use. There are several reasons for doing
144+
/// it this way rather than eagerly identity mapping metadata nodes in \c VM:
145+
/// 1. Mapping metadata is not cheap, particularly because of tracking.
146+
/// 2. When cloning a Function we identity map lots of global module-level
147+
/// metadata to avoid cloning it, while only a fraction of it is actually
148+
/// used by the function. Mapping on first use is a lot faster for modules
149+
/// with meaningful amount of debug info.
150+
/// 3. Eagerly identity mapping metadata makes it harder to cache module-level
151+
/// data (e.g. a set of metadata nodes in a \a DICompileUnit).
152+
///
139153
/// TODO: lib/Linker really doesn't need the \a ValueHandle in the \a
140154
/// ValueToValueMapTy. We should template \a ValueMapper (and its
141155
/// implementation classes), and explicitly instantiate on two concrete
@@ -152,7 +166,8 @@ class ValueMapper {
152166
public:
153167
ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags = RF_None,
154168
ValueMapTypeRemapper *TypeMapper = nullptr,
155-
ValueMaterializer *Materializer = nullptr);
169+
ValueMaterializer *Materializer = nullptr,
170+
const MetadataSetTy *IdentityMD = nullptr);
156171
ValueMapper(ValueMapper &&) = delete;
157172
ValueMapper(const ValueMapper &) = delete;
158173
ValueMapper &operator=(ValueMapper &&) = delete;
@@ -218,8 +233,10 @@ class ValueMapper {
218233
inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
219234
RemapFlags Flags = RF_None,
220235
ValueMapTypeRemapper *TypeMapper = nullptr,
221-
ValueMaterializer *Materializer = nullptr) {
222-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapValue(*V);
236+
ValueMaterializer *Materializer = nullptr,
237+
const MetadataSetTy *IdentityMD = nullptr) {
238+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
239+
.mapValue(*V);
223240
}
224241

225242
/// Lookup or compute a mapping for a piece of metadata.
@@ -231,7 +248,9 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
231248
/// \c MD.
232249
/// 3. Else if \c MD is a \a ConstantAsMetadata, call \a MapValue() and
233250
/// re-wrap its return (returning nullptr on nullptr).
234-
/// 4. Else, \c MD is an \a MDNode. These are remapped, along with their
251+
/// 4. Else if \c MD is in \c IdentityMD then add an identity mapping for it
252+
/// and return it.
253+
/// 5. Else, \c MD is an \a MDNode. These are remapped, along with their
235254
/// transitive operands. Distinct nodes are duplicated or moved depending
236255
/// on \a RF_MoveDistinctNodes. Uniqued nodes are remapped like constants.
237256
///
@@ -240,16 +259,20 @@ inline Value *MapValue(const Value *V, ValueToValueMapTy &VM,
240259
inline Metadata *MapMetadata(const Metadata *MD, ValueToValueMapTy &VM,
241260
RemapFlags Flags = RF_None,
242261
ValueMapTypeRemapper *TypeMapper = nullptr,
243-
ValueMaterializer *Materializer = nullptr) {
244-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMetadata(*MD);
262+
ValueMaterializer *Materializer = nullptr,
263+
const MetadataSetTy *IdentityMD = nullptr) {
264+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
265+
.mapMetadata(*MD);
245266
}
246267

247268
/// Version of MapMetadata with type safety for MDNode.
248269
inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
249270
RemapFlags Flags = RF_None,
250271
ValueMapTypeRemapper *TypeMapper = nullptr,
251-
ValueMaterializer *Materializer = nullptr) {
252-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapMDNode(*MD);
272+
ValueMaterializer *Materializer = nullptr,
273+
const MetadataSetTy *IdentityMD = nullptr) {
274+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
275+
.mapMDNode(*MD);
253276
}
254277

255278
/// Convert the instruction operands from referencing the current values into
@@ -263,17 +286,21 @@ inline MDNode *MapMetadata(const MDNode *MD, ValueToValueMapTy &VM,
263286
inline void RemapInstruction(Instruction *I, ValueToValueMapTy &VM,
264287
RemapFlags Flags = RF_None,
265288
ValueMapTypeRemapper *TypeMapper = nullptr,
266-
ValueMaterializer *Materializer = nullptr) {
267-
ValueMapper(VM, Flags, TypeMapper, Materializer).remapInstruction(*I);
289+
ValueMaterializer *Materializer = nullptr,
290+
const MetadataSetTy *IdentityMD = nullptr) {
291+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
292+
.remapInstruction(*I);
268293
}
269294

270295
/// Remap the Values used in the DbgRecord \a DR using the value map \a
271296
/// VM.
272297
inline void RemapDbgRecord(Module *M, DbgRecord *DR, ValueToValueMapTy &VM,
273298
RemapFlags Flags = RF_None,
274299
ValueMapTypeRemapper *TypeMapper = nullptr,
275-
ValueMaterializer *Materializer = nullptr) {
276-
ValueMapper(VM, Flags, TypeMapper, Materializer).remapDbgRecord(M, *DR);
300+
ValueMaterializer *Materializer = nullptr,
301+
const MetadataSetTy *IdentityMD = nullptr) {
302+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
303+
.remapDbgRecord(M, *DR);
277304
}
278305

279306
/// Remap the Values used in the DbgRecords \a Range using the value map \a
@@ -283,8 +310,9 @@ inline void RemapDbgRecordRange(Module *M,
283310
ValueToValueMapTy &VM,
284311
RemapFlags Flags = RF_None,
285312
ValueMapTypeRemapper *TypeMapper = nullptr,
286-
ValueMaterializer *Materializer = nullptr) {
287-
ValueMapper(VM, Flags, TypeMapper, Materializer)
313+
ValueMaterializer *Materializer = nullptr,
314+
const MetadataSetTy *IdentityMD = nullptr) {
315+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
288316
.remapDbgRecordRange(M, Range);
289317
}
290318

@@ -297,16 +325,19 @@ inline void RemapDbgRecordRange(Module *M,
297325
inline void RemapFunction(Function &F, ValueToValueMapTy &VM,
298326
RemapFlags Flags = RF_None,
299327
ValueMapTypeRemapper *TypeMapper = nullptr,
300-
ValueMaterializer *Materializer = nullptr) {
301-
ValueMapper(VM, Flags, TypeMapper, Materializer).remapFunction(F);
328+
ValueMaterializer *Materializer = nullptr,
329+
const MetadataSetTy *IdentityMD = nullptr) {
330+
ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD).remapFunction(F);
302331
}
303332

304333
/// Version of MapValue with type safety for Constant.
305334
inline Constant *MapValue(const Constant *V, ValueToValueMapTy &VM,
306335
RemapFlags Flags = RF_None,
307336
ValueMapTypeRemapper *TypeMapper = nullptr,
308-
ValueMaterializer *Materializer = nullptr) {
309-
return ValueMapper(VM, Flags, TypeMapper, Materializer).mapConstant(*V);
337+
ValueMaterializer *Materializer = nullptr,
338+
const MetadataSetTy *IdentityMD = nullptr) {
339+
return ValueMapper(VM, Flags, TypeMapper, Materializer, IdentityMD)
340+
.mapConstant(*V);
310341
}
311342

312343
} // end namespace llvm

llvm/lib/Transforms/Utils/CloneFunction.cpp

Lines changed: 29 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -152,64 +152,57 @@ DISubprogram *llvm::CollectDebugInfoForCloning(const Function &F,
152152
return SPClonedWithinModule;
153153
}
154154

155-
bool llvm::BuildDebugInfoMDMap(DenseMap<const Metadata *, TrackingMDRef> &MD,
156-
CloneFunctionChangeType Changes,
157-
DebugInfoFinder &DIFinder,
158-
DISubprogram *SPClonedWithinModule) {
159-
bool ModuleLevelChanges = Changes > CloneFunctionChangeType::LocalChangesOnly;
155+
void llvm::FindDebugInfoToIdentityMap(MetadataSetTy &MD,
156+
CloneFunctionChangeType Changes,
157+
DebugInfoFinder &DIFinder,
158+
DISubprogram *SPClonedWithinModule) {
160159
if (Changes < CloneFunctionChangeType::DifferentModule &&
161160
DIFinder.subprogram_count() > 0) {
162-
// Turn on module-level changes, since we need to clone (some of) the
163-
// debug info metadata.
161+
// Even if Changes are local only, we turn on module-level changes, since we
162+
// need to clone (some of) the debug info metadata.
164163
//
165164
// FIXME: Metadata effectively owned by a function should be made
166165
// local, and only that local metadata should be cloned.
167-
ModuleLevelChanges = true;
168-
169-
auto mapToSelfIfNew = [&MD](MDNode *N) {
170-
// Avoid clobbering an existing mapping.
171-
(void)MD.try_emplace(N, N);
172-
};
173166

174167
// Avoid cloning types, compile units, and (other) subprograms.
175168
for (DISubprogram *ISP : DIFinder.subprograms()) {
176169
if (ISP != SPClonedWithinModule)
177-
mapToSelfIfNew(ISP);
170+
MD.insert(ISP);
178171
}
179172

180173
// If a subprogram isn't going to be cloned skip its lexical blocks as well.
181174
for (DIScope *S : DIFinder.scopes()) {
182175
auto *LScope = dyn_cast<DILocalScope>(S);
183176
if (LScope && LScope->getSubprogram() != SPClonedWithinModule)
184-
mapToSelfIfNew(S);
177+
MD.insert(S);
185178
}
186179

187180
for (DICompileUnit *CU : DIFinder.compile_units())
188-
mapToSelfIfNew(CU);
181+
MD.insert(CU);
189182

190183
for (DIType *Type : DIFinder.types())
191-
mapToSelfIfNew(Type);
184+
MD.insert(Type);
192185
} else {
193186
assert(!SPClonedWithinModule &&
194187
"Subprogram should be in DIFinder->subprogram_count()...");
195188
}
196-
197-
return ModuleLevelChanges;
198189
}
199190

200191
void llvm::CloneFunctionMetadataInto(Function *NewFunc, const Function *OldFunc,
201192
ValueToValueMapTy &VMap,
202193
RemapFlags RemapFlag,
203194
ValueMapTypeRemapper *TypeMapper,
204-
ValueMaterializer *Materializer) {
195+
ValueMaterializer *Materializer,
196+
const MetadataSetTy *IdentityMD) {
205197
// Duplicate the metadata that is attached to the cloned function.
206198
// Subprograms/CUs/types that were already mapped to themselves won't be
207199
// duplicated.
208200
SmallVector<std::pair<unsigned, MDNode *>, 1> MDs;
209201
OldFunc->getAllMetadata(MDs);
210202
for (auto MD : MDs) {
211-
NewFunc->addMetadata(MD.first, *MapMetadata(MD.second, VMap, RemapFlag,
212-
TypeMapper, Materializer));
203+
NewFunc->addMetadata(MD.first,
204+
*MapMetadata(MD.second, VMap, RemapFlag, TypeMapper,
205+
Materializer, IdentityMD));
213206
}
214207
}
215208

@@ -219,7 +212,8 @@ void llvm::CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
219212
const char *NameSuffix,
220213
ClonedCodeInfo *CodeInfo,
221214
ValueMapTypeRemapper *TypeMapper,
222-
ValueMaterializer *Materializer) {
215+
ValueMaterializer *Materializer,
216+
const MetadataSetTy *IdentityMD) {
223217
if (OldFunc->isDeclaration())
224218
return;
225219

@@ -260,9 +254,10 @@ void llvm::CloneFunctionBodyInto(Function *NewFunc, const Function *OldFunc,
260254
// Loop over all instructions, fixing each one as we find it, and any
261255
// attached debug-info records.
262256
for (Instruction &II : *BB) {
263-
RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer);
257+
RemapInstruction(&II, VMap, RemapFlag, TypeMapper, Materializer,
258+
IdentityMD);
264259
RemapDbgRecordRange(II.getModule(), II.getDbgRecordRange(), VMap,
265-
RemapFlag, TypeMapper, Materializer);
260+
RemapFlag, TypeMapper, Materializer, IdentityMD);
266261
}
267262
}
268263

@@ -324,16 +319,20 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc,
324319
DISubprogram *SPClonedWithinModule =
325320
CollectDebugInfoForCloning(*OldFunc, Changes, DIFinder);
326321

327-
ModuleLevelChanges =
328-
BuildDebugInfoMDMap(VMap.MD(), Changes, DIFinder, SPClonedWithinModule);
322+
MetadataSetTy IdentityMD;
323+
FindDebugInfoToIdentityMap(IdentityMD, Changes, DIFinder,
324+
SPClonedWithinModule);
329325

330-
const auto RemapFlag = ModuleLevelChanges ? RF_None : RF_NoModuleLevelChanges;
326+
// Current implementation always upgrades from local changes to module level
327+
// changes due to the way metadata cloning is done. See
328+
// BuildDebugInfoToIdentityMap for more details.
329+
const auto RemapFlag = RF_None;
331330

332331
CloneFunctionMetadataInto(NewFunc, OldFunc, VMap, RemapFlag, TypeMapper,
333-
Materializer);
332+
Materializer, &IdentityMD);
334333

335334
CloneFunctionBodyInto(NewFunc, OldFunc, VMap, RemapFlag, Returns, NameSuffix,
336-
CodeInfo, TypeMapper, Materializer);
335+
CodeInfo, TypeMapper, Materializer, &IdentityMD);
337336

338337
// Only update !llvm.dbg.cu for DifferentModule (not CloneModule). In the
339338
// same module, the compile unit will already be listed (or not). When

llvm/lib/Transforms/Utils/ValueMapper.cpp

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,12 +120,14 @@ class Mapper {
120120
SmallVector<WorklistEntry, 4> Worklist;
121121
SmallVector<DelayedBasicBlock, 1> DelayedBBs;
122122
SmallVector<Constant *, 16> AppendingInits;
123+
const MetadataSetTy *IdentityMD;
123124

124125
public:
125126
Mapper(ValueToValueMapTy &VM, RemapFlags Flags,
126-
ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer)
127+
ValueMapTypeRemapper *TypeMapper, ValueMaterializer *Materializer,
128+
const MetadataSetTy *IdentityMD)
127129
: Flags(Flags), TypeMapper(TypeMapper),
128-
MCs(1, MappingContext(VM, Materializer)) {}
130+
MCs(1, MappingContext(VM, Materializer)), IdentityMD(IdentityMD) {}
129131

130132
/// ValueMapper should explicitly call \a flush() before destruction.
131133
~Mapper() { assert(!hasWorkToDo() && "Expected to be flushed"); }
@@ -899,6 +901,14 @@ std::optional<Metadata *> Mapper::mapSimpleMetadata(const Metadata *MD) {
899901
return wrapConstantAsMetadata(*CMD, mapValue(CMD->getValue()));
900902
}
901903

904+
// Map metadata from IdentityMD on first use. We need to add these nodes to
905+
// the mapping as otherwise metadata nodes numbering gets messed up. This is
906+
// still economical because the amount of data in IdentityMD may be a lot
907+
// larger than what will actually get used.
908+
if (IdentityMD && IdentityMD->contains(MD)) {
909+
return getVM().MD()[MD] = TrackingMDRef(const_cast<Metadata *>(MD));
910+
}
911+
902912
assert(isa<MDNode>(MD) && "Expected a metadata node");
903913

904914
return std::nullopt;
@@ -1198,8 +1208,9 @@ class FlushingMapper {
11981208

11991209
ValueMapper::ValueMapper(ValueToValueMapTy &VM, RemapFlags Flags,
12001210
ValueMapTypeRemapper *TypeMapper,
1201-
ValueMaterializer *Materializer)
1202-
: pImpl(new Mapper(VM, Flags, TypeMapper, Materializer)) {}
1211+
ValueMaterializer *Materializer,
1212+
const MetadataSetTy *IdentityMD)
1213+
: pImpl(new Mapper(VM, Flags, TypeMapper, Materializer, IdentityMD)) {}
12031214

12041215
ValueMapper::~ValueMapper() { delete getAsMapper(pImpl); }
12051216

0 commit comments

Comments
 (0)