43
43
#include " llvm/Analysis/CallGraph.h"
44
44
#include " llvm/Analysis/TargetTransformInfo.h"
45
45
#include " llvm/IR/Function.h"
46
+ #include " llvm/IR/InstIterator.h"
46
47
#include " llvm/IR/Instruction.h"
47
48
#include " llvm/IR/Module.h"
48
49
#include " llvm/IR/User.h"
@@ -103,6 +104,11 @@ static cl::opt<bool> NoExternalizeGlobals(
103
104
cl::desc (" disables externalization of global variable with local linkage; "
104
105
" may cause globals to be duplicated which increases binary size" ));
105
106
107
// Hidden boolean command-line flag. When it is set, splitAMDGPUModule skips
// the loop that externalizes functions which have local linkage and whose
// address is taken.
+ static cl::opt<bool > NoExternalizeOnAddrTaken (
108
+ " amdgpu-module-splitting-no-externalize-address-taken" , cl::Hidden,
109
+ cl::desc (
110
+ " disables externalization of functions whose addresses are taken" ));
111
+
106
112
static cl::opt<std::string>
107
113
ModuleDotCfgOutput (" amdgpu-module-splitting-print-module-dotcfg" ,
108
114
cl::Hidden,
@@ -482,6 +488,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
482
488
dbgs ()
483
489
<< " [build graph] constructing graph representation of the input\n " );
484
490
491
+ // FIXME(?): Is the callgraph really worth using if we have to iterate the
492
+ // function again whenever it fails to give us enough information?
493
+
485
494
// We build the graph by just iterating all functions in the module and
486
495
// working on their direct callees. At the end, all nodes should be linked
487
496
// together as expected.
@@ -492,29 +501,52 @@ void SplitGraph::buildGraph(CallGraph &CG) {
492
501
continue ;
493
502
494
503
// Look at direct callees and create the necessary edges in the graph.
495
- bool HasIndirectCall = false ;
496
- Node &N = getNode (Cache, Fn) ;
504
+ SetVector< const Function *> DirectCallees ;
505
+ bool CallsExternal = false ;
497
506
for (auto &CGEntry : *CG[&Fn]) {
498
507
auto *CGNode = CGEntry.second ;
499
- auto *Callee = CGNode->getFunction ();
500
- if (!Callee) {
501
- // TODO: Don't consider inline assembly as indirect calls.
502
- if (CGNode == CG.getCallsExternalNode ())
503
- HasIndirectCall = true ;
504
- continue ;
505
- }
506
-
507
- if (!Callee->isDeclaration ())
508
- createEdge (N, getNode (Cache, *Callee), EdgeKind::DirectCall);
508
+ if (auto *Callee = CGNode->getFunction ()) {
509
+ if (!Callee->isDeclaration ())
510
+ DirectCallees.insert (Callee);
511
+ } else if (CGNode == CG.getCallsExternalNode ())
512
+ CallsExternal = true ;
509
513
}
510
514
511
515
// Keep track of this function if it contains an indirect call and/or if it
512
516
// can be indirectly called.
513
- if (HasIndirectCall) {
514
- LLVM_DEBUG (dbgs () << " indirect call found in " << Fn.getName () << " \n " );
515
- FnsWithIndirectCalls.push_back (&Fn);
517
+ if (CallsExternal) {
518
+ LLVM_DEBUG (dbgs () << " [!] callgraph is incomplete for " ;
519
+ Fn.printAsOperand (dbgs ());
520
+ dbgs () << " - analyzing function\n " );
521
+
522
+ bool HasIndirectCall = false ;
523
+ for (const auto &Inst : instructions (Fn)) {
524
+ // look at all calls without a direct callee.
525
+ if (const auto *CB = dyn_cast<CallBase>(&Inst);
526
+ CB && !CB->getCalledFunction ()) {
527
+ // inline assembly is not treated as an indirect call: it is skipped
528
+ // unconditionally (no option is consulted here).
529
+ if (CB->isInlineAsm ()) {
530
+ LLVM_DEBUG (dbgs () << " found inline assembly\n " );
531
+ continue ;
532
+ }
533
+
534
+ // everything else is handled conservatively.
535
+ HasIndirectCall = true ;
536
+ break ;
537
+ }
538
+ }
539
+
540
+ if (HasIndirectCall) {
541
+ LLVM_DEBUG (dbgs () << " indirect call found\n " );
542
+ FnsWithIndirectCalls.push_back (&Fn);
543
+ }
516
544
}
517
545
546
+ Node &N = getNode (Cache, Fn);
547
+ for (const auto *Callee : DirectCallees)
548
+ createEdge (N, getNode (Cache, *Callee), EdgeKind::DirectCall);
549
+
518
550
if (canBeIndirectlyCalled (Fn))
519
551
IndirectlyCallableFns.push_back (&Fn);
520
552
}
@@ -1326,13 +1358,21 @@ static void splitAMDGPUModule(
1326
1358
//
1327
1359
// Additionally, it guides partitioning to not duplicate this function if it's
1328
1360
// called directly at some point.
1329
- for (auto &Fn : M) {
1330
- if (Fn.hasAddressTaken ()) {
1331
- if (Fn.hasLocalLinkage ()) {
1332
- LLVM_DEBUG (dbgs () << " [externalize] " << Fn.getName ()
1333
- << " because its address is taken\n " );
1361
+ //
1362
+ // TODO: Could we be smarter about this? This makes all functions whose
1363
+ // addresses are taken non-copyable. We should probably model this type of
1364
+ // constraint in the graph and use it to guide splitting, instead of
1365
+ // externalizing like this. Maybe non-copyable should really mean "keep one
1366
+ // visible copy, then internalize all other copies" for some functions?
1367
+ if (!NoExternalizeOnAddrTaken) {
1368
+ for (auto &Fn : M) {
1369
+ // TODO: Should aliases count? Probably not but they're so rare I'm not
1370
+ // sure it's worth fixing.
1371
+ if (Fn.hasLocalLinkage () && Fn.hasAddressTaken ()) {
1372
+ LLVM_DEBUG (dbgs () << " [externalize] " ; Fn.printAsOperand (dbgs ());
1373
+ dbgs () << " because its address is taken\n " );
1374
+ externalize (Fn);
1334
1375
}
1335
- externalize (Fn);
1336
1376
}
1337
1377
}
1338
1378
@@ -1368,7 +1408,8 @@ static void splitAMDGPUModule(
1368
1408
dbgs () << " [graph] nodes:\n " ;
1369
1409
for (const SplitGraph::Node *N : SG.nodes ()) {
1370
1410
dbgs () << " - [" << N->getID () << " ]: " << N->getName () << " "
1371
- << (N->isGraphEntryPoint () ? " (entry)" : " " ) << " \n " ;
1411
+ << (N->isGraphEntryPoint () ? " (entry)" : " " ) << " "
1412
+ << (N->isNonCopyable () ? " (noncopyable)" : " " ) << " \n " ;
1372
1413
}
1373
1414
});
1374
1415
0 commit comments