Skip to content

Commit ba29467

Browse files
committed
[AMDGPU][SplitModule] Cleanup CallsExternal Handling (llvm#106528)
- Don't treat inline ASM as indirect calls - Remove call to alias testing, which was broken (only working by pure luck right now) and isn't needed anyway. GlobalOpt should take care of them for us. Change-Id: I7de442dd07b00198e9d01722a7e27601e399b540
1 parent c564e46 commit ba29467

File tree

5 files changed

+121
-75
lines changed

5 files changed

+121
-75
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 63 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@
4343
#include "llvm/Analysis/CallGraph.h"
4444
#include "llvm/Analysis/TargetTransformInfo.h"
4545
#include "llvm/IR/Function.h"
46+
#include "llvm/IR/InstIterator.h"
4647
#include "llvm/IR/Instruction.h"
4748
#include "llvm/IR/Module.h"
4849
#include "llvm/IR/User.h"
@@ -103,6 +104,11 @@ static cl::opt<bool> NoExternalizeGlobals(
103104
cl::desc("disables externalization of global variable with local linkage; "
104105
"may cause globals to be duplicated which increases binary size"));
105106

107+
static cl::opt<bool> NoExternalizeOnAddrTaken(
108+
"amdgpu-module-splitting-no-externalize-address-taken", cl::Hidden,
109+
cl::desc(
110+
"disables externalization of functions whose addresses are taken"));
111+
106112
static cl::opt<std::string>
107113
ModuleDotCfgOutput("amdgpu-module-splitting-print-module-dotcfg",
108114
cl::Hidden,
@@ -482,6 +488,9 @@ void SplitGraph::buildGraph(CallGraph &CG) {
482488
dbgs()
483489
<< "[build graph] constructing graph representation of the input\n");
484490

491+
// FIXME(?): Is the callgraph really worth using if we have to iterate the
492+
// function again whenever it fails to give us enough information?
493+
485494
// We build the graph by just iterating all functions in the module and
486495
// working on their direct callees. At the end, all nodes should be linked
487496
// together as expected.
@@ -492,29 +501,52 @@ void SplitGraph::buildGraph(CallGraph &CG) {
492501
continue;
493502

494503
// Look at direct callees and create the necessary edges in the graph.
495-
bool HasIndirectCall = false;
496-
Node &N = getNode(Cache, Fn);
504+
SetVector<const Function *> DirectCallees;
505+
bool CallsExternal = false;
497506
for (auto &CGEntry : *CG[&Fn]) {
498507
auto *CGNode = CGEntry.second;
499-
auto *Callee = CGNode->getFunction();
500-
if (!Callee) {
501-
// TODO: Don't consider inline assembly as indirect calls.
502-
if (CGNode == CG.getCallsExternalNode())
503-
HasIndirectCall = true;
504-
continue;
505-
}
506-
507-
if (!Callee->isDeclaration())
508-
createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall);
508+
if (auto *Callee = CGNode->getFunction()) {
509+
if (!Callee->isDeclaration())
510+
DirectCallees.insert(Callee);
511+
} else if (CGNode == CG.getCallsExternalNode())
512+
CallsExternal = true;
509513
}
510514

511515
// Keep track of this function if it contains an indirect call and/or if it
512516
// can be indirectly called.
513-
if (HasIndirectCall) {
514-
LLVM_DEBUG(dbgs() << "indirect call found in " << Fn.getName() << "\n");
515-
FnsWithIndirectCalls.push_back(&Fn);
517+
if (CallsExternal) {
518+
LLVM_DEBUG(dbgs() << " [!] callgraph is incomplete for ";
519+
Fn.printAsOperand(dbgs());
520+
dbgs() << " - analyzing function\n");
521+
522+
bool HasIndirectCall = false;
523+
for (const auto &Inst : instructions(Fn)) {
524+
// look at all calls without a direct callee.
525+
if (const auto *CB = dyn_cast<CallBase>(&Inst);
526+
CB && !CB->getCalledFunction()) {
527+
// inline assembly can be ignored, unless InlineAsmIsIndirectCall is
528+
// true.
529+
if (CB->isInlineAsm()) {
530+
LLVM_DEBUG(dbgs() << " found inline assembly\n");
531+
continue;
532+
}
533+
534+
// everything else is handled conservatively.
535+
HasIndirectCall = true;
536+
break;
537+
}
538+
}
539+
540+
if (HasIndirectCall) {
541+
LLVM_DEBUG(dbgs() << " indirect call found\n");
542+
FnsWithIndirectCalls.push_back(&Fn);
543+
}
516544
}
517545

546+
Node &N = getNode(Cache, Fn);
547+
for (const auto *Callee : DirectCallees)
548+
createEdge(N, getNode(Cache, *Callee), EdgeKind::DirectCall);
549+
518550
if (canBeIndirectlyCalled(Fn))
519551
IndirectlyCallableFns.push_back(&Fn);
520552
}
@@ -1326,13 +1358,21 @@ static void splitAMDGPUModule(
13261358
//
13271359
// Additionally, it guides partitioning to not duplicate this function if it's
13281360
// called directly at some point.
1329-
for (auto &Fn : M) {
1330-
if (Fn.hasAddressTaken()) {
1331-
if (Fn.hasLocalLinkage()) {
1332-
LLVM_DEBUG(dbgs() << "[externalize] " << Fn.getName()
1333-
<< " because its address is taken\n");
1361+
//
1362+
// TODO: Could we be smarter about this ? This makes all functions whose
1363+
// addresses are taken non-copyable. We should probably model this type of
1364+
// constraint in the graph and use it to guide splitting, instead of
1365+
// externalizing like this. Maybe non-copyable should really mean "keep one
1366+
// visible copy, then internalize all other copies" for some functions?
1367+
if (!NoExternalizeOnAddrTaken) {
1368+
for (auto &Fn : M) {
1369+
// TODO: Should aliases count? Probably not but they're so rare I'm not
1370+
// sure it's worth fixing.
1371+
if (Fn.hasLocalLinkage() && Fn.hasAddressTaken()) {
1372+
LLVM_DEBUG(dbgs() << "[externalize] "; Fn.printAsOperand(dbgs());
1373+
dbgs() << " because its address is taken\n");
1374+
externalize(Fn);
13341375
}
1335-
externalize(Fn);
13361376
}
13371377
}
13381378

@@ -1368,7 +1408,8 @@ static void splitAMDGPUModule(
13681408
dbgs() << "[graph] nodes:\n";
13691409
for (const SplitGraph::Node *N : SG.nodes()) {
13701410
dbgs() << " - [" << N->getID() << "]: " << N->getName() << " "
1371-
<< (N->isGraphEntryPoint() ? "(entry)" : "") << "\n";
1411+
<< (N->isGraphEntryPoint() ? "(entry)" : "") << " "
1412+
<< (N->isNonCopyable() ? "(noncopyable)" : "") << "\n";
13721413
}
13731414
});
13741415

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
; REQUIRES: asserts
2+
3+
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken -debug-only=amdgpu-split-module 2>&1 | FileCheck %s
4+
5+
; CHECK: [!] callgraph is incomplete for ptr @A - analyzing function
6+
; CHECK-NEXT: found inline assembly
7+
; CHECK-NOT: indirect call found
8+
9+
@addrthief = global [2 x ptr] [ptr @HelperA, ptr @HelperB]
10+
11+
define internal void @HelperA() {
12+
ret void
13+
}
14+
15+
define internal void @HelperB() {
16+
ret void
17+
}
18+
19+
define amdgpu_kernel void @A() {
20+
call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"()
21+
call void @HelperA()
22+
ret void
23+
}
24+
25+
define amdgpu_kernel void @B(ptr %out) {
26+
call void @HelperB()
27+
ret void
28+
}
Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-no-externalize-address-taken
2+
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
3+
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
4+
5+
; CHECK0: define internal void @HelperB
6+
; CHECK0: define amdgpu_kernel void @B
7+
8+
; CHECK1: define internal void @HelperA()
9+
; CHECK1: define amdgpu_kernel void @A()
10+
11+
@addrthief = global [2 x ptr] [ptr @HelperA, ptr @HelperB]
12+
13+
define internal void @HelperA() {
14+
ret void
15+
}
16+
17+
define internal void @HelperB() {
18+
ret void
19+
}
20+
21+
define amdgpu_kernel void @A() {
22+
call void asm sideeffect "v_mov_b32 v0, 7", "~{v0}"()
23+
call void @HelperA()
24+
ret void
25+
}
26+
27+
define amdgpu_kernel void @B(ptr %out) {
28+
call void @HelperB()
29+
ret void
30+
}

llvm/test/tools/llvm-split/AMDGPU/kernels-alias-dependencies.ll

Lines changed: 0 additions & 41 deletions
This file was deleted.

llvm/test/tools/llvm-split/AMDGPU/kernels-dependency-indirect.ll

Lines changed: 0 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,18 +3,6 @@
33
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
44
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
55

6-
; We have 4 kernels:
7-
; - Each kernel has an internal helper
8-
; - @A and @B's helpers does an indirect call.
9-
;
10-
; We default to putting A/B in P0, alongside a copy
11-
; of all helpers who have their address taken.
12-
; The other kernels can still go into separate partitions.
13-
;
14-
; Note that dependency discovery shouldn't stop upon finding an
15-
; indirect call. HelperC/D should also end up in P0 as they
16-
; are dependencies of HelperB.
17-
186
; CHECK0: define internal void @HelperD
197
; CHECK0: define amdgpu_kernel void @D
208

0 commit comments

Comments
 (0)