Skip to content

Commit 4a0dc3e

Browse files
authored
[AMDGPU][SplitModule] Handle !callees metadata (#108802)
See #106528 to review the first commit. Handle the `!callees` metadata to further reduce the number of indirect-call cases that end up conservatively assuming that any indirectly callable function is a potential target.
1 parent c01ddbe commit 4a0dc3e

File tree

2 files changed

+117
-15
lines changed

2 files changed

+117
-15
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -482,6 +482,29 @@ void SplitGraph::Node::visitAllDependencies(
482482
}
483483
}
484484

485+
/// Checks if \p I has MD_callees and if it does, parse it and put the function
486+
/// in \p Callees.
487+
///
488+
/// \returns true if there was metadata and it was parsed correctly. false if
489+
/// there was no MD or if it contained unknown entries and parsing failed.
490+
/// If this returns false, \p Callees will contain incomplete information
491+
/// and must not be used.
492+
static bool handleCalleesMD(const Instruction &I,
493+
SetVector<Function *> &Callees) {
494+
auto *MD = I.getMetadata(LLVMContext::MD_callees);
495+
if (!MD)
496+
return false;
497+
498+
for (const auto &Op : MD->operands()) {
499+
Function *Callee = mdconst::extract_or_null<Function>(Op);
500+
if (!Callee)
501+
return false;
502+
Callees.insert(Callee);
503+
}
504+
505+
return true;
506+
}
507+
485508
void SplitGraph::buildGraph(CallGraph &CG) {
486509
SplitModuleTimer SMT("buildGraph", "graph construction");
487510
LLVM_DEBUG(
@@ -519,28 +542,38 @@ void SplitGraph::buildGraph(CallGraph &CG) {
519542
Fn.printAsOperand(dbgs());
520543
dbgs() << " - analyzing function\n");
521544

522-
bool HasIndirectCall = false;
545+
SetVector<Function *> KnownCallees;
546+
bool HasUnknownIndirectCall = false;
523547
for (const auto &Inst : instructions(Fn)) {
524548
// look at all calls without a direct callee.
525-
if (const auto *CB = dyn_cast<CallBase>(&Inst);
526-
CB && !CB->getCalledFunction()) {
527-
// inline assembly can be ignored, unless InlineAsmIsIndirectCall is
528-
// true.
529-
if (CB->isInlineAsm()) {
530-
LLVM_DEBUG(dbgs() << " found inline assembly\n");
531-
continue;
532-
}
533-
534-
// everything else is handled conservatively.
535-
HasIndirectCall = true;
536-
break;
549+
const auto *CB = dyn_cast<CallBase>(&Inst);
550+
if (!CB || CB->getCalledFunction())
551+
continue;
552+
553+
// inline assembly can be ignored, unless InlineAsmIsIndirectCall is
554+
// true.
555+
if (CB->isInlineAsm()) {
556+
LLVM_DEBUG(dbgs() << " found inline assembly\n");
557+
continue;
537558
}
559+
560+
if (handleCalleesMD(Inst, KnownCallees))
561+
continue;
562+
// If we failed to parse any !callees MD, or some was missing,
563+
// the entire KnownCallees list is now unreliable.
564+
KnownCallees.clear();
565+
566+
// Everything else is handled conservatively. If we fall into the
567+
// conservative case don't bother analyzing further.
568+
HasUnknownIndirectCall = true;
569+
break;
538570
}
539571

540-
if (HasIndirectCall) {
572+
if (HasUnknownIndirectCall) {
541573
LLVM_DEBUG(dbgs() << " indirect call found\n");
542574
FnsWithIndirectCalls.push_back(&Fn);
543-
}
575+
} else if (!KnownCallees.empty())
576+
DirectCallees.insert(KnownCallees.begin(), KnownCallees.end());
544577
}
545578

546579
Node &N = getNode(Cache, Fn);
Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,69 @@
1+
; RUN: sed -s 's/_MD_/, !callees !{ptr @CallCandidate0}/' %s | llvm-split -o %t -j 3 -mtriple amdgcn-amd-amdhsa
2+
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
3+
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
4+
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s
5+
6+
; RUN: sed -s 's/_MD_//g' %s | llvm-split -o %t-nomd -j 3 -mtriple amdgcn-amd-amdhsa
7+
; RUN: llvm-dis -o - %t-nomd0 | FileCheck --check-prefix=CHECK-NOMD0 --implicit-check-not=define %s
8+
; RUN: llvm-dis -o - %t-nomd1 | FileCheck --check-prefix=CHECK-NOMD1 --implicit-check-not=define %s
9+
; RUN: llvm-dis -o - %t-nomd2 | FileCheck --check-prefix=CHECK-NOMD2 --implicit-check-not=define %s
10+
11+
; CHECK0: define internal void @HelperC
12+
; CHECK0: define amdgpu_kernel void @C
13+
14+
; CHECK1: define hidden void @CallCandidate1
15+
; CHECK1: define internal void @HelperB
16+
; CHECK1: define amdgpu_kernel void @B
17+
18+
; CHECK2: define internal void @HelperA
19+
; CHECK2: define hidden void @CallCandidate0
20+
; CHECK2: define amdgpu_kernel void @A
21+
22+
; CHECK-NOMD0: define internal void @HelperC
23+
; CHECK-NOMD0: define amdgpu_kernel void @C
24+
25+
; CHECK-NOMD1: define internal void @HelperB
26+
; CHECK-NOMD1: define amdgpu_kernel void @B
27+
28+
; CHECK-NOMD2: define internal void @HelperA
29+
; CHECK-NOMD2: define hidden void @CallCandidate0
30+
; CHECK-NOMD2: define hidden void @CallCandidate1
31+
; CHECK-NOMD2: define amdgpu_kernel void @A
32+
33+
@addrthief = global [2 x ptr] [ptr @CallCandidate0, ptr @CallCandidate1]
34+
35+
define internal void @HelperA(ptr %call) {
36+
call void %call() _MD_
37+
ret void
38+
}
39+
40+
define internal void @CallCandidate0() {
41+
ret void
42+
}
43+
44+
define internal void @CallCandidate1() {
45+
ret void
46+
}
47+
48+
define internal void @HelperB() {
49+
ret void
50+
}
51+
52+
define internal void @HelperC() {
53+
ret void
54+
}
55+
56+
define amdgpu_kernel void @A(ptr %call) {
57+
call void @HelperA(ptr %call)
58+
ret void
59+
}
60+
61+
define amdgpu_kernel void @B() {
62+
call void @HelperB()
63+
ret void
64+
}
65+
66+
define amdgpu_kernel void @C() {
67+
call void @HelperC()
68+
ret void
69+
}

0 commit comments

Comments
 (0)