Skip to content

Commit cd5d0aa

Browse files
Pierre-vh authored and AlexisPerry committed
[AMDGPU][SplitModule] Allow non-kernels to be treated as roots (llvm#95902)
I initially assumed only kernels could be roots, but that is wrong. A function with no callers also needs to be a root to ensure it is handled correctly. Such functions are very rare because we usually internalize everything, and internal functions with no callers are deleted. When they are present, we also need to consider their dependencies and act accordingly. Previously, we could put such a function "by default" in P0, but it could call another function with internal linkage defined in another module, which was of course incorrect. Fixes SWDEV-467695.
1 parent ee4f78f commit cd5d0aa

7 files changed

+294
-112
lines changed

llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp

Lines changed: 138 additions & 108 deletions
Large diffs are not rendered by default.

llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize-with-call.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-kernel-threshold=0
1+
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
22
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
33
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
44
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s

llvm/test/tools/llvm-split/AMDGPU/address-taken-externalize.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-kernel-threshold=0
1+
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
22
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
33
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
44

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
; RUN: llvm-split -o %t %s -j 2 -mtriple amdgcn-amd-amdhsa -debug 2>&1 | FileCheck %s --implicit-check-not="[root]"
2+
; REQUIRES: asserts
3+
4+
; func_3 is never directly called, so it needs to be considered
5+
; as a root to handle this module correctly.
6+
7+
; CHECK: [root] kernel_1
8+
; CHECK-NEXT: [dependency] func_1
9+
; CHECK-NEXT: [dependency] func_2
10+
; CHECK-NEXT: [root] func_3
11+
; CHECK-NEXT: [dependency] func_2
12+
13+
define amdgpu_kernel void @kernel_1() {
14+
entry:
15+
call void @func_1()
16+
ret void
17+
}
18+
19+
define linkonce_odr hidden void @func_1() {
20+
entry:
21+
%call = call i32 @func_2()
22+
ret void
23+
}
24+
25+
define linkonce_odr hidden i32 @func_2() #0 {
26+
entry:
27+
ret i32 0
28+
}
29+
30+
define void @func_3() {
31+
entry:
32+
%call = call i32 @func_2()
33+
ret void
34+
}
35+
36+
attributes #0 = { noinline optnone }

llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
1-
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-kernel-threshold=1.2 -amdgpu-module-splitting-large-kernel-merge-overlap=0.5
1+
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=1.2 -amdgpu-module-splitting-large-function-merge-overlap=0.5
22
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 %s
33
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 %s
44
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 %s
55

6-
; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-kernel-threshold=0
6+
; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-function-threshold=0
77
; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 %s
88
; RUN: llvm-dis -o - %t.nolarge1 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK1 %s
99
; RUN: llvm-dis -o - %t.nolarge2 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK2 %s
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
2+
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=DEFINE %s
3+
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=DEFINE %s
4+
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=DEFINE %s
5+
6+
; 3 functions, each with their own dependencies, should go into 3
7+
; distinct partitions.
8+
9+
; CHECK0: define void @C
10+
; CHECK0: define internal void @HelperC
11+
12+
; CHECK1: define void @B
13+
; CHECK1: define internal void @HelperB
14+
15+
; CHECK2: define void @A
16+
; CHECK2: define internal void @HelperA
17+
18+
19+
define void @A() {
20+
call void @HelperA()
21+
ret void
22+
}
23+
24+
define internal void @HelperA() {
25+
ret void
26+
}
27+
28+
define void @B() {
29+
call void @HelperB()
30+
ret void
31+
}
32+
33+
define internal void @HelperB() {
34+
ret void
35+
}
36+
37+
define void @C() {
38+
call void @HelperC()
39+
ret void
40+
}
41+
42+
define internal void @HelperC() {
43+
ret void
44+
}
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa
2+
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=DEFINE %s
3+
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=DEFINE %s
4+
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=DEFINE %s
5+
6+
; We have 4 functions:
7+
; - Each function has an internal helper
8+
; - @A and @B's helpers each do an indirect call.
9+
;
10+
; For non-kernels, indirect calls shouldn't matter, so
11+
; @CallCandidate doesn't have to be in A/B's partition, unlike
12+
; in the corresponding tests for kernels where it has to.
13+
14+
; CHECK0: define hidden void @HelperA
15+
; CHECK0: define hidden void @HelperB
16+
; CHECK0: define internal void @HelperC
17+
; CHECK0: define internal void @HelperD
18+
; CHECK0: define void @A
19+
; CHECK0: define void @B
20+
21+
; CHECK1: define internal void @HelperD
22+
; CHECK1: define void @D
23+
24+
; CHECK2: define hidden void @CallCandidate
25+
; CHECK2: define internal void @HelperC
26+
; CHECK2: define void @C
27+
28+
@addrthief = global [3 x ptr] [ptr @HelperA, ptr @HelperB, ptr @CallCandidate]
29+
30+
define internal void @HelperA(ptr %call) {
31+
call void %call()
32+
ret void
33+
}
34+
35+
define internal void @HelperB(ptr %call) {
36+
call void @HelperC()
37+
call void %call()
38+
call void @HelperD()
39+
ret void
40+
}
41+
42+
define internal void @CallCandidate() {
43+
ret void
44+
}
45+
46+
define internal void @HelperC() {
47+
ret void
48+
}
49+
50+
define internal void @HelperD() {
51+
ret void
52+
}
53+
54+
define void @A(ptr %call) {
55+
call void @HelperA(ptr %call)
56+
ret void
57+
}
58+
59+
define void @B(ptr %call) {
60+
call void @HelperB(ptr %call)
61+
ret void
62+
}
63+
64+
define void @C() {
65+
call void @HelperC()
66+
ret void
67+
}
68+
69+
define void @D() {
70+
call void @HelperD()
71+
ret void
72+
}

0 commit comments

Comments
 (0)