Skip to content

[AMDGPU][SplitModule] Do not create empty modules #135761

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Apr 25, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions llvm/include/llvm/Target/TargetMachine.h
Original file line number Diff line number Diff line change
Expand Up @@ -451,6 +451,9 @@ class TargetMachine {
/// Entry point for module splitting. Targets can implement custom module
/// splitting logic, mainly used by LTO for --lto-partitions.
///
/// On success, this guarantees that between 1 and \p NumParts modules were
/// created and passed to \p ModuleCallback.
///
/// \returns `true` if the module was split, `false` otherwise. When `false`
/// is returned, it is assumed that \p ModuleCallback has never been called
/// and \p M has not been modified.
Expand Down
17 changes: 15 additions & 2 deletions llvm/lib/Target/AMDGPU/AMDGPUSplitModule.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1478,6 +1478,10 @@ static void splitAMDGPUModule(
<< "' - Partition summaries will not be printed\n";
}

// One module will import all GlobalValues that are not Functions
// and are not subject to conservative import.
bool ImportAllGVs = true;

for (unsigned PID = 0; PID < NumParts; ++PID) {
SplitModuleTimer SMT2("modules_creation",
"creating modules for each partition");
Expand All @@ -1487,6 +1491,13 @@ static void splitAMDGPUModule(
for (unsigned NodeID : (*Proposal)[PID].set_bits())
FnsInPart.insert(&SG.getNode(NodeID).getFunction());

// Don't create empty modules.
if (FnsInPart.empty()) {
LLVM_DEBUG(dbgs() << "[split] P" << PID
<< " is empty, not creating module\n");
continue;
}

Comment on lines 1491 to +1500
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
for (unsigned NodeID : (*Proposal)[PID].set_bits())
FnsInPart.insert(&SG.getNode(NodeID).getFunction());
// Don't create empty modules.
if (FnsInPart.empty()) {
LLVM_DEBUG(dbgs() << "[split] P" << PID
<< " is empty, not creating module\n");
continue;
}
// Don't create empty modules.
if ((*Proposal)[PID].none()) {
LLVM_DEBUG(dbgs() << "[split] P" << PID
<< " is empty, not creating module\n");
continue;
}
for (unsigned NodeID : (*Proposal)[PID].set_bits())
FnsInPart.insert(&SG.getNode(NodeID).getFunction());

ValueToValueMapTy VMap;
CostType PartCost = 0;
std::unique_ptr<Module> MPart(
Expand All @@ -1500,10 +1511,12 @@ static void splitAMDGPUModule(
return false;
}

// Everything else goes in the first partition.
return needsConservativeImport(GV) || PID == 0;
// Everything else goes in the first non-empty module we create.
return ImportAllGVs || needsConservativeImport(GV);
}));

ImportAllGVs = false;

// FIXME: Aliases aren't seen often, and their handling isn't perfect so
// bugs are possible.

Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; 3 kernels:
; - A does a direct call to HelperA
Expand All @@ -11,14 +10,11 @@
; The helper functions will get externalized, so C/A will end up
; in the same partition.

; P0 is empty.
; CHECK0: declare
; CHECK0: define amdgpu_kernel void @B(ptr %dst)

; CHECK1: define amdgpu_kernel void @B(ptr %dst)

; CHECK2: define hidden void @HelperA()
; CHECK2: define amdgpu_kernel void @A()
; CHECK2: define amdgpu_kernel void @C()
; CHECK1: define hidden void @HelperA()
; CHECK1: define amdgpu_kernel void @A()
; CHECK1: define amdgpu_kernel void @C()

define internal void @HelperA() {
ret void
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging-weak_odr.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0
; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s
Expand All @@ -15,19 +14,18 @@
; Also check w/o large kernels processing to verify they are indeed handled
; differently.

; P0 is empty
; CHECK0: declare
; Only two partitions created for the first command.

; CHECK1: define internal void @HelperC()
; CHECK1: define weak_odr amdgpu_kernel void @C
; CHECK0: define internal void @HelperC()
; CHECK0: define weak_odr amdgpu_kernel void @C

; CHECK2: define internal void @large2()
; CHECK2: define internal void @large1()
; CHECK2: define internal void @large0()
; CHECK2: define internal void @HelperA()
; CHECK2: define internal void @HelperB()
; CHECK2: define amdgpu_kernel void @A
; CHECK2: define weak_odr amdgpu_kernel void @B
; CHECK1: define internal void @large2()
; CHECK1: define internal void @large1()
; CHECK1: define internal void @large0()
; CHECK1: define internal void @HelperA()
; CHECK1: define internal void @HelperB()
; CHECK1: define amdgpu_kernel void @A
; CHECK1: define weak_odr amdgpu_kernel void @B

; NOLARGEKERNELS-CHECK0: define internal void @HelperC()
; NOLARGEKERNELS-CHECK0: define weak_odr amdgpu_kernel void @C
Expand Down
22 changes: 10 additions & 12 deletions llvm/test/tools/llvm-split/AMDGPU/large-kernels-merging.ll
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t2 | FileCheck --check-prefix=CHECK2 --implicit-check-not=define %s

; RUN: llvm-split -o %t.nolarge %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-large-threshold=0 -amdgpu-module-splitting-max-depth=0
; RUN: llvm-dis -o - %t.nolarge0 | FileCheck --check-prefix=NOLARGEKERNELS-CHECK0 --implicit-check-not=define %s
Expand All @@ -15,19 +14,18 @@
; Also check w/o large kernels processing to verify they are indeed handled
; differently.

; P0 is empty
; CHECK0: declare
; Only 2 partitions for the first command.

; CHECK1: define internal void @HelperC()
; CHECK1: define amdgpu_kernel void @C
; CHECK0: define internal void @HelperC()
; CHECK0: define amdgpu_kernel void @C

; CHECK2: define internal void @large2()
; CHECK2: define internal void @large1()
; CHECK2: define internal void @large0()
; CHECK2: define internal void @HelperA()
; CHECK2: define internal void @HelperB()
; CHECK2: define amdgpu_kernel void @A
; CHECK2: define amdgpu_kernel void @B
; CHECK1: define internal void @large2()
; CHECK1: define internal void @large1()
; CHECK1: define internal void @large0()
; CHECK1: define internal void @HelperA()
; CHECK1: define internal void @HelperB()
; CHECK1: define amdgpu_kernel void @A
; CHECK1: define amdgpu_kernel void @B

; NOLARGEKERNELS-CHECK0: define internal void @HelperC()
; NOLARGEKERNELS-CHECK0: define amdgpu_kernel void @C
Expand Down
21 changes: 21 additions & 0 deletions llvm/test/tools/llvm-split/AMDGPU/preserve-globals.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
; RUN: llvm-split -o %t %s -j 3 -mtriple amdgcn-amd-amdhsa -amdgpu-module-splitting-max-depth=0 -amdgpu-module-splitting-large-threshold=1.2 -amdgpu-module-splitting-merge-threshold=0.5
; RUN: llvm-dis -o - %t0 | FileCheck --check-prefix=CHECK0 --implicit-check-not=define %s
; RUN: llvm-dis -o - %t1 | FileCheck --check-prefix=CHECK1 --implicit-check-not=define %s

; Only 2 out of 3 partitions are created; check that the external global is preserved in the first partition.

; CHECK0: @foobar = linkonce_odr global i64 52
; CHECK0: define amdgpu_kernel void @B

; CHECK1-NOT: @foobar = linkonce_odr global i64 52
; CHECK1: define amdgpu_kernel void @A

; Global variable that neither kernel below references. The checks at the top
; of this file expect it in the first output module and not in the second one.
@foobar = linkonce_odr global i64 52

; Kernel with an empty body and no callees. The checks at the top of this file
; expect it to be the only function emitted in the second output module.
define amdgpu_kernel void @A() {
ret void
}

; Kernel with an empty body and no callees. The checks at the top of this file
; expect it in the first output module, alongside the @foobar global.
define amdgpu_kernel void @B() {
ret void
}
Loading