Skip to content

Commit cc3657f

Browse files
Pierre-vh authored and chencha3 committed
(Reland) [AMDGPU] Run LowerLDS at the end of the fullLTO pipeline (llvm#85626)
Reland of llvm#75333
1 parent 01ee0b1 commit cc3657f

File tree

2 files changed

+56
-0
lines changed

2 files changed

+56
-0
lines changed

llvm/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -793,6 +793,15 @@ void AMDGPUTargetMachine::registerPassBuilderCallbacks(
793793

794794
PM.addPass(createCGSCCToFunctionPassAdaptor(std::move(FPM)));
795795
});
796+
797+
PB.registerFullLinkTimeOptimizationLastEPCallback(
798+
[this](ModulePassManager &PM, OptimizationLevel Level) {
799+
// We want to support the -lto-partitions=N option as "best effort".
800+
// For that, we need to lower LDS earlier in the pipeline before the
801+
// module is partitioned for codegen.
802+
if (EnableLowerModuleLDS)
803+
PM.addPass(AMDGPULowerModuleLDSPass(*this));
804+
});
796805
}
797806

798807
int64_t AMDGPUTargetMachine::getNullPointerValue(unsigned AddrSpace) {
Lines changed: 47 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,47 @@
1+
2+
; Default O0
3+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
4+
; RUN: llvm-lto2 run -O0 -cg-opt-level 0 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
5+
6+
; Unified O0
7+
; RUN: opt -unified-lto -thinlto-split-lto-unit -thinlto-bc -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
8+
; RUN: llvm-lto2 run -unified-lto=full -O0 -cg-opt-level 0 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
9+
10+
; Default O1
11+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
12+
; RUN: llvm-lto2 run -O1 -cg-opt-level 1 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
13+
14+
; Unified O1
15+
; RUN: opt -unified-lto -thinlto-split-lto-unit -thinlto-bc -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
16+
; RUN: llvm-lto2 run -unified-lto=full -O1 -cg-opt-level 1 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
17+
18+
; Default O2
19+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
20+
; RUN: llvm-lto2 run -O2 -cg-opt-level 2 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
21+
22+
; Unified O2
23+
; RUN: opt -unified-lto -thinlto-split-lto-unit -thinlto-bc -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
24+
; RUN: llvm-lto2 run -unified-lto=full -O2 -cg-opt-level 2 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
25+
26+
; Default O3
27+
; RUN: opt -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
28+
; RUN: llvm-lto2 run -O3 -cg-opt-level 3 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
29+
30+
; Unified O3
31+
; RUN: opt -unified-lto -thinlto-split-lto-unit -thinlto-bc -mtriple=amdgcn-- -mcpu=gfx1030 %s -o %t.bc
32+
; RUN: llvm-lto2 run -unified-lto=full -O3 -cg-opt-level 3 %t.bc -o %t.s -r %t.bc,test,px -debug-pass-manager -debug-pass=Structure 2>&1 | FileCheck %s
33+
34+
; First print will be from the New PM during the full LTO pipeline.
35+
; Second print will be from the legacy PM during the CG pipeline.
36+
37+
; CHECK: Running pass: AMDGPULowerModuleLDSPass on [module]
38+
; CHECK: ModulePass Manager
39+
; CHECK: Lower uses of LDS variables from non-kernel functions
40+
41+
@lds = internal unnamed_addr addrspace(3) global i32 poison, align 4
42+
43+
define amdgpu_kernel void @test() {
44+
entry:
45+
store i32 1, ptr addrspace(3) @lds
46+
ret void
47+
}

0 commit comments

Comments
 (0)