Skip to content

Commit 959448f

Browse files
[Transforms][IPO] Add func suffix in ArgumentPromotion and DeadArgumentElimination (#105742)

The ArgumentPromotion and DeadArgumentElimination passes can change function signatures, but the function name remains the same as before the transformation. This makes tracing with bpf programs hard, because users tend to rely on the function signature in the source. See discussion [1] for details.

This patch adds a suffix to functions whose signatures are changed. The suffix lets users know that the function signature has changed and that they need to inspect the IR or binary to find the modified signature before tracing those functions. The suffix for ArgumentPromotion is ".argprom" and the suffixes for DeadArgumentElimination are ".argelim" and ".retelim". The suffix also gives users a hint about what kind of transformation has been done.

With this patch, I built a recent linux kernel with full LTO enabled. I got 4 functions with only argpromotion like
```
set_track_update.argelim.argprom
pmd_trans_huge_lock.argprom
...
```
I got 1058 functions with only deadargelim like
```
process_bit0.argelim
pci_io_ecs_init.argelim
...
```
I got 3 functions with both argpromotion and deadargelim
```
set_track_update.argelim.argprom
zero_pud_populate.argelim.argprom
zero_pmd_populate.argelim.argprom
```

[1] #104678
1 parent 30cdf1e commit 959448f

File tree

79 files changed

+342
-263
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

79 files changed

+342
-263
lines changed

llvm/lib/Transforms/IPO/ArgumentPromotion.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -215,6 +215,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,
215215

216216
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
217217
NF->takeName(F);
218+
NF->setName(NF->getName() + ".argprom");
218219

219220
// Loop over all the callers of the function, transforming the call sites to
220221
// pass in the loaded pointers.

llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,10 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) {
889889
// it again.
890890
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
891891
NF->takeName(F);
892+
if (NumArgumentsEliminated)
893+
NF->setName(NF->getName() + ".argelim");
894+
else
895+
NF->setName(NF->getName() + ".retelim");
892896
NF->IsNewDbgInfoFormat = F->IsNewDbgInfoFormat;
893897

894898
// Loop over all the callers of the function, transforming the call sites to

llvm/test/Analysis/LazyCallGraph/remove-dead-function-spurious-ref-edge.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,15 +9,15 @@ define internal void @a() alwaysinline {
99
}
1010

1111
define internal void @b(ptr) noinline {
12-
; CHECK-LABEL: @b(
12+
; CHECK-LABEL: @b.argprom(
1313
; CHECK-NEXT: ret void
1414
;
1515
ret void
1616
}
1717

1818
define internal void @c() noinline {
1919
; CHECK-LABEL: @c(
20-
; CHECK-NEXT: call void @b()
20+
; CHECK-NEXT: call void @b.argprom()
2121
; CHECK-NEXT: ret void
2222
;
2323
call void @b(ptr @a)

llvm/test/BugPoint/remove_arguments_test.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
declare i32 @test2()
1313

14-
; CHECK: define void @test() {
14+
; CHECK: define void @test.argelim() {
1515
define i32 @test(i32 %A, ptr %B, float %C) {
1616
call i32 @test2()
1717
ret i32 %1

llvm/test/CodeGen/AArch64/arg_promotion.ll

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -38,16 +38,16 @@ define dso_local void @caller_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
3838
; CHECK-LABEL: define dso_local void @caller_4xi32(
3939
; CHECK-NEXT: entry:
4040
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
41-
; CHECK-NEXT: call fastcc void @callee_4xi32(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
41+
; CHECK-NEXT: call fastcc void @callee_4xi32.argprom.argprom(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
4242
; CHECK-NEXT: ret void
4343
;
4444
entry:
45-
call fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst)
45+
call fastcc void @callee_4xi32.argprom(ptr noalias %src, ptr noalias %dst)
4646
ret void
4747
}
4848

49-
define internal fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
50-
; CHECK-LABEL: define internal fastcc void @callee_4xi32(
49+
define internal fastcc void @callee_4xi32.argprom(ptr noalias %src, ptr noalias %dst) #1 {
50+
; CHECK-LABEL: define internal fastcc void @callee_4xi32.argprom.argprom(
5151
; CHECK-NEXT: entry:
5252
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
5353
; CHECK-NEXT: ret void
@@ -65,7 +65,7 @@ define dso_local void @caller_i256(ptr noalias %src, ptr noalias %dst) #0 {
6565
; CHECK-LABEL: define dso_local void @caller_i256(
6666
; CHECK-NEXT: entry:
6767
; CHECK-NEXT: [[SRC_VAL:%.*]] = load i256, ptr [[SRC:%.*]], align 16
68-
; CHECK-NEXT: call fastcc void @callee_i256(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
68+
; CHECK-NEXT: call fastcc void @callee_i256.argprom(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
6969
; CHECK-NEXT: ret void
7070
;
7171
entry:
@@ -74,7 +74,7 @@ entry:
7474
}
7575

7676
define internal fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst) #0 {
77-
; CHECK-LABEL: define internal fastcc void @callee_i256(
77+
; CHECK-LABEL: define internal fastcc void @callee_i256.argprom(
7878
; CHECK-NEXT: entry:
7979
; CHECK-NEXT: store i256 [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
8080
; CHECK-NEXT: ret void
@@ -159,7 +159,7 @@ define dso_local void @caller_struct4xi32(ptr noalias %src, ptr noalias %dst) #1
159159
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
160160
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC]], i64 16
161161
; CHECK-NEXT: [[SRC_VAL1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
162-
; CHECK-NEXT: call fastcc void @callee_struct4xi32(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
162+
; CHECK-NEXT: call fastcc void @callee_struct4xi32.argprom(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
163163
; CHECK-NEXT: ret void
164164
;
165165
entry:
@@ -168,7 +168,7 @@ entry:
168168
}
169169

170170
define internal fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 {
171-
; CHECK-LABEL: define internal fastcc void @callee_struct4xi32(
171+
; CHECK-LABEL: define internal fastcc void @callee_struct4xi32.argprom(
172172
; CHECK-NEXT: entry:
173173
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
174174
; CHECK-NEXT: [[DST2:%.*]] = getelementptr inbounds [[STRUCT_4XI32:%.*]], ptr [[DST]], i64 0, i32 1

llvm/test/CodeGen/AMDGPU/internalize.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
; ALL: gvar_used
1111
@gvar_used = addrspace(1) global i32 undef, align 4
1212

13-
; OPT: define internal fastcc void @func_used_noinline(
13+
; OPT: define internal fastcc void @func_used_noinline.argelim(
1414
; OPT-NONE: define fastcc void @func_used_noinline(
1515
define fastcc void @func_used_noinline(ptr addrspace(1) %out, i32 %tid) #1 {
1616
entry:

llvm/test/ThinLTO/X86/memprof-aliased-location1.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,22 +84,22 @@ attributes #0 = { noinline optnone }
8484
;; The first call to foo does not allocate cold memory. It should call the
8585
;; original functions, which ultimately call the original allocation decorated
8686
;; with a "notcold" attribute.
87-
; IR: call {{.*}} @_Z3foov()
87+
; IR: call {{.*}} @_Z3foov.retelim()
8888
;; The second call to foo allocates cold memory. It should call cloned functions
8989
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
90-
; IR: call {{.*}} @_Z3foov.memprof.1()
91-
; IR: define internal {{.*}} @_Z3barv()
90+
; IR: call {{.*}} @_Z3foov.memprof.1.retelim()
91+
; IR: define internal {{.*}} @_Z3barv.retelim()
9292
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
93-
; IR: define internal {{.*}} @_Z3bazv()
94-
; IR: call {{.*}} @_Z3barv()
95-
; IR: define internal {{.*}} @_Z3foov()
96-
; IR: call {{.*}} @_Z3bazv()
97-
; IR: define internal {{.*}} @_Z3barv.memprof.1()
93+
; IR: define internal {{.*}} @_Z3bazv.retelim()
94+
; IR: call {{.*}} @_Z3barv.retelim()
95+
; IR: define internal {{.*}} @_Z3foov.retelim()
96+
; IR: call {{.*}} @_Z3bazv.retelim()
97+
; IR: define internal {{.*}} @_Z3barv.memprof.1.retelim()
9898
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
99-
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
100-
; IR: call {{.*}} @_Z3barv.memprof.1()
101-
; IR: define internal {{.*}} @_Z3foov.memprof.1()
102-
; IR: call {{.*}} @_Z3bazv.memprof.1()
99+
; IR: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
100+
; IR: call {{.*}} @_Z3barv.memprof.1.retelim()
101+
; IR: define internal {{.*}} @_Z3foov.memprof.1.retelim()
102+
; IR: call {{.*}} @_Z3bazv.memprof.1.retelim()
103103
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
104104
; IR: attributes #[[COLD]] = { "memprof"="cold" }
105105

llvm/test/ThinLTO/X86/memprof-aliased-location2.ll

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -84,22 +84,22 @@ attributes #0 = { noinline optnone }
8484
;; The first call to foo does not allocate cold memory. It should call the
8585
;; original functions, which ultimately call the original allocation decorated
8686
;; with a "notcold" attribute.
87-
; IR: call {{.*}} @_Z3foov()
87+
; IR: call {{.*}} @_Z3foov.retelim()
8888
;; The second call to foo allocates cold memory. It should call cloned functions
8989
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
90-
; IR: call {{.*}} @_Z3foov.memprof.1()
91-
; IR: define internal {{.*}} @_Z3barv()
90+
; IR: call {{.*}} @_Z3foov.memprof.1.retelim()
91+
; IR: define internal {{.*}} @_Z3barv.retelim()
9292
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
93-
; IR: define internal {{.*}} @_Z3bazv()
94-
; IR: call {{.*}} @_Z3barv()
95-
; IR: define internal {{.*}} @_Z3foov()
96-
; IR: call {{.*}} @_Z3bazv()
97-
; IR: define internal {{.*}} @_Z3barv.memprof.1()
93+
; IR: define internal {{.*}} @_Z3bazv.retelim()
94+
; IR: call {{.*}} @_Z3barv.retelim()
95+
; IR: define internal {{.*}} @_Z3foov.retelim()
96+
; IR: call {{.*}} @_Z3bazv.retelim()
97+
; IR: define internal {{.*}} @_Z3barv.memprof.1.retelim()
9898
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
99-
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
100-
; IR: call {{.*}} @_Z3barv.memprof.1()
101-
; IR: define internal {{.*}} @_Z3foov.memprof.1()
102-
; IR: call {{.*}} @_Z3bazv.memprof.1()
99+
; IR: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
100+
; IR: call {{.*}} @_Z3barv.memprof.1.retelim()
101+
; IR: define internal {{.*}} @_Z3foov.memprof.1.retelim()
102+
; IR: call {{.*}} @_Z3bazv.memprof.1.retelim()
103103
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
104104
; IR: attributes #[[COLD]] = { "memprof"="cold" }
105105

llvm/test/ThinLTO/X86/memprof-basic.ll

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,7 @@
5353
;; We should have cloned bar, baz, and foo, for the cold memory allocation.
5454
; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
5555

56-
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
56+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST
5757

5858

5959
;; Try again but with distributed ThinLTO
@@ -303,6 +303,23 @@ attributes #0 = { noinline optnone }
303303
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
304304
; IR: attributes #[[COLD]] = { "memprof"="cold" }
305305

306+
; IRNODIST: define {{.*}} @main
307+
; IRNODIST: call {{.*}} @_Z3foov.retelim()
308+
; IRNODIST: call {{.*}} @_Z3foov.memprof.1.retelim()
309+
; IRNODIST: define internal {{.*}} @_Z3barv.retelim()
310+
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
311+
; IRNODIST: define internal {{.*}} @_Z3bazv.retelim()
312+
; IRNODIST: call {{.*}} @_Z3barv.retelim()
313+
; IRNODIST: define internal {{.*}} @_Z3foov.retelim()
314+
; IRNODIST: call {{.*}} @_Z3bazv.retelim()
315+
; IRNODIST: define internal {{.*}} @_Z3barv.memprof.1.retelim()
316+
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
317+
; IRNODIST: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
318+
; IRNODIST: call {{.*}} @_Z3barv.memprof.1.retelim()
319+
; IRNODIST: define internal {{.*}} @_Z3foov.memprof.1.retelim()
320+
; IRNODIST: call {{.*}} @_Z3bazv.memprof.1.retelim()
321+
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
322+
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }
306323

307324
; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
308325
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend

llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -68,7 +68,7 @@
6868
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
6969
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
7070

71-
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
71+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST
7272

7373

7474
;; Try again but with distributed ThinLTO
@@ -247,6 +247,18 @@ attributes #0 = { noinline optnone}
247247
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
248248
; IR: attributes #[[COLD]] = { "memprof"="cold" }
249249

250+
; IRNODIST: define internal {{.*}} @_Z1Dv.retelim()
251+
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
252+
; IRNODIST: define internal {{.*}} @_Z1Fv.retelim()
253+
; IRNODIST: call {{.*}} @_Z1Dv.retelim()
254+
; IRNODIST: define internal {{.*}} @_Z1Bv.retelim()
255+
; IRNODIST: call {{.*}} @_Z1Dv.memprof.1.retelim()
256+
; IRNODIST: define internal {{.*}} @_Z1Ev.retelim()
257+
; IRNODIST: call {{.*}} @_Z1Dv.memprof.1.retelim()
258+
; IRNODIST: define internal {{.*}} @_Z1Dv.memprof.1.retelim()
259+
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
260+
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
261+
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }
250262

251263
; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
252264
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend

llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -61,7 +61,7 @@
6161
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=DUMP \
6262
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS
6363

64-
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
64+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST
6565

6666

6767
;; Try again but with distributed ThinLTO
@@ -283,6 +283,23 @@ attributes #0 = { noinline optnone }
283283
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
284284
; IR: attributes #[[COLD]] = { "memprof"="cold" }
285285

286+
; IRNODIST: define internal {{.*}} @_Z1EPPcS0_.argelim(
287+
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
288+
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
289+
; IRNODIST: define internal {{.*}} @_Z1BPPcS0_(
290+
; IRNODIST: call {{.*}} @_Z1EPPcS0_.argelim(
291+
; IRNODIST: define internal {{.*}} @_Z1CPPcS0_(
292+
; IRNODIST: call {{.*}} @_Z1EPPcS0_.memprof.3.argelim(
293+
; IRNODIST: define internal {{.*}} @_Z1DPPcS0_(
294+
; IRNODIST: call {{.*}} @_Z1EPPcS0_.memprof.2.argelim(
295+
; IRNODIST: define internal {{.*}} @_Z1EPPcS0_.memprof.2.argelim(
296+
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
297+
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
298+
; IRNODIST: define internal {{.*}} @_Z1EPPcS0_.memprof.3.argelim(
299+
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
300+
; IRNODIST: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
301+
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
302+
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }
286303

287304
; STATS: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
288305
; STATS-BE: 2 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend

llvm/test/ThinLTO/X86/memprof-indirectcall.ll

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,7 +74,7 @@
7474
;; from main allocating cold memory.
7575
; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
7676

77-
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
77+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST
7878

7979

8080
;; Try again but with distributed ThinLTO
@@ -419,6 +419,19 @@ attributes #0 = { noinline optnone }
419419
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
420420
; IR: attributes #[[COLD]] = { "memprof"="cold" }
421421

422+
; IRNODIST: define {{.*}} @main(
423+
; IRNODIST: call {{.*}} @_Z3foov.argelim()
424+
; IRNODIST: call {{.*}} @_Z3foov.memprof.1.argelim()
425+
; IRNODIST: call {{.*}} @_Z3barP1A.argelim(
426+
; IRNODIST: call {{.*}} @_Z3barP1A.argelim(
427+
; IRNODIST: call {{.*}} @_Z3barP1A.argelim(
428+
; IRNODIST: call {{.*}} @_Z3barP1A.argelim(
429+
; IRNODIST: define internal {{.*}} @_Z3foov.argelim()
430+
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
431+
; IRNODIST: define internal {{.*}} @_Z3foov.memprof.1.argelim()
432+
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
433+
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
434+
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }
422435

423436
; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
424437
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend

llvm/test/ThinLTO/X86/memprof-inlined.ll

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@
6363
;; cold memory.
6464
; RUN: cat %t.ccg.cloned.dot | FileCheck %s --check-prefix=DOTCLONED
6565

66-
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR
66+
; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IRNODIST
6767

6868

6969
;; Try again but with distributed ThinLTO
@@ -323,6 +323,19 @@ attributes #0 = { noinline optnone }
323323
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
324324
; IR: attributes #[[COLD]] = { "memprof"="cold" }
325325

326+
; IRNODIST: define internal {{.*}} @_Z3barv.retelim()
327+
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
328+
; IRNODIST: define internal {{.*}} @_Z3foov.retelim()
329+
; IRNODIST: call {{.*}} @_Z3barv.retelim()
330+
; IRNODIST: define {{.*}} @main()
331+
; IRNODIST: call {{.*}} @_Z3foov.retelim()
332+
; IRNODIST: call {{.*}} @_Z3foov.memprof.1.retelim()
333+
; IRNODIST: define internal {{.*}} @_Z3barv.memprof.1.retelim()
334+
; IRNODIST: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
335+
; IRNODIST: define internal {{.*}} @_Z3foov.memprof.1.retelim()
336+
; IRNODIST: call {{.*}} @_Z3barv.memprof.1.retelim()
337+
; IRNODIST: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
338+
; IRNODIST: attributes #[[COLD]] = { "memprof"="cold" }
326339

327340
; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
328341
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend

llvm/test/Transforms/ArgumentPromotion/2008-02-01-ReturnAttrs.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
; RUN: cat %t | FileCheck -check-prefix=REMARK %s
44

55
define internal i32 @deref(ptr %x) nounwind {
6-
; CHECK-LABEL: define {{[^@]+}}@deref
6+
; CHECK-LABEL: define {{[^@]+}}@deref.argprom
77
; CHECK-SAME: (i32 [[X_0_VAL:%.*]]) #[[ATTR0:[0-9]+]] {
88
; CHECK-NEXT: entry:
99
; CHECK-NEXT: ret i32 [[X_0_VAL]]
@@ -29,7 +29,7 @@ define i32 @f(i32 %x) {
2929
; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
3030
; CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
3131
; CHECK-NEXT: [[X_ADDR_VAL:%.*]] = load i32, ptr [[X_ADDR]], align 4
32-
; CHECK-NEXT: [[TEMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]])
32+
; CHECK-NEXT: [[TEMP1:%.*]] = call i32 @deref.argprom(i32 [[X_ADDR_VAL]])
3333
; CHECK-NEXT: ret i32 [[TEMP1]]
3434
;
3535
entry:

llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -85,4 +85,4 @@ entry:
8585
; Without number-of-argument constraint, argpromotion will create a function signature with 5 arguments, which equals
8686
; the maximum number of argument permitted by bpf backend, so argpromotion result code does work.
8787
;
88-
; CHECK: i32 @foo2(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val)
88+
; CHECK: i32 @foo2.argprom(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val)

llvm/test/Transforms/ArgumentPromotion/X86/attributes.ll

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ bb:
4242
}
4343

4444
define internal fastcc void @promote_avx2(ptr %arg, ptr readonly %arg1) #0 {
45-
; CHECK-LABEL: define {{[^@]+}}@promote_avx2
45+
; CHECK-LABEL: define {{[^@]+}}@promote_avx2.argprom
4646
; CHECK-SAME: (ptr [[ARG:%.*]], <4 x i64> [[ARG1_VAL:%.*]])
4747
; CHECK-NEXT: bb:
4848
; CHECK-NEXT: store <4 x i64> [[ARG1_VAL]], ptr [[ARG]]
@@ -62,7 +62,7 @@ define void @promote(ptr %arg) #0 {
6262
; CHECK-NEXT: [[TMP2:%.*]] = alloca <4 x i64>, align 32
6363
; CHECK-NEXT: call void @llvm.memset.p0.i64(ptr align 32 [[TMP]], i8 0, i64 32, i1 false)
6464
; CHECK-NEXT: [[TMP_VAL:%.*]] = load <4 x i64>, ptr [[TMP]]
65-
; CHECK-NEXT: call fastcc void @promote_avx2(ptr [[TMP2]], <4 x i64> [[TMP_VAL]])
65+
; CHECK-NEXT: call fastcc void @promote_avx2.argprom(ptr [[TMP2]], <4 x i64> [[TMP_VAL]])
6666
; CHECK-NEXT: [[TMP4:%.*]] = load <4 x i64>, ptr [[TMP2]], align 32
6767
; CHECK-NEXT: store <4 x i64> [[TMP4]], ptr [[ARG]], align 2
6868
; CHECK-NEXT: ret void

0 commit comments

Comments
 (0)