Skip to content

[RFC][Transforms][IPO] Add func suffix in ArgumentPromotion and DeadArgumentElimination #109899

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions compiler-rt/test/cfi/stats.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,12 +26,12 @@ extern "C" __attribute__((noinline)) void nvcall(A *a) {
}

extern "C" __attribute__((noinline)) A *dcast(A *a) {
// CHECK: stats.cpp:[[@LINE+1]] {{_?}}dcast cfi-derived-cast 24
// CHECK: stats.cpp:[[@LINE+1]] {{_?}}dcast.retelim cfi-derived-cast 24
return (A *)(ABase *)a;
}

extern "C" __attribute__((noinline)) A *ucast(A *a) {
// CHECK: stats.cpp:[[@LINE+1]] {{_?}}ucast cfi-unrelated-cast 81
// CHECK: stats.cpp:[[@LINE+1]] {{_?}}ucast.retelim cfi-unrelated-cast 81
return (A *)(char *)a;
}

Expand Down
1 change: 1 addition & 0 deletions llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -215,6 +215,7 @@ doPromotion(Function *F, FunctionAnalysisManager &FAM,

F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
NF->setName(NF->getName() + ".argprom");

// Loop over all the callers of the function, transforming the call sites to
// pass in the loaded pointers.
Expand Down
4 changes: 4 additions & 0 deletions llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -889,6 +889,10 @@ bool DeadArgumentEliminationPass::removeDeadStuffFromFunction(Function *F) {
// it again.
F->getParent()->getFunctionList().insert(F->getIterator(), NF);
NF->takeName(F);
if (NumArgumentsEliminated)
NF->setName(NF->getName() + ".argelim");
else
NF->setName(NF->getName() + ".retelim");
NF->IsNewDbgInfoFormat = F->IsNewDbgInfoFormat;

// Loop over all the callers of the function, transforming the call sites to
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,15 +9,15 @@ define internal void @a() alwaysinline {
}

define internal void @b(ptr) noinline {
; CHECK-LABEL: @b(
; CHECK-LABEL: @b.argprom(
; CHECK-NEXT: ret void
;
ret void
}

define internal void @c() noinline {
; CHECK-LABEL: @c(
; CHECK-NEXT: call void @b()
; CHECK-NEXT: call void @b.argprom()
; CHECK-NEXT: ret void
;
call void @b(ptr @a)
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/BugPoint/remove_arguments_test.ll
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@

declare i32 @test2()

; CHECK: define void @test() {
; CHECK: define void @test.argelim() {
define i32 @test(i32 %A, ptr %B, float %C) {
call i32 @test2()
ret i32 %1
Expand Down
16 changes: 8 additions & 8 deletions llvm/test/CodeGen/AArch64/arg_promotion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -38,16 +38,16 @@ define dso_local void @caller_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
; CHECK-LABEL: define dso_local void @caller_4xi32(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
; CHECK-NEXT: call fastcc void @callee_4xi32(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: call fastcc void @callee_4xi32.argprom.argprom(<4 x i32> [[SRC_VAL]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: ret void
;
entry:
call fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst)
call fastcc void @callee_4xi32.argprom(ptr noalias %src, ptr noalias %dst)
ret void
}

define internal fastcc void @callee_4xi32(ptr noalias %src, ptr noalias %dst) #1 {
; CHECK-LABEL: define internal fastcc void @callee_4xi32(
define internal fastcc void @callee_4xi32.argprom(ptr noalias %src, ptr noalias %dst) #1 {
; CHECK-LABEL: define internal fastcc void @callee_4xi32.argprom.argprom(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you change it to avoid adding cascading suffixes? This gets a little verbose, and it potentially makes things even harder for tooling (e.g. profile tooling) that tries to ignore suffixes.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I did this for two reasons. First, GCC also produces cascading suffixes; e.g., when I compiled LLVM with GCC, I got the following:

_ZN5clang19RecursiveASTVisitorIN12_GLOBAL__N_119PluralMisuseChecker13MethodCrawlerEE14TraverseIfStmtEPNS_6IfStmtEPN4llvm15SmallVectorImplINS7_14PointerIntPairIPNS_4StmtELj1EbNS7_21PointerLikeTypeTraitsISB_EENS7_18PointerIntPairInfoISB_Lj1ESD_EEEEEE.part.0.constprop.0.isra.0

Second, cascading suffixes give a hint about which signature-changing transformations have been applied, so it is easier for people to find out how the function signature was changed.

; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
; CHECK-NEXT: ret void
Expand All @@ -65,7 +65,7 @@ define dso_local void @caller_i256(ptr noalias %src, ptr noalias %dst) #0 {
; CHECK-LABEL: define dso_local void @caller_i256(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[SRC_VAL:%.*]] = load i256, ptr [[SRC:%.*]], align 16
; CHECK-NEXT: call fastcc void @callee_i256(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: call fastcc void @callee_i256.argprom(i256 [[SRC_VAL]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: ret void
;
entry:
Expand All @@ -74,7 +74,7 @@ entry:
}

define internal fastcc void @callee_i256(ptr noalias %src, ptr noalias %dst) #0 {
; CHECK-LABEL: define internal fastcc void @callee_i256(
; CHECK-LABEL: define internal fastcc void @callee_i256.argprom(
; CHECK-NEXT: entry:
; CHECK-NEXT: store i256 [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
; CHECK-NEXT: ret void
Expand Down Expand Up @@ -159,7 +159,7 @@ define dso_local void @caller_struct4xi32(ptr noalias %src, ptr noalias %dst) #1
; CHECK-NEXT: [[SRC_VAL:%.*]] = load <4 x i32>, ptr [[SRC:%.*]], align 16
; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i8, ptr [[SRC]], i64 16
; CHECK-NEXT: [[SRC_VAL1:%.*]] = load <4 x i32>, ptr [[TMP0]], align 16
; CHECK-NEXT: call fastcc void @callee_struct4xi32(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: call fastcc void @callee_struct4xi32.argprom(<4 x i32> [[SRC_VAL]], <4 x i32> [[SRC_VAL1]], ptr noalias [[DST:%.*]])
; CHECK-NEXT: ret void
;
entry:
Expand All @@ -168,7 +168,7 @@ entry:
}

define internal fastcc void @callee_struct4xi32(ptr noalias %src, ptr noalias %dst) #1 {
; CHECK-LABEL: define internal fastcc void @callee_struct4xi32(
; CHECK-LABEL: define internal fastcc void @callee_struct4xi32.argprom(
; CHECK-NEXT: entry:
; CHECK-NEXT: store <4 x i32> [[SRC_0_VAL:%.*]], ptr [[DST:%.*]], align 16
; CHECK-NEXT: [[DST2:%.*]] = getelementptr inbounds [[STRUCT_4XI32:%.*]], ptr [[DST]], i64 0, i32 1
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/CodeGen/AMDGPU/internalize.ll
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
; ALL: gvar_used
@gvar_used = addrspace(1) global i32 undef, align 4

; OPT: define internal fastcc void @func_used_noinline(
; OPT: define internal fastcc void @func_used_noinline.argelim(
; OPT-NONE: define fastcc void @func_used_noinline(
define fastcc void @func_used_noinline(ptr addrspace(1) %out, i32 %tid) #1 {
entry:
Expand Down
24 changes: 12 additions & 12 deletions llvm/test/ThinLTO/X86/memprof-aliased-location1.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,22 +84,22 @@ attributes #0 = { noinline optnone }
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: call {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3foov.retelim()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv()
; IR: call {{.*}} @_Z3foov.memprof.1.retelim()
; IR: define internal {{.*}} @_Z3barv.retelim()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv()
; IR: call {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3bazv()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv.retelim()
; IR: call {{.*}} @_Z3barv.retelim()
; IR: define internal {{.*}} @_Z3foov.retelim()
; IR: call {{.*}} @_Z3bazv.retelim()
; IR: define internal {{.*}} @_Z3barv.memprof.1.retelim()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
; IR: call {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3bazv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
; IR: call {{.*}} @_Z3barv.memprof.1.retelim()
; IR: define internal {{.*}} @_Z3foov.memprof.1.retelim()
; IR: call {{.*}} @_Z3bazv.memprof.1.retelim()
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

Expand Down
24 changes: 12 additions & 12 deletions llvm/test/ThinLTO/X86/memprof-aliased-location2.ll
Original file line number Diff line number Diff line change
Expand Up @@ -84,22 +84,22 @@ attributes #0 = { noinline optnone }
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: call {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3foov.retelim()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv()
; IR: call {{.*}} @_Z3foov.memprof.1.retelim()
; IR: define internal {{.*}} @_Z3barv.retelim()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv()
; IR: call {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3bazv()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv.retelim()
; IR: call {{.*}} @_Z3barv.retelim()
; IR: define internal {{.*}} @_Z3foov.retelim()
; IR: call {{.*}} @_Z3bazv.retelim()
; IR: define internal {{.*}} @_Z3barv.memprof.1.retelim()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
; IR: call {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3bazv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv.memprof.1.retelim()
; IR: call {{.*}} @_Z3barv.memprof.1.retelim()
; IR: define internal {{.*}} @_Z3foov.memprof.1.retelim()
; IR: call {{.*}} @_Z3bazv.memprof.1.retelim()
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }

Expand Down
25 changes: 12 additions & 13 deletions llvm/test/ThinLTO/X86/memprof-basic.ll
Original file line number Diff line number Diff line change
Expand Up @@ -284,26 +284,25 @@ attributes #0 = { noinline optnone }
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: call {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3foov{{.*}}()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv()
; IR: call {{.*}} @_Z3foov.memprof.1{{.*}}()
; IR: define internal {{.*}} @_Z3barv{{.*}}()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv()
; IR: call {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3bazv()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv{{.*}}()
; IR: call {{.*}} @_Z3barv{{.*}}()
; IR: define internal {{.*}} @_Z3foov{{.*}}()
; IR: call {{.*}} @_Z3bazv{{.*}}()
; IR: define internal {{.*}} @_Z3barv.memprof.1{{.*}}()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
; IR: call {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3bazv.memprof.1()
; IR: define internal {{.*}} @_Z3bazv.memprof.1{{.*}}()
; IR: call {{.*}} @_Z3barv.memprof.1{{.*}}()
; IR: define internal {{.*}} @_Z3foov.memprof.1{{.*}}()
; IR: call {{.*}} @_Z3bazv.memprof.1{{.*}}()
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }


; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
Expand Down
17 changes: 8 additions & 9 deletions llvm/test/ThinLTO/X86/memprof-duplicate-context-ids.ll
Original file line number Diff line number Diff line change
Expand Up @@ -231,23 +231,22 @@ attributes #0 = { noinline optnone}
;; The allocation via F does not allocate cold memory. It should call the
;; original D, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: define internal {{.*}} @_Z1Dv()
; IR: define internal {{.*}} @_Z1Dv{{.*}}()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z1Fv()
; IR: call {{.*}} @_Z1Dv()
; IR: define internal {{.*}} @_Z1Fv{{.*}}()
; IR: call {{.*}} @_Z1Dv{{.*}}()
;; The allocations via B and E allocate cold memory. They should call the
;; cloned D, which ultimately call the cloned allocation decorated with a
;; "cold" attribute.
; IR: define internal {{.*}} @_Z1Bv()
; IR: call {{.*}} @_Z1Dv.memprof.1()
; IR: define internal {{.*}} @_Z1Ev()
; IR: call {{.*}} @_Z1Dv.memprof.1()
; IR: define internal {{.*}} @_Z1Dv.memprof.1()
; IR: define internal {{.*}} @_Z1Bv{{.*}}()
; IR: call {{.*}} @_Z1Dv.memprof.1{{.*}}()
; IR: define internal {{.*}} @_Z1Ev{{.*}}()
; IR: call {{.*}} @_Z1Dv.memprof.1{{.*}}()
; IR: define internal {{.*}} @_Z1Dv.memprof.1{{.*}}()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }


; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
Expand Down
12 changes: 6 additions & 6 deletions llvm/test/ThinLTO/X86/memprof-funcassigncloning.ll
Original file line number Diff line number Diff line change
Expand Up @@ -261,23 +261,23 @@ attributes #0 = { noinline optnone }


;; Original version of E is used for the non-cold allocations, both from B.
; IR: define internal {{.*}} @_Z1EPPcS0_(
; IR: define internal {{.*}} @_Z1EPPcS0_{{.*}}(
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD:[0-9]+]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: define internal {{.*}} @_Z1BPPcS0_(
; IR: call {{.*}} @_Z1EPPcS0_(
; IR: call {{.*}} @_Z1EPPcS0_{{.*}}(
;; C calls a clone of E with the first new allocating cold memory and the
;; second allocating non-cold memory.
; IR: define internal {{.*}} @_Z1CPPcS0_(
; IR: call {{.*}} @_Z1EPPcS0_.memprof.3(
; IR: call {{.*}} @_Z1EPPcS0_.memprof.3{{.*}}(
;; D calls a clone of E with the first new allocating non-cold memory and the
;; second allocating cold memory.
; IR: define internal {{.*}} @_Z1DPPcS0_(
; IR: call {{.*}} @_Z1EPPcS0_.memprof.2(
; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.2(
; IR: call {{.*}} @_Z1EPPcS0_.memprof.2{{.*}}(
; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.2{{.*}}(
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD:[0-9]+]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.3(
; IR: define internal {{.*}} @_Z1EPPcS0_.memprof.3{{.*}}(
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[NOTCOLD]]
; IR: call {{.*}} @_Znam(i64 noundef 10) #[[COLD]]
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
Expand Down
17 changes: 8 additions & 9 deletions llvm/test/ThinLTO/X86/memprof-indirectcall.ll
Original file line number Diff line number Diff line change
Expand Up @@ -404,22 +404,21 @@ attributes #0 = { noinline optnone }


; IR: define {{.*}} @main(
; IR: call {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3foov{{.*}}()
;; Only the second call to foo, which allocates cold memory via direct calls,
;; is replaced with a call to a clone that calls a cold allocation.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3barP1A(
; IR: call {{.*}} @_Z3barP1A(
; IR: call {{.*}} @_Z3barP1A(
; IR: call {{.*}} @_Z3barP1A(
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3foov.memprof.1{{.*}}()
; IR: call {{.*}} @_Z3barP1A{{.*}}(
; IR: call {{.*}} @_Z3barP1A{{.*}}(
; IR: call {{.*}} @_Z3barP1A{{.*}}(
; IR: call {{.*}} @_Z3barP1A{{.*}}(
; IR: define internal {{.*}} @_Z3foov{{.*}}()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1{{.*}}()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }


; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
Expand Down
17 changes: 8 additions & 9 deletions llvm/test/ThinLTO/X86/memprof-inlined.ll
Original file line number Diff line number Diff line change
Expand Up @@ -304,26 +304,25 @@ attributes #0 = { noinline optnone }
; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1


; IR: define internal {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3barv{{.*}}()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3foov{{.*}}()
; IR: call {{.*}} @_Z3barv{{.*}}()
; IR: define {{.*}} @main()
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: call {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3foov{{.*}}()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR: call {{.*}} @_Z3foov.memprof.1{{.*}}()
; IR: define internal {{.*}} @_Z3barv.memprof.1{{.*}}()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1{{.*}}()
; IR: call {{.*}} @_Z3barv.memprof.1{{.*}}()
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }


; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
; STATS: 2 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
; RUN: cat %t | FileCheck -check-prefix=REMARK %s

define internal i32 @deref(ptr %x) nounwind {
; CHECK-LABEL: define {{[^@]+}}@deref
; CHECK-LABEL: define {{[^@]+}}@deref.argprom
; CHECK-SAME: (i32 [[X_0_VAL:%.*]]) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
; CHECK-NEXT: ret i32 [[X_0_VAL]]
Expand All @@ -29,7 +29,7 @@ define i32 @f(i32 %x) {
; CHECK-NEXT: [[X_ADDR:%.*]] = alloca i32, align 4
; CHECK-NEXT: store i32 [[X]], ptr [[X_ADDR]], align 4
; CHECK-NEXT: [[X_ADDR_VAL:%.*]] = load i32, ptr [[X_ADDR]], align 4
; CHECK-NEXT: [[TEMP1:%.*]] = call i32 @deref(i32 [[X_ADDR_VAL]])
; CHECK-NEXT: [[TEMP1:%.*]] = call i32 @deref.argprom(i32 [[X_ADDR_VAL]])
; CHECK-NEXT: ret i32 [[TEMP1]]
;
entry:
Expand Down
2 changes: 1 addition & 1 deletion llvm/test/Transforms/ArgumentPromotion/BPF/argpromotion.ll
Original file line number Diff line number Diff line change
Expand Up @@ -85,4 +85,4 @@ entry:
; Without number-of-argument constraint, argpromotion will create a function signature with 5 arguments, which equals
; the maximum number of argument permitted by bpf backend, so argpromotion result code does work.
;
; CHECK: i32 @foo2(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val)
; CHECK: i32 @foo2.argprom(i32 %p1.0.val, i32 %p1.4.val, i32 %p2.8.val, i32 %p2.16.val, i32 %p3.20.val)
Loading
Loading