Skip to content

Reapply "[MemProf] Reduce cloning overhead by sharing nodes when possible" (#102932) with fixes #106623

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
236 changes: 208 additions & 28 deletions llvm/lib/Transforms/IPO/MemProfContextDisambiguation.cpp

Large diffs are not rendered by default.

116 changes: 116 additions & 0 deletions llvm/test/ThinLTO/X86/memprof-aliased-location1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
;; Test to ensure a call to a different callee but with the same debug info
;; (and therefore callsite metadata) as a preceding call in the alloc context
;; does not cause missing or incorrect cloning. This test is otherwise the same
;; as memprof-basic.ll.

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -thinlto-bc %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t.o,main,plx \
; RUN: -r=%t.o,blah, \
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s \
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR

source_filename = "memprof-aliased-location1.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i32 @main() #0 {
entry:
%call = call ptr @_Z3foov(), !callsite !0
%call1 = call ptr @_Z3foov(), !callsite !1
ret i32 0
}

declare void @blah()

define internal ptr @_Z3barv() #0 {
entry:
%call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
ret ptr null
}

declare ptr @_Znam(i64)

define internal ptr @_Z3bazv() #0 {
entry:
;; Preceding call to another callee but with the same debug location / callsite id
call void @blah(), !callsite !8
%call = call ptr @_Z3barv(), !callsite !8
ret ptr null
}

define internal ptr @_Z3foov() #0 {
entry:
%call = call ptr @_Z3bazv(), !callsite !9
ret ptr null
}

; uselistorder directives
uselistorder ptr @_Z3foov, { 1, 0 }

attributes #0 = { noinline optnone }

!0 = !{i64 8632435727821051414}
!1 = !{i64 -3421689549917153178}
!2 = !{!3, !5}
!3 = !{!4, !"notcold", i64 100}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
!5 = !{!6, !"cold", i64 400}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!7 = !{i64 9086428284934609951}
!8 = !{i64 -5964873800580613432}
!9 = !{i64 2732490490862098848}

; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
; REMARKS: created clone _Z3barv.memprof.1
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
; REMARKS: created clone _Z3bazv.memprof.1
; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
; REMARKS: created clone _Z3foov.memprof.1
; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1


; IR: define {{.*}} @main
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: call {{.*}} @_Z3foov()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv()
; IR: call {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3bazv()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
; IR: call {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3bazv.memprof.1()
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }


; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
; STATS-BE: 3 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
116 changes: 116 additions & 0 deletions llvm/test/ThinLTO/X86/memprof-aliased-location2.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,116 @@
;; Test to ensure a call to a different callee but with the same debug info
;; (and therefore callsite metadata) as a subsequent call in the alloc context
;; does not cause missing or incorrect cloning. This test is otherwise the same
;; as memprof-basic.ll.

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -thinlto-bc %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t.o,main,plx \
; RUN: -r=%t.o,blah, \
; RUN: -r=%t.o,_Znam, \
; RUN: -memprof-verify-ccg -memprof-verify-nodes \
; RUN: -stats -pass-remarks=memprof-context-disambiguation -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s \
; RUN: --check-prefix=STATS --check-prefix=STATS-BE --check-prefix=REMARKS

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR

source_filename = "memprof-aliased-location2.ll"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

define i32 @main() #0 {
entry:
%call = call ptr @_Z3foov(), !callsite !0
%call1 = call ptr @_Z3foov(), !callsite !1
ret i32 0
}

declare void @blah()

define internal ptr @_Z3barv() #0 {
entry:
%call = call ptr @_Znam(i64 0), !memprof !2, !callsite !7
ret ptr null
}

declare ptr @_Znam(i64)

define internal ptr @_Z3bazv() #0 {
entry:
%call = call ptr @_Z3barv(), !callsite !8
;; Subsequent call to another callee but with the same debug location / callsite id
call void @blah(), !callsite !8
ret ptr null
}

define internal ptr @_Z3foov() #0 {
entry:
%call = call ptr @_Z3bazv(), !callsite !9
ret ptr null
}

; uselistorder directives
uselistorder ptr @_Z3foov, { 1, 0 }

attributes #0 = { noinline optnone }

!0 = !{i64 8632435727821051414}
!1 = !{i64 -3421689549917153178}
!2 = !{!3, !5}
!3 = !{!4, !"notcold", i64 100}
!4 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 8632435727821051414}
!5 = !{!6, !"cold", i64 400}
!6 = !{i64 9086428284934609951, i64 -5964873800580613432, i64 2732490490862098848, i64 -3421689549917153178}
!7 = !{i64 9086428284934609951}
!8 = !{i64 -5964873800580613432}
!9 = !{i64 2732490490862098848}

; REMARKS: call in clone main assigned to call function clone _Z3foov.memprof.1
; REMARKS: created clone _Z3barv.memprof.1
; REMARKS: call in clone _Z3barv marked with memprof allocation attribute notcold
; REMARKS: call in clone _Z3barv.memprof.1 marked with memprof allocation attribute cold
; REMARKS: created clone _Z3bazv.memprof.1
; REMARKS: call in clone _Z3bazv.memprof.1 assigned to call function clone _Z3barv.memprof.1
; REMARKS: created clone _Z3foov.memprof.1
; REMARKS: call in clone _Z3foov.memprof.1 assigned to call function clone _Z3bazv.memprof.1


; IR: define {{.*}} @main
;; The first call to foo does not allocate cold memory. It should call the
;; original functions, which ultimately call the original allocation decorated
;; with a "notcold" attribute.
; IR: call {{.*}} @_Z3foov()
;; The second call to foo allocates cold memory. It should call cloned functions
;; which ultimately call a cloned allocation decorated with a "cold" attribute.
; IR: call {{.*}} @_Z3foov.memprof.1()
; IR: define internal {{.*}} @_Z3barv()
; IR: call {{.*}} @_Znam(i64 0) #[[NOTCOLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv()
; IR: call {{.*}} @_Z3barv()
; IR: define internal {{.*}} @_Z3foov()
; IR: call {{.*}} @_Z3bazv()
; IR: define internal {{.*}} @_Z3barv.memprof.1()
; IR: call {{.*}} @_Znam(i64 0) #[[COLD:[0-9]+]]
; IR: define internal {{.*}} @_Z3bazv.memprof.1()
; IR: call {{.*}} @_Z3barv.memprof.1()
; IR: define internal {{.*}} @_Z3foov.memprof.1()
; IR: call {{.*}} @_Z3bazv.memprof.1()
; IR: attributes #[[NOTCOLD]] = { "memprof"="notcold" }
; IR: attributes #[[COLD]] = { "memprof"="cold" }


; STATS: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of cold static allocations (possibly cloned) during ThinLTO backend
; STATS: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned)
; STATS-BE: 1 memprof-context-disambiguation - Number of not cold static allocations (possibly cloned) during ThinLTO backend
; STATS-BE: 2 memprof-context-disambiguation - Number of allocation versions (including clones) during ThinLTO backend
; STATS: 3 memprof-context-disambiguation - Number of function clones created during whole program analysis
; STATS-BE: 3 memprof-context-disambiguation - Number of function clones created during ThinLTO backend
; STATS-BE: 3 memprof-context-disambiguation - Number of functions that had clones created during ThinLTO backend
; STATS-BE: 2 memprof-context-disambiguation - Maximum number of allocation versions created for an original allocation during ThinLTO backend
; STATS-BE: 1 memprof-context-disambiguation - Number of original (not cloned) allocations with memprof profiles during ThinLTO backend
99 changes: 99 additions & 0 deletions llvm/test/ThinLTO/X86/memprof-tailcall-aliased-location1.ll
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
;; Test to ensure a call to a different callee but with the same debug info
;; (and therefore callsite metadata) as a preceding tail call in the alloc
;; context does not cause missing or incorrect cloning. This test is otherwise
;; the same as memprof-tailcall.ll.

;; -stats requires asserts
; REQUIRES: asserts

; RUN: opt -thinlto-bc %s >%t.o
; RUN: llvm-lto2 run %t.o -enable-memprof-context-disambiguation \
; RUN: -supports-hot-cold-new \
; RUN: -r=%t.o,_Z3barv,plx \
; RUN: -r=%t.o,_Z3bazv,plx \
; RUN: -r=%t.o,_Z3foov,plx \
; RUN: -r=%t.o,main,plx \
; RUN: -r=%t.o,_Znam, \
; RUN: -r=%t.o,blah, \
; RUN: -stats -save-temps \
; RUN: -o %t.out 2>&1 | FileCheck %s --check-prefix=STATS

; RUN: llvm-dis %t.out.1.4.opt.bc -o - | FileCheck %s --check-prefix=IR

; STATS: 2 memprof-context-disambiguation - Number of profiled callees found via tail calls
; STATS: 4 memprof-context-disambiguation - Aggregate depth of profiled callees found via tail calls
; STATS: 2 memprof-context-disambiguation - Maximum depth of profiled callees found via tail calls

source_filename = "memprof-tailcall-aliased-location1.cc"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; Function Attrs: noinline
; IR-LABEL: @_Z3barv()
define ptr @_Z3barv() local_unnamed_addr #0 {
entry:
; IR: call {{.*}} @_Znam(i64 10) #[[NOTCOLD:[0-9]+]]
%call = tail call ptr @_Znam(i64 10) #2, !memprof !0, !callsite !5
ret ptr %call
}

; Function Attrs: nobuiltin allocsize(0)
declare ptr @_Znam(i64) #1
declare void @blah()

; Function Attrs: noinline
; IR-LABEL: @_Z3bazv()
define ptr @_Z3bazv() #0 {
entry:
; IR: call ptr @_Z3barv()
%call = tail call ptr @_Z3barv()
ret ptr %call
}

; Function Attrs: noinline
; IR-LABEL: @_Z3foov()
define ptr @_Z3foov() #0 {
entry:
; IR: call ptr @_Z3bazv()
%call = tail call ptr @_Z3bazv()
ret ptr %call
}

; Function Attrs: noinline
; IR-LABEL: @main()
define i32 @main() #0 {
;; Preceding call to another callee but with the same debug location / callsite id
call void @blah(), !callsite !6
;; The first call to foo is part of a cold context, and should use the
;; original functions.
; IR: call ptr @_Z3foov()
%call = tail call ptr @_Z3foov(), !callsite !6
;; The second call to foo is part of a cold context, and should call the
;; cloned functions.
; IR: call ptr @_Z3foov.memprof.1()
%call1 = tail call ptr @_Z3foov(), !callsite !7
ret i32 0
}

; IR-LABEL: @_Z3barv.memprof.1()
; IR: call {{.*}} @_Znam(i64 10) #[[COLD:[0-9]+]]
; IR-LABEL: @_Z3bazv.memprof.1()
; IR: call ptr @_Z3barv.memprof.1()
; IR-LABEL: @_Z3foov.memprof.1()
; IR: call ptr @_Z3bazv.memprof.1()

; IR: attributes #[[NOTCOLD]] = { builtin allocsize(0) "memprof"="notcold" }
; IR: attributes #[[COLD]] = { builtin allocsize(0) "memprof"="cold" }

attributes #0 = { noinline }
attributes #1 = { nobuiltin allocsize(0) }
attributes #2 = { builtin allocsize(0) }

!0 = !{!1, !3}
!1 = !{!2, !"notcold"}
!2 = !{i64 3186456655321080972, i64 8632435727821051414}
!3 = !{!4, !"cold"}
!4 = !{i64 3186456655321080972, i64 -3421689549917153178}
!5 = !{i64 3186456655321080972}
!6 = !{i64 8632435727821051414}
!7 = !{i64 -3421689549917153178}
Loading
Loading