-
Notifications
You must be signed in to change notification settings - Fork 13.6k
[BOLT] Support pre-aggregated basic sample profile #140196
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Open
aaupov
wants to merge
2
commits into
users/aaupov/spr/main.bolt-support-pre-aggregated-basic-sample-profile
Choose a base branch
from
users/aaupov/spr/bolt-support-pre-aggregated-basic-sample-profile
base: users/aaupov/spr/main.bolt-support-pre-aggregated-basic-sample-profile
Could not load branches
Branch not found: {{ refName }}
Loading
Could not load tags
Nothing to show
Loading
Are you sure you want to change the base?
Some commits from the old base branch may be removed from the timeline,
and old review comments may become outdated.
Open
Conversation
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Created using spr 1.3.4
@llvm/pr-subscribers-bolt Author: Amir Ayupov (aaupov) ChangesDefine a pre-aggregated basic sample format:
Test Plan: update pre-aggregated-perf.test Full diff: https://github.com/llvm/llvm-project/pull/140196.diff 5 Files Affected:
diff --git a/bolt/include/bolt/Profile/DataAggregator.h b/bolt/include/bolt/Profile/DataAggregator.h
index 6d918134137d5..7d60b4689fb04 100644
--- a/bolt/include/bolt/Profile/DataAggregator.h
+++ b/bolt/include/bolt/Profile/DataAggregator.h
@@ -370,26 +370,25 @@ class DataAggregator : public DataReader {
/// memory.
///
/// File format syntax:
- /// {B|F|f|T} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
- /// <count> [<mispred_count>]
+ /// {S|B|F|f|T} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
///
- /// B - indicates an aggregated branch
- /// F - an aggregated fall-through
+ /// where <start>, <end>, <ft_end> have the format [<id>:]<offset>
+ ///
+ /// S - indicates an aggregated basic sample at <start>
+ /// B - indicates an aggregated branch from <start> to <end>
+ /// F - an aggregated fall-through from <start> to <end>
/// f - an aggregated fall-through with external origin - used to disambiguate
/// between a return hitting a basic block head and a regular internal
/// jump to the block
- /// T - an aggregated trace: branch with a fall-through (from, to, ft_end)
- ///
- /// <start_id> - build id of the object containing the start address. We can
- /// skip it for the main binary and use "X" for an unknown object. This will
- /// save some space and facilitate human parsing.
- ///
- /// <start_offset> - hex offset from the object base load address (0 for the
- /// main executable unless it's PIE) to the start address.
+ /// T - an aggregated trace: branch from <start> to <end> with a fall-through
+ /// to <ft_end>
///
- /// <end_id>, <end_offset> - same for the end address.
+ /// <id> - build id of the object containing the address. We can skip it for
+ /// the main binary and use "X" for an unknown object. This will save some
+ /// space and facilitate human parsing.
///
- /// <ft_end> - same for the fallthrough_end address.
+ /// <offset> - hex offset from the object base load address (0 for the
+ /// main executable unless it's PIE) to the address.
///
/// <count> - total aggregated count of the branch or a fall-through.
///
@@ -397,10 +396,17 @@ class DataAggregator : public DataReader {
/// Omitted for fall-throughs.
///
/// Example:
+ /// Basic samples profile:
+ /// S 41be50 3
+ ///
+ /// Soft-deprecated branch profile with separate branches and fall-throughs:
/// F 41be50 41be50 3
/// F 41be90 41be90 4
/// B 4b1942 39b57f0 3 0
/// B 4b196f 4b19e0 2 0
+ ///
+ /// Recommended branch profile with pre-aggregated traces:
+ /// T 4b196f 4b19e0 4b19ef 2
void parsePreAggregated();
/// Parse the full output of pre-aggregated LBR samples generated by
diff --git a/bolt/lib/Profile/DataAggregator.cpp b/bolt/lib/Profile/DataAggregator.cpp
index aa681e633c0d8..6e889908003da 100644
--- a/bolt/lib/Profile/DataAggregator.cpp
+++ b/bolt/lib/Profile/DataAggregator.cpp
@@ -1216,54 +1216,54 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
ErrorOr<StringRef> TypeOrErr = parseString(FieldSeparator);
if (std::error_code EC = TypeOrErr.getError())
return EC;
- enum AggregatedLBREntry { TRACE, BRANCH, FT, FT_EXTERNAL_ORIGIN, INVALID };
+ enum AggregatedLBREntry {
+ TRACE,
+ SAMPLE,
+ BRANCH,
+ FT,
+ FT_EXTERNAL_ORIGIN,
+ INVALID
+ };
auto Type = StringSwitch<AggregatedLBREntry>(TypeOrErr.get())
.Case("T", TRACE)
+ .Case("S", SAMPLE)
.Case("B", BRANCH)
.Case("F", FT)
.Case("f", FT_EXTERNAL_ORIGIN)
.Default(INVALID);
if (Type == INVALID) {
- reportError("expected T, B, F or f");
+ reportError("expected T, S, B, F or f");
return make_error_code(llvm::errc::io_error);
}
- while (checkAndConsumeFS()) {
- }
- ErrorOr<Location> From = parseLocationOrOffset();
- if (std::error_code EC = From.getError())
- return EC;
+ std::optional<Location> Addrs[3];
+ int AddrNum = 2;
+ if (Type == TRACE)
+ AddrNum = 3;
+ else if (Type == SAMPLE)
+ AddrNum = 1;
- while (checkAndConsumeFS()) {
- }
- ErrorOr<Location> To = parseLocationOrOffset();
- if (std::error_code EC = To.getError())
- return EC;
+ int64_t Counters[2];
+ int CounterNum = 1;
+ if (Type == BRANCH)
+ CounterNum = 2;
- ErrorOr<Location> TraceFtEnd = std::error_code();
- if (Type == AggregatedLBREntry::TRACE) {
+ for (int I = 0; I < AddrNum; ++I) {
while (checkAndConsumeFS()) {
}
- TraceFtEnd = parseLocationOrOffset();
- if (std::error_code EC = TraceFtEnd.getError())
- return EC;
- }
-
- while (checkAndConsumeFS()) {
+ if (ErrorOr<Location> Addr = parseLocationOrOffset())
+ Addrs[I] = Addr.get();
+ else
+ return Addr.getError();
}
- ErrorOr<int64_t> Frequency =
- parseNumberField(FieldSeparator, Type != AggregatedLBREntry::BRANCH);
- if (std::error_code EC = Frequency.getError())
- return EC;
- uint64_t Mispreds = 0;
- if (Type == AggregatedLBREntry::BRANCH) {
+ for (int I = 0; I < CounterNum; ++I) {
while (checkAndConsumeFS()) {
}
- ErrorOr<int64_t> MispredsOrErr = parseNumberField(FieldSeparator, true);
- if (std::error_code EC = MispredsOrErr.getError())
- return EC;
- Mispreds = static_cast<uint64_t>(MispredsOrErr.get());
+ if (ErrorOr<int64_t> Count = parseNumberField(FieldSeparator, I + 1 == CounterNum))
+ Counters[I] = Count.get();
+ else
+ return Count.getError();
}
if (!checkAndConsumeNewLine()) {
@@ -1271,16 +1271,25 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
return make_error_code(llvm::errc::io_error);
}
- BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(From->Offset);
- BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(To->Offset);
+ const uint64_t FromOffset = Addrs[0]->Offset;
+ BinaryFunction *FromFunc = getBinaryFunctionContainingAddress(FromOffset);
+ if (FromFunc)
+ FromFunc->setHasProfileAvailable();
- for (BinaryFunction *BF : {FromFunc, ToFunc})
- if (BF)
- BF->setHasProfileAvailable();
+ if (Type == SAMPLE) {
+ BasicSamples[FromOffset] += Counters[0];
+ return std::error_code();
+ }
+
+ const uint64_t ToOffset = Addrs[1]->Offset;
+ BinaryFunction *ToFunc = getBinaryFunctionContainingAddress(ToOffset);
+ if (ToFunc)
+ ToFunc->setHasProfileAvailable();
- uint64_t Count = static_cast<uint64_t>(Frequency.get());
+ int64_t Count = Counters[0];
+ int64_t Mispreds = Counters[1];
- Trace Trace(From->Offset, To->Offset);
+ Trace Trace(FromOffset, ToOffset);
// Taken trace
if (Type == TRACE || Type == BRANCH) {
TakenBranchInfo &Info = BranchLBRs[Trace];
@@ -1291,8 +1300,9 @@ std::error_code DataAggregator::parseAggregatedLBREntry() {
}
// Construct fallthrough part of the trace
if (Type == TRACE) {
- Trace.From = To->Offset;
- Trace.To = TraceFtEnd->Offset;
+ const uint64_t TraceFtEndOffset = Addrs[2]->Offset;
+ Trace.From = ToOffset;
+ Trace.To = TraceFtEndOffset;
Type = FromFunc == ToFunc ? FT : FT_EXTERNAL_ORIGIN;
}
// Add fallthrough trace
diff --git a/bolt/test/X86/Inputs/pre-aggregated-basic.txt b/bolt/test/X86/Inputs/pre-aggregated-basic.txt
new file mode 100644
index 0000000000000..28bcacca70ee1
--- /dev/null
+++ b/bolt/test/X86/Inputs/pre-aggregated-basic.txt
@@ -0,0 +1,18 @@
+S 4005f0 1
+S 4005f0 1
+S 400610 1
+S 400ad1 2
+S 400b10 1
+S 400bb7 1
+S 400bbc 2
+S 400d90 1
+S 400dae 1
+S 400e00 2
+S 401170 22
+S 401180 58
+S 4011a0 33
+S 4011a9 33
+S 4011ad 58
+S 4011b2 22
+S X:7f36d18d60c0 2
+S X:7f36d18f2ce0 1
diff --git a/bolt/test/X86/pre-aggregated-perf.test b/bolt/test/X86/pre-aggregated-perf.test
index c05a06bf74945..63488a9062bdd 100644
--- a/bolt/test/X86/pre-aggregated-perf.test
+++ b/bolt/test/X86/pre-aggregated-perf.test
@@ -57,6 +57,14 @@ RUN: llvm-bolt %t.exe -o %t.bolt.yaml --pa -p %p/Inputs/pre-aggregated.txt \
RUN: --aggregate-only --profile-format=yaml --profile-use-dfs
RUN: cat %t.bolt.yaml | FileCheck %s -check-prefix=NEWFORMAT
+## Test pre-aggregated basic profile
+RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
+RUN: 2>&1 | FileCheck %s --check-prefix=BASIC-ERROR
+RUN: perf2bolt %t.exe -o %t --pa -p %p/Inputs/pre-aggregated-basic.txt -o %t.ba \
+RUN: -nl 2>&1 | FileCheck %s --check-prefix=BASIC-SUCCESS
+BASIC-ERROR: BOLT-INFO: 0 out of 7 functions in the binary (0.0%) have non-empty execution profile
+BASIC-SUCCESS: BOLT-INFO: 4 out of 7 functions in the binary (57.1%) have non-empty execution profile
+
PERF2BOLT: 0 [unknown] 7f36d18d60c0 1 main 53c 0 2
PERF2BOLT: 1 main 451 1 SolveCubic 0 0 2
PERF2BOLT: 1 main 490 0 [unknown] 4005f0 0 1
diff --git a/bolt/test/link_fdata.py b/bolt/test/link_fdata.py
index bcf9a777922d5..6a391c10b9481 100755
--- a/bolt/test/link_fdata.py
+++ b/bolt/test/link_fdata.py
@@ -36,9 +36,9 @@
fdata_pat = re.compile(r"([01].*) (?P<exec>\d+) (?P<mispred>\d+)")
# Pre-aggregated profile:
-# {T|B|F|f} [<start_id>:]<start_offset> [<end_id>:]<end_offset> [<ft_end>]
-# <count> [<mispred_count>]
-preagg_pat = re.compile(r"(?P<type>[TBFf]) (?P<offsets_count>.*)")
+# {T|S|B|F|f} <start> [<end>] [<ft_end>] <count> [<mispred_count>]
+# <loc>: [<id>:]<offset>
+preagg_pat = re.compile(r"(?P<type>[TSBFf]) (?P<offsets_count>.*)")
# No-LBR profile:
# <is symbol?> <closest elf symbol or DSO name> <relative address> <count>
|
✅ With the latest revision this PR passed the C/C++ code formatter. |
Created using spr 1.3.4
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Define a pre-aggregated basic sample format:
-nl
flag is required to use parsed basic samples.Test Plan: update pre-aggregated-perf.test