Skip to content

[BOLT] Extend calculateEmittedSize() for block size calculation #73076

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Nov 23, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions bolt/include/bolt/Core/BinaryContext.h
Original file line number Diff line number Diff line change
Expand Up @@ -1230,6 +1230,9 @@ class BinaryContext {
///
/// Return the pair where the first size is for the main part, and the second
/// size is for the cold one.
/// Modify BinaryBasicBlock::OutputAddressRange for each basic block in the
/// function in place so that BinaryBasicBlock::getOutputSize() gives the
/// emitted size of the basic block.
std::pair<size_t, size_t> calculateEmittedSize(BinaryFunction &BF,
bool FixBranches = true);

Expand Down
34 changes: 28 additions & 6 deletions bolt/lib/Core/BinaryContext.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2331,14 +2331,36 @@ BinaryContext::calculateEmittedSize(BinaryFunction &BF, bool FixBranches) {
MCAsmLayout Layout(Assembler);
Assembler.layout(Layout);

// Obtain fragment sizes.
std::vector<uint64_t> FragmentSizes;
// Main fragment size.
const uint64_t HotSize =
Layout.getSymbolOffset(*EndLabel) - Layout.getSymbolOffset(*StartLabel);
const uint64_t ColdSize =
std::accumulate(SplitLabels.begin(), SplitLabels.end(), 0ULL,
[&](const uint64_t Accu, const LabelRange &Labels) {
return Accu + Layout.getSymbolOffset(*Labels.second) -
Layout.getSymbolOffset(*Labels.first);
});
FragmentSizes.push_back(HotSize);
// Split fragment sizes.
uint64_t ColdSize = 0;
for (const auto &Labels : SplitLabels) {
uint64_t Size = Layout.getSymbolOffset(*Labels.second) -
Layout.getSymbolOffset(*Labels.first);
FragmentSizes.push_back(Size);
ColdSize += Size;
}

// Populate new start and end offsets of each basic block.
uint64_t FragmentIndex = 0;
for (FunctionFragment &FF : BF.getLayout().fragments()) {
BinaryBasicBlock *PrevBB = nullptr;
for (BinaryBasicBlock *BB : FF) {
const uint64_t BBStartOffset = Layout.getSymbolOffset(*(BB->getLabel()));
BB->setOutputStartAddress(BBStartOffset);
if (PrevBB)
PrevBB->setOutputEndAddress(BBStartOffset);
PrevBB = BB;
}
if (PrevBB)
PrevBB->setOutputEndAddress(FragmentSizes[FragmentIndex]);
FragmentIndex++;
}

// Clean-up the effect of the code emission.
for (const MCSymbol &Symbol : Assembler.symbols()) {
Expand Down
12 changes: 12 additions & 0 deletions bolt/lib/Core/BinaryFunction.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,13 @@ cl::opt<bool>
cl::desc("try to preserve basic block alignment"),
cl::cat(BoltOptCategory));

// Hidden option: when enabled, BinaryFunction::print additionally prints the
// output address range and output size of every basic block.
// Note: the two description literals are concatenated by the compiler, so the
// first one must end with a space to keep "when" and "BinaryFunction" apart.
static cl::opt<bool> PrintOutputAddressRange(
    "print-output-address-range",
    cl::desc(
        "print output address range for each basic block in the function when "
        "BinaryFunction::print is called"),
    cl::Hidden, cl::cat(BoltOptCategory));

cl::opt<bool>
PrintDynoStats("dyno-stats",
cl::desc("print execution info based on profile"),
Expand Down Expand Up @@ -510,6 +517,11 @@ void BinaryFunction::print(raw_ostream &OS, std::string Annotation) {
OS << BB->getName() << " (" << BB->size()
<< " instructions, align : " << BB->getAlignment() << ")\n";

if (opts::PrintOutputAddressRange)
OS << formatv(" Output Address Range: [{0:x}, {1:x}) ({2} bytes)\n",
BB->getOutputAddressRange().first,
BB->getOutputAddressRange().second, BB->getOutputSize());

if (isEntryPoint(*BB)) {
if (MCSymbol *EntrySymbol = getSecondaryEntryPointSymbol(*BB))
OS << " Secondary Entry Point: " << EntrySymbol->getName() << '\n';
Expand Down
101 changes: 101 additions & 0 deletions bolt/test/X86/calculate-emitted-block-size.s
Original file line number Diff line number Diff line change
@@ -0,0 +1,101 @@
# Test that BinaryContext::calculateEmittedSize updates
# BinaryBasicBlock::OutputAddressRange in place, so that the emitted size
# of each basic block is given by BinaryBasicBlock::getOutputSize().

# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --split-strategy=all \
# RUN: --print-split --print-only=chain --print-output-address-range \
# RUN: --data=%t.fdata --reorder-blocks=ext-tsp \
# RUN: 2>&1 | FileCheck --check-prefix=SPLITALL %s
# RUN: llvm-mc --filetype=obj --triple x86_64-unknown-unknown %s -o %t.o
# RUN: link_fdata %s %t.o %t.fdata
# RUN: llvm-strip --strip-unneeded %t.o
# RUN: %clang %cflags %t.o -o %t.exe -Wl,-q
# RUN: llvm-bolt %t.exe -o %t.bolt --split-functions --print-split \
# RUN: --print-only=chain --print-output-address-range \
# RUN: --data=%t.fdata --reorder-blocks=ext-tsp \
# RUN: 2>&1 | FileCheck --check-prefix=SPLITHOTCOLD %s

# SPLITALL: {{^\.LBB00}}
# SPLITALL: Output Address Range: [0x0, 0x12) (18 bytes)
# SPLITALL: {{^\.LFT0}}
# SPLITALL: Output Address Range: [0x0, 0xa) (10 bytes)
# SPLITALL: {{^\.Ltmp1}}
# SPLITALL: Output Address Range: [0x0, 0x2) (2 bytes)
# SPLITALL: {{^\.Ltmp0}}
# SPLITALL: Output Address Range: [0x0, 0x10) (16 bytes)
# SPLITALL: {{^\.Ltmp2}}
# SPLITALL: Output Address Range: [0x0, 0x8) (8 bytes)
# SPLITALL: {{^\.LFT1}}
# SPLITALL: Output Address Range: [0x0, 0x8) (8 bytes)

# SPLITHOTCOLD: {{^\.LBB00}}
# SPLITHOTCOLD: Output Address Range: [0x0, 0x9) (9 bytes)
# SPLITHOTCOLD: {{^\.LFT0}}
# SPLITHOTCOLD: Output Address Range: [0x9, 0xe) (5 bytes)
# SPLITHOTCOLD: {{^\.Ltmp1}}
# SPLITHOTCOLD: Output Address Range: [0xe, 0x10) (2 bytes)
# SPLITHOTCOLD: {{^\.Ltmp0}}
# SPLITHOTCOLD: Output Address Range: [0x10, 0x1b) (11 bytes)
# SPLITHOTCOLD: {{^\.Ltmp2}}
# SPLITHOTCOLD: Output Address Range: [0x1b, 0x20) (5 bytes)
# SPLITHOTCOLD: {{^\.LFT1}}
# SPLITHOTCOLD: Output Address Range: [0x0, 0x8) (8 bytes)

# chain: test function with a small branching control-flow graph.
# Takes its argument in %edi and leaves its result in %eax:
#   %edi <  2 -> falls through to LLfast and returns 5
#   %edi >= 2 -> jumps to LLchain_start and returns 11 (10 + 1)
# The FDATA comment lines attach profile data to the branches; they are
# consumed by link_fdata (see the test's run lines) and must stay verbatim.
# NOTE(review): the trailing number on each FDATA line looks like the branch
# execution count (500 on the LLfast path, 10 on the LLchain_start path, 0 into
# LLcold) - confirm against the BOLT fdata format.
.text
.globl chain
.type chain, @function
chain:
pushq %rbp
movq %rsp, %rbp
cmpl $2, %edi
# The label below names the branch instruction so FDATA can refer to it.
LLentry_LLchain_start:
jge LLchain_start
# FDATA: 1 chain #LLentry_LLchain_start# 1 chain #LLchain_start# 0 10
# FDATA: 1 chain #LLentry_LLchain_start# 1 chain #LLfast# 0 500
# Fall-through path of the first branch: return 5.
LLfast:
movl $5, %eax
LLfast_LLexit:
jmp LLexit
# FDATA: 1 chain #LLfast_LLexit# 1 chain #LLexit# 0 500
# Taken path of the first branch.
LLchain_start:
movl $10, %eax
# movl does not modify flags, so this jge re-tests the cmpl result from the
# function entry.
LLchain_start_LLchain1:
jge LLchain1
# FDATA: 1 chain #LLchain_start_LLchain1# 1 chain #LLchain1# 0 10
# FDATA: 1 chain #LLchain_start_LLchain1# 1 chain #LLcold# 0 0
# Fall-through block given a zero count in the profile above.
LLcold:
addl $1, %eax
LLchain1:
addl $1, %eax
LLchain1_LLexit:
jmp LLexit
# FDATA: 1 chain #LLchain1_LLexit# 1 chain #LLexit# 0 10
# Common epilogue shared by both paths.
LLexit:
popq %rbp
ret
# End marker used only to compute the symbol size.
LLchain_end:
.size chain, LLchain_end-chain


# main: driver that calls chain twice, first with %edi = 1 and then with
# %edi = 4, and returns 0 in %eax.
# The FDATA comment lines attach profile data to the two call sites and must
# stay verbatim.
# NOTE(review): the trailing numbers (500 and 10) look like call counts that
# match the branch counts recorded inside chain - confirm against the BOLT
# fdata format.
.globl main
.type main, @function
main:
pushq %rbp
movq %rsp, %rbp
# First call: argument 1.
movl $1, %edi
LLmain_chain1:
call chain
# FDATA: 1 main #LLmain_chain1# 1 chain 0 0 500
# Second call: argument 4.
movl $4, %edi
LLmain_chain2:
call chain
# FDATA: 1 main #LLmain_chain2# 1 chain 0 0 10
# Return 0.
xorl %eax, %eax
popq %rbp
retq
# End marker used only to compute the symbol size.
.Lmain_end:
.size main, .Lmain_end-main