Skip to content

Commit 5a29887

Browse files
authored
[BOLT] Add writing support for Linux kernel ORC (#80950)
Update ORC information based on the new code layout and emit the corresponding ORC sections for the Linux kernel. We rewrite the ORC sections in place, which limits the size of the new section contents. Since ORC info changes with the new code layout and the number of ORC entries can grow, we free up space in the tables by removing redundant ORC terminators. As a result, we effectively emit fewer entries and have to append duplicate terminators at the end to match the original section sizes. Ideally, we would update the ORC boundaries to reflect the reduced size and speed up runtime lookup, but that would require relocations, and the benefit would be marginal, if any.
1 parent fde4b80 commit 5a29887

File tree

2 files changed

+239
-38
lines changed

2 files changed

+239
-38
lines changed

bolt/lib/Rewrite/LinuxKernelRewriter.cpp

Lines changed: 196 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,13 @@
1414
#include "bolt/Rewrite/MetadataRewriter.h"
1515
#include "bolt/Rewrite/MetadataRewriters.h"
1616
#include "bolt/Utils/CommandLineOpts.h"
17+
#include "llvm/Support/BinaryStreamWriter.h"
1718
#include "llvm/Support/CommandLine.h"
19+
#include "llvm/Support/Debug.h"
1820
#include "llvm/Support/Errc.h"
1921

22+
#define DEBUG_TYPE "bolt-linux"
23+
2024
using namespace llvm;
2125
using namespace bolt;
2226

@@ -48,20 +52,25 @@ struct ORCState {
4852
bool operator!=(const ORCState &Other) const { return !(*this == Other); }
4953
};
5054

55+
/// Section terminator ORC entry.
56+
static ORCState NullORC = {0, 0, 0};
57+
5158
/// Basic printer for ORC entry. It does not provide the same level of
5259
/// information as objtool (for now).
5360
inline raw_ostream &operator<<(raw_ostream &OS, const ORCState &E) {
  // Printing is opt-in: without the PrintORC option the stream is returned
  // untouched.
  if (opts::PrintORC) {
    if (E == NullORC)
      // The all-zero entry is shown symbolically rather than field by field.
      OS << "{terminator}";
    else
      OS << format("{sp: %d, bp: %d, info: 0x%x}", E.SPOffset, E.BPOffset,
                   E.Info);
  }

  return OS;
}
5971

6072
namespace {
6173

62-
/// Section terminator ORC entry.
63-
static ORCState NullORC = {0, 0, 0};
64-
6574
class LinuxKernelRewriter final : public MetadataRewriter {
6675
/// Linux Kernel special sections point to a specific instruction in many
6776
/// cases. Unlike SDTMarkerInfo, these markers can come from different
@@ -90,6 +99,8 @@ class LinuxKernelRewriter final : public MetadataRewriter {
9099
BinaryFunction *BF; /// Binary function corresponding to the entry.
91100
ORCState ORC; /// Stack unwind info in ORC format.
92101

102+
/// ORC entries are sorted by their IPs. Terminator entries (NullORC)
103+
/// should precede other entries with the same address.
93104
bool operator<(const ORCListEntry &Other) const {
94105
if (IP < Other.IP)
95106
return 1;
@@ -102,6 +113,9 @@ class LinuxKernelRewriter final : public MetadataRewriter {
102113
using ORCListType = std::vector<ORCListEntry>;
103114
ORCListType ORCEntries;
104115

116+
/// Number of entries in the input file ORC sections.
117+
uint64_t NumORCEntries = 0;
118+
105119
/// Insert an LKMarker for a given code pointer \p PC from a non-code section
106120
/// \p SectionName.
107121
void insertLKMarker(uint64_t PC, uint64_t SectionOffset,
@@ -464,10 +478,9 @@ Error LinuxKernelRewriter::readORCTables() {
464478
return createStringError(errc::executable_format_error,
465479
"missing ORC section");
466480

467-
const uint64_t NumEntries =
468-
ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
469-
if (ORCUnwindSection->getSize() != NumEntries * ORC_UNWIND_ENTRY_SIZE ||
470-
ORCUnwindIPSection->getSize() != NumEntries * ORC_UNWIND_IP_ENTRY_SIZE)
481+
NumORCEntries = ORCUnwindIPSection->getSize() / ORC_UNWIND_IP_ENTRY_SIZE;
482+
if (ORCUnwindSection->getSize() != NumORCEntries * ORC_UNWIND_ENTRY_SIZE ||
483+
ORCUnwindIPSection->getSize() != NumORCEntries * ORC_UNWIND_IP_ENTRY_SIZE)
471484
return createStringError(errc::executable_format_error,
472485
"ORC entries number mismatch detected");
473486

@@ -481,7 +494,7 @@ Error LinuxKernelRewriter::readORCTables() {
481494
DataExtractor::Cursor ORCCursor(0);
482495
DataExtractor::Cursor IPCursor(0);
483496
uint64_t PrevIP = 0;
484-
for (uint32_t Index = 0; Index < NumEntries; ++Index) {
497+
for (uint32_t Index = 0; Index < NumORCEntries; ++Index) {
485498
const uint64_t IP =
486499
IPSectionAddress + IPCursor.tell() + (int32_t)IPDE.getU32(IPCursor);
487500

@@ -505,35 +518,31 @@ Error LinuxKernelRewriter::readORCTables() {
505518
Entry.ORC.SPOffset = (int16_t)OrcDE.getU16(ORCCursor);
506519
Entry.ORC.BPOffset = (int16_t)OrcDE.getU16(ORCCursor);
507520
Entry.ORC.Info = (int16_t)OrcDE.getU16(ORCCursor);
521+
Entry.BF = nullptr;
508522

509523
// Consume the status of the cursor.
510524
if (!ORCCursor)
511525
return createStringError(errc::executable_format_error,
512526
"out of bounds while reading ORC");
513527

528+
if (Entry.ORC == NullORC)
529+
continue;
530+
514531
BinaryFunction *&BF = Entry.BF;
515532
BF = BC.getBinaryFunctionContainingAddress(IP, /*CheckPastEnd*/ true);
516533

517534
// If the entry immediately pointing past the end of the function is not
518535
// the terminator entry, then it does not belong to this function.
519-
if (BF && BF->getAddress() + BF->getSize() == IP && Entry.ORC != NullORC)
536+
if (BF && BF->getAddress() + BF->getSize() == IP)
520537
BF = 0;
521538

522-
// If terminator entry points to the start of the function, then it belongs
523-
// to a different function that contains the previous IP.
524-
if (BF && BF->getAddress() == IP && Entry.ORC == NullORC)
525-
BF = BC.getBinaryFunctionContainingAddress(IP - 1);
526-
527539
if (!BF) {
528540
if (opts::Verbosity)
529541
errs() << "BOLT-WARNING: no binary function found matching ORC 0x"
530542
<< Twine::utohexstr(IP) << ": " << Entry.ORC << '\n';
531543
continue;
532544
}
533545

534-
if (Entry.ORC == NullORC)
535-
continue;
536-
537546
BF->setHasORC(true);
538547

539548
if (!BF->hasInstructions())
@@ -556,9 +565,7 @@ Error LinuxKernelRewriter::readORCTables() {
556565
BC.MIB->addAnnotation(*Inst, "ORC", Entry.ORC);
557566
}
558567

559-
// Older kernels could contain unsorted tables in the file as the tables were
560-
// sorted during boot time.
561-
llvm::sort(ORCEntries);
568+
outs() << "BOLT-INFO: parsed " << NumORCEntries << " ORC entries\n";
562569

563570
if (opts::DumpORC) {
564571
outs() << "BOLT-INFO: ORC unwind information:\n";
@@ -570,10 +577,51 @@ Error LinuxKernelRewriter::readORCTables() {
570577
}
571578
}
572579

580+
// Add entries for functions that don't have explicit ORC info at the start.
581+
// We'll have the correct info for them even if ORC for the preceding function
582+
// changes.
583+
ORCListType NewEntries;
584+
for (BinaryFunction &BF : llvm::make_second_range(BC.getBinaryFunctions())) {
585+
auto It = llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
586+
return E.IP <= BF.getAddress();
587+
});
588+
if (It != ORCEntries.begin())
589+
--It;
590+
591+
if (It->BF == &BF)
592+
continue;
593+
594+
if (It->ORC == NullORC && It->IP == BF.getAddress()) {
595+
assert(!It->BF);
596+
It->BF = &BF;
597+
continue;
598+
}
599+
600+
NewEntries.push_back({BF.getAddress(), &BF, It->ORC});
601+
if (It->ORC != NullORC)
602+
BF.setHasORC(true);
603+
}
604+
605+
llvm::copy(NewEntries, std::back_inserter(ORCEntries));
606+
llvm::sort(ORCEntries);
607+
608+
if (opts::DumpORC) {
609+
outs() << "BOLT-INFO: amended ORC unwind information:\n";
610+
for (const ORCListEntry &E : ORCEntries) {
611+
outs() << "0x" << Twine::utohexstr(E.IP) << ": " << E.ORC;
612+
if (E.BF)
613+
outs() << ": " << *E.BF;
614+
outs() << '\n';
615+
}
616+
}
617+
573618
return Error::success();
574619
}
575620

576621
Error LinuxKernelRewriter::processORCPostCFG() {
622+
if (!NumORCEntries)
623+
return Error::success();
624+
577625
// Propagate ORC to the rest of the function. We can annotate every
578626
// instruction in every function, but to minimize the overhead, we annotate
579627
// the first instruction in every basic block to reflect the state at the
@@ -593,19 +641,28 @@ Error LinuxKernelRewriter::processORCPostCFG() {
593641
continue;
594642
}
595643

596-
// In case there was no ORC entry that matched the function start
597-
// address, we need to propagate ORC state from the previous entry.
644+
// Get state for the start of the function.
598645
if (!CurrentState) {
646+
// A terminator entry (NullORC) can match the function address. If
647+
// there's also a non-terminator entry, it will be placed after the
648+
// terminator. Hence, we are looking for the last ORC entry that
649+
// matches the address.
599650
auto It =
600651
llvm::partition_point(ORCEntries, [&](const ORCListEntry &E) {
601-
return E.IP < BF.getAddress();
652+
return E.IP <= BF.getAddress();
602653
});
603654
if (It != ORCEntries.begin())
604-
It = std::prev(It);
655+
--It;
656+
657+
assert(It->IP == BF.getAddress() && (!It->BF || It->BF == &BF) &&
658+
"ORC info at function entry expected.");
605659

606-
if (It->ORC == NullORC && BF.hasORC())
660+
if (It->ORC == NullORC && BF.hasORC()) {
607661
errs() << "BOLT-WARNING: ORC unwind info excludes prologue for "
608662
<< BF << '\n';
663+
}
664+
665+
It->BF = &BF;
609666

610667
CurrentState = It->ORC;
611668
if (It->ORC != NullORC)
@@ -623,9 +680,121 @@ Error LinuxKernelRewriter::processORCPostCFG() {
623680
}
624681

625682
/// Re-emit the ORC unwind tables in place, using the sizes and file offsets of
/// the input sections.
///
/// Entries for functions that are not re-emitted keep their original IP and
/// state. For emitted functions the entries are regenerated from the "ORC"
/// annotations on instructions. Consecutive entries with identical state are
/// collapsed, and the tail of both tables is padded with terminator entries
/// until the original table size is reached.
///
/// Returns an error if more entries are needed than the input tables held, if
/// there is no function to anchor the trailing terminators to, or if writing
/// to a section buffer fails.
Error LinuxKernelRewriter::rewriteORCTables() {
  if (!NumORCEntries)
    return Error::success();

  // The trailing terminators are placed past the end of the last function.
  // Check that one exists before touching any section, instead of
  // dereferencing rbegin() of an empty container further down.
  if (BC.getBinaryFunctions().empty())
    return createStringError(errc::executable_format_error,
                             "no functions found while rewriting ORC tables");

  // Update ORC sections in-place. As we change the code, the number of ORC
  // entries may increase for some functions. However, as we remove terminator
  // redundancy (see below), more space is freed up and we should always be able
  // to fit new ORC tables in the reserved space.
  auto createInPlaceWriter = [&](BinarySection &Section) -> BinaryStreamWriter {
    const size_t Size = Section.getSize();
    // Value-initialize so the buffer never holds indeterminate bytes, even if
    // emission stops early with an error.
    // NOTE(review): presumably updateContents() takes ownership of the
    // buffer -- confirm against BinarySection.
    uint8_t *NewContents = new uint8_t[Size]();
    Section.updateContents(NewContents, Size);
    // Keep the section at its input file offset.
    Section.setOutputFileOffset(Section.getInputFileOffset());
    return BinaryStreamWriter({NewContents, Size}, BC.AsmInfo->isLittleEndian()
                                                       ? endianness::little
                                                       : endianness::big);
  };
  BinaryStreamWriter UnwindWriter = createInPlaceWriter(*ORCUnwindSection);
  BinaryStreamWriter UnwindIPWriter = createInPlaceWriter(*ORCUnwindIPSection);

  uint64_t NumEmitted = 0;
  std::optional<ORCState> LastEmittedORC;

  // Append one entry to both tables. An entry whose state equals the
  // previously emitted one is dropped unless \p Force is set. When \p Label is
  // given, a PC32 relocation against it is registered at the current offset of
  // the IP table; callers pass 0 for \p IP in that case.
  // NOTE(review): presumably the relocation replaces the value written for
  // such entries -- confirm.
  auto emitORCEntry = [&](const uint64_t IP, const ORCState &ORC,
                          MCSymbol *Label = nullptr,
                          bool Force = false) -> Error {
    if (LastEmittedORC && ORC == *LastEmittedORC && !Force)
      return Error::success();

    LastEmittedORC = ORC;

    if (++NumEmitted > NumORCEntries)
      return createStringError(errc::executable_format_error,
                               "exceeded the number of allocated ORC entries");

    if (Label)
      ORCUnwindIPSection->addRelocation(UnwindIPWriter.getOffset(), Label,
                                        Relocation::getPC32(), /*Addend*/ 0);

    // IP table values are 32-bit offsets relative to the entry's own address.
    const int32_t IPValue =
        IP - ORCUnwindIPSection->getAddress() - UnwindIPWriter.getOffset();
    if (Error E = UnwindIPWriter.writeInteger(IPValue))
      return E;

    if (Error E = UnwindWriter.writeInteger(ORC.SPOffset))
      return E;
    if (Error E = UnwindWriter.writeInteger(ORC.BPOffset))
      return E;
    if (Error E = UnwindWriter.writeInteger(ORC.Info))
      return E;

    return Error::success();
  };

  // Emit new ORC entries for the emitted function: one entry at every
  // instruction whose annotated state differs from the state currently in
  // effect, walking the blocks in layout order.
  auto emitORC = [&](const BinaryFunction &BF) -> Error {
    assert(!BF.isSplit() && "Split functions not supported by ORC writer yet.");

    ORCState CurrentState = NullORC;
    for (BinaryBasicBlock *BB : BF.getLayout().blocks()) {
      for (MCInst &Inst : *BB) {
        ErrorOr<ORCState> ErrorOrState =
            BC.MIB->tryGetAnnotationAs<ORCState>(Inst, "ORC");
        if (!ErrorOrState || *ErrorOrState == CurrentState)
          continue;

        // Issue label for the instruction, reusing an existing one if present.
        MCSymbol *Label = BC.MIB->getLabel(Inst);
        if (!Label) {
          Label = BC.Ctx->createTempSymbol("__ORC_");
          BC.MIB->setLabel(Inst, Label);
        }

        if (Error E = emitORCEntry(0, *ErrorOrState, Label))
          return E;

        CurrentState = *ErrorOrState;
      }
    }

    return Error::success();
  };

  for (ORCListEntry &Entry : ORCEntries) {
    // Emit original entries for functions that we haven't modified.
    if (!Entry.BF || !BC.shouldEmit(*Entry.BF)) {
      // Emit terminator only if it marks the start of a function.
      if (Entry.ORC == NullORC && !Entry.BF)
        continue;
      if (Error E = emitORCEntry(Entry.IP, Entry.ORC))
        return E;
      continue;
    }

    // Emit all ORC entries for a function referenced by an entry and skip over
    // the rest of entries for this function by resetting its ORC attribute.
    if (Entry.BF->hasORC()) {
      if (Error E = emitORC(*Entry.BF))
        return E;
      Entry.BF->setHasORC(false);
    }
  }

  LLVM_DEBUG(dbgs() << "BOLT-DEBUG: emitted " << NumEmitted
                    << " ORC entries\n");

  // Replicate terminator entry at the end of sections to match the original
  // table sizes. Force is required because consecutive identical entries are
  // otherwise dropped.
  const BinaryFunction &LastBF = BC.getBinaryFunctions().rbegin()->second;
  const uint64_t LastIP = LastBF.getAddress() + LastBF.getMaxSize();
  while (UnwindWriter.bytesRemaining()) {
    if (Error E = emitORCEntry(LastIP, NullORC, nullptr, /*Force*/ true))
      return E;
  }

  return Error::success();
}
797+
629798
} // namespace
630799

631800
std::unique_ptr<MetadataRewriter>

0 commit comments

Comments
 (0)