Skip to content

[BOLT] Add pseudo probe inline tree to YAML profile #107137

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
81 changes: 65 additions & 16 deletions bolt/include/bolt/Profile/ProfileYAMLMapping.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,24 +95,29 @@ template <> struct MappingTraits<bolt::SuccessorInfo> {

namespace bolt {
/// Pseudo probe record stored per basic block in the YAML profile (the
/// element type of BinaryBasicBlockProfile::PseudoProbes).
///
/// NOTE(review): this listing appears to show both the removed and the added
/// side of a diff hunk without +/- markers. GUID/Index/Type and the first
/// return statement of operator!= look like the removed side - confirm against
/// the real header before relying on any member documented here.
struct PseudoProbeInfo {
llvm::yaml::Hex64 GUID;
uint64_t Index;
uint8_t Type;
// Serialized under the "id" key; defaults to 0.
uint32_t InlineTreeIndex = 0;
uint64_t BlockMask = 0; // bitset with probe indices from 1 to 64
std::vector<uint64_t> BlockProbes; // block probes with indices above 64
// Serialized under the "call" key.
std::vector<uint64_t> CallProbes;
// Serialized under the "icall" key.
std::vector<uint64_t> IndCallProbes;
// Serialized under the "ids" key.
std::vector<uint32_t> InlineTreeNodes;

// Compares GUID and Index only.
bool operator==(const PseudoProbeInfo &Other) const {
return GUID == Other.GUID && Index == Other.Index;
}
// NOTE(review): as listed, the second return statement below is unreachable
// and compares for equality inside operator!=; it presumably belongs to the
// updated operator== (diff residue) - confirm.
bool operator!=(const PseudoProbeInfo &Other) const {
return !(*this == Other);
return InlineTreeIndex == Other.InlineTreeIndex &&
BlockProbes == Other.BlockProbes && CallProbes == Other.CallProbes &&
IndCallProbes == Other.IndCallProbes;
}
};
} // end namespace bolt

template <> struct MappingTraits<bolt::PseudoProbeInfo> {
/// Maps PseudoProbeInfo fields to YAML keys. The order of the calls is the
/// order in which the keys are visited.
///
/// NOTE(review): the key "id" is mapped twice below (required -> Index,
/// optional -> InlineTreeIndex). This looks like the removed and added sides
/// of a diff hunk shown together; confirm which set of keys is current.
static void mapping(IO &YamlIO, bolt::PseudoProbeInfo &PI) {
YamlIO.mapRequired("guid", PI.GUID);
YamlIO.mapRequired("id", PI.Index);
YamlIO.mapRequired("type", PI.Type);
// Optional keys: each call passes the default value as its third argument.
YamlIO.mapOptional("blx", PI.BlockMask, 0);
YamlIO.mapOptional("blk", PI.BlockProbes, std::vector<uint64_t>());
YamlIO.mapOptional("call", PI.CallProbes, std::vector<uint64_t>());
YamlIO.mapOptional("icall", PI.IndCallProbes, std::vector<uint64_t>());
YamlIO.mapOptional("id", PI.InlineTreeIndex, 0);
YamlIO.mapOptional("ids", PI.InlineTreeNodes, std::vector<uint32_t>());
}

static const bool flow = true;
Expand Down Expand Up @@ -158,15 +163,35 @@ template <> struct MappingTraits<bolt::BinaryBasicBlockProfile> {
std::vector<bolt::CallSiteInfo>());
YamlIO.mapOptional("succ", BBP.Successors,
std::vector<bolt::SuccessorInfo>());
YamlIO.mapOptional("pseudo_probes", BBP.PseudoProbes,
YamlIO.mapOptional("probes", BBP.PseudoProbes,
std::vector<bolt::PseudoProbeInfo>());
}
};

namespace bolt {
/// One entry of a function's "inline_tree" sequence in the YAML profile.
///
/// Every member carries a default member initializer equal to the default
/// used by the YAML mapping for the corresponding key ("p", "cs", "g"), so a
/// default-constructed node never holds indeterminate values. The struct
/// remains an aggregate, so brace initialization in declaration order
/// (ParentIndexDelta, CallSiteProbe, GUIDIndex) keeps working.
struct InlineTreeNode {
  /// Serialized under the "p" key.
  uint32_t ParentIndexDelta = 0;
  /// Serialized under the "cs" key.
  uint32_t CallSiteProbe = 0;
  // Index in PseudoProbeDesc.GUID, UINT32_MAX for same as previous (omitted)
  uint32_t GUIDIndex = UINT32_MAX;

  /// Deliberately reports "not equal" for every pair of nodes, including a
  /// node compared with itself.
  /// NOTE(review): presumably this keeps the YAML writer from treating any
  /// node as equal to a default value - confirm against the YAML I/O usage.
  bool operator==(const InlineTreeNode &) const { return false; }
};
} // end namespace bolt

/// YAML mapping for bolt::InlineTreeNode. All three keys are optional; the
/// third argument of each mapOptional call is the default value for that key.
template <> struct MappingTraits<bolt::InlineTreeNode> {
static void mapping(IO &YamlIO, bolt::InlineTreeNode &ITI) {
// "g": GUIDIndex, default UINT32_MAX.
YamlIO.mapOptional("g", ITI.GUIDIndex, UINT32_MAX);
// "p": ParentIndexDelta, default 0.
YamlIO.mapOptional("p", ITI.ParentIndexDelta, 0);
// "cs": CallSiteProbe, default 0.
YamlIO.mapOptional("cs", ITI.CallSiteProbe, 0);
}

// Per LLVM YAML I/O, a true `flow` member requests flow-style ("{ ... }")
// output for this mapping.
static const bool flow = true;
};
} // end namespace yaml
} // end namespace llvm

LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryBasicBlockProfile)
LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::bolt::InlineTreeNode)

namespace llvm {
namespace yaml {
Expand All @@ -179,8 +204,7 @@ struct BinaryFunctionProfile {
llvm::yaml::Hex64 Hash{0};
uint64_t ExecCount{0};
std::vector<BinaryBasicBlockProfile> Blocks;
llvm::yaml::Hex64 GUID{0};
llvm::yaml::Hex64 PseudoProbeDescHash{0};
std::vector<InlineTreeNode> InlineTree;
bool Used{false};
};
} // end namespace bolt
Expand All @@ -194,9 +218,8 @@ template <> struct MappingTraits<bolt::BinaryFunctionProfile> {
YamlIO.mapRequired("nblocks", BFP.NumBasicBlocks);
YamlIO.mapOptional("blocks", BFP.Blocks,
std::vector<bolt::BinaryBasicBlockProfile>());
YamlIO.mapOptional("guid", BFP.GUID, (uint64_t)0);
YamlIO.mapOptional("pseudo_probe_desc_hash", BFP.PseudoProbeDescHash,
(uint64_t)0);
YamlIO.mapOptional("inline_tree", BFP.InlineTree,
std::vector<bolt::InlineTreeNode>());
}
};

Expand Down Expand Up @@ -246,10 +269,33 @@ template <> struct MappingTraits<bolt::BinaryProfileHeader> {
}
};

namespace bolt {
struct PseudoProbeDesc {
std::vector<Hex64> GUID;
std::vector<Hex64> Hash;
std::vector<uint32_t> GUIDHashIdx; // Index of hash for that GUID in Hash

bool operator==(const PseudoProbeDesc &Other) const {
// Only treat empty Desc as equal
return GUID.empty() && Other.GUID.empty() && Hash.empty() &&
Other.Hash.empty() && GUIDHashIdx.empty() &&
Other.GUIDHashIdx.empty();
}
};
} // end namespace bolt

/// YAML mapping for bolt::PseudoProbeDesc. All three keys are required and
/// are visited in the order "gs", "gh", "hs".
template <> struct MappingTraits<bolt::PseudoProbeDesc> {
static void mapping(IO &YamlIO, bolt::PseudoProbeDesc &PD) {
// "gs": the GUID table.
YamlIO.mapRequired("gs", PD.GUID);
// "gh": for each GUID, the index of its hash in the Hash table.
YamlIO.mapRequired("gh", PD.GUIDHashIdx);
// "hs": the Hash table.
YamlIO.mapRequired("hs", PD.Hash);
}
};
} // end namespace yaml
} // end namespace llvm

LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::BinaryFunctionProfile)
LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::yaml::bolt::PseudoProbeDesc)

namespace llvm {
namespace yaml {
Expand All @@ -258,13 +304,16 @@ namespace bolt {
struct BinaryProfile {
BinaryProfileHeader Header;
std::vector<BinaryFunctionProfile> Functions;
PseudoProbeDesc PseudoProbeDesc;
};
} // namespace bolt

/// YAML mapping for the root bolt::BinaryProfile object.
template <> struct MappingTraits<bolt::BinaryProfile> {
static void mapping(IO &YamlIO, bolt::BinaryProfile &BP) {
YamlIO.mapRequired("header", BP.Header);
YamlIO.mapRequired("functions", BP.Functions);
// Optional key whose default is a default-constructed (empty) descriptor.
// PseudoProbeDesc::operator== only reports equality when both sides are
// empty, so only an empty descriptor matches this default.
YamlIO.mapOptional("pseudo_probe_desc", BP.PseudoProbeDesc,
bolt::PseudoProbeDesc());
}
};

Expand Down
52 changes: 51 additions & 1 deletion bolt/include/bolt/Profile/YAMLProfileWriter.h
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,27 @@ class YAMLProfileWriter {
/// Save execution profile for that instance.
std::error_code writeProfile(const RewriteInstance &RI);

using InlineTreeMapTy =
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>;
/// Lookup tables keyed by 64-bit GUID. Produced together with the YAML
/// PseudoProbeDesc by convertPseudoProbeDesc() and passed to convert() and
/// convertBFInlineTree().
struct InlineTreeDesc {
// Map from a uint64_t key (a GUID, judging by the alias name) to T.
template <typename T> using GUIDMapTy = std::unordered_map<uint64_t, T>;
using GUIDNodeMap = GUIDMapTy<const MCDecodedPseudoProbeInlineTree *>;
using GUIDNumMap = GUIDMapTy<uint32_t>;
// Decoded inline tree node for each top-level function GUID.
GUIDNodeMap TopLevelGUIDToInlineTree;
// NOTE(review): presumably GUID -> index into PseudoProbeDesc::GUID - confirm
// against the implementation.
GUIDNumMap GUIDIdxMap;
// NOTE(review): presumably hash -> index into PseudoProbeDesc::Hash - confirm
// against the implementation.
GUIDNumMap HashIdxMap;
};

/// Build the YAML inline tree for the function identified by \p GUID.
/// Returns the list of YAML inline tree nodes together with a map from
/// decoded inline tree nodes to uint32_t ids (InlineTreeMapTy); that map is
/// what writeBlockProbes() takes to group probes by node.
static std::tuple<std::vector<yaml::bolt::InlineTreeNode>, InlineTreeMapTy>
convertBFInlineTree(const MCPseudoProbeDecoder &Decoder,
const InlineTreeDesc &InlineTree, uint64_t GUID);

/// Build the profile-wide YAML pseudo probe descriptor from
/// \p PseudoProbeDecoder. Returns the YAML descriptor together with the
/// InlineTreeDesc lookup tables consumed by convert() and
/// convertBFInlineTree().
static std::tuple<yaml::bolt::PseudoProbeDesc, InlineTreeDesc>
convertPseudoProbeDesc(const MCPseudoProbeDecoder &PseudoProbeDecoder);

static yaml::bolt::BinaryFunctionProfile
convert(const BinaryFunction &BF, bool UseDFS,
const InlineTreeDesc &InlineTree,
const BoltAddressTranslation *BAT = nullptr);

/// Set CallSiteInfo destination fields from \p Symbol and return a target
Expand All @@ -42,8 +61,39 @@ class YAMLProfileWriter {
setCSIDestination(const BinaryContext &BC, yaml::bolt::CallSiteInfo &CSI,
const MCSymbol *Symbol, const BoltAddressTranslation *BAT,
uint32_t Offset = 0);
};

private:
/// Writer-internal record describing one inline tree node; the element type
/// returned by collectInlineTree(). Distinct from yaml::bolt::InlineTreeNode.
struct InlineTreeNode {
// The decoded inline tree node this record refers to.
const MCDecodedPseudoProbeInlineTree *InlineTree;
uint64_t GUID;
uint64_t Hash;
// NOTE(review): presumably the index of the parent record in the collected
// vector - confirm against collectInlineTree().
uint32_t ParentId;
// NOTE(review): presumably the call site probe at which this node was
// inlined into its parent - confirm against collectInlineTree().
uint32_t InlineSite;
};
static std::vector<InlineTreeNode>
collectInlineTree(const MCPseudoProbeDecoder &Decoder,
const MCDecodedPseudoProbeInlineTree &Root);

// 0 - block probe, 1 - indirect call, 2 - direct call
using ProbeList = std::array<SmallVector<uint64_t, 0>, 3>;
using NodeIdToProbes = DenseMap<uint32_t, ProbeList>;
static std::vector<yaml::bolt::PseudoProbeInfo>
convertNodeProbes(NodeIdToProbes &NodeProbes);

public:
template <typename T>
static std::vector<yaml::bolt::PseudoProbeInfo>
writeBlockProbes(T Probes, const InlineTreeMapTy &InlineTreeNodeId) {
NodeIdToProbes NodeProbes;
for (const MCDecodedPseudoProbe &Probe : Probes) {
auto It = InlineTreeNodeId.find(Probe.getInlineTreeNode());
if (It == InlineTreeNodeId.end())
continue;
NodeProbes[It->second][Probe.getType()].emplace_back(Probe.getIndex());
}
return convertNodeProbes(NodeProbes);
}
};
} // namespace bolt
} // namespace llvm

Expand Down
5 changes: 5 additions & 0 deletions bolt/include/bolt/Utils/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,11 @@ std::string getEscapedName(const StringRef &Name);
/// Return the unescaped name
std::string getUnescapedName(const StringRef &Name);

/// Return a common part for a given \p Name wrt a given \p Suffixes list.
/// Preserve the suffix if \p KeepSuffix is set, only dropping characters
/// following it, otherwise drop the suffix as well.
std::optional<StringRef> getCommonName(const StringRef Name, bool KeepSuffix,
ArrayRef<StringRef> Suffixes);
/// LTO-generated function names take a form:
///
/// <function_name>.lto_priv.<decimal_number>/...
Expand Down
51 changes: 35 additions & 16 deletions bolt/lib/Profile/DataAggregator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ MaxSamples("max-samples",
cl::cat(AggregatorCategory));

extern cl::opt<opts::ProfileFormatKind> ProfileFormat;
extern cl::opt<bool> ProfileUsePseudoProbes;
extern cl::opt<bool> ProfileWritePseudoProbes;
extern cl::opt<std::string> SaveProfile;

cl::opt<bool> ReadPreAggregated(
Expand Down Expand Up @@ -2300,7 +2300,7 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
yaml::bolt::BinaryProfile BP;

const MCPseudoProbeDecoder *PseudoProbeDecoder =
opts::ProfileUsePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;
opts::ProfileWritePseudoProbes ? BC.getPseudoProbeDecoder() : nullptr;

// Fill out the header info.
BP.Header.Version = 1;
Expand All @@ -2321,6 +2321,12 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
BP.Header.Flags = opts::BasicAggregation ? BinaryFunction::PF_SAMPLE
: BinaryFunction::PF_LBR;

// Add probe inline tree nodes.
YAMLProfileWriter::InlineTreeDesc InlineTree;
if (PseudoProbeDecoder)
std::tie(BP.PseudoProbeDesc, InlineTree) =
YAMLProfileWriter::convertPseudoProbeDesc(*PseudoProbeDecoder);

if (!opts::BasicAggregation) {
// Convert profile for functions not covered by BAT
for (auto &BFI : BC.getBinaryFunctions()) {
Expand All @@ -2329,8 +2335,8 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
continue;
if (BAT->isBATFunction(Function.getAddress()))
continue;
BP.Functions.emplace_back(
YAMLProfileWriter::convert(Function, /*UseDFS=*/false, BAT));
BP.Functions.emplace_back(YAMLProfileWriter::convert(
Function, /*UseDFS=*/false, InlineTree, BAT));
}

for (const auto &KV : NamesToBranches) {
Expand Down Expand Up @@ -2403,16 +2409,22 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
YamlBF.Blocks[BlockIndex].ExecCount += BI.Branches;
}
if (PseudoProbeDecoder) {
if ((YamlBF.GUID = BF->getGUID())) {
const MCPseudoProbeFuncDesc *FuncDesc =
PseudoProbeDecoder->getFuncDescForGUID(YamlBF.GUID);
YamlBF.PseudoProbeDescHash = FuncDesc->FuncHash;
DenseMap<const MCDecodedPseudoProbeInlineTree *, uint32_t>
InlineTreeNodeId;
if (BF->getGUID()) {
std::tie(YamlBF.InlineTree, InlineTreeNodeId) =
YAMLProfileWriter::convertBFInlineTree(*PseudoProbeDecoder,
InlineTree, BF->getGUID());
}
// Fetch probes belonging to all fragments
const AddressProbesMap &ProbeMap =
PseudoProbeDecoder->getAddress2ProbesMap();
BinaryFunction::FragmentsSetTy Fragments(BF->Fragments);
Fragments.insert(BF);
DenseMap<
uint32_t,
std::vector<std::reference_wrapper<const MCDecodedPseudoProbe>>>
BlockProbes;
for (const BinaryFunction *F : Fragments) {
const uint64_t FuncAddr = F->getAddress();
for (const MCDecodedPseudoProbe &Probe :
Expand All @@ -2421,17 +2433,24 @@ std::error_code DataAggregator::writeBATYAML(BinaryContext &BC,
const uint32_t InputOffset = BAT->translate(
FuncAddr, OutputAddress - FuncAddr, /*IsBranchSrc=*/true);
const unsigned BlockIndex = getBlock(InputOffset).second;
YamlBF.Blocks[BlockIndex].PseudoProbes.emplace_back(
yaml::bolt::PseudoProbeInfo{Probe.getGuid(), Probe.getIndex(),
Probe.getType()});
BlockProbes[BlockIndex].emplace_back(Probe);
}
}

for (auto &[Block, Probes] : BlockProbes) {
YamlBF.Blocks[Block].PseudoProbes =
YAMLProfileWriter::writeBlockProbes(Probes, InlineTreeNodeId);
}
}
// Drop blocks without a hash, won't be useful for stale matching.
llvm::erase_if(YamlBF.Blocks,
[](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
return YamlBB.Hash == (yaml::Hex64)0;
});
// Skip printing if there's no profile data
llvm::erase_if(
YamlBF.Blocks, [](const yaml::bolt::BinaryBasicBlockProfile &YamlBB) {
auto HasCount = [](const auto &SI) { return SI.Count; };
bool HasAnyCount = YamlBB.ExecCount ||
llvm::any_of(YamlBB.Successors, HasCount) ||
llvm::any_of(YamlBB.CallSites, HasCount);
return !HasAnyCount;
});
BP.Functions.emplace_back(YamlBF);
}
}
Expand Down
5 changes: 0 additions & 5 deletions bolt/lib/Profile/YAMLProfileReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,11 +49,6 @@ llvm::cl::opt<bool>
llvm::cl::opt<bool> ProfileUseDFS("profile-use-dfs",
cl::desc("use DFS order for YAML profile"),
cl::Hidden, cl::cat(BoltOptCategory));

llvm::cl::opt<bool> ProfileUsePseudoProbes(
"profile-use-pseudo-probes",
cl::desc("Use pseudo probes for profile generation and matching"),
cl::Hidden, cl::cat(BoltOptCategory));
} // namespace opts

namespace llvm {
Expand Down
Loading
Loading