[MemProf] Use radix tree for alloc contexts in bitcode summaries #117066

teresajohnson · 2024-11-20T22:02:16Z

Leverage the support added to represent allocation contexts in a more
compact way via a radix tree in the indexed profile to similarly reduce
sizes of the bitcode summaries.

For a large target, this reduced the size of the per-module summaries by
about 18% and in the distributed combined index files by 28%.

Leverage the support added to represent allocation contexts in a more compact way via a radix tree in the indexed profile to similarly reduce sizes of the bitcode summaries. For a large target, this reduced the size of the per-module summaries by about 18% and in the distributed combined index files by 28%.

llvmbot · 2024-11-20T22:02:50Z

@llvm/pr-subscribers-pgo

@llvm/pr-subscribers-lto

Author: Teresa Johnson (teresajohnson)

Changes

Leverage the support added to represent allocation contexts in a more
compact way via a radix tree in the indexed profile to similarly reduce
sizes of the bitcode summaries.

For a large target, this reduced the size of the per-module summaries by
about 18% and in the distributed combined index files by 28%.

Patch is 22.03 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/117066.diff

7 Files Affected:

(modified) llvm/include/llvm/Bitcode/LLVMBitCodes.h (+7-3)
(modified) llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp (+1)
(modified) llvm/lib/Bitcode/Reader/BitcodeReader.cpp (+52-16)
(modified) llvm/lib/Bitcode/Writer/BitcodeWriter.cpp (+143-11)
(modified) llvm/lib/ProfileData/MemProf.cpp (+5)
(added) llvm/test/ThinLTO/X86/Inputs/memprof-old-alloc-context-summary.bc ()
(added) llvm/test/ThinLTO/X86/memprof-old-alloc-context-summary.ll (+28)

diff --git a/llvm/include/llvm/Bitcode/LLVMBitCodes.h b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
index a0fb32f67e3858..41909a8fc1d590 100644
--- a/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@@ -307,12 +307,12 @@ enum GlobalValueSummarySymtabCodes {
   // [valueid, n x stackidindex]
   FS_PERMODULE_CALLSITE_INFO = 26,
   // Summary of per-module allocation memprof metadata.
-  // [nummib, nummib x (alloc type, numstackids, numstackids x stackidindex),
+  // [nummib, nummib x (alloc type, context radix tree index),
   // [nummib x (numcontext x total size)]?]
   FS_PERMODULE_ALLOC_INFO = 27,
   // Summary of combined index memprof callsite metadata.
-  // [valueid, numstackindices, numver,
-  //  numstackindices x stackidindex, numver x version]
+  // [valueid, context radix tree index, numver,
+  //  numver x version]
   FS_COMBINED_CALLSITE_INFO = 28,
   // Summary of combined index allocation memprof metadata.
   // [nummib, numver,
@@ -331,6 +331,10 @@ enum GlobalValueSummarySymtabCodes {
   // the entries must be in the exact same order as the corresponding sizes.
   // [nummib x (numcontext x full stack id)]
   FS_ALLOC_CONTEXT_IDS = 31,
+  // Linearized radix tree of allocation contexts. See the description above the
+  // CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
+  // [n x entry]
+  FS_CONTEXT_RADIX_TREE_ARRAY = 32,
 };
 
 enum MetadataCodes {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
index 8f79ccdb9ff75f..032c0de3c7a00f 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeAnalyzer.cpp
@@ -329,6 +329,7 @@ GetCodeName(unsigned CodeID, unsigned BlockID,
       STRINGIFY_CODE(FS, COMBINED_ALLOC_INFO)
       STRINGIFY_CODE(FS, STACK_IDS)
       STRINGIFY_CODE(FS, ALLOC_CONTEXT_IDS)
+      STRINGIFY_CODE(FS, CONTEXT_RADIX_TREE_ARRAY)
     }
   case bitc::METADATA_ATTACHMENT_ID:
     switch (CodeID) {
diff --git a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
index 3e6abacac27261..8472d23816a9a4 100644
--- a/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -987,6 +987,10 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
   /// ids from the lists in the callsite and alloc entries to the index.
   std::vector<uint64_t> StackIds;
 
+  /// Linearized radix tree of allocation contexts. See the description above
+  /// the CallStackRadixTreeBuilder class in ProfileData/MemProf.h for format.
+  std::vector<uint64_t> RadixArray;
+
 public:
   ModuleSummaryIndexBitcodeReader(
       BitstreamCursor Stream, StringRef Strtab, ModuleSummaryIndex &TheIndex,
@@ -1013,6 +1017,8 @@ class ModuleSummaryIndexBitcodeReader : public BitcodeReaderBase {
                                        TypeIdCompatibleVtableInfo &TypeId);
   std::vector<FunctionSummary::ParamAccess>
   parseParamAccesses(ArrayRef<uint64_t> Record);
+  SmallVector<unsigned> parseAllocInfoContext(ArrayRef<uint64_t> Record,
+                                              unsigned &I);
 
   template <bool AllowNullValueInfo = false>
   std::pair<ValueInfo, GlobalValue::GUID>
@@ -7544,6 +7550,45 @@ void ModuleSummaryIndexBitcodeReader::parseTypeIdCompatibleVtableSummaryRecord(
     parseTypeIdCompatibleVtableInfo(Record, Slot, TypeId);
 }
 
+SmallVector<unsigned> ModuleSummaryIndexBitcodeReader::parseAllocInfoContext(
+    ArrayRef<uint64_t> Record, unsigned &I) {
+  SmallVector<unsigned> StackIdList;
+  // For backwards compatibility with old format before radix tree was
+  // used, simply see if we found a radix tree array record.
+  if (RadixArray.empty()) {
+    unsigned NumStackEntries = Record[I++];
+    assert(Record.size() - I >= NumStackEntries);
+    for (unsigned J = 0; J < NumStackEntries; J++) {
+      assert(Record[I] < StackIds.size());
+      StackIdList.push_back(
+          TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
+    }
+  } else {
+    unsigned RadixIndex = Record[I++];
+    // See the comments above CallStackRadixTreeBuilder in ProfileData/MemProf.h
+    // for a detailed description of the radix tree array format. Briefly, the
+    // first entry will be the number of frames, any negative values are the
+    // negative of the offset of the next frame, and otherwise the frames are in
+    // increasing linear order.
+    assert(RadixIndex < RadixArray.size());
+    unsigned NumStackIds = RadixArray[RadixIndex++];
+    while (NumStackIds--) {
+      assert(RadixIndex < RadixArray.size());
+      unsigned Elem = RadixArray[RadixIndex];
+      if (static_cast<std::make_signed_t<unsigned>>(Elem) < 0) {
+        RadixIndex = RadixIndex - Elem;
+        assert(RadixIndex < RadixArray.size());
+        Elem = RadixArray[RadixIndex];
+        // We shouldn't encounter a second offset in a row.
+        assert(static_cast<std::make_signed_t<unsigned>>(Elem) >= 0);
+      }
+      RadixIndex++;
+      StackIdList.push_back(TheIndex.addOrGetStackIdIndex(StackIds[Elem]));
+    }
+  }
+  return StackIdList;
+}
+
 static void setSpecialRefs(SmallVectorImpl<ValueInfo> &Refs, unsigned ROCnt,
                            unsigned WOCnt) {
   // Readonly and writeonly refs are in the end of the refs list.
@@ -8010,6 +8055,11 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       break;
     }
 
+    case bitc::FS_CONTEXT_RADIX_TREE_ARRAY: { // [n x entry]
+      RadixArray = ArrayRef<uint64_t>(Record);
+      break;
+    }
+
     case bitc::FS_PERMODULE_CALLSITE_INFO: {
       unsigned ValueID = Record[0];
       SmallVector<unsigned> StackIdList;
@@ -8065,14 +8115,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
              (Version < 10 && I < Record.size())) {
         assert(Record.size() - I >= 2);
         AllocationType AllocType = (AllocationType)Record[I++];
-        unsigned NumStackEntries = Record[I++];
-        assert(Record.size() - I >= NumStackEntries);
-        SmallVector<unsigned> StackIdList;
-        for (unsigned J = 0; J < NumStackEntries; J++) {
-          assert(Record[I] < StackIds.size());
-          StackIdList.push_back(
-              TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
-        }
+        auto StackIdList = parseAllocInfoContext(Record, I);
         MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
       }
       // We either have nothing left or at least NumMIBs context size info
@@ -8123,14 +8166,7 @@ Error ModuleSummaryIndexBitcodeReader::parseEntireSummary(unsigned ID) {
       while (MIBsRead++ < NumMIBs) {
         assert(Record.size() - I >= 2);
         AllocationType AllocType = (AllocationType)Record[I++];
-        unsigned NumStackEntries = Record[I++];
-        assert(Record.size() - I >= NumStackEntries);
-        SmallVector<unsigned> StackIdList;
-        for (unsigned J = 0; J < NumStackEntries; J++) {
-          assert(Record[I] < StackIds.size());
-          StackIdList.push_back(
-              TheIndex.addOrGetStackIdIndex(StackIds[Record[I++]]));
-        }
+        auto StackIdList = parseAllocInfoContext(Record, I);
         MIBs.push_back(MIBInfo(AllocType, std::move(StackIdList)));
       }
       assert(Record.size() - I >= NumVersions);
diff --git a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
index 24a4c2e8303d5a..7f6deeec6cd1e8 100644
--- a/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@@ -60,6 +60,7 @@
 #include "llvm/MC/StringTableBuilder.h"
 #include "llvm/MC/TargetRegistry.h"
 #include "llvm/Object/IRSymtab.h"
+#include "llvm/ProfileData/MemProf.h"
 #include "llvm/Support/AtomicOrdering.h"
 #include "llvm/Support/Casting.h"
 #include "llvm/Support/CommandLine.h"
@@ -83,6 +84,7 @@
 #include <vector>
 
 using namespace llvm;
+using namespace llvm::memprof;
 
 static cl::opt<unsigned>
     IndexThreshold("bitcode-mdindex-threshold", cl::Hidden, cl::init(25),
@@ -231,7 +233,8 @@ class ModuleBitcodeWriterBase : public BitcodeWriterBase {
       SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
       unsigned ValueID, unsigned FSCallsAbbrev, unsigned FSCallsProfileAbbrev,
       unsigned CallsiteAbbrev, unsigned AllocAbbrev, unsigned ContextIdAbbvId,
-      const Function &F);
+      const Function &F, DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
+      CallStackId &CallStackCount);
   void writeModuleLevelReferences(const GlobalVariable &V,
                                   SmallVector<uint64_t, 64> &NameVals,
                                   unsigned FSModRefsAbbrev,
@@ -4195,12 +4198,58 @@ static void writeTypeIdCompatibleVtableSummaryRecord(
   }
 }
 
+// Adds the allocation contexts to the CallStacks map. We simply use the
+// size at the time the context was added as the CallStackId. This works because
+// when we look up the call stacks later on we process the function summaries
+// and their allocation records in the same exact order.
+static void collectMemProfCallStacks(
+    FunctionSummary *FS, std::function<LinearFrameId(unsigned)> GetStackIndex,
+    MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks) {
+  // The interfaces in ProfileData/MemProf.h use a type alias for a stack frame
+  // id offset into the index of the full stack frames. The ModuleSummaryIndex
+  // currently uses unsigned. Make sure these stay in sync.
+  static_assert(std::is_same_v<LinearFrameId, unsigned>);
+  for (auto &AI : FS->allocs()) {
+    for (auto &MIB : AI.MIBs) {
+      SmallVector<unsigned> StackIdIndices;
+      StackIdIndices.reserve(MIB.StackIdIndices.size());
+      for (auto Id : MIB.StackIdIndices)
+        StackIdIndices.push_back(GetStackIndex(Id));
+      // The CallStackId is the size at the time this context was inserted.
+      CallStacks.insert({CallStacks.size(), StackIdIndices});
+    }
+  }
+}
+
+// Build the radix tree from the accumulated CallStacks, write out the resulting
+// linearized radix tree array, and return the map of call stack positions into
+// this array for use when writing the allocation records. The returned map is
+// indexed by a CallStackId which in this case is implicitly determined by the
+// order of function summaries and their allocation infos being written.
+static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree(
+    MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks,
+    BitstreamWriter &Stream, unsigned RadixAbbrev) {
+  assert(!CallStacks.empty());
+  DenseMap<unsigned, FrameStat> FrameHistogram =
+      computeFrameHistogram<LinearFrameId>(CallStacks);
+  CallStackRadixTreeBuilder<LinearFrameId> Builder;
+  // We don't need a MemProfFrameIndexes map as we have already converted the
+  // full stack id hash to a linear offset into the StackIds array.
+  Builder.build(std::move(CallStacks), /*MemProfFrameIndexes=*/std::nullopt,
+                FrameHistogram);
+  Stream.EmitRecord(bitc::FS_CONTEXT_RADIX_TREE_ARRAY, Builder.getRadixArray(),
+                    RadixAbbrev);
+  return Builder.takeCallStackPos();
+}
+
 static void writeFunctionHeapProfileRecords(
     BitstreamWriter &Stream, FunctionSummary *FS, unsigned CallsiteAbbrev,
     unsigned AllocAbbrev, unsigned ContextIdAbbvId, bool PerModule,
     std::function<unsigned(const ValueInfo &VI)> GetValueID,
     std::function<unsigned(unsigned)> GetStackIndex,
-    bool WriteContextSizeInfoIndex) {
+    bool WriteContextSizeInfoIndex,
+    DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
+    CallStackId &CallStackCount) {
   SmallVector<uint64_t> Record;
 
   for (auto &CI : FS->callsites()) {
@@ -4234,9 +4283,9 @@ static void writeFunctionHeapProfileRecords(
       Record.push_back(AI.Versions.size());
     for (auto &MIB : AI.MIBs) {
       Record.push_back((uint8_t)MIB.AllocType);
-      Record.push_back(MIB.StackIdIndices.size());
-      for (auto Id : MIB.StackIdIndices)
-        Record.push_back(GetStackIndex(Id));
+      // Record the index into the radix tree array for this context.
+      assert(CallStackCount <= CallStackPos.size());
+      Record.push_back(CallStackPos[CallStackCount++]);
     }
     if (!PerModule) {
       for (auto V : AI.Versions)
@@ -4282,7 +4331,9 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
     SmallVector<uint64_t, 64> &NameVals, GlobalValueSummary *Summary,
     unsigned ValueID, unsigned FSCallsRelBFAbbrev,
     unsigned FSCallsProfileAbbrev, unsigned CallsiteAbbrev,
-    unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F) {
+    unsigned AllocAbbrev, unsigned ContextIdAbbvId, const Function &F,
+    DenseMap<CallStackId, LinearCallStackId> &CallStackPos,
+    CallStackId &CallStackCount) {
   NameVals.push_back(ValueID);
 
   FunctionSummary *FS = cast<FunctionSummary>(Summary);
@@ -4297,7 +4348,7 @@ void ModuleBitcodeWriterBase::writePerModuleFunctionSummaryRecord(
       /*PerModule*/ true,
       /*GetValueId*/ [&](const ValueInfo &VI) { return getValueId(VI); },
       /*GetStackIndex*/ [&](unsigned I) { return I; },
-      /*WriteContextSizeInfoIndex*/ true);
+      /*WriteContextSizeInfoIndex*/ true, CallStackPos, CallStackCount);
 
   auto SpecialRefCnts = FS->specialRefCounts();
   NameVals.push_back(getEncodedGVSummaryFlags(FS->flags()));
@@ -4530,12 +4581,52 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
   Abbv = std::make_shared<BitCodeAbbrev>();
   Abbv->Add(BitCodeAbbrevOp(bitc::FS_PERMODULE_ALLOC_INFO));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
-  // n x (alloc type, numstackids, numstackids x stackidindex)
+  // n x (alloc type, context radix tree index)
   // optional: nummib x (numcontext x total size)
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+  // n x entry
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+  unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
+  // First walk through all the functions and collect the allocation contexts in
+  // their associated summaries, for use in constructing a radix tree of
+  // contexts. Note that we need to do this in the same order as the functions
+  // are processed further below since the call stack positions in the resulting
+  // radix tree array are identified based on this order.
+  MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+  for (const Function &F : M) {
+    // Summary emission does not support anonymous functions, they have to
+    // renamed using the anonymous function renaming pass.
+    if (!F.hasName())
+      report_fatal_error("Unexpected anonymous function when writing summary");
+
+    ValueInfo VI = Index->getValueInfo(F.getGUID());
+    if (!VI || VI.getSummaryList().empty()) {
+      // Only declarations should not have a summary (a declaration might
+      // however have a summary if the def was in module level asm).
+      assert(F.isDeclaration());
+      continue;
+    }
+    auto *Summary = VI.getSummaryList()[0].get();
+    FunctionSummary *FS = cast<FunctionSummary>(Summary);
+    collectMemProfCallStacks(
+        FS, /*GetStackIndex*/ [&](unsigned I) { return I; }, CallStacks);
+  }
+  // Finalize the radix tree, write it out, and get the map of positions in the
+  // linearized tree array.
+  DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+  if (!CallStacks.empty())
+    CallStackPos = writeMemoryProfileRadixTree(CallStacks, Stream, RadixAbbrev);
+
+  // Keep track of the current index into the CallStackPos map.
+  CallStackId CallStackCount = 0;
+
   SmallVector<uint64_t, 64> NameVals;
   // Iterate over the list of functions instead of the Index to
   // ensure the ordering is stable.
@@ -4555,7 +4646,8 @@ void ModuleBitcodeWriterBase::writePerModuleGlobalValueSummary() {
     auto *Summary = VI.getSummaryList()[0].get();
     writePerModuleFunctionSummaryRecord(
         NameVals, Summary, VE.getValueID(&F), FSCallsRelBFAbbrev,
-        FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F);
+        FSCallsProfileAbbrev, CallsiteAbbrev, AllocAbbrev, ContextIdAbbvId, F,
+        CallStackPos, CallStackCount);
   }
 
   // Capture references from GlobalVariable initializers, which are outside
@@ -4692,13 +4784,20 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
   Abbv->Add(BitCodeAbbrevOp(bitc::FS_COMBINED_ALLOC_INFO));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // nummib
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 4)); // numver
-  // nummib x (alloc type, numstackids, numstackids x stackidindex),
+  // nummib x (alloc type, context radix tree index),
   // numver x version
   // optional: nummib x total size
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
   Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
   unsigned AllocAbbrev = Stream.EmitAbbrev(std::move(Abbv));
 
+  Abbv = std::make_shared<BitCodeAbbrev>();
+  Abbv->Add(BitCodeAbbrevOp(bitc::FS_CONTEXT_RADIX_TREE_ARRAY));
+  // n x entry
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Array));
+  Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 8));
+  unsigned RadixAbbrev = Stream.EmitAbbrev(std::move(Abbv));
+
   auto shouldImportValueAsDecl = [&](GlobalValueSummary *GVS) -> bool {
     if (DecSummaries == nullptr)
       return false;
@@ -4735,6 +4834,39 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
     NameVals.clear();
   };
 
+  // First walk through all the functions and collect the allocation contexts in
+  // their associated summaries, for use in constructing a radix tree of
+  // contexts. Note that we need to do this in the same order as the functions
+  // are processed further below since the call stack positions in the resulting
+  // radix tree array are identified based on this order.
+  MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+  forEachSummary([&](GVInfo I, bool IsAliasee) {
+    GlobalValueSummary *S = I.second;
+    assert(S);
+    auto *FS = dyn_cast<FunctionSummary>(S);
+    if (!FS)
+      return;
+    collectMemProfCallStacks(
+        FS,
+        /*GetStackIndex*/
+        [&](unsigned I) {
+          // Get the corresponding index into the list of StackIds actually
+          // being written for this combined index (which may be a subset in
+          // the case of distributed indexes).
+          assert(StackIdIndicesToIndex.contains(I));
+          return StackIdIndicesToIndex[I];
+        },
+        CallStacks);
+  });
+  // Finalize the radix tree, write it out, and get the map of positions in the
+  // linearized tree array.
+  DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+  if (!CallStacks.empty())
+    CallStackPos = writeMemoryProfileRadixTree(CallStacks, Stream, RadixAbbrev);
+
+  // Keep track of the current index into the CallStackPos map.
+  CallStackId CallStackCount = 0;
+
   DenseSet<GlobalValue::GUID> DefOrUseGUIDs;
   forEachSummary([&](GVInfo I, bool IsAliasee) {
     GlobalValueSummary *S = I.second;
@@ -4813,7 +4945,7 @@ void IndexBitcodeWriter::writeCombinedGlobalValueSummary() {
           assert(StackIdIndicesToIndex.contains(I));
           return StackIdIndicesToIndex[I];
         },
-        /*WriteContextSizeInfoIndex*/ false);
+        /*WriteContextSizeInfoIndex*/ false, CallStackPos, CallStackCount);
 
     NameVals.push_back(*ValueId);
     assert(ModuleIdMap.count(FS->modulePath()));
diff --git a/llvm/lib/ProfileData/MemProf.cpp b/llvm/lib/ProfileData/MemProf.cpp
index 9d5ac748d7975d..12ee3d7a53c4b1 100644
--- a/llvm/lib/ProfileData/MemProf.cpp
+++ b/llvm/lib/ProfileData/MemProf.cpp
@@ -615,6 +615,7 @@ void CallStackRadixTreeBuilder<FrameIdTy>::build(
 
 // Explicitly instantiate class with the utilized FrameIdTy.
 template class CallStackRadixTreeBuilder<FrameId>;
+template class CallStackRadixTreeBuilder<LinearFrameId>;
 
 template <typename FrameIdTy>
 llvm::DenseMap<FrameIdTy, FrameStat>
@@ -637,6 +638,10 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
 template llvm::DenseMap<FrameId...
[truncated]

kazutakahirata · 2024-11-21T05:40:49Z

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+// indexed by a CallStackId which in this case is implicitly determined by the
+// order of function summaries and their allocation infos being written.
+static DenseMap<CallStackId, LinearCallStackId> writeMemoryProfileRadixTree(
+    MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks,


Did you mean to use an rvalue reference? I see std::move a few lines below.

Suggested change

MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks,

MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &&CallStacks,

kazutakahirata · 2024-11-21T05:42:06Z

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+  // linearized tree array.
+  DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+  if (!CallStacks.empty())
+    CallStackPos = writeMemoryProfileRadixTree(CallStacks, Stream, RadixAbbrev);


May I suggest std::move here?

Suggested change

CallStackPos = writeMemoryProfileRadixTree(CallStacks, Stream, RadixAbbrev);

CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);

kazutakahirata · 2024-11-21T05:47:28Z

llvm/lib/Bitcode/Reader/BitcodeReader.cpp

+  if (RadixArray.empty()) {
+    unsigned NumStackEntries = Record[I++];
+    assert(Record.size() - I >= NumStackEntries);
+    for (unsigned J = 0; J < NumStackEntries; J++) {


I understand that you are copying this block of code from elsewhere, but may I suggest reserve here?

Suggested change

for (unsigned J = 0; J < NumStackEntries; J++) {

StackIdList.reserve(NumStackEntries);

for (unsigned J = 0; J < NumStackEntries; J++) {

kazutakahirata · 2024-11-21T05:49:04Z

llvm/lib/Bitcode/Reader/BitcodeReader.cpp

+    // increasing linear order.
+    assert(RadixIndex < RadixArray.size());
+    unsigned NumStackIds = RadixArray[RadixIndex++];
+    while (NumStackIds--) {


Likewise, may I suggest reserve here? The main reason for encoding the length in the radix tree array was to enable reserve.

Suggested change

while (NumStackIds--) {

StackIdList.reserve(NumStackIds);

while (NumStackIds--) {

kazutakahirata · 2024-11-21T05:56:39Z

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+  // linearized tree array.
+  DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+  if (!CallStacks.empty())
+    CallStackPos = writeMemoryProfileRadixTree(CallStacks, Stream, RadixAbbrev);


May I suggest std::move here?

Suggested change

CallStackPos = writeMemoryProfileRadixTree(CallStacks, Stream, RadixAbbrev);

CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);

kazutakahirata · 2024-11-21T07:22:24Z

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+  // radix tree array are identified based on this order.
+  MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> CallStacks;
+  for (const Function &F : M) {
+    // Summary emission does not support anonymous functions, they have to


Did you mean this?

Suggested change

// Summary emission does not support anonymous functions, they have to

// Summary emission does not support anonymous functions, they have to be

kazutakahirata

LGTM.

snehasish

lgtm

snehasish · 2024-11-22T19:33:24Z

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+  // Finalize the radix tree, write it out, and get the map of positions in the
+  // linearized tree array.
+  DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+  if (!CallStacks.empty())


nit: add brackets?

snehasish · 2024-11-22T19:34:48Z

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+    collectMemProfCallStacks(
+        FS,
+        /*GetStackIndex*/
+        [&](unsigned I) {


Just capture StackIdIndicesToIndex since that's all we use?
Same for the outer lambda with the additional capture of CallStacks?

I think it makes it clearer to the reader.

I tried this, but the compiler gave an error because StackIdIndicesToIndex is a class member. I could only get it to work by capturing "this", which I don't think improves clarity at all. Ditto for the outer lambda, "this" needs to be captured there too for StackIdIndicesToIndex. So in the end I left both of these alone.

However, I did remove the capture from the GetStackIndex lambda passed to the other collectMemProfCallStacks invocation as none is needed.

snehasish · 2024-11-22T19:39:23Z

llvm/lib/ProfileData/MemProf.cpp

@@ -637,6 +638,10 @@ computeFrameHistogram(llvm::MapVector<CallStackId, llvm::SmallVector<FrameIdTy>>
 template llvm::DenseMap<FrameId, FrameStat> computeFrameHistogram<FrameId>(
    llvm::MapVector<CallStackId, llvm::SmallVector<FrameId>>
        &MemProfCallStackData);
+template llvm::DenseMap<LinearFrameId, FrameStat>


Add a comment that the only difference is FrameId vs. LinearFrameId (both of which are the same underlying type).

They aren't the same underlying type, which is why the templatization was needed. One is uint32_t (LinearFrameId) and one is uint64_t (FrameId).

snehasish · 2024-11-22T19:40:24Z

llvm/lib/Bitcode/Writer/BitcodeWriter.cpp

+  // Finalize the radix tree, write it out, and get the map of positions in the
+  // linearized tree array.
+  DenseMap<CallStackId, LinearCallStackId> CallStackPos;
+  if (!CallStacks.empty())


nit: braces for multiple lines?

snehasish · 2024-11-22T19:44:59Z

llvm/lib/Bitcode/Reader/BitcodeReader.cpp

+    ArrayRef<uint64_t> Record, unsigned &I) {
+  SmallVector<unsigned> StackIdList;
+  // For backwards compatibility with old format before radix tree was
+  // used, simply see if we found a radix tree array record.


Should this be "simply see if we found a stack entry record"?

No, the RadixArray will be non-empty if we found a radix tree array record. Clarified in the comment.

llvm-ci · 2024-11-22T22:53:37Z

LLVM Buildbot has detected a new failure on builder llvm-nvptx-nvidia-ubuntu running on as-builder-7 while building llvm at step 5 "build-unified-tree".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/180/builds/8904

Here is the relevant piece of the build log for the reference

Step 5 (build-unified-tree) failure: build (failure)
...
62.568 [111/9/2572] Linking CXX shared library lib/libLLVMIRPrinter.so.20.0git
62.568 [110/9/2573] Linking CXX shared library lib/libLLVMFrontendDriver.so.20.0git
62.569 [109/9/2574] Linking CXX shared library lib/libLLVMFrontendAtomic.so.20.0git
62.571 [108/9/2575] Linking CXX executable bin/llvm-sim
62.576 [108/8/2576] Linking CXX shared library lib/libLLVMTarget.so.20.0git
62.582 [107/8/2577] Linking CXX executable bin/llvm-tli-checker
62.589 [107/7/2578] Creating library symlink lib/libLLVMIRPrinter.so
62.589 [107/6/2579] Creating library symlink lib/libLLVMFrontendDriver.so
62.590 [107/5/2580] Creating library symlink lib/libLLVMFrontendAtomic.so
62.593 [107/4/2581] Linking CXX shared library lib/libLLVMBitWriter.so.20.0git
FAILED: lib/libLLVMBitWriter.so.20.0git 
: && /usr/bin/c++ -fPIC -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -fno-lifetime-dse -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wno-uninitialized -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG  -Wl,-z,defs -Wl,-z,nodelete -fuse-ld=gold   -Wl,--gc-sections -shared -Wl,-soname,libLLVMBitWriter.so.20.0git -o lib/libLLVMBitWriter.so.20.0git lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitWriter.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriterPass.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/ValueEnumerator.cpp.o  -Wl,-rpath,"\$ORIGIN/../lib:/home/buildbot/worker/as-builder-7/ramdisk/llvm-nvptx-nvidia-ubuntu/build/lib:"  lib/libLLVMAnalysis.so.20.0git  lib/libLLVMObject.so.20.0git  lib/libLLVMCore.so.20.0git  lib/libLLVMMC.so.20.0git  lib/libLLVMTargetParser.so.20.0git  lib/libLLVMSupport.so.20.0git  -Wl,-rpath-link,/home/buildbot/worker/as-builder-7/ramdisk/llvm-nvptx-nvidia-ubuntu/build/lib && :
lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o:BitcodeWriter.cpp:function writeMemoryProfileRadixTree(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, llvm::BitstreamWriter&, unsigned int): error: undefined reference to 'llvm::DenseMap<unsigned int, llvm::memprof::FrameStat, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, llvm::memprof::FrameStat> > llvm::memprof::computeFrameHistogram<unsigned int>(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, llvm::CalculateSmallVectorDefaultInlinedElements<unsigned int>::value>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, llvm::CalculateSmallVectorDefaultInlinedElements<unsigned int>::value> >, 0u> >&)'
lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o:BitcodeWriter.cpp:function writeMemoryProfileRadixTree(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, llvm::BitstreamWriter&, unsigned int): error: undefined reference to 'llvm::memprof::CallStackRadixTreeBuilder<unsigned int>::build(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, std::optional<llvm::DenseMap<unsigned int, unsigned int, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, unsigned int> > const>, llvm::DenseMap<unsigned int, llvm::memprof::FrameStat, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, llvm::memprof::FrameStat> >&)'
collect2: error: ld returned 1 exit status
62.596 [107/3/2582] Creating library symlink lib/libLLVMTarget.so
62.615 [107/2/2583] Linking CXX shared library lib/libLLVMSandboxIR.so.20.0git
62.760 [107/1/2584] Linking CXX shared library lib/libLLVMTransformUtils.so.20.0git
ninja: build stopped: subcommand failed.

llvm-ci · 2024-11-22T22:53:55Z

LLVM Buildbot has detected a new failure on builder flang-aarch64-libcxx running on linaro-flang-aarch64-libcxx while building llvm at step 5 "build-unified-tree".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/89/builds/11187

Here is the relevant piece of the build log for the reference

Step 5 (build-unified-tree) failure: build (failure)
...
42.832 [2561/15/4701] Creating library symlink lib/libMLIRShapeToStandard.so
42.833 [2561/14/4702] Creating library symlink lib/libMLIRTosaToSCF.so
42.833 [2561/13/4703] Creating library symlink lib/libMLIRTosaToTensor.so
42.833 [2561/12/4704] Creating library symlink lib/libMLIREmitCTransforms.so
42.840 [2561/11/4705] Linking CXX shared library lib/libMLIRXeGPUTransforms.so.20.0git
42.849 [2561/10/4706] Creating library symlink lib/libMLIRBufferizationTransforms.so
42.850 [2561/9/4707] Linking CXX shared library lib/libLLVMIRPrinter.so.20.0git
42.854 [2561/8/4708] Linking CXX shared library lib/libMLIRMeshTransforms.so.20.0git
42.878 [2561/7/4709] Linking CXX shared library lib/libLLVMFrontendAtomic.so.20.0git
42.902 [2551/16/4710] Linking CXX shared library lib/libLLVMBitWriter.so.20.0git
FAILED: lib/libLLVMBitWriter.so.20.0git 
: && /usr/local/bin/c++ -fPIC -stdlib=libc++ -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG  -stdlib=libc++ -Wl,-z,defs -Wl,-z,nodelete   -Wl,-rpath-link,/home/tcwg-buildbot/worker/flang-aarch64-libcxx/build/./lib  -Wl,--gc-sections -shared -Wl,-soname,libLLVMBitWriter.so.20.0git -o lib/libLLVMBitWriter.so.20.0git lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitWriter.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriterPass.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/ValueEnumerator.cpp.o  -Wl,-rpath,"\$ORIGIN/../lib:/home/tcwg-buildbot/worker/flang-aarch64-libcxx/build/lib:"  lib/libLLVMAnalysis.so.20.0git  lib/libLLVMObject.so.20.0git  lib/libLLVMCore.so.20.0git  lib/libLLVMMC.so.20.0git  lib/libLLVMTargetParser.so.20.0git  lib/libLLVMSupport.so.20.0git  -Wl,-rpath-link,/home/tcwg-buildbot/worker/flang-aarch64-libcxx/build/lib && :
/usr/bin/ld: lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o: in function `writeMemoryProfileRadixTree(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::__1::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, llvm::BitstreamWriter&, unsigned int)':
BitcodeWriter.cpp:(.text._ZL27writeMemoryProfileRadixTreeON4llvm9MapVectorImNS_11SmallVectorIjLj12EEENS_8DenseMapImjNS_12DenseMapInfoImvEENS_6detail12DenseMapPairImjEEEENS1_INSt3__14pairImS2_EELj0EEEEERNS_15BitstreamWriterEj+0x34): undefined reference to `llvm::DenseMap<unsigned int, llvm::memprof::FrameStat, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, llvm::memprof::FrameStat> > llvm::memprof::computeFrameHistogram<unsigned int>(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, CalculateSmallVectorDefaultInlinedElements<unsigned int>::value>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::__1::pair<unsigned long, llvm::SmallVector<unsigned int, CalculateSmallVectorDefaultInlinedElements<unsigned int>::value> >, 0u> >&)'
/usr/bin/ld: BitcodeWriter.cpp:(.text._ZL27writeMemoryProfileRadixTreeON4llvm9MapVectorImNS_11SmallVectorIjLj12EEENS_8DenseMapImjNS_12DenseMapInfoImvEENS_6detail12DenseMapPairImjEEEENS1_INSt3__14pairImS2_EELj0EEEEERNS_15BitstreamWriterEj+0x68): undefined reference to `llvm::memprof::CallStackRadixTreeBuilder<unsigned int>::build(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::__1::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, std::__1::optional<llvm::DenseMap<unsigned int, unsigned int, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, unsigned int> > const>, llvm::DenseMap<unsigned int, llvm::memprof::FrameStat, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, llvm::memprof::FrameStat> >&)'
clang++: error: linker command failed with exit code 1 (use -v to see invocation)
42.902 [2551/15/4711] Creating library symlink lib/libLLVMIRPrinter.so
42.903 [2551/14/4712] Creating library symlink lib/libLLVMFrontendAtomic.so
42.903 [2551/13/4713] Creating library symlink lib/libMLIRLinalgDialect.so
42.903 [2551/12/4714] Creating library symlink lib/libMLIRMeshTransforms.so
42.912 [2551/11/4715] Creating library symlink lib/libMLIRNVGPUTransforms.so
42.922 [2551/10/4716] Linking CXX shared library lib/libLLVMFrontendDriver.so.20.0git
42.939 [2551/9/4717] Linking CXX shared library lib/libLLVMTarget.so.20.0git
42.952 [2551/8/4718] Creating library symlink lib/libMLIRMLProgramDialect.so
42.999 [2551/7/4719] Linking CXX shared library lib/libLLVMSandboxIR.so.20.0git
43.030 [2551/6/4720] Linking CXX shared library lib/libMLIRArithToEmitC.so.20.0git
43.037 [2551/5/4721] Linking CXX shared library lib/libMLIRControlFlowTransforms.so.20.0git
43.068 [2551/4/4722] Linking CXX shared library lib/libMLIRBufferizationToMemRef.so.20.0git
43.082 [2551/3/4723] Linking CXX shared library lib/libMLIRArithTransforms.so.20.0git
43.135 [2551/2/4724] Linking CXX shared library lib/libLLVMTransformUtils.so.20.0git
43.829 [2551/1/4725] Linking CXX shared library lib/libMLIRSPIRVDialect.so.20.0git
ninja: build stopped: subcommand failed.

llvm-ci · 2024-11-22T22:54:08Z

LLVM Buildbot has detected a new failure on builder mlir-nvidia running on mlir-nvidia while building llvm at step 5 "build-check-mlir-build-only".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/138/builds/6846

Here is the relevant piece of the build log for the reference

Step 5 (build-check-mlir-build-only) failure: build (failure)
...
36.391 [421/1/4599] Creating library symlink lib/libLLVMSymbolize.so
36.511 [420/1/4600] Linking CXX shared library lib/libLLVMProfileData.so.20.0git
36.517 [419/1/4601] Creating library symlink lib/libLLVMProfileData.so
36.690 [418/1/4602] Linking CXX shared library lib/libLLVMAnalysis.so.20.0git
36.696 [417/1/4603] Creating library symlink lib/libLLVMAnalysis.so
36.782 [412/5/4604] Linking CXX shared library lib/libLLVMIRPrinter.so.20.0git
36.784 [411/5/4605] Linking CXX shared library lib/libLLVMTarget.so.20.0git
36.789 [410/5/4606] Creating library symlink lib/libLLVMIRPrinter.so
36.792 [410/4/4607] Creating library symlink lib/libLLVMTarget.so
36.808 [409/4/4608] Linking CXX shared library lib/libLLVMBitWriter.so.20.0git
FAILED: lib/libLLVMBitWriter.so.20.0git 
: && /usr/bin/clang++ -fPIC -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -Werror=unguarded-availability-new -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wmissing-field-initializers -pedantic -Wno-long-long -Wc++98-compat-extra-semi -Wimplicit-fallthrough -Wcovered-switch-default -Wno-noexcept-type -Wnon-virtual-dtor -Wdelete-non-virtual-dtor -Wsuggest-override -Wstring-conversion -Wmisleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG  -Wl,-z,defs -Wl,-z,nodelete -fuse-ld=lld -Wl,--color-diagnostics   -Wl,--gc-sections -shared -Wl,-soname,libLLVMBitWriter.so.20.0git -o lib/libLLVMBitWriter.so.20.0git lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitWriter.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriterPass.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/ValueEnumerator.cpp.o  -Wl,-rpath,"\$ORIGIN/../lib:/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib:"  lib/libLLVMAnalysis.so.20.0git  lib/libLLVMObject.so.20.0git  lib/libLLVMCore.so.20.0git  lib/libLLVMMC.so.20.0git  lib/libLLVMTargetParser.so.20.0git  lib/libLLVMSupport.so.20.0git  -Wl,-rpath-link,/vol/worker/mlir-nvidia/mlir-nvidia/llvm.obj/lib && :
ld.lld: error: undefined symbol: llvm::DenseMap<unsigned int, llvm::memprof::FrameStat, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, llvm::memprof::FrameStat> > llvm::memprof::computeFrameHistogram<unsigned int>(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, CalculateSmallVectorDefaultInlinedElements<unsigned int>::value>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, CalculateSmallVectorDefaultInlinedElements<unsigned int>::value> >, 0u> >&)
>>> referenced by BitcodeWriter.cpp
>>>               lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o:(writeMemoryProfileRadixTree(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, llvm::BitstreamWriter&, unsigned int))

ld.lld: error: undefined symbol: llvm::memprof::CallStackRadixTreeBuilder<unsigned int>::build(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, std::optional<llvm::DenseMap<unsigned int, unsigned int, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, unsigned int> > const>, llvm::DenseMap<unsigned int, llvm::memprof::FrameStat, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, llvm::memprof::FrameStat> >&)
>>> referenced by BitcodeWriter.cpp
>>>               lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o:(writeMemoryProfileRadixTree(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, llvm::BitstreamWriter&, unsigned int))
clang: error: linker command failed with exit code 1 (use -v to see invocation)
36.814 [409/3/4609] Linking CXX shared library lib/libLLVMSandboxIR.so.20.0git
36.849 [409/2/4610] Linking CXX shared library lib/libLLVMTransformUtils.so.20.0git
36.887 [409/1/4611] Linking CXX shared library lib/libLLVMExecutionEngine.so.20.0git
ninja: build stopped: subcommand failed.

llvm-ci · 2024-11-22T22:55:16Z

LLVM Buildbot has detected a new failure on builder llvm-nvptx64-nvidia-ubuntu running on as-builder-7 while building llvm at step 5 "build-unified-tree".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/160/builds/8906

Here is the relevant piece of the build log for the reference

Step 5 (build-unified-tree) failure: build (failure)
...
56.053 [111/9/2572] Linking CXX shared library lib/libLLVMIRPrinter.so.20.0git
56.074 [110/9/2573] Creating library symlink lib/libLLVMIRPrinter.so
56.075 [110/8/2574] Linking CXX shared library lib/libLLVMFrontendAtomic.so.20.0git
56.076 [109/8/2575] Linking CXX shared library lib/libLLVMFrontendDriver.so.20.0git
56.078 [108/8/2576] Linking CXX executable bin/llvm-sim
56.083 [108/7/2577] Linking CXX shared library lib/libLLVMTarget.so.20.0git
56.089 [107/7/2578] Linking CXX executable bin/llvm-tli-checker
56.096 [107/6/2579] Creating library symlink lib/libLLVMFrontendAtomic.so
56.097 [107/5/2580] Creating library symlink lib/libLLVMFrontendDriver.so
56.100 [107/4/2581] Linking CXX shared library lib/libLLVMBitWriter.so.20.0git
FAILED: lib/libLLVMBitWriter.so.20.0git 
: && /usr/bin/c++ -fPIC -fPIC -fno-semantic-interposition -fvisibility-inlines-hidden -Werror=date-time -fno-lifetime-dse -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -pedantic -Wno-long-long -Wimplicit-fallthrough -Wno-uninitialized -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -ffunction-sections -fdata-sections -O3 -DNDEBUG  -Wl,-z,defs -Wl,-z,nodelete -fuse-ld=gold   -Wl,--gc-sections -shared -Wl,-soname,libLLVMBitWriter.so.20.0git -o lib/libLLVMBitWriter.so.20.0git lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitWriter.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriterPass.cpp.o lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/ValueEnumerator.cpp.o  -Wl,-rpath,"\$ORIGIN/../lib:/home/buildbot/worker/as-builder-7/ramdisk/llvm-nvptx64-nvidia-ubuntu/build/lib:"  lib/libLLVMAnalysis.so.20.0git  lib/libLLVMObject.so.20.0git  lib/libLLVMCore.so.20.0git  lib/libLLVMMC.so.20.0git  lib/libLLVMTargetParser.so.20.0git  lib/libLLVMSupport.so.20.0git  -Wl,-rpath-link,/home/buildbot/worker/as-builder-7/ramdisk/llvm-nvptx64-nvidia-ubuntu/build/lib && :
lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o:BitcodeWriter.cpp:function writeMemoryProfileRadixTree(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, llvm::BitstreamWriter&, unsigned int): error: undefined reference to 'llvm::DenseMap<unsigned int, llvm::memprof::FrameStat, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, llvm::memprof::FrameStat> > llvm::memprof::computeFrameHistogram<unsigned int>(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, llvm::CalculateSmallVectorDefaultInlinedElements<unsigned int>::value>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, llvm::CalculateSmallVectorDefaultInlinedElements<unsigned int>::value> >, 0u> >&)'
lib/Bitcode/Writer/CMakeFiles/LLVMBitWriter.dir/BitcodeWriter.cpp.o:BitcodeWriter.cpp:function writeMemoryProfileRadixTree(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, llvm::BitstreamWriter&, unsigned int): error: undefined reference to 'llvm::memprof::CallStackRadixTreeBuilder<unsigned int>::build(llvm::MapVector<unsigned long, llvm::SmallVector<unsigned int, 12u>, llvm::DenseMap<unsigned long, unsigned int, llvm::DenseMapInfo<unsigned long, void>, llvm::detail::DenseMapPair<unsigned long, unsigned int> >, llvm::SmallVector<std::pair<unsigned long, llvm::SmallVector<unsigned int, 12u> >, 0u> >&&, std::optional<llvm::DenseMap<unsigned int, unsigned int, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, unsigned int> > const>, llvm::DenseMap<unsigned int, llvm::memprof::FrameStat, llvm::DenseMapInfo<unsigned int, void>, llvm::detail::DenseMapPair<unsigned int, llvm::memprof::FrameStat> >&)'
collect2: error: ld returned 1 exit status
56.103 [107/3/2582] Creating library symlink lib/libLLVMTarget.so
56.120 [107/2/2583] Linking CXX shared library lib/libLLVMSandboxIR.so.20.0git
56.263 [107/1/2584] Linking CXX shared library lib/libLLVMTransformUtils.so.20.0git
ninja: build stopped: subcommand failed.

…ies" (#117395) Reverts #117066 This is causing some build bot failures that need investigation.

llvmbot added PGO Profile Guided Optimizations LTO Link time optimization (regular/full LTO or ThinLTO) labels Nov 20, 2024

teresajohnson requested a review from kazutakahirata November 20, 2024 22:02

teresajohnson requested a review from snehasish November 20, 2024 22:02

kazutakahirata reviewed Nov 21, 2024

View reviewed changes

Address comments

112a1a4

kazutakahirata approved these changes Nov 22, 2024

View reviewed changes

snehasish approved these changes Nov 22, 2024

View reviewed changes

Address comments

3f16861

teresajohnson merged commit ccb4702 into llvm:main Nov 22, 2024
8 checks passed

teresajohnson mentioned this pull request Nov 22, 2024

Revert "[MemProf] Use radix tree for alloc contexts in bitcode summaries" #117395

Merged

teresajohnson added a commit that referenced this pull request Nov 22, 2024

Revert "[MemProf] Use radix tree for alloc contexts in bitcode summar…

fdb050a

…ies" (#117395) Reverts #117066 This is causing some build bot failures that need investigation.

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

[MemProf] Use radix tree for alloc contexts in bitcode summaries #117066

[MemProf] Use radix tree for alloc contexts in bitcode summaries #117066

teresajohnson commented Nov 20, 2024

llvmbot commented Nov 20, 2024 •

edited

Loading

kazutakahirata Nov 21, 2024

teresajohnson Nov 21, 2024

kazutakahirata Nov 21, 2024

teresajohnson Nov 21, 2024

kazutakahirata Nov 21, 2024

teresajohnson Nov 21, 2024

kazutakahirata Nov 21, 2024

teresajohnson Nov 21, 2024

kazutakahirata Nov 21, 2024

teresajohnson Nov 21, 2024

kazutakahirata Nov 21, 2024

teresajohnson Nov 21, 2024

kazutakahirata left a comment

snehasish left a comment

snehasish Nov 22, 2024

teresajohnson Nov 22, 2024

snehasish Nov 22, 2024

teresajohnson Nov 22, 2024

snehasish Nov 22, 2024

teresajohnson Nov 22, 2024

snehasish Nov 22, 2024

teresajohnson Nov 22, 2024

snehasish Nov 22, 2024

teresajohnson Nov 22, 2024

llvm-ci commented Nov 22, 2024

llvm-ci commented Nov 22, 2024

llvm-ci commented Nov 22, 2024

llvm-ci commented Nov 22, 2024

	MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &CallStacks,
	MapVector<CallStackId, llvm::SmallVector<LinearFrameId>> &&CallStacks,

	CallStackPos = writeMemoryProfileRadixTree(CallStacks, Stream, RadixAbbrev);
	CallStackPos = writeMemoryProfileRadixTree(std::move(CallStacks), Stream, RadixAbbrev);

	for (unsigned J = 0; J < NumStackEntries; J++) {
	StackIdList.reserve(NumStackEntries);
	for (unsigned J = 0; J < NumStackEntries; J++) {

	while (NumStackIds--) {
	StackIdList.reserve(NumStackIds);
	while (NumStackIds--) {

	// Summary emission does not support anonymous functions, they have to
	// Summary emission does not support anonymous functions, they have to be

[MemProf] Use radix tree for alloc contexts in bitcode summaries #117066

[MemProf] Use radix tree for alloc contexts in bitcode summaries #117066

Conversation

teresajohnson commented Nov 20, 2024

llvmbot commented Nov 20, 2024 • edited Loading

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

kazutakahirata left a comment

Choose a reason for hiding this comment

snehasish left a comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

Choose a reason for hiding this comment

llvm-ci commented Nov 22, 2024

llvm-ci commented Nov 22, 2024

llvm-ci commented Nov 22, 2024

llvm-ci commented Nov 22, 2024

llvmbot commented Nov 20, 2024 •

edited

Loading