[LLVM[NFC] Refactor to allow debug_names entries to conatain DIE offset #69399

ayermolo · 2023-10-17T23:52:19Z

This is pre-cursor patch to enabling type units with DWARF5 acceleration tables.
With this change it allows for entries to contain offsets directly, this way type
units do not need to be preserved until .debug_names is written out.

This is pre-cursor patch to enabling type units with DWARF5 acceleration tables. With this change it allows for entries to contain offsets directly, this way type units do not need to be preserved until .debug_names is written out.

llvmbot · 2023-10-17T23:53:19Z

@llvm/pr-subscribers-debuginfo

Author: Alexander Yermolovich (ayermolo)

Changes

This is pre-cursor patch to enabling type units with DWARF5 acceleration tables.
With this change it allows for entries to contain offsets directly, this way type
units do not need to be preserved until .debug_names is written out.

Full diff: https://github.com/llvm/llvm-project/pull/69399.diff

3 Files Affected:

(modified) llvm/include/llvm/CodeGen/AccelTable.h (+15-6)
(modified) llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp (+23-2)
(modified) llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp (+4-2)

diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index d521b31e3d16ab4..d4bfd7d821c4952 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -252,20 +252,27 @@ class DWARF5AccelTableData : public AccelTableData {
 public:
   static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
 
-  DWARF5AccelTableData(const DIE &Die) : Die(Die) {}
+  DWARF5AccelTableData(const DIE &Die, const DwarfCompileUnit &CU);
 
 #ifndef NDEBUG
   void print(raw_ostream &OS) const override;
 #endif
 
-  const DIE &getDie() const { return Die; }
-  uint64_t getDieOffset() const { return Die.getOffset(); }
-  unsigned getDieTag() const { return Die.getTag(); }
+  uint64_t getDieOffset() const {
+    if (const DIE *const *TDie = std::get_if<const DIE *>(&OffsetVal))
+      return (*TDie)->getOffset();
+    return std::get<uint64_t>(OffsetVal);
+  }
+  unsigned getDieTag() const { return Tag; }
+  unsigned getUnitID() const { return UnitID; }
+  void normalizeDIEToOffset() { OffsetVal = getDieOffset(); }
 
 protected:
-  const DIE &Die;
+  std::variant<const DIE *, uint64_t> OffsetVal;
+  dwarf::Tag Tag;
+  unsigned UnitID;
 
-  uint64_t order() const override { return Die.getOffset(); }
+  uint64_t order() const override { return getDieOffset(); }
 };
 
 class DWARF5AccelTableStaticData : public AccelTableData {
@@ -283,6 +290,8 @@ class DWARF5AccelTableStaticData : public AccelTableData {
   uint64_t getDieOffset() const { return DieOffset; }
   unsigned getDieTag() const { return DieTag; }
   unsigned getCUIndex() const { return CUIndex; }
+  void normalizeDIEToOffset() { /* Not needed since DIE is not used. */
+  }
 
 protected:
   uint64_t DieOffset;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 88d5487427774c1..d30ca1ea7d1dc01 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -219,6 +219,10 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
   MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end");
   MCSymbol *EntryPool = Asm->createTempSymbol("names_entries");
 
+  /// Iterates over all the entries and if it contains a DIE replaces it with an
+  /// offset.
+  void normalizeOffsets();
+
   DenseSet<uint32_t> getUniqueTags() const;
 
   // Right now, we emit uniform attributes for all tags.
@@ -357,6 +361,13 @@ void AppleAccelTableWriter::emit() const {
   emitData();
 }
 
+DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die,
+                                           const DwarfCompileUnit &CU)
+    : OffsetVal(&Die) {
+  Tag = Die.getTag();
+  UnitID = CU.getUniqueID();
+}
+
 template <typename DataT>
 void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
   assert(CompUnitCount > 0 && "Index must have at least one CU.");
@@ -387,6 +398,16 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
   Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize});
 }
 
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::normalizeOffsets() {
+  for (auto &Bucket : Contents.getBuckets()) {
+    for (auto *Hash : Bucket) {
+      for (auto *Value : Hash->Values) {
+        static_cast<DataT *>(Value)->normalizeDIEToOffset();
+      }
+    }
+  }
+}
 template <typename DataT>
 DenseSet<uint32_t> Dwarf5AccelTableWriter<DataT>::getUniqueTags() const {
   DenseSet<uint32_t> UniqueTags;
@@ -516,6 +537,7 @@ Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
       Header(CompUnits.size(), Contents.getBucketCount(),
              Contents.getUniqueNameCount()),
       CompUnits(CompUnits), getCUIndexForEntry(std::move(getCUIndexForEntry)) {
+  normalizeOffsets();
   DenseSet<uint32_t> UniqueTags = getUniqueTags();
   SmallVector<AttributeEncoding, 2> UniformAttributes = getUniformAttributes();
 
@@ -575,8 +597,7 @@ void llvm::emitDWARF5AccelTable(
   Dwarf5AccelTableWriter<DWARF5AccelTableData>(
       Asm, Contents, CompUnits,
       [&](const DWARF5AccelTableData &Entry) {
-        const DIE *CUDie = Entry.getDie().getUnitDie();
-        return CUIndex[DD.lookupCU(CUDie)->getUniqueID()];
+        return CUIndex[Entry.getUnitID()];
       })
       .emit();
 }
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index ee2ab71ad28e47f..80af8a8d79e2ac3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -3547,9 +3547,11 @@ void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
   case AccelTableKind::Apple:
     AppleAccel.addName(Ref, Die);
     break;
-  case AccelTableKind::Dwarf:
-    AccelDebugNames.addName(Ref, Die);
+  case AccelTableKind::Dwarf: {
+    DwarfCompileUnit *CU = lookupCU(Die.getUnitDie());
+    AccelDebugNames.addName(Ref, Die, *CU);
     break;
+  }
   case AccelTableKind::Default:
     llvm_unreachable("Default should have already been resolved.");
   case AccelTableKind::None:

dwblaikie · 2023-10-18T21:46:13Z

llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

-  case AccelTableKind::Dwarf:
-    AccelDebugNames.addName(Ref, Die);
+  case AccelTableKind::Dwarf: {
+    DwarfCompileUnit *CU = lookupCU(Die.getUnitDie());


Is the addAccelNameImpl's CU parameter not accurate/correct here?

Well input there is DICompileUnit, which doesn't have uniqueID set yet, I think.

Hmm, I'd have thought it would - the DICompileUnit doesn't carry it directly, which is fair. But I think it's more efficient to use the CUMap to go from DICompileUnit -> DwarfCompileUnit, rather than going from DIE->unit DIE-> DwarfCompileUnit?

dwblaikie · 2023-10-18T22:05:48Z

llvm/include/llvm/CodeGen/AccelTable.h

+  std::variant<const DIE *, uint64_t> OffsetVal;
+  dwarf::Tag Tag;
+  unsigned UnitID;


This is going to end up increasing the size of the struct from 8 bytes to ((8(pointer/uint64)+8(discriminator and padding))+2(Tag)+2(padding)+4(uintID) == 24 bytes.
Might be worth skipping std::variant, so that the discriminator can be encoded in what's otherwise going to be 2 bytes of padding, then it'll only be 16 bytes (8(pointer/uint64)+2(discriminator)+2(tag)+4(uintID)) - and we could probably make uintIDs smaller, really - like they can probably be 16 bits just fine... (but that's a change for another time - it's fine here and wouldn't make the struct any smaller anyway)

If it's a blocker I can redesign, but personally I think trying to squeeze most bits out just leads to a more fragile implementation. Plus cool c++ features. :D

Running llc on sema bitcode:
time ~/local/llvm-build-upstream-release/bin/llc -filetype=obj tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/Sema.cpp.ir.o -dwarf-version=5 -mtriple x86_64-pc-linux -o foo.o
with refactor 
0:05.98 real, 5.80 user, 0.17 sys, 0 amem, 297044 mmem 
0:06.18 real, 6.00 user, 0.17 sys, 0 amem, 296656 mmem 
0:05.85 real, 5.69 user, 0.15 sys, 0 amem, 297264 mmem  

main 
0:05.52 real, 5.33 user, 0.17 sys, 0 amem, 297412 mmem 
0:05.99 real, 5.82 user, 0.16 sys, 0 amem, 297068 mmem 
0:05.87 real, 5.69 user, 0.16 sys, 0 amem, 297080 mmem

Seems like these changes fall under noise?

dwblaikie · 2023-10-18T22:08:28Z

llvm/include/llvm/CodeGen/AccelTable.h

+  uint64_t getDieOffset() const {
+    if (const DIE *const *TDie = std::get_if<const DIE *>(&OffsetVal))
+      return (*TDie)->getOffset();
+    return std::get<uint64_t>(OffsetVal);


Might be better to make this assert-fail if it's stil in DIE mode, rather than offset mode? (to make it clear that that transformation is intended - I mean, if someone queries it before the DIEs have been laid out, DIE::getOffset will assert - but still, this seems like excess API surface area that could lead to more confusion later that'd be avoided by a hard stop here)

I was thinking about assert, but there are release builds that have asserts turned off.... So will result in "weird" crash.
Also getDieOffset is used in order() from ccelTableBase::finalize which is invoked before normalization happens.
One of the unit tests was failing because of it.

I was thinking about assert, but there are release builds that have asserts turned off.... So will result in "weird" crash.

Sure, though better than untested code being unidentifiable.

Also getDieOffset is used in order() from ccelTableBase::finalize which is invoked before normalization happens.
One of the unit tests was failing because of it.

Ah, OK - so at the moment this code is actually load bearing.

Could that be fixed/changed? Could normalizing happen before finalize?

It can be moved to llvm::emitDWARF5AccelTable before Contents.finalize is invoked?
My original thought was to do normalization close to use, in context where it matters. In Dwarf5AccelTableWriter.

I think it'd be a somewhat simpler conceptual model if all uses of the DIE offsets in the accelerator tables came in the offset mode, after the DIE mode, if possible. Like after the DIE tree is finalized, we could finalize the accelerator in a similar way?

Like just after the computeSizeAndOffsets call in DwarfDebug::finalizeModuleInfo() we could do something similar to the accelerator tables? (call some function then take the sizes and offsets so computed, and bake them in/discard the DIE references)

Sure, let me take a look there.

something like fixup?
I went with path of least resistance of exposing Entries. Umm probably not ideal.
Alternative is for AccelTableBase to have some kind of member function that takes in a lambda to peform stuff on Entries. Although with all the templated code not sure how easy that will be.

Issue is that buckets are not available until AccelTableBase::finalize is called.

Overall I kind of feel weird calling some normalize/finalize thing after offsets are computed, that is just applicable to one thing.

…IE offset

dwblaikie · 2023-10-20T19:54:43Z

llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

+void DwarfDebug::finalizeAccelerationTables() {
+  for (auto &Entry : AccelDebugNames.getEntries()) {
+    for (AccelTableData *Value : Entry.second.Values) {
+      static_cast<DWARF5AccelTableData *>(Value)->normalizeDIEToOffset();
+    }
+  }
+}


Yeah, mixed feelings - I wouldn't mind this being a member function of AccelTable - and having some function on all entry data types - which can be a no-op on AppleNames entry types (though that'll be a bit subtle - it's only a valid no-op because the CU DIEs do persist long enough to be usable - but AppleNames will hopefully eventually go away, so that's got a path to removal, hopefully).

I did also realize that in the DWARF5AccelTableStaticData looks a lot like the new version of the DWARF5AccelTableData you're introducing here - so there might be a chance to merge these, especially once the packing/structure layout/size issue is addressed (by not using std::variant the struct could be shrunk down to the same size/basically the same layout (making the tag 16 bits, using the extra bits to encode the discriminator for the pointer V offset - and if it's just assertions that it's in the right mode, it won't make the static case any less efficient for this generality - and remove some complexity (eg: the DWARF5 name emission can be a non-template then))

If the DWARF5AccelTable*Data types become one data type, then I suppose this "finalize" operation could be a non-member function that takes a AccelTable<DWARF5AccelTableData> parameter which would be pretty simple/tidy.

Thanks for suggestions. I expanded this refactor.

I removed DWARF5AccelTableStaticData (24 bytes), and changed to use DWARF5AccelTableData (32 bytes).
I added class DWARF5AccelTable : public AccelTable
-- to move getEntries into it, to reduce the scope of where it's called, and to reduce number of template instantiations.
Made the finalize function static that takes in DWARF5AccelTable

My reasoning for leaving std::variant in place for now:
-- Data in above comments shows when we are compiling sema from bit code it is noise.
-- The DWARF5AccelTableStaticData is used in DWARF*Linker It raises entire debug information into an IR form, so extra 8 bytes for a acceleration table I don't think matter that much. For example in BOLT which uses similar approach peak memory is 50-60 GB with breaking up how many CUs we process in IR form before writing them out. Without it it's 100+GB.
-- Using variant vs bit packing is more inline with c++ usage in llvm (I think).

WDYT?

I'd probably still pack the structure a bit tighter - it's not especially costly for the abstractions, etc. But shrug I don't insist on it.

…atain DIE offset

llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp

llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

dwblaikie · 2023-10-23T22:50:13Z

llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

-  case AccelTableKind::Dwarf:
-    AccelDebugNames.addName(Ref, Die);
+  case AccelTableKind::Dwarf: {
+    DwarfCompileUnit *CU = lookupCU(Die.getUnitDie());


Hmm, I'd have thought it would - the DICompileUnit doesn't carry it directly, which is fair. But I think it's more efficient to use the CUMap to go from DICompileUnit -> DwarfCompileUnit, rather than going from DIE->unit DIE-> DwarfCompileUnit?

dwblaikie · 2023-10-23T22:53:16Z

llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp

+void DwarfDebug::finalizeAccelerationTables() {
+  for (auto &Entry : AccelDebugNames.getEntries()) {
+    for (AccelTableData *Value : Entry.second.Values) {
+      static_cast<DWARF5AccelTableData *>(Value)->normalizeDIEToOffset();
+    }
+  }
+}


I'd probably still pack the structure a bit tighter - it's not especially costly for the abstractions, etc. But shrug I don't insist on it.

… to conatain DIE offset

dwblaikie

Looks good, thanks!

ayermolo · 2023-10-24T23:43:32Z

Awesome thanks! @JDevlieghere @felipepiovezan WDYT? :)

JDevlieghere

LGTM!

felipepiovezan · 2023-10-25T18:36:11Z

This actually makes some of the code simpler, so LGTM!

…et (llvm#69399) This is pre-cursor patch to enabling type units with DWARF5 acceleration tables. With this change it allows for entries to contain offsets directly, this way type units do not need to be preserved until .debug_names is written out.

felipepiovezan · 2023-12-21T15:50:37Z

IIUC, after this PR got merged, all uses of the class below have the same template argument.
What do you think of removing the template?

/// Class responsible for emitting a DWARF v5 Accelerator Table. The only
/// public function is emit(), which performs the actual emission.
///
/// The class is templated in its data type. This allows us to emit both dyamic
/// and static data entries. A callback abstract the logic to provide a CU
/// index for a given entry, which is different per data type, but identical
/// for every entry in the same table.
template <typename DataT>
class Dwarf5AccelTableWriter : public AccelTableWriter {

ag Dwarf5AccelTableWriter
───────┬───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
       │ STDIN
───────┼───────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────
   1   │ llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
   2   │ 190:class Dwarf5AccelTableWriter : public AccelTableWriter {
   3   │ 210:    void emit(Dwarf5AccelTableWriter &Ctx);
   4   │ 241:  Dwarf5AccelTableWriter(
   5   │ 376:void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
   6   │ 424:void Dwarf5AccelTableWriter<DataT>::populateAbbrevsMap() {
   7   │ 446:void Dwarf5AccelTableWriter<DataT>::emitCUList() const {
   8   │ 457:void Dwarf5AccelTableWriter<DataT>::emitTUList() const {
   9   │ 470:void Dwarf5AccelTableWriter<DataT>::emitBuckets() const {
  10   │ 480:void Dwarf5AccelTableWriter<DataT>::emitStringOffsets() const {
  11   │ 492:void Dwarf5AccelTableWriter<DataT>::emitAbbrevs() const {
  12   │ 514:void Dwarf5AccelTableWriter<DataT>::emitEntry(const DataT &Entry) {
  13   │ 568:template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emitData() {
  14   │ 594:Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
  15   │ 612:template <typename DataT> void Dwarf5AccelTableWriter<DataT>::emit() {
  16   │ 677:  Dwarf5AccelTableWriter<DWARF5AccelTableData>(
  17   │ 709:  Dwarf5AccelTableWriter<DWARF5AccelTableData>(Asm, Contents, CUs, TypeUnits,

ayermolo · 2023-12-21T19:11:07Z

Less template hell the better. :) Not sure when I can circle to this particular cleanup thought.

felipepiovezan · 2023-12-21T19:12:19Z

Less template hell the better. :) Not sure when I can circle to this particular cleanup thought.

I can totally do it! Just wanted to get your opinion first :)

…et (llvm#69399) This is pre-cursor patch to enabling type units with DWARF5 acceleration tables. With this change it allows for entries to contain offsets directly, this way type units do not need to be preserved until .debug_names is written out. (cherry picked from commit da27c25)

llvmbot added the debuginfo label Oct 17, 2023

ayermolo requested a review from dwblaikie October 17, 2023 23:54

ayermolo mentioned this pull request Oct 17, 2023

[LLVM][DWARF] Add support for monolithic types in .debug_names #68131

Merged

dwblaikie reviewed Oct 18, 2023

View reviewed changes

fixup! [LLVM[NFC] Refactor to allow debug_names entries to conatain D…

06ae74e

…IE offset

dwblaikie reviewed Oct 20, 2023

View reviewed changes

fixup! fixup! [LLVM[NFC] Refactor to allow debug_names entries to con…

b3b4cb1

…atain DIE offset

adrian-prantl requested review from JDevlieghere and felipepiovezan October 23, 2023 20:45

dwblaikie reviewed Oct 23, 2023

View reviewed changes

fixup! fixup! fixup! [LLVM[NFC] Refactor to allow debug_names entries…

b1c90e8

… to conatain DIE offset

dwblaikie approved these changes Oct 24, 2023

View reviewed changes

JDevlieghere approved these changes Oct 25, 2023

View reviewed changes

felipepiovezan approved these changes Oct 25, 2023

View reviewed changes

ayermolo merged commit da27c25 into llvm:main Oct 25, 2023

ayermolo deleted the refactorNames branch October 25, 2023 19:39

ayermolo mentioned this pull request Jan 6, 2024

[AsmPrinter][Dwarf5][nfc] Remove template from AccelTable class #76296

Merged

[LLVM[NFC] Refactor to allow debug_names entries to conatain DIE offset #69399

[LLVM[NFC] Refactor to allow debug_names entries to conatain DIE offset #69399

Uh oh!

Conversation

ayermolo commented Oct 17, 2023

Uh oh!

llvmbot commented Oct 17, 2023

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

ayermolo Oct 21, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!

Choose a reason for hiding this comment

Uh oh!

Choose a reason for hiding this comment

Uh oh!

dwblaikie left a comment

Choose a reason for hiding this comment

Uh oh!

ayermolo commented Oct 24, 2023

Uh oh!

JDevlieghere left a comment

Choose a reason for hiding this comment

Uh oh!

felipepiovezan commented Oct 25, 2023

Uh oh!

felipepiovezan commented Dec 21, 2023 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

ayermolo commented Dec 21, 2023

Uh oh!

felipepiovezan commented Dec 21, 2023

Uh oh!

Uh oh!

ayermolo Oct 21, 2023 •

edited

Loading

felipepiovezan commented Dec 21, 2023 •

edited

Loading