-
Notifications
You must be signed in to change notification settings - Fork 13.5k
[LLVM[NFC] Refactor to allow debug_names entries to conatain DIE offset #69399
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
This is pre-cursor patch to enabling type units with DWARF5 acceleration tables. With this change it allows for entries to contain offsets directly, this way type units do not need to be preserved until .debug_names is written out.
@llvm/pr-subscribers-debuginfo Author: Alexander Yermolovich (ayermolo) ChangesThis is pre-cursor patch to enabling type units with DWARF5 acceleration tables. Full diff: https://github.com/llvm/llvm-project/pull/69399.diff 3 Files Affected:
diff --git a/llvm/include/llvm/CodeGen/AccelTable.h b/llvm/include/llvm/CodeGen/AccelTable.h
index d521b31e3d16ab4..d4bfd7d821c4952 100644
--- a/llvm/include/llvm/CodeGen/AccelTable.h
+++ b/llvm/include/llvm/CodeGen/AccelTable.h
@@ -252,20 +252,27 @@ class DWARF5AccelTableData : public AccelTableData {
public:
static uint32_t hash(StringRef Name) { return caseFoldingDjbHash(Name); }
- DWARF5AccelTableData(const DIE &Die) : Die(Die) {}
+ DWARF5AccelTableData(const DIE &Die, const DwarfCompileUnit &CU);
#ifndef NDEBUG
void print(raw_ostream &OS) const override;
#endif
- const DIE &getDie() const { return Die; }
- uint64_t getDieOffset() const { return Die.getOffset(); }
- unsigned getDieTag() const { return Die.getTag(); }
+ uint64_t getDieOffset() const {
+ if (const DIE *const *TDie = std::get_if<const DIE *>(&OffsetVal))
+ return (*TDie)->getOffset();
+ return std::get<uint64_t>(OffsetVal);
+ }
+ unsigned getDieTag() const { return Tag; }
+ unsigned getUnitID() const { return UnitID; }
+ void normalizeDIEToOffset() { OffsetVal = getDieOffset(); }
protected:
- const DIE &Die;
+ std::variant<const DIE *, uint64_t> OffsetVal;
+ dwarf::Tag Tag;
+ unsigned UnitID;
- uint64_t order() const override { return Die.getOffset(); }
+ uint64_t order() const override { return getDieOffset(); }
};
class DWARF5AccelTableStaticData : public AccelTableData {
@@ -283,6 +290,8 @@ class DWARF5AccelTableStaticData : public AccelTableData {
uint64_t getDieOffset() const { return DieOffset; }
unsigned getDieTag() const { return DieTag; }
unsigned getCUIndex() const { return CUIndex; }
+ void normalizeDIEToOffset() { /* Not needed since DIE is not used. */
+ }
protected:
uint64_t DieOffset;
diff --git a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
index 88d5487427774c1..d30ca1ea7d1dc01 100644
--- a/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/AccelTable.cpp
@@ -219,6 +219,10 @@ class Dwarf5AccelTableWriter : public AccelTableWriter {
MCSymbol *AbbrevEnd = Asm->createTempSymbol("names_abbrev_end");
MCSymbol *EntryPool = Asm->createTempSymbol("names_entries");
+ /// Iterates over all the entries and if it contains a DIE replaces it with an
+ /// offset.
+ void normalizeOffsets();
+
DenseSet<uint32_t> getUniqueTags() const;
// Right now, we emit uniform attributes for all tags.
@@ -357,6 +361,13 @@ void AppleAccelTableWriter::emit() const {
emitData();
}
+DWARF5AccelTableData::DWARF5AccelTableData(const DIE &Die,
+ const DwarfCompileUnit &CU)
+ : OffsetVal(&Die) {
+ Tag = Die.getTag();
+ UnitID = CU.getUniqueID();
+}
+
template <typename DataT>
void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
assert(CompUnitCount > 0 && "Index must have at least one CU.");
@@ -387,6 +398,16 @@ void Dwarf5AccelTableWriter<DataT>::Header::emit(Dwarf5AccelTableWriter &Ctx) {
Asm->OutStreamer->emitBytes({AugmentationString, AugmentationStringSize});
}
+template <typename DataT>
+void Dwarf5AccelTableWriter<DataT>::normalizeOffsets() {
+ for (auto &Bucket : Contents.getBuckets()) {
+ for (auto *Hash : Bucket) {
+ for (auto *Value : Hash->Values) {
+ static_cast<DataT *>(Value)->normalizeDIEToOffset();
+ }
+ }
+ }
+}
template <typename DataT>
DenseSet<uint32_t> Dwarf5AccelTableWriter<DataT>::getUniqueTags() const {
DenseSet<uint32_t> UniqueTags;
@@ -516,6 +537,7 @@ Dwarf5AccelTableWriter<DataT>::Dwarf5AccelTableWriter(
Header(CompUnits.size(), Contents.getBucketCount(),
Contents.getUniqueNameCount()),
CompUnits(CompUnits), getCUIndexForEntry(std::move(getCUIndexForEntry)) {
+ normalizeOffsets();
DenseSet<uint32_t> UniqueTags = getUniqueTags();
SmallVector<AttributeEncoding, 2> UniformAttributes = getUniformAttributes();
@@ -575,8 +597,7 @@ void llvm::emitDWARF5AccelTable(
Dwarf5AccelTableWriter<DWARF5AccelTableData>(
Asm, Contents, CompUnits,
[&](const DWARF5AccelTableData &Entry) {
- const DIE *CUDie = Entry.getDie().getUnitDie();
- return CUIndex[DD.lookupCU(CUDie)->getUniqueID()];
+ return CUIndex[Entry.getUnitID()];
})
.emit();
}
diff --git a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index ee2ab71ad28e47f..80af8a8d79e2ac3 100644
--- a/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -3547,9 +3547,11 @@ void DwarfDebug::addAccelNameImpl(const DICompileUnit &CU,
case AccelTableKind::Apple:
AppleAccel.addName(Ref, Die);
break;
- case AccelTableKind::Dwarf:
- AccelDebugNames.addName(Ref, Die);
+ case AccelTableKind::Dwarf: {
+ DwarfCompileUnit *CU = lookupCU(Die.getUnitDie());
+ AccelDebugNames.addName(Ref, Die, *CU);
break;
+ }
case AccelTableKind::Default:
llvm_unreachable("Default should have already been resolved.");
case AccelTableKind::None:
|
case AccelTableKind::Dwarf: | ||
AccelDebugNames.addName(Ref, Die); | ||
case AccelTableKind::Dwarf: { | ||
DwarfCompileUnit *CU = lookupCU(Die.getUnitDie()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Is the addAccelNameImpl
's CU
parameter not accurate/correct here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Well input there is DICompileUnit, which doesn't have uniqueID set yet, I think.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, I'd have thought it would - the DICompileUnit doesn't carry it directly, which is fair. But I think it's more efficient to use the CUMap to go from DICompileUnit -> DwarfCompileUnit, rather than going from DIE->unit DIE-> DwarfCompileUnit?
std::variant<const DIE *, uint64_t> OffsetVal; | ||
dwarf::Tag Tag; | ||
unsigned UnitID; |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is going to end up increasing the size of the struct from 8 bytes to ((8(pointer/uint64)+8(discriminator and padding))+2(Tag)+2(padding)+4(uintID) == 24 bytes.
Might be worth skipping std::variant, so that the discriminator can be encoded in what's otherwise going to be 2 bytes of padding, then it'll only be 16 bytes (8(pointer/uint64)+2(discriminator)+2(tag)+4(uintID)) - and we could probably make uintIDs smaller, really - like they can probably be 16 bits just fine... (but that's a change for another time - it's fine here and wouldn't make the struct any smaller anyway)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
If it's a blocker I can redesign, but personally I think trying to squeeze most bits out just leads to a more fragile implementation. Plus cool c++ features. :D
Running llc on sema bitcode:
time ~/local/llvm-build-upstream-release/bin/llc -filetype=obj tools/clang/lib/Sema/CMakeFiles/obj.clangSema.dir/Sema.cpp.ir.o -dwarf-version=5 -mtriple x86_64-pc-linux -o foo.o
with refactor
0:05.98 real, 5.80 user, 0.17 sys, 0 amem, 297044 mmem
0:06.18 real, 6.00 user, 0.17 sys, 0 amem, 296656 mmem
0:05.85 real, 5.69 user, 0.15 sys, 0 amem, 297264 mmem
main
0:05.52 real, 5.33 user, 0.17 sys, 0 amem, 297412 mmem
0:05.99 real, 5.82 user, 0.16 sys, 0 amem, 297068 mmem
0:05.87 real, 5.69 user, 0.16 sys, 0 amem, 297080 mmem
Seems like these changes fall under noise?
uint64_t getDieOffset() const { | ||
if (const DIE *const *TDie = std::get_if<const DIE *>(&OffsetVal)) | ||
return (*TDie)->getOffset(); | ||
return std::get<uint64_t>(OffsetVal); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Might be better to make this assert-fail if it's stil in DIE mode, rather than offset mode? (to make it clear that that transformation is intended - I mean, if someone queries it before the DIEs have been laid out, DIE::getOffset will assert - but still, this seems like excess API surface area that could lead to more confusion later that'd be avoided by a hard stop here)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was thinking about assert, but there are release builds that have asserts turned off.... So will result in "weird" crash.
Also getDieOffset is used in order() from ccelTableBase::finalize which is invoked before normalization happens.
One of the unit tests was failing because of it.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was thinking about assert, but there are release builds that have asserts turned off.... So will result in "weird" crash.
Sure, though better than untested code being unidentifiable.
Also getDieOffset is used in order() from ccelTableBase::finalize which is invoked before normalization happens.
One of the unit tests was failing because of it.
Ah, OK - so at the moment this code is actually load bearing.
Could that be fixed/changed? Could normalizing happen before finalize?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It can be moved to llvm::emitDWARF5AccelTable before Contents.finalize is invoked?
My original thought was to do normalization close to use, in context where it matters. In Dwarf5AccelTableWriter.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I think it'd be a somewhat simpler conceptual model if all uses of the DIE offsets in the accelerator tables came in the offset mode, after the DIE mode, if possible. Like after the DIE tree is finalized, we could finalize the accelerator in a similar way?
Like just after the computeSizeAndOffsets
call in DwarfDebug::finalizeModuleInfo()
we could do something similar to the accelerator tables? (call some function then take the sizes and offsets so computed, and bake them in/discard the DIE references)
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Sure, let me take a look there.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
something like fixup?
I went with path of least resistance of exposing Entries. Umm probably not ideal.
Alternative is for AccelTableBase to have some kind of member function that takes in a lambda to peform stuff on Entries. Although with all the templated code not sure how easy that will be.
Issue is that buckets are not available until AccelTableBase::finalize is called.
Overall I kind of feel weird calling some normalize/finalize thing after offsets are computed, that is just applicable to one thing.
void DwarfDebug::finalizeAccelerationTables() { | ||
for (auto &Entry : AccelDebugNames.getEntries()) { | ||
for (AccelTableData *Value : Entry.second.Values) { | ||
static_cast<DWARF5AccelTableData *>(Value)->normalizeDIEToOffset(); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Yeah, mixed feelings - I wouldn't mind this being a member function of AccelTable
- and having some function on all entry data types - which can be a no-op on AppleNames entry types (though that'll be a bit subtle - it's only a valid no-op because the CU DIEs do persist long enough to be usable - but AppleNames will hopefully eventually go away, so that's got a path to removal, hopefully).
I did also realize that in the DWARF5AccelTableStaticData
looks a lot like the new version of the DWARF5AccelTableData
you're introducing here - so there might be a chance to merge these, especially once the packing/structure layout/size issue is addressed (by not using std::variant
the struct could be shrunk down to the same size/basically the same layout (making the tag 16 bits, using the extra bits to encode the discriminator for the pointer V offset - and if it's just assertions that it's in the right mode, it won't make the static case any less efficient for this generality - and remove some complexity (eg: the DWARF5 name emission can be a non-template then))
If the DWARF5AccelTable*Data types become one data type, then I suppose this "finalize" operation could be a non-member function that takes a AccelTable<DWARF5AccelTableData>
parameter which would be pretty simple/tidy.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for suggestions. I expanded this refactor.
I removed DWARF5AccelTableStaticData (24 bytes), and changed to use DWARF5AccelTableData (32 bytes).
I added class DWARF5AccelTable : public AccelTable
-- to move getEntries into it, to reduce the scope of where it's called, and to reduce number of template instantiations.
Made the finalize function static that takes in DWARF5AccelTable
My reasoning for leaving std::variant in place for now:
-- Data in above comments shows when we are compiling sema from bit code it is noise.
-- The DWARF5AccelTableStaticData is used in DWARF*Linker It raises entire debug information into an IR form, so extra 8 bytes for a acceleration table I don't think matter that much. For example in BOLT which uses similar approach peak memory is 50-60 GB with breaking up how many CUs we process in IR form before writing them out. Without it it's 100+GB.
-- Using variant vs bit packing is more inline with c++ usage in llvm (I think).
WDYT?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd probably still pack the structure a bit tighter - it's not especially costly for the abstractions, etc. But shrug I don't insist on it.
case AccelTableKind::Dwarf: | ||
AccelDebugNames.addName(Ref, Die); | ||
case AccelTableKind::Dwarf: { | ||
DwarfCompileUnit *CU = lookupCU(Die.getUnitDie()); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Hmm, I'd have thought it would - the DICompileUnit doesn't carry it directly, which is fair. But I think it's more efficient to use the CUMap to go from DICompileUnit -> DwarfCompileUnit, rather than going from DIE->unit DIE-> DwarfCompileUnit?
void DwarfDebug::finalizeAccelerationTables() { | ||
for (auto &Entry : AccelDebugNames.getEntries()) { | ||
for (AccelTableData *Value : Entry.second.Values) { | ||
static_cast<DWARF5AccelTableData *>(Value)->normalizeDIEToOffset(); | ||
} | ||
} | ||
} |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I'd probably still pack the structure a bit tighter - it's not especially costly for the abstractions, etc. But shrug I don't insist on it.
… to conatain DIE offset
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Looks good, thanks!
Awesome thanks! @JDevlieghere @felipepiovezan WDYT? :) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM!
This actually makes some of the code simpler, so LGTM! |
…et (llvm#69399) This is pre-cursor patch to enabling type units with DWARF5 acceleration tables. With this change it allows for entries to contain offsets directly, this way type units do not need to be preserved until .debug_names is written out.
IIUC, after this PR got merged, all uses of the class below have the same template argument.
|
Less template hell the better. :) Not sure when I can circle to this particular cleanup thought. |
I can totally do it! Just wanted to get your opinion first :) |
…et (llvm#69399) This is pre-cursor patch to enabling type units with DWARF5 acceleration tables. With this change it allows for entries to contain offsets directly, this way type units do not need to be preserved until .debug_names is written out. (cherry picked from commit da27c25)
This is pre-cursor patch to enabling type units with DWARF5 acceleration tables.
With this change it allows for entries to contain offsets directly, this way type
units do not need to be preserved until .debug_names is written out.