Skip to content

Commit 9ffeaaa

Browse files
committed
[LLD] [COFF] Use StringTableBuilder to optimize the string table
This performs tail merging (and deduplication) of the strings. On a statically linked clang.exe, this shrinks the ~17 MB string table by around 0.5 MB, at the cost of adding ~160 ms to a link that originally took around 950 ms. When neither `-debug:symtab` nor `-debug:dwarf` is set, the string table is only used for long section names, so this shouldn't make any difference at all. Differential Revision: https://reviews.llvm.org/D120677
1 parent f9c545e commit 9ffeaaa

File tree

1 file changed

+30
-20
lines changed

1 file changed

+30
-20
lines changed

lld/COFF/Writer.cpp

Lines changed: 30 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
#include "llvm/ADT/STLExtras.h"
2525
#include "llvm/ADT/StringSet.h"
2626
#include "llvm/ADT/StringSwitch.h"
27+
#include "llvm/MC/StringTableBuilder.h"
2728
#include "llvm/Support/BinaryStreamReader.h"
2829
#include "llvm/Support/Debug.h"
2930
#include "llvm/Support/Endian.h"
@@ -195,7 +196,9 @@ class PartialSectionKey {
195196
// The writer writes a SymbolTable result to a file.
196197
class Writer {
197198
public:
198-
Writer(COFFLinkerContext &c) : buffer(errorHandler().outputBuffer), ctx(c) {}
199+
Writer(COFFLinkerContext &c)
200+
: buffer(errorHandler().outputBuffer),
201+
strtab(StringTableBuilder::WinCOFF), ctx(c) {}
199202
void run();
200203

201204
private:
@@ -240,7 +243,6 @@ class Writer {
240243
PartialSection *findPartialSection(StringRef name, uint32_t outChars);
241244

242245
llvm::Optional<coff_symbol16> createSymbol(Defined *d);
243-
size_t addEntryToStringTable(StringRef str);
244246

245247
OutputSection *findSection(StringRef name);
246248
void addBaserels();
@@ -250,7 +252,7 @@ class Writer {
250252

251253
std::unique_ptr<FileOutputBuffer> &buffer;
252254
std::map<PartialSectionKey, PartialSection *> partialSections;
253-
std::vector<char> strtab;
255+
StringTableBuilder strtab;
254256
std::vector<llvm::object::coff_symbol16> outputSymtab;
255257
IdataContents idata;
256258
Chunk *importTableStart = nullptr;
@@ -1120,14 +1122,6 @@ void Writer::assignOutputSectionIndices() {
11201122
sc->setOutputSectionIdx(mc->getOutputSectionIdx());
11211123
}
11221124

1123-
size_t Writer::addEntryToStringTable(StringRef str) {
1124-
assert(str.size() > COFF::NameSize);
1125-
size_t offsetOfEntry = strtab.size() + 4; // +4 for the size field
1126-
strtab.insert(strtab.end(), str.begin(), str.end());
1127-
strtab.push_back('\0');
1128-
return offsetOfEntry;
1129-
}
1130-
11311125
Optional<coff_symbol16> Writer::createSymbol(Defined *def) {
11321126
coff_symbol16 sym;
11331127
switch (def->kind()) {
@@ -1164,7 +1158,8 @@ Optional<coff_symbol16> Writer::createSymbol(Defined *def) {
11641158
StringRef name = def->getName();
11651159
if (name.size() > COFF::NameSize) {
11661160
sym.Name.Offset.Zeroes = 0;
1167-
sym.Name.Offset.Offset = addEntryToStringTable(name);
1161+
sym.Name.Offset.Offset = 0; // Filled in later
1162+
strtab.add(name);
11681163
} else {
11691164
memset(sym.Name.ShortName, 0, COFF::NameSize);
11701165
memcpy(sym.Name.ShortName, name.data(), name.size());
@@ -1191,6 +1186,7 @@ void Writer::createSymbolAndStringTable() {
11911186
// solution where discardable sections have long names preserved and
11921187
// non-discardable sections have their names truncated, to ensure that any
11931188
// section which is mapped at runtime also has its name mapped at runtime.
1189+
std::vector<OutputSection *> longNameSections;
11941190
for (OutputSection *sec : ctx.outputSections) {
11951191
if (sec->name.size() <= COFF::NameSize)
11961192
continue;
@@ -1201,9 +1197,12 @@ void Writer::createSymbolAndStringTable() {
12011197
" is longer than 8 characters and will use a non-standard string "
12021198
"table");
12031199
}
1204-
sec->setStringTableOff(addEntryToStringTable(sec->name));
1200+
1201+
strtab.add(sec->name);
1202+
longNameSections.push_back(sec);
12051203
}
12061204

1205+
std::vector<std::pair<size_t, StringRef>> longNameSymbols;
12071206
if (config->debugDwarf || config->debugSymtab) {
12081207
for (ObjFile *file : ctx.objFileInstances) {
12091208
for (Symbol *b : file->getSymbols()) {
@@ -1218,20 +1217,33 @@ void Writer::createSymbolAndStringTable() {
12181217
continue;
12191218
}
12201219

1221-
if (Optional<coff_symbol16> sym = createSymbol(d))
1220+
if (Optional<coff_symbol16> sym = createSymbol(d)) {
12221221
outputSymtab.push_back(*sym);
1222+
if (d->getName().size() > COFF::NameSize)
1223+
longNameSymbols.push_back({outputSymtab.size() - 1, d->getName()});
1224+
}
12231225
}
12241226
}
12251227
}
12261228

1227-
if (outputSymtab.empty() && strtab.empty())
1229+
strtab.finalize();
1230+
1231+
for (OutputSection *sec : longNameSections)
1232+
sec->setStringTableOff(strtab.getOffset(sec->name));
1233+
1234+
for (auto P : longNameSymbols) {
1235+
coff_symbol16 &sym = outputSymtab[P.first];
1236+
sym.Name.Offset.Offset = strtab.getOffset(P.second);
1237+
}
1238+
1239+
if (outputSymtab.empty() && strtab.getSize() <= 4)
12281240
return;
12291241

12301242
// We position the symbol table to be adjacent to the end of the last section.
12311243
uint64_t fileOff = fileSize;
12321244
pointerToSymbolTable = fileOff;
12331245
fileOff += outputSymtab.size() * sizeof(coff_symbol16);
1234-
fileOff += 4 + strtab.size();
1246+
fileOff += strtab.getSize();
12351247
fileSize = alignTo(fileOff, config->fileAlign);
12361248
}
12371249

@@ -1506,7 +1518,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
15061518
sectionTable = ArrayRef<uint8_t>(
15071519
buf - ctx.outputSections.size() * sizeof(coff_section), buf);
15081520

1509-
if (outputSymtab.empty() && strtab.empty())
1521+
if (outputSymtab.empty() && strtab.getSize() <= 4)
15101522
return;
15111523

15121524
coff->PointerToSymbolTable = pointerToSymbolTable;
@@ -1519,9 +1531,7 @@ template <typename PEHeaderTy> void Writer::writeHeader() {
15191531
// Create the string table, it follows immediately after the symbol table.
15201532
// The first 4 bytes is length including itself.
15211533
buf = reinterpret_cast<uint8_t *>(&symbolTable[numberOfSymbols]);
1522-
write32le(buf, strtab.size() + 4);
1523-
if (!strtab.empty())
1524-
memcpy(buf + 4, strtab.data(), strtab.size());
1534+
strtab.write(buf);
15251535
}
15261536

15271537
void Writer::openFile(StringRef path) {

0 commit comments

Comments
 (0)