Skip to content

Commit 537344f

Browse files
authored
[clang][modules] Move SLocEntry search into ASTReader (#66966)
In `SourceManager::getFileID()`, Clang performs binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for each visited entry. For some entries, that includes decompressing the associated buffer (e.g. the predefines buffer, macro expansion buffers, contents of volatile files), which shows up in profiles of the dependency scanner. This patch moves the binary search over loaded entries into `ASTReader`, which can perform cheaper partial deserialization during the binary search, reducing the wall time of dependency scans by ~3%. This also reduces the number of retired instructions by ~1.4% on regular (implicit) modules compilation. Note that this patch drops the optimizations based on the last lookup ID (pruning the search space and performing linear search before resorting to the full binary search). Instead, it reduces the search space by asking `ASTReader::GlobalSLocOffsetMap` for the containing `ModuleFile` and only does binary search over entries of single module file.
1 parent 8eff5e4 commit 537344f

File tree

6 files changed

+155
-91
lines changed

6 files changed

+155
-91
lines changed

clang/include/clang/Basic/SourceManager.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,14 @@ class SLocEntry {
500500
return Expansion;
501501
}
502502

503+
/// Creates an incomplete SLocEntry that is only able to report its offset.
504+
static SLocEntry getOffsetOnly(SourceLocation::UIntTy Offset) {
505+
assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
506+
SLocEntry E;
507+
E.Offset = Offset;
508+
return E;
509+
}
510+
503511
static SLocEntry get(SourceLocation::UIntTy Offset, const FileInfo &FI) {
504512
assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
505513
SLocEntry E;
@@ -534,6 +542,12 @@ class ExternalSLocEntrySource {
534542
/// entry from being loaded.
535543
virtual bool ReadSLocEntry(int ID) = 0;
536544

545+
/// Get the index ID for the loaded SourceLocation offset.
546+
///
547+
/// \returns Invalid index ID (0) if an error occurred that prevented the
548+
/// SLocEntry from being loaded.
549+
virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0;
550+
537551
/// Retrieve the module import location and name for the given ID, if
538552
/// in fact it was loaded from a module (rather than, say, a precompiled
539553
/// header).
@@ -729,6 +743,12 @@ class SourceManager : public RefCountedBase<SourceManager> {
729743
/// Same indexing as LoadedSLocEntryTable.
730744
llvm::BitVector SLocEntryLoaded;
731745

746+
/// A bitmap that indicates whether the entries of LoadedSLocEntryTable
747+
/// have already had their offset loaded from the external source.
748+
///
749+
/// Superset of SLocEntryLoaded. Same indexing as SLocEntryLoaded.
750+
llvm::BitVector SLocEntryOffsetLoaded;
751+
732752
/// An external source for source location entries.
733753
ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
734754

clang/include/clang/Serialization/ASTReader.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2154,6 +2154,12 @@ class ASTReader
21542154

21552155
/// Read the source location entry with index ID.
21562156
bool ReadSLocEntry(int ID) override;
2157+
/// Get the index ID for the loaded SourceLocation offset.
2158+
int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override;
2159+
/// Try to read the offset of the SLocEntry at the given index in the given
2160+
/// module file.
2161+
llvm::Expected<SourceLocation::UIntTy> readSLocOffset(ModuleFile *F,
2162+
unsigned Index);
21572163

21582164
/// Retrieve the module import location and module name for the
21592165
/// given source manager entry ID.

clang/lib/Basic/SourceManager.cpp

Lines changed: 5 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -337,6 +337,7 @@ void SourceManager::clearIDTables() {
337337
LocalSLocEntryTable.clear();
338338
LoadedSLocEntryTable.clear();
339339
SLocEntryLoaded.clear();
340+
SLocEntryOffsetLoaded.clear();
340341
LastLineNoFileIDQuery = FileID();
341342
LastLineNoContentCache = nullptr;
342343
LastFileIDLookup = FileID();
@@ -459,6 +460,7 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
459460
}
460461
LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
461462
SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
463+
SLocEntryOffsetLoaded.resize(LoadedSLocEntryTable.size());
462464
CurrentLoadedOffset -= TotalSize;
463465
int BaseID = -int(LoadedSLocEntryTable.size()) - 1;
464466
LoadedSLocEntryAllocBegin.push_back(FileID::get(BaseID));
@@ -597,7 +599,7 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
597599
assert(!SLocEntryLoaded[Index] && "FileID already loaded");
598600
LoadedSLocEntryTable[Index] = SLocEntry::get(
599601
LoadedOffset, FileInfo::get(IncludePos, File, FileCharacter, Filename));
600-
SLocEntryLoaded[Index] = true;
602+
SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
601603
return FileID::get(LoadedID);
602604
}
603605
unsigned FileSize = File.getSize();
@@ -657,7 +659,7 @@ SourceManager::createExpansionLocImpl(const ExpansionInfo &Info,
657659
assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
658660
assert(!SLocEntryLoaded[Index] && "FileID already loaded");
659661
LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info);
660-
SLocEntryLoaded[Index] = true;
662+
SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
661663
return SourceLocation::getMacroLoc(LoadedOffset);
662664
}
663665
LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
@@ -858,69 +860,7 @@ FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const {
858860
return FileID();
859861
}
860862

861-
// Essentially the same as the local case, but the loaded array is sorted
862-
// in the other direction (decreasing order).
863-
// GreaterIndex is the one where the offset is greater, which is actually a
864-
// lower index!
865-
unsigned GreaterIndex = 0;
866-
unsigned LessIndex = LoadedSLocEntryTable.size();
867-
if (LastFileIDLookup.ID < 0) {
868-
// Prune the search space.
869-
int LastID = LastFileIDLookup.ID;
870-
if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset)
871-
GreaterIndex =
872-
(-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache
873-
else
874-
LessIndex = -LastID - 2;
875-
}
876-
877-
// First do a linear scan from the last lookup position, if possible.
878-
unsigned NumProbes;
879-
bool Invalid = false;
880-
for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) {
881-
// Make sure the entry is loaded!
882-
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(GreaterIndex, &Invalid);
883-
if (Invalid)
884-
return FileID(); // invalid entry.
885-
if (E.getOffset() <= SLocOffset) {
886-
FileID Res = FileID::get(-int(GreaterIndex) - 2);
887-
LastFileIDLookup = Res;
888-
NumLinearScans += NumProbes + 1;
889-
return Res;
890-
}
891-
}
892-
893-
// Linear scan failed. Do the binary search.
894-
NumProbes = 0;
895-
while (true) {
896-
++NumProbes;
897-
unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
898-
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex, &Invalid);
899-
if (Invalid)
900-
return FileID(); // invalid entry.
901-
902-
if (E.getOffset() > SLocOffset) {
903-
if (GreaterIndex == MiddleIndex) {
904-
assert(0 && "binary search missed the entry");
905-
return FileID();
906-
}
907-
GreaterIndex = MiddleIndex;
908-
continue;
909-
}
910-
911-
if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) {
912-
FileID Res = FileID::get(-int(MiddleIndex) - 2);
913-
LastFileIDLookup = Res;
914-
NumBinaryProbes += NumProbes;
915-
return Res;
916-
}
917-
918-
if (LessIndex == MiddleIndex) {
919-
assert(0 && "binary search missed the entry");
920-
return FileID();
921-
}
922-
LessIndex = MiddleIndex;
923-
}
863+
return FileID::get(ExternalSLocEntries->getSLocEntryID(SLocOffset));
924864
}
925865

926866
SourceLocation SourceManager::

clang/lib/Serialization/ASTReader.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1444,6 +1444,79 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
14441444
}
14451445
}
14461446

1447+
llvm::Expected<SourceLocation::UIntTy>
1448+
ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) {
1449+
BitstreamCursor &Cursor = F->SLocEntryCursor;
1450+
SavedStreamPosition SavedPosition(Cursor);
1451+
if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase +
1452+
F->SLocEntryOffsets[Index]))
1453+
return Err;
1454+
1455+
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
1456+
if (!MaybeEntry)
1457+
return MaybeEntry.takeError();
1458+
1459+
llvm::BitstreamEntry Entry = MaybeEntry.get();
1460+
if (Entry.Kind != llvm::BitstreamEntry::Record)
1461+
return llvm::createStringError(
1462+
std::errc::illegal_byte_sequence,
1463+
"incorrectly-formatted source location entry in AST file");
1464+
1465+
RecordData Record;
1466+
StringRef Blob;
1467+
Expected<unsigned> MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob);
1468+
if (!MaybeSLOC)
1469+
return MaybeSLOC.takeError();
1470+
1471+
switch (MaybeSLOC.get()) {
1472+
default:
1473+
return llvm::createStringError(
1474+
std::errc::illegal_byte_sequence,
1475+
"incorrectly-formatted source location entry in AST file");
1476+
case SM_SLOC_FILE_ENTRY:
1477+
case SM_SLOC_BUFFER_ENTRY:
1478+
case SM_SLOC_EXPANSION_ENTRY:
1479+
return F->SLocEntryBaseOffset + Record[0];
1480+
}
1481+
}
1482+
1483+
int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) {
1484+
auto SLocMapI =
1485+
GlobalSLocOffsetMap.find(SourceManager::MaxLoadedOffset - SLocOffset - 1);
1486+
assert(SLocMapI != GlobalSLocOffsetMap.end() &&
1487+
"Corrupted global sloc offset map");
1488+
ModuleFile *F = SLocMapI->second;
1489+
1490+
bool Invalid = false;
1491+
1492+
auto It = llvm::upper_bound(
1493+
llvm::index_range(0, F->LocalNumSLocEntries), SLocOffset,
1494+
[&](SourceLocation::UIntTy Offset, std::size_t LocalIndex) {
1495+
int ID = F->SLocEntryBaseID + LocalIndex;
1496+
std::size_t Index = -ID - 2;
1497+
if (!SourceMgr.SLocEntryOffsetLoaded[Index]) {
1498+
assert(!SourceMgr.SLocEntryLoaded[Index]);
1499+
auto MaybeEntryOffset = readSLocOffset(F, LocalIndex);
1500+
if (!MaybeEntryOffset) {
1501+
Error(MaybeEntryOffset.takeError());
1502+
Invalid = true;
1503+
return true;
1504+
}
1505+
SourceMgr.LoadedSLocEntryTable[Index] =
1506+
SrcMgr::SLocEntry::getOffsetOnly(*MaybeEntryOffset);
1507+
SourceMgr.SLocEntryOffsetLoaded[Index] = true;
1508+
}
1509+
return Offset < SourceMgr.LoadedSLocEntryTable[Index].getOffset();
1510+
});
1511+
1512+
if (Invalid)
1513+
return 0;
1514+
1515+
// The iterator points to the first entry with start offset greater than the
1516+
// offset of interest. The previous entry must contain the offset of interest.
1517+
return F->SLocEntryBaseID + *std::prev(It);
1518+
}
1519+
14471520
bool ASTReader::ReadSLocEntry(int ID) {
14481521
if (ID == 0)
14491522
return false;

clang/test/Modules/explicit-build-missing-files.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ int y = a2<int>;
5050
// CHECK: In module 'a':
5151
// CHECK-NEXT: a.h:1:45: error:
5252

53+
int z = b<int>;
5354
// MISSING-B: could not find file '{{.*}}b.h'
5455
// MISSING-B-NOT: please delete the module cache
5556
#endif

llvm/include/llvm/ADT/STLExtras.h

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2261,43 +2261,67 @@ template <typename... Refs> struct enumerator_result<std::size_t, Refs...> {
22612261
mutable range_reference_tuple Storage;
22622262
};
22632263

2264-
/// Infinite stream of increasing 0-based `size_t` indices.
2265-
struct index_stream {
2266-
struct iterator : iterator_facade_base<iterator, std::forward_iterator_tag,
2267-
const iterator> {
2268-
iterator &operator++() {
2269-
assert(Index != std::numeric_limits<std::size_t>::max() &&
2270-
"Attempting to increment end iterator");
2271-
++Index;
2272-
return *this;
2273-
}
2264+
struct index_iterator
2265+
: llvm::iterator_facade_base<index_iterator,
2266+
std::random_access_iterator_tag, std::size_t> {
2267+
index_iterator(std::size_t Index) : Index(Index) {}
22742268

2275-
// Note: This dereference operator returns a value instead of a reference
2276-
// and does not strictly conform to the C++17's definition of forward
2277-
// iterator. However, it satisfies all the forward_iterator requirements
2278-
// that the `zip_common` depends on and fully conforms to the C++20
2279-
// definition of forward iterator.
2280-
std::size_t operator*() const { return Index; }
2269+
index_iterator &operator+=(std::ptrdiff_t N) {
2270+
Index += N;
2271+
return *this;
2272+
}
22812273

2282-
friend bool operator==(const iterator &Lhs, const iterator &Rhs) {
2283-
return Lhs.Index == Rhs.Index;
2284-
}
2274+
index_iterator &operator-=(std::ptrdiff_t N) {
2275+
Index -= N;
2276+
return *this;
2277+
}
22852278

2286-
std::size_t Index = 0;
2287-
};
2279+
std::ptrdiff_t operator-(const index_iterator &R) const {
2280+
return Index - R.Index;
2281+
}
22882282

2289-
iterator begin() const { return {}; }
2290-
iterator end() const {
2283+
// Note: This dereference operator returns a value instead of a reference
2284+
// and does not strictly conform to the C++17's definition of forward
2285+
// iterator. However, it satisfies all the forward_iterator requirements
2286+
// that the `zip_common` depends on and fully conforms to the C++20
2287+
// definition of forward iterator.
2288+
std::size_t operator*() const { return Index; }
2289+
2290+
friend bool operator==(const index_iterator &Lhs, const index_iterator &Rhs) {
2291+
return Lhs.Index == Rhs.Index;
2292+
}
2293+
2294+
friend bool operator<(const index_iterator &Lhs, const index_iterator &Rhs) {
2295+
return Lhs.Index < Rhs.Index;
2296+
}
2297+
2298+
private:
2299+
std::size_t Index;
2300+
};
2301+
2302+
/// Infinite stream of increasing 0-based `size_t` indices.
2303+
struct index_stream {
2304+
index_iterator begin() const { return {0}; }
2305+
index_iterator end() const {
22912306
// We approximate 'infinity' with the max size_t value, which should be good
22922307
// enough to index over any container.
2293-
iterator It;
2294-
It.Index = std::numeric_limits<std::size_t>::max();
2295-
return It;
2308+
return index_iterator{std::numeric_limits<std::size_t>::max()};
22962309
}
22972310
};
22982311

22992312
} // end namespace detail
23002313

2314+
/// Increasing range of `size_t` indices.
2315+
class index_range {
2316+
std::size_t Begin;
2317+
std::size_t End;
2318+
2319+
public:
2320+
index_range(std::size_t Begin, std::size_t End) : Begin(Begin), End(End) {}
2321+
detail::index_iterator begin() const { return {Begin}; }
2322+
detail::index_iterator end() const { return {End}; }
2323+
};
2324+
23012325
/// Given two or more input ranges, returns a new range whose values are are
23022326
/// tuples (A, B, C, ...), such that A is the 0-based index of the item in the
23032327
/// sequence, and B, C, ..., are the values from the original input ranges. All

0 commit comments

Comments
 (0)