Skip to content

Commit a889d39

Browse files
committed
[clang][modules] Move SLocEntry search into ASTReader (llvm#66966)
In `SourceManager::getFileID()`, Clang performs binary search over its buffer of `SLocEntries`. For modules, this binary search fully deserializes the entire `SLocEntry` block for each visited entry. For some entries, that includes decompressing the associated buffer (e.g. the predefines buffer, macro expansion buffers, contents of volatile files), which shows up in profiles of the dependency scanner. This patch moves the binary search over loaded entries into `ASTReader`, which can perform cheaper partial deserialization during the binary search, reducing the wall time of dependency scans by ~3%. This also reduces the number of retired instructions by ~1.4% on regular (implicit) modules compilation. Note that this patch drops the optimizations based on the last lookup ID (pruning the search space and performing linear search before resorting to the full binary search). Instead, it reduces the search space by asking `ASTReader::GlobalSLocOffsetMap` for the containing `ModuleFile` and only does binary search over entries of single module file.
1 parent 9ba53c8 commit a889d39

File tree

6 files changed

+155
-91
lines changed

6 files changed

+155
-91
lines changed

clang/include/clang/Basic/SourceManager.h

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -504,6 +504,14 @@ class SLocEntry {
504504
return Expansion;
505505
}
506506

507+
/// Creates an incomplete SLocEntry that is only able to report its offset.
508+
static SLocEntry getOffsetOnly(SourceLocation::UIntTy Offset) {
509+
assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
510+
SLocEntry E;
511+
E.Offset = Offset;
512+
return E;
513+
}
514+
507515
static SLocEntry get(SourceLocation::UIntTy Offset, const FileInfo &FI) {
508516
assert(!(Offset & (1ULL << OffsetBits)) && "Offset is too large");
509517
SLocEntry E;
@@ -538,6 +546,12 @@ class ExternalSLocEntrySource {
538546
/// entry from being loaded.
539547
virtual bool ReadSLocEntry(int ID) = 0;
540548

549+
/// Get the index ID for the loaded SourceLocation offset.
550+
///
551+
/// \returns Invalid index ID (0) if an error occurred that prevented the
552+
/// SLocEntry from being loaded.
553+
virtual int getSLocEntryID(SourceLocation::UIntTy SLocOffset) = 0;
554+
541555
/// Retrieve the module import location and name for the given ID, if
542556
/// in fact it was loaded from a module (rather than, say, a precompiled
543557
/// header).
@@ -733,6 +747,12 @@ class SourceManager : public RefCountedBase<SourceManager> {
733747
/// Same indexing as LoadedSLocEntryTable.
734748
llvm::BitVector SLocEntryLoaded;
735749

750+
/// A bitmap that indicates whether the entries of LoadedSLocEntryTable
751+
/// have already had their offset loaded from the external source.
752+
///
753+
/// Superset of SLocEntryLoaded. Same indexing as SLocEntryLoaded.
754+
llvm::BitVector SLocEntryOffsetLoaded;
755+
736756
/// An external source for source location entries.
737757
ExternalSLocEntrySource *ExternalSLocEntries = nullptr;
738758

clang/include/clang/Serialization/ASTReader.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2163,6 +2163,12 @@ class ASTReader
21632163

21642164
/// Read the source location entry with index ID.
21652165
bool ReadSLocEntry(int ID) override;
2166+
/// Get the index ID for the loaded SourceLocation offset.
2167+
int getSLocEntryID(SourceLocation::UIntTy SLocOffset) override;
2168+
/// Try to read the offset of the SLocEntry at the given index in the given
2169+
/// module file.
2170+
llvm::Expected<SourceLocation::UIntTy> readSLocOffset(ModuleFile *F,
2171+
unsigned Index);
21662172

21672173
/// Retrieve the module import location and module name for the
21682174
/// given source manager entry ID.

clang/lib/Basic/SourceManager.cpp

Lines changed: 5 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -338,6 +338,7 @@ void SourceManager::clearIDTables() {
338338
LocalSLocEntryTable.clear();
339339
LoadedSLocEntryTable.clear();
340340
SLocEntryLoaded.clear();
341+
SLocEntryOffsetLoaded.clear();
341342
LastLineNoFileIDQuery = FileID();
342343
LastLineNoContentCache = nullptr;
343344
LastFileIDLookup = FileID();
@@ -460,6 +461,7 @@ SourceManager::AllocateLoadedSLocEntries(unsigned NumSLocEntries,
460461
}
461462
LoadedSLocEntryTable.resize(LoadedSLocEntryTable.size() + NumSLocEntries);
462463
SLocEntryLoaded.resize(LoadedSLocEntryTable.size());
464+
SLocEntryOffsetLoaded.resize(LoadedSLocEntryTable.size());
463465
CurrentLoadedOffset -= TotalSize;
464466
int BaseID = -int(LoadedSLocEntryTable.size()) - 1;
465467
LoadedSLocEntryAllocBegin.push_back(FileID::get(BaseID));
@@ -609,7 +611,7 @@ FileID SourceManager::createFileIDImpl(ContentCache &File, StringRef Filename,
609611
assert(!SLocEntryLoaded[Index] && "FileID already loaded");
610612
LoadedSLocEntryTable[Index] = SLocEntry::get(
611613
LoadedOffset, FileInfo::get(IncludePos, File, FileCharacter, Filename));
612-
SLocEntryLoaded[Index] = true;
614+
SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
613615
return FileID::get(LoadedID);
614616
}
615617
unsigned FileSize = File.getSize();
@@ -669,7 +671,7 @@ SourceManager::createExpansionLocImpl(const ExpansionInfo &Info,
669671
assert(Index < LoadedSLocEntryTable.size() && "FileID out of range");
670672
assert(!SLocEntryLoaded[Index] && "FileID already loaded");
671673
LoadedSLocEntryTable[Index] = SLocEntry::get(LoadedOffset, Info);
672-
SLocEntryLoaded[Index] = true;
674+
SLocEntryLoaded[Index] = SLocEntryOffsetLoaded[Index] = true;
673675
return SourceLocation::getMacroLoc(LoadedOffset);
674676
}
675677
LocalSLocEntryTable.push_back(SLocEntry::get(NextLocalOffset, Info));
@@ -870,69 +872,7 @@ FileID SourceManager::getFileIDLoaded(SourceLocation::UIntTy SLocOffset) const {
870872
return FileID();
871873
}
872874

873-
// Essentially the same as the local case, but the loaded array is sorted
874-
// in the other direction (decreasing order).
875-
// GreaterIndex is the one where the offset is greater, which is actually a
876-
// lower index!
877-
unsigned GreaterIndex = 0;
878-
unsigned LessIndex = LoadedSLocEntryTable.size();
879-
if (LastFileIDLookup.ID < 0) {
880-
// Prune the search space.
881-
int LastID = LastFileIDLookup.ID;
882-
if (getLoadedSLocEntryByID(LastID).getOffset() > SLocOffset)
883-
GreaterIndex =
884-
(-LastID - 2) + 1; // Exclude LastID, else we would have hit the cache
885-
else
886-
LessIndex = -LastID - 2;
887-
}
888-
889-
// First do a linear scan from the last lookup position, if possible.
890-
unsigned NumProbes;
891-
bool Invalid = false;
892-
for (NumProbes = 0; NumProbes < 8; ++NumProbes, ++GreaterIndex) {
893-
// Make sure the entry is loaded!
894-
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(GreaterIndex, &Invalid);
895-
if (Invalid)
896-
return FileID(); // invalid entry.
897-
if (E.getOffset() <= SLocOffset) {
898-
FileID Res = FileID::get(-int(GreaterIndex) - 2);
899-
LastFileIDLookup = Res;
900-
NumLinearScans += NumProbes + 1;
901-
return Res;
902-
}
903-
}
904-
905-
// Linear scan failed. Do the binary search.
906-
NumProbes = 0;
907-
while (true) {
908-
++NumProbes;
909-
unsigned MiddleIndex = (LessIndex - GreaterIndex) / 2 + GreaterIndex;
910-
const SrcMgr::SLocEntry &E = getLoadedSLocEntry(MiddleIndex, &Invalid);
911-
if (Invalid)
912-
return FileID(); // invalid entry.
913-
914-
if (E.getOffset() > SLocOffset) {
915-
if (GreaterIndex == MiddleIndex) {
916-
assert(0 && "binary search missed the entry");
917-
return FileID();
918-
}
919-
GreaterIndex = MiddleIndex;
920-
continue;
921-
}
922-
923-
if (isOffsetInFileID(FileID::get(-int(MiddleIndex) - 2), SLocOffset)) {
924-
FileID Res = FileID::get(-int(MiddleIndex) - 2);
925-
LastFileIDLookup = Res;
926-
NumBinaryProbes += NumProbes;
927-
return Res;
928-
}
929-
930-
if (LessIndex == MiddleIndex) {
931-
assert(0 && "binary search missed the entry");
932-
return FileID();
933-
}
934-
LessIndex = MiddleIndex;
935-
}
875+
return FileID::get(ExternalSLocEntries->getSLocEntryID(SLocOffset));
936876
}
937877

938878
SourceLocation SourceManager::

clang/lib/Serialization/ASTReader.cpp

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1459,6 +1459,79 @@ llvm::Error ASTReader::ReadSourceManagerBlock(ModuleFile &F) {
14591459
}
14601460
}
14611461

1462+
llvm::Expected<SourceLocation::UIntTy>
1463+
ASTReader::readSLocOffset(ModuleFile *F, unsigned Index) {
1464+
BitstreamCursor &Cursor = F->SLocEntryCursor;
1465+
SavedStreamPosition SavedPosition(Cursor);
1466+
if (llvm::Error Err = Cursor.JumpToBit(F->SLocEntryOffsetsBase +
1467+
F->SLocEntryOffsets[Index]))
1468+
return Err;
1469+
1470+
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
1471+
if (!MaybeEntry)
1472+
return MaybeEntry.takeError();
1473+
1474+
llvm::BitstreamEntry Entry = MaybeEntry.get();
1475+
if (Entry.Kind != llvm::BitstreamEntry::Record)
1476+
return llvm::createStringError(
1477+
std::errc::illegal_byte_sequence,
1478+
"incorrectly-formatted source location entry in AST file");
1479+
1480+
RecordData Record;
1481+
StringRef Blob;
1482+
Expected<unsigned> MaybeSLOC = Cursor.readRecord(Entry.ID, Record, &Blob);
1483+
if (!MaybeSLOC)
1484+
return MaybeSLOC.takeError();
1485+
1486+
switch (MaybeSLOC.get()) {
1487+
default:
1488+
return llvm::createStringError(
1489+
std::errc::illegal_byte_sequence,
1490+
"incorrectly-formatted source location entry in AST file");
1491+
case SM_SLOC_FILE_ENTRY:
1492+
case SM_SLOC_BUFFER_ENTRY:
1493+
case SM_SLOC_EXPANSION_ENTRY:
1494+
return F->SLocEntryBaseOffset + Record[0];
1495+
}
1496+
}
1497+
1498+
int ASTReader::getSLocEntryID(SourceLocation::UIntTy SLocOffset) {
1499+
auto SLocMapI =
1500+
GlobalSLocOffsetMap.find(SourceManager::MaxLoadedOffset - SLocOffset - 1);
1501+
assert(SLocMapI != GlobalSLocOffsetMap.end() &&
1502+
"Corrupted global sloc offset map");
1503+
ModuleFile *F = SLocMapI->second;
1504+
1505+
bool Invalid = false;
1506+
1507+
auto It = llvm::upper_bound(
1508+
llvm::index_range(0, F->LocalNumSLocEntries), SLocOffset,
1509+
[&](SourceLocation::UIntTy Offset, std::size_t LocalIndex) {
1510+
int ID = F->SLocEntryBaseID + LocalIndex;
1511+
std::size_t Index = -ID - 2;
1512+
if (!SourceMgr.SLocEntryOffsetLoaded[Index]) {
1513+
assert(!SourceMgr.SLocEntryLoaded[Index]);
1514+
auto MaybeEntryOffset = readSLocOffset(F, LocalIndex);
1515+
if (!MaybeEntryOffset) {
1516+
Error(MaybeEntryOffset.takeError());
1517+
Invalid = true;
1518+
return true;
1519+
}
1520+
SourceMgr.LoadedSLocEntryTable[Index] =
1521+
SrcMgr::SLocEntry::getOffsetOnly(*MaybeEntryOffset);
1522+
SourceMgr.SLocEntryOffsetLoaded[Index] = true;
1523+
}
1524+
return Offset < SourceMgr.LoadedSLocEntryTable[Index].getOffset();
1525+
});
1526+
1527+
if (Invalid)
1528+
return 0;
1529+
1530+
// The iterator points to the first entry with start offset greater than the
1531+
// offset of interest. The previous entry must contain the offset of interest.
1532+
return F->SLocEntryBaseID + *std::prev(It);
1533+
}
1534+
14621535
bool ASTReader::ReadSLocEntry(int ID) {
14631536
if (ID == 0)
14641537
return false;

clang/test/Modules/explicit-build-missing-files.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@ int y = a2<int>;
5050
// CHECK: In module 'a':
5151
// CHECK-NEXT: a.h:1:45: error:
5252

53+
int z = b<int>;
5354
// MISSING-B: could not find file '{{.*}}b.h'
5455
// MISSING-B-NOT: please delete the module cache
5556
#endif

llvm/include/llvm/ADT/STLExtras.h

Lines changed: 50 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -2271,43 +2271,67 @@ template <typename... Refs> struct enumerator_result<std::size_t, Refs...> {
22712271
mutable range_reference_tuple Storage;
22722272
};
22732273

2274-
/// Infinite stream of increasing 0-based `size_t` indices.
2275-
struct index_stream {
2276-
struct iterator : iterator_facade_base<iterator, std::forward_iterator_tag,
2277-
const iterator> {
2278-
iterator &operator++() {
2279-
assert(Index != std::numeric_limits<std::size_t>::max() &&
2280-
"Attempting to increment end iterator");
2281-
++Index;
2282-
return *this;
2283-
}
2274+
struct index_iterator
2275+
: llvm::iterator_facade_base<index_iterator,
2276+
std::random_access_iterator_tag, std::size_t> {
2277+
index_iterator(std::size_t Index) : Index(Index) {}
22842278

2285-
// Note: This dereference operator returns a value instead of a reference
2286-
// and does not strictly conform to the C++17's definition of forward
2287-
// iterator. However, it satisfies all the forward_iterator requirements
2288-
// that the `zip_common` depends on and fully conforms to the C++20
2289-
// definition of forward iterator.
2290-
std::size_t operator*() const { return Index; }
2279+
index_iterator &operator+=(std::ptrdiff_t N) {
2280+
Index += N;
2281+
return *this;
2282+
}
22912283

2292-
friend bool operator==(const iterator &Lhs, const iterator &Rhs) {
2293-
return Lhs.Index == Rhs.Index;
2294-
}
2284+
index_iterator &operator-=(std::ptrdiff_t N) {
2285+
Index -= N;
2286+
return *this;
2287+
}
22952288

2296-
std::size_t Index = 0;
2297-
};
2289+
std::ptrdiff_t operator-(const index_iterator &R) const {
2290+
return Index - R.Index;
2291+
}
22982292

2299-
iterator begin() const { return {}; }
2300-
iterator end() const {
2293+
// Note: This dereference operator returns a value instead of a reference
2294+
// and does not strictly conform to the C++17's definition of forward
2295+
// iterator. However, it satisfies all the forward_iterator requirements
2296+
// that the `zip_common` depends on and fully conforms to the C++20
2297+
// definition of forward iterator.
2298+
std::size_t operator*() const { return Index; }
2299+
2300+
friend bool operator==(const index_iterator &Lhs, const index_iterator &Rhs) {
2301+
return Lhs.Index == Rhs.Index;
2302+
}
2303+
2304+
friend bool operator<(const index_iterator &Lhs, const index_iterator &Rhs) {
2305+
return Lhs.Index < Rhs.Index;
2306+
}
2307+
2308+
private:
2309+
std::size_t Index;
2310+
};
2311+
2312+
/// Infinite stream of increasing 0-based `size_t` indices.
2313+
struct index_stream {
2314+
index_iterator begin() const { return {0}; }
2315+
index_iterator end() const {
23012316
// We approximate 'infinity' with the max size_t value, which should be good
23022317
// enough to index over any container.
2303-
iterator It;
2304-
It.Index = std::numeric_limits<std::size_t>::max();
2305-
return It;
2318+
return index_iterator{std::numeric_limits<std::size_t>::max()};
23062319
}
23072320
};
23082321

23092322
} // end namespace detail
23102323

2324+
/// Increasing range of `size_t` indices.
2325+
class index_range {
2326+
std::size_t Begin;
2327+
std::size_t End;
2328+
2329+
public:
2330+
index_range(std::size_t Begin, std::size_t End) : Begin(Begin), End(End) {}
2331+
detail::index_iterator begin() const { return {Begin}; }
2332+
detail::index_iterator end() const { return {End}; }
2333+
};
2334+
23112335
/// Given two or more input ranges, returns a new range whose values are are
23122336
/// tuples (A, B, C, ...), such that A is the 0-based index of the item in the
23132337
/// sequence, and B, C, ..., are the values from the original input ranges. All

0 commit comments

Comments
 (0)