Skip to content

Commit a11a432

Browse files
authored
[clang][deps] Cache VFS::getRealPath() (#68645)
This PR starts caching calls to `DependencyScanningWorkerFilesystem::getRealPath()`, which we use whenever we canonicalize a module map path. In the case of the real VFS, this function performs an expensive syscall that we'd like to do as rarely as possible. This PR keeps the real path out of `CachedFileSystemEntry`, since that's **immutable**; populating the real path on creation of this data structure (every stat/open) would be expensive.
1 parent c11976f commit a11a432

File tree

4 files changed

+314
-19
lines changed

4 files changed

+314
-19
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -142,6 +142,8 @@ class CachedFileSystemEntry {
142142
CachedFileContents *Contents;
143143
};
144144

145+
using CachedRealPath = llvm::ErrorOr<std::string>;
146+
145147
/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
146148
/// underlying real file system, and the scanned preprocessor directives of
147149
/// files.
@@ -154,9 +156,11 @@ class DependencyScanningFilesystemSharedCache {
154156
/// The mutex that needs to be locked before mutation of any member.
155157
mutable std::mutex CacheLock;
156158

157-
/// Map from filenames to cached entries.
158-
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
159-
EntriesByFilename;
159+
/// Map from filenames to cached entries and real paths.
160+
llvm::StringMap<
161+
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
162+
llvm::BumpPtrAllocator>
163+
CacheByFilename;
160164

161165
/// Map from unique IDs to cached entries.
162166
llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
@@ -168,6 +172,9 @@ class DependencyScanningFilesystemSharedCache {
168172
/// The backing storage for cached contents.
169173
llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
170174

175+
/// The backing storage for cached real paths.
176+
llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
177+
171178
/// Returns entry associated with the filename or nullptr if none is found.
172179
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
173180

@@ -194,6 +201,17 @@ class DependencyScanningFilesystemSharedCache {
194201
const CachedFileSystemEntry &
195202
getOrInsertEntryForFilename(StringRef Filename,
196203
const CachedFileSystemEntry &Entry);
204+
205+
/// Returns the real path associated with the filename or nullptr if none is
206+
/// found.
207+
const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
208+
209+
/// Returns the real path associated with the filename if there is one.
210+
/// Otherwise, constructs a new one from the given one, associates it with the
211+
/// filename and returns the result.
212+
const CachedRealPath &
213+
getOrEmplaceRealPathForFilename(StringRef Filename,
214+
llvm::ErrorOr<StringRef> RealPath);
197215
};
198216

199217
DependencyScanningFilesystemSharedCache();
@@ -210,14 +228,17 @@ class DependencyScanningFilesystemSharedCache {
210228
/// This class is a local cache, that caches the 'stat' and 'open' calls to the
211229
/// underlying real file system.
212230
class DependencyScanningFilesystemLocalCache {
213-
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
231+
llvm::StringMap<
232+
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
233+
llvm::BumpPtrAllocator>
234+
Cache;
214235

215236
public:
216237
/// Returns entry associated with the filename or nullptr if none is found.
217238
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
218239
assert(llvm::sys::path::is_absolute_gnu(Filename));
219240
auto It = Cache.find(Filename);
220-
return It == Cache.end() ? nullptr : It->getValue();
241+
return It == Cache.end() ? nullptr : It->getValue().first;
221242
}
222243

223244
/// Associates the given entry with the filename and returns the given entry
@@ -226,9 +247,40 @@ class DependencyScanningFilesystemLocalCache {
226247
insertEntryForFilename(StringRef Filename,
227248
const CachedFileSystemEntry &Entry) {
228249
assert(llvm::sys::path::is_absolute_gnu(Filename));
229-
const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
230-
assert(InsertedEntry == &Entry && "entry already present");
231-
return *InsertedEntry;
250+
auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
251+
auto &[CachedEntry, CachedRealPath] = It->getValue();
252+
if (!Inserted) {
253+
// The file is already present in the local cache. If we got here, it only
254+
// contains the real path. Let's make sure the entry is populated too.
255+
assert((!CachedEntry && CachedRealPath) && "entry already present");
256+
CachedEntry = &Entry;
257+
}
258+
return *CachedEntry;
259+
}
260+
261+
/// Returns real path associated with the filename or nullptr if none is
262+
/// found.
263+
const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
264+
assert(llvm::sys::path::is_absolute_gnu(Filename));
265+
auto It = Cache.find(Filename);
266+
return It == Cache.end() ? nullptr : It->getValue().second;
267+
}
268+
269+
/// Associates the given real path with the filename and returns the given
270+
/// real path (for convenience).
271+
const CachedRealPath &
272+
insertRealPathForFilename(StringRef Filename,
273+
const CachedRealPath &RealPath) {
274+
assert(llvm::sys::path::is_absolute_gnu(Filename));
275+
auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
276+
auto &[CachedEntry, CachedRealPath] = It->getValue();
277+
if (!Inserted) {
278+
// The file is already present in the local cache. If we got here, it only
279+
// contains the entry. Let's make sure the real path is populated too.
280+
assert((!CachedRealPath && CachedEntry) && "real path already present");
281+
CachedRealPath = &RealPath;
282+
}
283+
return *CachedRealPath;
232284
}
233285
};
234286

@@ -296,6 +348,9 @@ class DependencyScanningWorkerFilesystem
296348
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
297349
openFileForRead(const Twine &Path) override;
298350

351+
std::error_code getRealPath(const Twine &Path,
352+
SmallVectorImpl<char> &Output) override;
353+
299354
std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
300355

301356
/// Returns entry for the given filename.

clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp

Lines changed: 98 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -113,8 +113,8 @@ DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
113113
StringRef Filename) const {
114114
assert(llvm::sys::path::is_absolute_gnu(Filename));
115115
std::lock_guard<std::mutex> LockGuard(CacheLock);
116-
auto It = EntriesByFilename.find(Filename);
117-
return It == EntriesByFilename.end() ? nullptr : It->getValue();
116+
auto It = CacheByFilename.find(Filename);
117+
return It == CacheByFilename.end() ? nullptr : It->getValue().first;
118118
}
119119

120120
const CachedFileSystemEntry *
@@ -130,36 +130,75 @@ DependencyScanningFilesystemSharedCache::CacheShard::
130130
getOrEmplaceEntryForFilename(StringRef Filename,
131131
llvm::ErrorOr<llvm::vfs::Status> Stat) {
132132
std::lock_guard<std::mutex> LockGuard(CacheLock);
133-
auto Insertion = EntriesByFilename.insert({Filename, nullptr});
134-
if (Insertion.second)
135-
Insertion.first->second =
133+
auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}});
134+
auto &[CachedEntry, CachedRealPath] = It->getValue();
135+
if (!CachedEntry) {
136+
// The entry is not present in the shared cache. Either the cache doesn't
137+
// know about the file at all, or it only knows about its real path.
138+
assert((Inserted || CachedRealPath) && "existing file with empty pair");
139+
CachedEntry =
136140
new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
137-
return *Insertion.first->second;
141+
}
142+
return *CachedEntry;
138143
}
139144

140145
const CachedFileSystemEntry &
141146
DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
142147
llvm::sys::fs::UniqueID UID, llvm::vfs::Status Stat,
143148
std::unique_ptr<llvm::MemoryBuffer> Contents) {
144149
std::lock_guard<std::mutex> LockGuard(CacheLock);
145-
auto Insertion = EntriesByUID.insert({UID, nullptr});
146-
if (Insertion.second) {
150+
auto [It, Inserted] = EntriesByUID.insert({UID, nullptr});
151+
auto &CachedEntry = It->getSecond();
152+
if (Inserted) {
147153
CachedFileContents *StoredContents = nullptr;
148154
if (Contents)
149155
StoredContents = new (ContentsStorage.Allocate())
150156
CachedFileContents(std::move(Contents));
151-
Insertion.first->second = new (EntryStorage.Allocate())
157+
CachedEntry = new (EntryStorage.Allocate())
152158
CachedFileSystemEntry(std::move(Stat), StoredContents);
153159
}
154-
return *Insertion.first->second;
160+
return *CachedEntry;
155161
}
156162

157163
const CachedFileSystemEntry &
158164
DependencyScanningFilesystemSharedCache::CacheShard::
159165
getOrInsertEntryForFilename(StringRef Filename,
160166
const CachedFileSystemEntry &Entry) {
161167
std::lock_guard<std::mutex> LockGuard(CacheLock);
162-
return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
168+
auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}});
169+
auto &[CachedEntry, CachedRealPath] = It->getValue();
170+
if (!Inserted || !CachedEntry)
171+
CachedEntry = &Entry;
172+
return *CachedEntry;
173+
}
174+
175+
const CachedRealPath *
176+
DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
177+
StringRef Filename) const {
178+
assert(llvm::sys::path::is_absolute_gnu(Filename));
179+
std::lock_guard<std::mutex> LockGuard(CacheLock);
180+
auto It = CacheByFilename.find(Filename);
181+
return It == CacheByFilename.end() ? nullptr : It->getValue().second;
182+
}
183+
184+
const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
185+
getOrEmplaceRealPathForFilename(StringRef Filename,
186+
llvm::ErrorOr<llvm::StringRef> RealPath) {
187+
std::lock_guard<std::mutex> LockGuard(CacheLock);
188+
189+
const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second;
190+
if (!StoredRealPath) {
191+
auto OwnedRealPath = [&]() -> CachedRealPath {
192+
if (!RealPath)
193+
return RealPath.getError();
194+
return RealPath->str();
195+
}();
196+
197+
StoredRealPath = new (RealPathStorage.Allocate())
198+
CachedRealPath(std::move(OwnedRealPath));
199+
}
200+
201+
return *StoredRealPath;
163202
}
164203

165204
static bool shouldCacheStatFailures(StringRef Filename) {
@@ -321,6 +360,54 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
321360
return DepScanFile::create(Result.get());
322361
}
323362

363+
std::error_code
364+
DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path,
365+
SmallVectorImpl<char> &Output) {
366+
SmallString<256> OwnedFilename;
367+
StringRef OriginalFilename = Path.toStringRef(OwnedFilename);
368+
369+
SmallString<256> PathBuf;
370+
auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
371+
if (!FilenameForLookup)
372+
return FilenameForLookup.getError();
373+
374+
auto HandleCachedRealPath =
375+
[&Output](const CachedRealPath &RealPath) -> std::error_code {
376+
if (!RealPath)
377+
return RealPath.getError();
378+
Output.assign(RealPath->begin(), RealPath->end());
379+
return {};
380+
};
381+
382+
// If we already have the result in local cache, no work required.
383+
if (const auto *RealPath =
384+
LocalCache.findRealPathByFilename(*FilenameForLookup))
385+
return HandleCachedRealPath(*RealPath);
386+
387+
// If we have the result in the shared cache, cache it locally.
388+
auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup);
389+
if (const auto *ShardRealPath =
390+
Shard.findRealPathByFilename(*FilenameForLookup)) {
391+
const auto &RealPath = LocalCache.insertRealPathForFilename(
392+
*FilenameForLookup, *ShardRealPath);
393+
return HandleCachedRealPath(RealPath);
394+
}
395+
396+
// If we don't know the real path, compute it...
397+
std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output);
398+
llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC;
399+
if (!EC)
400+
ComputedRealPath = StringRef{Output.data(), Output.size()};
401+
402+
// ...and try to write it into the shared cache. In case some other thread won
403+
// this race and already wrote its own result there, just adopt it. Write
404+
// whatever is in the shared cache into the local one.
405+
const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
406+
*FilenameForLookup, ComputedRealPath);
407+
return HandleCachedRealPath(
408+
LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath));
409+
}
410+
324411
std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
325412
const Twine &Path) {
326413
std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);

clang/unittests/Tooling/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ add_clang_unittest(ToolingTests
2424
QualTypeNamesTest.cpp
2525
RangeSelectorTest.cpp
2626
DependencyScanning/DependencyScannerTest.cpp
27+
DependencyScanning/DependencyScanningFilesystemTest.cpp
2728
RecursiveASTVisitorTests/Attr.cpp
2829
RecursiveASTVisitorTests/BitfieldInitializer.cpp
2930
RecursiveASTVisitorTests/CallbacksLeaf.cpp

0 commit comments

Comments
 (0)