Skip to content

Commit c545f0a

Browse files
committed
[clang][deps] Cache VFS::getRealPath() (llvm#68645)
This PR starts caching calls to `DependencyScanningWorkerFilesystem::getRealPath()`, which we use whenever we canonicalize a module map path. In the case of the real VFS, this function performs an expensive syscall that we'd like to do as rarely as possible. This PR keeps the real path out of `CachedFileSystemEntry`, since that's **immutable**; populating the real path on creation of this data structure (every stat/open) would be expensive. (cherry picked from commit a11a432)
1 parent 1a74d9d commit c545f0a

File tree

4 files changed

+314
-19
lines changed

4 files changed

+314
-19
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h

Lines changed: 63 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ class CachedFileSystemEntry {
154154
CachedFileContents *Contents;
155155
};
156156

157+
using CachedRealPath = llvm::ErrorOr<std::string>;
158+
157159
/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
158160
/// underlying real file system, and the scanned preprocessor directives of
159161
/// files.
@@ -166,9 +168,11 @@ class DependencyScanningFilesystemSharedCache {
166168
/// The mutex that needs to be locked before mutation of any member.
167169
mutable std::mutex CacheLock;
168170

169-
/// Map from filenames to cached entries.
170-
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
171-
EntriesByFilename;
171+
/// Map from filenames to cached entries and real paths.
172+
llvm::StringMap<
173+
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
174+
llvm::BumpPtrAllocator>
175+
CacheByFilename;
172176

173177
/// Map from unique IDs to cached entries.
174178
llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
@@ -180,6 +184,9 @@ class DependencyScanningFilesystemSharedCache {
180184
/// The backing storage for cached contents.
181185
llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
182186

187+
/// The backing storage for cached real paths.
188+
llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
189+
183190
/// Returns entry associated with the filename or nullptr if none is found.
184191
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
185192

@@ -207,6 +214,17 @@ class DependencyScanningFilesystemSharedCache {
207214
const CachedFileSystemEntry &
208215
getOrInsertEntryForFilename(StringRef Filename,
209216
const CachedFileSystemEntry &Entry);
217+
218+
/// Returns the real path associated with the filename or nullptr if none is
219+
/// found.
220+
const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
221+
222+
/// Returns the real path associated with the filename if there is one.
223+
/// Otherwise, constructs a new one from the given one, associates it with the
224+
/// filename and returns the result.
225+
const CachedRealPath &
226+
getOrEmplaceRealPathForFilename(StringRef Filename,
227+
llvm::ErrorOr<StringRef> RealPath);
210228
};
211229

212230
DependencyScanningFilesystemSharedCache();
@@ -223,14 +241,17 @@ class DependencyScanningFilesystemSharedCache {
223241
/// This class is a local cache, that caches the 'stat' and 'open' calls to the
224242
/// underlying real file system.
225243
class DependencyScanningFilesystemLocalCache {
226-
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
244+
llvm::StringMap<
245+
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
246+
llvm::BumpPtrAllocator>
247+
Cache;
227248

228249
public:
229250
/// Returns entry associated with the filename or nullptr if none is found.
230251
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
231252
assert(llvm::sys::path::is_absolute_gnu(Filename));
232253
auto It = Cache.find(Filename);
233-
return It == Cache.end() ? nullptr : It->getValue();
254+
return It == Cache.end() ? nullptr : It->getValue().first;
234255
}
235256

236257
/// Associates the given entry with the filename and returns the given entry
@@ -239,9 +260,40 @@ class DependencyScanningFilesystemLocalCache {
239260
insertEntryForFilename(StringRef Filename,
240261
const CachedFileSystemEntry &Entry) {
241262
assert(llvm::sys::path::is_absolute_gnu(Filename));
242-
const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
243-
assert(InsertedEntry == &Entry && "entry already present");
244-
return *InsertedEntry;
263+
auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
264+
auto &[CachedEntry, CachedRealPath] = It->getValue();
265+
if (!Inserted) {
266+
// The file is already present in the local cache. If we got here, it only
267+
// contains the real path. Let's make sure the entry is populated too.
268+
assert((!CachedEntry && CachedRealPath) && "entry already present");
269+
CachedEntry = &Entry;
270+
}
271+
return *CachedEntry;
272+
}
273+
274+
/// Returns real path associated with the filename or nullptr if none is
275+
/// found.
276+
const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
277+
assert(llvm::sys::path::is_absolute_gnu(Filename));
278+
auto It = Cache.find(Filename);
279+
return It == Cache.end() ? nullptr : It->getValue().second;
280+
}
281+
282+
/// Associates the given real path with the filename and returns the given
283+
/// real path (for convenience).
284+
const CachedRealPath &
285+
insertRealPathForFilename(StringRef Filename,
286+
const CachedRealPath &RealPath) {
287+
assert(llvm::sys::path::is_absolute_gnu(Filename));
288+
auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
289+
auto &[CachedEntry, CachedRealPath] = It->getValue();
290+
if (!Inserted) {
291+
// The file is already present in the local cache. If we got here, it only
292+
// contains the entry. Let's make sure the real path is populated too.
293+
assert((!CachedRealPath && CachedEntry) && "real path already present");
294+
CachedRealPath = &RealPath;
295+
}
296+
return *CachedRealPath;
245297
}
246298
};
247299

@@ -312,6 +364,9 @@ class DependencyScanningWorkerFilesystem
312364
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
313365
openFileForRead(const Twine &Path) override;
314366

367+
std::error_code getRealPath(const Twine &Path,
368+
SmallVectorImpl<char> &Output) override;
369+
315370
std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
316371

317372
/// Returns entry for the given filename.

clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp

Lines changed: 98 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
118118
StringRef Filename) const {
119119
assert(llvm::sys::path::is_absolute_gnu(Filename));
120120
std::lock_guard<std::mutex> LockGuard(CacheLock);
121-
auto It = EntriesByFilename.find(Filename);
122-
return It == EntriesByFilename.end() ? nullptr : It->getValue();
121+
auto It = CacheByFilename.find(Filename);
122+
return It == CacheByFilename.end() ? nullptr : It->getValue().first;
123123
}
124124

125125
const CachedFileSystemEntry *
@@ -135,11 +135,16 @@ DependencyScanningFilesystemSharedCache::CacheShard::
135135
getOrEmplaceEntryForFilename(StringRef Filename,
136136
llvm::ErrorOr<llvm::vfs::Status> Stat) {
137137
std::lock_guard<std::mutex> LockGuard(CacheLock);
138-
auto Insertion = EntriesByFilename.insert({Filename, nullptr});
139-
if (Insertion.second)
140-
Insertion.first->second =
138+
auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}});
139+
auto &[CachedEntry, CachedRealPath] = It->getValue();
140+
if (!CachedEntry) {
141+
// The entry is not present in the shared cache. Either the cache doesn't
142+
// know about the file at all, or it only knows about its real path.
143+
assert((Inserted || CachedRealPath) && "existing file with empty pair");
144+
CachedEntry =
141145
new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
142-
return *Insertion.first->second;
146+
}
147+
return *CachedEntry;
143148
}
144149

145150
const CachedFileSystemEntry &
@@ -148,24 +153,58 @@ DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
148153
std::unique_ptr<llvm::MemoryBuffer> Contents,
149154
std::optional<cas::ObjectRef> CASContents) {
150155
std::lock_guard<std::mutex> LockGuard(CacheLock);
151-
auto Insertion = EntriesByUID.insert({UID, nullptr});
152-
if (Insertion.second) {
156+
auto [It, Inserted] = EntriesByUID.insert({UID, nullptr});
157+
auto &CachedEntry = It->getSecond();
158+
if (Inserted) {
153159
CachedFileContents *StoredContents = nullptr;
154160
if (Contents)
155161
StoredContents = new (ContentsStorage.Allocate())
156162
CachedFileContents(std::move(Contents), std::move(CASContents));
157-
Insertion.first->second = new (EntryStorage.Allocate())
163+
CachedEntry = new (EntryStorage.Allocate())
158164
CachedFileSystemEntry(std::move(Stat), StoredContents);
159165
}
160-
return *Insertion.first->second;
166+
return *CachedEntry;
161167
}
162168

163169
const CachedFileSystemEntry &
164170
DependencyScanningFilesystemSharedCache::CacheShard::
165171
getOrInsertEntryForFilename(StringRef Filename,
166172
const CachedFileSystemEntry &Entry) {
167173
std::lock_guard<std::mutex> LockGuard(CacheLock);
168-
return *EntriesByFilename.insert({Filename, &Entry}).first->getValue();
174+
auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}});
175+
auto &[CachedEntry, CachedRealPath] = It->getValue();
176+
if (!Inserted || !CachedEntry)
177+
CachedEntry = &Entry;
178+
return *CachedEntry;
179+
}
180+
181+
const CachedRealPath *
182+
DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
183+
StringRef Filename) const {
184+
assert(llvm::sys::path::is_absolute_gnu(Filename));
185+
std::lock_guard<std::mutex> LockGuard(CacheLock);
186+
auto It = CacheByFilename.find(Filename);
187+
return It == CacheByFilename.end() ? nullptr : It->getValue().second;
188+
}
189+
190+
const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
191+
getOrEmplaceRealPathForFilename(StringRef Filename,
192+
llvm::ErrorOr<llvm::StringRef> RealPath) {
193+
std::lock_guard<std::mutex> LockGuard(CacheLock);
194+
195+
const CachedRealPath *&StoredRealPath = CacheByFilename[Filename].second;
196+
if (!StoredRealPath) {
197+
auto OwnedRealPath = [&]() -> CachedRealPath {
198+
if (!RealPath)
199+
return RealPath.getError();
200+
return RealPath->str();
201+
}();
202+
203+
StoredRealPath = new (RealPathStorage.Allocate())
204+
CachedRealPath(std::move(OwnedRealPath));
205+
}
206+
207+
return *StoredRealPath;
169208
}
170209

171210
static bool shouldCacheStatFailures(StringRef Filename) {
@@ -334,6 +373,54 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
334373
return DepScanFile::create(Result.get());
335374
}
336375

376+
std::error_code
377+
DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path,
378+
SmallVectorImpl<char> &Output) {
379+
SmallString<256> OwnedFilename;
380+
StringRef OriginalFilename = Path.toStringRef(OwnedFilename);
381+
382+
SmallString<256> PathBuf;
383+
auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
384+
if (!FilenameForLookup)
385+
return FilenameForLookup.getError();
386+
387+
auto HandleCachedRealPath =
388+
[&Output](const CachedRealPath &RealPath) -> std::error_code {
389+
if (!RealPath)
390+
return RealPath.getError();
391+
Output.assign(RealPath->begin(), RealPath->end());
392+
return {};
393+
};
394+
395+
// If we already have the result in local cache, no work required.
396+
if (const auto *RealPath =
397+
LocalCache.findRealPathByFilename(*FilenameForLookup))
398+
return HandleCachedRealPath(*RealPath);
399+
400+
// If we have the result in the shared cache, cache it locally.
401+
auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup);
402+
if (const auto *ShardRealPath =
403+
Shard.findRealPathByFilename(*FilenameForLookup)) {
404+
const auto &RealPath = LocalCache.insertRealPathForFilename(
405+
*FilenameForLookup, *ShardRealPath);
406+
return HandleCachedRealPath(RealPath);
407+
}
408+
409+
// If we don't know the real path, compute it...
410+
std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output);
411+
llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC;
412+
if (!EC)
413+
ComputedRealPath = StringRef{Output.data(), Output.size()};
414+
415+
// ...and try to write it into the shared cache. In case some other thread won
416+
// this race and already wrote its own result there, just adopt it. Write
417+
// whatever is in the shared cache into the local one.
418+
const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
419+
*FilenameForLookup, ComputedRealPath);
420+
return HandleCachedRealPath(
421+
LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath));
422+
}
423+
337424
std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
338425
const Twine &Path) {
339426
std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);

clang/unittests/Tooling/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ add_clang_unittest(ToolingTests
2525
RangeSelectorTest.cpp
2626
DependencyScanning/DependencyScannerTest.cpp
2727
DependencyScanning/DependencyScanningCASFilesystemTest.cpp
28+
DependencyScanning/DependencyScanningFilesystemTest.cpp
2829
RecursiveASTVisitorTests/Attr.cpp
2930
RecursiveASTVisitorTests/CallbacksLeaf.cpp
3031
RecursiveASTVisitorTests/CallbacksUnaryOperator.cpp

0 commit comments

Comments
 (0)