Skip to content

Commit 1c3a6bd

Browse files
authored
Merge pull request #8571 from apple/jan_svoboda/cache-real-path
[clang][deps] Cache `VFS::getRealPath()` and `VFS::exists()`
2 parents 49cbc3a + 00b2089 commit 1c3a6bd

File tree

14 files changed

+407
-61
lines changed

14 files changed

+407
-61
lines changed

clang/include/clang/Tooling/DependencyScanning/DependencyScanningCASFilesystem.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -52,7 +52,7 @@ class DependencyScanningCASFilesystem : public llvm::cas::ThreadSafeFileSystem {
5252
return FS->setCurrentWorkingDirectory(Path);
5353
}
5454
std::error_code getRealPath(const Twine &Path,
55-
SmallVectorImpl<char> &Output) const override {
55+
SmallVectorImpl<char> &Output) override {
5656
return FS->getRealPath(Path, Output);
5757
}
5858
std::error_code isLocal(const Twine &Path, bool &Result) override {

clang/include/clang/Tooling/DependencyScanning/DependencyScanningFilesystem.h

Lines changed: 71 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ class CachedFileSystemEntry {
154154
CachedFileContents *Contents;
155155
};
156156

157+
using CachedRealPath = llvm::ErrorOr<std::string>;
158+
157159
/// This class is a shared cache, that caches the 'stat' and 'open' calls to the
158160
/// underlying real file system, and the scanned preprocessor directives of
159161
/// files.
@@ -166,9 +168,11 @@ class DependencyScanningFilesystemSharedCache {
166168
/// The mutex that needs to be locked before mutation of any member.
167169
mutable std::mutex CacheLock;
168170

169-
/// Map from filenames to cached entries.
170-
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
171-
EntriesByFilename;
171+
/// Map from filenames to cached entries and real paths.
172+
llvm::StringMap<
173+
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
174+
llvm::BumpPtrAllocator>
175+
CacheByFilename;
172176

173177
/// Map from unique IDs to cached entries.
174178
llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
@@ -180,6 +184,9 @@ class DependencyScanningFilesystemSharedCache {
180184
/// The backing storage for cached contents.
181185
llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;
182186

187+
/// The backing storage for cached real paths.
188+
llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;
189+
183190
/// Returns entry associated with the filename or nullptr if none is found.
184191
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;
185192

@@ -207,6 +214,17 @@ class DependencyScanningFilesystemSharedCache {
207214
const CachedFileSystemEntry &
208215
getOrInsertEntryForFilename(StringRef Filename,
209216
const CachedFileSystemEntry &Entry);
217+
218+
/// Returns the real path associated with the filename or nullptr if none is
219+
/// found.
220+
const CachedRealPath *findRealPathByFilename(StringRef Filename) const;
221+
222+
/// Returns the real path associated with the filename if there is some.
223+
/// Otherwise, constructs new one with the given one, associates it with the
224+
/// filename and returns the result.
225+
const CachedRealPath &
226+
getOrEmplaceRealPathForFilename(StringRef Filename,
227+
llvm::ErrorOr<StringRef> RealPath);
210228
};
211229

212230
DependencyScanningFilesystemSharedCache();
@@ -223,14 +241,17 @@ class DependencyScanningFilesystemSharedCache {
223241
/// This class is a local cache, that caches the 'stat' and 'open' calls to the
224242
/// underlying real file system.
225243
class DependencyScanningFilesystemLocalCache {
226-
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
244+
llvm::StringMap<
245+
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
246+
llvm::BumpPtrAllocator>
247+
Cache;
227248

228249
public:
229250
/// Returns entry associated with the filename or nullptr if none is found.
230251
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
  assert(llvm::sys::path::is_absolute_gnu(Filename));
  const auto Found = Cache.find(Filename);
  if (Found == Cache.end())
    return nullptr;
  // Each cached value is an (entry, real path) pair; the entry half may itself
  // be null when only the real path has been recorded for this filename.
  return Found->getValue().first;
}
235256

236257
/// Associates the given entry with the filename and returns the given entry
@@ -239,9 +260,40 @@ class DependencyScanningFilesystemLocalCache {
239260
insertEntryForFilename(StringRef Filename,
240261
const CachedFileSystemEntry &Entry) {
241262
assert(llvm::sys::path::is_absolute_gnu(Filename));
242-
const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
243-
assert(InsertedEntry == &Entry && "entry already present");
244-
return *InsertedEntry;
263+
auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
264+
auto &[CachedEntry, CachedRealPath] = It->getValue();
265+
if (!Inserted) {
266+
// The file is already present in the local cache. If we got here, it only
267+
// contains the real path. Let's make sure the entry is populated too.
268+
assert((!CachedEntry && CachedRealPath) && "entry already present");
269+
CachedEntry = &Entry;
270+
}
271+
return *CachedEntry;
272+
}
273+
274+
/// Returns real path associated with the filename or nullptr if none is
275+
/// found.
276+
const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
  assert(llvm::sys::path::is_absolute_gnu(Filename));
  const auto Found = Cache.find(Filename);
  if (Found == Cache.end())
    return nullptr;
  // Each cached value is an (entry, real path) pair; the real-path half may
  // itself be null when only the entry has been recorded for this filename.
  return Found->getValue().second;
}
281+
282+
/// Associates the given real path with the filename and returns the given
283+
/// entry pointer (for convenience).
284+
const CachedRealPath &
285+
insertRealPathForFilename(StringRef Filename,
286+
const CachedRealPath &RealPath) {
287+
assert(llvm::sys::path::is_absolute_gnu(Filename));
288+
auto [It, Inserted] = Cache.insert({Filename, {nullptr, &RealPath}});
289+
auto &[CachedEntry, CachedRealPath] = It->getValue();
290+
if (!Inserted) {
291+
// The file is already present in the local cache. If we got here, it only
292+
// contains the entry. Let's make sure the real path is populated too.
293+
assert((!CachedRealPath && CachedEntry) && "real path already present");
294+
CachedRealPath = &RealPath;
295+
}
296+
return *CachedRealPath;
245297
}
246298
};
247299

@@ -312,6 +364,9 @@ class DependencyScanningWorkerFilesystem
312364
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
313365
openFileForRead(const Twine &Path) override;
314366

367+
std::error_code getRealPath(const Twine &Path,
368+
SmallVectorImpl<char> &Output) override;
369+
315370
std::error_code setCurrentWorkingDirectory(const Twine &Path) override;
316371

317372
/// Returns entry for the given filename.
@@ -326,6 +381,10 @@ class DependencyScanningWorkerFilesystem
326381
/// false if not (i.e. this entry is not a file or its scan fails).
327382
bool ensureDirectiveTokensArePopulated(EntryRef Entry);
328383

384+
/// Check whether \p Path exists. By default checks cached result of \c
385+
/// status(), and falls back on FS if unable to do so.
386+
bool exists(const Twine &Path) override;
387+
329388
private:
330389
/// For a filename that's not yet associated with any entry in the caches,
331390
/// uses the underlying filesystem to either look up the entry based in the
@@ -421,6 +480,10 @@ class DependencyScanningWorkerFilesystem
421480
llvm::ErrorOr<std::string> WorkingDirForCacheLookup;
422481

423482
void updateWorkingDirForCacheLookup();
483+
484+
llvm::ErrorOr<StringRef>
485+
tryGetFilenameForLookup(StringRef OriginalFilename,
486+
llvm::SmallVectorImpl<char> &PathBuf) const;
424487
};
425488

426489
} // end namespace dependencies

clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp

Lines changed: 135 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -118,8 +118,8 @@ DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
118118
StringRef Filename) const {
119119
assert(llvm::sys::path::is_absolute_gnu(Filename));
120120
std::lock_guard<std::mutex> LockGuard(CacheLock);
121-
auto It = EntriesByFilename.find(Filename);
122-
return It == EntriesByFilename.end() ? nullptr : It->getValue();
121+
auto It = CacheByFilename.find(Filename);
122+
return It == CacheByFilename.end() ? nullptr : It->getValue().first;
123123
}
124124

125125
const CachedFileSystemEntry *
@@ -135,11 +135,16 @@ DependencyScanningFilesystemSharedCache::CacheShard::
135135
getOrEmplaceEntryForFilename(StringRef Filename,
136136
llvm::ErrorOr<llvm::vfs::Status> Stat) {
137137
std::lock_guard<std::mutex> LockGuard(CacheLock);
138-
auto Insertion = EntriesByFilename.insert({Filename, nullptr});
139-
if (Insertion.second)
140-
Insertion.first->second =
138+
auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}});
139+
auto &[CachedEntry, CachedRealPath] = It->getValue();
140+
if (!CachedEntry) {
141+
// The entry is not present in the shared cache. Either the cache doesn't
142+
// know about the file at all, or it only knows about its real path.
143+
assert((Inserted || CachedRealPath) && "existing file with empty pair");
144+
CachedEntry =
141145
new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
142-
return *Insertion.first->second;
146+
}
147+
return *CachedEntry;
143148
}
144149

145150
const CachedFileSystemEntry &
@@ -148,24 +153,58 @@ DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
148153
std::unique_ptr<llvm::MemoryBuffer> Contents,
149154
std::optional<cas::ObjectRef> CASContents) {
150155
std::lock_guard<std::mutex> LockGuard(CacheLock);
151-
auto Insertion = EntriesByUID.insert({UID, nullptr});
152-
if (Insertion.second) {
156+
auto [It, Inserted] = EntriesByUID.insert({UID, nullptr});
157+
auto &CachedEntry = It->getSecond();
158+
if (Inserted) {
153159
CachedFileContents *StoredContents = nullptr;
154160
if (Contents)
155161
StoredContents = new (ContentsStorage.Allocate())
156162
CachedFileContents(std::move(Contents), std::move(CASContents));
157-
Insertion.first->second = new (EntryStorage.Allocate())
163+
CachedEntry = new (EntryStorage.Allocate())
158164
CachedFileSystemEntry(std::move(Stat), StoredContents);
159165
}
160-
return *Insertion.first->second;
166+
return *CachedEntry;
161167
}
162168

163169
/// Returns the entry already associated with \p Filename in this shard, if
/// there is one. Otherwise associates \p Entry with the filename and returns
/// it. An existing entry is never replaced, so every caller racing on the same
/// filename ends up adopting the same entry.
const CachedFileSystemEntry &
DependencyScanningFilesystemSharedCache::CacheShard::
    getOrInsertEntryForFilename(StringRef Filename,
                                const CachedFileSystemEntry &Entry) {
  std::lock_guard<std::mutex> LockGuard(CacheLock);
  auto [It, Inserted] = CacheByFilename.insert({Filename, {&Entry, nullptr}});
  auto &[EntrySlot, RealPathSlot] = It->getValue();
  // A pre-existing pair must carry at least one of entry / real path.
  assert((Inserted || EntrySlot || RealPathSlot) &&
         "existing file with empty pair");
  // On a fresh insertion the slot already points at Entry. For a pre-existing
  // key, only fill the slot when the cache knew just the real path; overwriting
  // a populated slot would discard the result another thread already
  // published.
  if (!EntrySlot)
    EntrySlot = &Entry;
  return *EntrySlot;
}
180+
181+
const CachedRealPath *
DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
    StringRef Filename) const {
  assert(llvm::sys::path::is_absolute_gnu(Filename));
  // The map is read under the shard lock.
  std::lock_guard<std::mutex> LockGuard(CacheLock);
  const auto Found = CacheByFilename.find(Filename);
  if (Found == CacheByFilename.end())
    return nullptr;
  // The real path is the second half of the cached pair; it may be null when
  // only the entry is known for this filename.
  return Found->getValue().second;
}
189+
190+
const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
    getOrEmplaceRealPathForFilename(StringRef Filename,
                                    llvm::ErrorOr<llvm::StringRef> RealPath) {
  std::lock_guard<std::mutex> LockGuard(CacheLock);

  // operator[] default-constructs the pair when the filename is new, which
  // leaves the real-path slot null.
  const CachedRealPath *&Slot = CacheByFilename[Filename].second;
  if (Slot)
    return *Slot;

  // Turn the borrowed result into an owning one (either the error code or a
  // std::string copy of the path) before placing it into RealPathStorage.
  CachedRealPath Owned = RealPath ? CachedRealPath(RealPath->str())
                                  : CachedRealPath(RealPath.getError());
  Slot = new (RealPathStorage.Allocate()) CachedRealPath(std::move(Owned));
  return *Slot;
}
170209

171210
static bool shouldCacheStatFailures(StringRef Filename) {
@@ -239,24 +278,15 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(
239278
llvm::ErrorOr<EntryRef>
240279
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
241280
StringRef OriginalFilename) {
242-
StringRef FilenameForLookup;
243281
SmallString<256> PathBuf;
244-
if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
245-
FilenameForLookup = OriginalFilename;
246-
} else if (!WorkingDirForCacheLookup) {
247-
return WorkingDirForCacheLookup.getError();
248-
} else {
249-
StringRef RelFilename = OriginalFilename;
250-
RelFilename.consume_front("./");
251-
PathBuf = *WorkingDirForCacheLookup;
252-
llvm::sys::path::append(PathBuf, RelFilename);
253-
FilenameForLookup = PathBuf.str();
254-
}
255-
assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
282+
auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
283+
if (!FilenameForLookup)
284+
return FilenameForLookup.getError();
285+
256286
if (const auto *Entry =
257-
findEntryByFilenameWithWriteThrough(FilenameForLookup))
287+
findEntryByFilenameWithWriteThrough(*FilenameForLookup))
258288
return EntryRef(OriginalFilename, *Entry).unwrapError();
259-
auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup);
289+
auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup);
260290
if (!MaybeEntry)
261291
return MaybeEntry.getError();
262292
return EntryRef(OriginalFilename, *MaybeEntry).unwrapError();
@@ -276,6 +306,17 @@ DependencyScanningWorkerFilesystem::status(const Twine &Path) {
276306
return Result->getStatus();
277307
}
278308

309+
bool DependencyScanningWorkerFilesystem::exists(const Twine &Path) {
310+
// While some VFS overlay filesystems may implement more-efficient
311+
// mechanisms for `exists` queries, `DependencyScanningWorkerFilesystem`
312+
// typically wraps `RealFileSystem` which does not specialize `exists`,
313+
// so it is not likely to benefit from such optimizations. Instead,
314+
// it is more-valuable to have this query go through the
315+
// cached-`status` code-path of the `DependencyScanningWorkerFilesystem`.
316+
llvm::ErrorOr<llvm::vfs::Status> Status = status(Path);
317+
return Status && Status->exists();
318+
}
319+
279320
namespace {
280321

281322
/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
@@ -343,6 +384,54 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
343384
return DepScanFile::create(Result.get());
344385
}
345386

387+
std::error_code
388+
DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path,
389+
SmallVectorImpl<char> &Output) {
390+
SmallString<256> OwnedFilename;
391+
StringRef OriginalFilename = Path.toStringRef(OwnedFilename);
392+
393+
SmallString<256> PathBuf;
394+
auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
395+
if (!FilenameForLookup)
396+
return FilenameForLookup.getError();
397+
398+
auto HandleCachedRealPath =
399+
[&Output](const CachedRealPath &RealPath) -> std::error_code {
400+
if (!RealPath)
401+
return RealPath.getError();
402+
Output.assign(RealPath->begin(), RealPath->end());
403+
return {};
404+
};
405+
406+
// If we already have the result in local cache, no work required.
407+
if (const auto *RealPath =
408+
LocalCache.findRealPathByFilename(*FilenameForLookup))
409+
return HandleCachedRealPath(*RealPath);
410+
411+
// If we have the result in the shared cache, cache it locally.
412+
auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup);
413+
if (const auto *ShardRealPath =
414+
Shard.findRealPathByFilename(*FilenameForLookup)) {
415+
const auto &RealPath = LocalCache.insertRealPathForFilename(
416+
*FilenameForLookup, *ShardRealPath);
417+
return HandleCachedRealPath(RealPath);
418+
}
419+
420+
// If we don't know the real path, compute it...
421+
std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output);
422+
llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC;
423+
if (!EC)
424+
ComputedRealPath = StringRef{Output.data(), Output.size()};
425+
426+
// ...and try to write it into the shared cache. In case some other thread won
427+
// this race and already wrote its own result there, just adopt it. Write
428+
// whatever is in the shared cache into the local one.
429+
const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
430+
*FilenameForLookup, ComputedRealPath);
431+
return HandleCachedRealPath(
432+
LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath));
433+
}
434+
346435
std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
347436
const Twine &Path) {
348437
std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);
@@ -364,4 +453,24 @@ void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
364453
llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
365454
}
366455

456+
llvm::ErrorOr<StringRef>
DependencyScanningWorkerFilesystem::tryGetFilenameForLookup(
    StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const {
  // An absolute filename is already usable as a lookup key; PathBuf stays
  // untouched in that case.
  if (llvm::sys::path::is_absolute_gnu(OriginalFilename))
    return OriginalFilename;

  // A relative filename can only be resolved against a known working
  // directory; otherwise propagate the error stored in its place.
  if (!WorkingDirForCacheLookup)
    return WorkingDirForCacheLookup.getError();

  // Build "<working dir>/<relative name>" in PathBuf, dropping one leading
  // "./" from the relative name first. The returned StringRef points into
  // PathBuf.
  StringRef Relative = OriginalFilename;
  Relative.consume_front("./");
  PathBuf.assign(WorkingDirForCacheLookup->begin(),
                 WorkingDirForCacheLookup->end());
  llvm::sys::path::append(PathBuf, Relative);
  StringRef FilenameForLookup{PathBuf.begin(), PathBuf.size()};
  assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
  return FilenameForLookup;
}
475+
367476
const char DependencyScanningWorkerFilesystem::ID = 0;

0 commit comments

Comments
 (0)