Skip to content

[clang][deps] Cache VFS::getRealPath() and VFS::exists() #8571

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 15, 2024
Merged
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ class DependencyScanningCASFilesystem : public llvm::cas::ThreadSafeFileSystem {
return FS->setCurrentWorkingDirectory(Path);
}
std::error_code getRealPath(const Twine &Path,
SmallVectorImpl<char> &Output) const override {
SmallVectorImpl<char> &Output) override {
return FS->getRealPath(Path, Output);
}
std::error_code isLocal(const Twine &Path, bool &Result) override {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ class CachedFileSystemEntry {
CachedFileContents *Contents;
};

using CachedRealPath = llvm::ErrorOr<std::string>;

/// This class is a shared cache that caches the 'stat' and 'open' calls to the
/// underlying real file system, the real paths of files, and the scanned
/// preprocessor directives of files.
Expand All @@ -166,9 +168,11 @@ class DependencyScanningFilesystemSharedCache {
/// The mutex that needs to be locked before mutation of any member.
mutable std::mutex CacheLock;

/// Map from filenames to cached entries.
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator>
EntriesByFilename;
/// Map from filenames to cached entries and real paths.
llvm::StringMap<
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
llvm::BumpPtrAllocator>
CacheByFilename;

/// Map from unique IDs to cached entries.
llvm::DenseMap<llvm::sys::fs::UniqueID, const CachedFileSystemEntry *>
Expand All @@ -180,6 +184,9 @@ class DependencyScanningFilesystemSharedCache {
/// The backing storage for cached contents.
llvm::SpecificBumpPtrAllocator<CachedFileContents> ContentsStorage;

/// The backing storage for cached real paths.
llvm::SpecificBumpPtrAllocator<CachedRealPath> RealPathStorage;

/// Returns entry associated with the filename or nullptr if none is found.
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const;

Expand Down Expand Up @@ -207,6 +214,17 @@ class DependencyScanningFilesystemSharedCache {
const CachedFileSystemEntry &
getOrInsertEntryForFilename(StringRef Filename,
const CachedFileSystemEntry &Entry);

/// Returns the real path associated with the filename or nullptr if none is
/// found.
const CachedRealPath *findRealPathByFilename(StringRef Filename) const;

/// Returns the real path associated with the filename if there is one.
/// Otherwise, constructs a new cached real path from the given one,
/// associates it with the filename and returns the result.
const CachedRealPath &
getOrEmplaceRealPathForFilename(StringRef Filename,
llvm::ErrorOr<StringRef> RealPath);
};

DependencyScanningFilesystemSharedCache();
Expand All @@ -223,14 +241,17 @@ class DependencyScanningFilesystemSharedCache {
/// This class is a local cache that caches the 'stat' and 'open' calls to the
/// underlying real file system, as well as the real paths of files.
class DependencyScanningFilesystemLocalCache {
llvm::StringMap<const CachedFileSystemEntry *, llvm::BumpPtrAllocator> Cache;
llvm::StringMap<
std::pair<const CachedFileSystemEntry *, const CachedRealPath *>,
llvm::BumpPtrAllocator>
Cache;

public:
/// Returns entry associated with the filename or nullptr if none is found.
const CachedFileSystemEntry *findEntryByFilename(StringRef Filename) const {
assert(llvm::sys::path::is_absolute_gnu(Filename));
auto It = Cache.find(Filename);
return It == Cache.end() ? nullptr : It->getValue();
return It == Cache.end() ? nullptr : It->getValue().first;
}

/// Associates the given entry with the filename and returns the given entry
Expand All @@ -239,9 +260,40 @@ class DependencyScanningFilesystemLocalCache {
insertEntryForFilename(StringRef Filename,
const CachedFileSystemEntry &Entry) {
assert(llvm::sys::path::is_absolute_gnu(Filename));
const auto *InsertedEntry = Cache.insert({Filename, &Entry}).first->second;
assert(InsertedEntry == &Entry && "entry already present");
return *InsertedEntry;
auto [It, Inserted] = Cache.insert({Filename, {&Entry, nullptr}});
auto &[CachedEntry, CachedRealPath] = It->getValue();
if (!Inserted) {
// The file is already present in the local cache. If we got here, it only
// contains the real path. Let's make sure the entry is populated too.
assert((!CachedEntry && CachedRealPath) && "entry already present");
CachedEntry = &Entry;
}
return *CachedEntry;
}

/// Looks up the real path cached locally for \p Filename, which must be an
/// absolute path. Yields nullptr when the filename is unknown to this cache or
/// when its record does not hold a real path yet.
const CachedRealPath *findRealPathByFilename(StringRef Filename) const {
  assert(llvm::sys::path::is_absolute_gnu(Filename));
  auto Found = Cache.find(Filename);
  if (Found == Cache.end())
    return nullptr;
  // The record is an (entry, real path) pair; only the second half is wanted.
  return Found->getValue().second;
}

/// Records \p RealPath as the real path cached locally for \p Filename (an
/// absolute path) and returns a reference to the real path now stored in the
/// cache (for convenience).
const CachedRealPath &
insertRealPathForFilename(StringRef Filename,
                          const CachedRealPath &RealPath) {
  assert(llvm::sys::path::is_absolute_gnu(Filename));
  auto Insertion = Cache.insert({Filename, {nullptr, &RealPath}});
  auto &Record = Insertion.first->getValue();
  const bool WasAlreadyPresent = !Insertion.second;
  if (WasAlreadyPresent) {
    // A record for this file already exists in the local cache. Reaching this
    // point means it carries only the entry, so fill in the real path half.
    assert((!Record.second && Record.first) && "real path already present");
    Record.second = &RealPath;
  }
  return *Record.second;
}
};

Expand Down Expand Up @@ -312,6 +364,9 @@ class DependencyScanningWorkerFilesystem
llvm::ErrorOr<std::unique_ptr<llvm::vfs::File>>
openFileForRead(const Twine &Path) override;

std::error_code getRealPath(const Twine &Path,
SmallVectorImpl<char> &Output) override;

std::error_code setCurrentWorkingDirectory(const Twine &Path) override;

/// Returns entry for the given filename.
Expand All @@ -326,6 +381,10 @@ class DependencyScanningWorkerFilesystem
/// false if not (i.e. this entry is not a file or its scan fails).
bool ensureDirectiveTokensArePopulated(EntryRef Entry);

/// Check whether \p Path exists. By default checks cached result of \c
/// status(), and falls back on FS if unable to do so.
bool exists(const Twine &Path) override;

private:
/// For a filename that's not yet associated with any entry in the caches,
/// uses the underlying filesystem to either look up the entry based in the
Expand Down Expand Up @@ -421,6 +480,10 @@ class DependencyScanningWorkerFilesystem
llvm::ErrorOr<std::string> WorkingDirForCacheLookup;

void updateWorkingDirForCacheLookup();

llvm::ErrorOr<StringRef>
tryGetFilenameForLookup(StringRef OriginalFilename,
llvm::SmallVectorImpl<char> &PathBuf) const;
};

} // end namespace dependencies
Expand Down
161 changes: 135 additions & 26 deletions clang/lib/Tooling/DependencyScanning/DependencyScanningFilesystem.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@ DependencyScanningFilesystemSharedCache::CacheShard::findEntryByFilename(
StringRef Filename) const {
assert(llvm::sys::path::is_absolute_gnu(Filename));
std::lock_guard<std::mutex> LockGuard(CacheLock);
auto It = EntriesByFilename.find(Filename);
return It == EntriesByFilename.end() ? nullptr : It->getValue();
auto It = CacheByFilename.find(Filename);
return It == CacheByFilename.end() ? nullptr : It->getValue().first;
}

const CachedFileSystemEntry *
Expand All @@ -135,11 +135,16 @@ DependencyScanningFilesystemSharedCache::CacheShard::
getOrEmplaceEntryForFilename(StringRef Filename,
llvm::ErrorOr<llvm::vfs::Status> Stat) {
std::lock_guard<std::mutex> LockGuard(CacheLock);
auto Insertion = EntriesByFilename.insert({Filename, nullptr});
if (Insertion.second)
Insertion.first->second =
auto [It, Inserted] = CacheByFilename.insert({Filename, {nullptr, nullptr}});
auto &[CachedEntry, CachedRealPath] = It->getValue();
if (!CachedEntry) {
// The entry is not present in the shared cache. Either the cache doesn't
// know about the file at all, or it only knows about its real path.
assert((Inserted || CachedRealPath) && "existing file with empty pair");
CachedEntry =
new (EntryStorage.Allocate()) CachedFileSystemEntry(std::move(Stat));
return *Insertion.first->second;
}
return *CachedEntry;
}

const CachedFileSystemEntry &
Expand All @@ -148,24 +153,58 @@ DependencyScanningFilesystemSharedCache::CacheShard::getOrEmplaceEntryForUID(
std::unique_ptr<llvm::MemoryBuffer> Contents,
std::optional<cas::ObjectRef> CASContents) {
std::lock_guard<std::mutex> LockGuard(CacheLock);
auto Insertion = EntriesByUID.insert({UID, nullptr});
if (Insertion.second) {
auto [It, Inserted] = EntriesByUID.insert({UID, nullptr});
auto &CachedEntry = It->getSecond();
if (Inserted) {
CachedFileContents *StoredContents = nullptr;
if (Contents)
StoredContents = new (ContentsStorage.Allocate())
CachedFileContents(std::move(Contents), std::move(CASContents));
Insertion.first->second = new (EntryStorage.Allocate())
CachedEntry = new (EntryStorage.Allocate())
CachedFileSystemEntry(std::move(Stat), StoredContents);
}
return *Insertion.first->second;
return *CachedEntry;
}

/// Returns the entry already associated with \p Filename in this shard if
/// there is one. Otherwise associates \p Entry with \p Filename and returns
/// it. An existing entry is never replaced, so every caller asking about the
/// same filename observes the same entry.
const CachedFileSystemEntry &
DependencyScanningFilesystemSharedCache::CacheShard::
    getOrInsertEntryForFilename(StringRef Filename,
                                const CachedFileSystemEntry &Entry) {
  std::lock_guard<std::mutex> LockGuard(CacheLock);
  // If the filename is new, the record is created with \p Entry and no real
  // path. If a record already exists, insert() leaves it untouched.
  auto &[CachedEntry, CachedRealPath] =
      CacheByFilename.insert({Filename, {&Entry, nullptr}}).first->getValue();
  // A pre-existing record may hold only a real path (null entry); that is the
  // sole case in which the entry still has to be filled in. Checking the
  // insertion flag here instead would overwrite an entry that is already
  // cached for the filename.
  if (!CachedEntry)
    CachedEntry = &Entry;
  return *CachedEntry;
}

// Looks up the real path cached in this shard for the absolute \p Filename
// while holding the shard lock. Yields nullptr when the filename is unknown or
// when its record does not hold a real path yet.
const CachedRealPath *
DependencyScanningFilesystemSharedCache::CacheShard::findRealPathByFilename(
    StringRef Filename) const {
  assert(llvm::sys::path::is_absolute_gnu(Filename));
  std::lock_guard<std::mutex> LockGuard(CacheLock);
  auto Found = CacheByFilename.find(Filename);
  if (Found == CacheByFilename.end())
    return nullptr;
  return Found->getValue().second;
}

// Returns the real path already cached in this shard for \p Filename if there
// is one. Otherwise stores an owned copy of \p RealPath (either the path
// string or the error code) and returns that.
const CachedRealPath &DependencyScanningFilesystemSharedCache::CacheShard::
    getOrEmplaceRealPathForFilename(StringRef Filename,
                                    llvm::ErrorOr<llvm::StringRef> RealPath) {
  std::lock_guard<std::mutex> LockGuard(CacheLock);

  // A null slot means no real path has been cached for this filename yet.
  const CachedRealPath *&Slot = CacheByFilename[Filename].second;
  if (Slot)
    return *Slot;

  // The incoming value only references the path; convert it into a value that
  // owns its string (or carries the error) before placing it in the storage.
  CachedRealPath Owned = RealPath ? CachedRealPath(RealPath->str())
                                  : CachedRealPath(RealPath.getError());
  Slot = new (RealPathStorage.Allocate()) CachedRealPath(std::move(Owned));
  return *Slot;
}

static bool shouldCacheStatFailures(StringRef Filename) {
Expand Down Expand Up @@ -239,24 +278,15 @@ DependencyScanningWorkerFilesystem::computeAndStoreResult(
llvm::ErrorOr<EntryRef>
DependencyScanningWorkerFilesystem::getOrCreateFileSystemEntry(
StringRef OriginalFilename) {
StringRef FilenameForLookup;
SmallString<256> PathBuf;
if (llvm::sys::path::is_absolute_gnu(OriginalFilename)) {
FilenameForLookup = OriginalFilename;
} else if (!WorkingDirForCacheLookup) {
return WorkingDirForCacheLookup.getError();
} else {
StringRef RelFilename = OriginalFilename;
RelFilename.consume_front("./");
PathBuf = *WorkingDirForCacheLookup;
llvm::sys::path::append(PathBuf, RelFilename);
FilenameForLookup = PathBuf.str();
}
assert(llvm::sys::path::is_absolute_gnu(FilenameForLookup));
auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
if (!FilenameForLookup)
return FilenameForLookup.getError();

if (const auto *Entry =
findEntryByFilenameWithWriteThrough(FilenameForLookup))
findEntryByFilenameWithWriteThrough(*FilenameForLookup))
return EntryRef(OriginalFilename, *Entry).unwrapError();
auto MaybeEntry = computeAndStoreResult(OriginalFilename, FilenameForLookup);
auto MaybeEntry = computeAndStoreResult(OriginalFilename, *FilenameForLookup);
if (!MaybeEntry)
return MaybeEntry.getError();
return EntryRef(OriginalFilename, *MaybeEntry).unwrapError();
Expand All @@ -276,6 +306,17 @@ DependencyScanningWorkerFilesystem::status(const Twine &Path) {
return Result->getStatus();
}

bool DependencyScanningWorkerFilesystem::exists(const Twine &Path) {
// While some VFS overlay filesystems may implement more-efficient
// mechanisms for `exists` queries, `DependencyScanningWorkerFilesystem`
// typically wraps `RealFileSystem` which does not specialize `exists`,
// so it is not likely to benefit from such optimizations. Instead,
// it is more-valuable to have this query go through the
// cached-`status` code-path of the `DependencyScanningWorkerFilesystem`.
llvm::ErrorOr<llvm::vfs::Status> Status = status(Path);
return Status && Status->exists();
}

namespace {

/// The VFS that is used by clang consumes the \c CachedFileSystemEntry using
Expand Down Expand Up @@ -343,6 +384,54 @@ DependencyScanningWorkerFilesystem::openFileForRead(const Twine &Path) {
return DepScanFile::create(Result.get());
}

std::error_code
DependencyScanningWorkerFilesystem::getRealPath(const Twine &Path,
SmallVectorImpl<char> &Output) {
SmallString<256> OwnedFilename;
StringRef OriginalFilename = Path.toStringRef(OwnedFilename);

SmallString<256> PathBuf;
auto FilenameForLookup = tryGetFilenameForLookup(OriginalFilename, PathBuf);
if (!FilenameForLookup)
return FilenameForLookup.getError();

auto HandleCachedRealPath =
[&Output](const CachedRealPath &RealPath) -> std::error_code {
if (!RealPath)
return RealPath.getError();
Output.assign(RealPath->begin(), RealPath->end());
return {};
};

// If we already have the result in local cache, no work required.
if (const auto *RealPath =
LocalCache.findRealPathByFilename(*FilenameForLookup))
return HandleCachedRealPath(*RealPath);

// If we have the result in the shared cache, cache it locally.
auto &Shard = SharedCache.getShardForFilename(*FilenameForLookup);
if (const auto *ShardRealPath =
Shard.findRealPathByFilename(*FilenameForLookup)) {
const auto &RealPath = LocalCache.insertRealPathForFilename(
*FilenameForLookup, *ShardRealPath);
return HandleCachedRealPath(RealPath);
}

// If we don't know the real path, compute it...
std::error_code EC = getUnderlyingFS().getRealPath(OriginalFilename, Output);
llvm::ErrorOr<llvm::StringRef> ComputedRealPath = EC;
if (!EC)
ComputedRealPath = StringRef{Output.data(), Output.size()};

// ...and try to write it into the shared cache. In case some other thread won
// this race and already wrote its own result there, just adopt it. Write
// whatever is in the shared cache into the local one.
const auto &RealPath = Shard.getOrEmplaceRealPathForFilename(
*FilenameForLookup, ComputedRealPath);
return HandleCachedRealPath(
LocalCache.insertRealPathForFilename(*FilenameForLookup, RealPath));
}

std::error_code DependencyScanningWorkerFilesystem::setCurrentWorkingDirectory(
const Twine &Path) {
std::error_code EC = ProxyFileSystem::setCurrentWorkingDirectory(Path);
Expand All @@ -364,4 +453,24 @@ void DependencyScanningWorkerFilesystem::updateWorkingDirForCacheLookup() {
llvm::sys::path::is_absolute_gnu(*WorkingDirForCacheLookup));
}

// Produces the absolute filename used as the cache key for
// \p OriginalFilename. Absolute inputs are returned as they are. Relative
// inputs are joined onto the working directory used for cache lookups, using
// \p PathBuf as backing storage for the result. Returns the working-directory
// error if that directory is unavailable.
llvm::ErrorOr<StringRef>
DependencyScanningWorkerFilesystem::tryGetFilenameForLookup(
    StringRef OriginalFilename, llvm::SmallVectorImpl<char> &PathBuf) const {
  // Absolute paths need no rewriting and leave PathBuf untouched.
  if (llvm::sys::path::is_absolute_gnu(OriginalFilename))
    return OriginalFilename;

  // A relative path cannot be resolved without a working directory.
  if (!WorkingDirForCacheLookup)
    return WorkingDirForCacheLookup.getError();

  StringRef Relative = OriginalFilename;
  Relative.consume_front("./");
  PathBuf.assign(WorkingDirForCacheLookup->begin(),
                 WorkingDirForCacheLookup->end());
  llvm::sys::path::append(PathBuf, Relative);

  StringRef Resolved{PathBuf.begin(), PathBuf.size()};
  assert(llvm::sys::path::is_absolute_gnu(Resolved));
  return Resolved;
}

const char DependencyScanningWorkerFilesystem::ID = 0;
Loading