Skip to content

[clang][modules] Fix filesystem races in ModuleManager #131354

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 3 additions & 22 deletions clang/include/clang/Serialization/ModuleManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringMap.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator.h"
#include "llvm/ADT/iterator_range.h"
Expand Down Expand Up @@ -58,7 +59,7 @@ class ModuleManager {
SmallVector<ModuleFile *, 2> Roots;

/// All loaded modules, indexed by name.
llvm::DenseMap<const FileEntry *, ModuleFile *> Modules;
llvm::StringMap<ModuleFile *> Modules;

/// FileManager that handles translating between filenames and
/// FileEntry *.
Expand Down Expand Up @@ -179,7 +180,7 @@ class ModuleManager {
ModuleFile *lookupByModuleName(StringRef ModName) const;

/// Returns the module associated with the given module file.
ModuleFile *lookup(const FileEntry *File) const;
ModuleFile *lookup(FileEntryRef File) const;

/// Returns the in-memory (virtual file) buffer with the given name
std::unique_ptr<llvm::MemoryBuffer> lookupBuffer(StringRef Name);
Expand Down Expand Up @@ -283,26 +284,6 @@ class ModuleManager {
void visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
llvm::SmallPtrSetImpl<ModuleFile *> *ModuleFilesHit = nullptr);

/// Attempt to resolve the given module file name to a file entry.
///
/// \param FileName The name of the module file.
///
/// \param ExpectedSize The size that the module file is expected to have.
/// If the actual size differs, the resolver should return \c true.
///
/// \param ExpectedModTime The modification time that the module file is
/// expected to have. If the actual modification time differs, the resolver
/// should return \c true.
///
/// \param File Will be set to the file if there is one, or null
/// otherwise.
///
/// \returns True if a file exists but does not meet the size/
/// modification time criteria, false if the file is either available and
/// suitable, or is missing.
bool lookupModuleFile(StringRef FileName, off_t ExpectedSize,
time_t ExpectedModTime, OptionalFileEntryRef &File);

/// View the graphviz representation of the module graph.
void viewGraph();

Expand Down
13 changes: 9 additions & 4 deletions clang/lib/Serialization/ASTReader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -4350,10 +4350,15 @@ void ASTReader::ReadModuleOffsetMap(ModuleFile &F) const {
uint16_t Len = endian::readNext<uint16_t, llvm::endianness::little>(Data);
StringRef Name = StringRef((const char*)Data, Len);
Data += Len;
ModuleFile *OM = (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule ||
Kind == MK_ImplicitModule
? ModuleMgr.lookupByModuleName(Name)
: ModuleMgr.lookupByFileName(Name));
ModuleFile *OM;
if (Kind == MK_PrebuiltModule || Kind == MK_ExplicitModule ||
Kind == MK_ImplicitModule) {
OM = ModuleMgr.lookupByModuleName(Name);
} else {
SmallString<128> NormalizedName = Name;
llvm::sys::path::make_preferred(NormalizedName);
OM = ModuleMgr.lookupByFileName(NormalizedName);
}
if (!OM) {
std::string Msg = "refers to unknown module, cannot find ";
Msg.append(std::string(Name));
Expand Down
32 changes: 26 additions & 6 deletions clang/lib/Serialization/InMemoryModuleCache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,13 +7,18 @@
//===----------------------------------------------------------------------===//

#include "clang/Serialization/InMemoryModuleCache.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/Path.h"

using namespace clang;

InMemoryModuleCache::State
InMemoryModuleCache::getPCMState(llvm::StringRef Filename) const {
auto I = PCMs.find(Filename);
llvm::SmallString<128> NormalizedFilename = Filename;
llvm::sys::path::make_preferred(NormalizedFilename);

auto I = PCMs.find(NormalizedFilename);
if (I == PCMs.end())
return Unknown;
if (I->second.IsFinal)
Expand All @@ -24,15 +29,21 @@ InMemoryModuleCache::getPCMState(llvm::StringRef Filename) const {
llvm::MemoryBuffer &
InMemoryModuleCache::addPCM(llvm::StringRef Filename,
std::unique_ptr<llvm::MemoryBuffer> Buffer) {
auto Insertion = PCMs.insert(std::make_pair(Filename, std::move(Buffer)));
llvm::SmallString<128> NormalizedFilename = Filename;
llvm::sys::path::make_preferred(NormalizedFilename);

auto Insertion = PCMs.insert({NormalizedFilename, std::move(Buffer)});
assert(Insertion.second && "Already has a PCM");
return *Insertion.first->second.Buffer;
}

llvm::MemoryBuffer &
InMemoryModuleCache::addBuiltPCM(llvm::StringRef Filename,
std::unique_ptr<llvm::MemoryBuffer> Buffer) {
auto &PCM = PCMs[Filename];
llvm::SmallString<128> NormalizedFilename = Filename;
llvm::sys::path::make_preferred(NormalizedFilename);

auto &PCM = PCMs[NormalizedFilename];
assert(!PCM.IsFinal && "Trying to override finalized PCM?");
assert(!PCM.Buffer && "Trying to override tentative PCM?");
PCM.Buffer = std::move(Buffer);
Expand All @@ -42,7 +53,10 @@ InMemoryModuleCache::addBuiltPCM(llvm::StringRef Filename,

llvm::MemoryBuffer *
InMemoryModuleCache::lookupPCM(llvm::StringRef Filename) const {
auto I = PCMs.find(Filename);
llvm::SmallString<128> NormalizedFilename = Filename;
llvm::sys::path::make_preferred(NormalizedFilename);

auto I = PCMs.find(NormalizedFilename);
if (I == PCMs.end())
return nullptr;
return I->second.Buffer.get();
Expand All @@ -57,7 +71,10 @@ bool InMemoryModuleCache::shouldBuildPCM(llvm::StringRef Filename) const {
}

bool InMemoryModuleCache::tryToDropPCM(llvm::StringRef Filename) {
auto I = PCMs.find(Filename);
llvm::SmallString<128> NormalizedFilename = Filename;
llvm::sys::path::make_preferred(NormalizedFilename);

auto I = PCMs.find(NormalizedFilename);
assert(I != PCMs.end() && "PCM to remove is unknown...");

auto &PCM = I->second;
Expand All @@ -71,7 +88,10 @@ bool InMemoryModuleCache::tryToDropPCM(llvm::StringRef Filename) {
}

void InMemoryModuleCache::finalizePCM(llvm::StringRef Filename) {
auto I = PCMs.find(Filename);
llvm::SmallString<128> NormalizedFilename = Filename;
llvm::sys::path::make_preferred(NormalizedFilename);

auto I = PCMs.find(NormalizedFilename);
assert(I != PCMs.end() && "PCM to finalize is unknown...");

auto &PCM = I->second;
Expand Down
169 changes: 61 additions & 108 deletions clang/lib/Serialization/ModuleManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,8 +59,10 @@ ModuleFile *ModuleManager::lookupByModuleName(StringRef Name) const {
return nullptr;
}

ModuleFile *ModuleManager::lookup(const FileEntry *File) const {
return Modules.lookup(File);
ModuleFile *ModuleManager::lookup(FileEntryRef File) const {
llvm::SmallString<128> NormalizedFileName = File.getName();
llvm::sys::path::make_preferred(NormalizedFileName);
return Modules.lookup(NormalizedFileName);
}

std::unique_ptr<llvm::MemoryBuffer>
Expand Down Expand Up @@ -107,120 +109,94 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
std::string &ErrorStr) {
Module = nullptr;

// Look for the file entry. This only fails if the expected size or
// modification time differ.
OptionalFileEntryRef Entry;
if (Type == MK_ExplicitModule || Type == MK_PrebuiltModule) {
// If we're not expecting to pull this file out of the module cache, it
// might have a different mtime due to being moved across filesystems in
// a distributed build. The size must still match, though. (As must the
// contents, but we can't check that.)
ExpectedModTime = 0;
}
// Note: ExpectedSize and ExpectedModTime will be 0 for MK_ImplicitModule
// when using an ASTFileSignature.
if (lookupModuleFile(FileName, ExpectedSize, ExpectedModTime, Entry)) {
ErrorStr = "module file has a different size or mtime than expected";
return OutOfDate;
}

if (!Entry) {
ErrorStr = "module file not found";
return Missing;
}

// The ModuleManager's use of FileEntry nodes as the keys for its map of
// loaded modules is less than ideal. Uniqueness for FileEntry nodes is
// maintained by FileManager, which in turn uses inode numbers on hosts
// that support that. When coupled with the module cache's proclivity for
// turning over and deleting stale PCMs, this means entries for different
// module files can wind up reusing the same underlying inode. When this
// happens, subsequent accesses to the Modules map will disagree on the
// ModuleFile associated with a given file. In general, it is not sufficient
// to resolve this conundrum with a type like FileEntryRef that stores the
// name of the FileEntry node on first access because of path canonicalization
// issues. However, the paths constructed for implicit module builds are
// fully under Clang's control. We *can*, therefore, rely on their structure
// being consistent across operating systems and across subsequent accesses
// to the Modules map.
auto implicitModuleNamesMatch = [](ModuleKind Kind, const ModuleFile *MF,
FileEntryRef Entry) -> bool {
if (Kind != MK_ImplicitModule)
return true;
return Entry.getName() == MF->FileName;
};

// Check whether we already loaded this module, before
if (ModuleFile *ModuleEntry = Modules.lookup(*Entry)) {
if (implicitModuleNamesMatch(Type, ModuleEntry, *Entry)) {
// Check the stored signature.
if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr))
return OutOfDate;

Module = ModuleEntry;
updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc);
return AlreadyLoaded;
}
llvm::SmallString<128> NormalizedFileName = FileName;
llvm::sys::path::make_preferred(NormalizedFileName);
if (ModuleFile *ModuleEntry = Modules.lookup(NormalizedFileName)) {
// Check the stored signature.
if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr))
return OutOfDate;

Module = ModuleEntry;
updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc);
return AlreadyLoaded;
}

// Allocate a new module.
auto NewModule = std::make_unique<ModuleFile>(Type, *Entry, Generation);
NewModule->Index = Chain.size();
NewModule->FileName = FileName.str();
NewModule->ImportLoc = ImportLoc;
NewModule->InputFilesValidationTimestamp = 0;

if (NewModule->Kind == MK_ImplicitModule) {
std::string TimestampFilename =
ModuleFile::getTimestampFilename(NewModule->FileName);
llvm::vfs::Status Status;
// A cached stat value would be fine as well.
if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
NewModule->InputFilesValidationTimestamp =
llvm::sys::toTimeT(Status.getLastModificationTime());
}
OptionalFileEntryRef Entry;
llvm::MemoryBuffer *ModuleBuffer = nullptr;

// Load the contents of the module
if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) {
// The buffer was already provided for us.
NewModule->Buffer = &getModuleCache().getInMemoryModuleCache().addBuiltPCM(
ModuleBuffer = &getModuleCache().getInMemoryModuleCache().addBuiltPCM(
FileName, std::move(Buffer));
// Since the cached buffer is reused, it is safe to close the file
// descriptor that was opened while stat()ing the PCM in
// lookupModuleFile() above, it won't be needed any longer.
Entry->closeFile();
} else if (llvm::MemoryBuffer *Buffer =
getModuleCache().getInMemoryModuleCache().lookupPCM(
FileName)) {
NewModule->Buffer = Buffer;
// As above, the file descriptor is no longer needed.
Entry->closeFile();
ModuleBuffer = Buffer;
} else if (getModuleCache().getInMemoryModuleCache().shouldBuildPCM(
FileName)) {
// Report that the module is out of date, since we tried (and failed) to
// import it earlier.
Entry->closeFile();
return OutOfDate;
} else {
Entry = FileName == "-"
? expectedToOptional(FileMgr.getSTDIN())
: FileMgr.getOptionalFileRef(FileName, /*OpenFile=*/true,
/*CacheFailure=*/false);
if (!Entry) {
ErrorStr = "module file not found";
return Missing;
}

// Get a buffer of the file and close the file descriptor when done.
// The file is volatile because in a parallel build we expect multiple
// compiler processes to use the same module file rebuilding it if needed.
//
// RequiresNullTerminator is false because module files don't need it, and
// this allows the file to still be mmapped.
auto Buf = FileMgr.getBufferForFile(NewModule->File,
/*IsVolatile=*/true,
auto Buf = FileMgr.getBufferForFile(*Entry, /*IsVolatile=*/true,
/*RequiresNullTerminator=*/false);

if (!Buf) {
ErrorStr = Buf.getError().message();
return Missing;
}

NewModule->Buffer = &getModuleCache().getInMemoryModuleCache().addPCM(
if ((ExpectedSize && ExpectedSize != Entry->getSize()) ||
(ExpectedModTime && ExpectedModTime != Entry->getModificationTime())) {
ErrorStr = "module file has a different size or mtime than expected";
return OutOfDate;
}

ModuleBuffer = &getModuleCache().getInMemoryModuleCache().addPCM(
FileName, std::move(*Buf));
}

// TODO: Make it so that ModuleFile is not tied to a FileEntry.
if (!Entry && !((Entry = FileMgr.getVirtualFileRef(FileName, ExpectedSize,
ExpectedModTime))))
return Missing;

// Allocate a new module.
auto NewModule = std::make_unique<ModuleFile>(Type, *Entry, Generation);
NewModule->Index = Chain.size();
NewModule->FileName = FileName.str();
NewModule->ImportLoc = ImportLoc;
NewModule->InputFilesValidationTimestamp = 0;

if (NewModule->Kind == MK_ImplicitModule) {
std::string TimestampFilename =
ModuleFile::getTimestampFilename(NewModule->FileName);
llvm::vfs::Status Status;
// A cached stat value would be fine as well.
if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
NewModule->InputFilesValidationTimestamp =
llvm::sys::toTimeT(Status.getLastModificationTime());
}

NewModule->Buffer = ModuleBuffer;

// Initialize the stream.
NewModule->Data = PCHContainerRdr.ExtractPCH(*NewModule->Buffer);

Expand All @@ -231,7 +207,7 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
return OutOfDate;

// We're keeping this module. Store it everywhere.
Module = Modules[*Entry] = NewModule.get();
Module = Modules[NormalizedFileName] = NewModule.get();

updateModuleImports(*NewModule, ImportedBy, ImportLoc);

Expand Down Expand Up @@ -277,7 +253,7 @@ void ModuleManager::removeModules(ModuleIterator First) {

// Delete the modules.
for (ModuleIterator victim = First; victim != Last; ++victim)
Modules.erase(victim->File);
Modules.erase(victim->File.getName());

Chain.erase(Chain.begin() + (First - begin()), Chain.end());
}
Expand Down Expand Up @@ -436,29 +412,6 @@ void ModuleManager::visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
returnVisitState(std::move(State));
}

bool ModuleManager::lookupModuleFile(StringRef FileName, off_t ExpectedSize,
time_t ExpectedModTime,
OptionalFileEntryRef &File) {
if (FileName == "-") {
File = expectedToOptional(FileMgr.getSTDIN());
return false;
}

// Open the file immediately to ensure there is no race between stat'ing and
// opening the file.
File = FileMgr.getOptionalFileRef(FileName, /*OpenFile=*/true,
/*CacheFailure=*/false);

if (File &&
((ExpectedSize && ExpectedSize != File->getSize()) ||
(ExpectedModTime && ExpectedModTime != File->getModificationTime())))
// Do not destroy File, as it may be referenced. If we need to rebuild it,
// it will be destroyed by removeModules.
return true;

return false;
}

#ifndef NDEBUG
namespace llvm {

Expand Down