Skip to content

Commit 7945795

Browse files
committed
[clang][modules] Fix filesystem races in ModuleManager
The `ModuleManager` uses `FileEntry` objects to uniquely identify module files. This requires first consulting the `FileManager` (and therefore the file system) when loading PCM files. This is problematic, as this may load a different PCM file to what's already in the `InMemoryModuleCache` and fail the size and mtime checks. This PR changes things so that module files are identified by their file system path. This removes the need of knowing the file entry at the start and allows us to fix the race. The downside is that we no longer get the `FileManager` inode-based uniquing, meaning symlinks in the module cache no longer work. I think this is fine, since Clang itself never creates symlinks in that directory. Moreover, we already had to work around filesystems recycling inode numbers, so this actually seems like a win to me (and resolves a long-standing FIXME-like comment). Note that this change also requires that all places calling into `ModuleManager` from within a single `CompilerInstance` use consistent paths to refer to module files. This might be problematic if there are concurrent Clang processes operating on the same module cache directory but with different spellings - the `IMPORT` records in PCM files will be valid and consistent within single process, but may break uniquing in another process. We might need to do some canonicalization of the module cache paths to avoid this issue.
1 parent 9455df9 commit 7945795

File tree

2 files changed

+60
-130
lines changed

2 files changed

+60
-130
lines changed

clang/include/clang/Serialization/ModuleManager.h

Lines changed: 3 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
#include "llvm/ADT/STLExtras.h"
2323
#include "llvm/ADT/SmallPtrSet.h"
2424
#include "llvm/ADT/SmallVector.h"
25+
#include "llvm/ADT/StringMap.h"
2526
#include "llvm/ADT/StringRef.h"
2627
#include "llvm/ADT/iterator.h"
2728
#include "llvm/ADT/iterator_range.h"
@@ -58,7 +59,7 @@ class ModuleManager {
5859
SmallVector<ModuleFile *, 2> Roots;
5960

6061
/// All loaded modules, indexed by name.
61-
llvm::DenseMap<const FileEntry *, ModuleFile *> Modules;
62+
llvm::StringMap<ModuleFile *> Modules;
6263

6364
/// FileManager that handles translating between filenames and
6465
/// FileEntry *.
@@ -179,7 +180,7 @@ class ModuleManager {
179180
ModuleFile *lookupByModuleName(StringRef ModName) const;
180181

181182
/// Returns the module associated with the given module file.
182-
ModuleFile *lookup(const FileEntry *File) const;
183+
ModuleFile *lookup(FileEntryRef File) const;
183184

184185
/// Returns the in-memory (virtual file) buffer with the given name
185186
std::unique_ptr<llvm::MemoryBuffer> lookupBuffer(StringRef Name);
@@ -283,26 +284,6 @@ class ModuleManager {
283284
void visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
284285
llvm::SmallPtrSetImpl<ModuleFile *> *ModuleFilesHit = nullptr);
285286

286-
/// Attempt to resolve the given module file name to a file entry.
287-
///
288-
/// \param FileName The name of the module file.
289-
///
290-
/// \param ExpectedSize The size that the module file is expected to have.
291-
/// If the actual size differs, the resolver should return \c true.
292-
///
293-
/// \param ExpectedModTime The modification time that the module file is
294-
/// expected to have. If the actual modification time differs, the resolver
295-
/// should return \c true.
296-
///
297-
/// \param File Will be set to the file if there is one, or null
298-
/// otherwise.
299-
///
300-
/// \returns True if a file exists but does not meet the size/
301-
/// modification time criteria, false if the file is either available and
302-
/// suitable, or is missing.
303-
bool lookupModuleFile(StringRef FileName, off_t ExpectedSize,
304-
time_t ExpectedModTime, OptionalFileEntryRef &File);
305-
306287
/// View the graphviz representation of the module graph.
307288
void viewGraph();
308289

clang/lib/Serialization/ModuleManager.cpp

Lines changed: 57 additions & 108 deletions
Original file line numberDiff line numberDiff line change
@@ -59,8 +59,8 @@ ModuleFile *ModuleManager::lookupByModuleName(StringRef Name) const {
5959
return nullptr;
6060
}
6161

62-
ModuleFile *ModuleManager::lookup(const FileEntry *File) const {
63-
return Modules.lookup(File);
62+
ModuleFile *ModuleManager::lookup(FileEntryRef File) const {
63+
return Modules.lookup(File.getName());
6464
}
6565

6666
std::unique_ptr<llvm::MemoryBuffer>
@@ -107,120 +107,92 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
107107
std::string &ErrorStr) {
108108
Module = nullptr;
109109

110-
// Look for the file entry. This only fails if the expected size or
111-
// modification time differ.
112-
OptionalFileEntryRef Entry;
113-
if (Type == MK_ExplicitModule || Type == MK_PrebuiltModule) {
114-
// If we're not expecting to pull this file out of the module cache, it
115-
// might have a different mtime due to being moved across filesystems in
116-
// a distributed build. The size must still match, though. (As must the
117-
// contents, but we can't check that.)
118-
ExpectedModTime = 0;
119-
}
120-
// Note: ExpectedSize and ExpectedModTime will be 0 for MK_ImplicitModule
121-
// when using an ASTFileSignature.
122-
if (lookupModuleFile(FileName, ExpectedSize, ExpectedModTime, Entry)) {
123-
ErrorStr = "module file has a different size or mtime than expected";
124-
return OutOfDate;
125-
}
126-
127-
if (!Entry) {
128-
ErrorStr = "module file not found";
129-
return Missing;
130-
}
131-
132-
// The ModuleManager's use of FileEntry nodes as the keys for its map of
133-
// loaded modules is less than ideal. Uniqueness for FileEntry nodes is
134-
// maintained by FileManager, which in turn uses inode numbers on hosts
135-
// that support that. When coupled with the module cache's proclivity for
136-
// turning over and deleting stale PCMs, this means entries for different
137-
// module files can wind up reusing the same underlying inode. When this
138-
// happens, subsequent accesses to the Modules map will disagree on the
139-
// ModuleFile associated with a given file. In general, it is not sufficient
140-
// to resolve this conundrum with a type like FileEntryRef that stores the
141-
// name of the FileEntry node on first access because of path canonicalization
142-
// issues. However, the paths constructed for implicit module builds are
143-
// fully under Clang's control. We *can*, therefore, rely on their structure
144-
// being consistent across operating systems and across subsequent accesses
145-
// to the Modules map.
146-
auto implicitModuleNamesMatch = [](ModuleKind Kind, const ModuleFile *MF,
147-
FileEntryRef Entry) -> bool {
148-
if (Kind != MK_ImplicitModule)
149-
return true;
150-
return Entry.getName() == MF->FileName;
151-
};
152-
153110
// Check whether we already loaded this module, before
154-
if (ModuleFile *ModuleEntry = Modules.lookup(*Entry)) {
155-
if (implicitModuleNamesMatch(Type, ModuleEntry, *Entry)) {
156-
// Check the stored signature.
157-
if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr))
158-
return OutOfDate;
159-
160-
Module = ModuleEntry;
161-
updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc);
162-
return AlreadyLoaded;
163-
}
111+
if (ModuleFile *ModuleEntry = Modules.lookup(FileName)) {
112+
// Check the stored signature.
113+
if (checkSignature(ModuleEntry->Signature, ExpectedSignature, ErrorStr))
114+
return OutOfDate;
115+
116+
Module = ModuleEntry;
117+
updateModuleImports(*ModuleEntry, ImportedBy, ImportLoc);
118+
return AlreadyLoaded;
164119
}
165120

166-
// Allocate a new module.
167-
auto NewModule = std::make_unique<ModuleFile>(Type, *Entry, Generation);
168-
NewModule->Index = Chain.size();
169-
NewModule->FileName = FileName.str();
170-
NewModule->ImportLoc = ImportLoc;
171-
NewModule->InputFilesValidationTimestamp = 0;
172-
173-
if (NewModule->Kind == MK_ImplicitModule) {
174-
std::string TimestampFilename =
175-
ModuleFile::getTimestampFilename(NewModule->FileName);
176-
llvm::vfs::Status Status;
177-
// A cached stat value would be fine as well.
178-
if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
179-
NewModule->InputFilesValidationTimestamp =
180-
llvm::sys::toTimeT(Status.getLastModificationTime());
181-
}
121+
OptionalFileEntryRef Entry;
122+
llvm::MemoryBuffer *ModuleBuffer = nullptr;
182123

183124
// Load the contents of the module
184125
if (std::unique_ptr<llvm::MemoryBuffer> Buffer = lookupBuffer(FileName)) {
185126
// The buffer was already provided for us.
186-
NewModule->Buffer = &getModuleCache().getInMemoryModuleCache().addBuiltPCM(
127+
ModuleBuffer = &getModuleCache().getInMemoryModuleCache().addBuiltPCM(
187128
FileName, std::move(Buffer));
188-
// Since the cached buffer is reused, it is safe to close the file
189-
// descriptor that was opened while stat()ing the PCM in
190-
// lookupModuleFile() above, it won't be needed any longer.
191-
Entry->closeFile();
192129
} else if (llvm::MemoryBuffer *Buffer =
193130
getModuleCache().getInMemoryModuleCache().lookupPCM(
194131
FileName)) {
195-
NewModule->Buffer = Buffer;
196-
// As above, the file descriptor is no longer needed.
197-
Entry->closeFile();
132+
ModuleBuffer = Buffer;
198133
} else if (getModuleCache().getInMemoryModuleCache().shouldBuildPCM(
199134
FileName)) {
200135
// Report that the module is out of date, since we tried (and failed) to
201136
// import it earlier.
202-
Entry->closeFile();
203137
return OutOfDate;
204138
} else {
139+
Entry = FileName == "-"
140+
? expectedToOptional(FileMgr.getSTDIN())
141+
: FileMgr.getOptionalFileRef(FileName, /*OpenFile=*/true,
142+
/*CacheFailure=*/false);
143+
if (!Entry) {
144+
ErrorStr = "module file not found";
145+
return Missing;
146+
}
147+
205148
// Get a buffer of the file and close the file descriptor when done.
206149
// The file is volatile because in a parallel build we expect multiple
207150
// compiler processes to use the same module file rebuilding it if needed.
208151
//
209152
// RequiresNullTerminator is false because module files don't need it, and
210153
// this allows the file to still be mmapped.
211-
auto Buf = FileMgr.getBufferForFile(NewModule->File,
212-
/*IsVolatile=*/true,
154+
auto Buf = FileMgr.getBufferForFile(*Entry, /*IsVolatile=*/true,
213155
/*RequiresNullTerminator=*/false);
214156

215157
if (!Buf) {
216158
ErrorStr = Buf.getError().message();
217159
return Missing;
218160
}
219161

220-
NewModule->Buffer = &getModuleCache().getInMemoryModuleCache().addPCM(
162+
if ((ExpectedSize && ExpectedSize != Entry->getSize()) ||
163+
(ExpectedModTime && ExpectedModTime != Entry->getModificationTime())) {
164+
ErrorStr = "module file has a different size or mtime than expected";
165+
return OutOfDate;
166+
}
167+
168+
ModuleBuffer = &getModuleCache().getInMemoryModuleCache().addPCM(
221169
FileName, std::move(*Buf));
222170
}
223171

172+
// TODO: Make it so that ModuleFile is not tied to a FileEntry.
173+
if (!Entry && !((Entry = FileMgr.getVirtualFileRef(FileName, ExpectedSize,
174+
ExpectedModTime))))
175+
return Missing;
176+
177+
// Allocate a new module.
178+
auto NewModule = std::make_unique<ModuleFile>(Type, *Entry, Generation);
179+
NewModule->Index = Chain.size();
180+
NewModule->FileName = FileName.str();
181+
NewModule->ImportLoc = ImportLoc;
182+
NewModule->InputFilesValidationTimestamp = 0;
183+
184+
if (NewModule->Kind == MK_ImplicitModule) {
185+
std::string TimestampFilename =
186+
ModuleFile::getTimestampFilename(NewModule->FileName);
187+
llvm::vfs::Status Status;
188+
// A cached stat value would be fine as well.
189+
if (!FileMgr.getNoncachedStatValue(TimestampFilename, Status))
190+
NewModule->InputFilesValidationTimestamp =
191+
llvm::sys::toTimeT(Status.getLastModificationTime());
192+
}
193+
194+
NewModule->Buffer = ModuleBuffer;
195+
224196
// Initialize the stream.
225197
NewModule->Data = PCHContainerRdr.ExtractPCH(*NewModule->Buffer);
226198

@@ -231,7 +203,7 @@ ModuleManager::addModule(StringRef FileName, ModuleKind Type,
231203
return OutOfDate;
232204

233205
// We're keeping this module. Store it everywhere.
234-
Module = Modules[*Entry] = NewModule.get();
206+
Module = Modules[FileName] = NewModule.get();
235207

236208
updateModuleImports(*NewModule, ImportedBy, ImportLoc);
237209

@@ -277,7 +249,7 @@ void ModuleManager::removeModules(ModuleIterator First) {
277249

278250
// Delete the modules.
279251
for (ModuleIterator victim = First; victim != Last; ++victim)
280-
Modules.erase(victim->File);
252+
Modules.erase(victim->File.getName());
281253

282254
Chain.erase(Chain.begin() + (First - begin()), Chain.end());
283255
}
@@ -436,29 +408,6 @@ void ModuleManager::visit(llvm::function_ref<bool(ModuleFile &M)> Visitor,
436408
returnVisitState(std::move(State));
437409
}
438410

439-
bool ModuleManager::lookupModuleFile(StringRef FileName, off_t ExpectedSize,
440-
time_t ExpectedModTime,
441-
OptionalFileEntryRef &File) {
442-
if (FileName == "-") {
443-
File = expectedToOptional(FileMgr.getSTDIN());
444-
return false;
445-
}
446-
447-
// Open the file immediately to ensure there is no race between stat'ing and
448-
// opening the file.
449-
File = FileMgr.getOptionalFileRef(FileName, /*OpenFile=*/true,
450-
/*CacheFailure=*/false);
451-
452-
if (File &&
453-
((ExpectedSize && ExpectedSize != File->getSize()) ||
454-
(ExpectedModTime && ExpectedModTime != File->getModificationTime())))
455-
// Do not destroy File, as it may be referenced. If we need to rebuild it,
456-
// it will be destroyed by removeModules.
457-
return true;
458-
459-
return false;
460-
}
461-
462411
#ifndef NDEBUG
463412
namespace llvm {
464413

0 commit comments

Comments
 (0)