Skip to content

Commit 9d4837f

Browse files
authored
[clang][deps][modules] Allocate input file paths lazily (#114457)
This PR builds on top of #113984 and attempts to avoid allocating input file paths eagerly. Instead, the `InputFileInfo` type used by `ASTReader` now only holds `StringRef`s that point into the PCM file buffer, and the full input file paths get resolved on demand. The dependency scanner makes use of this in a bit of a roundabout way: `ModuleDeps` now only holds (an owning copy of) the short unresolved input file paths, which get resolved lazily. This can be a big win, I'm seeing up to a 5% speedup.
1 parent f895fc9 commit 9d4837f

29 files changed

+161
-151
lines changed

clang/include/clang/Serialization/ModuleFile.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,15 +62,20 @@ enum ModuleKind {
6262

6363
/// The input file info that has been loaded from an AST file.
6464
struct InputFileInfo {
65-
std::string FilenameAsRequested;
66-
std::string Filename;
65+
StringRef UnresolvedImportedFilenameAsRequested;
66+
StringRef UnresolvedImportedFilename;
67+
6768
uint64_t ContentHash;
6869
off_t StoredSize;
6970
time_t StoredTime;
7071
bool Overridden;
7172
bool Transient;
7273
bool TopLevel;
7374
bool ModuleMap;
75+
76+
bool isValid() const {
77+
return !UnresolvedImportedFilenameAsRequested.empty();
78+
}
7479
};
7580

7681
/// The input file that has been loaded from this AST file, along with

clang/include/clang/Tooling/DependencyScanning/ModuleDepCollector.h

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -120,10 +120,6 @@ struct ModuleDeps {
120120
/// additionally appear in \c FileDeps as a dependency.
121121
std::string ClangModuleMapFile;
122122

123-
/// A collection of absolute paths to files that this module directly depends
124-
/// on, not including transitive dependencies.
125-
llvm::StringSet<> FileDeps;
126-
127123
/// A collection of absolute paths to module map files that this module needs
128124
/// to know about. The ordering is significant.
129125
std::vector<std::string> ModuleMapFileDeps;
@@ -143,13 +139,25 @@ struct ModuleDeps {
143139
/// an entity from this module is used.
144140
llvm::SmallVector<Module::LinkLibrary, 2> LinkLibraries;
145141

142+
/// Invokes \c Cb for all file dependencies of this module. Each provided
143+
/// \c StringRef is only valid within the individual callback invocation.
144+
void forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const;
145+
146146
/// Get (or compute) the compiler invocation that can be used to build this
147147
/// module. Does not include argv[0].
148148
const std::vector<std::string> &getBuildArguments();
149149

150150
private:
151+
friend class ModuleDepCollector;
151152
friend class ModuleDepCollectorPP;
152153

154+
/// The base directory for relative paths in \c FileDeps.
155+
std::string FileDepsBaseDir;
156+
157+
/// A collection of paths to files that this module directly depends on, not
158+
/// including transitive dependencies.
159+
std::vector<std::string> FileDeps;
160+
153161
std::variant<std::monostate, CowCompilerInvocation, std::vector<std::string>>
154162
BuildInfo;
155163
};

clang/lib/Serialization/ASTReader.cpp

Lines changed: 17 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -2465,7 +2465,7 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
24652465
return InputFileInfo();
24662466

24672467
// If we've already loaded this input file, return it.
2468-
if (!F.InputFileInfosLoaded[ID - 1].Filename.empty())
2468+
if (F.InputFileInfosLoaded[ID - 1].isValid())
24692469
return F.InputFileInfosLoaded[ID - 1];
24702470

24712471
// Go find this input file.
@@ -2502,21 +2502,11 @@ InputFileInfo ASTReader::getInputFileInfo(ModuleFile &F, unsigned ID) {
25022502
R.Transient = static_cast<bool>(Record[4]);
25032503
R.TopLevel = static_cast<bool>(Record[5]);
25042504
R.ModuleMap = static_cast<bool>(Record[6]);
2505-
std::tie(R.FilenameAsRequested, R.Filename) = [&]() {
2506-
uint16_t AsRequestedLength = Record[7];
2507-
2508-
StringRef NameAsRequestedRef = Blob.substr(0, AsRequestedLength);
2509-
StringRef NameRef = Blob.substr(AsRequestedLength);
2510-
2511-
std::string NameAsRequested =
2512-
ResolveImportedPathAndAllocate(PathBuf, NameAsRequestedRef, F);
2513-
std::string Name = ResolveImportedPathAndAllocate(PathBuf, NameRef, F);
2514-
2515-
if (Name.empty())
2516-
Name = NameAsRequested;
2517-
2518-
return std::make_pair(std::move(NameAsRequested), std::move(Name));
2519-
}();
2505+
uint16_t AsRequestedLength = Record[7];
2506+
R.UnresolvedImportedFilenameAsRequested = Blob.substr(0, AsRequestedLength);
2507+
R.UnresolvedImportedFilename = Blob.substr(AsRequestedLength);
2508+
if (R.UnresolvedImportedFilename.empty())
2509+
R.UnresolvedImportedFilename = R.UnresolvedImportedFilenameAsRequested;
25202510

25212511
Expected<llvm::BitstreamEntry> MaybeEntry = Cursor.advance();
25222512
if (!MaybeEntry) // FIXME this drops errors on the floor.
@@ -2568,7 +2558,8 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
25682558
time_t StoredTime = FI.StoredTime;
25692559
bool Overridden = FI.Overridden;
25702560
bool Transient = FI.Transient;
2571-
StringRef Filename = FI.FilenameAsRequested;
2561+
auto Filename =
2562+
ResolveImportedPath(PathBuf, FI.UnresolvedImportedFilenameAsRequested, F);
25722563
uint64_t StoredContentHash = FI.ContentHash;
25732564

25742565
// For standard C++ modules, we don't need to check the inputs.
@@ -2584,17 +2575,17 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
25842575
Overridden = false;
25852576
}
25862577

2587-
auto File = FileMgr.getOptionalFileRef(Filename, /*OpenFile=*/false);
2578+
auto File = FileMgr.getOptionalFileRef(*Filename, /*OpenFile=*/false);
25882579

25892580
// For an overridden file, create a virtual file with the stored
25902581
// size/timestamp.
25912582
if ((Overridden || Transient || SkipChecks) && !File)
2592-
File = FileMgr.getVirtualFileRef(Filename, StoredSize, StoredTime);
2583+
File = FileMgr.getVirtualFileRef(*Filename, StoredSize, StoredTime);
25932584

25942585
if (!File) {
25952586
if (Complain) {
25962587
std::string ErrorStr = "could not find file '";
2597-
ErrorStr += Filename;
2588+
ErrorStr += *Filename;
25982589
ErrorStr += "' referenced by AST file '";
25992590
ErrorStr += F.FileName;
26002591
ErrorStr += "'";
@@ -2614,7 +2605,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
26142605
if ((!Overridden && !Transient) && !SkipChecks &&
26152606
SM.isFileOverridden(*File)) {
26162607
if (Complain)
2617-
Error(diag::err_fe_pch_file_overridden, Filename);
2608+
Error(diag::err_fe_pch_file_overridden, *Filename);
26182609

26192610
// After emitting the diagnostic, bypass the overriding file to recover
26202611
// (this creates a separate FileEntry).
@@ -2706,7 +2697,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
27062697
// The top-level PCH is stale.
27072698
StringRef TopLevelPCHName(ImportStack.back()->FileName);
27082699
Diag(diag::err_fe_ast_file_modified)
2709-
<< Filename << moduleKindForDiagnostic(ImportStack.back()->Kind)
2700+
<< *Filename << moduleKindForDiagnostic(ImportStack.back()->Kind)
27102701
<< TopLevelPCHName << FileChange.Kind
27112702
<< (FileChange.Old && FileChange.New)
27122703
<< llvm::itostr(FileChange.Old.value_or(0))
@@ -2715,7 +2706,7 @@ InputFile ASTReader::getInputFile(ModuleFile &F, unsigned ID, bool Complain) {
27152706
// Print the import stack.
27162707
if (ImportStack.size() > 1) {
27172708
Diag(diag::note_pch_required_by)
2718-
<< Filename << ImportStack[0]->FileName;
2709+
<< *Filename << ImportStack[0]->FileName;
27192710
for (unsigned I = 1; I < ImportStack.size(); ++I)
27202711
Diag(diag::note_pch_required_by)
27212712
<< ImportStack[I-1]->FileName << ImportStack[I]->FileName;
@@ -2963,8 +2954,10 @@ ASTReader::ReadControlBlock(ModuleFile &F,
29632954
for (unsigned I = 0; I < N; ++I) {
29642955
bool IsSystem = I >= NumUserInputs;
29652956
InputFileInfo FI = getInputFileInfo(F, I + 1);
2957+
auto FilenameAsRequested = ResolveImportedPath(
2958+
PathBuf, FI.UnresolvedImportedFilenameAsRequested, F);
29662959
Listener->visitInputFile(
2967-
FI.FilenameAsRequested, IsSystem, FI.Overridden,
2960+
*FilenameAsRequested, IsSystem, FI.Overridden,
29682961
F.Kind == MK_ExplicitModule || F.Kind == MK_PrebuiltModule);
29692962
}
29702963
}

clang/lib/Tooling/DependencyScanning/ModuleDepCollector.cpp

Lines changed: 25 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,16 @@ using namespace clang;
2121
using namespace tooling;
2222
using namespace dependencies;
2323

24+
void ModuleDeps::forEachFileDep(llvm::function_ref<void(StringRef)> Cb) const {
25+
SmallString<0> PathBuf;
26+
PathBuf.reserve(256);
27+
for (StringRef FileDep : FileDeps) {
28+
auto ResolvedFileDep =
29+
ASTReader::ResolveImportedPath(PathBuf, FileDep, FileDepsBaseDir);
30+
Cb(*ResolvedFileDep);
31+
}
32+
}
33+
2434
const std::vector<std::string> &ModuleDeps::getBuildArguments() {
2535
assert(!std::holds_alternative<std::monostate>(BuildInfo) &&
2636
"Using uninitialized ModuleDeps");
@@ -596,6 +606,7 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
596606
serialization::ModuleFile *MF =
597607
MDC.ScanInstance.getASTReader()->getModuleManager().lookup(
598608
*M->getASTFile());
609+
MD.FileDepsBaseDir = MF->BaseDirectory;
599610
MDC.ScanInstance.getASTReader()->visitInputFileInfos(
600611
*MF, /*IncludeSystem=*/true,
601612
[&](const serialization::InputFileInfo &IFI, bool IsSystem) {
@@ -604,25 +615,30 @@ ModuleDepCollectorPP::handleTopLevelModule(const Module *M) {
604615
// actual on-disk module map file that allowed inferring the module,
605616
// which is what we need for building the module explicitly
606617
// Let's ignore this file.
607-
if (StringRef(IFI.Filename).ends_with("__inferred_module.map"))
618+
if (IFI.UnresolvedImportedFilename.ends_with("__inferred_module.map"))
608619
return;
609-
MDC.addFileDep(MD, IFI.Filename);
620+
MDC.addFileDep(MD, IFI.UnresolvedImportedFilename);
610621
});
611622

612623
llvm::DenseSet<const Module *> SeenDeps;
613624
addAllSubmodulePrebuiltDeps(M, MD, SeenDeps);
614625
addAllSubmoduleDeps(M, MD, SeenDeps);
615626
addAllAffectingClangModules(M, MD, SeenDeps);
616627

628+
SmallString<0> PathBuf;
629+
PathBuf.reserve(256);
617630
MDC.ScanInstance.getASTReader()->visitInputFileInfos(
618631
*MF, /*IncludeSystem=*/true,
619632
[&](const serialization::InputFileInfo &IFI, bool IsSystem) {
620633
if (!(IFI.TopLevel && IFI.ModuleMap))
621634
return;
622-
if (StringRef(IFI.FilenameAsRequested)
623-
.ends_with("__inferred_module.map"))
635+
if (IFI.UnresolvedImportedFilenameAsRequested.ends_with(
636+
"__inferred_module.map"))
624637
return;
625-
MD.ModuleMapFileDeps.emplace_back(IFI.FilenameAsRequested);
638+
auto ResolvedFilenameAsRequested = ASTReader::ResolveImportedPath(
639+
PathBuf, IFI.UnresolvedImportedFilenameAsRequested,
640+
MF->BaseDirectory);
641+
MD.ModuleMapFileDeps.emplace_back(*ResolvedFilenameAsRequested);
626642
});
627643

628644
CowCompilerInvocation CI =
@@ -779,23 +795,16 @@ static StringRef makeAbsoluteAndPreferred(CompilerInstance &CI, StringRef Path,
779795
void ModuleDepCollector::addFileDep(StringRef Path) {
780796
if (IsStdModuleP1689Format) {
781797
// Within P1689 format, we don't want all the paths to be absolute path
782-
// since it may violate the tranditional make style dependencies info.
783-
FileDeps.push_back(std::string(Path));
798+
// since it may violate the traditional make style dependencies info.
799+
FileDeps.emplace_back(Path);
784800
return;
785801
}
786802

787803
llvm::SmallString<256> Storage;
788804
Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
789-
FileDeps.push_back(std::string(Path));
805+
FileDeps.emplace_back(Path);
790806
}
791807

792808
void ModuleDepCollector::addFileDep(ModuleDeps &MD, StringRef Path) {
793-
if (IsStdModuleP1689Format) {
794-
MD.FileDeps.insert(Path);
795-
return;
796-
}
797-
798-
llvm::SmallString<256> Storage;
799-
Path = makeAbsoluteAndPreferred(ScanInstance, Path, Storage);
800-
MD.FileDeps.insert(Path);
809+
MD.FileDeps.emplace_back(Path);
801810
}

clang/test/ClangScanDeps/diagnostics.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,8 @@ module mod { header "mod.h" }
3434
// CHECK: ],
3535
// CHECK-NEXT: "context-hash": "[[HASH_MOD:.*]]",
3636
// CHECK-NEXT: "file-deps": [
37+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
3738
// CHECK-NEXT: "[[PREFIX]]/mod.h"
38-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
3939
// CHECK-NEXT: ],
4040
// CHECK-NEXT: "link-libraries": [],
4141
// CHECK-NEXT: "name": "mod"

clang/test/ClangScanDeps/header-search-pruning-transitive.c

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -72,8 +72,8 @@ module X { header "X.h" }
7272
// CHECK: ],
7373
// CHECK-NEXT: "context-hash": "[[HASH_X:.*]]",
7474
// CHECK-NEXT: "file-deps": [
75-
// CHECK-NEXT: "[[PREFIX]]/X.h",
76-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
75+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
76+
// CHECK-NEXT: "[[PREFIX]]/X.h"
7777
// CHECK-NEXT: ],
7878
// CHECK-NEXT: "link-libraries": [],
7979
// CHECK-NEXT: "name": "X"
@@ -85,11 +85,11 @@ module X { header "X.h" }
8585
// CHECK: ],
8686
// CHECK-NEXT: "context-hash": "[[HASH_Y_WITH_A]]",
8787
// CHECK-NEXT: "file-deps": [
88+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
8889
// CHECK-NEXT: "[[PREFIX]]/Y.h",
89-
// CHECK-NEXT: "[[PREFIX]]/a/a.h",
9090
// CHECK-NEXT: "[[PREFIX]]/begin/begin.h",
91-
// CHECK-NEXT: "[[PREFIX]]/end/end.h",
92-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
91+
// CHECK-NEXT: "[[PREFIX]]/a/a.h",
92+
// CHECK-NEXT: "[[PREFIX]]/end/end.h"
9393
// CHECK-NEXT: ],
9494
// CHECK-NEXT: "link-libraries": [],
9595
// CHECK-NEXT: "name": "Y"
@@ -128,8 +128,8 @@ module X { header "X.h" }
128128
// also has a different context hash from the first version of module X.
129129
// CHECK-NOT: "context-hash": "[[HASH_X]]",
130130
// CHECK: "file-deps": [
131-
// CHECK-NEXT: "[[PREFIX]]/X.h",
132-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
131+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
132+
// CHECK-NEXT: "[[PREFIX]]/X.h"
133133
// CHECK-NEXT: ],
134134
// CHECK-NEXT: "link-libraries": [],
135135
// CHECK-NEXT: "name": "X"
@@ -141,10 +141,10 @@ module X { header "X.h" }
141141
// CHECK: ],
142142
// CHECK-NEXT: "context-hash": "[[HASH_Y_WITHOUT_A]]",
143143
// CHECK-NEXT: "file-deps": [
144+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
144145
// CHECK-NEXT: "[[PREFIX]]/Y.h",
145146
// CHECK-NEXT: "[[PREFIX]]/begin/begin.h",
146-
// CHECK-NEXT: "[[PREFIX]]/end/end.h",
147-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
147+
// CHECK-NEXT: "[[PREFIX]]/end/end.h"
148148
// CHECK-NEXT: ],
149149
// CHECK-NEXT: "link-libraries": [],
150150
// CHECK-NEXT: "name": "Y"

clang/test/ClangScanDeps/link-libraries.c

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,8 @@ module transitive {
4444
// CHECK: ],
4545
// CHECK-NEXT: "context-hash": "{{.*}}",
4646
// CHECK-NEXT: "file-deps": [
47+
// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/module.modulemap",
4748
// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/Framework.framework/Headers/Framework.h"
48-
// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/module.modulemap"
4949
// CHECK-NEXT: ],
5050
// CHECK-NEXT: "link-libraries": [
5151
// CHECK-NEXT: {
@@ -67,8 +67,8 @@ module transitive {
6767
// CHECK: ],
6868
// CHECK-NEXT: "context-hash": "{{.*}}",
6969
// CHECK-NEXT: "file-deps": [
70+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
7071
// CHECK-NEXT: "[[PREFIX]]/direct.h"
71-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
7272
// CHECK-NEXT: ],
7373
// CHECK-NEXT: "link-libraries": [],
7474
// CHECK-NEXT: "name": "direct"
@@ -89,10 +89,10 @@ module transitive {
8989
// CHECK: ],
9090
// CHECK-NEXT: "context-hash": "{{.*}}",
9191
// CHECK-NEXT: "file-deps": [
92+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
93+
// CHECK-NEXT: "[[PREFIX]]/root.h",
94+
// CHECK-NEXT: "[[PREFIX]]/root/textual.h",
9295
// CHECK-NEXT: "[[PREFIX]]/Inputs/frameworks/module.modulemap"
93-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
94-
// CHECK-NEXT: "[[PREFIX]]/root.h"
95-
// CHECK-NEXT: "[[PREFIX]]/root/textual.h"
9696
// CHECK-NEXT: ],
9797
// CHECK-NEXT: "link-libraries": [],
9898
// CHECK-NEXT: "name": "root"
@@ -104,7 +104,7 @@ module transitive {
104104
// CHECK: ],
105105
// CHECK-NEXT: "context-hash": "{{.*}}",
106106
// CHECK-NEXT: "file-deps": [
107-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
107+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
108108
// CHECK-NEXT: "[[PREFIX]]/transitive.h"
109109
// CHECK-NEXT: ],
110110
// CHECK-NEXT: "link-libraries": [

clang/test/ClangScanDeps/modules-context-hash.c

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,9 @@
3434
// CHECK: ],
3535
// CHECK-NEXT: "context-hash": "[[HASH_MOD_A:.*]]",
3636
// CHECK-NEXT: "file-deps": [
37-
// CHECK-NEXT: "[[PREFIX]]/a/dep.h",
37+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
3838
// CHECK-NEXT: "[[PREFIX]]/mod.h",
39-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
39+
// CHECK-NEXT: "[[PREFIX]]/a/dep.h"
4040
// CHECK-NEXT: ],
4141
// CHECK-NEXT: "link-libraries": [],
4242
// CHECK-NEXT: "name": "mod"
@@ -72,9 +72,9 @@
7272
// CHECK: ],
7373
// CHECK-NOT: "context-hash": "[[HASH_MOD_A]]",
7474
// CHECK: "file-deps": [
75-
// CHECK-NEXT: "[[PREFIX]]/b/dep.h",
75+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
7676
// CHECK-NEXT: "[[PREFIX]]/mod.h",
77-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
77+
// CHECK-NEXT: "[[PREFIX]]/b/dep.h"
7878
// CHECK-NEXT: ],
7979
// CHECK-NEXT: "link-libraries": [],
8080
// CHECK-NEXT: "name": "mod"

clang/test/ClangScanDeps/modules-dep-args.c

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,8 +58,8 @@ module Direct { header "direct.h" }
5858
// CHECK: ],
5959
// CHECK-NEXT: "context-hash": "{{.*}}",
6060
// CHECK-NEXT: "file-deps": [
61-
// CHECK-NEXT: "[[PREFIX]]/direct.h",
62-
// CHECK-NEXT: "[[PREFIX]]/module.modulemap"
61+
// CHECK-NEXT: "[[PREFIX]]/module.modulemap",
62+
// CHECK-NEXT: "[[PREFIX]]/direct.h"
6363
// CHECK-NEXT: ],
6464
// CHECK-NEXT: "link-libraries": [],
6565
// CHECK-NEXT: "name": "Direct"

0 commit comments

Comments
 (0)