Skip to content

Commit af2d11b

Browse files
committed
[C++20][Modules] Implement include translation.
This addresses [cpp.include]/7 (when encountering `#include header-name`):

"If the header identified by the header-name denotes an importable header, it is implementation-defined whether the #include preprocessing directive is instead replaced by an import directive."

In this implementation, include translation is performed _only_ for headers in the Global Module Fragment, so:

```
module;
#include "will-be-translated.h" // IFF the header unit is available.

export module M;
#include "will-not-be-translated.h" // even if the header unit is available
```

The reasoning is that, in general, includes in the module purview would not be validly translatable (they would have to immediately follow the module decl, without any other intervening decls). Otherwise that would violate the rules on contiguous import directives.

This would be quite complex to track in the preprocessor, and for relatively little gain (the user can write 'import "will-not-be-translated.h";' instead).

TODO: This is one area where it becomes increasingly difficult to disambiguate clang modules in C++ from C++ standard modules. That needs to be addressed in both the driver and the FE.

Differential Revision: https://reviews.llvm.org/D128981
1 parent 3085e42 commit af2d11b

File tree

5 files changed

+266
-28
lines changed

5 files changed

+266
-28
lines changed

clang/include/clang/Lex/Preprocessor.h

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,6 +385,7 @@ class Preprocessor {
385385

386386
bool atTopLevel() { return S <= 0; }
387387
bool afterImportSeq() { return S == AfterImportSeq; }
388+
bool afterTopLevelSeq() { return S == AfterTopLevelTokenSeq; }
388389

389390
private:
390391
State S;
@@ -397,6 +398,67 @@ class Preprocessor {
397398
/// Our current position within a C++20 import-seq.
398399
ImportSeq ImportSeqState = ImportSeq::AfterTopLevelTokenSeq;
399400

401+
/// Track whether we are in a Global Module Fragment (GMF), i.e. the region
/// introduced by a first 'module' keyword that is directly followed by ';'.
///
/// This is a small state machine: the handle*() methods are the transitions
/// and inGMF() is the only query. Each handler only reacts in the states named
/// in its body; in every other state it leaves the tracker unchanged.
402+
class TrackGMF {
403+
public:
404+
enum GMFState : int {
405+
// 'module' ';' has been seen: we are inside the GMF. Only handleExport()
// and handleModule() move the tracker out of this state.
GMFActive = 1,
406+
// The first 'module' keyword was just seen; the next token decides whether
// it opens a GMF (';' -> GMFActive) or not (anything else ->
// GMFAbsentOrEnded).
MaybeGMF = 0,
407+
// No 'module' keyword, and nothing that rules out a GMF, has been seen yet.
BeforeGMFIntroducer = -1,
408+
// There is no GMF, or it has ended. No handler leaves this state.
GMFAbsentOrEnded = -2,
409+
};
410+
411+
/// Start tracking from state \p S. Deliberately not 'explicit', so that a
/// tracker can be copy-initialized directly from a GMFState value.
TrackGMF(GMFState S) : S(S) {}
412+
413+
/// Saw a semicolon.
/// Only meaningful in MaybeGMF; a ';' seen in any other state is ignored.
414+
void handleSemi() {
415+
// If it is immediately after the first instance of the module keyword,
416+
// then that introduces the GMF.
417+
if (S == MaybeGMF)
418+
S = GMFActive;
419+
}
420+
421+
/// Saw an 'export' identifier.
/// Unconditional: applies in every state, including GMFActive.
422+
void handleExport() {
423+
// The presence of an 'export' keyword always ends or excludes a GMF.
424+
S = GMFAbsentOrEnded;
425+
}
426+
427+
/// Saw an 'import' identifier.
/// \param AfterTopLevelTokenSeq true when the caller observed the keyword
/// while its import-seq tracking was in the AfterTopLevelTokenSeq state.
/// An 'import' seen in any other situation leaves the state unchanged; in
/// particular it does not end an active GMF.
428+
void handleImport(bool AfterTopLevelTokenSeq) {
429+
// If we see this before any 'module' kw, then we have no GMF.
430+
if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
431+
S = GMFAbsentOrEnded;
432+
}
433+
434+
/// Saw a 'module' identifier.
/// \param AfterTopLevelTokenSeq true when the caller observed the keyword
/// while its import-seq tracking was in the AfterTopLevelTokenSeq state.
435+
void handleModule(bool AfterTopLevelTokenSeq) {
436+
// This was the first module identifier and not preceded by any token
437+
// that would exclude a GMF. It could begin a GMF, but only if directly
438+
// followed by a semicolon.
439+
if (AfterTopLevelTokenSeq && S == BeforeGMFIntroducer)
440+
S = MaybeGMF;
441+
else
442+
// Any other 'module' keyword: either it cannot introduce a GMF, or it is
// a later 'module' (including one seen while the GMF is active), which
// ends the fragment.
S = GMFAbsentOrEnded;
443+
}
444+
445+
/// Saw any other token.
/// Tokens seen in GMFActive do not end the fragment; only MaybeGMF reacts.
446+
void handleMisc() {
447+
// We saw something other than ; after the 'module' kw, so not a GMF.
448+
if (S == MaybeGMF)
449+
S = GMFAbsentOrEnded;
450+
}
451+
452+
/// \returns true only while the tracker is in GMFActive, i.e. after
/// 'module' ';' and before the fragment has been ended.
bool inGMF() { return S == GMFActive; }
453+
454+
private:
455+
/// Track the transitions into and out of a Global Module Fragment,
456+
/// if one is present.
457+
GMFState S;
458+
};
459+
460+
TrackGMF TrackGMFState = TrackGMF::BeforeGMFIntroducer;
461+
400462
/// Whether the module import expects an identifier next. Otherwise,
401463
/// it expects a '.' or ';'.
402464
bool ModuleImportExpectsIdentifier = false;
@@ -2414,6 +2476,7 @@ class Preprocessor {
24142476
None,
24152477
ModuleBegin,
24162478
ModuleImport,
2479+
HeaderUnitImport,
24172480
SkippedModuleImport,
24182481
Failure,
24192482
} Kind;

clang/lib/Lex/PPDirectives.cpp

Lines changed: 66 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -1983,6 +1983,10 @@ void Preprocessor::HandleIncludeDirective(SourceLocation HashLoc,
19831983
EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
19841984
tok::annot_module_begin, Action.ModuleForHeader);
19851985
break;
1986+
case ImportAction::HeaderUnitImport:
1987+
EnterAnnotationToken(SourceRange(HashLoc, EndLoc), tok::annot_header_unit,
1988+
Action.ModuleForHeader);
1989+
break;
19861990
case ImportAction::ModuleImport:
19871991
EnterAnnotationToken(SourceRange(HashLoc, EndLoc),
19881992
tok::annot_module_include, Action.ModuleForHeader);
@@ -2191,6 +2195,17 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
21912195
// known to have no effect beyond its effect on module visibility -- that is,
21922196
// if it's got an include guard that is already defined, set to Import if it
21932197
// is a modular header we've already built and should import.
2198+
2199+
// For C++20 Modules
2200+
// [cpp.include]/7 If the header identified by the header-name denotes an
2201+
// importable header, it is implementation-defined whether the #include
2202+
// preprocessing directive is instead replaced by an import directive.
2203+
// For this implementation, the translation is permitted when we are parsing
2204+
// the Global Module Fragment, and not otherwise (the cases where it would be
2205+
// valid to replace an include with an import are highly constrained once in
2206+
// named module purview; this choice avoids considerable complexity in
2207+
// determining valid cases).
2208+
21942209
enum { Enter, Import, Skip, IncludeLimitReached } Action = Enter;
21952210

21962211
if (PPOpts->SingleFileParseMode)
@@ -2203,13 +2218,34 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
22032218
alreadyIncluded(*File))
22042219
Action = IncludeLimitReached;
22052220

2221+
bool MaybeTranslateInclude = Action == Enter && File && SuggestedModule &&
2222+
!isForModuleBuilding(SuggestedModule.getModule(),
2223+
getLangOpts().CurrentModule,
2224+
getLangOpts().ModuleName);
2225+
2226+
// FIXME: We do not have a good way to disambiguate C++ clang modules from
2227+
// C++ standard modules (other than use/non-use of Header Units).
2228+
Module *SM = SuggestedModule.getModule();
2229+
// Maybe a usable Header Unit
2230+
bool UsableHeaderUnit = false;
2231+
if (getLangOpts().CPlusPlusModules && SM && SM->isHeaderUnit()) {
2232+
if (TrackGMFState.inGMF() || IsImportDecl)
2233+
UsableHeaderUnit = true;
2234+
else if (!IsImportDecl) {
2235+
// This is a Header Unit that we do not include-translate
2236+
SuggestedModule = ModuleMap::KnownHeader();
2237+
SM = nullptr;
2238+
}
2239+
}
2240+
// Maybe a usable clang header module.
2241+
bool UsableHeaderModule =
2242+
(getLangOpts().CPlusPlusModules || getLangOpts().Modules) && SM &&
2243+
!SM->isHeaderUnit();
2244+
22062245
// Determine whether we should try to import the module for this #include, if
22072246
// there is one. Don't do so if precompiled module support is disabled or we
22082247
// are processing this module textually (because we're building the module).
2209-
if (Action == Enter && File && SuggestedModule && getLangOpts().Modules &&
2210-
!isForModuleBuilding(SuggestedModule.getModule(),
2211-
getLangOpts().CurrentModule,
2212-
getLangOpts().ModuleName)) {
2248+
if (MaybeTranslateInclude && (UsableHeaderUnit || UsableHeaderModule)) {
22132249
// If this include corresponds to a module but that module is
22142250
// unavailable, diagnose the situation and bail out.
22152251
// FIXME: Remove this; loadModule does the same check (but produces
@@ -2226,7 +2262,7 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
22262262
// FIXME: Should we have a second loadModule() overload to avoid this
22272263
// extra lookup step?
22282264
SmallVector<std::pair<IdentifierInfo *, SourceLocation>, 2> Path;
2229-
for (Module *Mod = SuggestedModule.getModule(); Mod; Mod = Mod->Parent)
2265+
for (Module *Mod = SM; Mod; Mod = Mod->Parent)
22302266
Path.push_back(std::make_pair(getIdentifierInfo(Mod->Name),
22312267
FilenameTok.getLocation()));
22322268
std::reverse(Path.begin(), Path.end());
@@ -2293,17 +2329,23 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
22932329
// Ask HeaderInfo if we should enter this #include file. If not, #including
22942330
// this file will have no effect.
22952331
if (Action == Enter && File &&
2296-
!HeaderInfo.ShouldEnterIncludeFile(
2297-
*this, &File->getFileEntry(), EnterOnce, getLangOpts().Modules,
2298-
SuggestedModule.getModule(), IsFirstIncludeOfFile)) {
2332+
!HeaderInfo.ShouldEnterIncludeFile(*this, &File->getFileEntry(),
2333+
EnterOnce, getLangOpts().Modules, SM,
2334+
IsFirstIncludeOfFile)) {
2335+
// C++ standard modules:
2336+
// If we are not in the GMF, then we textually include only
2337+
// clang modules:
22992338
// Even if we've already preprocessed this header once and know that we
23002339
// don't need to see its contents again, we still need to import it if it's
23012340
// modular because we might not have imported it from this submodule before.
23022341
//
23032342
// FIXME: We don't do this when compiling a PCH because the AST
23042343
// serialization layer can't cope with it. This means we get local
23052344
// submodule visibility semantics wrong in that case.
2306-
Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
2345+
if (UsableHeaderUnit && !getLangOpts().CompilingPCH)
2346+
Action = TrackGMFState.inGMF() ? Import : Skip;
2347+
else
2348+
Action = (SuggestedModule && !getLangOpts().CompilingPCH) ? Import : Skip;
23072349
}
23082350

23092351
// Check for circular inclusion of the main file.
@@ -2440,8 +2482,8 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
24402482
switch (Action) {
24412483
case Skip:
24422484
// If we don't need to enter the file, stop now.
2443-
if (Module *M = SuggestedModule.getModule())
2444-
return {ImportAction::SkippedModuleImport, M};
2485+
if (SM)
2486+
return {ImportAction::SkippedModuleImport, SM};
24452487
return {ImportAction::None};
24462488

24472489
case IncludeLimitReached:
@@ -2451,16 +2493,15 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
24512493

24522494
case Import: {
24532495
// If this is a module import, make it visible if needed.
2454-
Module *M = SuggestedModule.getModule();
2455-
assert(M && "no module to import");
2496+
assert(SM && "no module to import");
24562497

2457-
makeModuleVisible(M, EndLoc);
2498+
makeModuleVisible(SM, EndLoc);
24582499

24592500
if (IncludeTok.getIdentifierInfo()->getPPKeywordID() ==
24602501
tok::pp___include_macros)
24612502
return {ImportAction::None};
24622503

2463-
return {ImportAction::ModuleImport, M};
2504+
return {ImportAction::ModuleImport, SM};
24642505
}
24652506

24662507
case Enter:
@@ -2492,13 +2533,14 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
24922533
return {ImportAction::None};
24932534

24942535
// Determine if we're switching to building a new submodule, and which one.
2495-
if (auto *M = SuggestedModule.getModule()) {
2496-
if (M->getTopLevelModule()->ShadowingModule) {
2536+
// This does not apply for C++20 modules header units.
2537+
if (SM && !SM->isHeaderUnit()) {
2538+
if (SM->getTopLevelModule()->ShadowingModule) {
24972539
// We are building a submodule that belongs to a shadowed module. This
24982540
// means we find header files in the shadowed module.
2499-
Diag(M->DefinitionLoc, diag::err_module_build_shadowed_submodule)
2500-
<< M->getFullModuleName();
2501-
Diag(M->getTopLevelModule()->ShadowingModule->DefinitionLoc,
2541+
Diag(SM->DefinitionLoc, diag::err_module_build_shadowed_submodule)
2542+
<< SM->getFullModuleName();
2543+
Diag(SM->getTopLevelModule()->ShadowingModule->DefinitionLoc,
25022544
diag::note_previous_definition);
25032545
return {ImportAction::None};
25042546
}
@@ -2511,22 +2553,22 @@ Preprocessor::ImportAction Preprocessor::HandleHeaderIncludeOrImport(
25112553
// that PCH, which means we should enter the submodule. We need to teach
25122554
// the AST serialization layer to deal with the resulting AST.
25132555
if (getLangOpts().CompilingPCH &&
2514-
isForModuleBuilding(M, getLangOpts().CurrentModule,
2556+
isForModuleBuilding(SM, getLangOpts().CurrentModule,
25152557
getLangOpts().ModuleName))
25162558
return {ImportAction::None};
25172559

25182560
assert(!CurLexerSubmodule && "should not have marked this as a module yet");
2519-
CurLexerSubmodule = M;
2561+
CurLexerSubmodule = SM;
25202562

25212563
// Let the macro handling code know that any future macros are within
25222564
// the new submodule.
2523-
EnterSubmodule(M, EndLoc, /*ForPragma*/false);
2565+
EnterSubmodule(SM, EndLoc, /*ForPragma*/ false);
25242566

25252567
// Let the parser know that any future declarations are within the new
25262568
// submodule.
25272569
// FIXME: There's no point doing this if we're handling a #__include_macros
25282570
// directive.
2529-
return {ImportAction::ModuleBegin, M};
2571+
return {ImportAction::ModuleBegin, SM};
25302572
}
25312573

25322574
assert(!IsImportDecl && "failed to diagnose missing module for import decl");

clang/lib/Lex/Preprocessor.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -941,6 +941,9 @@ void Preprocessor::Lex(Token &Result) {
941941

942942
// Update ImportSeqState to track our position within a C++20 import-seq
943943
// if this token is being produced as a result of phase 4 of translation.
944+
// Update TrackGMFState to decide if we are currently in a Global Module
945+
// Fragment. GMF state updates should precede ImportSeq ones, since GMF state
946+
// depends on the prevailing ImportSeq state in two cases.
944947
if (getLangOpts().CPlusPlusModules && LexLevel == 1 &&
945948
!Result.getFlag(Token::IsReinjected)) {
946949
switch (Result.getKind()) {
@@ -953,18 +956,24 @@ void Preprocessor::Lex(Token &Result) {
953956
case tok::r_brace:
954957
ImportSeqState.handleCloseBrace();
955958
break;
959+
// This token is injected to represent the translation of '#include "a.h"'
960+
// into "import a.h;". Mimic the notional ';'.
961+
case tok::annot_module_include:
956962
case tok::semi:
963+
TrackGMFState.handleSemi();
957964
ImportSeqState.handleSemi();
958965
break;
959966
case tok::header_name:
960967
case tok::annot_header_unit:
961968
ImportSeqState.handleHeaderName();
962969
break;
963970
case tok::kw_export:
971+
TrackGMFState.handleExport();
964972
ImportSeqState.handleExport();
965973
break;
966974
case tok::identifier:
967975
if (Result.getIdentifierInfo()->isModulesImport()) {
976+
TrackGMFState.handleImport(ImportSeqState.afterTopLevelSeq());
968977
ImportSeqState.handleImport();
969978
if (ImportSeqState.afterImportSeq()) {
970979
ModuleImportLoc = Result.getLocation();
@@ -973,9 +982,13 @@ void Preprocessor::Lex(Token &Result) {
973982
CurLexerKind = CLK_LexAfterModuleImport;
974983
}
975984
break;
985+
} else if (Result.getIdentifierInfo() == getIdentifierInfo("module")) {
986+
TrackGMFState.handleModule(ImportSeqState.afterTopLevelSeq());
987+
break;
976988
}
977989
LLVM_FALLTHROUGH;
978990
default:
991+
TrackGMFState.handleMisc();
979992
ImportSeqState.handleMisc();
980993
break;
981994
}
@@ -1222,6 +1235,7 @@ bool Preprocessor::LexAfterModuleImport(Token &Result) {
12221235
LLVM_FALLTHROUGH;
12231236

12241237
case ImportAction::ModuleImport:
1238+
case ImportAction::HeaderUnitImport:
12251239
case ImportAction::SkippedModuleImport:
12261240
// We chose to import (or textually enter) the file. Convert the
12271241
// header-name token into a header unit annotation token.

clang/lib/Parse/Parser.cpp

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -663,12 +663,22 @@ bool Parser::ParseTopLevelDecl(DeclGroupPtrTy &Result,
663663
return false;
664664
}
665665

666-
case tok::annot_module_include:
667-
Actions.ActOnModuleInclude(Tok.getLocation(),
668-
reinterpret_cast<Module *>(
669-
Tok.getAnnotationValue()));
666+
case tok::annot_module_include: {
667+
auto Loc = Tok.getLocation();
668+
Module *Mod = reinterpret_cast<Module *>(Tok.getAnnotationValue());
669+
// FIXME: We need a better way to disambiguate C++ clang modules and
670+
// standard C++ modules.
671+
if (!getLangOpts().CPlusPlusModules || !Mod->isHeaderUnit())
672+
Actions.ActOnModuleInclude(Loc, Mod);
673+
else {
674+
DeclResult Import =
675+
Actions.ActOnModuleImport(Loc, SourceLocation(), Loc, Mod);
676+
Decl *ImportDecl = Import.isInvalid() ? nullptr : Import.get();
677+
Result = Actions.ConvertDeclToDeclGroup(ImportDecl);
678+
}
670679
ConsumeAnnotationToken();
671680
return false;
681+
}
672682

673683
case tok::annot_module_begin:
674684
Actions.ActOnModuleBegin(Tok.getLocation(), reinterpret_cast<Module *>(

0 commit comments

Comments
 (0)