Skip to content

Commit 2e7b95e

Browse files
authored
[Safe Buffers] Serialize unsafe_buffer_usage pragmas (llvm#92031)
The commit adds serialization and de-serialization implementations for the stored regions. Basically, the serialized representation of the regions of a PP is a (ordered) sequence of source location encodings. For de-serialization, regions from loaded files are stored by their ASTs. When later one queries if a loaded location L is in an opt-out region, PP looks up the regions of the loaded AST where L is at. (Background if helps: a pair of `#pragma clang unsafe_buffer_usage begin/end` pragmas marks a warning-opt-out region. The begin and end locations (opt-out regions) are stored in preprocessor instances (PP) and will be queried by the `-Wunsafe-buffer-usage` analyzer.) The reported issue at upstream: llvm#90501 rdar://124035402
1 parent 53dbc1f commit 2e7b95e

12 files changed

+477
-24
lines changed

clang/include/clang/Basic/SourceManager.h

+5
Original file line numberDiff line numberDiff line change
@@ -1676,6 +1676,11 @@ class SourceManager : public RefCountedBase<SourceManager> {
16761676
isInTheSameTranslationUnit(std::pair<FileID, unsigned> &LOffs,
16771677
std::pair<FileID, unsigned> &ROffs) const;
16781678

1679+
/// \param Loc a source location in a loaded AST (of a PCH/Module file).
1680+
/// \returns a FileID uniquely identifies the AST of a loaded
1681+
/// module/PCH where `Loc` is at.
1682+
FileID getUniqueLoadedASTFileID(SourceLocation Loc) const;
1683+
16791684
/// Determines whether the two decomposed source location is in the same TU.
16801685
bool isInTheSameTranslationUnitImpl(
16811686
const std::pair<FileID, unsigned> &LOffs,

clang/include/clang/Lex/Preprocessor.h

+47-5
Original file line numberDiff line numberDiff line change
@@ -2883,11 +2883,41 @@ class Preprocessor {
28832883
/// otherwise.
28842884
SourceLocation CurrentSafeBufferOptOutStart; // It is used to report the start location of an never-closed region.
28852885

2886-
// An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in one
2887-
// translation unit. Each region is represented by a pair of start and end
2888-
// locations. A region is "open" if its' start and end locations are
2889-
// identical.
2890-
SmallVector<std::pair<SourceLocation, SourceLocation>, 8> SafeBufferOptOutMap;
2886+
using SafeBufferOptOutRegionsTy =
2887+
SmallVector<std::pair<SourceLocation, SourceLocation>, 16>;
2888+
// An ordered sequence of "-Wunsafe-buffer-usage" opt-out regions in this
2889+
// translation unit. Each region is represented by a pair of start and
2890+
// end locations.
2891+
SafeBufferOptOutRegionsTy SafeBufferOptOutMap;
2892+
2893+
// The "-Wunsafe-buffer-usage" opt-out regions in loaded ASTs. We use the
2894+
// following structure to manage them by their ASTs.
2895+
struct {
2896+
// A map from unique IDs to region maps of loaded ASTs. The ID identifies a
2897+
// loaded AST. See `SourceManager::getUniqueLoadedASTID`.
2898+
llvm::DenseMap<FileID, SafeBufferOptOutRegionsTy> LoadedRegions;
2899+
2900+
// Returns a reference to the safe buffer opt-out regions of the loaded
2901+
// AST where `Loc` belongs to. (Construct if absent)
2902+
SafeBufferOptOutRegionsTy &
2903+
findAndConsLoadedOptOutMap(SourceLocation Loc, SourceManager &SrcMgr) {
2904+
return LoadedRegions[SrcMgr.getUniqueLoadedASTFileID(Loc)];
2905+
}
2906+
2907+
// Returns a reference to the safe buffer opt-out regions of the loaded
2908+
// AST where `Loc` belongs to. (This const function returns nullptr if
2909+
// absent.)
2910+
const SafeBufferOptOutRegionsTy *
2911+
lookupLoadedOptOutMap(SourceLocation Loc,
2912+
const SourceManager &SrcMgr) const {
2913+
FileID FID = SrcMgr.getUniqueLoadedASTFileID(Loc);
2914+
auto Iter = LoadedRegions.find(FID);
2915+
2916+
if (Iter == LoadedRegions.end())
2917+
return nullptr;
2918+
return &Iter->getSecond();
2919+
}
2920+
} LoadedSafeBufferOptOutMap;
28912921

28922922
public:
28932923
/// \return true iff the given `Loc` is in a "-Wunsafe-buffer-usage" opt-out
@@ -2918,6 +2948,18 @@ class Preprocessor {
29182948
/// opt-out region
29192949
bool isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc);
29202950

2951+
/// \return a sequence of SourceLocations representing ordered opt-out regions
2952+
/// specified by
2953+
/// `\#pragma clang unsafe_buffer_usage begin/end`s of this translation unit.
2954+
SmallVector<SourceLocation, 64> serializeSafeBufferOptOutMap() const;
2955+
2956+
/// \param SrcLocSeqs a sequence of SourceLocations deserialized from a
2957+
/// record of code `PP_UNSAFE_BUFFER_USAGE`.
2958+
/// \return true iff the `Preprocessor` has been updated; false `Preprocessor`
2959+
/// is same as itself before the call.
2960+
bool setDeserializedSafeBufferOptOutMap(
2961+
const SmallVectorImpl<SourceLocation> &SrcLocSeqs);
2962+
29212963
private:
29222964
/// Helper functions to forward lexing to the actual lexer. They all share the
29232965
/// same signature.

clang/include/clang/Serialization/ASTBitCodes.h

+3
Original file line numberDiff line numberDiff line change
@@ -694,6 +694,9 @@ enum ASTRecordTypes {
694694
/// Record code for lexical and visible block for delayed namespace in
695695
/// reduced BMI.
696696
DELAYED_NAMESPACE_LEXICAL_VISIBLE_RECORD = 68,
697+
698+
/// Record code for \#pragma clang unsafe_buffer_usage begin/end
699+
PP_UNSAFE_BUFFER_USAGE = 69,
697700
};
698701

699702
/// Record types used within a source manager block.

clang/lib/Basic/SourceManager.cpp

+18
Original file line numberDiff line numberDiff line change
@@ -1915,6 +1915,24 @@ SourceManager::getDecomposedIncludedLoc(FileID FID) const {
19151915
return DecompLoc;
19161916
}
19171917

1918+
FileID SourceManager::getUniqueLoadedASTFileID(SourceLocation Loc) const {
1919+
assert(isLoadedSourceLocation(Loc) &&
1920+
"Must be a source location in a loaded PCH/Module file");
1921+
1922+
auto [FID, Ignore] = getDecomposedLoc(Loc);
1923+
// `LoadedSLocEntryAllocBegin` stores the sorted lowest FID of each loaded
1924+
// allocation. Later allocations have lower FileIDs. The call below is to find
1925+
// the lowest FID of a loaded allocation from any FID in the same allocation.
1926+
// The lowest FID is used to identify a loaded allocation.
1927+
const FileID *FirstFID =
1928+
llvm::lower_bound(LoadedSLocEntryAllocBegin, FID, std::greater<FileID>{});
1929+
1930+
assert(FirstFID &&
1931+
"The failure to find the first FileID of a "
1932+
"loaded AST from a loaded source location was unexpected.");
1933+
return *FirstFID;
1934+
}
1935+
19181936
bool SourceManager::isInTheSameTranslationUnitImpl(
19191937
const std::pair<FileID, unsigned> &LOffs,
19201938
const std::pair<FileID, unsigned> &ROffs) const {

clang/lib/Lex/Preprocessor.cpp

+91-19
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@
5858
#include "llvm/ADT/SmallString.h"
5959
#include "llvm/ADT/SmallVector.h"
6060
#include "llvm/ADT/StringRef.h"
61+
#include "llvm/ADT/iterator_range.h"
6162
#include "llvm/Support/Capacity.h"
6263
#include "llvm/Support/ErrorHandling.h"
6364
#include "llvm/Support/MemoryBuffer.h"
@@ -1483,26 +1484,56 @@ void Preprocessor::emitFinalMacroWarning(const Token &Identifier,
14831484
}
14841485

14851486
bool Preprocessor::isSafeBufferOptOut(const SourceManager &SourceMgr,
1486-
const SourceLocation &Loc) const {
1487-
// Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1488-
auto FirstRegionEndingAfterLoc = llvm::partition_point(
1489-
SafeBufferOptOutMap,
1490-
[&SourceMgr,
1491-
&Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1492-
return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
1493-
});
1487+
const SourceLocation &Loc) const {
1488+
// The lambda that tests if a `Loc` is in an opt-out region given one opt-out
1489+
// region map:
1490+
auto TestInMap = [&SourceMgr](const SafeBufferOptOutRegionsTy &Map,
1491+
const SourceLocation &Loc) -> bool {
1492+
// Try to find a region in `SafeBufferOptOutMap` where `Loc` is in:
1493+
auto FirstRegionEndingAfterLoc = llvm::partition_point(
1494+
Map, [&SourceMgr,
1495+
&Loc](const std::pair<SourceLocation, SourceLocation> &Region) {
1496+
return SourceMgr.isBeforeInTranslationUnit(Region.second, Loc);
1497+
});
1498+
1499+
if (FirstRegionEndingAfterLoc != Map.end()) {
1500+
// To test if the start location of the found region precedes `Loc`:
1501+
return SourceMgr.isBeforeInTranslationUnit(
1502+
FirstRegionEndingAfterLoc->first, Loc);
1503+
}
1504+
// If we do not find a region whose end location passes `Loc`, we want to
1505+
// check if the current region is still open:
1506+
if (!Map.empty() && Map.back().first == Map.back().second)
1507+
return SourceMgr.isBeforeInTranslationUnit(Map.back().first, Loc);
1508+
return false;
1509+
};
14941510

1495-
if (FirstRegionEndingAfterLoc != SafeBufferOptOutMap.end()) {
1496-
// To test if the start location of the found region precedes `Loc`:
1497-
return SourceMgr.isBeforeInTranslationUnit(FirstRegionEndingAfterLoc->first,
1498-
Loc);
1499-
}
1500-
// If we do not find a region whose end location passes `Loc`, we want to
1501-
// check if the current region is still open:
1502-
if (!SafeBufferOptOutMap.empty() &&
1503-
SafeBufferOptOutMap.back().first == SafeBufferOptOutMap.back().second)
1504-
return SourceMgr.isBeforeInTranslationUnit(SafeBufferOptOutMap.back().first,
1505-
Loc);
1511+
// What the following does:
1512+
//
1513+
// If `Loc` belongs to the local TU, we just look up `SafeBufferOptOutMap`.
1514+
// Otherwise, `Loc` is from a loaded AST. We look up the
1515+
// `LoadedSafeBufferOptOutMap` first to get the opt-out region map of the
1516+
// loaded AST where `Loc` is at. Then we find if `Loc` is in an opt-out
1517+
// region w.r.t. the region map. If the region map is absent, it means there
1518+
// is no opt-out pragma in that loaded AST.
1519+
//
1520+
// Opt-out pragmas in the local TU or a loaded AST is not visible to another
1521+
// one of them. That means if you put the pragmas around a `#include
1522+
// "module.h"`, where module.h is a module, it is not actually suppressing
1523+
// warnings in module.h. This is fine because warnings in module.h will be
1524+
// reported when module.h is compiled in isolation and nothing in module.h
1525+
// will be analyzed ever again. So you will not see warnings from the file
1526+
// that imports module.h anyway. And you can't even do the same thing for PCHs
1527+
// because they can only be included from the command line.
1528+
1529+
if (SourceMgr.isLocalSourceLocation(Loc))
1530+
return TestInMap(SafeBufferOptOutMap, Loc);
1531+
1532+
const SafeBufferOptOutRegionsTy *LoadedRegions =
1533+
LoadedSafeBufferOptOutMap.lookupLoadedOptOutMap(Loc, SourceMgr);
1534+
1535+
if (LoadedRegions)
1536+
return TestInMap(*LoadedRegions, Loc);
15061537
return false;
15071538
}
15081539

@@ -1551,6 +1582,47 @@ bool Preprocessor::isPPInSafeBufferOptOutRegion(SourceLocation &StartLoc) {
15511582
return InSafeBufferOptOutRegion;
15521583
}
15531584

1585+
SmallVector<SourceLocation, 64>
1586+
Preprocessor::serializeSafeBufferOptOutMap() const {
1587+
assert(!InSafeBufferOptOutRegion &&
1588+
"Attempt to serialize safe buffer opt-out regions before file being "
1589+
"completely preprocessed");
1590+
1591+
SmallVector<SourceLocation, 64> SrcSeq;
1592+
1593+
for (const auto &[begin, end] : SafeBufferOptOutMap) {
1594+
SrcSeq.push_back(begin);
1595+
SrcSeq.push_back(end);
1596+
}
1597+
// Only `SafeBufferOptOutMap` gets serialized. No need to serialize
1598+
// `LoadedSafeBufferOptOutMap` because if this TU loads a pch/module, every
1599+
// pch/module in the pch-chain/module-DAG will be loaded one by one in order.
1600+
// It means that for each loading pch/module m, it just needs to load m's own
1601+
// `SafeBufferOptOutMap`.
1602+
return SrcSeq;
1603+
}
1604+
1605+
bool Preprocessor::setDeserializedSafeBufferOptOutMap(
1606+
const SmallVectorImpl<SourceLocation> &SourceLocations) {
1607+
if (SourceLocations.size() == 0)
1608+
return false;
1609+
1610+
assert(SourceLocations.size() % 2 == 0 &&
1611+
"ill-formed SourceLocation sequence");
1612+
1613+
auto It = SourceLocations.begin();
1614+
SafeBufferOptOutRegionsTy &Regions =
1615+
LoadedSafeBufferOptOutMap.findAndConsLoadedOptOutMap(*It, SourceMgr);
1616+
1617+
do {
1618+
SourceLocation Begin = *It++;
1619+
SourceLocation End = *It++;
1620+
1621+
Regions.emplace_back(Begin, End);
1622+
} while (It != SourceLocations.end());
1623+
return true;
1624+
}
1625+
15541626
ModuleLoader::~ModuleLoader() = default;
15551627

15561628
CommentHandler::~CommentHandler() = default;

clang/lib/Serialization/ASTReader.cpp

+11
Original file line numberDiff line numberDiff line change
@@ -3583,6 +3583,17 @@ llvm::Error ASTReader::ReadASTBlock(ModuleFile &F,
35833583
break;
35843584
}
35853585

3586+
case PP_UNSAFE_BUFFER_USAGE: {
3587+
if (!Record.empty()) {
3588+
SmallVector<SourceLocation, 64> SrcLocs;
3589+
unsigned Idx = 0;
3590+
while (Idx < Record.size())
3591+
SrcLocs.push_back(ReadSourceLocation(F, Record, Idx));
3592+
PP.setDeserializedSafeBufferOptOutMap(SrcLocs);
3593+
}
3594+
break;
3595+
}
3596+
35863597
case PP_CONDITIONAL_STACK:
35873598
if (!Record.empty()) {
35883599
unsigned Idx = 0, End = Record.size() - 1;

clang/lib/Serialization/ASTWriter.cpp

+7
Original file line numberDiff line numberDiff line change
@@ -926,6 +926,7 @@ void ASTWriter::WriteBlockInfoBlock() {
926926
RECORD(PP_CONDITIONAL_STACK);
927927
RECORD(DECLS_TO_CHECK_FOR_DEFERRED_DIAGS);
928928
RECORD(PP_ASSUME_NONNULL_LOC);
929+
RECORD(PP_UNSAFE_BUFFER_USAGE);
929930

930931
// SourceManager Block.
931932
BLOCK(SOURCE_MANAGER_BLOCK);
@@ -2518,6 +2519,12 @@ void ASTWriter::WritePreprocessor(const Preprocessor &PP, bool IsModule) {
25182519
Record.clear();
25192520
}
25202521

2522+
// Write the safe buffer opt-out region map in PP
2523+
for (SourceLocation &S : PP.serializeSafeBufferOptOutMap())
2524+
AddSourceLocation(S, Record);
2525+
Stream.EmitRecord(PP_UNSAFE_BUFFER_USAGE, Record);
2526+
Record.clear();
2527+
25212528
// Enter the preprocessor block.
25222529
Stream.EnterSubblock(PREPROCESSOR_BLOCK_ID, 3);
25232530

0 commit comments

Comments
 (0)