Skip to content

Commit c4c5919

Browse files
authored
[clang][dataflow] Clear ExprToLoc and ExprToVal at the start of a block. (#72985)
We never need to access entries from these maps outside of the current basic block. This could only ever become a consideration when flow control happens inside a full-expression (i.e. we have multiple basic blocks for a full expression); there are two kinds of expression where this can happen, but we already deal with these in other ways: * Short-circuiting logical operators (`&&` and `||`) have operands that live in different basic blocks than the operator itself, but we already have code in the framework to retrieve the value of these operands from the environment for the block they are computed in, rather than in the environment of the block containing the operator. * The conditional operator similarly has operands that live in different basic blocks. However, we currently don't implement a transfer function for the conditional operator. When we do this, we need to retrieve the values of the operands from the environments of the basic blocks they live in, as we already do for logical operators. This patch adds a comment to this effect to the code. Clearing out `ExprToLoc` and `ExprToVal` has two benefits: * We avoid performing joins on boolean expressions contained in `ExprToVal` and hence extending the flow condition in cases where this is not needed. Simpler flow conditions should reduce the amount of work we do in the SAT solver. * Debugging becomes easier when flow conditions are simpler and `ExprToLoc` / `ExprToVal` don’t contain any extraneous entries. Benchmark results on Crubit's `pointer_nullability_analysis_benchmark show a slight runtime increase for simple benchmarks, offset by substantial runtime reductions for more complex benchmarks: ``` name old cpu/op new cpu/op delta BM_PointerAnalysisCopyPointer 29.8µs ± 1% 29.9µs ± 4% ~ (p=0.879 n=46+49) BM_PointerAnalysisIntLoop 101µs ± 3% 104µs ± 4% +2.96% (p=0.000 n=55+57) BM_PointerAnalysisPointerLoop 378µs ± 3% 245µs ± 3% -35.09% (p=0.000 n=47+55) BM_PointerAnalysisBranch 118µs ± 2% 122µs ± 3% +3.37% (p=0.000 n=59+59) BM_PointerAnalysisLoopAndBranch 779µs ± 3% 413µs ± 5% -47.01% (p=0.000 n=56+45) BM_PointerAnalysisTwoLoops 187µs ± 3% 192µs ± 5% +2.80% (p=0.000 n=57+58) BM_PointerAnalysisJoinFilePath 17.4ms ± 3% 7.2ms ± 3% -58.75% (p=0.000 n=58+57) BM_PointerAnalysisCallInLoop 14.7ms ± 4% 10.3ms ± 2% -29.87% (p=0.000 n=56+58) ```
1 parent d33bad6 commit c4c5919

File tree

5 files changed

+177
-79
lines changed

5 files changed

+177
-79
lines changed

clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp

Lines changed: 29 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -36,13 +36,13 @@ static constexpr int MaxCompositeValueDepth = 3;
3636
static constexpr int MaxCompositeValueSize = 1000;
3737

3838
/// Returns a map consisting of key-value entries that are present in both maps.
39-
template <typename K, typename V>
40-
llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
41-
const llvm::DenseMap<K, V> &Map2) {
42-
llvm::DenseMap<K, V> Result;
43-
for (auto &Entry : Map1) {
44-
auto It = Map2.find(Entry.first);
45-
if (It != Map2.end() && Entry.second == It->second)
39+
static llvm::DenseMap<const ValueDecl *, StorageLocation *> intersectDeclToLoc(
40+
const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1,
41+
const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) {
42+
llvm::DenseMap<const ValueDecl *, StorageLocation *> Result;
43+
for (auto &Entry : DeclToLoc1) {
44+
auto It = DeclToLoc2.find(Entry.first);
45+
if (It != DeclToLoc2.end() && Entry.second == It->second)
4646
Result.insert({Entry.first, Entry.second});
4747
}
4848
return Result;
@@ -203,39 +203,37 @@ bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1,
203203
return true;
204204
}
205205

206-
// Perform a join on either `LocToVal` or `ExprToVal`. `Key` must be either
207-
// `const StorageLocation *` or `const Expr *`.
208-
template <typename Key>
209-
llvm::MapVector<Key, Value *>
210-
joinKeyToValueMap(const llvm::MapVector<Key, Value *> &Map1,
211-
const llvm::MapVector<Key, Value *> &Map2,
212-
const Environment &Env1, const Environment &Env2,
213-
Environment &JoinedEnv, Environment::ValueModel &Model) {
214-
llvm::MapVector<Key, Value *> MergedMap;
215-
for (auto &Entry : Map1) {
216-
Key K = Entry.first;
217-
assert(K != nullptr);
206+
// Perform a join on two `LocToVal` maps.
207+
static llvm::MapVector<const StorageLocation *, Value *>
208+
joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
209+
const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2,
210+
const Environment &Env1, const Environment &Env2,
211+
Environment &JoinedEnv, Environment::ValueModel &Model) {
212+
llvm::MapVector<const StorageLocation *, Value *> Result;
213+
for (auto &Entry : LocToVal) {
214+
const StorageLocation *Loc = Entry.first;
215+
assert(Loc != nullptr);
218216

219217
Value *Val = Entry.second;
220218
assert(Val != nullptr);
221219

222-
auto It = Map2.find(K);
223-
if (It == Map2.end())
220+
auto It = LocToVal2.find(Loc);
221+
if (It == LocToVal2.end())
224222
continue;
225223
assert(It->second != nullptr);
226224

227225
if (areEquivalentValues(*Val, *It->second)) {
228-
MergedMap.insert({K, Val});
226+
Result.insert({Loc, Val});
229227
continue;
230228
}
231229

232230
if (Value *MergedVal = mergeDistinctValues(
233-
K->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
234-
MergedMap.insert({K, MergedVal});
231+
Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
232+
Result.insert({Loc, MergedVal});
235233
}
236234
}
237235

238-
return MergedMap;
236+
return Result;
239237
}
240238

241239
// Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either
@@ -648,20 +646,19 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
648646
else
649647
JoinedEnv.ReturnLoc = nullptr;
650648

651-
JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);
652-
653-
JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
649+
JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc);
654650

655651
// FIXME: update join to detect backedges and simplify the flow condition
656652
// accordingly.
657653
JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
658654
EnvA.FlowConditionToken, EnvB.FlowConditionToken);
659655

660-
JoinedEnv.ExprToVal = joinKeyToValueMap(EnvA.ExprToVal, EnvB.ExprToVal, EnvA,
661-
EnvB, JoinedEnv, Model);
656+
JoinedEnv.LocToVal =
657+
joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model);
662658

663-
JoinedEnv.LocToVal = joinKeyToValueMap(EnvA.LocToVal, EnvB.LocToVal, EnvA,
664-
EnvB, JoinedEnv, Model);
659+
// We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal`
660+
// empty, as we never need to access entries in these maps outside of the
661+
// basic block that sets them.
665662

666663
return JoinedEnv;
667664
}

clang/lib/Analysis/FlowSensitive/Transfer.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -623,6 +623,10 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
623623
// FIXME: Revisit this once flow conditions are added to the framework. For
624624
// `a = b ? c : d` we can add `b => a == c && !b => a == d` to the flow
625625
// condition.
626+
// When we do this, we will need to retrieve the values of the operands from
627+
// the environments for the basic blocks they are computed in, in a similar
628+
// way to how this is done for short-circuited logical operators in
629+
// `getLogicOperatorSubExprValue()`.
626630
if (S->isGLValue())
627631
Env.setStorageLocation(*S, Env.createObject(S->getType()));
628632
else if (Value *Val = Env.createValue(S->getType()))

clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -257,7 +257,12 @@ class JoinedStateBuilder {
257257
// initialize the state of each basic block differently.
258258
return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()};
259259
if (All.size() == 1)
260-
return Owned.empty() ? All.front()->fork() : std::move(Owned.front());
260+
// Join the environment with itself so that we discard the entries from
261+
// `ExprToLoc` and `ExprToVal`.
262+
// FIXME: We could consider writing special-case code for this that only
263+
// does the discarding, but it's not clear if this is worth it.
264+
return {All[0]->Lattice,
265+
Environment::join(All[0]->Env, All[0]->Env, AC.Analysis)};
261266

262267
auto Result = join(*All[0], *All[1]);
263268
for (unsigned I = 2; I < All.size(); ++I)

clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -134,40 +134,20 @@ TEST_F(EnvironmentTest, JoinRecords) {
134134
Environment Env1(DAContext);
135135
auto &Val1 = *cast<RecordValue>(Env1.createValue(Ty));
136136
RecordStorageLocation &Loc = Val1.getLoc();
137-
Env1.setValue(*ConstructExpr, Val1);
137+
Env1.setValue(Loc, Val1);
138138

139139
Environment Env2(DAContext);
140140
auto &Val2 = Env2.create<RecordValue>(Loc);
141141
Env2.setValue(Loc, Val2);
142-
Env2.setValue(*ConstructExpr, Val2);
142+
Env2.setValue(Loc, Val2);
143143

144144
Environment::ValueModel Model;
145145
Environment EnvJoined = Environment::join(Env1, Env2, Model);
146-
auto *JoinedVal = cast<RecordValue>(EnvJoined.getValue(*ConstructExpr));
146+
auto *JoinedVal = cast<RecordValue>(EnvJoined.getValue(Loc));
147147
EXPECT_NE(JoinedVal, &Val1);
148148
EXPECT_NE(JoinedVal, &Val2);
149149
EXPECT_EQ(&JoinedVal->getLoc(), &Loc);
150150
}
151-
152-
// Two different `RecordValue`s with different locations are joined into a
153-
// third `RecordValue` with a location different from the other two.
154-
{
155-
Environment Env1(DAContext);
156-
auto &Val1 = *cast<RecordValue>(Env1.createValue(Ty));
157-
Env1.setValue(*ConstructExpr, Val1);
158-
159-
Environment Env2(DAContext);
160-
auto &Val2 = *cast<RecordValue>(Env2.createValue(Ty));
161-
Env2.setValue(*ConstructExpr, Val2);
162-
163-
Environment::ValueModel Model;
164-
Environment EnvJoined = Environment::join(Env1, Env2, Model);
165-
auto *JoinedVal = cast<RecordValue>(EnvJoined.getValue(*ConstructExpr));
166-
EXPECT_NE(JoinedVal, &Val1);
167-
EXPECT_NE(JoinedVal, &Val2);
168-
EXPECT_NE(&JoinedVal->getLoc(), &Val1.getLoc());
169-
EXPECT_NE(&JoinedVal->getLoc(), &Val2.getLoc());
170-
}
171151
}
172152

173153
TEST_F(EnvironmentTest, InitGlobalVarsFun) {

clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp

Lines changed: 135 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -52,29 +52,48 @@ using ::testing::NotNull;
5252
using ::testing::Test;
5353
using ::testing::UnorderedElementsAre;
5454

55-
template <typename AnalysisT>
56-
llvm::Expected<std::vector<
57-
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
58-
runAnalysis(llvm::StringRef Code, AnalysisT (*MakeAnalysis)(ASTContext &)) {
59-
std::unique_ptr<ASTUnit> AST =
60-
tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});
61-
62-
auto *Func = selectFirst<FunctionDecl>(
63-
"func", match(functionDecl(ast_matchers::hasName("target")).bind("func"),
64-
AST->getASTContext()));
65-
assert(Func != nullptr);
66-
67-
auto CFCtx =
68-
llvm::cantFail(ControlFlowContext::build(*Func));
55+
class DataflowAnalysisTest : public Test {
56+
protected:
57+
template <typename AnalysisT>
58+
llvm::Expected<std::vector<
59+
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
60+
runAnalysis(llvm::StringRef Code, AnalysisT (*MakeAnalysis)(ASTContext &)) {
61+
AST = tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});
62+
63+
auto *Func = selectFirst<FunctionDecl>(
64+
"func",
65+
match(functionDecl(ast_matchers::hasName("target")).bind("func"),
66+
AST->getASTContext()));
67+
assert(Func != nullptr);
68+
69+
CFCtx = std::make_unique<ControlFlowContext>(
70+
llvm::cantFail(ControlFlowContext::build(*Func)));
71+
72+
AnalysisT Analysis = MakeAnalysis(AST->getASTContext());
73+
DACtx = std::make_unique<DataflowAnalysisContext>(
74+
std::make_unique<WatchedLiteralsSolver>());
75+
Environment Env(*DACtx, *Func);
76+
77+
return runDataflowAnalysis(*CFCtx, Analysis, Env);
78+
}
6979

70-
AnalysisT Analysis = MakeAnalysis(AST->getASTContext());
71-
DataflowAnalysisContext DACtx(std::make_unique<WatchedLiteralsSolver>());
72-
Environment Env(DACtx);
80+
template <typename StateT>
81+
const StateT &
82+
blockStateForStmt(const std::vector<std::optional<StateT>> &BlockStates,
83+
const Stmt *S) {
84+
const CFGBlock *Block = CFCtx->getStmtToBlock().lookup(S);
85+
assert(Block != nullptr);
86+
const std::optional<StateT> &MaybeState = BlockStates[Block->getBlockID()];
87+
assert(MaybeState.has_value());
88+
return *MaybeState;
89+
};
7390

74-
return runDataflowAnalysis(CFCtx, Analysis, Env);
75-
}
91+
std::unique_ptr<ASTUnit> AST;
92+
std::unique_ptr<ControlFlowContext> CFCtx;
93+
std::unique_ptr<DataflowAnalysisContext> DACtx;
94+
};
7695

77-
TEST(DataflowAnalysisTest, NoopAnalysis) {
96+
TEST_F(DataflowAnalysisTest, NoopAnalysis) {
7897
auto BlockStates = llvm::cantFail(
7998
runAnalysis<NoopAnalysis>("void target() {}", [](ASTContext &C) {
8099
return NoopAnalysis(C,
@@ -88,7 +107,7 @@ TEST(DataflowAnalysisTest, NoopAnalysis) {
88107

89108
// Basic test that `diagnoseFunction` calls the Diagnoser function for the
90109
// number of elements expected.
91-
TEST(DataflowAnalysisTest, DiagnoseFunctionDiagnoserCalledOnEachElement) {
110+
TEST_F(DataflowAnalysisTest, DiagnoseFunctionDiagnoserCalledOnEachElement) {
92111
std::string Code = R"(void target() { int x = 0; ++x; })";
93112
std::unique_ptr<ASTUnit> AST =
94113
tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});
@@ -111,6 +130,99 @@ TEST(DataflowAnalysisTest, DiagnoseFunctionDiagnoserCalledOnEachElement) {
111130
" (Lifetime ends)\n")));
112131
}
113132

133+
// Tests that check we discard state for expressions correctly.
134+
using DiscardExprStateTest = DataflowAnalysisTest;
135+
136+
TEST_F(DiscardExprStateTest, WhileStatement) {
137+
std::string Code = R"(
138+
void foo(int *p);
139+
void target(int *p) {
140+
while (p != nullptr)
141+
foo(p);
142+
}
143+
)";
144+
auto BlockStates = llvm::cantFail(runAnalysis<NoopAnalysis>(
145+
Code, [](ASTContext &C) { return NoopAnalysis(C); }));
146+
147+
auto *NotEqOp = selectFirst<BinaryOperator>(
148+
"op", match(binaryOperator(hasOperatorName("!=")).bind("op"),
149+
AST->getASTContext()));
150+
ASSERT_NE(NotEqOp, nullptr);
151+
152+
auto *CallFoo = selectFirst<CallExpr>(
153+
"call", match(callExpr(callee(functionDecl(hasName("foo")))).bind("call"),
154+
AST->getASTContext()));
155+
ASSERT_NE(CallFoo, nullptr);
156+
157+
// In the block that evaluates the expression `p != nullptr`, this expression
158+
// is associated with a value.
159+
const auto &NotEqOpState = blockStateForStmt(BlockStates, NotEqOp);
160+
EXPECT_NE(NotEqOpState.Env.getValue(*NotEqOp), nullptr);
161+
162+
// In the block that calls `foo(p)`, the value for `p != nullptr` is discarded
163+
// because it is not consumed by this block.
164+
const auto &CallFooState = blockStateForStmt(BlockStates, CallFoo);
165+
EXPECT_EQ(CallFooState.Env.getValue(*NotEqOp), nullptr);
166+
}
167+
168+
TEST_F(DiscardExprStateTest, BooleanOperator) {
169+
std::string Code = R"(
170+
bool target(bool b1, bool b2) {
171+
return b1 && b2;
172+
}
173+
)";
174+
auto BlockStates = llvm::cantFail(runAnalysis<NoopAnalysis>(
175+
Code, [](ASTContext &C) { return NoopAnalysis(C); }));
176+
177+
auto *AndOp = selectFirst<BinaryOperator>(
178+
"op", match(binaryOperator(hasOperatorName("&&")).bind("op"),
179+
AST->getASTContext()));
180+
ASSERT_NE(AndOp, nullptr);
181+
182+
auto *Return = selectFirst<ReturnStmt>(
183+
"return", match(returnStmt().bind("return"), AST->getASTContext()));
184+
ASSERT_NE(Return, nullptr);
185+
186+
// In the block that evaluates the LHS of the `&&` operator, the LHS is
187+
// associated with a value, while the right-hand side is not (unsurprisingly,
188+
// as it hasn't been evaluated yet).
189+
const auto &LHSState = blockStateForStmt(BlockStates, AndOp->getLHS());
190+
auto *LHSValue = cast<BoolValue>(LHSState.Env.getValue(*AndOp->getLHS()));
191+
ASSERT_NE(LHSValue, nullptr);
192+
EXPECT_EQ(LHSState.Env.getValue(*AndOp->getRHS()), nullptr);
193+
194+
// In the block that evaluates the RHS, the RHS is associated with a
195+
// value. The value for the LHS has been discarded as it is not consumed by
196+
// this block.
197+
const auto &RHSState = blockStateForStmt(BlockStates, AndOp->getRHS());
198+
EXPECT_EQ(RHSState.Env.getValue(*AndOp->getLHS()), nullptr);
199+
auto *RHSValue = cast<BoolValue>(RHSState.Env.getValue(*AndOp->getRHS()));
200+
ASSERT_NE(RHSValue, nullptr);
201+
202+
// In the block that evaluates the return statement, the expression `b1 && b2`
203+
// is associated with a value (and check that it's the right one).
204+
// The expressions `b1` and `b2` are _not_ associated with a value in this
205+
// block, even though they are consumed by the block, because:
206+
// * This block has two prececessor blocks (the one that evaluates `b1` and
207+
// the one that evaluates `b2`).
208+
// * `b1` is only associated with a value in the block that evaluates `b1` but
209+
// not the block that evalutes `b2`, so the join operation discards the
210+
// value for `b1`.
211+
// * `b2` is only associated with a value in the block that evaluates `b2` but
212+
// not the block that evaluates `b1`, the the join operation discards the
213+
// value for `b2`.
214+
// Nevertheless, the analysis generates the correct formula for `b1 && b2`
215+
// because the transfer function for the `&&` operator retrieves the values
216+
// for its operands from the environments for the blocks that compute the
217+
// operands, rather than from the environment for the block that contains the
218+
// `&&`.
219+
const auto &ReturnState = blockStateForStmt(BlockStates, Return);
220+
EXPECT_EQ(ReturnState.Env.getValue(*AndOp->getLHS()), nullptr);
221+
EXPECT_EQ(ReturnState.Env.getValue(*AndOp->getRHS()), nullptr);
222+
EXPECT_EQ(ReturnState.Env.getValue(*AndOp),
223+
&ReturnState.Env.makeAnd(*LHSValue, *RHSValue));
224+
}
225+
114226
struct NonConvergingLattice {
115227
int State;
116228

@@ -142,7 +254,7 @@ class NonConvergingAnalysis
142254
}
143255
};
144256

145-
TEST(DataflowAnalysisTest, NonConvergingAnalysis) {
257+
TEST_F(DataflowAnalysisTest, NonConvergingAnalysis) {
146258
std::string Code = R"(
147259
void target() {
148260
while(true) {}
@@ -163,7 +275,7 @@ TEST(DataflowAnalysisTest, NonConvergingAnalysis) {
163275
// An earlier version crashed for this condition (for boolean-typed lvalues), so
164276
// this test only verifies that the analysis runs successfully, without
165277
// examining any details of the results.
166-
TEST(DataflowAnalysisTest, JoinBoolLValues) {
278+
TEST_F(DataflowAnalysisTest, JoinBoolLValues) {
167279
std::string Code = R"(
168280
void target() {
169281
for (int x = 1; x; x = 0)

0 commit comments

Comments
 (0)