Skip to content

[clang][dataflow] Clear ExprToLoc and ExprToVal at the start of a block. #72985

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 22, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 29 additions & 32 deletions clang/lib/Analysis/FlowSensitive/DataflowEnvironment.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -36,13 +36,13 @@ static constexpr int MaxCompositeValueDepth = 3;
static constexpr int MaxCompositeValueSize = 1000;

/// Returns a map consisting of key-value entries that are present in both maps.
template <typename K, typename V>
llvm::DenseMap<K, V> intersectDenseMaps(const llvm::DenseMap<K, V> &Map1,
const llvm::DenseMap<K, V> &Map2) {
llvm::DenseMap<K, V> Result;
for (auto &Entry : Map1) {
auto It = Map2.find(Entry.first);
if (It != Map2.end() && Entry.second == It->second)
static llvm::DenseMap<const ValueDecl *, StorageLocation *> intersectDeclToLoc(
const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc1,
const llvm::DenseMap<const ValueDecl *, StorageLocation *> &DeclToLoc2) {
llvm::DenseMap<const ValueDecl *, StorageLocation *> Result;
for (auto &Entry : DeclToLoc1) {
auto It = DeclToLoc2.find(Entry.first);
if (It != DeclToLoc2.end() && Entry.second == It->second)
Result.insert({Entry.first, Entry.second});
}
return Result;
Expand Down Expand Up @@ -203,39 +203,37 @@ bool compareKeyToValueMaps(const llvm::MapVector<Key, Value *> &Map1,
return true;
}

// Perform a join on either `LocToVal` or `ExprToVal`. `Key` must be either
// `const StorageLocation *` or `const Expr *`.
template <typename Key>
llvm::MapVector<Key, Value *>
joinKeyToValueMap(const llvm::MapVector<Key, Value *> &Map1,
const llvm::MapVector<Key, Value *> &Map2,
const Environment &Env1, const Environment &Env2,
Environment &JoinedEnv, Environment::ValueModel &Model) {
llvm::MapVector<Key, Value *> MergedMap;
for (auto &Entry : Map1) {
Key K = Entry.first;
assert(K != nullptr);
// Perform a join on two `LocToVal` maps.
static llvm::MapVector<const StorageLocation *, Value *>
joinLocToVal(const llvm::MapVector<const StorageLocation *, Value *> &LocToVal,
const llvm::MapVector<const StorageLocation *, Value *> &LocToVal2,
const Environment &Env1, const Environment &Env2,
Environment &JoinedEnv, Environment::ValueModel &Model) {
llvm::MapVector<const StorageLocation *, Value *> Result;
for (auto &Entry : LocToVal) {
const StorageLocation *Loc = Entry.first;
assert(Loc != nullptr);

Value *Val = Entry.second;
assert(Val != nullptr);

auto It = Map2.find(K);
if (It == Map2.end())
auto It = LocToVal2.find(Loc);
if (It == LocToVal2.end())
continue;
assert(It->second != nullptr);

if (areEquivalentValues(*Val, *It->second)) {
MergedMap.insert({K, Val});
Result.insert({Loc, Val});
continue;
}

if (Value *MergedVal = mergeDistinctValues(
K->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
MergedMap.insert({K, MergedVal});
Loc->getType(), *Val, Env1, *It->second, Env2, JoinedEnv, Model)) {
Result.insert({Loc, MergedVal});
}
}

return MergedMap;
return Result;
}

// Perform widening on either `LocToVal` or `ExprToVal`. `Key` must be either
Expand Down Expand Up @@ -648,20 +646,19 @@ Environment Environment::join(const Environment &EnvA, const Environment &EnvB,
else
JoinedEnv.ReturnLoc = nullptr;

JoinedEnv.DeclToLoc = intersectDenseMaps(EnvA.DeclToLoc, EnvB.DeclToLoc);

JoinedEnv.ExprToLoc = intersectDenseMaps(EnvA.ExprToLoc, EnvB.ExprToLoc);
JoinedEnv.DeclToLoc = intersectDeclToLoc(EnvA.DeclToLoc, EnvB.DeclToLoc);

// FIXME: update join to detect backedges and simplify the flow condition
// accordingly.
JoinedEnv.FlowConditionToken = EnvA.DACtx->joinFlowConditions(
EnvA.FlowConditionToken, EnvB.FlowConditionToken);

JoinedEnv.ExprToVal = joinKeyToValueMap(EnvA.ExprToVal, EnvB.ExprToVal, EnvA,
EnvB, JoinedEnv, Model);
JoinedEnv.LocToVal =
joinLocToVal(EnvA.LocToVal, EnvB.LocToVal, EnvA, EnvB, JoinedEnv, Model);

JoinedEnv.LocToVal = joinKeyToValueMap(EnvA.LocToVal, EnvB.LocToVal, EnvA,
EnvB, JoinedEnv, Model);
// We intentionally leave `JoinedEnv.ExprToLoc` and `JoinedEnv.ExprToVal`
// empty, as we never need to access entries in these maps outside of the
// basic block that sets them.

return JoinedEnv;
}
Expand Down
4 changes: 4 additions & 0 deletions clang/lib/Analysis/FlowSensitive/Transfer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -623,6 +623,10 @@ class TransferVisitor : public ConstStmtVisitor<TransferVisitor> {
// FIXME: Revisit this once flow conditions are added to the framework. For
// `a = b ? c : d` we can add `b => a == c && !b => a == d` to the flow
// condition.
// When we do this, we will need to retrieve the values of the operands from
// the environments for the basic blocks they are computed in, in a similar
// way to how this is done for short-circuited logical operators in
// `getLogicOperatorSubExprValue()`.
if (S->isGLValue())
Env.setStorageLocation(*S, Env.createObject(S->getType()));
else if (Value *Val = Env.createValue(S->getType()))
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,12 @@ class JoinedStateBuilder {
// initialize the state of each basic block differently.
return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()};
if (All.size() == 1)
return Owned.empty() ? All.front()->fork() : std::move(Owned.front());
// Join the environment with itself so that we discard the entries from
// `ExprToLoc` and `ExprToVal`.
// FIXME: We could consider writing special-case code for this that only
// does the discarding, but it's not clear if this is worth it.
return {All[0]->Lattice,
Environment::join(All[0]->Env, All[0]->Env, AC.Analysis)};
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

On an equivalent change in #72850, @ymand commented:

This change may obviate the value of JoinedStateBuilder altogether, since a key point was to save on copies. Once we always build a fresh one, I'm not sure the complexity is doing much anymore. Please double check this.

I think there's still value in keeping the JoinedStateBuilder as it abstracts away the logic of what to do for different sizes of All. This logic would otherwise need to go in computeBlockInputState(), which is already large as it is.


auto Result = join(*All[0], *All[1]);
for (unsigned I = 2; I < All.size(); ++I)
Expand Down
26 changes: 3 additions & 23 deletions clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -134,40 +134,20 @@ TEST_F(EnvironmentTest, JoinRecords) {
Environment Env1(DAContext);
auto &Val1 = *cast<RecordValue>(Env1.createValue(Ty));
RecordStorageLocation &Loc = Val1.getLoc();
Env1.setValue(*ConstructExpr, Val1);
Env1.setValue(Loc, Val1);

Environment Env2(DAContext);
auto &Val2 = Env2.create<RecordValue>(Loc);
Env2.setValue(Loc, Val2);
Env2.setValue(*ConstructExpr, Val2);
Env2.setValue(Loc, Val2);

Environment::ValueModel Model;
Environment EnvJoined = Environment::join(Env1, Env2, Model);
auto *JoinedVal = cast<RecordValue>(EnvJoined.getValue(*ConstructExpr));
auto *JoinedVal = cast<RecordValue>(EnvJoined.getValue(Loc));
EXPECT_NE(JoinedVal, &Val1);
EXPECT_NE(JoinedVal, &Val2);
EXPECT_EQ(&JoinedVal->getLoc(), &Loc);
}

// Two different `RecordValue`s with different locations are joined into a
// third `RecordValue` with a location different from the other two.
{
Environment Env1(DAContext);
auto &Val1 = *cast<RecordValue>(Env1.createValue(Ty));
Env1.setValue(*ConstructExpr, Val1);

Environment Env2(DAContext);
auto &Val2 = *cast<RecordValue>(Env2.createValue(Ty));
Env2.setValue(*ConstructExpr, Val2);

Environment::ValueModel Model;
Environment EnvJoined = Environment::join(Env1, Env2, Model);
auto *JoinedVal = cast<RecordValue>(EnvJoined.getValue(*ConstructExpr));
EXPECT_NE(JoinedVal, &Val1);
EXPECT_NE(JoinedVal, &Val2);
EXPECT_NE(&JoinedVal->getLoc(), &Val1.getLoc());
EXPECT_NE(&JoinedVal->getLoc(), &Val2.getLoc());
}
}

TEST_F(EnvironmentTest, InitGlobalVarsFun) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -52,29 +52,48 @@ using ::testing::NotNull;
using ::testing::Test;
using ::testing::UnorderedElementsAre;

template <typename AnalysisT>
llvm::Expected<std::vector<
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
runAnalysis(llvm::StringRef Code, AnalysisT (*MakeAnalysis)(ASTContext &)) {
std::unique_ptr<ASTUnit> AST =
tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});

auto *Func = selectFirst<FunctionDecl>(
"func", match(functionDecl(ast_matchers::hasName("target")).bind("func"),
AST->getASTContext()));
assert(Func != nullptr);

auto CFCtx =
llvm::cantFail(ControlFlowContext::build(*Func));
class DataflowAnalysisTest : public Test {
protected:
template <typename AnalysisT>
llvm::Expected<std::vector<
std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>>
runAnalysis(llvm::StringRef Code, AnalysisT (*MakeAnalysis)(ASTContext &)) {
AST = tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});

auto *Func = selectFirst<FunctionDecl>(
"func",
match(functionDecl(ast_matchers::hasName("target")).bind("func"),
AST->getASTContext()));
assert(Func != nullptr);

CFCtx = std::make_unique<ControlFlowContext>(
llvm::cantFail(ControlFlowContext::build(*Func)));

AnalysisT Analysis = MakeAnalysis(AST->getASTContext());
DACtx = std::make_unique<DataflowAnalysisContext>(
std::make_unique<WatchedLiteralsSolver>());
Environment Env(*DACtx, *Func);

return runDataflowAnalysis(*CFCtx, Analysis, Env);
}

AnalysisT Analysis = MakeAnalysis(AST->getASTContext());
DataflowAnalysisContext DACtx(std::make_unique<WatchedLiteralsSolver>());
Environment Env(DACtx);
template <typename StateT>
const StateT &
blockStateForStmt(const std::vector<std::optional<StateT>> &BlockStates,
const Stmt *S) {
const CFGBlock *Block = CFCtx->getStmtToBlock().lookup(S);
assert(Block != nullptr);
const std::optional<StateT> &MaybeState = BlockStates[Block->getBlockID()];
assert(MaybeState.has_value());
return *MaybeState;
};

return runDataflowAnalysis(CFCtx, Analysis, Env);
}
std::unique_ptr<ASTUnit> AST;
std::unique_ptr<ControlFlowContext> CFCtx;
std::unique_ptr<DataflowAnalysisContext> DACtx;
};

TEST(DataflowAnalysisTest, NoopAnalysis) {
TEST_F(DataflowAnalysisTest, NoopAnalysis) {
auto BlockStates = llvm::cantFail(
runAnalysis<NoopAnalysis>("void target() {}", [](ASTContext &C) {
return NoopAnalysis(C,
Expand All @@ -88,7 +107,7 @@ TEST(DataflowAnalysisTest, NoopAnalysis) {

// Basic test that `diagnoseFunction` calls the Diagnoser function for the
// number of elements expected.
TEST(DataflowAnalysisTest, DiagnoseFunctionDiagnoserCalledOnEachElement) {
TEST_F(DataflowAnalysisTest, DiagnoseFunctionDiagnoserCalledOnEachElement) {
std::string Code = R"(void target() { int x = 0; ++x; })";
std::unique_ptr<ASTUnit> AST =
tooling::buildASTFromCodeWithArgs(Code, {"-std=c++11"});
Expand All @@ -111,6 +130,99 @@ TEST(DataflowAnalysisTest, DiagnoseFunctionDiagnoserCalledOnEachElement) {
" (Lifetime ends)\n")));
}

// Tests that check we discard state for expressions correctly.
using DiscardExprStateTest = DataflowAnalysisTest;

TEST_F(DiscardExprStateTest, WhileStatement) {
std::string Code = R"(
void foo(int *p);
void target(int *p) {
while (p != nullptr)
foo(p);
}
)";
auto BlockStates = llvm::cantFail(runAnalysis<NoopAnalysis>(
Code, [](ASTContext &C) { return NoopAnalysis(C); }));

auto *NotEqOp = selectFirst<BinaryOperator>(
"op", match(binaryOperator(hasOperatorName("!=")).bind("op"),
AST->getASTContext()));
ASSERT_NE(NotEqOp, nullptr);

auto *CallFoo = selectFirst<CallExpr>(
"call", match(callExpr(callee(functionDecl(hasName("foo")))).bind("call"),
AST->getASTContext()));
ASSERT_NE(CallFoo, nullptr);

// In the block that evaluates the expression `p != nullptr`, this expression
// is associated with a value.
const auto &NotEqOpState = blockStateForStmt(BlockStates, NotEqOp);
EXPECT_NE(NotEqOpState.Env.getValue(*NotEqOp), nullptr);

// In the block that calls `foo(p)`, the value for `p != nullptr` is discarded
// because it is not consumed by this block.
const auto &CallFooState = blockStateForStmt(BlockStates, CallFoo);
EXPECT_EQ(CallFooState.Env.getValue(*NotEqOp), nullptr);
}

TEST_F(DiscardExprStateTest, BooleanOperator) {
std::string Code = R"(
bool target(bool b1, bool b2) {
return b1 && b2;
}
)";
auto BlockStates = llvm::cantFail(runAnalysis<NoopAnalysis>(
Code, [](ASTContext &C) { return NoopAnalysis(C); }));

auto *AndOp = selectFirst<BinaryOperator>(
"op", match(binaryOperator(hasOperatorName("&&")).bind("op"),
AST->getASTContext()));
ASSERT_NE(AndOp, nullptr);

auto *Return = selectFirst<ReturnStmt>(
"return", match(returnStmt().bind("return"), AST->getASTContext()));
ASSERT_NE(Return, nullptr);

// In the block that evaluates the LHS of the `&&` operator, the LHS is
// associated with a value, while the right-hand side is not (unsurprisingly,
// as it hasn't been evaluated yet).
const auto &LHSState = blockStateForStmt(BlockStates, AndOp->getLHS());
auto *LHSValue = cast<BoolValue>(LHSState.Env.getValue(*AndOp->getLHS()));
ASSERT_NE(LHSValue, nullptr);
EXPECT_EQ(LHSState.Env.getValue(*AndOp->getRHS()), nullptr);

// In the block that evaluates the RHS, the RHS is associated with a
// value. The value for the LHS has been discarded as it is not consumed by
// this block.
const auto &RHSState = blockStateForStmt(BlockStates, AndOp->getRHS());
EXPECT_EQ(RHSState.Env.getValue(*AndOp->getLHS()), nullptr);
auto *RHSValue = cast<BoolValue>(RHSState.Env.getValue(*AndOp->getRHS()));
ASSERT_NE(RHSValue, nullptr);

// In the block that evaluates the return statement, the expression `b1 && b2`
// is associated with a value (and check that it's the right one).
// The expressions `b1` and `b2` are _not_ associated with a value in this
// block, even though they are consumed by the block, because:
// * This block has two prececessor blocks (the one that evaluates `b1` and
// the one that evaluates `b2`).
// * `b1` is only associated with a value in the block that evaluates `b1` but
// not the block that evalutes `b2`, so the join operation discards the
// value for `b1`.
// * `b2` is only associated with a value in the block that evaluates `b2` but
// not the block that evaluates `b1`, the the join operation discards the
// value for `b2`.
// Nevertheless, the analysis generates the correct formula for `b1 && b2`
// because the transfer function for the `&&` operator retrieves the values
// for its operands from the environments for the blocks that compute the
// operands, rather than from the environment for the block that contains the
// `&&`.
const auto &ReturnState = blockStateForStmt(BlockStates, Return);
EXPECT_EQ(ReturnState.Env.getValue(*AndOp->getLHS()), nullptr);
EXPECT_EQ(ReturnState.Env.getValue(*AndOp->getRHS()), nullptr);
EXPECT_EQ(ReturnState.Env.getValue(*AndOp),
&ReturnState.Env.makeAnd(*LHSValue, *RHSValue));
}

struct NonConvergingLattice {
int State;

Expand Down Expand Up @@ -142,7 +254,7 @@ class NonConvergingAnalysis
}
};

TEST(DataflowAnalysisTest, NonConvergingAnalysis) {
TEST_F(DataflowAnalysisTest, NonConvergingAnalysis) {
std::string Code = R"(
void target() {
while(true) {}
Expand All @@ -163,7 +275,7 @@ TEST(DataflowAnalysisTest, NonConvergingAnalysis) {
// An earlier version crashed for this condition (for boolean-typed lvalues), so
// this test only verifies that the analysis runs successfully, without
// examining any details of the results.
TEST(DataflowAnalysisTest, JoinBoolLValues) {
TEST_F(DataflowAnalysisTest, JoinBoolLValues) {
std::string Code = R"(
void target() {
for (int x = 1; x; x = 0)
Expand Down