Skip to content

Commit b50c87d

Browse files
burakemirmartinboehme
authored andcommitted
[clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.
In dataflow analysis, SAT solver: simplify formula during CNF construction and short-cut solving when the formula has been recognized as contradictory. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D158407
1 parent 844c0da commit b50c87d

File tree

2 files changed

+189
-50
lines changed

2 files changed

+189
-50
lines changed

clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp

Lines changed: 161 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
//===----------------------------------------------------------------------===//
1313

1414
#include <cassert>
15+
#include <cstddef>
1516
#include <cstdint>
16-
#include <iterator>
1717
#include <queue>
1818
#include <vector>
1919

@@ -23,8 +23,10 @@
2323
#include "llvm/ADT/ArrayRef.h"
2424
#include "llvm/ADT/DenseMap.h"
2525
#include "llvm/ADT/DenseSet.h"
26+
#include "llvm/ADT/SmallVector.h"
2627
#include "llvm/ADT/STLExtras.h"
2728

29+
2830
namespace clang {
2931
namespace dataflow {
3032

@@ -62,6 +64,10 @@ static constexpr Literal NullLit = 0;
6264
/// Returns the positive literal `V`.
6365
static constexpr Literal posLit(Variable V) { return 2 * V; }
6466

67+
static constexpr bool isPosLit(Literal L) { return 0 == (L & 1); }
68+
69+
static constexpr bool isNegLit(Literal L) { return 1 == (L & 1); }
70+
6571
/// Returns the negative literal `!V`.
6672
static constexpr Literal negLit(Variable V) { return 2 * V + 1; }
6773

@@ -125,43 +131,39 @@ struct CNFFormula {
125131
/// formula.
126132
llvm::DenseMap<Variable, Atom> Atomics;
127133

134+
/// Indicates that we already know the formula is unsatisfiable.
135+
/// During construction, we catch simple cases of conflicting unit-clauses.
136+
bool KnownContradictory;
137+
128138
explicit CNFFormula(Variable LargestVar,
129139
llvm::DenseMap<Variable, Atom> Atomics)
130-
: LargestVar(LargestVar), Atomics(std::move(Atomics)) {
140+
: LargestVar(LargestVar), Atomics(std::move(Atomics)),
141+
KnownContradictory(false) {
131142
Clauses.push_back(0);
132143
ClauseStarts.push_back(0);
133144
NextWatched.push_back(0);
134145
const size_t NumLiterals = 2 * LargestVar + 1;
135146
WatchedHead.resize(NumLiterals + 1, 0);
136147
}
137148

138-
/// Adds the `L1 v L2 v L3` clause to the formula. If `L2` or `L3` are
139-
/// `NullLit` they are respectively omitted from the clause.
140-
///
149+
/// Adds the `L1 v ... v Ln` clause to the formula.
141150
/// Requirements:
142151
///
143-
/// `L1` must not be `NullLit`.
152+
/// `Li` must not be `NullLit`.
144153
///
145154
/// All literals in the input that are not `NullLit` must be distinct.
146-
void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) {
147-
// The literals are guaranteed to be distinct from properties of Formula
148-
// and the construction in `buildCNF`.
149-
assert(L1 != NullLit && L1 != L2 && L1 != L3 &&
150-
(L2 != L3 || L2 == NullLit));
155+
void addClause(ArrayRef<Literal> lits) {
156+
assert(!lits.empty());
157+
assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; }));
151158

152159
const ClauseID C = ClauseStarts.size();
153160
const size_t S = Clauses.size();
154161
ClauseStarts.push_back(S);
155-
156-
Clauses.push_back(L1);
157-
if (L2 != NullLit)
158-
Clauses.push_back(L2);
159-
if (L3 != NullLit)
160-
Clauses.push_back(L3);
162+
Clauses.insert(Clauses.end(), lits.begin(), lits.end());
161163

162164
// Designate the first literal as the "watched" literal of the clause.
163-
NextWatched.push_back(WatchedHead[L1]);
164-
WatchedHead[L1] = C;
165+
NextWatched.push_back(WatchedHead[lits.front()]);
166+
WatchedHead[lits.front()] = C;
165167
}
166168

167169
/// Returns the number of literals in clause `C`.
@@ -176,6 +178,84 @@ struct CNFFormula {
176178
}
177179
};
178180

181+
/// Applies simplifications while building up a BooleanFormula.
182+
/// We keep track of unit clauses, which tell us variables that must be
183+
/// true/false in any model that satisfies the overall formula.
184+
/// Such variables can be dropped from subsequently-added clauses, which
185+
/// may in turn yield more unit clauses or even a contradiction.
186+
/// The total added complexity of this preprocessing is O(N) where we
187+
/// for every clause, we do a lookup for each unit clauses.
188+
/// The lookup is O(1) on average. This method won't catch all
189+
/// contradictory formulas, more passes can in principle catch
190+
/// more cases but we leave all these and the general case to the
191+
/// proper SAT solver.
192+
struct CNFFormulaBuilder {
193+
// Formula should outlive CNFFormulaBuilder.
194+
explicit CNFFormulaBuilder(CNFFormula &CNF)
195+
: Formula(CNF) {}
196+
197+
/// Adds the `L1 v ... v Ln` clause to the formula. Applies
198+
/// simplifications, based on single-literal clauses.
199+
///
200+
/// Requirements:
201+
///
202+
/// `Li` must not be `NullLit`.
203+
///
204+
/// All literals must be distinct.
205+
void addClause(ArrayRef<Literal> Literals) {
206+
// We generate clauses with up to 3 literals in this file.
207+
assert(!Literals.empty() && Literals.size() <= 3);
208+
// Contains literals of the simplified clause.
209+
llvm::SmallVector<Literal> Simplified;
210+
for (auto L : Literals) {
211+
assert(L != NullLit &&
212+
llvm::all_of(Simplified,
213+
[L](Literal S) { return S != L; }));
214+
auto X = var(L);
215+
if (trueVars.contains(X)) { // X must be true
216+
if (isPosLit(L))
217+
return; // Omit clause `(... v X v ...)`, it is `true`.
218+
else
219+
continue; // Omit `!X` from `(... v !X v ...)`.
220+
}
221+
if (falseVars.contains(X)) { // X must be false
222+
if (isNegLit(L))
223+
return; // Omit clause `(... v !X v ...)`, it is `true`.
224+
else
225+
continue; // Omit `X` from `(... v X v ...)`.
226+
}
227+
Simplified.push_back(L);
228+
}
229+
if (Simplified.empty()) {
230+
// Simplification made the clause empty, which is equivalent to `false`.
231+
// We already know that this formula is unsatisfiable.
232+
Formula.KnownContradictory = true;
233+
// We can add any of the input literals to get an unsatisfiable formula.
234+
Formula.addClause(Literals[0]);
235+
return;
236+
}
237+
if (Simplified.size() == 1) {
238+
// We have new unit clause.
239+
const Literal lit = Simplified.front();
240+
const Variable v = var(lit);
241+
if (isPosLit(lit))
242+
trueVars.insert(v);
243+
else
244+
falseVars.insert(v);
245+
}
246+
Formula.addClause(Simplified);
247+
}
248+
249+
/// Returns true if we observed a contradiction while adding clauses.
250+
/// In this case then the formula is already known to be unsatisfiable.
251+
bool isKnownContradictory() { return Formula.KnownContradictory; }
252+
253+
private:
254+
CNFFormula &Formula;
255+
llvm::DenseSet<Variable> trueVars;
256+
llvm::DenseSet<Variable> falseVars;
257+
};
258+
179259
/// Converts the conjunction of `Vals` into a formula in conjunctive normal
180260
/// form where each clause has at least one and at most three literals.
181261
CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
@@ -218,11 +298,12 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
218298

219299
CNFFormula CNF(NextVar - 1, std::move(Atomics));
220300
std::vector<bool> ProcessedSubVals(NextVar, false);
301+
CNFFormulaBuilder builder(CNF);
221302

222-
// Add a conjunct for each variable that represents a top-level formula in
223-
// `Vals`.
303+
// Add a conjunct for each variable that represents a top-level conjunction
304+
// value in `Vals`.
224305
for (const Formula *Val : Vals)
225-
CNF.addClause(posLit(GetVar(Val)));
306+
builder.addClause(posLit(GetVar(Val)));
226307

227308
// Add conjuncts that represent the mapping between newly-created variables
228309
// and their corresponding sub-formulas.
@@ -249,15 +330,15 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
249330
// `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is
250331
// already in conjunctive normal form. Below we add each of the
251332
// conjuncts of the latter expression to the result.
252-
CNF.addClause(negLit(Var), posLit(LHS));
253-
CNF.addClause(posLit(Var), negLit(LHS));
333+
builder.addClause({negLit(Var), posLit(LHS)});
334+
builder.addClause({posLit(Var), negLit(LHS)});
254335
} else {
255-
// `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v !B)`
256-
// which is already in conjunctive normal form. Below we add each of the
257-
// conjuncts of the latter expression to the result.
258-
CNF.addClause(negLit(Var), posLit(LHS));
259-
CNF.addClause(negLit(Var), posLit(RHS));
260-
CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS));
336+
// `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v
337+
// !B)` which is already in conjunctive normal form. Below we add each
338+
// of the conjuncts of the latter expression to the result.
339+
builder.addClause({negLit(Var), posLit(LHS)});
340+
builder.addClause({negLit(Var), posLit(RHS)});
341+
builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
261342
}
262343
break;
263344
}
@@ -269,15 +350,15 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
269350
// `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is
270351
// already in conjunctive normal form. Below we add each of the
271352
// conjuncts of the latter expression to the result.
272-
CNF.addClause(negLit(Var), posLit(LHS));
273-
CNF.addClause(posLit(Var), negLit(LHS));
353+
builder.addClause({negLit(Var), posLit(LHS)});
354+
builder.addClause({posLit(Var), negLit(LHS)});
274355
} else {
275356
// `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v
276357
// !B)` which is already in conjunctive normal form. Below we add each
277358
// of the conjuncts of the latter expression to the result.
278-
CNF.addClause(negLit(Var), posLit(LHS), posLit(RHS));
279-
CNF.addClause(posLit(Var), negLit(LHS));
280-
CNF.addClause(posLit(Var), negLit(RHS));
359+
builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)});
360+
builder.addClause({posLit(Var), negLit(LHS)});
361+
builder.addClause({posLit(Var), negLit(RHS)});
281362
}
282363
break;
283364
}
@@ -287,8 +368,8 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
287368
// `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is
288369
// already in conjunctive normal form. Below we add each of the
289370
// conjuncts of the latter expression to the result.
290-
CNF.addClause(negLit(Var), negLit(Operand));
291-
CNF.addClause(posLit(Var), posLit(Operand));
371+
builder.addClause({negLit(Var), negLit(Operand)});
372+
builder.addClause({posLit(Var), posLit(Operand)});
292373
break;
293374
}
294375
case Formula::Implies: {
@@ -299,20 +380,20 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
299380
// `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in
300381
// conjunctive normal form. Below we add each of the conjuncts of
301382
// the latter expression to the result.
302-
CNF.addClause(posLit(Var), posLit(LHS));
303-
CNF.addClause(posLit(Var), negLit(RHS));
304-
CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS));
383+
builder.addClause({posLit(Var), posLit(LHS)});
384+
builder.addClause({posLit(Var), negLit(RHS)});
385+
builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
305386
break;
306387
}
307388
case Formula::Equal: {
308389
const Variable LHS = GetVar(Val->operands()[0]);
309390
const Variable RHS = GetVar(Val->operands()[1]);
310391

311392
if (LHS == RHS) {
312-
// `X <=> (A <=> A)` is equvalent to `X` which is already in
393+
// `X <=> (A <=> A)` is equivalent to `X` which is already in
313394
// conjunctive normal form. Below we add each of the conjuncts of the
314395
// latter expression to the result.
315-
CNF.addClause(posLit(Var));
396+
builder.addClause(posLit(Var));
316397

317398
// No need to visit the sub-values of `Val`.
318399
continue;
@@ -321,18 +402,43 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
321402
// `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which
322403
// is already in conjunctive normal form. Below we add each of the
323404
// conjuncts of the latter expression to the result.
324-
CNF.addClause(posLit(Var), posLit(LHS), posLit(RHS));
325-
CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS));
326-
CNF.addClause(negLit(Var), posLit(LHS), negLit(RHS));
327-
CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS));
405+
builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)});
406+
builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
407+
builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)});
408+
builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
328409
break;
329410
}
330411
}
412+
if (builder.isKnownContradictory()) {
413+
return CNF;
414+
}
331415
for (const Formula *Child : Val->operands())
332416
UnprocessedSubVals.push(Child);
333417
}
334418

335-
return CNF;
419+
// Unit clauses that were added later were not
420+
// considered for the simplification of earlier clauses. Do a final
421+
// pass to find more opportunities for simplification.
422+
CNFFormula FinalCNF(NextVar - 1, std::move(CNF.Atomics));
423+
CNFFormulaBuilder FinalBuilder(FinalCNF);
424+
425+
// Collect unit clauses.
426+
for (ClauseID C = 1; C < CNF.ClauseStarts.size(); ++C) {
427+
if (CNF.clauseSize(C) == 1) {
428+
FinalBuilder.addClause(CNF.clauseLiterals(C)[0]);
429+
}
430+
}
431+
432+
// Add all clauses that were added previously, preserving the order.
433+
for (ClauseID C = 1; C < CNF.ClauseStarts.size(); ++C) {
434+
FinalBuilder.addClause(CNF.clauseLiterals(C));
435+
if (FinalBuilder.isKnownContradictory()) {
436+
break;
437+
}
438+
}
439+
// It is possible there were new unit clauses again, but
440+
// we stop here and leave the rest to the solver algorithm.
441+
return FinalCNF;
336442
}
337443

338444
class WatchedLiteralsSolverImpl {
@@ -414,6 +520,11 @@ class WatchedLiteralsSolverImpl {
414520
// Returns the `Result` and the number of iterations "remaining" from
415521
// `MaxIterations` (that is, `MaxIterations` - iterations in this call).
416522
std::pair<Solver::Result, std::int64_t> solve(std::int64_t MaxIterations) && {
523+
if (CNF.KnownContradictory) {
524+
// Short-cut the solving process. We already found out at CNF
525+
// construction time that the formula is unsatisfiable.
526+
return std::make_pair(Solver::Result::Unsatisfiable(), MaxIterations);
527+
}
417528
size_t I = 0;
418529
while (I < ActiveVars.size()) {
419530
if (MaxIterations == 0)
@@ -503,7 +614,8 @@ class WatchedLiteralsSolverImpl {
503614
++I;
504615
}
505616
}
506-
return std::make_pair(Solver::Result::Satisfiable(buildSolution()), MaxIterations);
617+
return std::make_pair(Solver::Result::Satisfiable(buildSolution()),
618+
MaxIterations);
507619
}
508620

509621
private:
@@ -672,8 +784,7 @@ Solver::Result
672784
WatchedLiteralsSolver::solve(llvm::ArrayRef<const Formula *> Vals) {
673785
if (Vals.empty())
674786
return Solver::Result::Satisfiable({{}});
675-
auto [Res, Iterations] =
676-
WatchedLiteralsSolverImpl(Vals).solve(MaxIterations);
787+
auto [Res, Iterations] = WatchedLiteralsSolverImpl(Vals).solve(MaxIterations);
677788
MaxIterations = Iterations;
678789
return Res;
679790
}

clang/unittests/Analysis/FlowSensitive/SolverTest.cpp

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,4 +369,32 @@ TEST(SolverTest, ReachedLimitsReflectsTimeouts) {
369369
EXPECT_TRUE(solver.reachedLimit());
370370
}
371371

372+
TEST(SolverTest, SimpleButLargeContradiction) {
373+
// This test ensures that the solver takes a short-cut on known
374+
// contradictory inputs, without using max_iterations. At the time
375+
// this test is added, formulas that are easily recognized to be
376+
// contradictory at CNF construction time would lead to timeout.
377+
WatchedLiteralsSolver solver(10);
378+
ConstraintContext Ctx;
379+
auto first = Ctx.atom();
380+
auto last = first;
381+
for (int i = 1; i < 10000; ++i) {
382+
last = Ctx.conj(last, Ctx.atom());
383+
}
384+
last = Ctx.conj(Ctx.neg(first), last);
385+
ASSERT_EQ(solver.solve({last}).getStatus(),
386+
Solver::Result::Status::Unsatisfiable);
387+
EXPECT_FALSE(solver.reachedLimit());
388+
389+
first = Ctx.atom();
390+
last = Ctx.neg(first);
391+
for (int i = 1; i < 10000; ++i) {
392+
last = Ctx.conj(last, Ctx.neg(Ctx.atom()));
393+
}
394+
last = Ctx.conj(first, last);
395+
ASSERT_EQ(solver.solve({last}).getStatus(),
396+
Solver::Result::Status::Unsatisfiable);
397+
EXPECT_FALSE(solver.reachedLimit());
398+
}
399+
372400
} // namespace

0 commit comments

Comments
 (0)