[clang][dataflow] #llvm #flow-analysis Simplify formula at CNF construction time, and short-cut solving of known contradictory formulas.

burakemir · martinboehme · commit b50c87d1e63f · 2023-09-05T06:23:04.000Z
In dataflow analysis, SAT solver: simplify formula during CNF construction and short-cut solving when the formula has been recognized as contradictory. Reviewed By: sammccall Differential Revision: https://reviews.llvm.org/D158407
diff --git a/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp b/clang/lib/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
@@ -12,8 +12,8 @@
 //===----------------------------------------------------------------------===//
 
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
-#include <iterator>
 #include <queue>
 #include <vector>
 
@@ -23,8 +23,10 @@
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/STLExtras.h"
 
+
 namespace clang {
 namespace dataflow {
 
@@ -62,6 +64,10 @@ static constexpr Literal NullLit = 0;
 /// Returns the positive literal `V`.
 static constexpr Literal posLit(Variable V) { return 2 * V; }
 
+static constexpr bool isPosLit(Literal L) { return 0 == (L & 1); }
+
+static constexpr bool isNegLit(Literal L) { return 1 == (L & 1); }
+
 /// Returns the negative literal `!V`.
 static constexpr Literal negLit(Variable V) { return 2 * V + 1; }
 
@@ -125,43 +131,39 @@ struct CNFFormula {
   /// formula.
   llvm::DenseMap<Variable, Atom> Atomics;
 
+  /// Indicates that we already know the formula is unsatisfiable.
+  /// During construction, we catch simple cases of conflicting unit-clauses.
+  bool KnownContradictory;
+
   explicit CNFFormula(Variable LargestVar,
                       llvm::DenseMap<Variable, Atom> Atomics)
-      : LargestVar(LargestVar), Atomics(std::move(Atomics)) {
+      : LargestVar(LargestVar), Atomics(std::move(Atomics)),
+        KnownContradictory(false) {
     Clauses.push_back(0);
     ClauseStarts.push_back(0);
     NextWatched.push_back(0);
     const size_t NumLiterals = 2 * LargestVar + 1;
     WatchedHead.resize(NumLiterals + 1, 0);
   }
 
-  /// Adds the `L1 v L2 v L3` clause to the formula. If `L2` or `L3` are
-  /// `NullLit` they are respectively omitted from the clause.
-  ///
+  /// Adds the `L1 v ... v Ln` clause to the formula.
   /// Requirements:
   ///
-  ///  `L1` must not be `NullLit`.
+  ///  `Li` must not be `NullLit`.
   ///
   ///  All literals in the input that are not `NullLit` must be distinct.
-  void addClause(Literal L1, Literal L2 = NullLit, Literal L3 = NullLit) {
-    // The literals are guaranteed to be distinct from properties of Formula
-    // and the construction in `buildCNF`.
-    assert(L1 != NullLit && L1 != L2 && L1 != L3 &&
-           (L2 != L3 || L2 == NullLit));
+  void addClause(ArrayRef<Literal> lits) {
+    assert(!lits.empty());
+    assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; }));
 
     const ClauseID C = ClauseStarts.size();
     const size_t S = Clauses.size();
     ClauseStarts.push_back(S);
-
-    Clauses.push_back(L1);
-    if (L2 != NullLit)
-      Clauses.push_back(L2);
-    if (L3 != NullLit)
-      Clauses.push_back(L3);
+    Clauses.insert(Clauses.end(), lits.begin(), lits.end());
 
     // Designate the first literal as the "watched" literal of the clause.
-    NextWatched.push_back(WatchedHead[L1]);
-    WatchedHead[L1] = C;
+    NextWatched.push_back(WatchedHead[lits.front()]);
+    WatchedHead[lits.front()] = C;
   }
 
   /// Returns the number of literals in clause `C`.
@@ -176,6 +178,84 @@ struct CNFFormula {
   }
 };
 
+/// Applies simplifications while building up a BooleanFormula.
+/// We keep track of unit clauses, which tell us variables that must be
+/// true/false in any model that satisfies the overall formula.
+/// Such variables can be dropped from subsequently-added clauses, which
+/// may in turn yield more unit clauses or even a contradiction.
+/// The total added complexity of this preprocessing is O(N) where we
+/// for every clause, we do a lookup for each unit clauses.
+/// The lookup is O(1) on average. This method won't catch all
+/// contradictory formulas, more passes can in principle catch
+/// more cases but we leave all these and the general case to the
+/// proper SAT solver.
+struct CNFFormulaBuilder {
+  // Formula should outlive CNFFormulaBuilder.
+  explicit CNFFormulaBuilder(CNFFormula &CNF)
+      : Formula(CNF) {}
+
+  /// Adds the `L1 v ... v Ln` clause to the formula. Applies
+  /// simplifications, based on single-literal clauses.
+  ///
+  /// Requirements:
+  ///
+  ///  `Li` must not be `NullLit`.
+  ///
+  ///  All literals must be distinct.
+  void addClause(ArrayRef<Literal> Literals) {
+    // We generate clauses with up to 3 literals in this file.
+    assert(!Literals.empty() && Literals.size() <= 3);
+    // Contains literals of the simplified clause.
+    llvm::SmallVector<Literal> Simplified;
+    for (auto L : Literals) {
+      assert(L != NullLit &&
+             llvm::all_of(Simplified,
+                          [L](Literal S) { return  S != L; }));
+      auto X = var(L);
+      if (trueVars.contains(X)) { // X must be true
+        if (isPosLit(L))
+          return; // Omit clause `(... v X v ...)`, it is `true`.
+        else
+          continue; // Omit `!X` from `(... v !X v ...)`.
+      }
+      if (falseVars.contains(X)) { // X must be false
+        if (isNegLit(L))
+          return; // Omit clause `(... v !X v ...)`, it is `true`.
+        else
+          continue; // Omit `X` from `(... v X v ...)`.
+      }
+      Simplified.push_back(L);
+    }
+    if (Simplified.empty()) {
+      // Simplification made the clause empty, which is equivalent to `false`.
+      // We already know that this formula is unsatisfiable.
+      Formula.KnownContradictory = true;
+      // We can add any of the input literals to get an unsatisfiable formula.
+      Formula.addClause(Literals[0]);
+      return;
+    }
+    if (Simplified.size() == 1) {
+      // We have new unit clause.
+      const Literal lit = Simplified.front();
+      const Variable v = var(lit);
+      if (isPosLit(lit))
+        trueVars.insert(v);
+      else
+        falseVars.insert(v);
+    }
+    Formula.addClause(Simplified);
+  }
+
+  /// Returns true if we observed a contradiction while adding clauses.
+  /// In this case then the formula is already known to be unsatisfiable.
+  bool isKnownContradictory() { return Formula.KnownContradictory; }
+
+private:
+  CNFFormula &Formula;
+  llvm::DenseSet<Variable> trueVars;
+  llvm::DenseSet<Variable> falseVars;
+};
+
 /// Converts the conjunction of `Vals` into a formula in conjunctive normal
 /// form where each clause has at least one and at most three literals.
 CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
@@ -218,11 +298,12 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
 
   CNFFormula CNF(NextVar - 1, std::move(Atomics));
   std::vector<bool> ProcessedSubVals(NextVar, false);
+  CNFFormulaBuilder builder(CNF);
 
-  // Add a conjunct for each variable that represents a top-level formula in
-  // `Vals`.
+  // Add a conjunct for each variable that represents a top-level conjunction
+  // value in `Vals`.
   for (const Formula *Val : Vals)
-    CNF.addClause(posLit(GetVar(Val)));
+    builder.addClause(posLit(GetVar(Val)));
 
   // Add conjuncts that represent the mapping between newly-created variables
   // and their corresponding sub-formulas.
@@ -249,15 +330,15 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
         // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is
         // already in conjunctive normal form. Below we add each of the
         // conjuncts of the latter expression to the result.
-        CNF.addClause(negLit(Var), posLit(LHS));
-        CNF.addClause(posLit(Var), negLit(LHS));
+        builder.addClause({negLit(Var), posLit(LHS)});
+        builder.addClause({posLit(Var), negLit(LHS)});
       } else {
-        // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v !B)`
-        // which is already in conjunctive normal form. Below we add each of the
-        // conjuncts of the latter expression to the result.
-        CNF.addClause(negLit(Var), posLit(LHS));
-        CNF.addClause(negLit(Var), posLit(RHS));
-        CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS));
+        // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v
+        // !B)` which is already in conjunctive normal form. Below we add each
+        // of the conjuncts of the latter expression to the result.
+        builder.addClause({negLit(Var), posLit(LHS)});
+        builder.addClause({negLit(Var), posLit(RHS)});
+        builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
       }
       break;
     }
@@ -269,15 +350,15 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
         // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is
         // already in conjunctive normal form. Below we add each of the
         // conjuncts of the latter expression to the result.
-        CNF.addClause(negLit(Var), posLit(LHS));
-        CNF.addClause(posLit(Var), negLit(LHS));
+        builder.addClause({negLit(Var), posLit(LHS)});
+        builder.addClause({posLit(Var), negLit(LHS)});
       } else {
         // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v
         // !B)` which is already in conjunctive normal form. Below we add each
         // of the conjuncts of the latter expression to the result.
-        CNF.addClause(negLit(Var), posLit(LHS), posLit(RHS));
-        CNF.addClause(posLit(Var), negLit(LHS));
-        CNF.addClause(posLit(Var), negLit(RHS));
+        builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)});
+        builder.addClause({posLit(Var), negLit(LHS)});
+        builder.addClause({posLit(Var), negLit(RHS)});
       }
       break;
     }
@@ -287,8 +368,8 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
       // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is
       // already in conjunctive normal form. Below we add each of the
       // conjuncts of the latter expression to the result.
-      CNF.addClause(negLit(Var), negLit(Operand));
-      CNF.addClause(posLit(Var), posLit(Operand));
+      builder.addClause({negLit(Var), negLit(Operand)});
+      builder.addClause({posLit(Var), posLit(Operand)});
       break;
     }
     case Formula::Implies: {
@@ -299,20 +380,20 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
       // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in
       // conjunctive normal form. Below we add each of the conjuncts of
       // the latter expression to the result.
-      CNF.addClause(posLit(Var), posLit(LHS));
-      CNF.addClause(posLit(Var), negLit(RHS));
-      CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS));
+      builder.addClause({posLit(Var), posLit(LHS)});
+      builder.addClause({posLit(Var), negLit(RHS)});
+      builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
       break;
     }
     case Formula::Equal: {
       const Variable LHS = GetVar(Val->operands()[0]);
       const Variable RHS = GetVar(Val->operands()[1]);
 
       if (LHS == RHS) {
-        // `X <=> (A <=> A)` is equvalent to `X` which is already in
+        // `X <=> (A <=> A)` is equivalent to `X` which is already in
         // conjunctive normal form. Below we add each of the conjuncts of the
         // latter expression to the result.
-        CNF.addClause(posLit(Var));
+        builder.addClause(posLit(Var));
 
         // No need to visit the sub-values of `Val`.
         continue;
@@ -321,18 +402,43 @@ CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Vals) {
       // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which
       // is already in conjunctive normal form. Below we add each of the
       // conjuncts of the latter expression to the result.
-      CNF.addClause(posLit(Var), posLit(LHS), posLit(RHS));
-      CNF.addClause(posLit(Var), negLit(LHS), negLit(RHS));
-      CNF.addClause(negLit(Var), posLit(LHS), negLit(RHS));
-      CNF.addClause(negLit(Var), negLit(LHS), posLit(RHS));
+      builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)});
+      builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
+      builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)});
+      builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
       break;
     }
     }
+    if (builder.isKnownContradictory()) {
+      return CNF;
+    }
     for (const Formula *Child : Val->operands())
       UnprocessedSubVals.push(Child);
   }
 
-  return CNF;
+  // Unit clauses that were added later were not
+  // considered for the simplification of earlier clauses. Do a final
+  // pass to find more opportunities for simplification.
+  CNFFormula FinalCNF(NextVar - 1, std::move(CNF.Atomics));
+  CNFFormulaBuilder FinalBuilder(FinalCNF);
+
+  // Collect unit clauses.
+  for (ClauseID C = 1; C < CNF.ClauseStarts.size(); ++C) {
+    if (CNF.clauseSize(C) == 1) {
+      FinalBuilder.addClause(CNF.clauseLiterals(C)[0]);
+    }
+  }
+
+  // Add all clauses that were added previously, preserving the order.
+  for (ClauseID C = 1; C < CNF.ClauseStarts.size(); ++C) {
+    FinalBuilder.addClause(CNF.clauseLiterals(C));
+    if (FinalBuilder.isKnownContradictory()) {
+      break;
+    }
+  }
+  // It is possible there were new unit clauses again, but
+  // we stop here and leave the rest to the solver algorithm.
+  return FinalCNF;
 }
 
 class WatchedLiteralsSolverImpl {
@@ -414,6 +520,11 @@ class WatchedLiteralsSolverImpl {
   // Returns the `Result` and the number of iterations "remaining" from
   // `MaxIterations` (that is, `MaxIterations` - iterations in this call).
   std::pair<Solver::Result, std::int64_t> solve(std::int64_t MaxIterations) && {
+    if (CNF.KnownContradictory) {
+      // Short-cut the solving process. We already found out at CNF
+      // construction time that the formula is unsatisfiable.
+      return std::make_pair(Solver::Result::Unsatisfiable(), MaxIterations);
+    }
     size_t I = 0;
     while (I < ActiveVars.size()) {
       if (MaxIterations == 0)
@@ -503,7 +614,8 @@ class WatchedLiteralsSolverImpl {
         ++I;
       }
     }
-    return std::make_pair(Solver::Result::Satisfiable(buildSolution()), MaxIterations);
+    return std::make_pair(Solver::Result::Satisfiable(buildSolution()),
+                          MaxIterations);
   }
 
 private:
@@ -672,8 +784,7 @@ Solver::Result
 WatchedLiteralsSolver::solve(llvm::ArrayRef<const Formula *> Vals) {
   if (Vals.empty())
     return Solver::Result::Satisfiable({{}});
-  auto [Res, Iterations] =
-      WatchedLiteralsSolverImpl(Vals).solve(MaxIterations);
+  auto [Res, Iterations] = WatchedLiteralsSolverImpl(Vals).solve(MaxIterations);
   MaxIterations = Iterations;
   return Res;
 }
diff --git a/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp b/clang/unittests/Analysis/FlowSensitive/SolverTest.cpp
@@ -369,4 +369,32 @@ TEST(SolverTest, ReachedLimitsReflectsTimeouts) {
   EXPECT_TRUE(solver.reachedLimit());
 }
 
+TEST(SolverTest, SimpleButLargeContradiction) {
+  // This test ensures that the solver takes a short-cut on known
+  // contradictory inputs, without using max_iterations. At the time
+  // this test is added, formulas that are easily recognized to be
+  // contradictory at CNF construction time would lead to timeout.
+  WatchedLiteralsSolver solver(10);
+  ConstraintContext Ctx;
+  auto first = Ctx.atom();
+  auto last = first;
+  for (int i = 1; i < 10000; ++i) {
+    last = Ctx.conj(last, Ctx.atom());
+  }
+  last = Ctx.conj(Ctx.neg(first), last);
+  ASSERT_EQ(solver.solve({last}).getStatus(),
+            Solver::Result::Status::Unsatisfiable);
+  EXPECT_FALSE(solver.reachedLimit());
+
+  first = Ctx.atom();
+  last = Ctx.neg(first);
+  for (int i = 1; i < 10000; ++i) {
+    last = Ctx.conj(last, Ctx.neg(Ctx.atom()));
+  }
+  last = Ctx.conj(first, last);
+  ASSERT_EQ(solver.solve({last}).getStatus(),
+            Solver::Result::Status::Unsatisfiable);
+  EXPECT_FALSE(solver.reachedLimit());
+}
+
 } // namespace