Skip to content

Commit 37813e0

Browse files
authored
[clang][dataflow] Make CNFFormula externally accessible. (#92401)
This component can be useful when creating implementations of `Solver`, as some SAT solvers require the input to be in 3-CNF. As part of making `CNFFormula` externally accessible, I have moved some member variables out of it that aren't really part of the representation of a 3-CNF formula and thus live better elsewhere: * `WatchedHead` and `NextWatched` have been moved to `WatchedLiteralsSolverImpl`, as they're part of the specific algorithm used by that SAT solver. * `Atomics` has become an output parameter of `buildCNF()` because it has to do with the relationship between a `CNFFormula` and the set of `Formula`s it is derived from rather than being an integral part of the representation of a 3-CNF formula. I have also made all member variables private and added appropriate accessors.
1 parent 352dc7d commit 37813e0

File tree

7 files changed

+543
-433
lines changed

7 files changed

+543
-433
lines changed

clang/docs/tools/clang-formatted-files.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -124,6 +124,7 @@ clang/include/clang/Analysis/Analyses/CFGReachabilityAnalysis.h
124124
clang/include/clang/Analysis/Analyses/ExprMutationAnalyzer.h
125125
clang/include/clang/Analysis/FlowSensitive/AdornedCFG.h
126126
clang/include/clang/Analysis/FlowSensitive/ASTOps.h
127+
clang/include/clang/Analysis/FlowSensitive/CNFFormula.h
127128
clang/include/clang/Analysis/FlowSensitive/DataflowAnalysis.h
128129
clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h
129130
clang/include/clang/Analysis/FlowSensitive/DataflowEnvironment.h
@@ -621,6 +622,7 @@ clang/tools/libclang/CXCursor.h
621622
clang/tools/scan-build-py/tests/functional/src/include/clean-one.h
622623
clang/unittests/Analysis/CFGBuildResult.h
623624
clang/unittests/Analysis/MacroExpansionContextTest.cpp
625+
clang/unittests/Analysis/FlowSensitive/CNFFormula.cpp
624626
clang/unittests/Analysis/FlowSensitive/DataflowAnalysisContextTest.cpp
625627
clang/unittests/Analysis/FlowSensitive/DataflowEnvironmentTest.cpp
626628
clang/unittests/Analysis/FlowSensitive/MapLatticeTest.cpp
@@ -632,6 +634,7 @@ clang/unittests/Analysis/FlowSensitive/TestingSupport.cpp
632634
clang/unittests/Analysis/FlowSensitive/TestingSupport.h
633635
clang/unittests/Analysis/FlowSensitive/TestingSupportTest.cpp
634636
clang/unittests/Analysis/FlowSensitive/TypeErasedDataflowAnalysisTest.cpp
637+
clang/unittests/Analysis/FlowSensitive/WatchedLiteralsSolver.cpp
635638
clang/unittests/Analysis/FlowSensitive/WatchedLiteralsSolverTest.cpp
636639
clang/unittests/AST/ASTImporterFixtures.cpp
637640
clang/unittests/AST/ASTImporterFixtures.h
Lines changed: 179 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
//===- CNFFormula.h ---------------------------------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
//
9+
// A representation of a boolean formula in 3-CNF.
10+
//
11+
//===----------------------------------------------------------------------===//
12+
13+
#ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CNFFORMULA_H
14+
#define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CNFFORMULA_H
15+
16+
#include <cstdint>
17+
#include <vector>
18+
19+
#include "clang/Analysis/FlowSensitive/Formula.h"
20+
21+
namespace clang {
22+
namespace dataflow {
23+
24+
/// Boolean variables are represented as positive integers.
25+
using Variable = uint32_t;
26+
27+
/// A null boolean variable is used as a placeholder in various data structures
28+
/// and algorithms.
29+
constexpr Variable NullVar = 0;
30+
31+
/// Literals are represented as positive integers. Specifically, for a boolean
32+
/// variable `V` that is represented as the positive integer `I`, the positive
33+
/// literal `V` is represented as the integer `2*I` and the negative literal
34+
/// `!V` is represented as the integer `2*I+1`.
35+
using Literal = uint32_t;
36+
37+
/// A null literal is used as a placeholder in various data structures and
38+
/// algorithms.
39+
constexpr Literal NullLit = 0;
40+
41+
/// Clause identifiers are represented as positive integers.
42+
using ClauseID = uint32_t;
43+
44+
/// A null clause identifier is used as a placeholder in various data structures
45+
/// and algorithms.
46+
constexpr ClauseID NullClause = 0;
47+
48+
/// Returns the positive literal `V`.
49+
inline constexpr Literal posLit(Variable V) { return 2 * V; }
50+
51+
/// Returns the negative literal `!V`.
52+
inline constexpr Literal negLit(Variable V) { return 2 * V + 1; }
53+
54+
/// Returns whether `L` is a positive literal.
55+
inline constexpr bool isPosLit(Literal L) { return 0 == (L & 1); }
56+
57+
/// Returns whether `L` is a negative literal.
58+
inline constexpr bool isNegLit(Literal L) { return 1 == (L & 1); }
59+
60+
/// Returns the negated literal `!L`.
61+
inline constexpr Literal notLit(Literal L) { return L ^ 1; }
62+
63+
/// Returns the variable of `L`.
64+
inline constexpr Variable var(Literal L) { return L >> 1; }
65+
66+
/// A boolean formula in 3-CNF (conjunctive normal form with at most 3 literals
67+
/// per clause).
68+
class CNFFormula {
69+
/// `LargestVar` is equal to the largest positive integer that represents a
70+
/// variable in the formula.
71+
const Variable LargestVar;
72+
73+
/// Literals of all clauses in the formula.
74+
///
75+
/// The element at index 0 stands for the literal in the null clause. It is
76+
/// set to 0 and isn't used. Literals of clauses in the formula start from the
77+
/// element at index 1.
78+
///
79+
/// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of
80+
/// `Clauses` will be `[0, L1, L2, L2, L3, L4]`.
81+
std::vector<Literal> Clauses;
82+
83+
/// Start indices of clauses of the formula in `Clauses`.
84+
///
85+
/// The element at index 0 stands for the start index of the null clause. It
86+
/// is set to 0 and isn't used. Start indices of clauses in the formula start
87+
/// from the element at index 1.
88+
///
89+
/// For example, for the formula `(L1 v L2) ^ (L2 v L3 v L4)` the elements of
90+
/// `ClauseStarts` will be `[0, 1, 3]`. Note that the literals of the first
91+
/// clause always start at index 1. The start index for the literals of the
92+
/// second clause depends on the size of the first clause and so on.
93+
std::vector<size_t> ClauseStarts;
94+
95+
/// Indicates that we already know the formula is unsatisfiable.
96+
/// During construction, we catch simple cases of conflicting unit-clauses.
97+
bool KnownContradictory;
98+
99+
public:
100+
explicit CNFFormula(Variable LargestVar);
101+
102+
/// Adds the `L1 v ... v Ln` clause to the formula.
103+
/// Requirements:
104+
///
105+
/// `Li` must not be `NullLit`.
106+
///
107+
/// All literals in the input that are not `NullLit` must be distinct.
108+
void addClause(ArrayRef<Literal> lits);
109+
110+
/// Returns whether the formula is known to be contradictory.
111+
/// This is the case if any of the clauses is empty.
112+
bool knownContradictory() const { return KnownContradictory; }
113+
114+
/// Returns the largest variable in the formula.
115+
Variable largestVar() const { return LargestVar; }
116+
117+
/// Returns the number of clauses in the formula.
118+
/// Valid clause IDs are in the range [1, `numClauses()`].
119+
ClauseID numClauses() const { return ClauseStarts.size() - 1; }
120+
121+
/// Returns the number of literals in clause `C`.
122+
size_t clauseSize(ClauseID C) const {
123+
return C == ClauseStarts.size() - 1 ? Clauses.size() - ClauseStarts[C]
124+
: ClauseStarts[C + 1] - ClauseStarts[C];
125+
}
126+
127+
/// Returns the literals of clause `C`.
128+
/// If `knownContradictory()` is false, each clause has at least one literal.
129+
llvm::ArrayRef<Literal> clauseLiterals(ClauseID C) const {
130+
size_t S = clauseSize(C);
131+
if (S == 0)
132+
return llvm::ArrayRef<Literal>();
133+
return llvm::ArrayRef<Literal>(&Clauses[ClauseStarts[C]], S);
134+
}
135+
136+
/// An iterator over all literals of all clauses in the formula.
137+
/// The iterator allows mutation of the literal through the `*` operator.
138+
/// This is to support solvers that mutate the formula during solving.
139+
class Iterator {
140+
friend class CNFFormula;
141+
CNFFormula *CNF;
142+
size_t Idx;
143+
Iterator(CNFFormula *CNF, size_t Idx) : CNF(CNF), Idx(Idx) {}
144+
145+
public:
146+
Iterator(const Iterator &) = default;
147+
Iterator &operator=(const Iterator &) = default;
148+
149+
Iterator &operator++() {
150+
++Idx;
151+
assert(Idx < CNF->Clauses.size() && "Iterator out of bounds");
152+
return *this;
153+
}
154+
155+
Iterator next() const {
156+
Iterator I = *this;
157+
++I;
158+
return I;
159+
}
160+
161+
Literal &operator*() const { return CNF->Clauses[Idx]; }
162+
};
163+
friend class Iterator;
164+
165+
/// Returns an iterator to the first literal of clause `C`.
166+
Iterator startOfClause(ClauseID C) { return Iterator(this, ClauseStarts[C]); }
167+
};
168+
169+
/// Converts the conjunction of `Vals` into a formula in conjunctive normal
170+
/// form where each clause has at least one and at most three literals.
171+
/// `Atomics` is populated with a mapping from `Variables` to the corresponding
172+
/// `Atom`s for atomic booleans in the input formulas.
173+
CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas,
174+
llvm::DenseMap<Variable, Atom> &Atomics);
175+
176+
} // namespace dataflow
177+
} // namespace clang
178+
179+
#endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_CNFFORMULA_H

clang/include/clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -17,16 +17,17 @@
1717
#include "clang/Analysis/FlowSensitive/Formula.h"
1818
#include "clang/Analysis/FlowSensitive/Solver.h"
1919
#include "llvm/ADT/ArrayRef.h"
20-
#include <limits>
2120

2221
namespace clang {
2322
namespace dataflow {
2423

2524
/// A SAT solver that is an implementation of Algorithm D from Knuth's The Art
2625
/// of Computer Programming Volume 4: Satisfiability, Fascicle 6. It is based on
27-
/// the Davis-Putnam-Logemann-Loveland (DPLL) algorithm, keeps references to a
28-
/// single "watched" literal per clause, and uses a set of "active" variables
26+
/// the Davis-Putnam-Logemann-Loveland (DPLL) algorithm [1], keeps references to
27+
/// a single "watched" literal per clause, and uses a set of "active" variables
2928
/// for unit propagation.
29+
//
30+
// [1] https://en.wikipedia.org/wiki/DPLL_algorithm
3031
class WatchedLiteralsSolver : public Solver {
3132
// Count of the iterations of the main loop of the solver. This spans *all*
3233
// calls to the underlying solver across the life of this object. It is

clang/lib/Analysis/FlowSensitive/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ add_clang_library(clangAnalysisFlowSensitive
22
AdornedCFG.cpp
33
Arena.cpp
44
ASTOps.cpp
5+
CNFFormula.cpp
56
DataflowAnalysisContext.cpp
67
DataflowEnvironment.cpp
78
Formula.cpp

0 commit comments

Comments
 (0)