xref: /freebsd-src/contrib/llvm-project/clang/lib/Analysis/FlowSensitive/CNFFormula.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1*0fca6ea1SDimitry Andric //===- CNFFormula.cpp -------------------------------------------*- C++ -*-===//
2*0fca6ea1SDimitry Andric //
3*0fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4*0fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5*0fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6*0fca6ea1SDimitry Andric //
7*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
8*0fca6ea1SDimitry Andric //
9*0fca6ea1SDimitry Andric //  A representation of a boolean formula in 3-CNF.
10*0fca6ea1SDimitry Andric //
11*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===//
12*0fca6ea1SDimitry Andric 
13*0fca6ea1SDimitry Andric #include "clang/Analysis/FlowSensitive/CNFFormula.h"
14*0fca6ea1SDimitry Andric #include "llvm/ADT/DenseSet.h"
15*0fca6ea1SDimitry Andric 
16*0fca6ea1SDimitry Andric #include <queue>
17*0fca6ea1SDimitry Andric 
18*0fca6ea1SDimitry Andric namespace clang {
19*0fca6ea1SDimitry Andric namespace dataflow {
20*0fca6ea1SDimitry Andric 
21*0fca6ea1SDimitry Andric namespace {
22*0fca6ea1SDimitry Andric 
23*0fca6ea1SDimitry Andric /// Applies simplifications while building up a BooleanFormula.
24*0fca6ea1SDimitry Andric /// We keep track of unit clauses, which tell us variables that must be
25*0fca6ea1SDimitry Andric /// true/false in any model that satisfies the overall formula.
26*0fca6ea1SDimitry Andric /// Such variables can be dropped from subsequently-added clauses, which
27*0fca6ea1SDimitry Andric /// may in turn yield more unit clauses or even a contradiction.
28*0fca6ea1SDimitry Andric /// The total added complexity of this preprocessing is O(N) where we
29*0fca6ea1SDimitry Andric /// for every clause, we do a lookup for each unit clauses.
30*0fca6ea1SDimitry Andric /// The lookup is O(1) on average. This method won't catch all
31*0fca6ea1SDimitry Andric /// contradictory formulas, more passes can in principle catch
32*0fca6ea1SDimitry Andric /// more cases but we leave all these and the general case to the
33*0fca6ea1SDimitry Andric /// proper SAT solver.
34*0fca6ea1SDimitry Andric struct CNFFormulaBuilder {
35*0fca6ea1SDimitry Andric   // Formula should outlive CNFFormulaBuilder.
36*0fca6ea1SDimitry Andric   explicit CNFFormulaBuilder(CNFFormula &CNF) : Formula(CNF) {}
37*0fca6ea1SDimitry Andric 
38*0fca6ea1SDimitry Andric   /// Adds the `L1 v ... v Ln` clause to the formula. Applies
39*0fca6ea1SDimitry Andric   /// simplifications, based on single-literal clauses.
40*0fca6ea1SDimitry Andric   ///
41*0fca6ea1SDimitry Andric   /// Requirements:
42*0fca6ea1SDimitry Andric   ///
43*0fca6ea1SDimitry Andric   ///  `Li` must not be `NullLit`.
44*0fca6ea1SDimitry Andric   ///
45*0fca6ea1SDimitry Andric   ///  All literals must be distinct.
46*0fca6ea1SDimitry Andric   void addClause(ArrayRef<Literal> Literals) {
47*0fca6ea1SDimitry Andric     // We generate clauses with up to 3 literals in this file.
48*0fca6ea1SDimitry Andric     assert(!Literals.empty() && Literals.size() <= 3);
49*0fca6ea1SDimitry Andric     // Contains literals of the simplified clause.
50*0fca6ea1SDimitry Andric     llvm::SmallVector<Literal> Simplified;
51*0fca6ea1SDimitry Andric     for (auto L : Literals) {
52*0fca6ea1SDimitry Andric       assert(L != NullLit &&
53*0fca6ea1SDimitry Andric              llvm::all_of(Simplified, [L](Literal S) { return S != L; }));
54*0fca6ea1SDimitry Andric       auto X = var(L);
55*0fca6ea1SDimitry Andric       if (trueVars.contains(X)) { // X must be true
56*0fca6ea1SDimitry Andric         if (isPosLit(L))
57*0fca6ea1SDimitry Andric           return; // Omit clause `(... v X v ...)`, it is `true`.
58*0fca6ea1SDimitry Andric         else
59*0fca6ea1SDimitry Andric           continue; // Omit `!X` from `(... v !X v ...)`.
60*0fca6ea1SDimitry Andric       }
61*0fca6ea1SDimitry Andric       if (falseVars.contains(X)) { // X must be false
62*0fca6ea1SDimitry Andric         if (isNegLit(L))
63*0fca6ea1SDimitry Andric           return; // Omit clause `(... v !X v ...)`, it is `true`.
64*0fca6ea1SDimitry Andric         else
65*0fca6ea1SDimitry Andric           continue; // Omit `X` from `(... v X v ...)`.
66*0fca6ea1SDimitry Andric       }
67*0fca6ea1SDimitry Andric       Simplified.push_back(L);
68*0fca6ea1SDimitry Andric     }
69*0fca6ea1SDimitry Andric     if (Simplified.empty()) {
70*0fca6ea1SDimitry Andric       // Simplification made the clause empty, which is equivalent to `false`.
71*0fca6ea1SDimitry Andric       // We already know that this formula is unsatisfiable.
72*0fca6ea1SDimitry Andric       Formula.addClause(Simplified);
73*0fca6ea1SDimitry Andric       return;
74*0fca6ea1SDimitry Andric     }
75*0fca6ea1SDimitry Andric     if (Simplified.size() == 1) {
76*0fca6ea1SDimitry Andric       // We have new unit clause.
77*0fca6ea1SDimitry Andric       const Literal lit = Simplified.front();
78*0fca6ea1SDimitry Andric       const Variable v = var(lit);
79*0fca6ea1SDimitry Andric       if (isPosLit(lit))
80*0fca6ea1SDimitry Andric         trueVars.insert(v);
81*0fca6ea1SDimitry Andric       else
82*0fca6ea1SDimitry Andric         falseVars.insert(v);
83*0fca6ea1SDimitry Andric     }
84*0fca6ea1SDimitry Andric     Formula.addClause(Simplified);
85*0fca6ea1SDimitry Andric   }
86*0fca6ea1SDimitry Andric 
87*0fca6ea1SDimitry Andric   /// Returns true if we observed a contradiction while adding clauses.
88*0fca6ea1SDimitry Andric   /// In this case then the formula is already known to be unsatisfiable.
89*0fca6ea1SDimitry Andric   bool isKnownContradictory() { return Formula.knownContradictory(); }
90*0fca6ea1SDimitry Andric 
91*0fca6ea1SDimitry Andric private:
92*0fca6ea1SDimitry Andric   CNFFormula &Formula;
93*0fca6ea1SDimitry Andric   llvm::DenseSet<Variable> trueVars;
94*0fca6ea1SDimitry Andric   llvm::DenseSet<Variable> falseVars;
95*0fca6ea1SDimitry Andric };
96*0fca6ea1SDimitry Andric 
97*0fca6ea1SDimitry Andric } // namespace
98*0fca6ea1SDimitry Andric 
99*0fca6ea1SDimitry Andric CNFFormula::CNFFormula(Variable LargestVar)
100*0fca6ea1SDimitry Andric     : LargestVar(LargestVar), KnownContradictory(false) {
101*0fca6ea1SDimitry Andric   Clauses.push_back(0);
102*0fca6ea1SDimitry Andric   ClauseStarts.push_back(0);
103*0fca6ea1SDimitry Andric }
104*0fca6ea1SDimitry Andric 
105*0fca6ea1SDimitry Andric void CNFFormula::addClause(ArrayRef<Literal> lits) {
106*0fca6ea1SDimitry Andric   assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; }));
107*0fca6ea1SDimitry Andric 
108*0fca6ea1SDimitry Andric   if (lits.empty())
109*0fca6ea1SDimitry Andric     KnownContradictory = true;
110*0fca6ea1SDimitry Andric 
111*0fca6ea1SDimitry Andric   const size_t S = Clauses.size();
112*0fca6ea1SDimitry Andric   ClauseStarts.push_back(S);
113*0fca6ea1SDimitry Andric   Clauses.insert(Clauses.end(), lits.begin(), lits.end());
114*0fca6ea1SDimitry Andric }
115*0fca6ea1SDimitry Andric 
116*0fca6ea1SDimitry Andric CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas,
117*0fca6ea1SDimitry Andric                     llvm::DenseMap<Variable, Atom> &Atomics) {
118*0fca6ea1SDimitry Andric   // The general strategy of the algorithm implemented below is to map each
119*0fca6ea1SDimitry Andric   // of the sub-values in `Vals` to a unique variable and use these variables in
120*0fca6ea1SDimitry Andric   // the resulting CNF expression to avoid exponential blow up. The number of
121*0fca6ea1SDimitry Andric   // literals in the resulting formula is guaranteed to be linear in the number
122*0fca6ea1SDimitry Andric   // of sub-formulas in `Vals`.
123*0fca6ea1SDimitry Andric 
124*0fca6ea1SDimitry Andric   // Map each sub-formula in `Vals` to a unique variable.
125*0fca6ea1SDimitry Andric   llvm::DenseMap<const Formula *, Variable> FormulaToVar;
126*0fca6ea1SDimitry Andric   // Store variable identifiers and Atom of atomic booleans.
127*0fca6ea1SDimitry Andric   Variable NextVar = 1;
128*0fca6ea1SDimitry Andric   {
129*0fca6ea1SDimitry Andric     std::queue<const Formula *> UnprocessedFormulas;
130*0fca6ea1SDimitry Andric     for (const Formula *F : Formulas)
131*0fca6ea1SDimitry Andric       UnprocessedFormulas.push(F);
132*0fca6ea1SDimitry Andric     while (!UnprocessedFormulas.empty()) {
133*0fca6ea1SDimitry Andric       Variable Var = NextVar;
134*0fca6ea1SDimitry Andric       const Formula *F = UnprocessedFormulas.front();
135*0fca6ea1SDimitry Andric       UnprocessedFormulas.pop();
136*0fca6ea1SDimitry Andric 
137*0fca6ea1SDimitry Andric       if (!FormulaToVar.try_emplace(F, Var).second)
138*0fca6ea1SDimitry Andric         continue;
139*0fca6ea1SDimitry Andric       ++NextVar;
140*0fca6ea1SDimitry Andric 
141*0fca6ea1SDimitry Andric       for (const Formula *Op : F->operands())
142*0fca6ea1SDimitry Andric         UnprocessedFormulas.push(Op);
143*0fca6ea1SDimitry Andric       if (F->kind() == Formula::AtomRef)
144*0fca6ea1SDimitry Andric         Atomics[Var] = F->getAtom();
145*0fca6ea1SDimitry Andric     }
146*0fca6ea1SDimitry Andric   }
147*0fca6ea1SDimitry Andric 
148*0fca6ea1SDimitry Andric   auto GetVar = [&FormulaToVar](const Formula *F) {
149*0fca6ea1SDimitry Andric     auto ValIt = FormulaToVar.find(F);
150*0fca6ea1SDimitry Andric     assert(ValIt != FormulaToVar.end());
151*0fca6ea1SDimitry Andric     return ValIt->second;
152*0fca6ea1SDimitry Andric   };
153*0fca6ea1SDimitry Andric 
154*0fca6ea1SDimitry Andric   CNFFormula CNF(NextVar - 1);
155*0fca6ea1SDimitry Andric   std::vector<bool> ProcessedSubVals(NextVar, false);
156*0fca6ea1SDimitry Andric   CNFFormulaBuilder builder(CNF);
157*0fca6ea1SDimitry Andric 
158*0fca6ea1SDimitry Andric   // Add a conjunct for each variable that represents a top-level conjunction
159*0fca6ea1SDimitry Andric   // value in `Vals`.
160*0fca6ea1SDimitry Andric   for (const Formula *F : Formulas)
161*0fca6ea1SDimitry Andric     builder.addClause(posLit(GetVar(F)));
162*0fca6ea1SDimitry Andric 
163*0fca6ea1SDimitry Andric   // Add conjuncts that represent the mapping between newly-created variables
164*0fca6ea1SDimitry Andric   // and their corresponding sub-formulas.
165*0fca6ea1SDimitry Andric   std::queue<const Formula *> UnprocessedFormulas;
166*0fca6ea1SDimitry Andric   for (const Formula *F : Formulas)
167*0fca6ea1SDimitry Andric     UnprocessedFormulas.push(F);
168*0fca6ea1SDimitry Andric   while (!UnprocessedFormulas.empty()) {
169*0fca6ea1SDimitry Andric     const Formula *F = UnprocessedFormulas.front();
170*0fca6ea1SDimitry Andric     UnprocessedFormulas.pop();
171*0fca6ea1SDimitry Andric     const Variable Var = GetVar(F);
172*0fca6ea1SDimitry Andric 
173*0fca6ea1SDimitry Andric     if (ProcessedSubVals[Var])
174*0fca6ea1SDimitry Andric       continue;
175*0fca6ea1SDimitry Andric     ProcessedSubVals[Var] = true;
176*0fca6ea1SDimitry Andric 
177*0fca6ea1SDimitry Andric     switch (F->kind()) {
178*0fca6ea1SDimitry Andric     case Formula::AtomRef:
179*0fca6ea1SDimitry Andric       break;
180*0fca6ea1SDimitry Andric     case Formula::Literal:
181*0fca6ea1SDimitry Andric       CNF.addClause(F->literal() ? posLit(Var) : negLit(Var));
182*0fca6ea1SDimitry Andric       break;
183*0fca6ea1SDimitry Andric     case Formula::And: {
184*0fca6ea1SDimitry Andric       const Variable LHS = GetVar(F->operands()[0]);
185*0fca6ea1SDimitry Andric       const Variable RHS = GetVar(F->operands()[1]);
186*0fca6ea1SDimitry Andric 
187*0fca6ea1SDimitry Andric       if (LHS == RHS) {
188*0fca6ea1SDimitry Andric         // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is
189*0fca6ea1SDimitry Andric         // already in conjunctive normal form. Below we add each of the
190*0fca6ea1SDimitry Andric         // conjuncts of the latter expression to the result.
191*0fca6ea1SDimitry Andric         builder.addClause({negLit(Var), posLit(LHS)});
192*0fca6ea1SDimitry Andric         builder.addClause({posLit(Var), negLit(LHS)});
193*0fca6ea1SDimitry Andric       } else {
194*0fca6ea1SDimitry Andric         // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v
195*0fca6ea1SDimitry Andric         // !B)` which is already in conjunctive normal form. Below we add each
196*0fca6ea1SDimitry Andric         // of the conjuncts of the latter expression to the result.
197*0fca6ea1SDimitry Andric         builder.addClause({negLit(Var), posLit(LHS)});
198*0fca6ea1SDimitry Andric         builder.addClause({negLit(Var), posLit(RHS)});
199*0fca6ea1SDimitry Andric         builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
200*0fca6ea1SDimitry Andric       }
201*0fca6ea1SDimitry Andric       break;
202*0fca6ea1SDimitry Andric     }
203*0fca6ea1SDimitry Andric     case Formula::Or: {
204*0fca6ea1SDimitry Andric       const Variable LHS = GetVar(F->operands()[0]);
205*0fca6ea1SDimitry Andric       const Variable RHS = GetVar(F->operands()[1]);
206*0fca6ea1SDimitry Andric 
207*0fca6ea1SDimitry Andric       if (LHS == RHS) {
208*0fca6ea1SDimitry Andric         // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is
209*0fca6ea1SDimitry Andric         // already in conjunctive normal form. Below we add each of the
210*0fca6ea1SDimitry Andric         // conjuncts of the latter expression to the result.
211*0fca6ea1SDimitry Andric         builder.addClause({negLit(Var), posLit(LHS)});
212*0fca6ea1SDimitry Andric         builder.addClause({posLit(Var), negLit(LHS)});
213*0fca6ea1SDimitry Andric       } else {
214*0fca6ea1SDimitry Andric         // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v
215*0fca6ea1SDimitry Andric         // !B)` which is already in conjunctive normal form. Below we add each
216*0fca6ea1SDimitry Andric         // of the conjuncts of the latter expression to the result.
217*0fca6ea1SDimitry Andric         builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)});
218*0fca6ea1SDimitry Andric         builder.addClause({posLit(Var), negLit(LHS)});
219*0fca6ea1SDimitry Andric         builder.addClause({posLit(Var), negLit(RHS)});
220*0fca6ea1SDimitry Andric       }
221*0fca6ea1SDimitry Andric       break;
222*0fca6ea1SDimitry Andric     }
223*0fca6ea1SDimitry Andric     case Formula::Not: {
224*0fca6ea1SDimitry Andric       const Variable Operand = GetVar(F->operands()[0]);
225*0fca6ea1SDimitry Andric 
226*0fca6ea1SDimitry Andric       // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is
227*0fca6ea1SDimitry Andric       // already in conjunctive normal form. Below we add each of the
228*0fca6ea1SDimitry Andric       // conjuncts of the latter expression to the result.
229*0fca6ea1SDimitry Andric       builder.addClause({negLit(Var), negLit(Operand)});
230*0fca6ea1SDimitry Andric       builder.addClause({posLit(Var), posLit(Operand)});
231*0fca6ea1SDimitry Andric       break;
232*0fca6ea1SDimitry Andric     }
233*0fca6ea1SDimitry Andric     case Formula::Implies: {
234*0fca6ea1SDimitry Andric       const Variable LHS = GetVar(F->operands()[0]);
235*0fca6ea1SDimitry Andric       const Variable RHS = GetVar(F->operands()[1]);
236*0fca6ea1SDimitry Andric 
237*0fca6ea1SDimitry Andric       // `X <=> (A => B)` is equivalent to
238*0fca6ea1SDimitry Andric       // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in
239*0fca6ea1SDimitry Andric       // conjunctive normal form. Below we add each of the conjuncts of
240*0fca6ea1SDimitry Andric       // the latter expression to the result.
241*0fca6ea1SDimitry Andric       builder.addClause({posLit(Var), posLit(LHS)});
242*0fca6ea1SDimitry Andric       builder.addClause({posLit(Var), negLit(RHS)});
243*0fca6ea1SDimitry Andric       builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
244*0fca6ea1SDimitry Andric       break;
245*0fca6ea1SDimitry Andric     }
246*0fca6ea1SDimitry Andric     case Formula::Equal: {
247*0fca6ea1SDimitry Andric       const Variable LHS = GetVar(F->operands()[0]);
248*0fca6ea1SDimitry Andric       const Variable RHS = GetVar(F->operands()[1]);
249*0fca6ea1SDimitry Andric 
250*0fca6ea1SDimitry Andric       if (LHS == RHS) {
251*0fca6ea1SDimitry Andric         // `X <=> (A <=> A)` is equivalent to `X` which is already in
252*0fca6ea1SDimitry Andric         // conjunctive normal form. Below we add each of the conjuncts of the
253*0fca6ea1SDimitry Andric         // latter expression to the result.
254*0fca6ea1SDimitry Andric         builder.addClause(posLit(Var));
255*0fca6ea1SDimitry Andric 
256*0fca6ea1SDimitry Andric         // No need to visit the sub-values of `Val`.
257*0fca6ea1SDimitry Andric         continue;
258*0fca6ea1SDimitry Andric       }
259*0fca6ea1SDimitry Andric       // `X <=> (A <=> B)` is equivalent to
260*0fca6ea1SDimitry Andric       // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which
261*0fca6ea1SDimitry Andric       // is already in conjunctive normal form. Below we add each of the
262*0fca6ea1SDimitry Andric       // conjuncts of the latter expression to the result.
263*0fca6ea1SDimitry Andric       builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)});
264*0fca6ea1SDimitry Andric       builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)});
265*0fca6ea1SDimitry Andric       builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)});
266*0fca6ea1SDimitry Andric       builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)});
267*0fca6ea1SDimitry Andric       break;
268*0fca6ea1SDimitry Andric     }
269*0fca6ea1SDimitry Andric     }
270*0fca6ea1SDimitry Andric     if (builder.isKnownContradictory()) {
271*0fca6ea1SDimitry Andric       return CNF;
272*0fca6ea1SDimitry Andric     }
273*0fca6ea1SDimitry Andric     for (const Formula *Child : F->operands())
274*0fca6ea1SDimitry Andric       UnprocessedFormulas.push(Child);
275*0fca6ea1SDimitry Andric   }
276*0fca6ea1SDimitry Andric 
277*0fca6ea1SDimitry Andric   // Unit clauses that were added later were not
278*0fca6ea1SDimitry Andric   // considered for the simplification of earlier clauses. Do a final
279*0fca6ea1SDimitry Andric   // pass to find more opportunities for simplification.
280*0fca6ea1SDimitry Andric   CNFFormula FinalCNF(NextVar - 1);
281*0fca6ea1SDimitry Andric   CNFFormulaBuilder FinalBuilder(FinalCNF);
282*0fca6ea1SDimitry Andric 
283*0fca6ea1SDimitry Andric   // Collect unit clauses.
284*0fca6ea1SDimitry Andric   for (ClauseID C = 1; C <= CNF.numClauses(); ++C) {
285*0fca6ea1SDimitry Andric     if (CNF.clauseSize(C) == 1) {
286*0fca6ea1SDimitry Andric       FinalBuilder.addClause(CNF.clauseLiterals(C)[0]);
287*0fca6ea1SDimitry Andric     }
288*0fca6ea1SDimitry Andric   }
289*0fca6ea1SDimitry Andric 
290*0fca6ea1SDimitry Andric   // Add all clauses that were added previously, preserving the order.
291*0fca6ea1SDimitry Andric   for (ClauseID C = 1; C <= CNF.numClauses(); ++C) {
292*0fca6ea1SDimitry Andric     FinalBuilder.addClause(CNF.clauseLiterals(C));
293*0fca6ea1SDimitry Andric     if (FinalBuilder.isKnownContradictory()) {
294*0fca6ea1SDimitry Andric       break;
295*0fca6ea1SDimitry Andric     }
296*0fca6ea1SDimitry Andric   }
297*0fca6ea1SDimitry Andric   // It is possible there were new unit clauses again, but
298*0fca6ea1SDimitry Andric   // we stop here and leave the rest to the solver algorithm.
299*0fca6ea1SDimitry Andric   return FinalCNF;
300*0fca6ea1SDimitry Andric }
301*0fca6ea1SDimitry Andric 
302*0fca6ea1SDimitry Andric } // namespace dataflow
303*0fca6ea1SDimitry Andric } // namespace clang
304