1*0fca6ea1SDimitry Andric //===- CNFFormula.cpp -------------------------------------------*- C++ -*-===// 2*0fca6ea1SDimitry Andric // 3*0fca6ea1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4*0fca6ea1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5*0fca6ea1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6*0fca6ea1SDimitry Andric // 7*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===// 8*0fca6ea1SDimitry Andric // 9*0fca6ea1SDimitry Andric // A representation of a boolean formula in 3-CNF. 10*0fca6ea1SDimitry Andric // 11*0fca6ea1SDimitry Andric //===----------------------------------------------------------------------===// 12*0fca6ea1SDimitry Andric 13*0fca6ea1SDimitry Andric #include "clang/Analysis/FlowSensitive/CNFFormula.h" 14*0fca6ea1SDimitry Andric #include "llvm/ADT/DenseSet.h" 15*0fca6ea1SDimitry Andric 16*0fca6ea1SDimitry Andric #include <queue> 17*0fca6ea1SDimitry Andric 18*0fca6ea1SDimitry Andric namespace clang { 19*0fca6ea1SDimitry Andric namespace dataflow { 20*0fca6ea1SDimitry Andric 21*0fca6ea1SDimitry Andric namespace { 22*0fca6ea1SDimitry Andric 23*0fca6ea1SDimitry Andric /// Applies simplifications while building up a BooleanFormula. 24*0fca6ea1SDimitry Andric /// We keep track of unit clauses, which tell us variables that must be 25*0fca6ea1SDimitry Andric /// true/false in any model that satisfies the overall formula. 26*0fca6ea1SDimitry Andric /// Such variables can be dropped from subsequently-added clauses, which 27*0fca6ea1SDimitry Andric /// may in turn yield more unit clauses or even a contradiction. 28*0fca6ea1SDimitry Andric /// The total added complexity of this preprocessing is O(N) where we 29*0fca6ea1SDimitry Andric /// for every clause, we do a lookup for each unit clauses. 30*0fca6ea1SDimitry Andric /// The lookup is O(1) on average. This method won't catch all 31*0fca6ea1SDimitry Andric /// contradictory formulas, more passes can in principle catch 32*0fca6ea1SDimitry Andric /// more cases but we leave all these and the general case to the 33*0fca6ea1SDimitry Andric /// proper SAT solver. 34*0fca6ea1SDimitry Andric struct CNFFormulaBuilder { 35*0fca6ea1SDimitry Andric // Formula should outlive CNFFormulaBuilder. 36*0fca6ea1SDimitry Andric explicit CNFFormulaBuilder(CNFFormula &CNF) : Formula(CNF) {} 37*0fca6ea1SDimitry Andric 38*0fca6ea1SDimitry Andric /// Adds the `L1 v ... v Ln` clause to the formula. Applies 39*0fca6ea1SDimitry Andric /// simplifications, based on single-literal clauses. 40*0fca6ea1SDimitry Andric /// 41*0fca6ea1SDimitry Andric /// Requirements: 42*0fca6ea1SDimitry Andric /// 43*0fca6ea1SDimitry Andric /// `Li` must not be `NullLit`. 44*0fca6ea1SDimitry Andric /// 45*0fca6ea1SDimitry Andric /// All literals must be distinct. 46*0fca6ea1SDimitry Andric void addClause(ArrayRef<Literal> Literals) { 47*0fca6ea1SDimitry Andric // We generate clauses with up to 3 literals in this file. 48*0fca6ea1SDimitry Andric assert(!Literals.empty() && Literals.size() <= 3); 49*0fca6ea1SDimitry Andric // Contains literals of the simplified clause. 50*0fca6ea1SDimitry Andric llvm::SmallVector<Literal> Simplified; 51*0fca6ea1SDimitry Andric for (auto L : Literals) { 52*0fca6ea1SDimitry Andric assert(L != NullLit && 53*0fca6ea1SDimitry Andric llvm::all_of(Simplified, [L](Literal S) { return S != L; })); 54*0fca6ea1SDimitry Andric auto X = var(L); 55*0fca6ea1SDimitry Andric if (trueVars.contains(X)) { // X must be true 56*0fca6ea1SDimitry Andric if (isPosLit(L)) 57*0fca6ea1SDimitry Andric return; // Omit clause `(... v X v ...)`, it is `true`. 58*0fca6ea1SDimitry Andric else 59*0fca6ea1SDimitry Andric continue; // Omit `!X` from `(... v !X v ...)`. 60*0fca6ea1SDimitry Andric } 61*0fca6ea1SDimitry Andric if (falseVars.contains(X)) { // X must be false 62*0fca6ea1SDimitry Andric if (isNegLit(L)) 63*0fca6ea1SDimitry Andric return; // Omit clause `(... v !X v ...)`, it is `true`. 64*0fca6ea1SDimitry Andric else 65*0fca6ea1SDimitry Andric continue; // Omit `X` from `(... v X v ...)`. 66*0fca6ea1SDimitry Andric } 67*0fca6ea1SDimitry Andric Simplified.push_back(L); 68*0fca6ea1SDimitry Andric } 69*0fca6ea1SDimitry Andric if (Simplified.empty()) { 70*0fca6ea1SDimitry Andric // Simplification made the clause empty, which is equivalent to `false`. 71*0fca6ea1SDimitry Andric // We already know that this formula is unsatisfiable. 72*0fca6ea1SDimitry Andric Formula.addClause(Simplified); 73*0fca6ea1SDimitry Andric return; 74*0fca6ea1SDimitry Andric } 75*0fca6ea1SDimitry Andric if (Simplified.size() == 1) { 76*0fca6ea1SDimitry Andric // We have new unit clause. 77*0fca6ea1SDimitry Andric const Literal lit = Simplified.front(); 78*0fca6ea1SDimitry Andric const Variable v = var(lit); 79*0fca6ea1SDimitry Andric if (isPosLit(lit)) 80*0fca6ea1SDimitry Andric trueVars.insert(v); 81*0fca6ea1SDimitry Andric else 82*0fca6ea1SDimitry Andric falseVars.insert(v); 83*0fca6ea1SDimitry Andric } 84*0fca6ea1SDimitry Andric Formula.addClause(Simplified); 85*0fca6ea1SDimitry Andric } 86*0fca6ea1SDimitry Andric 87*0fca6ea1SDimitry Andric /// Returns true if we observed a contradiction while adding clauses. 88*0fca6ea1SDimitry Andric /// In this case then the formula is already known to be unsatisfiable. 89*0fca6ea1SDimitry Andric bool isKnownContradictory() { return Formula.knownContradictory(); } 90*0fca6ea1SDimitry Andric 91*0fca6ea1SDimitry Andric private: 92*0fca6ea1SDimitry Andric CNFFormula &Formula; 93*0fca6ea1SDimitry Andric llvm::DenseSet<Variable> trueVars; 94*0fca6ea1SDimitry Andric llvm::DenseSet<Variable> falseVars; 95*0fca6ea1SDimitry Andric }; 96*0fca6ea1SDimitry Andric 97*0fca6ea1SDimitry Andric } // namespace 98*0fca6ea1SDimitry Andric 99*0fca6ea1SDimitry Andric CNFFormula::CNFFormula(Variable LargestVar) 100*0fca6ea1SDimitry Andric : LargestVar(LargestVar), KnownContradictory(false) { 101*0fca6ea1SDimitry Andric Clauses.push_back(0); 102*0fca6ea1SDimitry Andric ClauseStarts.push_back(0); 103*0fca6ea1SDimitry Andric } 104*0fca6ea1SDimitry Andric 105*0fca6ea1SDimitry Andric void CNFFormula::addClause(ArrayRef<Literal> lits) { 106*0fca6ea1SDimitry Andric assert(llvm::all_of(lits, [](Literal L) { return L != NullLit; })); 107*0fca6ea1SDimitry Andric 108*0fca6ea1SDimitry Andric if (lits.empty()) 109*0fca6ea1SDimitry Andric KnownContradictory = true; 110*0fca6ea1SDimitry Andric 111*0fca6ea1SDimitry Andric const size_t S = Clauses.size(); 112*0fca6ea1SDimitry Andric ClauseStarts.push_back(S); 113*0fca6ea1SDimitry Andric Clauses.insert(Clauses.end(), lits.begin(), lits.end()); 114*0fca6ea1SDimitry Andric } 115*0fca6ea1SDimitry Andric 116*0fca6ea1SDimitry Andric CNFFormula buildCNF(const llvm::ArrayRef<const Formula *> &Formulas, 117*0fca6ea1SDimitry Andric llvm::DenseMap<Variable, Atom> &Atomics) { 118*0fca6ea1SDimitry Andric // The general strategy of the algorithm implemented below is to map each 119*0fca6ea1SDimitry Andric // of the sub-values in `Vals` to a unique variable and use these variables in 120*0fca6ea1SDimitry Andric // the resulting CNF expression to avoid exponential blow up. The number of 121*0fca6ea1SDimitry Andric // literals in the resulting formula is guaranteed to be linear in the number 122*0fca6ea1SDimitry Andric // of sub-formulas in `Vals`. 123*0fca6ea1SDimitry Andric 124*0fca6ea1SDimitry Andric // Map each sub-formula in `Vals` to a unique variable. 125*0fca6ea1SDimitry Andric llvm::DenseMap<const Formula *, Variable> FormulaToVar; 126*0fca6ea1SDimitry Andric // Store variable identifiers and Atom of atomic booleans. 127*0fca6ea1SDimitry Andric Variable NextVar = 1; 128*0fca6ea1SDimitry Andric { 129*0fca6ea1SDimitry Andric std::queue<const Formula *> UnprocessedFormulas; 130*0fca6ea1SDimitry Andric for (const Formula *F : Formulas) 131*0fca6ea1SDimitry Andric UnprocessedFormulas.push(F); 132*0fca6ea1SDimitry Andric while (!UnprocessedFormulas.empty()) { 133*0fca6ea1SDimitry Andric Variable Var = NextVar; 134*0fca6ea1SDimitry Andric const Formula *F = UnprocessedFormulas.front(); 135*0fca6ea1SDimitry Andric UnprocessedFormulas.pop(); 136*0fca6ea1SDimitry Andric 137*0fca6ea1SDimitry Andric if (!FormulaToVar.try_emplace(F, Var).second) 138*0fca6ea1SDimitry Andric continue; 139*0fca6ea1SDimitry Andric ++NextVar; 140*0fca6ea1SDimitry Andric 141*0fca6ea1SDimitry Andric for (const Formula *Op : F->operands()) 142*0fca6ea1SDimitry Andric UnprocessedFormulas.push(Op); 143*0fca6ea1SDimitry Andric if (F->kind() == Formula::AtomRef) 144*0fca6ea1SDimitry Andric Atomics[Var] = F->getAtom(); 145*0fca6ea1SDimitry Andric } 146*0fca6ea1SDimitry Andric } 147*0fca6ea1SDimitry Andric 148*0fca6ea1SDimitry Andric auto GetVar = [&FormulaToVar](const Formula *F) { 149*0fca6ea1SDimitry Andric auto ValIt = FormulaToVar.find(F); 150*0fca6ea1SDimitry Andric assert(ValIt != FormulaToVar.end()); 151*0fca6ea1SDimitry Andric return ValIt->second; 152*0fca6ea1SDimitry Andric }; 153*0fca6ea1SDimitry Andric 154*0fca6ea1SDimitry Andric CNFFormula CNF(NextVar - 1); 155*0fca6ea1SDimitry Andric std::vector<bool> ProcessedSubVals(NextVar, false); 156*0fca6ea1SDimitry Andric CNFFormulaBuilder builder(CNF); 157*0fca6ea1SDimitry Andric 158*0fca6ea1SDimitry Andric // Add a conjunct for each variable that represents a top-level conjunction 159*0fca6ea1SDimitry Andric // value in `Vals`. 160*0fca6ea1SDimitry Andric for (const Formula *F : Formulas) 161*0fca6ea1SDimitry Andric builder.addClause(posLit(GetVar(F))); 162*0fca6ea1SDimitry Andric 163*0fca6ea1SDimitry Andric // Add conjuncts that represent the mapping between newly-created variables 164*0fca6ea1SDimitry Andric // and their corresponding sub-formulas. 165*0fca6ea1SDimitry Andric std::queue<const Formula *> UnprocessedFormulas; 166*0fca6ea1SDimitry Andric for (const Formula *F : Formulas) 167*0fca6ea1SDimitry Andric UnprocessedFormulas.push(F); 168*0fca6ea1SDimitry Andric while (!UnprocessedFormulas.empty()) { 169*0fca6ea1SDimitry Andric const Formula *F = UnprocessedFormulas.front(); 170*0fca6ea1SDimitry Andric UnprocessedFormulas.pop(); 171*0fca6ea1SDimitry Andric const Variable Var = GetVar(F); 172*0fca6ea1SDimitry Andric 173*0fca6ea1SDimitry Andric if (ProcessedSubVals[Var]) 174*0fca6ea1SDimitry Andric continue; 175*0fca6ea1SDimitry Andric ProcessedSubVals[Var] = true; 176*0fca6ea1SDimitry Andric 177*0fca6ea1SDimitry Andric switch (F->kind()) { 178*0fca6ea1SDimitry Andric case Formula::AtomRef: 179*0fca6ea1SDimitry Andric break; 180*0fca6ea1SDimitry Andric case Formula::Literal: 181*0fca6ea1SDimitry Andric CNF.addClause(F->literal() ? posLit(Var) : negLit(Var)); 182*0fca6ea1SDimitry Andric break; 183*0fca6ea1SDimitry Andric case Formula::And: { 184*0fca6ea1SDimitry Andric const Variable LHS = GetVar(F->operands()[0]); 185*0fca6ea1SDimitry Andric const Variable RHS = GetVar(F->operands()[1]); 186*0fca6ea1SDimitry Andric 187*0fca6ea1SDimitry Andric if (LHS == RHS) { 188*0fca6ea1SDimitry Andric // `X <=> (A ^ A)` is equivalent to `(!X v A) ^ (X v !A)` which is 189*0fca6ea1SDimitry Andric // already in conjunctive normal form. Below we add each of the 190*0fca6ea1SDimitry Andric // conjuncts of the latter expression to the result. 191*0fca6ea1SDimitry Andric builder.addClause({negLit(Var), posLit(LHS)}); 192*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), negLit(LHS)}); 193*0fca6ea1SDimitry Andric } else { 194*0fca6ea1SDimitry Andric // `X <=> (A ^ B)` is equivalent to `(!X v A) ^ (!X v B) ^ (X v !A v 195*0fca6ea1SDimitry Andric // !B)` which is already in conjunctive normal form. Below we add each 196*0fca6ea1SDimitry Andric // of the conjuncts of the latter expression to the result. 197*0fca6ea1SDimitry Andric builder.addClause({negLit(Var), posLit(LHS)}); 198*0fca6ea1SDimitry Andric builder.addClause({negLit(Var), posLit(RHS)}); 199*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)}); 200*0fca6ea1SDimitry Andric } 201*0fca6ea1SDimitry Andric break; 202*0fca6ea1SDimitry Andric } 203*0fca6ea1SDimitry Andric case Formula::Or: { 204*0fca6ea1SDimitry Andric const Variable LHS = GetVar(F->operands()[0]); 205*0fca6ea1SDimitry Andric const Variable RHS = GetVar(F->operands()[1]); 206*0fca6ea1SDimitry Andric 207*0fca6ea1SDimitry Andric if (LHS == RHS) { 208*0fca6ea1SDimitry Andric // `X <=> (A v A)` is equivalent to `(!X v A) ^ (X v !A)` which is 209*0fca6ea1SDimitry Andric // already in conjunctive normal form. Below we add each of the 210*0fca6ea1SDimitry Andric // conjuncts of the latter expression to the result. 211*0fca6ea1SDimitry Andric builder.addClause({negLit(Var), posLit(LHS)}); 212*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), negLit(LHS)}); 213*0fca6ea1SDimitry Andric } else { 214*0fca6ea1SDimitry Andric // `X <=> (A v B)` is equivalent to `(!X v A v B) ^ (X v !A) ^ (X v 215*0fca6ea1SDimitry Andric // !B)` which is already in conjunctive normal form. Below we add each 216*0fca6ea1SDimitry Andric // of the conjuncts of the latter expression to the result. 217*0fca6ea1SDimitry Andric builder.addClause({negLit(Var), posLit(LHS), posLit(RHS)}); 218*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), negLit(LHS)}); 219*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), negLit(RHS)}); 220*0fca6ea1SDimitry Andric } 221*0fca6ea1SDimitry Andric break; 222*0fca6ea1SDimitry Andric } 223*0fca6ea1SDimitry Andric case Formula::Not: { 224*0fca6ea1SDimitry Andric const Variable Operand = GetVar(F->operands()[0]); 225*0fca6ea1SDimitry Andric 226*0fca6ea1SDimitry Andric // `X <=> !Y` is equivalent to `(!X v !Y) ^ (X v Y)` which is 227*0fca6ea1SDimitry Andric // already in conjunctive normal form. Below we add each of the 228*0fca6ea1SDimitry Andric // conjuncts of the latter expression to the result. 229*0fca6ea1SDimitry Andric builder.addClause({negLit(Var), negLit(Operand)}); 230*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), posLit(Operand)}); 231*0fca6ea1SDimitry Andric break; 232*0fca6ea1SDimitry Andric } 233*0fca6ea1SDimitry Andric case Formula::Implies: { 234*0fca6ea1SDimitry Andric const Variable LHS = GetVar(F->operands()[0]); 235*0fca6ea1SDimitry Andric const Variable RHS = GetVar(F->operands()[1]); 236*0fca6ea1SDimitry Andric 237*0fca6ea1SDimitry Andric // `X <=> (A => B)` is equivalent to 238*0fca6ea1SDimitry Andric // `(X v A) ^ (X v !B) ^ (!X v !A v B)` which is already in 239*0fca6ea1SDimitry Andric // conjunctive normal form. Below we add each of the conjuncts of 240*0fca6ea1SDimitry Andric // the latter expression to the result. 241*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), posLit(LHS)}); 242*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), negLit(RHS)}); 243*0fca6ea1SDimitry Andric builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)}); 244*0fca6ea1SDimitry Andric break; 245*0fca6ea1SDimitry Andric } 246*0fca6ea1SDimitry Andric case Formula::Equal: { 247*0fca6ea1SDimitry Andric const Variable LHS = GetVar(F->operands()[0]); 248*0fca6ea1SDimitry Andric const Variable RHS = GetVar(F->operands()[1]); 249*0fca6ea1SDimitry Andric 250*0fca6ea1SDimitry Andric if (LHS == RHS) { 251*0fca6ea1SDimitry Andric // `X <=> (A <=> A)` is equivalent to `X` which is already in 252*0fca6ea1SDimitry Andric // conjunctive normal form. Below we add each of the conjuncts of the 253*0fca6ea1SDimitry Andric // latter expression to the result. 254*0fca6ea1SDimitry Andric builder.addClause(posLit(Var)); 255*0fca6ea1SDimitry Andric 256*0fca6ea1SDimitry Andric // No need to visit the sub-values of `Val`. 257*0fca6ea1SDimitry Andric continue; 258*0fca6ea1SDimitry Andric } 259*0fca6ea1SDimitry Andric // `X <=> (A <=> B)` is equivalent to 260*0fca6ea1SDimitry Andric // `(X v A v B) ^ (X v !A v !B) ^ (!X v A v !B) ^ (!X v !A v B)` which 261*0fca6ea1SDimitry Andric // is already in conjunctive normal form. Below we add each of the 262*0fca6ea1SDimitry Andric // conjuncts of the latter expression to the result. 263*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), posLit(LHS), posLit(RHS)}); 264*0fca6ea1SDimitry Andric builder.addClause({posLit(Var), negLit(LHS), negLit(RHS)}); 265*0fca6ea1SDimitry Andric builder.addClause({negLit(Var), posLit(LHS), negLit(RHS)}); 266*0fca6ea1SDimitry Andric builder.addClause({negLit(Var), negLit(LHS), posLit(RHS)}); 267*0fca6ea1SDimitry Andric break; 268*0fca6ea1SDimitry Andric } 269*0fca6ea1SDimitry Andric } 270*0fca6ea1SDimitry Andric if (builder.isKnownContradictory()) { 271*0fca6ea1SDimitry Andric return CNF; 272*0fca6ea1SDimitry Andric } 273*0fca6ea1SDimitry Andric for (const Formula *Child : F->operands()) 274*0fca6ea1SDimitry Andric UnprocessedFormulas.push(Child); 275*0fca6ea1SDimitry Andric } 276*0fca6ea1SDimitry Andric 277*0fca6ea1SDimitry Andric // Unit clauses that were added later were not 278*0fca6ea1SDimitry Andric // considered for the simplification of earlier clauses. Do a final 279*0fca6ea1SDimitry Andric // pass to find more opportunities for simplification. 280*0fca6ea1SDimitry Andric CNFFormula FinalCNF(NextVar - 1); 281*0fca6ea1SDimitry Andric CNFFormulaBuilder FinalBuilder(FinalCNF); 282*0fca6ea1SDimitry Andric 283*0fca6ea1SDimitry Andric // Collect unit clauses. 284*0fca6ea1SDimitry Andric for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { 285*0fca6ea1SDimitry Andric if (CNF.clauseSize(C) == 1) { 286*0fca6ea1SDimitry Andric FinalBuilder.addClause(CNF.clauseLiterals(C)[0]); 287*0fca6ea1SDimitry Andric } 288*0fca6ea1SDimitry Andric } 289*0fca6ea1SDimitry Andric 290*0fca6ea1SDimitry Andric // Add all clauses that were added previously, preserving the order. 291*0fca6ea1SDimitry Andric for (ClauseID C = 1; C <= CNF.numClauses(); ++C) { 292*0fca6ea1SDimitry Andric FinalBuilder.addClause(CNF.clauseLiterals(C)); 293*0fca6ea1SDimitry Andric if (FinalBuilder.isKnownContradictory()) { 294*0fca6ea1SDimitry Andric break; 295*0fca6ea1SDimitry Andric } 296*0fca6ea1SDimitry Andric } 297*0fca6ea1SDimitry Andric // It is possible there were new unit clauses again, but 298*0fca6ea1SDimitry Andric // we stop here and leave the rest to the solver algorithm. 299*0fca6ea1SDimitry Andric return FinalCNF; 300*0fca6ea1SDimitry Andric } 301*0fca6ea1SDimitry Andric 302*0fca6ea1SDimitry Andric } // namespace dataflow 303*0fca6ea1SDimitry Andric } // namespace clang 304