1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 18 #include "clang/Analysis/FlowSensitive/Formula.h" 19 #include "clang/Analysis/FlowSensitive/Logger.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/SetOperations.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/Path.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cassert> 29 #include <memory> 30 #include <string> 31 #include <utility> 32 #include <vector> 33 34 static llvm::cl::opt<std::string> DataflowLog( 35 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 36 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 37 "log to stderr. With an arg, writes HTML logs under the " 38 "specified directory (one per analyzed function).")); 39 40 namespace clang { 41 namespace dataflow { 42 43 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 44 // During context-sensitive analysis, a struct may be allocated in one 45 // function, but its field accessed in a function lower in the stack than 46 // the allocation. Since we only collect fields used in the function where 47 // the allocation occurs, we can't apply that filter when performing 48 // context-sensitive analysis. But, this only applies to storage locations, 49 // since field access it not allowed to fail. In contrast, field *values* 50 // don't need this allowance, since the API allows for uninitialized fields. 51 if (Opts.ContextSensitiveOpts) 52 return getObjectFields(Type); 53 54 return llvm::set_intersection(getObjectFields(Type), ModeledFields); 55 } 56 57 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 58 ModeledFields.set_union(Fields); 59 } 60 61 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 62 if (!Type.isNull() && Type->isRecordType()) { 63 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 64 for (const FieldDecl *Field : getModeledFields(Type)) 65 if (Field->getType()->isReferenceType()) 66 FieldLocs.insert({Field, nullptr}); 67 else 68 FieldLocs.insert({Field, &createStorageLocation( 69 Field->getType().getNonReferenceType())}); 70 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs)); 71 } 72 return arena().create<ScalarStorageLocation>(Type); 73 } 74 75 StorageLocation & 76 DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) { 77 if (auto *Loc = DeclToLoc.lookup(&D)) 78 return *Loc; 79 auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 80 DeclToLoc[&D] = &Loc; 81 return Loc; 82 } 83 84 StorageLocation & 85 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 86 const Expr &CanonE = ignoreCFGOmittedNodes(E); 87 88 if (auto *Loc = ExprToLoc.lookup(&CanonE)) 89 return *Loc; 90 auto &Loc = createStorageLocation(CanonE.getType()); 91 ExprToLoc[&CanonE] = &Loc; 92 return Loc; 93 } 94 95 PointerValue & 96 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 97 auto CanonicalPointeeType = 98 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 99 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 100 if (Res.second) { 101 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 102 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 103 } 104 return *Res.first->second; 105 } 106 107 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { 108 if (Invariant == nullptr) 109 Invariant = &Constraint; 110 else 111 Invariant = &arena().makeAnd(*Invariant, Constraint); 112 } 113 114 void DataflowAnalysisContext::addFlowConditionConstraint( 115 Atom Token, const Formula &Constraint) { 116 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 117 if (!Res.second) { 118 Res.first->second = 119 &arena().makeAnd(*Res.first->second, Constraint); 120 } 121 } 122 123 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 124 Atom ForkToken = arena().makeFlowConditionToken(); 125 FlowConditionDeps[ForkToken].insert(Token); 126 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 127 return ForkToken; 128 } 129 130 Atom 131 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 132 Atom SecondToken) { 133 Atom Token = arena().makeFlowConditionToken(); 134 FlowConditionDeps[Token].insert(FirstToken); 135 FlowConditionDeps[Token].insert(SecondToken); 136 addFlowConditionConstraint(Token, 137 arena().makeOr(arena().makeAtomRef(FirstToken), 138 arena().makeAtomRef(SecondToken))); 139 return Token; 140 } 141 142 Solver::Result DataflowAnalysisContext::querySolver( 143 llvm::SetVector<const Formula *> Constraints) { 144 Constraints.insert(&arena().makeLiteral(true)); 145 Constraints.insert(&arena().makeNot(arena().makeLiteral(false))); 146 return S->solve(Constraints.getArrayRef()); 147 } 148 149 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 150 const Formula &Val) { 151 // Returns true if and only if truth assignment of the flow condition implies 152 // that `Val` is also true. We prove whether or not this property holds by 153 // reducing the problem to satisfiability checking. In other words, we attempt 154 // to show that assuming `Val` is false makes the constraints induced by the 155 // flow condition unsatisfiable. 156 llvm::SetVector<const Formula *> Constraints; 157 Constraints.insert(&arena().makeAtomRef(Token)); 158 Constraints.insert(&arena().makeNot(Val)); 159 addTransitiveFlowConditionConstraints(Token, Constraints); 160 return isUnsatisfiable(std::move(Constraints)); 161 } 162 163 bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) { 164 // Returns true if and only if we cannot prove that the flow condition can 165 // ever be false. 166 llvm::SetVector<const Formula *> Constraints; 167 Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token))); 168 addTransitiveFlowConditionConstraints(Token, Constraints); 169 return isUnsatisfiable(std::move(Constraints)); 170 } 171 172 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 173 const Formula &Val2) { 174 llvm::SetVector<const Formula *> Constraints; 175 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 176 return isUnsatisfiable(std::move(Constraints)); 177 } 178 179 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 180 Atom Token, llvm::SetVector<const Formula *> &Constraints) { 181 llvm::DenseSet<Atom> AddedTokens; 182 std::vector<Atom> Remaining = {Token}; 183 184 if (Invariant) 185 Constraints.insert(Invariant); 186 // Define all the flow conditions that might be referenced in constraints. 187 while (!Remaining.empty()) { 188 auto Token = Remaining.back(); 189 Remaining.pop_back(); 190 if (!AddedTokens.insert(Token).second) 191 continue; 192 193 auto ConstraintsIt = FlowConditionConstraints.find(Token); 194 if (ConstraintsIt == FlowConditionConstraints.end()) { 195 Constraints.insert(&arena().makeAtomRef(Token)); 196 } else { 197 // Bind flow condition token via `iff` to its set of constraints: 198 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 199 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 200 *ConstraintsIt->second)); 201 } 202 203 if (auto DepsIt = FlowConditionDeps.find(Token); 204 DepsIt != FlowConditionDeps.end()) 205 for (Atom A : DepsIt->second) 206 Remaining.push_back(A); 207 } 208 } 209 210 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 211 llvm::raw_ostream &OS) { 212 llvm::SetVector<const Formula *> Constraints; 213 Constraints.insert(&arena().makeAtomRef(Token)); 214 addTransitiveFlowConditionConstraints(Token, Constraints); 215 216 // TODO: have formulas know about true/false directly instead 217 Atom True = arena().makeLiteral(true).getAtom(); 218 Atom False = arena().makeLiteral(false).getAtom(); 219 Formula::AtomNames Names = {{False, "false"}, {True, "true"}}; 220 221 for (const auto *Constraint : Constraints) { 222 Constraint->print(OS, &Names); 223 OS << "\n"; 224 } 225 } 226 227 const ControlFlowContext * 228 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { 229 // Canonicalize the key: 230 F = F->getDefinition(); 231 if (F == nullptr) 232 return nullptr; 233 auto It = FunctionContexts.find(F); 234 if (It != FunctionContexts.end()) 235 return &It->second; 236 237 if (F->hasBody()) { 238 auto CFCtx = ControlFlowContext::build(*F); 239 // FIXME: Handle errors. 240 assert(CFCtx); 241 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); 242 return &Result.first->second; 243 } 244 245 return nullptr; 246 } 247 248 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 249 if (DataflowLog.empty()) 250 return Logger::textual(llvm::errs()); 251 252 llvm::StringRef Dir = DataflowLog; 253 if (auto EC = llvm::sys::fs::create_directories(Dir)) 254 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 255 // All analysis runs within a process will log to the same directory. 256 // Share a counter so they don't all overwrite each other's 0.html. 257 // (Don't share a logger, it's not threadsafe). 258 static std::atomic<unsigned> Counter = {0}; 259 auto StreamFactory = 260 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 261 llvm::SmallString<256> File(Dir); 262 llvm::sys::path::append(File, 263 std::to_string(Counter.fetch_add(1)) + ".html"); 264 std::error_code EC; 265 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 266 if (EC) { 267 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 268 << "\n"; 269 return std::make_unique<llvm::raw_null_ostream>(); 270 } 271 return OS; 272 }; 273 return Logger::html(std::move(StreamFactory)); 274 } 275 276 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, 277 Options Opts) 278 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { 279 assert(this->S != nullptr); 280 // If the -dataflow-log command-line flag was set, synthesize a logger. 281 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 282 // based tools. 283 if (Opts.Log == nullptr) { 284 if (DataflowLog.getNumOccurrences() > 0) { 285 LogOwner = makeLoggerFromCommandLine(); 286 this->Opts.Log = LogOwner.get(); 287 // FIXME: if the flag is given a value, write an HTML log to a file. 288 } else { 289 this->Opts.Log = &Logger::null(); 290 } 291 } 292 } 293 294 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 295 296 } // namespace dataflow 297 } // namespace clang 298 299 using namespace clang; 300 301 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { 302 const Expr *Current = &E; 303 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { 304 Current = EWC->getSubExpr(); 305 assert(Current != nullptr); 306 } 307 Current = Current->IgnoreParens(); 308 assert(Current != nullptr); 309 return *Current; 310 } 311 312 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { 313 if (auto *E = dyn_cast<Expr>(&S)) 314 return ignoreCFGOmittedNodes(*E); 315 return S; 316 } 317 318 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single 319 // field decl will be modeled for all instances of the inherited field. 320 static void getFieldsFromClassHierarchy(QualType Type, 321 clang::dataflow::FieldSet &Fields) { 322 if (Type->isIncompleteType() || Type->isDependentType() || 323 !Type->isRecordType()) 324 return; 325 326 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) 327 Fields.insert(Field); 328 if (auto *CXXRecord = Type->getAsCXXRecordDecl()) 329 for (const CXXBaseSpecifier &Base : CXXRecord->bases()) 330 getFieldsFromClassHierarchy(Base.getType(), Fields); 331 } 332 333 /// Gets the set of all fields in the type. 334 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { 335 FieldSet Fields; 336 getFieldsFromClassHierarchy(Type, Fields); 337 return Fields; 338 } 339