1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 18 #include "clang/Analysis/FlowSensitive/Formula.h" 19 #include "clang/Analysis/FlowSensitive/Logger.h" 20 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" 21 #include "clang/Analysis/FlowSensitive/Value.h" 22 #include "llvm/ADT/SetOperations.h" 23 #include "llvm/ADT/SetVector.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/FileSystem.h" 27 #include "llvm/Support/Path.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 #include <memory> 31 #include <string> 32 #include <utility> 33 #include <vector> 34 35 static llvm::cl::opt<std::string> DataflowLog( 36 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 37 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 38 "log to stderr. With an arg, writes HTML logs under the " 39 "specified directory (one per analyzed function).")); 40 41 namespace clang { 42 namespace dataflow { 43 44 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 45 // During context-sensitive analysis, a struct may be allocated in one 46 // function, but its field accessed in a function lower in the stack than 47 // the allocation. Since we only collect fields used in the function where 48 // the allocation occurs, we can't apply that filter when performing 49 // context-sensitive analysis. But, this only applies to storage locations, 50 // since field access it not allowed to fail. In contrast, field *values* 51 // don't need this allowance, since the API allows for uninitialized fields. 52 if (Opts.ContextSensitiveOpts) 53 return getObjectFields(Type); 54 55 return llvm::set_intersection(getObjectFields(Type), ModeledFields); 56 } 57 58 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 59 ModeledFields.set_union(Fields); 60 } 61 62 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 63 if (!Type.isNull() && Type->isRecordType()) { 64 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 65 for (const FieldDecl *Field : getModeledFields(Type)) 66 if (Field->getType()->isReferenceType()) 67 FieldLocs.insert({Field, nullptr}); 68 else 69 FieldLocs.insert({Field, &createStorageLocation( 70 Field->getType().getNonReferenceType())}); 71 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs)); 72 } 73 return arena().create<ScalarStorageLocation>(Type); 74 } 75 76 StorageLocation & 77 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { 78 if (auto *Loc = DeclToLoc.lookup(&D)) 79 return *Loc; 80 auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 81 DeclToLoc[&D] = &Loc; 82 return Loc; 83 } 84 85 StorageLocation & 86 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 87 const Expr &CanonE = ignoreCFGOmittedNodes(E); 88 89 if (auto *Loc = ExprToLoc.lookup(&CanonE)) 90 return *Loc; 91 auto &Loc = createStorageLocation(CanonE.getType()); 92 ExprToLoc[&CanonE] = &Loc; 93 return Loc; 94 } 95 96 PointerValue & 97 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 98 auto CanonicalPointeeType = 99 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 100 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 101 if (Res.second) { 102 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 103 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 104 } 105 return *Res.first->second; 106 } 107 108 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { 109 if (Invariant == nullptr) 110 Invariant = &Constraint; 111 else 112 Invariant = &arena().makeAnd(*Invariant, Constraint); 113 } 114 115 void DataflowAnalysisContext::addFlowConditionConstraint( 116 Atom Token, const Formula &Constraint) { 117 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 118 if (!Res.second) { 119 Res.first->second = 120 &arena().makeAnd(*Res.first->second, Constraint); 121 } 122 } 123 124 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 125 Atom ForkToken = arena().makeFlowConditionToken(); 126 FlowConditionDeps[ForkToken].insert(Token); 127 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 128 return ForkToken; 129 } 130 131 Atom 132 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 133 Atom SecondToken) { 134 Atom Token = arena().makeFlowConditionToken(); 135 FlowConditionDeps[Token].insert(FirstToken); 136 FlowConditionDeps[Token].insert(SecondToken); 137 addFlowConditionConstraint(Token, 138 arena().makeOr(arena().makeAtomRef(FirstToken), 139 arena().makeAtomRef(SecondToken))); 140 return Token; 141 } 142 143 Solver::Result DataflowAnalysisContext::querySolver( 144 llvm::SetVector<const Formula *> Constraints) { 145 return S->solve(Constraints.getArrayRef()); 146 } 147 148 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 149 const Formula &F) { 150 // Returns true if and only if truth assignment of the flow condition implies 151 // that `F` is also true. We prove whether or not this property holds by 152 // reducing the problem to satisfiability checking. In other words, we attempt 153 // to show that assuming `F` is false makes the constraints induced by the 154 // flow condition unsatisfiable. 155 llvm::SetVector<const Formula *> Constraints; 156 Constraints.insert(&arena().makeAtomRef(Token)); 157 Constraints.insert(&arena().makeNot(F)); 158 addTransitiveFlowConditionConstraints(Token, Constraints); 159 return isUnsatisfiable(std::move(Constraints)); 160 } 161 162 bool DataflowAnalysisContext::flowConditionAllows(Atom Token, 163 const Formula &F) { 164 llvm::SetVector<const Formula *> Constraints; 165 Constraints.insert(&arena().makeAtomRef(Token)); 166 Constraints.insert(&F); 167 addTransitiveFlowConditionConstraints(Token, Constraints); 168 return isSatisfiable(std::move(Constraints)); 169 } 170 171 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 172 const Formula &Val2) { 173 llvm::SetVector<const Formula *> Constraints; 174 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 175 return isUnsatisfiable(std::move(Constraints)); 176 } 177 178 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 179 Atom Token, llvm::SetVector<const Formula *> &Constraints) { 180 llvm::DenseSet<Atom> AddedTokens; 181 std::vector<Atom> Remaining = {Token}; 182 183 if (Invariant) 184 Constraints.insert(Invariant); 185 // Define all the flow conditions that might be referenced in constraints. 186 while (!Remaining.empty()) { 187 auto Token = Remaining.back(); 188 Remaining.pop_back(); 189 if (!AddedTokens.insert(Token).second) 190 continue; 191 192 auto ConstraintsIt = FlowConditionConstraints.find(Token); 193 if (ConstraintsIt == FlowConditionConstraints.end()) { 194 Constraints.insert(&arena().makeAtomRef(Token)); 195 } else { 196 // Bind flow condition token via `iff` to its set of constraints: 197 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 198 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 199 *ConstraintsIt->second)); 200 } 201 202 if (auto DepsIt = FlowConditionDeps.find(Token); 203 DepsIt != FlowConditionDeps.end()) 204 for (Atom A : DepsIt->second) 205 Remaining.push_back(A); 206 } 207 } 208 209 static void printAtomList(const llvm::SmallVector<Atom> &Atoms, 210 llvm::raw_ostream &OS) { 211 OS << "("; 212 for (size_t i = 0; i < Atoms.size(); ++i) { 213 OS << Atoms[i]; 214 if (i + 1 < Atoms.size()) 215 OS << ", "; 216 } 217 OS << ")\n"; 218 } 219 220 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 221 llvm::raw_ostream &OS) { 222 llvm::SetVector<const Formula *> Constraints; 223 Constraints.insert(&arena().makeAtomRef(Token)); 224 addTransitiveFlowConditionConstraints(Token, Constraints); 225 226 OS << "Flow condition token: " << Token << "\n"; 227 SimplifyConstraintsInfo Info; 228 llvm::SetVector<const Formula *> OriginalConstraints = Constraints; 229 simplifyConstraints(Constraints, arena(), &Info); 230 if (!Constraints.empty()) { 231 OS << "Constraints:\n"; 232 for (const auto *Constraint : Constraints) { 233 Constraint->print(OS); 234 OS << "\n"; 235 } 236 } 237 if (!Info.TrueAtoms.empty()) { 238 OS << "True atoms: "; 239 printAtomList(Info.TrueAtoms, OS); 240 } 241 if (!Info.FalseAtoms.empty()) { 242 OS << "False atoms: "; 243 printAtomList(Info.FalseAtoms, OS); 244 } 245 if (!Info.EquivalentAtoms.empty()) { 246 OS << "Equivalent atoms:\n"; 247 for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms) 248 printAtomList(Class, OS); 249 } 250 251 OS << "\nFlow condition constraints before simplification:\n"; 252 for (const auto *Constraint : OriginalConstraints) { 253 Constraint->print(OS); 254 OS << "\n"; 255 } 256 } 257 258 const ControlFlowContext * 259 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { 260 // Canonicalize the key: 261 F = F->getDefinition(); 262 if (F == nullptr) 263 return nullptr; 264 auto It = FunctionContexts.find(F); 265 if (It != FunctionContexts.end()) 266 return &It->second; 267 268 if (F->hasBody()) { 269 auto CFCtx = ControlFlowContext::build(*F); 270 // FIXME: Handle errors. 271 assert(CFCtx); 272 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); 273 return &Result.first->second; 274 } 275 276 return nullptr; 277 } 278 279 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 280 if (DataflowLog.empty()) 281 return Logger::textual(llvm::errs()); 282 283 llvm::StringRef Dir = DataflowLog; 284 if (auto EC = llvm::sys::fs::create_directories(Dir)) 285 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 286 // All analysis runs within a process will log to the same directory. 287 // Share a counter so they don't all overwrite each other's 0.html. 288 // (Don't share a logger, it's not threadsafe). 289 static std::atomic<unsigned> Counter = {0}; 290 auto StreamFactory = 291 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 292 llvm::SmallString<256> File(Dir); 293 llvm::sys::path::append(File, 294 std::to_string(Counter.fetch_add(1)) + ".html"); 295 std::error_code EC; 296 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 297 if (EC) { 298 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 299 << "\n"; 300 return std::make_unique<llvm::raw_null_ostream>(); 301 } 302 return OS; 303 }; 304 return Logger::html(std::move(StreamFactory)); 305 } 306 307 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, 308 Options Opts) 309 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { 310 assert(this->S != nullptr); 311 // If the -dataflow-log command-line flag was set, synthesize a logger. 312 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 313 // based tools. 314 if (Opts.Log == nullptr) { 315 if (DataflowLog.getNumOccurrences() > 0) { 316 LogOwner = makeLoggerFromCommandLine(); 317 this->Opts.Log = LogOwner.get(); 318 // FIXME: if the flag is given a value, write an HTML log to a file. 319 } else { 320 this->Opts.Log = &Logger::null(); 321 } 322 } 323 } 324 325 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 326 327 } // namespace dataflow 328 } // namespace clang 329 330 using namespace clang; 331 332 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { 333 const Expr *Current = &E; 334 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { 335 Current = EWC->getSubExpr(); 336 assert(Current != nullptr); 337 } 338 Current = Current->IgnoreParens(); 339 assert(Current != nullptr); 340 return *Current; 341 } 342 343 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { 344 if (auto *E = dyn_cast<Expr>(&S)) 345 return ignoreCFGOmittedNodes(*E); 346 return S; 347 } 348 349 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single 350 // field decl will be modeled for all instances of the inherited field. 351 static void getFieldsFromClassHierarchy(QualType Type, 352 clang::dataflow::FieldSet &Fields) { 353 if (Type->isIncompleteType() || Type->isDependentType() || 354 !Type->isRecordType()) 355 return; 356 357 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) 358 Fields.insert(Field); 359 if (auto *CXXRecord = Type->getAsCXXRecordDecl()) 360 for (const CXXBaseSpecifier &Base : CXXRecord->bases()) 361 getFieldsFromClassHierarchy(Base.getType(), Fields); 362 } 363 364 /// Gets the set of all fields in the type. 365 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { 366 FieldSet Fields; 367 getFieldsFromClassHierarchy(Type, Fields); 368 return Fields; 369 } 370