1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 18 #include "clang/Analysis/FlowSensitive/Formula.h" 19 #include "clang/Analysis/FlowSensitive/Logger.h" 20 #include "clang/Analysis/FlowSensitive/SimplifyConstraints.h" 21 #include "clang/Analysis/FlowSensitive/Value.h" 22 #include "llvm/ADT/SetOperations.h" 23 #include "llvm/ADT/SetVector.h" 24 #include "llvm/Support/CommandLine.h" 25 #include "llvm/Support/Debug.h" 26 #include "llvm/Support/FileSystem.h" 27 #include "llvm/Support/Path.h" 28 #include "llvm/Support/raw_ostream.h" 29 #include <cassert> 30 #include <memory> 31 #include <string> 32 #include <utility> 33 #include <vector> 34 35 static llvm::cl::opt<std::string> DataflowLog( 36 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 37 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 38 "log to stderr. With an arg, writes HTML logs under the " 39 "specified directory (one per analyzed function).")); 40 41 namespace clang { 42 namespace dataflow { 43 44 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 45 // During context-sensitive analysis, a struct may be allocated in one 46 // function, but its field accessed in a function lower in the stack than 47 // the allocation. Since we only collect fields used in the function where 48 // the allocation occurs, we can't apply that filter when performing 49 // context-sensitive analysis. But, this only applies to storage locations, 50 // since field access it not allowed to fail. In contrast, field *values* 51 // don't need this allowance, since the API allows for uninitialized fields. 52 if (Opts.ContextSensitiveOpts) 53 return getObjectFields(Type); 54 55 return llvm::set_intersection(getObjectFields(Type), ModeledFields); 56 } 57 58 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 59 ModeledFields.set_union(Fields); 60 } 61 62 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 63 if (!Type.isNull() && Type->isRecordType()) { 64 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 65 for (const FieldDecl *Field : getModeledFields(Type)) 66 if (Field->getType()->isReferenceType()) 67 FieldLocs.insert({Field, nullptr}); 68 else 69 FieldLocs.insert({Field, &createStorageLocation( 70 Field->getType().getNonReferenceType())}); 71 72 RecordStorageLocation::SyntheticFieldMap SyntheticFields; 73 for (const auto &Entry : getSyntheticFields(Type)) 74 SyntheticFields.insert( 75 {Entry.getKey(), 76 &createStorageLocation(Entry.getValue().getNonReferenceType())}); 77 78 return createRecordStorageLocation(Type, std::move(FieldLocs), 79 std::move(SyntheticFields)); 80 } 81 return arena().create<ScalarStorageLocation>(Type); 82 } 83 84 // Returns the keys for a given `StringMap`. 85 // Can't use `StringSet` as the return type as it doesn't support `operator==`. 86 template <typename T> 87 static llvm::DenseSet<llvm::StringRef> getKeys(const llvm::StringMap<T> &Map) { 88 return llvm::DenseSet<llvm::StringRef>(Map.keys().begin(), Map.keys().end()); 89 } 90 91 RecordStorageLocation &DataflowAnalysisContext::createRecordStorageLocation( 92 QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, 93 RecordStorageLocation::SyntheticFieldMap SyntheticFields) { 94 assert(Type->isRecordType()); 95 assert(containsSameFields(getModeledFields(Type), FieldLocs)); 96 assert(getKeys(getSyntheticFields(Type)) == getKeys(SyntheticFields)); 97 98 RecordStorageLocationCreated = true; 99 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs), 100 std::move(SyntheticFields)); 101 } 102 103 StorageLocation & 104 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { 105 if (auto *Loc = DeclToLoc.lookup(&D)) 106 return *Loc; 107 auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 108 DeclToLoc[&D] = &Loc; 109 return Loc; 110 } 111 112 StorageLocation & 113 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 114 const Expr &CanonE = ignoreCFGOmittedNodes(E); 115 116 if (auto *Loc = ExprToLoc.lookup(&CanonE)) 117 return *Loc; 118 auto &Loc = createStorageLocation(CanonE.getType()); 119 ExprToLoc[&CanonE] = &Loc; 120 return Loc; 121 } 122 123 PointerValue & 124 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 125 auto CanonicalPointeeType = 126 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 127 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 128 if (Res.second) { 129 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 130 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 131 } 132 return *Res.first->second; 133 } 134 135 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { 136 if (Invariant == nullptr) 137 Invariant = &Constraint; 138 else 139 Invariant = &arena().makeAnd(*Invariant, Constraint); 140 } 141 142 void DataflowAnalysisContext::addFlowConditionConstraint( 143 Atom Token, const Formula &Constraint) { 144 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 145 if (!Res.second) { 146 Res.first->second = 147 &arena().makeAnd(*Res.first->second, Constraint); 148 } 149 } 150 151 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 152 Atom ForkToken = arena().makeFlowConditionToken(); 153 FlowConditionDeps[ForkToken].insert(Token); 154 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 155 return ForkToken; 156 } 157 158 Atom 159 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 160 Atom SecondToken) { 161 Atom Token = arena().makeFlowConditionToken(); 162 FlowConditionDeps[Token].insert(FirstToken); 163 FlowConditionDeps[Token].insert(SecondToken); 164 addFlowConditionConstraint(Token, 165 arena().makeOr(arena().makeAtomRef(FirstToken), 166 arena().makeAtomRef(SecondToken))); 167 return Token; 168 } 169 170 Solver::Result DataflowAnalysisContext::querySolver( 171 llvm::SetVector<const Formula *> Constraints) { 172 return S->solve(Constraints.getArrayRef()); 173 } 174 175 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 176 const Formula &F) { 177 // Returns true if and only if truth assignment of the flow condition implies 178 // that `F` is also true. We prove whether or not this property holds by 179 // reducing the problem to satisfiability checking. In other words, we attempt 180 // to show that assuming `F` is false makes the constraints induced by the 181 // flow condition unsatisfiable. 182 llvm::SetVector<const Formula *> Constraints; 183 Constraints.insert(&arena().makeAtomRef(Token)); 184 Constraints.insert(&arena().makeNot(F)); 185 addTransitiveFlowConditionConstraints(Token, Constraints); 186 return isUnsatisfiable(std::move(Constraints)); 187 } 188 189 bool DataflowAnalysisContext::flowConditionAllows(Atom Token, 190 const Formula &F) { 191 llvm::SetVector<const Formula *> Constraints; 192 Constraints.insert(&arena().makeAtomRef(Token)); 193 Constraints.insert(&F); 194 addTransitiveFlowConditionConstraints(Token, Constraints); 195 return isSatisfiable(std::move(Constraints)); 196 } 197 198 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 199 const Formula &Val2) { 200 llvm::SetVector<const Formula *> Constraints; 201 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 202 return isUnsatisfiable(std::move(Constraints)); 203 } 204 205 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 206 Atom Token, llvm::SetVector<const Formula *> &Constraints) { 207 llvm::DenseSet<Atom> AddedTokens; 208 std::vector<Atom> Remaining = {Token}; 209 210 if (Invariant) 211 Constraints.insert(Invariant); 212 // Define all the flow conditions that might be referenced in constraints. 213 while (!Remaining.empty()) { 214 auto Token = Remaining.back(); 215 Remaining.pop_back(); 216 if (!AddedTokens.insert(Token).second) 217 continue; 218 219 auto ConstraintsIt = FlowConditionConstraints.find(Token); 220 if (ConstraintsIt == FlowConditionConstraints.end()) { 221 Constraints.insert(&arena().makeAtomRef(Token)); 222 } else { 223 // Bind flow condition token via `iff` to its set of constraints: 224 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 225 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 226 *ConstraintsIt->second)); 227 } 228 229 if (auto DepsIt = FlowConditionDeps.find(Token); 230 DepsIt != FlowConditionDeps.end()) 231 for (Atom A : DepsIt->second) 232 Remaining.push_back(A); 233 } 234 } 235 236 static void printAtomList(const llvm::SmallVector<Atom> &Atoms, 237 llvm::raw_ostream &OS) { 238 OS << "("; 239 for (size_t i = 0; i < Atoms.size(); ++i) { 240 OS << Atoms[i]; 241 if (i + 1 < Atoms.size()) 242 OS << ", "; 243 } 244 OS << ")\n"; 245 } 246 247 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 248 llvm::raw_ostream &OS) { 249 llvm::SetVector<const Formula *> Constraints; 250 Constraints.insert(&arena().makeAtomRef(Token)); 251 addTransitiveFlowConditionConstraints(Token, Constraints); 252 253 OS << "Flow condition token: " << Token << "\n"; 254 SimplifyConstraintsInfo Info; 255 llvm::SetVector<const Formula *> OriginalConstraints = Constraints; 256 simplifyConstraints(Constraints, arena(), &Info); 257 if (!Constraints.empty()) { 258 OS << "Constraints:\n"; 259 for (const auto *Constraint : Constraints) { 260 Constraint->print(OS); 261 OS << "\n"; 262 } 263 } 264 if (!Info.TrueAtoms.empty()) { 265 OS << "True atoms: "; 266 printAtomList(Info.TrueAtoms, OS); 267 } 268 if (!Info.FalseAtoms.empty()) { 269 OS << "False atoms: "; 270 printAtomList(Info.FalseAtoms, OS); 271 } 272 if (!Info.EquivalentAtoms.empty()) { 273 OS << "Equivalent atoms:\n"; 274 for (const llvm::SmallVector<Atom> &Class : Info.EquivalentAtoms) 275 printAtomList(Class, OS); 276 } 277 278 OS << "\nFlow condition constraints before simplification:\n"; 279 for (const auto *Constraint : OriginalConstraints) { 280 Constraint->print(OS); 281 OS << "\n"; 282 } 283 } 284 285 const ControlFlowContext * 286 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { 287 // Canonicalize the key: 288 F = F->getDefinition(); 289 if (F == nullptr) 290 return nullptr; 291 auto It = FunctionContexts.find(F); 292 if (It != FunctionContexts.end()) 293 return &It->second; 294 295 if (F->hasBody()) { 296 auto CFCtx = ControlFlowContext::build(*F); 297 // FIXME: Handle errors. 298 assert(CFCtx); 299 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); 300 return &Result.first->second; 301 } 302 303 return nullptr; 304 } 305 306 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 307 if (DataflowLog.empty()) 308 return Logger::textual(llvm::errs()); 309 310 llvm::StringRef Dir = DataflowLog; 311 if (auto EC = llvm::sys::fs::create_directories(Dir)) 312 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 313 // All analysis runs within a process will log to the same directory. 314 // Share a counter so they don't all overwrite each other's 0.html. 315 // (Don't share a logger, it's not threadsafe). 316 static std::atomic<unsigned> Counter = {0}; 317 auto StreamFactory = 318 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 319 llvm::SmallString<256> File(Dir); 320 llvm::sys::path::append(File, 321 std::to_string(Counter.fetch_add(1)) + ".html"); 322 std::error_code EC; 323 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 324 if (EC) { 325 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 326 << "\n"; 327 return std::make_unique<llvm::raw_null_ostream>(); 328 } 329 return OS; 330 }; 331 return Logger::html(std::move(StreamFactory)); 332 } 333 334 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, 335 Options Opts) 336 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { 337 assert(this->S != nullptr); 338 // If the -dataflow-log command-line flag was set, synthesize a logger. 339 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 340 // based tools. 341 if (Opts.Log == nullptr) { 342 if (DataflowLog.getNumOccurrences() > 0) { 343 LogOwner = makeLoggerFromCommandLine(); 344 this->Opts.Log = LogOwner.get(); 345 // FIXME: if the flag is given a value, write an HTML log to a file. 346 } else { 347 this->Opts.Log = &Logger::null(); 348 } 349 } 350 } 351 352 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 353 354 } // namespace dataflow 355 } // namespace clang 356 357 using namespace clang; 358 359 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { 360 const Expr *Current = &E; 361 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { 362 Current = EWC->getSubExpr(); 363 assert(Current != nullptr); 364 } 365 Current = Current->IgnoreParens(); 366 assert(Current != nullptr); 367 return *Current; 368 } 369 370 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { 371 if (auto *E = dyn_cast<Expr>(&S)) 372 return ignoreCFGOmittedNodes(*E); 373 return S; 374 } 375 376 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single 377 // field decl will be modeled for all instances of the inherited field. 378 static void getFieldsFromClassHierarchy(QualType Type, 379 clang::dataflow::FieldSet &Fields) { 380 if (Type->isIncompleteType() || Type->isDependentType() || 381 !Type->isRecordType()) 382 return; 383 384 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) 385 Fields.insert(Field); 386 if (auto *CXXRecord = Type->getAsCXXRecordDecl()) 387 for (const CXXBaseSpecifier &Base : CXXRecord->bases()) 388 getFieldsFromClassHierarchy(Base.getType(), Fields); 389 } 390 391 /// Gets the set of all fields in the type. 392 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { 393 FieldSet Fields; 394 getFieldsFromClassHierarchy(Type, Fields); 395 return Fields; 396 } 397 398 bool clang::dataflow::containsSameFields( 399 const clang::dataflow::FieldSet &Fields, 400 const clang::dataflow::RecordStorageLocation::FieldToLoc &FieldLocs) { 401 if (Fields.size() != FieldLocs.size()) 402 return false; 403 for ([[maybe_unused]] auto [Field, Loc] : FieldLocs) 404 if (!Fields.contains(cast_or_null<FieldDecl>(Field))) 405 return false; 406 return true; 407 } 408