1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 18 #include "clang/Analysis/FlowSensitive/Formula.h" 19 #include "clang/Analysis/FlowSensitive/Logger.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/SetOperations.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/Path.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cassert> 29 #include <memory> 30 #include <string> 31 #include <utility> 32 #include <vector> 33 34 static llvm::cl::opt<std::string> DataflowLog( 35 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 36 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 37 "log to stderr. With an arg, writes HTML logs under the " 38 "specified directory (one per analyzed function).")); 39 40 namespace clang { 41 namespace dataflow { 42 43 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 44 // During context-sensitive analysis, a struct may be allocated in one 45 // function, but its field accessed in a function lower in the stack than 46 // the allocation. Since we only collect fields used in the function where 47 // the allocation occurs, we can't apply that filter when performing 48 // context-sensitive analysis. But, this only applies to storage locations, 49 // since field access it not allowed to fail. In contrast, field *values* 50 // don't need this allowance, since the API allows for uninitialized fields. 51 if (Opts.ContextSensitiveOpts) 52 return getObjectFields(Type); 53 54 return llvm::set_intersection(getObjectFields(Type), ModeledFields); 55 } 56 57 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 58 ModeledFields.set_union(Fields); 59 } 60 61 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 62 if (!Type.isNull() && Type->isRecordType()) { 63 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 64 for (const FieldDecl *Field : getModeledFields(Type)) 65 if (Field->getType()->isReferenceType()) 66 FieldLocs.insert({Field, nullptr}); 67 else 68 FieldLocs.insert({Field, &createStorageLocation( 69 Field->getType().getNonReferenceType())}); 70 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs)); 71 } 72 return arena().create<ScalarStorageLocation>(Type); 73 } 74 75 StorageLocation & 76 DataflowAnalysisContext::getStableStorageLocation(const ValueDecl &D) { 77 if (auto *Loc = DeclToLoc.lookup(&D)) 78 return *Loc; 79 auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 80 DeclToLoc[&D] = &Loc; 81 return Loc; 82 } 83 84 StorageLocation & 85 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 86 const Expr &CanonE = ignoreCFGOmittedNodes(E); 87 88 if (auto *Loc = ExprToLoc.lookup(&CanonE)) 89 return *Loc; 90 auto &Loc = createStorageLocation(CanonE.getType()); 91 ExprToLoc[&CanonE] = &Loc; 92 return Loc; 93 } 94 95 PointerValue & 96 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 97 auto CanonicalPointeeType = 98 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 99 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 100 if (Res.second) { 101 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 102 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 103 } 104 return *Res.first->second; 105 } 106 107 void DataflowAnalysisContext::addInvariant(const Formula &Constraint) { 108 if (Invariant == nullptr) 109 Invariant = &Constraint; 110 else 111 Invariant = &arena().makeAnd(*Invariant, Constraint); 112 } 113 114 void DataflowAnalysisContext::addFlowConditionConstraint( 115 Atom Token, const Formula &Constraint) { 116 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 117 if (!Res.second) { 118 Res.first->second = 119 &arena().makeAnd(*Res.first->second, Constraint); 120 } 121 } 122 123 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 124 Atom ForkToken = arena().makeFlowConditionToken(); 125 FlowConditionDeps[ForkToken].insert(Token); 126 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 127 return ForkToken; 128 } 129 130 Atom 131 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 132 Atom SecondToken) { 133 Atom Token = arena().makeFlowConditionToken(); 134 FlowConditionDeps[Token].insert(FirstToken); 135 FlowConditionDeps[Token].insert(SecondToken); 136 addFlowConditionConstraint(Token, 137 arena().makeOr(arena().makeAtomRef(FirstToken), 138 arena().makeAtomRef(SecondToken))); 139 return Token; 140 } 141 142 Solver::Result DataflowAnalysisContext::querySolver( 143 llvm::SetVector<const Formula *> Constraints) { 144 return S->solve(Constraints.getArrayRef()); 145 } 146 147 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 148 const Formula &Val) { 149 // Returns true if and only if truth assignment of the flow condition implies 150 // that `Val` is also true. We prove whether or not this property holds by 151 // reducing the problem to satisfiability checking. In other words, we attempt 152 // to show that assuming `Val` is false makes the constraints induced by the 153 // flow condition unsatisfiable. 154 llvm::SetVector<const Formula *> Constraints; 155 Constraints.insert(&arena().makeAtomRef(Token)); 156 Constraints.insert(&arena().makeNot(Val)); 157 addTransitiveFlowConditionConstraints(Token, Constraints); 158 return isUnsatisfiable(std::move(Constraints)); 159 } 160 161 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 162 const Formula &Val2) { 163 llvm::SetVector<const Formula *> Constraints; 164 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 165 return isUnsatisfiable(std::move(Constraints)); 166 } 167 168 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 169 Atom Token, llvm::SetVector<const Formula *> &Constraints) { 170 llvm::DenseSet<Atom> AddedTokens; 171 std::vector<Atom> Remaining = {Token}; 172 173 if (Invariant) 174 Constraints.insert(Invariant); 175 // Define all the flow conditions that might be referenced in constraints. 176 while (!Remaining.empty()) { 177 auto Token = Remaining.back(); 178 Remaining.pop_back(); 179 if (!AddedTokens.insert(Token).second) 180 continue; 181 182 auto ConstraintsIt = FlowConditionConstraints.find(Token); 183 if (ConstraintsIt == FlowConditionConstraints.end()) { 184 Constraints.insert(&arena().makeAtomRef(Token)); 185 } else { 186 // Bind flow condition token via `iff` to its set of constraints: 187 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 188 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 189 *ConstraintsIt->second)); 190 } 191 192 if (auto DepsIt = FlowConditionDeps.find(Token); 193 DepsIt != FlowConditionDeps.end()) 194 for (Atom A : DepsIt->second) 195 Remaining.push_back(A); 196 } 197 } 198 199 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 200 llvm::raw_ostream &OS) { 201 llvm::SetVector<const Formula *> Constraints; 202 Constraints.insert(&arena().makeAtomRef(Token)); 203 addTransitiveFlowConditionConstraints(Token, Constraints); 204 205 for (const auto *Constraint : Constraints) { 206 Constraint->print(OS); 207 OS << "\n"; 208 } 209 } 210 211 const ControlFlowContext * 212 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { 213 // Canonicalize the key: 214 F = F->getDefinition(); 215 if (F == nullptr) 216 return nullptr; 217 auto It = FunctionContexts.find(F); 218 if (It != FunctionContexts.end()) 219 return &It->second; 220 221 if (F->hasBody()) { 222 auto CFCtx = ControlFlowContext::build(*F); 223 // FIXME: Handle errors. 224 assert(CFCtx); 225 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); 226 return &Result.first->second; 227 } 228 229 return nullptr; 230 } 231 232 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 233 if (DataflowLog.empty()) 234 return Logger::textual(llvm::errs()); 235 236 llvm::StringRef Dir = DataflowLog; 237 if (auto EC = llvm::sys::fs::create_directories(Dir)) 238 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 239 // All analysis runs within a process will log to the same directory. 240 // Share a counter so they don't all overwrite each other's 0.html. 241 // (Don't share a logger, it's not threadsafe). 242 static std::atomic<unsigned> Counter = {0}; 243 auto StreamFactory = 244 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 245 llvm::SmallString<256> File(Dir); 246 llvm::sys::path::append(File, 247 std::to_string(Counter.fetch_add(1)) + ".html"); 248 std::error_code EC; 249 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 250 if (EC) { 251 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 252 << "\n"; 253 return std::make_unique<llvm::raw_null_ostream>(); 254 } 255 return OS; 256 }; 257 return Logger::html(std::move(StreamFactory)); 258 } 259 260 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, 261 Options Opts) 262 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { 263 assert(this->S != nullptr); 264 // If the -dataflow-log command-line flag was set, synthesize a logger. 265 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 266 // based tools. 267 if (Opts.Log == nullptr) { 268 if (DataflowLog.getNumOccurrences() > 0) { 269 LogOwner = makeLoggerFromCommandLine(); 270 this->Opts.Log = LogOwner.get(); 271 // FIXME: if the flag is given a value, write an HTML log to a file. 272 } else { 273 this->Opts.Log = &Logger::null(); 274 } 275 } 276 } 277 278 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 279 280 } // namespace dataflow 281 } // namespace clang 282 283 using namespace clang; 284 285 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { 286 const Expr *Current = &E; 287 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { 288 Current = EWC->getSubExpr(); 289 assert(Current != nullptr); 290 } 291 Current = Current->IgnoreParens(); 292 assert(Current != nullptr); 293 return *Current; 294 } 295 296 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { 297 if (auto *E = dyn_cast<Expr>(&S)) 298 return ignoreCFGOmittedNodes(*E); 299 return S; 300 } 301 302 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single 303 // field decl will be modeled for all instances of the inherited field. 304 static void getFieldsFromClassHierarchy(QualType Type, 305 clang::dataflow::FieldSet &Fields) { 306 if (Type->isIncompleteType() || Type->isDependentType() || 307 !Type->isRecordType()) 308 return; 309 310 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) 311 Fields.insert(Field); 312 if (auto *CXXRecord = Type->getAsCXXRecordDecl()) 313 for (const CXXBaseSpecifier &Base : CXXRecord->bases()) 314 getFieldsFromClassHierarchy(Base.getType(), Fields); 315 } 316 317 /// Gets the set of all fields in the type. 318 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { 319 FieldSet Fields; 320 getFieldsFromClassHierarchy(Type, Fields); 321 return Fields; 322 } 323