1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 18 #include "clang/Analysis/FlowSensitive/Formula.h" 19 #include "clang/Analysis/FlowSensitive/Logger.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/SetOperations.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/Path.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cassert> 29 #include <memory> 30 #include <string> 31 #include <utility> 32 #include <vector> 33 34 static llvm::cl::opt<std::string> DataflowLog( 35 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 36 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 37 "log to stderr. With an arg, writes HTML logs under the " 38 "specified directory (one per analyzed function).")); 39 40 namespace clang { 41 namespace dataflow { 42 43 void DataflowAnalysisContext::addModeledFields( 44 const llvm::DenseSet<const FieldDecl *> &Fields) { 45 llvm::set_union(ModeledFields, Fields); 46 } 47 48 llvm::DenseSet<const FieldDecl *> 49 DataflowAnalysisContext::getReferencedFields(QualType Type) { 50 llvm::DenseSet<const FieldDecl *> Fields = getObjectFields(Type); 51 llvm::set_intersect(Fields, ModeledFields); 52 return Fields; 53 } 54 55 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 56 if (!Type.isNull() && Type->isRecordType()) { 57 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 58 // During context-sensitive analysis, a struct may be allocated in one 59 // function, but its field accessed in a function lower in the stack than 60 // the allocation. Since we only collect fields used in the function where 61 // the allocation occurs, we can't apply that filter when performing 62 // context-sensitive analysis. But, this only applies to storage locations, 63 // since field access it not allowed to fail. In contrast, field *values* 64 // don't need this allowance, since the API allows for uninitialized fields. 65 auto Fields = Opts.ContextSensitiveOpts ? getObjectFields(Type) 66 : getReferencedFields(Type); 67 for (const FieldDecl *Field : Fields) 68 FieldLocs.insert({Field, &createStorageLocation(Field->getType())}); 69 return arena().create<AggregateStorageLocation>(Type, std::move(FieldLocs)); 70 } 71 return arena().create<ScalarStorageLocation>(Type); 72 } 73 74 StorageLocation & 75 DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) { 76 if (auto *Loc = getStorageLocation(D)) 77 return *Loc; 78 auto &Loc = createStorageLocation(D.getType()); 79 setStorageLocation(D, Loc); 80 return Loc; 81 } 82 83 StorageLocation & 84 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 85 if (auto *Loc = getStorageLocation(E)) 86 return *Loc; 87 auto &Loc = createStorageLocation(E.getType()); 88 setStorageLocation(E, Loc); 89 return Loc; 90 } 91 92 PointerValue & 93 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 94 auto CanonicalPointeeType = 95 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 96 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 97 if (Res.second) { 98 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 99 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 100 } 101 return *Res.first->second; 102 } 103 104 void DataflowAnalysisContext::addFlowConditionConstraint( 105 Atom Token, const Formula &Constraint) { 106 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 107 if (!Res.second) { 108 Res.first->second = 109 &arena().makeAnd(*Res.first->second, Constraint); 110 } 111 } 112 113 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 114 Atom ForkToken = arena().makeFlowConditionToken(); 115 FlowConditionDeps[ForkToken].insert(Token); 116 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 117 return ForkToken; 118 } 119 120 Atom 121 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 122 Atom SecondToken) { 123 Atom Token = arena().makeFlowConditionToken(); 124 FlowConditionDeps[Token].insert(FirstToken); 125 FlowConditionDeps[Token].insert(SecondToken); 126 addFlowConditionConstraint(Token, 127 arena().makeOr(arena().makeAtomRef(FirstToken), 128 arena().makeAtomRef(SecondToken))); 129 return Token; 130 } 131 132 Solver::Result DataflowAnalysisContext::querySolver( 133 llvm::SetVector<const Formula *> Constraints) { 134 Constraints.insert(&arena().makeLiteral(true)); 135 Constraints.insert(&arena().makeNot(arena().makeLiteral(false))); 136 return S->solve(Constraints.getArrayRef()); 137 } 138 139 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 140 const Formula &Val) { 141 // Returns true if and only if truth assignment of the flow condition implies 142 // that `Val` is also true. We prove whether or not this property holds by 143 // reducing the problem to satisfiability checking. In other words, we attempt 144 // to show that assuming `Val` is false makes the constraints induced by the 145 // flow condition unsatisfiable. 146 llvm::SetVector<const Formula *> Constraints; 147 Constraints.insert(&arena().makeAtomRef(Token)); 148 Constraints.insert(&arena().makeNot(Val)); 149 llvm::DenseSet<Atom> VisitedTokens; 150 addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 151 return isUnsatisfiable(std::move(Constraints)); 152 } 153 154 bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) { 155 // Returns true if and only if we cannot prove that the flow condition can 156 // ever be false. 157 llvm::SetVector<const Formula *> Constraints; 158 Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token))); 159 llvm::DenseSet<Atom> VisitedTokens; 160 addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 161 return isUnsatisfiable(std::move(Constraints)); 162 } 163 164 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 165 const Formula &Val2) { 166 llvm::SetVector<const Formula *> Constraints; 167 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 168 return isUnsatisfiable(std::move(Constraints)); 169 } 170 171 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 172 Atom Token, llvm::SetVector<const Formula *> &Constraints, 173 llvm::DenseSet<Atom> &VisitedTokens) { 174 auto Res = VisitedTokens.insert(Token); 175 if (!Res.second) 176 return; 177 178 auto ConstraintsIt = FlowConditionConstraints.find(Token); 179 if (ConstraintsIt == FlowConditionConstraints.end()) { 180 Constraints.insert(&arena().makeAtomRef(Token)); 181 } else { 182 // Bind flow condition token via `iff` to its set of constraints: 183 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 184 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 185 *ConstraintsIt->second)); 186 } 187 188 auto DepsIt = FlowConditionDeps.find(Token); 189 if (DepsIt != FlowConditionDeps.end()) { 190 for (Atom DepToken : DepsIt->second) { 191 addTransitiveFlowConditionConstraints(DepToken, Constraints, 192 VisitedTokens); 193 } 194 } 195 } 196 197 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 198 llvm::raw_ostream &OS) { 199 llvm::SetVector<const Formula *> Constraints; 200 Constraints.insert(&arena().makeAtomRef(Token)); 201 llvm::DenseSet<Atom> VisitedTokens; 202 addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 203 204 // TODO: have formulas know about true/false directly instead 205 Atom True = arena().makeLiteral(true).getAtom(); 206 Atom False = arena().makeLiteral(false).getAtom(); 207 Formula::AtomNames Names = {{False, "false"}, {True, "true"}}; 208 209 for (const auto *Constraint : Constraints) { 210 Constraint->print(OS, &Names); 211 OS << "\n"; 212 } 213 } 214 215 const ControlFlowContext * 216 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { 217 // Canonicalize the key: 218 F = F->getDefinition(); 219 if (F == nullptr) 220 return nullptr; 221 auto It = FunctionContexts.find(F); 222 if (It != FunctionContexts.end()) 223 return &It->second; 224 225 if (F->hasBody()) { 226 auto CFCtx = ControlFlowContext::build(*F); 227 // FIXME: Handle errors. 228 assert(CFCtx); 229 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); 230 return &Result.first->second; 231 } 232 233 return nullptr; 234 } 235 236 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 237 if (DataflowLog.empty()) 238 return Logger::textual(llvm::errs()); 239 240 llvm::StringRef Dir = DataflowLog; 241 if (auto EC = llvm::sys::fs::create_directories(Dir)) 242 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 243 // All analysis runs within a process will log to the same directory. 244 // Share a counter so they don't all overwrite each other's 0.html. 245 // (Don't share a logger, it's not threadsafe). 246 static std::atomic<unsigned> Counter = {0}; 247 auto StreamFactory = 248 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 249 llvm::SmallString<256> File(Dir); 250 llvm::sys::path::append(File, 251 std::to_string(Counter.fetch_add(1)) + ".html"); 252 std::error_code EC; 253 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 254 if (EC) { 255 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 256 << "\n"; 257 return std::make_unique<llvm::raw_null_ostream>(); 258 } 259 return OS; 260 }; 261 return Logger::html(std::move(StreamFactory)); 262 } 263 264 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, 265 Options Opts) 266 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { 267 assert(this->S != nullptr); 268 // If the -dataflow-log command-line flag was set, synthesize a logger. 269 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 270 // based tools. 271 if (Opts.Log == nullptr) { 272 if (DataflowLog.getNumOccurrences() > 0) { 273 LogOwner = makeLoggerFromCommandLine(); 274 this->Opts.Log = LogOwner.get(); 275 // FIXME: if the flag is given a value, write an HTML log to a file. 276 } else { 277 this->Opts.Log = &Logger::null(); 278 } 279 } 280 } 281 282 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 283 284 } // namespace dataflow 285 } // namespace clang 286 287 using namespace clang; 288 289 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { 290 const Expr *Current = &E; 291 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { 292 Current = EWC->getSubExpr(); 293 assert(Current != nullptr); 294 } 295 Current = Current->IgnoreParens(); 296 assert(Current != nullptr); 297 return *Current; 298 } 299 300 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { 301 if (auto *E = dyn_cast<Expr>(&S)) 302 return ignoreCFGOmittedNodes(*E); 303 return S; 304 } 305 306 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single 307 // field decl will be modeled for all instances of the inherited field. 308 static void 309 getFieldsFromClassHierarchy(QualType Type, 310 llvm::DenseSet<const FieldDecl *> &Fields) { 311 if (Type->isIncompleteType() || Type->isDependentType() || 312 !Type->isRecordType()) 313 return; 314 315 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) 316 Fields.insert(Field); 317 if (auto *CXXRecord = Type->getAsCXXRecordDecl()) 318 for (const CXXBaseSpecifier &Base : CXXRecord->bases()) 319 getFieldsFromClassHierarchy(Base.getType(), Fields); 320 } 321 322 /// Gets the set of all fields in the type. 323 llvm::DenseSet<const FieldDecl *> 324 clang::dataflow::getObjectFields(QualType Type) { 325 llvm::DenseSet<const FieldDecl *> Fields; 326 getFieldsFromClassHierarchy(Type, Fields); 327 return Fields; 328 } 329