1 //===-- DataflowAnalysisContext.cpp -----------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines a DataflowAnalysisContext class that owns objects that 10 // encompass the state of a program and stores context that is used during 11 // dataflow analysis. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "clang/Analysis/FlowSensitive/DataflowAnalysisContext.h" 16 #include "clang/AST/ExprCXX.h" 17 #include "clang/Analysis/FlowSensitive/DebugSupport.h" 18 #include "clang/Analysis/FlowSensitive/Formula.h" 19 #include "clang/Analysis/FlowSensitive/Logger.h" 20 #include "clang/Analysis/FlowSensitive/Value.h" 21 #include "llvm/ADT/SetOperations.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/Support/CommandLine.h" 24 #include "llvm/Support/Debug.h" 25 #include "llvm/Support/FileSystem.h" 26 #include "llvm/Support/Path.h" 27 #include "llvm/Support/raw_ostream.h" 28 #include <cassert> 29 #include <memory> 30 #include <string> 31 #include <utility> 32 #include <vector> 33 34 static llvm::cl::opt<std::string> DataflowLog( 35 "dataflow-log", llvm::cl::Hidden, llvm::cl::ValueOptional, 36 llvm::cl::desc("Emit log of dataflow analysis. With no arg, writes textual " 37 "log to stderr. With an arg, writes HTML logs under the " 38 "specified directory (one per analyzed function).")); 39 40 namespace clang { 41 namespace dataflow { 42 43 FieldSet DataflowAnalysisContext::getModeledFields(QualType Type) { 44 // During context-sensitive analysis, a struct may be allocated in one 45 // function, but its field accessed in a function lower in the stack than 46 // the allocation. Since we only collect fields used in the function where 47 // the allocation occurs, we can't apply that filter when performing 48 // context-sensitive analysis. But, this only applies to storage locations, 49 // since field access it not allowed to fail. In contrast, field *values* 50 // don't need this allowance, since the API allows for uninitialized fields. 51 if (Opts.ContextSensitiveOpts) 52 return getObjectFields(Type); 53 54 return llvm::set_intersection(getObjectFields(Type), ModeledFields); 55 } 56 57 void DataflowAnalysisContext::addModeledFields(const FieldSet &Fields) { 58 ModeledFields.set_union(Fields); 59 } 60 61 StorageLocation &DataflowAnalysisContext::createStorageLocation(QualType Type) { 62 if (!Type.isNull() && Type->isRecordType()) { 63 llvm::DenseMap<const ValueDecl *, StorageLocation *> FieldLocs; 64 for (const FieldDecl *Field : getModeledFields(Type)) 65 if (Field->getType()->isReferenceType()) 66 FieldLocs.insert({Field, nullptr}); 67 else 68 FieldLocs.insert({Field, &createStorageLocation( 69 Field->getType().getNonReferenceType())}); 70 return arena().create<RecordStorageLocation>(Type, std::move(FieldLocs)); 71 } 72 return arena().create<ScalarStorageLocation>(Type); 73 } 74 75 StorageLocation & 76 DataflowAnalysisContext::getStableStorageLocation(const VarDecl &D) { 77 if (auto *Loc = DeclToLoc.lookup(&D)) 78 return *Loc; 79 auto &Loc = createStorageLocation(D.getType().getNonReferenceType()); 80 DeclToLoc[&D] = &Loc; 81 return Loc; 82 } 83 84 StorageLocation & 85 DataflowAnalysisContext::getStableStorageLocation(const Expr &E) { 86 const Expr &CanonE = ignoreCFGOmittedNodes(E); 87 88 if (auto *Loc = ExprToLoc.lookup(&CanonE)) 89 return *Loc; 90 auto &Loc = createStorageLocation(CanonE.getType()); 91 ExprToLoc[&CanonE] = &Loc; 92 return Loc; 93 } 94 95 PointerValue & 96 DataflowAnalysisContext::getOrCreateNullPointerValue(QualType PointeeType) { 97 auto CanonicalPointeeType = 98 PointeeType.isNull() ? PointeeType : PointeeType.getCanonicalType(); 99 auto Res = NullPointerVals.try_emplace(CanonicalPointeeType, nullptr); 100 if (Res.second) { 101 auto &PointeeLoc = createStorageLocation(CanonicalPointeeType); 102 Res.first->second = &arena().create<PointerValue>(PointeeLoc); 103 } 104 return *Res.first->second; 105 } 106 107 void DataflowAnalysisContext::addFlowConditionConstraint( 108 Atom Token, const Formula &Constraint) { 109 auto Res = FlowConditionConstraints.try_emplace(Token, &Constraint); 110 if (!Res.second) { 111 Res.first->second = 112 &arena().makeAnd(*Res.first->second, Constraint); 113 } 114 } 115 116 Atom DataflowAnalysisContext::forkFlowCondition(Atom Token) { 117 Atom ForkToken = arena().makeFlowConditionToken(); 118 FlowConditionDeps[ForkToken].insert(Token); 119 addFlowConditionConstraint(ForkToken, arena().makeAtomRef(Token)); 120 return ForkToken; 121 } 122 123 Atom 124 DataflowAnalysisContext::joinFlowConditions(Atom FirstToken, 125 Atom SecondToken) { 126 Atom Token = arena().makeFlowConditionToken(); 127 FlowConditionDeps[Token].insert(FirstToken); 128 FlowConditionDeps[Token].insert(SecondToken); 129 addFlowConditionConstraint(Token, 130 arena().makeOr(arena().makeAtomRef(FirstToken), 131 arena().makeAtomRef(SecondToken))); 132 return Token; 133 } 134 135 Solver::Result DataflowAnalysisContext::querySolver( 136 llvm::SetVector<const Formula *> Constraints) { 137 Constraints.insert(&arena().makeLiteral(true)); 138 Constraints.insert(&arena().makeNot(arena().makeLiteral(false))); 139 return S->solve(Constraints.getArrayRef()); 140 } 141 142 bool DataflowAnalysisContext::flowConditionImplies(Atom Token, 143 const Formula &Val) { 144 // Returns true if and only if truth assignment of the flow condition implies 145 // that `Val` is also true. We prove whether or not this property holds by 146 // reducing the problem to satisfiability checking. In other words, we attempt 147 // to show that assuming `Val` is false makes the constraints induced by the 148 // flow condition unsatisfiable. 149 llvm::SetVector<const Formula *> Constraints; 150 Constraints.insert(&arena().makeAtomRef(Token)); 151 Constraints.insert(&arena().makeNot(Val)); 152 llvm::DenseSet<Atom> VisitedTokens; 153 addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 154 return isUnsatisfiable(std::move(Constraints)); 155 } 156 157 bool DataflowAnalysisContext::flowConditionIsTautology(Atom Token) { 158 // Returns true if and only if we cannot prove that the flow condition can 159 // ever be false. 160 llvm::SetVector<const Formula *> Constraints; 161 Constraints.insert(&arena().makeNot(arena().makeAtomRef(Token))); 162 llvm::DenseSet<Atom> VisitedTokens; 163 addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 164 return isUnsatisfiable(std::move(Constraints)); 165 } 166 167 bool DataflowAnalysisContext::equivalentFormulas(const Formula &Val1, 168 const Formula &Val2) { 169 llvm::SetVector<const Formula *> Constraints; 170 Constraints.insert(&arena().makeNot(arena().makeEquals(Val1, Val2))); 171 return isUnsatisfiable(std::move(Constraints)); 172 } 173 174 void DataflowAnalysisContext::addTransitiveFlowConditionConstraints( 175 Atom Token, llvm::SetVector<const Formula *> &Constraints, 176 llvm::DenseSet<Atom> &VisitedTokens) { 177 auto Res = VisitedTokens.insert(Token); 178 if (!Res.second) 179 return; 180 181 auto ConstraintsIt = FlowConditionConstraints.find(Token); 182 if (ConstraintsIt == FlowConditionConstraints.end()) { 183 Constraints.insert(&arena().makeAtomRef(Token)); 184 } else { 185 // Bind flow condition token via `iff` to its set of constraints: 186 // FC <=> (C1 ^ C2 ^ ...), where Ci are constraints 187 Constraints.insert(&arena().makeEquals(arena().makeAtomRef(Token), 188 *ConstraintsIt->second)); 189 } 190 191 auto DepsIt = FlowConditionDeps.find(Token); 192 if (DepsIt != FlowConditionDeps.end()) { 193 for (Atom DepToken : DepsIt->second) { 194 addTransitiveFlowConditionConstraints(DepToken, Constraints, 195 VisitedTokens); 196 } 197 } 198 } 199 200 void DataflowAnalysisContext::dumpFlowCondition(Atom Token, 201 llvm::raw_ostream &OS) { 202 llvm::SetVector<const Formula *> Constraints; 203 Constraints.insert(&arena().makeAtomRef(Token)); 204 llvm::DenseSet<Atom> VisitedTokens; 205 addTransitiveFlowConditionConstraints(Token, Constraints, VisitedTokens); 206 207 // TODO: have formulas know about true/false directly instead 208 Atom True = arena().makeLiteral(true).getAtom(); 209 Atom False = arena().makeLiteral(false).getAtom(); 210 Formula::AtomNames Names = {{False, "false"}, {True, "true"}}; 211 212 for (const auto *Constraint : Constraints) { 213 Constraint->print(OS, &Names); 214 OS << "\n"; 215 } 216 } 217 218 const ControlFlowContext * 219 DataflowAnalysisContext::getControlFlowContext(const FunctionDecl *F) { 220 // Canonicalize the key: 221 F = F->getDefinition(); 222 if (F == nullptr) 223 return nullptr; 224 auto It = FunctionContexts.find(F); 225 if (It != FunctionContexts.end()) 226 return &It->second; 227 228 if (F->hasBody()) { 229 auto CFCtx = ControlFlowContext::build(*F); 230 // FIXME: Handle errors. 231 assert(CFCtx); 232 auto Result = FunctionContexts.insert({F, std::move(*CFCtx)}); 233 return &Result.first->second; 234 } 235 236 return nullptr; 237 } 238 239 static std::unique_ptr<Logger> makeLoggerFromCommandLine() { 240 if (DataflowLog.empty()) 241 return Logger::textual(llvm::errs()); 242 243 llvm::StringRef Dir = DataflowLog; 244 if (auto EC = llvm::sys::fs::create_directories(Dir)) 245 llvm::errs() << "Failed to create log dir: " << EC.message() << "\n"; 246 // All analysis runs within a process will log to the same directory. 247 // Share a counter so they don't all overwrite each other's 0.html. 248 // (Don't share a logger, it's not threadsafe). 249 static std::atomic<unsigned> Counter = {0}; 250 auto StreamFactory = 251 [Dir(Dir.str())]() mutable -> std::unique_ptr<llvm::raw_ostream> { 252 llvm::SmallString<256> File(Dir); 253 llvm::sys::path::append(File, 254 std::to_string(Counter.fetch_add(1)) + ".html"); 255 std::error_code EC; 256 auto OS = std::make_unique<llvm::raw_fd_ostream>(File, EC); 257 if (EC) { 258 llvm::errs() << "Failed to create log " << File << ": " << EC.message() 259 << "\n"; 260 return std::make_unique<llvm::raw_null_ostream>(); 261 } 262 return OS; 263 }; 264 return Logger::html(std::move(StreamFactory)); 265 } 266 267 DataflowAnalysisContext::DataflowAnalysisContext(std::unique_ptr<Solver> S, 268 Options Opts) 269 : S(std::move(S)), A(std::make_unique<Arena>()), Opts(Opts) { 270 assert(this->S != nullptr); 271 // If the -dataflow-log command-line flag was set, synthesize a logger. 272 // This is ugly but provides a uniform method for ad-hoc debugging dataflow- 273 // based tools. 274 if (Opts.Log == nullptr) { 275 if (DataflowLog.getNumOccurrences() > 0) { 276 LogOwner = makeLoggerFromCommandLine(); 277 this->Opts.Log = LogOwner.get(); 278 // FIXME: if the flag is given a value, write an HTML log to a file. 279 } else { 280 this->Opts.Log = &Logger::null(); 281 } 282 } 283 } 284 285 DataflowAnalysisContext::~DataflowAnalysisContext() = default; 286 287 } // namespace dataflow 288 } // namespace clang 289 290 using namespace clang; 291 292 const Expr &clang::dataflow::ignoreCFGOmittedNodes(const Expr &E) { 293 const Expr *Current = &E; 294 if (auto *EWC = dyn_cast<ExprWithCleanups>(Current)) { 295 Current = EWC->getSubExpr(); 296 assert(Current != nullptr); 297 } 298 Current = Current->IgnoreParens(); 299 assert(Current != nullptr); 300 return *Current; 301 } 302 303 const Stmt &clang::dataflow::ignoreCFGOmittedNodes(const Stmt &S) { 304 if (auto *E = dyn_cast<Expr>(&S)) 305 return ignoreCFGOmittedNodes(*E); 306 return S; 307 } 308 309 // FIXME: Does not precisely handle non-virtual diamond inheritance. A single 310 // field decl will be modeled for all instances of the inherited field. 311 static void getFieldsFromClassHierarchy(QualType Type, 312 clang::dataflow::FieldSet &Fields) { 313 if (Type->isIncompleteType() || Type->isDependentType() || 314 !Type->isRecordType()) 315 return; 316 317 for (const FieldDecl *Field : Type->getAsRecordDecl()->fields()) 318 Fields.insert(Field); 319 if (auto *CXXRecord = Type->getAsCXXRecordDecl()) 320 for (const CXXBaseSpecifier &Base : CXXRecord->bases()) 321 getFieldsFromClassHierarchy(Base.getType(), Fields); 322 } 323 324 /// Gets the set of all fields in the type. 325 clang::dataflow::FieldSet clang::dataflow::getObjectFields(QualType Type) { 326 FieldSet Fields; 327 getFieldsFromClassHierarchy(Type, Fields); 328 return Fields; 329 } 330