1 //===- DataflowAnalysis.h ---------------------------------------*- C++ -*-===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file defines base types and functions for building dataflow analyses 10 // that run over Control-Flow Graphs (CFGs). 11 // 12 //===----------------------------------------------------------------------===// 13 14 #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 15 #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 16 17 #include <iterator> 18 #include <optional> 19 #include <type_traits> 20 #include <utility> 21 #include <vector> 22 23 #include "clang/AST/ASTContext.h" 24 #include "clang/Analysis/CFG.h" 25 #include "clang/Analysis/FlowSensitive/AdornedCFG.h" 26 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" 27 #include "clang/Analysis/FlowSensitive/DataflowLattice.h" 28 #include "clang/Analysis/FlowSensitive/MatchSwitch.h" 29 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" 30 #include "clang/Analysis/FlowSensitive/WatchedLiteralsSolver.h" 31 #include "llvm/ADT/STLExtras.h" 32 #include "llvm/ADT/STLFunctionalExtras.h" 33 #include "llvm/ADT/SmallVector.h" 34 #include "llvm/Support/Errc.h" 35 #include "llvm/Support/Error.h" 36 37 namespace clang { 38 namespace dataflow { 39 40 /// Base class template for dataflow analyses built on a single lattice type. 41 /// 42 /// Requirements: 43 /// 44 /// `Derived` must be derived from a specialization of this class template and 45 /// must provide the following public members: 46 /// * `LatticeT initialElement()` - returns a lattice element that models the 47 /// initial state of a basic block; 48 /// * `void transfer(const CFGElement &, LatticeT &, Environment &)` - applies 49 /// the analysis transfer function for a given CFG element and lattice 50 /// element. 51 /// 52 /// `Derived` can optionally provide the following members: 53 /// * `void transferBranch(bool Branch, const Stmt *Stmt, TypeErasedLattice &E, 54 /// Environment &Env)` - applies the analysis transfer 55 /// function for a given edge from a CFG block of a conditional statement. 56 /// 57 /// `Derived` can optionally override the virtual functions in the 58 /// `Environment::ValueModel` interface (which is an indirect base class of 59 /// this class). 60 /// 61 /// `LatticeT` is a bounded join-semilattice that is used by `Derived` and must 62 /// provide the following public members: 63 /// * `LatticeJoinEffect join(const LatticeT &)` - joins the object and the 64 /// argument by computing their least upper bound, modifies the object if 65 /// necessary, and returns an effect indicating whether any changes were 66 /// made to it; 67 /// FIXME: make it `static LatticeT join(const LatticeT&, const LatticeT&)` 68 /// * `bool operator==(const LatticeT &) const` - returns true if and only if 69 /// the object is equal to the argument. 70 /// 71 /// `LatticeT` can optionally provide the following members: 72 /// * `LatticeJoinEffect widen(const LatticeT &Previous)` - replaces the 73 /// lattice element with an approximation that can reach a fixed point more 74 /// quickly than iterated application of the transfer function alone. The 75 /// previous value is provided to inform the choice of widened value. The 76 /// function must also serve as a comparison operation, by indicating whether 77 /// the widened value is equivalent to the previous value with the returned 78 /// `LatticeJoinEffect`. 79 template <typename Derived, typename LatticeT> 80 class DataflowAnalysis : public TypeErasedDataflowAnalysis { 81 public: 82 /// Bounded join-semilattice that is used in the analysis. 83 using Lattice = LatticeT; 84 85 explicit DataflowAnalysis(ASTContext &Context) : Context(Context) {} 86 87 explicit DataflowAnalysis(ASTContext &Context, 88 DataflowAnalysisOptions Options) 89 : TypeErasedDataflowAnalysis(Options), Context(Context) {} 90 91 ASTContext &getASTContext() final { return Context; } 92 93 TypeErasedLattice typeErasedInitialElement() final { 94 return {static_cast<Derived *>(this)->initialElement()}; 95 } 96 97 TypeErasedLattice joinTypeErased(const TypeErasedLattice &E1, 98 const TypeErasedLattice &E2) final { 99 // FIXME: change the signature of join() to avoid copying here. 100 Lattice L1 = llvm::any_cast<const Lattice &>(E1.Value); 101 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); 102 L1.join(L2); 103 return {std::move(L1)}; 104 } 105 106 LatticeJoinEffect widenTypeErased(TypeErasedLattice &Current, 107 const TypeErasedLattice &Previous) final { 108 Lattice &C = llvm::any_cast<Lattice &>(Current.Value); 109 const Lattice &P = llvm::any_cast<const Lattice &>(Previous.Value); 110 return widenInternal(Rank0{}, C, P); 111 } 112 113 bool isEqualTypeErased(const TypeErasedLattice &E1, 114 const TypeErasedLattice &E2) final { 115 const Lattice &L1 = llvm::any_cast<const Lattice &>(E1.Value); 116 const Lattice &L2 = llvm::any_cast<const Lattice &>(E2.Value); 117 return L1 == L2; 118 } 119 120 void transferTypeErased(const CFGElement &Element, TypeErasedLattice &E, 121 Environment &Env) final { 122 Lattice &L = llvm::any_cast<Lattice &>(E.Value); 123 static_cast<Derived *>(this)->transfer(Element, L, Env); 124 } 125 126 void transferBranchTypeErased(bool Branch, const Stmt *Stmt, 127 TypeErasedLattice &E, Environment &Env) final { 128 transferBranchInternal(Rank0{}, *static_cast<Derived *>(this), Branch, Stmt, 129 E, Env); 130 } 131 132 private: 133 // These `Rank` structs are used for template metaprogramming to choose 134 // between overloads. 135 struct Rank1 {}; 136 struct Rank0 : Rank1 {}; 137 138 // The first-choice implementation: use `widen` when it is available. 139 template <typename T> 140 static auto widenInternal(Rank0, T &Current, const T &Prev) 141 -> decltype(Current.widen(Prev)) { 142 return Current.widen(Prev); 143 } 144 145 // The second-choice implementation: `widen` is unavailable. Widening is 146 // merged with equality checking, so when widening is unimplemented, we 147 // default to equality checking. 148 static LatticeJoinEffect widenInternal(Rank1, const Lattice &Current, 149 const Lattice &Prev) { 150 return Prev == Current ? LatticeJoinEffect::Unchanged 151 : LatticeJoinEffect::Changed; 152 } 153 154 // The first-choice implementation: `transferBranch` is implemented. 155 template <typename Analysis> 156 static auto transferBranchInternal(Rank0, Analysis &A, bool Branch, 157 const Stmt *Stmt, TypeErasedLattice &L, 158 Environment &Env) 159 -> std::void_t<decltype(A.transferBranch( 160 Branch, Stmt, std::declval<LatticeT &>(), Env))> { 161 A.transferBranch(Branch, Stmt, llvm::any_cast<Lattice &>(L.Value), Env); 162 } 163 164 // The second-choice implementation: `transferBranch` is unimplemented. No-op. 165 template <typename Analysis> 166 static void transferBranchInternal(Rank1, Analysis &A, bool, const Stmt *, 167 TypeErasedLattice &, Environment &) {} 168 169 ASTContext &Context; 170 }; 171 172 // Model of the program at a given program point. 173 template <typename LatticeT> struct DataflowAnalysisState { 174 // Model of a program property. 175 LatticeT Lattice; 176 177 // Model of the state of the program (store and heap). 178 Environment Env; 179 }; 180 181 /// A callback to be called with the state before or after visiting a CFG 182 /// element. 183 template <typename AnalysisT> 184 using CFGEltCallback = std::function<void( 185 const CFGElement &, 186 const DataflowAnalysisState<typename AnalysisT::Lattice> &)>; 187 188 /// A pair of callbacks to be called with the state before and after visiting a 189 /// CFG element. 190 /// Either or both of the callbacks may be null. 191 template <typename AnalysisT> struct CFGEltCallbacks { 192 CFGEltCallback<AnalysisT> Before; 193 CFGEltCallback<AnalysisT> After; 194 }; 195 196 /// A callback for performing diagnosis on a CFG element, called with the state 197 /// before or after visiting that CFG element. Returns a list of diagnostics 198 /// to emit (if any). 199 template <typename AnalysisT, typename Diagnostic> 200 using DiagnosisCallback = llvm::function_ref<llvm::SmallVector<Diagnostic>( 201 const CFGElement &, ASTContext &, 202 const TransferStateForDiagnostics<typename AnalysisT::Lattice> &)>; 203 204 /// A pair of callbacks for performing diagnosis on a CFG element, called with 205 /// the state before and after visiting that CFG element. 206 /// Either or both of the callbacks may be null. 207 template <typename AnalysisT, typename Diagnostic> struct DiagnosisCallbacks { 208 DiagnosisCallback<AnalysisT, Diagnostic> Before; 209 DiagnosisCallback<AnalysisT, Diagnostic> After; 210 }; 211 212 /// Default for the maximum number of SAT solver iterations during analysis. 213 inline constexpr std::int64_t kDefaultMaxSATIterations = 1'000'000'000; 214 215 /// Default for the maximum number of block visits during analysis. 216 inline constexpr std::int32_t kDefaultMaxBlockVisits = 20'000; 217 218 /// Performs dataflow analysis and returns a mapping from basic block IDs to 219 /// dataflow analysis states that model the respective basic blocks. The 220 /// returned vector, if any, will have the same size as the number of CFG 221 /// blocks, with indices corresponding to basic block IDs. Returns an error if 222 /// the dataflow analysis cannot be performed successfully. Otherwise, calls 223 /// `PostAnalysisCallbacks` on each CFG element with the final analysis results 224 /// before and after that program point. 225 /// 226 /// `MaxBlockVisits` caps the number of block visits during analysis. See 227 /// `runTypeErasedDataflowAnalysis` for a full description. The default value is 228 /// essentially arbitrary -- large enough to accommodate what seems like any 229 /// reasonable CFG, but still small enough to limit the cost of hitting the 230 /// limit. 231 template <typename AnalysisT> 232 llvm::Expected<std::vector< 233 std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>>> 234 runDataflowAnalysis(const AdornedCFG &ACFG, AnalysisT &Analysis, 235 const Environment &InitEnv, 236 CFGEltCallbacks<AnalysisT> PostAnalysisCallbacks = {}, 237 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) { 238 CFGEltCallbacksTypeErased TypeErasedCallbacks; 239 if (PostAnalysisCallbacks.Before) { 240 TypeErasedCallbacks.Before = 241 [&PostAnalysisCallbacks](const CFGElement &Element, 242 const TypeErasedDataflowAnalysisState &State) { 243 auto *Lattice = 244 llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value); 245 // FIXME: we should not be copying the environment here! 246 // Ultimately the `CFGEltCallback` only gets a const reference anyway. 247 PostAnalysisCallbacks.Before( 248 Element, DataflowAnalysisState<typename AnalysisT::Lattice>{ 249 *Lattice, State.Env.fork()}); 250 }; 251 } 252 if (PostAnalysisCallbacks.After) { 253 TypeErasedCallbacks.After = 254 [&PostAnalysisCallbacks](const CFGElement &Element, 255 const TypeErasedDataflowAnalysisState &State) { 256 auto *Lattice = 257 llvm::any_cast<typename AnalysisT::Lattice>(&State.Lattice.Value); 258 // FIXME: we should not be copying the environment here! 259 // Ultimately the `CFGEltCallback` only gets a const reference anyway. 260 PostAnalysisCallbacks.After( 261 Element, DataflowAnalysisState<typename AnalysisT::Lattice>{ 262 *Lattice, State.Env.fork()}); 263 }; 264 } 265 266 auto TypeErasedBlockStates = runTypeErasedDataflowAnalysis( 267 ACFG, Analysis, InitEnv, TypeErasedCallbacks, MaxBlockVisits); 268 if (!TypeErasedBlockStates) 269 return TypeErasedBlockStates.takeError(); 270 271 std::vector<std::optional<DataflowAnalysisState<typename AnalysisT::Lattice>>> 272 BlockStates; 273 BlockStates.reserve(TypeErasedBlockStates->size()); 274 275 llvm::transform( 276 std::move(*TypeErasedBlockStates), std::back_inserter(BlockStates), 277 [](auto &OptState) { 278 return llvm::transformOptional( 279 std::move(OptState), [](TypeErasedDataflowAnalysisState &&State) { 280 return DataflowAnalysisState<typename AnalysisT::Lattice>{ 281 llvm::any_cast<typename AnalysisT::Lattice>( 282 std::move(State.Lattice.Value)), 283 std::move(State.Env)}; 284 }); 285 }); 286 return std::move(BlockStates); 287 } 288 289 // Create an analysis class that is derived from `DataflowAnalysis`. This is an 290 // SFINAE adapter that allows us to call two different variants of constructor 291 // (either with or without the optional `Environment` parameter). 292 // FIXME: Make all classes derived from `DataflowAnalysis` take an `Environment` 293 // parameter in their constructor so that we can get rid of this abomination. 294 template <typename AnalysisT> 295 auto createAnalysis(ASTContext &ASTCtx, Environment &Env) 296 -> decltype(AnalysisT(ASTCtx, Env)) { 297 return AnalysisT(ASTCtx, Env); 298 } 299 template <typename AnalysisT> 300 auto createAnalysis(ASTContext &ASTCtx, Environment &Env) 301 -> decltype(AnalysisT(ASTCtx)) { 302 return AnalysisT(ASTCtx); 303 } 304 305 /// Runs a dataflow analysis over the given function and then runs `Diagnoser` 306 /// over the results. Returns a list of diagnostics for `FuncDecl` or an 307 /// error. Currently, errors can occur (at least) because the analysis requires 308 /// too many iterations over the CFG or the SAT solver times out. 309 /// 310 /// The default value of `MaxSATIterations` was chosen based on the following 311 /// observations: 312 /// - Non-pathological calls to the solver typically require only a few hundred 313 /// iterations. 314 /// - This limit is still low enough to keep runtimes acceptable (on typical 315 /// machines) in cases where we hit the limit. 316 /// 317 /// `MaxBlockVisits` caps the number of block visits during analysis. See 318 /// `runDataflowAnalysis` for a full description and explanation of the default 319 /// value. 320 template <typename AnalysisT, typename Diagnostic> 321 llvm::Expected<llvm::SmallVector<Diagnostic>> 322 diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx, 323 DiagnosisCallbacks<AnalysisT, Diagnostic> Diagnoser, 324 std::int64_t MaxSATIterations = kDefaultMaxSATIterations, 325 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) { 326 llvm::Expected<AdornedCFG> Context = AdornedCFG::build(FuncDecl); 327 if (!Context) 328 return Context.takeError(); 329 330 auto Solver = std::make_unique<WatchedLiteralsSolver>(MaxSATIterations); 331 DataflowAnalysisContext AnalysisContext(*Solver); 332 Environment Env(AnalysisContext, FuncDecl); 333 AnalysisT Analysis = createAnalysis<AnalysisT>(ASTCtx, Env); 334 llvm::SmallVector<Diagnostic> Diagnostics; 335 CFGEltCallbacksTypeErased PostAnalysisCallbacks; 336 if (Diagnoser.Before) { 337 PostAnalysisCallbacks.Before = 338 [&ASTCtx, &Diagnoser, 339 &Diagnostics](const CFGElement &Elt, 340 const TypeErasedDataflowAnalysisState &State) mutable { 341 auto EltDiagnostics = Diagnoser.Before( 342 Elt, ASTCtx, 343 TransferStateForDiagnostics<typename AnalysisT::Lattice>( 344 llvm::any_cast<const typename AnalysisT::Lattice &>( 345 State.Lattice.Value), 346 State.Env)); 347 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics)); 348 }; 349 } 350 if (Diagnoser.After) { 351 PostAnalysisCallbacks.After = 352 [&ASTCtx, &Diagnoser, 353 &Diagnostics](const CFGElement &Elt, 354 const TypeErasedDataflowAnalysisState &State) mutable { 355 auto EltDiagnostics = Diagnoser.After( 356 Elt, ASTCtx, 357 TransferStateForDiagnostics<typename AnalysisT::Lattice>( 358 llvm::any_cast<const typename AnalysisT::Lattice &>( 359 State.Lattice.Value), 360 State.Env)); 361 llvm::move(EltDiagnostics, std::back_inserter(Diagnostics)); 362 }; 363 } 364 if (llvm::Error Err = 365 runTypeErasedDataflowAnalysis(*Context, Analysis, Env, 366 PostAnalysisCallbacks, MaxBlockVisits) 367 .takeError()) 368 return std::move(Err); 369 370 if (Solver->reachedLimit()) 371 return llvm::createStringError(llvm::errc::interrupted, 372 "SAT solver timed out"); 373 374 return Diagnostics; 375 } 376 377 /// Overload that takes only one diagnosis callback, which is run on the state 378 /// after visiting the `CFGElement`. This is provided for backwards 379 /// compatibility; new callers should call the overload taking 380 /// `DiagnosisCallbacks` instead. 381 template <typename AnalysisT, typename Diagnostic> 382 llvm::Expected<llvm::SmallVector<Diagnostic>> 383 diagnoseFunction(const FunctionDecl &FuncDecl, ASTContext &ASTCtx, 384 DiagnosisCallback<AnalysisT, Diagnostic> Diagnoser, 385 std::int64_t MaxSATIterations = kDefaultMaxSATIterations, 386 std::int32_t MaxBlockVisits = kDefaultMaxBlockVisits) { 387 DiagnosisCallbacks<AnalysisT, Diagnostic> Callbacks = {nullptr, Diagnoser}; 388 return diagnoseFunction(FuncDecl, ASTCtx, Callbacks, MaxSATIterations, 389 MaxBlockVisits); 390 } 391 392 /// Abstract base class for dataflow "models": reusable analysis components that 393 /// model a particular aspect of program semantics in the `Environment`. For 394 /// example, a model may capture a type and its related functions. 395 class DataflowModel : public Environment::ValueModel { 396 public: 397 /// Return value indicates whether the model processed the `Element`. 398 virtual bool transfer(const CFGElement &Element, Environment &Env) = 0; 399 }; 400 401 } // namespace dataflow 402 } // namespace clang 403 404 #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSIS_H 405