1af7bc39bSStanislav Gatev //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// 2af7bc39bSStanislav Gatev // 3af7bc39bSStanislav Gatev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4af7bc39bSStanislav Gatev // See https://llvm.org/LICENSE.txt for license information. 5af7bc39bSStanislav Gatev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6af7bc39bSStanislav Gatev // 7af7bc39bSStanislav Gatev //===----------------------------------------------------------------------===// 8af7bc39bSStanislav Gatev // 9af7bc39bSStanislav Gatev // This file defines a DataflowAnalysisContext class that owns objects that 10af7bc39bSStanislav Gatev // encompass the state of a program and stores context that is used during 11af7bc39bSStanislav Gatev // dataflow analysis. 12af7bc39bSStanislav Gatev // 13af7bc39bSStanislav Gatev //===----------------------------------------------------------------------===// 14af7bc39bSStanislav Gatev 15af7bc39bSStanislav Gatev #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 16af7bc39bSStanislav Gatev #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 17af7bc39bSStanislav Gatev 18af7bc39bSStanislav Gatev #include "clang/AST/Decl.h" 19e7481f6eSStanislav Gatev #include "clang/AST/Expr.h" 20b611376eSWei Yi Tee #include "clang/AST/TypeOrdering.h" 219ec8c961SSamira Bazuzi #include "clang/Analysis/FlowSensitive/ASTOps.h" 2259ff3adcSmartinboehme #include "clang/Analysis/FlowSensitive/AdornedCFG.h" 23bf47c1edSSam McCall #include "clang/Analysis/FlowSensitive/Arena.h" 24ae60884dSStanislav Gatev #include "clang/Analysis/FlowSensitive/Solver.h" 25af7bc39bSStanislav Gatev #include "clang/Analysis/FlowSensitive/StorageLocation.h" 26af7bc39bSStanislav Gatev #include "clang/Analysis/FlowSensitive/Value.h" 27af7bc39bSStanislav Gatev #include "llvm/ADT/DenseMap.h" 28955a05a2SStanislav Gatev #include "llvm/ADT/DenseSet.h" 29d85c233eSSam McCall #include "llvm/ADT/SetVector.h" 30b5414b56SDmitri Gribenko #include "llvm/Support/Compiler.h" 31af7bc39bSStanislav Gatev #include <cassert> 32af7bc39bSStanislav Gatev #include <memory> 33a1580d7bSKazu Hirata #include <optional> 34af7bc39bSStanislav Gatev 35af7bc39bSStanislav Gatev namespace clang { 36af7bc39bSStanislav Gatev namespace dataflow { 3748f97e57SSam McCall class Logger; 38af7bc39bSStanislav Gatev 39264976d9SYitzhak Mandelbaum struct ContextSensitiveOptions { 40264976d9SYitzhak Mandelbaum /// The maximum depth to analyze. A value of zero is equivalent to disabling 41264976d9SYitzhak Mandelbaum /// context-sensitive analysis entirely. 42264976d9SYitzhak Mandelbaum unsigned Depth = 2; 43264976d9SYitzhak Mandelbaum }; 44264976d9SYitzhak Mandelbaum 45af7bc39bSStanislav Gatev /// Owns objects that encompass the state of a program and stores context that 46af7bc39bSStanislav Gatev /// is used during dataflow analysis. 47af7bc39bSStanislav Gatev class DataflowAnalysisContext { 48af7bc39bSStanislav Gatev public: 4901ccf7b3SYitzhak Mandelbaum struct Options { 50264976d9SYitzhak Mandelbaum /// Options for analyzing function bodies when present in the translation 51264976d9SYitzhak Mandelbaum /// unit, or empty to disable context-sensitive analysis. Note that this is 52264976d9SYitzhak Mandelbaum /// fundamentally limited: some constructs, such as recursion, are 53264976d9SYitzhak Mandelbaum /// explicitly unsupported. 546ad0788cSKazu Hirata std::optional<ContextSensitiveOptions> ContextSensitiveOpts; 5548f97e57SSam McCall 5648f97e57SSam McCall /// If provided, analysis details will be recorded here. 5748f97e57SSam McCall /// (This is always non-null within an AnalysisContext, the framework 5848f97e57SSam McCall /// provides a fallback no-op logger). 5948f97e57SSam McCall Logger *Log = nullptr; 6001ccf7b3SYitzhak Mandelbaum }; 6101ccf7b3SYitzhak Mandelbaum 62ae60884dSStanislav Gatev /// Constructs a dataflow analysis context. 63ae60884dSStanislav Gatev /// 64ae60884dSStanislav Gatev /// Requirements: 65ae60884dSStanislav Gatev /// 66ae60884dSStanislav Gatev /// `S` must not be null. 6701ccf7b3SYitzhak Mandelbaum DataflowAnalysisContext(std::unique_ptr<Solver> S, 68264976d9SYitzhak Mandelbaum Options Opts = Options{ 6948f97e57SSam McCall /*ContextSensitiveOpts=*/std::nullopt, 70*23ae482bSmartinboehme /*Logger=*/nullptr}) 71*23ae482bSmartinboehme : DataflowAnalysisContext(*S, std::move(S), Opts) {} 72*23ae482bSmartinboehme 73*23ae482bSmartinboehme /// Constructs a dataflow analysis context. 74*23ae482bSmartinboehme /// 75*23ae482bSmartinboehme /// Requirements: 76*23ae482bSmartinboehme /// 77*23ae482bSmartinboehme /// `S` must outlive the `DataflowAnalysisContext`. 78*23ae482bSmartinboehme DataflowAnalysisContext(Solver &S, Options Opts = Options{ 79*23ae482bSmartinboehme /*ContextSensitiveOpts=*/std::nullopt, 80*23ae482bSmartinboehme /*Logger=*/nullptr}) DataflowAnalysisContext(S,nullptr,Opts)81*23ae482bSmartinboehme : DataflowAnalysisContext(S, nullptr, Opts) {} 82*23ae482bSmartinboehme 8348f97e57SSam McCall ~DataflowAnalysisContext(); 84ae60884dSStanislav Gatev 8571f2ec2dSmartinboehme /// Sets a callback that returns the names and types of the synthetic fields 8671f2ec2dSmartinboehme /// to add to a `RecordStorageLocation` of a given type. 8771f2ec2dSmartinboehme /// Typically, this is called from the constructor of a `DataflowAnalysis` 8871f2ec2dSmartinboehme /// 89270f2c55Smartinboehme /// The field types returned by the callback may not have reference type. 90270f2c55Smartinboehme /// 9171f2ec2dSmartinboehme /// To maintain the invariant that all `RecordStorageLocation`s of a given 9271f2ec2dSmartinboehme /// type have the same fields: 9371f2ec2dSmartinboehme /// * The callback must always return the same result for a given type 9471f2ec2dSmartinboehme /// * `setSyntheticFieldCallback()` must be called before any 9571f2ec2dSmartinboehme // `RecordStorageLocation`s are created. setSyntheticFieldCallback(std::function<llvm::StringMap<QualType> (QualType)> CB)9671f2ec2dSmartinboehme void setSyntheticFieldCallback( 9771f2ec2dSmartinboehme std::function<llvm::StringMap<QualType>(QualType)> CB) { 9871f2ec2dSmartinboehme assert(!RecordStorageLocationCreated); 9971f2ec2dSmartinboehme SyntheticFieldCallback = CB; 10071f2ec2dSmartinboehme } 10171f2ec2dSmartinboehme 102817dd5e3SStanislav Gatev /// Returns a new storage location appropriate for `Type`. 10312c7352fSWei Yi Tee /// 10454d24eaeSEric Li /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`. 105817dd5e3SStanislav Gatev StorageLocation &createStorageLocation(QualType Type); 10612c7352fSWei Yi Tee 10771f2ec2dSmartinboehme /// Creates a `RecordStorageLocation` for the given type and with the given 10871f2ec2dSmartinboehme /// fields. 10971f2ec2dSmartinboehme /// 11071f2ec2dSmartinboehme /// Requirements: 11171f2ec2dSmartinboehme /// 11271f2ec2dSmartinboehme /// `FieldLocs` must contain exactly the fields returned by 11371f2ec2dSmartinboehme /// `getModeledFields(Type)`. 11471f2ec2dSmartinboehme /// `SyntheticFields` must contain exactly the fields returned by 11571f2ec2dSmartinboehme /// `getSyntheticFields(Type)`. 11671f2ec2dSmartinboehme RecordStorageLocation &createRecordStorageLocation( 11771f2ec2dSmartinboehme QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, 11871f2ec2dSmartinboehme RecordStorageLocation::SyntheticFieldMap SyntheticFields); 11971f2ec2dSmartinboehme 12012c7352fSWei Yi Tee /// Returns a stable storage location for `D`. 12152d06963SStanislav Gatev StorageLocation &getStableStorageLocation(const ValueDecl &D); 12212c7352fSWei Yi Tee 12312c7352fSWei Yi Tee /// Returns a stable storage location for `E`. 12412c7352fSWei Yi Tee StorageLocation &getStableStorageLocation(const Expr &E); 12512c7352fSWei Yi Tee 126b611376eSWei Yi Tee /// Returns a pointer value that represents a null pointer. Calls with 127b611376eSWei Yi Tee /// `PointeeType` that are canonically equivalent will return the same result. 128f10d271aSEric Li /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`. 129b611376eSWei Yi Tee PointerValue &getOrCreateNullPointerValue(QualType PointeeType); 130b611376eSWei Yi Tee 13121ab252fSSam McCall /// Adds `Constraint` to current and future flow conditions in this context. 13221ab252fSSam McCall /// 13321ab252fSSam McCall /// Invariants must contain only flow-insensitive information, i.e. facts that 13421ab252fSSam McCall /// are true on all paths through the program. 13521ab252fSSam McCall /// Information can be added eagerly (when analysis begins), or lazily (e.g. 13621ab252fSSam McCall /// when values are first used). The analysis must be careful that the same 13721ab252fSSam McCall /// information is added regardless of which order blocks are analyzed in. 13821ab252fSSam McCall void addInvariant(const Formula &Constraint); 13921ab252fSSam McCall 140955a05a2SStanislav Gatev /// Adds `Constraint` to the flow condition identified by `Token`. 141fc9821a8SSam McCall void addFlowConditionConstraint(Atom Token, const Formula &Constraint); 142955a05a2SStanislav Gatev 143955a05a2SStanislav Gatev /// Creates a new flow condition with the same constraints as the flow 144955a05a2SStanislav Gatev /// condition identified by `Token` and returns its token. 145fc9821a8SSam McCall Atom forkFlowCondition(Atom Token); 146955a05a2SStanislav Gatev 147955a05a2SStanislav Gatev /// Creates a new flow condition that represents the disjunction of the flow 148955a05a2SStanislav Gatev /// conditions identified by `FirstToken` and `SecondToken`, and returns its 149955a05a2SStanislav Gatev /// token. 150fc9821a8SSam McCall Atom joinFlowConditions(Atom FirstToken, Atom SecondToken); 151955a05a2SStanislav Gatev 152d1f59544Smartinboehme /// Returns true if the constraints of the flow condition identified by 153d1f59544Smartinboehme /// `Token` imply that `F` is true. 154d1f59544Smartinboehme /// Returns false if the flow condition does not imply `F` or if the solver 155d1f59544Smartinboehme /// times out. 156d1f59544Smartinboehme bool flowConditionImplies(Atom Token, const Formula &F); 157d1f59544Smartinboehme 158d1f59544Smartinboehme /// Returns true if the constraints of the flow condition identified by 159d1f59544Smartinboehme /// `Token` still allow `F` to be true. 160d1f59544Smartinboehme /// Returns false if the flow condition implies that `F` is false or if the 161d1f59544Smartinboehme /// solver times out. 162d1f59544Smartinboehme bool flowConditionAllows(Atom Token, const Formula &F); 163955a05a2SStanislav Gatev 1640f65a3e6SWei Yi Tee /// Returns true if `Val1` is equivalent to `Val2`. 1650f65a3e6SWei Yi Tee /// Note: This function doesn't take into account constraints on `Val1` and 1660f65a3e6SWei Yi Tee /// `Val2` imposed by the flow condition. 167fc9821a8SSam McCall bool equivalentFormulas(const Formula &Val1, const Formula &Val2); 1680f65a3e6SWei Yi Tee 169fc9821a8SSam McCall LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, 170ee2cd606SSam McCall llvm::raw_ostream &OS = llvm::dbgs()); 171b5414b56SDmitri Gribenko 17259ff3adcSmartinboehme /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise, 173692e0303SYitzhak Mandelbaum /// returns null. 17459ff3adcSmartinboehme const AdornedCFG *getAdornedCFG(const FunctionDecl *F); 175692e0303SYitzhak Mandelbaum getOptions()176264976d9SYitzhak Mandelbaum const Options &getOptions() { return Opts; } 177264976d9SYitzhak Mandelbaum arena()178bf47c1edSSam McCall Arena &arena() { return *A; } 179bf47c1edSSam McCall 1803b29b8a2SSamira Bazuzi /// Returns the outcome of satisfiability checking on `Constraints`. 1813b29b8a2SSamira Bazuzi /// 1823b29b8a2SSamira Bazuzi /// Flow conditions are not incorporated, so they may need to be manually 1833b29b8a2SSamira Bazuzi /// included in `Constraints` to provide contextually-accurate results, e.g. 1843b29b8a2SSamira Bazuzi /// if any definitions or relationships of the values in `Constraints` have 1853b29b8a2SSamira Bazuzi /// been stored in flow conditions. 186fc9821a8SSam McCall Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints); 1873b29b8a2SSamira Bazuzi 188e8a1560dSMartin Braenne /// Returns the fields of `Type`, limited to the set of fields modeled by this 189e8a1560dSMartin Braenne /// context. 190e8a1560dSMartin Braenne FieldSet getModeledFields(QualType Type); 191e8a1560dSMartin Braenne 19271f2ec2dSmartinboehme /// Returns the names and types of the synthetic fields for the given record 19371f2ec2dSmartinboehme /// type. getSyntheticFields(QualType Type)19471f2ec2dSmartinboehme llvm::StringMap<QualType> getSyntheticFields(QualType Type) { 19571f2ec2dSmartinboehme assert(Type->isRecordType()); 196270f2c55Smartinboehme if (SyntheticFieldCallback) { 197270f2c55Smartinboehme llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type); 198270f2c55Smartinboehme // Synthetic fields are not allowed to have reference type. 199270f2c55Smartinboehme assert([&Result] { 200270f2c55Smartinboehme for (const auto &Entry : Result) 201270f2c55Smartinboehme if (Entry.getValue()->isReferenceType()) 202270f2c55Smartinboehme return false; 203270f2c55Smartinboehme return true; 204270f2c55Smartinboehme }()); 205270f2c55Smartinboehme return Result; 206270f2c55Smartinboehme } 20771f2ec2dSmartinboehme return {}; 20871f2ec2dSmartinboehme } 20971f2ec2dSmartinboehme 210af7bc39bSStanislav Gatev private: 21101ccf7b3SYitzhak Mandelbaum friend class Environment; 21201ccf7b3SYitzhak Mandelbaum 213f10d271aSEric Li struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { getEmptyKeyNullableQualTypeDenseMapInfo214f10d271aSEric Li static QualType getEmptyKey() { 215f10d271aSEric Li // Allow a NULL `QualType` by using a different value as the empty key. 216f10d271aSEric Li return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1)); 217f10d271aSEric Li } 218f10d271aSEric Li 219f10d271aSEric Li using DenseMapInfo::getHashValue; 220f10d271aSEric Li using DenseMapInfo::getTombstoneKey; 221f10d271aSEric Li using DenseMapInfo::isEqual; 222f10d271aSEric Li }; 223f10d271aSEric Li 224*23ae482bSmartinboehme /// `S` is the solver to use. `OwnedSolver` may be: 225*23ae482bSmartinboehme /// * Null (in which case `S` is non-onwed and must outlive this object), or 226*23ae482bSmartinboehme /// * Non-null (in which case it must refer to `S`, and the 227*23ae482bSmartinboehme /// `DataflowAnalysisContext will take ownership of `OwnedSolver`). 228*23ae482bSmartinboehme DataflowAnalysisContext(Solver &S, std::unique_ptr<Solver> &&OwnedSolver, 229*23ae482bSmartinboehme Options Opts); 230*23ae482bSmartinboehme 231089a5446SYitzhak Mandelbaum // Extends the set of modeled field declarations. 232e8a1560dSMartin Braenne void addModeledFields(const FieldSet &Fields); 23301ccf7b3SYitzhak Mandelbaum 234955a05a2SStanislav Gatev /// Adds all constraints of the flow condition identified by `Token` and all 23521ab252fSSam McCall /// of its transitive dependencies to `Constraints`. 23621ab252fSSam McCall void 23721ab252fSSam McCall addTransitiveFlowConditionConstraints(Atom Token, 23821ab252fSSam McCall llvm::SetVector<const Formula *> &Out); 23921ab252fSSam McCall 240d1f59544Smartinboehme /// Returns true if the solver is able to prove that there is a satisfying 241d1f59544Smartinboehme /// assignment for `Constraints`. isSatisfiable(llvm::SetVector<const Formula * > Constraints)242d1f59544Smartinboehme bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) { 243d1f59544Smartinboehme return querySolver(std::move(Constraints)).getStatus() == 244d1f59544Smartinboehme Solver::Result::Status::Satisfiable; 245d1f59544Smartinboehme } 246955a05a2SStanislav Gatev 24742a7ddb4SWei Yi Tee /// Returns true if the solver is able to prove that there is no satisfying 24842a7ddb4SWei Yi Tee /// assignment for `Constraints` isUnsatisfiable(llvm::SetVector<const Formula * > Constraints)249fc9821a8SSam McCall bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) { 25081e6400dSWei Yi Tee return querySolver(std::move(Constraints)).getStatus() == 25181e6400dSWei Yi Tee Solver::Result::Status::Unsatisfiable; 25242a7ddb4SWei Yi Tee } 25342a7ddb4SWei Yi Tee 254*23ae482bSmartinboehme Solver &S; 255*23ae482bSmartinboehme std::unique_ptr<Solver> OwnedSolver; 256bf47c1edSSam McCall std::unique_ptr<Arena> A; 257af7bc39bSStanislav Gatev 258af7bc39bSStanislav Gatev // Maps from program declarations and statements to storage locations that are 259af7bc39bSStanislav Gatev // assigned to them. These assignments are global (aggregated across all basic 260af7bc39bSStanislav Gatev // blocks) and are used to produce stable storage locations when the same 261af7bc39bSStanislav Gatev // basic blocks are evaluated multiple times. The storage locations that are 262af7bc39bSStanislav Gatev // in scope for a particular basic block are stored in `Environment`. 263af7bc39bSStanislav Gatev llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; 264e7481f6eSStanislav Gatev llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; 265af7bc39bSStanislav Gatev 266b611376eSWei Yi Tee // Null pointer values, keyed by the canonical pointee type. 267b611376eSWei Yi Tee // 268b611376eSWei Yi Tee // FIXME: The pointer values are indexed by the pointee types which are 269b611376eSWei Yi Tee // required to initialize the `PointeeLoc` field in `PointerValue`. Consider 270b611376eSWei Yi Tee // creating a type-independent `NullPointerValue` without a `PointeeLoc` 271b611376eSWei Yi Tee // field. 272f10d271aSEric Li llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo> 273f10d271aSEric Li NullPointerVals; 274b611376eSWei Yi Tee 275264976d9SYitzhak Mandelbaum Options Opts; 27601ccf7b3SYitzhak Mandelbaum 277955a05a2SStanislav Gatev // Flow conditions are tracked symbolically: each unique flow condition is 278955a05a2SStanislav Gatev // associated with a fresh symbolic variable (token), bound to the clause that 279955a05a2SStanislav Gatev // defines the flow condition. Conceptually, each binding corresponds to an 280955a05a2SStanislav Gatev // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition 281955a05a2SStanislav Gatev // token (an atomic boolean) and `Ci`s are the set of constraints in the flow 282fb88ea62SWei Yi Tee // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in 283fb88ea62SWei Yi Tee // the `FlowConditionConstraints` map, keyed by the token of the flow 284fb88ea62SWei Yi Tee // condition. 285955a05a2SStanislav Gatev // 286955a05a2SStanislav Gatev // Flow conditions depend on other flow conditions if they are created using 287955a05a2SStanislav Gatev // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition 288955a05a2SStanislav Gatev // dependencies is stored in the `FlowConditionDeps` map. 289fc9821a8SSam McCall llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps; 290fc9821a8SSam McCall llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints; 29121ab252fSSam McCall const Formula *Invariant = nullptr; 292692e0303SYitzhak Mandelbaum 29359ff3adcSmartinboehme llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts; 29401ccf7b3SYitzhak Mandelbaum 295089a5446SYitzhak Mandelbaum // Fields modeled by environments covered by this context. 296e8a1560dSMartin Braenne FieldSet ModeledFields; 29748f97e57SSam McCall 29848f97e57SSam McCall std::unique_ptr<Logger> LogOwner; // If created via flags. 29971f2ec2dSmartinboehme 30071f2ec2dSmartinboehme std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback; 30171f2ec2dSmartinboehme 30271f2ec2dSmartinboehme /// Has any `RecordStorageLocation` been created yet? 30371f2ec2dSmartinboehme bool RecordStorageLocationCreated = false; 304af7bc39bSStanislav Gatev }; 305af7bc39bSStanislav Gatev 306af7bc39bSStanislav Gatev } // namespace dataflow 307af7bc39bSStanislav Gatev } // namespace clang 308af7bc39bSStanislav Gatev 309af7bc39bSStanislav Gatev #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 310