104eeddc0SDimitry Andric //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===// 204eeddc0SDimitry Andric // 304eeddc0SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 404eeddc0SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 504eeddc0SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 604eeddc0SDimitry Andric // 704eeddc0SDimitry Andric //===----------------------------------------------------------------------===// 804eeddc0SDimitry Andric // 904eeddc0SDimitry Andric // This file defines a DataflowAnalysisContext class that owns objects that 1004eeddc0SDimitry Andric // encompass the state of a program and stores context that is used during 1104eeddc0SDimitry Andric // dataflow analysis. 1204eeddc0SDimitry Andric // 1304eeddc0SDimitry Andric //===----------------------------------------------------------------------===// 1404eeddc0SDimitry Andric 1504eeddc0SDimitry Andric #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 1604eeddc0SDimitry Andric #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 1704eeddc0SDimitry Andric 1804eeddc0SDimitry Andric #include "clang/AST/Decl.h" 1904eeddc0SDimitry Andric #include "clang/AST/Expr.h" 2081ad6265SDimitry Andric #include "clang/AST/TypeOrdering.h" 21*0fca6ea1SDimitry Andric #include "clang/Analysis/FlowSensitive/ASTOps.h" 22*0fca6ea1SDimitry Andric #include "clang/Analysis/FlowSensitive/AdornedCFG.h" 2306c3fb27SDimitry Andric #include "clang/Analysis/FlowSensitive/Arena.h" 2481ad6265SDimitry Andric #include "clang/Analysis/FlowSensitive/Solver.h" 2504eeddc0SDimitry Andric #include "clang/Analysis/FlowSensitive/StorageLocation.h" 2604eeddc0SDimitry Andric #include "clang/Analysis/FlowSensitive/Value.h" 2704eeddc0SDimitry Andric #include "llvm/ADT/DenseMap.h" 2881ad6265SDimitry Andric #include "llvm/ADT/DenseSet.h" 2906c3fb27SDimitry Andric #include "llvm/ADT/SetVector.h" 30fcaf7f86SDimitry Andric #include "llvm/Support/Compiler.h" 3104eeddc0SDimitry Andric #include <cassert> 3204eeddc0SDimitry Andric #include <memory> 33bdd1243dSDimitry Andric #include <optional> 3404eeddc0SDimitry Andric 3504eeddc0SDimitry Andric namespace clang { 3604eeddc0SDimitry Andric namespace dataflow { 3706c3fb27SDimitry Andric class Logger; 3804eeddc0SDimitry Andric 39bdd1243dSDimitry Andric struct ContextSensitiveOptions { 40bdd1243dSDimitry Andric /// The maximum depth to analyze. A value of zero is equivalent to disabling 41bdd1243dSDimitry Andric /// context-sensitive analysis entirely. 42bdd1243dSDimitry Andric unsigned Depth = 2; 43bdd1243dSDimitry Andric }; 44bdd1243dSDimitry Andric 4504eeddc0SDimitry Andric /// Owns objects that encompass the state of a program and stores context that 4604eeddc0SDimitry Andric /// is used during dataflow analysis. 4704eeddc0SDimitry Andric class DataflowAnalysisContext { 4804eeddc0SDimitry Andric public: 49bdd1243dSDimitry Andric struct Options { 50bdd1243dSDimitry Andric /// Options for analyzing function bodies when present in the translation 51bdd1243dSDimitry Andric /// unit, or empty to disable context-sensitive analysis. Note that this is 52bdd1243dSDimitry Andric /// fundamentally limited: some constructs, such as recursion, are 53bdd1243dSDimitry Andric /// explicitly unsupported. 54bdd1243dSDimitry Andric std::optional<ContextSensitiveOptions> ContextSensitiveOpts; 5506c3fb27SDimitry Andric 5606c3fb27SDimitry Andric /// If provided, analysis details will be recorded here. 5706c3fb27SDimitry Andric /// (This is always non-null within an AnalysisContext, the framework 5806c3fb27SDimitry Andric /// provides a fallback no-op logger). 5906c3fb27SDimitry Andric Logger *Log = nullptr; 60bdd1243dSDimitry Andric }; 61bdd1243dSDimitry Andric 6281ad6265SDimitry Andric /// Constructs a dataflow analysis context. 6381ad6265SDimitry Andric /// 6481ad6265SDimitry Andric /// Requirements: 6581ad6265SDimitry Andric /// 6681ad6265SDimitry Andric /// `S` must not be null. 67bdd1243dSDimitry Andric DataflowAnalysisContext(std::unique_ptr<Solver> S, 68bdd1243dSDimitry Andric Options Opts = Options{ 6906c3fb27SDimitry Andric /*ContextSensitiveOpts=*/std::nullopt, 70*0fca6ea1SDimitry Andric /*Logger=*/nullptr}) 71*0fca6ea1SDimitry Andric : DataflowAnalysisContext(*S, std::move(S), Opts) {} 72*0fca6ea1SDimitry Andric 73*0fca6ea1SDimitry Andric /// Constructs a dataflow analysis context. 74*0fca6ea1SDimitry Andric /// 75*0fca6ea1SDimitry Andric /// Requirements: 76*0fca6ea1SDimitry Andric /// 77*0fca6ea1SDimitry Andric /// `S` must outlive the `DataflowAnalysisContext`. 78*0fca6ea1SDimitry Andric DataflowAnalysisContext(Solver &S, Options Opts = Options{ 79*0fca6ea1SDimitry Andric /*ContextSensitiveOpts=*/std::nullopt, 80*0fca6ea1SDimitry Andric /*Logger=*/nullptr}) 81*0fca6ea1SDimitry Andric : DataflowAnalysisContext(S, nullptr, Opts) {} 82*0fca6ea1SDimitry Andric 8306c3fb27SDimitry Andric ~DataflowAnalysisContext(); 8404eeddc0SDimitry Andric 855f757f3fSDimitry Andric /// Sets a callback that returns the names and types of the synthetic fields 865f757f3fSDimitry Andric /// to add to a `RecordStorageLocation` of a given type. 875f757f3fSDimitry Andric /// Typically, this is called from the constructor of a `DataflowAnalysis` 885f757f3fSDimitry Andric /// 89*0fca6ea1SDimitry Andric /// The field types returned by the callback may not have reference type. 90*0fca6ea1SDimitry Andric /// 915f757f3fSDimitry Andric /// To maintain the invariant that all `RecordStorageLocation`s of a given 925f757f3fSDimitry Andric /// type have the same fields: 935f757f3fSDimitry Andric /// * The callback must always return the same result for a given type 945f757f3fSDimitry Andric /// * `setSyntheticFieldCallback()` must be called before any 955f757f3fSDimitry Andric // `RecordStorageLocation`s are created. 965f757f3fSDimitry Andric void setSyntheticFieldCallback( 975f757f3fSDimitry Andric std::function<llvm::StringMap<QualType>(QualType)> CB) { 985f757f3fSDimitry Andric assert(!RecordStorageLocationCreated); 995f757f3fSDimitry Andric SyntheticFieldCallback = CB; 1005f757f3fSDimitry Andric } 1015f757f3fSDimitry Andric 102bdd1243dSDimitry Andric /// Returns a new storage location appropriate for `Type`. 10381ad6265SDimitry Andric /// 104bdd1243dSDimitry Andric /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`. 105bdd1243dSDimitry Andric StorageLocation &createStorageLocation(QualType Type); 10681ad6265SDimitry Andric 1075f757f3fSDimitry Andric /// Creates a `RecordStorageLocation` for the given type and with the given 1085f757f3fSDimitry Andric /// fields. 1095f757f3fSDimitry Andric /// 1105f757f3fSDimitry Andric /// Requirements: 1115f757f3fSDimitry Andric /// 1125f757f3fSDimitry Andric /// `FieldLocs` must contain exactly the fields returned by 1135f757f3fSDimitry Andric /// `getModeledFields(Type)`. 1145f757f3fSDimitry Andric /// `SyntheticFields` must contain exactly the fields returned by 1155f757f3fSDimitry Andric /// `getSyntheticFields(Type)`. 1165f757f3fSDimitry Andric RecordStorageLocation &createRecordStorageLocation( 1175f757f3fSDimitry Andric QualType Type, RecordStorageLocation::FieldToLoc FieldLocs, 1185f757f3fSDimitry Andric RecordStorageLocation::SyntheticFieldMap SyntheticFields); 1195f757f3fSDimitry Andric 12081ad6265SDimitry Andric /// Returns a stable storage location for `D`. 1215f757f3fSDimitry Andric StorageLocation &getStableStorageLocation(const ValueDecl &D); 12281ad6265SDimitry Andric 12381ad6265SDimitry Andric /// Returns a stable storage location for `E`. 12481ad6265SDimitry Andric StorageLocation &getStableStorageLocation(const Expr &E); 12581ad6265SDimitry Andric 12681ad6265SDimitry Andric /// Returns a pointer value that represents a null pointer. Calls with 12781ad6265SDimitry Andric /// `PointeeType` that are canonically equivalent will return the same result. 128753f127fSDimitry Andric /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`. 12981ad6265SDimitry Andric PointerValue &getOrCreateNullPointerValue(QualType PointeeType); 13081ad6265SDimitry Andric 1315f757f3fSDimitry Andric /// Adds `Constraint` to current and future flow conditions in this context. 1325f757f3fSDimitry Andric /// 1335f757f3fSDimitry Andric /// Invariants must contain only flow-insensitive information, i.e. facts that 1345f757f3fSDimitry Andric /// are true on all paths through the program. 1355f757f3fSDimitry Andric /// Information can be added eagerly (when analysis begins), or lazily (e.g. 1365f757f3fSDimitry Andric /// when values are first used). The analysis must be careful that the same 1375f757f3fSDimitry Andric /// information is added regardless of which order blocks are analyzed in. 1385f757f3fSDimitry Andric void addInvariant(const Formula &Constraint); 1395f757f3fSDimitry Andric 14081ad6265SDimitry Andric /// Adds `Constraint` to the flow condition identified by `Token`. 14106c3fb27SDimitry Andric void addFlowConditionConstraint(Atom Token, const Formula &Constraint); 14281ad6265SDimitry Andric 14381ad6265SDimitry Andric /// Creates a new flow condition with the same constraints as the flow 14481ad6265SDimitry Andric /// condition identified by `Token` and returns its token. 14506c3fb27SDimitry Andric Atom forkFlowCondition(Atom Token); 14681ad6265SDimitry Andric 14781ad6265SDimitry Andric /// Creates a new flow condition that represents the disjunction of the flow 14881ad6265SDimitry Andric /// conditions identified by `FirstToken` and `SecondToken`, and returns its 14981ad6265SDimitry Andric /// token. 15006c3fb27SDimitry Andric Atom joinFlowConditions(Atom FirstToken, Atom SecondToken); 15181ad6265SDimitry Andric 1525f757f3fSDimitry Andric /// Returns true if the constraints of the flow condition identified by 1535f757f3fSDimitry Andric /// `Token` imply that `F` is true. 1545f757f3fSDimitry Andric /// Returns false if the flow condition does not imply `F` or if the solver 1555f757f3fSDimitry Andric /// times out. 1565f757f3fSDimitry Andric bool flowConditionImplies(Atom Token, const Formula &F); 15781ad6265SDimitry Andric 1585f757f3fSDimitry Andric /// Returns true if the constraints of the flow condition identified by 1595f757f3fSDimitry Andric /// `Token` still allow `F` to be true. 1605f757f3fSDimitry Andric /// Returns false if the flow condition implies that `F` is false or if the 1615f757f3fSDimitry Andric /// solver times out. 1625f757f3fSDimitry Andric bool flowConditionAllows(Atom Token, const Formula &F); 16381ad6265SDimitry Andric 16481ad6265SDimitry Andric /// Returns true if `Val1` is equivalent to `Val2`. 16581ad6265SDimitry Andric /// Note: This function doesn't take into account constraints on `Val1` and 16681ad6265SDimitry Andric /// `Val2` imposed by the flow condition. 16706c3fb27SDimitry Andric bool equivalentFormulas(const Formula &Val1, const Formula &Val2); 16881ad6265SDimitry Andric 16906c3fb27SDimitry Andric LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token, 17006c3fb27SDimitry Andric llvm::raw_ostream &OS = llvm::dbgs()); 171fcaf7f86SDimitry Andric 172*0fca6ea1SDimitry Andric /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise, 173bdd1243dSDimitry Andric /// returns null. 174*0fca6ea1SDimitry Andric const AdornedCFG *getAdornedCFG(const FunctionDecl *F); 175bdd1243dSDimitry Andric 176bdd1243dSDimitry Andric const Options &getOptions() { return Opts; } 177bdd1243dSDimitry Andric 17806c3fb27SDimitry Andric Arena &arena() { return *A; } 17906c3fb27SDimitry Andric 18006c3fb27SDimitry Andric /// Returns the outcome of satisfiability checking on `Constraints`. 18106c3fb27SDimitry Andric /// 18206c3fb27SDimitry Andric /// Flow conditions are not incorporated, so they may need to be manually 18306c3fb27SDimitry Andric /// included in `Constraints` to provide contextually-accurate results, e.g. 18406c3fb27SDimitry Andric /// if any definitions or relationships of the values in `Constraints` have 18506c3fb27SDimitry Andric /// been stored in flow conditions. 18606c3fb27SDimitry Andric Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints); 18706c3fb27SDimitry Andric 18806c3fb27SDimitry Andric /// Returns the fields of `Type`, limited to the set of fields modeled by this 18906c3fb27SDimitry Andric /// context. 19006c3fb27SDimitry Andric FieldSet getModeledFields(QualType Type); 19106c3fb27SDimitry Andric 1925f757f3fSDimitry Andric /// Returns the names and types of the synthetic fields for the given record 1935f757f3fSDimitry Andric /// type. 1945f757f3fSDimitry Andric llvm::StringMap<QualType> getSyntheticFields(QualType Type) { 1955f757f3fSDimitry Andric assert(Type->isRecordType()); 196*0fca6ea1SDimitry Andric if (SyntheticFieldCallback) { 197*0fca6ea1SDimitry Andric llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type); 198*0fca6ea1SDimitry Andric // Synthetic fields are not allowed to have reference type. 199*0fca6ea1SDimitry Andric assert([&Result] { 200*0fca6ea1SDimitry Andric for (const auto &Entry : Result) 201*0fca6ea1SDimitry Andric if (Entry.getValue()->isReferenceType()) 202*0fca6ea1SDimitry Andric return false; 203*0fca6ea1SDimitry Andric return true; 204*0fca6ea1SDimitry Andric }()); 205*0fca6ea1SDimitry Andric return Result; 206*0fca6ea1SDimitry Andric } 2075f757f3fSDimitry Andric return {}; 2085f757f3fSDimitry Andric } 2095f757f3fSDimitry Andric 21004eeddc0SDimitry Andric private: 211bdd1243dSDimitry Andric friend class Environment; 212bdd1243dSDimitry Andric 213753f127fSDimitry Andric struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> { 214753f127fSDimitry Andric static QualType getEmptyKey() { 215753f127fSDimitry Andric // Allow a NULL `QualType` by using a different value as the empty key. 216753f127fSDimitry Andric return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1)); 217753f127fSDimitry Andric } 218753f127fSDimitry Andric 219753f127fSDimitry Andric using DenseMapInfo::getHashValue; 220753f127fSDimitry Andric using DenseMapInfo::getTombstoneKey; 221753f127fSDimitry Andric using DenseMapInfo::isEqual; 222753f127fSDimitry Andric }; 223753f127fSDimitry Andric 224*0fca6ea1SDimitry Andric /// `S` is the solver to use. `OwnedSolver` may be: 225*0fca6ea1SDimitry Andric /// * Null (in which case `S` is non-onwed and must outlive this object), or 226*0fca6ea1SDimitry Andric /// * Non-null (in which case it must refer to `S`, and the 227*0fca6ea1SDimitry Andric /// `DataflowAnalysisContext will take ownership of `OwnedSolver`). 228*0fca6ea1SDimitry Andric DataflowAnalysisContext(Solver &S, std::unique_ptr<Solver> &&OwnedSolver, 229*0fca6ea1SDimitry Andric Options Opts); 230*0fca6ea1SDimitry Andric 231bdd1243dSDimitry Andric // Extends the set of modeled field declarations. 23206c3fb27SDimitry Andric void addModeledFields(const FieldSet &Fields); 233bdd1243dSDimitry Andric 23481ad6265SDimitry Andric /// Adds all constraints of the flow condition identified by `Token` and all 2355f757f3fSDimitry Andric /// of its transitive dependencies to `Constraints`. 2365f757f3fSDimitry Andric void 2375f757f3fSDimitry Andric addTransitiveFlowConditionConstraints(Atom Token, 2385f757f3fSDimitry Andric llvm::SetVector<const Formula *> &Out); 2395f757f3fSDimitry Andric 2405f757f3fSDimitry Andric /// Returns true if the solver is able to prove that there is a satisfying 2415f757f3fSDimitry Andric /// assignment for `Constraints`. 2425f757f3fSDimitry Andric bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) { 2435f757f3fSDimitry Andric return querySolver(std::move(Constraints)).getStatus() == 2445f757f3fSDimitry Andric Solver::Result::Status::Satisfiable; 2455f757f3fSDimitry Andric } 24681ad6265SDimitry Andric 24781ad6265SDimitry Andric /// Returns true if the solver is able to prove that there is no satisfying 24881ad6265SDimitry Andric /// assignment for `Constraints` 24906c3fb27SDimitry Andric bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) { 250753f127fSDimitry Andric return querySolver(std::move(Constraints)).getStatus() == 251753f127fSDimitry Andric Solver::Result::Status::Unsatisfiable; 25281ad6265SDimitry Andric } 25381ad6265SDimitry Andric 254*0fca6ea1SDimitry Andric Solver &S; 255*0fca6ea1SDimitry Andric std::unique_ptr<Solver> OwnedSolver; 25606c3fb27SDimitry Andric std::unique_ptr<Arena> A; 25704eeddc0SDimitry Andric 25804eeddc0SDimitry Andric // Maps from program declarations and statements to storage locations that are 25904eeddc0SDimitry Andric // assigned to them. These assignments are global (aggregated across all basic 26004eeddc0SDimitry Andric // blocks) and are used to produce stable storage locations when the same 26104eeddc0SDimitry Andric // basic blocks are evaluated multiple times. The storage locations that are 26204eeddc0SDimitry Andric // in scope for a particular basic block are stored in `Environment`. 26304eeddc0SDimitry Andric llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc; 26404eeddc0SDimitry Andric llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc; 26504eeddc0SDimitry Andric 26681ad6265SDimitry Andric // Null pointer values, keyed by the canonical pointee type. 26781ad6265SDimitry Andric // 26881ad6265SDimitry Andric // FIXME: The pointer values are indexed by the pointee types which are 26981ad6265SDimitry Andric // required to initialize the `PointeeLoc` field in `PointerValue`. Consider 27081ad6265SDimitry Andric // creating a type-independent `NullPointerValue` without a `PointeeLoc` 27181ad6265SDimitry Andric // field. 272753f127fSDimitry Andric llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo> 273753f127fSDimitry Andric NullPointerVals; 27481ad6265SDimitry Andric 275bdd1243dSDimitry Andric Options Opts; 276bdd1243dSDimitry Andric 27781ad6265SDimitry Andric // Flow conditions are tracked symbolically: each unique flow condition is 27881ad6265SDimitry Andric // associated with a fresh symbolic variable (token), bound to the clause that 27981ad6265SDimitry Andric // defines the flow condition. Conceptually, each binding corresponds to an 28081ad6265SDimitry Andric // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition 28181ad6265SDimitry Andric // token (an atomic boolean) and `Ci`s are the set of constraints in the flow 28281ad6265SDimitry Andric // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in 28381ad6265SDimitry Andric // the `FlowConditionConstraints` map, keyed by the token of the flow 28481ad6265SDimitry Andric // condition. 28581ad6265SDimitry Andric // 28681ad6265SDimitry Andric // Flow conditions depend on other flow conditions if they are created using 28781ad6265SDimitry Andric // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition 28881ad6265SDimitry Andric // dependencies is stored in the `FlowConditionDeps` map. 28906c3fb27SDimitry Andric llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps; 29006c3fb27SDimitry Andric llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints; 2915f757f3fSDimitry Andric const Formula *Invariant = nullptr; 292bdd1243dSDimitry Andric 293*0fca6ea1SDimitry Andric llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts; 294bdd1243dSDimitry Andric 295bdd1243dSDimitry Andric // Fields modeled by environments covered by this context. 29606c3fb27SDimitry Andric FieldSet ModeledFields; 29706c3fb27SDimitry Andric 29806c3fb27SDimitry Andric std::unique_ptr<Logger> LogOwner; // If created via flags. 2995f757f3fSDimitry Andric 3005f757f3fSDimitry Andric std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback; 3015f757f3fSDimitry Andric 3025f757f3fSDimitry Andric /// Has any `RecordStorageLocation` been created yet? 3035f757f3fSDimitry Andric bool RecordStorageLocationCreated = false; 30404eeddc0SDimitry Andric }; 30504eeddc0SDimitry Andric 30604eeddc0SDimitry Andric } // namespace dataflow 30704eeddc0SDimitry Andric } // namespace clang 30804eeddc0SDimitry Andric 30904eeddc0SDimitry Andric #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H 310