xref: /llvm-project/clang/include/clang/Analysis/FlowSensitive/DataflowAnalysisContext.h (revision 23ae482bd01d7c966f871ddd620e9a26d6d66299)
1af7bc39bSStanislav Gatev //===-- DataflowAnalysisContext.h -------------------------------*- C++ -*-===//
2af7bc39bSStanislav Gatev //
3af7bc39bSStanislav Gatev // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4af7bc39bSStanislav Gatev // See https://llvm.org/LICENSE.txt for license information.
5af7bc39bSStanislav Gatev // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6af7bc39bSStanislav Gatev //
7af7bc39bSStanislav Gatev //===----------------------------------------------------------------------===//
8af7bc39bSStanislav Gatev //
9af7bc39bSStanislav Gatev //  This file defines a DataflowAnalysisContext class that owns objects that
10af7bc39bSStanislav Gatev //  encompass the state of a program and stores context that is used during
11af7bc39bSStanislav Gatev //  dataflow analysis.
12af7bc39bSStanislav Gatev //
13af7bc39bSStanislav Gatev //===----------------------------------------------------------------------===//
14af7bc39bSStanislav Gatev 
15af7bc39bSStanislav Gatev #ifndef LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
16af7bc39bSStanislav Gatev #define LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
17af7bc39bSStanislav Gatev 
18af7bc39bSStanislav Gatev #include "clang/AST/Decl.h"
19e7481f6eSStanislav Gatev #include "clang/AST/Expr.h"
20b611376eSWei Yi Tee #include "clang/AST/TypeOrdering.h"
219ec8c961SSamira Bazuzi #include "clang/Analysis/FlowSensitive/ASTOps.h"
2259ff3adcSmartinboehme #include "clang/Analysis/FlowSensitive/AdornedCFG.h"
23bf47c1edSSam McCall #include "clang/Analysis/FlowSensitive/Arena.h"
24ae60884dSStanislav Gatev #include "clang/Analysis/FlowSensitive/Solver.h"
25af7bc39bSStanislav Gatev #include "clang/Analysis/FlowSensitive/StorageLocation.h"
26af7bc39bSStanislav Gatev #include "clang/Analysis/FlowSensitive/Value.h"
27af7bc39bSStanislav Gatev #include "llvm/ADT/DenseMap.h"
28955a05a2SStanislav Gatev #include "llvm/ADT/DenseSet.h"
29d85c233eSSam McCall #include "llvm/ADT/SetVector.h"
30b5414b56SDmitri Gribenko #include "llvm/Support/Compiler.h"
31af7bc39bSStanislav Gatev #include <cassert>
32af7bc39bSStanislav Gatev #include <memory>
33a1580d7bSKazu Hirata #include <optional>
34af7bc39bSStanislav Gatev 
35af7bc39bSStanislav Gatev namespace clang {
36af7bc39bSStanislav Gatev namespace dataflow {
3748f97e57SSam McCall class Logger;
38af7bc39bSStanislav Gatev 
39264976d9SYitzhak Mandelbaum struct ContextSensitiveOptions {
40264976d9SYitzhak Mandelbaum   /// The maximum depth to analyze. A value of zero is equivalent to disabling
41264976d9SYitzhak Mandelbaum   /// context-sensitive analysis entirely.
42264976d9SYitzhak Mandelbaum   unsigned Depth = 2;
43264976d9SYitzhak Mandelbaum };
44264976d9SYitzhak Mandelbaum 
45af7bc39bSStanislav Gatev /// Owns objects that encompass the state of a program and stores context that
46af7bc39bSStanislav Gatev /// is used during dataflow analysis.
47af7bc39bSStanislav Gatev class DataflowAnalysisContext {
48af7bc39bSStanislav Gatev public:
4901ccf7b3SYitzhak Mandelbaum   struct Options {
50264976d9SYitzhak Mandelbaum     /// Options for analyzing function bodies when present in the translation
51264976d9SYitzhak Mandelbaum     /// unit, or empty to disable context-sensitive analysis. Note that this is
52264976d9SYitzhak Mandelbaum     /// fundamentally limited: some constructs, such as recursion, are
53264976d9SYitzhak Mandelbaum     /// explicitly unsupported.
546ad0788cSKazu Hirata     std::optional<ContextSensitiveOptions> ContextSensitiveOpts;
5548f97e57SSam McCall 
5648f97e57SSam McCall     /// If provided, analysis details will be recorded here.
5748f97e57SSam McCall     /// (This is always non-null within an AnalysisContext, the framework
5848f97e57SSam McCall     /// provides a fallback no-op logger).
5948f97e57SSam McCall     Logger *Log = nullptr;
6001ccf7b3SYitzhak Mandelbaum   };
6101ccf7b3SYitzhak Mandelbaum 
62ae60884dSStanislav Gatev   /// Constructs a dataflow analysis context.
63ae60884dSStanislav Gatev   ///
64ae60884dSStanislav Gatev   /// Requirements:
65ae60884dSStanislav Gatev   ///
66ae60884dSStanislav Gatev   ///  `S` must not be null.
6701ccf7b3SYitzhak Mandelbaum   DataflowAnalysisContext(std::unique_ptr<Solver> S,
68264976d9SYitzhak Mandelbaum                           Options Opts = Options{
6948f97e57SSam McCall                               /*ContextSensitiveOpts=*/std::nullopt,
70*23ae482bSmartinboehme                               /*Logger=*/nullptr})
71*23ae482bSmartinboehme       : DataflowAnalysisContext(*S, std::move(S), Opts) {}
72*23ae482bSmartinboehme 
73*23ae482bSmartinboehme   /// Constructs a dataflow analysis context.
74*23ae482bSmartinboehme   ///
75*23ae482bSmartinboehme   /// Requirements:
76*23ae482bSmartinboehme   ///
77*23ae482bSmartinboehme   ///  `S` must outlive the `DataflowAnalysisContext`.
78*23ae482bSmartinboehme   DataflowAnalysisContext(Solver &S, Options Opts = Options{
79*23ae482bSmartinboehme                                          /*ContextSensitiveOpts=*/std::nullopt,
80*23ae482bSmartinboehme                                          /*Logger=*/nullptr})
DataflowAnalysisContext(S,nullptr,Opts)81*23ae482bSmartinboehme       : DataflowAnalysisContext(S, nullptr, Opts) {}
82*23ae482bSmartinboehme 
8348f97e57SSam McCall   ~DataflowAnalysisContext();
84ae60884dSStanislav Gatev 
8571f2ec2dSmartinboehme   /// Sets a callback that returns the names and types of the synthetic fields
8671f2ec2dSmartinboehme   /// to add to a `RecordStorageLocation` of a given type.
8771f2ec2dSmartinboehme   /// Typically, this is called from the constructor of a `DataflowAnalysis`
8871f2ec2dSmartinboehme   ///
89270f2c55Smartinboehme   /// The field types returned by the callback may not have reference type.
90270f2c55Smartinboehme   ///
9171f2ec2dSmartinboehme   /// To maintain the invariant that all `RecordStorageLocation`s of a given
9271f2ec2dSmartinboehme   /// type have the same fields:
9371f2ec2dSmartinboehme   /// *  The callback must always return the same result for a given type
9471f2ec2dSmartinboehme   /// *  `setSyntheticFieldCallback()` must be called before any
9571f2ec2dSmartinboehme   //     `RecordStorageLocation`s are created.
setSyntheticFieldCallback(std::function<llvm::StringMap<QualType> (QualType)> CB)9671f2ec2dSmartinboehme   void setSyntheticFieldCallback(
9771f2ec2dSmartinboehme       std::function<llvm::StringMap<QualType>(QualType)> CB) {
9871f2ec2dSmartinboehme     assert(!RecordStorageLocationCreated);
9971f2ec2dSmartinboehme     SyntheticFieldCallback = CB;
10071f2ec2dSmartinboehme   }
10171f2ec2dSmartinboehme 
102817dd5e3SStanislav Gatev   /// Returns a new storage location appropriate for `Type`.
10312c7352fSWei Yi Tee   ///
10454d24eaeSEric Li   /// A null `Type` is interpreted as the pointee type of `std::nullptr_t`.
105817dd5e3SStanislav Gatev   StorageLocation &createStorageLocation(QualType Type);
10612c7352fSWei Yi Tee 
10771f2ec2dSmartinboehme   /// Creates a `RecordStorageLocation` for the given type and with the given
10871f2ec2dSmartinboehme   /// fields.
10971f2ec2dSmartinboehme   ///
11071f2ec2dSmartinboehme   /// Requirements:
11171f2ec2dSmartinboehme   ///
11271f2ec2dSmartinboehme   ///  `FieldLocs` must contain exactly the fields returned by
11371f2ec2dSmartinboehme   ///  `getModeledFields(Type)`.
11471f2ec2dSmartinboehme   ///  `SyntheticFields` must contain exactly the fields returned by
11571f2ec2dSmartinboehme   ///  `getSyntheticFields(Type)`.
11671f2ec2dSmartinboehme   RecordStorageLocation &createRecordStorageLocation(
11771f2ec2dSmartinboehme       QualType Type, RecordStorageLocation::FieldToLoc FieldLocs,
11871f2ec2dSmartinboehme       RecordStorageLocation::SyntheticFieldMap SyntheticFields);
11971f2ec2dSmartinboehme 
12012c7352fSWei Yi Tee   /// Returns a stable storage location for `D`.
12152d06963SStanislav Gatev   StorageLocation &getStableStorageLocation(const ValueDecl &D);
12212c7352fSWei Yi Tee 
12312c7352fSWei Yi Tee   /// Returns a stable storage location for `E`.
12412c7352fSWei Yi Tee   StorageLocation &getStableStorageLocation(const Expr &E);
12512c7352fSWei Yi Tee 
126b611376eSWei Yi Tee   /// Returns a pointer value that represents a null pointer. Calls with
127b611376eSWei Yi Tee   /// `PointeeType` that are canonically equivalent will return the same result.
128f10d271aSEric Li   /// A null `PointeeType` can be used for the pointee of `std::nullptr_t`.
129b611376eSWei Yi Tee   PointerValue &getOrCreateNullPointerValue(QualType PointeeType);
130b611376eSWei Yi Tee 
13121ab252fSSam McCall   /// Adds `Constraint` to current and future flow conditions in this context.
13221ab252fSSam McCall   ///
13321ab252fSSam McCall   /// Invariants must contain only flow-insensitive information, i.e. facts that
13421ab252fSSam McCall   /// are true on all paths through the program.
13521ab252fSSam McCall   /// Information can be added eagerly (when analysis begins), or lazily (e.g.
13621ab252fSSam McCall   /// when values are first used). The analysis must be careful that the same
13721ab252fSSam McCall   /// information is added regardless of which order blocks are analyzed in.
13821ab252fSSam McCall   void addInvariant(const Formula &Constraint);
13921ab252fSSam McCall 
140955a05a2SStanislav Gatev   /// Adds `Constraint` to the flow condition identified by `Token`.
141fc9821a8SSam McCall   void addFlowConditionConstraint(Atom Token, const Formula &Constraint);
142955a05a2SStanislav Gatev 
143955a05a2SStanislav Gatev   /// Creates a new flow condition with the same constraints as the flow
144955a05a2SStanislav Gatev   /// condition identified by `Token` and returns its token.
145fc9821a8SSam McCall   Atom forkFlowCondition(Atom Token);
146955a05a2SStanislav Gatev 
147955a05a2SStanislav Gatev   /// Creates a new flow condition that represents the disjunction of the flow
148955a05a2SStanislav Gatev   /// conditions identified by `FirstToken` and `SecondToken`, and returns its
149955a05a2SStanislav Gatev   /// token.
150fc9821a8SSam McCall   Atom joinFlowConditions(Atom FirstToken, Atom SecondToken);
151955a05a2SStanislav Gatev 
152d1f59544Smartinboehme   /// Returns true if the constraints of the flow condition identified by
153d1f59544Smartinboehme   /// `Token` imply that `F` is true.
154d1f59544Smartinboehme   /// Returns false if the flow condition does not imply `F` or if the solver
155d1f59544Smartinboehme   /// times out.
156d1f59544Smartinboehme   bool flowConditionImplies(Atom Token, const Formula &F);
157d1f59544Smartinboehme 
158d1f59544Smartinboehme   /// Returns true if the constraints of the flow condition identified by
159d1f59544Smartinboehme   /// `Token` still allow `F` to be true.
160d1f59544Smartinboehme   /// Returns false if the flow condition implies that `F` is false or if the
161d1f59544Smartinboehme   /// solver times out.
162d1f59544Smartinboehme   bool flowConditionAllows(Atom Token, const Formula &F);
163955a05a2SStanislav Gatev 
1640f65a3e6SWei Yi Tee   /// Returns true if `Val1` is equivalent to `Val2`.
1650f65a3e6SWei Yi Tee   /// Note: This function doesn't take into account constraints on `Val1` and
1660f65a3e6SWei Yi Tee   /// `Val2` imposed by the flow condition.
167fc9821a8SSam McCall   bool equivalentFormulas(const Formula &Val1, const Formula &Val2);
1680f65a3e6SWei Yi Tee 
169fc9821a8SSam McCall   LLVM_DUMP_METHOD void dumpFlowCondition(Atom Token,
170ee2cd606SSam McCall                                           llvm::raw_ostream &OS = llvm::dbgs());
171b5414b56SDmitri Gribenko 
17259ff3adcSmartinboehme   /// Returns the `AdornedCFG` registered for `F`, if any. Otherwise,
173692e0303SYitzhak Mandelbaum   /// returns null.
17459ff3adcSmartinboehme   const AdornedCFG *getAdornedCFG(const FunctionDecl *F);
175692e0303SYitzhak Mandelbaum 
getOptions()176264976d9SYitzhak Mandelbaum   const Options &getOptions() { return Opts; }
177264976d9SYitzhak Mandelbaum 
arena()178bf47c1edSSam McCall   Arena &arena() { return *A; }
179bf47c1edSSam McCall 
1803b29b8a2SSamira Bazuzi   /// Returns the outcome of satisfiability checking on `Constraints`.
1813b29b8a2SSamira Bazuzi   ///
1823b29b8a2SSamira Bazuzi   /// Flow conditions are not incorporated, so they may need to be manually
1833b29b8a2SSamira Bazuzi   /// included in `Constraints` to provide contextually-accurate results, e.g.
1843b29b8a2SSamira Bazuzi   /// if any definitions or relationships of the values in `Constraints` have
1853b29b8a2SSamira Bazuzi   /// been stored in flow conditions.
186fc9821a8SSam McCall   Solver::Result querySolver(llvm::SetVector<const Formula *> Constraints);
1873b29b8a2SSamira Bazuzi 
188e8a1560dSMartin Braenne   /// Returns the fields of `Type`, limited to the set of fields modeled by this
189e8a1560dSMartin Braenne   /// context.
190e8a1560dSMartin Braenne   FieldSet getModeledFields(QualType Type);
191e8a1560dSMartin Braenne 
19271f2ec2dSmartinboehme   /// Returns the names and types of the synthetic fields for the given record
19371f2ec2dSmartinboehme   /// type.
getSyntheticFields(QualType Type)19471f2ec2dSmartinboehme   llvm::StringMap<QualType> getSyntheticFields(QualType Type) {
19571f2ec2dSmartinboehme     assert(Type->isRecordType());
196270f2c55Smartinboehme     if (SyntheticFieldCallback) {
197270f2c55Smartinboehme       llvm::StringMap<QualType> Result = SyntheticFieldCallback(Type);
198270f2c55Smartinboehme       // Synthetic fields are not allowed to have reference type.
199270f2c55Smartinboehme       assert([&Result] {
200270f2c55Smartinboehme         for (const auto &Entry : Result)
201270f2c55Smartinboehme           if (Entry.getValue()->isReferenceType())
202270f2c55Smartinboehme             return false;
203270f2c55Smartinboehme         return true;
204270f2c55Smartinboehme       }());
205270f2c55Smartinboehme       return Result;
206270f2c55Smartinboehme     }
20771f2ec2dSmartinboehme     return {};
20871f2ec2dSmartinboehme   }
20971f2ec2dSmartinboehme 
210af7bc39bSStanislav Gatev private:
21101ccf7b3SYitzhak Mandelbaum   friend class Environment;
21201ccf7b3SYitzhak Mandelbaum 
213f10d271aSEric Li   struct NullableQualTypeDenseMapInfo : private llvm::DenseMapInfo<QualType> {
getEmptyKeyNullableQualTypeDenseMapInfo214f10d271aSEric Li     static QualType getEmptyKey() {
215f10d271aSEric Li       // Allow a NULL `QualType` by using a different value as the empty key.
216f10d271aSEric Li       return QualType::getFromOpaquePtr(reinterpret_cast<Type *>(1));
217f10d271aSEric Li     }
218f10d271aSEric Li 
219f10d271aSEric Li     using DenseMapInfo::getHashValue;
220f10d271aSEric Li     using DenseMapInfo::getTombstoneKey;
221f10d271aSEric Li     using DenseMapInfo::isEqual;
222f10d271aSEric Li   };
223f10d271aSEric Li 
224*23ae482bSmartinboehme   /// `S` is the solver to use. `OwnedSolver` may be:
225*23ae482bSmartinboehme   /// *  Null (in which case `S` is non-onwed and must outlive this object), or
226*23ae482bSmartinboehme   /// *  Non-null (in which case it must refer to `S`, and the
227*23ae482bSmartinboehme   ///    `DataflowAnalysisContext will take ownership of `OwnedSolver`).
228*23ae482bSmartinboehme   DataflowAnalysisContext(Solver &S, std::unique_ptr<Solver> &&OwnedSolver,
229*23ae482bSmartinboehme                           Options Opts);
230*23ae482bSmartinboehme 
231089a5446SYitzhak Mandelbaum   // Extends the set of modeled field declarations.
232e8a1560dSMartin Braenne   void addModeledFields(const FieldSet &Fields);
23301ccf7b3SYitzhak Mandelbaum 
234955a05a2SStanislav Gatev   /// Adds all constraints of the flow condition identified by `Token` and all
23521ab252fSSam McCall   /// of its transitive dependencies to `Constraints`.
23621ab252fSSam McCall   void
23721ab252fSSam McCall   addTransitiveFlowConditionConstraints(Atom Token,
23821ab252fSSam McCall                                         llvm::SetVector<const Formula *> &Out);
23921ab252fSSam McCall 
240d1f59544Smartinboehme   /// Returns true if the solver is able to prove that there is a satisfying
241d1f59544Smartinboehme   /// assignment for `Constraints`.
isSatisfiable(llvm::SetVector<const Formula * > Constraints)242d1f59544Smartinboehme   bool isSatisfiable(llvm::SetVector<const Formula *> Constraints) {
243d1f59544Smartinboehme     return querySolver(std::move(Constraints)).getStatus() ==
244d1f59544Smartinboehme            Solver::Result::Status::Satisfiable;
245d1f59544Smartinboehme   }
246955a05a2SStanislav Gatev 
24742a7ddb4SWei Yi Tee   /// Returns true if the solver is able to prove that there is no satisfying
24842a7ddb4SWei Yi Tee   /// assignment for `Constraints`
isUnsatisfiable(llvm::SetVector<const Formula * > Constraints)249fc9821a8SSam McCall   bool isUnsatisfiable(llvm::SetVector<const Formula *> Constraints) {
25081e6400dSWei Yi Tee     return querySolver(std::move(Constraints)).getStatus() ==
25181e6400dSWei Yi Tee            Solver::Result::Status::Unsatisfiable;
25242a7ddb4SWei Yi Tee   }
25342a7ddb4SWei Yi Tee 
254*23ae482bSmartinboehme   Solver &S;
255*23ae482bSmartinboehme   std::unique_ptr<Solver> OwnedSolver;
256bf47c1edSSam McCall   std::unique_ptr<Arena> A;
257af7bc39bSStanislav Gatev 
258af7bc39bSStanislav Gatev   // Maps from program declarations and statements to storage locations that are
259af7bc39bSStanislav Gatev   // assigned to them. These assignments are global (aggregated across all basic
260af7bc39bSStanislav Gatev   // blocks) and are used to produce stable storage locations when the same
261af7bc39bSStanislav Gatev   // basic blocks are evaluated multiple times. The storage locations that are
262af7bc39bSStanislav Gatev   // in scope for a particular basic block are stored in `Environment`.
263af7bc39bSStanislav Gatev   llvm::DenseMap<const ValueDecl *, StorageLocation *> DeclToLoc;
264e7481f6eSStanislav Gatev   llvm::DenseMap<const Expr *, StorageLocation *> ExprToLoc;
265af7bc39bSStanislav Gatev 
266b611376eSWei Yi Tee   // Null pointer values, keyed by the canonical pointee type.
267b611376eSWei Yi Tee   //
268b611376eSWei Yi Tee   // FIXME: The pointer values are indexed by the pointee types which are
269b611376eSWei Yi Tee   // required to initialize the `PointeeLoc` field in `PointerValue`. Consider
270b611376eSWei Yi Tee   // creating a type-independent `NullPointerValue` without a `PointeeLoc`
271b611376eSWei Yi Tee   // field.
272f10d271aSEric Li   llvm::DenseMap<QualType, PointerValue *, NullableQualTypeDenseMapInfo>
273f10d271aSEric Li       NullPointerVals;
274b611376eSWei Yi Tee 
275264976d9SYitzhak Mandelbaum   Options Opts;
27601ccf7b3SYitzhak Mandelbaum 
277955a05a2SStanislav Gatev   // Flow conditions are tracked symbolically: each unique flow condition is
278955a05a2SStanislav Gatev   // associated with a fresh symbolic variable (token), bound to the clause that
279955a05a2SStanislav Gatev   // defines the flow condition. Conceptually, each binding corresponds to an
280955a05a2SStanislav Gatev   // "iff" of the form `FC <=> (C1 ^ C2 ^ ...)` where `FC` is a flow condition
281955a05a2SStanislav Gatev   // token (an atomic boolean) and `Ci`s are the set of constraints in the flow
282fb88ea62SWei Yi Tee   // flow condition clause. The set of constraints (C1 ^ C2 ^ ...) are stored in
283fb88ea62SWei Yi Tee   // the `FlowConditionConstraints` map, keyed by the token of the flow
284fb88ea62SWei Yi Tee   // condition.
285955a05a2SStanislav Gatev   //
286955a05a2SStanislav Gatev   // Flow conditions depend on other flow conditions if they are created using
287955a05a2SStanislav Gatev   // `forkFlowCondition` or `joinFlowConditions`. The graph of flow condition
288955a05a2SStanislav Gatev   // dependencies is stored in the `FlowConditionDeps` map.
289fc9821a8SSam McCall   llvm::DenseMap<Atom, llvm::DenseSet<Atom>> FlowConditionDeps;
290fc9821a8SSam McCall   llvm::DenseMap<Atom, const Formula *> FlowConditionConstraints;
29121ab252fSSam McCall   const Formula *Invariant = nullptr;
292692e0303SYitzhak Mandelbaum 
29359ff3adcSmartinboehme   llvm::DenseMap<const FunctionDecl *, AdornedCFG> FunctionContexts;
29401ccf7b3SYitzhak Mandelbaum 
295089a5446SYitzhak Mandelbaum   // Fields modeled by environments covered by this context.
296e8a1560dSMartin Braenne   FieldSet ModeledFields;
29748f97e57SSam McCall 
29848f97e57SSam McCall   std::unique_ptr<Logger> LogOwner; // If created via flags.
29971f2ec2dSmartinboehme 
30071f2ec2dSmartinboehme   std::function<llvm::StringMap<QualType>(QualType)> SyntheticFieldCallback;
30171f2ec2dSmartinboehme 
30271f2ec2dSmartinboehme   /// Has any `RecordStorageLocation` been created yet?
30371f2ec2dSmartinboehme   bool RecordStorageLocationCreated = false;
304af7bc39bSStanislav Gatev };
305af7bc39bSStanislav Gatev 
306af7bc39bSStanislav Gatev } // namespace dataflow
307af7bc39bSStanislav Gatev } // namespace clang
308af7bc39bSStanislav Gatev 
309af7bc39bSStanislav Gatev #endif // LLVM_CLANG_ANALYSIS_FLOWSENSITIVE_DATAFLOWANALYSISCONTEXT_H
310