xref: /llvm-project/clang/include/clang/StaticAnalyzer/Core/PathSensitive/CoreEngine.h (revision bb27d5e5c6b194a1440b8ac4e5ace68d0ee2a849)
1 //===- CoreEngine.h - Path-Sensitive Dataflow Engine ------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines a generic engine for intraprocedural, path-sensitive,
10 //  dataflow analysis via graph reachability.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #ifndef LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
15 #define LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
16 
17 #include "clang/AST/Stmt.h"
18 #include "clang/Analysis/AnalysisDeclContext.h"
19 #include "clang/Analysis/CFG.h"
20 #include "clang/Analysis/ProgramPoint.h"
21 #include "clang/Basic/LLVM.h"
22 #include "clang/StaticAnalyzer/Core/BugReporter/BugReporter.h"
23 #include "clang/StaticAnalyzer/Core/PathSensitive/BlockCounter.h"
24 #include "clang/StaticAnalyzer/Core/PathSensitive/ExplodedGraph.h"
25 #include "clang/StaticAnalyzer/Core/PathSensitive/ProgramState_Fwd.h"
26 #include "clang/StaticAnalyzer/Core/PathSensitive/WorkList.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/iterator_range.h"
29 #include "llvm/Support/Casting.h"
30 #include <cassert>
31 #include <memory>
32 #include <utility>
33 #include <vector>
34 
35 namespace clang {
36 
37 class AnalyzerOptions;
38 class CXXBindTemporaryExpr;
39 class Expr;
40 class LabelDecl;
41 
42 namespace ento {
43 
44 class FunctionSummariesTy;
45 class ExprEngine;
46 
47 //===----------------------------------------------------------------------===//
48 /// CoreEngine - Implements the core logic of the graph-reachability analysis.
49 /// It traverses the CFG and generates the ExplodedGraph.
50 class CoreEngine {
51   friend class CommonNodeBuilder;
52   friend class EndOfFunctionNodeBuilder;
53   friend class ExprEngine;
54   friend class IndirectGotoNodeBuilder;
55   friend class NodeBuilder;
56   friend class NodeBuilderContext;
57   friend class SwitchNodeBuilder;
58 
59 public:
60   using BlocksExhausted =
61       std::vector<std::pair<BlockEdge, const ExplodedNode *>>;
62 
63   using BlocksAborted =
64       std::vector<std::pair<const CFGBlock *, const ExplodedNode *>>;
65 
66 private:
67   ExprEngine &ExprEng;
68 
69   /// G - The simulation graph.  Each node is a (location,state) pair.
70   mutable ExplodedGraph G;
71 
72   /// WList - A set of queued nodes that need to be processed by the
73   ///  worklist algorithm.  It is up to the implementation of WList to decide
74   ///  the order that nodes are processed.
75   std::unique_ptr<WorkList> WList;
76   std::unique_ptr<WorkList> CTUWList;
77 
78   /// BCounterFactory - A factory object for created BlockCounter objects.
79   ///   These are used to record for key nodes in the ExplodedGraph the
80   ///   number of times different CFGBlocks have been visited along a path.
81   BlockCounter::Factory BCounterFactory;
82 
83   /// The locations where we stopped doing work because we visited a location
84   ///  too many times.
85   BlocksExhausted blocksExhausted;
86 
87   /// The locations where we stopped because the engine aborted analysis,
88   /// usually because it could not reason about something.
89   BlocksAborted blocksAborted;
90 
91   /// The information about functions shared by the whole translation unit.
92   /// (This data is owned by AnalysisConsumer.)
93   FunctionSummariesTy *FunctionSummaries;
94 
95   /// Add path tags with some useful data along the path when we see that
96   /// something interesting is happening. This field is the allocator for such
97   /// tags.
98   DataTag::Factory DataTags;
99 
100   void setBlockCounter(BlockCounter C);
101 
102   void generateNode(const ProgramPoint &Loc,
103                     ProgramStateRef State,
104                     ExplodedNode *Pred);
105 
106   void HandleBlockEdge(const BlockEdge &E, ExplodedNode *Pred);
107   void HandleBlockEntrance(const BlockEntrance &E, ExplodedNode *Pred);
108   void HandleBlockExit(const CFGBlock *B, ExplodedNode *Pred);
109 
110   void HandleCallEnter(const CallEnter &CE, ExplodedNode *Pred);
111 
112   void HandlePostStmt(const CFGBlock *B, unsigned StmtIdx, ExplodedNode *Pred);
113 
114   void HandleBranch(const Stmt *Cond, const Stmt *Term, const CFGBlock *B,
115                     ExplodedNode *Pred);
116   void HandleCleanupTemporaryBranch(const CXXBindTemporaryExpr *BTE,
117                                     const CFGBlock *B, ExplodedNode *Pred);
118 
119   /// Handle conditional logic for running static initializers.
120   void HandleStaticInit(const DeclStmt *DS, const CFGBlock *B,
121                         ExplodedNode *Pred);
122 
123   void HandleVirtualBaseBranch(const CFGBlock *B, ExplodedNode *Pred);
124 
125 private:
126   ExplodedNode *generateCallExitBeginNode(ExplodedNode *N,
127                                           const ReturnStmt *RS);
128 
129   /// Helper function called by `HandleBranch()`. If the currently handled
130   /// branch corresponds to a loop, this returns the number of already
131   /// completed iterations in that loop, otherwise the return value is
132   /// `std::nullopt`. Note that this counts _all_ earlier iterations, including
133   /// ones that were performed within an earlier iteration of an outer loop.
134   std::optional<unsigned> getCompletedIterationCount(const CFGBlock *B,
135                                                      ExplodedNode *Pred) const;
136 
137 public:
138   /// Construct a CoreEngine object to analyze the provided CFG.
139   CoreEngine(ExprEngine &exprengine,
140              FunctionSummariesTy *FS,
141              AnalyzerOptions &Opts);
142 
143   CoreEngine(const CoreEngine &) = delete;
144   CoreEngine &operator=(const CoreEngine &) = delete;
145 
146   /// getGraph - Returns the exploded graph.
147   ExplodedGraph &getGraph() { return G; }
148 
149   /// ExecuteWorkList - Run the worklist algorithm for a maximum number of
150   ///  steps.  Returns true if there is still simulation state on the worklist.
151   bool ExecuteWorkList(const LocationContext *L, unsigned Steps,
152                        ProgramStateRef InitState);
153 
154   /// Dispatch the work list item based on the given location information.
155   /// Use Pred parameter as the predecessor state.
156   void dispatchWorkItem(ExplodedNode* Pred, ProgramPoint Loc,
157                         const WorkListUnit& WU);
158 
159   // Functions for external checking of whether we have unfinished work
160   bool wasBlockAborted() const { return !blocksAborted.empty(); }
161   bool wasBlocksExhausted() const { return !blocksExhausted.empty(); }
162   bool hasWorkRemaining() const { return wasBlocksExhausted() ||
163                                          WList->hasWork() ||
164                                          wasBlockAborted(); }
165 
166   /// Inform the CoreEngine that a basic block was aborted because
167   /// it could not be completely analyzed.
168   void addAbortedBlock(const ExplodedNode *node, const CFGBlock *block) {
169     blocksAborted.push_back(std::make_pair(block, node));
170   }
171 
172   WorkList *getWorkList() const { return WList.get(); }
173   WorkList *getCTUWorkList() const { return CTUWList.get(); }
174 
175   auto exhausted_blocks() const {
176     return llvm::iterator_range(blocksExhausted);
177   }
178 
179   auto aborted_blocks() const { return llvm::iterator_range(blocksAborted); }
180 
181   /// Enqueue the given set of nodes onto the work list.
182   void enqueue(ExplodedNodeSet &Set);
183 
184   /// Enqueue nodes that were created as a result of processing
185   /// a statement onto the work list.
186   void enqueue(ExplodedNodeSet &Set, const CFGBlock *Block, unsigned Idx);
187 
188   /// enqueue the nodes corresponding to the end of function onto the
189   /// end of path / work list.
190   void enqueueEndOfFunction(ExplodedNodeSet &Set, const ReturnStmt *RS);
191 
192   /// Enqueue a single node created as a result of statement processing.
193   void enqueueStmtNode(ExplodedNode *N, const CFGBlock *Block, unsigned Idx);
194 
195   DataTag::Factory &getDataTags() { return DataTags; }
196 };
197 
198 class NodeBuilderContext {
199   const CoreEngine &Eng;
200   const CFGBlock *Block;
201   const LocationContext *LC;
202 
203 public:
204   NodeBuilderContext(const CoreEngine &E, const CFGBlock *B,
205                      const LocationContext *L)
206       : Eng(E), Block(B), LC(L) {
207     assert(B);
208   }
209 
210   NodeBuilderContext(const CoreEngine &E, const CFGBlock *B, ExplodedNode *N)
211       : NodeBuilderContext(E, B, N->getLocationContext()) {}
212 
213   /// Return the CoreEngine associated with this builder.
214   const CoreEngine &getEngine() const { return Eng; }
215 
216   /// Return the CFGBlock associated with this builder.
217   const CFGBlock *getBlock() const { return Block; }
218 
219   /// Return the location context associated with this builder.
220   const LocationContext *getLocationContext() const { return LC; }
221 
222   /// Returns the number of times the current basic block has been
223   /// visited on the exploded graph path.
224   unsigned blockCount() const {
225     return Eng.WList->getBlockCounter().getNumVisited(
226                     LC->getStackFrame(),
227                     Block->getBlockID());
228   }
229 };
230 
231 /// \class NodeBuilder
232 /// This is the simplest builder which generates nodes in the
233 /// ExplodedGraph.
234 ///
235 /// The main benefit of the builder is that it automatically tracks the
236 /// frontier nodes (or destination set). This is the set of nodes which should
237 /// be propagated to the next step / builder. They are the nodes which have been
238 /// added to the builder (either as the input node set or as the newly
239 /// constructed nodes) but did not have any outgoing transitions added.
240 class NodeBuilder {
241   virtual void anchor();
242 
243 protected:
244   const NodeBuilderContext &C;
245 
246   /// Specifies if the builder results have been finalized. For example, if it
247   /// is set to false, autotransitions are yet to be generated.
248   bool Finalized;
249 
250   bool HasGeneratedNodes = false;
251 
252   /// The frontier set - a set of nodes which need to be propagated after
253   /// the builder dies.
254   ExplodedNodeSet &Frontier;
255 
256   /// Checks if the results are ready.
257   virtual bool checkResults() {
258     return Finalized;
259   }
260 
261   bool hasNoSinksInFrontier() {
262     for (const auto  I : Frontier)
263       if (I->isSink())
264         return false;
265     return true;
266   }
267 
268   /// Allow subclasses to finalize results before result_begin() is executed.
269   virtual void finalizeResults() {}
270 
271   ExplodedNode *generateNodeImpl(const ProgramPoint &PP,
272                                  ProgramStateRef State,
273                                  ExplodedNode *Pred,
274                                  bool MarkAsSink = false);
275 
276 public:
277   NodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
278               const NodeBuilderContext &Ctx, bool F = true)
279       : C(Ctx), Finalized(F), Frontier(DstSet) {
280     Frontier.Add(SrcNode);
281   }
282 
283   NodeBuilder(const ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
284               const NodeBuilderContext &Ctx, bool F = true)
285       : C(Ctx), Finalized(F), Frontier(DstSet) {
286     Frontier.insert(SrcSet);
287     assert(hasNoSinksInFrontier());
288   }
289 
290   virtual ~NodeBuilder() = default;
291 
292   /// Generates a node in the ExplodedGraph.
293   ExplodedNode *generateNode(const ProgramPoint &PP,
294                              ProgramStateRef State,
295                              ExplodedNode *Pred) {
296     return generateNodeImpl(
297         PP, State, Pred,
298         /*MarkAsSink=*/State->isPosteriorlyOverconstrained());
299   }
300 
301   /// Generates a sink in the ExplodedGraph.
302   ///
303   /// When a node is marked as sink, the exploration from the node is stopped -
304   /// the node becomes the last node on the path and certain kinds of bugs are
305   /// suppressed.
306   ExplodedNode *generateSink(const ProgramPoint &PP,
307                              ProgramStateRef State,
308                              ExplodedNode *Pred) {
309     return generateNodeImpl(PP, State, Pred, true);
310   }
311 
312   const ExplodedNodeSet &getResults() {
313     finalizeResults();
314     assert(checkResults());
315     return Frontier;
316   }
317 
318   using iterator = ExplodedNodeSet::iterator;
319 
320   /// Iterators through the results frontier.
321   iterator begin() {
322     finalizeResults();
323     assert(checkResults());
324     return Frontier.begin();
325   }
326 
327   iterator end() {
328     finalizeResults();
329     return Frontier.end();
330   }
331 
332   const NodeBuilderContext &getContext() { return C; }
333   bool hasGeneratedNodes() { return HasGeneratedNodes; }
334 
335   void takeNodes(const ExplodedNodeSet &S) {
336     for (const auto I : S)
337       Frontier.erase(I);
338   }
339 
340   void takeNodes(ExplodedNode *N) { Frontier.erase(N); }
341   void addNodes(const ExplodedNodeSet &S) { Frontier.insert(S); }
342   void addNodes(ExplodedNode *N) { Frontier.Add(N); }
343 };
344 
345 /// \class NodeBuilderWithSinks
346 /// This node builder keeps track of the generated sink nodes.
347 class NodeBuilderWithSinks: public NodeBuilder {
348   void anchor() override;
349 
350 protected:
351   SmallVector<ExplodedNode*, 2> sinksGenerated;
352   ProgramPoint &Location;
353 
354 public:
355   NodeBuilderWithSinks(ExplodedNode *Pred, ExplodedNodeSet &DstSet,
356                        const NodeBuilderContext &Ctx, ProgramPoint &L)
357       : NodeBuilder(Pred, DstSet, Ctx), Location(L) {}
358 
359   ExplodedNode *generateNode(ProgramStateRef State,
360                              ExplodedNode *Pred,
361                              const ProgramPointTag *Tag = nullptr) {
362     const ProgramPoint &LocalLoc = (Tag ? Location.withTag(Tag) : Location);
363     return NodeBuilder::generateNode(LocalLoc, State, Pred);
364   }
365 
366   ExplodedNode *generateSink(ProgramStateRef State, ExplodedNode *Pred,
367                              const ProgramPointTag *Tag = nullptr) {
368     const ProgramPoint &LocalLoc = (Tag ? Location.withTag(Tag) : Location);
369     ExplodedNode *N = NodeBuilder::generateSink(LocalLoc, State, Pred);
370     if (N && N->isSink())
371       sinksGenerated.push_back(N);
372     return N;
373   }
374 
375   const SmallVectorImpl<ExplodedNode*> &getSinks() const {
376     return sinksGenerated;
377   }
378 };
379 
380 /// \class StmtNodeBuilder
381 /// This builder class is useful for generating nodes that resulted from
382 /// visiting a statement. The main difference from its parent NodeBuilder is
383 /// that it creates a statement specific ProgramPoint.
384 class StmtNodeBuilder: public NodeBuilder {
385   NodeBuilder *EnclosingBldr;
386 
387 public:
388   /// Constructs a StmtNodeBuilder. If the builder is going to process
389   /// nodes currently owned by another builder(with larger scope), use
390   /// Enclosing builder to transfer ownership.
391   StmtNodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
392                   const NodeBuilderContext &Ctx,
393                   NodeBuilder *Enclosing = nullptr)
394       : NodeBuilder(SrcNode, DstSet, Ctx), EnclosingBldr(Enclosing) {
395     if (EnclosingBldr)
396       EnclosingBldr->takeNodes(SrcNode);
397   }
398 
399   StmtNodeBuilder(ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
400                   const NodeBuilderContext &Ctx,
401                   NodeBuilder *Enclosing = nullptr)
402       : NodeBuilder(SrcSet, DstSet, Ctx), EnclosingBldr(Enclosing) {
403     if (EnclosingBldr)
404       for (const auto I : SrcSet)
405         EnclosingBldr->takeNodes(I);
406   }
407 
408   ~StmtNodeBuilder() override;
409 
410   using NodeBuilder::generateNode;
411   using NodeBuilder::generateSink;
412 
413   ExplodedNode *generateNode(const Stmt *S,
414                              ExplodedNode *Pred,
415                              ProgramStateRef St,
416                              const ProgramPointTag *tag = nullptr,
417                              ProgramPoint::Kind K = ProgramPoint::PostStmtKind){
418     const ProgramPoint &L = ProgramPoint::getProgramPoint(S, K,
419                                   Pred->getLocationContext(), tag);
420     return NodeBuilder::generateNode(L, St, Pred);
421   }
422 
423   ExplodedNode *generateSink(const Stmt *S,
424                              ExplodedNode *Pred,
425                              ProgramStateRef St,
426                              const ProgramPointTag *tag = nullptr,
427                              ProgramPoint::Kind K = ProgramPoint::PostStmtKind){
428     const ProgramPoint &L = ProgramPoint::getProgramPoint(S, K,
429                                   Pred->getLocationContext(), tag);
430     return NodeBuilder::generateSink(L, St, Pred);
431   }
432 };
433 
434 /// BranchNodeBuilder is responsible for constructing the nodes
435 /// corresponding to the two branches of the if statement - true and false.
436 class BranchNodeBuilder: public NodeBuilder {
437   const CFGBlock *DstT;
438   const CFGBlock *DstF;
439 
440   void anchor() override;
441 
442 public:
443   BranchNodeBuilder(ExplodedNode *SrcNode, ExplodedNodeSet &DstSet,
444                     const NodeBuilderContext &C, const CFGBlock *DT,
445                     const CFGBlock *DF)
446       : NodeBuilder(SrcNode, DstSet, C), DstT(DT), DstF(DF) {
447     // The branch node builder does not generate autotransitions.
448     // If there are no successors it means that both branches are infeasible.
449     takeNodes(SrcNode);
450   }
451 
452   BranchNodeBuilder(const ExplodedNodeSet &SrcSet, ExplodedNodeSet &DstSet,
453                     const NodeBuilderContext &C, const CFGBlock *DT,
454                     const CFGBlock *DF)
455       : NodeBuilder(SrcSet, DstSet, C), DstT(DT), DstF(DF) {
456     takeNodes(SrcSet);
457   }
458 
459   ExplodedNode *generateNode(ProgramStateRef State, bool branch,
460                              ExplodedNode *Pred);
461 };
462 
463 class IndirectGotoNodeBuilder {
464   CoreEngine& Eng;
465   const CFGBlock *Src;
466   const CFGBlock &DispatchBlock;
467   const Expr *E;
468   ExplodedNode *Pred;
469 
470 public:
471   IndirectGotoNodeBuilder(ExplodedNode *pred, const CFGBlock *src,
472                     const Expr *e, const CFGBlock *dispatch, CoreEngine* eng)
473       : Eng(*eng), Src(src), DispatchBlock(*dispatch), E(e), Pred(pred) {}
474 
475   class iterator {
476     friend class IndirectGotoNodeBuilder;
477 
478     CFGBlock::const_succ_iterator I;
479 
480     iterator(CFGBlock::const_succ_iterator i) : I(i) {}
481 
482   public:
483     // This isn't really a conventional iterator.
484     // We just implement the deref as a no-op for now to make range-based for
485     // loops work.
486     const iterator &operator*() const { return *this; }
487 
488     iterator &operator++() { ++I; return *this; }
489     bool operator!=(const iterator &X) const { return I != X.I; }
490 
491     const LabelDecl *getLabel() const {
492       return cast<LabelStmt>((*I)->getLabel())->getDecl();
493     }
494 
495     const CFGBlock *getBlock() const {
496       return *I;
497     }
498   };
499 
500   iterator begin() { return iterator(DispatchBlock.succ_begin()); }
501   iterator end() { return iterator(DispatchBlock.succ_end()); }
502 
503   ExplodedNode *generateNode(const iterator &I,
504                              ProgramStateRef State,
505                              bool isSink = false);
506 
507   const Expr *getTarget() const { return E; }
508 
509   ProgramStateRef getState() const { return Pred->State; }
510 
511   const LocationContext *getLocationContext() const {
512     return Pred->getLocationContext();
513   }
514 };
515 
516 class SwitchNodeBuilder {
517   CoreEngine& Eng;
518   const CFGBlock *Src;
519   const Expr *Condition;
520   ExplodedNode *Pred;
521 
522 public:
523   SwitchNodeBuilder(ExplodedNode *pred, const CFGBlock *src,
524                     const Expr *condition, CoreEngine* eng)
525       : Eng(*eng), Src(src), Condition(condition), Pred(pred) {}
526 
527   class iterator {
528     friend class SwitchNodeBuilder;
529 
530     CFGBlock::const_succ_reverse_iterator I;
531 
532     iterator(CFGBlock::const_succ_reverse_iterator i) : I(i) {}
533 
534   public:
535     iterator &operator++() { ++I; return *this; }
536     bool operator!=(const iterator &X) const { return I != X.I; }
537     bool operator==(const iterator &X) const { return I == X.I; }
538 
539     const CaseStmt *getCase() const {
540       return cast<CaseStmt>((*I)->getLabel());
541     }
542 
543     const CFGBlock *getBlock() const {
544       return *I;
545     }
546   };
547 
548   iterator begin() { return iterator(Src->succ_rbegin()+1); }
549   iterator end() { return iterator(Src->succ_rend()); }
550 
551   const SwitchStmt *getSwitch() const {
552     return cast<SwitchStmt>(Src->getTerminator());
553   }
554 
555   ExplodedNode *generateCaseStmtNode(const iterator &I,
556                                      ProgramStateRef State);
557 
558   ExplodedNode *generateDefaultCaseNode(ProgramStateRef State,
559                                         bool isSink = false);
560 
561   const Expr *getCondition() const { return Condition; }
562 
563   ProgramStateRef getState() const { return Pred->State; }
564 
565   const LocationContext *getLocationContext() const {
566     return Pred->getLocationContext();
567   }
568 };
569 
570 } // namespace ento
571 
572 } // namespace clang
573 
574 #endif // LLVM_CLANG_STATICANALYZER_CORE_PATHSENSITIVE_COREENGINE_H
575