xref: /openbsd-src/gnu/llvm/clang/lib/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.cpp (revision 12c855180aad702bbcca06e0398d774beeafb155)
1 //===- TypeErasedDataflowAnalysis.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 //  This file defines type-erased base types and functions for building dataflow
10 //  analyses that run over Control-Flow Graphs (CFGs).
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include <algorithm>
15 #include <memory>
16 #include <optional>
17 #include <system_error>
18 #include <utility>
19 #include <vector>
20 
21 #include "clang/AST/DeclCXX.h"
22 #include "clang/AST/OperationKinds.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Analysis/Analyses/PostOrderCFGView.h"
25 #include "clang/Analysis/CFG.h"
26 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
27 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
28 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
29 #include "clang/Analysis/FlowSensitive/Transfer.h"
30 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
31 #include "clang/Analysis/FlowSensitive/Value.h"
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/DenseSet.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Error.h"
37 
38 #define DEBUG_TYPE "clang-dataflow"
39 
40 namespace clang {
41 namespace dataflow {
42 
43 class StmtToEnvMapImpl : public StmtToEnvMap {
44 public:
StmtToEnvMapImpl(const ControlFlowContext & CFCtx,llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockToState)45   StmtToEnvMapImpl(
46       const ControlFlowContext &CFCtx,
47       llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>>
48           BlockToState)
49       : CFCtx(CFCtx), BlockToState(BlockToState) {}
50 
getEnvironment(const Stmt & S) const51   const Environment *getEnvironment(const Stmt &S) const override {
52     auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S));
53     assert(BlockIt != CFCtx.getStmtToBlock().end());
54     const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()];
55     assert(State);
56     return &State->Env;
57   }
58 
59 private:
60   const ControlFlowContext &CFCtx;
61   llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockToState;
62 };
63 
64 /// Returns the index of `Block` in the successors of `Pred`.
blockIndexInPredecessor(const CFGBlock & Pred,const CFGBlock & Block)65 static int blockIndexInPredecessor(const CFGBlock &Pred,
66                                    const CFGBlock &Block) {
67   auto BlockPos = llvm::find_if(
68       Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) {
69         return Succ && Succ->getBlockID() == Block.getBlockID();
70       });
71   return BlockPos - Pred.succ_begin();
72 }
73 
isLoopHead(const CFGBlock & B)74 static bool isLoopHead(const CFGBlock &B) {
75   if (const auto *T = B.getTerminatorStmt())
76     switch (T->getStmtClass()) {
77       case Stmt::WhileStmtClass:
78       case Stmt::DoStmtClass:
79       case Stmt::ForStmtClass:
80         return true;
81       default:
82         return false;
83     }
84 
85   return false;
86 }
87 
88 // The return type of the visit functions in TerminatorVisitor. The first
89 // element represents the terminator expression (that is the conditional
90 // expression in case of a path split in the CFG). The second element
91 // represents whether the condition was true or false.
92 using TerminatorVisitorRetTy = std::pair<const Expr *, bool>;
93 
94 /// Extends the flow condition of an environment based on a terminator
95 /// statement.
96 class TerminatorVisitor
97     : public ConstStmtVisitor<TerminatorVisitor, TerminatorVisitorRetTy> {
98 public:
TerminatorVisitor(const StmtToEnvMap & StmtToEnv,Environment & Env,int BlockSuccIdx)99   TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
100                     int BlockSuccIdx)
101       : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx) {}
102 
VisitIfStmt(const IfStmt * S)103   TerminatorVisitorRetTy VisitIfStmt(const IfStmt *S) {
104     auto *Cond = S->getCond();
105     assert(Cond != nullptr);
106     return extendFlowCondition(*Cond);
107   }
108 
VisitWhileStmt(const WhileStmt * S)109   TerminatorVisitorRetTy VisitWhileStmt(const WhileStmt *S) {
110     auto *Cond = S->getCond();
111     assert(Cond != nullptr);
112     return extendFlowCondition(*Cond);
113   }
114 
VisitDoStmt(const DoStmt * S)115   TerminatorVisitorRetTy VisitDoStmt(const DoStmt *S) {
116     auto *Cond = S->getCond();
117     assert(Cond != nullptr);
118     return extendFlowCondition(*Cond);
119   }
120 
VisitForStmt(const ForStmt * S)121   TerminatorVisitorRetTy VisitForStmt(const ForStmt *S) {
122     auto *Cond = S->getCond();
123     if (Cond != nullptr)
124       return extendFlowCondition(*Cond);
125     return {nullptr, false};
126   }
127 
VisitBinaryOperator(const BinaryOperator * S)128   TerminatorVisitorRetTy VisitBinaryOperator(const BinaryOperator *S) {
129     assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr);
130     auto *LHS = S->getLHS();
131     assert(LHS != nullptr);
132     return extendFlowCondition(*LHS);
133   }
134 
135   TerminatorVisitorRetTy
VisitConditionalOperator(const ConditionalOperator * S)136   VisitConditionalOperator(const ConditionalOperator *S) {
137     auto *Cond = S->getCond();
138     assert(Cond != nullptr);
139     return extendFlowCondition(*Cond);
140   }
141 
142 private:
extendFlowCondition(const Expr & Cond)143   TerminatorVisitorRetTy extendFlowCondition(const Expr &Cond) {
144     // The terminator sub-expression might not be evaluated.
145     if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr)
146       transfer(StmtToEnv, Cond, Env);
147 
148     // FIXME: The flow condition must be an r-value, so `SkipPast::None` should
149     // suffice.
150     auto *Val =
151         cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference));
152     // Value merging depends on flow conditions from different environments
153     // being mutually exclusive -- that is, they cannot both be true in their
154     // entirety (even if they may share some clauses). So, we need *some* value
155     // for the condition expression, even if just an atom.
156     if (Val == nullptr) {
157       // FIXME: Consider introducing a helper for this get-or-create pattern.
158       auto *Loc = Env.getStorageLocation(Cond, SkipPast::None);
159       if (Loc == nullptr) {
160         Loc = &Env.createStorageLocation(Cond);
161         Env.setStorageLocation(Cond, *Loc);
162       }
163       Val = &Env.makeAtomicBoolValue();
164       Env.setValue(*Loc, *Val);
165     }
166 
167     bool ConditionValue = true;
168     // The condition must be inverted for the successor that encompasses the
169     // "else" branch, if such exists.
170     if (BlockSuccIdx == 1) {
171       Val = &Env.makeNot(*Val);
172       ConditionValue = false;
173     }
174 
175     Env.addToFlowCondition(*Val);
176     return {&Cond, ConditionValue};
177   }
178 
179   const StmtToEnvMap &StmtToEnv;
180   Environment &Env;
181   int BlockSuccIdx;
182 };
183 
184 /// Holds data structures required for running dataflow analysis.
185 struct AnalysisContext {
AnalysisContextclang::dataflow::AnalysisContext186   AnalysisContext(const ControlFlowContext &CFCtx,
187                   TypeErasedDataflowAnalysis &Analysis,
188                   const Environment &InitEnv,
189                   llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>>
190                       BlockStates)
191       : CFCtx(CFCtx), Analysis(Analysis), InitEnv(InitEnv),
192         BlockStates(BlockStates) {}
193 
194   /// Contains the CFG being analyzed.
195   const ControlFlowContext &CFCtx;
196   /// The analysis to be run.
197   TypeErasedDataflowAnalysis &Analysis;
198   /// Initial state to start the analysis.
199   const Environment &InitEnv;
200   /// Stores the state of a CFG block if it has been evaluated by the analysis.
201   /// The indices correspond to the block IDs.
202   llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates;
203 };
204 
205 /// Computes the input state for a given basic block by joining the output
206 /// states of its predecessors.
207 ///
208 /// Requirements:
209 ///
210 ///   All predecessors of `Block` except those with loop back edges must have
211 ///   already been transferred. States in `AC.BlockStates` that are set to
212 ///   `std::nullopt` represent basic blocks that are not evaluated yet.
213 static TypeErasedDataflowAnalysisState
computeBlockInputState(const CFGBlock & Block,AnalysisContext & AC)214 computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
215   llvm::DenseSet<const CFGBlock *> Preds;
216   Preds.insert(Block.pred_begin(), Block.pred_end());
217   if (Block.getTerminator().isTemporaryDtorsBranch()) {
218     // This handles a special case where the code that produced the CFG includes
219     // a conditional operator with a branch that constructs a temporary and
220     // calls a destructor annotated as noreturn. The CFG models this as follows:
221     //
222     // B1 (contains the condition of the conditional operator) - succs: B2, B3
223     // B2 (contains code that does not call a noreturn destructor) - succs: B4
224     // B3 (contains code that calls a noreturn destructor) - succs: B4
225     // B4 (has temporary destructor terminator) - succs: B5, B6
226     // B5 (noreturn block that is associated with the noreturn destructor call)
227     // B6 (contains code that follows the conditional operator statement)
228     //
229     // The first successor (B5 above) of a basic block with a temporary
230     // destructor terminator (B4 above) is the block that evaluates the
231     // destructor. If that block has a noreturn element then the predecessor
232     // block that constructed the temporary object (B3 above) is effectively a
233     // noreturn block and its state should not be used as input for the state
234     // of the block that has a temporary destructor terminator (B4 above). This
235     // holds regardless of which branch of the ternary operator calls the
236     // noreturn destructor. However, it doesn't cases where a nested ternary
237     // operator includes a branch that contains a noreturn destructor call.
238     //
239     // See `NoreturnDestructorTest` for concrete examples.
240     if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
241       auto &StmtToBlock = AC.CFCtx.getStmtToBlock();
242       auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt());
243       assert(StmtBlock != StmtToBlock.end());
244       Preds.erase(StmtBlock->getSecond());
245     }
246   }
247 
248   std::optional<TypeErasedDataflowAnalysisState> MaybeState;
249 
250   auto &Analysis = AC.Analysis;
251   for (const CFGBlock *Pred : Preds) {
252     // Skip if the `Block` is unreachable or control flow cannot get past it.
253     if (!Pred || Pred->hasNoReturnElement())
254       continue;
255 
256     // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
257     // loop back edge to `Block`.
258     const std::optional<TypeErasedDataflowAnalysisState> &MaybePredState =
259         AC.BlockStates[Pred->getBlockID()];
260     if (!MaybePredState)
261       continue;
262 
263     TypeErasedDataflowAnalysisState PredState = *MaybePredState;
264     if (Analysis.builtinOptions()) {
265       if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
266         const StmtToEnvMapImpl StmtToEnv(AC.CFCtx, AC.BlockStates);
267         auto [Cond, CondValue] =
268             TerminatorVisitor(StmtToEnv, PredState.Env,
269                               blockIndexInPredecessor(*Pred, Block))
270                 .Visit(PredTerminatorStmt);
271         if (Cond != nullptr)
272           // FIXME: Call transferBranchTypeErased even if BuiltinTransferOpts
273           // are not set.
274           Analysis.transferBranchTypeErased(CondValue, Cond, PredState.Lattice,
275                                             PredState.Env);
276       }
277     }
278 
279     if (MaybeState) {
280       Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice);
281       MaybeState->Env.join(PredState.Env, Analysis);
282     } else {
283       MaybeState = std::move(PredState);
284     }
285   }
286   if (!MaybeState) {
287     // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
288     // to enable building analyses like computation of dominators that
289     // initialize the state of each basic block differently.
290     MaybeState.emplace(Analysis.typeErasedInitialElement(), AC.InitEnv);
291   }
292   return *MaybeState;
293 }
294 
295 /// Built-in transfer function for `CFGStmt`.
builtinTransferStatement(const CFGStmt & Elt,TypeErasedDataflowAnalysisState & InputState,AnalysisContext & AC)296 void builtinTransferStatement(const CFGStmt &Elt,
297                               TypeErasedDataflowAnalysisState &InputState,
298                               AnalysisContext &AC) {
299   const Stmt *S = Elt.getStmt();
300   assert(S != nullptr);
301   transfer(StmtToEnvMapImpl(AC.CFCtx, AC.BlockStates), *S, InputState.Env);
302 }
303 
304 /// Built-in transfer function for `CFGInitializer`.
builtinTransferInitializer(const CFGInitializer & Elt,TypeErasedDataflowAnalysisState & InputState)305 void builtinTransferInitializer(const CFGInitializer &Elt,
306                                 TypeErasedDataflowAnalysisState &InputState) {
307   const CXXCtorInitializer *Init = Elt.getInitializer();
308   assert(Init != nullptr);
309 
310   auto &Env = InputState.Env;
311   const auto &ThisLoc =
312       *cast<AggregateStorageLocation>(Env.getThisPointeeStorageLocation());
313 
314   const FieldDecl *Member = Init->getMember();
315   if (Member == nullptr)
316     // Not a field initializer.
317     return;
318 
319   auto *InitStmt = Init->getInit();
320   assert(InitStmt != nullptr);
321 
322   auto *InitStmtLoc = Env.getStorageLocation(*InitStmt, SkipPast::Reference);
323   if (InitStmtLoc == nullptr)
324     return;
325 
326   auto *InitStmtVal = Env.getValue(*InitStmtLoc);
327   if (InitStmtVal == nullptr)
328     return;
329 
330   if (Member->getType()->isReferenceType()) {
331     auto &MemberLoc = ThisLoc.getChild(*Member);
332     Env.setValue(MemberLoc, Env.takeOwnership(std::make_unique<ReferenceValue>(
333                                 *InitStmtLoc)));
334   } else {
335     auto &MemberLoc = ThisLoc.getChild(*Member);
336     Env.setValue(MemberLoc, *InitStmtVal);
337   }
338 }
339 
builtinTransfer(const CFGElement & Elt,TypeErasedDataflowAnalysisState & State,AnalysisContext & AC)340 void builtinTransfer(const CFGElement &Elt,
341                      TypeErasedDataflowAnalysisState &State,
342                      AnalysisContext &AC) {
343   switch (Elt.getKind()) {
344   case CFGElement::Statement:
345     builtinTransferStatement(Elt.castAs<CFGStmt>(), State, AC);
346     break;
347   case CFGElement::Initializer:
348     builtinTransferInitializer(Elt.castAs<CFGInitializer>(), State);
349     break;
350   default:
351     // FIXME: Evaluate other kinds of `CFGElement`.
352     break;
353   }
354 }
355 
356 /// Transfers `State` by evaluating each element in the `Block` based on the
357 /// `AC.Analysis` specified.
358 ///
359 /// Built-in transfer functions (if the option for `ApplyBuiltinTransfer` is set
360 /// by the analysis) will be applied to the element before evaluation by the
361 /// user-specified analysis.
362 /// `PostVisitCFG` (if provided) will be applied to the element after evaluation
363 /// by the user-specified analysis.
364 TypeErasedDataflowAnalysisState
transferCFGBlock(const CFGBlock & Block,AnalysisContext & AC,std::function<void (const CFGElement &,const TypeErasedDataflowAnalysisState &)> PostVisitCFG=nullptr)365 transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
366                  std::function<void(const CFGElement &,
367                                     const TypeErasedDataflowAnalysisState &)>
368                      PostVisitCFG = nullptr) {
369   auto State = computeBlockInputState(Block, AC);
370   for (const auto &Element : Block) {
371     // Built-in analysis
372     if (AC.Analysis.builtinOptions()) {
373       builtinTransfer(Element, State, AC);
374     }
375 
376     // User-provided analysis
377     AC.Analysis.transferTypeErased(&Element, State.Lattice, State.Env);
378 
379     // Post processing
380     if (PostVisitCFG) {
381       PostVisitCFG(Element, State);
382     }
383   }
384   return State;
385 }
386 
transferBlock(const ControlFlowContext & CFCtx,llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates,const CFGBlock & Block,const Environment & InitEnv,TypeErasedDataflowAnalysis & Analysis,std::function<void (const CFGElement &,const TypeErasedDataflowAnalysisState &)> PostVisitCFG)387 TypeErasedDataflowAnalysisState transferBlock(
388     const ControlFlowContext &CFCtx,
389     llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates,
390     const CFGBlock &Block, const Environment &InitEnv,
391     TypeErasedDataflowAnalysis &Analysis,
392     std::function<void(const CFGElement &,
393                        const TypeErasedDataflowAnalysisState &)>
394         PostVisitCFG) {
395   AnalysisContext AC(CFCtx, Analysis, InitEnv, BlockStates);
396   return transferCFGBlock(Block, AC, PostVisitCFG);
397 }
398 
399 llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
runTypeErasedDataflowAnalysis(const ControlFlowContext & CFCtx,TypeErasedDataflowAnalysis & Analysis,const Environment & InitEnv,std::function<void (const CFGElement &,const TypeErasedDataflowAnalysisState &)> PostVisitCFG)400 runTypeErasedDataflowAnalysis(
401     const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis,
402     const Environment &InitEnv,
403     std::function<void(const CFGElement &,
404                        const TypeErasedDataflowAnalysisState &)>
405         PostVisitCFG) {
406   PostOrderCFGView POV(&CFCtx.getCFG());
407   ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV);
408 
409   std::vector<std::optional<TypeErasedDataflowAnalysisState>> BlockStates(
410       CFCtx.getCFG().size(), std::nullopt);
411 
412   // The entry basic block doesn't contain statements so it can be skipped.
413   const CFGBlock &Entry = CFCtx.getCFG().getEntry();
414   BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
415                                      InitEnv};
416   Worklist.enqueueSuccessors(&Entry);
417 
418   AnalysisContext AC(CFCtx, Analysis, InitEnv, BlockStates);
419 
420   // Bugs in lattices and transfer functions can prevent the analysis from
421   // converging. To limit the damage (infinite loops) that these bugs can cause,
422   // limit the number of iterations.
423   // FIXME: Consider making the maximum number of iterations configurable.
424   // FIXME: Consider restricting the number of backedges followed, rather than
425   // iterations.
426   // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
427   // of iterations, number of functions that time out, etc.
428   static constexpr uint32_t MaxAverageVisitsPerBlock = 4;
429   static constexpr uint32_t AbsoluteMaxIterations = 1 << 16;
430   const uint32_t RelativeMaxIterations =
431       MaxAverageVisitsPerBlock * BlockStates.size();
432   const uint32_t MaxIterations =
433       std::min(RelativeMaxIterations, AbsoluteMaxIterations);
434   uint32_t Iterations = 0;
435   while (const CFGBlock *Block = Worklist.dequeue()) {
436     LLVM_DEBUG(llvm::dbgs()
437                << "Processing Block " << Block->getBlockID() << "\n");
438     if (++Iterations > MaxIterations) {
439       return llvm::createStringError(std::errc::timed_out,
440                                      "maximum number of iterations reached");
441     }
442 
443     const std::optional<TypeErasedDataflowAnalysisState> &OldBlockState =
444         BlockStates[Block->getBlockID()];
445     TypeErasedDataflowAnalysisState NewBlockState =
446         transferCFGBlock(*Block, AC);
447     LLVM_DEBUG({
448       llvm::errs() << "New Env:\n";
449       NewBlockState.Env.dump();
450     });
451 
452     if (OldBlockState) {
453       LLVM_DEBUG({
454         llvm::errs() << "Old Env:\n";
455         OldBlockState->Env.dump();
456       });
457       if (isLoopHead(*Block)) {
458         LatticeJoinEffect Effect1 = Analysis.widenTypeErased(
459             NewBlockState.Lattice, OldBlockState->Lattice);
460         LatticeJoinEffect Effect2 =
461             NewBlockState.Env.widen(OldBlockState->Env, Analysis);
462         if (Effect1 == LatticeJoinEffect::Unchanged &&
463             Effect2 == LatticeJoinEffect::Unchanged)
464           // The state of `Block` didn't change from widening so there's no need
465           // to revisit its successors.
466           continue;
467       } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice,
468                                             NewBlockState.Lattice) &&
469                  OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) {
470         // The state of `Block` didn't change after transfer so there's no need
471         // to revisit its successors.
472         continue;
473       }
474     }
475 
476     BlockStates[Block->getBlockID()] = std::move(NewBlockState);
477 
478     // Do not add unreachable successor blocks to `Worklist`.
479     if (Block->hasNoReturnElement())
480       continue;
481 
482     Worklist.enqueueSuccessors(Block);
483   }
484   // FIXME: Consider evaluating unreachable basic blocks (those that have a
485   // state set to `std::nullopt` at this point) to also analyze dead code.
486 
487   if (PostVisitCFG) {
488     for (const CFGBlock *Block : CFCtx.getCFG()) {
489       // Skip blocks that were not evaluated.
490       if (!BlockStates[Block->getBlockID()])
491         continue;
492       transferCFGBlock(*Block, AC, PostVisitCFG);
493     }
494   }
495 
496   return BlockStates;
497 }
498 
499 } // namespace dataflow
500 } // namespace clang
501