1 //===- TypeErasedDataflowAnalysis.cpp -------------------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines type-erased base types and functions for building dataflow
10 // analyses that run over Control-Flow Graphs (CFGs).
11 //
12 //===----------------------------------------------------------------------===//
13
14 #include <algorithm>
15 #include <memory>
16 #include <optional>
17 #include <system_error>
18 #include <utility>
19 #include <vector>
20
21 #include "clang/AST/DeclCXX.h"
22 #include "clang/AST/OperationKinds.h"
23 #include "clang/AST/StmtVisitor.h"
24 #include "clang/Analysis/Analyses/PostOrderCFGView.h"
25 #include "clang/Analysis/CFG.h"
26 #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h"
27 #include "clang/Analysis/FlowSensitive/DataflowLattice.h"
28 #include "clang/Analysis/FlowSensitive/DataflowWorklist.h"
29 #include "clang/Analysis/FlowSensitive/Transfer.h"
30 #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h"
31 #include "clang/Analysis/FlowSensitive/Value.h"
32 #include "llvm/ADT/ArrayRef.h"
33 #include "llvm/ADT/DenseSet.h"
34 #include "llvm/ADT/STLExtras.h"
35 #include "llvm/Support/Debug.h"
36 #include "llvm/Support/Error.h"
37
38 #define DEBUG_TYPE "clang-dataflow"
39
40 namespace clang {
41 namespace dataflow {
42
43 class StmtToEnvMapImpl : public StmtToEnvMap {
44 public:
StmtToEnvMapImpl(const ControlFlowContext & CFCtx,llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockToState)45 StmtToEnvMapImpl(
46 const ControlFlowContext &CFCtx,
47 llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>>
48 BlockToState)
49 : CFCtx(CFCtx), BlockToState(BlockToState) {}
50
getEnvironment(const Stmt & S) const51 const Environment *getEnvironment(const Stmt &S) const override {
52 auto BlockIt = CFCtx.getStmtToBlock().find(&ignoreCFGOmittedNodes(S));
53 assert(BlockIt != CFCtx.getStmtToBlock().end());
54 const auto &State = BlockToState[BlockIt->getSecond()->getBlockID()];
55 assert(State);
56 return &State->Env;
57 }
58
59 private:
60 const ControlFlowContext &CFCtx;
61 llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockToState;
62 };
63
64 /// Returns the index of `Block` in the successors of `Pred`.
blockIndexInPredecessor(const CFGBlock & Pred,const CFGBlock & Block)65 static int blockIndexInPredecessor(const CFGBlock &Pred,
66 const CFGBlock &Block) {
67 auto BlockPos = llvm::find_if(
68 Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) {
69 return Succ && Succ->getBlockID() == Block.getBlockID();
70 });
71 return BlockPos - Pred.succ_begin();
72 }
73
isLoopHead(const CFGBlock & B)74 static bool isLoopHead(const CFGBlock &B) {
75 if (const auto *T = B.getTerminatorStmt())
76 switch (T->getStmtClass()) {
77 case Stmt::WhileStmtClass:
78 case Stmt::DoStmtClass:
79 case Stmt::ForStmtClass:
80 return true;
81 default:
82 return false;
83 }
84
85 return false;
86 }
87
88 // The return type of the visit functions in TerminatorVisitor. The first
89 // element represents the terminator expression (that is the conditional
90 // expression in case of a path split in the CFG). The second element
91 // represents whether the condition was true or false.
92 using TerminatorVisitorRetTy = std::pair<const Expr *, bool>;
93
94 /// Extends the flow condition of an environment based on a terminator
95 /// statement.
96 class TerminatorVisitor
97 : public ConstStmtVisitor<TerminatorVisitor, TerminatorVisitorRetTy> {
98 public:
TerminatorVisitor(const StmtToEnvMap & StmtToEnv,Environment & Env,int BlockSuccIdx)99 TerminatorVisitor(const StmtToEnvMap &StmtToEnv, Environment &Env,
100 int BlockSuccIdx)
101 : StmtToEnv(StmtToEnv), Env(Env), BlockSuccIdx(BlockSuccIdx) {}
102
VisitIfStmt(const IfStmt * S)103 TerminatorVisitorRetTy VisitIfStmt(const IfStmt *S) {
104 auto *Cond = S->getCond();
105 assert(Cond != nullptr);
106 return extendFlowCondition(*Cond);
107 }
108
VisitWhileStmt(const WhileStmt * S)109 TerminatorVisitorRetTy VisitWhileStmt(const WhileStmt *S) {
110 auto *Cond = S->getCond();
111 assert(Cond != nullptr);
112 return extendFlowCondition(*Cond);
113 }
114
VisitDoStmt(const DoStmt * S)115 TerminatorVisitorRetTy VisitDoStmt(const DoStmt *S) {
116 auto *Cond = S->getCond();
117 assert(Cond != nullptr);
118 return extendFlowCondition(*Cond);
119 }
120
VisitForStmt(const ForStmt * S)121 TerminatorVisitorRetTy VisitForStmt(const ForStmt *S) {
122 auto *Cond = S->getCond();
123 if (Cond != nullptr)
124 return extendFlowCondition(*Cond);
125 return {nullptr, false};
126 }
127
VisitBinaryOperator(const BinaryOperator * S)128 TerminatorVisitorRetTy VisitBinaryOperator(const BinaryOperator *S) {
129 assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr);
130 auto *LHS = S->getLHS();
131 assert(LHS != nullptr);
132 return extendFlowCondition(*LHS);
133 }
134
135 TerminatorVisitorRetTy
VisitConditionalOperator(const ConditionalOperator * S)136 VisitConditionalOperator(const ConditionalOperator *S) {
137 auto *Cond = S->getCond();
138 assert(Cond != nullptr);
139 return extendFlowCondition(*Cond);
140 }
141
142 private:
extendFlowCondition(const Expr & Cond)143 TerminatorVisitorRetTy extendFlowCondition(const Expr &Cond) {
144 // The terminator sub-expression might not be evaluated.
145 if (Env.getStorageLocation(Cond, SkipPast::None) == nullptr)
146 transfer(StmtToEnv, Cond, Env);
147
148 // FIXME: The flow condition must be an r-value, so `SkipPast::None` should
149 // suffice.
150 auto *Val =
151 cast_or_null<BoolValue>(Env.getValue(Cond, SkipPast::Reference));
152 // Value merging depends on flow conditions from different environments
153 // being mutually exclusive -- that is, they cannot both be true in their
154 // entirety (even if they may share some clauses). So, we need *some* value
155 // for the condition expression, even if just an atom.
156 if (Val == nullptr) {
157 // FIXME: Consider introducing a helper for this get-or-create pattern.
158 auto *Loc = Env.getStorageLocation(Cond, SkipPast::None);
159 if (Loc == nullptr) {
160 Loc = &Env.createStorageLocation(Cond);
161 Env.setStorageLocation(Cond, *Loc);
162 }
163 Val = &Env.makeAtomicBoolValue();
164 Env.setValue(*Loc, *Val);
165 }
166
167 bool ConditionValue = true;
168 // The condition must be inverted for the successor that encompasses the
169 // "else" branch, if such exists.
170 if (BlockSuccIdx == 1) {
171 Val = &Env.makeNot(*Val);
172 ConditionValue = false;
173 }
174
175 Env.addToFlowCondition(*Val);
176 return {&Cond, ConditionValue};
177 }
178
179 const StmtToEnvMap &StmtToEnv;
180 Environment &Env;
181 int BlockSuccIdx;
182 };
183
184 /// Holds data structures required for running dataflow analysis.
185 struct AnalysisContext {
AnalysisContextclang::dataflow::AnalysisContext186 AnalysisContext(const ControlFlowContext &CFCtx,
187 TypeErasedDataflowAnalysis &Analysis,
188 const Environment &InitEnv,
189 llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>>
190 BlockStates)
191 : CFCtx(CFCtx), Analysis(Analysis), InitEnv(InitEnv),
192 BlockStates(BlockStates) {}
193
194 /// Contains the CFG being analyzed.
195 const ControlFlowContext &CFCtx;
196 /// The analysis to be run.
197 TypeErasedDataflowAnalysis &Analysis;
198 /// Initial state to start the analysis.
199 const Environment &InitEnv;
200 /// Stores the state of a CFG block if it has been evaluated by the analysis.
201 /// The indices correspond to the block IDs.
202 llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates;
203 };
204
205 /// Computes the input state for a given basic block by joining the output
206 /// states of its predecessors.
207 ///
208 /// Requirements:
209 ///
210 /// All predecessors of `Block` except those with loop back edges must have
211 /// already been transferred. States in `AC.BlockStates` that are set to
212 /// `std::nullopt` represent basic blocks that are not evaluated yet.
213 static TypeErasedDataflowAnalysisState
computeBlockInputState(const CFGBlock & Block,AnalysisContext & AC)214 computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) {
215 llvm::DenseSet<const CFGBlock *> Preds;
216 Preds.insert(Block.pred_begin(), Block.pred_end());
217 if (Block.getTerminator().isTemporaryDtorsBranch()) {
218 // This handles a special case where the code that produced the CFG includes
219 // a conditional operator with a branch that constructs a temporary and
220 // calls a destructor annotated as noreturn. The CFG models this as follows:
221 //
222 // B1 (contains the condition of the conditional operator) - succs: B2, B3
223 // B2 (contains code that does not call a noreturn destructor) - succs: B4
224 // B3 (contains code that calls a noreturn destructor) - succs: B4
225 // B4 (has temporary destructor terminator) - succs: B5, B6
226 // B5 (noreturn block that is associated with the noreturn destructor call)
227 // B6 (contains code that follows the conditional operator statement)
228 //
229 // The first successor (B5 above) of a basic block with a temporary
230 // destructor terminator (B4 above) is the block that evaluates the
231 // destructor. If that block has a noreturn element then the predecessor
232 // block that constructed the temporary object (B3 above) is effectively a
233 // noreturn block and its state should not be used as input for the state
234 // of the block that has a temporary destructor terminator (B4 above). This
235 // holds regardless of which branch of the ternary operator calls the
236 // noreturn destructor. However, it doesn't cases where a nested ternary
237 // operator includes a branch that contains a noreturn destructor call.
238 //
239 // See `NoreturnDestructorTest` for concrete examples.
240 if (Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) {
241 auto &StmtToBlock = AC.CFCtx.getStmtToBlock();
242 auto StmtBlock = StmtToBlock.find(Block.getTerminatorStmt());
243 assert(StmtBlock != StmtToBlock.end());
244 Preds.erase(StmtBlock->getSecond());
245 }
246 }
247
248 std::optional<TypeErasedDataflowAnalysisState> MaybeState;
249
250 auto &Analysis = AC.Analysis;
251 for (const CFGBlock *Pred : Preds) {
252 // Skip if the `Block` is unreachable or control flow cannot get past it.
253 if (!Pred || Pred->hasNoReturnElement())
254 continue;
255
256 // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a
257 // loop back edge to `Block`.
258 const std::optional<TypeErasedDataflowAnalysisState> &MaybePredState =
259 AC.BlockStates[Pred->getBlockID()];
260 if (!MaybePredState)
261 continue;
262
263 TypeErasedDataflowAnalysisState PredState = *MaybePredState;
264 if (Analysis.builtinOptions()) {
265 if (const Stmt *PredTerminatorStmt = Pred->getTerminatorStmt()) {
266 const StmtToEnvMapImpl StmtToEnv(AC.CFCtx, AC.BlockStates);
267 auto [Cond, CondValue] =
268 TerminatorVisitor(StmtToEnv, PredState.Env,
269 blockIndexInPredecessor(*Pred, Block))
270 .Visit(PredTerminatorStmt);
271 if (Cond != nullptr)
272 // FIXME: Call transferBranchTypeErased even if BuiltinTransferOpts
273 // are not set.
274 Analysis.transferBranchTypeErased(CondValue, Cond, PredState.Lattice,
275 PredState.Env);
276 }
277 }
278
279 if (MaybeState) {
280 Analysis.joinTypeErased(MaybeState->Lattice, PredState.Lattice);
281 MaybeState->Env.join(PredState.Env, Analysis);
282 } else {
283 MaybeState = std::move(PredState);
284 }
285 }
286 if (!MaybeState) {
287 // FIXME: Consider passing `Block` to `Analysis.typeErasedInitialElement()`
288 // to enable building analyses like computation of dominators that
289 // initialize the state of each basic block differently.
290 MaybeState.emplace(Analysis.typeErasedInitialElement(), AC.InitEnv);
291 }
292 return *MaybeState;
293 }
294
295 /// Built-in transfer function for `CFGStmt`.
builtinTransferStatement(const CFGStmt & Elt,TypeErasedDataflowAnalysisState & InputState,AnalysisContext & AC)296 void builtinTransferStatement(const CFGStmt &Elt,
297 TypeErasedDataflowAnalysisState &InputState,
298 AnalysisContext &AC) {
299 const Stmt *S = Elt.getStmt();
300 assert(S != nullptr);
301 transfer(StmtToEnvMapImpl(AC.CFCtx, AC.BlockStates), *S, InputState.Env);
302 }
303
304 /// Built-in transfer function for `CFGInitializer`.
builtinTransferInitializer(const CFGInitializer & Elt,TypeErasedDataflowAnalysisState & InputState)305 void builtinTransferInitializer(const CFGInitializer &Elt,
306 TypeErasedDataflowAnalysisState &InputState) {
307 const CXXCtorInitializer *Init = Elt.getInitializer();
308 assert(Init != nullptr);
309
310 auto &Env = InputState.Env;
311 const auto &ThisLoc =
312 *cast<AggregateStorageLocation>(Env.getThisPointeeStorageLocation());
313
314 const FieldDecl *Member = Init->getMember();
315 if (Member == nullptr)
316 // Not a field initializer.
317 return;
318
319 auto *InitStmt = Init->getInit();
320 assert(InitStmt != nullptr);
321
322 auto *InitStmtLoc = Env.getStorageLocation(*InitStmt, SkipPast::Reference);
323 if (InitStmtLoc == nullptr)
324 return;
325
326 auto *InitStmtVal = Env.getValue(*InitStmtLoc);
327 if (InitStmtVal == nullptr)
328 return;
329
330 if (Member->getType()->isReferenceType()) {
331 auto &MemberLoc = ThisLoc.getChild(*Member);
332 Env.setValue(MemberLoc, Env.takeOwnership(std::make_unique<ReferenceValue>(
333 *InitStmtLoc)));
334 } else {
335 auto &MemberLoc = ThisLoc.getChild(*Member);
336 Env.setValue(MemberLoc, *InitStmtVal);
337 }
338 }
339
builtinTransfer(const CFGElement & Elt,TypeErasedDataflowAnalysisState & State,AnalysisContext & AC)340 void builtinTransfer(const CFGElement &Elt,
341 TypeErasedDataflowAnalysisState &State,
342 AnalysisContext &AC) {
343 switch (Elt.getKind()) {
344 case CFGElement::Statement:
345 builtinTransferStatement(Elt.castAs<CFGStmt>(), State, AC);
346 break;
347 case CFGElement::Initializer:
348 builtinTransferInitializer(Elt.castAs<CFGInitializer>(), State);
349 break;
350 default:
351 // FIXME: Evaluate other kinds of `CFGElement`.
352 break;
353 }
354 }
355
356 /// Transfers `State` by evaluating each element in the `Block` based on the
357 /// `AC.Analysis` specified.
358 ///
359 /// Built-in transfer functions (if the option for `ApplyBuiltinTransfer` is set
360 /// by the analysis) will be applied to the element before evaluation by the
361 /// user-specified analysis.
362 /// `PostVisitCFG` (if provided) will be applied to the element after evaluation
363 /// by the user-specified analysis.
364 TypeErasedDataflowAnalysisState
transferCFGBlock(const CFGBlock & Block,AnalysisContext & AC,std::function<void (const CFGElement &,const TypeErasedDataflowAnalysisState &)> PostVisitCFG=nullptr)365 transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC,
366 std::function<void(const CFGElement &,
367 const TypeErasedDataflowAnalysisState &)>
368 PostVisitCFG = nullptr) {
369 auto State = computeBlockInputState(Block, AC);
370 for (const auto &Element : Block) {
371 // Built-in analysis
372 if (AC.Analysis.builtinOptions()) {
373 builtinTransfer(Element, State, AC);
374 }
375
376 // User-provided analysis
377 AC.Analysis.transferTypeErased(&Element, State.Lattice, State.Env);
378
379 // Post processing
380 if (PostVisitCFG) {
381 PostVisitCFG(Element, State);
382 }
383 }
384 return State;
385 }
386
transferBlock(const ControlFlowContext & CFCtx,llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates,const CFGBlock & Block,const Environment & InitEnv,TypeErasedDataflowAnalysis & Analysis,std::function<void (const CFGElement &,const TypeErasedDataflowAnalysisState &)> PostVisitCFG)387 TypeErasedDataflowAnalysisState transferBlock(
388 const ControlFlowContext &CFCtx,
389 llvm::ArrayRef<std::optional<TypeErasedDataflowAnalysisState>> BlockStates,
390 const CFGBlock &Block, const Environment &InitEnv,
391 TypeErasedDataflowAnalysis &Analysis,
392 std::function<void(const CFGElement &,
393 const TypeErasedDataflowAnalysisState &)>
394 PostVisitCFG) {
395 AnalysisContext AC(CFCtx, Analysis, InitEnv, BlockStates);
396 return transferCFGBlock(Block, AC, PostVisitCFG);
397 }
398
399 llvm::Expected<std::vector<std::optional<TypeErasedDataflowAnalysisState>>>
runTypeErasedDataflowAnalysis(const ControlFlowContext & CFCtx,TypeErasedDataflowAnalysis & Analysis,const Environment & InitEnv,std::function<void (const CFGElement &,const TypeErasedDataflowAnalysisState &)> PostVisitCFG)400 runTypeErasedDataflowAnalysis(
401 const ControlFlowContext &CFCtx, TypeErasedDataflowAnalysis &Analysis,
402 const Environment &InitEnv,
403 std::function<void(const CFGElement &,
404 const TypeErasedDataflowAnalysisState &)>
405 PostVisitCFG) {
406 PostOrderCFGView POV(&CFCtx.getCFG());
407 ForwardDataflowWorklist Worklist(CFCtx.getCFG(), &POV);
408
409 std::vector<std::optional<TypeErasedDataflowAnalysisState>> BlockStates(
410 CFCtx.getCFG().size(), std::nullopt);
411
412 // The entry basic block doesn't contain statements so it can be skipped.
413 const CFGBlock &Entry = CFCtx.getCFG().getEntry();
414 BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(),
415 InitEnv};
416 Worklist.enqueueSuccessors(&Entry);
417
418 AnalysisContext AC(CFCtx, Analysis, InitEnv, BlockStates);
419
420 // Bugs in lattices and transfer functions can prevent the analysis from
421 // converging. To limit the damage (infinite loops) that these bugs can cause,
422 // limit the number of iterations.
423 // FIXME: Consider making the maximum number of iterations configurable.
424 // FIXME: Consider restricting the number of backedges followed, rather than
425 // iterations.
426 // FIXME: Set up statistics (see llvm/ADT/Statistic.h) to count average number
427 // of iterations, number of functions that time out, etc.
428 static constexpr uint32_t MaxAverageVisitsPerBlock = 4;
429 static constexpr uint32_t AbsoluteMaxIterations = 1 << 16;
430 const uint32_t RelativeMaxIterations =
431 MaxAverageVisitsPerBlock * BlockStates.size();
432 const uint32_t MaxIterations =
433 std::min(RelativeMaxIterations, AbsoluteMaxIterations);
434 uint32_t Iterations = 0;
435 while (const CFGBlock *Block = Worklist.dequeue()) {
436 LLVM_DEBUG(llvm::dbgs()
437 << "Processing Block " << Block->getBlockID() << "\n");
438 if (++Iterations > MaxIterations) {
439 return llvm::createStringError(std::errc::timed_out,
440 "maximum number of iterations reached");
441 }
442
443 const std::optional<TypeErasedDataflowAnalysisState> &OldBlockState =
444 BlockStates[Block->getBlockID()];
445 TypeErasedDataflowAnalysisState NewBlockState =
446 transferCFGBlock(*Block, AC);
447 LLVM_DEBUG({
448 llvm::errs() << "New Env:\n";
449 NewBlockState.Env.dump();
450 });
451
452 if (OldBlockState) {
453 LLVM_DEBUG({
454 llvm::errs() << "Old Env:\n";
455 OldBlockState->Env.dump();
456 });
457 if (isLoopHead(*Block)) {
458 LatticeJoinEffect Effect1 = Analysis.widenTypeErased(
459 NewBlockState.Lattice, OldBlockState->Lattice);
460 LatticeJoinEffect Effect2 =
461 NewBlockState.Env.widen(OldBlockState->Env, Analysis);
462 if (Effect1 == LatticeJoinEffect::Unchanged &&
463 Effect2 == LatticeJoinEffect::Unchanged)
464 // The state of `Block` didn't change from widening so there's no need
465 // to revisit its successors.
466 continue;
467 } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice,
468 NewBlockState.Lattice) &&
469 OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) {
470 // The state of `Block` didn't change after transfer so there's no need
471 // to revisit its successors.
472 continue;
473 }
474 }
475
476 BlockStates[Block->getBlockID()] = std::move(NewBlockState);
477
478 // Do not add unreachable successor blocks to `Worklist`.
479 if (Block->hasNoReturnElement())
480 continue;
481
482 Worklist.enqueueSuccessors(Block);
483 }
484 // FIXME: Consider evaluating unreachable basic blocks (those that have a
485 // state set to `std::nullopt` at this point) to also analyze dead code.
486
487 if (PostVisitCFG) {
488 for (const CFGBlock *Block : CFCtx.getCFG()) {
489 // Skip blocks that were not evaluated.
490 if (!BlockStates[Block->getBlockID()])
491 continue;
492 transferCFGBlock(*Block, AC, PostVisitCFG);
493 }
494 }
495
496 return BlockStates;
497 }
498
499 } // namespace dataflow
500 } // namespace clang
501