//===- TypeErasedDataflowAnalysis.cpp -------------------------------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception // //===----------------------------------------------------------------------===// // // This file defines type-erased base types and functions for building dataflow // analyses that run over Control-Flow Graphs (CFGs). // //===----------------------------------------------------------------------===// #include #include #include #include #include "clang/AST/ASTDumper.h" #include "clang/AST/DeclCXX.h" #include "clang/AST/OperationKinds.h" #include "clang/AST/StmtCXX.h" #include "clang/AST/StmtVisitor.h" #include "clang/Analysis/Analyses/PostOrderCFGView.h" #include "clang/Analysis/CFG.h" #include "clang/Analysis/FlowSensitive/DataflowEnvironment.h" #include "clang/Analysis/FlowSensitive/DataflowLattice.h" #include "clang/Analysis/FlowSensitive/DataflowWorklist.h" #include "clang/Analysis/FlowSensitive/RecordOps.h" #include "clang/Analysis/FlowSensitive/Transfer.h" #include "clang/Analysis/FlowSensitive/TypeErasedDataflowAnalysis.h" #include "clang/Analysis/FlowSensitive/Value.h" #include "clang/Support/Compiler.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" #define DEBUG_TYPE "clang-dataflow" namespace clang { namespace dataflow { class NoopLattice; } } // namespace clang namespace llvm { // This needs to be exported for ClangAnalysisFlowSensitiveTests so any_cast // uses the correct address of Any::TypeId from the clang shared library instead // of creating one in the test executable. when building with // CLANG_LINK_CLANG_DYLIB template struct CLANG_EXPORT_TEMPLATE Any::TypeId; } // namespace llvm namespace clang { namespace dataflow { /// Returns the index of `Block` in the successors of `Pred`. static int blockIndexInPredecessor(const CFGBlock &Pred, const CFGBlock &Block) { auto BlockPos = llvm::find_if( Pred.succs(), [&Block](const CFGBlock::AdjacentBlock &Succ) { return Succ && Succ->getBlockID() == Block.getBlockID(); }); return BlockPos - Pred.succ_begin(); } // A "backedge" node is a block introduced in the CFG exclusively to indicate a // loop backedge. They are exactly identified by the presence of a non-null // pointer to the entry block of the loop condition. Note that this is not // necessarily the block with the loop statement as terminator, because // short-circuit operators will result in multiple blocks encoding the loop // condition, only one of which will contain the loop statement as terminator. static bool isBackedgeNode(const CFGBlock &B) { return B.getLoopTarget() != nullptr; } namespace { /// Extracts the terminator's condition expression. class TerminatorVisitor : public ConstStmtVisitor { public: TerminatorVisitor() = default; const Expr *VisitIfStmt(const IfStmt *S) { return S->getCond(); } const Expr *VisitWhileStmt(const WhileStmt *S) { return S->getCond(); } const Expr *VisitDoStmt(const DoStmt *S) { return S->getCond(); } const Expr *VisitForStmt(const ForStmt *S) { return S->getCond(); } const Expr *VisitCXXForRangeStmt(const CXXForRangeStmt *) { // Don't do anything special for CXXForRangeStmt, because the condition // (being implicitly generated) isn't visible from the loop body. return nullptr; } const Expr *VisitBinaryOperator(const BinaryOperator *S) { assert(S->getOpcode() == BO_LAnd || S->getOpcode() == BO_LOr); return S->getLHS(); } const Expr *VisitConditionalOperator(const ConditionalOperator *S) { return S->getCond(); } }; /// Holds data structures required for running dataflow analysis. struct AnalysisContext { AnalysisContext(const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv, llvm::ArrayRef> BlockStates) : ACFG(ACFG), Analysis(Analysis), InitEnv(InitEnv), Log(*InitEnv.getDataflowAnalysisContext().getOptions().Log), BlockStates(BlockStates) { Log.beginAnalysis(ACFG, Analysis); } ~AnalysisContext() { Log.endAnalysis(); } /// Contains the CFG being analyzed. const AdornedCFG &ACFG; /// The analysis to be run. TypeErasedDataflowAnalysis &Analysis; /// Initial state to start the analysis. const Environment &InitEnv; Logger &Log; /// Stores the state of a CFG block if it has been evaluated by the analysis. /// The indices correspond to the block IDs. llvm::ArrayRef> BlockStates; }; class PrettyStackTraceAnalysis : public llvm::PrettyStackTraceEntry { public: PrettyStackTraceAnalysis(const AdornedCFG &ACFG, const char *Message) : ACFG(ACFG), Message(Message) {} void print(raw_ostream &OS) const override { OS << Message << "\n"; OS << "Decl:\n"; ACFG.getDecl().dump(OS); OS << "CFG:\n"; ACFG.getCFG().print(OS, LangOptions(), false); } private: const AdornedCFG &ACFG; const char *Message; }; class PrettyStackTraceCFGElement : public llvm::PrettyStackTraceEntry { public: PrettyStackTraceCFGElement(const CFGElement &Element, int BlockIdx, int ElementIdx, const char *Message) : Element(Element), BlockIdx(BlockIdx), ElementIdx(ElementIdx), Message(Message) {} void print(raw_ostream &OS) const override { OS << Message << ": Element [B" << BlockIdx << "." << ElementIdx << "]\n"; if (auto Stmt = Element.getAs()) { OS << "Stmt:\n"; ASTDumper Dumper(OS, false); Dumper.Visit(Stmt->getStmt()); } } private: const CFGElement ∈ int BlockIdx; int ElementIdx; const char *Message; }; // Builds a joined TypeErasedDataflowAnalysisState from 0 or more sources, // each of which may be owned (built as part of the join) or external (a // reference to an Environment that will outlive the builder). // Avoids unneccesary copies of the environment. class JoinedStateBuilder { AnalysisContext &AC; Environment::ExprJoinBehavior JoinBehavior; std::vector All; std::deque Owned; TypeErasedDataflowAnalysisState join(const TypeErasedDataflowAnalysisState &L, const TypeErasedDataflowAnalysisState &R) { return {AC.Analysis.joinTypeErased(L.Lattice, R.Lattice), Environment::join(L.Env, R.Env, AC.Analysis, JoinBehavior)}; } public: JoinedStateBuilder(AnalysisContext &AC, Environment::ExprJoinBehavior JoinBehavior) : AC(AC), JoinBehavior(JoinBehavior) {} void addOwned(TypeErasedDataflowAnalysisState State) { Owned.push_back(std::move(State)); All.push_back(&Owned.back()); } void addUnowned(const TypeErasedDataflowAnalysisState &State) { All.push_back(&State); } TypeErasedDataflowAnalysisState take() && { if (All.empty()) // FIXME: Consider passing `Block` to Analysis.typeErasedInitialElement // to enable building analyses like computation of dominators that // initialize the state of each basic block differently. return {AC.Analysis.typeErasedInitialElement(), AC.InitEnv.fork()}; if (All.size() == 1) // Join the environment with itself so that we discard expression state if // desired. // FIXME: We could consider writing special-case code for this that only // does the discarding, but it's not clear if this is worth it. return {All[0]->Lattice, Environment::join(All[0]->Env, All[0]->Env, AC.Analysis, JoinBehavior)}; auto Result = join(*All[0], *All[1]); for (unsigned I = 2; I < All.size(); ++I) Result = join(Result, *All[I]); return Result; } }; } // namespace static const Expr *getTerminatorCondition(const Stmt *TerminatorStmt) { return TerminatorStmt == nullptr ? nullptr : TerminatorVisitor().Visit(TerminatorStmt); } /// Computes the input state for a given basic block by joining the output /// states of its predecessors. /// /// Requirements: /// /// All predecessors of `Block` except those with loop back edges must have /// already been transferred. States in `AC.BlockStates` that are set to /// `std::nullopt` represent basic blocks that are not evaluated yet. static TypeErasedDataflowAnalysisState computeBlockInputState(const CFGBlock &Block, AnalysisContext &AC) { std::vector Preds(Block.pred_begin(), Block.pred_end()); if (Block.getTerminator().isTemporaryDtorsBranch()) { // This handles a special case where the code that produced the CFG includes // a conditional operator with a branch that constructs a temporary and // calls a destructor annotated as noreturn. The CFG models this as follows: // // B1 (contains the condition of the conditional operator) - succs: B2, B3 // B2 (contains code that does not call a noreturn destructor) - succs: B4 // B3 (contains code that calls a noreturn destructor) - succs: B4 // B4 (has temporary destructor terminator) - succs: B5, B6 // B5 (noreturn block that is associated with the noreturn destructor call) // B6 (contains code that follows the conditional operator statement) // // The first successor (B5 above) of a basic block with a temporary // destructor terminator (B4 above) is the block that evaluates the // destructor. If that block has a noreturn element then the predecessor // block that constructed the temporary object (B3 above) is effectively a // noreturn block and its state should not be used as input for the state // of the block that has a temporary destructor terminator (B4 above). This // holds regardless of which branch of the ternary operator calls the // noreturn destructor. However, it doesn't cases where a nested ternary // operator includes a branch that contains a noreturn destructor call. // // See `NoreturnDestructorTest` for concrete examples. if (Block.succ_begin()->getReachableBlock() != nullptr && Block.succ_begin()->getReachableBlock()->hasNoReturnElement()) { const CFGBlock *StmtBlock = nullptr; if (const Stmt *Terminator = Block.getTerminatorStmt()) StmtBlock = AC.ACFG.blockForStmt(*Terminator); assert(StmtBlock != nullptr); llvm::erase(Preds, StmtBlock); } } // If any of the predecessor blocks contains an expression consumed in a // different block, we need to keep expression state. // Note that in this case, we keep expression state for all predecessors, // rather than only those predecessors that actually contain an expression // consumed in a different block. While this is potentially suboptimal, it's // actually likely, if we have control flow within a full expression, that // all predecessors have expression state consumed in a different block. Environment::ExprJoinBehavior JoinBehavior = Environment::DiscardExprState; for (const CFGBlock *Pred : Preds) { if (Pred && AC.ACFG.containsExprConsumedInDifferentBlock(*Pred)) { JoinBehavior = Environment::KeepExprState; break; } } JoinedStateBuilder Builder(AC, JoinBehavior); for (const CFGBlock *Pred : Preds) { // Skip if the `Block` is unreachable or control flow cannot get past it. if (!Pred || Pred->hasNoReturnElement()) continue; // Skip if `Pred` was not evaluated yet. This could happen if `Pred` has a // loop back edge to `Block`. const std::optional &MaybePredState = AC.BlockStates[Pred->getBlockID()]; if (!MaybePredState) continue; const TypeErasedDataflowAnalysisState &PredState = *MaybePredState; const Expr *Cond = getTerminatorCondition(Pred->getTerminatorStmt()); if (Cond == nullptr) { Builder.addUnowned(PredState); continue; } bool BranchVal = blockIndexInPredecessor(*Pred, Block) == 0; // `transferBranch` may need to mutate the environment to describe the // dynamic effect of the terminator for a given branch. Copy now. TypeErasedDataflowAnalysisState Copy = MaybePredState->fork(); if (AC.Analysis.builtinOptions()) { auto *CondVal = Copy.Env.get(*Cond); // In transferCFGBlock(), we ensure that we always have a `Value` // for the terminator condition, so assert this. We consciously // assert ourselves instead of asserting via `cast()` so that we get // a more meaningful line number if the assertion fails. assert(CondVal != nullptr); BoolValue *AssertedVal = BranchVal ? CondVal : &Copy.Env.makeNot(*CondVal); Copy.Env.assume(AssertedVal->formula()); } AC.Analysis.transferBranchTypeErased(BranchVal, Cond, Copy.Lattice, Copy.Env); Builder.addOwned(std::move(Copy)); } return std::move(Builder).take(); } /// Built-in transfer function for `CFGStmt`. static void builtinTransferStatement(unsigned CurBlockID, const CFGStmt &Elt, TypeErasedDataflowAnalysisState &InputState, AnalysisContext &AC) { const Stmt *S = Elt.getStmt(); assert(S != nullptr); transfer(StmtToEnvMap(AC.ACFG, AC.BlockStates, CurBlockID, InputState), *S, InputState.Env, AC.Analysis); } /// Built-in transfer function for `CFGInitializer`. static void builtinTransferInitializer(const CFGInitializer &Elt, TypeErasedDataflowAnalysisState &InputState) { const CXXCtorInitializer *Init = Elt.getInitializer(); assert(Init != nullptr); auto &Env = InputState.Env; auto &ThisLoc = *Env.getThisPointeeStorageLocation(); if (!Init->isAnyMemberInitializer()) // FIXME: Handle base initialization return; auto *InitExpr = Init->getInit(); assert(InitExpr != nullptr); const FieldDecl *Member = nullptr; RecordStorageLocation *ParentLoc = &ThisLoc; StorageLocation *MemberLoc = nullptr; if (Init->isMemberInitializer()) { Member = Init->getMember(); MemberLoc = ThisLoc.getChild(*Member); } else { IndirectFieldDecl *IndirectField = Init->getIndirectMember(); assert(IndirectField != nullptr); MemberLoc = &ThisLoc; for (const auto *I : IndirectField->chain()) { Member = cast(I); ParentLoc = cast(MemberLoc); MemberLoc = ParentLoc->getChild(*Member); } } assert(Member != nullptr); // FIXME: Instead of these case distinctions, we would ideally want to be able // to simply use `Environment::createObject()` here, the same way that we do // this in `TransferVisitor::VisitInitListExpr()`. However, this would require // us to be able to build a list of fields that we then use to initialize an // `RecordStorageLocation` -- and the problem is that, when we get here, // the `RecordStorageLocation` already exists. We should explore if there's // anything that we can do to change this. if (Member->getType()->isReferenceType()) { auto *InitExprLoc = Env.getStorageLocation(*InitExpr); if (InitExprLoc == nullptr) return; ParentLoc->setChild(*Member, InitExprLoc); // Record-type initializers construct themselves directly into the result // object, so there is no need to handle them here. } else if (!Member->getType()->isRecordType()) { assert(MemberLoc != nullptr); if (auto *InitExprVal = Env.getValue(*InitExpr)) Env.setValue(*MemberLoc, *InitExprVal); } } static void builtinTransfer(unsigned CurBlockID, const CFGElement &Elt, TypeErasedDataflowAnalysisState &State, AnalysisContext &AC) { switch (Elt.getKind()) { case CFGElement::Statement: builtinTransferStatement(CurBlockID, Elt.castAs(), State, AC); break; case CFGElement::Initializer: builtinTransferInitializer(Elt.castAs(), State); break; case CFGElement::LifetimeEnds: // Removing declarations when their lifetime ends serves two purposes: // - Eliminate unnecessary clutter from `Environment::DeclToLoc` // - Allow us to assert that, when joining two `Environment`s, the two // `DeclToLoc` maps never contain entries that map the same declaration to // different storage locations. if (const ValueDecl *VD = Elt.castAs().getVarDecl()) State.Env.removeDecl(*VD); break; default: // FIXME: Evaluate other kinds of `CFGElement` break; } } /// Transfers `State` by evaluating each element in the `Block` based on the /// `AC.Analysis` specified. /// /// Built-in transfer functions (if the option for `ApplyBuiltinTransfer` is set /// by the analysis) will be applied to the element before evaluation by the /// user-specified analysis. /// `PostVisitCFG` (if provided) will be applied to the element after evaluation /// by the user-specified analysis. static TypeErasedDataflowAnalysisState transferCFGBlock(const CFGBlock &Block, AnalysisContext &AC, const CFGEltCallbacksTypeErased &PostAnalysisCallbacks = {}) { AC.Log.enterBlock(Block, PostAnalysisCallbacks.Before != nullptr || PostAnalysisCallbacks.After != nullptr); auto State = computeBlockInputState(Block, AC); AC.Log.recordState(State); int ElementIdx = 1; for (const auto &Element : Block) { PrettyStackTraceCFGElement CrashInfo(Element, Block.getBlockID(), ElementIdx++, "transferCFGBlock"); AC.Log.enterElement(Element); if (PostAnalysisCallbacks.Before) { PostAnalysisCallbacks.Before(Element, State); } // Built-in analysis if (AC.Analysis.builtinOptions()) { builtinTransfer(Block.getBlockID(), Element, State, AC); } // User-provided analysis AC.Analysis.transferTypeErased(Element, State.Lattice, State.Env); if (PostAnalysisCallbacks.After) { PostAnalysisCallbacks.After(Element, State); } AC.Log.recordState(State); } // If we have a terminator, evaluate its condition. // This `Expr` may not appear as a `CFGElement` anywhere else, and it's // important that we evaluate it here (rather than while processing the // terminator) so that we put the corresponding value in the right // environment. if (const Expr *TerminatorCond = dyn_cast_or_null(Block.getTerminatorCondition())) { if (State.Env.getValue(*TerminatorCond) == nullptr) // FIXME: This only runs the builtin transfer, not the analysis-specific // transfer. Fixing this isn't trivial, as the analysis-specific transfer // takes a `CFGElement` as input, but some expressions only show up as a // terminator condition, but not as a `CFGElement`. The condition of an if // statement is one such example. transfer(StmtToEnvMap(AC.ACFG, AC.BlockStates, Block.getBlockID(), State), *TerminatorCond, State.Env, AC.Analysis); // If the transfer function didn't produce a value, create an atom so that // we have *some* value for the condition expression. This ensures that // when we extend the flow condition, it actually changes. if (State.Env.getValue(*TerminatorCond) == nullptr) State.Env.setValue(*TerminatorCond, State.Env.makeAtomicBoolValue()); AC.Log.recordState(State); } return State; } llvm::Expected>> runTypeErasedDataflowAnalysis( const AdornedCFG &ACFG, TypeErasedDataflowAnalysis &Analysis, const Environment &InitEnv, const CFGEltCallbacksTypeErased &PostAnalysisCallbacks, std::int32_t MaxBlockVisits) { PrettyStackTraceAnalysis CrashInfo(ACFG, "runTypeErasedDataflowAnalysis"); std::optional MaybeStartingEnv; if (InitEnv.callStackSize() == 0) { MaybeStartingEnv = InitEnv.fork(); MaybeStartingEnv->initialize(); } const Environment &StartingEnv = MaybeStartingEnv ? *MaybeStartingEnv : InitEnv; const clang::CFG &CFG = ACFG.getCFG(); PostOrderCFGView POV(&CFG); ForwardDataflowWorklist Worklist(CFG, &POV); std::vector> BlockStates( CFG.size()); // The entry basic block doesn't contain statements so it can be skipped. const CFGBlock &Entry = CFG.getEntry(); BlockStates[Entry.getBlockID()] = {Analysis.typeErasedInitialElement(), StartingEnv.fork()}; Worklist.enqueueSuccessors(&Entry); AnalysisContext AC(ACFG, Analysis, StartingEnv, BlockStates); std::int32_t BlockVisits = 0; while (const CFGBlock *Block = Worklist.dequeue()) { LLVM_DEBUG(llvm::dbgs() << "Processing Block " << Block->getBlockID() << "\n"); if (++BlockVisits > MaxBlockVisits) { return llvm::createStringError(std::errc::timed_out, "maximum number of blocks processed"); } const std::optional &OldBlockState = BlockStates[Block->getBlockID()]; TypeErasedDataflowAnalysisState NewBlockState = transferCFGBlock(*Block, AC); LLVM_DEBUG({ llvm::errs() << "New Env:\n"; NewBlockState.Env.dump(); }); if (OldBlockState) { LLVM_DEBUG({ llvm::errs() << "Old Env:\n"; OldBlockState->Env.dump(); }); if (isBackedgeNode(*Block)) { LatticeJoinEffect Effect1 = Analysis.widenTypeErased( NewBlockState.Lattice, OldBlockState->Lattice); LatticeJoinEffect Effect2 = NewBlockState.Env.widen(OldBlockState->Env, Analysis); if (Effect1 == LatticeJoinEffect::Unchanged && Effect2 == LatticeJoinEffect::Unchanged) { // The state of `Block` didn't change from widening so there's no need // to revisit its successors. AC.Log.blockConverged(); continue; } } else if (Analysis.isEqualTypeErased(OldBlockState->Lattice, NewBlockState.Lattice) && OldBlockState->Env.equivalentTo(NewBlockState.Env, Analysis)) { // The state of `Block` didn't change after transfer so there's no need // to revisit its successors. AC.Log.blockConverged(); continue; } } BlockStates[Block->getBlockID()] = std::move(NewBlockState); // Do not add unreachable successor blocks to `Worklist`. if (Block->hasNoReturnElement()) continue; Worklist.enqueueSuccessors(Block); } // FIXME: Consider evaluating unreachable basic blocks (those that have a // state set to `std::nullopt` at this point) to also analyze dead code. if (PostAnalysisCallbacks.Before || PostAnalysisCallbacks.After) { for (const CFGBlock *Block : ACFG.getCFG()) { // Skip blocks that were not evaluated. if (!BlockStates[Block->getBlockID()]) continue; transferCFGBlock(*Block, AC, PostAnalysisCallbacks); } } return std::move(BlockStates); } } // namespace dataflow } // namespace clang