xref: /freebsd-src/contrib/llvm-project/llvm/lib/IR/SafepointIRVerifier.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
10b57cec5SDimitry Andric //===-- SafepointIRVerifier.cpp - Verify gc.statepoint invariants ---------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
94824e7fdSDimitry Andric // Run a basic correctness check on the IR to ensure that Safepoints - if
104824e7fdSDimitry Andric // they've been inserted - were inserted correctly.  In particular, look for use
114824e7fdSDimitry Andric // of non-relocated values after a safepoint.  It's primary use is to check the
120b57cec5SDimitry Andric // correctness of safepoint insertion immediately after insertion, but it can
130b57cec5SDimitry Andric // also be used to verify that later transforms have not found a way to break
140b57cec5SDimitry Andric // safepoint semenatics.
150b57cec5SDimitry Andric //
160b57cec5SDimitry Andric // In its current form, this verify checks a property which is sufficient, but
170b57cec5SDimitry Andric // not neccessary for correctness.  There are some cases where an unrelocated
180b57cec5SDimitry Andric // pointer can be used after the safepoint.  Consider this example:
190b57cec5SDimitry Andric //
200b57cec5SDimitry Andric //    a = ...
210b57cec5SDimitry Andric //    b = ...
220b57cec5SDimitry Andric //    (a',b') = safepoint(a,b)
230b57cec5SDimitry Andric //    c = cmp eq a b
240b57cec5SDimitry Andric //    br c, ..., ....
250b57cec5SDimitry Andric //
260b57cec5SDimitry Andric // Because it is valid to reorder 'c' above the safepoint, this is legal.  In
270b57cec5SDimitry Andric // practice, this is a somewhat uncommon transform, but CodeGenPrep does create
280b57cec5SDimitry Andric // idioms like this.  The verifier knows about these cases and avoids reporting
290b57cec5SDimitry Andric // false positives.
300b57cec5SDimitry Andric //
310b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
320b57cec5SDimitry Andric 
33480093f4SDimitry Andric #include "llvm/IR/SafepointIRVerifier.h"
340b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h"
350b57cec5SDimitry Andric #include "llvm/ADT/PostOrderIterator.h"
360b57cec5SDimitry Andric #include "llvm/ADT/SetOperations.h"
370b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h"
380b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
390b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
400b57cec5SDimitry Andric #include "llvm/IR/Function.h"
411fd87a68SDimitry Andric #include "llvm/IR/InstrTypes.h"
420b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
430b57cec5SDimitry Andric #include "llvm/IR/Statepoint.h"
44480093f4SDimitry Andric #include "llvm/IR/Value.h"
45480093f4SDimitry Andric #include "llvm/InitializePasses.h"
465ffd83dbSDimitry Andric #include "llvm/Support/Allocator.h"
470b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
48480093f4SDimitry Andric #include "llvm/Support/Debug.h"
490b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
500b57cec5SDimitry Andric 
510b57cec5SDimitry Andric #define DEBUG_TYPE "safepoint-ir-verifier"
520b57cec5SDimitry Andric 
530b57cec5SDimitry Andric using namespace llvm;
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric /// This option is used for writing test cases.  Instead of crashing the program
560b57cec5SDimitry Andric /// when verification fails, report a message to the console (for FileCheck
570b57cec5SDimitry Andric /// usage) and continue execution as if nothing happened.
580b57cec5SDimitry Andric static cl::opt<bool> PrintOnly("safepoint-ir-verifier-print-only",
590b57cec5SDimitry Andric                                cl::init(false));
600b57cec5SDimitry Andric 
610b57cec5SDimitry Andric namespace {
620b57cec5SDimitry Andric 
630b57cec5SDimitry Andric /// This CFG Deadness finds dead blocks and edges. Algorithm starts with a set
640b57cec5SDimitry Andric /// of blocks unreachable from entry then propagates deadness using foldable
650b57cec5SDimitry Andric /// conditional branches without modifying CFG. So GVN does but it changes CFG
660b57cec5SDimitry Andric /// by splitting critical edges. In most cases passes rely on SimplifyCFG to
670b57cec5SDimitry Andric /// clean up dead blocks, but in some cases, like verification or loop passes
680b57cec5SDimitry Andric /// it's not possible.
690b57cec5SDimitry Andric class CFGDeadness {
700b57cec5SDimitry Andric   const DominatorTree *DT = nullptr;
710b57cec5SDimitry Andric   SetVector<const BasicBlock *> DeadBlocks;
720b57cec5SDimitry Andric   SetVector<const Use *> DeadEdges; // Contains all dead edges from live blocks.
730b57cec5SDimitry Andric 
740b57cec5SDimitry Andric public:
750b57cec5SDimitry Andric   /// Return the edge that coresponds to the predecessor.
getEdge(const_pred_iterator & PredIt)760b57cec5SDimitry Andric   static const Use& getEdge(const_pred_iterator &PredIt) {
770b57cec5SDimitry Andric     auto &PU = PredIt.getUse();
780b57cec5SDimitry Andric     return PU.getUser()->getOperandUse(PU.getOperandNo());
790b57cec5SDimitry Andric   }
800b57cec5SDimitry Andric 
810b57cec5SDimitry Andric   /// Return true if there is at least one live edge that corresponds to the
820b57cec5SDimitry Andric   /// basic block InBB listed in the phi node.
hasLiveIncomingEdge(const PHINode * PN,const BasicBlock * InBB) const830b57cec5SDimitry Andric   bool hasLiveIncomingEdge(const PHINode *PN, const BasicBlock *InBB) const {
840b57cec5SDimitry Andric     assert(!isDeadBlock(InBB) && "block must be live");
850b57cec5SDimitry Andric     const BasicBlock* BB = PN->getParent();
860b57cec5SDimitry Andric     bool Listed = false;
870b57cec5SDimitry Andric     for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
880b57cec5SDimitry Andric       if (InBB == *PredIt) {
890b57cec5SDimitry Andric         if (!isDeadEdge(&getEdge(PredIt)))
900b57cec5SDimitry Andric           return true;
910b57cec5SDimitry Andric         Listed = true;
920b57cec5SDimitry Andric       }
930b57cec5SDimitry Andric     }
940b57cec5SDimitry Andric     (void)Listed;
950b57cec5SDimitry Andric     assert(Listed && "basic block is not found among incoming blocks");
960b57cec5SDimitry Andric     return false;
970b57cec5SDimitry Andric   }
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric 
isDeadBlock(const BasicBlock * BB) const1000b57cec5SDimitry Andric   bool isDeadBlock(const BasicBlock *BB) const {
1010b57cec5SDimitry Andric     return DeadBlocks.count(BB);
1020b57cec5SDimitry Andric   }
1030b57cec5SDimitry Andric 
isDeadEdge(const Use * U) const1040b57cec5SDimitry Andric   bool isDeadEdge(const Use *U) const {
1058bcb0991SDimitry Andric     assert(cast<Instruction>(U->getUser())->isTerminator() &&
1060b57cec5SDimitry Andric            "edge must be operand of terminator");
1070b57cec5SDimitry Andric     assert(cast_or_null<BasicBlock>(U->get()) &&
1080b57cec5SDimitry Andric            "edge must refer to basic block");
1098bcb0991SDimitry Andric     assert(!isDeadBlock(cast<Instruction>(U->getUser())->getParent()) &&
1100b57cec5SDimitry Andric            "isDeadEdge() must be applied to edge from live block");
1110b57cec5SDimitry Andric     return DeadEdges.count(U);
1120b57cec5SDimitry Andric   }
1130b57cec5SDimitry Andric 
hasLiveIncomingEdges(const BasicBlock * BB) const1140b57cec5SDimitry Andric   bool hasLiveIncomingEdges(const BasicBlock *BB) const {
1150b57cec5SDimitry Andric     // Check if all incoming edges are dead.
1160b57cec5SDimitry Andric     for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
1170b57cec5SDimitry Andric       auto &PU = PredIt.getUse();
1180b57cec5SDimitry Andric       const Use &U = PU.getUser()->getOperandUse(PU.getOperandNo());
1190b57cec5SDimitry Andric       if (!isDeadBlock(*PredIt) && !isDeadEdge(&U))
1200b57cec5SDimitry Andric         return true; // Found a live edge.
1210b57cec5SDimitry Andric     }
1220b57cec5SDimitry Andric     return false;
1230b57cec5SDimitry Andric   }
1240b57cec5SDimitry Andric 
processFunction(const Function & F,const DominatorTree & DT)1250b57cec5SDimitry Andric   void processFunction(const Function &F, const DominatorTree &DT) {
1260b57cec5SDimitry Andric     this->DT = &DT;
1270b57cec5SDimitry Andric 
1280b57cec5SDimitry Andric     // Start with all blocks unreachable from entry.
1290b57cec5SDimitry Andric     for (const BasicBlock &BB : F)
1300b57cec5SDimitry Andric       if (!DT.isReachableFromEntry(&BB))
1310b57cec5SDimitry Andric         DeadBlocks.insert(&BB);
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric     // Top-down walk of the dominator tree
1340b57cec5SDimitry Andric     ReversePostOrderTraversal<const Function *> RPOT(&F);
1350b57cec5SDimitry Andric     for (const BasicBlock *BB : RPOT) {
1360b57cec5SDimitry Andric       const Instruction *TI = BB->getTerminator();
1370b57cec5SDimitry Andric       assert(TI && "blocks must be well formed");
1380b57cec5SDimitry Andric 
1390b57cec5SDimitry Andric       // For conditional branches, we can perform simple conditional propagation on
1400b57cec5SDimitry Andric       // the condition value itself.
1410b57cec5SDimitry Andric       const BranchInst *BI = dyn_cast<BranchInst>(TI);
1420b57cec5SDimitry Andric       if (!BI || !BI->isConditional() || !isa<Constant>(BI->getCondition()))
1430b57cec5SDimitry Andric         continue;
1440b57cec5SDimitry Andric 
1450b57cec5SDimitry Andric       // If a branch has two identical successors, we cannot declare either dead.
1460b57cec5SDimitry Andric       if (BI->getSuccessor(0) == BI->getSuccessor(1))
1470b57cec5SDimitry Andric         continue;
1480b57cec5SDimitry Andric 
1490b57cec5SDimitry Andric       ConstantInt *Cond = dyn_cast<ConstantInt>(BI->getCondition());
1500b57cec5SDimitry Andric       if (!Cond)
1510b57cec5SDimitry Andric         continue;
1520b57cec5SDimitry Andric 
1530b57cec5SDimitry Andric       addDeadEdge(BI->getOperandUse(Cond->getZExtValue() ? 1 : 2));
1540b57cec5SDimitry Andric     }
1550b57cec5SDimitry Andric   }
1560b57cec5SDimitry Andric 
1570b57cec5SDimitry Andric protected:
addDeadBlock(const BasicBlock * BB)1580b57cec5SDimitry Andric   void addDeadBlock(const BasicBlock *BB) {
1590b57cec5SDimitry Andric     SmallVector<const BasicBlock *, 4> NewDead;
1600b57cec5SDimitry Andric     SmallSetVector<const BasicBlock *, 4> DF;
1610b57cec5SDimitry Andric 
1620b57cec5SDimitry Andric     NewDead.push_back(BB);
1630b57cec5SDimitry Andric     while (!NewDead.empty()) {
1640b57cec5SDimitry Andric       const BasicBlock *D = NewDead.pop_back_val();
1650b57cec5SDimitry Andric       if (isDeadBlock(D))
1660b57cec5SDimitry Andric         continue;
1670b57cec5SDimitry Andric 
1680b57cec5SDimitry Andric       // All blocks dominated by D are dead.
1690b57cec5SDimitry Andric       SmallVector<BasicBlock *, 8> Dom;
1700b57cec5SDimitry Andric       DT->getDescendants(const_cast<BasicBlock*>(D), Dom);
1710b57cec5SDimitry Andric       // Do not need to mark all in and out edges dead
1720b57cec5SDimitry Andric       // because BB is marked dead and this is enough
1730b57cec5SDimitry Andric       // to run further.
1740b57cec5SDimitry Andric       DeadBlocks.insert(Dom.begin(), Dom.end());
1750b57cec5SDimitry Andric 
1760b57cec5SDimitry Andric       // Figure out the dominance-frontier(D).
1770b57cec5SDimitry Andric       for (BasicBlock *B : Dom)
1780b57cec5SDimitry Andric         for (BasicBlock *S : successors(B))
1790b57cec5SDimitry Andric           if (!isDeadBlock(S) && !hasLiveIncomingEdges(S))
1800b57cec5SDimitry Andric             NewDead.push_back(S);
1810b57cec5SDimitry Andric     }
1820b57cec5SDimitry Andric   }
1830b57cec5SDimitry Andric 
addDeadEdge(const Use & DeadEdge)1840b57cec5SDimitry Andric   void addDeadEdge(const Use &DeadEdge) {
1850b57cec5SDimitry Andric     if (!DeadEdges.insert(&DeadEdge))
1860b57cec5SDimitry Andric       return;
1870b57cec5SDimitry Andric 
1880b57cec5SDimitry Andric     BasicBlock *BB = cast_or_null<BasicBlock>(DeadEdge.get());
1890b57cec5SDimitry Andric     if (hasLiveIncomingEdges(BB))
1900b57cec5SDimitry Andric       return;
1910b57cec5SDimitry Andric 
1920b57cec5SDimitry Andric     addDeadBlock(BB);
1930b57cec5SDimitry Andric   }
1940b57cec5SDimitry Andric };
1950b57cec5SDimitry Andric } // namespace
1960b57cec5SDimitry Andric 
1970b57cec5SDimitry Andric static void Verify(const Function &F, const DominatorTree &DT,
1980b57cec5SDimitry Andric                    const CFGDeadness &CD);
1990b57cec5SDimitry Andric 
2000b57cec5SDimitry Andric namespace llvm {
run(Function & F,FunctionAnalysisManager & AM)2010b57cec5SDimitry Andric PreservedAnalyses SafepointIRVerifierPass::run(Function &F,
2020b57cec5SDimitry Andric                                                FunctionAnalysisManager &AM) {
2030b57cec5SDimitry Andric   const auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
2040b57cec5SDimitry Andric   CFGDeadness CD;
2050b57cec5SDimitry Andric   CD.processFunction(F, DT);
2060b57cec5SDimitry Andric   Verify(F, DT, CD);
2070b57cec5SDimitry Andric   return PreservedAnalyses::all();
2080b57cec5SDimitry Andric }
2095ffd83dbSDimitry Andric } // namespace llvm
2100b57cec5SDimitry Andric 
2110b57cec5SDimitry Andric namespace {
2120b57cec5SDimitry Andric 
2130b57cec5SDimitry Andric struct SafepointIRVerifier : public FunctionPass {
2140b57cec5SDimitry Andric   static char ID; // Pass identification, replacement for typeid
SafepointIRVerifier__anon8bb76e6c0211::SafepointIRVerifier2150b57cec5SDimitry Andric   SafepointIRVerifier() : FunctionPass(ID) {
2160b57cec5SDimitry Andric     initializeSafepointIRVerifierPass(*PassRegistry::getPassRegistry());
2170b57cec5SDimitry Andric   }
2180b57cec5SDimitry Andric 
runOnFunction__anon8bb76e6c0211::SafepointIRVerifier2190b57cec5SDimitry Andric   bool runOnFunction(Function &F) override {
2200b57cec5SDimitry Andric     auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2210b57cec5SDimitry Andric     CFGDeadness CD;
2220b57cec5SDimitry Andric     CD.processFunction(F, DT);
2230b57cec5SDimitry Andric     Verify(F, DT, CD);
2240b57cec5SDimitry Andric     return false; // no modifications
2250b57cec5SDimitry Andric   }
2260b57cec5SDimitry Andric 
getAnalysisUsage__anon8bb76e6c0211::SafepointIRVerifier2270b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
2280b57cec5SDimitry Andric     AU.addRequiredID(DominatorTreeWrapperPass::ID);
2290b57cec5SDimitry Andric     AU.setPreservesAll();
2300b57cec5SDimitry Andric   }
2310b57cec5SDimitry Andric 
getPassName__anon8bb76e6c0211::SafepointIRVerifier2320b57cec5SDimitry Andric   StringRef getPassName() const override { return "safepoint verifier"; }
2330b57cec5SDimitry Andric };
2340b57cec5SDimitry Andric } // namespace
2350b57cec5SDimitry Andric 
verifySafepointIR(Function & F)2360b57cec5SDimitry Andric void llvm::verifySafepointIR(Function &F) {
2370b57cec5SDimitry Andric   SafepointIRVerifier pass;
2380b57cec5SDimitry Andric   pass.runOnFunction(F);
2390b57cec5SDimitry Andric }
2400b57cec5SDimitry Andric 
2410b57cec5SDimitry Andric char SafepointIRVerifier::ID = 0;
2420b57cec5SDimitry Andric 
createSafepointIRVerifierPass()2430b57cec5SDimitry Andric FunctionPass *llvm::createSafepointIRVerifierPass() {
2440b57cec5SDimitry Andric   return new SafepointIRVerifier();
2450b57cec5SDimitry Andric }
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(SafepointIRVerifier, "verify-safepoint-ir",
2480b57cec5SDimitry Andric                       "Safepoint IR Verifier", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)2490b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
2500b57cec5SDimitry Andric INITIALIZE_PASS_END(SafepointIRVerifier, "verify-safepoint-ir",
2510b57cec5SDimitry Andric                     "Safepoint IR Verifier", false, false)
2520b57cec5SDimitry Andric 
2530b57cec5SDimitry Andric static bool isGCPointerType(Type *T) {
2540b57cec5SDimitry Andric   if (auto *PT = dyn_cast<PointerType>(T))
2550b57cec5SDimitry Andric     // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
2560b57cec5SDimitry Andric     // GC managed heap.  We know that a pointer into this heap needs to be
2570b57cec5SDimitry Andric     // updated and that no other pointer does.
2580b57cec5SDimitry Andric     return (1 == PT->getAddressSpace());
2590b57cec5SDimitry Andric   return false;
2600b57cec5SDimitry Andric }
2610b57cec5SDimitry Andric 
containsGCPtrType(Type * Ty)2620b57cec5SDimitry Andric static bool containsGCPtrType(Type *Ty) {
2630b57cec5SDimitry Andric   if (isGCPointerType(Ty))
2640b57cec5SDimitry Andric     return true;
2650b57cec5SDimitry Andric   if (VectorType *VT = dyn_cast<VectorType>(Ty))
2660b57cec5SDimitry Andric     return isGCPointerType(VT->getScalarType());
2670b57cec5SDimitry Andric   if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
2680b57cec5SDimitry Andric     return containsGCPtrType(AT->getElementType());
2690b57cec5SDimitry Andric   if (StructType *ST = dyn_cast<StructType>(Ty))
2700b57cec5SDimitry Andric     return llvm::any_of(ST->elements(), containsGCPtrType);
2710b57cec5SDimitry Andric   return false;
2720b57cec5SDimitry Andric }
2730b57cec5SDimitry Andric 
2740b57cec5SDimitry Andric // Debugging aid -- prints a [Begin, End) range of values.
2750b57cec5SDimitry Andric template<typename IteratorTy>
PrintValueSet(raw_ostream & OS,IteratorTy Begin,IteratorTy End)2760b57cec5SDimitry Andric static void PrintValueSet(raw_ostream &OS, IteratorTy Begin, IteratorTy End) {
2770b57cec5SDimitry Andric   OS << "[ ";
2780b57cec5SDimitry Andric   while (Begin != End) {
2790b57cec5SDimitry Andric     OS << **Begin << " ";
2800b57cec5SDimitry Andric     ++Begin;
2810b57cec5SDimitry Andric   }
2820b57cec5SDimitry Andric   OS << "]";
2830b57cec5SDimitry Andric }
2840b57cec5SDimitry Andric 
2850b57cec5SDimitry Andric /// The verifier algorithm is phrased in terms of availability.  The set of
2860b57cec5SDimitry Andric /// values "available" at a given point in the control flow graph is the set of
2870b57cec5SDimitry Andric /// correctly relocated value at that point, and is a subset of the set of
2880b57cec5SDimitry Andric /// definitions dominating that point.
2890b57cec5SDimitry Andric 
2900b57cec5SDimitry Andric using AvailableValueSet = DenseSet<const Value *>;
2910b57cec5SDimitry Andric 
2920b57cec5SDimitry Andric /// State we compute and track per basic block.
2930b57cec5SDimitry Andric struct BasicBlockState {
2940b57cec5SDimitry Andric   // Set of values available coming in, before the phi nodes
2950b57cec5SDimitry Andric   AvailableValueSet AvailableIn;
2960b57cec5SDimitry Andric 
2970b57cec5SDimitry Andric   // Set of values available going out
2980b57cec5SDimitry Andric   AvailableValueSet AvailableOut;
2990b57cec5SDimitry Andric 
3000b57cec5SDimitry Andric   // AvailableOut minus AvailableIn.
3010b57cec5SDimitry Andric   // All elements are Instructions
3020b57cec5SDimitry Andric   AvailableValueSet Contribution;
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric   // True if this block contains a safepoint and thus AvailableIn does not
3050b57cec5SDimitry Andric   // contribute to AvailableOut.
3060b57cec5SDimitry Andric   bool Cleared = false;
3070b57cec5SDimitry Andric };
3080b57cec5SDimitry Andric 
3090b57cec5SDimitry Andric /// A given derived pointer can have multiple base pointers through phi/selects.
3100b57cec5SDimitry Andric /// This type indicates when the base pointer is exclusively constant
3110b57cec5SDimitry Andric /// (ExclusivelySomeConstant), and if that constant is proven to be exclusively
3120b57cec5SDimitry Andric /// null, we record that as ExclusivelyNull. In all other cases, the BaseType is
3130b57cec5SDimitry Andric /// NonConstant.
3140b57cec5SDimitry Andric enum BaseType {
3150b57cec5SDimitry Andric   NonConstant = 1, // Base pointers is not exclusively constant.
3160b57cec5SDimitry Andric   ExclusivelyNull,
3170b57cec5SDimitry Andric   ExclusivelySomeConstant // Base pointers for a given derived pointer is from a
3180b57cec5SDimitry Andric                           // set of constants, but they are not exclusively
3190b57cec5SDimitry Andric                           // null.
3200b57cec5SDimitry Andric };
3210b57cec5SDimitry Andric 
3220b57cec5SDimitry Andric /// Return the baseType for Val which states whether Val is exclusively
3230b57cec5SDimitry Andric /// derived from constant/null, or not exclusively derived from constant.
3240b57cec5SDimitry Andric /// Val is exclusively derived off a constant base when all operands of phi and
3250b57cec5SDimitry Andric /// selects are derived off a constant base.
getBaseType(const Value * Val)3260b57cec5SDimitry Andric static enum BaseType getBaseType(const Value *Val) {
3270b57cec5SDimitry Andric 
3280b57cec5SDimitry Andric   SmallVector<const Value *, 32> Worklist;
3290b57cec5SDimitry Andric   DenseSet<const Value *> Visited;
3300b57cec5SDimitry Andric   bool isExclusivelyDerivedFromNull = true;
3310b57cec5SDimitry Andric   Worklist.push_back(Val);
3320b57cec5SDimitry Andric   // Strip through all the bitcasts and geps to get base pointer. Also check for
3330b57cec5SDimitry Andric   // the exclusive value when there can be multiple base pointers (through phis
3340b57cec5SDimitry Andric   // or selects).
3350b57cec5SDimitry Andric   while(!Worklist.empty()) {
3360b57cec5SDimitry Andric     const Value *V = Worklist.pop_back_val();
3370b57cec5SDimitry Andric     if (!Visited.insert(V).second)
3380b57cec5SDimitry Andric       continue;
3390b57cec5SDimitry Andric 
3400b57cec5SDimitry Andric     if (const auto *CI = dyn_cast<CastInst>(V)) {
3410b57cec5SDimitry Andric       Worklist.push_back(CI->stripPointerCasts());
3420b57cec5SDimitry Andric       continue;
3430b57cec5SDimitry Andric     }
3440b57cec5SDimitry Andric     if (const auto *GEP = dyn_cast<GetElementPtrInst>(V)) {
3450b57cec5SDimitry Andric       Worklist.push_back(GEP->getPointerOperand());
3460b57cec5SDimitry Andric       continue;
3470b57cec5SDimitry Andric     }
3480b57cec5SDimitry Andric     // Push all the incoming values of phi node into the worklist for
3490b57cec5SDimitry Andric     // processing.
3500b57cec5SDimitry Andric     if (const auto *PN = dyn_cast<PHINode>(V)) {
351fe6060f1SDimitry Andric       append_range(Worklist, PN->incoming_values());
3520b57cec5SDimitry Andric       continue;
3530b57cec5SDimitry Andric     }
3540b57cec5SDimitry Andric     if (const auto *SI = dyn_cast<SelectInst>(V)) {
3550b57cec5SDimitry Andric       // Push in the true and false values
3560b57cec5SDimitry Andric       Worklist.push_back(SI->getTrueValue());
3570b57cec5SDimitry Andric       Worklist.push_back(SI->getFalseValue());
3580b57cec5SDimitry Andric       continue;
3590b57cec5SDimitry Andric     }
36081ad6265SDimitry Andric     if (const auto *GCRelocate = dyn_cast<GCRelocateInst>(V)) {
36181ad6265SDimitry Andric       // GCRelocates do not change null-ness or constant-ness of the value.
36281ad6265SDimitry Andric       // So we can continue with derived pointer this instruction relocates.
36381ad6265SDimitry Andric       Worklist.push_back(GCRelocate->getDerivedPtr());
36481ad6265SDimitry Andric       continue;
36581ad6265SDimitry Andric     }
36681ad6265SDimitry Andric     if (const auto *FI = dyn_cast<FreezeInst>(V)) {
36781ad6265SDimitry Andric       // Freeze does not change null-ness or constant-ness of the value.
36881ad6265SDimitry Andric       Worklist.push_back(FI->getOperand(0));
36981ad6265SDimitry Andric       continue;
37081ad6265SDimitry Andric     }
3710b57cec5SDimitry Andric     if (isa<Constant>(V)) {
3720b57cec5SDimitry Andric       // We found at least one base pointer which is non-null, so this derived
3730b57cec5SDimitry Andric       // pointer is not exclusively derived from null.
3740b57cec5SDimitry Andric       if (V != Constant::getNullValue(V->getType()))
3750b57cec5SDimitry Andric         isExclusivelyDerivedFromNull = false;
3760b57cec5SDimitry Andric       // Continue processing the remaining values to make sure it's exclusively
3770b57cec5SDimitry Andric       // constant.
3780b57cec5SDimitry Andric       continue;
3790b57cec5SDimitry Andric     }
3800b57cec5SDimitry Andric     // At this point, we know that the base pointer is not exclusively
3810b57cec5SDimitry Andric     // constant.
3820b57cec5SDimitry Andric     return BaseType::NonConstant;
3830b57cec5SDimitry Andric   }
3840b57cec5SDimitry Andric   // Now, we know that the base pointer is exclusively constant, but we need to
3850b57cec5SDimitry Andric   // differentiate between exclusive null constant and non-null constant.
3860b57cec5SDimitry Andric   return isExclusivelyDerivedFromNull ? BaseType::ExclusivelyNull
3870b57cec5SDimitry Andric                                       : BaseType::ExclusivelySomeConstant;
3880b57cec5SDimitry Andric }
3890b57cec5SDimitry Andric 
isNotExclusivelyConstantDerived(const Value * V)3900b57cec5SDimitry Andric static bool isNotExclusivelyConstantDerived(const Value *V) {
3910b57cec5SDimitry Andric   return getBaseType(V) == BaseType::NonConstant;
3920b57cec5SDimitry Andric }
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric namespace {
3950b57cec5SDimitry Andric class InstructionVerifier;
3960b57cec5SDimitry Andric 
3970b57cec5SDimitry Andric /// Builds BasicBlockState for each BB of the function.
3980b57cec5SDimitry Andric /// It can traverse function for verification and provides all required
3990b57cec5SDimitry Andric /// information.
4000b57cec5SDimitry Andric ///
4010b57cec5SDimitry Andric /// GC pointer may be in one of three states: relocated, unrelocated and
4020b57cec5SDimitry Andric /// poisoned.
4030b57cec5SDimitry Andric /// Relocated pointer may be used without any restrictions.
4040b57cec5SDimitry Andric /// Unrelocated pointer cannot be dereferenced, passed as argument to any call
4050b57cec5SDimitry Andric /// or returned. Unrelocated pointer may be safely compared against another
4060b57cec5SDimitry Andric /// unrelocated pointer or against a pointer exclusively derived from null.
4070b57cec5SDimitry Andric /// Poisoned pointers are produced when we somehow derive pointer from relocated
4080b57cec5SDimitry Andric /// and unrelocated pointers (e.g. phi, select). This pointers may be safely
4090b57cec5SDimitry Andric /// used in a very limited number of situations. Currently the only way to use
4100b57cec5SDimitry Andric /// it is comparison against constant exclusively derived from null. All
4110b57cec5SDimitry Andric /// limitations arise due to their undefined state: this pointers should be
4120b57cec5SDimitry Andric /// treated as relocated and unrelocated simultaneously.
4130b57cec5SDimitry Andric /// Rules of deriving:
4140b57cec5SDimitry Andric /// R + U = P - that's where the poisoned pointers come from
4150b57cec5SDimitry Andric /// P + X = P
4160b57cec5SDimitry Andric /// U + U = U
4170b57cec5SDimitry Andric /// R + R = R
4180b57cec5SDimitry Andric /// X + C = X
4190b57cec5SDimitry Andric /// Where "+" - any operation that somehow derive pointer, U - unrelocated,
4200b57cec5SDimitry Andric /// R - relocated and P - poisoned, C - constant, X - U or R or P or C or
4210b57cec5SDimitry Andric /// nothing (in case when "+" is unary operation).
4220b57cec5SDimitry Andric /// Deriving of pointers by itself is always safe.
4230b57cec5SDimitry Andric /// NOTE: when we are making decision on the status of instruction's result:
4240b57cec5SDimitry Andric /// a) for phi we need to check status of each input *at the end of
4250b57cec5SDimitry Andric ///    corresponding predecessor BB*.
4260b57cec5SDimitry Andric /// b) for other instructions we need to check status of each input *at the
4270b57cec5SDimitry Andric ///    current point*.
4280b57cec5SDimitry Andric ///
4290b57cec5SDimitry Andric /// FIXME: This works fairly well except one case
4300b57cec5SDimitry Andric ///     bb1:
4310b57cec5SDimitry Andric ///     p = *some GC-ptr def*
4320b57cec5SDimitry Andric ///     p1 = gep p, offset
4330b57cec5SDimitry Andric ///         /     |
4340b57cec5SDimitry Andric ///        /      |
4350b57cec5SDimitry Andric ///    bb2:       |
4360b57cec5SDimitry Andric ///    safepoint  |
4370b57cec5SDimitry Andric ///        \      |
4380b57cec5SDimitry Andric ///         \     |
4390b57cec5SDimitry Andric ///      bb3:
4400b57cec5SDimitry Andric ///      p2 = phi [p, bb2] [p1, bb1]
4410b57cec5SDimitry Andric ///      p3 = phi [p, bb2] [p, bb1]
4420b57cec5SDimitry Andric ///      here p and p1 is unrelocated
4430b57cec5SDimitry Andric ///           p2 and p3 is poisoned (though they shouldn't be)
4440b57cec5SDimitry Andric ///
4450b57cec5SDimitry Andric /// This leads to some weird results:
4460b57cec5SDimitry Andric ///      cmp eq p, p2 - illegal instruction (false-positive)
4470b57cec5SDimitry Andric ///      cmp eq p1, p2 - illegal instruction (false-positive)
4480b57cec5SDimitry Andric ///      cmp eq p, p3 - illegal instruction (false-positive)
4490b57cec5SDimitry Andric ///      cmp eq p, p1 - ok
4500b57cec5SDimitry Andric /// To fix this we need to introduce conception of generations and be able to
4510b57cec5SDimitry Andric /// check if two values belong to one generation or not. This way p2 will be
4520b57cec5SDimitry Andric /// considered to be unrelocated and no false alarm will happen.
4530b57cec5SDimitry Andric class GCPtrTracker {
4540b57cec5SDimitry Andric   const Function &F;
4550b57cec5SDimitry Andric   const CFGDeadness &CD;
4560b57cec5SDimitry Andric   SpecificBumpPtrAllocator<BasicBlockState> BSAllocator;
4570b57cec5SDimitry Andric   DenseMap<const BasicBlock *, BasicBlockState *> BlockMap;
4580b57cec5SDimitry Andric   // This set contains defs of unrelocated pointers that are proved to be legal
4590b57cec5SDimitry Andric   // and don't need verification.
4600b57cec5SDimitry Andric   DenseSet<const Instruction *> ValidUnrelocatedDefs;
4610b57cec5SDimitry Andric   // This set contains poisoned defs. They can be safely ignored during
4620b57cec5SDimitry Andric   // verification too.
4630b57cec5SDimitry Andric   DenseSet<const Value *> PoisonedDefs;
4640b57cec5SDimitry Andric 
4650b57cec5SDimitry Andric public:
4660b57cec5SDimitry Andric   GCPtrTracker(const Function &F, const DominatorTree &DT,
4670b57cec5SDimitry Andric                const CFGDeadness &CD);
4680b57cec5SDimitry Andric 
hasLiveIncomingEdge(const PHINode * PN,const BasicBlock * InBB) const4690b57cec5SDimitry Andric   bool hasLiveIncomingEdge(const PHINode *PN, const BasicBlock *InBB) const {
4700b57cec5SDimitry Andric     return CD.hasLiveIncomingEdge(PN, InBB);
4710b57cec5SDimitry Andric   }
4720b57cec5SDimitry Andric 
4730b57cec5SDimitry Andric   BasicBlockState *getBasicBlockState(const BasicBlock *BB);
4740b57cec5SDimitry Andric   const BasicBlockState *getBasicBlockState(const BasicBlock *BB) const;
4750b57cec5SDimitry Andric 
isValuePoisoned(const Value * V) const4760b57cec5SDimitry Andric   bool isValuePoisoned(const Value *V) const { return PoisonedDefs.count(V); }
4770b57cec5SDimitry Andric 
4780b57cec5SDimitry Andric   /// Traverse each BB of the function and call
4790b57cec5SDimitry Andric   /// InstructionVerifier::verifyInstruction for each possibly invalid
4800b57cec5SDimitry Andric   /// instruction.
4810b57cec5SDimitry Andric   /// It destructively modifies GCPtrTracker so it's passed via rvalue reference
4820b57cec5SDimitry Andric   /// in order to prohibit further usages of GCPtrTracker as it'll be in
4830b57cec5SDimitry Andric   /// inconsistent state.
4840b57cec5SDimitry Andric   static void verifyFunction(GCPtrTracker &&Tracker,
4850b57cec5SDimitry Andric                              InstructionVerifier &Verifier);
4860b57cec5SDimitry Andric 
4870b57cec5SDimitry Andric   /// Returns true for reachable and live blocks.
isMapped(const BasicBlock * BB) const488*06c3fb27SDimitry Andric   bool isMapped(const BasicBlock *BB) const { return BlockMap.contains(BB); }
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric private:
4910b57cec5SDimitry Andric   /// Returns true if the instruction may be safely skipped during verification.
4920b57cec5SDimitry Andric   bool instructionMayBeSkipped(const Instruction *I) const;
4930b57cec5SDimitry Andric 
4940b57cec5SDimitry Andric   /// Iterates over all BBs from BlockMap and recalculates AvailableIn/Out for
4950b57cec5SDimitry Andric   /// each of them until it converges.
4960b57cec5SDimitry Andric   void recalculateBBsStates();
4970b57cec5SDimitry Andric 
4980b57cec5SDimitry Andric   /// Remove from Contribution all defs that legally produce unrelocated
4990b57cec5SDimitry Andric   /// pointers and saves them to ValidUnrelocatedDefs.
5000b57cec5SDimitry Andric   /// Though Contribution should belong to BBS it is passed separately with
5010b57cec5SDimitry Andric   /// different const-modifier in order to emphasize (and guarantee) that only
5020b57cec5SDimitry Andric   /// Contribution will be changed.
5030b57cec5SDimitry Andric   /// Returns true if Contribution was changed otherwise false.
5040b57cec5SDimitry Andric   bool removeValidUnrelocatedDefs(const BasicBlock *BB,
5050b57cec5SDimitry Andric                                   const BasicBlockState *BBS,
5060b57cec5SDimitry Andric                                   AvailableValueSet &Contribution);
5070b57cec5SDimitry Andric 
5080b57cec5SDimitry Andric   /// Gather all the definitions dominating the start of BB into Result. This is
5090b57cec5SDimitry Andric   /// simply the defs introduced by every dominating basic block and the
5100b57cec5SDimitry Andric   /// function arguments.
5110b57cec5SDimitry Andric   void gatherDominatingDefs(const BasicBlock *BB, AvailableValueSet &Result,
5120b57cec5SDimitry Andric                             const DominatorTree &DT);
5130b57cec5SDimitry Andric 
5140b57cec5SDimitry Andric   /// Compute the AvailableOut set for BB, based on the BasicBlockState BBS,
5150b57cec5SDimitry Andric   /// which is the BasicBlockState for BB.
5160b57cec5SDimitry Andric   /// ContributionChanged is set when the verifier runs for the first time
5170b57cec5SDimitry Andric   /// (in this case Contribution was changed from 'empty' to its initial state)
5180b57cec5SDimitry Andric   /// or when Contribution of this BB was changed since last computation.
5190b57cec5SDimitry Andric   static void transferBlock(const BasicBlock *BB, BasicBlockState &BBS,
5200b57cec5SDimitry Andric                             bool ContributionChanged);
5210b57cec5SDimitry Andric 
5220b57cec5SDimitry Andric   /// Model the effect of an instruction on the set of available values.
5230b57cec5SDimitry Andric   static void transferInstruction(const Instruction &I, bool &Cleared,
5240b57cec5SDimitry Andric                                   AvailableValueSet &Available);
5250b57cec5SDimitry Andric };
5260b57cec5SDimitry Andric 
5270b57cec5SDimitry Andric /// It is a visitor for GCPtrTracker::verifyFunction. It decides if the
5280b57cec5SDimitry Andric /// instruction (which uses heap reference) is legal or not, given our safepoint
5290b57cec5SDimitry Andric /// semantics.
5300b57cec5SDimitry Andric class InstructionVerifier {
5310b57cec5SDimitry Andric   bool AnyInvalidUses = false;
5320b57cec5SDimitry Andric 
5330b57cec5SDimitry Andric public:
5340b57cec5SDimitry Andric   void verifyInstruction(const GCPtrTracker *Tracker, const Instruction &I,
5350b57cec5SDimitry Andric                          const AvailableValueSet &AvailableSet);
5360b57cec5SDimitry Andric 
hasAnyInvalidUses() const5370b57cec5SDimitry Andric   bool hasAnyInvalidUses() const { return AnyInvalidUses; }
5380b57cec5SDimitry Andric 
5390b57cec5SDimitry Andric private:
5400b57cec5SDimitry Andric   void reportInvalidUse(const Value &V, const Instruction &I);
5410b57cec5SDimitry Andric };
5420b57cec5SDimitry Andric } // end anonymous namespace
5430b57cec5SDimitry Andric 
GCPtrTracker(const Function & F,const DominatorTree & DT,const CFGDeadness & CD)5440b57cec5SDimitry Andric GCPtrTracker::GCPtrTracker(const Function &F, const DominatorTree &DT,
5450b57cec5SDimitry Andric                            const CFGDeadness &CD) : F(F), CD(CD) {
5460b57cec5SDimitry Andric   // Calculate Contribution of each live BB.
5470b57cec5SDimitry Andric   // Allocate BB states for live blocks.
5480b57cec5SDimitry Andric   for (const BasicBlock &BB : F)
5490b57cec5SDimitry Andric     if (!CD.isDeadBlock(&BB)) {
5500b57cec5SDimitry Andric       BasicBlockState *BBS = new (BSAllocator.Allocate()) BasicBlockState;
5510b57cec5SDimitry Andric       for (const auto &I : BB)
5520b57cec5SDimitry Andric         transferInstruction(I, BBS->Cleared, BBS->Contribution);
5530b57cec5SDimitry Andric       BlockMap[&BB] = BBS;
5540b57cec5SDimitry Andric     }
5550b57cec5SDimitry Andric 
5560b57cec5SDimitry Andric   // Initialize AvailableIn/Out sets of each BB using only information about
5570b57cec5SDimitry Andric   // dominating BBs.
5580b57cec5SDimitry Andric   for (auto &BBI : BlockMap) {
5590b57cec5SDimitry Andric     gatherDominatingDefs(BBI.first, BBI.second->AvailableIn, DT);
5600b57cec5SDimitry Andric     transferBlock(BBI.first, *BBI.second, true);
5610b57cec5SDimitry Andric   }
5620b57cec5SDimitry Andric 
5630b57cec5SDimitry Andric   // Simulate the flow of defs through the CFG and recalculate AvailableIn/Out
5640b57cec5SDimitry Andric   // sets of each BB until it converges. If any def is proved to be an
5650b57cec5SDimitry Andric   // unrelocated pointer, it will be removed from all BBSs.
5660b57cec5SDimitry Andric   recalculateBBsStates();
5670b57cec5SDimitry Andric }
5680b57cec5SDimitry Andric 
getBasicBlockState(const BasicBlock * BB)5690b57cec5SDimitry Andric BasicBlockState *GCPtrTracker::getBasicBlockState(const BasicBlock *BB) {
570e8d8bef9SDimitry Andric   return BlockMap.lookup(BB);
5710b57cec5SDimitry Andric }
5720b57cec5SDimitry Andric 
getBasicBlockState(const BasicBlock * BB) const5730b57cec5SDimitry Andric const BasicBlockState *GCPtrTracker::getBasicBlockState(
5740b57cec5SDimitry Andric     const BasicBlock *BB) const {
5750b57cec5SDimitry Andric   return const_cast<GCPtrTracker *>(this)->getBasicBlockState(BB);
5760b57cec5SDimitry Andric }
5770b57cec5SDimitry Andric 
instructionMayBeSkipped(const Instruction * I) const5780b57cec5SDimitry Andric bool GCPtrTracker::instructionMayBeSkipped(const Instruction *I) const {
5790b57cec5SDimitry Andric   // Poisoned defs are skipped since they are always safe by itself by
5800b57cec5SDimitry Andric   // definition (for details see comment to this class).
5810b57cec5SDimitry Andric   return ValidUnrelocatedDefs.count(I) || PoisonedDefs.count(I);
5820b57cec5SDimitry Andric }
5830b57cec5SDimitry Andric 
verifyFunction(GCPtrTracker && Tracker,InstructionVerifier & Verifier)5840b57cec5SDimitry Andric void GCPtrTracker::verifyFunction(GCPtrTracker &&Tracker,
5850b57cec5SDimitry Andric                                   InstructionVerifier &Verifier) {
5860b57cec5SDimitry Andric   // We need RPO here to a) report always the first error b) report errors in
5870b57cec5SDimitry Andric   // same order from run to run.
5880b57cec5SDimitry Andric   ReversePostOrderTraversal<const Function *> RPOT(&Tracker.F);
5890b57cec5SDimitry Andric   for (const BasicBlock *BB : RPOT) {
5900b57cec5SDimitry Andric     BasicBlockState *BBS = Tracker.getBasicBlockState(BB);
5910b57cec5SDimitry Andric     if (!BBS)
5920b57cec5SDimitry Andric       continue;
5930b57cec5SDimitry Andric 
5940b57cec5SDimitry Andric     // We destructively modify AvailableIn as we traverse the block instruction
5950b57cec5SDimitry Andric     // by instruction.
5960b57cec5SDimitry Andric     AvailableValueSet &AvailableSet = BBS->AvailableIn;
5970b57cec5SDimitry Andric     for (const Instruction &I : *BB) {
5980b57cec5SDimitry Andric       if (Tracker.instructionMayBeSkipped(&I))
5990b57cec5SDimitry Andric         continue; // This instruction shouldn't be added to AvailableSet.
6000b57cec5SDimitry Andric 
6010b57cec5SDimitry Andric       Verifier.verifyInstruction(&Tracker, I, AvailableSet);
6020b57cec5SDimitry Andric 
6030b57cec5SDimitry Andric       // Model the effect of current instruction on AvailableSet to keep the set
6040b57cec5SDimitry Andric       // relevant at each point of BB.
6050b57cec5SDimitry Andric       bool Cleared = false;
6060b57cec5SDimitry Andric       transferInstruction(I, Cleared, AvailableSet);
6070b57cec5SDimitry Andric       (void)Cleared;
6080b57cec5SDimitry Andric     }
6090b57cec5SDimitry Andric   }
6100b57cec5SDimitry Andric }
6110b57cec5SDimitry Andric 
recalculateBBsStates()6120b57cec5SDimitry Andric void GCPtrTracker::recalculateBBsStates() {
6130b57cec5SDimitry Andric   SetVector<const BasicBlock *> Worklist;
6140b57cec5SDimitry Andric   // TODO: This order is suboptimal, it's better to replace it with priority
6150b57cec5SDimitry Andric   // queue where priority is RPO number of BB.
6160b57cec5SDimitry Andric   for (auto &BBI : BlockMap)
6170b57cec5SDimitry Andric     Worklist.insert(BBI.first);
6180b57cec5SDimitry Andric 
6190b57cec5SDimitry Andric   // This loop iterates the AvailableIn/Out sets until it converges.
6200b57cec5SDimitry Andric   // The AvailableIn and AvailableOut sets decrease as we iterate.
6210b57cec5SDimitry Andric   while (!Worklist.empty()) {
6220b57cec5SDimitry Andric     const BasicBlock *BB = Worklist.pop_back_val();
6230b57cec5SDimitry Andric     BasicBlockState *BBS = getBasicBlockState(BB);
6240b57cec5SDimitry Andric     if (!BBS)
6250b57cec5SDimitry Andric       continue; // Ignore dead successors.
6260b57cec5SDimitry Andric 
6270b57cec5SDimitry Andric     size_t OldInCount = BBS->AvailableIn.size();
6280b57cec5SDimitry Andric     for (const_pred_iterator PredIt(BB), End(BB, true); PredIt != End; ++PredIt) {
6290b57cec5SDimitry Andric       const BasicBlock *PBB = *PredIt;
6300b57cec5SDimitry Andric       BasicBlockState *PBBS = getBasicBlockState(PBB);
6310b57cec5SDimitry Andric       if (PBBS && !CD.isDeadEdge(&CFGDeadness::getEdge(PredIt)))
6320b57cec5SDimitry Andric         set_intersect(BBS->AvailableIn, PBBS->AvailableOut);
6330b57cec5SDimitry Andric     }
6340b57cec5SDimitry Andric 
6350b57cec5SDimitry Andric     assert(OldInCount >= BBS->AvailableIn.size() && "invariant!");
6360b57cec5SDimitry Andric 
6370b57cec5SDimitry Andric     bool InputsChanged = OldInCount != BBS->AvailableIn.size();
6380b57cec5SDimitry Andric     bool ContributionChanged =
6390b57cec5SDimitry Andric         removeValidUnrelocatedDefs(BB, BBS, BBS->Contribution);
6400b57cec5SDimitry Andric     if (!InputsChanged && !ContributionChanged)
6410b57cec5SDimitry Andric       continue;
6420b57cec5SDimitry Andric 
6430b57cec5SDimitry Andric     size_t OldOutCount = BBS->AvailableOut.size();
6440b57cec5SDimitry Andric     transferBlock(BB, *BBS, ContributionChanged);
6450b57cec5SDimitry Andric     if (OldOutCount != BBS->AvailableOut.size()) {
6460b57cec5SDimitry Andric       assert(OldOutCount > BBS->AvailableOut.size() && "invariant!");
6470b57cec5SDimitry Andric       Worklist.insert(succ_begin(BB), succ_end(BB));
6480b57cec5SDimitry Andric     }
6490b57cec5SDimitry Andric   }
6500b57cec5SDimitry Andric }
6510b57cec5SDimitry Andric 
removeValidUnrelocatedDefs(const BasicBlock * BB,const BasicBlockState * BBS,AvailableValueSet & Contribution)6520b57cec5SDimitry Andric bool GCPtrTracker::removeValidUnrelocatedDefs(const BasicBlock *BB,
6530b57cec5SDimitry Andric                                               const BasicBlockState *BBS,
6540b57cec5SDimitry Andric                                               AvailableValueSet &Contribution) {
6550b57cec5SDimitry Andric   assert(&BBS->Contribution == &Contribution &&
6560b57cec5SDimitry Andric          "Passed Contribution should be from the passed BasicBlockState!");
6570b57cec5SDimitry Andric   AvailableValueSet AvailableSet = BBS->AvailableIn;
6580b57cec5SDimitry Andric   bool ContributionChanged = false;
6590b57cec5SDimitry Andric   // For explanation why instructions are processed this way see
6600b57cec5SDimitry Andric   // "Rules of deriving" in the comment to this class.
6610b57cec5SDimitry Andric   for (const Instruction &I : *BB) {
6620b57cec5SDimitry Andric     bool ValidUnrelocatedPointerDef = false;
6630b57cec5SDimitry Andric     bool PoisonedPointerDef = false;
6640b57cec5SDimitry Andric     // TODO: `select` instructions should be handled here too.
6650b57cec5SDimitry Andric     if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
6660b57cec5SDimitry Andric       if (containsGCPtrType(PN->getType())) {
6670b57cec5SDimitry Andric         // If both is true, output is poisoned.
6680b57cec5SDimitry Andric         bool HasRelocatedInputs = false;
6690b57cec5SDimitry Andric         bool HasUnrelocatedInputs = false;
6700b57cec5SDimitry Andric         for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
6710b57cec5SDimitry Andric           const BasicBlock *InBB = PN->getIncomingBlock(i);
6720b57cec5SDimitry Andric           if (!isMapped(InBB) ||
6730b57cec5SDimitry Andric               !CD.hasLiveIncomingEdge(PN, InBB))
6740b57cec5SDimitry Andric             continue; // Skip dead block or dead edge.
6750b57cec5SDimitry Andric 
6760b57cec5SDimitry Andric           const Value *InValue = PN->getIncomingValue(i);
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric           if (isNotExclusivelyConstantDerived(InValue)) {
6790b57cec5SDimitry Andric             if (isValuePoisoned(InValue)) {
6800b57cec5SDimitry Andric               // If any of inputs is poisoned, output is always poisoned too.
6810b57cec5SDimitry Andric               HasRelocatedInputs = true;
6820b57cec5SDimitry Andric               HasUnrelocatedInputs = true;
6830b57cec5SDimitry Andric               break;
6840b57cec5SDimitry Andric             }
6850b57cec5SDimitry Andric             if (BlockMap[InBB]->AvailableOut.count(InValue))
6860b57cec5SDimitry Andric               HasRelocatedInputs = true;
6870b57cec5SDimitry Andric             else
6880b57cec5SDimitry Andric               HasUnrelocatedInputs = true;
6890b57cec5SDimitry Andric           }
6900b57cec5SDimitry Andric         }
6910b57cec5SDimitry Andric         if (HasUnrelocatedInputs) {
6920b57cec5SDimitry Andric           if (HasRelocatedInputs)
6930b57cec5SDimitry Andric             PoisonedPointerDef = true;
6940b57cec5SDimitry Andric           else
6950b57cec5SDimitry Andric             ValidUnrelocatedPointerDef = true;
6960b57cec5SDimitry Andric         }
6970b57cec5SDimitry Andric       }
6980b57cec5SDimitry Andric     } else if ((isa<GetElementPtrInst>(I) || isa<BitCastInst>(I)) &&
6990b57cec5SDimitry Andric                containsGCPtrType(I.getType())) {
7000b57cec5SDimitry Andric       // GEP/bitcast of unrelocated pointer is legal by itself but this def
7010b57cec5SDimitry Andric       // shouldn't appear in any AvailableSet.
7020b57cec5SDimitry Andric       for (const Value *V : I.operands())
7030b57cec5SDimitry Andric         if (containsGCPtrType(V->getType()) &&
7040b57cec5SDimitry Andric             isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V)) {
7050b57cec5SDimitry Andric           if (isValuePoisoned(V))
7060b57cec5SDimitry Andric             PoisonedPointerDef = true;
7070b57cec5SDimitry Andric           else
7080b57cec5SDimitry Andric             ValidUnrelocatedPointerDef = true;
7090b57cec5SDimitry Andric           break;
7100b57cec5SDimitry Andric         }
7110b57cec5SDimitry Andric     }
7120b57cec5SDimitry Andric     assert(!(ValidUnrelocatedPointerDef && PoisonedPointerDef) &&
7130b57cec5SDimitry Andric            "Value cannot be both unrelocated and poisoned!");
7140b57cec5SDimitry Andric     if (ValidUnrelocatedPointerDef) {
7150b57cec5SDimitry Andric       // Remove def of unrelocated pointer from Contribution of this BB and
7160b57cec5SDimitry Andric       // trigger update of all its successors.
7170b57cec5SDimitry Andric       Contribution.erase(&I);
7180b57cec5SDimitry Andric       PoisonedDefs.erase(&I);
7190b57cec5SDimitry Andric       ValidUnrelocatedDefs.insert(&I);
7200b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Removing urelocated " << I
7210b57cec5SDimitry Andric                         << " from Contribution of " << BB->getName() << "\n");
7220b57cec5SDimitry Andric       ContributionChanged = true;
7230b57cec5SDimitry Andric     } else if (PoisonedPointerDef) {
7240b57cec5SDimitry Andric       // Mark pointer as poisoned, remove its def from Contribution and trigger
7250b57cec5SDimitry Andric       // update of all successors.
7260b57cec5SDimitry Andric       Contribution.erase(&I);
7270b57cec5SDimitry Andric       PoisonedDefs.insert(&I);
7280b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "Removing poisoned " << I << " from Contribution of "
7290b57cec5SDimitry Andric                         << BB->getName() << "\n");
7300b57cec5SDimitry Andric       ContributionChanged = true;
7310b57cec5SDimitry Andric     } else {
7320b57cec5SDimitry Andric       bool Cleared = false;
7330b57cec5SDimitry Andric       transferInstruction(I, Cleared, AvailableSet);
7340b57cec5SDimitry Andric       (void)Cleared;
7350b57cec5SDimitry Andric     }
7360b57cec5SDimitry Andric   }
7370b57cec5SDimitry Andric   return ContributionChanged;
7380b57cec5SDimitry Andric }
7390b57cec5SDimitry Andric 
gatherDominatingDefs(const BasicBlock * BB,AvailableValueSet & Result,const DominatorTree & DT)7400b57cec5SDimitry Andric void GCPtrTracker::gatherDominatingDefs(const BasicBlock *BB,
7410b57cec5SDimitry Andric                                         AvailableValueSet &Result,
7420b57cec5SDimitry Andric                                         const DominatorTree &DT) {
7430b57cec5SDimitry Andric   DomTreeNode *DTN = DT[const_cast<BasicBlock *>(BB)];
7440b57cec5SDimitry Andric 
7450b57cec5SDimitry Andric   assert(DTN && "Unreachable blocks are ignored");
7460b57cec5SDimitry Andric   while (DTN->getIDom()) {
7470b57cec5SDimitry Andric     DTN = DTN->getIDom();
7480b57cec5SDimitry Andric     auto BBS = getBasicBlockState(DTN->getBlock());
7490b57cec5SDimitry Andric     assert(BBS && "immediate dominator cannot be dead for a live block");
7500b57cec5SDimitry Andric     const auto &Defs = BBS->Contribution;
7510b57cec5SDimitry Andric     Result.insert(Defs.begin(), Defs.end());
7520b57cec5SDimitry Andric     // If this block is 'Cleared', then nothing LiveIn to this block can be
7530b57cec5SDimitry Andric     // available after this block completes.  Note: This turns out to be
7540b57cec5SDimitry Andric     // really important for reducing memory consuption of the initial available
7550b57cec5SDimitry Andric     // sets and thus peak memory usage by this verifier.
7560b57cec5SDimitry Andric     if (BBS->Cleared)
7570b57cec5SDimitry Andric       return;
7580b57cec5SDimitry Andric   }
7590b57cec5SDimitry Andric 
7600b57cec5SDimitry Andric   for (const Argument &A : BB->getParent()->args())
7610b57cec5SDimitry Andric     if (containsGCPtrType(A.getType()))
7620b57cec5SDimitry Andric       Result.insert(&A);
7630b57cec5SDimitry Andric }
7640b57cec5SDimitry Andric 
transferBlock(const BasicBlock * BB,BasicBlockState & BBS,bool ContributionChanged)7650b57cec5SDimitry Andric void GCPtrTracker::transferBlock(const BasicBlock *BB, BasicBlockState &BBS,
7660b57cec5SDimitry Andric                                  bool ContributionChanged) {
7670b57cec5SDimitry Andric   const AvailableValueSet &AvailableIn = BBS.AvailableIn;
7680b57cec5SDimitry Andric   AvailableValueSet &AvailableOut = BBS.AvailableOut;
7690b57cec5SDimitry Andric 
7700b57cec5SDimitry Andric   if (BBS.Cleared) {
7710b57cec5SDimitry Andric     // AvailableOut will change only when Contribution changed.
7720b57cec5SDimitry Andric     if (ContributionChanged)
7730b57cec5SDimitry Andric       AvailableOut = BBS.Contribution;
7740b57cec5SDimitry Andric   } else {
7750b57cec5SDimitry Andric     // Otherwise, we need to reduce the AvailableOut set by things which are no
7760b57cec5SDimitry Andric     // longer in our AvailableIn
7770b57cec5SDimitry Andric     AvailableValueSet Temp = BBS.Contribution;
7780b57cec5SDimitry Andric     set_union(Temp, AvailableIn);
7790b57cec5SDimitry Andric     AvailableOut = std::move(Temp);
7800b57cec5SDimitry Andric   }
7810b57cec5SDimitry Andric 
7820b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Transfered block " << BB->getName() << " from ";
7830b57cec5SDimitry Andric              PrintValueSet(dbgs(), AvailableIn.begin(), AvailableIn.end());
7840b57cec5SDimitry Andric              dbgs() << " to ";
7850b57cec5SDimitry Andric              PrintValueSet(dbgs(), AvailableOut.begin(), AvailableOut.end());
7860b57cec5SDimitry Andric              dbgs() << "\n";);
7870b57cec5SDimitry Andric }
7880b57cec5SDimitry Andric 
transferInstruction(const Instruction & I,bool & Cleared,AvailableValueSet & Available)7890b57cec5SDimitry Andric void GCPtrTracker::transferInstruction(const Instruction &I, bool &Cleared,
7900b57cec5SDimitry Andric                                        AvailableValueSet &Available) {
7915ffd83dbSDimitry Andric   if (isa<GCStatepointInst>(I)) {
7920b57cec5SDimitry Andric     Cleared = true;
7930b57cec5SDimitry Andric     Available.clear();
7940b57cec5SDimitry Andric   } else if (containsGCPtrType(I.getType()))
7950b57cec5SDimitry Andric     Available.insert(&I);
7960b57cec5SDimitry Andric }
7970b57cec5SDimitry Andric 
verifyInstruction(const GCPtrTracker * Tracker,const Instruction & I,const AvailableValueSet & AvailableSet)7980b57cec5SDimitry Andric void InstructionVerifier::verifyInstruction(
7990b57cec5SDimitry Andric     const GCPtrTracker *Tracker, const Instruction &I,
8000b57cec5SDimitry Andric     const AvailableValueSet &AvailableSet) {
8010b57cec5SDimitry Andric   if (const PHINode *PN = dyn_cast<PHINode>(&I)) {
8020b57cec5SDimitry Andric     if (containsGCPtrType(PN->getType()))
8030b57cec5SDimitry Andric       for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
8040b57cec5SDimitry Andric         const BasicBlock *InBB = PN->getIncomingBlock(i);
8050b57cec5SDimitry Andric         const BasicBlockState *InBBS = Tracker->getBasicBlockState(InBB);
8060b57cec5SDimitry Andric         if (!InBBS ||
8070b57cec5SDimitry Andric             !Tracker->hasLiveIncomingEdge(PN, InBB))
8080b57cec5SDimitry Andric           continue; // Skip dead block or dead edge.
8090b57cec5SDimitry Andric 
8100b57cec5SDimitry Andric         const Value *InValue = PN->getIncomingValue(i);
8110b57cec5SDimitry Andric 
8120b57cec5SDimitry Andric         if (isNotExclusivelyConstantDerived(InValue) &&
8130b57cec5SDimitry Andric             !InBBS->AvailableOut.count(InValue))
8140b57cec5SDimitry Andric           reportInvalidUse(*InValue, *PN);
8150b57cec5SDimitry Andric       }
8160b57cec5SDimitry Andric   } else if (isa<CmpInst>(I) &&
8170b57cec5SDimitry Andric              containsGCPtrType(I.getOperand(0)->getType())) {
8180b57cec5SDimitry Andric     Value *LHS = I.getOperand(0), *RHS = I.getOperand(1);
8190b57cec5SDimitry Andric     enum BaseType baseTyLHS = getBaseType(LHS),
8200b57cec5SDimitry Andric                   baseTyRHS = getBaseType(RHS);
8210b57cec5SDimitry Andric 
8220b57cec5SDimitry Andric     // Returns true if LHS and RHS are unrelocated pointers and they are
8230b57cec5SDimitry Andric     // valid unrelocated uses.
8240b57cec5SDimitry Andric     auto hasValidUnrelocatedUse = [&AvailableSet, Tracker, baseTyLHS, baseTyRHS,
8250b57cec5SDimitry Andric                                    &LHS, &RHS] () {
8260b57cec5SDimitry Andric         // A cmp instruction has valid unrelocated pointer operands only if
8270b57cec5SDimitry Andric         // both operands are unrelocated pointers.
8280b57cec5SDimitry Andric         // In the comparison between two pointers, if one is an unrelocated
8290b57cec5SDimitry Andric         // use, the other *should be* an unrelocated use, for this
8300b57cec5SDimitry Andric         // instruction to contain valid unrelocated uses. This unrelocated
8310b57cec5SDimitry Andric         // use can be a null constant as well, or another unrelocated
8320b57cec5SDimitry Andric         // pointer.
8330b57cec5SDimitry Andric         if (AvailableSet.count(LHS) || AvailableSet.count(RHS))
8340b57cec5SDimitry Andric           return false;
8350b57cec5SDimitry Andric         // Constant pointers (that are not exclusively null) may have
8360b57cec5SDimitry Andric         // meaning in different VMs, so we cannot reorder the compare
8370b57cec5SDimitry Andric         // against constant pointers before the safepoint. In other words,
8380b57cec5SDimitry Andric         // comparison of an unrelocated use against a non-null constant
8390b57cec5SDimitry Andric         // maybe invalid.
8400b57cec5SDimitry Andric         if ((baseTyLHS == BaseType::ExclusivelySomeConstant &&
8410b57cec5SDimitry Andric              baseTyRHS == BaseType::NonConstant) ||
8420b57cec5SDimitry Andric             (baseTyLHS == BaseType::NonConstant &&
8430b57cec5SDimitry Andric              baseTyRHS == BaseType::ExclusivelySomeConstant))
8440b57cec5SDimitry Andric           return false;
8450b57cec5SDimitry Andric 
8460b57cec5SDimitry Andric         // If one of pointers is poisoned and other is not exclusively derived
8470b57cec5SDimitry Andric         // from null it is an invalid expression: it produces poisoned result
8480b57cec5SDimitry Andric         // and unless we want to track all defs (not only gc pointers) the only
8490b57cec5SDimitry Andric         // option is to prohibit such instructions.
8500b57cec5SDimitry Andric         if ((Tracker->isValuePoisoned(LHS) && baseTyRHS != ExclusivelyNull) ||
8510b57cec5SDimitry Andric             (Tracker->isValuePoisoned(RHS) && baseTyLHS != ExclusivelyNull))
8520b57cec5SDimitry Andric             return false;
8530b57cec5SDimitry Andric 
8540b57cec5SDimitry Andric         // All other cases are valid cases enumerated below:
8550b57cec5SDimitry Andric         // 1. Comparison between an exclusively derived null pointer and a
8560b57cec5SDimitry Andric         // constant base pointer.
8570b57cec5SDimitry Andric         // 2. Comparison between an exclusively derived null pointer and a
8580b57cec5SDimitry Andric         // non-constant unrelocated base pointer.
8590b57cec5SDimitry Andric         // 3. Comparison between 2 unrelocated pointers.
8600b57cec5SDimitry Andric         // 4. Comparison between a pointer exclusively derived from null and a
8610b57cec5SDimitry Andric         // non-constant poisoned pointer.
8620b57cec5SDimitry Andric         return true;
8630b57cec5SDimitry Andric     };
8640b57cec5SDimitry Andric     if (!hasValidUnrelocatedUse()) {
8650b57cec5SDimitry Andric       // Print out all non-constant derived pointers that are unrelocated
8660b57cec5SDimitry Andric       // uses, which are invalid.
8670b57cec5SDimitry Andric       if (baseTyLHS == BaseType::NonConstant && !AvailableSet.count(LHS))
8680b57cec5SDimitry Andric         reportInvalidUse(*LHS, I);
8690b57cec5SDimitry Andric       if (baseTyRHS == BaseType::NonConstant && !AvailableSet.count(RHS))
8700b57cec5SDimitry Andric         reportInvalidUse(*RHS, I);
8710b57cec5SDimitry Andric     }
8720b57cec5SDimitry Andric   } else {
8730b57cec5SDimitry Andric     for (const Value *V : I.operands())
8740b57cec5SDimitry Andric       if (containsGCPtrType(V->getType()) &&
8750b57cec5SDimitry Andric           isNotExclusivelyConstantDerived(V) && !AvailableSet.count(V))
8760b57cec5SDimitry Andric         reportInvalidUse(*V, I);
8770b57cec5SDimitry Andric   }
8780b57cec5SDimitry Andric }
8790b57cec5SDimitry Andric 
reportInvalidUse(const Value & V,const Instruction & I)8800b57cec5SDimitry Andric void InstructionVerifier::reportInvalidUse(const Value &V,
8810b57cec5SDimitry Andric                                            const Instruction &I) {
8820b57cec5SDimitry Andric   errs() << "Illegal use of unrelocated value found!\n";
8830b57cec5SDimitry Andric   errs() << "Def: " << V << "\n";
8840b57cec5SDimitry Andric   errs() << "Use: " << I << "\n";
8850b57cec5SDimitry Andric   if (!PrintOnly)
8860b57cec5SDimitry Andric     abort();
8870b57cec5SDimitry Andric   AnyInvalidUses = true;
8880b57cec5SDimitry Andric }
8890b57cec5SDimitry Andric 
Verify(const Function & F,const DominatorTree & DT,const CFGDeadness & CD)8900b57cec5SDimitry Andric static void Verify(const Function &F, const DominatorTree &DT,
8910b57cec5SDimitry Andric                    const CFGDeadness &CD) {
8920b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Verifying gc pointers in function: " << F.getName()
8930b57cec5SDimitry Andric                     << "\n");
8940b57cec5SDimitry Andric   if (PrintOnly)
8950b57cec5SDimitry Andric     dbgs() << "Verifying gc pointers in function: " << F.getName() << "\n";
8960b57cec5SDimitry Andric 
8970b57cec5SDimitry Andric   GCPtrTracker Tracker(F, DT, CD);
8980b57cec5SDimitry Andric 
8990b57cec5SDimitry Andric   // We now have all the information we need to decide if the use of a heap
9000b57cec5SDimitry Andric   // reference is legal or not, given our safepoint semantics.
9010b57cec5SDimitry Andric 
9020b57cec5SDimitry Andric   InstructionVerifier Verifier;
9030b57cec5SDimitry Andric   GCPtrTracker::verifyFunction(std::move(Tracker), Verifier);
9040b57cec5SDimitry Andric 
9050b57cec5SDimitry Andric   if (PrintOnly && !Verifier.hasAnyInvalidUses()) {
9060b57cec5SDimitry Andric     dbgs() << "No illegal uses found by SafepointIRVerifier in: " << F.getName()
9070b57cec5SDimitry Andric            << "\n";
9080b57cec5SDimitry Andric   }
9090b57cec5SDimitry Andric }
910