xref: /llvm-project/llvm/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp (revision 729dafc16ba925b45470f133eb9a3e902e9871fc)
1 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // Rewrite call/invoke instructions so as to make potential relocations
11 // performed by the garbage collector explicit in the IR.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/ADT/ArrayRef.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/MapVector.h"
19 #include "llvm/ADT/None.h"
20 #include "llvm/ADT/Optional.h"
21 #include "llvm/ADT/STLExtras.h"
22 #include "llvm/ADT/SetVector.h"
23 #include "llvm/ADT/SmallSet.h"
24 #include "llvm/ADT/SmallVector.h"
25 #include "llvm/ADT/StringRef.h"
26 #include "llvm/ADT/iterator_range.h"
27 #include "llvm/Analysis/TargetLibraryInfo.h"
28 #include "llvm/Analysis/TargetTransformInfo.h"
29 #include "llvm/IR/Argument.h"
30 #include "llvm/IR/Attributes.h"
31 #include "llvm/IR/BasicBlock.h"
32 #include "llvm/IR/CallSite.h"
33 #include "llvm/IR/CallingConv.h"
34 #include "llvm/IR/Constant.h"
35 #include "llvm/IR/Constants.h"
36 #include "llvm/IR/DataLayout.h"
37 #include "llvm/IR/DerivedTypes.h"
38 #include "llvm/IR/Dominators.h"
39 #include "llvm/IR/Function.h"
40 #include "llvm/IR/IRBuilder.h"
41 #include "llvm/IR/InstIterator.h"
42 #include "llvm/IR/InstrTypes.h"
43 #include "llvm/IR/Instruction.h"
44 #include "llvm/IR/Instructions.h"
45 #include "llvm/IR/IntrinsicInst.h"
46 #include "llvm/IR/Intrinsics.h"
47 #include "llvm/IR/LLVMContext.h"
48 #include "llvm/IR/MDBuilder.h"
49 #include "llvm/IR/Metadata.h"
50 #include "llvm/IR/Module.h"
51 #include "llvm/IR/Statepoint.h"
52 #include "llvm/IR/Type.h"
53 #include "llvm/IR/User.h"
54 #include "llvm/IR/Value.h"
55 #include "llvm/IR/ValueHandle.h"
56 #include "llvm/Pass.h"
57 #include "llvm/Support/Casting.h"
58 #include "llvm/Support/CommandLine.h"
59 #include "llvm/Support/Compiler.h"
60 #include "llvm/Support/Debug.h"
61 #include "llvm/Support/ErrorHandling.h"
62 #include "llvm/Support/raw_ostream.h"
63 #include "llvm/Transforms/Scalar.h"
64 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
65 #include "llvm/Transforms/Utils/Local.h"
66 #include "llvm/Transforms/Utils/PromoteMemToReg.h"
67 #include <algorithm>
68 #include <cassert>
69 #include <cstddef>
70 #include <cstdint>
71 #include <iterator>
72 #include <set>
73 #include <string>
74 #include <utility>
75 #include <vector>
76 
77 #define DEBUG_TYPE "rewrite-statepoints-for-gc"
78 
79 using namespace llvm;
80 
// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                  cl::init(false));
// Print only the number of live values at each safepoint (cheaper than
// dumping the full set).
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
                                      cl::init(false));

// Print out the base pointers for debugging
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                       cl::init(false));

// Cost threshold measuring when it is profitable to rematerialize value instead
// of relocating it
static cl::opt<unsigned>
RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
                           cl::init(6));

// If true, clobber values thought to be non-live at a statepoint so that
// liveness bugs fail fast (the clobbering itself happens later in this file).
// Defaults on when EXPENSIVE_CHECKS builds are enabled.
#ifdef EXPENSIVE_CHECKS
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif

// Command-line override for the ClobberNonLive default above.
static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                  cl::location(ClobberNonLive),
                                                  cl::Hidden);

// If false, assert (in GetDeoptBundleOperands below) when a non-leaf call is
// found without a "deopt" operand bundle; if true (the default), such calls
// are accepted and treated as having no deopt state.
static cl::opt<bool>
    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                   cl::Hidden, cl::init(true));
110 
111 namespace {
112 
/// Module pass that makes potential GC relocations explicit: each function is
/// rewritten in turn, and if anything changed, attributes/metadata that the
/// rewrite invalidated are stripped module-wide.
struct RewriteStatepointsForGC : public ModulePass {
  static char ID; // Pass identification, replacement for typeid

  RewriteStatepointsForGC() : ModulePass(ID) {
    initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry());
  }

  // Per-function worker; defined later in the file.
  bool runOnFunction(Function &F);

  bool runOnModule(Module &M) override {
    bool Changed = false;
    // Rewrite every function; a change in any one of them forces the
    // module-wide stripping below.
    for (Function &F : M)
      Changed |= runOnFunction(F);

    if (Changed) {
      // stripNonValidData asserts that shouldRewriteStatepointsIn
      // returns true for at least one function in the module.  Since at least
      // one function changed, we know that the precondition is satisfied.
      stripNonValidData(M);
    }

    return Changed;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // We add and rewrite a bunch of instructions, but don't really do much
    // else.  We could in theory preserve a lot more analyses here.
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }

  /// The IR fed into RewriteStatepointsForGC may have had attributes and
  /// metadata implying dereferenceability that are no longer valid/correct after
  /// RewriteStatepointsForGC has run. This is because semantically, after
  /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
  /// heap. stripNonValidData (conservatively) restores
  /// correctness by erasing all attributes in the module that externally imply
  /// dereferenceability. Similar reasoning also applies to the noalias
  /// attributes and metadata. gc.statepoint can touch the entire heap including
  /// noalias objects.
  /// Apart from attributes and metadata, we also remove instructions that imply
  /// constant physical memory: llvm.invariant.start.
  void stripNonValidData(Module &M);

  // Helpers for stripNonValidData
  void stripNonValidDataFromBody(Function &F);
  void stripNonValidAttributesFromPrototype(Function &F);

  // Certain metadata on instructions are invalid after running RS4GC.
  // Optimizations that run after RS4GC can incorrectly use this metadata to
  // optimize functions. We drop such metadata on the instruction.
  void stripInvalidMetadataFromInstruction(Instruction &I);
};
167 
168 } // end anonymous namespace
169 
170 char RewriteStatepointsForGC::ID = 0;
171 
172 ModulePass *llvm::createRewriteStatepointsForGCPass() {
173   return new RewriteStatepointsForGC();
174 }
175 
176 INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
177                       "Make relocations explicit at statepoints", false, false)
178 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
179 INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
180 INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
181                     "Make relocations explicit at statepoints", false, false)
182 
183 namespace {
184 
/// Per-block dataflow facts produced by the liveness computation
/// (computeLiveInValues) and consumed by findLiveSetAtInst.
struct GCPtrLivenessData {
  /// Values defined in this block.
  MapVector<BasicBlock *, SetVector<Value *>> KillSet;

  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  MapVector<BasicBlock *, SetVector<Value *>> LiveSet;

  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here)
  MapVector<BasicBlock *, SetVector<Value *>> LiveIn;

  /// Values live out of this basic block (i.e. live into
  /// any successor block)
  MapVector<BasicBlock *, SetVector<Value *>> LiveOut;
};
201 
// The type of the internal cache used inside the findBasePointers family
// of functions.  From the callers perspective, this is an opaque type and
// should not be inspected.
//
// In the actual implementation this caches two relations:
// - The base relation itself (i.e. this pointer is based on that one)
// - The base defining value relation (i.e. before base_phi insertion)
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain.  Internally, we add a mixture of the two
// types, then update all the second type to the first type
using DefiningValueMapTy = MapVector<Value *, Value *>;
// The set of values live across a single statepoint.
using StatepointLiveSetTy = SetVector<Value *>;
// Maps a rematerialized copy back to the original value it was cloned from;
// AssertingVH so that deletion of either side while still referenced trips an
// assert in asserts builds.
using RematerializedValueMapTy =
    MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;
216 
/// Intermediate state accumulated while a single call/invoke is being
/// rewritten into a gc.statepoint sequence.
struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy LiveSet;

  /// Mapping from live pointers to a base-defining-value
  MapVector<Value *, Value *> PointerToBase;

  /// The *new* gc.statepoint instruction itself.  This produces the token
  /// that normal path gc.relocates and the gc.result are tied to.
  Instruction *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Record live values which we rematerialized instead of relocating.
  /// They are not included in the 'LiveSet' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};
237 
238 } // end anonymous namespace
239 
240 static ArrayRef<Use> GetDeoptBundleOperands(ImmutableCallSite CS) {
241   Optional<OperandBundleUse> DeoptBundle =
242       CS.getOperandBundle(LLVMContext::OB_deopt);
243 
244   if (!DeoptBundle.hasValue()) {
245     assert(AllowStatepointWithNoDeoptInfo &&
246            "Found non-leaf call without deopt info!");
247     return None;
248   }
249 
250   return DeoptBundle.getValue().Inputs;
251 }
252 
253 /// Compute the live-in set for every basic block in the function
254 static void computeLiveInValues(DominatorTree &DT, Function &F,
255                                 GCPtrLivenessData &Data);
256 
257 /// Given results from the dataflow liveness computation, find the set of live
258 /// Values at a particular instruction.
259 static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
260                               StatepointLiveSetTy &out);
261 
262 // TODO: Once we can get to the GCStrategy, this becomes
263 // Optional<bool> isGCManagedPointer(const Type *Ty) const override {
264 
265 static bool isGCPointerType(Type *T) {
266   if (auto *PT = dyn_cast<PointerType>(T))
267     // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
268     // GC managed heap.  We know that a pointer into this heap needs to be
269     // updated and that no other pointer does.
270     return PT->getAddressSpace() == 1;
271   return false;
272 }
273 
274 // Return true if this type is one which a) is a gc pointer or contains a GC
275 // pointer and b) is of a type this code expects to encounter as a live value.
276 // (The insertion code will assert that a type which matches (a) and not (b)
277 // is not encountered.)
278 static bool isHandledGCPointerType(Type *T) {
279   // We fully support gc pointers
280   if (isGCPointerType(T))
281     return true;
282   // We partially support vectors of gc pointers. The code will assert if it
283   // can't handle something.
284   if (auto VT = dyn_cast<VectorType>(T))
285     if (isGCPointerType(VT->getElementType()))
286       return true;
287   return false;
288 }
289 
290 #ifndef NDEBUG
291 /// Returns true if this type contains a gc pointer whether we know how to
292 /// handle that type or not.
293 static bool containsGCPtrType(Type *Ty) {
294   if (isGCPointerType(Ty))
295     return true;
296   if (VectorType *VT = dyn_cast<VectorType>(Ty))
297     return isGCPointerType(VT->getScalarType());
298   if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
299     return containsGCPtrType(AT->getElementType());
300   if (StructType *ST = dyn_cast<StructType>(Ty))
301     return llvm::any_of(ST->subtypes(), containsGCPtrType);
302   return false;
303 }
304 
305 // Returns true if this is a type which a) is a gc pointer or contains a GC
306 // pointer and b) is of a type which the code doesn't expect (i.e. first class
307 // aggregates).  Used to trip assertions.
308 static bool isUnhandledGCPointerType(Type *Ty) {
309   return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
310 }
311 #endif
312 
313 // Return the name of the value suffixed with the provided value, or if the
314 // value didn't have a name, the default value specified.
315 static std::string suffixed_name_or(Value *V, StringRef Suffix,
316                                     StringRef DefaultName) {
317   return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
318 }
319 
320 // Conservatively identifies any definitions which might be live at the
321 // given instruction. The  analysis is performed immediately before the
322 // given instruction. Values defined by that instruction are not considered
323 // live.  Values used by that instruction are considered live.
324 static void
325 analyzeParsePointLiveness(DominatorTree &DT,
326                           GCPtrLivenessData &OriginalLivenessData, CallSite CS,
327                           PartiallyConstructedSafepointRecord &Result) {
328   Instruction *Inst = CS.getInstruction();
329 
330   StatepointLiveSetTy LiveSet;
331   findLiveSetAtInst(Inst, OriginalLivenessData, LiveSet);
332 
333   if (PrintLiveSet) {
334     dbgs() << "Live Variables:\n";
335     for (Value *V : LiveSet)
336       dbgs() << " " << V->getName() << " " << *V << "\n";
337   }
338   if (PrintLiveSetSize) {
339     dbgs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
340     dbgs() << "Number live values: " << LiveSet.size() << "\n";
341   }
342   Result.LiveSet = LiveSet;
343 }
344 
345 static bool isKnownBaseResult(Value *V);
346 
347 namespace {
348 
/// A single base defining value - An immediate base defining value for an
/// instruction 'Def' is an input to 'Def' whose base is also a base of 'Def'.
/// For instructions which have multiple pointer [vector] inputs or that
/// transition between vector and scalar types, there is no immediate base
/// defining value.  The 'base defining value' for 'Def' is the transitive
/// closure of this relation stopping at the first instruction which has no
/// immediate base defining value.  The b.d.v. might itself be a base pointer,
/// but it can also be an arbitrary derived pointer.
struct BaseDefiningValueResult {
  /// Contains the value which is the base defining value.
  Value * const BDV;

  /// True if the base defining value is also known to be an actual base
  /// pointer.
  const bool IsKnownBase;

  BaseDefiningValueResult(Value *BDV, bool IsKnownBase)
    : BDV(BDV), IsKnownBase(IsKnownBase) {
#ifndef NDEBUG
    // Check consistency between new and old means of checking whether a BDV is
    // a base.
    bool MustBeBase = isKnownBaseResult(BDV);
    assert(!MustBeBase || MustBeBase == IsKnownBase);
#endif
  }
};
375 
376 } // end anonymous namespace
377 
378 static BaseDefiningValueResult findBaseDefiningValue(Value *I);
379 
380 /// Return a base defining value for the 'Index' element of the given vector
381 /// instruction 'I'.  If Index is null, returns a BDV for the entire vector
382 /// 'I'.  As an optimization, this method will try to determine when the
383 /// element is known to already be a base pointer.  If this can be established,
384 /// the second value in the returned pair will be true.  Note that either a
385 /// vector or a pointer typed value can be returned.  For the former, the
386 /// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
387 /// If the later, the return pointer is a BDV (or possibly a base) for the
388 /// particular element in 'I'.
389 static BaseDefiningValueResult
390 findBaseDefiningValueOfVector(Value *I) {
391   // Each case parallels findBaseDefiningValue below, see that code for
392   // detailed motivation.
393 
394   if (isa<Argument>(I))
395     // An incoming argument to the function is a base pointer
396     return BaseDefiningValueResult(I, true);
397 
398   if (isa<Constant>(I))
399     // Base of constant vector consists only of constant null pointers.
400     // For reasoning see similar case inside 'findBaseDefiningValue' function.
401     return BaseDefiningValueResult(ConstantAggregateZero::get(I->getType()),
402                                    true);
403 
404   if (isa<LoadInst>(I))
405     return BaseDefiningValueResult(I, true);
406 
407   if (isa<InsertElementInst>(I))
408     // We don't know whether this vector contains entirely base pointers or
409     // not.  To be conservatively correct, we treat it as a BDV and will
410     // duplicate code as needed to construct a parallel vector of bases.
411     return BaseDefiningValueResult(I, false);
412 
413   if (isa<ShuffleVectorInst>(I))
414     // We don't know whether this vector contains entirely base pointers or
415     // not.  To be conservatively correct, we treat it as a BDV and will
416     // duplicate code as needed to construct a parallel vector of bases.
417     // TODO: There a number of local optimizations which could be applied here
418     // for particular sufflevector patterns.
419     return BaseDefiningValueResult(I, false);
420 
421   // The behavior of getelementptr instructions is the same for vector and
422   // non-vector data types.
423   if (auto *GEP = dyn_cast<GetElementPtrInst>(I))
424     return findBaseDefiningValue(GEP->getPointerOperand());
425 
426   // If the pointer comes through a bitcast of a vector of pointers to
427   // a vector of another type of pointer, then look through the bitcast
428   if (auto *BC = dyn_cast<BitCastInst>(I))
429     return findBaseDefiningValue(BC->getOperand(0));
430 
431   // A PHI or Select is a base defining value.  The outer findBasePointer
432   // algorithm is responsible for constructing a base value for this BDV.
433   assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
434          "unknown vector instruction - no base found for vector element");
435   return BaseDefiningValueResult(I, false);
436 }
437 
/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static BaseDefiningValueResult findBaseDefiningValue(Value *I) {
  assert(I->getType()->isPtrOrPtrVectorTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");

  // Vector values are handled by the parallel routine above.
  if (I->getType()->isVectorTy())
    return findBaseDefiningValueOfVector(I);

  if (isa<Argument>(I))
    // An incoming argument to the function is a base pointer
    // We should have never reached here if this argument isn't an gc value
    return BaseDefiningValueResult(I, true);

  if (isa<Constant>(I)) {
    // We assume that objects with a constant base (e.g. a global) can't move
    // and don't need to be reported to the collector because they are always
    // live. Besides global references, all kinds of constants (e.g. undef,
    // constant expressions, null pointers) can be introduced by the inliner or
    // the optimizer, especially on dynamically dead paths.
    // Here we treat all of them as having single null base. By doing this we
    // trying to avoid problems reporting various conflicts in a form of
    // "phi (const1, const2)" or "phi (const, regular gc ptr)".
    // See constant.ll file for relevant test cases.

    return BaseDefiningValueResult(
        ConstantPointerNull::get(cast<PointerType>(I->getType())), true);
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Def = CI->stripPointerCasts();
    // If stripping pointer casts changes the address space there is an
    // addrspacecast in between.
    assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
               cast<PointerType>(CI->getType())->getAddressSpace() &&
           "unsupported addrspacecast");
    // If we find a cast instruction here, it means we've found a cast which is
    // not simply a pointer cast (i.e. an inttoptr).  We don't know how to
    // handle int->ptr conversion.
    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
    return findBaseDefiningValue(Def);
  }

  if (isa<LoadInst>(I))
    // The value loaded is an gc base itself
    return BaseDefiningValueResult(I, true);

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
    // The base of this GEP is the base
    return findBaseDefiningValue(GEP->getPointerOperand());

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      // fall through to general call handling
      break;
    case Intrinsic::experimental_gc_statepoint:
      llvm_unreachable("statepoints don't produce pointers");
    case Intrinsic::experimental_gc_relocate:
      // Rerunning safepoint insertion after safepoints are already
      // inserted is not supported.  It could probably be made to work,
      // but why are you doing this?  There's no good reason.
      llvm_unreachable("repeat safepoint insertion is not supported");
    case Intrinsic::gcroot:
      // Currently, this mechanism hasn't been extended to work with gcroot.
      // There's no reason it couldn't be, but I haven't thought about the
      // implications much.
      llvm_unreachable(
          "interaction with the gcroot mechanism is not supported");
    }
  }
  // We assume that functions in the source language only return base
  // pointers.  This should probably be generalized via attributes to support
  // both source language and internal functions.
  // Note: an IntrinsicInst that took the 'default' branch above deliberately
  // falls through to this check, since intrinsics are CallInsts.
  if (isa<CallInst>(I) || isa<InvokeInst>(I))
    return BaseDefiningValueResult(I, true);

  // TODO: I have absolutely no idea how to implement this part yet.  It's not
  // necessarily hard, I just haven't really looked at it yet.
  assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");

  if (isa<AtomicCmpXchgInst>(I))
    // A CAS is effectively a atomic store and load combined under a
    // predicate.  From the perspective of base pointers, we just treat it
    // like a load.
    return BaseDefiningValueResult(I, true);

  assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
                                   "binary ops which don't apply to pointers");

  // The aggregate ops.  Aggregates can either be in the heap or on the
  // stack, but in either case, this is simply a field load.  As a result,
  // this is a defining definition of the base just like a load is.
  if (isa<ExtractValueInst>(I))
    return BaseDefiningValueResult(I, true);

  // We should never see an insert vector since that would require we be
  // tracing back a struct value not a pointer value.
  assert(!isa<InsertValueInst>(I) &&
         "Base pointer for a struct is meaningless");

  // An extractelement produces a base result exactly when it's input does.
  // We may need to insert a parallel instruction to extract the appropriate
  // element out of the base vector corresponding to the input. Given this,
  // it's analogous to the phi and select case even though it's not a merge.
  if (isa<ExtractElementInst>(I))
    // Note: There a lot of obvious peephole cases here.  This are deliberately
    // handled after the main base pointer inference algorithm to make writing
    // test cases to exercise that code easier.
    return BaseDefiningValueResult(I, false);

  // The last two cases here don't return a base pointer.  Instead, they
  // return a value which dynamically selects from among several base
  // derived pointers (each with it's own base potentially).  It's the job of
  // the caller to resolve these.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "missing instruction case in findBaseDefiningValing");
  return BaseDefiningValueResult(I, false);
}
559 
560 /// Returns the base defining value for this value.
561 static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) {
562   Value *&Cached = Cache[I];
563   if (!Cached) {
564     Cached = findBaseDefiningValue(I).BDV;
565     DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
566                  << Cached->getName() << "\n");
567   }
568   assert(Cache[I] != nullptr);
569   return Cached;
570 }
571 
572 /// Return a base pointer for this value if known.  Otherwise, return it's
573 /// base defining value.
574 static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
575   Value *Def = findBaseDefiningValueCached(I, Cache);
576   auto Found = Cache.find(Def);
577   if (Found != Cache.end()) {
578     // Either a base-of relation, or a self reference.  Caller must check.
579     return Found->second;
580   }
581   // Only a BDV available
582   return Def;
583 }
584 
585 /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
586 /// is it known to be a base pointer?  Or do we need to continue searching.
587 static bool isKnownBaseResult(Value *V) {
588   if (!isa<PHINode>(V) && !isa<SelectInst>(V) &&
589       !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
590       !isa<ShuffleVectorInst>(V)) {
591     // no recursion possible
592     return true;
593   }
594   if (isa<Instruction>(V) &&
595       cast<Instruction>(V)->getMetadata("is_base_value")) {
596     // This is a previously inserted base phi or select.  We know
597     // that this is a base value.
598     return true;
599   }
600 
601   // We need to keep searching
602   return false;
603 }
604 
605 namespace {
606 
/// Models the state of a single base defining value in the findBasePointer
/// algorithm for determining where a new instruction is needed to propagate
/// the base of this BDV.
class BDVState {
public:
  // Lattice values: Unknown is the optimistic top, Conflict the bottom, and
  // Base carries a concrete base pointer.
  enum Status { Unknown, Base, Conflict };

  // Default state: Unknown with no base value.
  BDVState() : BaseValue(nullptr) {}

  explicit BDVState(Status Status, Value *BaseValue = nullptr)
      : Status(Status), BaseValue(BaseValue) {
    // A Base state must carry an actual base value.
    assert(Status != Base || BaseValue);
  }

  // Convenience constructor for a known-base state.
  explicit BDVState(Value *BaseValue) : Status(Base), BaseValue(BaseValue) {}

  Status getStatus() const { return Status; }
  Value *getBaseValue() const { return BaseValue; }

  bool isBase() const { return getStatus() == Base; }
  bool isUnknown() const { return getStatus() == Unknown; }
  bool isConflict() const { return getStatus() == Conflict; }

  // States are equal only when both the status and the base value match.
  bool operator==(const BDVState &Other) const {
    return BaseValue == Other.BaseValue && Status == Other.Status;
  }

  bool operator!=(const BDVState &other) const { return !(*this == other); }

  LLVM_DUMP_METHOD
  void dump() const {
    print(dbgs());
    dbgs() << '\n';
  }

  // Prints a single status letter (U/B/C) followed by the base value.
  void print(raw_ostream &OS) const {
    switch (getStatus()) {
    case Unknown:
      OS << "U";
      break;
    case Base:
      OS << "B";
      break;
    case Conflict:
      OS << "C";
      break;
    }
    OS << " (" << getBaseValue() << " - "
       << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << "): ";
  }

private:
  Status Status = Unknown;
  AssertingVH<Value> BaseValue; // Non-null only if Status == Base.
};
662 
663 } // end anonymous namespace
664 
665 #ifndef NDEBUG
666 static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
667   State.print(OS);
668   return OS;
669 }
670 #endif
671 
672 static BDVState meetBDVStateImpl(const BDVState &LHS, const BDVState &RHS) {
673   switch (LHS.getStatus()) {
674   case BDVState::Unknown:
675     return RHS;
676 
677   case BDVState::Base:
678     assert(LHS.getBaseValue() && "can't be null");
679     if (RHS.isUnknown())
680       return LHS;
681 
682     if (RHS.isBase()) {
683       if (LHS.getBaseValue() == RHS.getBaseValue()) {
684         assert(LHS == RHS && "equality broken!");
685         return LHS;
686       }
687       return BDVState(BDVState::Conflict);
688     }
689     assert(RHS.isConflict() && "only three states!");
690     return BDVState(BDVState::Conflict);
691 
692   case BDVState::Conflict:
693     return LHS;
694   }
695   llvm_unreachable("only three states!");
696 }
697 
698 // Values of type BDVState form a lattice, and this function implements the meet
699 // operation.
700 static BDVState meetBDVState(const BDVState &LHS, const BDVState &RHS) {
701   BDVState Result = meetBDVStateImpl(LHS, RHS);
702   assert(Result == meetBDVStateImpl(RHS, LHS) &&
703          "Math is wrong: meet does not commute!");
704   return Result;
705 }
706 
707 /// For a given value or instruction, figure out what base ptr its derived from.
708 /// For gc objects, this is simply itself.  On success, returns a value which is
709 /// the base pointer.  (This is reliable and can be used for relocation.)  On
710 /// failure, returns nullptr.
static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache) {
  Value *Def = findBaseOrBDV(I, Cache);

  // If the base defining value is itself known to be a base, we're done.
  if (isKnownBaseResult(Def))
    return Def;

  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state.  Use an
  //   optimistic algorithm to propagate base pointer information.  Lattice
  //   looks like:
  //   UNKNOWN
  //   b1 b2 b3 b4
  //   CONFLICT
  //   When algorithm terminates, all PHIs will either have a single concrete
  //   base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments.  Add
  //   these to the base[Instruction] = BasePtr mapping.  For every
  //   non-conflict, add the actual base.
  //  - For every conflict, add arguments for the base[a] of each input
  //   arguments.
  //
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm.  This would be
  // analogous to pessimistic data flow and would likely lead to an
  // overall worse solution.

#ifndef NDEBUG
  // Sanity helper: the only non-base values the traversal below should ever
  // encounter are these five instruction kinds (the BDV-producing ones).
  auto isExpectedBDVType = [](Value *BDV) {
    return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
           isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) ||
           isa<ShuffleVectorInst>(BDV);
  };
#endif

  // Once populated, will contain a mapping from each potentially non-base BDV
  // to a lattice value (described above) which corresponds to that BDV.
  // We use the order of insertion (DFS over the def/use graph) to provide a
  // stable deterministic ordering for visiting DenseMaps (which are unordered)
  // below.  This is important for deterministic compilation.
  MapVector<Value *, BDVState> States;

  // Recursively fill in all base defining values reachable from the initial
  // one for which we don't already know a definite base value for
  /* scope */ {
    SmallVector<Value*, 16> Worklist;
    Worklist.push_back(Def);
    States.insert({Def, BDVState()});
    while (!Worklist.empty()) {
      Value *Current = Worklist.pop_back_val();
      assert(!isKnownBaseResult(Current) && "why did it get added?");

      auto visitIncomingValue = [&](Value *InVal) {
        Value *Base = findBaseOrBDV(InVal, Cache);
        if (isKnownBaseResult(Base))
          // Known bases won't need new instructions introduced and can be
          // ignored safely
          return;
        assert(isExpectedBDVType(Base) && "the only non-base values "
               "we see should be base defining values");
        // First time we've seen this BDV; queue it for traversal.
        if (States.insert(std::make_pair(Base, BDVState())).second)
          Worklist.push_back(Base);
      };
      if (PHINode *PN = dyn_cast<PHINode>(Current)) {
        for (Value *InVal : PN->incoming_values())
          visitIncomingValue(InVal);
      } else if (SelectInst *SI = dyn_cast<SelectInst>(Current)) {
        visitIncomingValue(SI->getTrueValue());
        visitIncomingValue(SI->getFalseValue());
      } else if (auto *EE = dyn_cast<ExtractElementInst>(Current)) {
        visitIncomingValue(EE->getVectorOperand());
      } else if (auto *IE = dyn_cast<InsertElementInst>(Current)) {
        visitIncomingValue(IE->getOperand(0)); // vector operand
        visitIncomingValue(IE->getOperand(1)); // scalar operand
      } else if (auto *SV = dyn_cast<ShuffleVectorInst>(Current)) {
        visitIncomingValue(SV->getOperand(0));
        visitIncomingValue(SV->getOperand(1));
      }
      else {
        llvm_unreachable("Unimplemented instruction case");
      }
    }
  }

#ifndef NDEBUG
  DEBUG(dbgs() << "States after initialization:\n");
  for (auto Pair : States) {
    DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Return a phi state for a base defining value.  We'll generate a new
  // base state for known bases and expect to find a cached state otherwise.
  auto getStateForBDV = [&](Value *baseValue) {
    if (isKnownBaseResult(baseValue))
      return BDVState(baseValue);
    auto I = States.find(baseValue);
    assert(I != States.end() && "lookup failed!");
    return I->second;
  };

  // Run the optimistic meet-over-inputs algorithm to a fixed point.
  bool Progress = true;
  while (Progress) {
#ifndef NDEBUG
    const size_t OldSize = States.size();
#endif
    Progress = false;
    // We're only changing values in this loop, thus safe to keep iterators.
    // Since this is computing a fixed point, the order of visit does not
    // affect the result.  TODO: We could use a worklist here and make this run
    // much faster.
    for (auto Pair : States) {
      Value *BDV = Pair.first;
      assert(!isKnownBaseResult(BDV) && "why did it get added?");

      // Given an input value for the current instruction, return a BDVState
      // instance which represents the BDV of that value.
      auto getStateForInput = [&](Value *V) mutable {
        Value *BDV = findBaseOrBDV(V, Cache);
        return getStateForBDV(BDV);
      };

      // NewState starts at the lattice top (Unknown) and is lowered by
      // meeting the states of all of this instruction's inputs.
      BDVState NewState;
      if (SelectInst *SI = dyn_cast<SelectInst>(BDV)) {
        NewState = meetBDVState(NewState, getStateForInput(SI->getTrueValue()));
        NewState =
            meetBDVState(NewState, getStateForInput(SI->getFalseValue()));
      } else if (PHINode *PN = dyn_cast<PHINode>(BDV)) {
        for (Value *Val : PN->incoming_values())
          NewState = meetBDVState(NewState, getStateForInput(Val));
      } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
        // The 'meet' for an extractelement is slightly trivial, but it's still
        // useful in that it drives us to conflict if our input is.
        NewState =
            meetBDVState(NewState, getStateForInput(EE->getVectorOperand()));
      } else if (auto *IE = dyn_cast<InsertElementInst>(BDV)){
        // Given there's an inherent type mismatch between the operands, will
        // *always* produce Conflict.
        NewState = meetBDVState(NewState, getStateForInput(IE->getOperand(0)));
        NewState = meetBDVState(NewState, getStateForInput(IE->getOperand(1)));
      } else {
        // The only instance this does not return a Conflict is when both the
        // vector operands are the same vector.
        auto *SV = cast<ShuffleVectorInst>(BDV);
        NewState = meetBDVState(NewState, getStateForInput(SV->getOperand(0)));
        NewState = meetBDVState(NewState, getStateForInput(SV->getOperand(1)));
      }

      BDVState OldState = States[BDV];
      if (OldState != NewState) {
        Progress = true;
        States[BDV] = NewState;
      }
    }

    assert(OldSize == States.size() &&
           "fixed point shouldn't be adding any new nodes to state");
  }

#ifndef NDEBUG
  DEBUG(dbgs() << "States after meet iteration:\n");
  for (auto Pair : States) {
    DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Insert Phis for all conflicts
  // TODO: adjust naming patterns to avoid this order of iteration dependency
  for (auto Pair : States) {
    Instruction *I = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;
    assert(!isKnownBaseResult(I) && "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");

    // extractelement instructions are a bit special in that we may need to
    // insert an extract even when we know an exact base for the instruction.
    // The problem is that we need to convert from a vector base to a scalar
    // base for the particular index we're interested in.
    if (State.isBase() && isa<ExtractElementInst>(I) &&
        isa<VectorType>(State.getBaseValue()->getType())) {
      auto *EE = cast<ExtractElementInst>(I);
      // TODO: In many cases, the new instruction is just EE itself.  We should
      // exploit this, but can't do it here since it would break the invariant
      // about the BDV not being known to be a base.
      auto *BaseInst = ExtractElementInst::Create(
          State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
      BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
      States[I] = BDVState(BDVState::Base, BaseInst);
    }

    // Since we're joining a vector and scalar base, they can never be the
    // same.  As a result, we should always see insert element having reached
    // the conflict state.
    assert(!isa<InsertElementInst>(I) || State.isConflict());

    if (!State.isConflict())
      continue;

    /// Create and insert a new instruction which will represent the base of
    /// the given instruction 'I'.  Operands are filled in with undef here and
    /// rewritten to the real bases in the fixup loop below.
    auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction* {
      if (isa<PHINode>(I)) {
        BasicBlock *BB = I->getParent();
        int NumPreds = std::distance(pred_begin(BB), pred_end(BB));
        assert(NumPreds > 0 && "how did we reach here");
        std::string Name = suffixed_name_or(I, ".base", "base_phi");
        return PHINode::Create(I->getType(), NumPreds, Name, I);
      } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) {
        // The undef will be replaced later
        UndefValue *Undef = UndefValue::get(SI->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_select");
        return SelectInst::Create(SI->getCondition(), Undef, Undef, Name, SI);
      } else if (auto *EE = dyn_cast<ExtractElementInst>(I)) {
        UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_ee");
        return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name,
                                          EE);
      } else if (auto *IE = dyn_cast<InsertElementInst>(I)) {
        UndefValue *VecUndef = UndefValue::get(IE->getOperand(0)->getType());
        UndefValue *ScalarUndef = UndefValue::get(IE->getOperand(1)->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_ie");
        return InsertElementInst::Create(VecUndef, ScalarUndef,
                                         IE->getOperand(2), Name, IE);
      } else {
        auto *SV = cast<ShuffleVectorInst>(I);
        UndefValue *VecUndef = UndefValue::get(SV->getOperand(0)->getType());
        std::string Name = suffixed_name_or(I, ".base", "base_sv");
        return new ShuffleVectorInst(VecUndef, VecUndef, SV->getOperand(2),
                                     Name, SV);
      }
    };
    Instruction *BaseInst = MakeBaseInstPlaceholder(I);
    // Add metadata marking this as a base value
    BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
    States[I] = BDVState(BDVState::Conflict, BaseInst);
  }

  // Returns an instruction which produces the base pointer for a given
  // instruction.  The instruction is assumed to be an input to one of the BDVs
  // seen in the inference algorithm above.  As such, we must either already
  // know its base defining value is a base, or have inserted a new
  // instruction to propagate the base of its BDV and have entered that newly
  // introduced instruction into the state table.  In either case, we are
  // assured to be able to determine an instruction which produces its base
  // pointer.
  auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
    Value *BDV = findBaseOrBDV(Input, Cache);
    Value *Base = nullptr;
    if (isKnownBaseResult(BDV)) {
      Base = BDV;
    } else {
      // Either conflict or base.
      assert(States.count(BDV));
      Base = States[BDV].getBaseValue();
    }
    assert(Base && "Can't be null");
    // The cast is needed since base traversal may strip away bitcasts
    if (Base->getType() != Input->getType() && InsertPt)
      Base = new BitCastInst(Base, Input->getType(), "cast", InsertPt);
    return Base;
  };

  // Fixup all the inputs of the new PHIs.  Visit order needs to be
  // deterministic and predictable because we're naming newly created
  // instructions.
  for (auto Pair : States) {
    Instruction *BDV = cast<Instruction>(Pair.first);
    BDVState State = Pair.second;

    assert(!isKnownBaseResult(BDV) && "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!State.isConflict())
      continue;

    if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) {
      PHINode *PN = cast<PHINode>(BDV);
      unsigned NumPHIValues = PN->getNumIncomingValues();
      for (unsigned i = 0; i < NumPHIValues; i++) {
        Value *InVal = PN->getIncomingValue(i);
        BasicBlock *InBB = PN->getIncomingBlock(i);

        // If we've already seen InBB, add the same incoming value
        // we added for it earlier.  The IR verifier requires phi
        // nodes with multiple entries from the same basic block
        // to have the same incoming value for each of those
        // entries.  If we don't do this check here and basephi
        // has a different type than base, we'll end up adding two
        // bitcasts (and hence two distinct values) as incoming
        // values for the same basic block.

        int BlockIndex = BasePHI->getBasicBlockIndex(InBB);
        if (BlockIndex != -1) {
          Value *OldBase = BasePHI->getIncomingValue(BlockIndex);
          BasePHI->addIncoming(OldBase, InBB);

#ifndef NDEBUG
          Value *Base = getBaseForInput(InVal, nullptr);
          // In essence this assert states: the only way two values
          // incoming from the same basic block may be different is by
          // being different bitcasts of the same value.  A cleanup
          // that remains TODO is changing findBaseOrBDV to return an
          // llvm::Value of the correct type (and still remain pure).
          // This will remove the need to add bitcasts.
          assert(Base->stripPointerCasts() == OldBase->stripPointerCasts() &&
                 "Sanity -- findBaseOrBDV should be pure!");
#endif
          continue;
        }

        // Find the instruction which produces the base for each input.  We may
        // need to insert a bitcast in the incoming block.
        // TODO: Need to split critical edges if insertion is needed
        Value *Base = getBaseForInput(InVal, InBB->getTerminator());
        BasePHI->addIncoming(Base, InBB);
      }
      assert(BasePHI->getNumIncomingValues() == NumPHIValues);
    } else if (SelectInst *BaseSI =
                   dyn_cast<SelectInst>(State.getBaseValue())) {
      SelectInst *SI = cast<SelectInst>(BDV);

      // Find the instruction which produces the base for each input.
      // We may need to insert a bitcast.
      BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI));
      BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI));
    } else if (auto *BaseEE =
                   dyn_cast<ExtractElementInst>(State.getBaseValue())) {
      Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
      // Find the instruction which produces the base for each input.  We may
      // need to insert a bitcast.
      BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
    } else if (auto *BaseIE = dyn_cast<InsertElementInst>(State.getBaseValue())){
      auto *BdvIE = cast<InsertElementInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvIE->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseIE);
        BaseIE->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      UpdateOperand(1); // scalar operand
    } else {
      auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue());
      auto *BdvSV = cast<ShuffleVectorInst>(BDV);
      auto UpdateOperand = [&](int OperandIdx) {
        Value *InVal = BdvSV->getOperand(OperandIdx);
        Value *Base = getBaseForInput(InVal, BaseSV);
        BaseSV->setOperand(OperandIdx, Base);
      };
      UpdateOperand(0); // vector operand
      UpdateOperand(1); // vector operand
    }
  }

  // Cache all of our results so we can cheaply reuse them
  // NOTE: This is actually two caches: one of the base defining value
  // relation and one of the base pointer relation!  FIXME
  for (auto Pair : States) {
    auto *BDV = Pair.first;
    Value *Base = Pair.second.getBaseValue();
    assert(BDV && Base);
    assert(!isKnownBaseResult(BDV) && "why did it get added?");

    DEBUG(dbgs() << "Updating base value cache"
                 << " for: " << BDV->getName() << " from: "
                 << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none")
                 << " to: " << Base->getName() << "\n");

    if (Cache.count(BDV)) {
      assert(isKnownBaseResult(Base) &&
             "must be something we 'know' is a base pointer");
      // Once we transition from the BDV relation being store in the Cache to
      // the base relation being stored, it must be stable
      assert((!isKnownBaseResult(Cache[BDV]) || Cache[BDV] == Base) &&
             "base relation should be stable");
    }
    Cache[BDV] = Base;
  }
  assert(Cache.count(Def));
  return Cache[Def];
}
1091 
1092 // For a set of live pointers (base and/or derived), identify the base
1093 // pointer of the object which they are derived from.  This routine will
1094 // mutate the IR graph as needed to make the 'base' pointer live at the
1095 // definition site of 'derived'.  This ensures that any use of 'derived' can
1096 // also use 'base'.  This may involve the insertion of a number of
1097 // additional PHI nodes.
1098 //
1099 // preconditions: live is a set of pointer type Values
1100 //
1101 // side effects: may insert PHI nodes into the existing CFG, will preserve
1102 // CFG, will not remove or mutate any existing nodes
1103 //
1104 // post condition: PointerToBase contains one (derived, base) pair for every
1105 // pointer in live.  Note that derived can be equal to base if the original
1106 // pointer was a base pointer.
1107 static void
1108 findBasePointers(const StatepointLiveSetTy &live,
1109                  MapVector<Value *, Value *> &PointerToBase,
1110                  DominatorTree *DT, DefiningValueMapTy &DVCache) {
1111   for (Value *ptr : live) {
1112     Value *base = findBasePointer(ptr, DVCache);
1113     assert(base && "failed to find base pointer");
1114     PointerToBase[ptr] = base;
1115     assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
1116             DT->dominates(cast<Instruction>(base)->getParent(),
1117                           cast<Instruction>(ptr)->getParent())) &&
1118            "The base we found better dominate the derived pointer");
1119   }
1120 }
1121 
1122 /// Find the required based pointers (and adjust the live set) for the given
1123 /// parse point.
1124 static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
1125                              CallSite CS,
1126                              PartiallyConstructedSafepointRecord &result) {
1127   MapVector<Value *, Value *> PointerToBase;
1128   findBasePointers(result.LiveSet, PointerToBase, &DT, DVCache);
1129 
1130   if (PrintBasePointers) {
1131     errs() << "Base Pairs (w/o Relocation):\n";
1132     for (auto &Pair : PointerToBase) {
1133       errs() << " derived ";
1134       Pair.first->printAsOperand(errs(), false);
1135       errs() << " base ";
1136       Pair.second->printAsOperand(errs(), false);
1137       errs() << "\n";;
1138     }
1139   }
1140 
1141   result.PointerToBase = PointerToBase;
1142 }
1143 
/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
/// (Forward declaration; the definition appears later in the file.)
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallSite CS,
                                  PartiallyConstructedSafepointRecord &result);
1149 
1150 static void recomputeLiveInValues(
1151     Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
1152     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
1153   // TODO-PERF: reuse the original liveness, then simply run the dataflow
1154   // again.  The old values are still live and will help it stabilize quickly.
1155   GCPtrLivenessData RevisedLivenessData;
1156   computeLiveInValues(DT, F, RevisedLivenessData);
1157   for (size_t i = 0; i < records.size(); i++) {
1158     struct PartiallyConstructedSafepointRecord &info = records[i];
1159     recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info);
1160   }
1161 }
1162 
1163 // When inserting gc.relocate and gc.result calls, we need to ensure there are
1164 // no uses of the original value / return value between the gc.statepoint and
1165 // the gc.relocate / gc.result call.  One case which can arise is a phi node
// at the start of one of the successor blocks.  We also need to insert the
1167 // gc.relocates only on the path which goes through the statepoint.  We might
1168 // need to split an edge to make this possible.
1169 static BasicBlock *
1170 normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
1171                             DominatorTree &DT) {
1172   BasicBlock *Ret = BB;
1173   if (!BB->getUniquePredecessor())
1174     Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);
1175 
1176   // Now that 'Ret' has unique predecessor we can safely remove all phi nodes
1177   // from it
1178   FoldSingleEntryPHINodes(Ret);
1179   assert(!isa<PHINode>(Ret->begin()) &&
1180          "All PHI nodes should have been removed!");
1181 
1182   // At this point, we can safely insert a gc.relocate or gc.result as the first
1183   // instruction in Ret if needed.
1184   return Ret;
1185 }
1186 
1187 // Create new attribute set containing only attributes which can be transferred
1188 // from original call to the safepoint.
1189 static AttributeList legalizeCallAttributes(AttributeList AL) {
1190   if (AL.isEmpty())
1191     return AL;
1192 
1193   // Remove the readonly, readnone, and statepoint function attributes.
1194   AttrBuilder FnAttrs = AL.getFnAttributes();
1195   FnAttrs.removeAttribute(Attribute::ReadNone);
1196   FnAttrs.removeAttribute(Attribute::ReadOnly);
1197   for (Attribute A : AL.getFnAttributes()) {
1198     if (isStatepointDirectiveAttr(A))
1199       FnAttrs.remove(A);
1200   }
1201 
1202   // Just skip parameter and return attributes for now
1203   LLVMContext &Ctx = AL.getContext();
1204   return AttributeList::get(Ctx, AttributeList::FunctionIndex,
1205                             AttributeSet::get(Ctx, FnAttrs));
1206 }
1207 
1208 /// Helper function to place all gc relocates necessary for the given
1209 /// statepoint.
1210 /// Inputs:
1211 ///   liveVariables - list of variables to be relocated.
1212 ///   liveStart - index of the first live variable.
1213 ///   basePtrs - base pointers.
1214 ///   statepointToken - statepoint instruction to which relocates should be
1215 ///   bound.
1216 ///   Builder - Llvm IR builder to be used to construct new calls.
1217 static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
1218                               const int LiveStart,
1219                               ArrayRef<Value *> BasePtrs,
1220                               Instruction *StatepointToken,
1221                               IRBuilder<> Builder) {
1222   if (LiveVariables.empty())
1223     return;
1224 
1225   auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
1226     auto ValIt = llvm::find(LiveVec, Val);
1227     assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
1228     size_t Index = std::distance(LiveVec.begin(), ValIt);
1229     assert(Index < LiveVec.size() && "Bug in std::find?");
1230     return Index;
1231   };
1232   Module *M = StatepointToken->getModule();
1233 
1234   // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose
1235   // element type is i8 addrspace(1)*). We originally generated unique
1236   // declarations for each pointer type, but this proved problematic because
1237   // the intrinsic mangling code is incomplete and fragile.  Since we're moving
1238   // towards a single unified pointer type anyways, we can just cast everything
1239   // to an i8* of the right address space.  A bitcast is added later to convert
1240   // gc_relocate to the actual value's type.
1241   auto getGCRelocateDecl = [&] (Type *Ty) {
1242     assert(isHandledGCPointerType(Ty));
1243     auto AS = Ty->getScalarType()->getPointerAddressSpace();
1244     Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
1245     if (auto *VT = dyn_cast<VectorType>(Ty))
1246       NewTy = VectorType::get(NewTy, VT->getNumElements());
1247     return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
1248                                      {NewTy});
1249   };
1250 
1251   // Lazily populated map from input types to the canonicalized form mentioned
1252   // in the comment above.  This should probably be cached somewhere more
1253   // broadly.
1254   DenseMap<Type*, Value*> TypeToDeclMap;
1255 
1256   for (unsigned i = 0; i < LiveVariables.size(); i++) {
1257     // Generate the gc.relocate call and save the result
1258     Value *BaseIdx =
1259       Builder.getInt32(LiveStart + FindIndex(LiveVariables, BasePtrs[i]));
1260     Value *LiveIdx = Builder.getInt32(LiveStart + i);
1261 
1262     Type *Ty = LiveVariables[i]->getType();
1263     if (!TypeToDeclMap.count(Ty))
1264       TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
1265     Value *GCRelocateDecl = TypeToDeclMap[Ty];
1266 
1267     // only specify a debug name if we can give a useful one
1268     CallInst *Reloc = Builder.CreateCall(
1269         GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
1270         suffixed_name_or(LiveVariables[i], ".relocated", ""));
1271     // Trick CodeGen into thinking there are lots of free registers at this
1272     // fake call.
1273     Reloc->setCallingConv(CallingConv::Cold);
1274   }
1275 }
1276 
1277 namespace {
1278 
/// This struct is used to defer RAUWs and `eraseFromParent` s.  Using this
/// avoids having to worry about keeping around dangling pointers to Values.
class DeferredReplacement {
  // Instruction to be erased (and, if New is set, RAUW'd first).  The
  // AssertingVH handles fire if anything deletes these instructions before
  // doReplacement runs.
  AssertingVH<Instruction> Old;
  AssertingVH<Instruction> New;
  // True iff Old is a call to llvm.experimental.deoptimize; doReplacement
  // then also replaces the enclosing block's return with unreachable.
  bool IsDeoptimize = false;

  // Instances must be built via the factory functions below.
  DeferredReplacement() = default;

public:
  /// Defer "replace all uses of Old with New, then erase Old".
  static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
    assert(Old != New && Old && New &&
           "Cannot RAUW equal values or to / from null!");

    DeferredReplacement D;
    D.Old = Old;
    D.New = New;
    return D;
  }

  /// Defer plain erasure of ToErase (no use replacement).
  static DeferredReplacement createDelete(Instruction *ToErase) {
    DeferredReplacement D;
    D.Old = ToErase;
    return D;
  }

  /// Defer the special lowering for llvm.experimental.deoptimize calls:
  /// erase the call and turn the following return into unreachable.
  static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
#ifndef NDEBUG
    auto *F = cast<CallInst>(Old)->getCalledFunction();
    assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
           "Only way to construct a deoptimize deferred replacement");
#endif
    DeferredReplacement D;
    D.Old = Old;
    D.IsDeoptimize = true;
    return D;
  }

  /// Does the task represented by this instance.
  void doReplacement() {
    Instruction *OldI = Old;
    Instruction *NewI = New;

    assert(OldI != NewI && "Disallowed at construction?!");
    assert((!IsDeoptimize || !New) &&
           "Deoptimize instrinsics are not replaced!");

    // Clear the asserting handles before mutating the IR so the erasures
    // below don't trip the handles' own assertions.
    Old = nullptr;
    New = nullptr;

    if (NewI)
      OldI->replaceAllUsesWith(NewI);

    if (IsDeoptimize) {
      // Note: we've inserted instructions, so the call to llvm.deoptimize may
      // not necessarilly be followed by the matching return.
      auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
      new UnreachableInst(RI->getContext(), RI);
      RI->eraseFromParent();
    }

    OldI->eraseFromParent();
  }
};
1343 
1344 } // end anonymous namespace
1345 
1346 static StringRef getDeoptLowering(CallSite CS) {
1347   const char *DeoptLowering = "deopt-lowering";
1348   if (CS.hasFnAttr(DeoptLowering)) {
1349     // FIXME: CallSite has a *really* confusing interface around attributes
1350     // with values.
1351     const AttributeList &CSAS = CS.getAttributes();
1352     if (CSAS.hasAttribute(AttributeList::FunctionIndex, DeoptLowering))
1353       return CSAS.getAttribute(AttributeList::FunctionIndex, DeoptLowering)
1354           .getValueAsString();
1355     Function *F = CS.getCalledFunction();
1356     assert(F && F->hasFnAttribute(DeoptLowering));
1357     return F->getFnAttribute(DeoptLowering).getValueAsString();
1358   }
1359   return "live-through";
1360 }
1361 
1362 static void
1363 makeStatepointExplicitImpl(const CallSite CS, /* to replace */
1364                            const SmallVectorImpl<Value *> &BasePtrs,
1365                            const SmallVectorImpl<Value *> &LiveVariables,
1366                            PartiallyConstructedSafepointRecord &Result,
1367                            std::vector<DeferredReplacement> &Replacements) {
1368   assert(BasePtrs.size() == LiveVariables.size());
1369 
1370   // Then go ahead and use the builder do actually do the inserts.  We insert
1371   // immediately before the previous instruction under the assumption that all
1372   // arguments will be available here.  We can't insert afterwards since we may
1373   // be replacing a terminator.
1374   Instruction *InsertBefore = CS.getInstruction();
1375   IRBuilder<> Builder(InsertBefore);
1376 
1377   ArrayRef<Value *> GCArgs(LiveVariables);
1378   uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
1379   uint32_t NumPatchBytes = 0;
1380   uint32_t Flags = uint32_t(StatepointFlags::None);
1381 
1382   ArrayRef<Use> CallArgs(CS.arg_begin(), CS.arg_end());
1383   ArrayRef<Use> DeoptArgs = GetDeoptBundleOperands(CS);
1384   ArrayRef<Use> TransitionArgs;
1385   if (auto TransitionBundle =
1386       CS.getOperandBundle(LLVMContext::OB_gc_transition)) {
1387     Flags |= uint32_t(StatepointFlags::GCTransition);
1388     TransitionArgs = TransitionBundle->Inputs;
1389   }
1390 
1391   // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
1392   // with a return value, we lower then as never returning calls to
1393   // __llvm_deoptimize that are followed by unreachable to get better codegen.
1394   bool IsDeoptimize = false;
1395 
1396   StatepointDirectives SD =
1397       parseStatepointDirectivesFromAttrs(CS.getAttributes());
1398   if (SD.NumPatchBytes)
1399     NumPatchBytes = *SD.NumPatchBytes;
1400   if (SD.StatepointID)
1401     StatepointID = *SD.StatepointID;
1402 
1403   // Pass through the requested lowering if any.  The default is live-through.
1404   StringRef DeoptLowering = getDeoptLowering(CS);
1405   if (DeoptLowering.equals("live-in"))
1406     Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
1407   else {
1408     assert(DeoptLowering.equals("live-through") && "Unsupported value!");
1409   }
1410 
1411   Value *CallTarget = CS.getCalledValue();
1412   if (Function *F = dyn_cast<Function>(CallTarget)) {
1413     if (F->getIntrinsicID() == Intrinsic::experimental_deoptimize) {
1414       // Calls to llvm.experimental.deoptimize are lowered to calls to the
1415       // __llvm_deoptimize symbol.  We want to resolve this now, since the
1416       // verifier does not allow taking the address of an intrinsic function.
1417 
1418       SmallVector<Type *, 8> DomainTy;
1419       for (Value *Arg : CallArgs)
1420         DomainTy.push_back(Arg->getType());
1421       auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1422                                     /* isVarArg = */ false);
1423 
1424       // Note: CallTarget can be a bitcast instruction of a symbol if there are
1425       // calls to @llvm.experimental.deoptimize with different argument types in
1426       // the same module.  This is fine -- we assume the frontend knew what it
1427       // was doing when generating this kind of IR.
1428       CallTarget =
1429           F->getParent()->getOrInsertFunction("__llvm_deoptimize", FTy);
1430 
1431       IsDeoptimize = true;
1432     }
1433   }
1434 
1435   // Create the statepoint given all the arguments
1436   Instruction *Token = nullptr;
1437   if (CS.isCall()) {
1438     CallInst *ToReplace = cast<CallInst>(CS.getInstruction());
1439     CallInst *Call = Builder.CreateGCStatepointCall(
1440         StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
1441         TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
1442 
1443     Call->setTailCallKind(ToReplace->getTailCallKind());
1444     Call->setCallingConv(ToReplace->getCallingConv());
1445 
1446     // Currently we will fail on parameter attributes and on certain
1447     // function attributes.  In case if we can handle this set of attributes -
1448     // set up function attrs directly on statepoint and return attrs later for
1449     // gc_result intrinsic.
1450     Call->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
1451 
1452     Token = Call;
1453 
1454     // Put the following gc_result and gc_relocate calls immediately after the
1455     // the old call (which we're about to delete)
1456     assert(ToReplace->getNextNode() && "Not a terminator, must have next!");
1457     Builder.SetInsertPoint(ToReplace->getNextNode());
1458     Builder.SetCurrentDebugLocation(ToReplace->getNextNode()->getDebugLoc());
1459   } else {
1460     InvokeInst *ToReplace = cast<InvokeInst>(CS.getInstruction());
1461 
1462     // Insert the new invoke into the old block.  We'll remove the old one in a
1463     // moment at which point this will become the new terminator for the
1464     // original block.
1465     InvokeInst *Invoke = Builder.CreateGCStatepointInvoke(
1466         StatepointID, NumPatchBytes, CallTarget, ToReplace->getNormalDest(),
1467         ToReplace->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs,
1468         GCArgs, "statepoint_token");
1469 
1470     Invoke->setCallingConv(ToReplace->getCallingConv());
1471 
1472     // Currently we will fail on parameter attributes and on certain
1473     // function attributes.  In case if we can handle this set of attributes -
1474     // set up function attrs directly on statepoint and return attrs later for
1475     // gc_result intrinsic.
1476     Invoke->setAttributes(legalizeCallAttributes(ToReplace->getAttributes()));
1477 
1478     Token = Invoke;
1479 
1480     // Generate gc relocates in exceptional path
1481     BasicBlock *UnwindBlock = ToReplace->getUnwindDest();
1482     assert(!isa<PHINode>(UnwindBlock->begin()) &&
1483            UnwindBlock->getUniquePredecessor() &&
1484            "can't safely insert in this block!");
1485 
1486     Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
1487     Builder.SetCurrentDebugLocation(ToReplace->getDebugLoc());
1488 
1489     // Attach exceptional gc relocates to the landingpad.
1490     Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
1491     Result.UnwindToken = ExceptionalToken;
1492 
1493     const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
1494     CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, ExceptionalToken,
1495                       Builder);
1496 
1497     // Generate gc relocates and returns for normal block
1498     BasicBlock *NormalDest = ToReplace->getNormalDest();
1499     assert(!isa<PHINode>(NormalDest->begin()) &&
1500            NormalDest->getUniquePredecessor() &&
1501            "can't safely insert in this block!");
1502 
1503     Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt());
1504 
1505     // gc relocates will be generated later as if it were regular call
1506     // statepoint
1507   }
1508   assert(Token && "Should be set in one of the above branches!");
1509 
1510   if (IsDeoptimize) {
1511     // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
1512     // transform the tail-call like structure to a call to a void function
1513     // followed by unreachable to get better codegen.
1514     Replacements.push_back(
1515         DeferredReplacement::createDeoptimizeReplacement(CS.getInstruction()));
1516   } else {
1517     Token->setName("statepoint_token");
1518     if (!CS.getType()->isVoidTy() && !CS.getInstruction()->use_empty()) {
1519       StringRef Name =
1520           CS.getInstruction()->hasName() ? CS.getInstruction()->getName() : "";
1521       CallInst *GCResult = Builder.CreateGCResult(Token, CS.getType(), Name);
1522       GCResult->setAttributes(
1523           AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
1524                              CS.getAttributes().getRetAttributes()));
1525 
1526       // We cannot RAUW or delete CS.getInstruction() because it could be in the
1527       // live set of some other safepoint, in which case that safepoint's
1528       // PartiallyConstructedSafepointRecord will hold a raw pointer to this
1529       // llvm::Instruction.  Instead, we defer the replacement and deletion to
1530       // after the live sets have been made explicit in the IR, and we no longer
1531       // have raw pointers to worry about.
1532       Replacements.emplace_back(
1533           DeferredReplacement::createRAUW(CS.getInstruction(), GCResult));
1534     } else {
1535       Replacements.emplace_back(
1536           DeferredReplacement::createDelete(CS.getInstruction()));
1537     }
1538   }
1539 
1540   Result.StatepointToken = Token;
1541 
1542   // Second, create a gc.relocate for every live variable
1543   const unsigned LiveStartIdx = Statepoint(Token).gcArgsStartIdx();
1544   CreateGCRelocates(LiveVariables, LiveStartIdx, BasePtrs, Token, Builder);
1545 }
1546 
1547 // Replace an existing gc.statepoint with a new one and a set of gc.relocates
1548 // which make the relocations happening at this safepoint explicit.
1549 //
// WARNING: Does not do any fixup to adjust users of the original live
// values.  That's the caller's responsibility.
1552 static void
1553 makeStatepointExplicit(DominatorTree &DT, CallSite CS,
1554                        PartiallyConstructedSafepointRecord &Result,
1555                        std::vector<DeferredReplacement> &Replacements) {
1556   const auto &LiveSet = Result.LiveSet;
1557   const auto &PointerToBase = Result.PointerToBase;
1558 
1559   // Convert to vector for efficient cross referencing.
1560   SmallVector<Value *, 64> BaseVec, LiveVec;
1561   LiveVec.reserve(LiveSet.size());
1562   BaseVec.reserve(LiveSet.size());
1563   for (Value *L : LiveSet) {
1564     LiveVec.push_back(L);
1565     assert(PointerToBase.count(L));
1566     Value *Base = PointerToBase.find(L)->second;
1567     BaseVec.push_back(Base);
1568   }
1569   assert(LiveVec.size() == BaseVec.size());
1570 
1571   // Do the actual rewriting and delete the old statepoint
1572   makeStatepointExplicitImpl(CS, BaseVec, LiveVec, Result, Replacements);
1573 }
1574 
1575 // Helper function for the relocationViaAlloca.
1576 //
// It receives an iterator range over the statepoint's gc relocates and emits a
// store to the assigned location (via AllocaMap) for each one of them.  It
// adds the visited values into the VisitedLiveValues set, which we will later
// use for sanity checking.
1581 static void
1582 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
1583                        DenseMap<Value *, Value *> &AllocaMap,
1584                        DenseSet<Value *> &VisitedLiveValues) {
1585   for (User *U : GCRelocs) {
1586     GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
1587     if (!Relocate)
1588       continue;
1589 
1590     Value *OriginalValue = Relocate->getDerivedPtr();
1591     assert(AllocaMap.count(OriginalValue));
1592     Value *Alloca = AllocaMap[OriginalValue];
1593 
1594     // Emit store into the related alloca
1595     // All gc_relocates are i8 addrspace(1)* typed, and it must be bitcasted to
1596     // the correct type according to alloca.
1597     assert(Relocate->getNextNode() &&
1598            "Should always have one since it's not a terminator");
1599     IRBuilder<> Builder(Relocate->getNextNode());
1600     Value *CastedRelocatedValue =
1601       Builder.CreateBitCast(Relocate,
1602                             cast<AllocaInst>(Alloca)->getAllocatedType(),
1603                             suffixed_name_or(Relocate, ".casted", ""));
1604 
1605     StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
1606     Store->insertAfter(cast<Instruction>(CastedRelocatedValue));
1607 
1608 #ifndef NDEBUG
1609     VisitedLiveValues.insert(OriginalValue);
1610 #endif
1611   }
1612 }
1613 
1614 // Helper function for the "relocationViaAlloca". Similar to the
1615 // "insertRelocationStores" but works for rematerialized values.
1616 static void insertRematerializationStores(
1617     const RematerializedValueMapTy &RematerializedValues,
1618     DenseMap<Value *, Value *> &AllocaMap,
1619     DenseSet<Value *> &VisitedLiveValues) {
1620   for (auto RematerializedValuePair: RematerializedValues) {
1621     Instruction *RematerializedValue = RematerializedValuePair.first;
1622     Value *OriginalValue = RematerializedValuePair.second;
1623 
1624     assert(AllocaMap.count(OriginalValue) &&
1625            "Can not find alloca for rematerialized value");
1626     Value *Alloca = AllocaMap[OriginalValue];
1627 
1628     StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
1629     Store->insertAfter(RematerializedValue);
1630 
1631 #ifndef NDEBUG
1632     VisitedLiveValues.insert(OriginalValue);
1633 #endif
1634   }
1635 }
1636 
/// Do all the relocation update via allocas and mem2reg
///
/// For each live SSA value: create a stack slot (alloca) in the entry block,
/// store the value (and every relocated version of it) into that slot,
/// replace every use with a load from the slot, and finally run mem2reg to
/// rebuild SSA form with the relocations now explicit.
static void relocationViaAlloca(
    Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
    ArrayRef<PartiallyConstructedSafepointRecord> Records) {
#ifndef NDEBUG
  // Record the initial number of (static) allocas; we'll check we have the
  // same number when we get done, since mem2reg should have promoted (and
  // removed) every alloca we introduced here.
  int InitialAllocaNum = 0;
  for (Instruction &I : F.getEntryBlock())
    if (isa<AllocaInst>(I))
      InitialAllocaNum++;
#endif

  // TODO-PERF: change data structures, reserve
  DenseMap<Value *, Value *> AllocaMap;
  SmallVector<AllocaInst *, 200> PromotableAllocas;
  // Used later to check that we have enough allocas to store all values
  std::size_t NumRematerializedValues = 0;
  PromotableAllocas.reserve(Live.size());

  // Emit alloca for "LiveValue" and record it in "AllocaMap" and
  // "PromotableAllocas".  The alloca is placed in the entry block so it is
  // static and thus eligible for mem2reg promotion.
  const DataLayout &DL = F.getParent()->getDataLayout();
  auto emitAllocaFor = [&](Value *LiveValue) {
    AllocaInst *Alloca = new AllocaInst(LiveValue->getType(),
                                        DL.getAllocaAddrSpace(), "",
                                        F.getEntryBlock().getFirstNonPHI());
    AllocaMap[LiveValue] = Alloca;
    PromotableAllocas.push_back(Alloca);
  };

  // Emit alloca for each live gc pointer
  for (Value *V : Live)
    emitAllocaFor(V);

  // Emit allocas for rematerialized values, skipping any original value
  // which already received a slot as part of the live set above.
  for (const auto &Info : Records)
    for (auto RematerializedValuePair : Info.RematerializedValues) {
      Value *OriginalValue = RematerializedValuePair.second;
      if (AllocaMap.count(OriginalValue) != 0)
        continue;

      emitAllocaFor(OriginalValue);
      ++NumRematerializedValues;
    }

  // The next two loops are part of the same conceptual operation.  We need to
  // insert a store to the alloca after the original def and at each
  // redefinition.  We need to insert a load before each use.  These are split
  // into distinct loops for performance reasons.

  // Update gc pointer after each statepoint: either store a relocated value or
  // null (if no relocated value was found for this gc pointer and it is not a
  // gc_result).  This must happen before we update the statepoint with load of
  // alloca otherwise we lose the link between statepoint and old def.
  for (const auto &Info : Records) {
    Value *Statepoint = Info.StatepointToken;

    // This will be used for consistency check
    DenseSet<Value *> VisitedLiveValues;

    // Insert stores for normal statepoint gc relocates
    insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);

    // In case if it was invoke statepoint
    // we will insert stores for exceptional path gc relocates.
    if (isa<InvokeInst>(Statepoint)) {
      insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
                             VisitedLiveValues);
    }

    // Do similar thing with rematerialized values
    insertRematerializationStores(Info.RematerializedValues, AllocaMap,
                                  VisitedLiveValues);

    if (ClobberNonLive) {
      // As a debugging aid, pretend that an unrelocated pointer becomes null at
      // the gc.statepoint.  This will turn some subtle GC problems into
      // slightly easier to debug SEGVs.  Note that on large IR files with
      // lots of gc.statepoints this is extremely costly both memory and time
      // wise.
      SmallVector<AllocaInst *, 64> ToClobber;
      for (auto Pair : AllocaMap) {
        Value *Def = Pair.first;
        AllocaInst *Alloca = cast<AllocaInst>(Pair.second);

        // This value was relocated across this statepoint; don't clobber it.
        if (VisitedLiveValues.count(Def)) {
          continue;
        }
        ToClobber.push_back(Alloca);
      }

      // Store null into every alloca in ToClobber, just before IP.
      auto InsertClobbersAt = [&](Instruction *IP) {
        for (auto *AI : ToClobber) {
          auto PT = cast<PointerType>(AI->getAllocatedType());
          Constant *CPN = ConstantPointerNull::get(PT);
          StoreInst *Store = new StoreInst(CPN, AI);
          Store->insertBefore(IP);
        }
      };

      // Insert the clobbering stores.  These may get intermixed with the
      // gc.results and gc.relocates, but that's fine.
      if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
        InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
        InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
      } else {
        InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
      }
    }
  }

  // Update use with load allocas and add store for gc_relocated.
  for (auto Pair : AllocaMap) {
    Value *Def = Pair.first;
    Value *Alloca = Pair.second;

    // We pre-record the uses of Def so that we don't have to worry about
    // later updates that change the user information.

    SmallVector<Instruction *, 20> Uses;
    // PERF: trade a linear scan for repeated reallocation
    Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
    for (User *U : Def->users()) {
      if (!isa<ConstantExpr>(U)) {
        // If the def has a ConstantExpr use, then the def is either a
        // ConstantExpr use itself or null.  In either case
        // (recursively in the first, directly in the second), the oop
        // it is ultimately dependent on is null and this particular
        // use does not need to be fixed up.
        Uses.push_back(cast<Instruction>(U));
      }
    }

    // Deduplicate: an instruction may use Def through more than one operand.
    std::sort(Uses.begin(), Uses.end());
    auto Last = std::unique(Uses.begin(), Uses.end());
    Uses.erase(Last, Uses.end());

    for (Instruction *Use : Uses) {
      if (isa<PHINode>(Use)) {
        PHINode *Phi = cast<PHINode>(Use);
        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
          if (Def == Phi->getIncomingValue(i)) {
            // A load feeding a phi must be placed at the end of the
            // corresponding predecessor block, not before the phi itself.
            LoadInst *Load = new LoadInst(
                Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
            Phi->setIncomingValue(i, Load);
          }
        }
      } else {
        LoadInst *Load = new LoadInst(Alloca, "", Use);
        Use->replaceUsesOfWith(Def, Load);
      }
    }

    // Emit store for the initial gc value.  Store must be inserted after load,
    // otherwise store will be in alloca's use list and an extra load will be
    // inserted before it.
    StoreInst *Store = new StoreInst(Def, Alloca);
    if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
      if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
        // InvokeInst is a TerminatorInst so the store need to be inserted
        // into its normal destination block.
        BasicBlock *NormalDest = Invoke->getNormalDest();
        Store->insertBefore(NormalDest->getFirstNonPHI());
      } else {
        assert(!Inst->isTerminator() &&
               "The only TerminatorInst that can produce a value is "
               "InvokeInst which is handled above.");
        Store->insertAfter(Inst);
      }
    } else {
      // Arguments have no def point; store right after the alloca itself in
      // the entry block.
      assert(isa<Argument>(Def));
      Store->insertAfter(cast<Instruction>(Alloca));
    }
  }

  assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
         "we must have the same allocas with lives");
  if (!PromotableAllocas.empty()) {
    // Apply mem2reg to promote alloca to SSA
    PromoteMemToReg(PromotableAllocas, DT);
  }

#ifndef NDEBUG
  // Verify mem2reg removed exactly the allocas we introduced.
  for (auto &I : F.getEntryBlock())
    if (isa<AllocaInst>(I))
      InitialAllocaNum--;
  assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
}
1828 
1829 /// Implement a unique function which doesn't require we sort the input
1830 /// vector.  Doing so has the effect of changing the output of a couple of
1831 /// tests in ways which make them less useful in testing fused safepoints.
1832 template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
1833   SmallSet<T, 8> Seen;
1834   Vec.erase(remove_if(Vec, [&](const T &V) { return !Seen.insert(V).second; }),
1835             Vec.end());
1836 }
1837 
1838 /// Insert holders so that each Value is obviously live through the entire
1839 /// lifetime of the call.
1840 static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
1841                                  SmallVectorImpl<CallInst *> &Holders) {
1842   if (Values.empty())
1843     // No values to hold live, might as well not insert the empty holder
1844     return;
1845 
1846   Module *M = CS.getInstruction()->getModule();
1847   // Use a dummy vararg function to actually hold the values live
1848   Function *Func = cast<Function>(M->getOrInsertFunction(
1849       "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
1850   if (CS.isCall()) {
1851     // For call safepoints insert dummy calls right after safepoint
1852     Holders.push_back(CallInst::Create(Func, Values, "",
1853                                        &*++CS.getInstruction()->getIterator()));
1854     return;
1855   }
1856   // For invoke safepooints insert dummy calls both in normal and
1857   // exceptional destination blocks
1858   auto *II = cast<InvokeInst>(CS.getInstruction());
1859   Holders.push_back(CallInst::Create(
1860       Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
1861   Holders.push_back(CallInst::Create(
1862       Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
1863 }
1864 
1865 static void findLiveReferences(
1866     Function &F, DominatorTree &DT, ArrayRef<CallSite> toUpdate,
1867     MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
1868   GCPtrLivenessData OriginalLivenessData;
1869   computeLiveInValues(DT, F, OriginalLivenessData);
1870   for (size_t i = 0; i < records.size(); i++) {
1871     struct PartiallyConstructedSafepointRecord &info = records[i];
1872     analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info);
1873   }
1874 }
1875 
1876 // Helper function for the "rematerializeLiveValues". It walks use chain
1877 // starting from the "CurrentValue" until it reaches the root of the chain, i.e.
1878 // the base or a value it cannot process. Only "simple" values are processed
1879 // (currently it is GEP's and casts). The returned root is  examined by the
1880 // callers of findRematerializableChainToBasePointer.  Fills "ChainToBase" array
1881 // with all visited values.
1882 static Value* findRematerializableChainToBasePointer(
1883   SmallVectorImpl<Instruction*> &ChainToBase,
1884   Value *CurrentValue) {
1885   if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
1886     ChainToBase.push_back(GEP);
1887     return findRematerializableChainToBasePointer(ChainToBase,
1888                                                   GEP->getPointerOperand());
1889   }
1890 
1891   if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
1892     if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
1893       return CI;
1894 
1895     ChainToBase.push_back(CI);
1896     return findRematerializableChainToBasePointer(ChainToBase,
1897                                                   CI->getOperand(0));
1898   }
1899 
1900   // We have reached the root of the chain, which is either equal to the base or
1901   // is the first unsupported value along the use chain.
1902   return CurrentValue;
1903 }
1904 
1905 // Helper function for the "rematerializeLiveValues". Compute cost of the use
1906 // chain we are going to rematerialize.
1907 static unsigned
1908 chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain,
1909                        TargetTransformInfo &TTI) {
1910   unsigned Cost = 0;
1911 
1912   for (Instruction *Instr : Chain) {
1913     if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
1914       assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
1915              "non noop cast is found during rematerialization");
1916 
1917       Type *SrcTy = CI->getOperand(0)->getType();
1918       Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, CI);
1919 
1920     } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
1921       // Cost of the address calculation
1922       Type *ValTy = GEP->getSourceElementType();
1923       Cost += TTI.getAddressComputationCost(ValTy);
1924 
1925       // And cost of the GEP itself
1926       // TODO: Use TTI->getGEPCost here (it exists, but appears to be not
1927       //       allowed for the external usage)
1928       if (!GEP->hasAllConstantIndices())
1929         Cost += 2;
1930 
1931     } else {
1932       llvm_unreachable("unsupported instruciton type during rematerialization");
1933     }
1934   }
1935 
1936   return Cost;
1937 }
1938 
1939 static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) {
1940   unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
1941   if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
1942       OrigRootPhi.getParent() != AlternateRootPhi.getParent())
1943     return false;
1944   // Map of incoming values and their corresponding basic blocks of
1945   // OrigRootPhi.
1946   SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues;
1947   for (unsigned i = 0; i < PhiNum; i++)
1948     CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] =
1949         OrigRootPhi.getIncomingBlock(i);
1950 
1951   // Both current and base PHIs should have same incoming values and
1952   // the same basic blocks corresponding to the incoming values.
1953   for (unsigned i = 0; i < PhiNum; i++) {
1954     auto CIVI =
1955         CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i));
1956     if (CIVI == CurrentIncomingValues.end())
1957       return false;
1958     BasicBlock *CurrentIncomingBB = CIVI->second;
1959     if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i))
1960       return false;
1961   }
1962   return true;
1963 }
1964 
// From the statepoint live set pick values that are cheaper to recompute than
// to relocate. Remove these values from the live set, rematerialize them after
1967 // statepoint and record them in "Info" structure. Note that similar to
1968 // relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallSite CS,
                                    PartiallyConstructedSafepointRecord &Info,
                                    TargetTransformInfo &TTI) {
  // Don't bother rematerializing chains longer than this; the recompute cost
  // would almost certainly exceed the relocation cost anyway.
  const unsigned int ChainLengthThreshold = 10;

  // Record values we are going to delete from this statepoint live set.
  // We cannot do this in the following loop due to iterator invalidation.
  SmallVector<Value *, 32> LiveValuesToBeDeleted;

  for (Value *LiveValue: Info.LiveSet) {
    // For each live pointer find its defining chain
    SmallVector<Instruction *, 3> ChainToBase;
    assert(Info.PointerToBase.count(LiveValue));
    Value *RootOfChain =
      findRematerializableChainToBasePointer(ChainToBase,
                                             LiveValue);

    // Nothing to do, or chain is too long
    if ( ChainToBase.size() == 0 ||
        ChainToBase.size() > ChainLengthThreshold)
      continue;

    // Handle the scenario where the RootOfChain is not equal to the
    // Base Value, but they are essentially the same phi values.
    if (RootOfChain != Info.PointerToBase[LiveValue]) {
      PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
      PHINode *AlternateRootPhi = dyn_cast<PHINode>(Info.PointerToBase[LiveValue]);
      if (!OrigRootPhi || !AlternateRootPhi)
        continue;
      // PHI nodes that have the same incoming values, and belonging to the same
      // basic blocks are essentially the same SSA value.  When the original phi
      // has incoming values with different base pointers, the original phi is
      // marked as conflict, and an additional `AlternateRootPhi` with the same
      // incoming values get generated by the findBasePointer function. We need
      // to identify the newly generated AlternateRootPhi (.base version of phi)
      // and RootOfChain (the original phi node itself) are the same, so that we
      // can rematerialize the gep and casts. This is a workaround for the
      // deficiency in the findBasePointer algorithm.
      if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
        continue;
      // Now that the phi nodes are proved to be the same, assert that
      // findBasePointer's newly generated AlternateRootPhi is present in the
      // liveset of the call.
      assert(Info.LiveSet.count(AlternateRootPhi));
    }
    // Compute cost of this chain
    unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
    // TODO: We can also account for cases when we will be able to remove some
    //       of the rematerialized values by later optimization passes. I.e if
    //       we rematerialized several intersecting chains. Or if original values
    //       don't have any uses besides this statepoint.

    // For invokes we need to rematerialize each chain twice - for normal and
    // for unwind basic blocks. Model this by multiplying cost by two.
    if (CS.isInvoke()) {
      Cost *= 2;
    }
    // If it's too expensive - skip it
    if (Cost >= RematerializationThreshold)
      continue;

    // Remove value from the live set
    LiveValuesToBeDeleted.push_back(LiveValue);

    // Clone instructions and record them inside "Info" structure

    // Walk backwards to visit top-most instructions first
    std::reverse(ChainToBase.begin(), ChainToBase.end());

    // Utility function which clones all instructions from "ChainToBase"
    // and inserts them before "InsertBefore". Returns rematerialized value
    // which should be used after statepoint.
    auto rematerializeChain = [&ChainToBase](
        Instruction *InsertBefore, Value *RootOfChain, Value *AlternateLiveBase) {
      Instruction *LastClonedValue = nullptr;
      Instruction *LastValue = nullptr;
      for (Instruction *Instr: ChainToBase) {
        // Only GEP's and casts are supported as we need to be careful to not
        // introduce any new uses of pointers not in the liveset.
        // Note that it's fine to introduce new uses of pointers which were
        // otherwise not used after this statepoint.
        assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));

        Instruction *ClonedValue = Instr->clone();
        ClonedValue->insertBefore(InsertBefore);
        ClonedValue->setName(Instr->getName() + ".remat");

        // If it is not first instruction in the chain then it uses previously
        // cloned value. We should update it to use cloned value.
        if (LastClonedValue) {
          assert(LastValue);
          ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
#ifndef NDEBUG
          for (auto OpValue : ClonedValue->operand_values()) {
            // Assert that cloned instruction does not use any instructions from
            // this chain other than LastClonedValue
            assert(!is_contained(ChainToBase, OpValue) &&
                   "incorrect use in rematerialization chain");
            // Assert that the cloned instruction does not use the RootOfChain
            // or the AlternateLiveBase.
            assert(OpValue != RootOfChain && OpValue != AlternateLiveBase);
          }
#endif
        } else {
          // For the first instruction, replace the use of unrelocated base i.e.
          // RootOfChain/OrigRootPhi, with the corresponding PHI present in the
          // live set. They have been proved to be the same PHI nodes.  Note
          // that the *only* use of the RootOfChain in the ChainToBase list is
          // the first Value in the list.
          if (RootOfChain != AlternateLiveBase)
            ClonedValue->replaceUsesOfWith(RootOfChain, AlternateLiveBase);
        }

        LastClonedValue = ClonedValue;
        LastValue = Instr;
      }
      assert(LastClonedValue);
      return LastClonedValue;
    };

    // Different cases for calls and invokes. For invokes we need to clone
    // instructions both on normal and unwind path.
    if (CS.isCall()) {
      Instruction *InsertBefore = CS.getInstruction()->getNextNode();
      assert(InsertBefore);
      Instruction *RematerializedValue = rematerializeChain(
          InsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
      Info.RematerializedValues[RematerializedValue] = LiveValue;
    } else {
      InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());

      Instruction *NormalInsertBefore =
          &*Invoke->getNormalDest()->getFirstInsertionPt();
      Instruction *UnwindInsertBefore =
          &*Invoke->getUnwindDest()->getFirstInsertionPt();

      Instruction *NormalRematerializedValue = rematerializeChain(
          NormalInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);
      Instruction *UnwindRematerializedValue = rematerializeChain(
          UnwindInsertBefore, RootOfChain, Info.PointerToBase[LiveValue]);

      Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
      Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
    }
  }

  // Remove rematerialized values from the live set
  for (auto LiveValue: LiveValuesToBeDeleted) {
    Info.LiveSet.remove(LiveValue);
  }
}
2120 
/// Rewrite every call site in \p ToUpdate into a gc.statepoint sequence with
/// explicit gc.relocates for all gc pointers live across the call.  Returns
/// true iff at least one statepoint record was created.  Note that \p
/// ToUpdate is cleared as a side effect, since the recorded CallSites become
/// invalid once the underlying calls are replaced.
static bool insertParsePoints(Function &F, DominatorTree &DT,
                              TargetTransformInfo &TTI,
                              SmallVectorImpl<CallSite> &ToUpdate) {
#ifndef NDEBUG
  // Sanity check the input: no duplicate call sites, and every call site
  // must belong to the function being rewritten.
  std::set<CallSite> Uniqued;
  Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
  assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");

  for (CallSite CS : ToUpdate)
    assert(CS.getInstruction()->getFunction() == &F);
#endif

  // When inserting gc.relocates for invokes, we need to be able to insert at
  // the top of the successor blocks.  See the comment on
  // normalizeForInvokeSafepoint on exactly what is needed.  Note that this
  // step may restructure the CFG.
  for (CallSite CS : ToUpdate) {
    if (!CS.isInvoke())
      continue;
    auto *II = cast<InvokeInst>(CS.getInstruction());
    normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
    normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
  }

  // A list of dummy calls added to the IR to keep various values obviously
  // live in the IR.  We'll remove all of these when done.
  SmallVector<CallInst *, 64> Holders;

  // Insert a dummy call with all of the deopt operands we'll need for the
  // actual safepoint insertion as arguments.  This ensures reference operands
  // in the deopt argument list are considered live through the safepoint (and
  // thus makes sure they get relocated.)
  for (CallSite CS : ToUpdate) {
    SmallVector<Value *, 64> DeoptValues;

    // Only gc pointer typed deopt operands need to be kept live; other
    // deopt values (e.g. integers) are unaffected by relocation.
    for (Value *Arg : GetDeoptBundleOperands(CS)) {
      assert(!isUnhandledGCPointerType(Arg->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(Arg->getType()))
        DeoptValues.push_back(Arg);
    }

    insertUseHolderAfter(CS, DeoptValues, Holders);
  }

  // One record per call site, filled in incrementally by the phases below.
  SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());

  // A) Identify all gc pointers which are statically live at the given call
  // site.
  findLiveReferences(F, DT, ToUpdate, Records);

  // B) Find the base pointers for each live pointer
  /* scope for caching */ {
    // Cache the 'defining value' relation used in the computation and
    // insertion of base phis and selects.  This ensures that we don't insert
    // large numbers of duplicate base_phis.
    DefiningValueMapTy DVCache;

    for (size_t i = 0; i < Records.size(); i++) {
      PartiallyConstructedSafepointRecord &info = Records[i];
      findBasePointers(DT, DVCache, ToUpdate[i], info);
    }
  } // end of cache scope

  // The base phi insertion logic (for any safepoint) may have inserted new
  // instructions which are now live at some safepoint.  The simplest such
  // example is:
  // loop:
  //   phi a  <-- will be a new base_phi here
  //   safepoint 1 <-- that needs to be live here
  //   gep a + 1
  //   safepoint 2
  //   br loop
  // We insert some dummy calls after each safepoint to definitely hold live
  // the base pointers which were identified for that safepoint.  We'll then
  // ask liveness for _every_ base inserted to see what is now live.  Then we
  // remove the dummy calls.
  Holders.reserve(Holders.size() + Records.size());
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &Info = Records[i];

    SmallVector<Value *, 128> Bases;
    for (auto Pair : Info.PointerToBase)
      Bases.push_back(Pair.second);

    insertUseHolderAfter(ToUpdate[i], Bases, Holders);
  }

  // By selecting base pointers, we've effectively inserted new uses. Thus, we
  // need to rerun liveness.  We may *also* have inserted new defs, but that's
  // not the key issue.
  recomputeLiveInValues(F, DT, ToUpdate, Records);

  // Optional debugging aid: dump the derived->base pairing for each record.
  if (PrintBasePointers) {
    for (auto &Info : Records) {
      errs() << "Base Pairs: (w/Relocation)\n";
      for (auto Pair : Info.PointerToBase) {
        errs() << " derived ";
        Pair.first->printAsOperand(errs(), false);
        errs() << " base ";
        Pair.second->printAsOperand(errs(), false);
        errs() << "\n";
      }
    }
  }

  // It is possible that non-constant live variables have a constant base.  For
  // example, a GEP with a variable offset from a global.  In this case we can
  // remove it from the liveset.  We already don't add constants to the liveset
  // because we assume they won't move at runtime and the GC doesn't need to be
  // informed about them.  The same reasoning applies if the base is constant.
  // Note that the relocation placement code relies on this filtering for
  // correctness as it expects the base to be in the liveset, which isn't true
  // if the base is constant.
  for (auto &Info : Records)
    for (auto &BasePair : Info.PointerToBase)
      if (isa<Constant>(BasePair.second))
        Info.LiveSet.remove(BasePair.first);

  // The dummy holder calls have served their purpose; delete them so they do
  // not appear in the final IR.
  for (CallInst *CI : Holders)
    CI->eraseFromParent();

  Holders.clear();

  // In order to reduce live set of statepoint we might choose to rematerialize
  // some values instead of relocating them. This is purely an optimization and
  // does not influence correctness.
  for (size_t i = 0; i < Records.size(); i++)
    rematerializeLiveValues(ToUpdate[i], Records[i], TTI);

  // We need this to safely RAUW and delete call or invoke return values that
  // may themselves be live over a statepoint.  For details, please see usage in
  // makeStatepointExplicitImpl.
  std::vector<DeferredReplacement> Replacements;

  // Now run through and replace the existing statepoints with new ones with
  // the live variables listed.  We do not yet update uses of the values being
  // relocated. We have references to live variables that need to
  // survive to the last iteration of this loop.  (By construction, the
  // previous statepoint can not be a live variable, thus we can and do remove
  // the old statepoint calls as we go.)
  for (size_t i = 0; i < Records.size(); i++)
    makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements);

  ToUpdate.clear(); // prevent accident use of invalid CallSites

  // Perform the deferred RAUW+deletes now that all statepoints are in place.
  for (auto &PR : Replacements)
    PR.doReplacement();

  Replacements.clear();

  for (auto &Info : Records) {
    // These live sets may contain stale Value pointers, since we replaced
    // calls with operand bundles with calls wrapped in gc.statepoint, and some
    // of those calls may have been def'ing live gc pointers.  Clear these out
    // to avoid accidentally using them.
    //
    // TODO: We should create a separate data structure that does not contain
    // these live sets, and migrate to using that data structure from this point
    // onward.
    Info.LiveSet.clear();
    Info.PointerToBase.clear();
  }

  // Do all the fixups of the original live variables to their relocated selves
  SmallVector<Value *, 128> Live;
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &Info = Records[i];

    // We can't simply save the live set from the original insertion.  One of
    // the live values might be the result of a call which needs a safepoint.
    // That Value* no longer exists and we need to use the new gc_result.
    // Thankfully, the live set is embedded in the statepoint (and updated), so
    // we just grab that.
    Statepoint Statepoint(Info.StatepointToken);
    Live.insert(Live.end(), Statepoint.gc_args_begin(),
                Statepoint.gc_args_end());
#ifndef NDEBUG
    // Do some basic sanity checks on our liveness results before performing
    // relocation.  Relocation can and will turn mistakes in liveness results
    // into non-sensical code which is much harder to debug.
    // TODO: It would be nice to test consistency as well
    assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
           "statepoint must be reachable or liveness is meaningless");
    for (Value *V : Statepoint.gc_args()) {
      if (!isa<Instruction>(V))
        // Non-instruction values trivially dominate all possible uses
        continue;
      auto *LiveInst = cast<Instruction>(V);
      assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
             "unreachable values should never be live");
      assert(DT.dominates(LiveInst, Info.StatepointToken) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
#endif
  }
  // The same value may be live across several statepoints; deduplicate
  // before rewriting uses.
  unique_unsorted(Live);

#ifndef NDEBUG
  // Sanity check: everything we are about to relocate must be a gc pointer.
  for (auto *Ptr : Live)
    assert(isHandledGCPointerType(Ptr->getType()) &&
           "must be a gc pointer type");
#endif

  relocationViaAlloca(F, DT, Live, Records);
  return !Records.empty();
}
2330 
2331 // Handles both return values and arguments for Functions and CallSites.
2332 template <typename AttrHolder>
2333 static void RemoveNonValidAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
2334                                       unsigned Index) {
2335   AttrBuilder R;
2336   if (AH.getDereferenceableBytes(Index))
2337     R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
2338                                   AH.getDereferenceableBytes(Index)));
2339   if (AH.getDereferenceableOrNullBytes(Index))
2340     R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
2341                                   AH.getDereferenceableOrNullBytes(Index)));
2342   if (AH.getAttributes().hasAttribute(Index, Attribute::NoAlias))
2343     R.addAttribute(Attribute::NoAlias);
2344 
2345   if (!R.empty())
2346     AH.setAttributes(AH.getAttributes().removeAttributes(Ctx, Index, R));
2347 }
2348 
2349 void
2350 RewriteStatepointsForGC::stripNonValidAttributesFromPrototype(Function &F) {
2351   LLVMContext &Ctx = F.getContext();
2352 
2353   for (Argument &A : F.args())
2354     if (isa<PointerType>(A.getType()))
2355       RemoveNonValidAttrAtIndex(Ctx, F,
2356                                 A.getArgNo() + AttributeList::FirstArgIndex);
2357 
2358   if (isa<PointerType>(F.getReturnType()))
2359     RemoveNonValidAttrAtIndex(Ctx, F, AttributeList::ReturnIndex);
2360 }
2361 
2362 void RewriteStatepointsForGC::stripInvalidMetadataFromInstruction(Instruction &I) {
2363   if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
2364     return;
2365   // These are the attributes that are still valid on loads and stores after
2366   // RS4GC.
2367   // The metadata implying dereferenceability and noalias are (conservatively)
2368   // dropped.  This is because semantically, after RewriteStatepointsForGC runs,
2369   // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
2370   // touch the entire heap including noalias objects. Note: The reasoning is
2371   // same as stripping the dereferenceability and noalias attributes that are
2372   // analogous to the metadata counterparts.
2373   // We also drop the invariant.load metadata on the load because that metadata
2374   // implies the address operand to the load points to memory that is never
2375   // changed once it became dereferenceable. This is no longer true after RS4GC.
2376   // Similar reasoning applies to invariant.group metadata, which applies to
2377   // loads within a group.
2378   unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
2379                          LLVMContext::MD_range,
2380                          LLVMContext::MD_alias_scope,
2381                          LLVMContext::MD_nontemporal,
2382                          LLVMContext::MD_nonnull,
2383                          LLVMContext::MD_align,
2384                          LLVMContext::MD_type};
2385 
2386   // Drops all metadata on the instruction other than ValidMetadataAfterRS4GC.
2387   I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
2388 }
2389 
/// Walk the body of F and remove attributes and metadata which are no longer
/// valid after this pass: invariant.start intrinsics, immutable TBAA tags,
/// invalid load/store metadata, and dereferenceability/noalias attributes on
/// call sites.
void RewriteStatepointsForGC::stripNonValidDataFromBody(Function &F) {
  if (F.empty())
    return;

  LLVMContext &Ctx = F.getContext();
  MDBuilder Builder(Ctx);

  // Set of invariantstart instructions that we need to remove.
  // Use this to avoid invalidating the instruction iterator.
  SmallVector<IntrinsicInst*, 12> InvariantStartInstructions;

  for (Instruction &I : instructions(F)) {
    // invariant.start on memory location implies that the referenced memory
    // location is constant and unchanging. This is no longer true after
    // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
    // which frees the entire heap and the presence of invariant.start allows
    // the optimizer to sink the load of a memory location past a statepoint,
    // which is incorrect.
    if (auto *II = dyn_cast<IntrinsicInst>(&I))
      if (II->getIntrinsicID() == Intrinsic::invariant_start) {
        InvariantStartInstructions.push_back(II);
        continue;
      }

    // Rewrite "immutable" TBAA tags (4-operand form with the immutability
    // bit set) into the equivalent mutable tag, since no memory can be
    // treated as immutable across a statepoint.
    if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
      assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
      bool IsImmutableTBAA =
          MD->getNumOperands() == 4 &&
          mdconst::extract<ConstantInt>(MD->getOperand(3))->getValue() == 1;

      // NOTE(review): this 'continue' also skips the metadata and call-site
      // attribute stripping below for instructions carrying a mutable TBAA
      // tag — confirm that is intentional.
      if (!IsImmutableTBAA)
        continue; // no work to do, MD_tbaa is already marked mutable

      // Rebuild the tag from its base/access/offset, dropping immutability.
      MDNode *Base = cast<MDNode>(MD->getOperand(0));
      MDNode *Access = cast<MDNode>(MD->getOperand(1));
      uint64_t Offset =
          mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue();

      MDNode *MutableTBAA =
          Builder.createTBAAStructTagNode(Base, Access, Offset);
      I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
    }

    stripInvalidMetadataFromInstruction(I);

    // Strip dereferenceability/noalias attributes from pointer arguments and
    // pointer return values at every call site, mirroring what
    // stripNonValidAttributesFromPrototype does for function signatures.
    if (CallSite CS = CallSite(&I)) {
      for (int i = 0, e = CS.arg_size(); i != e; i++)
        if (isa<PointerType>(CS.getArgument(i)->getType()))
          RemoveNonValidAttrAtIndex(Ctx, CS, i + AttributeList::FirstArgIndex);
      if (isa<PointerType>(CS.getType()))
        RemoveNonValidAttrAtIndex(Ctx, CS, AttributeList::ReturnIndex);
    }
  }

  // Delete the invariant.start instructions and RAUW undef.
  for (auto *II : InvariantStartInstructions) {
    II->replaceAllUsesWith(UndefValue::get(II->getType()));
    II->eraseFromParent();
  }
}
2450 
2451 /// Returns true if this function should be rewritten by this pass.  The main
2452 /// point of this function is as an extension point for custom logic.
2453 static bool shouldRewriteStatepointsIn(Function &F) {
2454   // TODO: This should check the GCStrategy
2455   if (F.hasGC()) {
2456     const auto &FunctionGCName = F.getGC();
2457     const StringRef StatepointExampleName("statepoint-example");
2458     const StringRef CoreCLRName("coreclr");
2459     return (StatepointExampleName == FunctionGCName) ||
2460            (CoreCLRName == FunctionGCName);
2461   } else
2462     return false;
2463 }
2464 
2465 void RewriteStatepointsForGC::stripNonValidData(Module &M) {
2466 #ifndef NDEBUG
2467   assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
2468 #endif
2469 
2470   for (Function &F : M)
2471     stripNonValidAttributesFromPrototype(F);
2472 
2473   for (Function &F : M)
2474     stripNonValidDataFromBody(F);
2475 }
2476 
/// Pass entry point: collect the calls in F that may reach a safepoint,
/// pre-shape the IR (remove unreachable statepoints, fold single-entry PHIs,
/// sink branch conditions past safepoints), then hand the collected call
/// sites to insertParsePoints for the actual rewrite.  Returns true if the
/// IR was changed.
bool RewriteStatepointsForGC::runOnFunction(Function &F) {
  // Nothing to do for declarations.
  if (F.isDeclaration() || F.empty())
    return false;

  // Policy choice says not to rewrite - the most common reason is that we're
  // compiling code without a GCStrategy.
  if (!shouldRewriteStatepointsIn(F))
    return false;

  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();
  TargetTransformInfo &TTI =
      getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  const TargetLibraryInfo &TLI =
      getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  // A call needs rewriting unless it is known not to reach a safepoint (a GC
  // leaf function) or is itself an already-inserted statepoint.
  auto NeedsRewrite = [&TLI](Instruction &I) {
    if (ImmutableCallSite CS = ImmutableCallSite(&I))
      return !callsGCLeafFunction(CS, TLI) && !isStatepoint(CS);
    return false;
  };

  // Gather all the statepoints which need to be rewritten.  Be careful to
  // only consider those in reachable code since we need to ask dominance
  // queries when rewriting.  We'll delete the unreachable ones in a moment.
  SmallVector<CallSite, 64> ParsePointNeeded;
  bool HasUnreachableStatepoint = false;
  for (Instruction &I : instructions(F)) {
    // TODO: only the ones with the flag set!
    if (NeedsRewrite(I)) {
      if (DT.isReachableFromEntry(I.getParent()))
        ParsePointNeeded.push_back(CallSite(&I));
      else
        HasUnreachableStatepoint = true;
    }
  }

  bool MadeChange = false;

  // Delete any unreachable statepoints so that we don't have unrewritten
  // statepoints surviving this pass.  This makes testing easier and the
  // resulting IR less confusing to human readers.  Rather than be fancy, we
  // just reuse a utility function which removes the unreachable blocks.
  if (HasUnreachableStatepoint)
    MadeChange |= removeUnreachableBlocks(F);

  // Return early if no work to do.
  if (ParsePointNeeded.empty())
    return MadeChange;

  // As a prepass, go ahead and aggressively destroy single entry phi nodes.
  // These are created by LCSSA.  They have the effect of increasing the size
  // of liveness sets for no good reason.  It may be harder to do this post
  // insertion since relocations and base phis can confuse things.
  for (BasicBlock &BB : F)
    if (BB.getUniquePredecessor()) {
      MadeChange = true;
      FoldSingleEntryPHINodes(&BB);
    }

  // Before we start introducing relocations, we want to tweak the IR a bit to
  // avoid unfortunate code generation effects.  The main example is that we
  // want to try to make sure the comparison feeding a branch is after any
  // safepoints.  Otherwise, we end up with a comparison of pre-relocation
  // values feeding a branch after relocation.  This is semantically correct,
  // but results in extra register pressure since both the pre-relocation and
  // post-relocation copies must be available in registers.  For code without
  // relocations this is handled elsewhere, but teaching the scheduler to
  // reverse the transform we're about to do would be slightly complex.
  // Note: This may extend the live range of the inputs to the icmp and thus
  // increase the liveset of any statepoint we move over.  This is profitable
  // as long as all statepoints are in rare blocks.  If we had in-register
  // lowering for live values this would be a much safer transform.
  auto getConditionInst = [](TerminatorInst *TI) -> Instruction* {
    if (auto *BI = dyn_cast<BranchInst>(TI))
      if (BI->isConditional())
        return dyn_cast<Instruction>(BI->getCondition());
    // TODO: Extend this to handle switches
    return nullptr;
  };
  for (BasicBlock &BB : F) {
    TerminatorInst *TI = BB.getTerminator();
    if (auto *Cond = getConditionInst(TI))
      // TODO: Handle more than just ICmps here.  We should be able to move
      // most instructions without side effects or memory access.
      if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
        MadeChange = true;
        Cond->moveBefore(TI);
      }
  }

  MadeChange |= insertParsePoints(F, DT, TTI, ParsePointNeeded);
  return MadeChange;
}
2571 
2572 // liveness computation via standard dataflow
2573 // -------------------------------------------------------------------
2574 
2575 // TODO: Consider using bitvectors for liveness, the set of potentially
2576 // interesting values should be small and easy to pre-compute.
2577 
2578 /// Compute the live-in set for the location rbegin starting from
2579 /// the live-out set of the basic block
2580 static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
2581                                 BasicBlock::reverse_iterator End,
2582                                 SetVector<Value *> &LiveTmp) {
2583   for (auto &I : make_range(Begin, End)) {
2584     // KILL/Def - Remove this definition from LiveIn
2585     LiveTmp.remove(&I);
2586 
2587     // Don't consider *uses* in PHI nodes, we handle their contribution to
2588     // predecessor blocks when we seed the LiveOut sets
2589     if (isa<PHINode>(I))
2590       continue;
2591 
2592     // USE - Add to the LiveIn set for this instruction
2593     for (Value *V : I.operands()) {
2594       assert(!isUnhandledGCPointerType(V->getType()) &&
2595              "support for FCA unimplemented");
2596       if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
2597         // The choice to exclude all things constant here is slightly subtle.
2598         // There are two independent reasons:
2599         // - We assume that things which are constant (from LLVM's definition)
2600         // do not move at runtime.  For example, the address of a global
2601         // variable is fixed, even though it's contents may not be.
2602         // - Second, we can't disallow arbitrary inttoptr constants even
2603         // if the language frontend does.  Optimization passes are free to
2604         // locally exploit facts without respect to global reachability.  This
2605         // can create sections of code which are dynamically unreachable and
2606         // contain just about anything.  (see constants.ll in tests)
2607         LiveTmp.insert(V);
2608       }
2609     }
2610   }
2611 }
2612 
2613 static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp) {
2614   for (BasicBlock *Succ : successors(BB)) {
2615     for (auto &I : *Succ) {
2616       PHINode *PN = dyn_cast<PHINode>(&I);
2617       if (!PN)
2618         break;
2619 
2620       Value *V = PN->getIncomingValueForBlock(BB);
2621       assert(!isUnhandledGCPointerType(V->getType()) &&
2622              "support for FCA unimplemented");
2623       if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V))
2624         LiveTmp.insert(V);
2625     }
2626   }
2627 }
2628 
2629 static SetVector<Value *> computeKillSet(BasicBlock *BB) {
2630   SetVector<Value *> KillSet;
2631   for (Instruction &I : *BB)
2632     if (isHandledGCPointerType(I.getType()))
2633       KillSet.insert(&I);
2634   return KillSet;
2635 }
2636 
2637 #ifndef NDEBUG
2638 /// Check that the items in 'Live' dominate 'TI'.  This is used as a basic
2639 /// sanity check for the liveness computation.
2640 static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
2641                           TerminatorInst *TI, bool TermOkay = false) {
2642   for (Value *V : Live) {
2643     if (auto *I = dyn_cast<Instruction>(V)) {
2644       // The terminator can be a member of the LiveOut set.  LLVM's definition
2645       // of instruction dominance states that V does not dominate itself.  As
2646       // such, we need to special case this to allow it.
2647       if (TermOkay && TI == I)
2648         continue;
2649       assert(DT.dominates(I, TI) &&
2650              "basic SSA liveness expectation violated by liveness analysis");
2651     }
2652   }
2653 }
2654 
2655 /// Check that all the liveness sets used during the computation of liveness
2656 /// obey basic SSA properties.  This is useful for finding cases where we miss
2657 /// a def.
2658 static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
2659                           BasicBlock &BB) {
2660   checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
2661   checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
2662   checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
2663 }
2664 #endif
2665 
/// Compute per-block gc-pointer liveness (LiveSet/KillSet/LiveIn/LiveOut in
/// \p Data) for the whole function via a standard backwards dataflow
/// fixed-point iteration over a worklist of blocks.
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data) {
  SmallSetVector<BasicBlock *, 32> Worklist;

  // Seed the liveness for each individual block
  for (BasicBlock &BB : F) {
    Data.KillSet[&BB] = computeKillSet(&BB);
    Data.LiveSet[&BB].clear();
    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);

#ifndef NDEBUG
    // A value defined in the block must not also be reported as used-before-
    // def within the same block.
    for (Value *Kill : Data.KillSet[&BB])
      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
#endif

    // Seed LiveOut from successor PHI uses, then form the initial LiveIn as
    // (LiveSet ∪ LiveOut) \ KillSet.
    Data.LiveOut[&BB] = SetVector<Value *>();
    computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
    Data.LiveIn[&BB] = Data.LiveSet[&BB];
    Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
    Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
    // Only blocks with a non-empty LiveIn can influence their predecessors.
    if (!Data.LiveIn[&BB].empty())
      Worklist.insert(pred_begin(&BB), pred_end(&BB));
  }

  // Propagate that liveness until stable
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // Compute our new liveout set, then exit early if it hasn't changed despite
    // the contribution of our successor.
    SetVector<Value *> LiveOut = Data.LiveOut[BB];
    const auto OldLiveOutSize = LiveOut.size();
    for (BasicBlock *Succ : successors(BB)) {
      assert(Data.LiveIn.count(Succ));
      LiveOut.set_union(Data.LiveIn[Succ]);
    }
    // assert OldLiveOut is a subset of LiveOut
    if (OldLiveOutSize == LiveOut.size()) {
      // If the sets are the same size, then we didn't actually add anything
      // when unioning our successors LiveIn.  Thus, the LiveIn of this block
      // hasn't changed.  (Sets only grow, so equal size implies equal sets.)
      continue;
    }
    Data.LiveOut[BB] = LiveOut;

    // Apply the effects of this basic block
    SetVector<Value *> LiveTmp = LiveOut;
    LiveTmp.set_union(Data.LiveSet[BB]);
    LiveTmp.set_subtract(Data.KillSet[BB]);

    assert(Data.LiveIn.count(BB));
    const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB];
    // assert: OldLiveIn is a subset of LiveTmp
    if (OldLiveIn.size() != LiveTmp.size()) {
      // LiveIn grew, so our predecessors may need to be revisited.
      Data.LiveIn[BB] = LiveTmp;
      Worklist.insert(pred_begin(BB), pred_end(BB));
    }
  } // while (!Worklist.empty())

#ifndef NDEBUG
  // Sanity check our output against SSA properties.  This helps catch any
  // missing kills during the above iteration.
  for (BasicBlock &BB : F)
    checkBasicSSA(DT, Data, BB);
#endif
}
2732 
/// Given whole-function liveness results in \p Data, compute the set of gc
/// pointers live across the call \p Inst and insert them into \p Out.
static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &Out) {
  BasicBlock *BB = Inst->getParent();

  // Note: The copy is intentional and required
  assert(Data.LiveOut.count(BB));
  SetVector<Value *> LiveOut = Data.LiveOut[BB];

  // We want to handle the statepoint itself oddly.  Its
  // call result is not live (normal), nor are its arguments
  // (unless they're used again later).  This adjustment is
  // specifically what we need to relocate
  //
  // Walk backwards from the block's live-out, stopping once Inst has been
  // processed (the reverse range below includes Inst itself), then drop
  // Inst's own result from the set.
  computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(),
                      LiveOut);
  LiveOut.remove(Inst);
  Out.insert(LiveOut.begin(), LiveOut.end());
}
2750 
/// Refresh the live set recorded in \p Info for the call site \p CS using the
/// revised liveness results, and bring the PointerToBase mapping back in sync
/// with the new live set (newly-live bases map to themselves, stale entries
/// are dropped).
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallSite CS,
                                  PartiallyConstructedSafepointRecord &Info) {
  Instruction *Inst = CS.getInstruction();
  StatepointLiveSetTy Updated;
  findLiveSetAtInst(Inst, RevisedLivenessData, Updated);

#ifndef NDEBUG
  // Record the known base values so we can verify below that any value which
  // became live is itself one of those bases.
  DenseSet<Value *> Bases;
  for (auto KVPair : Info.PointerToBase)
    Bases.insert(KVPair.second);
#endif

  // We may have base pointers which are now live that weren't before.  We need
  // to update the PointerToBase structure to reflect this.  A base pointer is
  // trivially its own base, hence the {V, V} entry.
  for (auto V : Updated)
    if (Info.PointerToBase.insert({V, V}).second) {
      assert(Bases.count(V) && "Can't find base for unexpected live value!");
      continue;
    }

#ifndef NDEBUG
  for (auto V : Updated)
    assert(Info.PointerToBase.count(V) &&
           "Must be able to find base for live value!");
#endif

  // Remove any stale base mappings - this can happen since our liveness is
  // more precise than the one inherent in the base pointer analysis.
  // (Collect first, then erase, to avoid mutating the map while iterating.)
  DenseSet<Value *> ToErase;
  for (auto KVPair : Info.PointerToBase)
    if (!Updated.count(KVPair.first))
      ToErase.insert(KVPair.first);

  for (auto *V : ToErase)
    Info.PointerToBase.erase(V);

#ifndef NDEBUG
  for (auto KVPair : Info.PointerToBase)
    assert(Updated.count(KVPair.first) && "record for non-live value");
#endif

  Info.LiveSet = Updated;
}
2795