//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// Rewrite call/invoke instructions so as to make potential relocations
// performed by the garbage collector explicit in the IR.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h"

#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/MapVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/Argument.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Metadata.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <optional>
#include <set>
#include <string>
#include <utility>
#include <vector>

#define DEBUG_TYPE "rewrite-statepoints-for-gc"

using namespace llvm;

// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                  cl::init(false));
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
                                      cl::init(false));

// Print out the base pointers for debugging
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                       cl::init(false));

// Cost threshold measuring when it is profitable to rematerialize a value
// instead of relocating it
static cl::opt<unsigned>
    RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
                               cl::init(6));

#ifdef EXPENSIVE_CHECKS
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif

static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                  cl::location(ClobberNonLive),
                                                  cl::Hidden);

static cl::opt<bool>
    AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info",
                                   cl::Hidden, cl::init(true));

static cl::opt<bool> RematDerivedAtUses("rs4gc-remat-derived-at-uses",
                                        cl::Hidden, cl::init(true));

/// The IR fed into RewriteStatepointsForGC may have had attributes and
/// metadata implying dereferenceability that are no longer valid/correct after
/// RewriteStatepointsForGC has run. This is because semantically, after
/// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire
/// heap. stripNonValidData (conservatively) restores
/// correctness by erasing all attributes in the module that externally imply
/// dereferenceability. Similar reasoning also applies to the noalias
/// attributes and metadata. gc.statepoint can touch the entire heap including
/// noalias objects.
/// Apart from attributes and metadata, we also remove instructions that imply
/// constant physical memory: llvm.invariant.start.
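/// For example, a dereferenceable(N) attribute on a pointer argument, or
/// !dereferenceable metadata on a load, may no longer hold once the collector
/// is free to move or reclaim the underlying object at a statepoint, so such
/// facts must be dropped.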
static void stripNonValidData(Module &M);

static bool shouldRewriteStatepointsIn(Function &F);

PreservedAnalyses RewriteStatepointsForGC::run(Module &M,
                                               ModuleAnalysisManager &AM) {
  bool Changed = false;
  auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager();
  for (Function &F : M) {
    // Nothing to do for declarations.
    if (F.isDeclaration() || F.empty())
      continue;

    // Policy choice says not to rewrite - the most common reason is that we're
    // compiling code without a GCStrategy.
    if (!shouldRewriteStatepointsIn(F))
      continue;

    auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
    auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
    auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
    Changed |= runOnFunction(F, DT, TTI, TLI);
  }
  if (!Changed)
    return PreservedAnalyses::all();

  // stripNonValidData asserts that shouldRewriteStatepointsIn
  // returns true for at least one function in the module. Since at least
  // one function changed, we know that the precondition is satisfied.
  stripNonValidData(M);

  PreservedAnalyses PA;
  PA.preserve<TargetIRAnalysis>();
  PA.preserve<TargetLibraryAnalysis>();
  return PA;
}

namespace {

class RewriteStatepointsForGCLegacyPass : public ModulePass {
  RewriteStatepointsForGC Impl;

public:
  static char ID; // Pass identification, replacement for typeid

  RewriteStatepointsForGCLegacyPass() : ModulePass(ID), Impl() {
    initializeRewriteStatepointsForGCLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  bool runOnModule(Module &M) override {
    bool Changed = false;
    for (Function &F : M) {
      // Nothing to do for declarations.
      if (F.isDeclaration() || F.empty())
        continue;

      // Policy choice says not to rewrite - the most common reason is that
      // we're compiling code without a GCStrategy.
      if (!shouldRewriteStatepointsIn(F))
        continue;

      TargetTransformInfo &TTI =
          getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
      const TargetLibraryInfo &TLI =
          getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
      auto &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();

      Changed |= Impl.runOnFunction(F, DT, TTI, TLI);
    }

    if (!Changed)
      return false;

    // stripNonValidData asserts that shouldRewriteStatepointsIn
    // returns true for at least one function in the module. Since at least
    // one function changed, we know that the precondition is satisfied.
    stripNonValidData(M);
    return true;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // We add and rewrite a bunch of instructions, but don't really do much
    // else. We could in theory preserve a lot more analyses here.
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
    AU.addRequired<TargetLibraryInfoWrapperPass>();
  }
};

} // end anonymous namespace

char RewriteStatepointsForGCLegacyPass::ID = 0;

ModulePass *llvm::createRewriteStatepointsForGCLegacyPass() {
  return new RewriteStatepointsForGCLegacyPass();
}

INITIALIZE_PASS_BEGIN(RewriteStatepointsForGCLegacyPass,
                      "rewrite-statepoints-for-gc",
                      "Make relocations explicit at statepoints", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(RewriteStatepointsForGCLegacyPass,
                    "rewrite-statepoints-for-gc",
                    "Make relocations explicit at statepoints", false, false)

namespace {

struct GCPtrLivenessData {
  /// Values defined in this block.
  MapVector<BasicBlock *, SetVector<Value *>> KillSet;

  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  MapVector<BasicBlock *, SetVector<Value *>> LiveSet;

  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here)
  MapVector<BasicBlock *, SetVector<Value *>> LiveIn;

  /// Values live out of this basic block (i.e. live into
  /// any successor block)
  MapVector<BasicBlock *, SetVector<Value *>> LiveOut;
};
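
// Informally (a sketch, matching the field descriptions above), these sets
// satisfy the usual backward dataflow equations:
//   LiveOut[B] = union over successors S of B of LiveIn[S]
//   LiveIn[B]  = (LiveOut[B] - KillSet[B]) union LiveSet[B]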

// The type of the internal cache used inside the findBasePointers family
// of functions. From the caller's perspective, this is an opaque type and
// should not be inspected.
//
// In the actual implementation this caches two relations:
// - The base relation itself (i.e. this pointer is based on that one)
// - The base defining value relation (i.e. before base_phi insertion)
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain. Internally, we add a mixture of the two
// types, then update all entries of the second type to the first type.
using DefiningValueMapTy = MapVector<Value *, Value *>;
using IsKnownBaseMapTy = MapVector<Value *, bool>;
using PointerToBaseTy = MapVector<Value *, Value *>;
using StatepointLiveSetTy = SetVector<Value *>;
using RematerializedValueMapTy =
    MapVector<AssertingVH<Instruction>, AssertingVH<Value>>;

struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy LiveSet;

  /// The *new* gc.statepoint instruction itself. This produces the token
  /// that normal path gc.relocates and the gc.result are tied to.
  GCStatepointInst *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached.
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Record live values we rematerialized instead of relocating.
  /// They are not included in the 'LiveSet' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};

struct RematerizlizationCandidateRecord {
  // Chain from derived pointer to base.
  SmallVector<Instruction *, 3> ChainToBase;
  // Original base.
  Value *RootOfChain;
  // Cost of chain.
  InstructionCost Cost;
};
using RematCandTy = MapVector<Value *, RematerizlizationCandidateRecord>;

} // end anonymous namespace

static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) {
  std::optional<OperandBundleUse> DeoptBundle =
      Call->getOperandBundle(LLVMContext::OB_deopt);

  if (!DeoptBundle) {
    assert(AllowStatepointWithNoDeoptInfo &&
           "Found non-leaf call without deopt info!");
    return std::nullopt;
  }

  return DeoptBundle->Inputs;
}
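
// For example, a call carrying deopt state might look like:
//   call void @foo() [ "deopt"(i32 %frame_idx, ptr addrspace(1) %obj) ]
// (%frame_idx and %obj are illustrative), in which case this returns the two
// bundle inputs.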

/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data);

/// Given results from the dataflow liveness computation, find the set of live
/// values at a particular instruction.
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &out);

// TODO: Once we can get to the GCStrategy, this becomes
// std::optional<bool> isGCManagedPointer(const Type *Ty) const override {

static bool isGCPointerType(Type *T) {
  if (auto *PT = dyn_cast<PointerType>(T))
    // For the sake of this example GC, we arbitrarily pick addrspace(1) as our
    // GC managed heap. We know that a pointer into this heap needs to be
    // updated and that no other pointer does.
    return PT->getAddressSpace() == 1;
  return false;
}
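
// Thus a value of type 'ptr addrspace(1)' is treated as GC-managed and may
// need to be relocated across a statepoint, while a plain 'ptr' (addrspace 0)
// never does.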

// Return true if this type is one which a) is a gc pointer or contains a GC
// pointer and b) is of a type this code expects to encounter as a live value.
// (The insertion code will assert that a type which matches (a) and not (b)
// is not encountered.)
static bool isHandledGCPointerType(Type *T) {
  // We fully support gc pointers
  if (isGCPointerType(T))
    return true;
  // We partially support vectors of gc pointers. The code will assert if it
  // can't handle something.
  if (auto VT = dyn_cast<VectorType>(T))
    if (isGCPointerType(VT->getElementType()))
      return true;
  return false;
}
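
// For example, <4 x ptr addrspace(1)> is handled, while a first-class
// aggregate such as { ptr addrspace(1), i64 } contains a gc pointer but is
// not handled (see isUnhandledGCPointerType below).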

#ifndef NDEBUG
/// Returns true if this type contains a gc pointer whether we know how to
/// handle that type or not.
static bool containsGCPtrType(Type *Ty) {
  if (isGCPointerType(Ty))
    return true;
  if (VectorType *VT = dyn_cast<VectorType>(Ty))
    return isGCPointerType(VT->getScalarType());
  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
    return containsGCPtrType(AT->getElementType());
  if (StructType *ST = dyn_cast<StructType>(Ty))
    return llvm::any_of(ST->elements(), containsGCPtrType);
  return false;
}

// Returns true if this is a type which a) is a gc pointer or contains a GC
// pointer and b) is of a type which the code doesn't expect (i.e. first class
// aggregates). Used to trip assertions.
static bool isUnhandledGCPointerType(Type *Ty) {
  return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
}
#endif

// Return the name of the value suffixed with the provided suffix, or, if the
// value didn't have a name, the specified default name.
static std::string suffixed_name_or(Value *V, StringRef Suffix,
                                    StringRef DefaultName) {
  return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str();
}

// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
static void analyzeParsePointLiveness(
    DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call,
    PartiallyConstructedSafepointRecord &Result) {
  StatepointLiveSetTy LiveSet;
  findLiveSetAtInst(Call, OriginalLivenessData, LiveSet);

  if (PrintLiveSet) {
    dbgs() << "Live Variables:\n";
    for (Value *V : LiveSet)
      dbgs() << " " << V->getName() << " " << *V << "\n";
  }
  if (PrintLiveSetSize) {
    dbgs() << "Safepoint For: " << Call->getCalledOperand()->getName() << "\n";
    dbgs() << "Number live values: " << LiveSet.size() << "\n";
  }
  Result.LiveSet = LiveSet;
}

/// Returns true if V is a known base.
static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases);

/// Caches the IsKnownBase flag for a value and asserts that it wasn't present
/// in the cache before.
static void setKnownBase(Value *V, bool IsKnownBase,
                         IsKnownBaseMapTy &KnownBases);

static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
                                    IsKnownBaseMapTy &KnownBases);

/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
/// 'I'. As an optimization, this method will try to determine when the
/// element is known to already be a base pointer. If this can be established,
/// the second value in the returned pair will be true. Note that either a
/// vector or a pointer typed value can be returned. For the former, the
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the returned pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache,
                                            IsKnownBaseMapTy &KnownBases) {
  // Each case parallels findBaseDefiningValue below, see that code for
  // detailed motivation.

  auto Cached = Cache.find(I);
  if (Cached != Cache.end())
    return Cached->second;

  if (isa<Argument>(I)) {
    // An incoming argument to the function is a base pointer
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  if (isa<Constant>(I)) {
    // Base of constant vector consists only of constant null pointers.
    // For reasoning see the similar case inside 'findBaseDefiningValue'.
    auto *CAZ = ConstantAggregateZero::get(I->getType());
    Cache[I] = CAZ;
    setKnownBase(CAZ, /* IsKnownBase */true, KnownBases);
    return CAZ;
  }

  if (isa<LoadInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  if (isa<InsertElementInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */false, KnownBases);
    return I;
  }

  if (isa<ShuffleVectorInst>(I)) {
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    // TODO: There are a number of local optimizations which could be applied
    // here for particular shufflevector patterns.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */false, KnownBases);
    return I;
  }

  // The behavior of getelementptr instructions is the same for vector and
  // non-vector data types.
  if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) {
    auto *BDV =
        findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
    Cache[GEP] = BDV;
    return BDV;
  }

  // The behavior of freeze instructions is the same for vector and
  // non-vector data types.
  if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
    auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
    Cache[Freeze] = BDV;
    return BDV;
  }

  // If the pointer comes through a bitcast of a vector of pointers to
  // a vector of another type of pointer, then look through the bitcast
  if (auto *BC = dyn_cast<BitCastInst>(I)) {
    auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases);
    Cache[BC] = BDV;
    return BDV;
  }

  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  // A PHI or Select is a base defining value. The outer findBasePointer
  // algorithm is responsible for constructing a base value for this BDV.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "unknown vector instruction - no base found for vector element");
  Cache[I] = I;
  setKnownBase(I, /* IsKnownBase */false, KnownBases);
  return I;
}

/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
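/// For example (with %obj a GC-pointer-typed function argument):
///   %d = getelementptr i8, ptr addrspace(1) %obj, i64 16
///     ; case a: the BDV of %d is %obj
///   %p = phi ptr addrspace(1) [ %a, %l ], [ %b, %r ]
///     ; case b: the BDV of %p is %p itself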
static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache,
                                    IsKnownBaseMapTy &KnownBases) {
  assert(I->getType()->isPtrOrPtrVectorTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");
  auto Cached = Cache.find(I);
  if (Cached != Cache.end())
    return Cached->second;

  if (I->getType()->isVectorTy())
    return findBaseDefiningValueOfVector(I, Cache, KnownBases);

  if (isa<Argument>(I)) {
    // An incoming argument to the function is a base pointer.
    // We should never have reached here if this argument isn't a gc value.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  if (isa<Constant>(I)) {
    // We assume that objects with a constant base (e.g. a global) can't move
    // and don't need to be reported to the collector because they are always
    // live. Besides global references, all kinds of constants (e.g. undef,
    // constant expressions, null pointers) can be introduced by the inliner or
    // the optimizer, especially on dynamically dead paths.
    // Here we treat all of them as having a single null base. By doing this we
    // try to avoid problems reporting various conflicts in the form of
    // "phi (const1, const2)" or "phi (const, regular gc ptr)".
    // See constant.ll file for relevant test cases.

    auto *CPN = ConstantPointerNull::get(cast<PointerType>(I->getType()));
    Cache[I] = CPN;
    setKnownBase(CPN, /* IsKnownBase */true, KnownBases);
    return CPN;
  }

  // inttoptrs in an integral address space are currently ill-defined. We
  // treat them as defining base pointers here for consistency with the
  // constant rule above and because we don't really have a better semantic
  // to give them. Note that the optimizer is always free to insert undefined
  // behavior on dynamically dead paths as well.
  if (isa<IntToPtrInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Def = CI->stripPointerCasts();
    // If stripping pointer casts changes the address space there is an
    // addrspacecast in between.
    assert(cast<PointerType>(Def->getType())->getAddressSpace() ==
               cast<PointerType>(CI->getType())->getAddressSpace() &&
           "unsupported addrspacecast");
    // If we find a cast instruction here, it means we've found a cast which is
    // not simply a pointer cast (i.e. an inttoptr). We don't know how to
    // handle int->ptr conversion.
    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
    auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases);
    Cache[CI] = BDV;
    return BDV;
  }

  if (isa<LoadInst>(I)) {
    // The value loaded is a gc base itself.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) {
    // The base of this GEP is the base
    auto *BDV =
        findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases);
    Cache[GEP] = BDV;
    return BDV;
  }

  if (auto *Freeze = dyn_cast<FreezeInst>(I)) {
    auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases);
    Cache[Freeze] = BDV;
    return BDV;
  }

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    default:
      // fall through to general call handling
      break;
    case Intrinsic::experimental_gc_statepoint:
      llvm_unreachable("statepoints don't produce pointers");
    case Intrinsic::experimental_gc_relocate:
      // Rerunning safepoint insertion after safepoints are already
      // inserted is not supported. It could probably be made to work,
      // but why are you doing this? There's no good reason.
      llvm_unreachable("repeat safepoint insertion is not supported");
    case Intrinsic::gcroot:
      // Currently, this mechanism hasn't been extended to work with gcroot.
      // There's no reason it couldn't be, but I haven't thought about the
      // implications much.
      llvm_unreachable(
          "interaction with the gcroot mechanism is not supported");
    case Intrinsic::experimental_gc_get_pointer_base:
      auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases);
      Cache[II] = BDV;
      return BDV;
    }
  }
  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  // TODO: I have absolutely no idea how to implement this part yet. It's not
  // necessarily hard, I just haven't really looked at it yet.
  assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");

  if (isa<AtomicCmpXchgInst>(I)) {
    // A CAS is effectively an atomic store and load combined under a
    // predicate. From the perspective of base pointers, we just treat it
    // like a load.
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
                                   "binary ops which don't apply to pointers");

  // The aggregate ops. Aggregates can either be in the heap or on the
  // stack, but in either case, this is simply a field load. As a result,
  // this defines a base for the result just like a load does.
  if (isa<ExtractValueInst>(I)) {
    Cache[I] = I;
    setKnownBase(I, /* IsKnownBase */true, KnownBases);
    return I;
  }

  // We should never see an insert vector since that would require we be
  // tracing back a struct value not a pointer value.
  assert(!isa<InsertValueInst>(I) &&
         "Base pointer for a struct is meaningless");

  // This value might have been generated by findBasePointer() called when
  // substituting gc.get.pointer.base() intrinsic.
  bool IsKnownBase =
      isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value");
  setKnownBase(I, /* IsKnownBase */IsKnownBase, KnownBases);
  Cache[I] = I;

  // An extractelement produces a base result exactly when its input does.
  // We may need to insert a parallel instruction to extract the appropriate
  // element out of the base vector corresponding to the input. Given this,
  // it's analogous to the phi and select case even though it's not a merge.
  if (isa<ExtractElementInst>(I))
    // Note: There are a lot of obvious peephole cases here. These are
    // deliberately handled after the main base pointer inference algorithm to
    // make writing test cases to exercise that code easier.
    return I;

  // The last two cases here don't return a base pointer. Instead, they
  // return a value which dynamically selects from among several base
  // derived pointers (each potentially with its own base). It's the job of
  // the caller to resolve these.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "missing instruction case in findBaseDefiningValue");
  return I;
}

/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache,
                                          IsKnownBaseMapTy &KnownBases) {
  if (Cache.find(I) == Cache.end()) {
    auto *BDV = findBaseDefiningValue(I, Cache, KnownBases);
    Cache[I] = BDV;
    LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
                      << Cache[I]->getName() << ", is known base = "
                      << KnownBases[I] << "\n");
  }
  assert(Cache[I] != nullptr);
  assert(KnownBases.find(Cache[I]) != KnownBases.end() &&
         "Cached value must be present in known bases map");
  return Cache[I];
}

/// Return a base pointer for this value if known. Otherwise, return its
/// base defining value.
static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache,
                            IsKnownBaseMapTy &KnownBases) {
  Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases);
  auto Found = Cache.find(Def);
  if (Found != Cache.end()) {
    // Either a base-of relation, or a self reference. Caller must check.
    return Found->second;
  }
  // Only a BDV available
  return Def;
}

#ifndef NDEBUG
/// This value is a base pointer that is not generated by RS4GC, i.e. it
/// already exists in the code.
static bool isOriginalBaseResult(Value *V) {
  // no recursion possible
  return !isa<PHINode>(V) && !isa<SelectInst>(V) &&
         !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) &&
         !isa<ShuffleVectorInst>(V);
}
#endif

static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) {
  auto It = KnownBases.find(V);
  assert(It != KnownBases.end() && "Value not present in the map");
  return It->second;
}

static void setKnownBase(Value *V, bool IsKnownBase,
                         IsKnownBaseMapTy &KnownBases) {
#ifndef NDEBUG
  auto It = KnownBases.find(V);
  if (It != KnownBases.end())
    assert(It->second == IsKnownBase && "Changing already present value");
#endif
  KnownBases[V] = IsKnownBase;
}

// Returns true if First and Second values are both scalar or both vector.
static bool areBothVectorOrScalar(Value *First, Value *Second) {
  return isa<VectorType>(First->getType()) ==
         isa<VectorType>(Second->getType());
}

namespace {

/// Models the state of a single base defining value in the findBasePointer
/// algorithm for determining where a new instruction is needed to propagate
/// the base of this BDV.
class BDVState {
public:
  enum StatusTy {
    // Starting state of lattice
    Unknown,
    // Some specific base value -- does *not* mean that instruction
    // propagates the base of the object
    // ex: gep %arg, 16 -> %arg is the base value
    Base,
    // Need to insert a node to represent a merge.
    Conflict
  };

  BDVState() {
    llvm_unreachable("missing state in map");
  }

  explicit BDVState(Value *OriginalValue)
      : OriginalValue(OriginalValue) {}
  explicit BDVState(Value *OriginalValue, StatusTy Status,
                    Value *BaseValue = nullptr)
      : OriginalValue(OriginalValue), Status(Status), BaseValue(BaseValue) {
    assert(Status != Base || BaseValue);
  }

  StatusTy getStatus() const { return Status; }
  Value *getOriginalValue() const { return OriginalValue; }
  Value *getBaseValue() const { return BaseValue; }

  bool isBase() const { return getStatus() == Base; }
  bool isUnknown() const { return getStatus() == Unknown; }
  bool isConflict() const { return getStatus() == Conflict; }

  // Values of type BDVState form a lattice, and this function implements the
  // meet operation.
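  // For example: meet(Unknown, Base(b)) = Base(b);
  // meet(Base(b1), Base(b2)) = Conflict when b1 != b2; and
  // meet(Conflict, x) = Conflict for any x.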
  void meet(const BDVState &Other) {
    auto markConflict = [&]() {
      Status = BDVState::Conflict;
      BaseValue = nullptr;
    };
    // Conflict is a final state.
    if (isConflict())
      return;
    // If we are not known - just take the other state.
    if (isUnknown()) {
      Status = Other.getStatus();
      BaseValue = Other.getBaseValue();
      return;
    }
    // We are base.
    assert(isBase() && "Unknown state");
    // If other is unknown - just keep our state.
    if (Other.isUnknown())
      return;
    // If other is conflict - it is a final state.
    if (Other.isConflict())
      return markConflict();
    // Other is base as well.
    assert(Other.isBase() && "Unknown state");
    // If bases are different - Conflict.
    if (getBaseValue() != Other.getBaseValue())
      return markConflict();
    // We are identical, do nothing.
  }

  bool operator==(const BDVState &Other) const {
    return OriginalValue == Other.OriginalValue &&
           BaseValue == Other.BaseValue && Status == Other.Status;
  }

  bool operator!=(const BDVState &other) const { return !(*this == other); }

  LLVM_DUMP_METHOD
  void dump() const {
    print(dbgs());
    dbgs() << '\n';
  }

  void print(raw_ostream &OS) const {
    switch (getStatus()) {
    case Unknown:
      OS << "U";
      break;
    case Base:
      OS << "B";
      break;
    case Conflict:
      OS << "C";
      break;
    }
    OS << " (base " << getBaseValue() << " - "
       << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << ")"
       << " for " << OriginalValue->getName() << ":";
  }

private:
  AssertingVH<Value> OriginalValue; // instruction this state corresponds to
  StatusTy Status = Unknown;
  AssertingVH<Value> BaseValue = nullptr; // Non-null only if Status == Base.
};

} // end anonymous namespace

#ifndef NDEBUG
static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}
#endif

/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache,
                              IsKnownBaseMapTy &KnownBases) {
  Value *Def = findBaseOrBDV(I, Cache, KnownBases);

  if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I))
    return Def;

  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state. Use an
  //   optimistic algorithm to propagate base pointer information. Lattice
  //   looks like:
  //     UNKNOWN
  //     b1 b2 b3 b4
  //     CONFLICT
  //   When the algorithm terminates, all PHIs will either have a single
  //   concrete base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments. Add
  //   these to the base[Instruction] = BasePtr mapping. For every
  //   non-conflict, add the actual base.
  // - For every conflict, add arguments for the base[a] of each input
  //   argument.
  //
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm. This would be
  // analogous to pessimistic data flow and would likely lead to an
  // overall worse solution.
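  //
  // For example, given two derived pointers merged by a phi:
  //   %d1 = getelementptr i8, ptr addrspace(1) %base1, i64 8
  //   %d2 = getelementptr i8, ptr addrspace(1) %base2, i64 8
  //   %p  = phi ptr addrspace(1) [ %d1, %left ], [ %d2, %right ]
  // %p ends up in the Conflict state, and its base is materialized as a
  // parallel phi:
  //   %p.base = phi ptr addrspace(1) [ %base1, %left ], [ %base2, %right ]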
902
903 #ifndef NDEBUG
904 auto isExpectedBDVType = [](Value *BDV) {
905 return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
906 isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) ||
907 isa<ShuffleVectorInst>(BDV);
908 };
909 #endif
910
911 // Once populated, will contain a mapping from each potentially non-base BDV
912 // to a lattice value (described above) which corresponds to that BDV.
913 // We use the order of insertion (DFS over the def/use graph) to provide a
914 // stable deterministic ordering for visiting DenseMaps (which are unordered)
915 // below. This is important for deterministic compilation.
916 MapVector<Value *, BDVState> States;
917
918 #ifndef NDEBUG
919 auto VerifyStates = [&]() {
920 for (auto &Entry : States) {
921 assert(Entry.first == Entry.second.getOriginalValue());
922 }
923 };
924 #endif
925
926 auto visitBDVOperands = [](Value *BDV, std::function<void (Value*)> F) {
927 if (PHINode *PN = dyn_cast<PHINode>(BDV)) {
928 for (Value *InVal : PN->incoming_values())
929 F(InVal);
930 } else if (SelectInst *SI = dyn_cast<SelectInst>(BDV)) {
931 F(SI->getTrueValue());
932 F(SI->getFalseValue());
933 } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) {
934 F(EE->getVectorOperand());
935 } else if (auto *IE = dyn_cast<InsertElementInst>(BDV)) {
936 F(IE->getOperand(0));
937 F(IE->getOperand(1));
938 } else if (auto *SV = dyn_cast<ShuffleVectorInst>(BDV)) {
939 // For a canonical broadcast, ignore the undef argument
940 // (without this, we insert a parallel base shuffle for every broadcast)
941 F(SV->getOperand(0));
942 if (!SV->isZeroEltSplat())
943 F(SV->getOperand(1));
944 } else {
945 llvm_unreachable("unexpected BDV type");
946 }
947 };
948
949
950 // Recursively fill in all base defining values reachable from the initial
951 // one for which we don't already know a definite base value for
952 /* scope */ {
953 SmallVector<Value*, 16> Worklist;
954 Worklist.push_back(Def);
955 States.insert({Def, BDVState(Def)});
956 while (!Worklist.empty()) {
957 Value *Current = Worklist.pop_back_val();
958 assert(!isOriginalBaseResult(Current) && "why did it get added?");
959
960 auto visitIncomingValue = [&](Value *InVal) {
961 Value *Base = findBaseOrBDV(InVal, Cache, KnownBases);
962 if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal))
963 // Known bases won't need new instructions introduced and can be
964 // ignored safely. However, this can only be done when InVal and Base
965 // are both scalar or both vector. Otherwise, we need to find a
966 // correct BDV for InVal, by creating an entry in the lattice
967 // (States).
968 return;
969 assert(isExpectedBDVType(Base) && "the only non-base values "
970 "we see should be base defining values");
971 if (States.insert(std::make_pair(Base, BDVState(Base))).second)
972 Worklist.push_back(Base);
973 };
974
975 visitBDVOperands(Current, visitIncomingValue);
976 }
977 }
978
979 #ifndef NDEBUG
980 VerifyStates();
981 LLVM_DEBUG(dbgs() << "States after initialization:\n");
982 for (const auto &Pair : States) {
983 LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
984 }
985 #endif
986
987 // Iterate forward through the value graph pruning any node from the state
988 // list where all of the inputs are base pointers. The purpose of this is to
989 // reuse existing values when the derived pointer we were asked to materialize
990 // a base pointer for happens to be a base pointer itself. (Or a sub-graph
991 // feeding it does.)
992 SmallVector<Value *> ToRemove;
993 do {
994 ToRemove.clear();
995 for (auto Pair : States) {
996 Value *BDV = Pair.first;
997 auto canPruneInput = [&](Value *V) {
998 // If the input of the BDV is the BDV itself we can prune it. This is
999 // only possible if the BDV is a PHI node.
1000 if (V->stripPointerCasts() == BDV)
1001 return true;
1002 Value *VBDV = findBaseOrBDV(V, Cache, KnownBases);
1003 if (V->stripPointerCasts() != VBDV)
1004 return false;
1005 // The assumption is that anything not in the state list is
1006 // propagates a base pointer.
1007 return States.count(VBDV) == 0;
1008 };
1009
1010 bool CanPrune = true;
1011 visitBDVOperands(BDV, [&](Value *Op) {
1012 CanPrune = CanPrune && canPruneInput(Op);
1013 });
1014 if (CanPrune)
1015 ToRemove.push_back(BDV);
1016 }
1017 for (Value *V : ToRemove) {
1018 States.erase(V);
1019 // Cache the fact V is it's own base for later usage.
1020 Cache[V] = V;
1021 }
1022 } while (!ToRemove.empty());
1023
1024 // Did we manage to prove that Def itself must be a base pointer?
1025 if (!States.count(Def))
1026 return Def;
1027
1028 // Return a phi state for a base defining value. We'll generate a new
1029 // base state for known bases and expect to find a cached state otherwise.
1030 auto GetStateForBDV = [&](Value *BaseValue, Value *Input) {
1031 auto I = States.find(BaseValue);
1032 if (I != States.end())
1033 return I->second;
1034 assert(areBothVectorOrScalar(BaseValue, Input));
1035 return BDVState(BaseValue, BDVState::Base, BaseValue);
1036 };
1037
1038 bool Progress = true;
1039 while (Progress) {
1040 #ifndef NDEBUG
1041 const size_t OldSize = States.size();
1042 #endif
1043 Progress = false;
1044 // We're only changing values in this loop, thus safe to keep iterators.
1045 // Since this is computing a fixed point, the order of visit does not
1046 // effect the result. TODO: We could use a worklist here and make this run
1047 // much faster.
1048 for (auto Pair : States) {
1049 Value *BDV = Pair.first;
1050 // Only values that do not have known bases or those that have differing
1051 // type (scalar versus vector) from a possible known base should be in the
1052 // lattice.
1053 assert((!isKnownBase(BDV, KnownBases) ||
1054 !areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) &&
1055 "why did it get added?");
1056
1057 BDVState NewState(BDV);
1058 visitBDVOperands(BDV, [&](Value *Op) {
1059 Value *BDV = findBaseOrBDV(Op, Cache, KnownBases);
1060 auto OpState = GetStateForBDV(BDV, Op);
1061 NewState.meet(OpState);
1062 });
1063
1064 BDVState OldState = States[BDV];
1065 if (OldState != NewState) {
1066 Progress = true;
1067 States[BDV] = NewState;
1068 }
1069 }
1070
1071 assert(OldSize == States.size() &&
1072 "fixed point shouldn't be adding any new nodes to state");
1073 }
1074
1075 #ifndef NDEBUG
1076 VerifyStates();
1077 LLVM_DEBUG(dbgs() << "States after meet iteration:\n");
1078 for (const auto &Pair : States) {
1079 LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
1080 }
1081 #endif
1082
1083 // Handle all instructions that have a vector BDV, but the instruction itself
1084 // is of scalar type.
1085 for (auto Pair : States) {
1086 Instruction *I = cast<Instruction>(Pair.first);
1087 BDVState State = Pair.second;
1088 auto *BaseValue = State.getBaseValue();
1089 // Only values that do not have known bases or those that have differing
1090 // type (scalar versus vector) from a possible known base should be in the
1091 // lattice.
1092 assert(
1093 (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) &&
1094 "why did it get added?");
1095 assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
1096
1097 if (!State.isBase() || !isa<VectorType>(BaseValue->getType()))
1098 continue;
1099 // extractelement instructions are a bit special in that we may need to
1100 // insert an extract even when we know an exact base for the instruction.
1101 // The problem is that we need to convert from a vector base to a scalar
1102 // base for the particular indice we're interested in.
1103 if (isa<ExtractElementInst>(I)) {
1104 auto *EE = cast<ExtractElementInst>(I);
1105 // TODO: In many cases, the new instruction is just EE itself. We should
1106 // exploit this, but can't do it here since it would break the invariant
1107 // about the BDV not being known to be a base.
1108 auto *BaseInst = ExtractElementInst::Create(
1109 State.getBaseValue(), EE->getIndexOperand(), "base_ee", EE);
1110 BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
1111 States[I] = BDVState(I, BDVState::Base, BaseInst);
1112 setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
1113 } else if (!isa<VectorType>(I->getType())) {
1114 // We need to handle cases that have a vector base but the instruction is
1115 // a scalar type (these could be phis or selects or any instruction that
1116 // are of scalar type, but the base can be a vector type). We
1117 // conservatively set this as conflict. Setting the base value for these
1118 // conflicts is handled in the next loop which traverses States.
1119 States[I] = BDVState(I, BDVState::Conflict);
1120 }
1121 }
1122
1123 #ifndef NDEBUG
1124 VerifyStates();
1125 #endif
1126
1127 // Insert Phis for all conflicts
1128 // TODO: adjust naming patterns to avoid this order of iteration dependency
1129 for (auto Pair : States) {
1130 Instruction *I = cast<Instruction>(Pair.first);
1131 BDVState State = Pair.second;
1132 // Only values that do not have known bases or those that have differing
1133 // type (scalar versus vector) from a possible known base should be in the
1134 // lattice.
1135 assert((!isKnownBase(I, KnownBases) ||
1136 !areBothVectorOrScalar(I, State.getBaseValue())) &&
1137 "why did it get added?");
1138 assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
1139
1140 // Since we're joining a vector and scalar base, they can never be the
1141 // same. As a result, we should always see insert element having reached
1142 // the conflict state.
1143 assert(!isa<InsertElementInst>(I) || State.isConflict());
1144
1145 if (!State.isConflict())
1146 continue;
1147
1148 auto getMangledName = [](Instruction *I) -> std::string {
1149 if (isa<PHINode>(I)) {
1150 return suffixed_name_or(I, ".base", "base_phi");
1151 } else if (isa<SelectInst>(I)) {
1152 return suffixed_name_or(I, ".base", "base_select");
1153 } else if (isa<ExtractElementInst>(I)) {
1154 return suffixed_name_or(I, ".base", "base_ee");
1155 } else if (isa<InsertElementInst>(I)) {
1156 return suffixed_name_or(I, ".base", "base_ie");
1157 } else {
1158 return suffixed_name_or(I, ".base", "base_sv");
1159 }
1160 };
1161
1162 Instruction *BaseInst = I->clone();
1163 BaseInst->insertBefore(I);
1164 BaseInst->setName(getMangledName(I));
1165 // Add metadata marking this as a base value
1166 BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
1167 States[I] = BDVState(I, BDVState::Conflict, BaseInst);
1168 setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases);
1169 }
1170
1171 #ifndef NDEBUG
1172 VerifyStates();
1173 #endif
1174
1175 // Returns a instruction which produces the base pointer for a given
1176 // instruction. The instruction is assumed to be an input to one of the BDVs
1177 // seen in the inference algorithm above. As such, we must either already
1178 // know it's base defining value is a base, or have inserted a new
1179 // instruction to propagate the base of it's BDV and have entered that newly
1180 // introduced instruction into the state table. In either case, we are
1181 // assured to be able to determine an instruction which produces it's base
1182 // pointer.
1183 auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) {
1184 Value *BDV = findBaseOrBDV(Input, Cache, KnownBases);
1185 Value *Base = nullptr;
1186 if (!States.count(BDV)) {
1187 assert(areBothVectorOrScalar(BDV, Input));
1188 Base = BDV;
1189 } else {
1190 // Either conflict or base.
1191 assert(States.count(BDV));
1192 Base = States[BDV].getBaseValue();
1193 }
1194 assert(Base && "Can't be null");
1195 // The cast is needed since base traversal may strip away bitcasts
1196 if (Base->getType() != Input->getType() && InsertPt)
1197 Base = new BitCastInst(Base, Input->getType(), "cast", InsertPt);
1198 return Base;
1199 };
1200
1201 // Fixup all the inputs of the new PHIs. Visit order needs to be
1202 // deterministic and predictable because we're naming newly created
1203 // instructions.
1204 for (auto Pair : States) {
1205 Instruction *BDV = cast<Instruction>(Pair.first);
1206 BDVState State = Pair.second;
1207
1208 // Only values that do not have known bases or those that have differing
1209 // type (scalar versus vector) from a possible known base should be in the
1210 // lattice.
1211 assert((!isKnownBase(BDV, KnownBases) ||
1212 !areBothVectorOrScalar(BDV, State.getBaseValue())) &&
1213 "why did it get added?");
1214 assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");
1215 if (!State.isConflict())
1216 continue;
1217
1218 if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) {
1219 PHINode *PN = cast<PHINode>(BDV);
1220 const unsigned NumPHIValues = PN->getNumIncomingValues();
1221
1222 // The IR verifier requires phi nodes with multiple entries from the
1223 // same basic block to have the same incoming value for each of those
1224 // entries. Since we're inserting bitcasts in the loop, make sure we
1225 // do so at least once per incoming block.
1226 DenseMap<BasicBlock *, Value*> BlockToValue;
1227 for (unsigned i = 0; i < NumPHIValues; i++) {
1228 Value *InVal = PN->getIncomingValue(i);
1229 BasicBlock *InBB = PN->getIncomingBlock(i);
1230 if (!BlockToValue.count(InBB))
1231 BlockToValue[InBB] = getBaseForInput(InVal, InBB->getTerminator());
1232 else {
1233 #ifndef NDEBUG
1234 Value *OldBase = BlockToValue[InBB];
1235 Value *Base = getBaseForInput(InVal, nullptr);
1236
1237 // We can't use `stripPointerCasts` instead of this function because
1238 // `stripPointerCasts` doesn't handle vectors of pointers.
1239 auto StripBitCasts = [](Value *V) -> Value * {
1240 while (auto *BC = dyn_cast<BitCastInst>(V))
1241 V = BC->getOperand(0);
1242 return V;
1243 };
1244 // In essence this assert states: the only way two values
1245 // incoming from the same basic block may be different is by
1246 // being different bitcasts of the same value. A cleanup
1247 // that remains TODO is changing findBaseOrBDV to return an
1248 // llvm::Value of the correct type (and still remain pure).
1249 // This will remove the need to add bitcasts.
1250 assert(StripBitCasts(Base) == StripBitCasts(OldBase) &&
1251 "findBaseOrBDV should be pure!");
1252 #endif
1253 }
1254 Value *Base = BlockToValue[InBB];
1255 BasePHI->setIncomingValue(i, Base);
1256 }
1257 } else if (SelectInst *BaseSI =
1258 dyn_cast<SelectInst>(State.getBaseValue())) {
1259 SelectInst *SI = cast<SelectInst>(BDV);
1260
1261 // Find the instruction which produces the base for each input.
1262 // We may need to insert a bitcast.
1263 BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI));
1264 BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI));
1265 } else if (auto *BaseEE =
1266 dyn_cast<ExtractElementInst>(State.getBaseValue())) {
1267 Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand();
1268 // Find the instruction which produces the base for each input. We may
1269 // need to insert a bitcast.
1270 BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE));
1271 } else if (auto *BaseIE = dyn_cast<InsertElementInst>(State.getBaseValue())){
1272 auto *BdvIE = cast<InsertElementInst>(BDV);
1273 auto UpdateOperand = [&](int OperandIdx) {
1274 Value *InVal = BdvIE->getOperand(OperandIdx);
1275 Value *Base = getBaseForInput(InVal, BaseIE);
1276 BaseIE->setOperand(OperandIdx, Base);
1277 };
1278 UpdateOperand(0); // vector operand
1279 UpdateOperand(1); // scalar operand
1280 } else {
1281 auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue());
1282 auto *BdvSV = cast<ShuffleVectorInst>(BDV);
1283 auto UpdateOperand = [&](int OperandIdx) {
1284 Value *InVal = BdvSV->getOperand(OperandIdx);
1285 Value *Base = getBaseForInput(InVal, BaseSV);
1286 BaseSV->setOperand(OperandIdx, Base);
1287 };
1288 UpdateOperand(0); // vector operand
1289 if (!BdvSV->isZeroEltSplat())
1290 UpdateOperand(1); // vector operand
1291 else {
1292 // Never read, so just use undef
1293 Value *InVal = BdvSV->getOperand(1);
1294 BaseSV->setOperand(1, UndefValue::get(InVal->getType()));
1295 }
1296 }
1297 }
1298
1299 #ifndef NDEBUG
1300 VerifyStates();
1301 #endif
1302
1303 // Cache all of our results so we can cheaply reuse them
1304 // NOTE: This is actually two caches: one of the base defining value
1305 // relation and one of the base pointer relation! FIXME
1306 for (auto Pair : States) {
1307 auto *BDV = Pair.first;
1308 Value *Base = Pair.second.getBaseValue();
1309 assert(BDV && Base);
1310 // Only values that do not have known bases or those that have differing
1311 // type (scalar versus vector) from a possible known base should be in the
1312 // lattice.
1313 assert(
1314 (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) &&
1315 "why did it get added?");
1316
1317 LLVM_DEBUG(
1318 dbgs() << "Updating base value cache"
1319 << " for: " << BDV->getName() << " from: "
1320 << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none")
1321 << " to: " << Base->getName() << "\n");
1322
1323 Cache[BDV] = Base;
1324 }
1325 assert(Cache.count(Def));
1326 return Cache[Def];
1327 }
1328
1329 // For a set of live pointers (base and/or derived), identify the base
1330 // pointer of the object which they are derived from. This routine will
1331 // mutate the IR graph as needed to make the 'base' pointer live at the
1332 // definition site of 'derived'. This ensures that any use of 'derived' can
1333 // also use 'base'. This may involve the insertion of a number of
1334 // additional PHI nodes.
1335 //
1336 // preconditions: live is a set of pointer type Values
1337 //
1338 // side effects: may insert PHI nodes into the existing CFG, will preserve
1339 // CFG, will not remove or mutate any existing nodes
1340 //
1341 // post condition: PointerToBase contains one (derived, base) pair for every
1342 // pointer in live. Note that derived can be equal to base if the original
1343 // pointer was a base pointer.
findBasePointers(const StatepointLiveSetTy & live,PointerToBaseTy & PointerToBase,DominatorTree * DT,DefiningValueMapTy & DVCache,IsKnownBaseMapTy & KnownBases)1344 static void findBasePointers(const StatepointLiveSetTy &live,
1345 PointerToBaseTy &PointerToBase, DominatorTree *DT,
1346 DefiningValueMapTy &DVCache,
1347 IsKnownBaseMapTy &KnownBases) {
1348 for (Value *ptr : live) {
1349 Value *base = findBasePointer(ptr, DVCache, KnownBases);
1350 assert(base && "failed to find base pointer");
1351 PointerToBase[ptr] = base;
1352 assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
1353 DT->dominates(cast<Instruction>(base)->getParent(),
1354 cast<Instruction>(ptr)->getParent())) &&
1355 "The base we found better dominate the derived pointer");
1356 }
1357 }
1358
1359 /// Find the required based pointers (and adjust the live set) for the given
1360 /// parse point.
findBasePointers(DominatorTree & DT,DefiningValueMapTy & DVCache,CallBase * Call,PartiallyConstructedSafepointRecord & result,PointerToBaseTy & PointerToBase,IsKnownBaseMapTy & KnownBases)1361 static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
1362 CallBase *Call,
1363 PartiallyConstructedSafepointRecord &result,
1364 PointerToBaseTy &PointerToBase,
1365 IsKnownBaseMapTy &KnownBases) {
1366 StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet;
1367 // We assume that all pointers passed to deopt are base pointers; as an
1368 // optimization, we can use this to avoid seperately materializing the base
1369 // pointer graph. This is only relevant since we're very conservative about
1370 // generating new conflict nodes during base pointer insertion. If we were
1371 // smarter there, this would be irrelevant.
1372 if (auto Opt = Call->getOperandBundle(LLVMContext::OB_deopt))
1373 for (Value *V : Opt->Inputs) {
1374 if (!PotentiallyDerivedPointers.count(V))
1375 continue;
1376 PotentiallyDerivedPointers.remove(V);
1377 PointerToBase[V] = V;
1378 }
1379 findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache,
1380 KnownBases);
1381 }
1382
1383 /// Given an updated version of the dataflow liveness results, update the
1384 /// liveset and base pointer maps for the call site CS.
1385 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
1386 CallBase *Call,
1387 PartiallyConstructedSafepointRecord &result,
1388 PointerToBaseTy &PointerToBase);
1389
recomputeLiveInValues(Function & F,DominatorTree & DT,ArrayRef<CallBase * > toUpdate,MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,PointerToBaseTy & PointerToBase)1390 static void recomputeLiveInValues(
1391 Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
1392 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
1393 PointerToBaseTy &PointerToBase) {
1394 // TODO-PERF: reuse the original liveness, then simply run the dataflow
1395 // again. The old values are still live and will help it stabilize quickly.
1396 GCPtrLivenessData RevisedLivenessData;
1397 computeLiveInValues(DT, F, RevisedLivenessData);
1398 for (size_t i = 0; i < records.size(); i++) {
1399 struct PartiallyConstructedSafepointRecord &info = records[i];
1400 recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info,
1401 PointerToBase);
1402 }
1403 }
1404
1405 // Utility function which clones all instructions from "ChainToBase"
1406 // and inserts them before "InsertBefore". Returns rematerialized value
1407 // which should be used after statepoint.
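//
// For illustration only (names are made up): given ChainToBase = {%d} rooted
// at %base, cloning before InsertBefore with AlternateLiveBase %base.relocated
// produces
//
//   %d = getelementptr i32, i32 addrspace(1)* %base, i64 4
//   ...
//   %d.remat = getelementptr i32, i32 addrspace(1)* %base.relocated, i64 4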
1408 static Instruction *rematerializeChain(ArrayRef<Instruction *> ChainToBase,
1409 Instruction *InsertBefore,
1410 Value *RootOfChain,
1411 Value *AlternateLiveBase) {
1412 Instruction *LastClonedValue = nullptr;
1413 Instruction *LastValue = nullptr;
1414 // Walk backwards to visit top-most instructions first.
1415 for (Instruction *Instr :
1416 make_range(ChainToBase.rbegin(), ChainToBase.rend())) {
1417 // Only GEPs and casts are supported as we need to be careful not to
1418 // introduce any new uses of pointers not in the liveset.
1419 // Note that it's fine to introduce new uses of pointers which were
1420 // otherwise not used after this statepoint.
1421 assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));
1422
1423 Instruction *ClonedValue = Instr->clone();
1424 ClonedValue->insertBefore(InsertBefore);
1425 ClonedValue->setName(Instr->getName() + ".remat");
1426
1427 // If it is not the first instruction in the chain then it uses the
1428 // previously cloned value. We should update it to use the cloned value.
1429 if (LastClonedValue) {
1430 assert(LastValue);
1431 ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
1432 #ifndef NDEBUG
1433 for (auto *OpValue : ClonedValue->operand_values()) {
1434 // Assert that cloned instruction does not use any instructions from
1435 // this chain other than LastClonedValue
1436 assert(!is_contained(ChainToBase, OpValue) &&
1437 "incorrect use in rematerialization chain");
1438 // Assert that the cloned instruction does not use the RootOfChain
1439 // or the AlternateLiveBase.
1440 assert(OpValue != RootOfChain && OpValue != AlternateLiveBase);
1441 }
1442 #endif
1443 } else {
1444 // For the first instruction, replace the use of the unrelocated base, i.e.
1445 // RootOfChain/OrigRootPhi, with the corresponding PHI present in the
1446 // live set. They have been proven to be the same PHI nodes. Note
1447 // that the *only* use of the RootOfChain in the ChainToBase list is
1448 // the first Value in the list.
1449 if (RootOfChain != AlternateLiveBase)
1450 ClonedValue->replaceUsesOfWith(RootOfChain, AlternateLiveBase);
1451 }
1452
1453 LastClonedValue = ClonedValue;
1454 LastValue = Instr;
1455 }
1456 assert(LastClonedValue);
1457 return LastClonedValue;
1458 }
1459
1460 // When inserting gc.relocate and gc.result calls, we need to ensure there are
1461 // no uses of the original value / return value between the gc.statepoint and
1462 // the gc.relocate / gc.result call. One case which can arise is a phi node
1463 // at the start of one of the successor blocks. We also need to be able to insert the
1464 // gc.relocates only on the path which goes through the statepoint. We might
1465 // need to split an edge to make this possible.
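//
// A rough sketch of the edge split (hypothetical IR, not verbatim):
//
//   invoke void @f() to label %merge unwind label %lpad
//
// If %merge has other predecessors, it is split so that the invoke's edge gets
// its own block; that block can then hold the gc.relocates and branch on to
// the original %merge.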
1466 static BasicBlock *
1467 normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
1468 DominatorTree &DT) {
1469 BasicBlock *Ret = BB;
1470 if (!BB->getUniquePredecessor())
1471 Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);
1472
1473 // Now that 'Ret' has a unique predecessor we can safely remove all phi nodes
1474 // from it
1475 FoldSingleEntryPHINodes(Ret);
1476 assert(!isa<PHINode>(Ret->begin()) &&
1477 "All PHI nodes should have been removed!");
1478
1479 // At this point, we can safely insert a gc.relocate or gc.result as the first
1480 // instruction in Ret if needed.
1481 return Ret;
1482 }
1483
1484 // List of all function attributes which must be stripped when lowering from
1485 // abstract machine model to physical machine model. Essentially, these are
1486 // all the effects a safepoint might have which we ignored in the abstract
1487 // machine model for purposes of optimization. We have to strip these on
1488 // both function declarations and call sites.
1489 static constexpr Attribute::AttrKind FnAttrsToStrip[] =
1490 {Attribute::Memory, Attribute::NoSync, Attribute::NoFree};
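
// For example (a sketch, not a real test case): a callee declared
//
//   declare void @f() memory(none) nosync nofree
//
// may still observe memory effects of the collector once its call site becomes
// a statepoint, so leaving these attributes in place would let later passes
// draw conclusions that no longer hold in the physical model.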
1491
1492 // Create new attribute set containing only attributes which can be transferred
1493 // from original call to the safepoint.
1494 static AttributeList legalizeCallAttributes(LLVMContext &Ctx,
1495 AttributeList OrigAL,
1496 AttributeList StatepointAL) {
1497 if (OrigAL.isEmpty())
1498 return StatepointAL;
1499
1500 // Remove the memory, nosync, and nofree function attributes.
1501 AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs());
1502 for (auto Attr : FnAttrsToStrip)
1503 FnAttrs.removeAttribute(Attr);
1504
1505 for (Attribute A : OrigAL.getFnAttrs()) {
1506 if (isStatepointDirectiveAttr(A))
1507 FnAttrs.removeAttribute(A);
1508 }
1509
1510 // Just skip parameter and return attributes for now
1511 return StatepointAL.addFnAttributes(Ctx, FnAttrs);
1512 }
1513
1514 /// Helper function to place all gc relocates necessary for the given
1515 /// statepoint.
1516 /// Inputs:
1517 /// liveVariables - list of variables to be relocated.
1518 /// basePtrs - base pointers.
1519 /// statepointToken - statepoint instruction to which relocates should be
1520 /// bound.
1521 /// Builder - LLVM IR builder to be used to construct new calls.
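///
/// For a live (base, derived) pair the emitted call looks roughly like this
/// (an illustrative sketch, not verbatim output; indices are positions in the
/// statepoint's gc argument list):
///
///   %derived.relocated = call coldcc i8 addrspace(1)*
///      @llvm.experimental.gc.relocate.p1i8(token %statepoint_token,
///                                          i32 <base idx>, i32 <derived idx>)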
1522 static void CreateGCRelocates(ArrayRef<Value *> LiveVariables,
1523 ArrayRef<Value *> BasePtrs,
1524 Instruction *StatepointToken,
1525 IRBuilder<> &Builder) {
1526 if (LiveVariables.empty())
1527 return;
1528
1529 auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) {
1530 auto ValIt = llvm::find(LiveVec, Val);
1531 assert(ValIt != LiveVec.end() && "Val not found in LiveVec!");
1532 size_t Index = std::distance(LiveVec.begin(), ValIt);
1533 assert(Index < LiveVec.size() && "Bug in std::find?");
1534 return Index;
1535 };
1536 Module *M = StatepointToken->getModule();
1537
1538 // All gc_relocates are generated as i8 addrspace(1)* (or a vector type whose
1539 // element type is i8 addrspace(1)*). We originally generated unique
1540 // declarations for each pointer type, but this proved problematic because
1541 // the intrinsic mangling code is incomplete and fragile. Since we're moving
1542 // towards a single unified pointer type anyways, we can just cast everything
1543 // to an i8* of the right address space. A bitcast is added later to convert
1544 // gc_relocate to the actual value's type.
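//
// E.g. (illustrative): a live value of type <4 x i64 addrspace(1)*> gets a
// relocate declared on <4 x i8 addrspace(1)*>, and a bitcast back to the
// original vector-of-pointers type is emitted afterwards.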
1545 auto getGCRelocateDecl = [&] (Type *Ty) {
1546 assert(isHandledGCPointerType(Ty));
1547 auto AS = Ty->getScalarType()->getPointerAddressSpace();
1548 Type *NewTy = Type::getInt8PtrTy(M->getContext(), AS);
1549 if (auto *VT = dyn_cast<VectorType>(Ty))
1550 NewTy = FixedVectorType::get(NewTy,
1551 cast<FixedVectorType>(VT)->getNumElements());
1552 return Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate,
1553 {NewTy});
1554 };
1555
1556 // Lazily populated map from input types to the canonicalized form mentioned
1557 // in the comment above. This should probably be cached somewhere more
1558 // broadly.
1559 DenseMap<Type *, Function *> TypeToDeclMap;
1560
1561 for (unsigned i = 0; i < LiveVariables.size(); i++) {
1562 // Generate the gc.relocate call and save the result
1563 Value *BaseIdx = Builder.getInt32(FindIndex(LiveVariables, BasePtrs[i]));
1564 Value *LiveIdx = Builder.getInt32(i);
1565
1566 Type *Ty = LiveVariables[i]->getType();
1567 if (!TypeToDeclMap.count(Ty))
1568 TypeToDeclMap[Ty] = getGCRelocateDecl(Ty);
1569 Function *GCRelocateDecl = TypeToDeclMap[Ty];
1570
1571 // only specify a debug name if we can give a useful one
1572 CallInst *Reloc = Builder.CreateCall(
1573 GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
1574 suffixed_name_or(LiveVariables[i], ".relocated", ""));
1575 // Trick CodeGen into thinking there are lots of free registers at this
1576 // fake call.
1577 Reloc->setCallingConv(CallingConv::Cold);
1578 }
1579 }
1580
1581 namespace {
1582
1583 /// This struct is used to defer RAUWs and `eraseFromParent`s. Using this
1584 /// avoids having to worry about keeping around dangling pointers to Values.
1585 class DeferredReplacement {
1586 AssertingVH<Instruction> Old;
1587 AssertingVH<Instruction> New;
1588 bool IsDeoptimize = false;
1589
1590 DeferredReplacement() = default;
1591
1592 public:
1593 static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) {
1594 assert(Old != New && Old && New &&
1595 "Cannot RAUW equal values or to / from null!");
1596
1597 DeferredReplacement D;
1598 D.Old = Old;
1599 D.New = New;
1600 return D;
1601 }
1602
1603 static DeferredReplacement createDelete(Instruction *ToErase) {
1604 DeferredReplacement D;
1605 D.Old = ToErase;
1606 return D;
1607 }
1608
1609 static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) {
1610 #ifndef NDEBUG
1611 auto *F = cast<CallInst>(Old)->getCalledFunction();
1612 assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize &&
1613 "Only way to construct a deoptimize deferred replacement");
1614 #endif
1615 DeferredReplacement D;
1616 D.Old = Old;
1617 D.IsDeoptimize = true;
1618 return D;
1619 }
1620
1621 /// Does the task represented by this instance.
1622 void doReplacement() {
1623 Instruction *OldI = Old;
1624 Instruction *NewI = New;
1625
1626 assert(OldI != NewI && "Disallowed at construction?!");
1627 assert((!IsDeoptimize || !New) &&
1628 "Deoptimize intrinsics are not replaced!");
1629
1630 Old = nullptr;
1631 New = nullptr;
1632
1633 if (NewI)
1634 OldI->replaceAllUsesWith(NewI);
1635
1636 if (IsDeoptimize) {
1637 // Note: we've inserted instructions, so the call to llvm.deoptimize may
1638 // not necessarily be followed by the matching return.
1639 auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator());
1640 new UnreachableInst(RI->getContext(), RI);
1641 RI->eraseFromParent();
1642 }
1643
1644 OldI->eraseFromParent();
1645 }
1646 };
1647
1648 } // end anonymous namespace
1649
1650 static StringRef getDeoptLowering(CallBase *Call) {
1651 const char *DeoptLowering = "deopt-lowering";
1652 if (Call->hasFnAttr(DeoptLowering)) {
1653 // FIXME: Calls have a *really* confusing interface around attributes
1654 // with values.
1655 const AttributeList &CSAS = Call->getAttributes();
1656 if (CSAS.hasFnAttr(DeoptLowering))
1657 return CSAS.getFnAttr(DeoptLowering).getValueAsString();
1658 Function *F = Call->getCalledFunction();
1659 assert(F && F->hasFnAttribute(DeoptLowering));
1660 return F->getFnAttribute(DeoptLowering).getValueAsString();
1661 }
1662 return "live-through";
1663 }
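
// Illustrative usage (a sketch; exact attribute placement elided): a call site
// can request the live-in lowering via the string attribute
//
//   call void @f() [ "deopt"(i32 %state) ]   ; with "deopt-lowering"="live-in"
//
// Anything else must be the default "live-through".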
1664
1665 static void
1666 makeStatepointExplicitImpl(CallBase *Call, /* to replace */
1667 const SmallVectorImpl<Value *> &BasePtrs,
1668 const SmallVectorImpl<Value *> &LiveVariables,
1669 PartiallyConstructedSafepointRecord &Result,
1670 std::vector<DeferredReplacement> &Replacements,
1671 const PointerToBaseTy &PointerToBase) {
1672 assert(BasePtrs.size() == LiveVariables.size());
1673
1674 // Then go ahead and use the builder to actually do the inserts. We insert
1675 // immediately before the previous instruction under the assumption that all
1676 // arguments will be available here. We can't insert afterwards since we may
1677 // be replacing a terminator.
1678 IRBuilder<> Builder(Call);
1679
1680 ArrayRef<Value *> GCArgs(LiveVariables);
1681 uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
1682 uint32_t NumPatchBytes = 0;
1683 uint32_t Flags = uint32_t(StatepointFlags::None);
1684
1685 SmallVector<Value *, 8> CallArgs(Call->args());
1686 std::optional<ArrayRef<Use>> DeoptArgs;
1687 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
1688 DeoptArgs = Bundle->Inputs;
1689 std::optional<ArrayRef<Use>> TransitionArgs;
1690 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
1691 TransitionArgs = Bundle->Inputs;
1692 // TODO: This flag no longer serves a purpose and can be removed later
1693 Flags |= uint32_t(StatepointFlags::GCTransition);
1694 }
1695
1696 // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
1697 // with a return value, we lower them as never-returning calls to
1698 // __llvm_deoptimize that are followed by unreachable to get better codegen.
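//
// Roughly (an illustrative sketch):
//
//   %r = call i32 @llvm.experimental.deoptimize.i32() [ "deopt"(...) ]
//   ret i32 %r
//
// becomes a statepoint wrapping @__llvm_deoptimize followed by unreachable,
// with the matching ret deleted later by DeferredReplacement.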
1699 bool IsDeoptimize = false;
1700
1701 StatepointDirectives SD =
1702 parseStatepointDirectivesFromAttrs(Call->getAttributes());
1703 if (SD.NumPatchBytes)
1704 NumPatchBytes = *SD.NumPatchBytes;
1705 if (SD.StatepointID)
1706 StatepointID = *SD.StatepointID;
1707
1708 // Pass through the requested lowering if any. The default is live-through.
1709 StringRef DeoptLowering = getDeoptLowering(Call);
1710 if (DeoptLowering.equals("live-in"))
1711 Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
1712 else {
1713 assert(DeoptLowering.equals("live-through") && "Unsupported value!");
1714 }
1715
1716 FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
1717 if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
1718 auto IID = F->getIntrinsicID();
1719 if (IID == Intrinsic::experimental_deoptimize) {
1720 // Calls to llvm.experimental.deoptimize are lowered to calls to the
1721 // __llvm_deoptimize symbol. We want to resolve this now, since the
1722 // verifier does not allow taking the address of an intrinsic function.
1723
1724 SmallVector<Type *, 8> DomainTy;
1725 for (Value *Arg : CallArgs)
1726 DomainTy.push_back(Arg->getType());
1727 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1728 /* isVarArg = */ false);
1729
1730 // Note: CallTarget can be a bitcast instruction of a symbol if there are
1731 // calls to @llvm.experimental.deoptimize with different argument types in
1732 // the same module. This is fine -- we assume the frontend knew what it
1733 // was doing when generating this kind of IR.
1734 CallTarget = F->getParent()
1735 ->getOrInsertFunction("__llvm_deoptimize", FTy);
1736
1737 IsDeoptimize = true;
1738 } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
1739 IID == Intrinsic::memmove_element_unordered_atomic) {
1740 // Unordered atomic memcpy and memmove intrinsics which are not explicitly
1741 // marked as "gc-leaf-function" should be lowered in a GC parseable way.
1742 // Specifically, these calls should be lowered to the
1743 // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
1744 // Similarly to __llvm_deoptimize we want to resolve this now, since the
1745 // verifier does not allow taking the address of an intrinsic function.
1746 //
1747 // Moreover we need to shuffle the arguments for the call in order to
1748 // accommodate GC. The underlying source and destination objects might be
1749 // relocated during the copy operation should a GC occur. To relocate the
1750 // derived source and destination pointers the implementation of the
1751 // intrinsic should know the corresponding base pointers.
1752 //
1753 // To make the base pointers available pass them explicitly as arguments:
1754 // memcpy(dest_derived, source_derived, ...) =>
1755 // memcpy(dest_base, dest_offset, source_base, source_offset, ...)
1756 auto &Context = Call->getContext();
1757 auto &DL = Call->getModule()->getDataLayout();
1758 auto GetBaseAndOffset = [&](Value *Derived) {
1759 Value *Base = nullptr;
1760 // Optimizations in unreachable code might substitute the real pointer
1761 // with undef, poison or null-derived constant. Return null base for
1762 // them to be consistent with the handling in the main algorithm in
1763 // findBaseDefiningValue.
1764 if (isa<Constant>(Derived))
1765 Base =
1766 ConstantPointerNull::get(cast<PointerType>(Derived->getType()));
1767 else {
1768 assert(PointerToBase.count(Derived));
1769 Base = PointerToBase.find(Derived)->second;
1770 }
1771 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
1772 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
1773 Value *Base_int = Builder.CreatePtrToInt(
1774 Base, Type::getIntNTy(Context, IntPtrSize));
1775 Value *Derived_int = Builder.CreatePtrToInt(
1776 Derived, Type::getIntNTy(Context, IntPtrSize));
1777 return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int));
1778 };
1779
1780 auto *Dest = CallArgs[0];
1781 Value *DestBase, *DestOffset;
1782 std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest);
1783
1784 auto *Source = CallArgs[1];
1785 Value *SourceBase, *SourceOffset;
1786 std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source);
1787
1788 auto *LengthInBytes = CallArgs[2];
1789 auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]);
1790
1791 CallArgs.clear();
1792 CallArgs.push_back(DestBase);
1793 CallArgs.push_back(DestOffset);
1794 CallArgs.push_back(SourceBase);
1795 CallArgs.push_back(SourceOffset);
1796 CallArgs.push_back(LengthInBytes);
1797
1798 SmallVector<Type *, 8> DomainTy;
1799 for (Value *Arg : CallArgs)
1800 DomainTy.push_back(Arg->getType());
1801 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1802 /* isVarArg = */ false);
1803
1804 auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) {
1805 uint64_t ElementSize = ElementSizeCI->getZExtValue();
1806 if (IID == Intrinsic::memcpy_element_unordered_atomic) {
1807 switch (ElementSize) {
1808 case 1:
1809 return "__llvm_memcpy_element_unordered_atomic_safepoint_1";
1810 case 2:
1811 return "__llvm_memcpy_element_unordered_atomic_safepoint_2";
1812 case 4:
1813 return "__llvm_memcpy_element_unordered_atomic_safepoint_4";
1814 case 8:
1815 return "__llvm_memcpy_element_unordered_atomic_safepoint_8";
1816 case 16:
1817 return "__llvm_memcpy_element_unordered_atomic_safepoint_16";
1818 default:
1819 llvm_unreachable("unexpected element size!");
1820 }
1821 }
1822 assert(IID == Intrinsic::memmove_element_unordered_atomic);
1823 switch (ElementSize) {
1824 case 1:
1825 return "__llvm_memmove_element_unordered_atomic_safepoint_1";
1826 case 2:
1827 return "__llvm_memmove_element_unordered_atomic_safepoint_2";
1828 case 4:
1829 return "__llvm_memmove_element_unordered_atomic_safepoint_4";
1830 case 8:
1831 return "__llvm_memmove_element_unordered_atomic_safepoint_8";
1832 case 16:
1833 return "__llvm_memmove_element_unordered_atomic_safepoint_16";
1834 default:
1835 llvm_unreachable("unexpected element size!");
1836 }
1837 };
1838
1839 CallTarget =
1840 F->getParent()
1841 ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy);
1842 }
1843 }
1844
1845 // Create the statepoint given all the arguments
1846 GCStatepointInst *Token = nullptr;
1847 if (auto *CI = dyn_cast<CallInst>(Call)) {
1848 CallInst *SPCall = Builder.CreateGCStatepointCall(
1849 StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs,
1850 TransitionArgs, DeoptArgs, GCArgs, "safepoint_token");
1851
1852 SPCall->setTailCallKind(CI->getTailCallKind());
1853 SPCall->setCallingConv(CI->getCallingConv());
1854
1855 // Currently we will fail on parameter attributes and on certain
1856 // function attributes. For the attributes we can handle, set up the
1857 // function attrs directly on the statepoint and set the return attrs
1858 // later on the gc_result intrinsic.
1859 SPCall->setAttributes(legalizeCallAttributes(
1860 CI->getContext(), CI->getAttributes(), SPCall->getAttributes()));
1861
1862 Token = cast<GCStatepointInst>(SPCall);
1863
1864 // Put the following gc_result and gc_relocate calls immediately after
1865 // the old call (which we're about to delete)
1866 assert(CI->getNextNode() && "Not a terminator, must have next!");
1867 Builder.SetInsertPoint(CI->getNextNode());
1868 Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc());
1869 } else {
1870 auto *II = cast<InvokeInst>(Call);
1871
1872 // Insert the new invoke into the old block. We'll remove the old one in a
1873 // moment at which point this will become the new terminator for the
1874 // original block.
1875 InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke(
1876 StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(),
1877 II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, GCArgs,
1878 "statepoint_token");
1879
1880 SPInvoke->setCallingConv(II->getCallingConv());
1881
1882 // Currently we will fail on parameter attributes and on certain
1883 // function attributes. For the attributes we can handle, set up the
1884 // function attrs directly on the statepoint and set the return attrs
1885 // later on the gc_result intrinsic.
1886 SPInvoke->setAttributes(legalizeCallAttributes(
1887 II->getContext(), II->getAttributes(), SPInvoke->getAttributes()));
1888
1889 Token = cast<GCStatepointInst>(SPInvoke);
1890
1891 // Generate gc relocates in exceptional path
1892 BasicBlock *UnwindBlock = II->getUnwindDest();
1893 assert(!isa<PHINode>(UnwindBlock->begin()) &&
1894 UnwindBlock->getUniquePredecessor() &&
1895 "can't safely insert in this block!");
1896
1897 Builder.SetInsertPoint(&*UnwindBlock->getFirstInsertionPt());
1898 Builder.SetCurrentDebugLocation(II->getDebugLoc());
1899
1900 // Attach exceptional gc relocates to the landingpad.
1901 Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst();
1902 Result.UnwindToken = ExceptionalToken;
1903
1904 CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder);
1905
1906 // Generate gc relocates and returns for normal block
1907 BasicBlock *NormalDest = II->getNormalDest();
1908 assert(!isa<PHINode>(NormalDest->begin()) &&
1909 NormalDest->getUniquePredecessor() &&
1910 "can't safely insert in this block!");
1911
1912 Builder.SetInsertPoint(&*NormalDest->getFirstInsertionPt());
1913
1914 // gc relocates will be generated later as if it were a regular call
1915 // statepoint
1916 }
1917 assert(Token && "Should be set in one of the above branches!");
1918
1919 if (IsDeoptimize) {
1920 // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we
1921 // transform the tail-call like structure to a call to a void function
1922 // followed by unreachable to get better codegen.
1923 Replacements.push_back(
1924 DeferredReplacement::createDeoptimizeReplacement(Call));
1925 } else {
1926 Token->setName("statepoint_token");
1927 if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
1928 StringRef Name = Call->hasName() ? Call->getName() : "";
1929 CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
1930 GCResult->setAttributes(
1931 AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
1932 Call->getAttributes().getRetAttrs()));
1933
1934 // We cannot RAUW or delete the original call because it could be in the
1935 // live set of some other safepoint, in which case that safepoint's
1936 // PartiallyConstructedSafepointRecord will hold a raw pointer to this
1937 // llvm::Instruction. Instead, we defer the replacement and deletion to
1938 // after the live sets have been made explicit in the IR, and we no longer
1939 // have raw pointers to worry about.
1940 Replacements.emplace_back(
1941 DeferredReplacement::createRAUW(Call, GCResult));
1942 } else {
1943 Replacements.emplace_back(DeferredReplacement::createDelete(Call));
1944 }
1945 }
1946
1947 Result.StatepointToken = Token;
1948
1949 // Second, create a gc.relocate for every live variable
1950 CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder);
1951 }
1952
1953 // Replace an existing gc.statepoint with a new one and a set of gc.relocates
1954 // which make the relocations happening at this safepoint explicit.
1955 //
1956 // WARNING: Does not do any fixup to adjust users of the original live
1957 // values. That's the caller's responsibility.
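//
// Roughly, for a call with one live gc pointer (illustrative sketch only):
//
//   %r = call i8 addrspace(1)* @f(i8 addrspace(1)* %p)
//
// becomes
//
//   %tok = call token @llvm.experimental.gc.statepoint(... @f, ... %p ...)
//   %p.relocated = call i8 addrspace(1)* @llvm.experimental.gc.relocate(...)
//   %r = call i8 addrspace(1)* @llvm.experimental.gc.result(token %tok)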
1958 static void
1959 makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
1960 PartiallyConstructedSafepointRecord &Result,
1961 std::vector<DeferredReplacement> &Replacements,
1962 const PointerToBaseTy &PointerToBase) {
1963 const auto &LiveSet = Result.LiveSet;
1964
1965 // Convert to vector for efficient cross referencing.
1966 SmallVector<Value *, 64> BaseVec, LiveVec;
1967 LiveVec.reserve(LiveSet.size());
1968 BaseVec.reserve(LiveSet.size());
1969 for (Value *L : LiveSet) {
1970 LiveVec.push_back(L);
1971 assert(PointerToBase.count(L));
1972 Value *Base = PointerToBase.find(L)->second;
1973 BaseVec.push_back(Base);
1974 }
1975 assert(LiveVec.size() == BaseVec.size());
1976
1977 // Do the actual rewriting and delete the old statepoint
1978 makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
1979 PointerToBase);
1980 }
1981
1982 // Helper function for the relocationViaAlloca.
1983 //
1984 // It receives an iterator range over the statepoint's gc relocates and emits
1985 // a store to the assigned location (via AllocaMap) for each one of them. It
1986 // adds the visited values to the VisitedLiveValues set, which we later use
1987 // for validation checking.
1988 static void
1989 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
1990 DenseMap<Value *, AllocaInst *> &AllocaMap,
1991 DenseSet<Value *> &VisitedLiveValues) {
1992 for (User *U : GCRelocs) {
1993 GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
1994 if (!Relocate)
1995 continue;
1996
1997 Value *OriginalValue = Relocate->getDerivedPtr();
1998 assert(AllocaMap.count(OriginalValue));
1999 Value *Alloca = AllocaMap[OriginalValue];
2000
2001 // Emit store into the related alloca
2002 // All gc_relocates are i8 addrspace(1)* typed, so the value must be bitcast
2003 // to the correct type according to the alloca.
2004 assert(Relocate->getNextNode() &&
2005 "Should always have one since it's not a terminator");
2006 IRBuilder<> Builder(Relocate->getNextNode());
2007 Value *CastedRelocatedValue =
2008 Builder.CreateBitCast(Relocate,
2009 cast<AllocaInst>(Alloca)->getAllocatedType(),
2010 suffixed_name_or(Relocate, ".casted", ""));
2011
2012 new StoreInst(CastedRelocatedValue, Alloca,
2013 cast<Instruction>(CastedRelocatedValue)->getNextNode());
2014
2015 #ifndef NDEBUG
2016 VisitedLiveValues.insert(OriginalValue);
2017 #endif
2018 }
2019 }
2020
2021 // Helper function for the "relocationViaAlloca". Similar to the
2022 // "insertRelocationStores" but works for rematerialized values.
2023 static void insertRematerializationStores(
2024 const RematerializedValueMapTy &RematerializedValues,
2025 DenseMap<Value *, AllocaInst *> &AllocaMap,
2026 DenseSet<Value *> &VisitedLiveValues) {
2027 for (auto RematerializedValuePair: RematerializedValues) {
2028 Instruction *RematerializedValue = RematerializedValuePair.first;
2029 Value *OriginalValue = RematerializedValuePair.second;
2030
2031 assert(AllocaMap.count(OriginalValue) &&
2032 "Can not find alloca for rematerialized value");
2033 Value *Alloca = AllocaMap[OriginalValue];
2034
2035 new StoreInst(RematerializedValue, Alloca,
2036 RematerializedValue->getNextNode());
2037
2038 #ifndef NDEBUG
2039 VisitedLiveValues.insert(OriginalValue);
2040 #endif
2041 }
2042 }
2043
2044 /// Do all the relocation update via allocas and mem2reg
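///
/// A sketch of the scheme (illustrative, not verbatim): every live gc pointer
/// %p gets an entry-block alloca; %p is stored to it at its def, each
/// relocated value is stored to it after its statepoint, and every use is
/// rewritten to load from it; mem2reg then reconstructs SSA form with the
/// relocations folded in.
///
///   %p.alloca = alloca i8 addrspace(1)*          ; in the entry block
///   store i8 addrspace(1)* %p, ...               ; after the def
///   store i8 addrspace(1)* %p.relocated, ...     ; after each statepoint
///   %p.reload = load i8 addrspace(1)*, ...       ; before each use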
2045 static void relocationViaAlloca(
2046 Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
2047 ArrayRef<PartiallyConstructedSafepointRecord> Records) {
2048 #ifndef NDEBUG
2049 // Record the initial number of (static) allocas; we'll check that we end
2050 // up with the same number when we're done.
2051 int InitialAllocaNum = 0;
2052 for (Instruction &I : F.getEntryBlock())
2053 if (isa<AllocaInst>(I))
2054 InitialAllocaNum++;
2055 #endif
2056
2057 // TODO-PERF: change data structures, reserve
2058 DenseMap<Value *, AllocaInst *> AllocaMap;
2059 SmallVector<AllocaInst *, 200> PromotableAllocas;
2060 // Used later to check that we have enough allocas to store all values
2061 std::size_t NumRematerializedValues = 0;
2062 PromotableAllocas.reserve(Live.size());
2063
2064 // Emit alloca for "LiveValue" and record it in "allocaMap" and
2065 // "PromotableAllocas"
2066 const DataLayout &DL = F.getParent()->getDataLayout();
2067 auto emitAllocaFor = [&](Value *LiveValue) {
2068 AllocaInst *Alloca = new AllocaInst(LiveValue->getType(),
2069 DL.getAllocaAddrSpace(), "",
2070 F.getEntryBlock().getFirstNonPHI());
2071 AllocaMap[LiveValue] = Alloca;
2072 PromotableAllocas.push_back(Alloca);
2073 };
2074
2075 // Emit alloca for each live gc pointer
2076 for (Value *V : Live)
2077 emitAllocaFor(V);
2078
2079 // Emit allocas for rematerialized values
2080 for (const auto &Info : Records)
2081 for (auto RematerializedValuePair : Info.RematerializedValues) {
2082 Value *OriginalValue = RematerializedValuePair.second;
2083 if (AllocaMap.count(OriginalValue) != 0)
2084 continue;
2085
2086 emitAllocaFor(OriginalValue);
2087 ++NumRematerializedValues;
2088 }
2089
2090 // The next two loops are part of the same conceptual operation. We need to
2091 // insert a store to the alloca after the original def and at each
2092 // redefinition. We need to insert a load before each use. These are split
2093 // into distinct loops for performance reasons.
2094
2095 // Update gc pointer after each statepoint: either store a relocated value or
2096 // null (if no relocated value was found for this gc pointer and it is not a
2097 // gc_result). This must happen before we update the statepoint with a load
2098 // of the alloca, otherwise we lose the link between the statepoint and the old def.
2099 for (const auto &Info : Records) {
2100 Value *Statepoint = Info.StatepointToken;
2101
2102 // This will be used for a consistency check
2103 DenseSet<Value *> VisitedLiveValues;
2104
2105 // Insert stores for normal statepoint gc relocates
2106 insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);
2107
2108 // If it was an invoke statepoint,
2109 // we also insert stores for the exceptional path gc relocates.
2110 if (isa<InvokeInst>(Statepoint)) {
2111 insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
2112 VisitedLiveValues);
2113 }
2114
2115 // Do the same for rematerialized values
2116 insertRematerializationStores(Info.RematerializedValues, AllocaMap,
2117 VisitedLiveValues);
2118
2119 if (ClobberNonLive) {
2120 // As a debugging aid, pretend that an unrelocated pointer becomes null at
2121 // the gc.statepoint. This will turn some subtle GC problems into
2122 // slightly easier to debug SEGVs. Note that on large IR files with
2123 // lots of gc.statepoints this is extremely costly in both memory and
2124 // time.
2125 SmallVector<AllocaInst *, 64> ToClobber;
2126 for (auto Pair : AllocaMap) {
2127 Value *Def = Pair.first;
2128 AllocaInst *Alloca = Pair.second;
2129
2130 // This value was relocated
2131 if (VisitedLiveValues.count(Def)) {
2132 continue;
2133 }
2134 ToClobber.push_back(Alloca);
2135 }
2136
2137 auto InsertClobbersAt = [&](Instruction *IP) {
2138 for (auto *AI : ToClobber) {
2139 auto AT = AI->getAllocatedType();
2140 Constant *CPN;
2141 if (AT->isVectorTy())
2142 CPN = ConstantAggregateZero::get(AT);
2143 else
2144 CPN = ConstantPointerNull::get(cast<PointerType>(AT));
2145 new StoreInst(CPN, AI, IP);
2146 }
2147 };
2148
2149 // Insert the clobbering stores. These may get intermixed with the
2150 // gc.results and gc.relocates, but that's fine.
2151 if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
2152 InsertClobbersAt(&*II->getNormalDest()->getFirstInsertionPt());
2153 InsertClobbersAt(&*II->getUnwindDest()->getFirstInsertionPt());
2154 } else {
2155 InsertClobbersAt(cast<Instruction>(Statepoint)->getNextNode());
2156 }
2157 }
2158 }
2159
2160 // Update the uses with loads from the allocas, and add a store for the initial def.
2161 for (auto Pair : AllocaMap) {
2162 Value *Def = Pair.first;
2163 AllocaInst *Alloca = Pair.second;
2164
2165 // We pre-record the uses of allocas so that we don't have to worry about
2166 // later updates that change the user information.
2167
2168 SmallVector<Instruction *, 20> Uses;
2169 // PERF: trade a linear scan for repeated reallocation
2170 Uses.reserve(Def->getNumUses());
2171 for (User *U : Def->users()) {
2172 if (!isa<ConstantExpr>(U)) {
2173 // If the def has a ConstantExpr use, then the def is either a
2174 // ConstantExpr use itself or null. In either case
2175 // (recursively in the first, directly in the second), the oop
2176 // it is ultimately dependent on is null and this particular
2177 // use does not need to be fixed up.
2178 Uses.push_back(cast<Instruction>(U));
2179 }
2180 }
2181
2182 llvm::sort(Uses);
2183 auto Last = std::unique(Uses.begin(), Uses.end());
2184 Uses.erase(Last, Uses.end());
2185
2186 for (Instruction *Use : Uses) {
2187 if (isa<PHINode>(Use)) {
2188 PHINode *Phi = cast<PHINode>(Use);
2189 for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
2190 if (Def == Phi->getIncomingValue(i)) {
2191 LoadInst *Load =
2192 new LoadInst(Alloca->getAllocatedType(), Alloca, "",
2193 Phi->getIncomingBlock(i)->getTerminator());
2194 Phi->setIncomingValue(i, Load);
2195 }
2196 }
2197 } else {
2198 LoadInst *Load =
2199 new LoadInst(Alloca->getAllocatedType(), Alloca, "", Use);
2200 Use->replaceUsesOfWith(Def, Load);
2201 }
2202 }
2203
2204 // Emit a store for the initial gc value. The store must be inserted after
2205 // the loads, otherwise the store would be in the alloca's use list and an
2206 // extra load would be inserted before it.
2207 StoreInst *Store = new StoreInst(Def, Alloca, /*volatile*/ false,
2208 DL.getABITypeAlign(Def->getType()));
2209 if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
2210 if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
2211 // InvokeInst is a terminator, so the store needs to be inserted into its
2212 // normal destination block.
2213 BasicBlock *NormalDest = Invoke->getNormalDest();
2214 Store->insertBefore(NormalDest->getFirstNonPHI());
2215 } else {
2216 assert(!Inst->isTerminator() &&
2217 "The only terminator that can produce a value is "
2218 "InvokeInst which is handled above.");
2219 Store->insertAfter(Inst);
2220 }
2221 } else {
2222 assert(isa<Argument>(Def));
2223 Store->insertAfter(cast<Instruction>(Alloca));
2224 }
2225 }
2226
2227 assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
2228 "we must have the same allocas with lives");
2229 (void) NumRematerializedValues;
2230 if (!PromotableAllocas.empty()) {
2231 // Apply mem2reg to promote alloca to SSA
2232 PromoteMemToReg(PromotableAllocas, DT);
2233 }
2234
2235 #ifndef NDEBUG
2236 for (auto &I : F.getEntryBlock())
2237 if (isa<AllocaInst>(I))
2238 InitialAllocaNum--;
2239 assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
2240 #endif
2241 }
2242
2243 /// Implement a unique function which doesn't require we sort the input
2244 /// vector. Doing so has the effect of changing the output of a couple of
2245 /// tests in ways which make them less useful in testing fused safepoints.
2246 template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
2247 SmallSet<T, 8> Seen;
2248 erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; });
2249 }
2250
2251 /// Insert holders so that each Value is obviously live through the entire
2252 /// lifetime of the call.
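///
/// E.g. (illustrative): to hold %a and %b live past a call safepoint we emit
///
///   call void (...) @__tmp_use(i8 addrspace(1)* %a, i8 addrspace(1)* %b)
///
/// purely so liveness sees a use; all holders are erased before the pass
/// finishes.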
2253 static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
2254 SmallVectorImpl<CallInst *> &Holders) {
2255 if (Values.empty())
2256 // No values to hold live, might as well not insert the empty holder
2257 return;
2258
2259 Module *M = Call->getModule();
2260 // Use a dummy vararg function to actually hold the values live
2261 FunctionCallee Func = M->getOrInsertFunction(
2262 "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
2263 if (isa<CallInst>(Call)) {
2264 // For call safepoints insert dummy calls right after safepoint
2265 Holders.push_back(
2266 CallInst::Create(Func, Values, "", &*++Call->getIterator()));
2267 return;
2268 }
2269 // For invoke safepoints insert dummy calls in both the normal and
2270 // exceptional destination blocks
2271 auto *II = cast<InvokeInst>(Call);
2272 Holders.push_back(CallInst::Create(
2273 Func, Values, "", &*II->getNormalDest()->getFirstInsertionPt()));
2274 Holders.push_back(CallInst::Create(
2275 Func, Values, "", &*II->getUnwindDest()->getFirstInsertionPt()));
2276 }
2277
2278 static void findLiveReferences(
2279 Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
2280 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
2281 GCPtrLivenessData OriginalLivenessData;
2282 computeLiveInValues(DT, F, OriginalLivenessData);
2283 for (size_t i = 0; i < records.size(); i++) {
2284 struct PartiallyConstructedSafepointRecord &info = records[i];
2285 analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info);
2286 }
2287 }
2288
2289 // Helper function for the "rematerializeLiveValues". It walks the use chain
2290 // starting from "CurrentValue" until it reaches the root of the chain, i.e.
2291 // the base or a value it cannot process. Only "simple" values are processed
2292 // (currently GEPs and casts). The returned root is examined by the
2293 // callers of findRematerializableChainToBasePointer. Fills the "ChainToBase"
2294 // array with all visited values.
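//
// For example (a sketch): starting from %d2 below, ChainToBase is filled with
// {%d2, %d1} and the returned root is %phi:
//
//   %d1 = getelementptr i32, i32 addrspace(1)* %phi, i64 2
//   %d2 = bitcast i32 addrspace(1)* %d1 to i8 addrspace(1)*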
2295 static Value* findRematerializableChainToBasePointer(
2296 SmallVectorImpl<Instruction*> &ChainToBase,
2297 Value *CurrentValue) {
2298 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
2299 ChainToBase.push_back(GEP);
2300 return findRematerializableChainToBasePointer(ChainToBase,
2301 GEP->getPointerOperand());
2302 }
2303
2304 if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
2305 if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
2306 return CI;
2307
2308 ChainToBase.push_back(CI);
2309 return findRematerializableChainToBasePointer(ChainToBase,
2310 CI->getOperand(0));
2311 }
2312
2313 // We have reached the root of the chain, which is either equal to the base or
2314 // is the first unsupported value along the use chain.
2315 return CurrentValue;
2316 }
2317
2318 // Helper function for the "rematerializeLiveValues". Compute cost of the use
2319 // chain we are going to rematerialize.
2320 static InstructionCost
2321 chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
2322 TargetTransformInfo &TTI) {
2323 InstructionCost Cost = 0;
2324
2325 for (Instruction *Instr : Chain) {
2326 if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
2327 assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
2328 "non noop cast is found during rematerialization");
2329
2330 Type *SrcTy = CI->getOperand(0)->getType();
2331 Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy,
2332 TTI::getCastContextHint(CI),
2333 TargetTransformInfo::TCK_SizeAndLatency, CI);
2334
2335 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
2336 // Cost of the address calculation
2337 Type *ValTy = GEP->getSourceElementType();
2338 Cost += TTI.getAddressComputationCost(ValTy);
2339
2340 // And cost of the GEP itself
2341 // TODO: Use TTI->getGEPCost here (it exists, but appears not to be
2342 // allowed for external usage)
2343 if (!GEP->hasAllConstantIndices())
2344 Cost += 2;
2345
2346 } else {
2347 llvm_unreachable("unsupported instruction type during rematerialization");
2348 }
2349 }
2350
2351 return Cost;
2352 }
2353
2354 static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) {
2355 unsigned PhiNum = OrigRootPhi.getNumIncomingValues();
2356 if (PhiNum != AlternateRootPhi.getNumIncomingValues() ||
2357 OrigRootPhi.getParent() != AlternateRootPhi.getParent())
2358 return false;
2359 // Map of incoming values and their corresponding basic blocks of
2360 // OrigRootPhi.
2361 SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues;
2362 for (unsigned i = 0; i < PhiNum; i++)
2363 CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] =
2364 OrigRootPhi.getIncomingBlock(i);
2365
2366 // Both current and base PHIs should have same incoming values and
2367 // the same basic blocks corresponding to the incoming values.
2368 for (unsigned i = 0; i < PhiNum; i++) {
2369 auto CIVI =
2370 CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i));
2371 if (CIVI == CurrentIncomingValues.end())
2372 return false;
2373 BasicBlock *CurrentIncomingBB = CIVI->second;
2374 if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i))
2375 return false;
2376 }
2377 return true;
2378 }
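
// An illustrative pair this accepts: the original phi and the inserted ".base"
// phi agree on every (incoming value, incoming block) pair, so they denote the
// same SSA value (sketch, not verbatim):
//
//   %phi      = phi i8 addrspace(1)* [ %a, %left ], [ %b, %right ]
//   %phi.base = phi i8 addrspace(1)* [ %a, %left ], [ %b, %right ]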
2379
2380 // Find derived pointers that can be recomputed cheaply enough and fill
2381 // RematerizationCandidates with such candidates.
2382 static void
2383 findRematerializationCandidates(PointerToBaseTy PointerToBase,
2384 RematCandTy &RematerizationCandidates,
2385 TargetTransformInfo &TTI) {
2386 const unsigned int ChainLengthThreshold = 10;
2387
2388 for (auto P2B : PointerToBase) {
2389 auto *Derived = P2B.first;
2390 auto *Base = P2B.second;
2391 // Consider only derived pointers.
2392 if (Derived == Base)
2393 continue;
2394
2395 // For each live pointer find its defining chain.
2396 SmallVector<Instruction *, 3> ChainToBase;
2397 Value *RootOfChain =
2398 findRematerializableChainToBasePointer(ChainToBase, Derived);
2399
2400 // Nothing to do, or chain is too long
2401 if (ChainToBase.size() == 0 ||
2402 ChainToBase.size() > ChainLengthThreshold)
2403 continue;
2404
2405 // Handle the scenario where the RootOfChain is not equal to the
2406 // Base Value, but they are essentially the same phi values.
2407 if (RootOfChain != PointerToBase[Derived]) {
2408 PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain);
2409 PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]);
2410 if (!OrigRootPhi || !AlternateRootPhi)
2411 continue;
2412 // PHI nodes that have the same incoming values and belong to the same
2413 // basic block are essentially the same SSA value. When the original phi
2414 // has incoming values with different base pointers, the original phi is
2415 // marked as a conflict, and an additional `AlternateRootPhi` with the same
2416 // incoming values gets generated by the findBasePointer function. We need
2417 // to identify that the newly generated AlternateRootPhi (the .base version
2418 // of the phi) and RootOfChain (the original phi node itself) are the same,
2419 // so that we can rematerialize the gep and casts. This is a workaround for
2420 // the deficiency in the findBasePointer algorithm.
2421 if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi))
2422 continue;
2423 }
2424 // Compute cost of this chain.
2425 InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI);
2426 // TODO: We can also account for cases when we will be able to remove some
2427 // of the rematerialized values by later optimization passes. I.e., if
2428 // we rematerialized several intersecting chains, or if the original values
2429 // don't have any uses besides this statepoint.
2430
2431 // Ok, there is a candidate.
2432 RematerizlizationCandidateRecord Record;
2433 Record.ChainToBase = ChainToBase;
2434 Record.RootOfChain = RootOfChain;
2435 Record.Cost = Cost;
2436 RematerizationCandidates.insert({ Derived, Record });
2437 }
2438 }
2439
2440 // Try to rematerialize derived pointers immediately before their uses
2441 // (instead of rematerializing after every statepoint it is live through).
2442 // This can be beneficial when derived pointer is live across many
2443 // statepoints, but uses are rare.
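//
// For instance (a sketch): a derived pointer live across three statepoints but
// used once afterwards would otherwise be relocated three times; recomputing it
// at the single use trades those three relocations for one cheap GEP chain.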
2444 static void rematerializeLiveValuesAtUses(
2445 RematCandTy &RematerizationCandidates,
2446 MutableArrayRef<PartiallyConstructedSafepointRecord> Records,
2447 PointerToBaseTy &PointerToBase) {
2448 if (!RematDerivedAtUses)
2449 return;
2450
2451 SmallVector<Instruction *, 32> LiveValuesToBeDeleted;
2452
2453 LLVM_DEBUG(dbgs() << "Rematerialize derived pointers at uses, "
2454 << "Num statepoints: " << Records.size() << '\n');
2455
2456 for (auto &It : RematerizationCandidates) {
2457 Instruction *Cand = cast<Instruction>(It.first);
2458 auto &Record = It.second;
2459
2460 if (Record.Cost >= RematerializationThreshold)
2461 continue;
2462
2463 if (Cand->user_empty())
2464 continue;
2465
2466 if (Cand->hasOneUse())
2467 if (auto *U = dyn_cast<Instruction>(Cand->getUniqueUndroppableUser()))
2468 if (U->getParent() == Cand->getParent())
2469 continue;
2470
2471 // Rematerialization before PHI nodes is not implemented.
2472 if (llvm::any_of(Cand->users(),
2473 [](const auto *U) { return isa<PHINode>(U); }))
2474 continue;
2475
2476 LLVM_DEBUG(dbgs() << "Trying cand " << *Cand << " ... ");
2477
2478 // The count of rematerialization instructions we introduce equals the number
2479 // of candidate uses.
2480 // The count of rematerialization instructions we eliminate equals the number
2481 // of statepoints the candidate is live through.
2482 // Consider the transformation profitable if the latter is greater than the
2483 // former (in other words, we create fewer instructions than we eliminate).
2484 unsigned NumLiveStatepoints = llvm::count_if(
2485 Records, [Cand](const auto &R) { return R.LiveSet.contains(Cand); });
2486 unsigned NumUses = Cand->getNumUses();
2487
2488 LLVM_DEBUG(dbgs() << "Num uses: " << NumUses << " Num live statepoints: "
2489 << NumLiveStatepoints << " ");
2490
2491 if (NumLiveStatepoints < NumUses) {
2492 LLVM_DEBUG(dbgs() << "not profitable\n");
2493 continue;
2494 }
2495
2496 // If rematerialization is 'free', then favor rematerialization at
2497 // uses as it generally shortens live ranges.
2498 // TODO: Short (size ==1) chains only?
2499 if (NumLiveStatepoints == NumUses && Record.Cost > 0) {
2500 LLVM_DEBUG(dbgs() << "not profitable\n");
2501 continue;
2502 }
2503
2504 LLVM_DEBUG(dbgs() << "looks profitable\n");
2505
2506 // ChainToBase may contain another remat candidate (as a sub chain) which
2507 // has been rewritten by now. We need to recollect the chain to have an
2508 // up-to-date value.
2509 // TODO: sort records in findRematerializationCandidates() in
2510 // decreasing chain size order?
2511 if (Record.ChainToBase.size() > 1) {
2512 Record.ChainToBase.clear();
2513 findRematerializableChainToBasePointer(Record.ChainToBase, Cand);
2514 }
2515
2516 // Current rematerialization algorithm is very simple: we rematerialize
2517 // immediately before EVERY use, even if there are several uses in the same
2518 // block or the use is local to the Cand def. The reason is that this allows
2519 // us to avoid recomputing liveness without complicated analysis:
2520 // - If we did not eliminate all uses of the original candidate, we do not
2521 // know exactly in which BBs it is still live.
2522 // - If we rematerialize once per BB, we need to find proper insertion
2523 // place (first use in block, but after Def) and analyze if there is
2524 // statepoint between uses in the block.
2525 while (!Cand->user_empty()) {
2526 Instruction *UserI = cast<Instruction>(*Cand->user_begin());
2527 Instruction *RematChain = rematerializeChain(
2528 Record.ChainToBase, UserI, Record.RootOfChain, PointerToBase[Cand]);
2529 UserI->replaceUsesOfWith(Cand, RematChain);
2530 PointerToBase[RematChain] = PointerToBase[Cand];
2531 }
2532 LiveValuesToBeDeleted.push_back(Cand);
2533 }
2534
2535 LLVM_DEBUG(dbgs() << "Rematerialized " << LiveValuesToBeDeleted.size()
2536 << " derived pointers\n");
2537 for (auto *Cand : LiveValuesToBeDeleted) {
2538 assert(Cand->use_empty() && "Unexpected user remain");
2539 RematerizationCandidates.erase(Cand);
2540 for (auto &R : Records) {
2541 assert(!R.LiveSet.contains(Cand) ||
2542 R.LiveSet.contains(PointerToBase[Cand]));
2543 R.LiveSet.remove(Cand);
2544 }
2545 }
2546
2547 // Recollect chains that were not rematerialized - we might have rewritten
2548 // their sub-chains.
2549 if (!LiveValuesToBeDeleted.empty()) {
2550 for (auto &P : RematerizationCandidates) {
2551 auto &R = P.second;
2552 if (R.ChainToBase.size() > 1) {
2553 R.ChainToBase.clear();
2554 findRematerializableChainToBasePointer(R.ChainToBase, P.first);
2555 }
2556 }
2557 }
2558 }
2559
2560 // From the statepoint live set pick values that are cheaper to recompute than
2561 // to relocate. Remove these values from the live set, rematerialize them after
2562 // the statepoint and record them in the "Info" structure. Note that similar to
2563 // relocated values we don't do any user adjustments here.
2564 static void rematerializeLiveValues(CallBase *Call,
2565 PartiallyConstructedSafepointRecord &Info,
2566 PointerToBaseTy &PointerToBase,
2567 RematCandTy &RematerizationCandidates,
2568 TargetTransformInfo &TTI) {
2569 // Record values we are going to delete from this statepoint live set.
2570 // We cannot do this in the following loop due to iterator invalidation.
2571 SmallVector<Value *, 32> LiveValuesToBeDeleted;
2572
2573 for (Value *LiveValue : Info.LiveSet) {
2574 auto It = RematerizationCandidates.find(LiveValue);
2575 if (It == RematerizationCandidates.end())
2576 continue;
2577
2578 RematerizlizationCandidateRecord &Record = It->second;
2579
2580 InstructionCost Cost = Record.Cost;
2581 // For invokes we need to rematerialize each chain twice - for normal and
2582 // for unwind basic blocks. Model this by multiplying cost by two.
2583 if (isa<InvokeInst>(Call))
2584 Cost *= 2;
2585
2586 // If it's too expensive - skip it.
2587 if (Cost >= RematerializationThreshold)
2588 continue;
2589
2590 // Remove value from the live set
2591 LiveValuesToBeDeleted.push_back(LiveValue);
2592
2593 // Clone instructions and record them inside "Info" structure.
2594
2595 // Different cases for calls and invokes. For invokes we need to clone
2596 // instructions both on normal and unwind path.
2597 if (isa<CallInst>(Call)) {
2598 Instruction *InsertBefore = Call->getNextNode();
2599 assert(InsertBefore);
2600 Instruction *RematerializedValue =
2601 rematerializeChain(Record.ChainToBase, InsertBefore,
2602 Record.RootOfChain, PointerToBase[LiveValue]);
2603 Info.RematerializedValues[RematerializedValue] = LiveValue;
2604 } else {
2605 auto *Invoke = cast<InvokeInst>(Call);
2606
2607 Instruction *NormalInsertBefore =
2608 &*Invoke->getNormalDest()->getFirstInsertionPt();
2609 Instruction *UnwindInsertBefore =
2610 &*Invoke->getUnwindDest()->getFirstInsertionPt();
2611
2612 Instruction *NormalRematerializedValue =
2613 rematerializeChain(Record.ChainToBase, NormalInsertBefore,
2614 Record.RootOfChain, PointerToBase[LiveValue]);
2615 Instruction *UnwindRematerializedValue =
2616 rematerializeChain(Record.ChainToBase, UnwindInsertBefore,
2617 Record.RootOfChain, PointerToBase[LiveValue]);
2618
2619 Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
2620 Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
2621 }
2622 }
2623
2624 // Remove rematerialized values from the live set.
2625 for (auto *LiveValue: LiveValuesToBeDeleted) {
2626 Info.LiveSet.remove(LiveValue);
2627 }
2628 }
2629
2630 static bool inlineGetBaseAndOffset(Function &F,
2631 SmallVectorImpl<CallInst *> &Intrinsics,
2632 DefiningValueMapTy &DVCache,
2633 IsKnownBaseMapTy &KnownBases) {
2634 auto &Context = F.getContext();
2635 auto &DL = F.getParent()->getDataLayout();
2636 bool Changed = false;
2637
2638 for (auto *Callsite : Intrinsics)
2639 switch (Callsite->getIntrinsicID()) {
2640 case Intrinsic::experimental_gc_get_pointer_base: {
2641 Changed = true;
2642 Value *Base =
2643 findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
2644 assert(!DVCache.count(Callsite));
2645 auto *BaseBC = IRBuilder<>(Callsite).CreateBitCast(
2646 Base, Callsite->getType(), suffixed_name_or(Base, ".cast", ""));
2647 if (BaseBC != Base)
2648 DVCache[BaseBC] = Base;
2649 Callsite->replaceAllUsesWith(BaseBC);
2650 if (!BaseBC->hasName())
2651 BaseBC->takeName(Callsite);
2652 Callsite->eraseFromParent();
2653 break;
2654 }
2655 case Intrinsic::experimental_gc_get_pointer_offset: {
2656 Changed = true;
2657 Value *Derived = Callsite->getOperand(0);
2658 Value *Base = findBasePointer(Derived, DVCache, KnownBases);
2659 assert(!DVCache.count(Callsite));
2660 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
2661 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
2662 IRBuilder<> Builder(Callsite);
2663 Value *BaseInt =
2664 Builder.CreatePtrToInt(Base, Type::getIntNTy(Context, IntPtrSize),
2665 suffixed_name_or(Base, ".int", ""));
2666 Value *DerivedInt =
2667 Builder.CreatePtrToInt(Derived, Type::getIntNTy(Context, IntPtrSize),
2668 suffixed_name_or(Derived, ".int", ""));
2669 Value *Offset = Builder.CreateSub(DerivedInt, BaseInt);
2670 Callsite->replaceAllUsesWith(Offset);
2671 Offset->takeName(Callsite);
2672 Callsite->eraseFromParent();
2673 break;
2674 }
2675 default:
2676 llvm_unreachable("Unknown intrinsic");
2677 }
2678
2679 return Changed;
2680 }
2681
2682 static bool insertParsePoints(Function &F, DominatorTree &DT,
2683 TargetTransformInfo &TTI,
2684 SmallVectorImpl<CallBase *> &ToUpdate,
2685 DefiningValueMapTy &DVCache,
2686 IsKnownBaseMapTy &KnownBases) {
2687 #ifndef NDEBUG
2688 // Validate the input
2689 std::set<CallBase *> Uniqued;
2690 Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
2691 assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
2692
2693 for (CallBase *Call : ToUpdate)
2694 assert(Call->getFunction() == &F);
2695 #endif
2696
2697 // When inserting gc.relocates for invokes, we need to be able to insert at
2698 // the top of the successor blocks. See the comment on
2699 // normalizeForInvokeSafepoint on exactly what is needed. Note that this step
2700 // may restructure the CFG.
  for (CallBase *Call : ToUpdate) {
    auto *II = dyn_cast<InvokeInst>(Call);
    if (!II)
      continue;
    normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
    normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
  }

  // A list of dummy calls added to the IR to keep various values obviously
  // live in the IR.  We'll remove all of these when done.
  SmallVector<CallInst *, 64> Holders;

  // Insert a dummy call with all of the deopt operands we'll need for the
  // actual safepoint insertion as arguments.  This ensures reference operands
  // in the deopt argument list are considered live through the safepoint (and
  // thus makes sure they get relocated).
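  // The holder is simply a call to a dummy function with the deopt values as
  // arguments; because those values then appear as uses of the call, the
  // liveness analysis below must keep them live across the safepoint.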
  for (CallBase *Call : ToUpdate) {
    SmallVector<Value *, 64> DeoptValues;

    for (Value *Arg : GetDeoptBundleOperands(Call)) {
      assert(!isUnhandledGCPointerType(Arg->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(Arg->getType()))
        DeoptValues.push_back(Arg);
    }

    insertUseHolderAfter(Call, DeoptValues, Holders);
  }

  SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size());

  // A) Identify all gc pointers which are statically live at the given call
  // site.
  findLiveReferences(F, DT, ToUpdate, Records);

  // Global mapping from live pointers to a base-defining-value.
  PointerToBaseTy PointerToBase;

  // B) Find the base pointers for each live pointer
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &info = Records[i];
    findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases);
  }
  if (PrintBasePointers) {
    errs() << "Base Pairs (w/o Relocation):\n";
    for (auto &Pair : PointerToBase) {
      errs() << " derived ";
      Pair.first->printAsOperand(errs(), false);
      errs() << " base ";
      Pair.second->printAsOperand(errs(), false);
      errs() << "\n";
    }
  }

  // The base phi insertion logic (for any safepoint) may have inserted new
  // instructions which are now live at some safepoint.  The simplest such
  // example is:
  // loop:
  //   phi a  <-- will be a new base_phi here
  //   safepoint 1 <-- that needs to be live here
  //   gep a + 1
  //   safepoint 2
  //   br loop
  // We insert some dummy calls after each safepoint to definitely hold live
  // the base pointers which were identified for that safepoint.  We'll then
  // ask liveness for _every_ base inserted to see what is now live.  Then we
  // remove the dummy calls.
  Holders.reserve(Holders.size() + Records.size());
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &Info = Records[i];

    SmallVector<Value *, 128> Bases;
    for (auto *Derived : Info.LiveSet) {
      assert(PointerToBase.count(Derived) && "Missed base for derived pointer");
      Bases.push_back(PointerToBase[Derived]);
    }

    insertUseHolderAfter(ToUpdate[i], Bases, Holders);
  }

  // By selecting base pointers, we've effectively inserted new uses.  Thus, we
  // need to rerun liveness.  We may *also* have inserted new defs, but that's
  // not the key issue.
  recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase);

  if (PrintBasePointers) {
    errs() << "Base Pairs (w/ Relocation):\n";
    for (auto Pair : PointerToBase) {
      errs() << " derived ";
      Pair.first->printAsOperand(errs(), false);
      errs() << " base ";
      Pair.second->printAsOperand(errs(), false);
      errs() << "\n";
    }
  }

  // It is possible that non-constant live variables have a constant base.  For
  // example, a GEP with a variable offset from a global.  In this case we can
  // remove it from the liveset.  We already don't add constants to the liveset
  // because we assume they won't move at runtime and the GC doesn't need to be
  // informed about them.  The same reasoning applies if the base is constant.
  // Note that the relocation placement code relies on this filtering for
  // correctness as it expects the base to be in the liveset, which isn't true
  // if the base is constant.
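  // For instance (illustrative IR):
  //   %derived = getelementptr i8, i8* @Global, i64 %idx
  // %derived is not a constant, but its base @Global is; neither needs to be
  // reported to the collector, so %derived can be dropped from the live set.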
  for (auto &Info : Records) {
    Info.LiveSet.remove_if([&](Value *LiveV) {
      assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
      return isa<Constant>(PointerToBase[LiveV]);
    });
  }

  for (CallInst *CI : Holders)
    CI->eraseFromParent();

  Holders.clear();

  // Compute the cost of possible re-materialization of derived pointers.
  RematCandTy RematerializationCandidates;
  findRematerializationCandidates(PointerToBase, RematerializationCandidates,
                                  TTI);

  // In order to reduce the live set of a statepoint, we might choose to
  // rematerialize some values instead of relocating them.  This is purely an
  // optimization and does not influence correctness.
  // First try rematerialization at uses, then after statepoints.
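  // For example, a derived pointer which is a cheap GEP off an already-live
  // base may be recomputed from the relocated base after the statepoint
  // rather than occupying its own relocation slot, subject to the cost limit
  // in RematerializationThreshold.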
  rematerializeLiveValuesAtUses(RematerializationCandidates, Records,
                                PointerToBase);
  for (size_t i = 0; i < Records.size(); i++)
    rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
                            RematerializationCandidates, TTI);

  // We need this to safely RAUW and delete call or invoke return values that
  // may themselves be live over a statepoint.  For details, please see usage
  // in makeStatepointExplicitImpl.
  std::vector<DeferredReplacement> Replacements;

  // Now run through and replace the existing statepoints with new ones with
  // the live variables listed.  We do not yet update uses of the values being
  // relocated.  We have references to live variables that need to survive to
  // the last iteration of this loop.  (By construction, the previous
  // statepoint cannot be a live variable, thus we can and do remove the old
  // statepoint calls as we go.)
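  // For instance, if the return value of one call being rewritten is live
  // across a later call being rewritten, it is still referenced above as a
  // live variable; deferring the RAUW of that return value until all
  // statepoints have been created keeps those references valid.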
  for (size_t i = 0; i < Records.size(); i++)
    makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
                           PointerToBase);

  ToUpdate.clear(); // prevent accidental use of invalid calls.

  for (auto &PR : Replacements)
    PR.doReplacement();

  Replacements.clear();

  for (auto &Info : Records) {
    // These live sets may contain stale Value pointers, since we replaced
    // calls with operand bundles with calls wrapped in gc.statepoint, and
    // some of those calls may have been def'ing live gc pointers.  Clear
    // these out to avoid accidentally using them.
    //
    // TODO: We should create a separate data structure that does not contain
    // these live sets, and migrate to using that data structure from this
    // point onward.
    Info.LiveSet.clear();
  }
  PointerToBase.clear();

  // Do all the fixups of the original live variables to their relocated selves
  SmallVector<Value *, 128> Live;
  for (size_t i = 0; i < Records.size(); i++) {
    PartiallyConstructedSafepointRecord &Info = Records[i];

    // We can't simply save the live set from the original insertion.  One of
    // the live values might be the result of a call which needs a safepoint.
    // That Value* no longer exists and we need to use the new gc_result.
    // Thankfully, the live set is embedded in the statepoint (and updated), so
    // we just grab that.
    llvm::append_range(Live, Info.StatepointToken->gc_args());
#ifndef NDEBUG
    // Do some basic validation checking on our liveness results before
    // performing relocation.  Relocation can and will turn mistakes in
    // liveness results into non-sensical code which is much harder to debug.
    // TODO: It would be nice to test consistency as well
    assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
           "statepoint must be reachable or liveness is meaningless");
    for (Value *V : Info.StatepointToken->gc_args()) {
      if (!isa<Instruction>(V))
        // Non-instruction values trivially dominate all possible uses
        continue;
      auto *LiveInst = cast<Instruction>(V);
      assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
             "unreachable values should never be live");
      assert(DT.dominates(LiveInst, Info.StatepointToken) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
#endif
  }
  unique_unsorted(Live);

#ifndef NDEBUG
  // Validation check
  for (auto *Ptr : Live)
    assert(isHandledGCPointerType(Ptr->getType()) &&
           "must be a gc pointer type");
#endif

  relocationViaAlloca(F, DT, Live, Records);
  return !Records.empty();
}

// List of all parameter and return attributes which must be stripped when
// lowering from the abstract machine model.  Note that it is okay to list
// attributes here which aren't valid as return attributes.
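// These facts may hold in the abstract machine model, but once the collector
// can move objects and any gc.statepoint may conceptually free or rewrite
// the entire heap, dereferenceability and aliasing claims on gc pointers are
// no longer sound.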
static AttributeMask getParamAndReturnAttributesToRemove() {
  AttributeMask R;
  R.addAttribute(Attribute::Dereferenceable);
  R.addAttribute(Attribute::DereferenceableOrNull);
  R.addAttribute(Attribute::ReadNone);
  R.addAttribute(Attribute::ReadOnly);
  R.addAttribute(Attribute::WriteOnly);
  R.addAttribute(Attribute::NoAlias);
  R.addAttribute(Attribute::NoFree);
  return R;
}

static void stripNonValidAttributesFromPrototype(Function &F) {
  LLVMContext &Ctx = F.getContext();

  // Intrinsics are very delicate.  Lowering sometimes depends on the presence
  // of certain attributes for correctness, but we may have also inferred
  // additional ones in the abstract machine model which need to be stripped.
  // This assumes that the attributes defined in Intrinsic.td are
  // conservatively correct for both the physical and abstract model.
  if (Intrinsic::ID id = F.getIntrinsicID()) {
    F.setAttributes(Intrinsic::getAttributes(Ctx, id));
    return;
  }

  AttributeMask R = getParamAndReturnAttributesToRemove();
  for (Argument &A : F.args())
    if (isa<PointerType>(A.getType()))
      F.removeParamAttrs(A.getArgNo(), R);

  if (isa<PointerType>(F.getReturnType()))
    F.removeRetAttrs(R);

  for (auto Attr : FnAttrsToStrip)
    F.removeFnAttr(Attr);
}

/// Certain metadata on instructions are invalid after running RS4GC.
/// Optimizations that run after RS4GC can incorrectly use this metadata to
/// optimize functions.  We drop such metadata on the instruction.
static void stripInvalidMetadataFromInstruction(Instruction &I) {
  if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
    return;
  // This is the metadata which is still valid on loads and stores after
  // RS4GC.
  // The metadata implying dereferenceability and noalias are (conservatively)
  // dropped.  This is because semantically, after RewriteStatepointsForGC
  // runs, all calls to gc.statepoint "free" the entire heap.  Also,
  // gc.statepoint can touch the entire heap including noalias objects.
  // Note: The reasoning is the same as for stripping the dereferenceability
  // and noalias attributes that are analogous to the metadata counterparts.
  // We also drop the invariant.load metadata on the load because that
  // metadata implies the address operand to the load points to memory that
  // is never changed once it becomes dereferenceable.  This is no longer
  // true after RS4GC.  Similar reasoning applies to invariant.group
  // metadata, which applies to loads within a group.
  unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
                                        LLVMContext::MD_range,
                                        LLVMContext::MD_alias_scope,
                                        LLVMContext::MD_nontemporal,
                                        LLVMContext::MD_nonnull,
                                        LLVMContext::MD_align,
                                        LLVMContext::MD_type};

  // Drop all metadata on the instruction other than ValidMetadataAfterRS4GC.
  I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
}

static void stripNonValidDataFromBody(Function &F) {
  if (F.empty())
    return;

  LLVMContext &Ctx = F.getContext();
  MDBuilder Builder(Ctx);

  // Set of invariant.start instructions that we need to remove.  Use this to
  // avoid invalidating the instruction iterator.
  SmallVector<IntrinsicInst *, 12> InvariantStartInstructions;

  for (Instruction &I : instructions(F)) {
    // invariant.start on a memory location implies that the referenced memory
    // location is constant and unchanging.  This is no longer true after
    // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
    // which free the entire heap, and the presence of invariant.start allows
    // the optimizer to sink a load of the memory location past a statepoint,
    // which is incorrect.
    if (auto *II = dyn_cast<IntrinsicInst>(&I))
      if (II->getIntrinsicID() == Intrinsic::invariant_start) {
        InvariantStartInstructions.push_back(II);
        continue;
      }

    if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) {
      MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag);
      I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
    }

    stripInvalidMetadataFromInstruction(I);

    AttributeMask R = getParamAndReturnAttributesToRemove();
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      for (int i = 0, e = Call->arg_size(); i != e; i++)
        if (isa<PointerType>(Call->getArgOperand(i)->getType()))
          Call->removeParamAttrs(i, R);
      if (isa<PointerType>(Call->getType()))
        Call->removeRetAttrs(R);
    }
  }

  // Delete the invariant.start instructions and RAUW undef.
  for (auto *II : InvariantStartInstructions) {
    II->replaceAllUsesWith(UndefValue::get(II->getType()));
    II->eraseFromParent();
  }
}

/// Returns true if this function should be rewritten by this pass.  The main
/// point of this function is as an extension point for custom logic.
static bool shouldRewriteStatepointsIn(Function &F) {
  // TODO: This should check the GCStrategy
  if (F.hasGC()) {
    const auto &FunctionGCName = F.getGC();
    const StringRef StatepointExampleName("statepoint-example");
    const StringRef CoreCLRName("coreclr");
    return (StatepointExampleName == FunctionGCName) ||
           (CoreCLRName == FunctionGCName);
  }
  return false;
}

static void stripNonValidData(Module &M) {
#ifndef NDEBUG
  assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
#endif

  for (Function &F : M)
    stripNonValidAttributesFromPrototype(F);

  for (Function &F : M)
    stripNonValidDataFromBody(F);
}

bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
                                            TargetTransformInfo &TTI,
                                            const TargetLibraryInfo &TLI) {
  assert(!F.isDeclaration() && !F.empty() &&
         "need function body to rewrite statepoints in");
  assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");

  auto NeedsRewrite = [&TLI](Instruction &I) {
    if (const auto *Call = dyn_cast<CallBase>(&I)) {
      if (isa<GCStatepointInst>(Call))
        return false;
      if (callsGCLeafFunction(Call, TLI))
        return false;

      // Normally it's up to the frontend to make sure that non-leaf calls also
      // have proper deopt state if it is required.  We make an exception for
      // element atomic memcpy/memmove intrinsics here.  Unlike other
      // intrinsics, these are non-leaf by default.  They might be generated by
      // the optimizer, which doesn't know how to produce a proper deopt state.
      // So if we see a non-leaf memcpy/memmove without deopt state, just treat
      // it as a leaf copy and don't produce a statepoint.
      if (!AllowStatepointWithNoDeoptInfo &&
          !Call->getOperandBundle(LLVMContext::OB_deopt)) {
        assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
               "Don't expect any other calls here!");
        return false;
      }
      return true;
    }
    return false;
  };

  // Delete any unreachable statepoints so that we don't have unrewritten
  // statepoints surviving this pass.  This makes testing easier and the
  // resulting IR less confusing to human readers.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  bool MadeChange = removeUnreachableBlocks(F, &DTU);
  // Flush the Dominator Tree.
  DTU.getDomTree();

  // Gather all the statepoints which need to be rewritten.  Be careful to only
  // consider those in reachable code since we need to ask dominance queries
  // when rewriting.  We'll delete the unreachable ones in a moment.
  SmallVector<CallBase *, 64> ParsePointNeeded;
  SmallVector<CallInst *, 64> Intrinsics;
  for (Instruction &I : instructions(F)) {
    // TODO: only the ones with the flag set!
    if (NeedsRewrite(I)) {
      // NOTE removeUnreachableBlocks() is stronger than
      // DominatorTree::isReachableFromEntry().  In other words
      // removeUnreachableBlocks can remove some blocks for which
      // isReachableFromEntry() returns true.
      assert(DT.isReachableFromEntry(I.getParent()) &&
             "no unreachable blocks expected");
      ParsePointNeeded.push_back(cast<CallBase>(&I));
    }
    if (auto *CI = dyn_cast<CallInst>(&I))
      if (CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_base ||
          CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_offset)
        Intrinsics.emplace_back(CI);
  }

  // Return early if no work to do.
  if (ParsePointNeeded.empty() && Intrinsics.empty())
    return MadeChange;

  // As a prepass, go ahead and aggressively destroy single entry phi nodes.
  // These are created by LCSSA.  They have the effect of increasing the size
  // of liveness sets for no good reason.  It may be harder to do this post
  // insertion since relocations and base phis can confuse things.
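  // For example (illustrative IR), an LCSSA phi such as
  //   exit:
  //     %p.lcssa = phi i8* [ %p, %loop ]
  // is folded so uses refer to %p directly, keeping one fewer gc pointer in
  // the live sets.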
  for (BasicBlock &BB : F)
    if (BB.getUniquePredecessor())
      MadeChange |= FoldSingleEntryPHINodes(&BB);

  // Before we start introducing relocations, we want to tweak the IR a bit to
  // avoid unfortunate code generation effects.  The main example is that we
  // want to try to make sure the comparison feeding a branch is after any
  // safepoints.  Otherwise, we end up with a comparison of pre-relocation
  // values feeding a branch after relocation.  This is semantically correct,
  // but results in extra register pressure since both the pre-relocation and
  // post-relocation copies must be available in registers.  For code without
  // relocations this is handled elsewhere, but teaching the scheduler to
  // reverse the transform we're about to do would be slightly complex.
  // Note: This may extend the live range of the inputs to the icmp and thus
  // increase the liveset of any statepoint we move over.  This is profitable
  // as long as all statepoints are in rare blocks.  If we had in-register
  // lowering for live values this would be a much safer transform.
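  // Sketch of the reordering (illustrative IR):
  //   %cmp = icmp eq i8* %a, %b
  //   <safepoint>                ; %a and %b get relocated
  //   br i1 %cmp, ...
  // becomes
  //   <safepoint>
  //   %cmp = icmp eq i8* %a.relocated, %b.relocated
  //   br i1 %cmp, ...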
  auto getConditionInst = [](Instruction *TI) -> Instruction * {
    if (auto *BI = dyn_cast<BranchInst>(TI))
      if (BI->isConditional())
        return dyn_cast<Instruction>(BI->getCondition());
    // TODO: Extend this to handle switches
    return nullptr;
  };
  for (BasicBlock &BB : F) {
    Instruction *TI = BB.getTerminator();
    if (auto *Cond = getConditionInst(TI))
      // TODO: Handle more than just ICmps here.  We should be able to move
      // most instructions without side effects or memory access.
      if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
        MadeChange = true;
        Cond->moveBefore(TI);
      }
  }

  // Nasty workaround - The base computation code in the main algorithm doesn't
  // consider the fact that a GEP can be used to convert a scalar to a vector.
  // The right fix for this is to integrate GEPs into the base rewriting
  // algorithm properly; this is just a short term workaround to prevent
  // crashes by canonicalizing such GEPs into fully vector GEPs.
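  // For example (illustrative IR):
  //   %gep = getelementptr i8, i8* %scalar_base, <4 x i64> %offsets
  // is canonicalized by first splatting %scalar_base to <4 x i8*>, so the
  // GEP (and thus its base) is uniformly vector-typed.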
  for (Instruction &I : instructions(F)) {
    if (!isa<GetElementPtrInst>(I))
      continue;

    unsigned VF = 0;
    for (unsigned i = 0; i < I.getNumOperands(); i++)
      if (auto *OpndVTy = dyn_cast<VectorType>(I.getOperand(i)->getType())) {
        assert(VF == 0 ||
               VF == cast<FixedVectorType>(OpndVTy)->getNumElements());
        VF = cast<FixedVectorType>(OpndVTy)->getNumElements();
      }

    // It's the vector to scalar traversal through the pointer operand which
    // confuses base pointer rewriting, so limit ourselves to that case.
    if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
      IRBuilder<> B(&I);
      auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
      I.setOperand(0, Splat);
      MadeChange = true;
    }
  }

  // Cache the 'defining value' relation used in the computation and
  // insertion of base phis and selects.  This ensures that we don't insert
  // large numbers of duplicate base_phis.  Use one cache for both
  // inlineGetBaseAndOffset() and insertParsePoints().
  DefiningValueMapTy DVCache;

  // Mapping between base values and a flag indicating whether it's a known
  // base or not.
  IsKnownBaseMapTy KnownBases;

  if (!Intrinsics.empty())
    // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before
    // finding live references.
    MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);

  if (!ParsePointNeeded.empty())
    MadeChange |=
        insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);

  return MadeChange;
}

// Liveness computation via standard dataflow
// -------------------------------------------------------------------

// TODO: Consider using bitvectors for liveness; the set of potentially
// interesting values should be small and easy to pre-compute.

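// The standard backward dataflow equations solved below are:
//   LiveOut(BB) = union of LiveIn(Succ) over all successors Succ of BB
//   LiveIn(BB)  = (LiveOut(BB) U LiveSet(BB)) - KillSet(BB)
// where LiveSet(BB) holds the gc pointers used in BB before any redefinition
// and KillSet(BB) holds the gc pointers defined in BB.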
/// Compute the live-in set for the location Begin, starting from
/// the live-out set of the basic block.
static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
                                BasicBlock::reverse_iterator End,
                                SetVector<Value *> &LiveTmp) {
  for (auto &I : make_range(Begin, End)) {
    // KILL/Def - Remove this definition from LiveIn
    LiveTmp.remove(&I);

    // Don't consider *uses* in PHI nodes; we handle their contribution to
    // predecessor blocks when we seed the LiveOut sets.
    if (isa<PHINode>(I))
      continue;

    // USE - Add to the LiveIn set for this instruction
    for (Value *V : I.operands()) {
      assert(!isUnhandledGCPointerType(V->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
        // The choice to exclude all things constant here is slightly subtle.
        // There are two independent reasons:
        // - We assume that things which are constant (from LLVM's definition)
        //   do not move at runtime.  For example, the address of a global
        //   variable is fixed, even though its contents may not be.
        // - Second, we can't disallow arbitrary inttoptr constants even
        //   if the language frontend does.  Optimization passes are free to
        //   locally exploit facts without respect to global reachability.
        //   This can create sections of code which are dynamically
        //   unreachable and contain just about anything.  (see constants.ll
        //   in tests)
        LiveTmp.insert(V);
      }
    }
  }
}

static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp) {
  for (BasicBlock *Succ : successors(BB)) {
    for (auto &I : *Succ) {
      PHINode *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *V = PN->getIncomingValueForBlock(BB);
      assert(!isUnhandledGCPointerType(V->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V))
        LiveTmp.insert(V);
    }
  }
}

static SetVector<Value *> computeKillSet(BasicBlock *BB) {
  SetVector<Value *> KillSet;
  for (Instruction &I : *BB)
    if (isHandledGCPointerType(I.getType()))
      KillSet.insert(&I);
  return KillSet;
}

#ifndef NDEBUG
/// Check that the items in 'Live' dominate 'TI'.  This is used as a basic
/// validation check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
                          Instruction *TI, bool TermOkay = false) {
  for (Value *V : Live) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      // The terminator can be a member of the LiveOut set.  LLVM's definition
      // of instruction dominance states that V does not dominate itself.  As
      // such, we need to special case this to allow it.
      if (TermOkay && TI == I)
        continue;
      assert(DT.dominates(I, TI) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
  }
}

/// Check that all the liveness sets used during the computation of liveness
/// obey basic SSA properties.  This is useful for finding cases where we miss
/// a def.
static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
                          BasicBlock &BB) {
  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
}
#endif

static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data) {
  SmallSetVector<BasicBlock *, 32> Worklist;

  // Seed the liveness for each individual block
  for (BasicBlock &BB : F) {
    Data.KillSet[&BB] = computeKillSet(&BB);
    Data.LiveSet[&BB].clear();
    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);

#ifndef NDEBUG
    for (Value *Kill : Data.KillSet[&BB])
      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
#endif

    Data.LiveOut[&BB] = SetVector<Value *>();
    computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
    Data.LiveIn[&BB] = Data.LiveSet[&BB];
    Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
    Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
    if (!Data.LiveIn[&BB].empty())
      Worklist.insert(pred_begin(&BB), pred_end(&BB));
  }

  // Propagate that liveness until stable
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // Compute our new liveout set, then exit early if it hasn't changed
    // despite the contribution of our successors.
    SetVector<Value *> LiveOut = Data.LiveOut[BB];
    const auto OldLiveOutSize = LiveOut.size();
    for (BasicBlock *Succ : successors(BB)) {
      assert(Data.LiveIn.count(Succ));
      LiveOut.set_union(Data.LiveIn[Succ]);
    }
    // assert: OldLiveOut is a subset of LiveOut
    if (OldLiveOutSize == LiveOut.size()) {
      // If the sets are the same size, then we didn't actually add anything
      // when unioning our successors' LiveIn sets.  Thus, the LiveIn of this
      // block hasn't changed.
      continue;
    }
    Data.LiveOut[BB] = LiveOut;

    // Apply the effects of this basic block
    SetVector<Value *> LiveTmp = LiveOut;
    LiveTmp.set_union(Data.LiveSet[BB]);
    LiveTmp.set_subtract(Data.KillSet[BB]);

    assert(Data.LiveIn.count(BB));
    const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB];
    // assert: OldLiveIn is a subset of LiveTmp
    if (OldLiveIn.size() != LiveTmp.size()) {
      Data.LiveIn[BB] = LiveTmp;
      Worklist.insert(pred_begin(BB), pred_end(BB));
    }
  } // while (!Worklist.empty())

#ifndef NDEBUG
  // Verify our output against SSA properties.  This helps catch any
  // missing kills during the above iteration.
  for (BasicBlock &BB : F)
    checkBasicSSA(DT, Data, BB);
#endif
}

static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &Out) {
  BasicBlock *BB = Inst->getParent();

  // Note: The copy is intentional and required
  assert(Data.LiveOut.count(BB));
  SetVector<Value *> LiveOut = Data.LiveOut[BB];

  // We want to handle the statepoint itself oddly.  Its call result is not
  // live (normal), nor are its arguments (unless they're used again later).
  // This adjustment is specifically what we need to relocate.
  computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(),
                      LiveOut);
  LiveOut.remove(Inst);
  Out.insert(LiveOut.begin(), LiveOut.end());
}

static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallBase *Call,
                                  PartiallyConstructedSafepointRecord &Info,
                                  PointerToBaseTy &PointerToBase) {
  StatepointLiveSetTy Updated;
  findLiveSetAtInst(Call, RevisedLivenessData, Updated);

  // We may have base pointers which are now live that weren't before.  We
  // need to update the PointerToBase structure to reflect this.
  for (auto *V : Updated)
    PointerToBase.insert({V, V});

  Info.LiveSet = Updated;
}