1 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // Rewrite call/invoke instructions so as to make potential relocations 10 // performed by the garbage collector explicit in the IR. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Scalar/RewriteStatepointsForGC.h" 15 16 #include "llvm/ADT/ArrayRef.h" 17 #include "llvm/ADT/DenseMap.h" 18 #include "llvm/ADT/DenseSet.h" 19 #include "llvm/ADT/MapVector.h" 20 #include "llvm/ADT/STLExtras.h" 21 #include "llvm/ADT/Sequence.h" 22 #include "llvm/ADT/SetVector.h" 23 #include "llvm/ADT/SmallSet.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/ADT/iterator_range.h" 27 #include "llvm/Analysis/DomTreeUpdater.h" 28 #include "llvm/Analysis/TargetLibraryInfo.h" 29 #include "llvm/Analysis/TargetTransformInfo.h" 30 #include "llvm/IR/Argument.h" 31 #include "llvm/IR/AttributeMask.h" 32 #include "llvm/IR/Attributes.h" 33 #include "llvm/IR/BasicBlock.h" 34 #include "llvm/IR/CallingConv.h" 35 #include "llvm/IR/Constant.h" 36 #include "llvm/IR/Constants.h" 37 #include "llvm/IR/DataLayout.h" 38 #include "llvm/IR/DerivedTypes.h" 39 #include "llvm/IR/Dominators.h" 40 #include "llvm/IR/Function.h" 41 #include "llvm/IR/GCStrategy.h" 42 #include "llvm/IR/IRBuilder.h" 43 #include "llvm/IR/InstIterator.h" 44 #include "llvm/IR/InstrTypes.h" 45 #include "llvm/IR/Instruction.h" 46 #include "llvm/IR/Instructions.h" 47 #include "llvm/IR/IntrinsicInst.h" 48 #include "llvm/IR/Intrinsics.h" 49 #include "llvm/IR/LLVMContext.h" 50 #include "llvm/IR/MDBuilder.h" 51 #include "llvm/IR/Metadata.h" 52 #include "llvm/IR/Module.h" 53 #include "llvm/IR/Statepoint.h" 54 #include "llvm/IR/Type.h" 55 #include "llvm/IR/User.h" 56 #include "llvm/IR/Value.h" 57 #include "llvm/IR/ValueHandle.h" 58 #include "llvm/Support/Casting.h" 59 #include "llvm/Support/CommandLine.h" 60 #include "llvm/Support/Compiler.h" 61 #include "llvm/Support/Debug.h" 62 #include "llvm/Support/ErrorHandling.h" 63 #include "llvm/Support/raw_ostream.h" 64 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 65 #include "llvm/Transforms/Utils/Local.h" 66 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 67 #include <cassert> 68 #include <cstddef> 69 #include <cstdint> 70 #include <iterator> 71 #include <optional> 72 #include <set> 73 #include <string> 74 #include <utility> 75 #include <vector> 76 77 #define DEBUG_TYPE "rewrite-statepoints-for-gc" 78 79 using namespace llvm; 80 81 // Print the liveset found at the insert location 82 static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden, 83 cl::init(false)); 84 static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, 85 cl::init(false)); 86 87 // Print out the base pointers for debugging 88 static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden, 89 cl::init(false)); 90 91 // Cost threshold measuring when it is profitable to rematerialize value instead 92 // of relocating it 93 static cl::opt<unsigned> 94 RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden, 95 cl::init(6)); 96 97 #ifdef EXPENSIVE_CHECKS 98 static bool ClobberNonLive = true; 99 #else 100 
static bool ClobberNonLive = false; 101 #endif 102 103 static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live", 104 cl::location(ClobberNonLive), 105 cl::Hidden); 106 107 static cl::opt<bool> 108 AllowStatepointWithNoDeoptInfo("rs4gc-allow-statepoint-with-no-deopt-info", 109 cl::Hidden, cl::init(true)); 110 111 static cl::opt<bool> RematDerivedAtUses("rs4gc-remat-derived-at-uses", 112 cl::Hidden, cl::init(true)); 113 114 /// The IR fed into RewriteStatepointsForGC may have had attributes and 115 /// metadata implying dereferenceability that are no longer valid/correct after 116 /// RewriteStatepointsForGC has run. This is because semantically, after 117 /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the entire 118 /// heap. stripNonValidData (conservatively) restores 119 /// correctness by erasing all attributes in the module that externally imply 120 /// dereferenceability. Similar reasoning also applies to the noalias 121 /// attributes and metadata. gc.statepoint can touch the entire heap including 122 /// noalias objects. 123 /// Apart from attributes and metadata, we also remove instructions that imply 124 /// constant physical memory: llvm.invariant.start. 125 static void stripNonValidData(Module &M); 126 127 // Find the GC strategy for a function, or null if it doesn't have one. 128 static std::unique_ptr<GCStrategy> findGCStrategy(Function &F); 129 130 static bool shouldRewriteStatepointsIn(Function &F); 131 132 PreservedAnalyses RewriteStatepointsForGC::run(Module &M, 133 ModuleAnalysisManager &AM) { 134 bool Changed = false; 135 auto &FAM = AM.getResult<FunctionAnalysisManagerModuleProxy>(M).getManager(); 136 for (Function &F : M) { 137 // Nothing to do for declarations. 138 if (F.isDeclaration() || F.empty()) 139 continue; 140 141 // Policy choice says not to rewrite - the most common reason is that we're 142 // compiling code without a GCStrategy. 143 if (!shouldRewriteStatepointsIn(F)) 144 continue; 145 146 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 147 auto &TTI = FAM.getResult<TargetIRAnalysis>(F); 148 auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F); 149 Changed |= runOnFunction(F, DT, TTI, TLI); 150 } 151 if (!Changed) 152 return PreservedAnalyses::all(); 153 154 // stripNonValidData asserts that shouldRewriteStatepointsIn 155 // returns true for at least one function in the module. Since at least 156 // one function changed, we know that the precondition is satisfied. 157 stripNonValidData(M); 158 159 PreservedAnalyses PA; 160 PA.preserve<TargetIRAnalysis>(); 161 PA.preserve<TargetLibraryAnalysis>(); 162 return PA; 163 } 164 165 namespace { 166 167 struct GCPtrLivenessData { 168 /// Values defined in this block. 169 MapVector<BasicBlock *, SetVector<Value *>> KillSet; 170 171 /// Values used in this block (and thus live); does not included values 172 /// killed within this block. 173 MapVector<BasicBlock *, SetVector<Value *>> LiveSet; 174 175 /// Values live into this basic block (i.e. used by any 176 /// instruction in this basic block or ones reachable from here) 177 MapVector<BasicBlock *, SetVector<Value *>> LiveIn; 178 179 /// Values live out of this basic block (i.e. live into 180 /// any successor block) 181 MapVector<BasicBlock *, SetVector<Value *>> LiveOut; 182 }; 183 184 // The type of the internal cache used inside the findBasePointers family 185 // of functions. From the callers perspective, this is an opaque type and 186 // should not be inspected. 
187 // 188 // In the actual implementation this caches two relations: 189 // - The base relation itself (i.e. this pointer is based on that one) 190 // - The base defining value relation (i.e. before base_phi insertion) 191 // Generally, after the execution of a full findBasePointer call, only the 192 // base relation will remain. Internally, we add a mixture of the two 193 // types, then update all the second type to the first type 194 using DefiningValueMapTy = MapVector<Value *, Value *>; 195 using IsKnownBaseMapTy = MapVector<Value *, bool>; 196 using PointerToBaseTy = MapVector<Value *, Value *>; 197 using StatepointLiveSetTy = SetVector<Value *>; 198 using RematerializedValueMapTy = 199 MapVector<AssertingVH<Instruction>, AssertingVH<Value>>; 200 201 struct PartiallyConstructedSafepointRecord { 202 /// The set of values known to be live across this safepoint 203 StatepointLiveSetTy LiveSet; 204 205 /// The *new* gc.statepoint instruction itself. This produces the token 206 /// that normal path gc.relocates and the gc.result are tied to. 207 GCStatepointInst *StatepointToken; 208 209 /// Instruction to which exceptional gc relocates are attached 210 /// Makes it easier to iterate through them during relocationViaAlloca. 211 Instruction *UnwindToken; 212 213 /// Record live values we are rematerialized instead of relocating. 214 /// They are not included into 'LiveSet' field. 215 /// Maps rematerialized copy to it's original value. 216 RematerializedValueMapTy RematerializedValues; 217 }; 218 219 struct RematerizlizationCandidateRecord { 220 // Chain from derived pointer to base. 221 SmallVector<Instruction *, 3> ChainToBase; 222 // Original base. 223 Value *RootOfChain; 224 // Cost of chain. 225 InstructionCost Cost; 226 }; 227 using RematCandTy = MapVector<Value *, RematerizlizationCandidateRecord>; 228 229 } // end anonymous namespace 230 231 static ArrayRef<Use> GetDeoptBundleOperands(const CallBase *Call) { 232 std::optional<OperandBundleUse> DeoptBundle = 233 Call->getOperandBundle(LLVMContext::OB_deopt); 234 235 if (!DeoptBundle) { 236 assert(AllowStatepointWithNoDeoptInfo && 237 "Found non-leaf call without deopt info!"); 238 return {}; 239 } 240 241 return DeoptBundle->Inputs; 242 } 243 244 /// Compute the live-in set for every basic block in the function 245 static void computeLiveInValues(DominatorTree &DT, Function &F, 246 GCPtrLivenessData &Data, GCStrategy *GC); 247 248 /// Given results from the dataflow liveness computation, find the set of live 249 /// Values at a particular instruction. 250 static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, 251 StatepointLiveSetTy &out, GCStrategy *GC); 252 253 static bool isGCPointerType(Type *T, GCStrategy *GC) { 254 assert(GC && "GC Strategy for isGCPointerType cannot be null"); 255 256 if (!isa<PointerType>(T)) 257 return false; 258 259 // conservative - same as StatepointLowering 260 return GC->isGCManagedPointer(T).value_or(true); 261 } 262 263 // Return true if this type is one which a) is a gc pointer or contains a GC 264 // pointer and b) is of a type this code expects to encounter as a live value. 265 // (The insertion code will assert that a type which matches (a) and not (b) 266 // is not encountered.) 267 static bool isHandledGCPointerType(Type *T, GCStrategy *GC) { 268 // We fully support gc pointers 269 if (isGCPointerType(T, GC)) 270 return true; 271 // We partially support vectors of gc pointers. The code will assert if it 272 // can't handle something. 
273 if (auto VT = dyn_cast<VectorType>(T)) 274 if (isGCPointerType(VT->getElementType(), GC)) 275 return true; 276 return false; 277 } 278 279 #ifndef NDEBUG 280 /// Returns true if this type contains a gc pointer whether we know how to 281 /// handle that type or not. 282 static bool containsGCPtrType(Type *Ty, GCStrategy *GC) { 283 if (isGCPointerType(Ty, GC)) 284 return true; 285 if (VectorType *VT = dyn_cast<VectorType>(Ty)) 286 return isGCPointerType(VT->getScalarType(), GC); 287 if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) 288 return containsGCPtrType(AT->getElementType(), GC); 289 if (StructType *ST = dyn_cast<StructType>(Ty)) 290 return llvm::any_of(ST->elements(), 291 [GC](Type *Ty) { return containsGCPtrType(Ty, GC); }); 292 return false; 293 } 294 295 // Returns true if this is a type which a) is a gc pointer or contains a GC 296 // pointer and b) is of a type which the code doesn't expect (i.e. first class 297 // aggregates). Used to trip assertions. 298 static bool isUnhandledGCPointerType(Type *Ty, GCStrategy *GC) { 299 return containsGCPtrType(Ty, GC) && !isHandledGCPointerType(Ty, GC); 300 } 301 #endif 302 303 // Return the name of the value suffixed with the provided value, or if the 304 // value didn't have a name, the default value specified. 305 static std::string suffixed_name_or(Value *V, StringRef Suffix, 306 StringRef DefaultName) { 307 return V->hasName() ? (V->getName() + Suffix).str() : DefaultName.str(); 308 } 309 310 // Conservatively identifies any definitions which might be live at the 311 // given instruction. The analysis is performed immediately before the 312 // given instruction. Values defined by that instruction are not considered 313 // live. Values used by that instruction are considered live. 314 static void analyzeParsePointLiveness( 315 DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, CallBase *Call, 316 PartiallyConstructedSafepointRecord &Result, GCStrategy *GC) { 317 StatepointLiveSetTy LiveSet; 318 findLiveSetAtInst(Call, OriginalLivenessData, LiveSet, GC); 319 320 if (PrintLiveSet) { 321 dbgs() << "Live Variables:\n"; 322 for (Value *V : LiveSet) 323 dbgs() << " " << V->getName() << " " << *V << "\n"; 324 } 325 if (PrintLiveSetSize) { 326 dbgs() << "Safepoint For: " << Call->getCalledOperand()->getName() << "\n"; 327 dbgs() << "Number live values: " << LiveSet.size() << "\n"; 328 } 329 Result.LiveSet = LiveSet; 330 } 331 332 /// Returns true if V is a known base. 333 static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases); 334 335 /// Caches the IsKnownBase flag for a value and asserts that it wasn't present 336 /// in the cache before. 337 static void setKnownBase(Value *V, bool IsKnownBase, 338 IsKnownBaseMapTy &KnownBases); 339 340 static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache, 341 IsKnownBaseMapTy &KnownBases); 342 343 /// Return a base defining value for the 'Index' element of the given vector 344 /// instruction 'I'. If Index is null, returns a BDV for the entire vector 345 /// 'I'. As an optimization, this method will try to determine when the 346 /// element is known to already be a base pointer. If this can be established, 347 /// the second value in the returned pair will be true. Note that either a 348 /// vector or a pointer typed value can be returned. For the former, the 349 /// vector returned is a BDV (and possibly a base) of the entire vector 'I'. 350 /// If the later, the return pointer is a BDV (or possibly a base) for the 351 /// particular element in 'I'. 
352 static Value *findBaseDefiningValueOfVector(Value *I, DefiningValueMapTy &Cache, 353 IsKnownBaseMapTy &KnownBases) { 354 // Each case parallels findBaseDefiningValue below, see that code for 355 // detailed motivation. 356 357 auto Cached = Cache.find(I); 358 if (Cached != Cache.end()) 359 return Cached->second; 360 361 if (isa<Argument>(I)) { 362 // An incoming argument to the function is a base pointer 363 Cache[I] = I; 364 setKnownBase(I, /* IsKnownBase */true, KnownBases); 365 return I; 366 } 367 368 if (isa<Constant>(I)) { 369 // Base of constant vector consists only of constant null pointers. 370 // For reasoning see similar case inside 'findBaseDefiningValue' function. 371 auto *CAZ = ConstantAggregateZero::get(I->getType()); 372 Cache[I] = CAZ; 373 setKnownBase(CAZ, /* IsKnownBase */true, KnownBases); 374 return CAZ; 375 } 376 377 if (isa<LoadInst>(I)) { 378 Cache[I] = I; 379 setKnownBase(I, /* IsKnownBase */true, KnownBases); 380 return I; 381 } 382 383 if (isa<InsertElementInst>(I)) { 384 // We don't know whether this vector contains entirely base pointers or 385 // not. To be conservatively correct, we treat it as a BDV and will 386 // duplicate code as needed to construct a parallel vector of bases. 387 Cache[I] = I; 388 setKnownBase(I, /* IsKnownBase */false, KnownBases); 389 return I; 390 } 391 392 if (isa<ShuffleVectorInst>(I)) { 393 // We don't know whether this vector contains entirely base pointers or 394 // not. To be conservatively correct, we treat it as a BDV and will 395 // duplicate code as needed to construct a parallel vector of bases. 396 // TODO: There a number of local optimizations which could be applied here 397 // for particular sufflevector patterns. 398 Cache[I] = I; 399 setKnownBase(I, /* IsKnownBase */false, KnownBases); 400 return I; 401 } 402 403 // The behavior of getelementptr instructions is the same for vector and 404 // non-vector data types. 405 if (auto *GEP = dyn_cast<GetElementPtrInst>(I)) { 406 auto *BDV = 407 findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases); 408 Cache[GEP] = BDV; 409 return BDV; 410 } 411 412 // The behavior of freeze instructions is the same for vector and 413 // non-vector data types. 414 if (auto *Freeze = dyn_cast<FreezeInst>(I)) { 415 auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases); 416 Cache[Freeze] = BDV; 417 return BDV; 418 } 419 420 // If the pointer comes through a bitcast of a vector of pointers to 421 // a vector of another type of pointer, then look through the bitcast 422 if (auto *BC = dyn_cast<BitCastInst>(I)) { 423 auto *BDV = findBaseDefiningValue(BC->getOperand(0), Cache, KnownBases); 424 Cache[BC] = BDV; 425 return BDV; 426 } 427 428 // We assume that functions in the source language only return base 429 // pointers. This should probably be generalized via attributes to support 430 // both source language and internal functions. 431 if (isa<CallInst>(I) || isa<InvokeInst>(I)) { 432 Cache[I] = I; 433 setKnownBase(I, /* IsKnownBase */true, KnownBases); 434 return I; 435 } 436 437 // A PHI or Select is a base defining value. The outer findBasePointer 438 // algorithm is responsible for constructing a base value for this BDV. 
439 assert((isa<SelectInst>(I) || isa<PHINode>(I)) && 440 "unknown vector instruction - no base found for vector element"); 441 Cache[I] = I; 442 setKnownBase(I, /* IsKnownBase */false, KnownBases); 443 return I; 444 } 445 446 /// Helper function for findBasePointer - Will return a value which either a) 447 /// defines the base pointer for the input, b) blocks the simple search 448 /// (i.e. a PHI or Select of two derived pointers), or c) involves a change 449 /// from pointer to vector type or back. 450 static Value *findBaseDefiningValue(Value *I, DefiningValueMapTy &Cache, 451 IsKnownBaseMapTy &KnownBases) { 452 assert(I->getType()->isPtrOrPtrVectorTy() && 453 "Illegal to ask for the base pointer of a non-pointer type"); 454 auto Cached = Cache.find(I); 455 if (Cached != Cache.end()) 456 return Cached->second; 457 458 if (I->getType()->isVectorTy()) 459 return findBaseDefiningValueOfVector(I, Cache, KnownBases); 460 461 if (isa<Argument>(I)) { 462 // An incoming argument to the function is a base pointer 463 // We should have never reached here if this argument isn't an gc value 464 Cache[I] = I; 465 setKnownBase(I, /* IsKnownBase */true, KnownBases); 466 return I; 467 } 468 469 if (isa<Constant>(I)) { 470 // We assume that objects with a constant base (e.g. a global) can't move 471 // and don't need to be reported to the collector because they are always 472 // live. Besides global references, all kinds of constants (e.g. undef, 473 // constant expressions, null pointers) can be introduced by the inliner or 474 // the optimizer, especially on dynamically dead paths. 475 // Here we treat all of them as having single null base. By doing this we 476 // trying to avoid problems reporting various conflicts in a form of 477 // "phi (const1, const2)" or "phi (const, regular gc ptr)". 478 // See constant.ll file for relevant test cases. 479 480 auto *CPN = ConstantPointerNull::get(cast<PointerType>(I->getType())); 481 Cache[I] = CPN; 482 setKnownBase(CPN, /* IsKnownBase */true, KnownBases); 483 return CPN; 484 } 485 486 // inttoptrs in an integral address space are currently ill-defined. We 487 // treat them as defining base pointers here for consistency with the 488 // constant rule above and because we don't really have a better semantic 489 // to give them. Note that the optimizer is always free to insert undefined 490 // behavior on dynamically dead paths as well. 491 if (isa<IntToPtrInst>(I)) { 492 Cache[I] = I; 493 setKnownBase(I, /* IsKnownBase */true, KnownBases); 494 return I; 495 } 496 497 if (CastInst *CI = dyn_cast<CastInst>(I)) { 498 Value *Def = CI->stripPointerCasts(); 499 // If stripping pointer casts changes the address space there is an 500 // addrspacecast in between. 501 assert(cast<PointerType>(Def->getType())->getAddressSpace() == 502 cast<PointerType>(CI->getType())->getAddressSpace() && 503 "unsupported addrspacecast"); 504 // If we find a cast instruction here, it means we've found a cast which is 505 // not simply a pointer cast (i.e. an inttoptr). We don't know how to 506 // handle int->ptr conversion. 
507 assert(!isa<CastInst>(Def) && "shouldn't find another cast here"); 508 auto *BDV = findBaseDefiningValue(Def, Cache, KnownBases); 509 Cache[CI] = BDV; 510 return BDV; 511 } 512 513 if (isa<LoadInst>(I)) { 514 // The value loaded is an gc base itself 515 Cache[I] = I; 516 setKnownBase(I, /* IsKnownBase */true, KnownBases); 517 return I; 518 } 519 520 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) { 521 // The base of this GEP is the base 522 auto *BDV = 523 findBaseDefiningValue(GEP->getPointerOperand(), Cache, KnownBases); 524 Cache[GEP] = BDV; 525 return BDV; 526 } 527 528 if (auto *Freeze = dyn_cast<FreezeInst>(I)) { 529 auto *BDV = findBaseDefiningValue(Freeze->getOperand(0), Cache, KnownBases); 530 Cache[Freeze] = BDV; 531 return BDV; 532 } 533 534 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 535 switch (II->getIntrinsicID()) { 536 default: 537 // fall through to general call handling 538 break; 539 case Intrinsic::experimental_gc_statepoint: 540 llvm_unreachable("statepoints don't produce pointers"); 541 case Intrinsic::experimental_gc_relocate: 542 // Rerunning safepoint insertion after safepoints are already 543 // inserted is not supported. It could probably be made to work, 544 // but why are you doing this? There's no good reason. 545 llvm_unreachable("repeat safepoint insertion is not supported"); 546 case Intrinsic::gcroot: 547 // Currently, this mechanism hasn't been extended to work with gcroot. 548 // There's no reason it couldn't be, but I haven't thought about the 549 // implications much. 550 llvm_unreachable( 551 "interaction with the gcroot mechanism is not supported"); 552 case Intrinsic::experimental_gc_get_pointer_base: 553 auto *BDV = findBaseDefiningValue(II->getOperand(0), Cache, KnownBases); 554 Cache[II] = BDV; 555 return BDV; 556 } 557 } 558 // We assume that functions in the source language only return base 559 // pointers. This should probably be generalized via attributes to support 560 // both source language and internal functions. 561 if (isa<CallInst>(I) || isa<InvokeInst>(I)) { 562 Cache[I] = I; 563 setKnownBase(I, /* IsKnownBase */true, KnownBases); 564 return I; 565 } 566 567 // TODO: I have absolutely no idea how to implement this part yet. It's not 568 // necessarily hard, I just haven't really looked at it yet. 569 assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented"); 570 571 if (isa<AtomicCmpXchgInst>(I)) { 572 // A CAS is effectively a atomic store and load combined under a 573 // predicate. From the perspective of base pointers, we just treat it 574 // like a load. 575 Cache[I] = I; 576 setKnownBase(I, /* IsKnownBase */true, KnownBases); 577 return I; 578 } 579 580 if (isa<AtomicRMWInst>(I)) { 581 assert(cast<AtomicRMWInst>(I)->getOperation() == AtomicRMWInst::Xchg && 582 "Only Xchg is allowed for pointer values"); 583 // A RMW Xchg is a combined atomic load and store, so we can treat the 584 // loaded value as a base pointer. 585 Cache[I] = I; 586 setKnownBase(I, /* IsKnownBase */ true, KnownBases); 587 return I; 588 } 589 590 // The aggregate ops. Aggregates can either be in the heap or on the 591 // stack, but in either case, this is simply a field load. As a result, 592 // this is a defining definition of the base just like a load is. 593 if (isa<ExtractValueInst>(I)) { 594 Cache[I] = I; 595 setKnownBase(I, /* IsKnownBase */true, KnownBases); 596 return I; 597 } 598 599 // We should never see an insert vector since that would require we be 600 // tracing back a struct value not a pointer value. 
601 assert(!isa<InsertValueInst>(I) && 602 "Base pointer for a struct is meaningless"); 603 604 // This value might have been generated by findBasePointer() called when 605 // substituting gc.get.pointer.base() intrinsic. 606 bool IsKnownBase = 607 isa<Instruction>(I) && cast<Instruction>(I)->getMetadata("is_base_value"); 608 setKnownBase(I, /* IsKnownBase */IsKnownBase, KnownBases); 609 Cache[I] = I; 610 611 // An extractelement produces a base result exactly when it's input does. 612 // We may need to insert a parallel instruction to extract the appropriate 613 // element out of the base vector corresponding to the input. Given this, 614 // it's analogous to the phi and select case even though it's not a merge. 615 if (isa<ExtractElementInst>(I)) 616 // Note: There a lot of obvious peephole cases here. This are deliberately 617 // handled after the main base pointer inference algorithm to make writing 618 // test cases to exercise that code easier. 619 return I; 620 621 // The last two cases here don't return a base pointer. Instead, they 622 // return a value which dynamically selects from among several base 623 // derived pointers (each with it's own base potentially). It's the job of 624 // the caller to resolve these. 625 assert((isa<SelectInst>(I) || isa<PHINode>(I)) && 626 "missing instruction case in findBaseDefiningValue"); 627 return I; 628 } 629 630 /// Returns the base defining value for this value. 631 static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache, 632 IsKnownBaseMapTy &KnownBases) { 633 if (!Cache.contains(I)) { 634 auto *BDV = findBaseDefiningValue(I, Cache, KnownBases); 635 Cache[I] = BDV; 636 LLVM_DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> " 637 << Cache[I]->getName() << ", is known base = " 638 << KnownBases[I] << "\n"); 639 } 640 assert(Cache[I] != nullptr); 641 assert(KnownBases.contains(Cache[I]) && 642 "Cached value must be present in known bases map"); 643 return Cache[I]; 644 } 645 646 /// Return a base pointer for this value if known. Otherwise, return it's 647 /// base defining value. 648 static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache, 649 IsKnownBaseMapTy &KnownBases) { 650 Value *Def = findBaseDefiningValueCached(I, Cache, KnownBases); 651 auto Found = Cache.find(Def); 652 if (Found != Cache.end()) { 653 // Either a base-of relation, or a self reference. Caller must check. 654 return Found->second; 655 } 656 // Only a BDV available 657 return Def; 658 } 659 660 #ifndef NDEBUG 661 /// This value is a base pointer that is not generated by RS4GC, i.e. it already 662 /// exists in the code. 663 static bool isOriginalBaseResult(Value *V) { 664 // no recursion possible 665 return !isa<PHINode>(V) && !isa<SelectInst>(V) && 666 !isa<ExtractElementInst>(V) && !isa<InsertElementInst>(V) && 667 !isa<ShuffleVectorInst>(V); 668 } 669 #endif 670 671 static bool isKnownBase(Value *V, const IsKnownBaseMapTy &KnownBases) { 672 auto It = KnownBases.find(V); 673 assert(It != KnownBases.end() && "Value not present in the map"); 674 return It->second; 675 } 676 677 static void setKnownBase(Value *V, bool IsKnownBase, 678 IsKnownBaseMapTy &KnownBases) { 679 #ifndef NDEBUG 680 auto It = KnownBases.find(V); 681 if (It != KnownBases.end()) 682 assert(It->second == IsKnownBase && "Changing already present value"); 683 #endif 684 KnownBases[V] = IsKnownBase; 685 } 686 687 // Returns true if First and Second values are both scalar or both vector. 
688 static bool areBothVectorOrScalar(Value *First, Value *Second) { 689 return isa<VectorType>(First->getType()) == 690 isa<VectorType>(Second->getType()); 691 } 692 693 namespace { 694 695 /// Models the state of a single base defining value in the findBasePointer 696 /// algorithm for determining where a new instruction is needed to propagate 697 /// the base of this BDV. 698 class BDVState { 699 public: 700 enum StatusTy { 701 // Starting state of lattice 702 Unknown, 703 // Some specific base value -- does *not* mean that instruction 704 // propagates the base of the object 705 // ex: gep %arg, 16 -> %arg is the base value 706 Base, 707 // Need to insert a node to represent a merge. 708 Conflict 709 }; 710 711 BDVState() { 712 llvm_unreachable("missing state in map"); 713 } 714 715 explicit BDVState(Value *OriginalValue) 716 : OriginalValue(OriginalValue) {} 717 explicit BDVState(Value *OriginalValue, StatusTy Status, Value *BaseValue = nullptr) 718 : OriginalValue(OriginalValue), Status(Status), BaseValue(BaseValue) { 719 assert(Status != Base || BaseValue); 720 } 721 722 StatusTy getStatus() const { return Status; } 723 Value *getOriginalValue() const { return OriginalValue; } 724 Value *getBaseValue() const { return BaseValue; } 725 726 bool isBase() const { return getStatus() == Base; } 727 bool isUnknown() const { return getStatus() == Unknown; } 728 bool isConflict() const { return getStatus() == Conflict; } 729 730 // Values of type BDVState form a lattice, and this function implements the 731 // meet 732 // operation. 733 void meet(const BDVState &Other) { 734 auto markConflict = [&]() { 735 Status = BDVState::Conflict; 736 BaseValue = nullptr; 737 }; 738 // Conflict is a final state. 739 if (isConflict()) 740 return; 741 // if we are not known - just take other state. 742 if (isUnknown()) { 743 Status = Other.getStatus(); 744 BaseValue = Other.getBaseValue(); 745 return; 746 } 747 // We are base. 748 assert(isBase() && "Unknown state"); 749 // If other is unknown - just keep our state. 750 if (Other.isUnknown()) 751 return; 752 // If other is conflict - it is a final state. 753 if (Other.isConflict()) 754 return markConflict(); 755 // Other is base as well. 756 assert(Other.isBase() && "Unknown state"); 757 // If bases are different - Conflict. 758 if (getBaseValue() != Other.getBaseValue()) 759 return markConflict(); 760 // We are identical, do nothing. 761 } 762 763 bool operator==(const BDVState &Other) const { 764 return OriginalValue == Other.OriginalValue && BaseValue == Other.BaseValue && 765 Status == Other.Status; 766 } 767 768 bool operator!=(const BDVState &other) const { return !(*this == other); } 769 770 LLVM_DUMP_METHOD 771 void dump() const { 772 print(dbgs()); 773 dbgs() << '\n'; 774 } 775 776 void print(raw_ostream &OS) const { 777 switch (getStatus()) { 778 case Unknown: 779 OS << "U"; 780 break; 781 case Base: 782 OS << "B"; 783 break; 784 case Conflict: 785 OS << "C"; 786 break; 787 } 788 OS << " (base " << getBaseValue() << " - " 789 << (getBaseValue() ? getBaseValue()->getName() : "nullptr") << ")" 790 << " for " << OriginalValue->getName() << ":"; 791 } 792 793 private: 794 AssertingVH<Value> OriginalValue; // instruction this state corresponds to 795 StatusTy Status = Unknown; 796 AssertingVH<Value> BaseValue = nullptr; // Non-null only if Status == Base. 
797 }; 798 799 } // end anonymous namespace 800 801 #ifndef NDEBUG 802 static raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) { 803 State.print(OS); 804 return OS; 805 } 806 #endif 807 808 /// For a given value or instruction, figure out what base ptr its derived from. 809 /// For gc objects, this is simply itself. On success, returns a value which is 810 /// the base pointer. (This is reliable and can be used for relocation.) On 811 /// failure, returns nullptr. 812 static Value *findBasePointer(Value *I, DefiningValueMapTy &Cache, 813 IsKnownBaseMapTy &KnownBases) { 814 Value *Def = findBaseOrBDV(I, Cache, KnownBases); 815 816 if (isKnownBase(Def, KnownBases) && areBothVectorOrScalar(Def, I)) 817 return Def; 818 819 // Here's the rough algorithm: 820 // - For every SSA value, construct a mapping to either an actual base 821 // pointer or a PHI which obscures the base pointer. 822 // - Construct a mapping from PHI to unknown TOP state. Use an 823 // optimistic algorithm to propagate base pointer information. Lattice 824 // looks like: 825 // UNKNOWN 826 // b1 b2 b3 b4 827 // CONFLICT 828 // When algorithm terminates, all PHIs will either have a single concrete 829 // base or be in a conflict state. 830 // - For every conflict, insert a dummy PHI node without arguments. Add 831 // these to the base[Instruction] = BasePtr mapping. For every 832 // non-conflict, add the actual base. 833 // - For every conflict, add arguments for the base[a] of each input 834 // arguments. 835 // 836 // Note: A simpler form of this would be to add the conflict form of all 837 // PHIs without running the optimistic algorithm. This would be 838 // analogous to pessimistic data flow and would likely lead to an 839 // overall worse solution. 840 841 #ifndef NDEBUG 842 auto isExpectedBDVType = [](Value *BDV) { 843 return isa<PHINode>(BDV) || isa<SelectInst>(BDV) || 844 isa<ExtractElementInst>(BDV) || isa<InsertElementInst>(BDV) || 845 isa<ShuffleVectorInst>(BDV); 846 }; 847 #endif 848 849 // Once populated, will contain a mapping from each potentially non-base BDV 850 // to a lattice value (described above) which corresponds to that BDV. 851 // We use the order of insertion (DFS over the def/use graph) to provide a 852 // stable deterministic ordering for visiting DenseMaps (which are unordered) 853 // below. This is important for deterministic compilation. 
854 MapVector<Value *, BDVState> States; 855 856 #ifndef NDEBUG 857 auto VerifyStates = [&]() { 858 for (auto &Entry : States) { 859 assert(Entry.first == Entry.second.getOriginalValue()); 860 } 861 }; 862 #endif 863 864 auto visitBDVOperands = [](Value *BDV, std::function<void (Value*)> F) { 865 if (PHINode *PN = dyn_cast<PHINode>(BDV)) { 866 for (Value *InVal : PN->incoming_values()) 867 F(InVal); 868 } else if (SelectInst *SI = dyn_cast<SelectInst>(BDV)) { 869 F(SI->getTrueValue()); 870 F(SI->getFalseValue()); 871 } else if (auto *EE = dyn_cast<ExtractElementInst>(BDV)) { 872 F(EE->getVectorOperand()); 873 } else if (auto *IE = dyn_cast<InsertElementInst>(BDV)) { 874 F(IE->getOperand(0)); 875 F(IE->getOperand(1)); 876 } else if (auto *SV = dyn_cast<ShuffleVectorInst>(BDV)) { 877 // For a canonical broadcast, ignore the undef argument 878 // (without this, we insert a parallel base shuffle for every broadcast) 879 F(SV->getOperand(0)); 880 if (!SV->isZeroEltSplat()) 881 F(SV->getOperand(1)); 882 } else { 883 llvm_unreachable("unexpected BDV type"); 884 } 885 }; 886 887 888 // Recursively fill in all base defining values reachable from the initial 889 // one for which we don't already know a definite base value for 890 /* scope */ { 891 SmallVector<Value*, 16> Worklist; 892 Worklist.push_back(Def); 893 States.insert({Def, BDVState(Def)}); 894 while (!Worklist.empty()) { 895 Value *Current = Worklist.pop_back_val(); 896 assert(!isOriginalBaseResult(Current) && "why did it get added?"); 897 898 auto visitIncomingValue = [&](Value *InVal) { 899 Value *Base = findBaseOrBDV(InVal, Cache, KnownBases); 900 if (isKnownBase(Base, KnownBases) && areBothVectorOrScalar(Base, InVal)) 901 // Known bases won't need new instructions introduced and can be 902 // ignored safely. However, this can only be done when InVal and Base 903 // are both scalar or both vector. Otherwise, we need to find a 904 // correct BDV for InVal, by creating an entry in the lattice 905 // (States). 906 return; 907 assert(isExpectedBDVType(Base) && "the only non-base values " 908 "we see should be base defining values"); 909 if (States.insert(std::make_pair(Base, BDVState(Base))).second) 910 Worklist.push_back(Base); 911 }; 912 913 visitBDVOperands(Current, visitIncomingValue); 914 } 915 } 916 917 #ifndef NDEBUG 918 VerifyStates(); 919 LLVM_DEBUG(dbgs() << "States after initialization:\n"); 920 for (const auto &Pair : States) { 921 LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n"); 922 } 923 #endif 924 925 // Iterate forward through the value graph pruning any node from the state 926 // list where all of the inputs are base pointers. The purpose of this is to 927 // reuse existing values when the derived pointer we were asked to materialize 928 // a base pointer for happens to be a base pointer itself. (Or a sub-graph 929 // feeding it does.) 930 SmallVector<Value *> ToRemove; 931 do { 932 ToRemove.clear(); 933 for (auto Pair : States) { 934 Value *BDV = Pair.first; 935 auto canPruneInput = [&](Value *V) { 936 // If the input of the BDV is the BDV itself we can prune it. This is 937 // only possible if the BDV is a PHI node. 938 if (V->stripPointerCasts() == BDV) 939 return true; 940 Value *VBDV = findBaseOrBDV(V, Cache, KnownBases); 941 if (V->stripPointerCasts() != VBDV) 942 return false; 943 // The assumption is that anything not in the state list is 944 // propagates a base pointer. 
945 return States.count(VBDV) == 0; 946 }; 947 948 bool CanPrune = true; 949 visitBDVOperands(BDV, [&](Value *Op) { 950 CanPrune = CanPrune && canPruneInput(Op); 951 }); 952 if (CanPrune) 953 ToRemove.push_back(BDV); 954 } 955 for (Value *V : ToRemove) { 956 States.erase(V); 957 // Cache the fact V is it's own base for later usage. 958 Cache[V] = V; 959 } 960 } while (!ToRemove.empty()); 961 962 // Did we manage to prove that Def itself must be a base pointer? 963 if (!States.count(Def)) 964 return Def; 965 966 // Return a phi state for a base defining value. We'll generate a new 967 // base state for known bases and expect to find a cached state otherwise. 968 auto GetStateForBDV = [&](Value *BaseValue, Value *Input) { 969 auto I = States.find(BaseValue); 970 if (I != States.end()) 971 return I->second; 972 assert(areBothVectorOrScalar(BaseValue, Input)); 973 return BDVState(BaseValue, BDVState::Base, BaseValue); 974 }; 975 976 // Even though we have identified a concrete base (or a conflict) for all live 977 // pointers at this point, there are cases where the base is of an 978 // incompatible type compared to the original instruction. We conservatively 979 // mark those as conflicts to ensure that corresponding BDVs will be generated 980 // in the next steps. 981 982 // this is a rather explicit check for all cases where we should mark the 983 // state as a conflict to force the latter stages of the algorithm to emit 984 // the BDVs. 985 // TODO: in many cases the instructions emited for the conflicting states 986 // will be identical to the I itself (if the I's operate on their BDVs 987 // themselves). We should exploit this, but can't do it here since it would 988 // break the invariant about the BDVs not being known to be a base. 989 // TODO: the code also does not handle constants at all - the algorithm relies 990 // on all constants having the same BDV and therefore constant-only insns 991 // will never be in conflict, but this check is ignored here. If the 992 // constant conflicts will be to BDVs themselves, they will be identical 993 // instructions and will get optimized away (as in the above TODO) 994 auto MarkConflict = [&](Instruction *I, Value *BaseValue) { 995 // II and EE mixes vector & scalar so is always a conflict 996 if (isa<InsertElementInst>(I) || isa<ExtractElementInst>(I)) 997 return true; 998 // Shuffle vector is always a conflict as it creates new vector from 999 // existing ones. 1000 if (isa<ShuffleVectorInst>(I)) 1001 return true; 1002 // Any instructions where the computed base type differs from the 1003 // instruction type. An example is where an extract instruction is used by a 1004 // select. Here the select's BDV is a vector (because of extract's BDV), 1005 // while the select itself is a scalar type. Note that the IE and EE 1006 // instruction check is not fully subsumed by the vector<->scalar check at 1007 // the end, this is due to the BDV algorithm being ignorant of BDV types at 1008 // this junction. 1009 if (!areBothVectorOrScalar(BaseValue, I)) 1010 return true; 1011 return false; 1012 }; 1013 1014 bool Progress = true; 1015 while (Progress) { 1016 #ifndef NDEBUG 1017 const size_t OldSize = States.size(); 1018 #endif 1019 Progress = false; 1020 // We're only changing values in this loop, thus safe to keep iterators. 1021 // Since this is computing a fixed point, the order of visit does not 1022 // effect the result. TODO: We could use a worklist here and make this run 1023 // much faster. 
1024 for (auto Pair : States) { 1025 Value *BDV = Pair.first; 1026 // Only values that do not have known bases or those that have differing 1027 // type (scalar versus vector) from a possible known base should be in the 1028 // lattice. 1029 assert((!isKnownBase(BDV, KnownBases) || 1030 !areBothVectorOrScalar(BDV, Pair.second.getBaseValue())) && 1031 "why did it get added?"); 1032 1033 BDVState NewState(BDV); 1034 visitBDVOperands(BDV, [&](Value *Op) { 1035 Value *BDV = findBaseOrBDV(Op, Cache, KnownBases); 1036 auto OpState = GetStateForBDV(BDV, Op); 1037 NewState.meet(OpState); 1038 }); 1039 1040 // if the instruction has known base, but should in fact be marked as 1041 // conflict because of incompatible in/out types, we mark it as such 1042 // ensuring that it will propagate through the fixpoint iteration 1043 auto I = cast<Instruction>(BDV); 1044 auto BV = NewState.getBaseValue(); 1045 if (BV && MarkConflict(I, BV)) 1046 NewState = BDVState(I, BDVState::Conflict); 1047 1048 BDVState OldState = Pair.second; 1049 if (OldState != NewState) { 1050 Progress = true; 1051 States[BDV] = NewState; 1052 } 1053 } 1054 1055 assert(OldSize == States.size() && 1056 "fixed point shouldn't be adding any new nodes to state"); 1057 } 1058 1059 #ifndef NDEBUG 1060 VerifyStates(); 1061 LLVM_DEBUG(dbgs() << "States after meet iteration:\n"); 1062 for (const auto &Pair : States) { 1063 LLVM_DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n"); 1064 } 1065 1066 // since we do the conflict marking as part of the fixpoint iteration this 1067 // loop only asserts that invariants are met 1068 for (auto Pair : States) { 1069 Instruction *I = cast<Instruction>(Pair.first); 1070 BDVState State = Pair.second; 1071 auto *BaseValue = State.getBaseValue(); 1072 // Only values that do not have known bases or those that have differing 1073 // type (scalar versus vector) from a possible known base should be in the 1074 // lattice. 1075 assert( 1076 (!isKnownBase(I, KnownBases) || !areBothVectorOrScalar(I, BaseValue)) && 1077 "why did it get added?"); 1078 assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); 1079 } 1080 #endif 1081 1082 // Insert Phis for all conflicts 1083 // TODO: adjust naming patterns to avoid this order of iteration dependency 1084 for (auto Pair : States) { 1085 Instruction *I = cast<Instruction>(Pair.first); 1086 BDVState State = Pair.second; 1087 // Only values that do not have known bases or those that have differing 1088 // type (scalar versus vector) from a possible known base should be in the 1089 // lattice. 1090 assert((!isKnownBase(I, KnownBases) || 1091 !areBothVectorOrScalar(I, State.getBaseValue())) && 1092 "why did it get added?"); 1093 assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); 1094 1095 // Since we're joining a vector and scalar base, they can never be the 1096 // same. As a result, we should always see insert element having reached 1097 // the conflict state. 
1098 assert(!isa<InsertElementInst>(I) || State.isConflict()); 1099 1100 if (!State.isConflict()) 1101 continue; 1102 1103 auto getMangledName = [](Instruction *I) -> std::string { 1104 if (isa<PHINode>(I)) { 1105 return suffixed_name_or(I, ".base", "base_phi"); 1106 } else if (isa<SelectInst>(I)) { 1107 return suffixed_name_or(I, ".base", "base_select"); 1108 } else if (isa<ExtractElementInst>(I)) { 1109 return suffixed_name_or(I, ".base", "base_ee"); 1110 } else if (isa<InsertElementInst>(I)) { 1111 return suffixed_name_or(I, ".base", "base_ie"); 1112 } else { 1113 return suffixed_name_or(I, ".base", "base_sv"); 1114 } 1115 }; 1116 1117 Instruction *BaseInst = I->clone(); 1118 BaseInst->insertBefore(I->getIterator()); 1119 BaseInst->setName(getMangledName(I)); 1120 // Add metadata marking this as a base value 1121 BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {})); 1122 States[I] = BDVState(I, BDVState::Conflict, BaseInst); 1123 setKnownBase(BaseInst, /* IsKnownBase */true, KnownBases); 1124 } 1125 1126 #ifndef NDEBUG 1127 VerifyStates(); 1128 #endif 1129 1130 // Returns a instruction which produces the base pointer for a given 1131 // instruction. The instruction is assumed to be an input to one of the BDVs 1132 // seen in the inference algorithm above. As such, we must either already 1133 // know it's base defining value is a base, or have inserted a new 1134 // instruction to propagate the base of it's BDV and have entered that newly 1135 // introduced instruction into the state table. In either case, we are 1136 // assured to be able to determine an instruction which produces it's base 1137 // pointer. 1138 auto getBaseForInput = [&](Value *Input, Instruction *InsertPt) { 1139 Value *BDV = findBaseOrBDV(Input, Cache, KnownBases); 1140 Value *Base = nullptr; 1141 if (!States.count(BDV)) { 1142 assert(areBothVectorOrScalar(BDV, Input)); 1143 Base = BDV; 1144 } else { 1145 // Either conflict or base. 1146 assert(States.count(BDV)); 1147 Base = States[BDV].getBaseValue(); 1148 } 1149 assert(Base && "Can't be null"); 1150 // The cast is needed since base traversal may strip away bitcasts 1151 if (Base->getType() != Input->getType() && InsertPt) 1152 Base = new BitCastInst(Base, Input->getType(), "cast", 1153 InsertPt->getIterator()); 1154 return Base; 1155 }; 1156 1157 // Fixup all the inputs of the new PHIs. Visit order needs to be 1158 // deterministic and predictable because we're naming newly created 1159 // instructions. 1160 for (auto Pair : States) { 1161 Instruction *BDV = cast<Instruction>(Pair.first); 1162 BDVState State = Pair.second; 1163 1164 // Only values that do not have known bases or those that have differing 1165 // type (scalar versus vector) from a possible known base should be in the 1166 // lattice. 1167 assert((!isKnownBase(BDV, KnownBases) || 1168 !areBothVectorOrScalar(BDV, State.getBaseValue())) && 1169 "why did it get added?"); 1170 assert(!State.isUnknown() && "Optimistic algorithm didn't complete!"); 1171 if (!State.isConflict()) 1172 continue; 1173 1174 if (PHINode *BasePHI = dyn_cast<PHINode>(State.getBaseValue())) { 1175 PHINode *PN = cast<PHINode>(BDV); 1176 const unsigned NumPHIValues = PN->getNumIncomingValues(); 1177 1178 // The IR verifier requires phi nodes with multiple entries from the 1179 // same basic block to have the same incoming value for each of those 1180 // entries. Since we're inserting bitcasts in the loop, make sure we 1181 // do so at least once per incoming block. 
1182 DenseMap<BasicBlock *, Value*> BlockToValue; 1183 for (unsigned i = 0; i < NumPHIValues; i++) { 1184 Value *InVal = PN->getIncomingValue(i); 1185 BasicBlock *InBB = PN->getIncomingBlock(i); 1186 if (!BlockToValue.count(InBB)) 1187 BlockToValue[InBB] = getBaseForInput(InVal, InBB->getTerminator()); 1188 else { 1189 #ifndef NDEBUG 1190 Value *OldBase = BlockToValue[InBB]; 1191 Value *Base = getBaseForInput(InVal, nullptr); 1192 1193 // We can't use `stripPointerCasts` instead of this function because 1194 // `stripPointerCasts` doesn't handle vectors of pointers. 1195 auto StripBitCasts = [](Value *V) -> Value * { 1196 while (auto *BC = dyn_cast<BitCastInst>(V)) 1197 V = BC->getOperand(0); 1198 return V; 1199 }; 1200 // In essence this assert states: the only way two values 1201 // incoming from the same basic block may be different is by 1202 // being different bitcasts of the same value. A cleanup 1203 // that remains TODO is changing findBaseOrBDV to return an 1204 // llvm::Value of the correct type (and still remain pure). 1205 // This will remove the need to add bitcasts. 1206 assert(StripBitCasts(Base) == StripBitCasts(OldBase) && 1207 "findBaseOrBDV should be pure!"); 1208 #endif 1209 } 1210 Value *Base = BlockToValue[InBB]; 1211 BasePHI->setIncomingValue(i, Base); 1212 } 1213 } else if (SelectInst *BaseSI = 1214 dyn_cast<SelectInst>(State.getBaseValue())) { 1215 SelectInst *SI = cast<SelectInst>(BDV); 1216 1217 // Find the instruction which produces the base for each input. 1218 // We may need to insert a bitcast. 1219 BaseSI->setTrueValue(getBaseForInput(SI->getTrueValue(), BaseSI)); 1220 BaseSI->setFalseValue(getBaseForInput(SI->getFalseValue(), BaseSI)); 1221 } else if (auto *BaseEE = 1222 dyn_cast<ExtractElementInst>(State.getBaseValue())) { 1223 Value *InVal = cast<ExtractElementInst>(BDV)->getVectorOperand(); 1224 // Find the instruction which produces the base for each input. We may 1225 // need to insert a bitcast. 1226 BaseEE->setOperand(0, getBaseForInput(InVal, BaseEE)); 1227 } else if (auto *BaseIE = dyn_cast<InsertElementInst>(State.getBaseValue())){ 1228 auto *BdvIE = cast<InsertElementInst>(BDV); 1229 auto UpdateOperand = [&](int OperandIdx) { 1230 Value *InVal = BdvIE->getOperand(OperandIdx); 1231 Value *Base = getBaseForInput(InVal, BaseIE); 1232 BaseIE->setOperand(OperandIdx, Base); 1233 }; 1234 UpdateOperand(0); // vector operand 1235 UpdateOperand(1); // scalar operand 1236 } else { 1237 auto *BaseSV = cast<ShuffleVectorInst>(State.getBaseValue()); 1238 auto *BdvSV = cast<ShuffleVectorInst>(BDV); 1239 auto UpdateOperand = [&](int OperandIdx) { 1240 Value *InVal = BdvSV->getOperand(OperandIdx); 1241 Value *Base = getBaseForInput(InVal, BaseSV); 1242 BaseSV->setOperand(OperandIdx, Base); 1243 }; 1244 UpdateOperand(0); // vector operand 1245 if (!BdvSV->isZeroEltSplat()) 1246 UpdateOperand(1); // vector operand 1247 else { 1248 // Never read, so just use poison 1249 Value *InVal = BdvSV->getOperand(1); 1250 BaseSV->setOperand(1, PoisonValue::get(InVal->getType())); 1251 } 1252 } 1253 } 1254 1255 #ifndef NDEBUG 1256 VerifyStates(); 1257 #endif 1258 1259 // get the data layout to compare the sizes of base/derived pointer values 1260 [[maybe_unused]] auto &DL = 1261 cast<llvm::Instruction>(Def)->getDataLayout(); 1262 // Cache all of our results so we can cheaply reuse them 1263 // NOTE: This is actually two caches: one of the base defining value 1264 // relation and one of the base pointer relation! 
FIXME 1265 for (auto Pair : States) { 1266 auto *BDV = Pair.first; 1267 Value *Base = Pair.second.getBaseValue(); 1268 assert(BDV && Base); 1269 // Whenever we have a derived ptr(s), their base 1270 // ptr(s) must be of the same size, not necessarily the same type 1271 assert(DL.getTypeAllocSize(BDV->getType()) == 1272 DL.getTypeAllocSize(Base->getType()) && 1273 "Derived and base values should have same size"); 1274 // Only values that do not have known bases or those that have differing 1275 // type (scalar versus vector) from a possible known base should be in the 1276 // lattice. 1277 assert( 1278 (!isKnownBase(BDV, KnownBases) || !areBothVectorOrScalar(BDV, Base)) && 1279 "why did it get added?"); 1280 1281 LLVM_DEBUG( 1282 dbgs() << "Updating base value cache" 1283 << " for: " << BDV->getName() << " from: " 1284 << (Cache.count(BDV) ? Cache[BDV]->getName().str() : "none") 1285 << " to: " << Base->getName() << "\n"); 1286 1287 Cache[BDV] = Base; 1288 } 1289 assert(Cache.count(Def)); 1290 return Cache[Def]; 1291 } 1292 1293 // For a set of live pointers (base and/or derived), identify the base 1294 // pointer of the object which they are derived from. This routine will 1295 // mutate the IR graph as needed to make the 'base' pointer live at the 1296 // definition site of 'derived'. This ensures that any use of 'derived' can 1297 // also use 'base'. This may involve the insertion of a number of 1298 // additional PHI nodes. 1299 // 1300 // preconditions: live is a set of pointer type Values 1301 // 1302 // side effects: may insert PHI nodes into the existing CFG, will preserve 1303 // CFG, will not remove or mutate any existing nodes 1304 // 1305 // post condition: PointerToBase contains one (derived, base) pair for every 1306 // pointer in live. Note that derived can be equal to base if the original 1307 // pointer was a base pointer. 1308 static void findBasePointers(const StatepointLiveSetTy &live, 1309 PointerToBaseTy &PointerToBase, DominatorTree *DT, 1310 DefiningValueMapTy &DVCache, 1311 IsKnownBaseMapTy &KnownBases) { 1312 for (Value *ptr : live) { 1313 Value *base = findBasePointer(ptr, DVCache, KnownBases); 1314 assert(base && "failed to find base pointer"); 1315 PointerToBase[ptr] = base; 1316 assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) || 1317 DT->dominates(cast<Instruction>(base)->getParent(), 1318 cast<Instruction>(ptr)->getParent())) && 1319 "The base we found better dominate the derived pointer"); 1320 } 1321 } 1322 1323 /// Find the required based pointers (and adjust the live set) for the given 1324 /// parse point. 1325 static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, 1326 CallBase *Call, 1327 PartiallyConstructedSafepointRecord &result, 1328 PointerToBaseTy &PointerToBase, 1329 IsKnownBaseMapTy &KnownBases) { 1330 StatepointLiveSetTy PotentiallyDerivedPointers = result.LiveSet; 1331 // We assume that all pointers passed to deopt are base pointers; as an 1332 // optimization, we can use this to avoid separately materializing the base 1333 // pointer graph. This is only relevant since we're very conservative about 1334 // generating new conflict nodes during base pointer insertion. If we were 1335 // smarter there, this would be irrelevant. 
1336 if (auto Opt = Call->getOperandBundle(LLVMContext::OB_deopt)) 1337 for (Value *V : Opt->Inputs) { 1338 if (!PotentiallyDerivedPointers.count(V)) 1339 continue; 1340 PotentiallyDerivedPointers.remove(V); 1341 PointerToBase[V] = V; 1342 } 1343 findBasePointers(PotentiallyDerivedPointers, PointerToBase, &DT, DVCache, 1344 KnownBases); 1345 } 1346 1347 /// Given an updated version of the dataflow liveness results, update the 1348 /// liveset and base pointer maps for the call site CS. 1349 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, 1350 CallBase *Call, 1351 PartiallyConstructedSafepointRecord &result, 1352 PointerToBaseTy &PointerToBase, 1353 GCStrategy *GC); 1354 1355 static void recomputeLiveInValues( 1356 Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate, 1357 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records, 1358 PointerToBaseTy &PointerToBase, GCStrategy *GC) { 1359 // TODO-PERF: reuse the original liveness, then simply run the dataflow 1360 // again. The old values are still live and will help it stabilize quickly. 1361 GCPtrLivenessData RevisedLivenessData; 1362 computeLiveInValues(DT, F, RevisedLivenessData, GC); 1363 for (size_t i = 0; i < records.size(); i++) { 1364 struct PartiallyConstructedSafepointRecord &info = records[i]; 1365 recomputeLiveInValues(RevisedLivenessData, toUpdate[i], info, PointerToBase, 1366 GC); 1367 } 1368 } 1369 1370 // Utility function which clones all instructions from "ChainToBase" 1371 // and inserts them before "InsertBefore". Returns rematerialized value 1372 // which should be used after statepoint. 1373 static Instruction *rematerializeChain(ArrayRef<Instruction *> ChainToBase, 1374 BasicBlock::iterator InsertBefore, 1375 Value *RootOfChain, 1376 Value *AlternateLiveBase) { 1377 Instruction *LastClonedValue = nullptr; 1378 Instruction *LastValue = nullptr; 1379 // Walk backwards to visit top-most instructions first. 1380 for (Instruction *Instr : 1381 make_range(ChainToBase.rbegin(), ChainToBase.rend())) { 1382 // Only GEP's and casts are supported as we need to be careful to not 1383 // introduce any new uses of pointers not in the liveset. 1384 // Note that it's fine to introduce new uses of pointers which were 1385 // otherwise not used after this statepoint. 1386 assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr)); 1387 1388 Instruction *ClonedValue = Instr->clone(); 1389 ClonedValue->insertBefore(InsertBefore); 1390 ClonedValue->setName(Instr->getName() + ".remat"); 1391 1392 // If it is not first instruction in the chain then it uses previously 1393 // cloned value. We should update it to use cloned value. 1394 if (LastClonedValue) { 1395 assert(LastValue); 1396 ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue); 1397 #ifndef NDEBUG 1398 for (auto *OpValue : ClonedValue->operand_values()) { 1399 // Assert that cloned instruction does not use any instructions from 1400 // this chain other than LastClonedValue 1401 assert(!is_contained(ChainToBase, OpValue) && 1402 "incorrect use in rematerialization chain"); 1403 // Assert that the cloned instruction does not use the RootOfChain 1404 // or the AlternateLiveBase. 1405 assert(OpValue != RootOfChain && OpValue != AlternateLiveBase); 1406 } 1407 #endif 1408 } else { 1409 // For the first instruction, replace the use of unrelocated base i.e. 1410 // RootOfChain/OrigRootPhi, with the corresponding PHI present in the 1411 // live set. They have been proved to be the same PHI nodes. 
Note 1412 // that the *only* use of the RootOfChain in the ChainToBase list is 1413 // the first Value in the list. 1414 if (RootOfChain != AlternateLiveBase) 1415 ClonedValue->replaceUsesOfWith(RootOfChain, AlternateLiveBase); 1416 } 1417 1418 LastClonedValue = ClonedValue; 1419 LastValue = Instr; 1420 } 1421 assert(LastClonedValue); 1422 return LastClonedValue; 1423 } 1424 1425 // When inserting gc.relocate and gc.result calls, we need to ensure there are 1426 // no uses of the original value / return value between the gc.statepoint and 1427 // the gc.relocate / gc.result call. One case which can arise is a phi node 1428 // starting one of the successor blocks. We also need to be able to insert the 1429 // gc.relocates only on the path which goes through the statepoint. We might 1430 // need to split an edge to make this possible. 1431 static BasicBlock * 1432 normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, 1433 DominatorTree &DT) { 1434 BasicBlock *Ret = BB; 1435 if (!BB->getUniquePredecessor()) 1436 Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT); 1437 1438 // Now that 'Ret' has unique predecessor we can safely remove all phi nodes 1439 // from it 1440 FoldSingleEntryPHINodes(Ret); 1441 assert(!isa<PHINode>(Ret->begin()) && 1442 "All PHI nodes should have been removed!"); 1443 1444 // At this point, we can safely insert a gc.relocate or gc.result as the first 1445 // instruction in Ret if needed. 1446 return Ret; 1447 } 1448 1449 // List of all function attributes which must be stripped when lowering from 1450 // abstract machine model to physical machine model. Essentially, these are 1451 // all the effects a safepoint might have which we ignored in the abstract 1452 // machine model for purposes of optimization. We have to strip these on 1453 // both function declarations and call sites. 1454 static constexpr Attribute::AttrKind FnAttrsToStrip[] = 1455 {Attribute::Memory, Attribute::NoSync, Attribute::NoFree}; 1456 1457 // Create new attribute set containing only attributes which can be transferred 1458 // from the original call to the safepoint. 1459 static AttributeList legalizeCallAttributes(CallBase *Call, bool IsMemIntrinsic, 1460 AttributeList StatepointAL) { 1461 AttributeList OrigAL = Call->getAttributes(); 1462 if (OrigAL.isEmpty()) 1463 return StatepointAL; 1464 1465 // Remove the readonly, readnone, and statepoint function attributes. 1466 LLVMContext &Ctx = Call->getContext(); 1467 AttrBuilder FnAttrs(Ctx, OrigAL.getFnAttrs()); 1468 for (auto Attr : FnAttrsToStrip) 1469 FnAttrs.removeAttribute(Attr); 1470 1471 for (Attribute A : OrigAL.getFnAttrs()) { 1472 if (isStatepointDirectiveAttr(A)) 1473 FnAttrs.removeAttribute(A); 1474 } 1475 1476 StatepointAL = StatepointAL.addFnAttributes(Ctx, FnAttrs); 1477 1478 // The memory intrinsics do not have a 1:1 correspondence of the original 1479 // call arguments to the produced statepoint. Do not transfer the argument 1480 // attributes to avoid putting them on incorrect arguments. 1481 if (IsMemIntrinsic) 1482 return StatepointAL; 1483 1484 // Attach the argument attributes from the original call at the corresponding 1485 // arguments in the statepoint. Note that any argument attributes that are 1486 // invalid after lowering are stripped in stripNonValidDataFromBody. 
1487 for (unsigned I : llvm::seq(Call->arg_size())) 1488 StatepointAL = StatepointAL.addParamAttributes( 1489 Ctx, GCStatepointInst::CallArgsBeginPos + I, 1490 AttrBuilder(Ctx, OrigAL.getParamAttrs(I))); 1491 1492 // Return attributes are later attached to the gc.result intrinsic. 1493 return StatepointAL; 1494 } 1495 1496 /// Helper function to place all gc relocates necessary for the given 1497 /// statepoint. 1498 /// Inputs: 1499 /// liveVariables - list of variables to be relocated. 1500 /// basePtrs - base pointers. 1501 /// statepointToken - statepoint instruction to which relocates should be 1502 /// bound. 1503 /// Builder - Llvm IR builder to be used to construct new calls. 1504 static void CreateGCRelocates(ArrayRef<Value *> LiveVariables, 1505 ArrayRef<Value *> BasePtrs, 1506 Instruction *StatepointToken, 1507 IRBuilder<> &Builder, GCStrategy *GC) { 1508 if (LiveVariables.empty()) 1509 return; 1510 1511 auto FindIndex = [](ArrayRef<Value *> LiveVec, Value *Val) { 1512 auto ValIt = llvm::find(LiveVec, Val); 1513 assert(ValIt != LiveVec.end() && "Val not found in LiveVec!"); 1514 size_t Index = std::distance(LiveVec.begin(), ValIt); 1515 assert(Index < LiveVec.size() && "Bug in std::find?"); 1516 return Index; 1517 }; 1518 Module *M = StatepointToken->getModule(); 1519 1520 // All gc_relocate are generated as i8 addrspace(1)* (or a vector type whose 1521 // element type is i8 addrspace(1)*). We originally generated unique 1522 // declarations for each pointer type, but this proved problematic because 1523 // the intrinsic mangling code is incomplete and fragile. Since we're moving 1524 // towards a single unified pointer type anyways, we can just cast everything 1525 // to an i8* of the right address space. A bitcast is added later to convert 1526 // gc_relocate to the actual value's type. 1527 auto getGCRelocateDecl = [&](Type *Ty) { 1528 assert(isHandledGCPointerType(Ty, GC)); 1529 auto AS = Ty->getScalarType()->getPointerAddressSpace(); 1530 Type *NewTy = PointerType::get(M->getContext(), AS); 1531 if (auto *VT = dyn_cast<VectorType>(Ty)) 1532 NewTy = FixedVectorType::get(NewTy, 1533 cast<FixedVectorType>(VT)->getNumElements()); 1534 return Intrinsic::getOrInsertDeclaration( 1535 M, Intrinsic::experimental_gc_relocate, {NewTy}); 1536 }; 1537 1538 // Lazily populated map from input types to the canonicalized form mentioned 1539 // in the comment above. This should probably be cached somewhere more 1540 // broadly. 1541 DenseMap<Type *, Function *> TypeToDeclMap; 1542 1543 for (unsigned i = 0; i < LiveVariables.size(); i++) { 1544 // Generate the gc.relocate call and save the result 1545 Value *BaseIdx = Builder.getInt32(FindIndex(LiveVariables, BasePtrs[i])); 1546 Value *LiveIdx = Builder.getInt32(i); 1547 1548 Type *Ty = LiveVariables[i]->getType(); 1549 if (!TypeToDeclMap.count(Ty)) 1550 TypeToDeclMap[Ty] = getGCRelocateDecl(Ty); 1551 Function *GCRelocateDecl = TypeToDeclMap[Ty]; 1552 1553 // only specify a debug name if we can give a useful one 1554 CallInst *Reloc = Builder.CreateCall( 1555 GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx}, 1556 suffixed_name_or(LiveVariables[i], ".relocated", "")); 1557 // Trick CodeGen into thinking there are lots of free registers at this 1558 // fake call. 1559 Reloc->setCallingConv(CallingConv::Cold); 1560 } 1561 } 1562 1563 namespace { 1564 1565 /// This struct is used to defer RAUWs and `eraseFromParent` s. Using this 1566 /// avoids having to worry about keeping around dangling pointers to Values. 
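/// A typical usage sketch (mirroring how this file uses the class): record
/// replacements while statepoints are being created, then apply them once all
/// live sets are explicit in the IR:
///   Replacements.push_back(DeferredReplacement::createRAUW(OldCall, GCResult));
///   ...
///   for (DeferredReplacement &R : Replacements)
///     R.doReplacement();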
1567 class DeferredReplacement { 1568 AssertingVH<Instruction> Old; 1569 AssertingVH<Instruction> New; 1570 bool IsDeoptimize = false; 1571 1572 DeferredReplacement() = default; 1573 1574 public: 1575 static DeferredReplacement createRAUW(Instruction *Old, Instruction *New) { 1576 assert(Old != New && Old && New && 1577 "Cannot RAUW equal values or to / from null!"); 1578 1579 DeferredReplacement D; 1580 D.Old = Old; 1581 D.New = New; 1582 return D; 1583 } 1584 1585 static DeferredReplacement createDelete(Instruction *ToErase) { 1586 DeferredReplacement D; 1587 D.Old = ToErase; 1588 return D; 1589 } 1590 1591 static DeferredReplacement createDeoptimizeReplacement(Instruction *Old) { 1592 #ifndef NDEBUG 1593 auto *F = cast<CallInst>(Old)->getCalledFunction(); 1594 assert(F && F->getIntrinsicID() == Intrinsic::experimental_deoptimize && 1595 "Only way to construct a deoptimize deferred replacement"); 1596 #endif 1597 DeferredReplacement D; 1598 D.Old = Old; 1599 D.IsDeoptimize = true; 1600 return D; 1601 } 1602 1603 /// Does the task represented by this instance. 1604 void doReplacement() { 1605 Instruction *OldI = Old; 1606 Instruction *NewI = New; 1607 1608 assert(OldI != NewI && "Disallowed at construction?!"); 1609 assert((!IsDeoptimize || !New) && 1610 "Deoptimize intrinsics are not replaced!"); 1611 1612 Old = nullptr; 1613 New = nullptr; 1614 1615 if (NewI) 1616 OldI->replaceAllUsesWith(NewI); 1617 1618 if (IsDeoptimize) { 1619 // Note: we've inserted instructions, so the call to llvm.deoptimize may 1620 // not necessarily be followed by the matching return. 1621 auto *RI = cast<ReturnInst>(OldI->getParent()->getTerminator()); 1622 new UnreachableInst(RI->getContext(), RI->getIterator()); 1623 RI->eraseFromParent(); 1624 } 1625 1626 OldI->eraseFromParent(); 1627 } 1628 }; 1629 1630 } // end anonymous namespace 1631 1632 static StringRef getDeoptLowering(CallBase *Call) { 1633 const char *DeoptLowering = "deopt-lowering"; 1634 if (Call->hasFnAttr(DeoptLowering)) { 1635 // FIXME: Calls have a *really* confusing interface around attributes 1636 // with values. 1637 const AttributeList &CSAS = Call->getAttributes(); 1638 if (CSAS.hasFnAttr(DeoptLowering)) 1639 return CSAS.getFnAttr(DeoptLowering).getValueAsString(); 1640 Function *F = Call->getCalledFunction(); 1641 assert(F && F->hasFnAttribute(DeoptLowering)); 1642 return F->getFnAttribute(DeoptLowering).getValueAsString(); 1643 } 1644 return "live-through"; 1645 } 1646 1647 static void 1648 makeStatepointExplicitImpl(CallBase *Call, /* to replace */ 1649 const SmallVectorImpl<Value *> &BasePtrs, 1650 const SmallVectorImpl<Value *> &LiveVariables, 1651 PartiallyConstructedSafepointRecord &Result, 1652 std::vector<DeferredReplacement> &Replacements, 1653 const PointerToBaseTy &PointerToBase, 1654 GCStrategy *GC) { 1655 assert(BasePtrs.size() == LiveVariables.size()); 1656 1657 // Then go ahead and use the builder do actually do the inserts. We insert 1658 // immediately before the previous instruction under the assumption that all 1659 // arguments will be available here. We can't insert afterwards since we may 1660 // be replacing a terminator. 
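  // Schematically (illustrative only; most operands and type manglings are
  // elided), a call such as
  //   %r = call ptr addrspace(1) @foo(ptr addrspace(1) %p)
  // ends up rewritten into
  //   %tok = call token @llvm.experimental.gc.statepoint(... @foo, %p, ...)
  //                                                      [ "gc-live"(%p, ...) ]
  //   %r.gc = call ptr addrspace(1) @llvm.experimental.gc.result(token %tok)
  //   %p.relocated = call ptr addrspace(1)
  //                       @llvm.experimental.gc.relocate(token %tok, ...)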
1661 IRBuilder<> Builder(Call);
1662
1663 ArrayRef<Value *> GCLive(LiveVariables);
1664 uint64_t StatepointID = StatepointDirectives::DefaultStatepointID;
1665 uint32_t NumPatchBytes = 0;
1666 uint32_t Flags = uint32_t(StatepointFlags::None);
1667
1668 SmallVector<Value *, 8> CallArgs(Call->args());
1669 std::optional<ArrayRef<Use>> DeoptArgs;
1670 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_deopt))
1671 DeoptArgs = Bundle->Inputs;
1672 std::optional<ArrayRef<Use>> TransitionArgs;
1673 if (auto Bundle = Call->getOperandBundle(LLVMContext::OB_gc_transition)) {
1674 TransitionArgs = Bundle->Inputs;
1675 // TODO: This flag no longer serves a purpose and can be removed later
1676 Flags |= uint32_t(StatepointFlags::GCTransition);
1677 }
1678
1679 // Instead of lowering calls to @llvm.experimental.deoptimize as normal calls
1680 // with a return value, we lower them as never-returning calls to
1681 // __llvm_deoptimize that are followed by unreachable to get better codegen.
1682 bool IsDeoptimize = false;
1683 bool IsMemIntrinsic = false;
1684
1685 StatepointDirectives SD =
1686 parseStatepointDirectivesFromAttrs(Call->getAttributes());
1687 if (SD.NumPatchBytes)
1688 NumPatchBytes = *SD.NumPatchBytes;
1689 if (SD.StatepointID)
1690 StatepointID = *SD.StatepointID;
1691
1692 // Pass through the requested lowering if any. The default is live-through.
1693 StringRef DeoptLowering = getDeoptLowering(Call);
1694 if (DeoptLowering == "live-in")
1695 Flags |= uint32_t(StatepointFlags::DeoptLiveIn);
1696 else {
1697 assert(DeoptLowering == "live-through" && "Unsupported value!");
1698 }
1699
1700 FunctionCallee CallTarget(Call->getFunctionType(), Call->getCalledOperand());
1701 if (Function *F = dyn_cast<Function>(CallTarget.getCallee())) {
1702 auto IID = F->getIntrinsicID();
1703 if (IID == Intrinsic::experimental_deoptimize) {
1704 // Calls to llvm.experimental.deoptimize are lowered to calls to the
1705 // __llvm_deoptimize symbol. We want to resolve this now, since the
1706 // verifier does not allow taking the address of an intrinsic function.
1707
1708 SmallVector<Type *, 8> DomainTy;
1709 for (Value *Arg : CallArgs)
1710 DomainTy.push_back(Arg->getType());
1711 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy,
1712 /* isVarArg = */ false);
1713
1714 // Note: CallTarget can be a bitcast instruction of a symbol if there are
1715 // calls to @llvm.experimental.deoptimize with different argument types in
1716 // the same module. This is fine -- we assume the frontend knew what it
1717 // was doing when generating this kind of IR.
1718 CallTarget = F->getParent()
1719 ->getOrInsertFunction("__llvm_deoptimize", FTy);
1720
1721 IsDeoptimize = true;
1722 } else if (IID == Intrinsic::memcpy_element_unordered_atomic ||
1723 IID == Intrinsic::memmove_element_unordered_atomic) {
1724 IsMemIntrinsic = true;
1725
1726 // Unordered atomic memcpy and memmove intrinsics which are not explicitly
1727 // marked as "gc-leaf-function" should be lowered in a GC parseable way.
1728 // Specifically, these calls should be lowered to the
1729 // __llvm_{memcpy|memmove}_element_unordered_atomic_safepoint symbols.
1730 // Similarly to __llvm_deoptimize we want to resolve this now, since the
1731 // verifier does not allow taking the address of an intrinsic function.
1732 //
1733 // Moreover we need to shuffle the arguments for the call in order to
1734 // accommodate GC. The underlying source and destination objects might be
1735 // relocated during copy operation should the GC occur.
To relocate the 1736 // derived source and destination pointers the implementation of the 1737 // intrinsic should know the corresponding base pointers. 1738 // 1739 // To make the base pointers available pass them explicitly as arguments: 1740 // memcpy(dest_derived, source_derived, ...) => 1741 // memcpy(dest_base, dest_offset, source_base, source_offset, ...) 1742 auto &Context = Call->getContext(); 1743 auto &DL = Call->getDataLayout(); 1744 auto GetBaseAndOffset = [&](Value *Derived) { 1745 Value *Base = nullptr; 1746 // Optimizations in unreachable code might substitute the real pointer 1747 // with undef, poison or null-derived constant. Return null base for 1748 // them to be consistent with the handling in the main algorithm in 1749 // findBaseDefiningValue. 1750 if (isa<Constant>(Derived)) 1751 Base = 1752 ConstantPointerNull::get(cast<PointerType>(Derived->getType())); 1753 else { 1754 assert(PointerToBase.count(Derived)); 1755 Base = PointerToBase.find(Derived)->second; 1756 } 1757 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace(); 1758 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace); 1759 Value *Base_int = Builder.CreatePtrToInt( 1760 Base, Type::getIntNTy(Context, IntPtrSize)); 1761 Value *Derived_int = Builder.CreatePtrToInt( 1762 Derived, Type::getIntNTy(Context, IntPtrSize)); 1763 return std::make_pair(Base, Builder.CreateSub(Derived_int, Base_int)); 1764 }; 1765 1766 auto *Dest = CallArgs[0]; 1767 Value *DestBase, *DestOffset; 1768 std::tie(DestBase, DestOffset) = GetBaseAndOffset(Dest); 1769 1770 auto *Source = CallArgs[1]; 1771 Value *SourceBase, *SourceOffset; 1772 std::tie(SourceBase, SourceOffset) = GetBaseAndOffset(Source); 1773 1774 auto *LengthInBytes = CallArgs[2]; 1775 auto *ElementSizeCI = cast<ConstantInt>(CallArgs[3]); 1776 1777 CallArgs.clear(); 1778 CallArgs.push_back(DestBase); 1779 CallArgs.push_back(DestOffset); 1780 CallArgs.push_back(SourceBase); 1781 CallArgs.push_back(SourceOffset); 1782 CallArgs.push_back(LengthInBytes); 1783 1784 SmallVector<Type *, 8> DomainTy; 1785 for (Value *Arg : CallArgs) 1786 DomainTy.push_back(Arg->getType()); 1787 auto *FTy = FunctionType::get(Type::getVoidTy(F->getContext()), DomainTy, 1788 /* isVarArg = */ false); 1789 1790 auto GetFunctionName = [](Intrinsic::ID IID, ConstantInt *ElementSizeCI) { 1791 uint64_t ElementSize = ElementSizeCI->getZExtValue(); 1792 if (IID == Intrinsic::memcpy_element_unordered_atomic) { 1793 switch (ElementSize) { 1794 case 1: 1795 return "__llvm_memcpy_element_unordered_atomic_safepoint_1"; 1796 case 2: 1797 return "__llvm_memcpy_element_unordered_atomic_safepoint_2"; 1798 case 4: 1799 return "__llvm_memcpy_element_unordered_atomic_safepoint_4"; 1800 case 8: 1801 return "__llvm_memcpy_element_unordered_atomic_safepoint_8"; 1802 case 16: 1803 return "__llvm_memcpy_element_unordered_atomic_safepoint_16"; 1804 default: 1805 llvm_unreachable("unexpected element size!"); 1806 } 1807 } 1808 assert(IID == Intrinsic::memmove_element_unordered_atomic); 1809 switch (ElementSize) { 1810 case 1: 1811 return "__llvm_memmove_element_unordered_atomic_safepoint_1"; 1812 case 2: 1813 return "__llvm_memmove_element_unordered_atomic_safepoint_2"; 1814 case 4: 1815 return "__llvm_memmove_element_unordered_atomic_safepoint_4"; 1816 case 8: 1817 return "__llvm_memmove_element_unordered_atomic_safepoint_8"; 1818 case 16: 1819 return "__llvm_memmove_element_unordered_atomic_safepoint_16"; 1820 default: 1821 llvm_unreachable("unexpected element size!"); 1822 } 1823 }; 1824 1825 
CallTarget = 1826 F->getParent() 1827 ->getOrInsertFunction(GetFunctionName(IID, ElementSizeCI), FTy); 1828 } 1829 } 1830 1831 // Create the statepoint given all the arguments 1832 GCStatepointInst *Token = nullptr; 1833 if (auto *CI = dyn_cast<CallInst>(Call)) { 1834 CallInst *SPCall = Builder.CreateGCStatepointCall( 1835 StatepointID, NumPatchBytes, CallTarget, Flags, CallArgs, 1836 TransitionArgs, DeoptArgs, GCLive, "safepoint_token"); 1837 1838 SPCall->setTailCallKind(CI->getTailCallKind()); 1839 SPCall->setCallingConv(CI->getCallingConv()); 1840 1841 // Set up function attrs directly on statepoint and return attrs later for 1842 // gc_result intrinsic. 1843 SPCall->setAttributes( 1844 legalizeCallAttributes(CI, IsMemIntrinsic, SPCall->getAttributes())); 1845 1846 Token = cast<GCStatepointInst>(SPCall); 1847 1848 // Put the following gc_result and gc_relocate calls immediately after the 1849 // the old call (which we're about to delete) 1850 assert(CI->getNextNode() && "Not a terminator, must have next!"); 1851 Builder.SetInsertPoint(CI->getNextNode()); 1852 Builder.SetCurrentDebugLocation(CI->getNextNode()->getDebugLoc()); 1853 } else { 1854 auto *II = cast<InvokeInst>(Call); 1855 1856 // Insert the new invoke into the old block. We'll remove the old one in a 1857 // moment at which point this will become the new terminator for the 1858 // original block. 1859 InvokeInst *SPInvoke = Builder.CreateGCStatepointInvoke( 1860 StatepointID, NumPatchBytes, CallTarget, II->getNormalDest(), 1861 II->getUnwindDest(), Flags, CallArgs, TransitionArgs, DeoptArgs, 1862 GCLive, "statepoint_token"); 1863 1864 SPInvoke->setCallingConv(II->getCallingConv()); 1865 1866 // Set up function attrs directly on statepoint and return attrs later for 1867 // gc_result intrinsic. 1868 SPInvoke->setAttributes( 1869 legalizeCallAttributes(II, IsMemIntrinsic, SPInvoke->getAttributes())); 1870 1871 Token = cast<GCStatepointInst>(SPInvoke); 1872 1873 // Generate gc relocates in exceptional path 1874 BasicBlock *UnwindBlock = II->getUnwindDest(); 1875 assert(!isa<PHINode>(UnwindBlock->begin()) && 1876 UnwindBlock->getUniquePredecessor() && 1877 "can't safely insert in this block!"); 1878 1879 Builder.SetInsertPoint(UnwindBlock, UnwindBlock->getFirstInsertionPt()); 1880 Builder.SetCurrentDebugLocation(II->getDebugLoc()); 1881 1882 // Attach exceptional gc relocates to the landingpad. 1883 Instruction *ExceptionalToken = UnwindBlock->getLandingPadInst(); 1884 Result.UnwindToken = ExceptionalToken; 1885 1886 CreateGCRelocates(LiveVariables, BasePtrs, ExceptionalToken, Builder, GC); 1887 1888 // Generate gc relocates and returns for normal block 1889 BasicBlock *NormalDest = II->getNormalDest(); 1890 assert(!isa<PHINode>(NormalDest->begin()) && 1891 NormalDest->getUniquePredecessor() && 1892 "can't safely insert in this block!"); 1893 1894 Builder.SetInsertPoint(NormalDest, NormalDest->getFirstInsertionPt()); 1895 1896 // gc relocates will be generated later as if it were regular call 1897 // statepoint 1898 } 1899 assert(Token && "Should be set in one of the above branches!"); 1900 1901 if (IsDeoptimize) { 1902 // If we're wrapping an @llvm.experimental.deoptimize in a statepoint, we 1903 // transform the tail-call like structure to a call to a void function 1904 // followed by unreachable to get better codegen. 
1905 Replacements.push_back(
1906 DeferredReplacement::createDeoptimizeReplacement(Call));
1907 } else {
1908 Token->setName("statepoint_token");
1909 if (!Call->getType()->isVoidTy() && !Call->use_empty()) {
1910 StringRef Name = Call->hasName() ? Call->getName() : "";
1911 CallInst *GCResult = Builder.CreateGCResult(Token, Call->getType(), Name);
1912 GCResult->setAttributes(
1913 AttributeList::get(GCResult->getContext(), AttributeList::ReturnIndex,
1914 Call->getAttributes().getRetAttrs()));
1915
1916 // We cannot RAUW or delete the original call because it could be in the
1917 // live set of some other safepoint, in which case that safepoint's
1918 // PartiallyConstructedSafepointRecord will hold a raw pointer to this
1919 // llvm::Instruction. Instead, we defer the replacement and deletion to
1920 // after the live sets have been made explicit in the IR, and we no longer
1921 // have raw pointers to worry about.
1922 Replacements.emplace_back(
1923 DeferredReplacement::createRAUW(Call, GCResult));
1924 } else {
1925 Replacements.emplace_back(DeferredReplacement::createDelete(Call));
1926 }
1927 }
1928
1929 Result.StatepointToken = Token;
1930
1931 // Second, create a gc.relocate for every live variable
1932 CreateGCRelocates(LiveVariables, BasePtrs, Token, Builder, GC);
1933 }
1934
1935 // Replace an existing gc.statepoint with a new one and a set of gc.relocates
1936 // which make the relocations happening at this safepoint explicit.
1937 //
1938 // WARNING: Does not do any fixup to adjust users of the original live
1939 // values. That's the caller's responsibility.
1940 static void
1941 makeStatepointExplicit(DominatorTree &DT, CallBase *Call,
1942 PartiallyConstructedSafepointRecord &Result,
1943 std::vector<DeferredReplacement> &Replacements,
1944 const PointerToBaseTy &PointerToBase, GCStrategy *GC) {
1945 const auto &LiveSet = Result.LiveSet;
1946
1947 // Convert to vector for efficient cross referencing.
1948 SmallVector<Value *, 64> BaseVec, LiveVec;
1949 LiveVec.reserve(LiveSet.size());
1950 BaseVec.reserve(LiveSet.size());
1951 for (Value *L : LiveSet) {
1952 LiveVec.push_back(L);
1953 assert(PointerToBase.count(L));
1954 Value *Base = PointerToBase.find(L)->second;
1955 BaseVec.push_back(Base);
1956 }
1957 assert(LiveVec.size() == BaseVec.size());
1958
1959 // Do the actual rewriting and delete the old statepoint
1960 makeStatepointExplicitImpl(Call, BaseVec, LiveVec, Result, Replacements,
1961 PointerToBase, GC);
1962 }
1963
1964 // Helper function for relocationViaAlloca.
1965 //
1966 // It receives an iterator over the statepoint's gc relocates and emits a store
1967 // to the assigned location (via AllocaMap) for each one of them. It adds the
1968 // visited values to the VisitedLiveValues set, which we will later use for
1969 // validation checking.
1970 static void
1971 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
1972 DenseMap<Value *, AllocaInst *> &AllocaMap,
1973 DenseSet<Value *> &VisitedLiveValues) {
1974 for (User *U : GCRelocs) {
1975 GCRelocateInst *Relocate = dyn_cast<GCRelocateInst>(U);
1976 if (!Relocate)
1977 continue;
1978
1979 Value *OriginalValue = Relocate->getDerivedPtr();
1980 assert(AllocaMap.count(OriginalValue));
1981 Value *Alloca = AllocaMap[OriginalValue];
1982
1983 // Emit store into the related alloca.
1984 assert(Relocate->getNextNode() &&
1985 "Should always have one since it's not a terminator");
1986 new StoreInst(Relocate, Alloca, std::next(Relocate->getIterator()));
1987
1988 #ifndef NDEBUG
1989 VisitedLiveValues.insert(OriginalValue);
1990 #endif
1991 }
1992 }
1993
1994 // Helper function for the "relocationViaAlloca". Similar to the
1995 // "insertRelocationStores" but works for rematerialized values.
1996 static void insertRematerializationStores(
1997 const RematerializedValueMapTy &RematerializedValues,
1998 DenseMap<Value *, AllocaInst *> &AllocaMap,
1999 DenseSet<Value *> &VisitedLiveValues) {
2000 for (auto RematerializedValuePair: RematerializedValues) {
2001 Instruction *RematerializedValue = RematerializedValuePair.first;
2002 Value *OriginalValue = RematerializedValuePair.second;
2003
2004 assert(AllocaMap.count(OriginalValue) &&
2005 "Can not find alloca for rematerialized value");
2006 Value *Alloca = AllocaMap[OriginalValue];
2007
2008 new StoreInst(RematerializedValue, Alloca,
2009 std::next(RematerializedValue->getIterator()));
2010
2011 #ifndef NDEBUG
2012 VisitedLiveValues.insert(OriginalValue);
2013 #endif
2014 }
2015 }
2016
2017 /// Do all the relocation update via allocas and mem2reg
2018 static void relocationViaAlloca(
2019 Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
2020 ArrayRef<PartiallyConstructedSafepointRecord> Records) {
2021 #ifndef NDEBUG
2022 // record initial number of (static) allocas; we'll check we have the same
2023 // number when we get done.
2024 int InitialAllocaNum = 0;
2025 for (Instruction &I : F.getEntryBlock())
2026 if (isa<AllocaInst>(I))
2027 InitialAllocaNum++;
2028 #endif
2029
2030 // TODO-PERF: change data structures, reserve
2031 DenseMap<Value *, AllocaInst *> AllocaMap;
2032 SmallVector<AllocaInst *, 200> PromotableAllocas;
2033 // Used later to check that we have enough allocas to store all values
2034 std::size_t NumRematerializedValues = 0;
2035 PromotableAllocas.reserve(Live.size());
2036
2037 // Emit alloca for "LiveValue" and record it in "allocaMap" and
2038 // "PromotableAllocas"
2039 const DataLayout &DL = F.getDataLayout();
2040 auto emitAllocaFor = [&](Value *LiveValue) {
2041 AllocaInst *Alloca =
2042 new AllocaInst(LiveValue->getType(), DL.getAllocaAddrSpace(), "",
2043 F.getEntryBlock().getFirstNonPHIIt());
2044 AllocaMap[LiveValue] = Alloca;
2045 PromotableAllocas.push_back(Alloca);
2046 };
2047
2048 // Emit alloca for each live gc pointer
2049 for (Value *V : Live)
2050 emitAllocaFor(V);
2051
2052 // Emit allocas for rematerialized values
2053 for (const auto &Info : Records)
2054 for (auto RematerializedValuePair : Info.RematerializedValues) {
2055 Value *OriginalValue = RematerializedValuePair.second;
2056 if (AllocaMap.contains(OriginalValue))
2057 continue;
2058
2059 emitAllocaFor(OriginalValue);
2060 ++NumRematerializedValues;
2061 }
2062
2063 // The next two loops are part of the same conceptual operation. We need to
2064 // insert a store to the alloca after the original def and at each
2065 // redefinition. We need to insert a load before each use. These are split
2066 // into distinct loops for performance reasons.
2067
2068 // Update gc pointer after each statepoint: either store a relocated value or
2069 // null (if no relocated value was found for this gc pointer and it is not a
2070 // gc_result). This must happen before we update the statepoint with load of
2071 // alloca otherwise we lose the link between statepoint and old def.
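  // Illustrative shape of the rewrite (not exact output) for one live pointer
  // %p with its dedicated alloca %p.alloca:
  //   store ptr addrspace(1) %p, ptr %p.alloca             ; after original def
  //   %tok = call token @llvm.experimental.gc.statepoint(...)
  //   %p.relocated = call ptr addrspace(1) @llvm.experimental.gc.relocate(...)
  //   store ptr addrspace(1) %p.relocated, ptr %p.alloca   ; inserted below
  //   %p.reload = load ptr addrspace(1), ptr %p.alloca     ; before each use
  // mem2reg later folds the allocas back into SSA form.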
2072 for (const auto &Info : Records) {
2073 Value *Statepoint = Info.StatepointToken;
2074
2075 // This will be used for consistency check
2076 DenseSet<Value *> VisitedLiveValues;
2077
2078 // Insert stores for normal statepoint gc relocates
2079 insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);
2080
2081 // If this was an invoke statepoint, we will also insert stores for the
2082 // exceptional path gc relocates.
2083 if (isa<InvokeInst>(Statepoint)) {
2084 insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
2085 VisitedLiveValues);
2086 }
2087
2088 // Do similar thing with rematerialized values
2089 insertRematerializationStores(Info.RematerializedValues, AllocaMap,
2090 VisitedLiveValues);
2091
2092 if (ClobberNonLive) {
2093 // As a debugging aid, pretend that an unrelocated pointer becomes null at
2094 // the gc.statepoint. This will turn some subtle GC problems into
2095 // slightly easier to debug SEGVs. Note that on large IR files with
2096 // lots of gc.statepoints this is extremely costly in both memory and
2097 // time.
2098 SmallVector<AllocaInst *, 64> ToClobber;
2099 for (auto Pair : AllocaMap) {
2100 Value *Def = Pair.first;
2101 AllocaInst *Alloca = Pair.second;
2102
2103 // This value was relocated
2104 if (VisitedLiveValues.count(Def)) {
2105 continue;
2106 }
2107 ToClobber.push_back(Alloca);
2108 }
2109
2110 auto InsertClobbersAt = [&](BasicBlock::iterator IP) {
2111 for (auto *AI : ToClobber) {
2112 auto AT = AI->getAllocatedType();
2113 Constant *CPN;
2114 if (AT->isVectorTy())
2115 CPN = ConstantAggregateZero::get(AT);
2116 else
2117 CPN = ConstantPointerNull::get(cast<PointerType>(AT));
2118 new StoreInst(CPN, AI, IP);
2119 }
2120 };
2121
2122 // Insert the clobbering stores. These may get intermixed with the
2123 // gc.results and gc.relocates, but that's fine.
2124 if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
2125 InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
2126 InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
2127 } else {
2128 InsertClobbersAt(
2129 std::next(cast<Instruction>(Statepoint)->getIterator()));
2130 }
2131 }
2132 }
2133
2134 // Update each use with a load from the alloca, and add a store for the original def.
2135 for (auto Pair : AllocaMap) {
2136 Value *Def = Pair.first;
2137 AllocaInst *Alloca = Pair.second;
2138
2139 // We pre-record the uses of the def so that we don't have to worry about
2140 // later updates that change the user information.
2141
2142 SmallVector<Instruction *, 20> Uses;
2143 // PERF: trade a linear scan for repeated reallocation
2144 Uses.reserve(Def->getNumUses());
2145 for (User *U : Def->users()) {
2146 if (!isa<ConstantExpr>(U)) {
2147 // If the def has a ConstantExpr use, then the def is either a
2148 // ConstantExpr use itself or null. In either case
2149 // (recursively in the first, directly in the second), the oop
2150 // it is ultimately dependent on is null and this particular
2151 // use does not need to be fixed up.
2152 Uses.push_back(cast<Instruction>(U)); 2153 } 2154 } 2155 2156 llvm::sort(Uses); 2157 auto Last = llvm::unique(Uses); 2158 Uses.erase(Last, Uses.end()); 2159 2160 for (Instruction *Use : Uses) { 2161 if (isa<PHINode>(Use)) { 2162 PHINode *Phi = cast<PHINode>(Use); 2163 for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) { 2164 if (Def == Phi->getIncomingValue(i)) { 2165 LoadInst *Load = new LoadInst( 2166 Alloca->getAllocatedType(), Alloca, "", 2167 Phi->getIncomingBlock(i)->getTerminator()->getIterator()); 2168 Phi->setIncomingValue(i, Load); 2169 } 2170 } 2171 } else { 2172 LoadInst *Load = new LoadInst(Alloca->getAllocatedType(), Alloca, "", 2173 Use->getIterator()); 2174 Use->replaceUsesOfWith(Def, Load); 2175 } 2176 } 2177 2178 // Emit store for the initial gc value. Store must be inserted after load, 2179 // otherwise store will be in alloca's use list and an extra load will be 2180 // inserted before it. 2181 StoreInst *Store = new StoreInst(Def, Alloca, /*volatile*/ false, 2182 DL.getABITypeAlign(Def->getType())); 2183 if (Instruction *Inst = dyn_cast<Instruction>(Def)) { 2184 if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) { 2185 // InvokeInst is a terminator so the store need to be inserted into its 2186 // normal destination block. 2187 BasicBlock *NormalDest = Invoke->getNormalDest(); 2188 Store->insertBefore(NormalDest->getFirstNonPHIIt()); 2189 } else { 2190 assert(!Inst->isTerminator() && 2191 "The only terminator that can produce a value is " 2192 "InvokeInst which is handled above."); 2193 Store->insertAfter(Inst->getIterator()); 2194 } 2195 } else { 2196 assert(isa<Argument>(Def)); 2197 Store->insertAfter(cast<Instruction>(Alloca)->getIterator()); 2198 } 2199 } 2200 2201 assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues && 2202 "we must have the same allocas with lives"); 2203 (void) NumRematerializedValues; 2204 if (!PromotableAllocas.empty()) { 2205 // Apply mem2reg to promote alloca to SSA 2206 PromoteMemToReg(PromotableAllocas, DT); 2207 } 2208 2209 #ifndef NDEBUG 2210 for (auto &I : F.getEntryBlock()) 2211 if (isa<AllocaInst>(I)) 2212 InitialAllocaNum--; 2213 assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas"); 2214 #endif 2215 } 2216 2217 /// Implement a unique function which doesn't require we sort the input 2218 /// vector. Doing so has the effect of changing the output of a couple of 2219 /// tests in ways which make them less useful in testing fused safepoints. 2220 template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) { 2221 SmallSet<T, 8> Seen; 2222 erase_if(Vec, [&](const T &V) { return !Seen.insert(V).second; }); 2223 } 2224 2225 /// Insert holders so that each Value is obviously live through the entire 2226 /// lifetime of the call. 
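/// For example (schematic): for values %a and %b this emits
///   call void (...) @__tmp_use(ptr addrspace(1) %a, ptr addrspace(1) %b)
/// right after a call safepoint (or at the start of both invoke destinations),
/// so liveness analysis must treat %a and %b as live across the safepoint.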
2227 static void insertUseHolderAfter(CallBase *Call, const ArrayRef<Value *> Values,
2228 SmallVectorImpl<CallInst *> &Holders) {
2229 if (Values.empty())
2230 // No values to hold live, might as well not insert the empty holder
2231 return;
2232
2233 Module *M = Call->getModule();
2234 // Use a dummy vararg function to actually hold the values live
2235 FunctionCallee Func = M->getOrInsertFunction(
2236 "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true));
2237 if (isa<CallInst>(Call)) {
2238 // For call safepoints insert dummy calls right after safepoint
2239 Holders.push_back(
2240 CallInst::Create(Func, Values, "", std::next(Call->getIterator())));
2241 return;
2242 }
2243 // For invoke safepoints insert dummy calls both in normal and
2244 // exceptional destination blocks
2245 auto *II = cast<InvokeInst>(Call);
2246 Holders.push_back(CallInst::Create(
2247 Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
2248 Holders.push_back(CallInst::Create(
2249 Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
2250 }
2251
2252 static void findLiveReferences(
2253 Function &F, DominatorTree &DT, ArrayRef<CallBase *> toUpdate,
2254 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records,
2255 GCStrategy *GC) {
2256 GCPtrLivenessData OriginalLivenessData;
2257 computeLiveInValues(DT, F, OriginalLivenessData, GC);
2258 for (size_t i = 0; i < records.size(); i++) {
2259 struct PartiallyConstructedSafepointRecord &info = records[i];
2260 analyzeParsePointLiveness(DT, OriginalLivenessData, toUpdate[i], info, GC);
2261 }
2262 }
2263
2264 // Helper function for the "rematerializeLiveValues". It walks the use chain
2265 // starting from the "CurrentValue" until it reaches the root of the chain, i.e.
2266 // the base or a value it cannot process. Only "simple" values are processed
2267 // (currently GEPs and casts). The returned root is examined by the
2268 // callers of findRematerializableChainToBasePointer. Fills "ChainToBase" array
2269 // with all visited values.
2270 static Value* findRematerializableChainToBasePointer(
2271 SmallVectorImpl<Instruction*> &ChainToBase,
2272 Value *CurrentValue) {
2273 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
2274 ChainToBase.push_back(GEP);
2275 return findRematerializableChainToBasePointer(ChainToBase,
2276 GEP->getPointerOperand());
2277 }
2278
2279 if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
2280 if (!CI->isNoopCast(CI->getDataLayout()))
2281 return CI;
2282
2283 ChainToBase.push_back(CI);
2284 return findRematerializableChainToBasePointer(ChainToBase,
2285 CI->getOperand(0));
2286 }
2287
2288 // We have reached the root of the chain, which is either equal to the base or
2289 // is the first unsupported value along the use chain.
2290 return CurrentValue;
2291 }
2292
2293 // Helper function for the "rematerializeLiveValues". Compute cost of the use
2294 // chain we are going to rematerialize.
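// For example (illustrative): for the two-instruction chain
//   %d0 = getelementptr i8, ptr addrspace(1) %base, i64 16
//   %d1 = getelementptr i8, ptr addrspace(1) %d0, i64 %idx
// the cost is the address computation of each GEP plus a small constant for
// %d1 because it has a non-constant index.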
2295 static InstructionCost 2296 chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain, 2297 TargetTransformInfo &TTI) { 2298 InstructionCost Cost = 0; 2299 2300 for (Instruction *Instr : Chain) { 2301 if (CastInst *CI = dyn_cast<CastInst>(Instr)) { 2302 assert(CI->isNoopCast(CI->getDataLayout()) && 2303 "non noop cast is found during rematerialization"); 2304 2305 Type *SrcTy = CI->getOperand(0)->getType(); 2306 Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy, 2307 TTI::getCastContextHint(CI), 2308 TargetTransformInfo::TCK_SizeAndLatency, CI); 2309 2310 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) { 2311 // Cost of the address calculation 2312 Type *ValTy = GEP->getSourceElementType(); 2313 Cost += TTI.getAddressComputationCost(ValTy); 2314 2315 // And cost of the GEP itself 2316 // TODO: Use TTI->getGEPCost here (it exists, but appears to be not 2317 // allowed for the external usage) 2318 if (!GEP->hasAllConstantIndices()) 2319 Cost += 2; 2320 2321 } else { 2322 llvm_unreachable("unsupported instruction type during rematerialization"); 2323 } 2324 } 2325 2326 return Cost; 2327 } 2328 2329 static bool AreEquivalentPhiNodes(PHINode &OrigRootPhi, PHINode &AlternateRootPhi) { 2330 unsigned PhiNum = OrigRootPhi.getNumIncomingValues(); 2331 if (PhiNum != AlternateRootPhi.getNumIncomingValues() || 2332 OrigRootPhi.getParent() != AlternateRootPhi.getParent()) 2333 return false; 2334 // Map of incoming values and their corresponding basic blocks of 2335 // OrigRootPhi. 2336 SmallDenseMap<Value *, BasicBlock *, 8> CurrentIncomingValues; 2337 for (unsigned i = 0; i < PhiNum; i++) 2338 CurrentIncomingValues[OrigRootPhi.getIncomingValue(i)] = 2339 OrigRootPhi.getIncomingBlock(i); 2340 2341 // Both current and base PHIs should have same incoming values and 2342 // the same basic blocks corresponding to the incoming values. 2343 for (unsigned i = 0; i < PhiNum; i++) { 2344 auto CIVI = 2345 CurrentIncomingValues.find(AlternateRootPhi.getIncomingValue(i)); 2346 if (CIVI == CurrentIncomingValues.end()) 2347 return false; 2348 BasicBlock *CurrentIncomingBB = CIVI->second; 2349 if (CurrentIncomingBB != AlternateRootPhi.getIncomingBlock(i)) 2350 return false; 2351 } 2352 return true; 2353 } 2354 2355 // Find derived pointers that can be recomputed cheap enough and fill 2356 // RematerizationCandidates with such candidates. 2357 static void 2358 findRematerializationCandidates(PointerToBaseTy PointerToBase, 2359 RematCandTy &RematerizationCandidates, 2360 TargetTransformInfo &TTI) { 2361 const unsigned int ChainLengthThreshold = 10; 2362 2363 for (auto P2B : PointerToBase) { 2364 auto *Derived = P2B.first; 2365 auto *Base = P2B.second; 2366 // Consider only derived pointers. 2367 if (Derived == Base) 2368 continue; 2369 2370 // For each live pointer find its defining chain. 2371 SmallVector<Instruction *, 3> ChainToBase; 2372 Value *RootOfChain = 2373 findRematerializableChainToBasePointer(ChainToBase, Derived); 2374 2375 // Nothing to do, or chain is too long 2376 if ( ChainToBase.size() == 0 || 2377 ChainToBase.size() > ChainLengthThreshold) 2378 continue; 2379 2380 // Handle the scenario where the RootOfChain is not equal to the 2381 // Base Value, but they are essentially the same phi values. 
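    // For example (schematic): findBasePointer may have introduced
    //   %phi      = phi ptr addrspace(1) [ %a, %bb1 ], [ %b, %bb2 ]
    //   %phi.base = phi ptr addrspace(1) [ %a, %bb1 ], [ %b, %bb2 ]
    // Here a chain may be rooted at %phi while the recorded base is %phi.base;
    // the two phis carry the same value, so rematerialization is still sound.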
2382 if (RootOfChain != PointerToBase[Derived]) { 2383 PHINode *OrigRootPhi = dyn_cast<PHINode>(RootOfChain); 2384 PHINode *AlternateRootPhi = dyn_cast<PHINode>(PointerToBase[Derived]); 2385 if (!OrigRootPhi || !AlternateRootPhi) 2386 continue; 2387 // PHI nodes that have the same incoming values, and belonging to the same 2388 // basic blocks are essentially the same SSA value. When the original phi 2389 // has incoming values with different base pointers, the original phi is 2390 // marked as conflict, and an additional `AlternateRootPhi` with the same 2391 // incoming values get generated by the findBasePointer function. We need 2392 // to identify the newly generated AlternateRootPhi (.base version of phi) 2393 // and RootOfChain (the original phi node itself) are the same, so that we 2394 // can rematerialize the gep and casts. This is a workaround for the 2395 // deficiency in the findBasePointer algorithm. 2396 if (!AreEquivalentPhiNodes(*OrigRootPhi, *AlternateRootPhi)) 2397 continue; 2398 } 2399 // Compute cost of this chain. 2400 InstructionCost Cost = chainToBasePointerCost(ChainToBase, TTI); 2401 // TODO: We can also account for cases when we will be able to remove some 2402 // of the rematerialized values by later optimization passes. I.e if 2403 // we rematerialized several intersecting chains. Or if original values 2404 // don't have any uses besides this statepoint. 2405 2406 // Ok, there is a candidate. 2407 RematerizlizationCandidateRecord Record; 2408 Record.ChainToBase = ChainToBase; 2409 Record.RootOfChain = RootOfChain; 2410 Record.Cost = Cost; 2411 RematerizationCandidates.insert({ Derived, Record }); 2412 } 2413 } 2414 2415 // Try to rematerialize derived pointers immediately before their uses 2416 // (instead of rematerializing after every statepoint it is live through). 2417 // This can be beneficial when derived pointer is live across many 2418 // statepoints, but uses are rare. 2419 static void rematerializeLiveValuesAtUses( 2420 RematCandTy &RematerizationCandidates, 2421 MutableArrayRef<PartiallyConstructedSafepointRecord> Records, 2422 PointerToBaseTy &PointerToBase) { 2423 if (!RematDerivedAtUses) 2424 return; 2425 2426 SmallVector<Instruction *, 32> LiveValuesToBeDeleted; 2427 2428 LLVM_DEBUG(dbgs() << "Rematerialize derived pointers at uses, " 2429 << "Num statepoints: " << Records.size() << '\n'); 2430 2431 for (auto &It : RematerizationCandidates) { 2432 Instruction *Cand = cast<Instruction>(It.first); 2433 auto &Record = It.second; 2434 2435 if (Record.Cost >= RematerializationThreshold) 2436 continue; 2437 2438 if (Cand->user_empty()) 2439 continue; 2440 2441 if (Cand->hasOneUse()) 2442 if (auto *U = dyn_cast<Instruction>(Cand->getUniqueUndroppableUser())) 2443 if (U->getParent() == Cand->getParent()) 2444 continue; 2445 2446 // Rematerialization before PHI nodes is not implemented. 2447 if (llvm::any_of(Cand->users(), 2448 [](const auto *U) { return isa<PHINode>(U); })) 2449 continue; 2450 2451 LLVM_DEBUG(dbgs() << "Trying cand " << *Cand << " ... "); 2452 2453 // Count of rematerialization instructions we introduce is equal to number 2454 // of candidate uses. 2455 // Count of rematerialization instructions we eliminate is equal to number 2456 // of statepoints it is live through. 2457 // Consider transformation profitable if latter is greater than former 2458 // (in other words, we create less than eliminate). 
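    // For example (illustrative): a candidate with 2 uses that is live across
    // 5 statepoints would otherwise need 5 relocations; recomputing it at each
    // of its 2 uses is the cheaper choice, so the transform applies.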
2459 unsigned NumLiveStatepoints = llvm::count_if(
2460 Records, [Cand](const auto &R) { return R.LiveSet.contains(Cand); });
2461 unsigned NumUses = Cand->getNumUses();
2462
2463 LLVM_DEBUG(dbgs() << "Num uses: " << NumUses << " Num live statepoints: "
2464 << NumLiveStatepoints << " ");
2465
2466 if (NumLiveStatepoints < NumUses) {
2467 LLVM_DEBUG(dbgs() << "not profitable\n");
2468 continue;
2469 }
2470
2471 // If rematerialization is 'free', then favor rematerialization at
2472 // uses as it generally shortens live ranges.
2473 // TODO: Short (size ==1) chains only?
2474 if (NumLiveStatepoints == NumUses && Record.Cost > 0) {
2475 LLVM_DEBUG(dbgs() << "not profitable\n");
2476 continue;
2477 }
2478
2479 LLVM_DEBUG(dbgs() << "looks profitable\n");
2480
2481 // ChainToBase may contain another remat candidate (as a sub chain) which
2482 // has been rewritten by now. Need to recollect chain to have up to date
2483 // value.
2484 // TODO: sort records in findRematerializationCandidates() in
2485 // decreasing chain size order?
2486 if (Record.ChainToBase.size() > 1) {
2487 Record.ChainToBase.clear();
2488 findRematerializableChainToBasePointer(Record.ChainToBase, Cand);
2489 }
2490
2491 // Current rematerialization algorithm is very simple: we rematerialize
2492 // immediately before EVERY use, even if there are several uses in same
2493 // block or if use is local to Cand Def. The reason is that this allows
2494 // us to avoid recomputing liveness without complicated analysis:
2495 // - If we did not eliminate all uses of original Candidate, we do not
2496 // know exactly in what BBs it is still live.
2497 // - If we rematerialize once per BB, we need to find proper insertion
2498 // place (first use in block, but after Def) and analyze if there is
2499 // statepoint between uses in the block.
2500 while (!Cand->user_empty()) {
2501 Instruction *UserI = cast<Instruction>(*Cand->user_begin());
2502 Instruction *RematChain =
2503 rematerializeChain(Record.ChainToBase, UserI->getIterator(),
2504 Record.RootOfChain, PointerToBase[Cand]);
2505 UserI->replaceUsesOfWith(Cand, RematChain);
2506 PointerToBase[RematChain] = PointerToBase[Cand];
2507 }
2508 LiveValuesToBeDeleted.push_back(Cand);
2509 }
2510
2511 LLVM_DEBUG(dbgs() << "Rematerialized " << LiveValuesToBeDeleted.size()
2512 << " derived pointers\n");
2513 for (auto *Cand : LiveValuesToBeDeleted) {
2514 assert(Cand->use_empty() && "Unexpected user remain");
2515 RematerizationCandidates.erase(Cand);
2516 for (auto &R : Records) {
2517 assert(!R.LiveSet.contains(Cand) ||
2518 R.LiveSet.contains(PointerToBase[Cand]));
2519 R.LiveSet.remove(Cand);
2520 }
2521 }
2522
2523 // Recollect not rematerialized chains - we might have rewritten
2524 // their sub-chains.
2525 if (!LiveValuesToBeDeleted.empty()) {
2526 for (auto &P : RematerizationCandidates) {
2527 auto &R = P.second;
2528 if (R.ChainToBase.size() > 1) {
2529 R.ChainToBase.clear();
2530 findRematerializableChainToBasePointer(R.ChainToBase, P.first);
2531 }
2532 }
2533 }
2534 }
2535
2536 // From the statepoint live set pick values that are cheaper to recompute than
2537 // to relocate. Remove these values from the live set, rematerialize them after
2538 // statepoint and record them in "Info" structure. Note that similar to
2539 // relocated values we don't do any user adjustments here.
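// For example (conceptual; the relocated operands are only wired in by the
// later alloca-based fix-up): instead of relocating
//   %derived = getelementptr i8, ptr addrspace(1) %base, i64 16
// only %base stays in the live set and the GEP is re-issued after the
// statepoint against the relocated base:
//   %base.relocated = call ptr addrspace(1) @llvm.experimental.gc.relocate(...)
//   %derived.remat  = getelementptr i8, ptr addrspace(1) %base.relocated, i64 16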
2540 static void rematerializeLiveValues(CallBase *Call,
2541 PartiallyConstructedSafepointRecord &Info,
2542 PointerToBaseTy &PointerToBase,
2543 RematCandTy &RematerizationCandidates,
2544 TargetTransformInfo &TTI) {
2545 // Record values we are going to delete from this statepoint live set.
2546 // We cannot do this in the following loop due to iterator invalidation.
2547 SmallVector<Value *, 32> LiveValuesToBeDeleted;
2548
2549 for (Value *LiveValue : Info.LiveSet) {
2550 auto It = RematerizationCandidates.find(LiveValue);
2551 if (It == RematerizationCandidates.end())
2552 continue;
2553
2554 RematerizlizationCandidateRecord &Record = It->second;
2555
2556 InstructionCost Cost = Record.Cost;
2557 // For invokes we need to rematerialize each chain twice - for normal and
2558 // for unwind basic blocks. Model this by multiplying cost by two.
2559 if (isa<InvokeInst>(Call))
2560 Cost *= 2;
2561
2562 // If it's too expensive - skip it.
2563 if (Cost >= RematerializationThreshold)
2564 continue;
2565
2566 // Remove value from the live set
2567 LiveValuesToBeDeleted.push_back(LiveValue);
2568
2569 // Clone instructions and record them inside "Info" structure.
2570
2571 // Different cases for calls and invokes. For invokes we need to clone
2572 // instructions both on normal and unwind path.
2573 if (isa<CallInst>(Call)) {
2574 Instruction *InsertBefore = Call->getNextNode();
2575 assert(InsertBefore);
2576 Instruction *RematerializedValue =
2577 rematerializeChain(Record.ChainToBase, InsertBefore->getIterator(),
2578 Record.RootOfChain, PointerToBase[LiveValue]);
2579 Info.RematerializedValues[RematerializedValue] = LiveValue;
2580 } else {
2581 auto *Invoke = cast<InvokeInst>(Call);
2582
2583 BasicBlock::iterator NormalInsertBefore =
2584 Invoke->getNormalDest()->getFirstInsertionPt();
2585 BasicBlock::iterator UnwindInsertBefore =
2586 Invoke->getUnwindDest()->getFirstInsertionPt();
2587
2588 Instruction *NormalRematerializedValue =
2589 rematerializeChain(Record.ChainToBase, NormalInsertBefore,
2590 Record.RootOfChain, PointerToBase[LiveValue]);
2591 Instruction *UnwindRematerializedValue =
2592 rematerializeChain(Record.ChainToBase, UnwindInsertBefore,
2593 Record.RootOfChain, PointerToBase[LiveValue]);
2594
2595 Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
2596 Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
2597 }
2598 }
2599
2600 // Remove rematerialized values from the live set.
2601 for (auto *LiveValue: LiveValuesToBeDeleted) {
2602 Info.LiveSet.remove(LiveValue);
2603 }
2604 }
2605
2606 static bool inlineGetBaseAndOffset(Function &F,
2607 SmallVectorImpl<CallInst *> &Intrinsics,
2608 DefiningValueMapTy &DVCache,
2609 IsKnownBaseMapTy &KnownBases) {
2610 auto &Context = F.getContext();
2611 auto &DL = F.getDataLayout();
2612 bool Changed = false;
2613
2614 for (auto *Callsite : Intrinsics)
2615 switch (Callsite->getIntrinsicID()) {
2616 case Intrinsic::experimental_gc_get_pointer_base: {
2617 Changed = true;
2618 Value *Base =
2619 findBasePointer(Callsite->getOperand(0), DVCache, KnownBases);
2620 assert(!DVCache.count(Callsite));
2621 Callsite->replaceAllUsesWith(Base);
2622 if (!Base->hasName())
2623 Base->takeName(Callsite);
2624 Callsite->eraseFromParent();
2625 break;
2626 }
2627 case Intrinsic::experimental_gc_get_pointer_offset: {
2628 Changed = true;
2629 Value *Derived = Callsite->getOperand(0);
2630 Value *Base = findBasePointer(Derived, DVCache, KnownBases);
2631 assert(!DVCache.count(Callsite));
2632 unsigned AddressSpace = Derived->getType()->getPointerAddressSpace();
2633 unsigned IntPtrSize = DL.getPointerSizeInBits(AddressSpace);
2634 IRBuilder<> Builder(Callsite);
2635 Value *BaseInt =
2636 Builder.CreatePtrToInt(Base, Type::getIntNTy(Context, IntPtrSize),
2637 suffixed_name_or(Base, ".int", ""));
2638 Value *DerivedInt =
2639 Builder.CreatePtrToInt(Derived, Type::getIntNTy(Context, IntPtrSize),
2640 suffixed_name_or(Derived, ".int", ""));
2641 Value *Offset = Builder.CreateSub(DerivedInt, BaseInt);
2642 Callsite->replaceAllUsesWith(Offset);
2643 Offset->takeName(Callsite);
2644 Callsite->eraseFromParent();
2645 break;
2646 }
2647 default:
2648 llvm_unreachable("Unknown intrinsic");
2649 }
2650
2651 return Changed;
2652 }
2653
2654 static bool insertParsePoints(Function &F, DominatorTree &DT,
2655 TargetTransformInfo &TTI,
2656 SmallVectorImpl<CallBase *> &ToUpdate,
2657 DefiningValueMapTy &DVCache,
2658 IsKnownBaseMapTy &KnownBases) {
2659 std::unique_ptr<GCStrategy> GC = findGCStrategy(F);
2660
2661 #ifndef NDEBUG
2662 // Validate the input
2663 std::set<CallBase *> Uniqued;
2664 Uniqued.insert(ToUpdate.begin(), ToUpdate.end());
2665 assert(Uniqued.size() == ToUpdate.size() && "no duplicates please!");
2666
2667 for (CallBase *Call : ToUpdate)
2668 assert(Call->getFunction() == &F);
2669 #endif
2670
2671 // When inserting gc.relocates for invokes, we need to be able to insert at
2672 // the top of the successor blocks. See the comment on
2673 // normalizeForInvokeSafepoint for exactly what is needed. Note that this step
2674 // may restructure the CFG.
2675 for (CallBase *Call : ToUpdate) {
2676 auto *II = dyn_cast<InvokeInst>(Call);
2677 if (!II)
2678 continue;
2679 normalizeForInvokeSafepoint(II->getNormalDest(), II->getParent(), DT);
2680 normalizeForInvokeSafepoint(II->getUnwindDest(), II->getParent(), DT);
2681 }
2682
2683 // A list of dummy calls added to the IR to keep various values obviously
2684 // live in the IR. We'll remove all of these when done.
2685 SmallVector<CallInst *, 64> Holders;
2686
2687 // Insert a dummy call with all of the deopt operands we'll need for the
2688 // actual safepoint insertion as arguments. This ensures reference operands
2689 // in the deopt argument list are considered live through the safepoint (and
2690 // thus makes sure they get relocated.)
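  // For example (schematic): for
  //   call void @foo() [ "deopt"(ptr addrspace(1) %obj, i32 7) ]
  // only %obj (a handled GC pointer type) is passed to the @__tmp_use holder;
  // the non-pointer i32 needs no relocation and is left alone.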
2691 for (CallBase *Call : ToUpdate) { 2692 SmallVector<Value *, 64> DeoptValues; 2693 2694 for (Value *Arg : GetDeoptBundleOperands(Call)) { 2695 assert(!isUnhandledGCPointerType(Arg->getType(), GC.get()) && 2696 "support for FCA unimplemented"); 2697 if (isHandledGCPointerType(Arg->getType(), GC.get())) 2698 DeoptValues.push_back(Arg); 2699 } 2700 2701 insertUseHolderAfter(Call, DeoptValues, Holders); 2702 } 2703 2704 SmallVector<PartiallyConstructedSafepointRecord, 64> Records(ToUpdate.size()); 2705 2706 // A) Identify all gc pointers which are statically live at the given call 2707 // site. 2708 findLiveReferences(F, DT, ToUpdate, Records, GC.get()); 2709 2710 /// Global mapping from live pointers to a base-defining-value. 2711 PointerToBaseTy PointerToBase; 2712 2713 // B) Find the base pointers for each live pointer 2714 for (size_t i = 0; i < Records.size(); i++) { 2715 PartiallyConstructedSafepointRecord &info = Records[i]; 2716 findBasePointers(DT, DVCache, ToUpdate[i], info, PointerToBase, KnownBases); 2717 } 2718 if (PrintBasePointers) { 2719 errs() << "Base Pairs (w/o Relocation):\n"; 2720 for (auto &Pair : PointerToBase) { 2721 errs() << " derived "; 2722 Pair.first->printAsOperand(errs(), false); 2723 errs() << " base "; 2724 Pair.second->printAsOperand(errs(), false); 2725 errs() << "\n"; 2726 ; 2727 } 2728 } 2729 2730 // The base phi insertion logic (for any safepoint) may have inserted new 2731 // instructions which are now live at some safepoint. The simplest such 2732 // example is: 2733 // loop: 2734 // phi a <-- will be a new base_phi here 2735 // safepoint 1 <-- that needs to be live here 2736 // gep a + 1 2737 // safepoint 2 2738 // br loop 2739 // We insert some dummy calls after each safepoint to definitely hold live 2740 // the base pointers which were identified for that safepoint. We'll then 2741 // ask liveness for _every_ base inserted to see what is now live. Then we 2742 // remove the dummy calls. 2743 Holders.reserve(Holders.size() + Records.size()); 2744 for (size_t i = 0; i < Records.size(); i++) { 2745 PartiallyConstructedSafepointRecord &Info = Records[i]; 2746 2747 SmallVector<Value *, 128> Bases; 2748 for (auto *Derived : Info.LiveSet) { 2749 assert(PointerToBase.count(Derived) && "Missed base for derived pointer"); 2750 Bases.push_back(PointerToBase[Derived]); 2751 } 2752 2753 insertUseHolderAfter(ToUpdate[i], Bases, Holders); 2754 } 2755 2756 // By selecting base pointers, we've effectively inserted new uses. Thus, we 2757 // need to rerun liveness. We may *also* have inserted new defs, but that's 2758 // not the key issue. 2759 recomputeLiveInValues(F, DT, ToUpdate, Records, PointerToBase, GC.get()); 2760 2761 if (PrintBasePointers) { 2762 errs() << "Base Pairs: (w/Relocation)\n"; 2763 for (auto Pair : PointerToBase) { 2764 errs() << " derived "; 2765 Pair.first->printAsOperand(errs(), false); 2766 errs() << " base "; 2767 Pair.second->printAsOperand(errs(), false); 2768 errs() << "\n"; 2769 } 2770 } 2771 2772 // It is possible that non-constant live variables have a constant base. For 2773 // example, a GEP with a variable offset from a global. In this case we can 2774 // remove it from the liveset. We already don't add constants to the liveset 2775 // because we assume they won't move at runtime and the GC doesn't need to be 2776 // informed about them. The same reasoning applies if the base is constant. 
2777 // Note that the relocation placement code relies on this filtering for
2778 // correctness as it expects the base to be in the liveset, which isn't true
2779 // if the base is constant.
2780 for (auto &Info : Records) {
2781 Info.LiveSet.remove_if([&](Value *LiveV) {
2782 assert(PointerToBase.count(LiveV) && "Missed base for derived pointer");
2783 return isa<Constant>(PointerToBase[LiveV]);
2784 });
2785 }
2786
2787 for (CallInst *CI : Holders)
2788 CI->eraseFromParent();
2789
2790 Holders.clear();
2791
2792 // Compute the cost of possible re-materialization of derived pointers.
2793 RematCandTy RematerizationCandidates;
2794 findRematerializationCandidates(PointerToBase, RematerizationCandidates, TTI);
2795
2796 // In order to reduce live set of statepoint we might choose to rematerialize
2797 // some values instead of relocating them. This is purely an optimization and
2798 // does not influence correctness.
2799 // First try rematerialization at uses, then after statepoints.
2800 rematerializeLiveValuesAtUses(RematerizationCandidates, Records,
2801 PointerToBase);
2802 for (size_t i = 0; i < Records.size(); i++)
2803 rematerializeLiveValues(ToUpdate[i], Records[i], PointerToBase,
2804 RematerizationCandidates, TTI);
2805
2806 // We need this to safely RAUW and delete call or invoke return values that
2807 // may themselves be live over a statepoint. For details, please see usage in
2808 // makeStatepointExplicitImpl.
2809 std::vector<DeferredReplacement> Replacements;
2810
2811 // Now run through and replace the existing statepoints with new ones with
2812 // the live variables listed. We do not yet update uses of the values being
2813 // relocated. We have references to live variables that need to
2814 // survive to the last iteration of this loop. (By construction, the
2815 // previous statepoint cannot be a live variable, thus we can and do remove
2816 // the old statepoint calls as we go.)
2817 for (size_t i = 0; i < Records.size(); i++)
2818 makeStatepointExplicit(DT, ToUpdate[i], Records[i], Replacements,
2819 PointerToBase, GC.get());
2820
2821 ToUpdate.clear(); // prevent accidental use of invalid calls.
2822
2823 for (auto &PR : Replacements)
2824 PR.doReplacement();
2825
2826 Replacements.clear();
2827
2828 for (auto &Info : Records) {
2829 // These live sets may contain stale Value pointers, since we replaced calls
2830 // with operand bundles with calls wrapped in gc.statepoint, and some of
2831 // those calls may have been def'ing live gc pointers. Clear these out to
2832 // avoid accidentally using them.
2833 //
2834 // TODO: We should create a separate data structure that does not contain
2835 // these live sets, and migrate to using that data structure from this point
2836 // onward.
2837 Info.LiveSet.clear();
2838 }
2839 PointerToBase.clear();
2840
2841 // Do all the fixups of the original live variables to their relocated selves
2842 SmallVector<Value *, 128> Live;
2843 for (const PartiallyConstructedSafepointRecord &Info : Records) {
2844 // We can't simply save the live set from the original insertion. One of
2845 // the live values might be the result of a call which needs a safepoint.
2846 // That Value* no longer exists and we need to use the new gc_result.
2847 // Thankfully, the live set is embedded in the statepoint (and updated), so
2848 // we just grab that.
2849 llvm::append_range(Live, Info.StatepointToken->gc_live());
2850 #ifndef NDEBUG
2851 // Do some basic validation checking on our liveness results before
2852 // performing relocation.
// Relocation can and will turn mistakes in liveness
2853 // results into nonsensical code which is much harder to debug.
2854 // TODO: It would be nice to test consistency as well
2855 assert(DT.isReachableFromEntry(Info.StatepointToken->getParent()) &&
2856 "statepoint must be reachable or liveness is meaningless");
2857 for (Value *V : Info.StatepointToken->gc_live()) {
2858 if (!isa<Instruction>(V))
2859 // Non-instruction values trivially dominate all possible uses
2860 continue;
2861 auto *LiveInst = cast<Instruction>(V);
2862 assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
2863 "unreachable values should never be live");
2864 assert(DT.dominates(LiveInst, Info.StatepointToken) &&
2865 "basic SSA liveness expectation violated by liveness analysis");
2866 }
2867 #endif
2868 }
2869 unique_unsorted(Live);
2870
2871 #ifndef NDEBUG
2872 // Validation check
2873 for (auto *Ptr : Live)
2874 assert(isHandledGCPointerType(Ptr->getType(), GC.get()) &&
2875 "must be a gc pointer type");
2876 #endif
2877
2878 relocationViaAlloca(F, DT, Live, Records);
2879 return !Records.empty();
2880 }
2881
2882 // List of all parameter and return attributes which must be stripped when
2883 // lowering from the abstract machine model. Note that we list attributes
2884 // here which aren't valid as return attributes; that is okay.
2885 static AttributeMask getParamAndReturnAttributesToRemove() {
2886 AttributeMask R;
2887 R.addAttribute(Attribute::Dereferenceable);
2888 R.addAttribute(Attribute::DereferenceableOrNull);
2889 R.addAttribute(Attribute::ReadNone);
2890 R.addAttribute(Attribute::ReadOnly);
2891 R.addAttribute(Attribute::WriteOnly);
2892 R.addAttribute(Attribute::NoAlias);
2893 R.addAttribute(Attribute::NoFree);
2894 return R;
2895 }
2896
2897 static void stripNonValidAttributesFromPrototype(Function &F) {
2898 LLVMContext &Ctx = F.getContext();
2899
2900 // Intrinsics are very delicate. Lowering sometimes depends on the presence
2901 // of certain attributes for correctness, but we may have also inferred
2902 // additional ones in the abstract machine model which need to be stripped. This
2903 // assumes that the attributes defined in Intrinsic.td are conservatively
2904 // correct for both physical and abstract model.
2905 if (Intrinsic::ID id = F.getIntrinsicID()) {
2906 F.setAttributes(Intrinsic::getAttributes(Ctx, id));
2907 return;
2908 }
2909
2910 AttributeMask R = getParamAndReturnAttributesToRemove();
2911 for (Argument &A : F.args())
2912 if (isa<PointerType>(A.getType()))
2913 F.removeParamAttrs(A.getArgNo(), R);
2914
2915 if (isa<PointerType>(F.getReturnType()))
2916 F.removeRetAttrs(R);
2917
2918 for (auto Attr : FnAttrsToStrip)
2919 F.removeFnAttr(Attr);
2920 }
2921
2922 /// Certain metadata on instructions is invalid after running RS4GC.
2923 /// Optimizations that run after RS4GC can incorrectly use this metadata to
2924 /// optimize functions. We drop such metadata on the instruction.
2925 static void stripInvalidMetadataFromInstruction(Instruction &I) {
2926 if (!isa<LoadInst>(I) && !isa<StoreInst>(I))
2927 return;
2928 // These are the attributes that are still valid on loads and stores after
2929 // RS4GC.
2930 // The metadata implying dereferenceability and noalias are (conservatively)
2931 // dropped. This is because semantically, after RewriteStatepointsForGC runs,
2932 // all calls to gc.statepoint "free" the entire heap. Also, gc.statepoint can
2933 // touch the entire heap including noalias objects.
  // Note: The reasoning is the same as for stripping the analogous
  // dereferenceability and noalias attributes.
  // We also drop the invariant.load metadata on the load because that metadata
  // implies the address operand of the load points to memory that is never
  // changed once it becomes dereferenceable. This is no longer true after
  // RS4GC. Similar reasoning applies to invariant.group metadata, which
  // applies to loads within a group.
  unsigned ValidMetadataAfterRS4GC[] = {LLVMContext::MD_tbaa,
                                        LLVMContext::MD_range,
                                        LLVMContext::MD_alias_scope,
                                        LLVMContext::MD_nontemporal,
                                        LLVMContext::MD_nonnull,
                                        LLVMContext::MD_align,
                                        LLVMContext::MD_type};

  // Drop all metadata on the instruction other than ValidMetadataAfterRS4GC.
  I.dropUnknownNonDebugMetadata(ValidMetadataAfterRS4GC);
}

static void stripNonValidDataFromBody(Function &F) {
  if (F.empty())
    return;

  LLVMContext &Ctx = F.getContext();
  MDBuilder Builder(Ctx);

  // Set of invariant.start instructions that we need to remove.
  // Use this to avoid invalidating the instruction iterator.
  SmallVector<IntrinsicInst *, 12> InvariantStartInstructions;

  for (Instruction &I : instructions(F)) {
    // invariant.start on a memory location implies that the referenced memory
    // location is constant and unchanging. This is no longer true after
    // RewriteStatepointsForGC runs because there can be calls to gc.statepoint
    // which free the entire heap, and the presence of invariant.start allows
    // the optimizer to sink the load of a memory location past a statepoint,
    // which is incorrect.
    if (auto *II = dyn_cast<IntrinsicInst>(&I))
      if (II->getIntrinsicID() == Intrinsic::invariant_start) {
        InvariantStartInstructions.push_back(II);
        continue;
      }

    if (MDNode *Tag = I.getMetadata(LLVMContext::MD_tbaa)) {
      MDNode *MutableTBAA = Builder.createMutableTBAAAccessTag(Tag);
      I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
    }

    stripInvalidMetadataFromInstruction(I);

    AttributeMask R = getParamAndReturnAttributesToRemove();
    if (auto *Call = dyn_cast<CallBase>(&I)) {
      for (int i = 0, e = Call->arg_size(); i != e; i++)
        if (isa<PointerType>(Call->getArgOperand(i)->getType()))
          Call->removeParamAttrs(i, R);
      if (isa<PointerType>(Call->getType()))
        Call->removeRetAttrs(R);
    }
  }

  // Delete the invariant.start instructions and RAUW poison.
  for (auto *II : InvariantStartInstructions) {
    II->replaceAllUsesWith(PoisonValue::get(II->getType()));
    II->eraseFromParent();
  }
}

/// Looks up the GC strategy for a given function, returning null if the
/// function doesn't have a GC tag. The strategy is stored in the cache.
static std::unique_ptr<GCStrategy> findGCStrategy(Function &F) {
  if (!F.hasGC())
    return nullptr;

  return getGCStrategy(F.getGC());
}
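
// Illustration (schematic IR, not taken from an actual test; @f and %p are
// made-up names): after the stripping performed above, a prototype such as
//
//   declare dereferenceable(16) ptr addrspace(1) @f(ptr addrspace(1) noalias %p)
//
// is reduced to
//
//   declare ptr addrspace(1) @f(ptr addrspace(1) %p)
//
// because any gc.statepoint between the point where such an attribute was
// established and a later use may relocate or "free" the underlying object.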
/// Returns true if this function should be rewritten by this pass. The main
/// point of this function is as an extension point for custom logic.
static bool shouldRewriteStatepointsIn(Function &F) {
  if (!F.hasGC())
    return false;

  std::unique_ptr<GCStrategy> Strategy = findGCStrategy(F);

  assert(Strategy && "GC strategy is required by function, but was not found");

  return Strategy->useRS4GC();
}

static void stripNonValidData(Module &M) {
#ifndef NDEBUG
  assert(llvm::any_of(M, shouldRewriteStatepointsIn) && "precondition!");
#endif

  for (Function &F : M)
    stripNonValidAttributesFromPrototype(F);

  for (Function &F : M)
    stripNonValidDataFromBody(F);
}

bool RewriteStatepointsForGC::runOnFunction(Function &F, DominatorTree &DT,
                                            TargetTransformInfo &TTI,
                                            const TargetLibraryInfo &TLI) {
  assert(!F.isDeclaration() && !F.empty() &&
         "need a function body to rewrite statepoints in");
  assert(shouldRewriteStatepointsIn(F) && "mismatch in rewrite decision");

  auto NeedsRewrite = [&TLI](Instruction &I) {
    if (const auto *Call = dyn_cast<CallBase>(&I)) {
      if (isa<GCStatepointInst>(Call))
        return false;
      if (callsGCLeafFunction(Call, TLI))
        return false;

      // Normally it's up to the frontend to make sure that non-leaf calls also
      // have proper deopt state if it is required. We make an exception for
      // element atomic memcpy/memmove intrinsics here. Unlike other
      // intrinsics, these are non-leaf by default. They might be generated by
      // the optimizer, which doesn't know how to produce a proper deopt state.
      // So if we see a non-leaf memcpy/memmove without deopt state, just treat
      // it as a leaf copy and don't produce a statepoint.
      if (!AllowStatepointWithNoDeoptInfo && !Call->hasDeoptState()) {
        assert((isa<AtomicMemCpyInst>(Call) || isa<AtomicMemMoveInst>(Call)) &&
               "Don't expect any other calls here!");
        return false;
      }
      return true;
    }
    return false;
  };

  // Delete any unreachable statepoints so that we don't have unrewritten
  // statepoints surviving this pass. This makes testing easier and the
  // resulting IR less confusing to human readers.
  DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy);
  bool MadeChange = removeUnreachableBlocks(F, &DTU);
  // Flush the Dominator Tree.
  DTU.getDomTree();

  // Gather all the statepoints which need to be rewritten. Be careful to only
  // consider those in reachable code since we need to ask dominance queries
  // when rewriting.
  SmallVector<CallBase *, 64> ParsePointNeeded;
  SmallVector<CallInst *, 64> Intrinsics;
  for (Instruction &I : instructions(F)) {
    // TODO: only the ones with the flag set!
    if (NeedsRewrite(I)) {
      // NOTE: removeUnreachableBlocks() is stronger than
      // DominatorTree::isReachableFromEntry(). In other words,
      // removeUnreachableBlocks can remove some blocks for which
      // isReachableFromEntry() returns true.
      assert(DT.isReachableFromEntry(I.getParent()) &&
             "no unreachable blocks expected");
      ParsePointNeeded.push_back(cast<CallBase>(&I));
    }
    if (auto *CI = dyn_cast<CallInst>(&I))
      if (CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_base ||
          CI->getIntrinsicID() == Intrinsic::experimental_gc_get_pointer_offset)
        Intrinsics.emplace_back(CI);
  }

  // Return early if there is no work to do.
  if (ParsePointNeeded.empty() && Intrinsics.empty())
    return MadeChange;

  // As a prepass, go ahead and aggressively destroy single-entry phi nodes.
  // These are created by LCSSA. They have the effect of increasing the size
  // of liveness sets for no good reason. It may be harder to do this post
  // insertion since relocations and base phis can confuse things.
  for (BasicBlock &BB : F)
    if (BB.getUniquePredecessor())
      MadeChange |= FoldSingleEntryPHINodes(&BB);

  // Before we start introducing relocations, we want to tweak the IR a bit to
  // avoid unfortunate code generation effects. The main example is that we
  // want to try to make sure the comparison feeding a branch is after any
  // safepoints. Otherwise, we end up with a comparison of pre-relocation
  // values feeding a branch after relocation. This is semantically correct,
  // but results in extra register pressure since both the pre-relocation and
  // post-relocation copies must be available in registers. For code without
  // relocations this is handled elsewhere, but teaching the scheduler to
  // reverse the transform we're about to do would be slightly complex.
  // Note: This may extend the live range of the inputs to the icmp and thus
  // increase the liveset of any statepoint we move over. This is profitable
  // as long as all statepoints are in rare blocks. If we had in-register
  // lowering for live values this would be a much safer transform.
  auto getConditionInst = [](Instruction *TI) -> Instruction * {
    if (auto *BI = dyn_cast<BranchInst>(TI))
      if (BI->isConditional())
        return dyn_cast<Instruction>(BI->getCondition());
    // TODO: Extend this to handle switches
    return nullptr;
  };
  for (BasicBlock &BB : F) {
    Instruction *TI = BB.getTerminator();
    if (auto *Cond = getConditionInst(TI))
      // TODO: Handle more than just ICmps here. We should be able to move
      // most instructions without side effects or memory access.
      if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
        MadeChange = true;
        Cond->moveBefore(TI->getIterator());
      }
  }

  // Nasty workaround - The base computation code in the main algorithm doesn't
  // consider the fact that a GEP can be used to convert a scalar to a vector.
  // The right fix for this is to integrate GEPs into the base rewriting
  // algorithm properly; this is just a short-term workaround to prevent
  // crashes by canonicalizing such GEPs into fully vector GEPs.
  for (Instruction &I : instructions(F)) {
    if (!isa<GetElementPtrInst>(I))
      continue;

    unsigned VF = 0;
    for (unsigned i = 0; i < I.getNumOperands(); i++)
      if (auto *OpndVTy = dyn_cast<VectorType>(I.getOperand(i)->getType())) {
        assert(VF == 0 ||
               VF == cast<FixedVectorType>(OpndVTy)->getNumElements());
        VF = cast<FixedVectorType>(OpndVTy)->getNumElements();
      }

    // It's the vector-to-scalar traversal through the pointer operand which
    // confuses base pointer rewriting, so limit ourselves to that case.
    if (!I.getOperand(0)->getType()->isVectorTy() && VF != 0) {
      IRBuilder<> B(&I);
      auto *Splat = B.CreateVectorSplat(VF, I.getOperand(0));
      I.setOperand(0, Splat);
      MadeChange = true;
    }
  }
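
  // Illustration (schematic IR; %base, %offsets and the element type are
  // made-up names): a GEP with a scalar pointer operand but vector indices
  // such as
  //
  //   %gep = getelementptr i32, ptr addrspace(1) %base, <2 x i64> %offsets
  //
  // is canonicalized by the loop above into a fully vector GEP, roughly
  //
  //   %base.splat = <splat of %base to <2 x ptr addrspace(1)>>
  //   %gep = getelementptr i32, <2 x ptr addrspace(1)> %base.splat,
  //                             <2 x i64> %offsets
  //
  // so that base pointer rewriting never has to walk from a vector value back
  // to a scalar pointer operand.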

  // Cache the 'defining value' relation used in the computation and
  // insertion of base phis and selects. This ensures that we don't insert
  // large numbers of duplicate base_phis. Use one cache for both
  // inlineGetBaseAndOffset() and insertParsePoints().
  DefiningValueMapTy DVCache;

  // Mapping from a base value to a flag indicating whether it's a known base
  // or not.
  IsKnownBaseMapTy KnownBases;

  if (!Intrinsics.empty())
    // Inline @gc.get.pointer.base() and @gc.get.pointer.offset() before
    // finding live references.
    MadeChange |= inlineGetBaseAndOffset(F, Intrinsics, DVCache, KnownBases);

  if (!ParsePointNeeded.empty())
    MadeChange |=
        insertParsePoints(F, DT, TTI, ParsePointNeeded, DVCache, KnownBases);

  return MadeChange;
}

// liveness computation via standard dataflow
// -------------------------------------------------------------------

// TODO: Consider using bitvectors for liveness; the set of potentially
// interesting values should be small and easy to pre-compute.

/// Compute the live-in set for the location \p Begin, starting from
/// the live-out set of the basic block.
static void computeLiveInValues(BasicBlock::reverse_iterator Begin,
                                BasicBlock::reverse_iterator End,
                                SetVector<Value *> &LiveTmp, GCStrategy *GC) {
  for (auto &I : make_range(Begin, End)) {
    // KILL/Def - Remove this definition from LiveIn
    LiveTmp.remove(&I);

    // Don't consider *uses* in PHI nodes; we handle their contribution to
    // predecessor blocks when we seed the LiveOut sets
    if (isa<PHINode>(I))
      continue;

    // USE - Add to the LiveIn set for this instruction
    for (Value *V : I.operands()) {
      assert(!isUnhandledGCPointerType(V->getType(), GC) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V)) {
        // The choice to exclude all things constant here is slightly subtle.
        // There are two independent reasons:
        // - We assume that things which are constant (from LLVM's definition)
        //   do not move at runtime. For example, the address of a global
        //   variable is fixed, even though its contents may not be.
        // - Second, we can't disallow arbitrary inttoptr constants even
        //   if the language frontend does. Optimization passes are free to
        //   locally exploit facts without respect to global reachability.
        //   This can create sections of code which are dynamically unreachable
        //   and contain just about anything. (see constants.ll in tests)
        LiveTmp.insert(V);
      }
    }
  }
}

static void computeLiveOutSeed(BasicBlock *BB, SetVector<Value *> &LiveTmp,
                               GCStrategy *GC) {
  for (BasicBlock *Succ : successors(BB)) {
    for (auto &I : *Succ) {
      PHINode *PN = dyn_cast<PHINode>(&I);
      if (!PN)
        break;

      Value *V = PN->getIncomingValueForBlock(BB);
      assert(!isUnhandledGCPointerType(V->getType(), GC) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType(), GC) && !isa<Constant>(V))
        LiveTmp.insert(V);
    }
  }
}

static SetVector<Value *> computeKillSet(BasicBlock *BB, GCStrategy *GC) {
  SetVector<Value *> KillSet;
  for (Instruction &I : *BB)
    if (isHandledGCPointerType(I.getType(), GC))
      KillSet.insert(&I);
  return KillSet;
}
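
// Illustration (schematic IR; the block and value names are made up): given a
// merge block
//
//   merge:
//     %p = phi ptr addrspace(1) [ %a, %left ], [ %b, %right ]
//
// the use of %a is credited to the LiveOut seed of %left (and %b to %right)
// by computeLiveOutSeed above, rather than to the merge block itself; this is
// why computeLiveInValues skips PHI operands.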

#ifndef NDEBUG
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
/// validation check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, SetVector<Value *> &Live,
                          Instruction *TI, bool TermOkay = false) {
  for (Value *V : Live) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      // The terminator can be a member of the LiveOut set. LLVM's definition
      // of instruction dominance states that V does not dominate itself. As
      // such, we need to special case this to allow it.
      if (TermOkay && TI == I)
        continue;
      assert(DT.dominates(I, TI) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
  }
}

/// Check that all the liveness sets used during the computation of liveness
/// obey basic SSA properties. This is useful for finding cases where we miss
/// a def.
static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
                          BasicBlock &BB) {
  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
}
#endif

static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data, GCStrategy *GC) {
  SmallSetVector<BasicBlock *, 32> Worklist;

  // Seed the liveness for each individual block
  for (BasicBlock &BB : F) {
    Data.KillSet[&BB] = computeKillSet(&BB, GC);
    Data.LiveSet[&BB].clear();
    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB], GC);

#ifndef NDEBUG
    for (Value *Kill : Data.KillSet[&BB])
      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
#endif

    Data.LiveOut[&BB] = SetVector<Value *>();
    computeLiveOutSeed(&BB, Data.LiveOut[&BB], GC);
    Data.LiveIn[&BB] = Data.LiveSet[&BB];
    Data.LiveIn[&BB].set_union(Data.LiveOut[&BB]);
    Data.LiveIn[&BB].set_subtract(Data.KillSet[&BB]);
    if (!Data.LiveIn[&BB].empty())
      Worklist.insert(pred_begin(&BB), pred_end(&BB));
  }

  // Propagate that liveness until stable
  while (!Worklist.empty()) {
    BasicBlock *BB = Worklist.pop_back_val();

    // Compute our new liveout set, then exit early if it hasn't changed
    // despite the contribution of our successors.
    SetVector<Value *> LiveOut = Data.LiveOut[BB];
    const auto OldLiveOutSize = LiveOut.size();
    for (BasicBlock *Succ : successors(BB)) {
      assert(Data.LiveIn.count(Succ));
      LiveOut.set_union(Data.LiveIn[Succ]);
    }
    // assert: OldLiveOut is a subset of LiveOut
    if (OldLiveOutSize == LiveOut.size()) {
      // If the sets are the same size, then we didn't actually add anything
      // when unioning our successors' LiveIn sets. Thus, the LiveIn of this
      // block hasn't changed.
      continue;
    }
    Data.LiveOut[BB] = LiveOut;

    // Apply the effects of this basic block
    SetVector<Value *> LiveTmp = LiveOut;
    LiveTmp.set_union(Data.LiveSet[BB]);
    LiveTmp.set_subtract(Data.KillSet[BB]);

    assert(Data.LiveIn.count(BB));
    const SetVector<Value *> &OldLiveIn = Data.LiveIn[BB];
    // assert: OldLiveIn is a subset of LiveTmp
    if (OldLiveIn.size() != LiveTmp.size()) {
      Data.LiveIn[BB] = LiveTmp;
      Worklist.insert(pred_begin(BB), pred_end(BB));
    }
  } // while (!Worklist.empty())
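
  // In dataflow terms, the fixed point computed above is, for each block BB:
  //   LiveOut(BB) = union over successors S of LiveIn(S)
  //   LiveIn(BB)  = (LiveOut(BB) union LiveSet(BB)) \ KillSet(BB)
  // where LiveSet(BB) holds the upward-exposed gc pointer uses of BB and
  // KillSet(BB) holds the gc pointer values defined in BB.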

#ifndef NDEBUG
  // Verify our output against SSA properties. This helps catch any
  // missing kills during the above iteration.
  for (BasicBlock &BB : F)
    checkBasicSSA(DT, Data, BB);
#endif
}

static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &Out, GCStrategy *GC) {
  BasicBlock *BB = Inst->getParent();

  // Note: The copy is intentional and required
  assert(Data.LiveOut.count(BB));
  SetVector<Value *> LiveOut = Data.LiveOut[BB];

  // We want to handle the statepoint itself oddly. Its call result is not
  // live (as it normally would be), nor are its arguments (unless they're
  // used again later). This adjustment is specifically what we need to
  // relocate.
  computeLiveInValues(BB->rbegin(), ++Inst->getIterator().getReverse(), LiveOut,
                      GC);
  LiveOut.remove(Inst);
  Out.insert(LiveOut.begin(), LiveOut.end());
}

static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  CallBase *Call,
                                  PartiallyConstructedSafepointRecord &Info,
                                  PointerToBaseTy &PointerToBase,
                                  GCStrategy *GC) {
  StatepointLiveSetTy Updated;
  findLiveSetAtInst(Call, RevisedLivenessData, Updated, GC);

  // We may have base pointers which are now live that weren't before. We need
  // to update the PointerToBase structure to reflect this.
  for (auto *V : Updated)
    PointerToBase.insert({V, V});

  Info.LiveSet = Updated;
}
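
// For orientation only: a schematic (not verbatim; intrinsic signatures,
// mangling, and statepoint operands elided; @foo, %obj and %res are made-up
// names) sketch of the rewrite this pass performs. A call such as
//
//   %res = call ptr addrspace(1) @foo(ptr addrspace(1) %obj)
//
// where %obj is a live gc pointer becomes, roughly,
//
//   %tok = call token @llvm.experimental.gc.statepoint(..., @foo, ...)
//                     [ "gc-live"(ptr addrspace(1) %obj) ]
//   %obj.relocated = call ptr addrspace(1)
//                     @llvm.experimental.gc.relocate(token %tok, ...)
//   %res = call ptr addrspace(1) @llvm.experimental.gc.result(token %tok)
//
// with uses of %obj that follow the statepoint rewritten to use
// %obj.relocated. See the Statepoints documentation for the exact form.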