//===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Rewrite an existing set of gc.statepoints such that they make potential
// relocations performed by the garbage collector explicit in the IR.
//
//===----------------------------------------------------------------------===//

#include "llvm/Pass.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/Statepoint.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/Verifier.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Cloning.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/PromoteMemToReg.h"

#define DEBUG_TYPE "rewrite-statepoints-for-gc"

using namespace llvm;

// Print the liveset found at the insert location
static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden,
                                  cl::init(false));
static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden,
                                      cl::init(false));
// Print out the base pointers for debugging
static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden,
                                       cl::init(false));

// Cost threshold measuring when it is profitable to rematerialize a value
// instead of relocating it
static cl::opt<unsigned>
RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden,
                           cl::init(6));

#ifdef XDEBUG
static bool ClobberNonLive = true;
#else
static bool ClobberNonLive = false;
#endif
static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live",
                                                  cl::location(ClobberNonLive),
                                                  cl::Hidden);

namespace {
struct RewriteStatepointsForGC : public ModulePass {
  static char ID; // Pass identification, replacement for typeid

  RewriteStatepointsForGC() : ModulePass(ID) {
    initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry());
  }
  bool runOnFunction(Function &F);
  bool runOnModule(Module &M) override {
    bool Changed = false;
    for (Function &F : M)
      Changed |= runOnFunction(F);

    if (Changed) {
      // stripDereferenceabilityInfo asserts that shouldRewriteStatepointsIn
      // returns true for at least one function in the module. Since at least
      // one function changed, we know that the precondition is satisfied.
      stripDereferenceabilityInfo(M);
    }

    return Changed;
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    // We add and rewrite a bunch of instructions, but don't really do much
    // else. We could in theory preserve a lot more analyses here.
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<TargetTransformInfoWrapperPass>();
  }

  /// The IR fed into RewriteStatepointsForGC may have had attributes implying
  /// dereferenceability that are no longer valid/correct after
  /// RewriteStatepointsForGC has run. This is because semantically, after
  /// RewriteStatepointsForGC runs, all calls to gc.statepoint "free" the
  /// entire heap. stripDereferenceabilityInfo (conservatively) restores
  /// correctness by erasing all attributes in the module that externally
  /// imply dereferenceability.
  ///
  void stripDereferenceabilityInfo(Module &M);

  // Helpers for stripDereferenceabilityInfo
  void stripDereferenceabilityInfoFromBody(Function &F);
  void stripDereferenceabilityInfoFromPrototype(Function &F);
};
} // namespace

char RewriteStatepointsForGC::ID = 0;

ModulePass *llvm::createRewriteStatepointsForGCPass() {
  return new RewriteStatepointsForGC();
}

INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
                      "Make relocations explicit at statepoints", false, false)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc",
                    "Make relocations explicit at statepoints", false, false)

namespace {
struct GCPtrLivenessData {
  /// Values defined in this block.
  DenseMap<BasicBlock *, DenseSet<Value *>> KillSet;
  /// Values used in this block (and thus live); does not include values
  /// killed within this block.
  DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet;

  /// Values live into this basic block (i.e. used by any
  /// instruction in this basic block or ones reachable from here)
  DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn;

  /// Values live out of this basic block (i.e. live into
  /// any successor block)
  DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut;
};

// The type of the internal cache used inside the findBasePointers family
// of functions. From the caller's perspective, this is an opaque type and
// should not be inspected.
//
// In the actual implementation this caches two relations:
// - The base relation itself (i.e. this pointer is based on that one)
// - The base defining value relation (i.e. before base_phi insertion)
// Generally, after the execution of a full findBasePointer call, only the
// base relation will remain. Internally, we add a mixture of the two
// types, then update all entries of the second type to the first type.
typedef DenseMap<Value *, Value *> DefiningValueMapTy;
typedef DenseSet<llvm::Value *> StatepointLiveSetTy;
typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy;

struct PartiallyConstructedSafepointRecord {
  /// The set of values known to be live across this safepoint
  StatepointLiveSetTy liveset;

  /// Mapping from live pointers to a base-defining-value
  DenseMap<llvm::Value *, llvm::Value *> PointerToBase;

  /// The *new* gc.statepoint instruction itself. This produces the token
  /// that normal path gc.relocates and the gc.result are tied to.
  Instruction *StatepointToken;

  /// Instruction to which exceptional gc relocates are attached.
  /// Makes it easier to iterate through them during relocationViaAlloca.
  Instruction *UnwindToken;

  /// Record live values that we rematerialized instead of relocating.
  /// They are not included in the 'liveset' field.
  /// Maps each rematerialized copy to its original value.
  RematerializedValueMapTy RematerializedValues;
};
}

/// Compute the live-in set for every basic block in the function
static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data);

/// Given results from the dataflow liveness computation, find the set of live
/// Values at a particular instruction.
static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &out);

// TODO: Once we can get to the GCStrategy, this becomes
// Optional<bool> isGCManagedPointer(const Value *V) const override {

static bool isGCPointerType(Type *T) {
  if (auto *PT = dyn_cast<PointerType>(T))
    // For the sake of this example GC, we arbitrarily pick addrspace(1) as
    // our GC managed heap. We know that a pointer into this heap needs to be
    // updated and that no other pointer does.
    return (1 == PT->getAddressSpace());
  return false;
}

// Return true if this type is one which a) is a gc pointer or contains a GC
// pointer and b) is of a type this code expects to encounter as a live value.
// (The insertion code will assert that a type which matches (a) and not (b)
// is not encountered.)
static bool isHandledGCPointerType(Type *T) {
  // We fully support gc pointers
  if (isGCPointerType(T))
    return true;
  // We partially support vectors of gc pointers. The code will assert if it
  // can't handle something.
  if (auto VT = dyn_cast<VectorType>(T))
    if (isGCPointerType(VT->getElementType()))
      return true;
  return false;
}

#ifndef NDEBUG
/// Returns true if this type contains a gc pointer whether we know how to
/// handle that type or not.
static bool containsGCPtrType(Type *Ty) {
  if (isGCPointerType(Ty))
    return true;
  if (VectorType *VT = dyn_cast<VectorType>(Ty))
    return isGCPointerType(VT->getScalarType());
  if (ArrayType *AT = dyn_cast<ArrayType>(Ty))
    return containsGCPtrType(AT->getElementType());
  if (StructType *ST = dyn_cast<StructType>(Ty))
    return std::any_of(
        ST->subtypes().begin(), ST->subtypes().end(),
        [](Type *SubType) { return containsGCPtrType(SubType); });
  return false;
}

// Returns true if this is a type which a) is a gc pointer or contains a GC
// pointer and b) is of a type which the code doesn't expect (i.e. first class
// aggregates). Used to trip assertions.
static bool isUnhandledGCPointerType(Type *Ty) {
  return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty);
}
#endif

static bool order_by_name(llvm::Value *a, llvm::Value *b) {
  if (a->hasName() && b->hasName()) {
    return -1 == a->getName().compare(b->getName());
  } else if (a->hasName() && !b->hasName()) {
    return true;
  } else if (!a->hasName() && b->hasName()) {
    return false;
  } else {
    // Better than nothing, but not stable
    return a < b;
  }
}

// Conservatively identifies any definitions which might be live at the
// given instruction. The analysis is performed immediately before the
// given instruction. Values defined by that instruction are not considered
// live. Values used by that instruction are considered live.
static void analyzeParsePointLiveness(
    DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData,
    const CallSite &CS, PartiallyConstructedSafepointRecord &result) {
  Instruction *inst = CS.getInstruction();

  StatepointLiveSetTy liveset;
  findLiveSetAtInst(inst, OriginalLivenessData, liveset);

  if (PrintLiveSet) {
    // Note: This output is used by several of the test cases.
    // The order of elements in a set is not stable; put them in a vector and
    // sort by name.
    SmallVector<Value *, 64> Temp;
    Temp.insert(Temp.end(), liveset.begin(), liveset.end());
    std::sort(Temp.begin(), Temp.end(), order_by_name);
    errs() << "Live Variables:\n";
    for (Value *V : Temp)
      dbgs() << " " << V->getName() << " " << *V << "\n";
  }
  if (PrintLiveSetSize) {
    errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n";
    errs() << "Number live values: " << liveset.size() << "\n";
  }
  result.liveset = liveset;
}

static Value *findBaseDefiningValue(Value *I);

/// Return a base defining value for the 'Index' element of the given vector
/// instruction 'I'. If Index is null, returns a BDV for the entire vector
/// 'I'. As an optimization, this method will try to determine when the
/// element is known to already be a base pointer. If this can be established,
/// the second value in the returned pair will be true. Note that either a
/// vector or a pointer typed value can be returned. For the former, the
/// vector returned is a BDV (and possibly a base) of the entire vector 'I'.
/// If the latter, the returned pointer is a BDV (or possibly a base) for the
/// particular element in 'I'.
static std::pair<Value *, bool>
findBaseDefiningValueOfVector(Value *I, Value *Index = nullptr) {
  assert(I->getType()->isVectorTy() &&
         cast<VectorType>(I->getType())->getElementType()->isPointerTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");

  // Each case parallels findBaseDefiningValue below, see that code for
  // detailed motivation.

  if (isa<Argument>(I))
    // An incoming argument to the function is a base pointer
    return std::make_pair(I, true);

  // We shouldn't see the address of a global as a vector value?
  assert(!isa<GlobalVariable>(I) &&
         "unexpected global variable found in base of vector");

  // Inlining could possibly introduce phi nodes that contain
  // undef if the callee has multiple returns.
  if (isa<UndefValue>(I))
    // Utterly meaningless, but useful for dealing with partially optimized
    // code.
    return std::make_pair(I, true);

  // Due to inheritance, this must be _after_ the global variable and undef
  // checks
  if (Constant *Con = dyn_cast<Constant>(I)) {
    assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
           "order of checks wrong!");
    assert(Con->isNullValue() && "null is the only case which makes sense");
    return std::make_pair(Con, true);
  }

  if (isa<LoadInst>(I))
    return std::make_pair(I, true);

  // For an insert element, we might be able to look through it if we know
  // something about the indexes.
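  // For example (hypothetical IR, purely illustrative): for
  //   %v = insertelement <2 x i64 addrspace(1)*> %vec0,
  //                      i64 addrspace(1)* %p, i32 0
  // a query for element 0 can look straight through to the BDV of %p, while
  // a query for element 1 must keep searching through %vec0.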
  if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) {
    if (Index) {
      Value *InsertIndex = IEI->getOperand(2);
      // This index is inserting the value, look for its BDV
      if (InsertIndex == Index)
        return std::make_pair(findBaseDefiningValue(IEI->getOperand(1)),
                              false);
      // Both constant, and can't be equal per above. This insert is
      // definitely not relevant, look back at the rest of the vector and
      // keep trying.
      if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex))
        return findBaseDefiningValueOfVector(IEI->getOperand(0), Index);
    }

    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    return std::make_pair(IEI, false);
  }

  if (isa<ShuffleVectorInst>(I))
    // We don't know whether this vector contains entirely base pointers or
    // not. To be conservatively correct, we treat it as a BDV and will
    // duplicate code as needed to construct a parallel vector of bases.
    // TODO: There are a number of local optimizations which could be applied
    // here for particular shufflevector patterns.
    return std::make_pair(I, false);

  // A PHI or Select is a base defining value. The outer findBasePointer
  // algorithm is responsible for constructing a base value for this BDV.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "unknown vector instruction - no base found for vector element");
  return std::make_pair(I, false);
}

static bool isKnownBaseResult(Value *V);

/// Helper function for findBasePointer - Will return a value which either a)
/// defines the base pointer for the input, b) blocks the simple search
/// (i.e. a PHI or Select of two derived pointers), or c) involves a change
/// from pointer to vector type or back.
static Value *findBaseDefiningValue(Value *I) {
  if (I->getType()->isVectorTy())
    return findBaseDefiningValueOfVector(I).first;

  assert(I->getType()->isPointerTy() &&
         "Illegal to ask for the base pointer of a non-pointer type");

  if (isa<Argument>(I))
    // An incoming argument to the function is a base pointer.
    // We should never have reached here if this argument isn't a gc value.
    return I;

  if (isa<GlobalVariable>(I))
    // base case
    return I;

  // Inlining could possibly introduce phi nodes that contain
  // undef if the callee has multiple returns.
  if (isa<UndefValue>(I))
    // Utterly meaningless, but useful for dealing with
    // partially optimized code.
    return I;

  // Due to inheritance, this must be _after_ the global variable and undef
  // checks
  if (Constant *Con = dyn_cast<Constant>(I)) {
    assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) &&
           "order of checks wrong!");
    // Note: Finding a constant base for something marked for relocation
    // doesn't really make sense. The most likely case is either a) someone
    // screwed up the address space usage or b) you're validating against
    // compiled C++ code w/o the proper separation. The only real exception
    // is a null pointer. You could have generic code written to index off
    // a potentially null value and have proven it null. We also use
    // null pointers in dead paths of relocation phis (which we might later
    // want to find a base pointer for).
    assert(isa<ConstantPointerNull>(Con) &&
           "null is the only case which makes sense");
    return Con;
  }

  if (CastInst *CI = dyn_cast<CastInst>(I)) {
    Value *Def = CI->stripPointerCasts();
    // If we find a cast instruction here, it means we've found a cast which
    // is not simply a pointer cast (i.e. an inttoptr). We don't know how to
    // handle int->ptr conversion.
    assert(!isa<CastInst>(Def) && "shouldn't find another cast here");
    return findBaseDefiningValue(Def);
  }

  if (isa<LoadInst>(I))
    return I; // The value loaded is a gc base itself

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I))
    // The base of this GEP is the base
    return findBaseDefiningValue(GEP->getPointerOperand());

  if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
    switch (II->getIntrinsicID()) {
    case Intrinsic::experimental_gc_result_ptr:
    default:
      // fall through to general call handling
      break;
    case Intrinsic::experimental_gc_statepoint:
    case Intrinsic::experimental_gc_result_float:
    case Intrinsic::experimental_gc_result_int:
      llvm_unreachable("these don't produce pointers");
    case Intrinsic::experimental_gc_relocate: {
      // Rerunning safepoint insertion after safepoints are already
      // inserted is not supported. It could probably be made to work,
      // but why are you doing this? There's no good reason.
      llvm_unreachable("repeat safepoint insertion is not supported");
    }
    case Intrinsic::gcroot:
      // Currently, this mechanism hasn't been extended to work with gcroot.
      // There's no reason it couldn't be, but I haven't thought about the
      // implications much.
      llvm_unreachable(
          "interaction with the gcroot mechanism is not supported");
    }
  }
  // We assume that functions in the source language only return base
  // pointers. This should probably be generalized via attributes to support
  // both source language and internal functions.
  if (isa<CallInst>(I) || isa<InvokeInst>(I))
    return I;

  // I have absolutely no idea how to implement this part yet. It's not
  // necessarily hard, I just haven't really looked at it yet.
  assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented");

  if (isa<AtomicCmpXchgInst>(I))
    // A CAS is effectively an atomic store and load combined under a
    // predicate. From the perspective of base pointers, we just treat it
    // like a load.
    return I;

  assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are "
                                   "binary ops which don't apply to pointers");

  // The aggregate ops. Aggregates can either be in the heap or on the
  // stack, but in either case, this is simply a field load. As a result,
  // this is a defining definition of the base just like a load is.
  if (isa<ExtractValueInst>(I))
    return I;

  // We should never see an insert vector since that would require we be
  // tracing back a struct value not a pointer value.
  assert(!isa<InsertValueInst>(I) &&
         "Base pointer for a struct is meaningless");

  // An extractelement produces a base result exactly when its input does.
  // We may need to insert a parallel instruction to extract the appropriate
  // element out of the base vector corresponding to the input. Given this,
  // it's analogous to the phi and select case even though it's not a merge.
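  // For example (illustrative only): the base of
  //   %d = extractelement <2 x i64 addrspace(1)*> %dvec, i32 0
  // is the element at index 0 of the base vector of %dvec, which may require
  // inserting a new extractelement over that base vector.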
  if (auto *EEI = dyn_cast<ExtractElementInst>(I)) {
    Value *VectorOperand = EEI->getVectorOperand();
    Value *Index = EEI->getIndexOperand();
    std::pair<Value *, bool> pair =
        findBaseDefiningValueOfVector(VectorOperand, Index);
    Value *VectorBase = pair.first;
    if (VectorBase->getType()->isPointerTy())
      // We found a BDV for this specific element within the vector. This is
      // an optimization, but in practice it covers most of the useful cases
      // created via scalarization. Note: The peephole optimization here is
      // currently needed for correctness since the general algorithm doesn't
      // yet handle insertelements. That will change shortly.
      return VectorBase;
    else {
      assert(VectorBase->getType()->isVectorTy());
      // Otherwise, we have an instruction which potentially produces a
      // derived pointer and we need findBasePointers to clone code for us
      // such that we can create an instruction which produces the
      // accompanying base pointer.
      return EEI;
    }
  }

  // The last two cases here don't return a base pointer. Instead, they
  // return a value which dynamically selects from among several base
  // derived pointers (each potentially with its own base). It's the job of
  // the caller to resolve these.
  assert((isa<SelectInst>(I) || isa<PHINode>(I)) &&
         "missing instruction case in findBaseDefiningValue");
  return I;
}

/// Returns the base defining value for this value.
static Value *findBaseDefiningValueCached(Value *I,
                                          DefiningValueMapTy &Cache) {
  Value *&Cached = Cache[I];
  if (!Cached) {
    Cached = findBaseDefiningValue(I);
    DEBUG(dbgs() << "fBDV-cached: " << I->getName() << " -> "
                 << Cached->getName() << "\n");
  }
  assert(Cache[I] != nullptr);
  return Cached;
}

/// Return a base pointer for this value if known. Otherwise, return its
/// base defining value.
static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) {
  Value *Def = findBaseDefiningValueCached(I, Cache);
  auto Found = Cache.find(Def);
  if (Found != Cache.end()) {
    // Either a base-of relation, or a self reference. Caller must check.
    return Found->second;
  }
  // Only a BDV available
  return Def;
}

/// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV,
/// is it known to be a base pointer? Or do we need to continue searching.
static bool isKnownBaseResult(Value *V) {
  if (!isa<PHINode>(V) && !isa<SelectInst>(V) &&
      !isa<ExtractElementInst>(V)) {
    // no recursion possible
    return true;
  }
  if (isa<Instruction>(V) &&
      cast<Instruction>(V)->getMetadata("is_base_value")) {
    // This is a previously inserted base phi or select. We know
    // that this is a base value.
    return true;
  }

  // We need to keep searching
  return false;
}

namespace {
/// Models the state of a single base defining value in the findBasePointer
/// algorithm for determining where a new instruction is needed to propagate
/// the base of this BDV.
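///
/// In lattice terms: Unknown is the top element, Conflict is the bottom, and
/// the Base states sit between them. Meeting Base(b1) with Base(b2) yields
/// Base(b1) when b1 == b2, and Conflict otherwise (see pureMeet below).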
class BDVState {
public:
  enum Status { Unknown, Base, Conflict };

  BDVState(Status s, Value *b = nullptr) : status(s), base(b) {
    assert(status != Base || b);
  }
  explicit BDVState(Value *b) : status(Base), base(b) {}
  BDVState() : status(Unknown), base(nullptr) {}

  Status getStatus() const { return status; }
  Value *getBase() const { return base; }

  bool isBase() const { return getStatus() == Base; }
  bool isUnknown() const { return getStatus() == Unknown; }
  bool isConflict() const { return getStatus() == Conflict; }

  bool operator==(const BDVState &other) const {
    return base == other.base && status == other.status;
  }

  bool operator!=(const BDVState &other) const { return !(*this == other); }

  LLVM_DUMP_METHOD
  void dump() const { print(dbgs()); dbgs() << '\n'; }

  void print(raw_ostream &OS) const {
    switch (status) {
    case Unknown:
      OS << "U";
      break;
    case Base:
      OS << "B";
      break;
    case Conflict:
      OS << "C";
      break;
    };
    OS << " (" << base << " - "
       << (base ? base->getName() : "nullptr") << "): ";
  }

private:
  Status status;
  Value *base; // non-null only if status == Base
};

inline raw_ostream &operator<<(raw_ostream &OS, const BDVState &State) {
  State.print(OS);
  return OS;
}

typedef DenseMap<Value *, BDVState> ConflictStateMapTy;
// Values of type BDVState form a lattice, and this is a helper
// class that implements the meet operation. The meat of the meet
// operation is implemented in MeetBDVStates::pureMeet
class MeetBDVStates {
public:
  /// Initializes the currentResult to the TOP state so that it can be met
  /// with any other state to produce that state.
  MeetBDVStates() {}

  // Destructively meet the current result with the given BDVState
  void meetWith(BDVState otherState) {
    currentResult = meet(otherState, currentResult);
  }

  BDVState getResult() const { return currentResult; }

private:
  BDVState currentResult;

  /// Perform a meet operation on two elements of the BDVState lattice.
  static BDVState meet(BDVState LHS, BDVState RHS) {
    assert((pureMeet(LHS, RHS) == pureMeet(RHS, LHS)) &&
           "math is wrong: meet does not commute!");
    BDVState Result = pureMeet(LHS, RHS);
    DEBUG(dbgs() << "meet of " << LHS << " with " << RHS
                 << " produced " << Result << "\n");
    return Result;
  }

  static BDVState pureMeet(const BDVState &stateA, const BDVState &stateB) {
    switch (stateA.getStatus()) {
    case BDVState::Unknown:
      return stateB;

    case BDVState::Base:
      assert(stateA.getBase() && "can't be null");
      if (stateB.isUnknown())
        return stateA;

      if (stateB.isBase()) {
        if (stateA.getBase() == stateB.getBase()) {
          assert(stateA == stateB && "equality broken!");
          return stateA;
        }
        return BDVState(BDVState::Conflict);
      }
      assert(stateB.isConflict() && "only three states!");
      return BDVState(BDVState::Conflict);

    case BDVState::Conflict:
      return stateA;
    }
    llvm_unreachable("only three states!");
  }
};
}

/// For a given value or instruction, figure out what base ptr it's derived
/// from. For gc objects, this is simply itself. On success, returns a value
/// which is the base pointer. (This is reliable and can be used for
/// relocation.) On failure, returns nullptr.
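///
/// For example (hypothetical IR, for illustration): given
///   loop:
///     %d = phi i64 addrspace(1)* [ %a, %entry ], [ %d.next, %loop ]
///     %d.next = getelementptr i64, i64 addrspace(1)* %d, i64 1
/// %d is a BDV but not a base; this routine inserts a parallel phi over the
/// corresponding base pointers and returns it as the base of %d.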
static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) {
  Value *def = findBaseOrBDV(I, cache);

  if (isKnownBaseResult(def)) {
    return def;
  }

  // Here's the rough algorithm:
  // - For every SSA value, construct a mapping to either an actual base
  //   pointer or a PHI which obscures the base pointer.
  // - Construct a mapping from PHI to unknown TOP state. Use an
  //   optimistic algorithm to propagate base pointer information. Lattice
  //   looks like:
  //   UNKNOWN
  //   b1 b2 b3 b4
  //   CONFLICT
  //   When the algorithm terminates, all PHIs will either have a single
  //   concrete base or be in a conflict state.
  // - For every conflict, insert a dummy PHI node without arguments. Add
  //   these to the base[Instruction] = BasePtr mapping. For every
  //   non-conflict, add the actual base.
  // - For every conflict, add arguments for the base[a] of each input
  //   argument.
  //
  // Note: A simpler form of this would be to add the conflict form of all
  // PHIs without running the optimistic algorithm. This would be
  // analogous to pessimistic data flow and would likely lead to an
  // overall worse solution.

#ifndef NDEBUG
  auto isExpectedBDVType = [](Value *BDV) {
    return isa<PHINode>(BDV) || isa<SelectInst>(BDV) ||
           isa<ExtractElementInst>(BDV);
  };
#endif

  // Once populated, will contain a mapping from each potentially non-base
  // BDV to a lattice value (described above) which corresponds to that BDV.
  ConflictStateMapTy states;
  // Recursively fill in all phis & selects reachable from the initial one
  // for which we don't already know a definite base value.
  /* scope */ {
    DenseSet<Value *> Visited;
    SmallVector<Value *, 16> Worklist;
    Worklist.push_back(def);
    Visited.insert(def);
    while (!Worklist.empty()) {
      Value *Current = Worklist.pop_back_val();
      assert(!isKnownBaseResult(Current) && "why did it get added?");

      auto visitIncomingValue = [&](Value *InVal) {
        Value *Base = findBaseOrBDV(InVal, cache);
        if (isKnownBaseResult(Base))
          // Known bases won't need new instructions introduced and can be
          // ignored safely
          return;
        assert(isExpectedBDVType(Base) && "the only non-base values "
               "we see should be base defining values");
        if (Visited.insert(Base).second)
          Worklist.push_back(Base);
      };
      if (PHINode *Phi = dyn_cast<PHINode>(Current)) {
        for (Value *InVal : Phi->incoming_values())
          visitIncomingValue(InVal);
      } else if (SelectInst *Sel = dyn_cast<SelectInst>(Current)) {
        visitIncomingValue(Sel->getTrueValue());
        visitIncomingValue(Sel->getFalseValue());
      } else if (auto *EE = dyn_cast<ExtractElementInst>(Current)) {
        visitIncomingValue(EE->getVectorOperand());
      } else {
        // There are two classes of instructions we know we don't handle.
        assert(isa<ShuffleVectorInst>(Current) ||
               isa<InsertElementInst>(Current));
        llvm_unreachable("unimplemented instruction case");
      }
    }
    // The frontier of visited instructions are the ones we might need to
    // duplicate, so fill in the starting state for the optimistic algorithm
    // that follows.
    for (Value *BDV : Visited) {
      states[BDV] = BDVState();
    }
  }

#ifndef NDEBUG
  DEBUG(dbgs() << "States after initialization:\n");
  for (auto Pair : states) {
    DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // TODO: come back and revisit the state transitions around inputs which
  // have reached conflict state. The current version seems too conservative.

  // Return a phi state for a base defining value. We'll generate a new
  // base state for known bases and expect to find a cached state otherwise.
  auto getStateForBDV = [&](Value *baseValue) {
    if (isKnownBaseResult(baseValue))
      return BDVState(baseValue);
    auto I = states.find(baseValue);
    assert(I != states.end() && "lookup failed!");
    return I->second;
  };

  bool progress = true;
  while (progress) {
#ifndef NDEBUG
    size_t oldSize = states.size();
#endif
    progress = false;
    // We're only changing values in this loop, thus it's safe to keep
    // iterators.
    for (auto Pair : states) {
      Value *v = Pair.first;
      assert(!isKnownBaseResult(v) && "why did it get added?");

      // Given an input value for the current instruction, return a BDVState
      // instance which represents the BDV of that value.
      auto getStateForInput = [&](Value *V) mutable {
        Value *BDV = findBaseOrBDV(V, cache);
        return getStateForBDV(BDV);
      };

      MeetBDVStates calculateMeet;
      if (SelectInst *select = dyn_cast<SelectInst>(v)) {
        calculateMeet.meetWith(getStateForInput(select->getTrueValue()));
        calculateMeet.meetWith(getStateForInput(select->getFalseValue()));
      } else if (PHINode *Phi = dyn_cast<PHINode>(v)) {
        for (Value *Val : Phi->incoming_values())
          calculateMeet.meetWith(getStateForInput(Val));
      } else {
        // The 'meet' for an extractelement is slightly trivial, but it's
        // still useful in that it drives us to conflict if our input is.
        auto *EE = cast<ExtractElementInst>(v);
        calculateMeet.meetWith(getStateForInput(EE->getVectorOperand()));
      }

      BDVState oldState = states[v];
      BDVState newState = calculateMeet.getResult();
      if (oldState != newState) {
        progress = true;
        states[v] = newState;
      }
    }

    assert(oldSize <= states.size());
    assert(oldSize == states.size() || progress);
  }

#ifndef NDEBUG
  DEBUG(dbgs() << "States after meet iteration:\n");
  for (auto Pair : states) {
    DEBUG(dbgs() << " " << Pair.second << " for " << *Pair.first << "\n");
  }
#endif

  // Insert Phis for all conflicts.
  // We want to keep naming deterministic in the loop that follows, so
  // sort the keys before iteration. This is useful in allowing us to
  // write stable tests. Note that there is no invalidation issue here.
  SmallVector<Value *, 16> Keys;
  Keys.reserve(states.size());
  for (auto Pair : states) {
    Value *V = Pair.first;
    Keys.push_back(V);
  }
  std::sort(Keys.begin(), Keys.end(), order_by_name);
  // TODO: adjust naming patterns to avoid this order of iteration dependency
  for (Value *V : Keys) {
    Instruction *I = cast<Instruction>(V);
    BDVState State = states[I];
    assert(!isKnownBaseResult(I) && "why did it get added?");
    assert(!State.isUnknown() && "Optimistic algorithm didn't complete!");

    // extractelement instructions are a bit special in that we may need to
    // insert an extract even when we know an exact base for the instruction.
    // The problem is that we need to convert from a vector base to a scalar
    // base for the particular index we're interested in.
    if (State.isBase() && isa<ExtractElementInst>(I) &&
        isa<VectorType>(State.getBase()->getType())) {
      auto *EE = cast<ExtractElementInst>(I);
      // TODO: In many cases, the new instruction is just EE itself. We
      // should exploit this, but can't do it here since it would break the
      // invariant about the BDV not being known to be a base.
      auto *BaseInst = ExtractElementInst::Create(State.getBase(),
                                                  EE->getIndexOperand(),
                                                  "base_ee", EE);
      BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
      states[I] = BDVState(BDVState::Base, BaseInst);
    }

    if (!State.isConflict())
      continue;

    /// Create and insert a new instruction which will represent the base of
    /// the given instruction 'I'.
    auto MakeBaseInstPlaceholder = [](Instruction *I) -> Instruction * {
      if (isa<PHINode>(I)) {
        BasicBlock *BB = I->getParent();
        int NumPreds = std::distance(pred_begin(BB), pred_end(BB));
        assert(NumPreds > 0 && "how did we reach here");
        std::string Name = I->hasName() ?
            (I->getName() + ".base").str() : "base_phi";
        return PHINode::Create(I->getType(), NumPreds, Name, I);
      } else if (SelectInst *Sel = dyn_cast<SelectInst>(I)) {
        // The undef will be replaced later
        UndefValue *Undef = UndefValue::get(Sel->getType());
        std::string Name = I->hasName() ?
            (I->getName() + ".base").str() : "base_select";
        return SelectInst::Create(Sel->getCondition(), Undef,
                                  Undef, Name, Sel);
      } else {
        auto *EE = cast<ExtractElementInst>(I);
        UndefValue *Undef = UndefValue::get(EE->getVectorOperand()->getType());
        std::string Name = I->hasName() ?
            (I->getName() + ".base").str() : "base_ee";
        return ExtractElementInst::Create(Undef, EE->getIndexOperand(), Name,
                                          EE);
      }
    };
    Instruction *BaseInst = MakeBaseInstPlaceholder(I);
    // Add metadata marking this as a base value
    BaseInst->setMetadata("is_base_value", MDNode::get(I->getContext(), {}));
    states[I] = BDVState(BDVState::Conflict, BaseInst);
  }

  // Fixup all the inputs of the new PHIs
  for (auto Pair : states) {
    Instruction *v = cast<Instruction>(Pair.first);
    BDVState state = Pair.second;

    assert(!isKnownBaseResult(v) && "why did it get added?");
    assert(!state.isUnknown() && "Optimistic algorithm didn't complete!");
    if (!state.isConflict())
      continue;

    if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) {
      PHINode *phi = cast<PHINode>(v);
      unsigned NumPHIValues = phi->getNumIncomingValues();
      for (unsigned i = 0; i < NumPHIValues; i++) {
        Value *InVal = phi->getIncomingValue(i);
        BasicBlock *InBB = phi->getIncomingBlock(i);

        // If we've already seen InBB, add the same incoming value
        // we added for it earlier. The IR verifier requires phi
        // nodes with multiple entries from the same basic block
        // to have the same incoming value for each of those
        // entries. If we don't do this check here and basephi
        // has a different type than base, we'll end up adding two
        // bitcasts (and hence two distinct values) as incoming
        // values for the same basic block.
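        // (Such a phi arises, for instance, when a conditional branch has
        // both of its edges targeting this block.)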

        int blockIndex = basephi->getBasicBlockIndex(InBB);
        if (blockIndex != -1) {
          Value *oldBase = basephi->getIncomingValue(blockIndex);
          basephi->addIncoming(oldBase, InBB);
#ifndef NDEBUG
          Value *base = findBaseOrBDV(InVal, cache);
          if (!isKnownBaseResult(base)) {
            // Either conflict or base.
            assert(states.count(base));
            base = states[base].getBase();
            assert(base != nullptr && "unknown BDVState!");
          }

          // In essence this assert states: the only way two
          // values incoming from the same basic block may be
          // different is by being different bitcasts of the same
          // value. A cleanup that remains TODO is changing
          // findBaseOrBDV to return an llvm::Value of the correct
          // type (and still remain pure). This will remove the
          // need to add bitcasts.
          assert(base->stripPointerCasts() == oldBase->stripPointerCasts() &&
                 "sanity -- findBaseOrBDV should be pure!");
#endif
          continue;
        }

        // Find either the defining value for the PHI or the normal base for
        // a non-phi node
        Value *base = findBaseOrBDV(InVal, cache);
        if (!isKnownBaseResult(base)) {
          // Either conflict or base.
          assert(states.count(base));
          base = states[base].getBase();
          assert(base != nullptr && "unknown BDVState!");
        }
        assert(base && "can't be null");
        // Must use original input BB since base may not be Instruction
        // The cast is needed since base traversal may strip away bitcasts
        if (base->getType() != basephi->getType()) {
          base = new BitCastInst(base, basephi->getType(), "cast",
                                 InBB->getTerminator());
        }
        basephi->addIncoming(base, InBB);
      }
      assert(basephi->getNumIncomingValues() == NumPHIValues);
    } else if (SelectInst *basesel = dyn_cast<SelectInst>(state.getBase())) {
      SelectInst *sel = cast<SelectInst>(v);
      // Operands 1 & 2 are the true and false paths respectively. TODO:
      // refactor to something more safe and less hacky.
      for (int i = 1; i <= 2; i++) {
        Value *InVal = sel->getOperand(i);
        // Find either the defining value for the PHI or the normal base for
        // a non-phi node
        Value *base = findBaseOrBDV(InVal, cache);
        if (!isKnownBaseResult(base)) {
          // Either conflict or base.
          assert(states.count(base));
          base = states[base].getBase();
          assert(base != nullptr && "unknown BDVState!");
        }
        assert(base && "can't be null");
        // Must use original input BB since base may not be Instruction
        // The cast is needed since base traversal may strip away bitcasts
        if (base->getType() != basesel->getType()) {
          base = new BitCastInst(base, basesel->getType(), "cast", basesel);
        }
        basesel->setOperand(i, base);
      }
    } else {
      auto *BaseEE = cast<ExtractElementInst>(state.getBase());
      Value *InVal = cast<ExtractElementInst>(v)->getVectorOperand();
      Value *Base = findBaseOrBDV(InVal, cache);
      if (!isKnownBaseResult(Base)) {
        // Either conflict or base.
        assert(states.count(Base));
        Base = states[Base].getBase();
        assert(Base != nullptr && "unknown BDVState!");
      }
      assert(Base && "can't be null");
      BaseEE->setOperand(0, Base);
    }
  }

  // Now that we're done with the algorithm, see if we can optimize the
  // results slightly by reducing the number of new instructions needed.
  // Arguably, this should be integrated into the algorithm above, but doing
  // it as a post-process step is easier to reason about for the moment.
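  // Two cleanups happen below: a placeholder which ends up bit-for-bit
  // identical to the BDV it was created for is folded away, and placeholders
  // which InstSimplify can fold are replaced by the simplified value.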
  DenseMap<Value *, Value *> ReverseMap;
  SmallPtrSet<Instruction *, 16> NewInsts;
  SmallSetVector<Instruction *, 16> Worklist;
  for (auto Item : states) {
    Value *V = Item.first;
    Value *Base = Item.second.getBase();
    assert(V && Base);
    assert(!isKnownBaseResult(V) && "why did it get added?");
    assert(isKnownBaseResult(Base) &&
           "must be something we 'know' is a base pointer");
    if (!Item.second.isConflict())
      continue;

    ReverseMap[Base] = V;
    if (auto *BaseI = dyn_cast<Instruction>(Base)) {
      NewInsts.insert(BaseI);
      Worklist.insert(BaseI);
    }
  }
  auto PushNewUsers = [&](Instruction *I) {
    for (User *U : I->users())
      if (auto *UI = dyn_cast<Instruction>(U))
        if (NewInsts.count(UI))
          Worklist.insert(UI);
  };
  const DataLayout &DL = cast<Instruction>(def)->getModule()->getDataLayout();
  while (!Worklist.empty()) {
    Instruction *BaseI = Worklist.pop_back_val();
    assert(NewInsts.count(BaseI));
    Value *Bdv = ReverseMap[BaseI];
    if (auto *BdvI = dyn_cast<Instruction>(Bdv))
      if (BaseI->isIdenticalTo(BdvI)) {
        DEBUG(dbgs() << "Identical Base: " << *BaseI << "\n");
        PushNewUsers(BaseI);
        BaseI->replaceAllUsesWith(Bdv);
        BaseI->eraseFromParent();
        states[Bdv] = BDVState(BDVState::Conflict, Bdv);
        NewInsts.erase(BaseI);
        ReverseMap.erase(BaseI);
        continue;
      }
    if (Value *V = SimplifyInstruction(BaseI, DL)) {
      DEBUG(dbgs() << "Base " << *BaseI << " simplified to " << *V << "\n");
      PushNewUsers(BaseI);
      BaseI->replaceAllUsesWith(V);
      BaseI->eraseFromParent();
      states[Bdv] = BDVState(BDVState::Conflict, V);
      NewInsts.erase(BaseI);
      ReverseMap.erase(BaseI);
      continue;
    }
  }

  // Cache all of our results so we can cheaply reuse them.
  // NOTE: This is actually two caches: one of the base defining value
  // relation and one of the base pointer relation! FIXME
  for (auto item : states) {
    Value *v = item.first;
    Value *base = item.second.getBase();
    assert(v && base);

    std::string fromstr =
        cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "")
                       : "none";
    DEBUG(dbgs() << "Updating base value cache"
                 << " for: " << (v->hasName() ? v->getName() : "")
                 << " from: " << fromstr
                 << " to: " << (base->hasName() ? base->getName() : "")
                 << "\n");

    if (cache.count(v)) {
      // Once we transition from the BDV relation being stored in the cache
      // to the base relation being stored, it must be stable.
      assert((!isKnownBaseResult(cache[v]) || cache[v] == base) &&
             "base relation should be stable");
    }
    cache[v] = base;
  }
  assert(cache.find(def) != cache.end());
  return cache[def];
}

// For a set of live pointers (base and/or derived), identify the base
// pointer of the object which they are derived from. This routine will
// mutate the IR graph as needed to make the 'base' pointer live at the
// definition site of 'derived'. This ensures that any use of 'derived' can
// also use 'base'. This may involve the insertion of a number of
// additional PHI nodes.
//
// preconditions: live is a set of pointer type Values
//
// side effects: may insert PHI nodes into the existing CFG, will preserve
// CFG, will not remove or mutate any existing nodes
//
// post condition: PointerToBase contains one (derived, base) pair for every
// pointer in live. Note that derived can be equal to base if the original
// pointer was a base pointer.
static void
findBasePointers(const StatepointLiveSetTy &live,
                 DenseMap<llvm::Value *, llvm::Value *> &PointerToBase,
                 DominatorTree *DT, DefiningValueMapTy &DVCache) {
  // For the naming of values inserted to be deterministic - which makes for
  // much cleaner and more stable tests - we need to assign an order to the
  // live values. DenseSets do not provide a deterministic order across runs.
  SmallVector<Value *, 64> Temp;
  Temp.insert(Temp.end(), live.begin(), live.end());
  std::sort(Temp.begin(), Temp.end(), order_by_name);
  for (Value *ptr : Temp) {
    Value *base = findBasePointer(ptr, DVCache);
    assert(base && "failed to find base pointer");
    PointerToBase[ptr] = base;
    assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) ||
            DT->dominates(cast<Instruction>(base)->getParent(),
                          cast<Instruction>(ptr)->getParent())) &&
           "The base we found better dominate the derived pointer");

    // If you see this trip and like to live really dangerously, the code
    // should be correct, just with idioms the verifier can't handle. You
    // can try disabling the verifier at your own substantial risk.
    assert(!isa<ConstantPointerNull>(base) &&
           "the relocation code needs adjustment to handle the relocation of "
           "a null pointer constant without causing false positives in the "
           "safepoint ir verifier.");
  }
}

/// Find the required base pointers (and adjust the live set) for the given
/// parse point.
static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache,
                             const CallSite &CS,
                             PartiallyConstructedSafepointRecord &result) {
  DenseMap<llvm::Value *, llvm::Value *> PointerToBase;
  findBasePointers(result.liveset, PointerToBase, &DT, DVCache);

  if (PrintBasePointers) {
    // Note: Need to print these in a stable order since this is checked in
    // some tests.
    errs() << "Base Pairs (w/o Relocation):\n";
    SmallVector<Value *, 64> Temp;
    Temp.reserve(PointerToBase.size());
    for (auto Pair : PointerToBase) {
      Temp.push_back(Pair.first);
    }
    std::sort(Temp.begin(), Temp.end(), order_by_name);
    for (Value *Ptr : Temp) {
      Value *Base = PointerToBase[Ptr];
      errs() << " derived %" << Ptr->getName() << " base %" << Base->getName()
             << "\n";
    }
  }

  result.PointerToBase = PointerToBase;
}

/// Given an updated version of the dataflow liveness results, update the
/// liveset and base pointer maps for the call site CS.
static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  const CallSite &CS,
                                  PartiallyConstructedSafepointRecord &result);

static void recomputeLiveInValues(
    Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
  // TODO-PERF: reuse the original liveness, then simply run the dataflow
  // again. The old values are still live and will help it stabilize quickly.
  GCPtrLivenessData RevisedLivenessData;
  computeLiveInValues(DT, F, RevisedLivenessData);
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    const CallSite &CS = toUpdate[i];
    recomputeLiveInValues(RevisedLivenessData, CS, info);
  }
}

// When inserting gc.relocate calls, we need to ensure there are no uses
// of the original value between the gc.statepoint and the gc.relocate call.
// One case which can arise is a phi node at the start of one of the
// successor blocks. We also need to be able to insert the gc.relocates only
// on the path which goes through the statepoint. We might need to split an
// edge to make this possible.
static BasicBlock *
normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent,
                            DominatorTree &DT) {
  BasicBlock *Ret = BB;
  if (!BB->getUniquePredecessor()) {
    Ret = SplitBlockPredecessors(BB, InvokeParent, "", &DT);
  }

  // Now that 'Ret' has a unique predecessor we can safely remove all phi
  // nodes from it.
  FoldSingleEntryPHINodes(Ret);
  assert(!isa<PHINode>(Ret->begin()));

  // At this point, we can safely insert a gc.relocate as the first
  // instruction in Ret if needed.
  return Ret;
}

static int find_index(ArrayRef<Value *> livevec, Value *val) {
  auto itr = std::find(livevec.begin(), livevec.end(), val);
  assert(livevec.end() != itr);
  size_t index = std::distance(livevec.begin(), itr);
  assert(index < livevec.size());
  return index;
}

// Create a new attribute set containing only attributes which can be
// transferred from the original call to the safepoint.
static AttributeSet legalizeCallAttributes(AttributeSet AS) {
  AttributeSet ret;

  for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) {
    unsigned index = AS.getSlotIndex(Slot);

    if (index == AttributeSet::ReturnIndex ||
        index == AttributeSet::FunctionIndex) {

      for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end;
           ++it) {
        Attribute attr = *it;

        // Do not allow certain attributes - just skip them.
        // A safepoint can not be read only or read none.
        if (attr.hasAttribute(Attribute::ReadNone) ||
            attr.hasAttribute(Attribute::ReadOnly))
          continue;

        ret = ret.addAttributes(
            AS.getContext(), index,
            AttributeSet::get(AS.getContext(), index, AttrBuilder(attr)));
      }
    }

    // Just skip parameter attributes for now
  }

  return ret;
}

/// Helper function to place all gc relocates necessary for the given
/// statepoint.
/// Inputs:
///   liveVariables - list of variables to be relocated.
///   liveStart - index of the first live variable.
///   basePtrs - base pointers.
///   statepointToken - statepoint instruction to which relocates should be
///     bound.
///   Builder - LLVM IR builder to be used to construct new calls.
static void CreateGCRelocates(ArrayRef<llvm::Value *> LiveVariables,
                              const int LiveStart,
                              ArrayRef<llvm::Value *> BasePtrs,
                              Instruction *StatepointToken,
                              IRBuilder<> Builder) {
  if (LiveVariables.empty())
    return;

  // All gc_relocate are set to i8 addrspace(1)* type.
  // We originally generated unique declarations for each pointer type, but
  // this proved problematic because the intrinsic mangling code is
  // incomplete and fragile. Since we're moving towards a single unified
  // pointer type anyways, we can just cast everything to an i8* of the
  // right address space. A bitcast is added later to convert gc_relocate to
  // the actual value's type.
  Module *M = StatepointToken->getModule();
  auto AS = cast<PointerType>(LiveVariables[0]->getType())->getAddressSpace();
  Type *Types[] = {Type::getInt8PtrTy(M->getContext(), AS)};
  Value *GCRelocateDecl =
      Intrinsic::getDeclaration(M, Intrinsic::experimental_gc_relocate, Types);

  for (unsigned i = 0; i < LiveVariables.size(); i++) {
    // Generate the gc.relocate call and save the result
    Value *BaseIdx =
        Builder.getInt32(LiveStart + find_index(LiveVariables, BasePtrs[i]));
    Value *LiveIdx = Builder.getInt32(
        LiveStart + find_index(LiveVariables, LiveVariables[i]));

    // Only specify a debug name if we can give a useful one
    CallInst *Reloc = Builder.CreateCall(
        GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx},
        LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated"
                                    : "");
    // Trick CodeGen into thinking there are lots of free registers at this
    // fake call.
    Reloc->setCallingConv(CallingConv::Cold);
  }
}

static void
makeStatepointExplicitImpl(const CallSite &CS, /* to replace */
                           const SmallVectorImpl<llvm::Value *> &basePtrs,
                           const SmallVectorImpl<llvm::Value *> &liveVariables,
                           Pass *P,
                           PartiallyConstructedSafepointRecord &result) {
  assert(basePtrs.size() == liveVariables.size());
  assert(isStatepoint(CS) &&
         "This method expects to be rewriting a statepoint");

  BasicBlock *BB = CS.getInstruction()->getParent();
  assert(BB);
  Function *F = BB->getParent();
  assert(F && "must be set");
  Module *M = F->getParent();
  (void)M;
  assert(M && "must be set");

  // We're not changing the function signature of the statepoint since the gc
  // arguments go into the var args section.
  Function *gc_statepoint_decl = CS.getCalledFunction();

  // Then go ahead and use the builder to actually do the inserts. We insert
  // immediately before the previous instruction under the assumption that
  // all arguments will be available here. We can't insert afterwards since
  // we may be replacing a terminator.
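  // Schematically (illustrative IR only, names and types elided), a call
  //   %res = call i8 addrspace(1)* @foo(i8 addrspace(1)* %p)
  // is replaced by
  //   %tok = call @llvm.experimental.gc.statepoint(@foo, ..., %p)
  //   %p.relocated = call @llvm.experimental.gc.relocate(%tok, ...)
  // with the live gc values appended to the statepoint's variadic section.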
  Instruction *insertBefore = CS.getInstruction();
  IRBuilder<> Builder(insertBefore);
  // Copy all of the arguments from the original statepoint - this includes
  // the target, call args, and deopt args.
  SmallVector<llvm::Value *, 64> args;
  args.insert(args.end(), CS.arg_begin(), CS.arg_end());
  // TODO: Clear the 'needs rewrite' flag

  // Add all the pointers to be relocated (gc arguments).
  // Capture the start of the live variable list for use in the gc_relocates.
  const int live_start = args.size();
  args.insert(args.end(), liveVariables.begin(), liveVariables.end());

  // Create the statepoint given all the arguments
  Instruction *token = nullptr;
  AttributeSet return_attributes;
  if (CS.isCall()) {
    CallInst *toReplace = cast<CallInst>(CS.getInstruction());
    CallInst *call =
        Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token");
    call->setTailCall(toReplace->isTailCall());
    call->setCallingConv(toReplace->getCallingConv());

    // Currently we will fail on parameter attributes and on certain
    // function attributes.
    AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
    // In case we can handle this set of attributes - set up function attrs
    // directly on the statepoint and return attrs later for the gc_result
    // intrinsic.
    call->setAttributes(new_attrs.getFnAttributes());
    return_attributes = new_attrs.getRetAttributes();

    token = call;

    // Put the following gc_result and gc_relocate calls immediately after
    // the old call (which we're about to delete).
    BasicBlock::iterator next(toReplace);
    assert(BB->end() != next && "not a terminator, must have next");
    next++;
    Instruction *IP = &*(next);
    Builder.SetInsertPoint(IP);
    Builder.SetCurrentDebugLocation(IP->getDebugLoc());

  } else {
    InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction());

    // Insert the new invoke into the old block. We'll remove the old one in
    // a moment at which point this will become the new terminator for the
    // original block.
    InvokeInst *invoke = InvokeInst::Create(
        gc_statepoint_decl, toReplace->getNormalDest(),
        toReplace->getUnwindDest(), args, "statepoint_token",
        toReplace->getParent());
    invoke->setCallingConv(toReplace->getCallingConv());

    // Currently we will fail on parameter attributes and on certain
    // function attributes.
    AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes());
    // In case we can handle this set of attributes - set up function attrs
    // directly on the statepoint and return attrs later for the gc_result
    // intrinsic.
    invoke->setAttributes(new_attrs.getFnAttributes());
    return_attributes = new_attrs.getRetAttributes();

    token = invoke;

    // Generate gc relocates in the exceptional path
    BasicBlock *unwindBlock = toReplace->getUnwindDest();
    assert(!isa<PHINode>(unwindBlock->begin()) &&
           unwindBlock->getUniquePredecessor() &&
           "can't safely insert in this block!");

    Instruction *IP = &*(unwindBlock->getFirstInsertionPt());
    Builder.SetInsertPoint(IP);
    Builder.SetCurrentDebugLocation(toReplace->getDebugLoc());

    // Extract the second element from the landingpad return value. We will
    // attach exceptional gc relocates to it.
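    // (A landingpad conventionally yields a two-element struct such as
    // { i8*, i32 }; the extractvalue at index 1 below gives the relocates a
    // value anchored on the unwind path.)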
    const unsigned idx = 1;
    Instruction *exceptional_token =
        cast<Instruction>(Builder.CreateExtractValue(
            unwindBlock->getLandingPadInst(), idx, "relocate_token"));
    result.UnwindToken = exceptional_token;

    CreateGCRelocates(liveVariables, live_start, basePtrs,
                      exceptional_token, Builder);

    // Generate gc relocates and returns for the normal block
    BasicBlock *normalDest = toReplace->getNormalDest();
    assert(!isa<PHINode>(normalDest->begin()) &&
           normalDest->getUniquePredecessor() &&
           "can't safely insert in this block!");

    IP = &*(normalDest->getFirstInsertionPt());
    Builder.SetInsertPoint(IP);

    // gc relocates will be generated later as if it were a regular call
    // statepoint
  }
  assert(token);

  // Take the name of the original value call if it had one.
  token->takeName(CS.getInstruction());

// The GCResult is already inserted, we just need to find it
#ifndef NDEBUG
  Instruction *toReplace = CS.getInstruction();
  assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) &&
         "only valid use before rewrite is gc.result");
  assert(!toReplace->hasOneUse() ||
         isGCResult(cast<Instruction>(*toReplace->user_begin())));
#endif

  // Update the gc.result of the original statepoint (if any) to use the newly
  // inserted statepoint. This is safe to do here since the token can't be
  // considered a live reference.
  CS.getInstruction()->replaceAllUsesWith(token);

  result.StatepointToken = token;

  // Second, create a gc.relocate for every live variable
  CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder);
}

namespace {
struct name_ordering {
  Value *base;
  Value *derived;
  bool operator()(name_ordering const &a, name_ordering const &b) {
    return -1 == a.derived->getName().compare(b.derived->getName());
  }
};
}

static void stabilize_order(SmallVectorImpl<Value *> &basevec,
                            SmallVectorImpl<Value *> &livevec) {
  assert(basevec.size() == livevec.size());

  SmallVector<name_ordering, 64> temp;
  for (size_t i = 0; i < basevec.size(); i++) {
    name_ordering v;
    v.base = basevec[i];
    v.derived = livevec[i];
    temp.push_back(v);
  }
  std::sort(temp.begin(), temp.end(), name_ordering());
  for (size_t i = 0; i < basevec.size(); i++) {
    basevec[i] = temp[i].base;
    livevec[i] = temp[i].derived;
  }
}

// Replace an existing gc.statepoint with a new one and a set of gc.relocates
// which make the relocations happening at this safepoint explicit.
//
// WARNING: Does not do any fixup to adjust users of the original live
// values. That's the caller's responsibility.
static void
makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P,
                       PartiallyConstructedSafepointRecord &result) {
  auto liveset = result.liveset;
  auto PointerToBase = result.PointerToBase;

  // Convert to vector for efficient cross referencing.
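  // For example (hypothetical values): a liveset {%p, %q} with bases
  // {%p -> %p.base, %q -> %q.base} becomes the parallel vectors
  // livevec = [%p, %q] and basevec = [%p.base, %q.base], paired by index.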
  SmallVector<Value *, 64> basevec, livevec;
  livevec.reserve(liveset.size());
  basevec.reserve(liveset.size());
  for (Value *L : liveset) {
    livevec.push_back(L);
    assert(PointerToBase.count(L));
    Value *base = PointerToBase[L];
    basevec.push_back(base);
  }
  assert(livevec.size() == basevec.size());

  // To make the output IR slightly more stable (for use in diffs), ensure a
  // fixed order of the values in the safepoint (by sorting the value name).
  // The order is otherwise meaningless.
  stabilize_order(basevec, livevec);

  // Do the actual rewriting and delete the old statepoint
  makeStatepointExplicitImpl(CS, basevec, livevec, P, result);
  CS.getInstruction()->eraseFromParent();
}

// Helper function for relocationViaAlloca. It visits the statepoint's gc
// relocates and, for each of them, emits a store to the assigned location
// (via AllocaMap). Visited values are added to the VisitedLiveValues set,
// which we will later use for a sanity check.
static void
insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs,
                       DenseMap<Value *, Value *> &AllocaMap,
                       DenseSet<Value *> &VisitedLiveValues) {

  for (User *U : GCRelocs) {
    if (!isa<IntrinsicInst>(U))
      continue;

    IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U);

    // We only care about relocates
    if (RelocatedValue->getIntrinsicID() !=
        Intrinsic::experimental_gc_relocate) {
      continue;
    }

    GCRelocateOperands RelocateOperands(RelocatedValue);
    Value *OriginalValue =
        const_cast<Value *>(RelocateOperands.getDerivedPtr());
    assert(AllocaMap.count(OriginalValue));
    Value *Alloca = AllocaMap[OriginalValue];

    // Emit a store into the related alloca. All gc_relocates are
    // i8 addrspace(1)* typed, so the value must be bitcast to the correct
    // type according to the alloca.
    assert(RelocatedValue->getNextNode() &&
           "Should always have one since it's not a terminator");
    IRBuilder<> Builder(RelocatedValue->getNextNode());
    Value *CastedRelocatedValue = Builder.CreateBitCast(
        RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(),
        RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted"
                                  : "");

    StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca);
    Store->insertAfter(cast<Instruction>(CastedRelocatedValue));

#ifndef NDEBUG
    VisitedLiveValues.insert(OriginalValue);
#endif
  }
}

// Helper function for relocationViaAlloca. Similar to insertRelocationStores
// but works for rematerialized values.
static void
insertRematerializationStores(
    RematerializedValueMapTy RematerializedValues,
    DenseMap<Value *, Value *> &AllocaMap,
    DenseSet<Value *> &VisitedLiveValues) {

  for (auto RematerializedValuePair : RematerializedValues) {
    Instruction *RematerializedValue = RematerializedValuePair.first;
    Value *OriginalValue = RematerializedValuePair.second;

    assert(AllocaMap.count(OriginalValue) &&
           "Can not find alloca for rematerialized value");
    Value *Alloca = AllocaMap[OriginalValue];

    StoreInst *Store = new StoreInst(RematerializedValue, Alloca);
    Store->insertAfter(RematerializedValue);

#ifndef NDEBUG
    VisitedLiveValues.insert(OriginalValue);
#endif
  }
}

/// Do all the relocation update via allocas and mem2reg
static void relocationViaAlloca(
    Function &F, DominatorTree &DT, ArrayRef<Value *> Live,
    ArrayRef<struct PartiallyConstructedSafepointRecord> Records) {
#ifndef NDEBUG
  // Record the initial number of (static) allocas; we'll check that we have
  // the same number when we get done.
  int InitialAllocaNum = 0;
  for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
       I++)
    if (isa<AllocaInst>(*I))
      InitialAllocaNum++;
#endif

  // TODO-PERF: change data structures, reserve
  DenseMap<Value *, Value *> AllocaMap;
  SmallVector<AllocaInst *, 200> PromotableAllocas;
  // Used later to check that we have enough allocas to store all values
  std::size_t NumRematerializedValues = 0;
  PromotableAllocas.reserve(Live.size());

  // Emit an alloca for "LiveValue" and record it in "AllocaMap" and
  // "PromotableAllocas"
  auto emitAllocaFor = [&](Value *LiveValue) {
    AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "",
                                        F.getEntryBlock().getFirstNonPHI());
    AllocaMap[LiveValue] = Alloca;
    PromotableAllocas.push_back(Alloca);
  };

  // Emit an alloca for each live gc pointer
  for (unsigned i = 0; i < Live.size(); i++) {
    emitAllocaFor(Live[i]);
  }

  // Emit allocas for rematerialized values
  for (size_t i = 0; i < Records.size(); i++) {
    const struct PartiallyConstructedSafepointRecord &Info = Records[i];

    for (auto RematerializedValuePair : Info.RematerializedValues) {
      Value *OriginalValue = RematerializedValuePair.second;
      if (AllocaMap.count(OriginalValue) != 0)
        continue;

      emitAllocaFor(OriginalValue);
      ++NumRematerializedValues;
    }
  }

  // The next two loops are part of the same conceptual operation. We need to
  // insert a store to the alloca after the original def and at each
  // redefinition. We need to insert a load before each use. These are split
  // into distinct loops for performance reasons.
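  // Schematically (illustrative IR, hypothetical names), for one live value
  // %obj this produces:
  //   %obj.alloca = alloca i8 addrspace(1)*     ; in the entry block
  //   store ... %obj, ... %obj.alloca           ; after the original def
  //   store ... %obj.relocated, ... %obj.alloca ; after each statepoint
  //   %obj.reload = load ... %obj.alloca        ; before each original use
  // mem2reg then folds the allocas back into SSA form.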

  // Update the gc pointer after each statepoint: either store a relocated
  // value or null (if no relocated value was found for this gc pointer and
  // it is not a gc_result). This must happen before we update the statepoint
  // with the load of the alloca, otherwise we lose the link between the
  // statepoint and the old def.
  for (size_t i = 0; i < Records.size(); i++) {
    const struct PartiallyConstructedSafepointRecord &Info = Records[i];
    Value *Statepoint = Info.StatepointToken;

    // This will be used for a consistency check
    DenseSet<Value *> VisitedLiveValues;

    // Insert stores for normal statepoint gc relocates
    insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues);

    // If it was an invoke statepoint, we will also insert stores for the
    // exceptional path gc relocates.
    if (isa<InvokeInst>(Statepoint)) {
      insertRelocationStores(Info.UnwindToken->users(), AllocaMap,
                             VisitedLiveValues);
    }

    // Do the same for rematerialized values
    insertRematerializationStores(Info.RematerializedValues, AllocaMap,
                                  VisitedLiveValues);

    if (ClobberNonLive) {
      // As a debugging aid, pretend that an unrelocated pointer becomes null
      // at the gc.statepoint. This will turn some subtle GC problems into
      // slightly easier to debug SEGVs. Note that on large IR files with
      // lots of gc.statepoints this is extremely costly in both memory and
      // time.
      SmallVector<AllocaInst *, 64> ToClobber;
      for (auto Pair : AllocaMap) {
        Value *Def = Pair.first;
        AllocaInst *Alloca = cast<AllocaInst>(Pair.second);

        // This value was relocated
        if (VisitedLiveValues.count(Def)) {
          continue;
        }
        ToClobber.push_back(Alloca);
      }

      auto InsertClobbersAt = [&](Instruction *IP) {
        for (auto *AI : ToClobber) {
          auto AIType = cast<PointerType>(AI->getType());
          auto PT = cast<PointerType>(AIType->getElementType());
          Constant *CPN = ConstantPointerNull::get(PT);
          StoreInst *Store = new StoreInst(CPN, AI);
          Store->insertBefore(IP);
        }
      };

      // Insert the clobbering stores. These may get intermixed with the
      // gc.results and gc.relocates, but that's fine.
      if (auto II = dyn_cast<InvokeInst>(Statepoint)) {
        InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt());
        InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt());
      } else {
        BasicBlock::iterator Next(cast<CallInst>(Statepoint));
        Next++;
        InsertClobbersAt(Next);
      }
    }
  }

  // Update the uses with loads from the allocas and add a store for the
  // initial (pre-relocation) value.
  for (auto Pair : AllocaMap) {
    Value *Def = Pair.first;
    Value *Alloca = Pair.second;

    // We pre-record the uses of allocas so that we don't have to worry about
    // later updates that change the user information.
    SmallVector<Instruction *, 20> Uses;
    // PERF: trade a linear scan for repeated reallocation
    Uses.reserve(std::distance(Def->user_begin(), Def->user_end()));
    for (User *U : Def->users()) {
      if (!isa<ConstantExpr>(U)) {
        // If the def has a ConstantExpr use, then the def is either a
        // ConstantExpr use itself or null. In either case
        // (recursively in the first, directly in the second), the oop
        // it is ultimately dependent on is null and this particular
        // use does not need to be fixed up.
        Uses.push_back(cast<Instruction>(U));
      }
    }

    std::sort(Uses.begin(), Uses.end());
    auto Last = std::unique(Uses.begin(), Uses.end());
    Uses.erase(Last, Uses.end());

    for (Instruction *Use : Uses) {
      if (isa<PHINode>(Use)) {
        PHINode *Phi = cast<PHINode>(Use);
        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) {
          if (Def == Phi->getIncomingValue(i)) {
            LoadInst *Load = new LoadInst(
                Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
            Phi->setIncomingValue(i, Load);
          }
        }
      } else {
        LoadInst *Load = new LoadInst(Alloca, "", Use);
        Use->replaceUsesOfWith(Def, Load);
      }
    }

    // Emit a store for the initial gc value. The store must be inserted
    // after the load, otherwise the store will be in the alloca's use list
    // and an extra load will be inserted before it.
    StoreInst *Store = new StoreInst(Def, Alloca);
    if (Instruction *Inst = dyn_cast<Instruction>(Def)) {
      if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) {
        // InvokeInst is a TerminatorInst, so the store needs to be inserted
        // into its normal destination block.
        BasicBlock *NormalDest = Invoke->getNormalDest();
        Store->insertBefore(NormalDest->getFirstNonPHI());
      } else {
        assert(!Inst->isTerminator() &&
               "The only TerminatorInst that can produce a value is "
               "InvokeInst which is handled above.");
        Store->insertAfter(Inst);
      }
    } else {
      assert(isa<Argument>(Def));
      Store->insertAfter(cast<Instruction>(Alloca));
    }
  }

  assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues &&
         "we must have the same number of allocas as live values");
  if (!PromotableAllocas.empty()) {
    // Apply mem2reg to promote the allocas to SSA
    PromoteMemToReg(PromotableAllocas, DT);
  }

#ifndef NDEBUG
  for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E;
       I++)
    if (isa<AllocaInst>(*I))
      InitialAllocaNum--;
  assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas");
#endif
}

/// Implement a unique function which doesn't require us to sort the input
/// vector. Sorting would have the effect of changing the output of a couple
/// of tests in ways which make them less useful in testing fused safepoints.
template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) {
  SmallSet<T, 8> Seen;
  Vec.erase(std::remove_if(Vec.begin(), Vec.end(), [&](const T &V) {
              return !Seen.insert(V).second;
            }), Vec.end());
}

/// Insert holders so that each Value is obviously live through the entire
/// lifetime of the call.
static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values,
                                 SmallVectorImpl<CallInst *> &Holders) {
  if (Values.empty())
    // No values to hold live, might as well not insert the empty holder
    return;

  Module *M = CS.getInstruction()->getParent()->getParent()->getParent();
  // Use a dummy vararg function to actually hold the values live
  Function *Func = cast<Function>(M->getOrInsertFunction(
      "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true)));
  if (CS.isCall()) {
    // For call safepoints, insert dummy calls right after the safepoint
    BasicBlock::iterator Next(CS.getInstruction());
    Next++;
    Holders.push_back(CallInst::Create(Func, Values, "", Next));
    return;
  }
  // For invoke safepoints, insert dummy calls in both the normal and
  // exceptional destination blocks
  auto *II = cast<InvokeInst>(CS.getInstruction());
  Holders.push_back(CallInst::Create(
      Func, Values, "", II->getNormalDest()->getFirstInsertionPt()));
  Holders.push_back(CallInst::Create(
      Func, Values, "", II->getUnwindDest()->getFirstInsertionPt()));
}

static void findLiveReferences(
    Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate,
    MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) {
  GCPtrLivenessData OriginalLivenessData;
  computeLiveInValues(DT, F, OriginalLivenessData);
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    const CallSite &CS = toUpdate[i];
    analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info);
  }
}

/// Remove any vector of pointers from the liveset by scalarizing them over
/// the statepoint instruction. Adds the scalarized pieces to the liveset. It
/// would be preferable to include the vector in the statepoint itself, but
/// the lowering code currently does not handle that. Extending it would be
/// slightly non-trivial since it requires a format change. Given how rare
/// such cases are (for the moment?) scalarizing is an acceptable compromise.
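///
/// Schematically (illustrative IR, hypothetical names): a live
/// <2 x i8 addrspace(1)*> %vec is lowered to
///   %vec.0 = extractelement <2 x i8 addrspace(1)*> %vec, i32 0
///   %vec.1 = extractelement <2 x i8 addrspace(1)*> %vec, i32 1
/// before the statepoint (so the scalars are relocated individually) and is
/// rebuilt with a matching insertelement sequence at each return point.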
static void splitVectorValues(Instruction *StatepointInst,
                              StatepointLiveSetTy &LiveSet,
                              DenseMap<Value *, Value *> &PointerToBase,
                              DominatorTree &DT) {
  SmallVector<Value *, 16> ToSplit;
  for (Value *V : LiveSet)
    if (isa<VectorType>(V->getType()))
      ToSplit.push_back(V);

  if (ToSplit.empty())
    return;

  DenseMap<Value *, SmallVector<Value *, 16>> ElementMapping;

  Function &F = *(StatepointInst->getParent()->getParent());

  DenseMap<Value *, AllocaInst *> AllocaMap;
  // First is the normal return, second is the exceptional return (invoke only)
  DenseMap<Value *, std::pair<Value *, Value *>> Replacements;
  for (Value *V : ToSplit) {
    AllocaInst *Alloca =
        new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI());
    AllocaMap[V] = Alloca;

    VectorType *VT = cast<VectorType>(V->getType());
    IRBuilder<> Builder(StatepointInst);
    SmallVector<Value *, 16> Elements;
    for (unsigned i = 0; i < VT->getNumElements(); i++)
      Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i)));
    ElementMapping[V] = Elements;

    auto InsertVectorReform = [&](Instruction *IP) {
      Builder.SetInsertPoint(IP);
      Builder.SetCurrentDebugLocation(IP->getDebugLoc());
      Value *ResultVec = UndefValue::get(VT);
      for (unsigned i = 0; i < VT->getNumElements(); i++)
        ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i],
                                                Builder.getInt32(i));
      return ResultVec;
    };

    if (isa<CallInst>(StatepointInst)) {
      BasicBlock::iterator Next(StatepointInst);
      Next++;
      Instruction *IP = &*(Next);
      Replacements[V].first = InsertVectorReform(IP);
      Replacements[V].second = nullptr;
    } else {
      InvokeInst *Invoke = cast<InvokeInst>(StatepointInst);
      // We've already normalized - check that we don't have shared
      // destination blocks
      BasicBlock *NormalDest = Invoke->getNormalDest();
      assert(!isa<PHINode>(NormalDest->begin()));
      BasicBlock *UnwindDest = Invoke->getUnwindDest();
      assert(!isa<PHINode>(UnwindDest->begin()));
      // Insert insertelement sequences in both successors
      Instruction *IP = &*(NormalDest->getFirstInsertionPt());
      Replacements[V].first = InsertVectorReform(IP);
      IP = &*(UnwindDest->getFirstInsertionPt());
      Replacements[V].second = InsertVectorReform(IP);
    }
  }

  for (Value *V : ToSplit) {
    AllocaInst *Alloca = AllocaMap[V];

    // Capture all users before we start mutating use lists
    SmallVector<Instruction *, 16> Users;
    for (User *U : V->users())
      Users.push_back(cast<Instruction>(U));

    for (Instruction *I : Users) {
      if (auto Phi = dyn_cast<PHINode>(I)) {
        for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++)
          if (V == Phi->getIncomingValue(i)) {
            LoadInst *Load = new LoadInst(
                Alloca, "", Phi->getIncomingBlock(i)->getTerminator());
            Phi->setIncomingValue(i, Load);
          }
      } else {
        LoadInst *Load = new LoadInst(Alloca, "", I);
        I->replaceUsesOfWith(V, Load);
      }
    }

    // Store the original value and the replacement value into the alloca
    StoreInst *Store = new StoreInst(V, Alloca);
    if (auto I = dyn_cast<Instruction>(V))
      Store->insertAfter(I);
    else
      Store->insertAfter(Alloca);

    // Normal return for invoke, or call return
    Instruction *Replacement = cast<Instruction>(Replacements[V].first);
    (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
    // Unwind return for invoke only
    Replacement = cast_or_null<Instruction>(Replacements[V].second);
    if (Replacement)
      (new StoreInst(Replacement, Alloca))->insertAfter(Replacement);
  }

  // Apply mem2reg to promote the allocas to SSA
  SmallVector<AllocaInst *, 16> Allocas;
  for (Value *V : ToSplit)
    Allocas.push_back(AllocaMap[V]);
  PromoteMemToReg(Allocas, DT);

  // Update our tracking of live pointers and base mappings to account for the
  // changes we just made.
  for (Value *V : ToSplit) {
    auto &Elements = ElementMapping[V];

    LiveSet.erase(V);
    LiveSet.insert(Elements.begin(), Elements.end());
    // We need to update the base mapping as well.
    assert(PointerToBase.count(V));
    Value *OldBase = PointerToBase[V];
    auto &BaseElements = ElementMapping[OldBase];
    PointerToBase.erase(V);
    assert(Elements.size() == BaseElements.size());
    for (unsigned i = 0; i < Elements.size(); i++) {
      Value *Elem = Elements[i];
      PointerToBase[Elem] = BaseElements[i];
    }
  }
}

// Helper function for rematerializeLiveValues. It walks the use chain
// starting from "CurrentValue" until it meets "BaseValue". Only "simple"
// values are visited (currently GEPs and casts). Returns true if it
// successfully reached "BaseValue" and false otherwise.
// Fills the "ChainToBase" array with all visited values; "BaseValue" is not
// recorded.
static bool findRematerializableChainToBasePointer(
    SmallVectorImpl<Instruction *> &ChainToBase,
    Value *CurrentValue, Value *BaseValue) {

  // We have found a base value
  if (CurrentValue == BaseValue) {
    return true;
  }

  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) {
    ChainToBase.push_back(GEP);
    return findRematerializableChainToBasePointer(ChainToBase,
                                                  GEP->getPointerOperand(),
                                                  BaseValue);
  }

  if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) {
    Value *Def = CI->stripPointerCasts();

    // These two checks are basically similar. The first one is here for
    // consistency with the findBasePointers logic.
    assert(!isa<CastInst>(Def) && "not a pointer cast found");
    if (!CI->isNoopCast(CI->getModule()->getDataLayout()))
      return false;

    ChainToBase.push_back(CI);
    return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue);
  }

  // Unsupported instruction in the chain
  return false;
}

// Helper function for rematerializeLiveValues. Compute the cost of the use
// chain we are going to rematerialize.
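// For example (hypothetical chain): for a derived pointer computed as
//   %cast = bitcast %jObject addrspace(1)* %base to i8 addrspace(1)*
//   %derived = getelementptr i8, i8 addrspace(1)* %cast, i32 16
// the cost is the (noop) cast cost plus the address computation cost of the
// GEP; a GEP with non-constant indices adds a small extra constant.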
static unsigned
chainToBasePointerCost(SmallVectorImpl<Instruction *> &Chain,
                       TargetTransformInfo &TTI) {
  unsigned Cost = 0;

  for (Instruction *Instr : Chain) {
    if (CastInst *CI = dyn_cast<CastInst>(Instr)) {
      assert(CI->isNoopCast(CI->getModule()->getDataLayout()) &&
             "non noop cast is found during rematerialization");

      Type *SrcTy = CI->getOperand(0)->getType();
      Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy);

    } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) {
      // Cost of the address calculation
      Type *ValTy = GEP->getPointerOperandType()->getPointerElementType();
      Cost += TTI.getAddressComputationCost(ValTy);

      // And the cost of the GEP itself
      // TODO: Use TTI->getGEPCost here (it exists, but appears not to be
      // allowed for external usage)
      if (!GEP->hasAllConstantIndices())
        Cost += 2;

    } else {
      llvm_unreachable("unsupported instruction type during rematerialization");
    }
  }

  return Cost;
}

// From the statepoint liveset pick values that are cheaper to recompute than
// to relocate. Remove these values from the liveset, rematerialize them after
// the statepoint and record them in the "Info" structure. Note that similar
// to relocated values we don't do any user adjustments here.
static void rematerializeLiveValues(CallSite CS,
                                    PartiallyConstructedSafepointRecord &Info,
                                    TargetTransformInfo &TTI) {
  const unsigned int ChainLengthThreshold = 10;

  // Record the values we are going to delete from this statepoint live set.
  // We cannot do this in the following loop due to iterator invalidation.
  SmallVector<Value *, 32> LiveValuesToBeDeleted;

  for (Value *LiveValue : Info.liveset) {
    // For each live pointer find its defining chain
    SmallVector<Instruction *, 3> ChainToBase;
    assert(Info.PointerToBase.count(LiveValue));
    bool FoundChain =
        findRematerializableChainToBasePointer(ChainToBase,
                                               LiveValue,
                                               Info.PointerToBase[LiveValue]);
    // Nothing to do, or the chain is too long
    if (!FoundChain ||
        ChainToBase.size() == 0 ||
        ChainToBase.size() > ChainLengthThreshold)
      continue;

    // Compute the cost of this chain
    unsigned Cost = chainToBasePointerCost(ChainToBase, TTI);
    // TODO: We can also account for cases when we will be able to remove some
    // of the rematerialized values by later optimization passes, i.e. if we
    // rematerialized several intersecting chains, or if the original values
    // don't have any uses besides this statepoint.

    // For invokes we need to rematerialize each chain twice - for the normal
    // and for the unwind basic blocks. Model this by multiplying the cost by
    // two.
    if (CS.isInvoke()) {
      Cost *= 2;
    }
    // If it's too expensive - skip it
    if (Cost >= RematerializationThreshold)
      continue;

    // Remove the value from the live set
    LiveValuesToBeDeleted.push_back(LiveValue);

    // Clone the instructions and record them inside the "Info" structure.

    // Walk backwards to visit the top-most instructions first.
    std::reverse(ChainToBase.begin(), ChainToBase.end());

    // Utility function which clones all instructions from "ChainToBase"
    // and inserts them before "InsertBefore". Returns the rematerialized
    // value which should be used after the statepoint.
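    // For example (hypothetical chain): for ChainToBase =
    //   [%cast = bitcast ... %base ..., %derived = getelementptr ... %cast, ...]
    // the clones %cast.remat and %derived.remat are inserted after the
    // statepoint, with %derived.remat rewired to use %cast.remat.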
    auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) {
      Instruction *LastClonedValue = nullptr;
      Instruction *LastValue = nullptr;
      for (Instruction *Instr : ChainToBase) {
        // Only GEPs and casts are supported as we need to be careful not to
        // introduce any new uses of pointers not in the liveset.
        // Note that it's fine to introduce new uses of pointers which were
        // otherwise not used after this statepoint.
        assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr));

        Instruction *ClonedValue = Instr->clone();
        ClonedValue->insertBefore(InsertBefore);
        ClonedValue->setName(Instr->getName() + ".remat");

        // If it is not the first instruction in the chain then it uses the
        // previously cloned value. We should update it to use the cloned
        // value.
        if (LastClonedValue) {
          assert(LastValue);
          ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue);
#ifndef NDEBUG
          // Assert that the cloned instruction does not use any instructions
          // from this chain other than LastClonedValue
          for (auto OpValue : ClonedValue->operand_values()) {
            assert(std::find(ChainToBase.begin(), ChainToBase.end(), OpValue) ==
                       ChainToBase.end() &&
                   "incorrect use in rematerialization chain");
          }
#endif
        }

        LastClonedValue = ClonedValue;
        LastValue = Instr;
      }
      assert(LastClonedValue);
      return LastClonedValue;
    };

    // Different cases for calls and invokes. For invokes we need to clone
    // instructions both on the normal and the unwind path.
    if (CS.isCall()) {
      Instruction *InsertBefore = CS.getInstruction()->getNextNode();
      assert(InsertBefore);
      Instruction *RematerializedValue = rematerializeChain(InsertBefore);
      Info.RematerializedValues[RematerializedValue] = LiveValue;
    } else {
      InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction());

      Instruction *NormalInsertBefore =
          Invoke->getNormalDest()->getFirstInsertionPt();
      Instruction *UnwindInsertBefore =
          Invoke->getUnwindDest()->getFirstInsertionPt();

      Instruction *NormalRematerializedValue =
          rematerializeChain(NormalInsertBefore);
      Instruction *UnwindRematerializedValue =
          rematerializeChain(UnwindInsertBefore);

      Info.RematerializedValues[NormalRematerializedValue] = LiveValue;
      Info.RematerializedValues[UnwindRematerializedValue] = LiveValue;
    }
  }

  // Remove the rematerialized values from the live set
  for (auto LiveValue : LiveValuesToBeDeleted) {
    Info.liveset.erase(LiveValue);
  }
}

static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P,
                              SmallVectorImpl<CallSite> &toUpdate) {
#ifndef NDEBUG
  // Sanity check the input
  std::set<CallSite> uniqued;
  uniqued.insert(toUpdate.begin(), toUpdate.end());
  assert(uniqued.size() == toUpdate.size() && "no duplicates please!");

  for (size_t i = 0; i < toUpdate.size(); i++) {
    CallSite &CS = toUpdate[i];
    assert(CS.getInstruction()->getParent()->getParent() == &F);
    assert(isStatepoint(CS) && "expected to already be a deopt statepoint");
  }
#endif

  // When inserting gc.relocates for invokes, we need to be able to insert at
  // the top of the successor blocks. See the comment on
  // normalizeForInvokeSafepoint for exactly what is needed. Note that this
  // step may restructure the CFG.
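  // For example (schematic CFG): if an invoke's normal destination also had
  // another predecessor, the edge is expected to be split so that the
  // destination has a unique predecessor and no PHIs, which the insertion
  // code relies on (see the asserts in makeStatepointExplicitImpl).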
  for (CallSite CS : toUpdate) {
    if (!CS.isInvoke())
      continue;
    InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction());
    normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(),
                                DT);
    normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(),
                                DT);
  }

  // A list of dummy calls added to the IR to keep various values obviously
  // live in the IR. We'll remove all of these when done.
  SmallVector<CallInst *, 64> holders;

  // Insert a dummy call with all of the arguments to the vm_state we'll need
  // for the actual safepoint insertion. This ensures reference arguments in
  // the deopt argument list are considered live through the safepoint (and
  // thus makes sure they get relocated.)
  for (size_t i = 0; i < toUpdate.size(); i++) {
    CallSite &CS = toUpdate[i];
    Statepoint StatepointCS(CS);

    SmallVector<Value *, 64> DeoptValues;
    for (Use &U : StatepointCS.vm_state_args()) {
      Value *Arg = cast<Value>(&U);
      assert(!isUnhandledGCPointerType(Arg->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(Arg->getType()))
        DeoptValues.push_back(Arg);
    }
    insertUseHolderAfter(CS, DeoptValues, holders);
  }

  SmallVector<struct PartiallyConstructedSafepointRecord, 64> records;
  records.reserve(toUpdate.size());
  for (size_t i = 0; i < toUpdate.size(); i++) {
    struct PartiallyConstructedSafepointRecord info;
    records.push_back(info);
  }
  assert(records.size() == toUpdate.size());

  // A) Identify all gc pointers which are statically live at the given call
  // site.
  findLiveReferences(F, DT, P, toUpdate, records);

  // B) Find the base pointers for each live pointer
  /* scope for caching */ {
    // Cache the 'defining value' relation used in the computation and
    // insertion of base phis and selects. This ensures that we don't insert
    // large numbers of duplicate base_phis.
    DefiningValueMapTy DVCache;

    for (size_t i = 0; i < records.size(); i++) {
      struct PartiallyConstructedSafepointRecord &info = records[i];
      CallSite &CS = toUpdate[i];
      findBasePointers(DT, DVCache, CS, info);
    }
  } // end of cache scope

  // The base phi insertion logic (for any safepoint) may have inserted new
  // instructions which are now live at some safepoint. The simplest such
  // example is:
  // loop:
  //   phi a  <-- will be a new base_phi here
  //   safepoint 1 <-- that needs to be live here
  //   gep a + 1
  //   safepoint 2
  //   br loop
  // We insert some dummy calls after each safepoint to definitely hold live
  // the base pointers which were identified for that safepoint. We'll then
  // ask liveness for _every_ base inserted to see what is now live. Then we
  // remove the dummy calls.
  holders.reserve(holders.size() + records.size());
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    CallSite &CS = toUpdate[i];

    SmallVector<Value *, 128> Bases;
    for (auto Pair : info.PointerToBase) {
      Bases.push_back(Pair.second);
    }
    insertUseHolderAfter(CS, Bases, holders);
  }

  // By selecting base pointers, we've effectively inserted new uses. Thus, we
  // need to rerun liveness. We may *also* have inserted new defs, but that's
  // not the key issue.
  recomputeLiveInValues(F, DT, P, toUpdate, records);

  if (PrintBasePointers) {
    for (size_t i = 0; i < records.size(); i++) {
      struct PartiallyConstructedSafepointRecord &info = records[i];
      errs() << "Base Pairs: (w/Relocation)\n";
      for (auto Pair : info.PointerToBase) {
        errs() << " derived %" << Pair.first->getName() << " base %"
               << Pair.second->getName() << "\n";
      }
    }
  }
  for (size_t i = 0; i < holders.size(); i++) {
    holders[i]->eraseFromParent();
    holders[i] = nullptr;
  }
  holders.clear();

  // Do a limited scalarization of any live at safepoint vector values which
  // contain pointers. This enables this pass to run after vectorization at
  // the cost of some possible performance loss. TODO: it would be nice to
  // natively support vectors all the way through the backend so we don't need
  // to scalarize here.
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    Instruction *statepoint = toUpdate[i].getInstruction();
    splitVectorValues(cast<Instruction>(statepoint), info.liveset,
                      info.PointerToBase, DT);
  }

  // In order to reduce the live set of a statepoint we might choose to
  // rematerialize some values instead of relocating them. This is purely an
  // optimization and does not influence correctness.
  TargetTransformInfo &TTI =
      P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);

  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    CallSite &CS = toUpdate[i];

    rematerializeLiveValues(CS, info, TTI);
  }

  // Now run through and replace the existing statepoints with new ones with
  // the live variables listed. We do not yet update uses of the values being
  // relocated. We have references to live variables that need to survive to
  // the last iteration of this loop. (By construction, the previous
  // statepoint can not be a live variable, thus we can and do remove the old
  // statepoint calls as we go.)
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    CallSite &CS = toUpdate[i];
    makeStatepointExplicit(DT, CS, P, info);
  }
  toUpdate.clear(); // prevent accidental use of invalid CallSites

  // Do all the fixups of the original live variables to their relocated selves
  SmallVector<Value *, 128> live;
  for (size_t i = 0; i < records.size(); i++) {
    struct PartiallyConstructedSafepointRecord &info = records[i];
    // We can't simply save the live set from the original insertion. One of
    // the live values might be the result of a call which needs a safepoint.
    // That Value* no longer exists and we need to use the new gc_result.
    // Thankfully, the liveset is embedded in the statepoint (and updated), so
    // we just grab that.
    Statepoint statepoint(info.StatepointToken);
    live.insert(live.end(), statepoint.gc_args_begin(),
                statepoint.gc_args_end());
#ifndef NDEBUG
    // Do some basic sanity checks on our liveness results before performing
    // relocation. Relocation can and will turn mistakes in liveness results
    // into non-sensical code which is much harder to debug.
    // TODO: It would be nice to test consistency as well
    assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) &&
           "statepoint must be reachable or liveness is meaningless");
    for (Value *V : statepoint.gc_args()) {
      if (!isa<Instruction>(V))
        // Non-instruction values trivially dominate all possible uses
        continue;
      auto LiveInst = cast<Instruction>(V);
      assert(DT.isReachableFromEntry(LiveInst->getParent()) &&
             "unreachable values should never be live");
      assert(DT.dominates(LiveInst, info.StatepointToken) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
#endif
  }
  unique_unsorted(live);

#ifndef NDEBUG
  // Sanity check
  for (auto ptr : live) {
    assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type");
  }
#endif

  relocationViaAlloca(F, DT, live, records);
  return !records.empty();
}

// Handles both return values and arguments for Functions and CallSites.
template <typename AttrHolder>
static void RemoveDerefAttrAtIndex(LLVMContext &Ctx, AttrHolder &AH,
                                   unsigned Index) {
  AttrBuilder R;
  if (AH.getDereferenceableBytes(Index))
    R.addAttribute(Attribute::get(Ctx, Attribute::Dereferenceable,
                                  AH.getDereferenceableBytes(Index)));
  if (AH.getDereferenceableOrNullBytes(Index))
    R.addAttribute(Attribute::get(Ctx, Attribute::DereferenceableOrNull,
                                  AH.getDereferenceableOrNullBytes(Index)));

  if (!R.empty())
    AH.setAttributes(AH.getAttributes().removeAttributes(
        Ctx, Index, AttributeSet::get(Ctx, Index, R)));
}

void
RewriteStatepointsForGC::stripDereferenceabilityInfoFromPrototype(Function &F) {
  LLVMContext &Ctx = F.getContext();

  for (Argument &A : F.args())
    if (isa<PointerType>(A.getType()))
      RemoveDerefAttrAtIndex(Ctx, F, A.getArgNo() + 1);

  if (isa<PointerType>(F.getReturnType()))
    RemoveDerefAttrAtIndex(Ctx, F, AttributeSet::ReturnIndex);
}

void RewriteStatepointsForGC::stripDereferenceabilityInfoFromBody(Function &F) {
  if (F.empty())
    return;

  LLVMContext &Ctx = F.getContext();
  MDBuilder Builder(Ctx);

  for (Instruction &I : instructions(F)) {
    if (const MDNode *MD = I.getMetadata(LLVMContext::MD_tbaa)) {
      assert(MD->getNumOperands() < 5 && "unrecognized metadata shape!");
      bool IsImmutableTBAA =
          MD->getNumOperands() == 4 &&
          mdconst::extract<ConstantInt>(MD->getOperand(3))->getValue() == 1;

      if (!IsImmutableTBAA)
        continue; // no work to do, MD_tbaa is already marked mutable

      MDNode *Base = cast<MDNode>(MD->getOperand(0));
      MDNode *Access = cast<MDNode>(MD->getOperand(1));
      uint64_t Offset =
          mdconst::extract<ConstantInt>(MD->getOperand(2))->getZExtValue();

      MDNode *MutableTBAA =
          Builder.createTBAAStructTagNode(Base, Access, Offset);
      I.setMetadata(LLVMContext::MD_tbaa, MutableTBAA);
    }

    if (CallSite CS = CallSite(&I)) {
      for (int i = 0, e = CS.arg_size(); i != e; i++)
        if (isa<PointerType>(CS.getArgument(i)->getType()))
          RemoveDerefAttrAtIndex(Ctx, CS, i + 1);
      if (isa<PointerType>(CS.getType()))
        RemoveDerefAttrAtIndex(Ctx, CS, AttributeSet::ReturnIndex);
    }
  }
}

/// Returns true if this function should be rewritten by this pass. The main
/// point of this function is as an extension point for custom logic.
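///
/// For example (illustrative IR), a function declared as
///   define void @f() gc "statepoint-example" { ... }
/// is rewritten, while a function without a "gc" attribute is left alone.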
static bool shouldRewriteStatepointsIn(Function &F) {
  // TODO: This should check the GCStrategy
  if (F.hasGC()) {
    const char *FunctionGCName = F.getGC();
    const StringRef StatepointExampleName("statepoint-example");
    const StringRef CoreCLRName("coreclr");
    return (StatepointExampleName == FunctionGCName) ||
           (CoreCLRName == FunctionGCName);
  } else
    return false;
}

void RewriteStatepointsForGC::stripDereferenceabilityInfo(Module &M) {
#ifndef NDEBUG
  assert(std::any_of(M.begin(), M.end(), shouldRewriteStatepointsIn) &&
         "precondition!");
#endif

  for (Function &F : M)
    stripDereferenceabilityInfoFromPrototype(F);

  for (Function &F : M)
    stripDereferenceabilityInfoFromBody(F);
}

bool RewriteStatepointsForGC::runOnFunction(Function &F) {
  // Nothing to do for declarations.
  if (F.isDeclaration() || F.empty())
    return false;

  // Policy choice says not to rewrite - the most common reason is that we're
  // compiling code without a GCStrategy.
  if (!shouldRewriteStatepointsIn(F))
    return false;

  DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>(F).getDomTree();

  // Gather all the statepoints which need to be rewritten. Be careful to
  // only consider those in reachable code since we need to ask dominance
  // queries when rewriting. We'll delete the unreachable ones in a moment.
  SmallVector<CallSite, 64> ParsePointNeeded;
  bool HasUnreachableStatepoint = false;
  for (Instruction &I : instructions(F)) {
    // TODO: only the ones with the flag set!
    if (isStatepoint(I)) {
      if (DT.isReachableFromEntry(I.getParent()))
        ParsePointNeeded.push_back(CallSite(&I));
      else
        HasUnreachableStatepoint = true;
    }
  }

  bool MadeChange = false;

  // Delete any unreachable statepoints so that we don't have unrewritten
  // statepoints surviving this pass. This makes testing easier and the
  // resulting IR less confusing to human readers. Rather than be fancy, we
  // just reuse a utility function which removes the unreachable blocks.
  if (HasUnreachableStatepoint)
    MadeChange |= removeUnreachableBlocks(F);

  // Return early if no work to do.
  if (ParsePointNeeded.empty())
    return MadeChange;

  // As a prepass, go ahead and aggressively destroy single entry phi nodes.
  // These are created by LCSSA. They have the effect of increasing the size
  // of liveness sets for no good reason. It may be harder to do this post
  // insertion since relocations and base phis can confuse things.
  for (BasicBlock &BB : F)
    if (BB.getUniquePredecessor()) {
      MadeChange = true;
      FoldSingleEntryPHINodes(&BB);
    }

  // Before we start introducing relocations, we want to tweak the IR a bit to
  // avoid unfortunate code generation effects. The main example is that we
  // want to try to make sure the comparison feeding a branch is after any
  // safepoints. Otherwise, we end up with a comparison of pre-relocation
  // values feeding a branch after relocation. This is semantically correct,
  // but results in extra register pressure since both the pre-relocation and
  // post-relocation copies must be available in registers. For code without
  // relocations this is handled elsewhere, but teaching the scheduler to
  // reverse the transform we're about to do would be slightly complex.
  // Note: This may extend the live range of the inputs to the icmp and thus
  // increase the liveset of any statepoint we move over. This is profitable
  // as long as all statepoints are in rare blocks. If we had in-register
  // lowering for live values this would be a much safer transform.
  auto getConditionInst = [](TerminatorInst *TI) -> Instruction * {
    if (auto *BI = dyn_cast<BranchInst>(TI))
      if (BI->isConditional())
        return dyn_cast<Instruction>(BI->getCondition());
    // TODO: Extend this to handle switches
    return nullptr;
  };
  for (BasicBlock &BB : F) {
    TerminatorInst *TI = BB.getTerminator();
    if (auto *Cond = getConditionInst(TI))
      // TODO: Handle more than just ICmps here. We should be able to move
      // most instructions without side effects or memory access.
      if (isa<ICmpInst>(Cond) && Cond->hasOneUse()) {
        MadeChange = true;
        Cond->moveBefore(TI);
      }
  }

  MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded);
  return MadeChange;
}

// liveness computation via standard dataflow
// -------------------------------------------------------------------

// TODO: Consider using bitvectors for liveness, the set of potentially
// interesting values should be small and easy to pre-compute.

/// Compute the live-in set for the location rbegin starting from
/// the live-out set of the basic block
static void computeLiveInValues(BasicBlock::reverse_iterator rbegin,
                                BasicBlock::reverse_iterator rend,
                                DenseSet<Value *> &LiveTmp) {

  for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) {
    Instruction *I = &*ritr;

    // KILL/Def - Remove this definition from LiveIn
    LiveTmp.erase(I);

    // Don't consider *uses* in PHI nodes, we handle their contribution to
    // predecessor blocks when we seed the LiveOut sets
    if (isa<PHINode>(I))
      continue;

    // USE - Add to the LiveIn set for this instruction
    for (Value *V : I->operands()) {
      assert(!isUnhandledGCPointerType(V->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
        // The choice to exclude all things constant here is slightly subtle.
        // There are two independent reasons:
        // - We assume that things which are constant (from LLVM's definition)
        //   do not move at runtime. For example, the address of a global
        //   variable is fixed, even though its contents may not be.
        // - Second, we can't disallow arbitrary inttoptr constants even
        //   if the language frontend does. Optimization passes are free to
        //   locally exploit facts without respect to global reachability.
        //   This can create sections of code which are dynamically
        //   unreachable and contain just about anything. (see constants.ll
        //   in tests)
        LiveTmp.insert(V);
      }
    }
  }
}

static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) {

  for (BasicBlock *Succ : successors(BB)) {
    const BasicBlock::iterator E(Succ->getFirstNonPHI());
    for (BasicBlock::iterator I = Succ->begin(); I != E; I++) {
      PHINode *Phi = cast<PHINode>(&*I);
      Value *V = Phi->getIncomingValueForBlock(BB);
      assert(!isUnhandledGCPointerType(V->getType()) &&
             "support for FCA unimplemented");
      if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) {
        LiveTmp.insert(V);
      }
    }
  }
}

static DenseSet<Value *> computeKillSet(BasicBlock *BB) {
  DenseSet<Value *> KillSet;
  for (Instruction &I : *BB)
    if (isHandledGCPointerType(I.getType()))
      KillSet.insert(&I);
  return KillSet;
}

#ifndef NDEBUG
/// Check that the items in 'Live' dominate 'TI'. This is used as a basic
/// sanity check for the liveness computation.
static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live,
                          TerminatorInst *TI, bool TermOkay = false) {
  for (Value *V : Live) {
    if (auto *I = dyn_cast<Instruction>(V)) {
      // The terminator can be a member of the LiveOut set. LLVM's definition
      // of instruction dominance states that V does not dominate itself. As
      // such, we need to special case this to allow it.
      if (TermOkay && TI == I)
        continue;
      assert(DT.dominates(I, TI) &&
             "basic SSA liveness expectation violated by liveness analysis");
    }
  }
}

/// Check that all the liveness sets used during the computation of liveness
/// obey basic SSA properties. This is useful for finding cases where we miss
/// a def.
static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data,
                          BasicBlock &BB) {
  checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator());
  checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true);
  checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator());
}
#endif

static void computeLiveInValues(DominatorTree &DT, Function &F,
                                GCPtrLivenessData &Data) {

  SmallSetVector<BasicBlock *, 200> Worklist;
  auto AddPredsToWorklist = [&](BasicBlock *BB) {
    // We use a SetVector so that we don't have duplicates in the worklist.
    Worklist.insert(pred_begin(BB), pred_end(BB));
  };
  auto NextItem = [&]() {
    BasicBlock *BB = Worklist.back();
    Worklist.pop_back();
    return BB;
  };

  // Seed the liveness for each individual block
  for (BasicBlock &BB : F) {
    Data.KillSet[&BB] = computeKillSet(&BB);
    Data.LiveSet[&BB].clear();
    computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]);

#ifndef NDEBUG
    for (Value *Kill : Data.KillSet[&BB])
      assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill");
#endif

    Data.LiveOut[&BB] = DenseSet<Value *>();
    computeLiveOutSeed(&BB, Data.LiveOut[&BB]);
    Data.LiveIn[&BB] = Data.LiveSet[&BB];
    set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]);
    set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]);
    if (!Data.LiveIn[&BB].empty())
      AddPredsToWorklist(&BB);
  }

  // Propagate that liveness until stable
  while (!Worklist.empty()) {
    BasicBlock *BB = NextItem();

    // Compute our new liveout set, then exit early if it hasn't changed
    // despite the contribution of our successors.
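    // This is the standard backward dataflow step (schematically):
    //   LiveOut(BB) = union over successors S of LiveIn(S)
    //   LiveIn(BB)  = (LiveSet(BB) union LiveOut(BB)) - KillSet(BB)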
    DenseSet<Value *> LiveOut = Data.LiveOut[BB];
    const auto OldLiveOutSize = LiveOut.size();
    for (BasicBlock *Succ : successors(BB)) {
      assert(Data.LiveIn.count(Succ));
      set_union(LiveOut, Data.LiveIn[Succ]);
    }
    // assert: OldLiveOut is a subset of LiveOut
    if (OldLiveOutSize == LiveOut.size()) {
      // If the sets are the same size, then we didn't actually add anything
      // when unioning our successors' LiveIn. Thus, the LiveIn of this block
      // hasn't changed.
      continue;
    }
    Data.LiveOut[BB] = LiveOut;

    // Apply the effects of this basic block
    DenseSet<Value *> LiveTmp = LiveOut;
    set_union(LiveTmp, Data.LiveSet[BB]);
    set_subtract(LiveTmp, Data.KillSet[BB]);

    assert(Data.LiveIn.count(BB));
    const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB];
    // assert: OldLiveIn is a subset of LiveTmp
    if (OldLiveIn.size() != LiveTmp.size()) {
      Data.LiveIn[BB] = LiveTmp;
      AddPredsToWorklist(BB);
    }
  } // while (!Worklist.empty())

#ifndef NDEBUG
  // Sanity check our output against SSA properties. This helps catch any
  // missing kills during the above iteration.
  for (BasicBlock &BB : F) {
    checkBasicSSA(DT, Data, BB);
  }
#endif
}

static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data,
                              StatepointLiveSetTy &Out) {

  BasicBlock *BB = Inst->getParent();

  // Note: The copy is intentional and required
  assert(Data.LiveOut.count(BB));
  DenseSet<Value *> LiveOut = Data.LiveOut[BB];

  // We want to handle the statepoint itself oddly. Its call result is not
  // live (normal), nor are its arguments (unless they're used again later).
  // This adjustment is specifically what we need to relocate.
  BasicBlock::reverse_iterator rend(Inst);
  computeLiveInValues(BB->rbegin(), rend, LiveOut);
  LiveOut.erase(Inst);
  Out.insert(LiveOut.begin(), LiveOut.end());
}

static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData,
                                  const CallSite &CS,
                                  PartiallyConstructedSafepointRecord &Info) {
  Instruction *Inst = CS.getInstruction();
  StatepointLiveSetTy Updated;
  findLiveSetAtInst(Inst, RevisedLivenessData, Updated);

#ifndef NDEBUG
  DenseSet<Value *> Bases;
  for (auto KVPair : Info.PointerToBase) {
    Bases.insert(KVPair.second);
  }
#endif
  // We may have base pointers which are now live that weren't before. We need
  // to update the PointerToBase structure to reflect this.
  for (auto V : Updated)
    if (!Info.PointerToBase.count(V)) {
      assert(Bases.count(V) && "can't find base for unexpected live value");
      Info.PointerToBase[V] = V;
      continue;
    }

#ifndef NDEBUG
  for (auto V : Updated) {
    assert(Info.PointerToBase.count(V) &&
           "must be able to find base for live value");
  }
#endif

  // Remove any stale base mappings - this can happen since our liveness is
  // more precise than the one inherent in the base pointer analysis.
  DenseSet<Value *> ToErase;
  for (auto KVPair : Info.PointerToBase)
    if (!Updated.count(KVPair.first))
      ToErase.insert(KVPair.first);
  for (auto V : ToErase)
    Info.PointerToBase.erase(V);

#ifndef NDEBUG
  for (auto KVPair : Info.PointerToBase)
    assert(Updated.count(KVPair.first) && "record for non-live value");
#endif

  Info.liveset = Updated;
}