1 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Rewrite an existing set of gc.statepoints such that they make potential 11 // relocations performed by the garbage collector explicit in the IR. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Pass.h" 16 #include "llvm/Analysis/CFG.h" 17 #include "llvm/ADT/SetOperations.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/ADT/DenseSet.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/IR/BasicBlock.h" 22 #include "llvm/IR/CallSite.h" 23 #include "llvm/IR/Dominators.h" 24 #include "llvm/IR/Function.h" 25 #include "llvm/IR/IRBuilder.h" 26 #include "llvm/IR/InstIterator.h" 27 #include "llvm/IR/Instructions.h" 28 #include "llvm/IR/Intrinsics.h" 29 #include "llvm/IR/IntrinsicInst.h" 30 #include "llvm/IR/Module.h" 31 #include "llvm/IR/Statepoint.h" 32 #include "llvm/IR/Value.h" 33 #include "llvm/IR/Verifier.h" 34 #include "llvm/Support/Debug.h" 35 #include "llvm/Support/CommandLine.h" 36 #include "llvm/Transforms/Scalar.h" 37 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 38 #include "llvm/Transforms/Utils/Cloning.h" 39 #include "llvm/Transforms/Utils/Local.h" 40 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 41 42 #define DEBUG_TYPE "rewrite-statepoints-for-gc" 43 44 using namespace llvm; 45 46 // Print tracing output 47 static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden, 48 cl::init(false)); 49 50 // Print the liveset found at the insert location 51 static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden, 52 cl::init(false)); 53 static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, 54 cl::init(false)); 55 // Print out the base pointers for debugging 56 static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden, 57 cl::init(false)); 58 59 #ifdef XDEBUG 60 static bool ClobberNonLive = true; 61 #else 62 static bool ClobberNonLive = false; 63 #endif 64 static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live", 65 cl::location(ClobberNonLive), 66 cl::Hidden); 67 68 namespace { 69 struct RewriteStatepointsForGC : public FunctionPass { 70 static char ID; // Pass identification, replacement for typeid 71 72 RewriteStatepointsForGC() : FunctionPass(ID) { 73 initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry()); 74 } 75 bool runOnFunction(Function &F) override; 76 77 void getAnalysisUsage(AnalysisUsage &AU) const override { 78 // We add and rewrite a bunch of instructions, but don't really do much 79 // else. We could in theory preserve a lot more analyses here. 80 AU.addRequired<DominatorTreeWrapperPass>(); 81 } 82 }; 83 } // namespace 84 85 char RewriteStatepointsForGC::ID = 0; 86 87 FunctionPass *llvm::createRewriteStatepointsForGCPass() { 88 return new RewriteStatepointsForGC(); 89 } 90 91 INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", 92 "Make relocations explicit at statepoints", false, false) 93 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 94 INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", 95 "Make relocations explicit at statepoints", false, false) 96 97 namespace { 98 struct GCPtrLivenessData { 99 /// Values defined in this block. 100 DenseMap<BasicBlock *, DenseSet<Value *>> KillSet; 101 /// Values used in this block (and thus live); does not included values 102 /// killed within this block. 103 DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet; 104 105 /// Values live into this basic block (i.e. used by any 106 /// instruction in this basic block or ones reachable from here) 107 DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn; 108 109 /// Values live out of this basic block (i.e. live into 110 /// any successor block) 111 DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut; 112 }; 113 114 // The type of the internal cache used inside the findBasePointers family 115 // of functions. From the callers perspective, this is an opaque type and 116 // should not be inspected. 117 // 118 // In the actual implementation this caches two relations: 119 // - The base relation itself (i.e. this pointer is based on that one) 120 // - The base defining value relation (i.e. before base_phi insertion) 121 // Generally, after the execution of a full findBasePointer call, only the 122 // base relation will remain. Internally, we add a mixture of the two 123 // types, then update all the second type to the first type 124 typedef DenseMap<Value *, Value *> DefiningValueMapTy; 125 typedef DenseSet<llvm::Value *> StatepointLiveSetTy; 126 127 struct PartiallyConstructedSafepointRecord { 128 /// The set of values known to be live accross this safepoint 129 StatepointLiveSetTy liveset; 130 131 /// Mapping from live pointers to a base-defining-value 132 DenseMap<llvm::Value *, llvm::Value *> PointerToBase; 133 134 /// The *new* gc.statepoint instruction itself. This produces the token 135 /// that normal path gc.relocates and the gc.result are tied to. 136 Instruction *StatepointToken; 137 138 /// Instruction to which exceptional gc relocates are attached 139 /// Makes it easier to iterate through them during relocationViaAlloca. 140 Instruction *UnwindToken; 141 }; 142 } 143 144 /// Compute the live-in set for every basic block in the function 145 static void computeLiveInValues(DominatorTree &DT, Function &F, 146 GCPtrLivenessData &Data); 147 148 /// Given results from the dataflow liveness computation, find the set of live 149 /// Values at a particular instruction. 150 static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, 151 StatepointLiveSetTy &out); 152 153 // TODO: Once we can get to the GCStrategy, this becomes 154 // Optional<bool> isGCManagedPointer(const Value *V) const override { 155 156 static bool isGCPointerType(const Type *T) { 157 if (const PointerType *PT = dyn_cast<PointerType>(T)) 158 // For the sake of this example GC, we arbitrarily pick addrspace(1) as our 159 // GC managed heap. We know that a pointer into this heap needs to be 160 // updated and that no other pointer does. 161 return (1 == PT->getAddressSpace()); 162 return false; 163 } 164 165 // Return true if this type is one which a) is a gc pointer or contains a GC 166 // pointer and b) is of a type this code expects to encounter as a live value. 167 // (The insertion code will assert that a type which matches (a) and not (b) 168 // is not encountered.) 169 static bool isHandledGCPointerType(Type *T) { 170 // We fully support gc pointers 171 if (isGCPointerType(T)) 172 return true; 173 // We partially support vectors of gc pointers. The code will assert if it 174 // can't handle something. 175 if (auto VT = dyn_cast<VectorType>(T)) 176 if (isGCPointerType(VT->getElementType())) 177 return true; 178 return false; 179 } 180 181 #ifndef NDEBUG 182 /// Returns true if this type contains a gc pointer whether we know how to 183 /// handle that type or not. 184 static bool containsGCPtrType(Type *Ty) { 185 if (isGCPointerType(Ty)) 186 return true; 187 if (VectorType *VT = dyn_cast<VectorType>(Ty)) 188 return isGCPointerType(VT->getScalarType()); 189 if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) 190 return containsGCPtrType(AT->getElementType()); 191 if (StructType *ST = dyn_cast<StructType>(Ty)) 192 return std::any_of( 193 ST->subtypes().begin(), ST->subtypes().end(), 194 [](Type *SubType) { return containsGCPtrType(SubType); }); 195 return false; 196 } 197 198 // Returns true if this is a type which a) is a gc pointer or contains a GC 199 // pointer and b) is of a type which the code doesn't expect (i.e. first class 200 // aggregates). Used to trip assertions. 201 static bool isUnhandledGCPointerType(Type *Ty) { 202 return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty); 203 } 204 #endif 205 206 static bool order_by_name(llvm::Value *a, llvm::Value *b) { 207 if (a->hasName() && b->hasName()) { 208 return -1 == a->getName().compare(b->getName()); 209 } else if (a->hasName() && !b->hasName()) { 210 return true; 211 } else if (!a->hasName() && b->hasName()) { 212 return false; 213 } else { 214 // Better than nothing, but not stable 215 return a < b; 216 } 217 } 218 219 // Conservatively identifies any definitions which might be live at the 220 // given instruction. The analysis is performed immediately before the 221 // given instruction. Values defined by that instruction are not considered 222 // live. Values used by that instruction are considered live. 223 static void analyzeParsePointLiveness( 224 DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, 225 const CallSite &CS, PartiallyConstructedSafepointRecord &result) { 226 Instruction *inst = CS.getInstruction(); 227 228 StatepointLiveSetTy liveset; 229 findLiveSetAtInst(inst, OriginalLivenessData, liveset); 230 231 if (PrintLiveSet) { 232 // Note: This output is used by several of the test cases 233 // The order of elemtns in a set is not stable, put them in a vec and sort 234 // by name 235 SmallVector<Value *, 64> temp; 236 temp.insert(temp.end(), liveset.begin(), liveset.end()); 237 std::sort(temp.begin(), temp.end(), order_by_name); 238 errs() << "Live Variables:\n"; 239 for (Value *V : temp) { 240 errs() << " " << V->getName(); // no newline 241 V->dump(); 242 } 243 } 244 if (PrintLiveSetSize) { 245 errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n"; 246 errs() << "Number live values: " << liveset.size() << "\n"; 247 } 248 result.liveset = liveset; 249 } 250 251 static Value *findBaseDefiningValue(Value *I); 252 253 /// If we can trivially determine that the index specified in the given vector 254 /// is a base pointer, return it. In cases where the entire vector is known to 255 /// consist of base pointers, the entire vector will be returned. This 256 /// indicates that the relevant extractelement is a valid base pointer and 257 /// should be used directly. 258 static Value *findBaseOfVector(Value *I, Value *Index) { 259 assert(I->getType()->isVectorTy() && 260 cast<VectorType>(I->getType())->getElementType()->isPointerTy() && 261 "Illegal to ask for the base pointer of a non-pointer type"); 262 263 // Each case parallels findBaseDefiningValue below, see that code for 264 // detailed motivation. 265 266 if (isa<Argument>(I)) 267 // An incoming argument to the function is a base pointer 268 return I; 269 270 // We shouldn't see the address of a global as a vector value? 271 assert(!isa<GlobalVariable>(I) && 272 "unexpected global variable found in base of vector"); 273 274 // inlining could possibly introduce phi node that contains 275 // undef if callee has multiple returns 276 if (isa<UndefValue>(I)) 277 // utterly meaningless, but useful for dealing with partially optimized 278 // code. 279 return I; 280 281 // Due to inheritance, this must be _after_ the global variable and undef 282 // checks 283 if (Constant *Con = dyn_cast<Constant>(I)) { 284 assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && 285 "order of checks wrong!"); 286 assert(Con->isNullValue() && "null is the only case which makes sense"); 287 return Con; 288 } 289 290 if (isa<LoadInst>(I)) 291 return I; 292 293 // For an insert element, we might be able to look through it if we know 294 // something about the indexes, but if the indices are arbitrary values, we 295 // can't without much more extensive scalarization. 296 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) { 297 Value *InsertIndex = IEI->getOperand(2); 298 // This index is inserting the value, look for it's base 299 if (InsertIndex == Index) 300 return findBaseDefiningValue(IEI->getOperand(1)); 301 // Both constant, and can't be equal per above. This insert is definitely 302 // not relevant, look back at the rest of the vector and keep trying. 303 if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex)) 304 return findBaseOfVector(IEI->getOperand(0), Index); 305 } 306 307 // Note: This code is currently rather incomplete. We are essentially only 308 // handling cases where the vector element is trivially a base pointer. We 309 // need to update the entire base pointer construction algorithm to know how 310 // to track vector elements and potentially scalarize, but the case which 311 // would motivate the work hasn't shown up in real workloads yet. 312 llvm_unreachable("no base found for vector element"); 313 } 314 315 /// Helper function for findBasePointer - Will return a value which either a) 316 /// defines the base pointer for the input or b) blocks the simple search 317 /// (i.e. a PHI or Select of two derived pointers) 318 static Value *findBaseDefiningValue(Value *I) { 319 assert(I->getType()->isPointerTy() && 320 "Illegal to ask for the base pointer of a non-pointer type"); 321 322 // This case is a bit of a hack - it only handles extracts from vectors which 323 // trivially contain only base pointers or cases where we can directly match 324 // the index of the original extract element to an insertion into the vector. 325 // See note inside the function for how to improve this. 326 if (auto *EEI = dyn_cast<ExtractElementInst>(I)) { 327 Value *VectorOperand = EEI->getVectorOperand(); 328 Value *Index = EEI->getIndexOperand(); 329 Value *VectorBase = findBaseOfVector(VectorOperand, Index); 330 // If the result returned is a vector, we know the entire vector must 331 // contain base pointers. In that case, the extractelement is a valid base 332 // for this value. 333 if (VectorBase->getType()->isVectorTy()) 334 return EEI; 335 // Otherwise, we needed to look through the vector to find the base for 336 // this particular element. 337 assert(VectorBase->getType()->isPointerTy()); 338 return VectorBase; 339 } 340 341 if (isa<Argument>(I)) 342 // An incoming argument to the function is a base pointer 343 // We should have never reached here if this argument isn't an gc value 344 return I; 345 346 if (isa<GlobalVariable>(I)) 347 // base case 348 return I; 349 350 // inlining could possibly introduce phi node that contains 351 // undef if callee has multiple returns 352 if (isa<UndefValue>(I)) 353 // utterly meaningless, but useful for dealing with 354 // partially optimized code. 355 return I; 356 357 // Due to inheritance, this must be _after_ the global variable and undef 358 // checks 359 if (Constant *Con = dyn_cast<Constant>(I)) { 360 assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && 361 "order of checks wrong!"); 362 // Note: Finding a constant base for something marked for relocation 363 // doesn't really make sense. The most likely case is either a) some 364 // screwed up the address space usage or b) your validating against 365 // compiled C++ code w/o the proper separation. The only real exception 366 // is a null pointer. You could have generic code written to index of 367 // off a potentially null value and have proven it null. We also use 368 // null pointers in dead paths of relocation phis (which we might later 369 // want to find a base pointer for). 370 assert(isa<ConstantPointerNull>(Con) && 371 "null is the only case which makes sense"); 372 return Con; 373 } 374 375 if (CastInst *CI = dyn_cast<CastInst>(I)) { 376 Value *Def = CI->stripPointerCasts(); 377 // If we find a cast instruction here, it means we've found a cast which is 378 // not simply a pointer cast (i.e. an inttoptr). We don't know how to 379 // handle int->ptr conversion. 380 assert(!isa<CastInst>(Def) && "shouldn't find another cast here"); 381 return findBaseDefiningValue(Def); 382 } 383 384 if (isa<LoadInst>(I)) 385 return I; // The value loaded is an gc base itself 386 387 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) 388 // The base of this GEP is the base 389 return findBaseDefiningValue(GEP->getPointerOperand()); 390 391 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 392 switch (II->getIntrinsicID()) { 393 case Intrinsic::experimental_gc_result_ptr: 394 default: 395 // fall through to general call handling 396 break; 397 case Intrinsic::experimental_gc_statepoint: 398 case Intrinsic::experimental_gc_result_float: 399 case Intrinsic::experimental_gc_result_int: 400 llvm_unreachable("these don't produce pointers"); 401 case Intrinsic::experimental_gc_relocate: { 402 // Rerunning safepoint insertion after safepoints are already 403 // inserted is not supported. It could probably be made to work, 404 // but why are you doing this? There's no good reason. 405 llvm_unreachable("repeat safepoint insertion is not supported"); 406 } 407 case Intrinsic::gcroot: 408 // Currently, this mechanism hasn't been extended to work with gcroot. 409 // There's no reason it couldn't be, but I haven't thought about the 410 // implications much. 411 llvm_unreachable( 412 "interaction with the gcroot mechanism is not supported"); 413 } 414 } 415 // We assume that functions in the source language only return base 416 // pointers. This should probably be generalized via attributes to support 417 // both source language and internal functions. 418 if (isa<CallInst>(I) || isa<InvokeInst>(I)) 419 return I; 420 421 // I have absolutely no idea how to implement this part yet. It's not 422 // neccessarily hard, I just haven't really looked at it yet. 423 assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented"); 424 425 if (isa<AtomicCmpXchgInst>(I)) 426 // A CAS is effectively a atomic store and load combined under a 427 // predicate. From the perspective of base pointers, we just treat it 428 // like a load. 429 return I; 430 431 assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are " 432 "binary ops which don't apply to pointers"); 433 434 // The aggregate ops. Aggregates can either be in the heap or on the 435 // stack, but in either case, this is simply a field load. As a result, 436 // this is a defining definition of the base just like a load is. 437 if (isa<ExtractValueInst>(I)) 438 return I; 439 440 // We should never see an insert vector since that would require we be 441 // tracing back a struct value not a pointer value. 442 assert(!isa<InsertValueInst>(I) && 443 "Base pointer for a struct is meaningless"); 444 445 // The last two cases here don't return a base pointer. Instead, they 446 // return a value which dynamically selects from amoung several base 447 // derived pointers (each with it's own base potentially). It's the job of 448 // the caller to resolve these. 449 assert((isa<SelectInst>(I) || isa<PHINode>(I)) && 450 "missing instruction case in findBaseDefiningValing"); 451 return I; 452 } 453 454 /// Returns the base defining value for this value. 455 static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) { 456 Value *&Cached = Cache[I]; 457 if (!Cached) { 458 Cached = findBaseDefiningValue(I); 459 } 460 assert(Cache[I] != nullptr); 461 462 if (TraceLSP) { 463 dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() 464 << "\n"; 465 } 466 return Cached; 467 } 468 469 /// Return a base pointer for this value if known. Otherwise, return it's 470 /// base defining value. 471 static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { 472 Value *Def = findBaseDefiningValueCached(I, Cache); 473 auto Found = Cache.find(Def); 474 if (Found != Cache.end()) { 475 // Either a base-of relation, or a self reference. Caller must check. 476 return Found->second; 477 } 478 // Only a BDV available 479 return Def; 480 } 481 482 /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, 483 /// is it known to be a base pointer? Or do we need to continue searching. 484 static bool isKnownBaseResult(Value *V) { 485 if (!isa<PHINode>(V) && !isa<SelectInst>(V)) { 486 // no recursion possible 487 return true; 488 } 489 if (isa<Instruction>(V) && 490 cast<Instruction>(V)->getMetadata("is_base_value")) { 491 // This is a previously inserted base phi or select. We know 492 // that this is a base value. 493 return true; 494 } 495 496 // We need to keep searching 497 return false; 498 } 499 500 // TODO: find a better name for this 501 namespace { 502 class PhiState { 503 public: 504 enum Status { Unknown, Base, Conflict }; 505 506 PhiState(Status s, Value *b = nullptr) : status(s), base(b) { 507 assert(status != Base || b); 508 } 509 PhiState(Value *b) : status(Base), base(b) {} 510 PhiState() : status(Unknown), base(nullptr) {} 511 512 Status getStatus() const { return status; } 513 Value *getBase() const { return base; } 514 515 bool isBase() const { return getStatus() == Base; } 516 bool isUnknown() const { return getStatus() == Unknown; } 517 bool isConflict() const { return getStatus() == Conflict; } 518 519 bool operator==(const PhiState &other) const { 520 return base == other.base && status == other.status; 521 } 522 523 bool operator!=(const PhiState &other) const { return !(*this == other); } 524 525 void dump() { 526 errs() << status << " (" << base << " - " 527 << (base ? base->getName() : "nullptr") << "): "; 528 } 529 530 private: 531 Status status; 532 Value *base; // non null only if status == base 533 }; 534 535 typedef DenseMap<Value *, PhiState> ConflictStateMapTy; 536 // Values of type PhiState form a lattice, and this is a helper 537 // class that implementes the meet operation. The meat of the meet 538 // operation is implemented in MeetPhiStates::pureMeet 539 class MeetPhiStates { 540 public: 541 // phiStates is a mapping from PHINodes and SelectInst's to PhiStates. 542 explicit MeetPhiStates(const ConflictStateMapTy &phiStates) 543 : phiStates(phiStates) {} 544 545 // Destructively meet the current result with the base V. V can 546 // either be a merge instruction (SelectInst / PHINode), in which 547 // case its status is looked up in the phiStates map; or a regular 548 // SSA value, in which case it is assumed to be a base. 549 void meetWith(Value *V) { 550 PhiState otherState = getStateForBDV(V); 551 assert((MeetPhiStates::pureMeet(otherState, currentResult) == 552 MeetPhiStates::pureMeet(currentResult, otherState)) && 553 "math is wrong: meet does not commute!"); 554 currentResult = MeetPhiStates::pureMeet(otherState, currentResult); 555 } 556 557 PhiState getResult() const { return currentResult; } 558 559 private: 560 const ConflictStateMapTy &phiStates; 561 PhiState currentResult; 562 563 /// Return a phi state for a base defining value. We'll generate a new 564 /// base state for known bases and expect to find a cached state otherwise 565 PhiState getStateForBDV(Value *baseValue) { 566 if (isKnownBaseResult(baseValue)) { 567 return PhiState(baseValue); 568 } else { 569 return lookupFromMap(baseValue); 570 } 571 } 572 573 PhiState lookupFromMap(Value *V) { 574 auto I = phiStates.find(V); 575 assert(I != phiStates.end() && "lookup failed!"); 576 return I->second; 577 } 578 579 static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) { 580 switch (stateA.getStatus()) { 581 case PhiState::Unknown: 582 return stateB; 583 584 case PhiState::Base: 585 assert(stateA.getBase() && "can't be null"); 586 if (stateB.isUnknown()) 587 return stateA; 588 589 if (stateB.isBase()) { 590 if (stateA.getBase() == stateB.getBase()) { 591 assert(stateA == stateB && "equality broken!"); 592 return stateA; 593 } 594 return PhiState(PhiState::Conflict); 595 } 596 assert(stateB.isConflict() && "only three states!"); 597 return PhiState(PhiState::Conflict); 598 599 case PhiState::Conflict: 600 return stateA; 601 } 602 llvm_unreachable("only three states!"); 603 } 604 }; 605 } 606 /// For a given value or instruction, figure out what base ptr it's derived 607 /// from. For gc objects, this is simply itself. On success, returns a value 608 /// which is the base pointer. (This is reliable and can be used for 609 /// relocation.) On failure, returns nullptr. 610 static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { 611 Value *def = findBaseOrBDV(I, cache); 612 613 if (isKnownBaseResult(def)) { 614 return def; 615 } 616 617 // Here's the rough algorithm: 618 // - For every SSA value, construct a mapping to either an actual base 619 // pointer or a PHI which obscures the base pointer. 620 // - Construct a mapping from PHI to unknown TOP state. Use an 621 // optimistic algorithm to propagate base pointer information. Lattice 622 // looks like: 623 // UNKNOWN 624 // b1 b2 b3 b4 625 // CONFLICT 626 // When algorithm terminates, all PHIs will either have a single concrete 627 // base or be in a conflict state. 628 // - For every conflict, insert a dummy PHI node without arguments. Add 629 // these to the base[Instruction] = BasePtr mapping. For every 630 // non-conflict, add the actual base. 631 // - For every conflict, add arguments for the base[a] of each input 632 // arguments. 633 // 634 // Note: A simpler form of this would be to add the conflict form of all 635 // PHIs without running the optimistic algorithm. This would be 636 // analougous to pessimistic data flow and would likely lead to an 637 // overall worse solution. 638 639 ConflictStateMapTy states; 640 states[def] = PhiState(); 641 // Recursively fill in all phis & selects reachable from the initial one 642 // for which we don't already know a definite base value for 643 // TODO: This should be rewritten with a worklist 644 bool done = false; 645 while (!done) { 646 done = true; 647 // Since we're adding elements to 'states' as we run, we can't keep 648 // iterators into the set. 649 SmallVector<Value *, 16> Keys; 650 Keys.reserve(states.size()); 651 for (auto Pair : states) { 652 Value *V = Pair.first; 653 Keys.push_back(V); 654 } 655 for (Value *v : Keys) { 656 assert(!isKnownBaseResult(v) && "why did it get added?"); 657 if (PHINode *phi = dyn_cast<PHINode>(v)) { 658 assert(phi->getNumIncomingValues() > 0 && 659 "zero input phis are illegal"); 660 for (Value *InVal : phi->incoming_values()) { 661 Value *local = findBaseOrBDV(InVal, cache); 662 if (!isKnownBaseResult(local) && states.find(local) == states.end()) { 663 states[local] = PhiState(); 664 done = false; 665 } 666 } 667 } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) { 668 Value *local = findBaseOrBDV(sel->getTrueValue(), cache); 669 if (!isKnownBaseResult(local) && states.find(local) == states.end()) { 670 states[local] = PhiState(); 671 done = false; 672 } 673 local = findBaseOrBDV(sel->getFalseValue(), cache); 674 if (!isKnownBaseResult(local) && states.find(local) == states.end()) { 675 states[local] = PhiState(); 676 done = false; 677 } 678 } 679 } 680 } 681 682 if (TraceLSP) { 683 errs() << "States after initialization:\n"; 684 for (auto Pair : states) { 685 Instruction *v = cast<Instruction>(Pair.first); 686 PhiState state = Pair.second; 687 state.dump(); 688 v->dump(); 689 } 690 } 691 692 // TODO: come back and revisit the state transitions around inputs which 693 // have reached conflict state. The current version seems too conservative. 694 695 bool progress = true; 696 while (progress) { 697 #ifndef NDEBUG 698 size_t oldSize = states.size(); 699 #endif 700 progress = false; 701 // We're only changing keys in this loop, thus safe to keep iterators 702 for (auto Pair : states) { 703 MeetPhiStates calculateMeet(states); 704 Value *v = Pair.first; 705 assert(!isKnownBaseResult(v) && "why did it get added?"); 706 if (SelectInst *select = dyn_cast<SelectInst>(v)) { 707 calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache)); 708 calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache)); 709 } else 710 for (Value *Val : cast<PHINode>(v)->incoming_values()) 711 calculateMeet.meetWith(findBaseOrBDV(Val, cache)); 712 713 PhiState oldState = states[v]; 714 PhiState newState = calculateMeet.getResult(); 715 if (oldState != newState) { 716 progress = true; 717 states[v] = newState; 718 } 719 } 720 721 assert(oldSize <= states.size()); 722 assert(oldSize == states.size() || progress); 723 } 724 725 if (TraceLSP) { 726 errs() << "States after meet iteration:\n"; 727 for (auto Pair : states) { 728 Instruction *v = cast<Instruction>(Pair.first); 729 PhiState state = Pair.second; 730 state.dump(); 731 v->dump(); 732 } 733 } 734 735 // Insert Phis for all conflicts 736 // We want to keep naming deterministic in the loop that follows, so 737 // sort the keys before iteration. This is useful in allowing us to 738 // write stable tests. Note that there is no invalidation issue here. 739 SmallVector<Value *, 16> Keys; 740 Keys.reserve(states.size()); 741 for (auto Pair : states) { 742 Value *V = Pair.first; 743 Keys.push_back(V); 744 } 745 std::sort(Keys.begin(), Keys.end(), order_by_name); 746 // TODO: adjust naming patterns to avoid this order of iteration dependency 747 for (Value *V : Keys) { 748 Instruction *v = cast<Instruction>(V); 749 PhiState state = states[V]; 750 assert(!isKnownBaseResult(v) && "why did it get added?"); 751 assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); 752 if (!state.isConflict()) 753 continue; 754 755 if (isa<PHINode>(v)) { 756 int num_preds = 757 std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); 758 assert(num_preds > 0 && "how did we reach here"); 759 PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); 760 // Add metadata marking this as a base value 761 auto *const_1 = ConstantInt::get( 762 Type::getInt32Ty( 763 v->getParent()->getParent()->getParent()->getContext()), 764 1); 765 auto MDConst = ConstantAsMetadata::get(const_1); 766 MDNode *md = MDNode::get( 767 v->getParent()->getParent()->getParent()->getContext(), MDConst); 768 phi->setMetadata("is_base_value", md); 769 states[v] = PhiState(PhiState::Conflict, phi); 770 } else { 771 SelectInst *sel = cast<SelectInst>(v); 772 // The undef will be replaced later 773 UndefValue *undef = UndefValue::get(sel->getType()); 774 SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, 775 undef, "base_select", sel); 776 // Add metadata marking this as a base value 777 auto *const_1 = ConstantInt::get( 778 Type::getInt32Ty( 779 v->getParent()->getParent()->getParent()->getContext()), 780 1); 781 auto MDConst = ConstantAsMetadata::get(const_1); 782 MDNode *md = MDNode::get( 783 v->getParent()->getParent()->getParent()->getContext(), MDConst); 784 basesel->setMetadata("is_base_value", md); 785 states[v] = PhiState(PhiState::Conflict, basesel); 786 } 787 } 788 789 // Fixup all the inputs of the new PHIs 790 for (auto Pair : states) { 791 Instruction *v = cast<Instruction>(Pair.first); 792 PhiState state = Pair.second; 793 794 assert(!isKnownBaseResult(v) && "why did it get added?"); 795 assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); 796 if (!state.isConflict()) 797 continue; 798 799 if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) { 800 PHINode *phi = cast<PHINode>(v); 801 unsigned NumPHIValues = phi->getNumIncomingValues(); 802 for (unsigned i = 0; i < NumPHIValues; i++) { 803 Value *InVal = phi->getIncomingValue(i); 804 BasicBlock *InBB = phi->getIncomingBlock(i); 805 806 // If we've already seen InBB, add the same incoming value 807 // we added for it earlier. The IR verifier requires phi 808 // nodes with multiple entries from the same basic block 809 // to have the same incoming value for each of those 810 // entries. If we don't do this check here and basephi 811 // has a different type than base, we'll end up adding two 812 // bitcasts (and hence two distinct values) as incoming 813 // values for the same basic block. 814 815 int blockIndex = basephi->getBasicBlockIndex(InBB); 816 if (blockIndex != -1) { 817 Value *oldBase = basephi->getIncomingValue(blockIndex); 818 basephi->addIncoming(oldBase, InBB); 819 #ifndef NDEBUG 820 Value *base = findBaseOrBDV(InVal, cache); 821 if (!isKnownBaseResult(base)) { 822 // Either conflict or base. 823 assert(states.count(base)); 824 base = states[base].getBase(); 825 assert(base != nullptr && "unknown PhiState!"); 826 } 827 828 // In essense this assert states: the only way two 829 // values incoming from the same basic block may be 830 // different is by being different bitcasts of the same 831 // value. A cleanup that remains TODO is changing 832 // findBaseOrBDV to return an llvm::Value of the correct 833 // type (and still remain pure). This will remove the 834 // need to add bitcasts. 835 assert(base->stripPointerCasts() == oldBase->stripPointerCasts() && 836 "sanity -- findBaseOrBDV should be pure!"); 837 #endif 838 continue; 839 } 840 841 // Find either the defining value for the PHI or the normal base for 842 // a non-phi node 843 Value *base = findBaseOrBDV(InVal, cache); 844 if (!isKnownBaseResult(base)) { 845 // Either conflict or base. 846 assert(states.count(base)); 847 base = states[base].getBase(); 848 assert(base != nullptr && "unknown PhiState!"); 849 } 850 assert(base && "can't be null"); 851 // Must use original input BB since base may not be Instruction 852 // The cast is needed since base traversal may strip away bitcasts 853 if (base->getType() != basephi->getType()) { 854 base = new BitCastInst(base, basephi->getType(), "cast", 855 InBB->getTerminator()); 856 } 857 basephi->addIncoming(base, InBB); 858 } 859 assert(basephi->getNumIncomingValues() == NumPHIValues); 860 } else { 861 SelectInst *basesel = cast<SelectInst>(state.getBase()); 862 SelectInst *sel = cast<SelectInst>(v); 863 // Operand 1 & 2 are true, false path respectively. TODO: refactor to 864 // something more safe and less hacky. 865 for (int i = 1; i <= 2; i++) { 866 Value *InVal = sel->getOperand(i); 867 // Find either the defining value for the PHI or the normal base for 868 // a non-phi node 869 Value *base = findBaseOrBDV(InVal, cache); 870 if (!isKnownBaseResult(base)) { 871 // Either conflict or base. 872 assert(states.count(base)); 873 base = states[base].getBase(); 874 assert(base != nullptr && "unknown PhiState!"); 875 } 876 assert(base && "can't be null"); 877 // Must use original input BB since base may not be Instruction 878 // The cast is needed since base traversal may strip away bitcasts 879 if (base->getType() != basesel->getType()) { 880 base = new BitCastInst(base, basesel->getType(), "cast", basesel); 881 } 882 basesel->setOperand(i, base); 883 } 884 } 885 } 886 887 // Cache all of our results so we can cheaply reuse them 888 // NOTE: This is actually two caches: one of the base defining value 889 // relation and one of the base pointer relation! FIXME 890 for (auto item : states) { 891 Value *v = item.first; 892 Value *base = item.second.getBase(); 893 assert(v && base); 894 assert(!isKnownBaseResult(v) && "why did it get added?"); 895 896 if (TraceLSP) { 897 std::string fromstr = 898 cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "") 899 : "none"; 900 errs() << "Updating base value cache" 901 << " for: " << (v->hasName() ? v->getName() : "") 902 << " from: " << fromstr 903 << " to: " << (base->hasName() ? base->getName() : "") << "\n"; 904 } 905 906 assert(isKnownBaseResult(base) && 907 "must be something we 'know' is a base pointer"); 908 if (cache.count(v)) { 909 // Once we transition from the BDV relation being store in the cache to 910 // the base relation being stored, it must be stable 911 assert((!isKnownBaseResult(cache[v]) || cache[v] == base) && 912 "base relation should be stable"); 913 } 914 cache[v] = base; 915 } 916 assert(cache.find(def) != cache.end()); 917 return cache[def]; 918 } 919 920 // For a set of live pointers (base and/or derived), identify the base 921 // pointer of the object which they are derived from. This routine will 922 // mutate the IR graph as needed to make the 'base' pointer live at the 923 // definition site of 'derived'. This ensures that any use of 'derived' can 924 // also use 'base'. This may involve the insertion of a number of 925 // additional PHI nodes. 926 // 927 // preconditions: live is a set of pointer type Values 928 // 929 // side effects: may insert PHI nodes into the existing CFG, will preserve 930 // CFG, will not remove or mutate any existing nodes 931 // 932 // post condition: PointerToBase contains one (derived, base) pair for every 933 // pointer in live. Note that derived can be equal to base if the original 934 // pointer was a base pointer. 935 static void 936 findBasePointers(const StatepointLiveSetTy &live, 937 DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, 938 DominatorTree *DT, DefiningValueMapTy &DVCache) { 939 // For the naming of values inserted to be deterministic - which makes for 940 // much cleaner and more stable tests - we need to assign an order to the 941 // live values. DenseSets do not provide a deterministic order across runs. 942 SmallVector<Value *, 64> Temp; 943 Temp.insert(Temp.end(), live.begin(), live.end()); 944 std::sort(Temp.begin(), Temp.end(), order_by_name); 945 for (Value *ptr : Temp) { 946 Value *base = findBasePointer(ptr, DVCache); 947 assert(base && "failed to find base pointer"); 948 PointerToBase[ptr] = base; 949 assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) || 950 DT->dominates(cast<Instruction>(base)->getParent(), 951 cast<Instruction>(ptr)->getParent())) && 952 "The base we found better dominate the derived pointer"); 953 954 // If you see this trip and like to live really dangerously, the code should 955 // be correct, just with idioms the verifier can't handle. You can try 956 // disabling the verifier at your own substaintial risk. 957 assert(!isa<ConstantPointerNull>(base) && 958 "the relocation code needs adjustment to handle the relocation of " 959 "a null pointer constant without causing false positives in the " 960 "safepoint ir verifier."); 961 } 962 } 963 964 /// Find the required based pointers (and adjust the live set) for the given 965 /// parse point. 966 static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, 967 const CallSite &CS, 968 PartiallyConstructedSafepointRecord &result) { 969 DenseMap<llvm::Value *, llvm::Value *> PointerToBase; 970 findBasePointers(result.liveset, PointerToBase, &DT, DVCache); 971 972 if (PrintBasePointers) { 973 // Note: Need to print these in a stable order since this is checked in 974 // some tests. 975 errs() << "Base Pairs (w/o Relocation):\n"; 976 SmallVector<Value *, 64> Temp; 977 Temp.reserve(PointerToBase.size()); 978 for (auto Pair : PointerToBase) { 979 Temp.push_back(Pair.first); 980 } 981 std::sort(Temp.begin(), Temp.end(), order_by_name); 982 for (Value *Ptr : Temp) { 983 Value *Base = PointerToBase[Ptr]; 984 errs() << " derived %" << Ptr->getName() << " base %" << Base->getName() 985 << "\n"; 986 } 987 } 988 989 result.PointerToBase = PointerToBase; 990 } 991 992 /// Given an updated version of the dataflow liveness results, update the 993 /// liveset and base pointer maps for the call site CS. 994 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, 995 const CallSite &CS, 996 PartiallyConstructedSafepointRecord &result); 997 998 static void recomputeLiveInValues( 999 Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate, 1000 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { 1001 // TODO-PERF: reuse the original liveness, then simply run the dataflow 1002 // again. The old values are still live and will help it stablize quickly. 1003 GCPtrLivenessData RevisedLivenessData; 1004 computeLiveInValues(DT, F, RevisedLivenessData); 1005 for (size_t i = 0; i < records.size(); i++) { 1006 struct PartiallyConstructedSafepointRecord &info = records[i]; 1007 const CallSite &CS = toUpdate[i]; 1008 recomputeLiveInValues(RevisedLivenessData, CS, info); 1009 } 1010 } 1011 1012 // When inserting gc.relocate calls, we need to ensure there are no uses 1013 // of the original value between the gc.statepoint and the gc.relocate call. 1014 // One case which can arise is a phi node starting one of the successor blocks. 1015 // We also need to be able to insert the gc.relocates only on the path which 1016 // goes through the statepoint. We might need to split an edge to make this 1017 // possible. 1018 static BasicBlock * 1019 normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) { 1020 DominatorTree *DT = nullptr; 1021 if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) 1022 DT = &DTP->getDomTree(); 1023 1024 BasicBlock *Ret = BB; 1025 if (!BB->getUniquePredecessor()) { 1026 Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT); 1027 } 1028 1029 // Now that 'ret' has unique predecessor we can safely remove all phi nodes 1030 // from it 1031 FoldSingleEntryPHINodes(Ret); 1032 assert(!isa<PHINode>(Ret->begin())); 1033 1034 // At this point, we can safely insert a gc.relocate as the first instruction 1035 // in Ret if needed. 1036 return Ret; 1037 } 1038 1039 static int find_index(ArrayRef<Value *> livevec, Value *val) { 1040 auto itr = std::find(livevec.begin(), livevec.end(), val); 1041 assert(livevec.end() != itr); 1042 size_t index = std::distance(livevec.begin(), itr); 1043 assert(index < livevec.size()); 1044 return index; 1045 } 1046 1047 // Create new attribute set containing only attributes which can be transfered 1048 // from original call to the safepoint. 1049 static AttributeSet legalizeCallAttributes(AttributeSet AS) { 1050 AttributeSet ret; 1051 1052 for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { 1053 unsigned index = AS.getSlotIndex(Slot); 1054 1055 if (index == AttributeSet::ReturnIndex || 1056 index == AttributeSet::FunctionIndex) { 1057 1058 for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end; 1059 ++it) { 1060 Attribute attr = *it; 1061 1062 // Do not allow certain attributes - just skip them 1063 // Safepoint can not be read only or read none. 1064 if (attr.hasAttribute(Attribute::ReadNone) || 1065 attr.hasAttribute(Attribute::ReadOnly)) 1066 continue; 1067 1068 ret = ret.addAttributes( 1069 AS.getContext(), index, 1070 AttributeSet::get(AS.getContext(), index, AttrBuilder(attr))); 1071 } 1072 } 1073 1074 // Just skip parameter attributes for now 1075 } 1076 1077 return ret; 1078 } 1079 1080 /// Helper function to place all gc relocates necessary for the given 1081 /// statepoint. 1082 /// Inputs: 1083 /// liveVariables - list of variables to be relocated. 1084 /// liveStart - index of the first live variable. 1085 /// basePtrs - base pointers. 1086 /// statepointToken - statepoint instruction to which relocates should be 1087 /// bound. 1088 /// Builder - Llvm IR builder to be used to construct new calls. 1089 static void CreateGCRelocates(ArrayRef<llvm::Value *> LiveVariables, 1090 const int LiveStart, 1091 ArrayRef<llvm::Value *> BasePtrs, 1092 Instruction *StatepointToken, 1093 IRBuilder<> Builder) { 1094 SmallVector<Instruction *, 64> NewDefs; 1095 NewDefs.reserve(LiveVariables.size()); 1096 1097 Module *M = StatepointToken->getParent()->getParent()->getParent(); 1098 1099 for (unsigned i = 0; i < LiveVariables.size(); i++) { 1100 // We generate a (potentially) unique declaration for every pointer type 1101 // combination. This results is some blow up the function declarations in 1102 // the IR, but removes the need for argument bitcasts which shrinks the IR 1103 // greatly and makes it much more readable. 1104 SmallVector<Type *, 1> Types; // one per 'any' type 1105 // All gc_relocate are set to i8 addrspace(1)* type. This could help avoid 1106 // cases where the actual value's type mangling is not supported by llvm. A 1107 // bitcast is added later to convert gc_relocate to the actual value's type. 1108 Types.push_back(Type::getInt8PtrTy(M->getContext(), 1)); 1109 Value *GCRelocateDecl = Intrinsic::getDeclaration( 1110 M, Intrinsic::experimental_gc_relocate, Types); 1111 1112 // Generate the gc.relocate call and save the result 1113 Value *BaseIdx = 1114 ConstantInt::get(Type::getInt32Ty(M->getContext()), 1115 LiveStart + find_index(LiveVariables, BasePtrs[i])); 1116 Value *LiveIdx = ConstantInt::get( 1117 Type::getInt32Ty(M->getContext()), 1118 LiveStart + find_index(LiveVariables, LiveVariables[i])); 1119 1120 // only specify a debug name if we can give a useful one 1121 Value *Reloc = Builder.CreateCall3( 1122 GCRelocateDecl, StatepointToken, BaseIdx, LiveIdx, 1123 LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated" 1124 : ""); 1125 // Trick CodeGen into thinking there are lots of free registers at this 1126 // fake call. 1127 cast<CallInst>(Reloc)->setCallingConv(CallingConv::Cold); 1128 1129 NewDefs.push_back(cast<Instruction>(Reloc)); 1130 } 1131 assert(NewDefs.size() == LiveVariables.size() && 1132 "missing or extra redefinition at safepoint"); 1133 } 1134 1135 static void 1136 makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ 1137 const SmallVectorImpl<llvm::Value *> &basePtrs, 1138 const SmallVectorImpl<llvm::Value *> &liveVariables, 1139 Pass *P, 1140 PartiallyConstructedSafepointRecord &result) { 1141 assert(basePtrs.size() == liveVariables.size()); 1142 assert(isStatepoint(CS) && 1143 "This method expects to be rewriting a statepoint"); 1144 1145 BasicBlock *BB = CS.getInstruction()->getParent(); 1146 assert(BB); 1147 Function *F = BB->getParent(); 1148 assert(F && "must be set"); 1149 Module *M = F->getParent(); 1150 (void)M; 1151 assert(M && "must be set"); 1152 1153 // We're not changing the function signature of the statepoint since the gc 1154 // arguments go into the var args section. 1155 Function *gc_statepoint_decl = CS.getCalledFunction(); 1156 1157 // Then go ahead and use the builder do actually do the inserts. We insert 1158 // immediately before the previous instruction under the assumption that all 1159 // arguments will be available here. We can't insert afterwards since we may 1160 // be replacing a terminator. 1161 Instruction *insertBefore = CS.getInstruction(); 1162 IRBuilder<> Builder(insertBefore); 1163 // Copy all of the arguments from the original statepoint - this includes the 1164 // target, call args, and deopt args 1165 SmallVector<llvm::Value *, 64> args; 1166 args.insert(args.end(), CS.arg_begin(), CS.arg_end()); 1167 // TODO: Clear the 'needs rewrite' flag 1168 1169 // add all the pointers to be relocated (gc arguments) 1170 // Capture the start of the live variable list for use in the gc_relocates 1171 const int live_start = args.size(); 1172 args.insert(args.end(), liveVariables.begin(), liveVariables.end()); 1173 1174 // Create the statepoint given all the arguments 1175 Instruction *token = nullptr; 1176 AttributeSet return_attributes; 1177 if (CS.isCall()) { 1178 CallInst *toReplace = cast<CallInst>(CS.getInstruction()); 1179 CallInst *call = 1180 Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token"); 1181 call->setTailCall(toReplace->isTailCall()); 1182 call->setCallingConv(toReplace->getCallingConv()); 1183 1184 // Currently we will fail on parameter attributes and on certain 1185 // function attributes. 1186 AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes()); 1187 // In case if we can handle this set of sttributes - set up function attrs 1188 // directly on statepoint and return attrs later for gc_result intrinsic. 1189 call->setAttributes(new_attrs.getFnAttributes()); 1190 return_attributes = new_attrs.getRetAttributes(); 1191 1192 token = call; 1193 1194 // Put the following gc_result and gc_relocate calls immediately after the 1195 // the old call (which we're about to delete) 1196 BasicBlock::iterator next(toReplace); 1197 assert(BB->end() != next && "not a terminator, must have next"); 1198 next++; 1199 Instruction *IP = &*(next); 1200 Builder.SetInsertPoint(IP); 1201 Builder.SetCurrentDebugLocation(IP->getDebugLoc()); 1202 1203 } else { 1204 InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction()); 1205 1206 // Insert the new invoke into the old block. We'll remove the old one in a 1207 // moment at which point this will become the new terminator for the 1208 // original block. 1209 InvokeInst *invoke = InvokeInst::Create( 1210 gc_statepoint_decl, toReplace->getNormalDest(), 1211 toReplace->getUnwindDest(), args, "", toReplace->getParent()); 1212 invoke->setCallingConv(toReplace->getCallingConv()); 1213 1214 // Currently we will fail on parameter attributes and on certain 1215 // function attributes. 1216 AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes()); 1217 // In case if we can handle this set of sttributes - set up function attrs 1218 // directly on statepoint and return attrs later for gc_result intrinsic. 1219 invoke->setAttributes(new_attrs.getFnAttributes()); 1220 return_attributes = new_attrs.getRetAttributes(); 1221 1222 token = invoke; 1223 1224 // Generate gc relocates in exceptional path 1225 BasicBlock *unwindBlock = toReplace->getUnwindDest(); 1226 assert(!isa<PHINode>(unwindBlock->begin()) && 1227 unwindBlock->getUniquePredecessor() && 1228 "can't safely insert in this block!"); 1229 1230 Instruction *IP = &*(unwindBlock->getFirstInsertionPt()); 1231 Builder.SetInsertPoint(IP); 1232 Builder.SetCurrentDebugLocation(toReplace->getDebugLoc()); 1233 1234 // Extract second element from landingpad return value. We will attach 1235 // exceptional gc relocates to it. 1236 const unsigned idx = 1; 1237 Instruction *exceptional_token = 1238 cast<Instruction>(Builder.CreateExtractValue( 1239 unwindBlock->getLandingPadInst(), idx, "relocate_token")); 1240 result.UnwindToken = exceptional_token; 1241 1242 // Just throw away return value. We will use the one we got for normal 1243 // block. 1244 (void)CreateGCRelocates(liveVariables, live_start, basePtrs, 1245 exceptional_token, Builder); 1246 1247 // Generate gc relocates and returns for normal block 1248 BasicBlock *normalDest = toReplace->getNormalDest(); 1249 assert(!isa<PHINode>(normalDest->begin()) && 1250 normalDest->getUniquePredecessor() && 1251 "can't safely insert in this block!"); 1252 1253 IP = &*(normalDest->getFirstInsertionPt()); 1254 Builder.SetInsertPoint(IP); 1255 1256 // gc relocates will be generated later as if it were regular call 1257 // statepoint 1258 } 1259 assert(token); 1260 1261 // Take the name of the original value call if it had one. 1262 token->takeName(CS.getInstruction()); 1263 1264 // The GCResult is already inserted, we just need to find it 1265 #ifndef NDEBUG 1266 Instruction *toReplace = CS.getInstruction(); 1267 assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) && 1268 "only valid use before rewrite is gc.result"); 1269 assert(!toReplace->hasOneUse() || 1270 isGCResult(cast<Instruction>(*toReplace->user_begin()))); 1271 #endif 1272 1273 // Update the gc.result of the original statepoint (if any) to use the newly 1274 // inserted statepoint. This is safe to do here since the token can't be 1275 // considered a live reference. 1276 CS.getInstruction()->replaceAllUsesWith(token); 1277 1278 result.StatepointToken = token; 1279 1280 // Second, create a gc.relocate for every live variable 1281 CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder); 1282 } 1283 1284 namespace { 1285 struct name_ordering { 1286 Value *base; 1287 Value *derived; 1288 bool operator()(name_ordering const &a, name_ordering const &b) { 1289 return -1 == a.derived->getName().compare(b.derived->getName()); 1290 } 1291 }; 1292 } 1293 static void stablize_order(SmallVectorImpl<Value *> &basevec, 1294 SmallVectorImpl<Value *> &livevec) { 1295 assert(basevec.size() == livevec.size()); 1296 1297 SmallVector<name_ordering, 64> temp; 1298 for (size_t i = 0; i < basevec.size(); i++) { 1299 name_ordering v; 1300 v.base = basevec[i]; 1301 v.derived = livevec[i]; 1302 temp.push_back(v); 1303 } 1304 std::sort(temp.begin(), temp.end(), name_ordering()); 1305 for (size_t i = 0; i < basevec.size(); i++) { 1306 basevec[i] = temp[i].base; 1307 livevec[i] = temp[i].derived; 1308 } 1309 } 1310 1311 // Replace an existing gc.statepoint with a new one and a set of gc.relocates 1312 // which make the relocations happening at this safepoint explicit. 1313 // 1314 // WARNING: Does not do any fixup to adjust users of the original live 1315 // values. That's the callers responsibility. 1316 static void 1317 makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P, 1318 PartiallyConstructedSafepointRecord &result) { 1319 auto liveset = result.liveset; 1320 auto PointerToBase = result.PointerToBase; 1321 1322 // Convert to vector for efficient cross referencing. 1323 SmallVector<Value *, 64> basevec, livevec; 1324 livevec.reserve(liveset.size()); 1325 basevec.reserve(liveset.size()); 1326 for (Value *L : liveset) { 1327 livevec.push_back(L); 1328 1329 assert(PointerToBase.find(L) != PointerToBase.end()); 1330 Value *base = PointerToBase[L]; 1331 basevec.push_back(base); 1332 } 1333 assert(livevec.size() == basevec.size()); 1334 1335 // To make the output IR slightly more stable (for use in diffs), ensure a 1336 // fixed order of the values in the safepoint (by sorting the value name). 1337 // The order is otherwise meaningless. 1338 stablize_order(basevec, livevec); 1339 1340 // Do the actual rewriting and delete the old statepoint 1341 makeStatepointExplicitImpl(CS, basevec, livevec, P, result); 1342 CS.getInstruction()->eraseFromParent(); 1343 } 1344 1345 // Helper function for the relocationViaAlloca. 1346 // It receives iterator to the statepoint gc relocates and emits store to the 1347 // assigned 1348 // location (via allocaMap) for the each one of them. 1349 // Add visited values into the visitedLiveValues set we will later use them 1350 // for sanity check. 1351 static void 1352 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs, 1353 DenseMap<Value *, Value *> &AllocaMap, 1354 DenseSet<Value *> &VisitedLiveValues) { 1355 1356 for (User *U : GCRelocs) { 1357 if (!isa<IntrinsicInst>(U)) 1358 continue; 1359 1360 IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U); 1361 1362 // We only care about relocates 1363 if (RelocatedValue->getIntrinsicID() != 1364 Intrinsic::experimental_gc_relocate) { 1365 continue; 1366 } 1367 1368 GCRelocateOperands RelocateOperands(RelocatedValue); 1369 Value *OriginalValue = 1370 const_cast<Value *>(RelocateOperands.getDerivedPtr()); 1371 assert(AllocaMap.count(OriginalValue)); 1372 Value *Alloca = AllocaMap[OriginalValue]; 1373 1374 // Emit store into the related alloca 1375 // All gc_relocate are i8 addrspace(1)* typed, and it must be bitcasted to 1376 // the correct type according to alloca. 1377 assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator"); 1378 IRBuilder<> Builder(RelocatedValue->getNextNode()); 1379 Value *CastedRelocatedValue = 1380 Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(), 1381 RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : ""); 1382 1383 StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca); 1384 Store->insertAfter(cast<Instruction>(CastedRelocatedValue)); 1385 1386 #ifndef NDEBUG 1387 VisitedLiveValues.insert(OriginalValue); 1388 #endif 1389 } 1390 } 1391 1392 /// do all the relocation update via allocas and mem2reg 1393 static void relocationViaAlloca( 1394 Function &F, DominatorTree &DT, ArrayRef<Value *> live, 1395 ArrayRef<struct PartiallyConstructedSafepointRecord> records) { 1396 #ifndef NDEBUG 1397 // record initial number of (static) allocas; we'll check we have the same 1398 // number when we get done. 1399 int InitialAllocaNum = 0; 1400 for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; 1401 I++) 1402 if (isa<AllocaInst>(*I)) 1403 InitialAllocaNum++; 1404 #endif 1405 1406 // TODO-PERF: change data structures, reserve 1407 DenseMap<Value *, Value *> allocaMap; 1408 SmallVector<AllocaInst *, 200> PromotableAllocas; 1409 PromotableAllocas.reserve(live.size()); 1410 1411 // emit alloca for each live gc pointer 1412 for (unsigned i = 0; i < live.size(); i++) { 1413 Value *liveValue = live[i]; 1414 AllocaInst *alloca = new AllocaInst(liveValue->getType(), "", 1415 F.getEntryBlock().getFirstNonPHI()); 1416 allocaMap[liveValue] = alloca; 1417 PromotableAllocas.push_back(alloca); 1418 } 1419 1420 // The next two loops are part of the same conceptual operation. We need to 1421 // insert a store to the alloca after the original def and at each 1422 // redefinition. We need to insert a load before each use. These are split 1423 // into distinct loops for performance reasons. 1424 1425 // update gc pointer after each statepoint 1426 // either store a relocated value or null (if no relocated value found for 1427 // this gc pointer and it is not a gc_result) 1428 // this must happen before we update the statepoint with load of alloca 1429 // otherwise we lose the link between statepoint and old def 1430 for (size_t i = 0; i < records.size(); i++) { 1431 const struct PartiallyConstructedSafepointRecord &info = records[i]; 1432 Value *Statepoint = info.StatepointToken; 1433 1434 // This will be used for consistency check 1435 DenseSet<Value *> visitedLiveValues; 1436 1437 // Insert stores for normal statepoint gc relocates 1438 insertRelocationStores(Statepoint->users(), allocaMap, visitedLiveValues); 1439 1440 // In case if it was invoke statepoint 1441 // we will insert stores for exceptional path gc relocates. 1442 if (isa<InvokeInst>(Statepoint)) { 1443 insertRelocationStores(info.UnwindToken->users(), allocaMap, 1444 visitedLiveValues); 1445 } 1446 1447 if (ClobberNonLive) { 1448 // As a debuging aid, pretend that an unrelocated pointer becomes null at 1449 // the gc.statepoint. This will turn some subtle GC problems into 1450 // slightly easier to debug SEGVs. Note that on large IR files with 1451 // lots of gc.statepoints this is extremely costly both memory and time 1452 // wise. 1453 SmallVector<AllocaInst *, 64> ToClobber; 1454 for (auto Pair : allocaMap) { 1455 Value *Def = Pair.first; 1456 AllocaInst *Alloca = cast<AllocaInst>(Pair.second); 1457 1458 // This value was relocated 1459 if (visitedLiveValues.count(Def)) { 1460 continue; 1461 } 1462 ToClobber.push_back(Alloca); 1463 } 1464 1465 auto InsertClobbersAt = [&](Instruction *IP) { 1466 for (auto *AI : ToClobber) { 1467 auto AIType = cast<PointerType>(AI->getType()); 1468 auto PT = cast<PointerType>(AIType->getElementType()); 1469 Constant *CPN = ConstantPointerNull::get(PT); 1470 StoreInst *store = new StoreInst(CPN, AI); 1471 store->insertBefore(IP); 1472 } 1473 }; 1474 1475 // Insert the clobbering stores. These may get intermixed with the 1476 // gc.results and gc.relocates, but that's fine. 1477 if (auto II = dyn_cast<InvokeInst>(Statepoint)) { 1478 InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); 1479 InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); 1480 } else { 1481 BasicBlock::iterator Next(cast<CallInst>(Statepoint)); 1482 Next++; 1483 InsertClobbersAt(Next); 1484 } 1485 } 1486 } 1487 // update use with load allocas and add store for gc_relocated 1488 for (auto Pair : allocaMap) { 1489 Value *def = Pair.first; 1490 Value *alloca = Pair.second; 1491 1492 // we pre-record the uses of allocas so that we dont have to worry about 1493 // later update 1494 // that change the user information. 1495 SmallVector<Instruction *, 20> uses; 1496 // PERF: trade a linear scan for repeated reallocation 1497 uses.reserve(std::distance(def->user_begin(), def->user_end())); 1498 for (User *U : def->users()) { 1499 if (!isa<ConstantExpr>(U)) { 1500 // If the def has a ConstantExpr use, then the def is either a 1501 // ConstantExpr use itself or null. In either case 1502 // (recursively in the first, directly in the second), the oop 1503 // it is ultimately dependent on is null and this particular 1504 // use does not need to be fixed up. 1505 uses.push_back(cast<Instruction>(U)); 1506 } 1507 } 1508 1509 std::sort(uses.begin(), uses.end()); 1510 auto last = std::unique(uses.begin(), uses.end()); 1511 uses.erase(last, uses.end()); 1512 1513 for (Instruction *use : uses) { 1514 if (isa<PHINode>(use)) { 1515 PHINode *phi = cast<PHINode>(use); 1516 for (unsigned i = 0; i < phi->getNumIncomingValues(); i++) { 1517 if (def == phi->getIncomingValue(i)) { 1518 LoadInst *load = new LoadInst( 1519 alloca, "", phi->getIncomingBlock(i)->getTerminator()); 1520 phi->setIncomingValue(i, load); 1521 } 1522 } 1523 } else { 1524 LoadInst *load = new LoadInst(alloca, "", use); 1525 use->replaceUsesOfWith(def, load); 1526 } 1527 } 1528 1529 // emit store for the initial gc value 1530 // store must be inserted after load, otherwise store will be in alloca's 1531 // use list and an extra load will be inserted before it 1532 StoreInst *store = new StoreInst(def, alloca); 1533 if (Instruction *inst = dyn_cast<Instruction>(def)) { 1534 if (InvokeInst *invoke = dyn_cast<InvokeInst>(inst)) { 1535 // InvokeInst is a TerminatorInst so the store need to be inserted 1536 // into its normal destination block. 1537 BasicBlock *normalDest = invoke->getNormalDest(); 1538 store->insertBefore(normalDest->getFirstNonPHI()); 1539 } else { 1540 assert(!inst->isTerminator() && 1541 "The only TerminatorInst that can produce a value is " 1542 "InvokeInst which is handled above."); 1543 store->insertAfter(inst); 1544 } 1545 } else { 1546 assert(isa<Argument>(def)); 1547 store->insertAfter(cast<Instruction>(alloca)); 1548 } 1549 } 1550 1551 assert(PromotableAllocas.size() == live.size() && 1552 "we must have the same allocas with lives"); 1553 if (!PromotableAllocas.empty()) { 1554 // apply mem2reg to promote alloca to SSA 1555 PromoteMemToReg(PromotableAllocas, DT); 1556 } 1557 1558 #ifndef NDEBUG 1559 for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; 1560 I++) 1561 if (isa<AllocaInst>(*I)) 1562 InitialAllocaNum--; 1563 assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas"); 1564 #endif 1565 } 1566 1567 /// Implement a unique function which doesn't require we sort the input 1568 /// vector. Doing so has the effect of changing the output of a couple of 1569 /// tests in ways which make them less useful in testing fused safepoints. 1570 template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) { 1571 DenseSet<T> Seen; 1572 SmallVector<T, 128> TempVec; 1573 TempVec.reserve(Vec.size()); 1574 for (auto Element : Vec) 1575 TempVec.push_back(Element); 1576 Vec.clear(); 1577 for (auto V : TempVec) { 1578 if (Seen.insert(V).second) { 1579 Vec.push_back(V); 1580 } 1581 } 1582 } 1583 1584 /// Insert holders so that each Value is obviously live through the entire 1585 /// lifetime of the call. 1586 static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values, 1587 SmallVectorImpl<CallInst *> &Holders) { 1588 if (Values.empty()) 1589 // No values to hold live, might as well not insert the empty holder 1590 return; 1591 1592 Module *M = CS.getInstruction()->getParent()->getParent()->getParent(); 1593 // Use a dummy vararg function to actually hold the values live 1594 Function *Func = cast<Function>(M->getOrInsertFunction( 1595 "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true))); 1596 if (CS.isCall()) { 1597 // For call safepoints insert dummy calls right after safepoint 1598 BasicBlock::iterator Next(CS.getInstruction()); 1599 Next++; 1600 Holders.push_back(CallInst::Create(Func, Values, "", Next)); 1601 return; 1602 } 1603 // For invoke safepooints insert dummy calls both in normal and 1604 // exceptional destination blocks 1605 auto *II = cast<InvokeInst>(CS.getInstruction()); 1606 Holders.push_back(CallInst::Create( 1607 Func, Values, "", II->getNormalDest()->getFirstInsertionPt())); 1608 Holders.push_back(CallInst::Create( 1609 Func, Values, "", II->getUnwindDest()->getFirstInsertionPt())); 1610 } 1611 1612 static void findLiveReferences( 1613 Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate, 1614 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { 1615 GCPtrLivenessData OriginalLivenessData; 1616 computeLiveInValues(DT, F, OriginalLivenessData); 1617 for (size_t i = 0; i < records.size(); i++) { 1618 struct PartiallyConstructedSafepointRecord &info = records[i]; 1619 const CallSite &CS = toUpdate[i]; 1620 analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info); 1621 } 1622 } 1623 1624 /// Remove any vector of pointers from the liveset by scalarizing them over the 1625 /// statepoint instruction. Adds the scalarized pieces to the liveset. It 1626 /// would be preferrable to include the vector in the statepoint itself, but 1627 /// the lowering code currently does not handle that. Extending it would be 1628 /// slightly non-trivial since it requires a format change. Given how rare 1629 /// such cases are (for the moment?) scalarizing is an acceptable comprimise. 1630 static void splitVectorValues(Instruction *StatepointInst, 1631 StatepointLiveSetTy &LiveSet, DominatorTree &DT) { 1632 SmallVector<Value *, 16> ToSplit; 1633 for (Value *V : LiveSet) 1634 if (isa<VectorType>(V->getType())) 1635 ToSplit.push_back(V); 1636 1637 if (ToSplit.empty()) 1638 return; 1639 1640 Function &F = *(StatepointInst->getParent()->getParent()); 1641 1642 DenseMap<Value *, AllocaInst *> AllocaMap; 1643 // First is normal return, second is exceptional return (invoke only) 1644 DenseMap<Value *, std::pair<Value *, Value *>> Replacements; 1645 for (Value *V : ToSplit) { 1646 LiveSet.erase(V); 1647 1648 AllocaInst *Alloca = 1649 new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI()); 1650 AllocaMap[V] = Alloca; 1651 1652 VectorType *VT = cast<VectorType>(V->getType()); 1653 IRBuilder<> Builder(StatepointInst); 1654 SmallVector<Value *, 16> Elements; 1655 for (unsigned i = 0; i < VT->getNumElements(); i++) 1656 Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i))); 1657 LiveSet.insert(Elements.begin(), Elements.end()); 1658 1659 auto InsertVectorReform = [&](Instruction *IP) { 1660 Builder.SetInsertPoint(IP); 1661 Builder.SetCurrentDebugLocation(IP->getDebugLoc()); 1662 Value *ResultVec = UndefValue::get(VT); 1663 for (unsigned i = 0; i < VT->getNumElements(); i++) 1664 ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i], 1665 Builder.getInt32(i)); 1666 return ResultVec; 1667 }; 1668 1669 if (isa<CallInst>(StatepointInst)) { 1670 BasicBlock::iterator Next(StatepointInst); 1671 Next++; 1672 Instruction *IP = &*(Next); 1673 Replacements[V].first = InsertVectorReform(IP); 1674 Replacements[V].second = nullptr; 1675 } else { 1676 InvokeInst *Invoke = cast<InvokeInst>(StatepointInst); 1677 // We've already normalized - check that we don't have shared destination 1678 // blocks 1679 BasicBlock *NormalDest = Invoke->getNormalDest(); 1680 assert(!isa<PHINode>(NormalDest->begin())); 1681 BasicBlock *UnwindDest = Invoke->getUnwindDest(); 1682 assert(!isa<PHINode>(UnwindDest->begin())); 1683 // Insert insert element sequences in both successors 1684 Instruction *IP = &*(NormalDest->getFirstInsertionPt()); 1685 Replacements[V].first = InsertVectorReform(IP); 1686 IP = &*(UnwindDest->getFirstInsertionPt()); 1687 Replacements[V].second = InsertVectorReform(IP); 1688 } 1689 } 1690 for (Value *V : ToSplit) { 1691 AllocaInst *Alloca = AllocaMap[V]; 1692 1693 // Capture all users before we start mutating use lists 1694 SmallVector<Instruction *, 16> Users; 1695 for (User *U : V->users()) 1696 Users.push_back(cast<Instruction>(U)); 1697 1698 for (Instruction *I : Users) { 1699 if (auto Phi = dyn_cast<PHINode>(I)) { 1700 for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) 1701 if (V == Phi->getIncomingValue(i)) { 1702 LoadInst *Load = new LoadInst( 1703 Alloca, "", Phi->getIncomingBlock(i)->getTerminator()); 1704 Phi->setIncomingValue(i, Load); 1705 } 1706 } else { 1707 LoadInst *Load = new LoadInst(Alloca, "", I); 1708 I->replaceUsesOfWith(V, Load); 1709 } 1710 } 1711 1712 // Store the original value and the replacement value into the alloca 1713 StoreInst *Store = new StoreInst(V, Alloca); 1714 if (auto I = dyn_cast<Instruction>(V)) 1715 Store->insertAfter(I); 1716 else 1717 Store->insertAfter(Alloca); 1718 1719 // Normal return for invoke, or call return 1720 Instruction *Replacement = cast<Instruction>(Replacements[V].first); 1721 (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); 1722 // Unwind return for invoke only 1723 Replacement = cast_or_null<Instruction>(Replacements[V].second); 1724 if (Replacement) 1725 (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); 1726 } 1727 1728 // apply mem2reg to promote alloca to SSA 1729 SmallVector<AllocaInst *, 16> Allocas; 1730 for (Value *V : ToSplit) 1731 Allocas.push_back(AllocaMap[V]); 1732 PromoteMemToReg(Allocas, DT); 1733 } 1734 1735 static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, 1736 SmallVectorImpl<CallSite> &toUpdate) { 1737 #ifndef NDEBUG 1738 // sanity check the input 1739 std::set<CallSite> uniqued; 1740 uniqued.insert(toUpdate.begin(), toUpdate.end()); 1741 assert(uniqued.size() == toUpdate.size() && "no duplicates please!"); 1742 1743 for (size_t i = 0; i < toUpdate.size(); i++) { 1744 CallSite &CS = toUpdate[i]; 1745 assert(CS.getInstruction()->getParent()->getParent() == &F); 1746 assert(isStatepoint(CS) && "expected to already be a deopt statepoint"); 1747 } 1748 #endif 1749 1750 // When inserting gc.relocates for invokes, we need to be able to insert at 1751 // the top of the successor blocks. See the comment on 1752 // normalForInvokeSafepoint on exactly what is needed. Note that this step 1753 // may restructure the CFG. 1754 for (CallSite CS : toUpdate) { 1755 if (!CS.isInvoke()) 1756 continue; 1757 InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); 1758 normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), 1759 P); 1760 normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), 1761 P); 1762 } 1763 1764 // A list of dummy calls added to the IR to keep various values obviously 1765 // live in the IR. We'll remove all of these when done. 1766 SmallVector<CallInst *, 64> holders; 1767 1768 // Insert a dummy call with all of the arguments to the vm_state we'll need 1769 // for the actual safepoint insertion. This ensures reference arguments in 1770 // the deopt argument list are considered live through the safepoint (and 1771 // thus makes sure they get relocated.) 1772 for (size_t i = 0; i < toUpdate.size(); i++) { 1773 CallSite &CS = toUpdate[i]; 1774 Statepoint StatepointCS(CS); 1775 1776 SmallVector<Value *, 64> DeoptValues; 1777 for (Use &U : StatepointCS.vm_state_args()) { 1778 Value *Arg = cast<Value>(&U); 1779 assert(!isUnhandledGCPointerType(Arg->getType()) && 1780 "support for FCA unimplemented"); 1781 if (isHandledGCPointerType(Arg->getType())) 1782 DeoptValues.push_back(Arg); 1783 } 1784 insertUseHolderAfter(CS, DeoptValues, holders); 1785 } 1786 1787 SmallVector<struct PartiallyConstructedSafepointRecord, 64> records; 1788 records.reserve(toUpdate.size()); 1789 for (size_t i = 0; i < toUpdate.size(); i++) { 1790 struct PartiallyConstructedSafepointRecord info; 1791 records.push_back(info); 1792 } 1793 assert(records.size() == toUpdate.size()); 1794 1795 // A) Identify all gc pointers which are staticly live at the given call 1796 // site. 1797 findLiveReferences(F, DT, P, toUpdate, records); 1798 1799 // Do a limited scalarization of any live at safepoint vector values which 1800 // contain pointers. This enables this pass to run after vectorization at 1801 // the cost of some possible performance loss. TODO: it would be nice to 1802 // natively support vectors all the way through the backend so we don't need 1803 // to scalarize here. 1804 for (size_t i = 0; i < records.size(); i++) { 1805 struct PartiallyConstructedSafepointRecord &info = records[i]; 1806 Instruction *statepoint = toUpdate[i].getInstruction(); 1807 splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT); 1808 } 1809 1810 // B) Find the base pointers for each live pointer 1811 /* scope for caching */ { 1812 // Cache the 'defining value' relation used in the computation and 1813 // insertion of base phis and selects. This ensures that we don't insert 1814 // large numbers of duplicate base_phis. 1815 DefiningValueMapTy DVCache; 1816 1817 for (size_t i = 0; i < records.size(); i++) { 1818 struct PartiallyConstructedSafepointRecord &info = records[i]; 1819 CallSite &CS = toUpdate[i]; 1820 findBasePointers(DT, DVCache, CS, info); 1821 } 1822 } // end of cache scope 1823 1824 // The base phi insertion logic (for any safepoint) may have inserted new 1825 // instructions which are now live at some safepoint. The simplest such 1826 // example is: 1827 // loop: 1828 // phi a <-- will be a new base_phi here 1829 // safepoint 1 <-- that needs to be live here 1830 // gep a + 1 1831 // safepoint 2 1832 // br loop 1833 // We insert some dummy calls after each safepoint to definitely hold live 1834 // the base pointers which were identified for that safepoint. We'll then 1835 // ask liveness for _every_ base inserted to see what is now live. Then we 1836 // remove the dummy calls. 1837 holders.reserve(holders.size() + records.size()); 1838 for (size_t i = 0; i < records.size(); i++) { 1839 struct PartiallyConstructedSafepointRecord &info = records[i]; 1840 CallSite &CS = toUpdate[i]; 1841 1842 SmallVector<Value *, 128> Bases; 1843 for (auto Pair : info.PointerToBase) { 1844 Bases.push_back(Pair.second); 1845 } 1846 insertUseHolderAfter(CS, Bases, holders); 1847 } 1848 1849 // By selecting base pointers, we've effectively inserted new uses. Thus, we 1850 // need to rerun liveness. We may *also* have inserted new defs, but that's 1851 // not the key issue. 1852 recomputeLiveInValues(F, DT, P, toUpdate, records); 1853 1854 if (PrintBasePointers) { 1855 for (size_t i = 0; i < records.size(); i++) { 1856 struct PartiallyConstructedSafepointRecord &info = records[i]; 1857 errs() << "Base Pairs: (w/Relocation)\n"; 1858 for (auto Pair : info.PointerToBase) { 1859 errs() << " derived %" << Pair.first->getName() << " base %" 1860 << Pair.second->getName() << "\n"; 1861 } 1862 } 1863 } 1864 for (size_t i = 0; i < holders.size(); i++) { 1865 holders[i]->eraseFromParent(); 1866 holders[i] = nullptr; 1867 } 1868 holders.clear(); 1869 1870 // Now run through and replace the existing statepoints with new ones with 1871 // the live variables listed. We do not yet update uses of the values being 1872 // relocated. We have references to live variables that need to 1873 // survive to the last iteration of this loop. (By construction, the 1874 // previous statepoint can not be a live variable, thus we can and remove 1875 // the old statepoint calls as we go.) 1876 for (size_t i = 0; i < records.size(); i++) { 1877 struct PartiallyConstructedSafepointRecord &info = records[i]; 1878 CallSite &CS = toUpdate[i]; 1879 makeStatepointExplicit(DT, CS, P, info); 1880 } 1881 toUpdate.clear(); // prevent accident use of invalid CallSites 1882 1883 // Do all the fixups of the original live variables to their relocated selves 1884 SmallVector<Value *, 128> live; 1885 for (size_t i = 0; i < records.size(); i++) { 1886 struct PartiallyConstructedSafepointRecord &info = records[i]; 1887 // We can't simply save the live set from the original insertion. One of 1888 // the live values might be the result of a call which needs a safepoint. 1889 // That Value* no longer exists and we need to use the new gc_result. 1890 // Thankfully, the liveset is embedded in the statepoint (and updated), so 1891 // we just grab that. 1892 Statepoint statepoint(info.StatepointToken); 1893 live.insert(live.end(), statepoint.gc_args_begin(), 1894 statepoint.gc_args_end()); 1895 #ifndef NDEBUG 1896 // Do some basic sanity checks on our liveness results before performing 1897 // relocation. Relocation can and will turn mistakes in liveness results 1898 // into non-sensical code which is must harder to debug. 1899 // TODO: It would be nice to test consistency as well 1900 assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) && 1901 "statepoint must be reachable or liveness is meaningless"); 1902 for (Value *V : statepoint.gc_args()) { 1903 if (!isa<Instruction>(V)) 1904 // Non-instruction values trivial dominate all possible uses 1905 continue; 1906 auto LiveInst = cast<Instruction>(V); 1907 assert(DT.isReachableFromEntry(LiveInst->getParent()) && 1908 "unreachable values should never be live"); 1909 assert(DT.dominates(LiveInst, info.StatepointToken) && 1910 "basic SSA liveness expectation violated by liveness analysis"); 1911 } 1912 #endif 1913 } 1914 unique_unsorted(live); 1915 1916 #ifndef NDEBUG 1917 // sanity check 1918 for (auto ptr : live) { 1919 assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type"); 1920 } 1921 #endif 1922 1923 relocationViaAlloca(F, DT, live, records); 1924 return !records.empty(); 1925 } 1926 1927 /// Returns true if this function should be rewritten by this pass. The main 1928 /// point of this function is as an extension point for custom logic. 1929 static bool shouldRewriteStatepointsIn(Function &F) { 1930 // TODO: This should check the GCStrategy 1931 if (F.hasGC()) { 1932 const std::string StatepointExampleName("statepoint-example"); 1933 return StatepointExampleName == F.getGC(); 1934 } else 1935 return false; 1936 } 1937 1938 bool RewriteStatepointsForGC::runOnFunction(Function &F) { 1939 // Nothing to do for declarations. 1940 if (F.isDeclaration() || F.empty()) 1941 return false; 1942 1943 // Policy choice says not to rewrite - the most common reason is that we're 1944 // compiling code without a GCStrategy. 1945 if (!shouldRewriteStatepointsIn(F)) 1946 return false; 1947 1948 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1949 1950 // Gather all the statepoints which need rewritten. Be careful to only 1951 // consider those in reachable code since we need to ask dominance queries 1952 // when rewriting. We'll delete the unreachable ones in a moment. 1953 SmallVector<CallSite, 64> ParsePointNeeded; 1954 bool HasUnreachableStatepoint = false; 1955 for (Instruction &I : inst_range(F)) { 1956 // TODO: only the ones with the flag set! 1957 if (isStatepoint(I)) { 1958 if (DT.isReachableFromEntry(I.getParent())) 1959 ParsePointNeeded.push_back(CallSite(&I)); 1960 else 1961 HasUnreachableStatepoint = true; 1962 } 1963 } 1964 1965 bool MadeChange = false; 1966 1967 // Delete any unreachable statepoints so that we don't have unrewritten 1968 // statepoints surviving this pass. This makes testing easier and the 1969 // resulting IR less confusing to human readers. Rather than be fancy, we 1970 // just reuse a utility function which removes the unreachable blocks. 1971 if (HasUnreachableStatepoint) 1972 MadeChange |= removeUnreachableBlocks(F); 1973 1974 // Return early if no work to do. 1975 if (ParsePointNeeded.empty()) 1976 return MadeChange; 1977 1978 // As a prepass, go ahead and aggressively destroy single entry phi nodes. 1979 // These are created by LCSSA. They have the effect of increasing the size 1980 // of liveness sets for no good reason. It may be harder to do this post 1981 // insertion since relocations and base phis can confuse things. 1982 for (BasicBlock &BB : F) 1983 if (BB.getUniquePredecessor()) { 1984 MadeChange = true; 1985 FoldSingleEntryPHINodes(&BB); 1986 } 1987 1988 MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded); 1989 return MadeChange; 1990 } 1991 1992 // liveness computation via standard dataflow 1993 // ------------------------------------------------------------------- 1994 1995 // TODO: Consider using bitvectors for liveness, the set of potentially 1996 // interesting values should be small and easy to pre-compute. 1997 1998 /// Compute the live-in set for the location rbegin starting from 1999 /// the live-out set of the basic block 2000 static void computeLiveInValues(BasicBlock::reverse_iterator rbegin, 2001 BasicBlock::reverse_iterator rend, 2002 DenseSet<Value *> &LiveTmp) { 2003 2004 for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) { 2005 Instruction *I = &*ritr; 2006 2007 // KILL/Def - Remove this definition from LiveIn 2008 LiveTmp.erase(I); 2009 2010 // Don't consider *uses* in PHI nodes, we handle their contribution to 2011 // predecessor blocks when we seed the LiveOut sets 2012 if (isa<PHINode>(I)) 2013 continue; 2014 2015 // USE - Add to the LiveIn set for this instruction 2016 for (Value *V : I->operands()) { 2017 assert(!isUnhandledGCPointerType(V->getType()) && 2018 "support for FCA unimplemented"); 2019 if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) { 2020 // The choice to exclude all things constant here is slightly subtle. 2021 // There are two idependent reasons: 2022 // - We assume that things which are constant (from LLVM's definition) 2023 // do not move at runtime. For example, the address of a global 2024 // variable is fixed, even though it's contents may not be. 2025 // - Second, we can't disallow arbitrary inttoptr constants even 2026 // if the language frontend does. Optimization passes are free to 2027 // locally exploit facts without respect to global reachability. This 2028 // can create sections of code which are dynamically unreachable and 2029 // contain just about anything. (see constants.ll in tests) 2030 LiveTmp.insert(V); 2031 } 2032 } 2033 } 2034 } 2035 2036 static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) { 2037 2038 for (BasicBlock *Succ : successors(BB)) { 2039 const BasicBlock::iterator E(Succ->getFirstNonPHI()); 2040 for (BasicBlock::iterator I = Succ->begin(); I != E; I++) { 2041 PHINode *Phi = cast<PHINode>(&*I); 2042 Value *V = Phi->getIncomingValueForBlock(BB); 2043 assert(!isUnhandledGCPointerType(V->getType()) && 2044 "support for FCA unimplemented"); 2045 if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) { 2046 LiveTmp.insert(V); 2047 } 2048 } 2049 } 2050 } 2051 2052 static DenseSet<Value *> computeKillSet(BasicBlock *BB) { 2053 DenseSet<Value *> KillSet; 2054 for (Instruction &I : *BB) 2055 if (isHandledGCPointerType(I.getType())) 2056 KillSet.insert(&I); 2057 return KillSet; 2058 } 2059 2060 #ifndef NDEBUG 2061 /// Check that the items in 'Live' dominate 'TI'. This is used as a basic 2062 /// sanity check for the liveness computation. 2063 static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live, 2064 TerminatorInst *TI, bool TermOkay = false) { 2065 for (Value *V : Live) { 2066 if (auto *I = dyn_cast<Instruction>(V)) { 2067 // The terminator can be a member of the LiveOut set. LLVM's definition 2068 // of instruction dominance states that V does not dominate itself. As 2069 // such, we need to special case this to allow it. 2070 if (TermOkay && TI == I) 2071 continue; 2072 assert(DT.dominates(I, TI) && 2073 "basic SSA liveness expectation violated by liveness analysis"); 2074 } 2075 } 2076 } 2077 2078 /// Check that all the liveness sets used during the computation of liveness 2079 /// obey basic SSA properties. This is useful for finding cases where we miss 2080 /// a def. 2081 static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data, 2082 BasicBlock &BB) { 2083 checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator()); 2084 checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true); 2085 checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator()); 2086 } 2087 #endif 2088 2089 static void computeLiveInValues(DominatorTree &DT, Function &F, 2090 GCPtrLivenessData &Data) { 2091 2092 SmallSetVector<BasicBlock *, 200> Worklist; 2093 auto AddPredsToWorklist = [&](BasicBlock *BB) { 2094 // We use a SetVector so that we don't have duplicates in the worklist. 2095 Worklist.insert(pred_begin(BB), pred_end(BB)); 2096 }; 2097 auto NextItem = [&]() { 2098 BasicBlock *BB = Worklist.back(); 2099 Worklist.pop_back(); 2100 return BB; 2101 }; 2102 2103 // Seed the liveness for each individual block 2104 for (BasicBlock &BB : F) { 2105 Data.KillSet[&BB] = computeKillSet(&BB); 2106 Data.LiveSet[&BB].clear(); 2107 computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]); 2108 2109 #ifndef NDEBUG 2110 for (Value *Kill : Data.KillSet[&BB]) 2111 assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill"); 2112 #endif 2113 2114 Data.LiveOut[&BB] = DenseSet<Value *>(); 2115 computeLiveOutSeed(&BB, Data.LiveOut[&BB]); 2116 Data.LiveIn[&BB] = Data.LiveSet[&BB]; 2117 set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]); 2118 set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]); 2119 if (!Data.LiveIn[&BB].empty()) 2120 AddPredsToWorklist(&BB); 2121 } 2122 2123 // Propagate that liveness until stable 2124 while (!Worklist.empty()) { 2125 BasicBlock *BB = NextItem(); 2126 2127 // Compute our new liveout set, then exit early if it hasn't changed 2128 // despite the contribution of our successor. 2129 DenseSet<Value *> LiveOut = Data.LiveOut[BB]; 2130 const auto OldLiveOutSize = LiveOut.size(); 2131 for (BasicBlock *Succ : successors(BB)) { 2132 assert(Data.LiveIn.count(Succ)); 2133 set_union(LiveOut, Data.LiveIn[Succ]); 2134 } 2135 // assert OutLiveOut is a subset of LiveOut 2136 if (OldLiveOutSize == LiveOut.size()) { 2137 // If the sets are the same size, then we didn't actually add anything 2138 // when unioning our successors LiveIn Thus, the LiveIn of this block 2139 // hasn't changed. 2140 continue; 2141 } 2142 Data.LiveOut[BB] = LiveOut; 2143 2144 // Apply the effects of this basic block 2145 DenseSet<Value *> LiveTmp = LiveOut; 2146 set_union(LiveTmp, Data.LiveSet[BB]); 2147 set_subtract(LiveTmp, Data.KillSet[BB]); 2148 2149 assert(Data.LiveIn.count(BB)); 2150 const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB]; 2151 // assert: OldLiveIn is a subset of LiveTmp 2152 if (OldLiveIn.size() != LiveTmp.size()) { 2153 Data.LiveIn[BB] = LiveTmp; 2154 AddPredsToWorklist(BB); 2155 } 2156 } // while( !worklist.empty() ) 2157 2158 #ifndef NDEBUG 2159 // Sanity check our ouput against SSA properties. This helps catch any 2160 // missing kills during the above iteration. 2161 for (BasicBlock &BB : F) { 2162 checkBasicSSA(DT, Data, BB); 2163 } 2164 #endif 2165 } 2166 2167 static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data, 2168 StatepointLiveSetTy &Out) { 2169 2170 BasicBlock *BB = Inst->getParent(); 2171 2172 // Note: The copy is intentional and required 2173 assert(Data.LiveOut.count(BB)); 2174 DenseSet<Value *> LiveOut = Data.LiveOut[BB]; 2175 2176 // We want to handle the statepoint itself oddly. It's 2177 // call result is not live (normal), nor are it's arguments 2178 // (unless they're used again later). This adjustment is 2179 // specifically what we need to relocate 2180 BasicBlock::reverse_iterator rend(Inst); 2181 computeLiveInValues(BB->rbegin(), rend, LiveOut); 2182 LiveOut.erase(Inst); 2183 Out.insert(LiveOut.begin(), LiveOut.end()); 2184 } 2185 2186 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, 2187 const CallSite &CS, 2188 PartiallyConstructedSafepointRecord &Info) { 2189 Instruction *Inst = CS.getInstruction(); 2190 StatepointLiveSetTy Updated; 2191 findLiveSetAtInst(Inst, RevisedLivenessData, Updated); 2192 2193 #ifndef NDEBUG 2194 DenseSet<Value *> Bases; 2195 for (auto KVPair : Info.PointerToBase) { 2196 Bases.insert(KVPair.second); 2197 } 2198 #endif 2199 // We may have base pointers which are now live that weren't before. We need 2200 // to update the PointerToBase structure to reflect this. 2201 for (auto V : Updated) 2202 if (!Info.PointerToBase.count(V)) { 2203 assert(Bases.count(V) && "can't find base for unexpected live value"); 2204 Info.PointerToBase[V] = V; 2205 continue; 2206 } 2207 2208 #ifndef NDEBUG 2209 for (auto V : Updated) { 2210 assert(Info.PointerToBase.count(V) && 2211 "must be able to find base for live value"); 2212 } 2213 #endif 2214 2215 // Remove any stale base mappings - this can happen since our liveness is 2216 // more precise then the one inherent in the base pointer analysis 2217 DenseSet<Value *> ToErase; 2218 for (auto KVPair : Info.PointerToBase) 2219 if (!Updated.count(KVPair.first)) 2220 ToErase.insert(KVPair.first); 2221 for (auto V : ToErase) 2222 Info.PointerToBase.erase(V); 2223 2224 #ifndef NDEBUG 2225 for (auto KVPair : Info.PointerToBase) 2226 assert(Updated.count(KVPair.first) && "record for non-live value"); 2227 #endif 2228 2229 Info.liveset = Updated; 2230 } 2231