1 //===- RewriteStatepointsForGC.cpp - Make GC relocations explicit ---------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Rewrite an existing set of gc.statepoints such that they make potential 11 // relocations performed by the garbage collector explicit in the IR. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Pass.h" 16 #include "llvm/Analysis/CFG.h" 17 #include "llvm/Analysis/TargetTransformInfo.h" 18 #include "llvm/ADT/SetOperations.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/ADT/DenseSet.h" 21 #include "llvm/ADT/SetVector.h" 22 #include "llvm/ADT/StringRef.h" 23 #include "llvm/IR/BasicBlock.h" 24 #include "llvm/IR/CallSite.h" 25 #include "llvm/IR/Dominators.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/IR/IRBuilder.h" 28 #include "llvm/IR/InstIterator.h" 29 #include "llvm/IR/Instructions.h" 30 #include "llvm/IR/Intrinsics.h" 31 #include "llvm/IR/IntrinsicInst.h" 32 #include "llvm/IR/Module.h" 33 #include "llvm/IR/Statepoint.h" 34 #include "llvm/IR/Value.h" 35 #include "llvm/IR/Verifier.h" 36 #include "llvm/Support/Debug.h" 37 #include "llvm/Support/CommandLine.h" 38 #include "llvm/Transforms/Scalar.h" 39 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 40 #include "llvm/Transforms/Utils/Cloning.h" 41 #include "llvm/Transforms/Utils/Local.h" 42 #include "llvm/Transforms/Utils/PromoteMemToReg.h" 43 44 #define DEBUG_TYPE "rewrite-statepoints-for-gc" 45 46 using namespace llvm; 47 48 // Print tracing output 49 static cl::opt<bool> TraceLSP("trace-rewrite-statepoints", cl::Hidden, 50 cl::init(false)); 51 52 // Print the liveset found at the insert location 53 static cl::opt<bool> PrintLiveSet("spp-print-liveset", cl::Hidden, 54 cl::init(false)); 55 static cl::opt<bool> PrintLiveSetSize("spp-print-liveset-size", cl::Hidden, 56 cl::init(false)); 57 // Print out the base pointers for debugging 58 static cl::opt<bool> PrintBasePointers("spp-print-base-pointers", cl::Hidden, 59 cl::init(false)); 60 61 // Cost threshold measuring when it is profitable to rematerialize value instead 62 // of relocating it 63 static cl::opt<unsigned> 64 RematerializationThreshold("spp-rematerialization-threshold", cl::Hidden, 65 cl::init(6)); 66 67 #ifdef XDEBUG 68 static bool ClobberNonLive = true; 69 #else 70 static bool ClobberNonLive = false; 71 #endif 72 static cl::opt<bool, true> ClobberNonLiveOverride("rs4gc-clobber-non-live", 73 cl::location(ClobberNonLive), 74 cl::Hidden); 75 76 namespace { 77 struct RewriteStatepointsForGC : public FunctionPass { 78 static char ID; // Pass identification, replacement for typeid 79 80 RewriteStatepointsForGC() : FunctionPass(ID) { 81 initializeRewriteStatepointsForGCPass(*PassRegistry::getPassRegistry()); 82 } 83 bool runOnFunction(Function &F) override; 84 85 void getAnalysisUsage(AnalysisUsage &AU) const override { 86 // We add and rewrite a bunch of instructions, but don't really do much 87 // else. We could in theory preserve a lot more analyses here. 88 AU.addRequired<DominatorTreeWrapperPass>(); 89 AU.addRequired<TargetTransformInfoWrapperPass>(); 90 } 91 }; 92 } // namespace 93 94 char RewriteStatepointsForGC::ID = 0; 95 96 FunctionPass *llvm::createRewriteStatepointsForGCPass() { 97 return new RewriteStatepointsForGC(); 98 } 99 100 INITIALIZE_PASS_BEGIN(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", 101 "Make relocations explicit at statepoints", false, false) 102 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 103 INITIALIZE_PASS_END(RewriteStatepointsForGC, "rewrite-statepoints-for-gc", 104 "Make relocations explicit at statepoints", false, false) 105 106 namespace { 107 struct GCPtrLivenessData { 108 /// Values defined in this block. 109 DenseMap<BasicBlock *, DenseSet<Value *>> KillSet; 110 /// Values used in this block (and thus live); does not included values 111 /// killed within this block. 112 DenseMap<BasicBlock *, DenseSet<Value *>> LiveSet; 113 114 /// Values live into this basic block (i.e. used by any 115 /// instruction in this basic block or ones reachable from here) 116 DenseMap<BasicBlock *, DenseSet<Value *>> LiveIn; 117 118 /// Values live out of this basic block (i.e. live into 119 /// any successor block) 120 DenseMap<BasicBlock *, DenseSet<Value *>> LiveOut; 121 }; 122 123 // The type of the internal cache used inside the findBasePointers family 124 // of functions. From the callers perspective, this is an opaque type and 125 // should not be inspected. 126 // 127 // In the actual implementation this caches two relations: 128 // - The base relation itself (i.e. this pointer is based on that one) 129 // - The base defining value relation (i.e. before base_phi insertion) 130 // Generally, after the execution of a full findBasePointer call, only the 131 // base relation will remain. Internally, we add a mixture of the two 132 // types, then update all the second type to the first type 133 typedef DenseMap<Value *, Value *> DefiningValueMapTy; 134 typedef DenseSet<llvm::Value *> StatepointLiveSetTy; 135 typedef DenseMap<Instruction *, Value *> RematerializedValueMapTy; 136 137 struct PartiallyConstructedSafepointRecord { 138 /// The set of values known to be live accross this safepoint 139 StatepointLiveSetTy liveset; 140 141 /// Mapping from live pointers to a base-defining-value 142 DenseMap<llvm::Value *, llvm::Value *> PointerToBase; 143 144 /// The *new* gc.statepoint instruction itself. This produces the token 145 /// that normal path gc.relocates and the gc.result are tied to. 146 Instruction *StatepointToken; 147 148 /// Instruction to which exceptional gc relocates are attached 149 /// Makes it easier to iterate through them during relocationViaAlloca. 150 Instruction *UnwindToken; 151 152 /// Record live values we are rematerialized instead of relocating. 153 /// They are not included into 'liveset' field. 154 /// Maps rematerialized copy to it's original value. 155 RematerializedValueMapTy RematerializedValues; 156 }; 157 } 158 159 /// Compute the live-in set for every basic block in the function 160 static void computeLiveInValues(DominatorTree &DT, Function &F, 161 GCPtrLivenessData &Data); 162 163 /// Given results from the dataflow liveness computation, find the set of live 164 /// Values at a particular instruction. 165 static void findLiveSetAtInst(Instruction *inst, GCPtrLivenessData &Data, 166 StatepointLiveSetTy &out); 167 168 // TODO: Once we can get to the GCStrategy, this becomes 169 // Optional<bool> isGCManagedPointer(const Value *V) const override { 170 171 static bool isGCPointerType(const Type *T) { 172 if (const PointerType *PT = dyn_cast<PointerType>(T)) 173 // For the sake of this example GC, we arbitrarily pick addrspace(1) as our 174 // GC managed heap. We know that a pointer into this heap needs to be 175 // updated and that no other pointer does. 176 return (1 == PT->getAddressSpace()); 177 return false; 178 } 179 180 // Return true if this type is one which a) is a gc pointer or contains a GC 181 // pointer and b) is of a type this code expects to encounter as a live value. 182 // (The insertion code will assert that a type which matches (a) and not (b) 183 // is not encountered.) 184 static bool isHandledGCPointerType(Type *T) { 185 // We fully support gc pointers 186 if (isGCPointerType(T)) 187 return true; 188 // We partially support vectors of gc pointers. The code will assert if it 189 // can't handle something. 190 if (auto VT = dyn_cast<VectorType>(T)) 191 if (isGCPointerType(VT->getElementType())) 192 return true; 193 return false; 194 } 195 196 #ifndef NDEBUG 197 /// Returns true if this type contains a gc pointer whether we know how to 198 /// handle that type or not. 199 static bool containsGCPtrType(Type *Ty) { 200 if (isGCPointerType(Ty)) 201 return true; 202 if (VectorType *VT = dyn_cast<VectorType>(Ty)) 203 return isGCPointerType(VT->getScalarType()); 204 if (ArrayType *AT = dyn_cast<ArrayType>(Ty)) 205 return containsGCPtrType(AT->getElementType()); 206 if (StructType *ST = dyn_cast<StructType>(Ty)) 207 return std::any_of( 208 ST->subtypes().begin(), ST->subtypes().end(), 209 [](Type *SubType) { return containsGCPtrType(SubType); }); 210 return false; 211 } 212 213 // Returns true if this is a type which a) is a gc pointer or contains a GC 214 // pointer and b) is of a type which the code doesn't expect (i.e. first class 215 // aggregates). Used to trip assertions. 216 static bool isUnhandledGCPointerType(Type *Ty) { 217 return containsGCPtrType(Ty) && !isHandledGCPointerType(Ty); 218 } 219 #endif 220 221 static bool order_by_name(llvm::Value *a, llvm::Value *b) { 222 if (a->hasName() && b->hasName()) { 223 return -1 == a->getName().compare(b->getName()); 224 } else if (a->hasName() && !b->hasName()) { 225 return true; 226 } else if (!a->hasName() && b->hasName()) { 227 return false; 228 } else { 229 // Better than nothing, but not stable 230 return a < b; 231 } 232 } 233 234 // Conservatively identifies any definitions which might be live at the 235 // given instruction. The analysis is performed immediately before the 236 // given instruction. Values defined by that instruction are not considered 237 // live. Values used by that instruction are considered live. 238 static void analyzeParsePointLiveness( 239 DominatorTree &DT, GCPtrLivenessData &OriginalLivenessData, 240 const CallSite &CS, PartiallyConstructedSafepointRecord &result) { 241 Instruction *inst = CS.getInstruction(); 242 243 StatepointLiveSetTy liveset; 244 findLiveSetAtInst(inst, OriginalLivenessData, liveset); 245 246 if (PrintLiveSet) { 247 // Note: This output is used by several of the test cases 248 // The order of elemtns in a set is not stable, put them in a vec and sort 249 // by name 250 SmallVector<Value *, 64> temp; 251 temp.insert(temp.end(), liveset.begin(), liveset.end()); 252 std::sort(temp.begin(), temp.end(), order_by_name); 253 errs() << "Live Variables:\n"; 254 for (Value *V : temp) { 255 errs() << " " << V->getName(); // no newline 256 V->dump(); 257 } 258 } 259 if (PrintLiveSetSize) { 260 errs() << "Safepoint For: " << CS.getCalledValue()->getName() << "\n"; 261 errs() << "Number live values: " << liveset.size() << "\n"; 262 } 263 result.liveset = liveset; 264 } 265 266 static Value *findBaseDefiningValue(Value *I); 267 268 /// If we can trivially determine that the index specified in the given vector 269 /// is a base pointer, return it. In cases where the entire vector is known to 270 /// consist of base pointers, the entire vector will be returned. This 271 /// indicates that the relevant extractelement is a valid base pointer and 272 /// should be used directly. 273 static Value *findBaseOfVector(Value *I, Value *Index) { 274 assert(I->getType()->isVectorTy() && 275 cast<VectorType>(I->getType())->getElementType()->isPointerTy() && 276 "Illegal to ask for the base pointer of a non-pointer type"); 277 278 // Each case parallels findBaseDefiningValue below, see that code for 279 // detailed motivation. 280 281 if (isa<Argument>(I)) 282 // An incoming argument to the function is a base pointer 283 return I; 284 285 // We shouldn't see the address of a global as a vector value? 286 assert(!isa<GlobalVariable>(I) && 287 "unexpected global variable found in base of vector"); 288 289 // inlining could possibly introduce phi node that contains 290 // undef if callee has multiple returns 291 if (isa<UndefValue>(I)) 292 // utterly meaningless, but useful for dealing with partially optimized 293 // code. 294 return I; 295 296 // Due to inheritance, this must be _after_ the global variable and undef 297 // checks 298 if (Constant *Con = dyn_cast<Constant>(I)) { 299 assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && 300 "order of checks wrong!"); 301 assert(Con->isNullValue() && "null is the only case which makes sense"); 302 return Con; 303 } 304 305 if (isa<LoadInst>(I)) 306 return I; 307 308 // For an insert element, we might be able to look through it if we know 309 // something about the indexes, but if the indices are arbitrary values, we 310 // can't without much more extensive scalarization. 311 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(I)) { 312 Value *InsertIndex = IEI->getOperand(2); 313 // This index is inserting the value, look for it's base 314 if (InsertIndex == Index) 315 return findBaseDefiningValue(IEI->getOperand(1)); 316 // Both constant, and can't be equal per above. This insert is definitely 317 // not relevant, look back at the rest of the vector and keep trying. 318 if (isa<ConstantInt>(Index) && isa<ConstantInt>(InsertIndex)) 319 return findBaseOfVector(IEI->getOperand(0), Index); 320 } 321 322 // Note: This code is currently rather incomplete. We are essentially only 323 // handling cases where the vector element is trivially a base pointer. We 324 // need to update the entire base pointer construction algorithm to know how 325 // to track vector elements and potentially scalarize, but the case which 326 // would motivate the work hasn't shown up in real workloads yet. 327 llvm_unreachable("no base found for vector element"); 328 } 329 330 /// Helper function for findBasePointer - Will return a value which either a) 331 /// defines the base pointer for the input or b) blocks the simple search 332 /// (i.e. a PHI or Select of two derived pointers) 333 static Value *findBaseDefiningValue(Value *I) { 334 assert(I->getType()->isPointerTy() && 335 "Illegal to ask for the base pointer of a non-pointer type"); 336 337 // This case is a bit of a hack - it only handles extracts from vectors which 338 // trivially contain only base pointers or cases where we can directly match 339 // the index of the original extract element to an insertion into the vector. 340 // See note inside the function for how to improve this. 341 if (auto *EEI = dyn_cast<ExtractElementInst>(I)) { 342 Value *VectorOperand = EEI->getVectorOperand(); 343 Value *Index = EEI->getIndexOperand(); 344 Value *VectorBase = findBaseOfVector(VectorOperand, Index); 345 // If the result returned is a vector, we know the entire vector must 346 // contain base pointers. In that case, the extractelement is a valid base 347 // for this value. 348 if (VectorBase->getType()->isVectorTy()) 349 return EEI; 350 // Otherwise, we needed to look through the vector to find the base for 351 // this particular element. 352 assert(VectorBase->getType()->isPointerTy()); 353 return VectorBase; 354 } 355 356 if (isa<Argument>(I)) 357 // An incoming argument to the function is a base pointer 358 // We should have never reached here if this argument isn't an gc value 359 return I; 360 361 if (isa<GlobalVariable>(I)) 362 // base case 363 return I; 364 365 // inlining could possibly introduce phi node that contains 366 // undef if callee has multiple returns 367 if (isa<UndefValue>(I)) 368 // utterly meaningless, but useful for dealing with 369 // partially optimized code. 370 return I; 371 372 // Due to inheritance, this must be _after_ the global variable and undef 373 // checks 374 if (Constant *Con = dyn_cast<Constant>(I)) { 375 assert(!isa<GlobalVariable>(I) && !isa<UndefValue>(I) && 376 "order of checks wrong!"); 377 // Note: Finding a constant base for something marked for relocation 378 // doesn't really make sense. The most likely case is either a) some 379 // screwed up the address space usage or b) your validating against 380 // compiled C++ code w/o the proper separation. The only real exception 381 // is a null pointer. You could have generic code written to index of 382 // off a potentially null value and have proven it null. We also use 383 // null pointers in dead paths of relocation phis (which we might later 384 // want to find a base pointer for). 385 assert(isa<ConstantPointerNull>(Con) && 386 "null is the only case which makes sense"); 387 return Con; 388 } 389 390 if (CastInst *CI = dyn_cast<CastInst>(I)) { 391 Value *Def = CI->stripPointerCasts(); 392 // If we find a cast instruction here, it means we've found a cast which is 393 // not simply a pointer cast (i.e. an inttoptr). We don't know how to 394 // handle int->ptr conversion. 395 assert(!isa<CastInst>(Def) && "shouldn't find another cast here"); 396 return findBaseDefiningValue(Def); 397 } 398 399 if (isa<LoadInst>(I)) 400 return I; // The value loaded is an gc base itself 401 402 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(I)) 403 // The base of this GEP is the base 404 return findBaseDefiningValue(GEP->getPointerOperand()); 405 406 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) { 407 switch (II->getIntrinsicID()) { 408 case Intrinsic::experimental_gc_result_ptr: 409 default: 410 // fall through to general call handling 411 break; 412 case Intrinsic::experimental_gc_statepoint: 413 case Intrinsic::experimental_gc_result_float: 414 case Intrinsic::experimental_gc_result_int: 415 llvm_unreachable("these don't produce pointers"); 416 case Intrinsic::experimental_gc_relocate: { 417 // Rerunning safepoint insertion after safepoints are already 418 // inserted is not supported. It could probably be made to work, 419 // but why are you doing this? There's no good reason. 420 llvm_unreachable("repeat safepoint insertion is not supported"); 421 } 422 case Intrinsic::gcroot: 423 // Currently, this mechanism hasn't been extended to work with gcroot. 424 // There's no reason it couldn't be, but I haven't thought about the 425 // implications much. 426 llvm_unreachable( 427 "interaction with the gcroot mechanism is not supported"); 428 } 429 } 430 // We assume that functions in the source language only return base 431 // pointers. This should probably be generalized via attributes to support 432 // both source language and internal functions. 433 if (isa<CallInst>(I) || isa<InvokeInst>(I)) 434 return I; 435 436 // I have absolutely no idea how to implement this part yet. It's not 437 // neccessarily hard, I just haven't really looked at it yet. 438 assert(!isa<LandingPadInst>(I) && "Landing Pad is unimplemented"); 439 440 if (isa<AtomicCmpXchgInst>(I)) 441 // A CAS is effectively a atomic store and load combined under a 442 // predicate. From the perspective of base pointers, we just treat it 443 // like a load. 444 return I; 445 446 assert(!isa<AtomicRMWInst>(I) && "Xchg handled above, all others are " 447 "binary ops which don't apply to pointers"); 448 449 // The aggregate ops. Aggregates can either be in the heap or on the 450 // stack, but in either case, this is simply a field load. As a result, 451 // this is a defining definition of the base just like a load is. 452 if (isa<ExtractValueInst>(I)) 453 return I; 454 455 // We should never see an insert vector since that would require we be 456 // tracing back a struct value not a pointer value. 457 assert(!isa<InsertValueInst>(I) && 458 "Base pointer for a struct is meaningless"); 459 460 // The last two cases here don't return a base pointer. Instead, they 461 // return a value which dynamically selects from amoung several base 462 // derived pointers (each with it's own base potentially). It's the job of 463 // the caller to resolve these. 464 assert((isa<SelectInst>(I) || isa<PHINode>(I)) && 465 "missing instruction case in findBaseDefiningValing"); 466 return I; 467 } 468 469 /// Returns the base defining value for this value. 470 static Value *findBaseDefiningValueCached(Value *I, DefiningValueMapTy &Cache) { 471 Value *&Cached = Cache[I]; 472 if (!Cached) { 473 Cached = findBaseDefiningValue(I); 474 } 475 assert(Cache[I] != nullptr); 476 477 if (TraceLSP) { 478 dbgs() << "fBDV-cached: " << I->getName() << " -> " << Cached->getName() 479 << "\n"; 480 } 481 return Cached; 482 } 483 484 /// Return a base pointer for this value if known. Otherwise, return it's 485 /// base defining value. 486 static Value *findBaseOrBDV(Value *I, DefiningValueMapTy &Cache) { 487 Value *Def = findBaseDefiningValueCached(I, Cache); 488 auto Found = Cache.find(Def); 489 if (Found != Cache.end()) { 490 // Either a base-of relation, or a self reference. Caller must check. 491 return Found->second; 492 } 493 // Only a BDV available 494 return Def; 495 } 496 497 /// Given the result of a call to findBaseDefiningValue, or findBaseOrBDV, 498 /// is it known to be a base pointer? Or do we need to continue searching. 499 static bool isKnownBaseResult(Value *V) { 500 if (!isa<PHINode>(V) && !isa<SelectInst>(V)) { 501 // no recursion possible 502 return true; 503 } 504 if (isa<Instruction>(V) && 505 cast<Instruction>(V)->getMetadata("is_base_value")) { 506 // This is a previously inserted base phi or select. We know 507 // that this is a base value. 508 return true; 509 } 510 511 // We need to keep searching 512 return false; 513 } 514 515 // TODO: find a better name for this 516 namespace { 517 class PhiState { 518 public: 519 enum Status { Unknown, Base, Conflict }; 520 521 PhiState(Status s, Value *b = nullptr) : status(s), base(b) { 522 assert(status != Base || b); 523 } 524 PhiState(Value *b) : status(Base), base(b) {} 525 PhiState() : status(Unknown), base(nullptr) {} 526 527 Status getStatus() const { return status; } 528 Value *getBase() const { return base; } 529 530 bool isBase() const { return getStatus() == Base; } 531 bool isUnknown() const { return getStatus() == Unknown; } 532 bool isConflict() const { return getStatus() == Conflict; } 533 534 bool operator==(const PhiState &other) const { 535 return base == other.base && status == other.status; 536 } 537 538 bool operator!=(const PhiState &other) const { return !(*this == other); } 539 540 void dump() { 541 errs() << status << " (" << base << " - " 542 << (base ? base->getName() : "nullptr") << "): "; 543 } 544 545 private: 546 Status status; 547 Value *base; // non null only if status == base 548 }; 549 550 typedef DenseMap<Value *, PhiState> ConflictStateMapTy; 551 // Values of type PhiState form a lattice, and this is a helper 552 // class that implementes the meet operation. The meat of the meet 553 // operation is implemented in MeetPhiStates::pureMeet 554 class MeetPhiStates { 555 public: 556 // phiStates is a mapping from PHINodes and SelectInst's to PhiStates. 557 explicit MeetPhiStates(const ConflictStateMapTy &phiStates) 558 : phiStates(phiStates) {} 559 560 // Destructively meet the current result with the base V. V can 561 // either be a merge instruction (SelectInst / PHINode), in which 562 // case its status is looked up in the phiStates map; or a regular 563 // SSA value, in which case it is assumed to be a base. 564 void meetWith(Value *V) { 565 PhiState otherState = getStateForBDV(V); 566 assert((MeetPhiStates::pureMeet(otherState, currentResult) == 567 MeetPhiStates::pureMeet(currentResult, otherState)) && 568 "math is wrong: meet does not commute!"); 569 currentResult = MeetPhiStates::pureMeet(otherState, currentResult); 570 } 571 572 PhiState getResult() const { return currentResult; } 573 574 private: 575 const ConflictStateMapTy &phiStates; 576 PhiState currentResult; 577 578 /// Return a phi state for a base defining value. We'll generate a new 579 /// base state for known bases and expect to find a cached state otherwise 580 PhiState getStateForBDV(Value *baseValue) { 581 if (isKnownBaseResult(baseValue)) { 582 return PhiState(baseValue); 583 } else { 584 return lookupFromMap(baseValue); 585 } 586 } 587 588 PhiState lookupFromMap(Value *V) { 589 auto I = phiStates.find(V); 590 assert(I != phiStates.end() && "lookup failed!"); 591 return I->second; 592 } 593 594 static PhiState pureMeet(const PhiState &stateA, const PhiState &stateB) { 595 switch (stateA.getStatus()) { 596 case PhiState::Unknown: 597 return stateB; 598 599 case PhiState::Base: 600 assert(stateA.getBase() && "can't be null"); 601 if (stateB.isUnknown()) 602 return stateA; 603 604 if (stateB.isBase()) { 605 if (stateA.getBase() == stateB.getBase()) { 606 assert(stateA == stateB && "equality broken!"); 607 return stateA; 608 } 609 return PhiState(PhiState::Conflict); 610 } 611 assert(stateB.isConflict() && "only three states!"); 612 return PhiState(PhiState::Conflict); 613 614 case PhiState::Conflict: 615 return stateA; 616 } 617 llvm_unreachable("only three states!"); 618 } 619 }; 620 } 621 /// For a given value or instruction, figure out what base ptr it's derived 622 /// from. For gc objects, this is simply itself. On success, returns a value 623 /// which is the base pointer. (This is reliable and can be used for 624 /// relocation.) On failure, returns nullptr. 625 static Value *findBasePointer(Value *I, DefiningValueMapTy &cache) { 626 Value *def = findBaseOrBDV(I, cache); 627 628 if (isKnownBaseResult(def)) { 629 return def; 630 } 631 632 // Here's the rough algorithm: 633 // - For every SSA value, construct a mapping to either an actual base 634 // pointer or a PHI which obscures the base pointer. 635 // - Construct a mapping from PHI to unknown TOP state. Use an 636 // optimistic algorithm to propagate base pointer information. Lattice 637 // looks like: 638 // UNKNOWN 639 // b1 b2 b3 b4 640 // CONFLICT 641 // When algorithm terminates, all PHIs will either have a single concrete 642 // base or be in a conflict state. 643 // - For every conflict, insert a dummy PHI node without arguments. Add 644 // these to the base[Instruction] = BasePtr mapping. For every 645 // non-conflict, add the actual base. 646 // - For every conflict, add arguments for the base[a] of each input 647 // arguments. 648 // 649 // Note: A simpler form of this would be to add the conflict form of all 650 // PHIs without running the optimistic algorithm. This would be 651 // analougous to pessimistic data flow and would likely lead to an 652 // overall worse solution. 653 654 ConflictStateMapTy states; 655 states[def] = PhiState(); 656 // Recursively fill in all phis & selects reachable from the initial one 657 // for which we don't already know a definite base value for 658 // TODO: This should be rewritten with a worklist 659 bool done = false; 660 while (!done) { 661 done = true; 662 // Since we're adding elements to 'states' as we run, we can't keep 663 // iterators into the set. 664 SmallVector<Value *, 16> Keys; 665 Keys.reserve(states.size()); 666 for (auto Pair : states) { 667 Value *V = Pair.first; 668 Keys.push_back(V); 669 } 670 for (Value *v : Keys) { 671 assert(!isKnownBaseResult(v) && "why did it get added?"); 672 if (PHINode *phi = dyn_cast<PHINode>(v)) { 673 assert(phi->getNumIncomingValues() > 0 && 674 "zero input phis are illegal"); 675 for (Value *InVal : phi->incoming_values()) { 676 Value *local = findBaseOrBDV(InVal, cache); 677 if (!isKnownBaseResult(local) && states.find(local) == states.end()) { 678 states[local] = PhiState(); 679 done = false; 680 } 681 } 682 } else if (SelectInst *sel = dyn_cast<SelectInst>(v)) { 683 Value *local = findBaseOrBDV(sel->getTrueValue(), cache); 684 if (!isKnownBaseResult(local) && states.find(local) == states.end()) { 685 states[local] = PhiState(); 686 done = false; 687 } 688 local = findBaseOrBDV(sel->getFalseValue(), cache); 689 if (!isKnownBaseResult(local) && states.find(local) == states.end()) { 690 states[local] = PhiState(); 691 done = false; 692 } 693 } 694 } 695 } 696 697 if (TraceLSP) { 698 errs() << "States after initialization:\n"; 699 for (auto Pair : states) { 700 Instruction *v = cast<Instruction>(Pair.first); 701 PhiState state = Pair.second; 702 state.dump(); 703 v->dump(); 704 } 705 } 706 707 // TODO: come back and revisit the state transitions around inputs which 708 // have reached conflict state. The current version seems too conservative. 709 710 bool progress = true; 711 while (progress) { 712 #ifndef NDEBUG 713 size_t oldSize = states.size(); 714 #endif 715 progress = false; 716 // We're only changing keys in this loop, thus safe to keep iterators 717 for (auto Pair : states) { 718 MeetPhiStates calculateMeet(states); 719 Value *v = Pair.first; 720 assert(!isKnownBaseResult(v) && "why did it get added?"); 721 if (SelectInst *select = dyn_cast<SelectInst>(v)) { 722 calculateMeet.meetWith(findBaseOrBDV(select->getTrueValue(), cache)); 723 calculateMeet.meetWith(findBaseOrBDV(select->getFalseValue(), cache)); 724 } else 725 for (Value *Val : cast<PHINode>(v)->incoming_values()) 726 calculateMeet.meetWith(findBaseOrBDV(Val, cache)); 727 728 PhiState oldState = states[v]; 729 PhiState newState = calculateMeet.getResult(); 730 if (oldState != newState) { 731 progress = true; 732 states[v] = newState; 733 } 734 } 735 736 assert(oldSize <= states.size()); 737 assert(oldSize == states.size() || progress); 738 } 739 740 if (TraceLSP) { 741 errs() << "States after meet iteration:\n"; 742 for (auto Pair : states) { 743 Instruction *v = cast<Instruction>(Pair.first); 744 PhiState state = Pair.second; 745 state.dump(); 746 v->dump(); 747 } 748 } 749 750 // Insert Phis for all conflicts 751 // We want to keep naming deterministic in the loop that follows, so 752 // sort the keys before iteration. This is useful in allowing us to 753 // write stable tests. Note that there is no invalidation issue here. 754 SmallVector<Value *, 16> Keys; 755 Keys.reserve(states.size()); 756 for (auto Pair : states) { 757 Value *V = Pair.first; 758 Keys.push_back(V); 759 } 760 std::sort(Keys.begin(), Keys.end(), order_by_name); 761 // TODO: adjust naming patterns to avoid this order of iteration dependency 762 for (Value *V : Keys) { 763 Instruction *v = cast<Instruction>(V); 764 PhiState state = states[V]; 765 assert(!isKnownBaseResult(v) && "why did it get added?"); 766 assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); 767 if (!state.isConflict()) 768 continue; 769 770 if (isa<PHINode>(v)) { 771 int num_preds = 772 std::distance(pred_begin(v->getParent()), pred_end(v->getParent())); 773 assert(num_preds > 0 && "how did we reach here"); 774 PHINode *phi = PHINode::Create(v->getType(), num_preds, "base_phi", v); 775 // Add metadata marking this as a base value 776 auto *const_1 = ConstantInt::get( 777 Type::getInt32Ty( 778 v->getParent()->getParent()->getParent()->getContext()), 779 1); 780 auto MDConst = ConstantAsMetadata::get(const_1); 781 MDNode *md = MDNode::get( 782 v->getParent()->getParent()->getParent()->getContext(), MDConst); 783 phi->setMetadata("is_base_value", md); 784 states[v] = PhiState(PhiState::Conflict, phi); 785 } else { 786 SelectInst *sel = cast<SelectInst>(v); 787 // The undef will be replaced later 788 UndefValue *undef = UndefValue::get(sel->getType()); 789 SelectInst *basesel = SelectInst::Create(sel->getCondition(), undef, 790 undef, "base_select", sel); 791 // Add metadata marking this as a base value 792 auto *const_1 = ConstantInt::get( 793 Type::getInt32Ty( 794 v->getParent()->getParent()->getParent()->getContext()), 795 1); 796 auto MDConst = ConstantAsMetadata::get(const_1); 797 MDNode *md = MDNode::get( 798 v->getParent()->getParent()->getParent()->getContext(), MDConst); 799 basesel->setMetadata("is_base_value", md); 800 states[v] = PhiState(PhiState::Conflict, basesel); 801 } 802 } 803 804 // Fixup all the inputs of the new PHIs 805 for (auto Pair : states) { 806 Instruction *v = cast<Instruction>(Pair.first); 807 PhiState state = Pair.second; 808 809 assert(!isKnownBaseResult(v) && "why did it get added?"); 810 assert(!state.isUnknown() && "Optimistic algorithm didn't complete!"); 811 if (!state.isConflict()) 812 continue; 813 814 if (PHINode *basephi = dyn_cast<PHINode>(state.getBase())) { 815 PHINode *phi = cast<PHINode>(v); 816 unsigned NumPHIValues = phi->getNumIncomingValues(); 817 for (unsigned i = 0; i < NumPHIValues; i++) { 818 Value *InVal = phi->getIncomingValue(i); 819 BasicBlock *InBB = phi->getIncomingBlock(i); 820 821 // If we've already seen InBB, add the same incoming value 822 // we added for it earlier. The IR verifier requires phi 823 // nodes with multiple entries from the same basic block 824 // to have the same incoming value for each of those 825 // entries. If we don't do this check here and basephi 826 // has a different type than base, we'll end up adding two 827 // bitcasts (and hence two distinct values) as incoming 828 // values for the same basic block. 829 830 int blockIndex = basephi->getBasicBlockIndex(InBB); 831 if (blockIndex != -1) { 832 Value *oldBase = basephi->getIncomingValue(blockIndex); 833 basephi->addIncoming(oldBase, InBB); 834 #ifndef NDEBUG 835 Value *base = findBaseOrBDV(InVal, cache); 836 if (!isKnownBaseResult(base)) { 837 // Either conflict or base. 838 assert(states.count(base)); 839 base = states[base].getBase(); 840 assert(base != nullptr && "unknown PhiState!"); 841 } 842 843 // In essense this assert states: the only way two 844 // values incoming from the same basic block may be 845 // different is by being different bitcasts of the same 846 // value. A cleanup that remains TODO is changing 847 // findBaseOrBDV to return an llvm::Value of the correct 848 // type (and still remain pure). This will remove the 849 // need to add bitcasts. 850 assert(base->stripPointerCasts() == oldBase->stripPointerCasts() && 851 "sanity -- findBaseOrBDV should be pure!"); 852 #endif 853 continue; 854 } 855 856 // Find either the defining value for the PHI or the normal base for 857 // a non-phi node 858 Value *base = findBaseOrBDV(InVal, cache); 859 if (!isKnownBaseResult(base)) { 860 // Either conflict or base. 861 assert(states.count(base)); 862 base = states[base].getBase(); 863 assert(base != nullptr && "unknown PhiState!"); 864 } 865 assert(base && "can't be null"); 866 // Must use original input BB since base may not be Instruction 867 // The cast is needed since base traversal may strip away bitcasts 868 if (base->getType() != basephi->getType()) { 869 base = new BitCastInst(base, basephi->getType(), "cast", 870 InBB->getTerminator()); 871 } 872 basephi->addIncoming(base, InBB); 873 } 874 assert(basephi->getNumIncomingValues() == NumPHIValues); 875 } else { 876 SelectInst *basesel = cast<SelectInst>(state.getBase()); 877 SelectInst *sel = cast<SelectInst>(v); 878 // Operand 1 & 2 are true, false path respectively. TODO: refactor to 879 // something more safe and less hacky. 880 for (int i = 1; i <= 2; i++) { 881 Value *InVal = sel->getOperand(i); 882 // Find either the defining value for the PHI or the normal base for 883 // a non-phi node 884 Value *base = findBaseOrBDV(InVal, cache); 885 if (!isKnownBaseResult(base)) { 886 // Either conflict or base. 887 assert(states.count(base)); 888 base = states[base].getBase(); 889 assert(base != nullptr && "unknown PhiState!"); 890 } 891 assert(base && "can't be null"); 892 // Must use original input BB since base may not be Instruction 893 // The cast is needed since base traversal may strip away bitcasts 894 if (base->getType() != basesel->getType()) { 895 base = new BitCastInst(base, basesel->getType(), "cast", basesel); 896 } 897 basesel->setOperand(i, base); 898 } 899 } 900 } 901 902 // Cache all of our results so we can cheaply reuse them 903 // NOTE: This is actually two caches: one of the base defining value 904 // relation and one of the base pointer relation! FIXME 905 for (auto item : states) { 906 Value *v = item.first; 907 Value *base = item.second.getBase(); 908 assert(v && base); 909 assert(!isKnownBaseResult(v) && "why did it get added?"); 910 911 if (TraceLSP) { 912 std::string fromstr = 913 cache.count(v) ? (cache[v]->hasName() ? cache[v]->getName() : "") 914 : "none"; 915 errs() << "Updating base value cache" 916 << " for: " << (v->hasName() ? v->getName() : "") 917 << " from: " << fromstr 918 << " to: " << (base->hasName() ? base->getName() : "") << "\n"; 919 } 920 921 assert(isKnownBaseResult(base) && 922 "must be something we 'know' is a base pointer"); 923 if (cache.count(v)) { 924 // Once we transition from the BDV relation being store in the cache to 925 // the base relation being stored, it must be stable 926 assert((!isKnownBaseResult(cache[v]) || cache[v] == base) && 927 "base relation should be stable"); 928 } 929 cache[v] = base; 930 } 931 assert(cache.find(def) != cache.end()); 932 return cache[def]; 933 } 934 935 // For a set of live pointers (base and/or derived), identify the base 936 // pointer of the object which they are derived from. This routine will 937 // mutate the IR graph as needed to make the 'base' pointer live at the 938 // definition site of 'derived'. This ensures that any use of 'derived' can 939 // also use 'base'. This may involve the insertion of a number of 940 // additional PHI nodes. 941 // 942 // preconditions: live is a set of pointer type Values 943 // 944 // side effects: may insert PHI nodes into the existing CFG, will preserve 945 // CFG, will not remove or mutate any existing nodes 946 // 947 // post condition: PointerToBase contains one (derived, base) pair for every 948 // pointer in live. Note that derived can be equal to base if the original 949 // pointer was a base pointer. 950 static void 951 findBasePointers(const StatepointLiveSetTy &live, 952 DenseMap<llvm::Value *, llvm::Value *> &PointerToBase, 953 DominatorTree *DT, DefiningValueMapTy &DVCache) { 954 // For the naming of values inserted to be deterministic - which makes for 955 // much cleaner and more stable tests - we need to assign an order to the 956 // live values. DenseSets do not provide a deterministic order across runs. 957 SmallVector<Value *, 64> Temp; 958 Temp.insert(Temp.end(), live.begin(), live.end()); 959 std::sort(Temp.begin(), Temp.end(), order_by_name); 960 for (Value *ptr : Temp) { 961 Value *base = findBasePointer(ptr, DVCache); 962 assert(base && "failed to find base pointer"); 963 PointerToBase[ptr] = base; 964 assert((!isa<Instruction>(base) || !isa<Instruction>(ptr) || 965 DT->dominates(cast<Instruction>(base)->getParent(), 966 cast<Instruction>(ptr)->getParent())) && 967 "The base we found better dominate the derived pointer"); 968 969 // If you see this trip and like to live really dangerously, the code should 970 // be correct, just with idioms the verifier can't handle. You can try 971 // disabling the verifier at your own substaintial risk. 972 assert(!isa<ConstantPointerNull>(base) && 973 "the relocation code needs adjustment to handle the relocation of " 974 "a null pointer constant without causing false positives in the " 975 "safepoint ir verifier."); 976 } 977 } 978 979 /// Find the required based pointers (and adjust the live set) for the given 980 /// parse point. 981 static void findBasePointers(DominatorTree &DT, DefiningValueMapTy &DVCache, 982 const CallSite &CS, 983 PartiallyConstructedSafepointRecord &result) { 984 DenseMap<llvm::Value *, llvm::Value *> PointerToBase; 985 findBasePointers(result.liveset, PointerToBase, &DT, DVCache); 986 987 if (PrintBasePointers) { 988 // Note: Need to print these in a stable order since this is checked in 989 // some tests. 990 errs() << "Base Pairs (w/o Relocation):\n"; 991 SmallVector<Value *, 64> Temp; 992 Temp.reserve(PointerToBase.size()); 993 for (auto Pair : PointerToBase) { 994 Temp.push_back(Pair.first); 995 } 996 std::sort(Temp.begin(), Temp.end(), order_by_name); 997 for (Value *Ptr : Temp) { 998 Value *Base = PointerToBase[Ptr]; 999 errs() << " derived %" << Ptr->getName() << " base %" << Base->getName() 1000 << "\n"; 1001 } 1002 } 1003 1004 result.PointerToBase = PointerToBase; 1005 } 1006 1007 /// Given an updated version of the dataflow liveness results, update the 1008 /// liveset and base pointer maps for the call site CS. 1009 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, 1010 const CallSite &CS, 1011 PartiallyConstructedSafepointRecord &result); 1012 1013 static void recomputeLiveInValues( 1014 Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate, 1015 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { 1016 // TODO-PERF: reuse the original liveness, then simply run the dataflow 1017 // again. The old values are still live and will help it stablize quickly. 1018 GCPtrLivenessData RevisedLivenessData; 1019 computeLiveInValues(DT, F, RevisedLivenessData); 1020 for (size_t i = 0; i < records.size(); i++) { 1021 struct PartiallyConstructedSafepointRecord &info = records[i]; 1022 const CallSite &CS = toUpdate[i]; 1023 recomputeLiveInValues(RevisedLivenessData, CS, info); 1024 } 1025 } 1026 1027 // When inserting gc.relocate calls, we need to ensure there are no uses 1028 // of the original value between the gc.statepoint and the gc.relocate call. 1029 // One case which can arise is a phi node starting one of the successor blocks. 1030 // We also need to be able to insert the gc.relocates only on the path which 1031 // goes through the statepoint. We might need to split an edge to make this 1032 // possible. 1033 static BasicBlock * 1034 normalizeForInvokeSafepoint(BasicBlock *BB, BasicBlock *InvokeParent, Pass *P) { 1035 DominatorTree *DT = nullptr; 1036 if (auto *DTP = P->getAnalysisIfAvailable<DominatorTreeWrapperPass>()) 1037 DT = &DTP->getDomTree(); 1038 1039 BasicBlock *Ret = BB; 1040 if (!BB->getUniquePredecessor()) { 1041 Ret = SplitBlockPredecessors(BB, InvokeParent, "", nullptr, DT); 1042 } 1043 1044 // Now that 'ret' has unique predecessor we can safely remove all phi nodes 1045 // from it 1046 FoldSingleEntryPHINodes(Ret); 1047 assert(!isa<PHINode>(Ret->begin())); 1048 1049 // At this point, we can safely insert a gc.relocate as the first instruction 1050 // in Ret if needed. 1051 return Ret; 1052 } 1053 1054 static int find_index(ArrayRef<Value *> livevec, Value *val) { 1055 auto itr = std::find(livevec.begin(), livevec.end(), val); 1056 assert(livevec.end() != itr); 1057 size_t index = std::distance(livevec.begin(), itr); 1058 assert(index < livevec.size()); 1059 return index; 1060 } 1061 1062 // Create new attribute set containing only attributes which can be transfered 1063 // from original call to the safepoint. 1064 static AttributeSet legalizeCallAttributes(AttributeSet AS) { 1065 AttributeSet ret; 1066 1067 for (unsigned Slot = 0; Slot < AS.getNumSlots(); Slot++) { 1068 unsigned index = AS.getSlotIndex(Slot); 1069 1070 if (index == AttributeSet::ReturnIndex || 1071 index == AttributeSet::FunctionIndex) { 1072 1073 for (auto it = AS.begin(Slot), it_end = AS.end(Slot); it != it_end; 1074 ++it) { 1075 Attribute attr = *it; 1076 1077 // Do not allow certain attributes - just skip them 1078 // Safepoint can not be read only or read none. 1079 if (attr.hasAttribute(Attribute::ReadNone) || 1080 attr.hasAttribute(Attribute::ReadOnly)) 1081 continue; 1082 1083 ret = ret.addAttributes( 1084 AS.getContext(), index, 1085 AttributeSet::get(AS.getContext(), index, AttrBuilder(attr))); 1086 } 1087 } 1088 1089 // Just skip parameter attributes for now 1090 } 1091 1092 return ret; 1093 } 1094 1095 /// Helper function to place all gc relocates necessary for the given 1096 /// statepoint. 1097 /// Inputs: 1098 /// liveVariables - list of variables to be relocated. 1099 /// liveStart - index of the first live variable. 1100 /// basePtrs - base pointers. 1101 /// statepointToken - statepoint instruction to which relocates should be 1102 /// bound. 1103 /// Builder - Llvm IR builder to be used to construct new calls. 1104 static void CreateGCRelocates(ArrayRef<llvm::Value *> LiveVariables, 1105 const int LiveStart, 1106 ArrayRef<llvm::Value *> BasePtrs, 1107 Instruction *StatepointToken, 1108 IRBuilder<> Builder) { 1109 SmallVector<Instruction *, 64> NewDefs; 1110 NewDefs.reserve(LiveVariables.size()); 1111 1112 Module *M = StatepointToken->getParent()->getParent()->getParent(); 1113 1114 for (unsigned i = 0; i < LiveVariables.size(); i++) { 1115 // We generate a (potentially) unique declaration for every pointer type 1116 // combination. This results is some blow up the function declarations in 1117 // the IR, but removes the need for argument bitcasts which shrinks the IR 1118 // greatly and makes it much more readable. 1119 SmallVector<Type *, 1> Types; // one per 'any' type 1120 // All gc_relocate are set to i8 addrspace(1)* type. This could help avoid 1121 // cases where the actual value's type mangling is not supported by llvm. A 1122 // bitcast is added later to convert gc_relocate to the actual value's type. 1123 Types.push_back(Type::getInt8PtrTy(M->getContext(), 1)); 1124 Value *GCRelocateDecl = Intrinsic::getDeclaration( 1125 M, Intrinsic::experimental_gc_relocate, Types); 1126 1127 // Generate the gc.relocate call and save the result 1128 Value *BaseIdx = 1129 ConstantInt::get(Type::getInt32Ty(M->getContext()), 1130 LiveStart + find_index(LiveVariables, BasePtrs[i])); 1131 Value *LiveIdx = ConstantInt::get( 1132 Type::getInt32Ty(M->getContext()), 1133 LiveStart + find_index(LiveVariables, LiveVariables[i])); 1134 1135 // only specify a debug name if we can give a useful one 1136 Value *Reloc = Builder.CreateCall( 1137 GCRelocateDecl, {StatepointToken, BaseIdx, LiveIdx}, 1138 LiveVariables[i]->hasName() ? LiveVariables[i]->getName() + ".relocated" 1139 : ""); 1140 // Trick CodeGen into thinking there are lots of free registers at this 1141 // fake call. 1142 cast<CallInst>(Reloc)->setCallingConv(CallingConv::Cold); 1143 1144 NewDefs.push_back(cast<Instruction>(Reloc)); 1145 } 1146 assert(NewDefs.size() == LiveVariables.size() && 1147 "missing or extra redefinition at safepoint"); 1148 } 1149 1150 static void 1151 makeStatepointExplicitImpl(const CallSite &CS, /* to replace */ 1152 const SmallVectorImpl<llvm::Value *> &basePtrs, 1153 const SmallVectorImpl<llvm::Value *> &liveVariables, 1154 Pass *P, 1155 PartiallyConstructedSafepointRecord &result) { 1156 assert(basePtrs.size() == liveVariables.size()); 1157 assert(isStatepoint(CS) && 1158 "This method expects to be rewriting a statepoint"); 1159 1160 BasicBlock *BB = CS.getInstruction()->getParent(); 1161 assert(BB); 1162 Function *F = BB->getParent(); 1163 assert(F && "must be set"); 1164 Module *M = F->getParent(); 1165 (void)M; 1166 assert(M && "must be set"); 1167 1168 // We're not changing the function signature of the statepoint since the gc 1169 // arguments go into the var args section. 1170 Function *gc_statepoint_decl = CS.getCalledFunction(); 1171 1172 // Then go ahead and use the builder do actually do the inserts. We insert 1173 // immediately before the previous instruction under the assumption that all 1174 // arguments will be available here. We can't insert afterwards since we may 1175 // be replacing a terminator. 1176 Instruction *insertBefore = CS.getInstruction(); 1177 IRBuilder<> Builder(insertBefore); 1178 // Copy all of the arguments from the original statepoint - this includes the 1179 // target, call args, and deopt args 1180 SmallVector<llvm::Value *, 64> args; 1181 args.insert(args.end(), CS.arg_begin(), CS.arg_end()); 1182 // TODO: Clear the 'needs rewrite' flag 1183 1184 // add all the pointers to be relocated (gc arguments) 1185 // Capture the start of the live variable list for use in the gc_relocates 1186 const int live_start = args.size(); 1187 args.insert(args.end(), liveVariables.begin(), liveVariables.end()); 1188 1189 // Create the statepoint given all the arguments 1190 Instruction *token = nullptr; 1191 AttributeSet return_attributes; 1192 if (CS.isCall()) { 1193 CallInst *toReplace = cast<CallInst>(CS.getInstruction()); 1194 CallInst *call = 1195 Builder.CreateCall(gc_statepoint_decl, args, "safepoint_token"); 1196 call->setTailCall(toReplace->isTailCall()); 1197 call->setCallingConv(toReplace->getCallingConv()); 1198 1199 // Currently we will fail on parameter attributes and on certain 1200 // function attributes. 1201 AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes()); 1202 // In case if we can handle this set of sttributes - set up function attrs 1203 // directly on statepoint and return attrs later for gc_result intrinsic. 1204 call->setAttributes(new_attrs.getFnAttributes()); 1205 return_attributes = new_attrs.getRetAttributes(); 1206 1207 token = call; 1208 1209 // Put the following gc_result and gc_relocate calls immediately after the 1210 // the old call (which we're about to delete) 1211 BasicBlock::iterator next(toReplace); 1212 assert(BB->end() != next && "not a terminator, must have next"); 1213 next++; 1214 Instruction *IP = &*(next); 1215 Builder.SetInsertPoint(IP); 1216 Builder.SetCurrentDebugLocation(IP->getDebugLoc()); 1217 1218 } else { 1219 InvokeInst *toReplace = cast<InvokeInst>(CS.getInstruction()); 1220 1221 // Insert the new invoke into the old block. We'll remove the old one in a 1222 // moment at which point this will become the new terminator for the 1223 // original block. 1224 InvokeInst *invoke = InvokeInst::Create( 1225 gc_statepoint_decl, toReplace->getNormalDest(), 1226 toReplace->getUnwindDest(), args, "", toReplace->getParent()); 1227 invoke->setCallingConv(toReplace->getCallingConv()); 1228 1229 // Currently we will fail on parameter attributes and on certain 1230 // function attributes. 1231 AttributeSet new_attrs = legalizeCallAttributes(toReplace->getAttributes()); 1232 // In case if we can handle this set of sttributes - set up function attrs 1233 // directly on statepoint and return attrs later for gc_result intrinsic. 1234 invoke->setAttributes(new_attrs.getFnAttributes()); 1235 return_attributes = new_attrs.getRetAttributes(); 1236 1237 token = invoke; 1238 1239 // Generate gc relocates in exceptional path 1240 BasicBlock *unwindBlock = toReplace->getUnwindDest(); 1241 assert(!isa<PHINode>(unwindBlock->begin()) && 1242 unwindBlock->getUniquePredecessor() && 1243 "can't safely insert in this block!"); 1244 1245 Instruction *IP = &*(unwindBlock->getFirstInsertionPt()); 1246 Builder.SetInsertPoint(IP); 1247 Builder.SetCurrentDebugLocation(toReplace->getDebugLoc()); 1248 1249 // Extract second element from landingpad return value. We will attach 1250 // exceptional gc relocates to it. 1251 const unsigned idx = 1; 1252 Instruction *exceptional_token = 1253 cast<Instruction>(Builder.CreateExtractValue( 1254 unwindBlock->getLandingPadInst(), idx, "relocate_token")); 1255 result.UnwindToken = exceptional_token; 1256 1257 // Just throw away return value. We will use the one we got for normal 1258 // block. 1259 (void)CreateGCRelocates(liveVariables, live_start, basePtrs, 1260 exceptional_token, Builder); 1261 1262 // Generate gc relocates and returns for normal block 1263 BasicBlock *normalDest = toReplace->getNormalDest(); 1264 assert(!isa<PHINode>(normalDest->begin()) && 1265 normalDest->getUniquePredecessor() && 1266 "can't safely insert in this block!"); 1267 1268 IP = &*(normalDest->getFirstInsertionPt()); 1269 Builder.SetInsertPoint(IP); 1270 1271 // gc relocates will be generated later as if it were regular call 1272 // statepoint 1273 } 1274 assert(token); 1275 1276 // Take the name of the original value call if it had one. 1277 token->takeName(CS.getInstruction()); 1278 1279 // The GCResult is already inserted, we just need to find it 1280 #ifndef NDEBUG 1281 Instruction *toReplace = CS.getInstruction(); 1282 assert((toReplace->hasNUses(0) || toReplace->hasNUses(1)) && 1283 "only valid use before rewrite is gc.result"); 1284 assert(!toReplace->hasOneUse() || 1285 isGCResult(cast<Instruction>(*toReplace->user_begin()))); 1286 #endif 1287 1288 // Update the gc.result of the original statepoint (if any) to use the newly 1289 // inserted statepoint. This is safe to do here since the token can't be 1290 // considered a live reference. 1291 CS.getInstruction()->replaceAllUsesWith(token); 1292 1293 result.StatepointToken = token; 1294 1295 // Second, create a gc.relocate for every live variable 1296 CreateGCRelocates(liveVariables, live_start, basePtrs, token, Builder); 1297 } 1298 1299 namespace { 1300 struct name_ordering { 1301 Value *base; 1302 Value *derived; 1303 bool operator()(name_ordering const &a, name_ordering const &b) { 1304 return -1 == a.derived->getName().compare(b.derived->getName()); 1305 } 1306 }; 1307 } 1308 static void stablize_order(SmallVectorImpl<Value *> &basevec, 1309 SmallVectorImpl<Value *> &livevec) { 1310 assert(basevec.size() == livevec.size()); 1311 1312 SmallVector<name_ordering, 64> temp; 1313 for (size_t i = 0; i < basevec.size(); i++) { 1314 name_ordering v; 1315 v.base = basevec[i]; 1316 v.derived = livevec[i]; 1317 temp.push_back(v); 1318 } 1319 std::sort(temp.begin(), temp.end(), name_ordering()); 1320 for (size_t i = 0; i < basevec.size(); i++) { 1321 basevec[i] = temp[i].base; 1322 livevec[i] = temp[i].derived; 1323 } 1324 } 1325 1326 // Replace an existing gc.statepoint with a new one and a set of gc.relocates 1327 // which make the relocations happening at this safepoint explicit. 1328 // 1329 // WARNING: Does not do any fixup to adjust users of the original live 1330 // values. That's the callers responsibility. 1331 static void 1332 makeStatepointExplicit(DominatorTree &DT, const CallSite &CS, Pass *P, 1333 PartiallyConstructedSafepointRecord &result) { 1334 auto liveset = result.liveset; 1335 auto PointerToBase = result.PointerToBase; 1336 1337 // Convert to vector for efficient cross referencing. 1338 SmallVector<Value *, 64> basevec, livevec; 1339 livevec.reserve(liveset.size()); 1340 basevec.reserve(liveset.size()); 1341 for (Value *L : liveset) { 1342 livevec.push_back(L); 1343 1344 assert(PointerToBase.find(L) != PointerToBase.end()); 1345 Value *base = PointerToBase[L]; 1346 basevec.push_back(base); 1347 } 1348 assert(livevec.size() == basevec.size()); 1349 1350 // To make the output IR slightly more stable (for use in diffs), ensure a 1351 // fixed order of the values in the safepoint (by sorting the value name). 1352 // The order is otherwise meaningless. 1353 stablize_order(basevec, livevec); 1354 1355 // Do the actual rewriting and delete the old statepoint 1356 makeStatepointExplicitImpl(CS, basevec, livevec, P, result); 1357 CS.getInstruction()->eraseFromParent(); 1358 } 1359 1360 // Helper function for the relocationViaAlloca. 1361 // It receives iterator to the statepoint gc relocates and emits store to the 1362 // assigned 1363 // location (via allocaMap) for the each one of them. 1364 // Add visited values into the visitedLiveValues set we will later use them 1365 // for sanity check. 1366 static void 1367 insertRelocationStores(iterator_range<Value::user_iterator> GCRelocs, 1368 DenseMap<Value *, Value *> &AllocaMap, 1369 DenseSet<Value *> &VisitedLiveValues) { 1370 1371 for (User *U : GCRelocs) { 1372 if (!isa<IntrinsicInst>(U)) 1373 continue; 1374 1375 IntrinsicInst *RelocatedValue = cast<IntrinsicInst>(U); 1376 1377 // We only care about relocates 1378 if (RelocatedValue->getIntrinsicID() != 1379 Intrinsic::experimental_gc_relocate) { 1380 continue; 1381 } 1382 1383 GCRelocateOperands RelocateOperands(RelocatedValue); 1384 Value *OriginalValue = 1385 const_cast<Value *>(RelocateOperands.getDerivedPtr()); 1386 assert(AllocaMap.count(OriginalValue)); 1387 Value *Alloca = AllocaMap[OriginalValue]; 1388 1389 // Emit store into the related alloca 1390 // All gc_relocate are i8 addrspace(1)* typed, and it must be bitcasted to 1391 // the correct type according to alloca. 1392 assert(RelocatedValue->getNextNode() && "Should always have one since it's not a terminator"); 1393 IRBuilder<> Builder(RelocatedValue->getNextNode()); 1394 Value *CastedRelocatedValue = 1395 Builder.CreateBitCast(RelocatedValue, cast<AllocaInst>(Alloca)->getAllocatedType(), 1396 RelocatedValue->hasName() ? RelocatedValue->getName() + ".casted" : ""); 1397 1398 StoreInst *Store = new StoreInst(CastedRelocatedValue, Alloca); 1399 Store->insertAfter(cast<Instruction>(CastedRelocatedValue)); 1400 1401 #ifndef NDEBUG 1402 VisitedLiveValues.insert(OriginalValue); 1403 #endif 1404 } 1405 } 1406 1407 // Helper function for the "relocationViaAlloca". Similar to the 1408 // "insertRelocationStores" but works for rematerialized values. 1409 static void 1410 insertRematerializationStores( 1411 RematerializedValueMapTy RematerializedValues, 1412 DenseMap<Value *, Value *> &AllocaMap, 1413 DenseSet<Value *> &VisitedLiveValues) { 1414 1415 for (auto RematerializedValuePair: RematerializedValues) { 1416 Instruction *RematerializedValue = RematerializedValuePair.first; 1417 Value *OriginalValue = RematerializedValuePair.second; 1418 1419 assert(AllocaMap.count(OriginalValue) && 1420 "Can not find alloca for rematerialized value"); 1421 Value *Alloca = AllocaMap[OriginalValue]; 1422 1423 StoreInst *Store = new StoreInst(RematerializedValue, Alloca); 1424 Store->insertAfter(RematerializedValue); 1425 1426 #ifndef NDEBUG 1427 VisitedLiveValues.insert(OriginalValue); 1428 #endif 1429 } 1430 } 1431 1432 /// do all the relocation update via allocas and mem2reg 1433 static void relocationViaAlloca( 1434 Function &F, DominatorTree &DT, ArrayRef<Value *> Live, 1435 ArrayRef<struct PartiallyConstructedSafepointRecord> Records) { 1436 #ifndef NDEBUG 1437 // record initial number of (static) allocas; we'll check we have the same 1438 // number when we get done. 1439 int InitialAllocaNum = 0; 1440 for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; 1441 I++) 1442 if (isa<AllocaInst>(*I)) 1443 InitialAllocaNum++; 1444 #endif 1445 1446 // TODO-PERF: change data structures, reserve 1447 DenseMap<Value *, Value *> AllocaMap; 1448 SmallVector<AllocaInst *, 200> PromotableAllocas; 1449 // Used later to chack that we have enough allocas to store all values 1450 std::size_t NumRematerializedValues = 0; 1451 PromotableAllocas.reserve(Live.size()); 1452 1453 // Emit alloca for "LiveValue" and record it in "allocaMap" and 1454 // "PromotableAllocas" 1455 auto emitAllocaFor = [&](Value *LiveValue) { 1456 AllocaInst *Alloca = new AllocaInst(LiveValue->getType(), "", 1457 F.getEntryBlock().getFirstNonPHI()); 1458 AllocaMap[LiveValue] = Alloca; 1459 PromotableAllocas.push_back(Alloca); 1460 }; 1461 1462 // emit alloca for each live gc pointer 1463 for (unsigned i = 0; i < Live.size(); i++) { 1464 emitAllocaFor(Live[i]); 1465 } 1466 1467 // emit allocas for rematerialized values 1468 for (size_t i = 0; i < Records.size(); i++) { 1469 const struct PartiallyConstructedSafepointRecord &Info = Records[i]; 1470 1471 for (auto RematerializedValuePair : Info.RematerializedValues) { 1472 Value *OriginalValue = RematerializedValuePair.second; 1473 if (AllocaMap.count(OriginalValue) != 0) 1474 continue; 1475 1476 emitAllocaFor(OriginalValue); 1477 ++NumRematerializedValues; 1478 } 1479 } 1480 1481 // The next two loops are part of the same conceptual operation. We need to 1482 // insert a store to the alloca after the original def and at each 1483 // redefinition. We need to insert a load before each use. These are split 1484 // into distinct loops for performance reasons. 1485 1486 // update gc pointer after each statepoint 1487 // either store a relocated value or null (if no relocated value found for 1488 // this gc pointer and it is not a gc_result) 1489 // this must happen before we update the statepoint with load of alloca 1490 // otherwise we lose the link between statepoint and old def 1491 for (size_t i = 0; i < Records.size(); i++) { 1492 const struct PartiallyConstructedSafepointRecord &Info = Records[i]; 1493 Value *Statepoint = Info.StatepointToken; 1494 1495 // This will be used for consistency check 1496 DenseSet<Value *> VisitedLiveValues; 1497 1498 // Insert stores for normal statepoint gc relocates 1499 insertRelocationStores(Statepoint->users(), AllocaMap, VisitedLiveValues); 1500 1501 // In case if it was invoke statepoint 1502 // we will insert stores for exceptional path gc relocates. 1503 if (isa<InvokeInst>(Statepoint)) { 1504 insertRelocationStores(Info.UnwindToken->users(), AllocaMap, 1505 VisitedLiveValues); 1506 } 1507 1508 // Do similar thing with rematerialized values 1509 insertRematerializationStores(Info.RematerializedValues, AllocaMap, 1510 VisitedLiveValues); 1511 1512 if (ClobberNonLive) { 1513 // As a debuging aid, pretend that an unrelocated pointer becomes null at 1514 // the gc.statepoint. This will turn some subtle GC problems into 1515 // slightly easier to debug SEGVs. Note that on large IR files with 1516 // lots of gc.statepoints this is extremely costly both memory and time 1517 // wise. 1518 SmallVector<AllocaInst *, 64> ToClobber; 1519 for (auto Pair : AllocaMap) { 1520 Value *Def = Pair.first; 1521 AllocaInst *Alloca = cast<AllocaInst>(Pair.second); 1522 1523 // This value was relocated 1524 if (VisitedLiveValues.count(Def)) { 1525 continue; 1526 } 1527 ToClobber.push_back(Alloca); 1528 } 1529 1530 auto InsertClobbersAt = [&](Instruction *IP) { 1531 for (auto *AI : ToClobber) { 1532 auto AIType = cast<PointerType>(AI->getType()); 1533 auto PT = cast<PointerType>(AIType->getElementType()); 1534 Constant *CPN = ConstantPointerNull::get(PT); 1535 StoreInst *Store = new StoreInst(CPN, AI); 1536 Store->insertBefore(IP); 1537 } 1538 }; 1539 1540 // Insert the clobbering stores. These may get intermixed with the 1541 // gc.results and gc.relocates, but that's fine. 1542 if (auto II = dyn_cast<InvokeInst>(Statepoint)) { 1543 InsertClobbersAt(II->getNormalDest()->getFirstInsertionPt()); 1544 InsertClobbersAt(II->getUnwindDest()->getFirstInsertionPt()); 1545 } else { 1546 BasicBlock::iterator Next(cast<CallInst>(Statepoint)); 1547 Next++; 1548 InsertClobbersAt(Next); 1549 } 1550 } 1551 } 1552 // update use with load allocas and add store for gc_relocated 1553 for (auto Pair : AllocaMap) { 1554 Value *Def = Pair.first; 1555 Value *Alloca = Pair.second; 1556 1557 // we pre-record the uses of allocas so that we dont have to worry about 1558 // later update 1559 // that change the user information. 1560 SmallVector<Instruction *, 20> Uses; 1561 // PERF: trade a linear scan for repeated reallocation 1562 Uses.reserve(std::distance(Def->user_begin(), Def->user_end())); 1563 for (User *U : Def->users()) { 1564 if (!isa<ConstantExpr>(U)) { 1565 // If the def has a ConstantExpr use, then the def is either a 1566 // ConstantExpr use itself or null. In either case 1567 // (recursively in the first, directly in the second), the oop 1568 // it is ultimately dependent on is null and this particular 1569 // use does not need to be fixed up. 1570 Uses.push_back(cast<Instruction>(U)); 1571 } 1572 } 1573 1574 std::sort(Uses.begin(), Uses.end()); 1575 auto Last = std::unique(Uses.begin(), Uses.end()); 1576 Uses.erase(Last, Uses.end()); 1577 1578 for (Instruction *Use : Uses) { 1579 if (isa<PHINode>(Use)) { 1580 PHINode *Phi = cast<PHINode>(Use); 1581 for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) { 1582 if (Def == Phi->getIncomingValue(i)) { 1583 LoadInst *Load = new LoadInst( 1584 Alloca, "", Phi->getIncomingBlock(i)->getTerminator()); 1585 Phi->setIncomingValue(i, Load); 1586 } 1587 } 1588 } else { 1589 LoadInst *Load = new LoadInst(Alloca, "", Use); 1590 Use->replaceUsesOfWith(Def, Load); 1591 } 1592 } 1593 1594 // emit store for the initial gc value 1595 // store must be inserted after load, otherwise store will be in alloca's 1596 // use list and an extra load will be inserted before it 1597 StoreInst *Store = new StoreInst(Def, Alloca); 1598 if (Instruction *Inst = dyn_cast<Instruction>(Def)) { 1599 if (InvokeInst *Invoke = dyn_cast<InvokeInst>(Inst)) { 1600 // InvokeInst is a TerminatorInst so the store need to be inserted 1601 // into its normal destination block. 1602 BasicBlock *NormalDest = Invoke->getNormalDest(); 1603 Store->insertBefore(NormalDest->getFirstNonPHI()); 1604 } else { 1605 assert(!Inst->isTerminator() && 1606 "The only TerminatorInst that can produce a value is " 1607 "InvokeInst which is handled above."); 1608 Store->insertAfter(Inst); 1609 } 1610 } else { 1611 assert(isa<Argument>(Def)); 1612 Store->insertAfter(cast<Instruction>(Alloca)); 1613 } 1614 } 1615 1616 assert(PromotableAllocas.size() == Live.size() + NumRematerializedValues && 1617 "we must have the same allocas with lives"); 1618 if (!PromotableAllocas.empty()) { 1619 // apply mem2reg to promote alloca to SSA 1620 PromoteMemToReg(PromotableAllocas, DT); 1621 } 1622 1623 #ifndef NDEBUG 1624 for (auto I = F.getEntryBlock().begin(), E = F.getEntryBlock().end(); I != E; 1625 I++) 1626 if (isa<AllocaInst>(*I)) 1627 InitialAllocaNum--; 1628 assert(InitialAllocaNum == 0 && "We must not introduce any extra allocas"); 1629 #endif 1630 } 1631 1632 /// Implement a unique function which doesn't require we sort the input 1633 /// vector. Doing so has the effect of changing the output of a couple of 1634 /// tests in ways which make them less useful in testing fused safepoints. 1635 template <typename T> static void unique_unsorted(SmallVectorImpl<T> &Vec) { 1636 DenseSet<T> Seen; 1637 SmallVector<T, 128> TempVec; 1638 TempVec.reserve(Vec.size()); 1639 for (auto Element : Vec) 1640 TempVec.push_back(Element); 1641 Vec.clear(); 1642 for (auto V : TempVec) { 1643 if (Seen.insert(V).second) { 1644 Vec.push_back(V); 1645 } 1646 } 1647 } 1648 1649 /// Insert holders so that each Value is obviously live through the entire 1650 /// lifetime of the call. 1651 static void insertUseHolderAfter(CallSite &CS, const ArrayRef<Value *> Values, 1652 SmallVectorImpl<CallInst *> &Holders) { 1653 if (Values.empty()) 1654 // No values to hold live, might as well not insert the empty holder 1655 return; 1656 1657 Module *M = CS.getInstruction()->getParent()->getParent()->getParent(); 1658 // Use a dummy vararg function to actually hold the values live 1659 Function *Func = cast<Function>(M->getOrInsertFunction( 1660 "__tmp_use", FunctionType::get(Type::getVoidTy(M->getContext()), true))); 1661 if (CS.isCall()) { 1662 // For call safepoints insert dummy calls right after safepoint 1663 BasicBlock::iterator Next(CS.getInstruction()); 1664 Next++; 1665 Holders.push_back(CallInst::Create(Func, Values, "", Next)); 1666 return; 1667 } 1668 // For invoke safepooints insert dummy calls both in normal and 1669 // exceptional destination blocks 1670 auto *II = cast<InvokeInst>(CS.getInstruction()); 1671 Holders.push_back(CallInst::Create( 1672 Func, Values, "", II->getNormalDest()->getFirstInsertionPt())); 1673 Holders.push_back(CallInst::Create( 1674 Func, Values, "", II->getUnwindDest()->getFirstInsertionPt())); 1675 } 1676 1677 static void findLiveReferences( 1678 Function &F, DominatorTree &DT, Pass *P, ArrayRef<CallSite> toUpdate, 1679 MutableArrayRef<struct PartiallyConstructedSafepointRecord> records) { 1680 GCPtrLivenessData OriginalLivenessData; 1681 computeLiveInValues(DT, F, OriginalLivenessData); 1682 for (size_t i = 0; i < records.size(); i++) { 1683 struct PartiallyConstructedSafepointRecord &info = records[i]; 1684 const CallSite &CS = toUpdate[i]; 1685 analyzeParsePointLiveness(DT, OriginalLivenessData, CS, info); 1686 } 1687 } 1688 1689 /// Remove any vector of pointers from the liveset by scalarizing them over the 1690 /// statepoint instruction. Adds the scalarized pieces to the liveset. It 1691 /// would be preferrable to include the vector in the statepoint itself, but 1692 /// the lowering code currently does not handle that. Extending it would be 1693 /// slightly non-trivial since it requires a format change. Given how rare 1694 /// such cases are (for the moment?) scalarizing is an acceptable comprimise. 1695 static void splitVectorValues(Instruction *StatepointInst, 1696 StatepointLiveSetTy &LiveSet, DominatorTree &DT) { 1697 SmallVector<Value *, 16> ToSplit; 1698 for (Value *V : LiveSet) 1699 if (isa<VectorType>(V->getType())) 1700 ToSplit.push_back(V); 1701 1702 if (ToSplit.empty()) 1703 return; 1704 1705 Function &F = *(StatepointInst->getParent()->getParent()); 1706 1707 DenseMap<Value *, AllocaInst *> AllocaMap; 1708 // First is normal return, second is exceptional return (invoke only) 1709 DenseMap<Value *, std::pair<Value *, Value *>> Replacements; 1710 for (Value *V : ToSplit) { 1711 LiveSet.erase(V); 1712 1713 AllocaInst *Alloca = 1714 new AllocaInst(V->getType(), "", F.getEntryBlock().getFirstNonPHI()); 1715 AllocaMap[V] = Alloca; 1716 1717 VectorType *VT = cast<VectorType>(V->getType()); 1718 IRBuilder<> Builder(StatepointInst); 1719 SmallVector<Value *, 16> Elements; 1720 for (unsigned i = 0; i < VT->getNumElements(); i++) 1721 Elements.push_back(Builder.CreateExtractElement(V, Builder.getInt32(i))); 1722 LiveSet.insert(Elements.begin(), Elements.end()); 1723 1724 auto InsertVectorReform = [&](Instruction *IP) { 1725 Builder.SetInsertPoint(IP); 1726 Builder.SetCurrentDebugLocation(IP->getDebugLoc()); 1727 Value *ResultVec = UndefValue::get(VT); 1728 for (unsigned i = 0; i < VT->getNumElements(); i++) 1729 ResultVec = Builder.CreateInsertElement(ResultVec, Elements[i], 1730 Builder.getInt32(i)); 1731 return ResultVec; 1732 }; 1733 1734 if (isa<CallInst>(StatepointInst)) { 1735 BasicBlock::iterator Next(StatepointInst); 1736 Next++; 1737 Instruction *IP = &*(Next); 1738 Replacements[V].first = InsertVectorReform(IP); 1739 Replacements[V].second = nullptr; 1740 } else { 1741 InvokeInst *Invoke = cast<InvokeInst>(StatepointInst); 1742 // We've already normalized - check that we don't have shared destination 1743 // blocks 1744 BasicBlock *NormalDest = Invoke->getNormalDest(); 1745 assert(!isa<PHINode>(NormalDest->begin())); 1746 BasicBlock *UnwindDest = Invoke->getUnwindDest(); 1747 assert(!isa<PHINode>(UnwindDest->begin())); 1748 // Insert insert element sequences in both successors 1749 Instruction *IP = &*(NormalDest->getFirstInsertionPt()); 1750 Replacements[V].first = InsertVectorReform(IP); 1751 IP = &*(UnwindDest->getFirstInsertionPt()); 1752 Replacements[V].second = InsertVectorReform(IP); 1753 } 1754 } 1755 for (Value *V : ToSplit) { 1756 AllocaInst *Alloca = AllocaMap[V]; 1757 1758 // Capture all users before we start mutating use lists 1759 SmallVector<Instruction *, 16> Users; 1760 for (User *U : V->users()) 1761 Users.push_back(cast<Instruction>(U)); 1762 1763 for (Instruction *I : Users) { 1764 if (auto Phi = dyn_cast<PHINode>(I)) { 1765 for (unsigned i = 0; i < Phi->getNumIncomingValues(); i++) 1766 if (V == Phi->getIncomingValue(i)) { 1767 LoadInst *Load = new LoadInst( 1768 Alloca, "", Phi->getIncomingBlock(i)->getTerminator()); 1769 Phi->setIncomingValue(i, Load); 1770 } 1771 } else { 1772 LoadInst *Load = new LoadInst(Alloca, "", I); 1773 I->replaceUsesOfWith(V, Load); 1774 } 1775 } 1776 1777 // Store the original value and the replacement value into the alloca 1778 StoreInst *Store = new StoreInst(V, Alloca); 1779 if (auto I = dyn_cast<Instruction>(V)) 1780 Store->insertAfter(I); 1781 else 1782 Store->insertAfter(Alloca); 1783 1784 // Normal return for invoke, or call return 1785 Instruction *Replacement = cast<Instruction>(Replacements[V].first); 1786 (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); 1787 // Unwind return for invoke only 1788 Replacement = cast_or_null<Instruction>(Replacements[V].second); 1789 if (Replacement) 1790 (new StoreInst(Replacement, Alloca))->insertAfter(Replacement); 1791 } 1792 1793 // apply mem2reg to promote alloca to SSA 1794 SmallVector<AllocaInst *, 16> Allocas; 1795 for (Value *V : ToSplit) 1796 Allocas.push_back(AllocaMap[V]); 1797 PromoteMemToReg(Allocas, DT); 1798 } 1799 1800 // Helper function for the "rematerializeLiveValues". It walks use chain 1801 // starting from the "CurrentValue" until it meets "BaseValue". Only "simple" 1802 // values are visited (currently it is GEP's and casts). Returns true if it 1803 // sucessfully reached "BaseValue" and false otherwise. 1804 // Fills "ChainToBase" array with all visited values. "BaseValue" is not 1805 // recorded. 1806 static bool findRematerializableChainToBasePointer( 1807 SmallVectorImpl<Instruction*> &ChainToBase, 1808 Value *CurrentValue, Value *BaseValue) { 1809 1810 // We have found a base value 1811 if (CurrentValue == BaseValue) { 1812 return true; 1813 } 1814 1815 if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(CurrentValue)) { 1816 ChainToBase.push_back(GEP); 1817 return findRematerializableChainToBasePointer(ChainToBase, 1818 GEP->getPointerOperand(), 1819 BaseValue); 1820 } 1821 1822 if (CastInst *CI = dyn_cast<CastInst>(CurrentValue)) { 1823 Value *Def = CI->stripPointerCasts(); 1824 1825 // This two checks are basically similar. First one is here for the 1826 // consistency with findBasePointers logic. 1827 assert(!isa<CastInst>(Def) && "not a pointer cast found"); 1828 if (!CI->isNoopCast(CI->getModule()->getDataLayout())) 1829 return false; 1830 1831 ChainToBase.push_back(CI); 1832 return findRematerializableChainToBasePointer(ChainToBase, Def, BaseValue); 1833 } 1834 1835 // Not supported instruction in the chain 1836 return false; 1837 } 1838 1839 // Helper function for the "rematerializeLiveValues". Compute cost of the use 1840 // chain we are going to rematerialize. 1841 static unsigned 1842 chainToBasePointerCost(SmallVectorImpl<Instruction*> &Chain, 1843 TargetTransformInfo &TTI) { 1844 unsigned Cost = 0; 1845 1846 for (Instruction *Instr : Chain) { 1847 if (CastInst *CI = dyn_cast<CastInst>(Instr)) { 1848 assert(CI->isNoopCast(CI->getModule()->getDataLayout()) && 1849 "non noop cast is found during rematerialization"); 1850 1851 Type *SrcTy = CI->getOperand(0)->getType(); 1852 Cost += TTI.getCastInstrCost(CI->getOpcode(), CI->getType(), SrcTy); 1853 1854 } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Instr)) { 1855 // Cost of the address calculation 1856 Type *ValTy = GEP->getPointerOperandType()->getPointerElementType(); 1857 Cost += TTI.getAddressComputationCost(ValTy); 1858 1859 // And cost of the GEP itself 1860 // TODO: Use TTI->getGEPCost here (it exists, but appears to be not 1861 // allowed for the external usage) 1862 if (!GEP->hasAllConstantIndices()) 1863 Cost += 2; 1864 1865 } else { 1866 llvm_unreachable("unsupported instruciton type during rematerialization"); 1867 } 1868 } 1869 1870 return Cost; 1871 } 1872 1873 // From the statepoint liveset pick values that are cheaper to recompute then to 1874 // relocate. Remove this values from the liveset, rematerialize them after 1875 // statepoint and record them in "Info" structure. Note that similar to 1876 // relocated values we don't do any user adjustments here. 1877 static void rematerializeLiveValues(CallSite CS, 1878 PartiallyConstructedSafepointRecord &Info, 1879 TargetTransformInfo &TTI) { 1880 const int ChainLengthThreshold = 10; 1881 1882 // Record values we are going to delete from this statepoint live set. 1883 // We can not di this in following loop due to iterator invalidation. 1884 SmallVector<Value *, 32> LiveValuesToBeDeleted; 1885 1886 for (Value *LiveValue: Info.liveset) { 1887 // For each live pointer find it's defining chain 1888 SmallVector<Instruction *, 3> ChainToBase; 1889 assert(Info.PointerToBase.find(LiveValue) != Info.PointerToBase.end()); 1890 bool FoundChain = 1891 findRematerializableChainToBasePointer(ChainToBase, 1892 LiveValue, 1893 Info.PointerToBase[LiveValue]); 1894 // Nothing to do, or chain is too long 1895 if (!FoundChain || 1896 ChainToBase.size() == 0 || 1897 ChainToBase.size() > ChainLengthThreshold) 1898 continue; 1899 1900 // Compute cost of this chain 1901 unsigned Cost = chainToBasePointerCost(ChainToBase, TTI); 1902 // TODO: We can also account for cases when we will be able to remove some 1903 // of the rematerialized values by later optimization passes. I.e if 1904 // we rematerialized several intersecting chains. Or if original values 1905 // don't have any uses besides this statepoint. 1906 1907 // For invokes we need to rematerialize each chain twice - for normal and 1908 // for unwind basic blocks. Model this by multiplying cost by two. 1909 if (CS.isInvoke()) { 1910 Cost *= 2; 1911 } 1912 // If it's too expensive - skip it 1913 if (Cost >= RematerializationThreshold) 1914 continue; 1915 1916 // Remove value from the live set 1917 LiveValuesToBeDeleted.push_back(LiveValue); 1918 1919 // Clone instructions and record them inside "Info" structure 1920 1921 // Walk backwards to visit top-most instructions first 1922 std::reverse(ChainToBase.begin(), ChainToBase.end()); 1923 1924 // Utility function which clones all instructions from "ChainToBase" 1925 // and inserts them before "InsertBefore". Returns rematerialized value 1926 // which should be used after statepoint. 1927 auto rematerializeChain = [&ChainToBase](Instruction *InsertBefore) { 1928 Instruction *LastClonedValue = nullptr; 1929 Instruction *LastValue = nullptr; 1930 for (Instruction *Instr: ChainToBase) { 1931 // Only GEP's and casts are suported as we need to be careful to not 1932 // introduce any new uses of pointers not in the liveset. 1933 // Note that it's fine to introduce new uses of pointers which were 1934 // otherwise not used after this statepoint. 1935 assert(isa<GetElementPtrInst>(Instr) || isa<CastInst>(Instr)); 1936 1937 Instruction *ClonedValue = Instr->clone(); 1938 ClonedValue->insertBefore(InsertBefore); 1939 ClonedValue->setName(Instr->getName() + ".remat"); 1940 1941 // If it is not first instruction in the chain then it uses previously 1942 // cloned value. We should update it to use cloned value. 1943 if (LastClonedValue) { 1944 assert(LastValue); 1945 ClonedValue->replaceUsesOfWith(LastValue, LastClonedValue); 1946 #ifndef NDEBUG 1947 // Assert that cloned instruction does not use any instructions 1948 // other than LastClonedValue 1949 for (auto OpValue: ClonedValue->operand_values()) { 1950 if (isa<Instruction>(OpValue)) 1951 assert(OpValue == LastClonedValue && 1952 "unexpected use found in rematerialized value"); 1953 } 1954 #endif 1955 } 1956 1957 LastClonedValue = ClonedValue; 1958 LastValue = Instr; 1959 } 1960 assert(LastClonedValue); 1961 return LastClonedValue; 1962 }; 1963 1964 // Different cases for calls and invokes. For invokes we need to clone 1965 // instructions both on normal and unwind path. 1966 if (CS.isCall()) { 1967 Instruction *InsertBefore = CS.getInstruction()->getNextNode(); 1968 assert(InsertBefore); 1969 Instruction *RematerializedValue = rematerializeChain(InsertBefore); 1970 Info.RematerializedValues[RematerializedValue] = LiveValue; 1971 } else { 1972 InvokeInst *Invoke = cast<InvokeInst>(CS.getInstruction()); 1973 1974 Instruction *NormalInsertBefore = 1975 Invoke->getNormalDest()->getFirstInsertionPt(); 1976 Instruction *UnwindInsertBefore = 1977 Invoke->getUnwindDest()->getFirstInsertionPt(); 1978 1979 Instruction *NormalRematerializedValue = 1980 rematerializeChain(NormalInsertBefore); 1981 Instruction *UnwindRematerializedValue = 1982 rematerializeChain(UnwindInsertBefore); 1983 1984 Info.RematerializedValues[NormalRematerializedValue] = LiveValue; 1985 Info.RematerializedValues[UnwindRematerializedValue] = LiveValue; 1986 } 1987 } 1988 1989 // Remove rematerializaed values from the live set 1990 for (auto LiveValue: LiveValuesToBeDeleted) { 1991 Info.liveset.erase(LiveValue); 1992 } 1993 } 1994 1995 static bool insertParsePoints(Function &F, DominatorTree &DT, Pass *P, 1996 SmallVectorImpl<CallSite> &toUpdate) { 1997 #ifndef NDEBUG 1998 // sanity check the input 1999 std::set<CallSite> uniqued; 2000 uniqued.insert(toUpdate.begin(), toUpdate.end()); 2001 assert(uniqued.size() == toUpdate.size() && "no duplicates please!"); 2002 2003 for (size_t i = 0; i < toUpdate.size(); i++) { 2004 CallSite &CS = toUpdate[i]; 2005 assert(CS.getInstruction()->getParent()->getParent() == &F); 2006 assert(isStatepoint(CS) && "expected to already be a deopt statepoint"); 2007 } 2008 #endif 2009 2010 // When inserting gc.relocates for invokes, we need to be able to insert at 2011 // the top of the successor blocks. See the comment on 2012 // normalForInvokeSafepoint on exactly what is needed. Note that this step 2013 // may restructure the CFG. 2014 for (CallSite CS : toUpdate) { 2015 if (!CS.isInvoke()) 2016 continue; 2017 InvokeInst *invoke = cast<InvokeInst>(CS.getInstruction()); 2018 normalizeForInvokeSafepoint(invoke->getNormalDest(), invoke->getParent(), 2019 P); 2020 normalizeForInvokeSafepoint(invoke->getUnwindDest(), invoke->getParent(), 2021 P); 2022 } 2023 2024 // A list of dummy calls added to the IR to keep various values obviously 2025 // live in the IR. We'll remove all of these when done. 2026 SmallVector<CallInst *, 64> holders; 2027 2028 // Insert a dummy call with all of the arguments to the vm_state we'll need 2029 // for the actual safepoint insertion. This ensures reference arguments in 2030 // the deopt argument list are considered live through the safepoint (and 2031 // thus makes sure they get relocated.) 2032 for (size_t i = 0; i < toUpdate.size(); i++) { 2033 CallSite &CS = toUpdate[i]; 2034 Statepoint StatepointCS(CS); 2035 2036 SmallVector<Value *, 64> DeoptValues; 2037 for (Use &U : StatepointCS.vm_state_args()) { 2038 Value *Arg = cast<Value>(&U); 2039 assert(!isUnhandledGCPointerType(Arg->getType()) && 2040 "support for FCA unimplemented"); 2041 if (isHandledGCPointerType(Arg->getType())) 2042 DeoptValues.push_back(Arg); 2043 } 2044 insertUseHolderAfter(CS, DeoptValues, holders); 2045 } 2046 2047 SmallVector<struct PartiallyConstructedSafepointRecord, 64> records; 2048 records.reserve(toUpdate.size()); 2049 for (size_t i = 0; i < toUpdate.size(); i++) { 2050 struct PartiallyConstructedSafepointRecord info; 2051 records.push_back(info); 2052 } 2053 assert(records.size() == toUpdate.size()); 2054 2055 // A) Identify all gc pointers which are staticly live at the given call 2056 // site. 2057 findLiveReferences(F, DT, P, toUpdate, records); 2058 2059 // Do a limited scalarization of any live at safepoint vector values which 2060 // contain pointers. This enables this pass to run after vectorization at 2061 // the cost of some possible performance loss. TODO: it would be nice to 2062 // natively support vectors all the way through the backend so we don't need 2063 // to scalarize here. 2064 for (size_t i = 0; i < records.size(); i++) { 2065 struct PartiallyConstructedSafepointRecord &info = records[i]; 2066 Instruction *statepoint = toUpdate[i].getInstruction(); 2067 splitVectorValues(cast<Instruction>(statepoint), info.liveset, DT); 2068 } 2069 2070 // B) Find the base pointers for each live pointer 2071 /* scope for caching */ { 2072 // Cache the 'defining value' relation used in the computation and 2073 // insertion of base phis and selects. This ensures that we don't insert 2074 // large numbers of duplicate base_phis. 2075 DefiningValueMapTy DVCache; 2076 2077 for (size_t i = 0; i < records.size(); i++) { 2078 struct PartiallyConstructedSafepointRecord &info = records[i]; 2079 CallSite &CS = toUpdate[i]; 2080 findBasePointers(DT, DVCache, CS, info); 2081 } 2082 } // end of cache scope 2083 2084 // The base phi insertion logic (for any safepoint) may have inserted new 2085 // instructions which are now live at some safepoint. The simplest such 2086 // example is: 2087 // loop: 2088 // phi a <-- will be a new base_phi here 2089 // safepoint 1 <-- that needs to be live here 2090 // gep a + 1 2091 // safepoint 2 2092 // br loop 2093 // We insert some dummy calls after each safepoint to definitely hold live 2094 // the base pointers which were identified for that safepoint. We'll then 2095 // ask liveness for _every_ base inserted to see what is now live. Then we 2096 // remove the dummy calls. 2097 holders.reserve(holders.size() + records.size()); 2098 for (size_t i = 0; i < records.size(); i++) { 2099 struct PartiallyConstructedSafepointRecord &info = records[i]; 2100 CallSite &CS = toUpdate[i]; 2101 2102 SmallVector<Value *, 128> Bases; 2103 for (auto Pair : info.PointerToBase) { 2104 Bases.push_back(Pair.second); 2105 } 2106 insertUseHolderAfter(CS, Bases, holders); 2107 } 2108 2109 // By selecting base pointers, we've effectively inserted new uses. Thus, we 2110 // need to rerun liveness. We may *also* have inserted new defs, but that's 2111 // not the key issue. 2112 recomputeLiveInValues(F, DT, P, toUpdate, records); 2113 2114 if (PrintBasePointers) { 2115 for (size_t i = 0; i < records.size(); i++) { 2116 struct PartiallyConstructedSafepointRecord &info = records[i]; 2117 errs() << "Base Pairs: (w/Relocation)\n"; 2118 for (auto Pair : info.PointerToBase) { 2119 errs() << " derived %" << Pair.first->getName() << " base %" 2120 << Pair.second->getName() << "\n"; 2121 } 2122 } 2123 } 2124 for (size_t i = 0; i < holders.size(); i++) { 2125 holders[i]->eraseFromParent(); 2126 holders[i] = nullptr; 2127 } 2128 holders.clear(); 2129 2130 // In order to reduce live set of statepoint we might choose to rematerialize 2131 // some values instead of relocating them. This is purelly an optimization and 2132 // does not influence correctness. 2133 TargetTransformInfo &TTI = 2134 P->getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 2135 2136 for (size_t i = 0; i < records.size(); i++) { 2137 struct PartiallyConstructedSafepointRecord &info = records[i]; 2138 CallSite &CS = toUpdate[i]; 2139 2140 rematerializeLiveValues(CS, info, TTI); 2141 } 2142 2143 // Now run through and replace the existing statepoints with new ones with 2144 // the live variables listed. We do not yet update uses of the values being 2145 // relocated. We have references to live variables that need to 2146 // survive to the last iteration of this loop. (By construction, the 2147 // previous statepoint can not be a live variable, thus we can and remove 2148 // the old statepoint calls as we go.) 2149 for (size_t i = 0; i < records.size(); i++) { 2150 struct PartiallyConstructedSafepointRecord &info = records[i]; 2151 CallSite &CS = toUpdate[i]; 2152 makeStatepointExplicit(DT, CS, P, info); 2153 } 2154 toUpdate.clear(); // prevent accident use of invalid CallSites 2155 2156 // Do all the fixups of the original live variables to their relocated selves 2157 SmallVector<Value *, 128> live; 2158 for (size_t i = 0; i < records.size(); i++) { 2159 struct PartiallyConstructedSafepointRecord &info = records[i]; 2160 // We can't simply save the live set from the original insertion. One of 2161 // the live values might be the result of a call which needs a safepoint. 2162 // That Value* no longer exists and we need to use the new gc_result. 2163 // Thankfully, the liveset is embedded in the statepoint (and updated), so 2164 // we just grab that. 2165 Statepoint statepoint(info.StatepointToken); 2166 live.insert(live.end(), statepoint.gc_args_begin(), 2167 statepoint.gc_args_end()); 2168 #ifndef NDEBUG 2169 // Do some basic sanity checks on our liveness results before performing 2170 // relocation. Relocation can and will turn mistakes in liveness results 2171 // into non-sensical code which is must harder to debug. 2172 // TODO: It would be nice to test consistency as well 2173 assert(DT.isReachableFromEntry(info.StatepointToken->getParent()) && 2174 "statepoint must be reachable or liveness is meaningless"); 2175 for (Value *V : statepoint.gc_args()) { 2176 if (!isa<Instruction>(V)) 2177 // Non-instruction values trivial dominate all possible uses 2178 continue; 2179 auto LiveInst = cast<Instruction>(V); 2180 assert(DT.isReachableFromEntry(LiveInst->getParent()) && 2181 "unreachable values should never be live"); 2182 assert(DT.dominates(LiveInst, info.StatepointToken) && 2183 "basic SSA liveness expectation violated by liveness analysis"); 2184 } 2185 #endif 2186 } 2187 unique_unsorted(live); 2188 2189 #ifndef NDEBUG 2190 // sanity check 2191 for (auto ptr : live) { 2192 assert(isGCPointerType(ptr->getType()) && "must be a gc pointer type"); 2193 } 2194 #endif 2195 2196 relocationViaAlloca(F, DT, live, records); 2197 return !records.empty(); 2198 } 2199 2200 /// Returns true if this function should be rewritten by this pass. The main 2201 /// point of this function is as an extension point for custom logic. 2202 static bool shouldRewriteStatepointsIn(Function &F) { 2203 // TODO: This should check the GCStrategy 2204 if (F.hasGC()) { 2205 const char *FunctionGCName = F.getGC(); 2206 const StringRef StatepointExampleName("statepoint-example"); 2207 const StringRef CoreCLRName("coreclr"); 2208 return (StatepointExampleName == FunctionGCName) || 2209 (CoreCLRName == FunctionGCName); 2210 } 2211 else 2212 return false; 2213 } 2214 2215 bool RewriteStatepointsForGC::runOnFunction(Function &F) { 2216 // Nothing to do for declarations. 2217 if (F.isDeclaration() || F.empty()) 2218 return false; 2219 2220 // Policy choice says not to rewrite - the most common reason is that we're 2221 // compiling code without a GCStrategy. 2222 if (!shouldRewriteStatepointsIn(F)) 2223 return false; 2224 2225 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2226 2227 // Gather all the statepoints which need rewritten. Be careful to only 2228 // consider those in reachable code since we need to ask dominance queries 2229 // when rewriting. We'll delete the unreachable ones in a moment. 2230 SmallVector<CallSite, 64> ParsePointNeeded; 2231 bool HasUnreachableStatepoint = false; 2232 for (Instruction &I : inst_range(F)) { 2233 // TODO: only the ones with the flag set! 2234 if (isStatepoint(I)) { 2235 if (DT.isReachableFromEntry(I.getParent())) 2236 ParsePointNeeded.push_back(CallSite(&I)); 2237 else 2238 HasUnreachableStatepoint = true; 2239 } 2240 } 2241 2242 bool MadeChange = false; 2243 2244 // Delete any unreachable statepoints so that we don't have unrewritten 2245 // statepoints surviving this pass. This makes testing easier and the 2246 // resulting IR less confusing to human readers. Rather than be fancy, we 2247 // just reuse a utility function which removes the unreachable blocks. 2248 if (HasUnreachableStatepoint) 2249 MadeChange |= removeUnreachableBlocks(F); 2250 2251 // Return early if no work to do. 2252 if (ParsePointNeeded.empty()) 2253 return MadeChange; 2254 2255 // As a prepass, go ahead and aggressively destroy single entry phi nodes. 2256 // These are created by LCSSA. They have the effect of increasing the size 2257 // of liveness sets for no good reason. It may be harder to do this post 2258 // insertion since relocations and base phis can confuse things. 2259 for (BasicBlock &BB : F) 2260 if (BB.getUniquePredecessor()) { 2261 MadeChange = true; 2262 FoldSingleEntryPHINodes(&BB); 2263 } 2264 2265 MadeChange |= insertParsePoints(F, DT, this, ParsePointNeeded); 2266 return MadeChange; 2267 } 2268 2269 // liveness computation via standard dataflow 2270 // ------------------------------------------------------------------- 2271 2272 // TODO: Consider using bitvectors for liveness, the set of potentially 2273 // interesting values should be small and easy to pre-compute. 2274 2275 /// Compute the live-in set for the location rbegin starting from 2276 /// the live-out set of the basic block 2277 static void computeLiveInValues(BasicBlock::reverse_iterator rbegin, 2278 BasicBlock::reverse_iterator rend, 2279 DenseSet<Value *> &LiveTmp) { 2280 2281 for (BasicBlock::reverse_iterator ritr = rbegin; ritr != rend; ritr++) { 2282 Instruction *I = &*ritr; 2283 2284 // KILL/Def - Remove this definition from LiveIn 2285 LiveTmp.erase(I); 2286 2287 // Don't consider *uses* in PHI nodes, we handle their contribution to 2288 // predecessor blocks when we seed the LiveOut sets 2289 if (isa<PHINode>(I)) 2290 continue; 2291 2292 // USE - Add to the LiveIn set for this instruction 2293 for (Value *V : I->operands()) { 2294 assert(!isUnhandledGCPointerType(V->getType()) && 2295 "support for FCA unimplemented"); 2296 if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) { 2297 // The choice to exclude all things constant here is slightly subtle. 2298 // There are two idependent reasons: 2299 // - We assume that things which are constant (from LLVM's definition) 2300 // do not move at runtime. For example, the address of a global 2301 // variable is fixed, even though it's contents may not be. 2302 // - Second, we can't disallow arbitrary inttoptr constants even 2303 // if the language frontend does. Optimization passes are free to 2304 // locally exploit facts without respect to global reachability. This 2305 // can create sections of code which are dynamically unreachable and 2306 // contain just about anything. (see constants.ll in tests) 2307 LiveTmp.insert(V); 2308 } 2309 } 2310 } 2311 } 2312 2313 static void computeLiveOutSeed(BasicBlock *BB, DenseSet<Value *> &LiveTmp) { 2314 2315 for (BasicBlock *Succ : successors(BB)) { 2316 const BasicBlock::iterator E(Succ->getFirstNonPHI()); 2317 for (BasicBlock::iterator I = Succ->begin(); I != E; I++) { 2318 PHINode *Phi = cast<PHINode>(&*I); 2319 Value *V = Phi->getIncomingValueForBlock(BB); 2320 assert(!isUnhandledGCPointerType(V->getType()) && 2321 "support for FCA unimplemented"); 2322 if (isHandledGCPointerType(V->getType()) && !isa<Constant>(V)) { 2323 LiveTmp.insert(V); 2324 } 2325 } 2326 } 2327 } 2328 2329 static DenseSet<Value *> computeKillSet(BasicBlock *BB) { 2330 DenseSet<Value *> KillSet; 2331 for (Instruction &I : *BB) 2332 if (isHandledGCPointerType(I.getType())) 2333 KillSet.insert(&I); 2334 return KillSet; 2335 } 2336 2337 #ifndef NDEBUG 2338 /// Check that the items in 'Live' dominate 'TI'. This is used as a basic 2339 /// sanity check for the liveness computation. 2340 static void checkBasicSSA(DominatorTree &DT, DenseSet<Value *> &Live, 2341 TerminatorInst *TI, bool TermOkay = false) { 2342 for (Value *V : Live) { 2343 if (auto *I = dyn_cast<Instruction>(V)) { 2344 // The terminator can be a member of the LiveOut set. LLVM's definition 2345 // of instruction dominance states that V does not dominate itself. As 2346 // such, we need to special case this to allow it. 2347 if (TermOkay && TI == I) 2348 continue; 2349 assert(DT.dominates(I, TI) && 2350 "basic SSA liveness expectation violated by liveness analysis"); 2351 } 2352 } 2353 } 2354 2355 /// Check that all the liveness sets used during the computation of liveness 2356 /// obey basic SSA properties. This is useful for finding cases where we miss 2357 /// a def. 2358 static void checkBasicSSA(DominatorTree &DT, GCPtrLivenessData &Data, 2359 BasicBlock &BB) { 2360 checkBasicSSA(DT, Data.LiveSet[&BB], BB.getTerminator()); 2361 checkBasicSSA(DT, Data.LiveOut[&BB], BB.getTerminator(), true); 2362 checkBasicSSA(DT, Data.LiveIn[&BB], BB.getTerminator()); 2363 } 2364 #endif 2365 2366 static void computeLiveInValues(DominatorTree &DT, Function &F, 2367 GCPtrLivenessData &Data) { 2368 2369 SmallSetVector<BasicBlock *, 200> Worklist; 2370 auto AddPredsToWorklist = [&](BasicBlock *BB) { 2371 // We use a SetVector so that we don't have duplicates in the worklist. 2372 Worklist.insert(pred_begin(BB), pred_end(BB)); 2373 }; 2374 auto NextItem = [&]() { 2375 BasicBlock *BB = Worklist.back(); 2376 Worklist.pop_back(); 2377 return BB; 2378 }; 2379 2380 // Seed the liveness for each individual block 2381 for (BasicBlock &BB : F) { 2382 Data.KillSet[&BB] = computeKillSet(&BB); 2383 Data.LiveSet[&BB].clear(); 2384 computeLiveInValues(BB.rbegin(), BB.rend(), Data.LiveSet[&BB]); 2385 2386 #ifndef NDEBUG 2387 for (Value *Kill : Data.KillSet[&BB]) 2388 assert(!Data.LiveSet[&BB].count(Kill) && "live set contains kill"); 2389 #endif 2390 2391 Data.LiveOut[&BB] = DenseSet<Value *>(); 2392 computeLiveOutSeed(&BB, Data.LiveOut[&BB]); 2393 Data.LiveIn[&BB] = Data.LiveSet[&BB]; 2394 set_union(Data.LiveIn[&BB], Data.LiveOut[&BB]); 2395 set_subtract(Data.LiveIn[&BB], Data.KillSet[&BB]); 2396 if (!Data.LiveIn[&BB].empty()) 2397 AddPredsToWorklist(&BB); 2398 } 2399 2400 // Propagate that liveness until stable 2401 while (!Worklist.empty()) { 2402 BasicBlock *BB = NextItem(); 2403 2404 // Compute our new liveout set, then exit early if it hasn't changed 2405 // despite the contribution of our successor. 2406 DenseSet<Value *> LiveOut = Data.LiveOut[BB]; 2407 const auto OldLiveOutSize = LiveOut.size(); 2408 for (BasicBlock *Succ : successors(BB)) { 2409 assert(Data.LiveIn.count(Succ)); 2410 set_union(LiveOut, Data.LiveIn[Succ]); 2411 } 2412 // assert OutLiveOut is a subset of LiveOut 2413 if (OldLiveOutSize == LiveOut.size()) { 2414 // If the sets are the same size, then we didn't actually add anything 2415 // when unioning our successors LiveIn Thus, the LiveIn of this block 2416 // hasn't changed. 2417 continue; 2418 } 2419 Data.LiveOut[BB] = LiveOut; 2420 2421 // Apply the effects of this basic block 2422 DenseSet<Value *> LiveTmp = LiveOut; 2423 set_union(LiveTmp, Data.LiveSet[BB]); 2424 set_subtract(LiveTmp, Data.KillSet[BB]); 2425 2426 assert(Data.LiveIn.count(BB)); 2427 const DenseSet<Value *> &OldLiveIn = Data.LiveIn[BB]; 2428 // assert: OldLiveIn is a subset of LiveTmp 2429 if (OldLiveIn.size() != LiveTmp.size()) { 2430 Data.LiveIn[BB] = LiveTmp; 2431 AddPredsToWorklist(BB); 2432 } 2433 } // while( !worklist.empty() ) 2434 2435 #ifndef NDEBUG 2436 // Sanity check our ouput against SSA properties. This helps catch any 2437 // missing kills during the above iteration. 2438 for (BasicBlock &BB : F) { 2439 checkBasicSSA(DT, Data, BB); 2440 } 2441 #endif 2442 } 2443 2444 static void findLiveSetAtInst(Instruction *Inst, GCPtrLivenessData &Data, 2445 StatepointLiveSetTy &Out) { 2446 2447 BasicBlock *BB = Inst->getParent(); 2448 2449 // Note: The copy is intentional and required 2450 assert(Data.LiveOut.count(BB)); 2451 DenseSet<Value *> LiveOut = Data.LiveOut[BB]; 2452 2453 // We want to handle the statepoint itself oddly. It's 2454 // call result is not live (normal), nor are it's arguments 2455 // (unless they're used again later). This adjustment is 2456 // specifically what we need to relocate 2457 BasicBlock::reverse_iterator rend(Inst); 2458 computeLiveInValues(BB->rbegin(), rend, LiveOut); 2459 LiveOut.erase(Inst); 2460 Out.insert(LiveOut.begin(), LiveOut.end()); 2461 } 2462 2463 static void recomputeLiveInValues(GCPtrLivenessData &RevisedLivenessData, 2464 const CallSite &CS, 2465 PartiallyConstructedSafepointRecord &Info) { 2466 Instruction *Inst = CS.getInstruction(); 2467 StatepointLiveSetTy Updated; 2468 findLiveSetAtInst(Inst, RevisedLivenessData, Updated); 2469 2470 #ifndef NDEBUG 2471 DenseSet<Value *> Bases; 2472 for (auto KVPair : Info.PointerToBase) { 2473 Bases.insert(KVPair.second); 2474 } 2475 #endif 2476 // We may have base pointers which are now live that weren't before. We need 2477 // to update the PointerToBase structure to reflect this. 2478 for (auto V : Updated) 2479 if (!Info.PointerToBase.count(V)) { 2480 assert(Bases.count(V) && "can't find base for unexpected live value"); 2481 Info.PointerToBase[V] = V; 2482 continue; 2483 } 2484 2485 #ifndef NDEBUG 2486 for (auto V : Updated) { 2487 assert(Info.PointerToBase.count(V) && 2488 "must be able to find base for live value"); 2489 } 2490 #endif 2491 2492 // Remove any stale base mappings - this can happen since our liveness is 2493 // more precise then the one inherent in the base pointer analysis 2494 DenseSet<Value *> ToErase; 2495 for (auto KVPair : Info.PointerToBase) 2496 if (!Updated.count(KVPair.first)) 2497 ToErase.insert(KVPair.first); 2498 for (auto V : ToErase) 2499 Info.PointerToBase.erase(V); 2500 2501 #ifndef NDEBUG 2502 for (auto KVPair : Info.PointerToBase) 2503 assert(Updated.count(KVPair.first) && "record for non-live value"); 2504 #endif 2505 2506 Info.liveset = Updated; 2507 } 2508