//===- GVNHoist.cpp - Hoist scalar and load expressions -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass hoists expressions from branches to a common dominator. It uses
// GVN (global value numbering) to discover expressions computing the same
// values. The primary goal is to reduce the code size, and in some
// cases reduce the critical path (by exposing more ILP).
// Hoisting may hurt performance in some cases. To mitigate that, hoisting is
// disabled in the following cases:
// 1. Scalars across calls.
// 2. geps when the corresponding load/store cannot be hoisted.
//===----------------------------------------------------------------------===//

#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Scalar/GVN.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/MemorySSA.h"

using namespace llvm;

#define DEBUG_TYPE "gvn-hoist"

STATISTIC(NumHoisted, "Number of instructions hoisted");
STATISTIC(NumRemoved, "Number of instructions removed");
STATISTIC(NumLoadsHoisted, "Number of loads hoisted");
STATISTIC(NumLoadsRemoved, "Number of loads removed");
STATISTIC(NumStoresHoisted, "Number of stores hoisted");
STATISTIC(NumStoresRemoved, "Number of stores removed");
STATISTIC(NumCallsHoisted, "Number of calls hoisted");
STATISTIC(NumCallsRemoved, "Number of calls removed");

static cl::opt<int>
    MaxHoistedThreshold("gvn-max-hoisted", cl::Hidden, cl::init(-1),
                        cl::desc("Max number of instructions to hoist "
                                 "(default unlimited = -1)"));
static cl::opt<int> MaxNumberOfBBSInPath(
    "gvn-hoist-max-bbs", cl::Hidden, cl::init(4),
    cl::desc("Max number of basic blocks on the path between "
             "hoisting locations (default = 4, unlimited = -1)"));

namespace {

// Provides a sorting function based on the execution order of two
// instructions.
struct SortByDFSIn {
private:
  DenseMap<const Value *, unsigned> &DFSNumber;

public:
  SortByDFSIn(DenseMap<const Value *, unsigned> &D) : DFSNumber(D) {}

  // Returns true when A executes before B.
  bool operator()(const Instruction *A, const Instruction *B) const {
    // FIXME: libc++ has a std::sort() algorithm that will call the compare
    // function on the same element. Once PR20837 is fixed and some more years
    // pass by and all the buildbots have moved to a corrected std::sort(),
    // enable the following assert:
    //
    // assert(A != B);

    const BasicBlock *BA = A->getParent();
    const BasicBlock *BB = B->getParent();
    unsigned NA = DFSNumber[BA];
    unsigned NB = DFSNumber[BB];
    if (NA < NB)
      return true;
    if (NA == NB) {
      assert(DFSNumber.count(A) && DFSNumber.count(B));
      return DFSNumber[A] < DFSNumber[B];
    }
    return false;
  }
};

// A map from a pair of VNs to all the instructions with those VNs.
typedef DenseMap<std::pair<unsigned, unsigned>, SmallVector<Instruction *, 4>>
    VNtoInsns;
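
// Only StoreInfo below keys on both components of the pair (address VN,
// stored-value VN); every other table pairs a single value number with
// InvalidVN.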

// An invalid value number used when inserting a single value number into
// VNtoInsns.
enum : unsigned { InvalidVN = ~2U };

// Records all scalar instructions that are candidates for code hoisting.
class InsnInfo {
  VNtoInsns VNtoScalars;

public:
  // Inserts I and its value number in VNtoScalars.
  void insert(Instruction *I, GVN::ValueTable &VN) {
    // Scalar instruction.
    unsigned V = VN.lookupOrAdd(I);
    VNtoScalars[{V, InvalidVN}].push_back(I);
  }

  const VNtoInsns &getVNTable() const { return VNtoScalars; }
};

// Records all load instructions that are candidates for code hoisting.
class LoadInfo {
  VNtoInsns VNtoLoads;

public:
  // Insert Load and the value number of its memory address in VNtoLoads.
  void insert(LoadInst *Load, GVN::ValueTable &VN) {
    if (Load->isSimple()) {
      unsigned V = VN.lookupOrAdd(Load->getPointerOperand());
      VNtoLoads[{V, InvalidVN}].push_back(Load);
    }
  }

  const VNtoInsns &getVNTable() const { return VNtoLoads; }
};

// Records all store instructions that are candidates for code hoisting.
class StoreInfo {
  VNtoInsns VNtoStores;

public:
  // Insert the Store and a hash number of the store address and the stored
  // value in VNtoStores.
  void insert(StoreInst *Store, GVN::ValueTable &VN) {
    if (!Store->isSimple())
      return;
    // Hash the store address and the stored value.
    Value *Ptr = Store->getPointerOperand();
    Value *Val = Store->getValueOperand();
    VNtoStores[{VN.lookupOrAdd(Ptr), VN.lookupOrAdd(Val)}].push_back(Store);
  }

  const VNtoInsns &getVNTable() const { return VNtoStores; }
};

// Records all call instructions that are candidates for code hoisting.
class CallInfo {
  VNtoInsns VNtoCallsScalars;
  VNtoInsns VNtoCallsLoads;
  VNtoInsns VNtoCallsStores;

public:
  // Insert Call and its value number in one of the VNtoCalls* containers.
  void insert(CallInst *Call, GVN::ValueTable &VN) {
    // A call that doesNotAccessMemory is handled as a scalar, a call that
    // onlyReadsMemory is handled as a load, and all other calls are handled
    // as stores.
    unsigned V = VN.lookupOrAdd(Call);
    auto Entry = std::make_pair(V, InvalidVN);

    if (Call->doesNotAccessMemory())
      VNtoCallsScalars[Entry].push_back(Call);
    else if (Call->onlyReadsMemory())
      VNtoCallsLoads[Entry].push_back(Call);
    else
      VNtoCallsStores[Entry].push_back(Call);
  }

  const VNtoInsns &getScalarVNTable() const { return VNtoCallsScalars; }

  const VNtoInsns &getLoadVNTable() const { return VNtoCallsLoads; }

  const VNtoInsns &getStoreVNTable() const { return VNtoCallsStores; }
};

typedef DenseMap<const BasicBlock *, bool> BBSideEffectsSet;
typedef SmallVector<Instruction *, 4> SmallVecInsn;
typedef SmallVectorImpl<Instruction *> SmallVecImplInsn;

static void combineKnownMetadata(Instruction *ReplInst, Instruction *I) {
  static const unsigned KnownIDs[] = {
      LLVMContext::MD_tbaa,    LLVMContext::MD_alias_scope,
      LLVMContext::MD_noalias, LLVMContext::MD_range,
      LLVMContext::MD_fpmath,  LLVMContext::MD_invariant_load,
      LLVMContext::MD_invariant_group};
  combineMetadata(ReplInst, I, KnownIDs);
}

// This pass hoists common computations across branches sharing a common
// dominator. The primary goal is to reduce the code size, and in some
// cases reduce the critical path (by exposing more ILP).
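//
// For example (an illustrative sketch, not from an actual test case):
//
//   if.then:                          if.else:
//     %a = add i32 %x, %y               %b = add i32 %x, %y
//
// Both additions compute the same value number, so the expression is hoisted
// into the common dominator of if.then and if.else, and the redundant copy is
// removed.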

class GVNHoist {
public:
  GVNHoist(DominatorTree *Dt, AliasAnalysis *Aa, MemoryDependenceResults *Md,
           bool OptForMinSize)
      : DT(Dt), AA(Aa), MD(Md), OptForMinSize(OptForMinSize), HoistedCtr(0) {}

  bool run(Function &F) {
    VN.setDomTree(DT);
    VN.setAliasAnalysis(AA);
    VN.setMemDep(MD);
    bool Res = false;

    // Perform a DFS numbering of blocks and instructions.
    unsigned I = 0;
    for (const BasicBlock *BB : depth_first(&F.getEntryBlock())) {
      DFSNumber.insert({BB, ++I});
      for (auto &Inst : *BB)
        DFSNumber.insert({&Inst, ++I});
    }

    // FIXME: use lazy evaluation of VN to avoid the fix-point computation.
    while (1) {
      // FIXME: only compute MemorySSA once. We need to update the analysis at
      // the same time as we transform the code.
      MemorySSA M(F, AA, DT);
      MSSA = &M;

      auto HoistStat = hoistExpressions(F);
      if (HoistStat.first + HoistStat.second == 0)
        return Res;
      if (HoistStat.second > 0) {
        // To address a limitation of the current GVN, we need to rerun
        // hoisting after we hoisted loads, in order to be able to hoist all
        // scalars dependent on the hoisted loads. Same for stores.
        VN.clear();
      }
      Res = true;
    }

    return Res;
  }

private:
  GVN::ValueTable VN;
  DominatorTree *DT;
  AliasAnalysis *AA;
  MemoryDependenceResults *MD;
  const bool OptForMinSize;
  DenseMap<const Value *, unsigned> DFSNumber;
  BBSideEffectsSet BBSideEffects;
  MemorySSA *MSSA;
  int HoistedCtr;

  enum InsKind { Unknown, Scalar, Load, Store };

  // Return true when there is exception handling in BB.
  bool hasEH(const BasicBlock *BB) {
    auto It = BBSideEffects.find(BB);
    if (It != BBSideEffects.end())
      return It->second;

    if (BB->isEHPad() || BB->hasAddressTaken()) {
      BBSideEffects[BB] = true;
      return true;
    }

    if (BB->getTerminator()->mayThrow()) {
      BBSideEffects[BB] = true;
      return true;
    }

    BBSideEffects[BB] = false;
    return false;
  }

  // Return true when all paths from A to the end of the function pass through
  // either B or C.
  bool hoistingFromAllPaths(const BasicBlock *A, const BasicBlock *B,
                            const BasicBlock *C) {
    // We fully copy the WL in order to be able to remove items from it.
    SmallPtrSet<const BasicBlock *, 2> WL;
    WL.insert(B);
    WL.insert(C);

    for (auto It = df_begin(A), E = df_end(A); It != E;) {
      // There exists a path from A to the exit of the function if we are
      // still iterating in DF traversal and we removed all blocks from the
      // work list.
      if (WL.empty())
        return false;

      const BasicBlock *BB = *It;
      if (WL.erase(BB)) {
        // Stop DFS traversal when BB is in the work list.
        It.skipChildren();
        continue;
      }

      // Check for end of function, calls that do not return, etc.
      if (!isGuaranteedToTransferExecutionToSuccessor(BB->getTerminator()))
        return false;

      // Advance the DFS traversal when not skipping children.
      ++It;
    }

    return true;
  }

  // Return true when I1 appears before I2 in the instructions of BB.
  bool firstInBB(const Instruction *I1, const Instruction *I2) {
    assert(I1->getParent() == I2->getParent());
    assert(DFSNumber.count(I1) && DFSNumber.count(I2));
    return DFSNumber[I1] < DFSNumber[I2];
  }
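
  // An illustrative case the next two checks guard against (a sketch with
  // hypothetical IR): hoisting
  //   store i32 %v, i32* %p
  // above a path that contains
  //   %x = load i32, i32* %p
  // would change the value observed by the load, so the walks below refuse to
  // move a store across any use of its memory state.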

  // Return true when there are users of Def in BB.
  bool hasMemoryUseOnPath(MemoryAccess *Def, const BasicBlock *BB,
                          const Instruction *OldPt) {
    const BasicBlock *DefBB = Def->getBlock();
    const BasicBlock *OldBB = OldPt->getParent();

    for (User *U : Def->users())
      if (auto *MU = dyn_cast<MemoryUse>(U)) {
        BasicBlock *UBB = MU->getBlock();
        // Only analyze uses in BB.
        if (BB != UBB)
          continue;

        // A use in the same block as the Def is on the path.
        if (UBB == DefBB) {
          assert(MSSA->locallyDominates(Def, MU) && "def not dominating use");
          return true;
        }

        if (UBB != OldBB)
          return true;

        // It is only harmful to hoist when the use is before OldPt.
        if (firstInBB(MU->getMemoryInst(), OldPt))
          return true;
      }

    return false;
  }

  // Return true when there is exception handling or a load of memory Def
  // between NewPt and OldPt. Decrement by 1 NBBsOnAllPaths for each block
  // between NewPt and OldPt, and return true when the counter NBBsOnAllPaths
  // reaches 0, except when it is initialized to -1 which is unlimited.
  bool hasEHOrLoadsOnPath(const Instruction *NewPt, const Instruction *OldPt,
                          MemoryAccess *Def, int &NBBsOnAllPaths) {
    const BasicBlock *NewBB = NewPt->getParent();
    const BasicBlock *OldBB = OldPt->getParent();
    assert(DT->dominates(NewBB, OldBB) && "invalid path");
    assert(DT->dominates(Def->getBlock(), NewBB) &&
           "def does not dominate new hoisting point");

    // Walk all basic blocks reachable in depth-first iteration on the inverse
    // CFG from OldBB to NewBB. These blocks are all the blocks that may be
    // executed between the execution of NewBB and OldBB. Hoisting an
    // expression from OldBB into NewBB has to be safe on all execution paths.
    for (auto I = idf_begin(OldBB), E = idf_end(OldBB); I != E;) {
      if (*I == NewBB) {
        // Stop traversal when reaching NewBB.
        I.skipChildren();
        continue;
      }

      // Impossible to hoist with exceptions on the path.
      if (hasEH(*I))
        return true;

      // Check that we do not move a store past loads.
      if (hasMemoryUseOnPath(Def, *I, OldPt))
        return true;

      // Stop walk once the limit is reached.
      if (NBBsOnAllPaths == 0)
        return true;

      // -1 is unlimited number of blocks on all paths.
      if (NBBsOnAllPaths != -1)
        --NBBsOnAllPaths;

      ++I;
    }

    return false;
  }
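
  // Note: NBBsOnAllPaths is a block budget shared across the successive
  // safety queries made for one hoisting decision: each block visited by the
  // path walks decrements it, so the total CFG region inspected per candidate
  // stays bounded by -gvn-hoist-max-bbs.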

  // Return true when there is exception handling between HoistPt and BB.
  // Decrement by 1 NBBsOnAllPaths for each block between HoistPt and BB, and
  // return true when the counter NBBsOnAllPaths reaches 0, except when it is
  // initialized to -1 which is unlimited.
  bool hasEHOnPath(const BasicBlock *HoistPt, const BasicBlock *BB,
                   int &NBBsOnAllPaths) {
    assert(DT->dominates(HoistPt, BB) && "Invalid path");

    // Walk all basic blocks reachable in depth-first iteration on the inverse
    // CFG from BB to HoistPt. These blocks are all the blocks that may be
    // executed between the execution of HoistPt and BB. Hoisting an
    // expression from BB into HoistPt has to be safe on all execution paths.
    for (auto I = idf_begin(BB), E = idf_end(BB); I != E;) {
      if (*I == HoistPt) {
        // Stop traversal when reaching HoistPt.
        I.skipChildren();
        continue;
      }

      // Impossible to hoist with exceptions on the path.
      if (hasEH(*I))
        return true;

      // Stop walk once the limit is reached.
      if (NBBsOnAllPaths == 0)
        return true;

      // -1 is unlimited number of blocks on all paths.
      if (NBBsOnAllPaths != -1)
        --NBBsOnAllPaths;

      ++I;
    }

    return false;
  }

  // Return true when it is safe to hoist a memory load or store U from OldPt
  // to NewPt.
  bool safeToHoistLdSt(const Instruction *NewPt, const Instruction *OldPt,
                       MemoryUseOrDef *U, InsKind K, int &NBBsOnAllPaths) {
    // In-place hoisting is safe.
    if (NewPt == OldPt)
      return true;

    const BasicBlock *NewBB = NewPt->getParent();
    const BasicBlock *OldBB = OldPt->getParent();
    const BasicBlock *UBB = U->getBlock();

    // Check for dependences on MemorySSA.
    MemoryAccess *D = U->getDefiningAccess();
    BasicBlock *DBB = D->getBlock();
    if (DT->properlyDominates(NewBB, DBB))
      // Cannot move the load or store to NewBB above its definition in DBB.
      return false;

    if (NewBB == DBB && !MSSA->isLiveOnEntryDef(D))
      if (auto *UD = dyn_cast<MemoryUseOrDef>(D))
        if (firstInBB(NewPt, UD->getMemoryInst()))
          // Cannot move the load or store to NewPt above its definition in D.
          return false;

    // Check for unsafe hoistings due to side effects.
    if (K == InsKind::Store) {
      if (hasEHOrLoadsOnPath(NewPt, OldPt, D, NBBsOnAllPaths))
        return false;
    } else if (hasEHOnPath(NewBB, OldBB, NBBsOnAllPaths))
      return false;

    if (UBB == NewBB) {
      if (DT->properlyDominates(DBB, NewBB))
        return true;
      assert(UBB == DBB);
      assert(MSSA->locallyDominates(D, U));
    }

    // No side effects: it is safe to hoist.
    return true;
  }

  // Return true when it is safe to hoist scalar instructions from BB1 and BB2
  // to HoistBB.
  bool safeToHoistScalar(const BasicBlock *HoistBB, const BasicBlock *BB1,
                         const BasicBlock *BB2, int &NBBsOnAllPaths) {
    // Check that the hoisted expression is needed on all paths. When HoistBB
    // already contains an instruction to be hoisted, the expression is needed
    // on all paths. Enable scalar hoisting at -Oz as it is safe to hoist
    // scalars to a place where they are partially needed.
    if (!OptForMinSize && BB1 != HoistBB &&
        !hoistingFromAllPaths(HoistBB, BB1, BB2))
      return false;

    if (hasEHOnPath(HoistBB, BB1, NBBsOnAllPaths) ||
        hasEHOnPath(HoistBB, BB2, NBBsOnAllPaths))
      return false;

    // Safe to hoist scalars from BB1 and BB2 to HoistBB.
    return true;
  }

  // Each element of a hoisting list contains the basic block into which to
  // hoist and a list of instructions to be hoisted.
  typedef std::pair<BasicBlock *, SmallVecInsn> HoistingPointInfo;
  typedef SmallVector<HoistingPointInfo, 4> HoistingPointList;
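
  // Illustration of the partitioning below (hypothetical blocks): candidates
  // sorted in DFS order, say {I1 in bb1, I2 in bb2, I3 in bb3}, are folded
  // left to right, extending the hoisting point to the nearest common
  // dominator while the safety checks hold; a failed extension closes the
  // current partition and starts a new one at the failing instruction.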

  // Partition InstructionsToHoist into a set of candidates which can share a
  // common hoisting point. The partitions are collected in HPL. K tells
  // whether the instructions in InstructionsToHoist are scalars, loads, or
  // stores.
  void partitionCandidates(SmallVecImplInsn &InstructionsToHoist,
                           HoistingPointList &HPL, InsKind K) {
    // No need to sort for two instructions.
    if (InstructionsToHoist.size() > 2) {
      SortByDFSIn Pred(DFSNumber);
      std::sort(InstructionsToHoist.begin(), InstructionsToHoist.end(), Pred);
    }

    int NBBsOnAllPaths = MaxNumberOfBBSInPath;

    SmallVecImplInsn::iterator II = InstructionsToHoist.begin();
    SmallVecImplInsn::iterator Start = II;
    Instruction *HoistPt = *II;
    BasicBlock *HoistBB = HoistPt->getParent();
    MemoryUseOrDef *UD;
    if (K != InsKind::Scalar)
      UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(HoistPt));

    for (++II; II != InstructionsToHoist.end(); ++II) {
      Instruction *Insn = *II;
      BasicBlock *BB = Insn->getParent();
      BasicBlock *NewHoistBB;
      Instruction *NewHoistPt;

      if (BB == HoistBB) {
        NewHoistBB = HoistBB;
        NewHoistPt = firstInBB(Insn, HoistPt) ? Insn : HoistPt;
      } else {
        NewHoistBB = DT->findNearestCommonDominator(HoistBB, BB);
        if (NewHoistBB == BB)
          NewHoistPt = Insn;
        else if (NewHoistBB == HoistBB)
          NewHoistPt = HoistPt;
        else
          NewHoistPt = NewHoistBB->getTerminator();
      }

      if (K == InsKind::Scalar) {
        if (safeToHoistScalar(NewHoistBB, HoistBB, BB, NBBsOnAllPaths)) {
          // Extend HoistPt to NewHoistPt.
          HoistPt = NewHoistPt;
          HoistBB = NewHoistBB;
          continue;
        }
      } else {
        // When NewHoistBB already contains an instruction to be hoisted, the
        // expression is needed on all paths.
        // Check that the hoisted expression is needed on all paths: it is
        // unsafe to hoist loads to a place where there may be a path not
        // loading from the same address: for instance there may be a branch
        // on which the address of the load may not be initialized.
        if ((HoistBB == NewHoistBB || BB == NewHoistBB ||
             hoistingFromAllPaths(NewHoistBB, HoistBB, BB)) &&
            // Also check that it is safe to move the load or store from
            // HoistPt to NewHoistPt, and from Insn to NewHoistPt.
            safeToHoistLdSt(NewHoistPt, HoistPt, UD, K, NBBsOnAllPaths) &&
            safeToHoistLdSt(NewHoistPt, Insn,
                            cast<MemoryUseOrDef>(MSSA->getMemoryAccess(Insn)),
                            K, NBBsOnAllPaths)) {
          // Extend HoistPt to NewHoistPt.
          HoistPt = NewHoistPt;
          HoistBB = NewHoistBB;
          continue;
        }
      }

      // At this point it is not safe to extend the current hoisting to
      // NewHoistPt: save the hoisting list so far.
      if (std::distance(Start, II) > 1)
        HPL.push_back({HoistBB, SmallVecInsn(Start, II)});

      // Start over from BB.
      Start = II;
      if (K != InsKind::Scalar)
        UD = cast<MemoryUseOrDef>(MSSA->getMemoryAccess(*Start));
      HoistPt = Insn;
      HoistBB = BB;
      NBBsOnAllPaths = MaxNumberOfBBSInPath;
    }

    // Save the last partition.
    if (std::distance(Start, II) > 1)
      HPL.push_back({HoistBB, SmallVecInsn(Start, II)});
  }

  // Initialize HPL from Map.
  void computeInsertionPoints(const VNtoInsns &Map, HoistingPointList &HPL,
                              InsKind K) {
    for (const auto &Entry : Map) {
      if (MaxHoistedThreshold != -1 && ++HoistedCtr > MaxHoistedThreshold)
        return;

      const SmallVecInsn &V = Entry.second;
      if (V.size() < 2)
        continue;

      // Compute the insertion point and the list of expressions to be
      // hoisted.
      SmallVecInsn InstructionsToHoist;
      for (auto I : V)
        if (!hasEH(I->getParent()))
          InstructionsToHoist.push_back(I);

      if (!InstructionsToHoist.empty())
        partitionCandidates(InstructionsToHoist, HPL, K);
    }
  }
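
  // Illustration for the helpers below (hypothetical IR):
  //   %g = getelementptr i32, i32* %base, i64 %i
  //   %v = load i32, i32* %g
  // Hoisting %v requires its address %g at the insertion point;
  // makeOperandsAvailable clones %g there when all of %g's operands dominate
  // the insertion point.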

  // Return true when all operands of I are available at insertion point
  // HoistPt. When limiting the number of hoisted expressions, one could hoist
  // a load without hoisting its access function. So before hoisting any
  // expression, make sure that all its operands are available at the insert
  // point.
  bool allOperandsAvailable(const Instruction *I,
                            const BasicBlock *HoistPt) const {
    for (const Use &Op : I->operands())
      if (const auto *Inst = dyn_cast<Instruction>(&Op))
        if (!DT->dominates(Inst->getParent(), HoistPt))
          return false;

    return true;
  }

  // Return whichever of I and J comes first in their common basic block.
  Instruction *firstOfTwo(Instruction *I, Instruction *J) const {
    for (Instruction &I1 : *I->getParent())
      if (&I1 == I || &I1 == J)
        return &I1;
    llvm_unreachable("Both I and J must be from same BB");
  }

  // Make the operands of Repl available at HoistPt by cloning the address
  // computation (and, for stores, a stored GEP value) when needed. Return
  // false when the operands cannot be made available.
  bool makeOperandsAvailable(Instruction *Repl, BasicBlock *HoistPt,
                             const SmallVecInsn &InstructionsToHoist) const {
    // Check whether the GEP of a ld/st can be synthesized at HoistPt.
    GetElementPtrInst *Gep = nullptr;
    Instruction *Val = nullptr;
    if (auto *Ld = dyn_cast<LoadInst>(Repl))
      Gep = dyn_cast<GetElementPtrInst>(Ld->getPointerOperand());
    if (auto *St = dyn_cast<StoreInst>(Repl)) {
      Gep = dyn_cast<GetElementPtrInst>(St->getPointerOperand());
      Val = dyn_cast<Instruction>(St->getValueOperand());
      // Check that the stored value is available.
      if (Val) {
        if (isa<GetElementPtrInst>(Val)) {
          // Check whether we can compute the GEP at HoistPt.
          if (!allOperandsAvailable(Val, HoistPt))
            return false;
        } else if (!DT->dominates(Val->getParent(), HoistPt))
          return false;
      }
    }

    // Check whether we can compute the Gep at HoistPt.
    if (!Gep || !allOperandsAvailable(Gep, HoistPt))
      return false;

    // Copy the gep before moving the ld/st.
    Instruction *ClonedGep = Gep->clone();
    ClonedGep->insertBefore(HoistPt->getTerminator());
    // Conservatively discard any optimization hints, they may differ on the
    // other paths.
    for (Instruction *OtherInst : InstructionsToHoist) {
      GetElementPtrInst *OtherGep;
      if (auto *OtherLd = dyn_cast<LoadInst>(OtherInst))
        OtherGep = cast<GetElementPtrInst>(OtherLd->getPointerOperand());
      else
        OtherGep = cast<GetElementPtrInst>(
            cast<StoreInst>(OtherInst)->getPointerOperand());
      ClonedGep->intersectOptionalDataWith(OtherGep);
      combineKnownMetadata(ClonedGep, OtherGep);
    }
    Repl->replaceUsesOfWith(Gep, ClonedGep);

    // Also copy Val when it is a GEP.
    if (Val && isa<GetElementPtrInst>(Val)) {
      Instruction *ClonedVal = Val->clone();
      ClonedVal->insertBefore(HoistPt->getTerminator());
      // Conservatively discard any optimization hints, they may differ on the
      // other paths.
      for (Instruction *OtherInst : InstructionsToHoist) {
        auto *OtherVal =
            cast<Instruction>(cast<StoreInst>(OtherInst)->getValueOperand());
        ClonedVal->intersectOptionalDataWith(OtherVal);
        combineKnownMetadata(ClonedVal, OtherVal);
      }
      Repl->replaceUsesOfWith(Val, ClonedVal);
    }

    return true;
  }
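
  // Hoist the instructions of each partition in HPL into its hoisting point.
  // Return the number of hoisted scalars and the number of hoisted loads,
  // stores, and calls.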
  std::pair<unsigned, unsigned> hoist(HoistingPointList &HPL) {
    unsigned NI = 0, NL = 0, NS = 0, NC = 0, NR = 0;
    for (const HoistingPointInfo &HP : HPL) {
      // Find out whether we already have one of the instructions in HoistPt,
      // in which case we do not have to move it.
      BasicBlock *HoistPt = HP.first;
      const SmallVecInsn &InstructionsToHoist = HP.second;
      Instruction *Repl = nullptr;
      for (Instruction *I : InstructionsToHoist)
        if (I->getParent() == HoistPt) {
          // If there are two instructions in HoistPt to be hoisted in place:
          // update Repl to be the first one, such that we can rename the uses
          // of the second based on the first.
          Repl = !Repl ? I : firstOfTwo(Repl, I);
        }

      if (Repl) {
        // Repl is already in HoistPt: it remains in place.
        assert(allOperandsAvailable(Repl, HoistPt) &&
               "instruction depends on operands that are not available");
      } else {
        // When we do not find Repl in HoistPt, select the first in the list
        // and move it to HoistPt.
        Repl = InstructionsToHoist.front();

        // We can move Repl in HoistPt only when all operands are available.
        // The order in which hoistings are done may influence the
        // availability of operands.
        if (!allOperandsAvailable(Repl, HoistPt) &&
            !makeOperandsAvailable(Repl, HoistPt, InstructionsToHoist))
          continue;
        Repl->moveBefore(HoistPt->getTerminator());
        // TBAA may differ on one of the other paths; anything which might
        // conflict is discarded when the metadata is combined below.
      }

      if (isa<LoadInst>(Repl))
        ++NL;
      else if (isa<StoreInst>(Repl))
        ++NS;
      else if (isa<CallInst>(Repl))
        ++NC;
      else // Scalar
        ++NI;

      // Remove and rename all other instructions.
      for (Instruction *I : InstructionsToHoist)
        if (I != Repl) {
          ++NR;
          if (auto *ReplacementLoad = dyn_cast<LoadInst>(Repl)) {
            ReplacementLoad->setAlignment(
                std::min(ReplacementLoad->getAlignment(),
                         cast<LoadInst>(I)->getAlignment()));
            ++NumLoadsRemoved;
          } else if (auto *ReplacementStore = dyn_cast<StoreInst>(Repl)) {
            ReplacementStore->setAlignment(
                std::min(ReplacementStore->getAlignment(),
                         cast<StoreInst>(I)->getAlignment()));
            ++NumStoresRemoved;
          } else if (auto *ReplacementAlloca = dyn_cast<AllocaInst>(Repl)) {
            ReplacementAlloca->setAlignment(
                std::max(ReplacementAlloca->getAlignment(),
                         cast<AllocaInst>(I)->getAlignment()));
          } else if (isa<CallInst>(Repl)) {
            ++NumCallsRemoved;
          }
          Repl->intersectOptionalDataWith(I);
          combineKnownMetadata(Repl, I);
          I->replaceAllUsesWith(Repl);
          I->eraseFromParent();
        }
    }

    NumHoisted += NL + NS + NC + NI;
    NumRemoved += NR;
    NumLoadsHoisted += NL;
    NumStoresHoisted += NS;
    NumCallsHoisted += NC;
    return {NI, NL + NC + NS};
  }
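
  // Note: candidates are collected by walking blocks in depth-first order
  // from the entry block, so instructions in unreachable blocks are never
  // considered for hoisting.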

  // Hoist all expressions. Return the number of scalars hoisted and the
  // number of non-scalars (loads, stores, and calls) hoisted.
  std::pair<unsigned, unsigned> hoistExpressions(Function &F) {
    InsnInfo II;
    LoadInfo LI;
    StoreInfo SI;
    CallInfo CI;
    for (BasicBlock *BB : depth_first(&F.getEntryBlock())) {
      for (Instruction &I1 : *BB) {
        if (auto *Load = dyn_cast<LoadInst>(&I1))
          LI.insert(Load, VN);
        else if (auto *Store = dyn_cast<StoreInst>(&I1))
          SI.insert(Store, VN);
        else if (auto *Call = dyn_cast<CallInst>(&I1)) {
          if (auto *Intr = dyn_cast<IntrinsicInst>(Call)) {
            if (isa<DbgInfoIntrinsic>(Intr) ||
                Intr->getIntrinsicID() == Intrinsic::assume)
              continue;
          }
          if (Call->mayHaveSideEffects()) {
            if (!OptForMinSize)
              break;
            // We may continue hoisting across calls which write to memory.
            if (Call->mayThrow())
              break;
          }
          CI.insert(Call, VN);
        } else if (OptForMinSize || !isa<GetElementPtrInst>(&I1))
          // Do not hoist scalars past calls that may write to memory because
          // that could result in spills later. geps are handled separately.
          // TODO: We can relax this for targets like AArch64 as they have
          // more registers than X86.
          II.insert(&I1, VN);
      }
    }

    HoistingPointList HPL;
    computeInsertionPoints(II.getVNTable(), HPL, InsKind::Scalar);
    computeInsertionPoints(LI.getVNTable(), HPL, InsKind::Load);
    computeInsertionPoints(SI.getVNTable(), HPL, InsKind::Store);
    computeInsertionPoints(CI.getScalarVNTable(), HPL, InsKind::Scalar);
    computeInsertionPoints(CI.getLoadVNTable(), HPL, InsKind::Load);
    computeInsertionPoints(CI.getStoreVNTable(), HPL, InsKind::Store);
    return hoist(HPL);
  }
};

class GVNHoistLegacyPass : public FunctionPass {
public:
  static char ID;

  GVNHoistLegacyPass() : FunctionPass(ID) {
    initializeGVNHoistLegacyPassPass(*PassRegistry::getPassRegistry());
  }

  bool runOnFunction(Function &F) override {
    if (skipFunction(F))
      return false;
    auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
    auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
    auto &MD = getAnalysis<MemoryDependenceWrapperPass>().getMemDep();

    GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
    return G.run(F);
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.addRequired<DominatorTreeWrapperPass>();
    AU.addRequired<AAResultsWrapperPass>();
    AU.addRequired<MemoryDependenceWrapperPass>();
    AU.addPreserved<DominatorTreeWrapperPass>();
  }
};
} // namespace

PreservedAnalyses GVNHoistPass::run(Function &F,
                                    AnalysisManager<Function> &AM) {
  DominatorTree &DT = AM.getResult<DominatorTreeAnalysis>(F);
  AliasAnalysis &AA = AM.getResult<AAManager>(F);
  MemoryDependenceResults &MD = AM.getResult<MemoryDependenceAnalysis>(F);

  GVNHoist G(&DT, &AA, &MD, F.optForMinSize());
  if (!G.run(F))
    return PreservedAnalyses::all();

  PreservedAnalyses PA;
  PA.preserve<DominatorTreeAnalysis>();
  return PA;
}

char GVNHoistLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(GVNHoistLegacyPass, "gvn-hoist",
                      "Early GVN Hoisting of Expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(MemoryDependenceWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(GVNHoistLegacyPass, "gvn-hoist",
                    "Early GVN Hoisting of Expressions", false, false)

FunctionPass *llvm::createGVNHoistPass() { return new GVNHoistLegacyPass(); }