1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/InitializePasses.h" 31 #include "llvm/Support/BranchProbability.h" 32 #include "llvm/Support/MemoryBuffer.h" 33 #include "llvm/Transforms/Utils.h" 34 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 35 #include "llvm/Transforms/Utils/Cloning.h" 36 #include "llvm/Transforms/Utils/ValueMapper.h" 37 38 #include <set> 39 #include <sstream> 40 41 using namespace llvm; 42 43 #define DEBUG_TYPE "chr" 44 45 #define CHR_DEBUG(X) LLVM_DEBUG(X) 46 47 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 48 cl::desc("Apply CHR for all functions")); 49 50 static cl::opt<double> CHRBiasThreshold( 51 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 52 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 53 54 static cl::opt<unsigned> CHRMergeThreshold( 55 "chr-merge-threshold", cl::init(2), cl::Hidden, 56 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 57 58 static cl::opt<std::string> CHRModuleList( 59 "chr-module-list", cl::init(""), cl::Hidden, 60 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 61 62 static cl::opt<std::string> CHRFunctionList( 63 "chr-function-list", cl::init(""), cl::Hidden, 64 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 65 66 static StringSet<> CHRModules; 67 static StringSet<> CHRFunctions; 68 69 static void parseCHRFilterFiles() { 70 if (!CHRModuleList.empty()) { 71 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 72 if (!FileOrErr) { 73 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 74 std::exit(1); 75 } 76 StringRef Buf = FileOrErr->get()->getBuffer(); 77 SmallVector<StringRef, 0> Lines; 78 Buf.split(Lines, '\n'); 79 for (StringRef Line : Lines) { 80 Line = Line.trim(); 81 if (!Line.empty()) 82 CHRModules.insert(Line); 83 } 84 } 85 if (!CHRFunctionList.empty()) { 86 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 87 if (!FileOrErr) { 88 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 89 std::exit(1); 90 } 91 StringRef Buf = FileOrErr->get()->getBuffer(); 92 SmallVector<StringRef, 0> Lines; 93 Buf.split(Lines, '\n'); 94 for (StringRef Line : Lines) { 95 Line = Line.trim(); 96 if (!Line.empty()) 97 CHRFunctions.insert(Line); 98 } 99 } 100 } 101 102 namespace { 103 class ControlHeightReductionLegacyPass : public FunctionPass { 104 public: 105 static char ID; 106 107 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 108 initializeControlHeightReductionLegacyPassPass( 109 *PassRegistry::getPassRegistry()); 110 parseCHRFilterFiles(); 111 } 112 113 bool runOnFunction(Function &F) override; 114 void getAnalysisUsage(AnalysisUsage &AU) const override { 115 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 116 AU.addRequired<DominatorTreeWrapperPass>(); 117 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 118 AU.addRequired<RegionInfoPass>(); 119 AU.addPreserved<GlobalsAAWrapperPass>(); 120 } 121 }; 122 } // end anonymous namespace 123 124 char ControlHeightReductionLegacyPass::ID = 0; 125 126 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 127 "chr", 128 "Reduce control height in the hot paths", 129 false, false) 130 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 132 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 133 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 134 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 135 "chr", 136 "Reduce control height in the hot paths", 137 false, false) 138 139 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 140 return new ControlHeightReductionLegacyPass(); 141 } 142 143 namespace { 144 145 struct CHRStats { 146 CHRStats() : NumBranches(0), NumBranchesDelta(0), 147 WeightedNumBranchesDelta(0) {} 148 void print(raw_ostream &OS) const { 149 OS << "CHRStats: NumBranches " << NumBranches 150 << " NumBranchesDelta " << NumBranchesDelta 151 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 152 } 153 uint64_t NumBranches; // The original number of conditional branches / 154 // selects 155 uint64_t NumBranchesDelta; // The decrease of the number of conditional 156 // branches / selects in the hot paths due to CHR. 157 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 158 // count at the scope entry. 159 }; 160 161 // RegInfo - some properties of a Region. 162 struct RegInfo { 163 RegInfo() : R(nullptr), HasBranch(false) {} 164 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 165 Region *R; 166 bool HasBranch; 167 SmallVector<SelectInst *, 8> Selects; 168 }; 169 170 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 171 172 // CHRScope - a sequence of regions to CHR together. It corresponds to a 173 // sequence of conditional blocks. It can have subscopes which correspond to 174 // nested conditional blocks. Nested CHRScopes form a tree. 175 class CHRScope { 176 public: 177 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 178 assert(RI.R && "Null RegionIn"); 179 RegInfos.push_back(RI); 180 } 181 182 Region *getParentRegion() { 183 assert(RegInfos.size() > 0 && "Empty CHRScope"); 184 Region *Parent = RegInfos[0].R->getParent(); 185 assert(Parent && "Unexpected to call this on the top-level region"); 186 return Parent; 187 } 188 189 BasicBlock *getEntryBlock() { 190 assert(RegInfos.size() > 0 && "Empty CHRScope"); 191 return RegInfos.front().R->getEntry(); 192 } 193 194 BasicBlock *getExitBlock() { 195 assert(RegInfos.size() > 0 && "Empty CHRScope"); 196 return RegInfos.back().R->getExit(); 197 } 198 199 bool appendable(CHRScope *Next) { 200 // The next scope is appendable only if this scope is directly connected to 201 // it (which implies it post-dominates this scope) and this scope dominates 202 // it (no edge to the next scope outside this scope). 203 BasicBlock *NextEntry = Next->getEntryBlock(); 204 if (getExitBlock() != NextEntry) 205 // Not directly connected. 206 return false; 207 Region *LastRegion = RegInfos.back().R; 208 for (BasicBlock *Pred : predecessors(NextEntry)) 209 if (!LastRegion->contains(Pred)) 210 // There's an edge going into the entry of the next scope from outside 211 // of this scope. 212 return false; 213 return true; 214 } 215 216 void append(CHRScope *Next) { 217 assert(RegInfos.size() > 0 && "Empty CHRScope"); 218 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 219 assert(getParentRegion() == Next->getParentRegion() && 220 "Must be siblings"); 221 assert(getExitBlock() == Next->getEntryBlock() && 222 "Must be adjacent"); 223 for (RegInfo &RI : Next->RegInfos) 224 RegInfos.push_back(RI); 225 for (CHRScope *Sub : Next->Subs) 226 Subs.push_back(Sub); 227 } 228 229 void addSub(CHRScope *SubIn) { 230 #ifndef NDEBUG 231 bool IsChild = false; 232 for (RegInfo &RI : RegInfos) 233 if (RI.R == SubIn->getParentRegion()) { 234 IsChild = true; 235 break; 236 } 237 assert(IsChild && "Must be a child"); 238 #endif 239 Subs.push_back(SubIn); 240 } 241 242 // Split this scope at the boundary region into two, which will belong to the 243 // tail and returns the tail. 244 CHRScope *split(Region *Boundary) { 245 assert(Boundary && "Boundary null"); 246 assert(RegInfos.begin()->R != Boundary && 247 "Can't be split at beginning"); 248 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), 249 [&Boundary](const RegInfo& RI) { 250 return Boundary == RI.R; 251 }); 252 if (BoundaryIt == RegInfos.end()) 253 return nullptr; 254 SmallVector<RegInfo, 8> TailRegInfos; 255 SmallVector<CHRScope *, 8> TailSubs; 256 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); 257 RegInfos.resize(BoundaryIt - RegInfos.begin()); 258 DenseSet<Region *> TailRegionSet; 259 for (RegInfo &RI : TailRegInfos) 260 TailRegionSet.insert(RI.R); 261 for (auto It = Subs.begin(); It != Subs.end(); ) { 262 CHRScope *Sub = *It; 263 assert(Sub && "null Sub"); 264 Region *Parent = Sub->getParentRegion(); 265 if (TailRegionSet.count(Parent)) { 266 TailSubs.push_back(Sub); 267 It = Subs.erase(It); 268 } else { 269 assert(std::find_if(RegInfos.begin(), RegInfos.end(), 270 [&Parent](const RegInfo& RI) { 271 return Parent == RI.R; 272 }) != RegInfos.end() && 273 "Must be in head"); 274 ++It; 275 } 276 } 277 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 278 return new CHRScope(TailRegInfos, TailSubs); 279 } 280 281 bool contains(Instruction *I) const { 282 BasicBlock *Parent = I->getParent(); 283 for (const RegInfo &RI : RegInfos) 284 if (RI.R->contains(Parent)) 285 return true; 286 return false; 287 } 288 289 void print(raw_ostream &OS) const; 290 291 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 292 SmallVector<CHRScope *, 8> Subs; // Subscopes. 293 294 // The instruction at which to insert the CHR conditional branch (and hoist 295 // the dependent condition values). 296 Instruction *BranchInsertPoint; 297 298 // True-biased and false-biased regions (conditional blocks), 299 // respectively. Used only for the outermost scope and includes regions in 300 // subscopes. The rest are unbiased. 301 DenseSet<Region *> TrueBiasedRegions; 302 DenseSet<Region *> FalseBiasedRegions; 303 // Among the biased regions, the regions that get CHRed. 304 SmallVector<RegInfo, 8> CHRRegions; 305 306 // True-biased and false-biased selects, respectively. Used only for the 307 // outermost scope and includes ones in subscopes. 308 DenseSet<SelectInst *> TrueBiasedSelects; 309 DenseSet<SelectInst *> FalseBiasedSelects; 310 311 // Map from one of the above regions to the instructions to stop 312 // hoisting instructions at through use-def chains. 313 HoistStopMapTy HoistStopMap; 314 315 private: 316 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, 317 SmallVector<CHRScope *, 8> &SubsIn) 318 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 319 }; 320 321 class CHR { 322 public: 323 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 324 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 325 OptimizationRemarkEmitter &OREin) 326 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 327 328 ~CHR() { 329 for (CHRScope *Scope : Scopes) { 330 delete Scope; 331 } 332 } 333 334 bool run(); 335 336 private: 337 // See the comments in CHR::run() for the high level flow of the algorithm and 338 // what the following functions do. 339 340 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 341 Region *R = RI.getTopLevelRegion(); 342 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); 343 if (Scope) { 344 Output.push_back(Scope); 345 } 346 } 347 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 348 SmallVectorImpl<CHRScope *> &Scopes); 349 CHRScope *findScope(Region *R); 350 void checkScopeHoistable(CHRScope *Scope); 351 352 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 353 SmallVectorImpl<CHRScope *> &Output); 354 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 355 CHRScope *Outer, 356 DenseSet<Value *> *OuterConditionValues, 357 Instruction *OuterInsertPoint, 358 SmallVectorImpl<CHRScope *> &Output, 359 DenseSet<Instruction *> &Unhoistables); 360 361 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 362 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 363 364 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 365 SmallVectorImpl<CHRScope *> &Output); 366 367 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 368 SmallVectorImpl<CHRScope *> &Output); 369 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 370 371 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 372 SmallVectorImpl<CHRScope *> &Output); 373 374 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 375 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 376 void cloneScopeBlocks(CHRScope *Scope, 377 BasicBlock *PreEntryBlock, 378 BasicBlock *ExitBlock, 379 Region *LastRegion, 380 ValueToValueMapTy &VMap); 381 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 382 BasicBlock *EntryBlock, 383 BasicBlock *NewEntryBlock, 384 ValueToValueMapTy &VMap); 385 void fixupBranchesAndSelects(CHRScope *Scope, 386 BasicBlock *PreEntryBlock, 387 BranchInst *MergedBR, 388 uint64_t ProfileCount); 389 void fixupBranch(Region *R, 390 CHRScope *Scope, 391 IRBuilder<> &IRB, 392 Value *&MergedCondition, BranchProbability &CHRBranchBias); 393 void fixupSelect(SelectInst* SI, 394 CHRScope *Scope, 395 IRBuilder<> &IRB, 396 Value *&MergedCondition, BranchProbability &CHRBranchBias); 397 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 398 Instruction *BranchOrSelect, 399 CHRScope *Scope, 400 IRBuilder<> &IRB, 401 Value *&MergedCondition); 402 403 Function &F; 404 BlockFrequencyInfo &BFI; 405 DominatorTree &DT; 406 ProfileSummaryInfo &PSI; 407 RegionInfo &RI; 408 OptimizationRemarkEmitter &ORE; 409 CHRStats Stats; 410 411 // All the true-biased regions in the function 412 DenseSet<Region *> TrueBiasedRegionsGlobal; 413 // All the false-biased regions in the function 414 DenseSet<Region *> FalseBiasedRegionsGlobal; 415 // All the true-biased selects in the function 416 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 417 // All the false-biased selects in the function 418 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 419 // A map from biased regions to their branch bias 420 DenseMap<Region *, BranchProbability> BranchBiasMap; 421 // A map from biased selects to their branch bias 422 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 423 // All the scopes. 424 DenseSet<CHRScope *> Scopes; 425 }; 426 427 } // end anonymous namespace 428 429 static inline 430 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 431 const CHRStats &Stats) { 432 Stats.print(OS); 433 return OS; 434 } 435 436 static inline 437 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 438 Scope.print(OS); 439 return OS; 440 } 441 442 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 443 if (ForceCHR) 444 return true; 445 446 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 447 if (CHRModules.count(F.getParent()->getName())) 448 return true; 449 return CHRFunctions.count(F.getName()); 450 } 451 452 assert(PSI.hasProfileSummary() && "Empty PSI?"); 453 return PSI.isFunctionEntryHot(&F); 454 } 455 456 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 457 CHRStats *Stats) { 458 StringRef FuncName = F.getName(); 459 StringRef ModuleName = F.getParent()->getName(); 460 (void)(FuncName); // Unused in release build. 461 (void)(ModuleName); // Unused in release build. 462 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 463 << FuncName); 464 if (Stats) 465 CHR_DEBUG(dbgs() << " " << *Stats); 466 CHR_DEBUG(dbgs() << "\n"); 467 CHR_DEBUG(F.dump()); 468 } 469 470 void CHRScope::print(raw_ostream &OS) const { 471 assert(RegInfos.size() > 0 && "Empty CHRScope"); 472 OS << "CHRScope["; 473 OS << RegInfos.size() << ", Regions["; 474 for (const RegInfo &RI : RegInfos) { 475 OS << RI.R->getNameStr(); 476 if (RI.HasBranch) 477 OS << " B"; 478 if (RI.Selects.size() > 0) 479 OS << " S" << RI.Selects.size(); 480 OS << ", "; 481 } 482 if (RegInfos[0].R->getParent()) { 483 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 484 } else { 485 // top level region 486 OS << "]"; 487 } 488 OS << ", Subs["; 489 for (CHRScope *Sub : Subs) { 490 OS << *Sub << ", "; 491 } 492 OS << "]]"; 493 } 494 495 // Return true if the given instruction type can be hoisted by CHR. 496 static bool isHoistableInstructionType(Instruction *I) { 497 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 498 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 499 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 500 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 501 isa<InsertValueInst>(I); 502 } 503 504 // Return true if the given instruction can be hoisted by CHR. 505 static bool isHoistable(Instruction *I, DominatorTree &DT) { 506 if (!isHoistableInstructionType(I)) 507 return false; 508 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 509 } 510 511 // Recursively traverse the use-def chains of the given value and return a set 512 // of the unhoistable base values defined within the scope (excluding the 513 // first-region entry block) or the (hoistable or unhoistable) base values that 514 // are defined outside (including the first-region entry block) of the 515 // scope. The returned set doesn't include constants. 516 static std::set<Value *> getBaseValues( 517 Value *V, DominatorTree &DT, 518 DenseMap<Value *, std::set<Value *>> &Visited) { 519 if (Visited.count(V)) { 520 return Visited[V]; 521 } 522 std::set<Value *> Result; 523 if (auto *I = dyn_cast<Instruction>(V)) { 524 // We don't stop at a block that's not in the Scope because we would miss some 525 // instructions that are based on the same base values if we stop there. 526 if (!isHoistable(I, DT)) { 527 Result.insert(I); 528 Visited.insert(std::make_pair(V, Result)); 529 return Result; 530 } 531 // I is hoistable above the Scope. 532 for (Value *Op : I->operands()) { 533 std::set<Value *> OpResult = getBaseValues(Op, DT, Visited); 534 Result.insert(OpResult.begin(), OpResult.end()); 535 } 536 Visited.insert(std::make_pair(V, Result)); 537 return Result; 538 } 539 if (isa<Argument>(V)) { 540 Result.insert(V); 541 Visited.insert(std::make_pair(V, Result)); 542 return Result; 543 } 544 // We don't include others like constants because those won't lead to any 545 // chance of folding of conditions (eg two bit checks merged into one check) 546 // after CHR. 547 Visited.insert(std::make_pair(V, Result)); 548 return Result; // empty 549 } 550 551 // Return true if V is already hoisted or can be hoisted (along with its 552 // operands) above the insert point. When it returns true and HoistStops is 553 // non-null, the instructions to stop hoisting at through the use-def chains are 554 // inserted into HoistStops. 555 static bool 556 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 557 DenseSet<Instruction *> &Unhoistables, 558 DenseSet<Instruction *> *HoistStops, 559 DenseMap<Instruction *, bool> &Visited) { 560 assert(InsertPoint && "Null InsertPoint"); 561 if (auto *I = dyn_cast<Instruction>(V)) { 562 if (Visited.count(I)) { 563 return Visited[I]; 564 } 565 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 566 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 567 if (Unhoistables.count(I)) { 568 // Don't hoist if they are not to be hoisted. 569 Visited[I] = false; 570 return false; 571 } 572 if (DT.dominates(I, InsertPoint)) { 573 // We are already above the insert point. Stop here. 574 if (HoistStops) 575 HoistStops->insert(I); 576 Visited[I] = true; 577 return true; 578 } 579 // We aren't not above the insert point, check if we can hoist it above the 580 // insert point. 581 if (isHoistable(I, DT)) { 582 // Check operands first. 583 DenseSet<Instruction *> OpsHoistStops; 584 bool AllOpsHoisted = true; 585 for (Value *Op : I->operands()) { 586 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 587 Visited)) { 588 AllOpsHoisted = false; 589 break; 590 } 591 } 592 if (AllOpsHoisted) { 593 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 594 if (HoistStops) 595 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 596 Visited[I] = true; 597 return true; 598 } 599 } 600 Visited[I] = false; 601 return false; 602 } 603 // Non-instructions are considered hoistable. 604 return true; 605 } 606 607 // Returns true and sets the true probability and false probability of an 608 // MD_prof metadata if it's well-formed. 609 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 610 BranchProbability &FalseProb) { 611 if (!MD) return false; 612 MDString *MDName = cast<MDString>(MD->getOperand(0)); 613 if (MDName->getString() != "branch_weights" || 614 MD->getNumOperands() != 3) 615 return false; 616 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 617 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 618 if (!TrueWeight || !FalseWeight) 619 return false; 620 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 621 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 622 uint64_t SumWt = TrueWt + FalseWt; 623 624 assert(SumWt >= TrueWt && SumWt >= FalseWt && 625 "Overflow calculating branch probabilities."); 626 627 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 628 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 629 return true; 630 } 631 632 static BranchProbability getCHRBiasThreshold() { 633 return BranchProbability::getBranchProbability( 634 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 635 } 636 637 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 638 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 639 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 640 // false. 641 template <typename K, typename S, typename M> 642 static bool checkBias(K *Key, BranchProbability TrueProb, 643 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 644 M &BiasMap) { 645 BranchProbability Threshold = getCHRBiasThreshold(); 646 if (TrueProb >= Threshold) { 647 TrueSet.insert(Key); 648 BiasMap[Key] = TrueProb; 649 return true; 650 } else if (FalseProb >= Threshold) { 651 FalseSet.insert(Key); 652 BiasMap[Key] = FalseProb; 653 return true; 654 } 655 return false; 656 } 657 658 // Returns true and insert a region into the right biased set and the map if the 659 // branch of the region is biased. 660 static bool checkBiasedBranch(BranchInst *BI, Region *R, 661 DenseSet<Region *> &TrueBiasedRegionsGlobal, 662 DenseSet<Region *> &FalseBiasedRegionsGlobal, 663 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 664 if (!BI->isConditional()) 665 return false; 666 BranchProbability ThenProb, ElseProb; 667 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 668 ThenProb, ElseProb)) 669 return false; 670 BasicBlock *IfThen = BI->getSuccessor(0); 671 BasicBlock *IfElse = BI->getSuccessor(1); 672 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 673 IfThen != IfElse && 674 "Invariant from findScopes"); 675 if (IfThen == R->getExit()) { 676 // Swap them so that IfThen/ThenProb means going into the conditional code 677 // and IfElse/ElseProb means skipping it. 678 std::swap(IfThen, IfElse); 679 std::swap(ThenProb, ElseProb); 680 } 681 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 682 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 683 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 684 return checkBias(R, ThenProb, ElseProb, 685 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 686 BranchBiasMap); 687 } 688 689 // Returns true and insert a select into the right biased set and the map if the 690 // select is biased. 691 static bool checkBiasedSelect( 692 SelectInst *SI, Region *R, 693 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 694 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 695 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 696 BranchProbability TrueProb, FalseProb; 697 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 698 TrueProb, FalseProb)) 699 return false; 700 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 701 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 702 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 703 return checkBias(SI, TrueProb, FalseProb, 704 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 705 SelectBiasMap); 706 } 707 708 // Returns the instruction at which to hoist the dependent condition values and 709 // insert the CHR branch for a region. This is the terminator branch in the 710 // entry block or the first select in the entry block, if any. 711 static Instruction* getBranchInsertPoint(RegInfo &RI) { 712 Region *R = RI.R; 713 BasicBlock *EntryBB = R->getEntry(); 714 // The hoist point is by default the terminator of the entry block, which is 715 // the same as the branch instruction if RI.HasBranch is true. 716 Instruction *HoistPoint = EntryBB->getTerminator(); 717 for (SelectInst *SI : RI.Selects) { 718 if (SI->getParent() == EntryBB) { 719 // Pick the first select in Selects in the entry block. Note Selects is 720 // sorted in the instruction order within a block (asserted below). 721 HoistPoint = SI; 722 break; 723 } 724 } 725 assert(HoistPoint && "Null HoistPoint"); 726 #ifndef NDEBUG 727 // Check that HoistPoint is the first one in Selects in the entry block, 728 // if any. 729 DenseSet<Instruction *> EntryBlockSelectSet; 730 for (SelectInst *SI : RI.Selects) { 731 if (SI->getParent() == EntryBB) { 732 EntryBlockSelectSet.insert(SI); 733 } 734 } 735 for (Instruction &I : *EntryBB) { 736 if (EntryBlockSelectSet.count(&I) > 0) { 737 assert(&I == HoistPoint && 738 "HoistPoint must be the first one in Selects"); 739 break; 740 } 741 } 742 #endif 743 return HoistPoint; 744 } 745 746 // Find a CHR scope in the given region. 747 CHRScope * CHR::findScope(Region *R) { 748 CHRScope *Result = nullptr; 749 BasicBlock *Entry = R->getEntry(); 750 BasicBlock *Exit = R->getExit(); // null if top level. 751 assert(Entry && "Entry must not be null"); 752 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 753 "Only top level region has a null exit"); 754 if (Entry) 755 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 756 else 757 CHR_DEBUG(dbgs() << "Entry null\n"); 758 if (Exit) 759 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 760 else 761 CHR_DEBUG(dbgs() << "Exit null\n"); 762 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 763 // to this region). 764 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 765 if (EntryInSubregion) 766 return nullptr; 767 // Exclude loops 768 for (BasicBlock *Pred : predecessors(Entry)) 769 if (R->contains(Pred)) 770 return nullptr; 771 if (Exit) { 772 // Try to find an if-then block (check if R is an if-then). 773 // if (cond) { 774 // ... 775 // } 776 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 777 if (BI) 778 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 779 else 780 CHR_DEBUG(dbgs() << "BI null\n"); 781 if (BI && BI->isConditional()) { 782 BasicBlock *S0 = BI->getSuccessor(0); 783 BasicBlock *S1 = BI->getSuccessor(1); 784 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 785 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 786 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 787 RegInfo RI(R); 788 RI.HasBranch = checkBiasedBranch( 789 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 790 BranchBiasMap); 791 Result = new CHRScope(RI); 792 Scopes.insert(Result); 793 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 794 ++Stats.NumBranches; 795 if (!RI.HasBranch) { 796 ORE.emit([&]() { 797 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 798 << "Branch not biased"; 799 }); 800 } 801 } 802 } 803 } 804 { 805 // Try to look for selects in the direct child blocks (as opposed to in 806 // subregions) of R. 807 // ... 808 // if (..) { // Some subregion 809 // ... 810 // } 811 // if (..) { // Some subregion 812 // ... 813 // } 814 // ... 815 // a = cond ? b : c; 816 // ... 817 SmallVector<SelectInst *, 8> Selects; 818 for (RegionNode *E : R->elements()) { 819 if (E->isSubRegion()) 820 continue; 821 // This returns the basic block of E if E is a direct child of R (not a 822 // subregion.) 823 BasicBlock *BB = E->getEntry(); 824 // Need to push in the order to make it easier to find the first Select 825 // later. 826 for (Instruction &I : *BB) { 827 if (auto *SI = dyn_cast<SelectInst>(&I)) { 828 Selects.push_back(SI); 829 ++Stats.NumBranches; 830 } 831 } 832 } 833 if (Selects.size() > 0) { 834 auto AddSelects = [&](RegInfo &RI) { 835 for (auto *SI : Selects) 836 if (checkBiasedSelect(SI, RI.R, 837 TrueBiasedSelectsGlobal, 838 FalseBiasedSelectsGlobal, 839 SelectBiasMap)) 840 RI.Selects.push_back(SI); 841 else 842 ORE.emit([&]() { 843 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 844 << "Select not biased"; 845 }); 846 }; 847 if (!Result) { 848 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 849 RegInfo RI(R); 850 AddSelects(RI); 851 Result = new CHRScope(RI); 852 Scopes.insert(Result); 853 } else { 854 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 855 AddSelects(Result->RegInfos[0]); 856 } 857 } 858 } 859 860 if (Result) { 861 checkScopeHoistable(Result); 862 } 863 return Result; 864 } 865 866 // Check that any of the branch and the selects in the region could be 867 // hoisted above the the CHR branch insert point (the most dominating of 868 // them, either the branch (at the end of the first block) or the first 869 // select in the first block). If the branch can't be hoisted, drop the 870 // selects in the first blocks. 871 // 872 // For example, for the following scope/region with selects, we want to insert 873 // the merged branch right before the first select in the first/entry block by 874 // hoisting c1, c2, c3, and c4. 875 // 876 // // Branch insert point here. 877 // a = c1 ? b : c; // Select 1 878 // d = c2 ? e : f; // Select 2 879 // if (c3) { // Branch 880 // ... 881 // c4 = foo() // A call. 882 // g = c4 ? h : i; // Select 3 883 // } 884 // 885 // But suppose we can't hoist c4 because it's dependent on the preceding 886 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 887 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 888 void CHR::checkScopeHoistable(CHRScope *Scope) { 889 RegInfo &RI = Scope->RegInfos[0]; 890 Region *R = RI.R; 891 BasicBlock *EntryBB = R->getEntry(); 892 auto *Branch = RI.HasBranch ? 893 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 894 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 895 if (RI.HasBranch || !Selects.empty()) { 896 Instruction *InsertPoint = getBranchInsertPoint(RI); 897 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 898 // Avoid a data dependence from a select or a branch to a(nother) 899 // select. Note no instruction can't data-depend on a branch (a branch 900 // instruction doesn't produce a value). 901 DenseSet<Instruction *> Unhoistables; 902 // Initialize Unhoistables with the selects. 903 for (SelectInst *SI : Selects) { 904 Unhoistables.insert(SI); 905 } 906 // Remove Selects that can't be hoisted. 907 for (auto it = Selects.begin(); it != Selects.end(); ) { 908 SelectInst *SI = *it; 909 if (SI == InsertPoint) { 910 ++it; 911 continue; 912 } 913 DenseMap<Instruction *, bool> Visited; 914 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 915 DT, Unhoistables, nullptr, Visited); 916 if (!IsHoistable) { 917 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 918 ORE.emit([&]() { 919 return OptimizationRemarkMissed(DEBUG_TYPE, 920 "DropUnhoistableSelect", SI) 921 << "Dropped unhoistable select"; 922 }); 923 it = Selects.erase(it); 924 // Since we are dropping the select here, we also drop it from 925 // Unhoistables. 926 Unhoistables.erase(SI); 927 } else 928 ++it; 929 } 930 // Update InsertPoint after potentially removing selects. 931 InsertPoint = getBranchInsertPoint(RI); 932 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 933 if (RI.HasBranch && InsertPoint != Branch) { 934 DenseMap<Instruction *, bool> Visited; 935 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 936 DT, Unhoistables, nullptr, Visited); 937 if (!IsHoistable) { 938 // If the branch isn't hoistable, drop the selects in the entry 939 // block, preferring the branch, which makes the branch the hoist 940 // point. 941 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 942 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 943 CHR_DEBUG( 944 for (SelectInst *SI : Selects) { 945 dbgs() << "SI " << *SI << "\n"; 946 }); 947 for (SelectInst *SI : Selects) { 948 ORE.emit([&]() { 949 return OptimizationRemarkMissed(DEBUG_TYPE, 950 "DropSelectUnhoistableBranch", SI) 951 << "Dropped select due to unhoistable branch"; 952 }); 953 } 954 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 955 [EntryBB](SelectInst *SI) { 956 return SI->getParent() == EntryBB; 957 }), Selects.end()); 958 Unhoistables.clear(); 959 InsertPoint = Branch; 960 } 961 } 962 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 963 #ifndef NDEBUG 964 if (RI.HasBranch) { 965 assert(!DT.dominates(Branch, InsertPoint) && 966 "Branch can't be already above the hoist point"); 967 DenseMap<Instruction *, bool> Visited; 968 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 969 DT, Unhoistables, nullptr, Visited) && 970 "checkHoistValue for branch"); 971 } 972 for (auto *SI : Selects) { 973 assert(!DT.dominates(SI, InsertPoint) && 974 "SI can't be already above the hoist point"); 975 DenseMap<Instruction *, bool> Visited; 976 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 977 Unhoistables, nullptr, Visited) && 978 "checkHoistValue for selects"); 979 } 980 CHR_DEBUG(dbgs() << "Result\n"); 981 if (RI.HasBranch) { 982 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 983 } 984 for (auto *SI : Selects) { 985 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 986 } 987 #endif 988 } 989 } 990 991 // Traverse the region tree, find all nested scopes and merge them if possible. 992 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 993 SmallVectorImpl<CHRScope *> &Scopes) { 994 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 995 CHRScope *Result = findScope(R); 996 // Visit subscopes. 997 CHRScope *ConsecutiveSubscope = nullptr; 998 SmallVector<CHRScope *, 8> Subscopes; 999 for (auto It = R->begin(); It != R->end(); ++It) { 1000 const std::unique_ptr<Region> &SubR = *It; 1001 auto NextIt = std::next(It); 1002 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 1003 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 1004 << "\n"); 1005 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 1006 if (SubCHRScope) { 1007 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 1008 } else { 1009 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 1010 } 1011 if (SubCHRScope) { 1012 if (!ConsecutiveSubscope) 1013 ConsecutiveSubscope = SubCHRScope; 1014 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 1015 Subscopes.push_back(ConsecutiveSubscope); 1016 ConsecutiveSubscope = SubCHRScope; 1017 } else 1018 ConsecutiveSubscope->append(SubCHRScope); 1019 } else { 1020 if (ConsecutiveSubscope) { 1021 Subscopes.push_back(ConsecutiveSubscope); 1022 } 1023 ConsecutiveSubscope = nullptr; 1024 } 1025 } 1026 if (ConsecutiveSubscope) { 1027 Subscopes.push_back(ConsecutiveSubscope); 1028 } 1029 for (CHRScope *Sub : Subscopes) { 1030 if (Result) { 1031 // Combine it with the parent. 1032 Result->addSub(Sub); 1033 } else { 1034 // Push Subscopes as they won't be combined with the parent. 1035 Scopes.push_back(Sub); 1036 } 1037 } 1038 return Result; 1039 } 1040 1041 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1042 DenseSet<Value *> ConditionValues; 1043 if (RI.HasBranch) { 1044 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1045 ConditionValues.insert(BI->getCondition()); 1046 } 1047 for (SelectInst *SI : RI.Selects) { 1048 ConditionValues.insert(SI->getCondition()); 1049 } 1050 return ConditionValues; 1051 } 1052 1053 1054 // Determine whether to split a scope depending on the sets of the branch 1055 // condition values of the previous region and the current region. We split 1056 // (return true) it if 1) the condition values of the inner/lower scope can't be 1057 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1058 // values have an empty intersection (because the combined branch conditions 1059 // won't probably lead to a simpler combined condition). 1060 static bool shouldSplit(Instruction *InsertPoint, 1061 DenseSet<Value *> &PrevConditionValues, 1062 DenseSet<Value *> &ConditionValues, 1063 DominatorTree &DT, 1064 DenseSet<Instruction *> &Unhoistables) { 1065 assert(InsertPoint && "Null InsertPoint"); 1066 CHR_DEBUG( 1067 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1068 for (Value *V : PrevConditionValues) { 1069 dbgs() << *V << ", "; 1070 } 1071 dbgs() << " ConditionValues "; 1072 for (Value *V : ConditionValues) { 1073 dbgs() << *V << ", "; 1074 } 1075 dbgs() << "\n"); 1076 // If any of Bases isn't hoistable to the hoist point, split. 1077 for (Value *V : ConditionValues) { 1078 DenseMap<Instruction *, bool> Visited; 1079 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1080 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1081 return true; // Not hoistable, split. 1082 } 1083 } 1084 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1085 // unnecessary splits at scopes with no branch/selects. If 1086 // PrevConditionValues and ConditionValues don't intersect at all, split. 1087 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1088 // Use std::set as DenseSet doesn't work with set_intersection. 1089 std::set<Value *> PrevBases, Bases; 1090 DenseMap<Value *, std::set<Value *>> Visited; 1091 for (Value *V : PrevConditionValues) { 1092 std::set<Value *> BaseValues = getBaseValues(V, DT, Visited); 1093 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1094 } 1095 for (Value *V : ConditionValues) { 1096 std::set<Value *> BaseValues = getBaseValues(V, DT, Visited); 1097 Bases.insert(BaseValues.begin(), BaseValues.end()); 1098 } 1099 CHR_DEBUG( 1100 dbgs() << "PrevBases "; 1101 for (Value *V : PrevBases) { 1102 dbgs() << *V << ", "; 1103 } 1104 dbgs() << " Bases "; 1105 for (Value *V : Bases) { 1106 dbgs() << *V << ", "; 1107 } 1108 dbgs() << "\n"); 1109 std::set<Value *> Intersection; 1110 std::set_intersection(PrevBases.begin(), PrevBases.end(), 1111 Bases.begin(), Bases.end(), 1112 std::inserter(Intersection, Intersection.begin())); 1113 if (Intersection.empty()) { 1114 // Empty intersection, split. 1115 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1116 return true; 1117 } 1118 } 1119 CHR_DEBUG(dbgs() << "No split\n"); 1120 return false; // Don't split. 1121 } 1122 1123 static void getSelectsInScope(CHRScope *Scope, 1124 DenseSet<Instruction *> &Output) { 1125 for (RegInfo &RI : Scope->RegInfos) 1126 for (SelectInst *SI : RI.Selects) 1127 Output.insert(SI); 1128 for (CHRScope *Sub : Scope->Subs) 1129 getSelectsInScope(Sub, Output); 1130 } 1131 1132 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1133 SmallVectorImpl<CHRScope *> &Output) { 1134 for (CHRScope *Scope : Input) { 1135 assert(!Scope->BranchInsertPoint && 1136 "BranchInsertPoint must not be set"); 1137 DenseSet<Instruction *> Unhoistables; 1138 getSelectsInScope(Scope, Unhoistables); 1139 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1140 } 1141 #ifndef NDEBUG 1142 for (CHRScope *Scope : Output) { 1143 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1144 } 1145 #endif 1146 } 1147 1148 SmallVector<CHRScope *, 8> CHR::splitScope( 1149 CHRScope *Scope, 1150 CHRScope *Outer, 1151 DenseSet<Value *> *OuterConditionValues, 1152 Instruction *OuterInsertPoint, 1153 SmallVectorImpl<CHRScope *> &Output, 1154 DenseSet<Instruction *> &Unhoistables) { 1155 if (Outer) { 1156 assert(OuterConditionValues && "Null OuterConditionValues"); 1157 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1158 } 1159 bool PrevSplitFromOuter = true; 1160 DenseSet<Value *> PrevConditionValues; 1161 Instruction *PrevInsertPoint = nullptr; 1162 SmallVector<CHRScope *, 8> Splits; 1163 SmallVector<bool, 8> SplitsSplitFromOuter; 1164 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1165 SmallVector<Instruction *, 8> SplitsInsertPoints; 1166 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1167 for (RegInfo &RI : RegInfos) { 1168 Instruction *InsertPoint = getBranchInsertPoint(RI); 1169 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1170 CHR_DEBUG( 1171 dbgs() << "ConditionValues "; 1172 for (Value *V : ConditionValues) { 1173 dbgs() << *V << ", "; 1174 } 1175 dbgs() << "\n"); 1176 if (RI.R == RegInfos[0].R) { 1177 // First iteration. Check to see if we should split from the outer. 1178 if (Outer) { 1179 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1180 CHR_DEBUG(dbgs() << "Should split from outer at " 1181 << RI.R->getNameStr() << "\n"); 1182 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1183 ConditionValues, DT, Unhoistables)) { 1184 PrevConditionValues = ConditionValues; 1185 PrevInsertPoint = InsertPoint; 1186 ORE.emit([&]() { 1187 return OptimizationRemarkMissed(DEBUG_TYPE, 1188 "SplitScopeFromOuter", 1189 RI.R->getEntry()->getTerminator()) 1190 << "Split scope from outer due to unhoistable branch/select " 1191 << "and/or lack of common condition values"; 1192 }); 1193 } else { 1194 // Not splitting from the outer. Use the outer bases and insert 1195 // point. Union the bases. 1196 PrevSplitFromOuter = false; 1197 PrevConditionValues = *OuterConditionValues; 1198 PrevConditionValues.insert(ConditionValues.begin(), 1199 ConditionValues.end()); 1200 PrevInsertPoint = OuterInsertPoint; 1201 } 1202 } else { 1203 CHR_DEBUG(dbgs() << "Outer null\n"); 1204 PrevConditionValues = ConditionValues; 1205 PrevInsertPoint = InsertPoint; 1206 } 1207 } else { 1208 CHR_DEBUG(dbgs() << "Should split from prev at " 1209 << RI.R->getNameStr() << "\n"); 1210 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1211 DT, Unhoistables)) { 1212 CHRScope *Tail = Scope->split(RI.R); 1213 Scopes.insert(Tail); 1214 Splits.push_back(Scope); 1215 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1216 SplitsConditionValues.push_back(PrevConditionValues); 1217 SplitsInsertPoints.push_back(PrevInsertPoint); 1218 Scope = Tail; 1219 PrevConditionValues = ConditionValues; 1220 PrevInsertPoint = InsertPoint; 1221 PrevSplitFromOuter = true; 1222 ORE.emit([&]() { 1223 return OptimizationRemarkMissed(DEBUG_TYPE, 1224 "SplitScopeFromPrev", 1225 RI.R->getEntry()->getTerminator()) 1226 << "Split scope from previous due to unhoistable branch/select " 1227 << "and/or lack of common condition values"; 1228 }); 1229 } else { 1230 // Not splitting. Union the bases. Keep the hoist point. 1231 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1232 } 1233 } 1234 } 1235 Splits.push_back(Scope); 1236 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1237 SplitsConditionValues.push_back(PrevConditionValues); 1238 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1239 SplitsInsertPoints.push_back(PrevInsertPoint); 1240 assert(Splits.size() == SplitsConditionValues.size() && 1241 Splits.size() == SplitsSplitFromOuter.size() && 1242 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1243 for (size_t I = 0; I < Splits.size(); ++I) { 1244 CHRScope *Split = Splits[I]; 1245 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1246 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1247 SmallVector<CHRScope *, 8> NewSubs; 1248 DenseSet<Instruction *> SplitUnhoistables; 1249 getSelectsInScope(Split, SplitUnhoistables); 1250 for (CHRScope *Sub : Split->Subs) { 1251 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1252 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1253 SplitUnhoistables); 1254 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1255 } 1256 Split->Subs = NewSubs; 1257 } 1258 SmallVector<CHRScope *, 8> Result; 1259 for (size_t I = 0; I < Splits.size(); ++I) { 1260 CHRScope *Split = Splits[I]; 1261 if (SplitsSplitFromOuter[I]) { 1262 // Split from the outer. 1263 Output.push_back(Split); 1264 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1265 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1266 << "\n"); 1267 } else { 1268 // Connected to the outer. 1269 Result.push_back(Split); 1270 } 1271 } 1272 if (!Outer) 1273 assert(Result.empty() && 1274 "If no outer (top-level), must return no nested ones"); 1275 return Result; 1276 } 1277 1278 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1279 for (CHRScope *Scope : Scopes) { 1280 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1281 classifyBiasedScopes(Scope, Scope); 1282 CHR_DEBUG( 1283 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1284 dbgs() << "TrueBiasedRegions "; 1285 for (Region *R : Scope->TrueBiasedRegions) { 1286 dbgs() << R->getNameStr() << ", "; 1287 } 1288 dbgs() << "\n"; 1289 dbgs() << "FalseBiasedRegions "; 1290 for (Region *R : Scope->FalseBiasedRegions) { 1291 dbgs() << R->getNameStr() << ", "; 1292 } 1293 dbgs() << "\n"; 1294 dbgs() << "TrueBiasedSelects "; 1295 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1296 dbgs() << *SI << ", "; 1297 } 1298 dbgs() << "\n"; 1299 dbgs() << "FalseBiasedSelects "; 1300 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1301 dbgs() << *SI << ", "; 1302 } 1303 dbgs() << "\n";); 1304 } 1305 } 1306 1307 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1308 for (RegInfo &RI : Scope->RegInfos) { 1309 if (RI.HasBranch) { 1310 Region *R = RI.R; 1311 if (TrueBiasedRegionsGlobal.count(R) > 0) 1312 OutermostScope->TrueBiasedRegions.insert(R); 1313 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1314 OutermostScope->FalseBiasedRegions.insert(R); 1315 else 1316 llvm_unreachable("Must be biased"); 1317 } 1318 for (SelectInst *SI : RI.Selects) { 1319 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1320 OutermostScope->TrueBiasedSelects.insert(SI); 1321 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1322 OutermostScope->FalseBiasedSelects.insert(SI); 1323 else 1324 llvm_unreachable("Must be biased"); 1325 } 1326 } 1327 for (CHRScope *Sub : Scope->Subs) { 1328 classifyBiasedScopes(Sub, OutermostScope); 1329 } 1330 } 1331 1332 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1333 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1334 Scope->FalseBiasedRegions.size() + 1335 Scope->TrueBiasedSelects.size() + 1336 Scope->FalseBiasedSelects.size(); 1337 return NumBiased >= CHRMergeThreshold; 1338 } 1339 1340 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1341 SmallVectorImpl<CHRScope *> &Output) { 1342 for (CHRScope *Scope : Input) { 1343 // Filter out the ones with only one region and no subs. 1344 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1345 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1346 << Scope->TrueBiasedRegions.size() 1347 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1348 << " true-selects " << Scope->TrueBiasedSelects.size() 1349 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1350 ORE.emit([&]() { 1351 return OptimizationRemarkMissed( 1352 DEBUG_TYPE, 1353 "DropScopeWithOneBranchOrSelect", 1354 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1355 << "Drop scope with < " 1356 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1357 << " biased branch(es) or select(s)"; 1358 }); 1359 continue; 1360 } 1361 Output.push_back(Scope); 1362 } 1363 } 1364 1365 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1366 SmallVectorImpl<CHRScope *> &Output) { 1367 for (CHRScope *Scope : Input) { 1368 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1369 "Empty"); 1370 setCHRRegions(Scope, Scope); 1371 Output.push_back(Scope); 1372 CHR_DEBUG( 1373 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1374 for (auto pair : Scope->HoistStopMap) { 1375 Region *R = pair.first; 1376 dbgs() << "Region " << R->getNameStr() << "\n"; 1377 for (Instruction *I : pair.second) { 1378 dbgs() << "HoistStop " << *I << "\n"; 1379 } 1380 } 1381 dbgs() << "CHRRegions" << "\n"; 1382 for (RegInfo &RI : Scope->CHRRegions) { 1383 dbgs() << RI.R->getNameStr() << "\n"; 1384 }); 1385 } 1386 } 1387 1388 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1389 DenseSet<Instruction *> Unhoistables; 1390 // Put the biased selects in Unhoistables because they should stay where they 1391 // are and constant-folded after CHR (in case one biased select or a branch 1392 // can depend on another biased select.) 1393 for (RegInfo &RI : Scope->RegInfos) { 1394 for (SelectInst *SI : RI.Selects) { 1395 Unhoistables.insert(SI); 1396 } 1397 } 1398 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1399 for (RegInfo &RI : Scope->RegInfos) { 1400 Region *R = RI.R; 1401 DenseSet<Instruction *> HoistStops; 1402 bool IsHoisted = false; 1403 if (RI.HasBranch) { 1404 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1405 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1406 "Must be truthy or falsy"); 1407 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1408 // Note checkHoistValue fills in HoistStops. 1409 DenseMap<Instruction *, bool> Visited; 1410 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1411 Unhoistables, &HoistStops, Visited); 1412 assert(IsHoistable && "Must be hoistable"); 1413 (void)(IsHoistable); // Unused in release build 1414 IsHoisted = true; 1415 } 1416 for (SelectInst *SI : RI.Selects) { 1417 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1418 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1419 "Must be true or false biased"); 1420 // Note checkHoistValue fills in HoistStops. 1421 DenseMap<Instruction *, bool> Visited; 1422 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1423 Unhoistables, &HoistStops, Visited); 1424 assert(IsHoistable && "Must be hoistable"); 1425 (void)(IsHoistable); // Unused in release build 1426 IsHoisted = true; 1427 } 1428 if (IsHoisted) { 1429 OutermostScope->CHRRegions.push_back(RI); 1430 OutermostScope->HoistStopMap[R] = HoistStops; 1431 } 1432 } 1433 for (CHRScope *Sub : Scope->Subs) 1434 setCHRRegions(Sub, OutermostScope); 1435 } 1436 1437 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1438 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1439 } 1440 1441 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1442 SmallVectorImpl<CHRScope *> &Output) { 1443 Output.resize(Input.size()); 1444 llvm::copy(Input, Output.begin()); 1445 llvm::stable_sort(Output, CHRScopeSorter); 1446 } 1447 1448 // Return true if V is already hoisted or was hoisted (along with its operands) 1449 // to the insert point. 1450 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1451 HoistStopMapTy &HoistStopMap, 1452 DenseSet<Instruction *> &HoistedSet, 1453 DenseSet<PHINode *> &TrivialPHIs, 1454 DominatorTree &DT) { 1455 auto IT = HoistStopMap.find(R); 1456 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1457 DenseSet<Instruction *> &HoistStops = IT->second; 1458 if (auto *I = dyn_cast<Instruction>(V)) { 1459 if (I == HoistPoint) 1460 return; 1461 if (HoistStops.count(I)) 1462 return; 1463 if (auto *PN = dyn_cast<PHINode>(I)) 1464 if (TrivialPHIs.count(PN)) 1465 // The trivial phi inserted by the previous CHR scope could replace a 1466 // non-phi in HoistStops. Note that since this phi is at the exit of a 1467 // previous CHR scope, which dominates this scope, it's safe to stop 1468 // hoisting there. 1469 return; 1470 if (HoistedSet.count(I)) 1471 // Already hoisted, return. 1472 return; 1473 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1474 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1475 assert(DT.getNode(HoistPoint->getParent()) && 1476 "DT must contain HoistPoint block"); 1477 if (DT.dominates(I, HoistPoint)) 1478 // We are already above the hoist point. Stop here. This may be necessary 1479 // when multiple scopes would independently hoist the same 1480 // instruction. Since an outer (dominating) scope would hoist it to its 1481 // entry before an inner (dominated) scope would to its entry, the inner 1482 // scope may see the instruction already hoisted, in which case it 1483 // potentially wrong for the inner scope to hoist it and could cause bad 1484 // IR (non-dominating def), but safe to skip hoisting it instead because 1485 // it's already in a block that dominates the inner scope. 1486 return; 1487 for (Value *Op : I->operands()) { 1488 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1489 } 1490 I->moveBefore(HoistPoint); 1491 HoistedSet.insert(I); 1492 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1493 } 1494 } 1495 1496 // Hoist the dependent condition values of the branches and the selects in the 1497 // scope to the insert point. 1498 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1499 DenseSet<PHINode *> &TrivialPHIs, 1500 DominatorTree &DT) { 1501 DenseSet<Instruction *> HoistedSet; 1502 for (const RegInfo &RI : Scope->CHRRegions) { 1503 Region *R = RI.R; 1504 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1505 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1506 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1507 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1508 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1509 HoistedSet, TrivialPHIs, DT); 1510 } 1511 for (SelectInst *SI : RI.Selects) { 1512 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1513 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1514 if (!(IsTrueBiased || IsFalseBiased)) 1515 continue; 1516 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1517 HoistedSet, TrivialPHIs, DT); 1518 } 1519 } 1520 } 1521 1522 // Negate the predicate if an ICmp if it's used only by branches or selects by 1523 // swapping the operands of the branches or the selects. Returns true if success. 1524 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1525 Instruction *ExcludedUser, 1526 CHRScope *Scope) { 1527 for (User *U : ICmp->users()) { 1528 if (U == ExcludedUser) 1529 continue; 1530 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1531 continue; 1532 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1533 continue; 1534 return false; 1535 } 1536 for (User *U : ICmp->users()) { 1537 if (U == ExcludedUser) 1538 continue; 1539 if (auto *BI = dyn_cast<BranchInst>(U)) { 1540 assert(BI->isConditional() && "Must be conditional"); 1541 BI->swapSuccessors(); 1542 // Don't need to swap this in terms of 1543 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1544 // mean whehter the branch is likely go into the if-then rather than 1545 // successor0/successor1 and because we can tell which edge is the then or 1546 // the else one by comparing the destination to the region exit block. 1547 continue; 1548 } 1549 if (auto *SI = dyn_cast<SelectInst>(U)) { 1550 // Swap operands 1551 SI->swapValues(); 1552 SI->swapProfMetadata(); 1553 if (Scope->TrueBiasedSelects.count(SI)) { 1554 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1555 "Must not be already in"); 1556 Scope->FalseBiasedSelects.insert(SI); 1557 } else if (Scope->FalseBiasedSelects.count(SI)) { 1558 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1559 "Must not be already in"); 1560 Scope->TrueBiasedSelects.insert(SI); 1561 } 1562 continue; 1563 } 1564 llvm_unreachable("Must be a branch or a select"); 1565 } 1566 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1567 return true; 1568 } 1569 1570 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1571 // for a value that's defined in the scope but used outside it (meaning it's 1572 // alive at the exit block). 1573 static void insertTrivialPHIs(CHRScope *Scope, 1574 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1575 DenseSet<PHINode *> &TrivialPHIs) { 1576 DenseSet<BasicBlock *> BlocksInScopeSet; 1577 SmallVector<BasicBlock *, 8> BlocksInScopeVec; 1578 for (RegInfo &RI : Scope->RegInfos) { 1579 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1580 // sub-Scopes. 1581 BlocksInScopeSet.insert(BB); 1582 BlocksInScopeVec.push_back(BB); 1583 } 1584 } 1585 CHR_DEBUG( 1586 dbgs() << "Inserting redudant phis\n"; 1587 for (BasicBlock *BB : BlocksInScopeVec) { 1588 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1589 }); 1590 for (BasicBlock *BB : BlocksInScopeVec) { 1591 for (Instruction &I : *BB) { 1592 SmallVector<Instruction *, 8> Users; 1593 for (User *U : I.users()) { 1594 if (auto *UI = dyn_cast<Instruction>(U)) { 1595 if (BlocksInScopeSet.count(UI->getParent()) == 0 && 1596 // Unless there's already a phi for I at the exit block. 1597 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1598 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1599 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1600 Users.push_back(UI); 1601 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1602 // There's a loop backedge from a block that's dominated by this 1603 // scope to the entry block. 1604 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1605 CHR_DEBUG(dbgs() 1606 << "Used at entry block (for a back edge) by a phi user " 1607 << *UI << "\n"); 1608 Users.push_back(UI); 1609 } 1610 } 1611 } 1612 if (Users.size() > 0) { 1613 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1614 // ExitBlock. Replace I with the new phi in UI unless UI is another 1615 // phi at ExitBlock. 1616 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1617 pred_end(ExitBlock)); 1618 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1619 &ExitBlock->front()); 1620 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1621 PN->addIncoming(&I, Pred); 1622 } 1623 TrivialPHIs.insert(PN); 1624 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1625 for (Instruction *UI : Users) { 1626 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1627 if (UI->getOperand(J) == &I) { 1628 UI->setOperand(J, PN); 1629 } 1630 } 1631 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1632 } 1633 } 1634 } 1635 } 1636 } 1637 1638 // Assert that all the CHR regions of the scope have a biased branch or select. 1639 static void LLVM_ATTRIBUTE_UNUSED 1640 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1641 #ifndef NDEBUG 1642 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1643 if (Scope->TrueBiasedRegions.count(RI.R) || 1644 Scope->FalseBiasedRegions.count(RI.R)) 1645 return true; 1646 for (SelectInst *SI : RI.Selects) 1647 if (Scope->TrueBiasedSelects.count(SI) || 1648 Scope->FalseBiasedSelects.count(SI)) 1649 return true; 1650 return false; 1651 }; 1652 for (RegInfo &RI : Scope->CHRRegions) { 1653 assert(HasBiasedBranchOrSelect(RI, Scope) && 1654 "Must have biased branch or select"); 1655 } 1656 #endif 1657 } 1658 1659 // Assert that all the condition values of the biased branches and selects have 1660 // been hoisted to the pre-entry block or outside of the scope. 1661 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1662 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1663 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1664 for (RegInfo &RI : Scope->CHRRegions) { 1665 Region *R = RI.R; 1666 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1667 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1668 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1669 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1670 Value *V = BI->getCondition(); 1671 CHR_DEBUG(dbgs() << *V << "\n"); 1672 if (auto *I = dyn_cast<Instruction>(V)) { 1673 (void)(I); // Unused in release build. 1674 assert((I->getParent() == PreEntryBlock || 1675 !Scope->contains(I)) && 1676 "Must have been hoisted to PreEntryBlock or outside the scope"); 1677 } 1678 } 1679 for (SelectInst *SI : RI.Selects) { 1680 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1681 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1682 if (!(IsTrueBiased || IsFalseBiased)) 1683 continue; 1684 Value *V = SI->getCondition(); 1685 CHR_DEBUG(dbgs() << *V << "\n"); 1686 if (auto *I = dyn_cast<Instruction>(V)) { 1687 (void)(I); // Unused in release build. 1688 assert((I->getParent() == PreEntryBlock || 1689 !Scope->contains(I)) && 1690 "Must have been hoisted to PreEntryBlock or outside the scope"); 1691 } 1692 } 1693 } 1694 } 1695 1696 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1697 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1698 1699 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1700 Region *FirstRegion = Scope->RegInfos[0].R; 1701 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1702 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1703 BasicBlock *ExitBlock = LastRegion->getExit(); 1704 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1705 1706 if (ExitBlock) { 1707 // Insert a trivial phi at the exit block (where the CHR hot path and the 1708 // cold path merges) for a value that's defined in the scope but used 1709 // outside it (meaning it's alive at the exit block). We will add the 1710 // incoming values for the CHR cold paths to it below. Without this, we'd 1711 // miss updating phi's for such values unless there happens to already be a 1712 // phi for that value there. 1713 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1714 } 1715 1716 // Split the entry block of the first region. The new block becomes the new 1717 // entry block of the first region. The old entry block becomes the block to 1718 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1719 // through the split, we update the entry of the first region after the split, 1720 // and Region only points to the entry and the exit blocks, rather than 1721 // keeping everything in a list or set, the blocks membership and the 1722 // entry/exit blocks of the region are still valid after the split. 1723 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1724 << " at " << *Scope->BranchInsertPoint << "\n"); 1725 BasicBlock *NewEntryBlock = 1726 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1727 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1728 "NewEntryBlock's only pred must be EntryBlock"); 1729 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1730 BasicBlock *PreEntryBlock = EntryBlock; 1731 1732 ValueToValueMapTy VMap; 1733 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1734 // hot path (originals) and a cold path (clones) and update the PHIs at the 1735 // exit block. 1736 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1737 1738 // Replace the old (placeholder) branch with the new (merged) conditional 1739 // branch. 1740 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1741 NewEntryBlock, VMap); 1742 1743 #ifndef NDEBUG 1744 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1745 #endif 1746 1747 // Hoist the conditional values of the branches/selects. 1748 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1749 1750 #ifndef NDEBUG 1751 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1752 #endif 1753 1754 // Create the combined branch condition and constant-fold the branches/selects 1755 // in the hot path. 1756 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1757 ProfileCount ? ProfileCount.getValue() : 0); 1758 } 1759 1760 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1761 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1762 // at the exit block. 1763 void CHR::cloneScopeBlocks(CHRScope *Scope, 1764 BasicBlock *PreEntryBlock, 1765 BasicBlock *ExitBlock, 1766 Region *LastRegion, 1767 ValueToValueMapTy &VMap) { 1768 // Clone all the blocks. The original blocks will be the hot-path 1769 // CHR-optimized code and the cloned blocks will be the original unoptimized 1770 // code. This is so that the block pointers from the 1771 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1772 // which CHR should apply to. 1773 SmallVector<BasicBlock*, 8> NewBlocks; 1774 for (RegInfo &RI : Scope->RegInfos) 1775 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1776 // sub-Scopes. 1777 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1778 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1779 NewBlocks.push_back(NewBB); 1780 VMap[BB] = NewBB; 1781 } 1782 1783 // Place the cloned blocks right after the original blocks (right before the 1784 // exit block of.) 1785 if (ExitBlock) 1786 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1787 F.getBasicBlockList(), 1788 NewBlocks[0]->getIterator(), F.end()); 1789 1790 // Update the cloned blocks/instructions to refer to themselves. 1791 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1792 for (Instruction &I : *NewBlocks[i]) 1793 RemapInstruction(&I, VMap, 1794 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1795 1796 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1797 // the top-level region but we don't need to add PHIs. The trivial PHIs 1798 // inserted above will be updated here. 1799 if (ExitBlock) 1800 for (PHINode &PN : ExitBlock->phis()) 1801 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1802 ++I) { 1803 BasicBlock *Pred = PN.getIncomingBlock(I); 1804 if (LastRegion->contains(Pred)) { 1805 Value *V = PN.getIncomingValue(I); 1806 auto It = VMap.find(V); 1807 if (It != VMap.end()) V = It->second; 1808 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1809 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1810 } 1811 } 1812 } 1813 1814 // A helper for transformScope. Replace the old (placeholder) branch with the 1815 // new (merged) conditional branch. 1816 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1817 BasicBlock *EntryBlock, 1818 BasicBlock *NewEntryBlock, 1819 ValueToValueMapTy &VMap) { 1820 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1821 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1822 "SplitBlock did not work correctly!"); 1823 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1824 "NewEntryBlock's only pred must be EntryBlock"); 1825 assert(VMap.find(NewEntryBlock) != VMap.end() && 1826 "NewEntryBlock must have been copied"); 1827 OldBR->dropAllReferences(); 1828 OldBR->eraseFromParent(); 1829 // The true predicate is a placeholder. It will be replaced later in 1830 // fixupBranchesAndSelects(). 1831 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1832 cast<BasicBlock>(VMap[NewEntryBlock]), 1833 ConstantInt::getTrue(F.getContext())); 1834 PreEntryBlock->getInstList().push_back(NewBR); 1835 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1836 "NewEntryBlock's only pred must be EntryBlock"); 1837 return NewBR; 1838 } 1839 1840 // A helper for transformScopes. Create the combined branch condition and 1841 // constant-fold the branches/selects in the hot path. 1842 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1843 BasicBlock *PreEntryBlock, 1844 BranchInst *MergedBR, 1845 uint64_t ProfileCount) { 1846 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1847 BranchProbability CHRBranchBias(1, 1); 1848 uint64_t NumCHRedBranches = 0; 1849 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1850 for (RegInfo &RI : Scope->CHRRegions) { 1851 Region *R = RI.R; 1852 if (RI.HasBranch) { 1853 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1854 ++NumCHRedBranches; 1855 } 1856 for (SelectInst *SI : RI.Selects) { 1857 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1858 ++NumCHRedBranches; 1859 } 1860 } 1861 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1862 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1863 ORE.emit([&]() { 1864 return OptimizationRemark(DEBUG_TYPE, 1865 "CHR", 1866 // Refer to the hot (original) path 1867 MergedBR->getSuccessor(0)->getTerminator()) 1868 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1869 << " branches or selects"; 1870 }); 1871 MergedBR->setCondition(MergedCondition); 1872 SmallVector<uint32_t, 2> Weights; 1873 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); 1874 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); 1875 MDBuilder MDB(F.getContext()); 1876 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1877 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1878 << "\n"); 1879 } 1880 1881 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1882 // and constant-fold a branch in the hot path. 1883 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1884 IRBuilder<> &IRB, 1885 Value *&MergedCondition, 1886 BranchProbability &CHRBranchBias) { 1887 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1888 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1889 "Must be truthy or falsy"); 1890 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1891 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1892 "Must be in the bias map"); 1893 BranchProbability Bias = BranchBiasMap[R]; 1894 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1895 // Take the min. 1896 if (CHRBranchBias > Bias) 1897 CHRBranchBias = Bias; 1898 BasicBlock *IfThen = BI->getSuccessor(1); 1899 BasicBlock *IfElse = BI->getSuccessor(0); 1900 BasicBlock *RegionExitBlock = R->getExit(); 1901 assert(RegionExitBlock && "Null ExitBlock"); 1902 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1903 IfThen != IfElse && "Invariant from findScopes"); 1904 if (IfThen == RegionExitBlock) { 1905 // Swap them so that IfThen means going into it and IfElse means skipping 1906 // it. 1907 std::swap(IfThen, IfElse); 1908 } 1909 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1910 << " IfElse " << IfElse->getName() << "\n"); 1911 Value *Cond = BI->getCondition(); 1912 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1913 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1914 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1915 MergedCondition); 1916 // Constant-fold the branch at ClonedEntryBlock. 1917 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1918 "The successor shouldn't change"); 1919 Value *NewCondition = ConditionTrue ? 1920 ConstantInt::getTrue(F.getContext()) : 1921 ConstantInt::getFalse(F.getContext()); 1922 BI->setCondition(NewCondition); 1923 } 1924 1925 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1926 // and constant-fold a select in the hot path. 1927 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1928 IRBuilder<> &IRB, 1929 Value *&MergedCondition, 1930 BranchProbability &CHRBranchBias) { 1931 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1932 assert((IsTrueBiased || 1933 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1934 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1935 "Must be in the bias map"); 1936 BranchProbability Bias = SelectBiasMap[SI]; 1937 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1938 // Take the min. 1939 if (CHRBranchBias > Bias) 1940 CHRBranchBias = Bias; 1941 Value *Cond = SI->getCondition(); 1942 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1943 MergedCondition); 1944 Value *NewCondition = IsTrueBiased ? 1945 ConstantInt::getTrue(F.getContext()) : 1946 ConstantInt::getFalse(F.getContext()); 1947 SI->setCondition(NewCondition); 1948 } 1949 1950 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1951 // condition. 1952 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1953 Instruction *BranchOrSelect, 1954 CHRScope *Scope, 1955 IRBuilder<> &IRB, 1956 Value *&MergedCondition) { 1957 if (IsTrueBiased) { 1958 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1959 } else { 1960 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1961 // branch, negate the icmp predicate and swap the branch targets and avoid 1962 // inserting an Xor to negate Cond. 1963 bool Done = false; 1964 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1965 if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1966 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1967 Done = true; 1968 } 1969 if (!Done) { 1970 Value *Negate = IRB.CreateXor( 1971 ConstantInt::getTrue(F.getContext()), Cond); 1972 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1973 } 1974 } 1975 } 1976 1977 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1978 unsigned I = 0; 1979 DenseSet<PHINode *> TrivialPHIs; 1980 for (CHRScope *Scope : CHRScopes) { 1981 transformScopes(Scope, TrivialPHIs); 1982 CHR_DEBUG( 1983 std::ostringstream oss; 1984 oss << " after transformScopes " << I++; 1985 dumpIR(F, oss.str().c_str(), nullptr)); 1986 (void)I; 1987 } 1988 } 1989 1990 static void LLVM_ATTRIBUTE_UNUSED 1991 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1992 dbgs() << Label << " " << Scopes.size() << "\n"; 1993 for (CHRScope *Scope : Scopes) { 1994 dbgs() << *Scope << "\n"; 1995 } 1996 } 1997 1998 bool CHR::run() { 1999 if (!shouldApply(F, PSI)) 2000 return false; 2001 2002 CHR_DEBUG(dumpIR(F, "before", nullptr)); 2003 2004 bool Changed = false; 2005 { 2006 CHR_DEBUG( 2007 dbgs() << "RegionInfo:\n"; 2008 RI.print(dbgs())); 2009 2010 // Recursively traverse the region tree and find regions that have biased 2011 // branches and/or selects and create scopes. 2012 SmallVector<CHRScope *, 8> AllScopes; 2013 findScopes(AllScopes); 2014 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 2015 2016 // Split the scopes if 1) the conditiona values of the biased 2017 // branches/selects of the inner/lower scope can't be hoisted up to the 2018 // outermost/uppermost scope entry, or 2) the condition values of the biased 2019 // branches/selects in a scope (including subscopes) don't share at least 2020 // one common value. 2021 SmallVector<CHRScope *, 8> SplitScopes; 2022 splitScopes(AllScopes, SplitScopes); 2023 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2024 2025 // After splitting, set the biased regions and selects of a scope (a tree 2026 // root) that include those of the subscopes. 2027 classifyBiasedScopes(SplitScopes); 2028 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2029 2030 // Filter out the scopes that has only one biased region or select (CHR 2031 // isn't useful in such a case). 2032 SmallVector<CHRScope *, 8> FilteredScopes; 2033 filterScopes(SplitScopes, FilteredScopes); 2034 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2035 2036 // Set the regions to be CHR'ed and their hoist stops for each scope. 2037 SmallVector<CHRScope *, 8> SetScopes; 2038 setCHRRegions(FilteredScopes, SetScopes); 2039 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2040 2041 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2042 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2043 // to the hot path, rather than both hot and cold paths. 2044 SmallVector<CHRScope *, 8> SortedScopes; 2045 sortScopes(SetScopes, SortedScopes); 2046 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2047 2048 CHR_DEBUG( 2049 dbgs() << "RegionInfo:\n"; 2050 RI.print(dbgs())); 2051 2052 // Apply the CHR transformation. 2053 if (!SortedScopes.empty()) { 2054 transformScopes(SortedScopes); 2055 Changed = true; 2056 } 2057 } 2058 2059 if (Changed) { 2060 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2061 ORE.emit([&]() { 2062 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2063 << ore::NV("Function", &F) << " " 2064 << "Reduced the number of branches in hot paths by " 2065 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2066 << " (static) and " 2067 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2068 << " (weighted by PGO count)"; 2069 }); 2070 } 2071 2072 return Changed; 2073 } 2074 2075 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 2076 BlockFrequencyInfo &BFI = 2077 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 2078 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2079 ProfileSummaryInfo &PSI = 2080 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2081 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 2082 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = 2083 std::make_unique<OptimizationRemarkEmitter>(&F); 2084 return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); 2085 } 2086 2087 namespace llvm { 2088 2089 ControlHeightReductionPass::ControlHeightReductionPass() { 2090 parseCHRFilterFiles(); 2091 } 2092 2093 PreservedAnalyses ControlHeightReductionPass::run( 2094 Function &F, 2095 FunctionAnalysisManager &FAM) { 2096 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2097 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2098 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2099 auto &MAM = MAMProxy.getManager(); 2100 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2101 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2102 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2103 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2104 if (!Changed) 2105 return PreservedAnalyses::all(); 2106 auto PA = PreservedAnalyses(); 2107 PA.preserve<GlobalsAA>(); 2108 return PA; 2109 } 2110 2111 } // namespace llvm 2112