1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/InitializePasses.h" 31 #include "llvm/Support/BranchProbability.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/MemoryBuffer.h" 34 #include "llvm/Transforms/Utils.h" 35 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 36 #include "llvm/Transforms/Utils/Cloning.h" 37 #include "llvm/Transforms/Utils/ValueMapper.h" 38 39 #include <set> 40 #include <sstream> 41 42 using namespace llvm; 43 44 #define DEBUG_TYPE "chr" 45 46 #define CHR_DEBUG(X) LLVM_DEBUG(X) 47 48 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 49 cl::desc("Apply CHR for all functions")); 50 51 static cl::opt<double> CHRBiasThreshold( 52 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 53 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 54 55 static cl::opt<unsigned> CHRMergeThreshold( 56 "chr-merge-threshold", cl::init(2), cl::Hidden, 57 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 58 59 static cl::opt<std::string> CHRModuleList( 60 "chr-module-list", cl::init(""), cl::Hidden, 61 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 62 63 static cl::opt<std::string> CHRFunctionList( 64 "chr-function-list", cl::init(""), cl::Hidden, 65 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 66 67 static StringSet<> CHRModules; 68 static StringSet<> CHRFunctions; 69 70 static void parseCHRFilterFiles() { 71 if (!CHRModuleList.empty()) { 72 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 73 if (!FileOrErr) { 74 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 75 std::exit(1); 76 } 77 StringRef Buf = FileOrErr->get()->getBuffer(); 78 SmallVector<StringRef, 0> Lines; 79 Buf.split(Lines, '\n'); 80 for (StringRef Line : Lines) { 81 Line = Line.trim(); 82 if (!Line.empty()) 83 CHRModules.insert(Line); 84 } 85 } 86 if (!CHRFunctionList.empty()) { 87 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 88 if (!FileOrErr) { 89 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 90 std::exit(1); 91 } 92 StringRef Buf = FileOrErr->get()->getBuffer(); 93 SmallVector<StringRef, 0> Lines; 94 Buf.split(Lines, '\n'); 95 for (StringRef Line : Lines) { 96 Line = Line.trim(); 97 if (!Line.empty()) 98 CHRFunctions.insert(Line); 99 } 100 } 101 } 102 103 namespace { 104 class ControlHeightReductionLegacyPass : public FunctionPass { 105 public: 106 static char ID; 107 108 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 109 initializeControlHeightReductionLegacyPassPass( 110 *PassRegistry::getPassRegistry()); 111 parseCHRFilterFiles(); 112 } 113 114 bool runOnFunction(Function &F) override; 115 void getAnalysisUsage(AnalysisUsage &AU) const override { 116 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 117 AU.addRequired<DominatorTreeWrapperPass>(); 118 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 119 AU.addRequired<RegionInfoPass>(); 120 AU.addPreserved<GlobalsAAWrapperPass>(); 121 } 122 }; 123 } // end anonymous namespace 124 125 char ControlHeightReductionLegacyPass::ID = 0; 126 127 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 128 "chr", 129 "Reduce control height in the hot paths", 130 false, false) 131 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 132 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 133 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 134 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 135 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 136 "chr", 137 "Reduce control height in the hot paths", 138 false, false) 139 140 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 141 return new ControlHeightReductionLegacyPass(); 142 } 143 144 namespace { 145 146 struct CHRStats { 147 CHRStats() : NumBranches(0), NumBranchesDelta(0), 148 WeightedNumBranchesDelta(0) {} 149 void print(raw_ostream &OS) const { 150 OS << "CHRStats: NumBranches " << NumBranches 151 << " NumBranchesDelta " << NumBranchesDelta 152 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 153 } 154 uint64_t NumBranches; // The original number of conditional branches / 155 // selects 156 uint64_t NumBranchesDelta; // The decrease of the number of conditional 157 // branches / selects in the hot paths due to CHR. 158 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 159 // count at the scope entry. 160 }; 161 162 // RegInfo - some properties of a Region. 163 struct RegInfo { 164 RegInfo() : R(nullptr), HasBranch(false) {} 165 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 166 Region *R; 167 bool HasBranch; 168 SmallVector<SelectInst *, 8> Selects; 169 }; 170 171 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 172 173 // CHRScope - a sequence of regions to CHR together. It corresponds to a 174 // sequence of conditional blocks. It can have subscopes which correspond to 175 // nested conditional blocks. Nested CHRScopes form a tree. 176 class CHRScope { 177 public: 178 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 179 assert(RI.R && "Null RegionIn"); 180 RegInfos.push_back(RI); 181 } 182 183 Region *getParentRegion() { 184 assert(RegInfos.size() > 0 && "Empty CHRScope"); 185 Region *Parent = RegInfos[0].R->getParent(); 186 assert(Parent && "Unexpected to call this on the top-level region"); 187 return Parent; 188 } 189 190 BasicBlock *getEntryBlock() { 191 assert(RegInfos.size() > 0 && "Empty CHRScope"); 192 return RegInfos.front().R->getEntry(); 193 } 194 195 BasicBlock *getExitBlock() { 196 assert(RegInfos.size() > 0 && "Empty CHRScope"); 197 return RegInfos.back().R->getExit(); 198 } 199 200 bool appendable(CHRScope *Next) { 201 // The next scope is appendable only if this scope is directly connected to 202 // it (which implies it post-dominates this scope) and this scope dominates 203 // it (no edge to the next scope outside this scope). 204 BasicBlock *NextEntry = Next->getEntryBlock(); 205 if (getExitBlock() != NextEntry) 206 // Not directly connected. 207 return false; 208 Region *LastRegion = RegInfos.back().R; 209 for (BasicBlock *Pred : predecessors(NextEntry)) 210 if (!LastRegion->contains(Pred)) 211 // There's an edge going into the entry of the next scope from outside 212 // of this scope. 213 return false; 214 return true; 215 } 216 217 void append(CHRScope *Next) { 218 assert(RegInfos.size() > 0 && "Empty CHRScope"); 219 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 220 assert(getParentRegion() == Next->getParentRegion() && 221 "Must be siblings"); 222 assert(getExitBlock() == Next->getEntryBlock() && 223 "Must be adjacent"); 224 for (RegInfo &RI : Next->RegInfos) 225 RegInfos.push_back(RI); 226 for (CHRScope *Sub : Next->Subs) 227 Subs.push_back(Sub); 228 } 229 230 void addSub(CHRScope *SubIn) { 231 #ifndef NDEBUG 232 bool IsChild = false; 233 for (RegInfo &RI : RegInfos) 234 if (RI.R == SubIn->getParentRegion()) { 235 IsChild = true; 236 break; 237 } 238 assert(IsChild && "Must be a child"); 239 #endif 240 Subs.push_back(SubIn); 241 } 242 243 // Split this scope at the boundary region into two, which will belong to the 244 // tail and returns the tail. 245 CHRScope *split(Region *Boundary) { 246 assert(Boundary && "Boundary null"); 247 assert(RegInfos.begin()->R != Boundary && 248 "Can't be split at beginning"); 249 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), 250 [&Boundary](const RegInfo& RI) { 251 return Boundary == RI.R; 252 }); 253 if (BoundaryIt == RegInfos.end()) 254 return nullptr; 255 SmallVector<RegInfo, 8> TailRegInfos; 256 SmallVector<CHRScope *, 8> TailSubs; 257 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); 258 RegInfos.resize(BoundaryIt - RegInfos.begin()); 259 DenseSet<Region *> TailRegionSet; 260 for (RegInfo &RI : TailRegInfos) 261 TailRegionSet.insert(RI.R); 262 for (auto It = Subs.begin(); It != Subs.end(); ) { 263 CHRScope *Sub = *It; 264 assert(Sub && "null Sub"); 265 Region *Parent = Sub->getParentRegion(); 266 if (TailRegionSet.count(Parent)) { 267 TailSubs.push_back(Sub); 268 It = Subs.erase(It); 269 } else { 270 assert(std::find_if(RegInfos.begin(), RegInfos.end(), 271 [&Parent](const RegInfo& RI) { 272 return Parent == RI.R; 273 }) != RegInfos.end() && 274 "Must be in head"); 275 ++It; 276 } 277 } 278 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 279 return new CHRScope(TailRegInfos, TailSubs); 280 } 281 282 bool contains(Instruction *I) const { 283 BasicBlock *Parent = I->getParent(); 284 for (const RegInfo &RI : RegInfos) 285 if (RI.R->contains(Parent)) 286 return true; 287 return false; 288 } 289 290 void print(raw_ostream &OS) const; 291 292 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 293 SmallVector<CHRScope *, 8> Subs; // Subscopes. 294 295 // The instruction at which to insert the CHR conditional branch (and hoist 296 // the dependent condition values). 297 Instruction *BranchInsertPoint; 298 299 // True-biased and false-biased regions (conditional blocks), 300 // respectively. Used only for the outermost scope and includes regions in 301 // subscopes. The rest are unbiased. 302 DenseSet<Region *> TrueBiasedRegions; 303 DenseSet<Region *> FalseBiasedRegions; 304 // Among the biased regions, the regions that get CHRed. 305 SmallVector<RegInfo, 8> CHRRegions; 306 307 // True-biased and false-biased selects, respectively. Used only for the 308 // outermost scope and includes ones in subscopes. 309 DenseSet<SelectInst *> TrueBiasedSelects; 310 DenseSet<SelectInst *> FalseBiasedSelects; 311 312 // Map from one of the above regions to the instructions to stop 313 // hoisting instructions at through use-def chains. 314 HoistStopMapTy HoistStopMap; 315 316 private: 317 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, 318 SmallVector<CHRScope *, 8> &SubsIn) 319 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 320 }; 321 322 class CHR { 323 public: 324 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 325 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 326 OptimizationRemarkEmitter &OREin) 327 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 328 329 ~CHR() { 330 for (CHRScope *Scope : Scopes) { 331 delete Scope; 332 } 333 } 334 335 bool run(); 336 337 private: 338 // See the comments in CHR::run() for the high level flow of the algorithm and 339 // what the following functions do. 340 341 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 342 Region *R = RI.getTopLevelRegion(); 343 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); 344 if (Scope) { 345 Output.push_back(Scope); 346 } 347 } 348 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 349 SmallVectorImpl<CHRScope *> &Scopes); 350 CHRScope *findScope(Region *R); 351 void checkScopeHoistable(CHRScope *Scope); 352 353 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 354 SmallVectorImpl<CHRScope *> &Output); 355 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 356 CHRScope *Outer, 357 DenseSet<Value *> *OuterConditionValues, 358 Instruction *OuterInsertPoint, 359 SmallVectorImpl<CHRScope *> &Output, 360 DenseSet<Instruction *> &Unhoistables); 361 362 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 363 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 364 365 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 366 SmallVectorImpl<CHRScope *> &Output); 367 368 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 369 SmallVectorImpl<CHRScope *> &Output); 370 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 371 372 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 373 SmallVectorImpl<CHRScope *> &Output); 374 375 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 376 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 377 void cloneScopeBlocks(CHRScope *Scope, 378 BasicBlock *PreEntryBlock, 379 BasicBlock *ExitBlock, 380 Region *LastRegion, 381 ValueToValueMapTy &VMap); 382 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 383 BasicBlock *EntryBlock, 384 BasicBlock *NewEntryBlock, 385 ValueToValueMapTy &VMap); 386 void fixupBranchesAndSelects(CHRScope *Scope, 387 BasicBlock *PreEntryBlock, 388 BranchInst *MergedBR, 389 uint64_t ProfileCount); 390 void fixupBranch(Region *R, 391 CHRScope *Scope, 392 IRBuilder<> &IRB, 393 Value *&MergedCondition, BranchProbability &CHRBranchBias); 394 void fixupSelect(SelectInst* SI, 395 CHRScope *Scope, 396 IRBuilder<> &IRB, 397 Value *&MergedCondition, BranchProbability &CHRBranchBias); 398 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 399 Instruction *BranchOrSelect, 400 CHRScope *Scope, 401 IRBuilder<> &IRB, 402 Value *&MergedCondition); 403 404 Function &F; 405 BlockFrequencyInfo &BFI; 406 DominatorTree &DT; 407 ProfileSummaryInfo &PSI; 408 RegionInfo &RI; 409 OptimizationRemarkEmitter &ORE; 410 CHRStats Stats; 411 412 // All the true-biased regions in the function 413 DenseSet<Region *> TrueBiasedRegionsGlobal; 414 // All the false-biased regions in the function 415 DenseSet<Region *> FalseBiasedRegionsGlobal; 416 // All the true-biased selects in the function 417 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 418 // All the false-biased selects in the function 419 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 420 // A map from biased regions to their branch bias 421 DenseMap<Region *, BranchProbability> BranchBiasMap; 422 // A map from biased selects to their branch bias 423 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 424 // All the scopes. 425 DenseSet<CHRScope *> Scopes; 426 }; 427 428 } // end anonymous namespace 429 430 static inline 431 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 432 const CHRStats &Stats) { 433 Stats.print(OS); 434 return OS; 435 } 436 437 static inline 438 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 439 Scope.print(OS); 440 return OS; 441 } 442 443 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 444 if (ForceCHR) 445 return true; 446 447 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 448 if (CHRModules.count(F.getParent()->getName())) 449 return true; 450 return CHRFunctions.count(F.getName()); 451 } 452 453 assert(PSI.hasProfileSummary() && "Empty PSI?"); 454 return PSI.isFunctionEntryHot(&F); 455 } 456 457 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 458 CHRStats *Stats) { 459 StringRef FuncName = F.getName(); 460 StringRef ModuleName = F.getParent()->getName(); 461 (void)(FuncName); // Unused in release build. 462 (void)(ModuleName); // Unused in release build. 463 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 464 << FuncName); 465 if (Stats) 466 CHR_DEBUG(dbgs() << " " << *Stats); 467 CHR_DEBUG(dbgs() << "\n"); 468 CHR_DEBUG(F.dump()); 469 } 470 471 void CHRScope::print(raw_ostream &OS) const { 472 assert(RegInfos.size() > 0 && "Empty CHRScope"); 473 OS << "CHRScope["; 474 OS << RegInfos.size() << ", Regions["; 475 for (const RegInfo &RI : RegInfos) { 476 OS << RI.R->getNameStr(); 477 if (RI.HasBranch) 478 OS << " B"; 479 if (RI.Selects.size() > 0) 480 OS << " S" << RI.Selects.size(); 481 OS << ", "; 482 } 483 if (RegInfos[0].R->getParent()) { 484 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 485 } else { 486 // top level region 487 OS << "]"; 488 } 489 OS << ", Subs["; 490 for (CHRScope *Sub : Subs) { 491 OS << *Sub << ", "; 492 } 493 OS << "]]"; 494 } 495 496 // Return true if the given instruction type can be hoisted by CHR. 497 static bool isHoistableInstructionType(Instruction *I) { 498 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 499 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 500 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 501 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 502 isa<InsertValueInst>(I); 503 } 504 505 // Return true if the given instruction can be hoisted by CHR. 506 static bool isHoistable(Instruction *I, DominatorTree &DT) { 507 if (!isHoistableInstructionType(I)) 508 return false; 509 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 510 } 511 512 // Recursively traverse the use-def chains of the given value and return a set 513 // of the unhoistable base values defined within the scope (excluding the 514 // first-region entry block) or the (hoistable or unhoistable) base values that 515 // are defined outside (including the first-region entry block) of the 516 // scope. The returned set doesn't include constants. 517 static std::set<Value *> getBaseValues( 518 Value *V, DominatorTree &DT, 519 DenseMap<Value *, std::set<Value *>> &Visited) { 520 if (Visited.count(V)) { 521 return Visited[V]; 522 } 523 std::set<Value *> Result; 524 if (auto *I = dyn_cast<Instruction>(V)) { 525 // We don't stop at a block that's not in the Scope because we would miss some 526 // instructions that are based on the same base values if we stop there. 527 if (!isHoistable(I, DT)) { 528 Result.insert(I); 529 Visited.insert(std::make_pair(V, Result)); 530 return Result; 531 } 532 // I is hoistable above the Scope. 533 for (Value *Op : I->operands()) { 534 std::set<Value *> OpResult = getBaseValues(Op, DT, Visited); 535 Result.insert(OpResult.begin(), OpResult.end()); 536 } 537 Visited.insert(std::make_pair(V, Result)); 538 return Result; 539 } 540 if (isa<Argument>(V)) { 541 Result.insert(V); 542 Visited.insert(std::make_pair(V, Result)); 543 return Result; 544 } 545 // We don't include others like constants because those won't lead to any 546 // chance of folding of conditions (eg two bit checks merged into one check) 547 // after CHR. 548 Visited.insert(std::make_pair(V, Result)); 549 return Result; // empty 550 } 551 552 // Return true if V is already hoisted or can be hoisted (along with its 553 // operands) above the insert point. When it returns true and HoistStops is 554 // non-null, the instructions to stop hoisting at through the use-def chains are 555 // inserted into HoistStops. 556 static bool 557 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 558 DenseSet<Instruction *> &Unhoistables, 559 DenseSet<Instruction *> *HoistStops, 560 DenseMap<Instruction *, bool> &Visited) { 561 assert(InsertPoint && "Null InsertPoint"); 562 if (auto *I = dyn_cast<Instruction>(V)) { 563 if (Visited.count(I)) { 564 return Visited[I]; 565 } 566 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 567 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 568 if (Unhoistables.count(I)) { 569 // Don't hoist if they are not to be hoisted. 570 Visited[I] = false; 571 return false; 572 } 573 if (DT.dominates(I, InsertPoint)) { 574 // We are already above the insert point. Stop here. 575 if (HoistStops) 576 HoistStops->insert(I); 577 Visited[I] = true; 578 return true; 579 } 580 // We aren't not above the insert point, check if we can hoist it above the 581 // insert point. 582 if (isHoistable(I, DT)) { 583 // Check operands first. 584 DenseSet<Instruction *> OpsHoistStops; 585 bool AllOpsHoisted = true; 586 for (Value *Op : I->operands()) { 587 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 588 Visited)) { 589 AllOpsHoisted = false; 590 break; 591 } 592 } 593 if (AllOpsHoisted) { 594 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 595 if (HoistStops) 596 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 597 Visited[I] = true; 598 return true; 599 } 600 } 601 Visited[I] = false; 602 return false; 603 } 604 // Non-instructions are considered hoistable. 605 return true; 606 } 607 608 // Returns true and sets the true probability and false probability of an 609 // MD_prof metadata if it's well-formed. 610 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 611 BranchProbability &FalseProb) { 612 if (!MD) return false; 613 MDString *MDName = cast<MDString>(MD->getOperand(0)); 614 if (MDName->getString() != "branch_weights" || 615 MD->getNumOperands() != 3) 616 return false; 617 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 618 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 619 if (!TrueWeight || !FalseWeight) 620 return false; 621 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 622 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 623 uint64_t SumWt = TrueWt + FalseWt; 624 625 assert(SumWt >= TrueWt && SumWt >= FalseWt && 626 "Overflow calculating branch probabilities."); 627 628 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 629 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 630 return true; 631 } 632 633 static BranchProbability getCHRBiasThreshold() { 634 return BranchProbability::getBranchProbability( 635 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 636 } 637 638 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 639 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 640 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 641 // false. 642 template <typename K, typename S, typename M> 643 static bool checkBias(K *Key, BranchProbability TrueProb, 644 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 645 M &BiasMap) { 646 BranchProbability Threshold = getCHRBiasThreshold(); 647 if (TrueProb >= Threshold) { 648 TrueSet.insert(Key); 649 BiasMap[Key] = TrueProb; 650 return true; 651 } else if (FalseProb >= Threshold) { 652 FalseSet.insert(Key); 653 BiasMap[Key] = FalseProb; 654 return true; 655 } 656 return false; 657 } 658 659 // Returns true and insert a region into the right biased set and the map if the 660 // branch of the region is biased. 661 static bool checkBiasedBranch(BranchInst *BI, Region *R, 662 DenseSet<Region *> &TrueBiasedRegionsGlobal, 663 DenseSet<Region *> &FalseBiasedRegionsGlobal, 664 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 665 if (!BI->isConditional()) 666 return false; 667 BranchProbability ThenProb, ElseProb; 668 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 669 ThenProb, ElseProb)) 670 return false; 671 BasicBlock *IfThen = BI->getSuccessor(0); 672 BasicBlock *IfElse = BI->getSuccessor(1); 673 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 674 IfThen != IfElse && 675 "Invariant from findScopes"); 676 if (IfThen == R->getExit()) { 677 // Swap them so that IfThen/ThenProb means going into the conditional code 678 // and IfElse/ElseProb means skipping it. 679 std::swap(IfThen, IfElse); 680 std::swap(ThenProb, ElseProb); 681 } 682 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 683 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 684 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 685 return checkBias(R, ThenProb, ElseProb, 686 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 687 BranchBiasMap); 688 } 689 690 // Returns true and insert a select into the right biased set and the map if the 691 // select is biased. 692 static bool checkBiasedSelect( 693 SelectInst *SI, Region *R, 694 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 695 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 696 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 697 BranchProbability TrueProb, FalseProb; 698 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 699 TrueProb, FalseProb)) 700 return false; 701 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 702 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 703 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 704 return checkBias(SI, TrueProb, FalseProb, 705 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 706 SelectBiasMap); 707 } 708 709 // Returns the instruction at which to hoist the dependent condition values and 710 // insert the CHR branch for a region. This is the terminator branch in the 711 // entry block or the first select in the entry block, if any. 712 static Instruction* getBranchInsertPoint(RegInfo &RI) { 713 Region *R = RI.R; 714 BasicBlock *EntryBB = R->getEntry(); 715 // The hoist point is by default the terminator of the entry block, which is 716 // the same as the branch instruction if RI.HasBranch is true. 717 Instruction *HoistPoint = EntryBB->getTerminator(); 718 for (SelectInst *SI : RI.Selects) { 719 if (SI->getParent() == EntryBB) { 720 // Pick the first select in Selects in the entry block. Note Selects is 721 // sorted in the instruction order within a block (asserted below). 722 HoistPoint = SI; 723 break; 724 } 725 } 726 assert(HoistPoint && "Null HoistPoint"); 727 #ifndef NDEBUG 728 // Check that HoistPoint is the first one in Selects in the entry block, 729 // if any. 730 DenseSet<Instruction *> EntryBlockSelectSet; 731 for (SelectInst *SI : RI.Selects) { 732 if (SI->getParent() == EntryBB) { 733 EntryBlockSelectSet.insert(SI); 734 } 735 } 736 for (Instruction &I : *EntryBB) { 737 if (EntryBlockSelectSet.count(&I) > 0) { 738 assert(&I == HoistPoint && 739 "HoistPoint must be the first one in Selects"); 740 break; 741 } 742 } 743 #endif 744 return HoistPoint; 745 } 746 747 // Find a CHR scope in the given region. 748 CHRScope * CHR::findScope(Region *R) { 749 CHRScope *Result = nullptr; 750 BasicBlock *Entry = R->getEntry(); 751 BasicBlock *Exit = R->getExit(); // null if top level. 752 assert(Entry && "Entry must not be null"); 753 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 754 "Only top level region has a null exit"); 755 if (Entry) 756 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 757 else 758 CHR_DEBUG(dbgs() << "Entry null\n"); 759 if (Exit) 760 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 761 else 762 CHR_DEBUG(dbgs() << "Exit null\n"); 763 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 764 // to this region). 765 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 766 if (EntryInSubregion) 767 return nullptr; 768 // Exclude loops 769 for (BasicBlock *Pred : predecessors(Entry)) 770 if (R->contains(Pred)) 771 return nullptr; 772 if (Exit) { 773 // Try to find an if-then block (check if R is an if-then). 774 // if (cond) { 775 // ... 776 // } 777 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 778 if (BI) 779 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 780 else 781 CHR_DEBUG(dbgs() << "BI null\n"); 782 if (BI && BI->isConditional()) { 783 BasicBlock *S0 = BI->getSuccessor(0); 784 BasicBlock *S1 = BI->getSuccessor(1); 785 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 786 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 787 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 788 RegInfo RI(R); 789 RI.HasBranch = checkBiasedBranch( 790 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 791 BranchBiasMap); 792 Result = new CHRScope(RI); 793 Scopes.insert(Result); 794 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 795 ++Stats.NumBranches; 796 if (!RI.HasBranch) { 797 ORE.emit([&]() { 798 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 799 << "Branch not biased"; 800 }); 801 } 802 } 803 } 804 } 805 { 806 // Try to look for selects in the direct child blocks (as opposed to in 807 // subregions) of R. 808 // ... 809 // if (..) { // Some subregion 810 // ... 811 // } 812 // if (..) { // Some subregion 813 // ... 814 // } 815 // ... 816 // a = cond ? b : c; 817 // ... 818 SmallVector<SelectInst *, 8> Selects; 819 for (RegionNode *E : R->elements()) { 820 if (E->isSubRegion()) 821 continue; 822 // This returns the basic block of E if E is a direct child of R (not a 823 // subregion.) 824 BasicBlock *BB = E->getEntry(); 825 // Need to push in the order to make it easier to find the first Select 826 // later. 827 for (Instruction &I : *BB) { 828 if (auto *SI = dyn_cast<SelectInst>(&I)) { 829 Selects.push_back(SI); 830 ++Stats.NumBranches; 831 } 832 } 833 } 834 if (Selects.size() > 0) { 835 auto AddSelects = [&](RegInfo &RI) { 836 for (auto *SI : Selects) 837 if (checkBiasedSelect(SI, RI.R, 838 TrueBiasedSelectsGlobal, 839 FalseBiasedSelectsGlobal, 840 SelectBiasMap)) 841 RI.Selects.push_back(SI); 842 else 843 ORE.emit([&]() { 844 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 845 << "Select not biased"; 846 }); 847 }; 848 if (!Result) { 849 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 850 RegInfo RI(R); 851 AddSelects(RI); 852 Result = new CHRScope(RI); 853 Scopes.insert(Result); 854 } else { 855 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 856 AddSelects(Result->RegInfos[0]); 857 } 858 } 859 } 860 861 if (Result) { 862 checkScopeHoistable(Result); 863 } 864 return Result; 865 } 866 867 // Check that any of the branch and the selects in the region could be 868 // hoisted above the the CHR branch insert point (the most dominating of 869 // them, either the branch (at the end of the first block) or the first 870 // select in the first block). If the branch can't be hoisted, drop the 871 // selects in the first blocks. 872 // 873 // For example, for the following scope/region with selects, we want to insert 874 // the merged branch right before the first select in the first/entry block by 875 // hoisting c1, c2, c3, and c4. 876 // 877 // // Branch insert point here. 878 // a = c1 ? b : c; // Select 1 879 // d = c2 ? e : f; // Select 2 880 // if (c3) { // Branch 881 // ... 882 // c4 = foo() // A call. 883 // g = c4 ? h : i; // Select 3 884 // } 885 // 886 // But suppose we can't hoist c4 because it's dependent on the preceding 887 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 888 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 889 void CHR::checkScopeHoistable(CHRScope *Scope) { 890 RegInfo &RI = Scope->RegInfos[0]; 891 Region *R = RI.R; 892 BasicBlock *EntryBB = R->getEntry(); 893 auto *Branch = RI.HasBranch ? 894 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 895 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 896 if (RI.HasBranch || !Selects.empty()) { 897 Instruction *InsertPoint = getBranchInsertPoint(RI); 898 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 899 // Avoid a data dependence from a select or a branch to a(nother) 900 // select. Note no instruction can't data-depend on a branch (a branch 901 // instruction doesn't produce a value). 902 DenseSet<Instruction *> Unhoistables; 903 // Initialize Unhoistables with the selects. 904 for (SelectInst *SI : Selects) { 905 Unhoistables.insert(SI); 906 } 907 // Remove Selects that can't be hoisted. 908 for (auto it = Selects.begin(); it != Selects.end(); ) { 909 SelectInst *SI = *it; 910 if (SI == InsertPoint) { 911 ++it; 912 continue; 913 } 914 DenseMap<Instruction *, bool> Visited; 915 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 916 DT, Unhoistables, nullptr, Visited); 917 if (!IsHoistable) { 918 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 919 ORE.emit([&]() { 920 return OptimizationRemarkMissed(DEBUG_TYPE, 921 "DropUnhoistableSelect", SI) 922 << "Dropped unhoistable select"; 923 }); 924 it = Selects.erase(it); 925 // Since we are dropping the select here, we also drop it from 926 // Unhoistables. 927 Unhoistables.erase(SI); 928 } else 929 ++it; 930 } 931 // Update InsertPoint after potentially removing selects. 932 InsertPoint = getBranchInsertPoint(RI); 933 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 934 if (RI.HasBranch && InsertPoint != Branch) { 935 DenseMap<Instruction *, bool> Visited; 936 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 937 DT, Unhoistables, nullptr, Visited); 938 if (!IsHoistable) { 939 // If the branch isn't hoistable, drop the selects in the entry 940 // block, preferring the branch, which makes the branch the hoist 941 // point. 942 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 943 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 944 CHR_DEBUG( 945 for (SelectInst *SI : Selects) { 946 dbgs() << "SI " << *SI << "\n"; 947 }); 948 for (SelectInst *SI : Selects) { 949 ORE.emit([&]() { 950 return OptimizationRemarkMissed(DEBUG_TYPE, 951 "DropSelectUnhoistableBranch", SI) 952 << "Dropped select due to unhoistable branch"; 953 }); 954 } 955 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 956 [EntryBB](SelectInst *SI) { 957 return SI->getParent() == EntryBB; 958 }), Selects.end()); 959 Unhoistables.clear(); 960 InsertPoint = Branch; 961 } 962 } 963 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 964 #ifndef NDEBUG 965 if (RI.HasBranch) { 966 assert(!DT.dominates(Branch, InsertPoint) && 967 "Branch can't be already above the hoist point"); 968 DenseMap<Instruction *, bool> Visited; 969 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 970 DT, Unhoistables, nullptr, Visited) && 971 "checkHoistValue for branch"); 972 } 973 for (auto *SI : Selects) { 974 assert(!DT.dominates(SI, InsertPoint) && 975 "SI can't be already above the hoist point"); 976 DenseMap<Instruction *, bool> Visited; 977 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 978 Unhoistables, nullptr, Visited) && 979 "checkHoistValue for selects"); 980 } 981 CHR_DEBUG(dbgs() << "Result\n"); 982 if (RI.HasBranch) { 983 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 984 } 985 for (auto *SI : Selects) { 986 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 987 } 988 #endif 989 } 990 } 991 992 // Traverse the region tree, find all nested scopes and merge them if possible. 993 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 994 SmallVectorImpl<CHRScope *> &Scopes) { 995 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 996 CHRScope *Result = findScope(R); 997 // Visit subscopes. 998 CHRScope *ConsecutiveSubscope = nullptr; 999 SmallVector<CHRScope *, 8> Subscopes; 1000 for (auto It = R->begin(); It != R->end(); ++It) { 1001 const std::unique_ptr<Region> &SubR = *It; 1002 auto NextIt = std::next(It); 1003 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 1004 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 1005 << "\n"); 1006 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 1007 if (SubCHRScope) { 1008 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 1009 } else { 1010 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 1011 } 1012 if (SubCHRScope) { 1013 if (!ConsecutiveSubscope) 1014 ConsecutiveSubscope = SubCHRScope; 1015 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 1016 Subscopes.push_back(ConsecutiveSubscope); 1017 ConsecutiveSubscope = SubCHRScope; 1018 } else 1019 ConsecutiveSubscope->append(SubCHRScope); 1020 } else { 1021 if (ConsecutiveSubscope) { 1022 Subscopes.push_back(ConsecutiveSubscope); 1023 } 1024 ConsecutiveSubscope = nullptr; 1025 } 1026 } 1027 if (ConsecutiveSubscope) { 1028 Subscopes.push_back(ConsecutiveSubscope); 1029 } 1030 for (CHRScope *Sub : Subscopes) { 1031 if (Result) { 1032 // Combine it with the parent. 1033 Result->addSub(Sub); 1034 } else { 1035 // Push Subscopes as they won't be combined with the parent. 1036 Scopes.push_back(Sub); 1037 } 1038 } 1039 return Result; 1040 } 1041 1042 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1043 DenseSet<Value *> ConditionValues; 1044 if (RI.HasBranch) { 1045 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1046 ConditionValues.insert(BI->getCondition()); 1047 } 1048 for (SelectInst *SI : RI.Selects) { 1049 ConditionValues.insert(SI->getCondition()); 1050 } 1051 return ConditionValues; 1052 } 1053 1054 1055 // Determine whether to split a scope depending on the sets of the branch 1056 // condition values of the previous region and the current region. We split 1057 // (return true) it if 1) the condition values of the inner/lower scope can't be 1058 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1059 // values have an empty intersection (because the combined branch conditions 1060 // won't probably lead to a simpler combined condition). 1061 static bool shouldSplit(Instruction *InsertPoint, 1062 DenseSet<Value *> &PrevConditionValues, 1063 DenseSet<Value *> &ConditionValues, 1064 DominatorTree &DT, 1065 DenseSet<Instruction *> &Unhoistables) { 1066 assert(InsertPoint && "Null InsertPoint"); 1067 CHR_DEBUG( 1068 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1069 for (Value *V : PrevConditionValues) { 1070 dbgs() << *V << ", "; 1071 } 1072 dbgs() << " ConditionValues "; 1073 for (Value *V : ConditionValues) { 1074 dbgs() << *V << ", "; 1075 } 1076 dbgs() << "\n"); 1077 // If any of Bases isn't hoistable to the hoist point, split. 1078 for (Value *V : ConditionValues) { 1079 DenseMap<Instruction *, bool> Visited; 1080 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1081 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1082 return true; // Not hoistable, split. 1083 } 1084 } 1085 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1086 // unnecessary splits at scopes with no branch/selects. If 1087 // PrevConditionValues and ConditionValues don't intersect at all, split. 1088 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1089 // Use std::set as DenseSet doesn't work with set_intersection. 1090 std::set<Value *> PrevBases, Bases; 1091 DenseMap<Value *, std::set<Value *>> Visited; 1092 for (Value *V : PrevConditionValues) { 1093 std::set<Value *> BaseValues = getBaseValues(V, DT, Visited); 1094 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1095 } 1096 for (Value *V : ConditionValues) { 1097 std::set<Value *> BaseValues = getBaseValues(V, DT, Visited); 1098 Bases.insert(BaseValues.begin(), BaseValues.end()); 1099 } 1100 CHR_DEBUG( 1101 dbgs() << "PrevBases "; 1102 for (Value *V : PrevBases) { 1103 dbgs() << *V << ", "; 1104 } 1105 dbgs() << " Bases "; 1106 for (Value *V : Bases) { 1107 dbgs() << *V << ", "; 1108 } 1109 dbgs() << "\n"); 1110 std::set<Value *> Intersection; 1111 std::set_intersection(PrevBases.begin(), PrevBases.end(), 1112 Bases.begin(), Bases.end(), 1113 std::inserter(Intersection, Intersection.begin())); 1114 if (Intersection.empty()) { 1115 // Empty intersection, split. 1116 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1117 return true; 1118 } 1119 } 1120 CHR_DEBUG(dbgs() << "No split\n"); 1121 return false; // Don't split. 1122 } 1123 1124 static void getSelectsInScope(CHRScope *Scope, 1125 DenseSet<Instruction *> &Output) { 1126 for (RegInfo &RI : Scope->RegInfos) 1127 for (SelectInst *SI : RI.Selects) 1128 Output.insert(SI); 1129 for (CHRScope *Sub : Scope->Subs) 1130 getSelectsInScope(Sub, Output); 1131 } 1132 1133 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1134 SmallVectorImpl<CHRScope *> &Output) { 1135 for (CHRScope *Scope : Input) { 1136 assert(!Scope->BranchInsertPoint && 1137 "BranchInsertPoint must not be set"); 1138 DenseSet<Instruction *> Unhoistables; 1139 getSelectsInScope(Scope, Unhoistables); 1140 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1141 } 1142 #ifndef NDEBUG 1143 for (CHRScope *Scope : Output) { 1144 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1145 } 1146 #endif 1147 } 1148 1149 SmallVector<CHRScope *, 8> CHR::splitScope( 1150 CHRScope *Scope, 1151 CHRScope *Outer, 1152 DenseSet<Value *> *OuterConditionValues, 1153 Instruction *OuterInsertPoint, 1154 SmallVectorImpl<CHRScope *> &Output, 1155 DenseSet<Instruction *> &Unhoistables) { 1156 if (Outer) { 1157 assert(OuterConditionValues && "Null OuterConditionValues"); 1158 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1159 } 1160 bool PrevSplitFromOuter = true; 1161 DenseSet<Value *> PrevConditionValues; 1162 Instruction *PrevInsertPoint = nullptr; 1163 SmallVector<CHRScope *, 8> Splits; 1164 SmallVector<bool, 8> SplitsSplitFromOuter; 1165 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1166 SmallVector<Instruction *, 8> SplitsInsertPoints; 1167 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1168 for (RegInfo &RI : RegInfos) { 1169 Instruction *InsertPoint = getBranchInsertPoint(RI); 1170 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1171 CHR_DEBUG( 1172 dbgs() << "ConditionValues "; 1173 for (Value *V : ConditionValues) { 1174 dbgs() << *V << ", "; 1175 } 1176 dbgs() << "\n"); 1177 if (RI.R == RegInfos[0].R) { 1178 // First iteration. Check to see if we should split from the outer. 1179 if (Outer) { 1180 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1181 CHR_DEBUG(dbgs() << "Should split from outer at " 1182 << RI.R->getNameStr() << "\n"); 1183 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1184 ConditionValues, DT, Unhoistables)) { 1185 PrevConditionValues = ConditionValues; 1186 PrevInsertPoint = InsertPoint; 1187 ORE.emit([&]() { 1188 return OptimizationRemarkMissed(DEBUG_TYPE, 1189 "SplitScopeFromOuter", 1190 RI.R->getEntry()->getTerminator()) 1191 << "Split scope from outer due to unhoistable branch/select " 1192 << "and/or lack of common condition values"; 1193 }); 1194 } else { 1195 // Not splitting from the outer. Use the outer bases and insert 1196 // point. Union the bases. 1197 PrevSplitFromOuter = false; 1198 PrevConditionValues = *OuterConditionValues; 1199 PrevConditionValues.insert(ConditionValues.begin(), 1200 ConditionValues.end()); 1201 PrevInsertPoint = OuterInsertPoint; 1202 } 1203 } else { 1204 CHR_DEBUG(dbgs() << "Outer null\n"); 1205 PrevConditionValues = ConditionValues; 1206 PrevInsertPoint = InsertPoint; 1207 } 1208 } else { 1209 CHR_DEBUG(dbgs() << "Should split from prev at " 1210 << RI.R->getNameStr() << "\n"); 1211 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1212 DT, Unhoistables)) { 1213 CHRScope *Tail = Scope->split(RI.R); 1214 Scopes.insert(Tail); 1215 Splits.push_back(Scope); 1216 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1217 SplitsConditionValues.push_back(PrevConditionValues); 1218 SplitsInsertPoints.push_back(PrevInsertPoint); 1219 Scope = Tail; 1220 PrevConditionValues = ConditionValues; 1221 PrevInsertPoint = InsertPoint; 1222 PrevSplitFromOuter = true; 1223 ORE.emit([&]() { 1224 return OptimizationRemarkMissed(DEBUG_TYPE, 1225 "SplitScopeFromPrev", 1226 RI.R->getEntry()->getTerminator()) 1227 << "Split scope from previous due to unhoistable branch/select " 1228 << "and/or lack of common condition values"; 1229 }); 1230 } else { 1231 // Not splitting. Union the bases. Keep the hoist point. 1232 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1233 } 1234 } 1235 } 1236 Splits.push_back(Scope); 1237 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1238 SplitsConditionValues.push_back(PrevConditionValues); 1239 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1240 SplitsInsertPoints.push_back(PrevInsertPoint); 1241 assert(Splits.size() == SplitsConditionValues.size() && 1242 Splits.size() == SplitsSplitFromOuter.size() && 1243 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1244 for (size_t I = 0; I < Splits.size(); ++I) { 1245 CHRScope *Split = Splits[I]; 1246 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1247 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1248 SmallVector<CHRScope *, 8> NewSubs; 1249 DenseSet<Instruction *> SplitUnhoistables; 1250 getSelectsInScope(Split, SplitUnhoistables); 1251 for (CHRScope *Sub : Split->Subs) { 1252 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1253 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1254 SplitUnhoistables); 1255 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1256 } 1257 Split->Subs = NewSubs; 1258 } 1259 SmallVector<CHRScope *, 8> Result; 1260 for (size_t I = 0; I < Splits.size(); ++I) { 1261 CHRScope *Split = Splits[I]; 1262 if (SplitsSplitFromOuter[I]) { 1263 // Split from the outer. 1264 Output.push_back(Split); 1265 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1266 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1267 << "\n"); 1268 } else { 1269 // Connected to the outer. 1270 Result.push_back(Split); 1271 } 1272 } 1273 if (!Outer) 1274 assert(Result.empty() && 1275 "If no outer (top-level), must return no nested ones"); 1276 return Result; 1277 } 1278 1279 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1280 for (CHRScope *Scope : Scopes) { 1281 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1282 classifyBiasedScopes(Scope, Scope); 1283 CHR_DEBUG( 1284 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1285 dbgs() << "TrueBiasedRegions "; 1286 for (Region *R : Scope->TrueBiasedRegions) { 1287 dbgs() << R->getNameStr() << ", "; 1288 } 1289 dbgs() << "\n"; 1290 dbgs() << "FalseBiasedRegions "; 1291 for (Region *R : Scope->FalseBiasedRegions) { 1292 dbgs() << R->getNameStr() << ", "; 1293 } 1294 dbgs() << "\n"; 1295 dbgs() << "TrueBiasedSelects "; 1296 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1297 dbgs() << *SI << ", "; 1298 } 1299 dbgs() << "\n"; 1300 dbgs() << "FalseBiasedSelects "; 1301 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1302 dbgs() << *SI << ", "; 1303 } 1304 dbgs() << "\n";); 1305 } 1306 } 1307 1308 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1309 for (RegInfo &RI : Scope->RegInfos) { 1310 if (RI.HasBranch) { 1311 Region *R = RI.R; 1312 if (TrueBiasedRegionsGlobal.count(R) > 0) 1313 OutermostScope->TrueBiasedRegions.insert(R); 1314 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1315 OutermostScope->FalseBiasedRegions.insert(R); 1316 else 1317 llvm_unreachable("Must be biased"); 1318 } 1319 for (SelectInst *SI : RI.Selects) { 1320 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1321 OutermostScope->TrueBiasedSelects.insert(SI); 1322 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1323 OutermostScope->FalseBiasedSelects.insert(SI); 1324 else 1325 llvm_unreachable("Must be biased"); 1326 } 1327 } 1328 for (CHRScope *Sub : Scope->Subs) { 1329 classifyBiasedScopes(Sub, OutermostScope); 1330 } 1331 } 1332 1333 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1334 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1335 Scope->FalseBiasedRegions.size() + 1336 Scope->TrueBiasedSelects.size() + 1337 Scope->FalseBiasedSelects.size(); 1338 return NumBiased >= CHRMergeThreshold; 1339 } 1340 1341 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1342 SmallVectorImpl<CHRScope *> &Output) { 1343 for (CHRScope *Scope : Input) { 1344 // Filter out the ones with only one region and no subs. 1345 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1346 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1347 << Scope->TrueBiasedRegions.size() 1348 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1349 << " true-selects " << Scope->TrueBiasedSelects.size() 1350 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1351 ORE.emit([&]() { 1352 return OptimizationRemarkMissed( 1353 DEBUG_TYPE, 1354 "DropScopeWithOneBranchOrSelect", 1355 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1356 << "Drop scope with < " 1357 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1358 << " biased branch(es) or select(s)"; 1359 }); 1360 continue; 1361 } 1362 Output.push_back(Scope); 1363 } 1364 } 1365 1366 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1367 SmallVectorImpl<CHRScope *> &Output) { 1368 for (CHRScope *Scope : Input) { 1369 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1370 "Empty"); 1371 setCHRRegions(Scope, Scope); 1372 Output.push_back(Scope); 1373 CHR_DEBUG( 1374 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1375 for (auto pair : Scope->HoistStopMap) { 1376 Region *R = pair.first; 1377 dbgs() << "Region " << R->getNameStr() << "\n"; 1378 for (Instruction *I : pair.second) { 1379 dbgs() << "HoistStop " << *I << "\n"; 1380 } 1381 } 1382 dbgs() << "CHRRegions" << "\n"; 1383 for (RegInfo &RI : Scope->CHRRegions) { 1384 dbgs() << RI.R->getNameStr() << "\n"; 1385 }); 1386 } 1387 } 1388 1389 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1390 DenseSet<Instruction *> Unhoistables; 1391 // Put the biased selects in Unhoistables because they should stay where they 1392 // are and constant-folded after CHR (in case one biased select or a branch 1393 // can depend on another biased select.) 1394 for (RegInfo &RI : Scope->RegInfos) { 1395 for (SelectInst *SI : RI.Selects) { 1396 Unhoistables.insert(SI); 1397 } 1398 } 1399 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1400 for (RegInfo &RI : Scope->RegInfos) { 1401 Region *R = RI.R; 1402 DenseSet<Instruction *> HoistStops; 1403 bool IsHoisted = false; 1404 if (RI.HasBranch) { 1405 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1406 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1407 "Must be truthy or falsy"); 1408 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1409 // Note checkHoistValue fills in HoistStops. 1410 DenseMap<Instruction *, bool> Visited; 1411 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1412 Unhoistables, &HoistStops, Visited); 1413 assert(IsHoistable && "Must be hoistable"); 1414 (void)(IsHoistable); // Unused in release build 1415 IsHoisted = true; 1416 } 1417 for (SelectInst *SI : RI.Selects) { 1418 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1419 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1420 "Must be true or false biased"); 1421 // Note checkHoistValue fills in HoistStops. 1422 DenseMap<Instruction *, bool> Visited; 1423 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1424 Unhoistables, &HoistStops, Visited); 1425 assert(IsHoistable && "Must be hoistable"); 1426 (void)(IsHoistable); // Unused in release build 1427 IsHoisted = true; 1428 } 1429 if (IsHoisted) { 1430 OutermostScope->CHRRegions.push_back(RI); 1431 OutermostScope->HoistStopMap[R] = HoistStops; 1432 } 1433 } 1434 for (CHRScope *Sub : Scope->Subs) 1435 setCHRRegions(Sub, OutermostScope); 1436 } 1437 1438 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1439 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1440 } 1441 1442 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1443 SmallVectorImpl<CHRScope *> &Output) { 1444 Output.resize(Input.size()); 1445 llvm::copy(Input, Output.begin()); 1446 llvm::stable_sort(Output, CHRScopeSorter); 1447 } 1448 1449 // Return true if V is already hoisted or was hoisted (along with its operands) 1450 // to the insert point. 1451 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1452 HoistStopMapTy &HoistStopMap, 1453 DenseSet<Instruction *> &HoistedSet, 1454 DenseSet<PHINode *> &TrivialPHIs, 1455 DominatorTree &DT) { 1456 auto IT = HoistStopMap.find(R); 1457 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1458 DenseSet<Instruction *> &HoistStops = IT->second; 1459 if (auto *I = dyn_cast<Instruction>(V)) { 1460 if (I == HoistPoint) 1461 return; 1462 if (HoistStops.count(I)) 1463 return; 1464 if (auto *PN = dyn_cast<PHINode>(I)) 1465 if (TrivialPHIs.count(PN)) 1466 // The trivial phi inserted by the previous CHR scope could replace a 1467 // non-phi in HoistStops. Note that since this phi is at the exit of a 1468 // previous CHR scope, which dominates this scope, it's safe to stop 1469 // hoisting there. 1470 return; 1471 if (HoistedSet.count(I)) 1472 // Already hoisted, return. 1473 return; 1474 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1475 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1476 assert(DT.getNode(HoistPoint->getParent()) && 1477 "DT must contain HoistPoint block"); 1478 if (DT.dominates(I, HoistPoint)) 1479 // We are already above the hoist point. Stop here. This may be necessary 1480 // when multiple scopes would independently hoist the same 1481 // instruction. Since an outer (dominating) scope would hoist it to its 1482 // entry before an inner (dominated) scope would to its entry, the inner 1483 // scope may see the instruction already hoisted, in which case it 1484 // potentially wrong for the inner scope to hoist it and could cause bad 1485 // IR (non-dominating def), but safe to skip hoisting it instead because 1486 // it's already in a block that dominates the inner scope. 1487 return; 1488 for (Value *Op : I->operands()) { 1489 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1490 } 1491 I->moveBefore(HoistPoint); 1492 HoistedSet.insert(I); 1493 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1494 } 1495 } 1496 1497 // Hoist the dependent condition values of the branches and the selects in the 1498 // scope to the insert point. 1499 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1500 DenseSet<PHINode *> &TrivialPHIs, 1501 DominatorTree &DT) { 1502 DenseSet<Instruction *> HoistedSet; 1503 for (const RegInfo &RI : Scope->CHRRegions) { 1504 Region *R = RI.R; 1505 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1506 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1507 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1508 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1509 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1510 HoistedSet, TrivialPHIs, DT); 1511 } 1512 for (SelectInst *SI : RI.Selects) { 1513 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1514 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1515 if (!(IsTrueBiased || IsFalseBiased)) 1516 continue; 1517 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1518 HoistedSet, TrivialPHIs, DT); 1519 } 1520 } 1521 } 1522 1523 // Negate the predicate if an ICmp if it's used only by branches or selects by 1524 // swapping the operands of the branches or the selects. Returns true if success. 1525 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1526 Instruction *ExcludedUser, 1527 CHRScope *Scope) { 1528 for (User *U : ICmp->users()) { 1529 if (U == ExcludedUser) 1530 continue; 1531 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1532 continue; 1533 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1534 continue; 1535 return false; 1536 } 1537 for (User *U : ICmp->users()) { 1538 if (U == ExcludedUser) 1539 continue; 1540 if (auto *BI = dyn_cast<BranchInst>(U)) { 1541 assert(BI->isConditional() && "Must be conditional"); 1542 BI->swapSuccessors(); 1543 // Don't need to swap this in terms of 1544 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1545 // mean whehter the branch is likely go into the if-then rather than 1546 // successor0/successor1 and because we can tell which edge is the then or 1547 // the else one by comparing the destination to the region exit block. 1548 continue; 1549 } 1550 if (auto *SI = dyn_cast<SelectInst>(U)) { 1551 // Swap operands 1552 SI->swapValues(); 1553 SI->swapProfMetadata(); 1554 if (Scope->TrueBiasedSelects.count(SI)) { 1555 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1556 "Must not be already in"); 1557 Scope->FalseBiasedSelects.insert(SI); 1558 } else if (Scope->FalseBiasedSelects.count(SI)) { 1559 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1560 "Must not be already in"); 1561 Scope->TrueBiasedSelects.insert(SI); 1562 } 1563 continue; 1564 } 1565 llvm_unreachable("Must be a branch or a select"); 1566 } 1567 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1568 return true; 1569 } 1570 1571 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1572 // for a value that's defined in the scope but used outside it (meaning it's 1573 // alive at the exit block). 1574 static void insertTrivialPHIs(CHRScope *Scope, 1575 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1576 DenseSet<PHINode *> &TrivialPHIs) { 1577 DenseSet<BasicBlock *> BlocksInScopeSet; 1578 SmallVector<BasicBlock *, 8> BlocksInScopeVec; 1579 for (RegInfo &RI : Scope->RegInfos) { 1580 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1581 // sub-Scopes. 1582 BlocksInScopeSet.insert(BB); 1583 BlocksInScopeVec.push_back(BB); 1584 } 1585 } 1586 CHR_DEBUG( 1587 dbgs() << "Inserting redudant phis\n"; 1588 for (BasicBlock *BB : BlocksInScopeVec) { 1589 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1590 }); 1591 for (BasicBlock *BB : BlocksInScopeVec) { 1592 for (Instruction &I : *BB) { 1593 SmallVector<Instruction *, 8> Users; 1594 for (User *U : I.users()) { 1595 if (auto *UI = dyn_cast<Instruction>(U)) { 1596 if (BlocksInScopeSet.count(UI->getParent()) == 0 && 1597 // Unless there's already a phi for I at the exit block. 1598 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1599 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1600 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1601 Users.push_back(UI); 1602 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1603 // There's a loop backedge from a block that's dominated by this 1604 // scope to the entry block. 1605 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1606 CHR_DEBUG(dbgs() 1607 << "Used at entry block (for a back edge) by a phi user " 1608 << *UI << "\n"); 1609 Users.push_back(UI); 1610 } 1611 } 1612 } 1613 if (Users.size() > 0) { 1614 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1615 // ExitBlock. Replace I with the new phi in UI unless UI is another 1616 // phi at ExitBlock. 1617 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1618 pred_end(ExitBlock)); 1619 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1620 &ExitBlock->front()); 1621 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1622 PN->addIncoming(&I, Pred); 1623 } 1624 TrivialPHIs.insert(PN); 1625 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1626 for (Instruction *UI : Users) { 1627 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1628 if (UI->getOperand(J) == &I) { 1629 UI->setOperand(J, PN); 1630 } 1631 } 1632 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1633 } 1634 } 1635 } 1636 } 1637 } 1638 1639 // Assert that all the CHR regions of the scope have a biased branch or select. 1640 static void LLVM_ATTRIBUTE_UNUSED 1641 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1642 #ifndef NDEBUG 1643 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1644 if (Scope->TrueBiasedRegions.count(RI.R) || 1645 Scope->FalseBiasedRegions.count(RI.R)) 1646 return true; 1647 for (SelectInst *SI : RI.Selects) 1648 if (Scope->TrueBiasedSelects.count(SI) || 1649 Scope->FalseBiasedSelects.count(SI)) 1650 return true; 1651 return false; 1652 }; 1653 for (RegInfo &RI : Scope->CHRRegions) { 1654 assert(HasBiasedBranchOrSelect(RI, Scope) && 1655 "Must have biased branch or select"); 1656 } 1657 #endif 1658 } 1659 1660 // Assert that all the condition values of the biased branches and selects have 1661 // been hoisted to the pre-entry block or outside of the scope. 1662 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1663 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1664 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1665 for (RegInfo &RI : Scope->CHRRegions) { 1666 Region *R = RI.R; 1667 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1668 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1669 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1670 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1671 Value *V = BI->getCondition(); 1672 CHR_DEBUG(dbgs() << *V << "\n"); 1673 if (auto *I = dyn_cast<Instruction>(V)) { 1674 (void)(I); // Unused in release build. 1675 assert((I->getParent() == PreEntryBlock || 1676 !Scope->contains(I)) && 1677 "Must have been hoisted to PreEntryBlock or outside the scope"); 1678 } 1679 } 1680 for (SelectInst *SI : RI.Selects) { 1681 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1682 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1683 if (!(IsTrueBiased || IsFalseBiased)) 1684 continue; 1685 Value *V = SI->getCondition(); 1686 CHR_DEBUG(dbgs() << *V << "\n"); 1687 if (auto *I = dyn_cast<Instruction>(V)) { 1688 (void)(I); // Unused in release build. 1689 assert((I->getParent() == PreEntryBlock || 1690 !Scope->contains(I)) && 1691 "Must have been hoisted to PreEntryBlock or outside the scope"); 1692 } 1693 } 1694 } 1695 } 1696 1697 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1698 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1699 1700 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1701 Region *FirstRegion = Scope->RegInfos[0].R; 1702 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1703 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1704 BasicBlock *ExitBlock = LastRegion->getExit(); 1705 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1706 1707 if (ExitBlock) { 1708 // Insert a trivial phi at the exit block (where the CHR hot path and the 1709 // cold path merges) for a value that's defined in the scope but used 1710 // outside it (meaning it's alive at the exit block). We will add the 1711 // incoming values for the CHR cold paths to it below. Without this, we'd 1712 // miss updating phi's for such values unless there happens to already be a 1713 // phi for that value there. 1714 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1715 } 1716 1717 // Split the entry block of the first region. The new block becomes the new 1718 // entry block of the first region. The old entry block becomes the block to 1719 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1720 // through the split, we update the entry of the first region after the split, 1721 // and Region only points to the entry and the exit blocks, rather than 1722 // keeping everything in a list or set, the blocks membership and the 1723 // entry/exit blocks of the region are still valid after the split. 1724 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1725 << " at " << *Scope->BranchInsertPoint << "\n"); 1726 BasicBlock *NewEntryBlock = 1727 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1728 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1729 "NewEntryBlock's only pred must be EntryBlock"); 1730 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1731 BasicBlock *PreEntryBlock = EntryBlock; 1732 1733 ValueToValueMapTy VMap; 1734 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1735 // hot path (originals) and a cold path (clones) and update the PHIs at the 1736 // exit block. 1737 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1738 1739 // Replace the old (placeholder) branch with the new (merged) conditional 1740 // branch. 1741 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1742 NewEntryBlock, VMap); 1743 1744 #ifndef NDEBUG 1745 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1746 #endif 1747 1748 // Hoist the conditional values of the branches/selects. 1749 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1750 1751 #ifndef NDEBUG 1752 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1753 #endif 1754 1755 // Create the combined branch condition and constant-fold the branches/selects 1756 // in the hot path. 1757 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1758 ProfileCount ? ProfileCount.getValue() : 0); 1759 } 1760 1761 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1762 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1763 // at the exit block. 1764 void CHR::cloneScopeBlocks(CHRScope *Scope, 1765 BasicBlock *PreEntryBlock, 1766 BasicBlock *ExitBlock, 1767 Region *LastRegion, 1768 ValueToValueMapTy &VMap) { 1769 // Clone all the blocks. The original blocks will be the hot-path 1770 // CHR-optimized code and the cloned blocks will be the original unoptimized 1771 // code. This is so that the block pointers from the 1772 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1773 // which CHR should apply to. 1774 SmallVector<BasicBlock*, 8> NewBlocks; 1775 for (RegInfo &RI : Scope->RegInfos) 1776 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1777 // sub-Scopes. 1778 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1779 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1780 NewBlocks.push_back(NewBB); 1781 VMap[BB] = NewBB; 1782 } 1783 1784 // Place the cloned blocks right after the original blocks (right before the 1785 // exit block of.) 1786 if (ExitBlock) 1787 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1788 F.getBasicBlockList(), 1789 NewBlocks[0]->getIterator(), F.end()); 1790 1791 // Update the cloned blocks/instructions to refer to themselves. 1792 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1793 for (Instruction &I : *NewBlocks[i]) 1794 RemapInstruction(&I, VMap, 1795 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1796 1797 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1798 // the top-level region but we don't need to add PHIs. The trivial PHIs 1799 // inserted above will be updated here. 1800 if (ExitBlock) 1801 for (PHINode &PN : ExitBlock->phis()) 1802 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1803 ++I) { 1804 BasicBlock *Pred = PN.getIncomingBlock(I); 1805 if (LastRegion->contains(Pred)) { 1806 Value *V = PN.getIncomingValue(I); 1807 auto It = VMap.find(V); 1808 if (It != VMap.end()) V = It->second; 1809 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1810 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1811 } 1812 } 1813 } 1814 1815 // A helper for transformScope. Replace the old (placeholder) branch with the 1816 // new (merged) conditional branch. 1817 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1818 BasicBlock *EntryBlock, 1819 BasicBlock *NewEntryBlock, 1820 ValueToValueMapTy &VMap) { 1821 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1822 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1823 "SplitBlock did not work correctly!"); 1824 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1825 "NewEntryBlock's only pred must be EntryBlock"); 1826 assert(VMap.find(NewEntryBlock) != VMap.end() && 1827 "NewEntryBlock must have been copied"); 1828 OldBR->dropAllReferences(); 1829 OldBR->eraseFromParent(); 1830 // The true predicate is a placeholder. It will be replaced later in 1831 // fixupBranchesAndSelects(). 1832 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1833 cast<BasicBlock>(VMap[NewEntryBlock]), 1834 ConstantInt::getTrue(F.getContext())); 1835 PreEntryBlock->getInstList().push_back(NewBR); 1836 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1837 "NewEntryBlock's only pred must be EntryBlock"); 1838 return NewBR; 1839 } 1840 1841 // A helper for transformScopes. Create the combined branch condition and 1842 // constant-fold the branches/selects in the hot path. 1843 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1844 BasicBlock *PreEntryBlock, 1845 BranchInst *MergedBR, 1846 uint64_t ProfileCount) { 1847 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1848 BranchProbability CHRBranchBias(1, 1); 1849 uint64_t NumCHRedBranches = 0; 1850 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1851 for (RegInfo &RI : Scope->CHRRegions) { 1852 Region *R = RI.R; 1853 if (RI.HasBranch) { 1854 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1855 ++NumCHRedBranches; 1856 } 1857 for (SelectInst *SI : RI.Selects) { 1858 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1859 ++NumCHRedBranches; 1860 } 1861 } 1862 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1863 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1864 ORE.emit([&]() { 1865 return OptimizationRemark(DEBUG_TYPE, 1866 "CHR", 1867 // Refer to the hot (original) path 1868 MergedBR->getSuccessor(0)->getTerminator()) 1869 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1870 << " branches or selects"; 1871 }); 1872 MergedBR->setCondition(MergedCondition); 1873 SmallVector<uint32_t, 2> Weights; 1874 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); 1875 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); 1876 MDBuilder MDB(F.getContext()); 1877 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1878 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1879 << "\n"); 1880 } 1881 1882 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1883 // and constant-fold a branch in the hot path. 1884 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1885 IRBuilder<> &IRB, 1886 Value *&MergedCondition, 1887 BranchProbability &CHRBranchBias) { 1888 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1889 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1890 "Must be truthy or falsy"); 1891 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1892 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1893 "Must be in the bias map"); 1894 BranchProbability Bias = BranchBiasMap[R]; 1895 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1896 // Take the min. 1897 if (CHRBranchBias > Bias) 1898 CHRBranchBias = Bias; 1899 BasicBlock *IfThen = BI->getSuccessor(1); 1900 BasicBlock *IfElse = BI->getSuccessor(0); 1901 BasicBlock *RegionExitBlock = R->getExit(); 1902 assert(RegionExitBlock && "Null ExitBlock"); 1903 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1904 IfThen != IfElse && "Invariant from findScopes"); 1905 if (IfThen == RegionExitBlock) { 1906 // Swap them so that IfThen means going into it and IfElse means skipping 1907 // it. 1908 std::swap(IfThen, IfElse); 1909 } 1910 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1911 << " IfElse " << IfElse->getName() << "\n"); 1912 Value *Cond = BI->getCondition(); 1913 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1914 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1915 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1916 MergedCondition); 1917 // Constant-fold the branch at ClonedEntryBlock. 1918 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1919 "The successor shouldn't change"); 1920 Value *NewCondition = ConditionTrue ? 1921 ConstantInt::getTrue(F.getContext()) : 1922 ConstantInt::getFalse(F.getContext()); 1923 BI->setCondition(NewCondition); 1924 } 1925 1926 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1927 // and constant-fold a select in the hot path. 1928 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1929 IRBuilder<> &IRB, 1930 Value *&MergedCondition, 1931 BranchProbability &CHRBranchBias) { 1932 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1933 assert((IsTrueBiased || 1934 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1935 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1936 "Must be in the bias map"); 1937 BranchProbability Bias = SelectBiasMap[SI]; 1938 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1939 // Take the min. 1940 if (CHRBranchBias > Bias) 1941 CHRBranchBias = Bias; 1942 Value *Cond = SI->getCondition(); 1943 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1944 MergedCondition); 1945 Value *NewCondition = IsTrueBiased ? 1946 ConstantInt::getTrue(F.getContext()) : 1947 ConstantInt::getFalse(F.getContext()); 1948 SI->setCondition(NewCondition); 1949 } 1950 1951 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1952 // condition. 1953 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1954 Instruction *BranchOrSelect, 1955 CHRScope *Scope, 1956 IRBuilder<> &IRB, 1957 Value *&MergedCondition) { 1958 if (IsTrueBiased) { 1959 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1960 } else { 1961 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1962 // branch, negate the icmp predicate and swap the branch targets and avoid 1963 // inserting an Xor to negate Cond. 1964 bool Done = false; 1965 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1966 if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1967 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1968 Done = true; 1969 } 1970 if (!Done) { 1971 Value *Negate = IRB.CreateXor( 1972 ConstantInt::getTrue(F.getContext()), Cond); 1973 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1974 } 1975 } 1976 } 1977 1978 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1979 unsigned I = 0; 1980 DenseSet<PHINode *> TrivialPHIs; 1981 for (CHRScope *Scope : CHRScopes) { 1982 transformScopes(Scope, TrivialPHIs); 1983 CHR_DEBUG( 1984 std::ostringstream oss; 1985 oss << " after transformScopes " << I++; 1986 dumpIR(F, oss.str().c_str(), nullptr)); 1987 (void)I; 1988 } 1989 } 1990 1991 static void LLVM_ATTRIBUTE_UNUSED 1992 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1993 dbgs() << Label << " " << Scopes.size() << "\n"; 1994 for (CHRScope *Scope : Scopes) { 1995 dbgs() << *Scope << "\n"; 1996 } 1997 } 1998 1999 bool CHR::run() { 2000 if (!shouldApply(F, PSI)) 2001 return false; 2002 2003 CHR_DEBUG(dumpIR(F, "before", nullptr)); 2004 2005 bool Changed = false; 2006 { 2007 CHR_DEBUG( 2008 dbgs() << "RegionInfo:\n"; 2009 RI.print(dbgs())); 2010 2011 // Recursively traverse the region tree and find regions that have biased 2012 // branches and/or selects and create scopes. 2013 SmallVector<CHRScope *, 8> AllScopes; 2014 findScopes(AllScopes); 2015 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 2016 2017 // Split the scopes if 1) the conditiona values of the biased 2018 // branches/selects of the inner/lower scope can't be hoisted up to the 2019 // outermost/uppermost scope entry, or 2) the condition values of the biased 2020 // branches/selects in a scope (including subscopes) don't share at least 2021 // one common value. 2022 SmallVector<CHRScope *, 8> SplitScopes; 2023 splitScopes(AllScopes, SplitScopes); 2024 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2025 2026 // After splitting, set the biased regions and selects of a scope (a tree 2027 // root) that include those of the subscopes. 2028 classifyBiasedScopes(SplitScopes); 2029 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2030 2031 // Filter out the scopes that has only one biased region or select (CHR 2032 // isn't useful in such a case). 2033 SmallVector<CHRScope *, 8> FilteredScopes; 2034 filterScopes(SplitScopes, FilteredScopes); 2035 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2036 2037 // Set the regions to be CHR'ed and their hoist stops for each scope. 2038 SmallVector<CHRScope *, 8> SetScopes; 2039 setCHRRegions(FilteredScopes, SetScopes); 2040 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2041 2042 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2043 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2044 // to the hot path, rather than both hot and cold paths. 2045 SmallVector<CHRScope *, 8> SortedScopes; 2046 sortScopes(SetScopes, SortedScopes); 2047 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2048 2049 CHR_DEBUG( 2050 dbgs() << "RegionInfo:\n"; 2051 RI.print(dbgs())); 2052 2053 // Apply the CHR transformation. 2054 if (!SortedScopes.empty()) { 2055 transformScopes(SortedScopes); 2056 Changed = true; 2057 } 2058 } 2059 2060 if (Changed) { 2061 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2062 ORE.emit([&]() { 2063 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2064 << ore::NV("Function", &F) << " " 2065 << "Reduced the number of branches in hot paths by " 2066 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2067 << " (static) and " 2068 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2069 << " (weighted by PGO count)"; 2070 }); 2071 } 2072 2073 return Changed; 2074 } 2075 2076 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 2077 BlockFrequencyInfo &BFI = 2078 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 2079 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2080 ProfileSummaryInfo &PSI = 2081 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2082 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 2083 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = 2084 std::make_unique<OptimizationRemarkEmitter>(&F); 2085 return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); 2086 } 2087 2088 namespace llvm { 2089 2090 ControlHeightReductionPass::ControlHeightReductionPass() { 2091 parseCHRFilterFiles(); 2092 } 2093 2094 PreservedAnalyses ControlHeightReductionPass::run( 2095 Function &F, 2096 FunctionAnalysisManager &FAM) { 2097 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2098 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2099 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2100 auto &MAM = MAMProxy.getManager(); 2101 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2102 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2103 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2104 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2105 if (!Changed) 2106 return PreservedAnalyses::all(); 2107 auto PA = PreservedAnalyses(); 2108 PA.preserve<GlobalsAA>(); 2109 return PA; 2110 } 2111 2112 } // namespace llvm 2113