1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/Support/BranchProbability.h" 31 #include "llvm/Support/MemoryBuffer.h" 32 #include "llvm/Transforms/Utils.h" 33 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 34 #include "llvm/Transforms/Utils/Cloning.h" 35 #include "llvm/Transforms/Utils/ValueMapper.h" 36 37 #include <set> 38 #include <sstream> 39 40 using namespace llvm; 41 42 #define DEBUG_TYPE "chr" 43 44 #define CHR_DEBUG(X) LLVM_DEBUG(X) 45 46 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 47 cl::desc("Apply CHR for all functions")); 48 49 static cl::opt<double> CHRBiasThreshold( 50 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 51 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 52 53 static cl::opt<unsigned> CHRMergeThreshold( 54 "chr-merge-threshold", cl::init(2), cl::Hidden, 55 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 56 57 static cl::opt<std::string> CHRModuleList( 58 "chr-module-list", cl::init(""), cl::Hidden, 59 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 60 61 static cl::opt<std::string> CHRFunctionList( 62 "chr-function-list", cl::init(""), cl::Hidden, 63 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 64 65 static StringSet<> CHRModules; 66 static StringSet<> CHRFunctions; 67 68 static void parseCHRFilterFiles() { 69 if (!CHRModuleList.empty()) { 70 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 71 if (!FileOrErr) { 72 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 73 std::exit(1); 74 } 75 StringRef Buf = FileOrErr->get()->getBuffer(); 76 SmallVector<StringRef, 0> Lines; 77 Buf.split(Lines, '\n'); 78 for (StringRef Line : Lines) { 79 Line = Line.trim(); 80 if (!Line.empty()) 81 CHRModules.insert(Line); 82 } 83 } 84 if (!CHRFunctionList.empty()) { 85 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 86 if (!FileOrErr) { 87 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 88 std::exit(1); 89 } 90 StringRef Buf = FileOrErr->get()->getBuffer(); 91 SmallVector<StringRef, 0> Lines; 92 Buf.split(Lines, '\n'); 93 for (StringRef Line : Lines) { 94 Line = Line.trim(); 95 if (!Line.empty()) 96 CHRFunctions.insert(Line); 97 } 98 } 99 } 100 101 namespace { 102 class ControlHeightReductionLegacyPass : public FunctionPass { 103 public: 104 static char ID; 105 106 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 107 initializeControlHeightReductionLegacyPassPass( 108 *PassRegistry::getPassRegistry()); 109 parseCHRFilterFiles(); 110 } 111 112 bool runOnFunction(Function &F) override; 113 void getAnalysisUsage(AnalysisUsage &AU) const override { 114 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 115 AU.addRequired<DominatorTreeWrapperPass>(); 116 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 117 AU.addRequired<RegionInfoPass>(); 118 AU.addPreserved<GlobalsAAWrapperPass>(); 119 } 120 }; 121 } // end anonymous namespace 122 123 char ControlHeightReductionLegacyPass::ID = 0; 124 125 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 126 "chr", 127 "Reduce control height in the hot paths", 128 false, false) 129 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 132 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 133 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 134 "chr", 135 "Reduce control height in the hot paths", 136 false, false) 137 138 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 139 return new ControlHeightReductionLegacyPass(); 140 } 141 142 namespace { 143 144 struct CHRStats { 145 CHRStats() : NumBranches(0), NumBranchesDelta(0), 146 WeightedNumBranchesDelta(0) {} 147 void print(raw_ostream &OS) const { 148 OS << "CHRStats: NumBranches " << NumBranches 149 << " NumBranchesDelta " << NumBranchesDelta 150 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 151 } 152 uint64_t NumBranches; // The original number of conditional branches / 153 // selects 154 uint64_t NumBranchesDelta; // The decrease of the number of conditional 155 // branches / selects in the hot paths due to CHR. 156 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 157 // count at the scope entry. 158 }; 159 160 // RegInfo - some properties of a Region. 161 struct RegInfo { 162 RegInfo() : R(nullptr), HasBranch(false) {} 163 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 164 Region *R; 165 bool HasBranch; 166 SmallVector<SelectInst *, 8> Selects; 167 }; 168 169 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 170 171 // CHRScope - a sequence of regions to CHR together. It corresponds to a 172 // sequence of conditional blocks. It can have subscopes which correspond to 173 // nested conditional blocks. Nested CHRScopes form a tree. 174 class CHRScope { 175 public: 176 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 177 assert(RI.R && "Null RegionIn"); 178 RegInfos.push_back(RI); 179 } 180 181 Region *getParentRegion() { 182 assert(RegInfos.size() > 0 && "Empty CHRScope"); 183 Region *Parent = RegInfos[0].R->getParent(); 184 assert(Parent && "Unexpected to call this on the top-level region"); 185 return Parent; 186 } 187 188 BasicBlock *getEntryBlock() { 189 assert(RegInfos.size() > 0 && "Empty CHRScope"); 190 return RegInfos.front().R->getEntry(); 191 } 192 193 BasicBlock *getExitBlock() { 194 assert(RegInfos.size() > 0 && "Empty CHRScope"); 195 return RegInfos.back().R->getExit(); 196 } 197 198 bool appendable(CHRScope *Next) { 199 // The next scope is appendable only if this scope is directly connected to 200 // it (which implies it post-dominates this scope) and this scope dominates 201 // it (no edge to the next scope outside this scope). 202 BasicBlock *NextEntry = Next->getEntryBlock(); 203 if (getExitBlock() != NextEntry) 204 // Not directly connected. 205 return false; 206 Region *LastRegion = RegInfos.back().R; 207 for (BasicBlock *Pred : predecessors(NextEntry)) 208 if (!LastRegion->contains(Pred)) 209 // There's an edge going into the entry of the next scope from outside 210 // of this scope. 211 return false; 212 return true; 213 } 214 215 void append(CHRScope *Next) { 216 assert(RegInfos.size() > 0 && "Empty CHRScope"); 217 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 218 assert(getParentRegion() == Next->getParentRegion() && 219 "Must be siblings"); 220 assert(getExitBlock() == Next->getEntryBlock() && 221 "Must be adjacent"); 222 for (RegInfo &RI : Next->RegInfos) 223 RegInfos.push_back(RI); 224 for (CHRScope *Sub : Next->Subs) 225 Subs.push_back(Sub); 226 } 227 228 void addSub(CHRScope *SubIn) { 229 #ifndef NDEBUG 230 bool IsChild = false; 231 for (RegInfo &RI : RegInfos) 232 if (RI.R == SubIn->getParentRegion()) { 233 IsChild = true; 234 break; 235 } 236 assert(IsChild && "Must be a child"); 237 #endif 238 Subs.push_back(SubIn); 239 } 240 241 // Split this scope at the boundary region into two, which will belong to the 242 // tail and returns the tail. 243 CHRScope *split(Region *Boundary) { 244 assert(Boundary && "Boundary null"); 245 assert(RegInfos.begin()->R != Boundary && 246 "Can't be split at beginning"); 247 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), 248 [&Boundary](const RegInfo& RI) { 249 return Boundary == RI.R; 250 }); 251 if (BoundaryIt == RegInfos.end()) 252 return nullptr; 253 SmallVector<RegInfo, 8> TailRegInfos; 254 SmallVector<CHRScope *, 8> TailSubs; 255 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); 256 RegInfos.resize(BoundaryIt - RegInfos.begin()); 257 DenseSet<Region *> TailRegionSet; 258 for (RegInfo &RI : TailRegInfos) 259 TailRegionSet.insert(RI.R); 260 for (auto It = Subs.begin(); It != Subs.end(); ) { 261 CHRScope *Sub = *It; 262 assert(Sub && "null Sub"); 263 Region *Parent = Sub->getParentRegion(); 264 if (TailRegionSet.count(Parent)) { 265 TailSubs.push_back(Sub); 266 It = Subs.erase(It); 267 } else { 268 assert(std::find_if(RegInfos.begin(), RegInfos.end(), 269 [&Parent](const RegInfo& RI) { 270 return Parent == RI.R; 271 }) != RegInfos.end() && 272 "Must be in head"); 273 ++It; 274 } 275 } 276 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 277 return new CHRScope(TailRegInfos, TailSubs); 278 } 279 280 bool contains(Instruction *I) const { 281 BasicBlock *Parent = I->getParent(); 282 for (const RegInfo &RI : RegInfos) 283 if (RI.R->contains(Parent)) 284 return true; 285 return false; 286 } 287 288 void print(raw_ostream &OS) const; 289 290 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 291 SmallVector<CHRScope *, 8> Subs; // Subscopes. 292 293 // The instruction at which to insert the CHR conditional branch (and hoist 294 // the dependent condition values). 295 Instruction *BranchInsertPoint; 296 297 // True-biased and false-biased regions (conditional blocks), 298 // respectively. Used only for the outermost scope and includes regions in 299 // subscopes. The rest are unbiased. 300 DenseSet<Region *> TrueBiasedRegions; 301 DenseSet<Region *> FalseBiasedRegions; 302 // Among the biased regions, the regions that get CHRed. 303 SmallVector<RegInfo, 8> CHRRegions; 304 305 // True-biased and false-biased selects, respectively. Used only for the 306 // outermost scope and includes ones in subscopes. 307 DenseSet<SelectInst *> TrueBiasedSelects; 308 DenseSet<SelectInst *> FalseBiasedSelects; 309 310 // Map from one of the above regions to the instructions to stop 311 // hoisting instructions at through use-def chains. 312 HoistStopMapTy HoistStopMap; 313 314 private: 315 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, 316 SmallVector<CHRScope *, 8> &SubsIn) 317 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 318 }; 319 320 class CHR { 321 public: 322 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 323 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 324 OptimizationRemarkEmitter &OREin) 325 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 326 327 ~CHR() { 328 for (CHRScope *Scope : Scopes) { 329 delete Scope; 330 } 331 } 332 333 bool run(); 334 335 private: 336 // See the comments in CHR::run() for the high level flow of the algorithm and 337 // what the following functions do. 338 339 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 340 Region *R = RI.getTopLevelRegion(); 341 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); 342 if (Scope) { 343 Output.push_back(Scope); 344 } 345 } 346 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 347 SmallVectorImpl<CHRScope *> &Scopes); 348 CHRScope *findScope(Region *R); 349 void checkScopeHoistable(CHRScope *Scope); 350 351 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 352 SmallVectorImpl<CHRScope *> &Output); 353 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 354 CHRScope *Outer, 355 DenseSet<Value *> *OuterConditionValues, 356 Instruction *OuterInsertPoint, 357 SmallVectorImpl<CHRScope *> &Output, 358 DenseSet<Instruction *> &Unhoistables); 359 360 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 361 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 362 363 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 364 SmallVectorImpl<CHRScope *> &Output); 365 366 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 367 SmallVectorImpl<CHRScope *> &Output); 368 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 369 370 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 371 SmallVectorImpl<CHRScope *> &Output); 372 373 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 374 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 375 void cloneScopeBlocks(CHRScope *Scope, 376 BasicBlock *PreEntryBlock, 377 BasicBlock *ExitBlock, 378 Region *LastRegion, 379 ValueToValueMapTy &VMap); 380 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 381 BasicBlock *EntryBlock, 382 BasicBlock *NewEntryBlock, 383 ValueToValueMapTy &VMap); 384 void fixupBranchesAndSelects(CHRScope *Scope, 385 BasicBlock *PreEntryBlock, 386 BranchInst *MergedBR, 387 uint64_t ProfileCount); 388 void fixupBranch(Region *R, 389 CHRScope *Scope, 390 IRBuilder<> &IRB, 391 Value *&MergedCondition, BranchProbability &CHRBranchBias); 392 void fixupSelect(SelectInst* SI, 393 CHRScope *Scope, 394 IRBuilder<> &IRB, 395 Value *&MergedCondition, BranchProbability &CHRBranchBias); 396 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 397 Instruction *BranchOrSelect, 398 CHRScope *Scope, 399 IRBuilder<> &IRB, 400 Value *&MergedCondition); 401 402 Function &F; 403 BlockFrequencyInfo &BFI; 404 DominatorTree &DT; 405 ProfileSummaryInfo &PSI; 406 RegionInfo &RI; 407 OptimizationRemarkEmitter &ORE; 408 CHRStats Stats; 409 410 // All the true-biased regions in the function 411 DenseSet<Region *> TrueBiasedRegionsGlobal; 412 // All the false-biased regions in the function 413 DenseSet<Region *> FalseBiasedRegionsGlobal; 414 // All the true-biased selects in the function 415 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 416 // All the false-biased selects in the function 417 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 418 // A map from biased regions to their branch bias 419 DenseMap<Region *, BranchProbability> BranchBiasMap; 420 // A map from biased selects to their branch bias 421 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 422 // All the scopes. 423 DenseSet<CHRScope *> Scopes; 424 }; 425 426 } // end anonymous namespace 427 428 static inline 429 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 430 const CHRStats &Stats) { 431 Stats.print(OS); 432 return OS; 433 } 434 435 static inline 436 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 437 Scope.print(OS); 438 return OS; 439 } 440 441 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 442 if (ForceCHR) 443 return true; 444 445 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 446 if (CHRModules.count(F.getParent()->getName())) 447 return true; 448 return CHRFunctions.count(F.getName()); 449 } 450 451 assert(PSI.hasProfileSummary() && "Empty PSI?"); 452 return PSI.isFunctionEntryHot(&F); 453 } 454 455 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 456 CHRStats *Stats) { 457 StringRef FuncName = F.getName(); 458 StringRef ModuleName = F.getParent()->getName(); 459 (void)(FuncName); // Unused in release build. 460 (void)(ModuleName); // Unused in release build. 461 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 462 << FuncName); 463 if (Stats) 464 CHR_DEBUG(dbgs() << " " << *Stats); 465 CHR_DEBUG(dbgs() << "\n"); 466 CHR_DEBUG(F.dump()); 467 } 468 469 void CHRScope::print(raw_ostream &OS) const { 470 assert(RegInfos.size() > 0 && "Empty CHRScope"); 471 OS << "CHRScope["; 472 OS << RegInfos.size() << ", Regions["; 473 for (const RegInfo &RI : RegInfos) { 474 OS << RI.R->getNameStr(); 475 if (RI.HasBranch) 476 OS << " B"; 477 if (RI.Selects.size() > 0) 478 OS << " S" << RI.Selects.size(); 479 OS << ", "; 480 } 481 if (RegInfos[0].R->getParent()) { 482 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 483 } else { 484 // top level region 485 OS << "]"; 486 } 487 OS << ", Subs["; 488 for (CHRScope *Sub : Subs) { 489 OS << *Sub << ", "; 490 } 491 OS << "]]"; 492 } 493 494 // Return true if the given instruction type can be hoisted by CHR. 495 static bool isHoistableInstructionType(Instruction *I) { 496 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 497 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 498 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 499 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 500 isa<InsertValueInst>(I); 501 } 502 503 // Return true if the given instruction can be hoisted by CHR. 504 static bool isHoistable(Instruction *I, DominatorTree &DT) { 505 if (!isHoistableInstructionType(I)) 506 return false; 507 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 508 } 509 510 // Recursively traverse the use-def chains of the given value and return a set 511 // of the unhoistable base values defined within the scope (excluding the 512 // first-region entry block) or the (hoistable or unhoistable) base values that 513 // are defined outside (including the first-region entry block) of the 514 // scope. The returned set doesn't include constants. 515 static std::set<Value *> getBaseValues(Value *V, 516 DominatorTree &DT) { 517 std::set<Value *> Result; 518 if (auto *I = dyn_cast<Instruction>(V)) { 519 // We don't stop at a block that's not in the Scope because we would miss some 520 // instructions that are based on the same base values if we stop there. 521 if (!isHoistable(I, DT)) { 522 Result.insert(I); 523 return Result; 524 } 525 // I is hoistable above the Scope. 526 for (Value *Op : I->operands()) { 527 std::set<Value *> OpResult = getBaseValues(Op, DT); 528 Result.insert(OpResult.begin(), OpResult.end()); 529 } 530 return Result; 531 } 532 if (isa<Argument>(V)) { 533 Result.insert(V); 534 return Result; 535 } 536 // We don't include others like constants because those won't lead to any 537 // chance of folding of conditions (eg two bit checks merged into one check) 538 // after CHR. 539 return Result; // empty 540 } 541 542 // Return true if V is already hoisted or can be hoisted (along with its 543 // operands) above the insert point. When it returns true and HoistStops is 544 // non-null, the instructions to stop hoisting at through the use-def chains are 545 // inserted into HoistStops. 546 static bool 547 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 548 DenseSet<Instruction *> &Unhoistables, 549 DenseSet<Instruction *> *HoistStops) { 550 assert(InsertPoint && "Null InsertPoint"); 551 if (auto *I = dyn_cast<Instruction>(V)) { 552 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 553 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 554 if (Unhoistables.count(I)) { 555 // Don't hoist if they are not to be hoisted. 556 return false; 557 } 558 if (DT.dominates(I, InsertPoint)) { 559 // We are already above the insert point. Stop here. 560 if (HoistStops) 561 HoistStops->insert(I); 562 return true; 563 } 564 // We aren't not above the insert point, check if we can hoist it above the 565 // insert point. 566 if (isHoistable(I, DT)) { 567 // Check operands first. 568 DenseSet<Instruction *> OpsHoistStops; 569 bool AllOpsHoisted = true; 570 for (Value *Op : I->operands()) { 571 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) { 572 AllOpsHoisted = false; 573 break; 574 } 575 } 576 if (AllOpsHoisted) { 577 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 578 if (HoistStops) 579 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 580 return true; 581 } 582 } 583 return false; 584 } 585 // Non-instructions are considered hoistable. 586 return true; 587 } 588 589 // Returns true and sets the true probability and false probability of an 590 // MD_prof metadata if it's well-formed. 591 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 592 BranchProbability &FalseProb) { 593 if (!MD) return false; 594 MDString *MDName = cast<MDString>(MD->getOperand(0)); 595 if (MDName->getString() != "branch_weights" || 596 MD->getNumOperands() != 3) 597 return false; 598 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 599 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 600 if (!TrueWeight || !FalseWeight) 601 return false; 602 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 603 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 604 uint64_t SumWt = TrueWt + FalseWt; 605 606 assert(SumWt >= TrueWt && SumWt >= FalseWt && 607 "Overflow calculating branch probabilities."); 608 609 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 610 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 611 return true; 612 } 613 614 static BranchProbability getCHRBiasThreshold() { 615 return BranchProbability::getBranchProbability( 616 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 617 } 618 619 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 620 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 621 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 622 // false. 623 template <typename K, typename S, typename M> 624 static bool checkBias(K *Key, BranchProbability TrueProb, 625 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 626 M &BiasMap) { 627 BranchProbability Threshold = getCHRBiasThreshold(); 628 if (TrueProb >= Threshold) { 629 TrueSet.insert(Key); 630 BiasMap[Key] = TrueProb; 631 return true; 632 } else if (FalseProb >= Threshold) { 633 FalseSet.insert(Key); 634 BiasMap[Key] = FalseProb; 635 return true; 636 } 637 return false; 638 } 639 640 // Returns true and insert a region into the right biased set and the map if the 641 // branch of the region is biased. 642 static bool checkBiasedBranch(BranchInst *BI, Region *R, 643 DenseSet<Region *> &TrueBiasedRegionsGlobal, 644 DenseSet<Region *> &FalseBiasedRegionsGlobal, 645 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 646 if (!BI->isConditional()) 647 return false; 648 BranchProbability ThenProb, ElseProb; 649 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 650 ThenProb, ElseProb)) 651 return false; 652 BasicBlock *IfThen = BI->getSuccessor(0); 653 BasicBlock *IfElse = BI->getSuccessor(1); 654 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 655 IfThen != IfElse && 656 "Invariant from findScopes"); 657 if (IfThen == R->getExit()) { 658 // Swap them so that IfThen/ThenProb means going into the conditional code 659 // and IfElse/ElseProb means skipping it. 660 std::swap(IfThen, IfElse); 661 std::swap(ThenProb, ElseProb); 662 } 663 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 664 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 665 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 666 return checkBias(R, ThenProb, ElseProb, 667 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 668 BranchBiasMap); 669 } 670 671 // Returns true and insert a select into the right biased set and the map if the 672 // select is biased. 673 static bool checkBiasedSelect( 674 SelectInst *SI, Region *R, 675 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 676 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 677 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 678 BranchProbability TrueProb, FalseProb; 679 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 680 TrueProb, FalseProb)) 681 return false; 682 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 683 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 684 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 685 return checkBias(SI, TrueProb, FalseProb, 686 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 687 SelectBiasMap); 688 } 689 690 // Returns the instruction at which to hoist the dependent condition values and 691 // insert the CHR branch for a region. This is the terminator branch in the 692 // entry block or the first select in the entry block, if any. 693 static Instruction* getBranchInsertPoint(RegInfo &RI) { 694 Region *R = RI.R; 695 BasicBlock *EntryBB = R->getEntry(); 696 // The hoist point is by default the terminator of the entry block, which is 697 // the same as the branch instruction if RI.HasBranch is true. 698 Instruction *HoistPoint = EntryBB->getTerminator(); 699 for (SelectInst *SI : RI.Selects) { 700 if (SI->getParent() == EntryBB) { 701 // Pick the first select in Selects in the entry block. Note Selects is 702 // sorted in the instruction order within a block (asserted below). 703 HoistPoint = SI; 704 break; 705 } 706 } 707 assert(HoistPoint && "Null HoistPoint"); 708 #ifndef NDEBUG 709 // Check that HoistPoint is the first one in Selects in the entry block, 710 // if any. 711 DenseSet<Instruction *> EntryBlockSelectSet; 712 for (SelectInst *SI : RI.Selects) { 713 if (SI->getParent() == EntryBB) { 714 EntryBlockSelectSet.insert(SI); 715 } 716 } 717 for (Instruction &I : *EntryBB) { 718 if (EntryBlockSelectSet.count(&I) > 0) { 719 assert(&I == HoistPoint && 720 "HoistPoint must be the first one in Selects"); 721 break; 722 } 723 } 724 #endif 725 return HoistPoint; 726 } 727 728 // Find a CHR scope in the given region. 729 CHRScope * CHR::findScope(Region *R) { 730 CHRScope *Result = nullptr; 731 BasicBlock *Entry = R->getEntry(); 732 BasicBlock *Exit = R->getExit(); // null if top level. 733 assert(Entry && "Entry must not be null"); 734 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 735 "Only top level region has a null exit"); 736 if (Entry) 737 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 738 else 739 CHR_DEBUG(dbgs() << "Entry null\n"); 740 if (Exit) 741 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 742 else 743 CHR_DEBUG(dbgs() << "Exit null\n"); 744 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 745 // to this region). 746 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 747 if (EntryInSubregion) 748 return nullptr; 749 // Exclude loops 750 for (BasicBlock *Pred : predecessors(Entry)) 751 if (R->contains(Pred)) 752 return nullptr; 753 if (Exit) { 754 // Try to find an if-then block (check if R is an if-then). 755 // if (cond) { 756 // ... 757 // } 758 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 759 if (BI) 760 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 761 else 762 CHR_DEBUG(dbgs() << "BI null\n"); 763 if (BI && BI->isConditional()) { 764 BasicBlock *S0 = BI->getSuccessor(0); 765 BasicBlock *S1 = BI->getSuccessor(1); 766 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 767 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 768 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 769 RegInfo RI(R); 770 RI.HasBranch = checkBiasedBranch( 771 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 772 BranchBiasMap); 773 Result = new CHRScope(RI); 774 Scopes.insert(Result); 775 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 776 ++Stats.NumBranches; 777 if (!RI.HasBranch) { 778 ORE.emit([&]() { 779 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 780 << "Branch not biased"; 781 }); 782 } 783 } 784 } 785 } 786 { 787 // Try to look for selects in the direct child blocks (as opposed to in 788 // subregions) of R. 789 // ... 790 // if (..) { // Some subregion 791 // ... 792 // } 793 // if (..) { // Some subregion 794 // ... 795 // } 796 // ... 797 // a = cond ? b : c; 798 // ... 799 SmallVector<SelectInst *, 8> Selects; 800 for (RegionNode *E : R->elements()) { 801 if (E->isSubRegion()) 802 continue; 803 // This returns the basic block of E if E is a direct child of R (not a 804 // subregion.) 805 BasicBlock *BB = E->getEntry(); 806 // Need to push in the order to make it easier to find the first Select 807 // later. 808 for (Instruction &I : *BB) { 809 if (auto *SI = dyn_cast<SelectInst>(&I)) { 810 Selects.push_back(SI); 811 ++Stats.NumBranches; 812 } 813 } 814 } 815 if (Selects.size() > 0) { 816 auto AddSelects = [&](RegInfo &RI) { 817 for (auto *SI : Selects) 818 if (checkBiasedSelect(SI, RI.R, 819 TrueBiasedSelectsGlobal, 820 FalseBiasedSelectsGlobal, 821 SelectBiasMap)) 822 RI.Selects.push_back(SI); 823 else 824 ORE.emit([&]() { 825 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 826 << "Select not biased"; 827 }); 828 }; 829 if (!Result) { 830 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 831 RegInfo RI(R); 832 AddSelects(RI); 833 Result = new CHRScope(RI); 834 Scopes.insert(Result); 835 } else { 836 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 837 AddSelects(Result->RegInfos[0]); 838 } 839 } 840 } 841 842 if (Result) { 843 checkScopeHoistable(Result); 844 } 845 return Result; 846 } 847 848 // Check that any of the branch and the selects in the region could be 849 // hoisted above the the CHR branch insert point (the most dominating of 850 // them, either the branch (at the end of the first block) or the first 851 // select in the first block). If the branch can't be hoisted, drop the 852 // selects in the first blocks. 853 // 854 // For example, for the following scope/region with selects, we want to insert 855 // the merged branch right before the first select in the first/entry block by 856 // hoisting c1, c2, c3, and c4. 857 // 858 // // Branch insert point here. 859 // a = c1 ? b : c; // Select 1 860 // d = c2 ? e : f; // Select 2 861 // if (c3) { // Branch 862 // ... 863 // c4 = foo() // A call. 864 // g = c4 ? h : i; // Select 3 865 // } 866 // 867 // But suppose we can't hoist c4 because it's dependent on the preceding 868 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 869 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 870 void CHR::checkScopeHoistable(CHRScope *Scope) { 871 RegInfo &RI = Scope->RegInfos[0]; 872 Region *R = RI.R; 873 BasicBlock *EntryBB = R->getEntry(); 874 auto *Branch = RI.HasBranch ? 875 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 876 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 877 if (RI.HasBranch || !Selects.empty()) { 878 Instruction *InsertPoint = getBranchInsertPoint(RI); 879 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 880 // Avoid a data dependence from a select or a branch to a(nother) 881 // select. Note no instruction can't data-depend on a branch (a branch 882 // instruction doesn't produce a value). 883 DenseSet<Instruction *> Unhoistables; 884 // Initialize Unhoistables with the selects. 885 for (SelectInst *SI : Selects) { 886 Unhoistables.insert(SI); 887 } 888 // Remove Selects that can't be hoisted. 889 for (auto it = Selects.begin(); it != Selects.end(); ) { 890 SelectInst *SI = *it; 891 if (SI == InsertPoint) { 892 ++it; 893 continue; 894 } 895 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 896 DT, Unhoistables, nullptr); 897 if (!IsHoistable) { 898 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 899 ORE.emit([&]() { 900 return OptimizationRemarkMissed(DEBUG_TYPE, 901 "DropUnhoistableSelect", SI) 902 << "Dropped unhoistable select"; 903 }); 904 it = Selects.erase(it); 905 // Since we are dropping the select here, we also drop it from 906 // Unhoistables. 907 Unhoistables.erase(SI); 908 } else 909 ++it; 910 } 911 // Update InsertPoint after potentially removing selects. 912 InsertPoint = getBranchInsertPoint(RI); 913 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 914 if (RI.HasBranch && InsertPoint != Branch) { 915 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 916 DT, Unhoistables, nullptr); 917 if (!IsHoistable) { 918 // If the branch isn't hoistable, drop the selects in the entry 919 // block, preferring the branch, which makes the branch the hoist 920 // point. 921 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 922 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 923 CHR_DEBUG( 924 for (SelectInst *SI : Selects) { 925 dbgs() << "SI " << *SI << "\n"; 926 }); 927 for (SelectInst *SI : Selects) { 928 ORE.emit([&]() { 929 return OptimizationRemarkMissed(DEBUG_TYPE, 930 "DropSelectUnhoistableBranch", SI) 931 << "Dropped select due to unhoistable branch"; 932 }); 933 } 934 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 935 [EntryBB](SelectInst *SI) { 936 return SI->getParent() == EntryBB; 937 }), Selects.end()); 938 Unhoistables.clear(); 939 InsertPoint = Branch; 940 } 941 } 942 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 943 #ifndef NDEBUG 944 if (RI.HasBranch) { 945 assert(!DT.dominates(Branch, InsertPoint) && 946 "Branch can't be already above the hoist point"); 947 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 948 DT, Unhoistables, nullptr) && 949 "checkHoistValue for branch"); 950 } 951 for (auto *SI : Selects) { 952 assert(!DT.dominates(SI, InsertPoint) && 953 "SI can't be already above the hoist point"); 954 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 955 Unhoistables, nullptr) && 956 "checkHoistValue for selects"); 957 } 958 CHR_DEBUG(dbgs() << "Result\n"); 959 if (RI.HasBranch) { 960 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 961 } 962 for (auto *SI : Selects) { 963 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 964 } 965 #endif 966 } 967 } 968 969 // Traverse the region tree, find all nested scopes and merge them if possible. 970 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 971 SmallVectorImpl<CHRScope *> &Scopes) { 972 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 973 CHRScope *Result = findScope(R); 974 // Visit subscopes. 975 CHRScope *ConsecutiveSubscope = nullptr; 976 SmallVector<CHRScope *, 8> Subscopes; 977 for (auto It = R->begin(); It != R->end(); ++It) { 978 const std::unique_ptr<Region> &SubR = *It; 979 auto NextIt = std::next(It); 980 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 981 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 982 << "\n"); 983 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 984 if (SubCHRScope) { 985 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 986 } else { 987 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 988 } 989 if (SubCHRScope) { 990 if (!ConsecutiveSubscope) 991 ConsecutiveSubscope = SubCHRScope; 992 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 993 Subscopes.push_back(ConsecutiveSubscope); 994 ConsecutiveSubscope = SubCHRScope; 995 } else 996 ConsecutiveSubscope->append(SubCHRScope); 997 } else { 998 if (ConsecutiveSubscope) { 999 Subscopes.push_back(ConsecutiveSubscope); 1000 } 1001 ConsecutiveSubscope = nullptr; 1002 } 1003 } 1004 if (ConsecutiveSubscope) { 1005 Subscopes.push_back(ConsecutiveSubscope); 1006 } 1007 for (CHRScope *Sub : Subscopes) { 1008 if (Result) { 1009 // Combine it with the parent. 1010 Result->addSub(Sub); 1011 } else { 1012 // Push Subscopes as they won't be combined with the parent. 1013 Scopes.push_back(Sub); 1014 } 1015 } 1016 return Result; 1017 } 1018 1019 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1020 DenseSet<Value *> ConditionValues; 1021 if (RI.HasBranch) { 1022 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1023 ConditionValues.insert(BI->getCondition()); 1024 } 1025 for (SelectInst *SI : RI.Selects) { 1026 ConditionValues.insert(SI->getCondition()); 1027 } 1028 return ConditionValues; 1029 } 1030 1031 1032 // Determine whether to split a scope depending on the sets of the branch 1033 // condition values of the previous region and the current region. We split 1034 // (return true) it if 1) the condition values of the inner/lower scope can't be 1035 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1036 // values have an empty intersection (because the combined branch conditions 1037 // won't probably lead to a simpler combined condition). 1038 static bool shouldSplit(Instruction *InsertPoint, 1039 DenseSet<Value *> &PrevConditionValues, 1040 DenseSet<Value *> &ConditionValues, 1041 DominatorTree &DT, 1042 DenseSet<Instruction *> &Unhoistables) { 1043 CHR_DEBUG( 1044 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1045 for (Value *V : PrevConditionValues) { 1046 dbgs() << *V << ", "; 1047 } 1048 dbgs() << " ConditionValues "; 1049 for (Value *V : ConditionValues) { 1050 dbgs() << *V << ", "; 1051 } 1052 dbgs() << "\n"); 1053 assert(InsertPoint && "Null InsertPoint"); 1054 // If any of Bases isn't hoistable to the hoist point, split. 1055 for (Value *V : ConditionValues) { 1056 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) { 1057 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1058 return true; // Not hoistable, split. 1059 } 1060 } 1061 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1062 // unnecessary splits at scopes with no branch/selects. If 1063 // PrevConditionValues and ConditionValues don't intersect at all, split. 1064 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1065 // Use std::set as DenseSet doesn't work with set_intersection. 1066 std::set<Value *> PrevBases, Bases; 1067 for (Value *V : PrevConditionValues) { 1068 std::set<Value *> BaseValues = getBaseValues(V, DT); 1069 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1070 } 1071 for (Value *V : ConditionValues) { 1072 std::set<Value *> BaseValues = getBaseValues(V, DT); 1073 Bases.insert(BaseValues.begin(), BaseValues.end()); 1074 } 1075 CHR_DEBUG( 1076 dbgs() << "PrevBases "; 1077 for (Value *V : PrevBases) { 1078 dbgs() << *V << ", "; 1079 } 1080 dbgs() << " Bases "; 1081 for (Value *V : Bases) { 1082 dbgs() << *V << ", "; 1083 } 1084 dbgs() << "\n"); 1085 std::set<Value *> Intersection; 1086 std::set_intersection(PrevBases.begin(), PrevBases.end(), 1087 Bases.begin(), Bases.end(), 1088 std::inserter(Intersection, Intersection.begin())); 1089 if (Intersection.empty()) { 1090 // Empty intersection, split. 1091 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1092 return true; 1093 } 1094 } 1095 CHR_DEBUG(dbgs() << "No split\n"); 1096 return false; // Don't split. 1097 } 1098 1099 static void getSelectsInScope(CHRScope *Scope, 1100 DenseSet<Instruction *> &Output) { 1101 for (RegInfo &RI : Scope->RegInfos) 1102 for (SelectInst *SI : RI.Selects) 1103 Output.insert(SI); 1104 for (CHRScope *Sub : Scope->Subs) 1105 getSelectsInScope(Sub, Output); 1106 } 1107 1108 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1109 SmallVectorImpl<CHRScope *> &Output) { 1110 for (CHRScope *Scope : Input) { 1111 assert(!Scope->BranchInsertPoint && 1112 "BranchInsertPoint must not be set"); 1113 DenseSet<Instruction *> Unhoistables; 1114 getSelectsInScope(Scope, Unhoistables); 1115 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1116 } 1117 #ifndef NDEBUG 1118 for (CHRScope *Scope : Output) { 1119 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1120 } 1121 #endif 1122 } 1123 1124 SmallVector<CHRScope *, 8> CHR::splitScope( 1125 CHRScope *Scope, 1126 CHRScope *Outer, 1127 DenseSet<Value *> *OuterConditionValues, 1128 Instruction *OuterInsertPoint, 1129 SmallVectorImpl<CHRScope *> &Output, 1130 DenseSet<Instruction *> &Unhoistables) { 1131 if (Outer) { 1132 assert(OuterConditionValues && "Null OuterConditionValues"); 1133 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1134 } 1135 bool PrevSplitFromOuter = true; 1136 DenseSet<Value *> PrevConditionValues; 1137 Instruction *PrevInsertPoint = nullptr; 1138 SmallVector<CHRScope *, 8> Splits; 1139 SmallVector<bool, 8> SplitsSplitFromOuter; 1140 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1141 SmallVector<Instruction *, 8> SplitsInsertPoints; 1142 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1143 for (RegInfo &RI : RegInfos) { 1144 Instruction *InsertPoint = getBranchInsertPoint(RI); 1145 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1146 CHR_DEBUG( 1147 dbgs() << "ConditionValues "; 1148 for (Value *V : ConditionValues) { 1149 dbgs() << *V << ", "; 1150 } 1151 dbgs() << "\n"); 1152 if (RI.R == RegInfos[0].R) { 1153 // First iteration. Check to see if we should split from the outer. 1154 if (Outer) { 1155 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1156 CHR_DEBUG(dbgs() << "Should split from outer at " 1157 << RI.R->getNameStr() << "\n"); 1158 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1159 ConditionValues, DT, Unhoistables)) { 1160 PrevConditionValues = ConditionValues; 1161 PrevInsertPoint = InsertPoint; 1162 ORE.emit([&]() { 1163 return OptimizationRemarkMissed(DEBUG_TYPE, 1164 "SplitScopeFromOuter", 1165 RI.R->getEntry()->getTerminator()) 1166 << "Split scope from outer due to unhoistable branch/select " 1167 << "and/or lack of common condition values"; 1168 }); 1169 } else { 1170 // Not splitting from the outer. Use the outer bases and insert 1171 // point. Union the bases. 1172 PrevSplitFromOuter = false; 1173 PrevConditionValues = *OuterConditionValues; 1174 PrevConditionValues.insert(ConditionValues.begin(), 1175 ConditionValues.end()); 1176 PrevInsertPoint = OuterInsertPoint; 1177 } 1178 } else { 1179 CHR_DEBUG(dbgs() << "Outer null\n"); 1180 PrevConditionValues = ConditionValues; 1181 PrevInsertPoint = InsertPoint; 1182 } 1183 } else { 1184 CHR_DEBUG(dbgs() << "Should split from prev at " 1185 << RI.R->getNameStr() << "\n"); 1186 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1187 DT, Unhoistables)) { 1188 CHRScope *Tail = Scope->split(RI.R); 1189 Scopes.insert(Tail); 1190 Splits.push_back(Scope); 1191 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1192 SplitsConditionValues.push_back(PrevConditionValues); 1193 SplitsInsertPoints.push_back(PrevInsertPoint); 1194 Scope = Tail; 1195 PrevConditionValues = ConditionValues; 1196 PrevInsertPoint = InsertPoint; 1197 PrevSplitFromOuter = true; 1198 ORE.emit([&]() { 1199 return OptimizationRemarkMissed(DEBUG_TYPE, 1200 "SplitScopeFromPrev", 1201 RI.R->getEntry()->getTerminator()) 1202 << "Split scope from previous due to unhoistable branch/select " 1203 << "and/or lack of common condition values"; 1204 }); 1205 } else { 1206 // Not splitting. Union the bases. Keep the hoist point. 1207 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1208 } 1209 } 1210 } 1211 Splits.push_back(Scope); 1212 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1213 SplitsConditionValues.push_back(PrevConditionValues); 1214 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1215 SplitsInsertPoints.push_back(PrevInsertPoint); 1216 assert(Splits.size() == SplitsConditionValues.size() && 1217 Splits.size() == SplitsSplitFromOuter.size() && 1218 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1219 for (size_t I = 0; I < Splits.size(); ++I) { 1220 CHRScope *Split = Splits[I]; 1221 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1222 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1223 SmallVector<CHRScope *, 8> NewSubs; 1224 DenseSet<Instruction *> SplitUnhoistables; 1225 getSelectsInScope(Split, SplitUnhoistables); 1226 for (CHRScope *Sub : Split->Subs) { 1227 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1228 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1229 SplitUnhoistables); 1230 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1231 } 1232 Split->Subs = NewSubs; 1233 } 1234 SmallVector<CHRScope *, 8> Result; 1235 for (size_t I = 0; I < Splits.size(); ++I) { 1236 CHRScope *Split = Splits[I]; 1237 if (SplitsSplitFromOuter[I]) { 1238 // Split from the outer. 1239 Output.push_back(Split); 1240 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1241 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1242 << "\n"); 1243 } else { 1244 // Connected to the outer. 1245 Result.push_back(Split); 1246 } 1247 } 1248 if (!Outer) 1249 assert(Result.empty() && 1250 "If no outer (top-level), must return no nested ones"); 1251 return Result; 1252 } 1253 1254 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1255 for (CHRScope *Scope : Scopes) { 1256 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1257 classifyBiasedScopes(Scope, Scope); 1258 CHR_DEBUG( 1259 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1260 dbgs() << "TrueBiasedRegions "; 1261 for (Region *R : Scope->TrueBiasedRegions) { 1262 dbgs() << R->getNameStr() << ", "; 1263 } 1264 dbgs() << "\n"; 1265 dbgs() << "FalseBiasedRegions "; 1266 for (Region *R : Scope->FalseBiasedRegions) { 1267 dbgs() << R->getNameStr() << ", "; 1268 } 1269 dbgs() << "\n"; 1270 dbgs() << "TrueBiasedSelects "; 1271 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1272 dbgs() << *SI << ", "; 1273 } 1274 dbgs() << "\n"; 1275 dbgs() << "FalseBiasedSelects "; 1276 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1277 dbgs() << *SI << ", "; 1278 } 1279 dbgs() << "\n";); 1280 } 1281 } 1282 1283 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1284 for (RegInfo &RI : Scope->RegInfos) { 1285 if (RI.HasBranch) { 1286 Region *R = RI.R; 1287 if (TrueBiasedRegionsGlobal.count(R) > 0) 1288 OutermostScope->TrueBiasedRegions.insert(R); 1289 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1290 OutermostScope->FalseBiasedRegions.insert(R); 1291 else 1292 llvm_unreachable("Must be biased"); 1293 } 1294 for (SelectInst *SI : RI.Selects) { 1295 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1296 OutermostScope->TrueBiasedSelects.insert(SI); 1297 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1298 OutermostScope->FalseBiasedSelects.insert(SI); 1299 else 1300 llvm_unreachable("Must be biased"); 1301 } 1302 } 1303 for (CHRScope *Sub : Scope->Subs) { 1304 classifyBiasedScopes(Sub, OutermostScope); 1305 } 1306 } 1307 1308 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1309 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1310 Scope->FalseBiasedRegions.size() + 1311 Scope->TrueBiasedSelects.size() + 1312 Scope->FalseBiasedSelects.size(); 1313 return NumBiased >= CHRMergeThreshold; 1314 } 1315 1316 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1317 SmallVectorImpl<CHRScope *> &Output) { 1318 for (CHRScope *Scope : Input) { 1319 // Filter out the ones with only one region and no subs. 1320 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1321 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1322 << Scope->TrueBiasedRegions.size() 1323 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1324 << " true-selects " << Scope->TrueBiasedSelects.size() 1325 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1326 ORE.emit([&]() { 1327 return OptimizationRemarkMissed( 1328 DEBUG_TYPE, 1329 "DropScopeWithOneBranchOrSelect", 1330 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1331 << "Drop scope with < " 1332 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1333 << " biased branch(es) or select(s)"; 1334 }); 1335 continue; 1336 } 1337 Output.push_back(Scope); 1338 } 1339 } 1340 1341 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1342 SmallVectorImpl<CHRScope *> &Output) { 1343 for (CHRScope *Scope : Input) { 1344 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1345 "Empty"); 1346 setCHRRegions(Scope, Scope); 1347 Output.push_back(Scope); 1348 CHR_DEBUG( 1349 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1350 for (auto pair : Scope->HoistStopMap) { 1351 Region *R = pair.first; 1352 dbgs() << "Region " << R->getNameStr() << "\n"; 1353 for (Instruction *I : pair.second) { 1354 dbgs() << "HoistStop " << *I << "\n"; 1355 } 1356 } 1357 dbgs() << "CHRRegions" << "\n"; 1358 for (RegInfo &RI : Scope->CHRRegions) { 1359 dbgs() << RI.R->getNameStr() << "\n"; 1360 }); 1361 } 1362 } 1363 1364 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1365 DenseSet<Instruction *> Unhoistables; 1366 // Put the biased selects in Unhoistables because they should stay where they 1367 // are and constant-folded after CHR (in case one biased select or a branch 1368 // can depend on another biased select.) 1369 for (RegInfo &RI : Scope->RegInfos) { 1370 for (SelectInst *SI : RI.Selects) { 1371 Unhoistables.insert(SI); 1372 } 1373 } 1374 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1375 for (RegInfo &RI : Scope->RegInfos) { 1376 Region *R = RI.R; 1377 DenseSet<Instruction *> HoistStops; 1378 bool IsHoisted = false; 1379 if (RI.HasBranch) { 1380 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1381 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1382 "Must be truthy or falsy"); 1383 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1384 // Note checkHoistValue fills in HoistStops. 1385 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1386 Unhoistables, &HoistStops); 1387 assert(IsHoistable && "Must be hoistable"); 1388 (void)(IsHoistable); // Unused in release build 1389 IsHoisted = true; 1390 } 1391 for (SelectInst *SI : RI.Selects) { 1392 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1393 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1394 "Must be true or false biased"); 1395 // Note checkHoistValue fills in HoistStops. 1396 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1397 Unhoistables, &HoistStops); 1398 assert(IsHoistable && "Must be hoistable"); 1399 (void)(IsHoistable); // Unused in release build 1400 IsHoisted = true; 1401 } 1402 if (IsHoisted) { 1403 OutermostScope->CHRRegions.push_back(RI); 1404 OutermostScope->HoistStopMap[R] = HoistStops; 1405 } 1406 } 1407 for (CHRScope *Sub : Scope->Subs) 1408 setCHRRegions(Sub, OutermostScope); 1409 } 1410 1411 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1412 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1413 } 1414 1415 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1416 SmallVectorImpl<CHRScope *> &Output) { 1417 Output.resize(Input.size()); 1418 llvm::copy(Input, Output.begin()); 1419 llvm::stable_sort(Output, CHRScopeSorter); 1420 } 1421 1422 // Return true if V is already hoisted or was hoisted (along with its operands) 1423 // to the insert point. 1424 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1425 HoistStopMapTy &HoistStopMap, 1426 DenseSet<Instruction *> &HoistedSet, 1427 DenseSet<PHINode *> &TrivialPHIs, 1428 DominatorTree &DT) { 1429 auto IT = HoistStopMap.find(R); 1430 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1431 DenseSet<Instruction *> &HoistStops = IT->second; 1432 if (auto *I = dyn_cast<Instruction>(V)) { 1433 if (I == HoistPoint) 1434 return; 1435 if (HoistStops.count(I)) 1436 return; 1437 if (auto *PN = dyn_cast<PHINode>(I)) 1438 if (TrivialPHIs.count(PN)) 1439 // The trivial phi inserted by the previous CHR scope could replace a 1440 // non-phi in HoistStops. Note that since this phi is at the exit of a 1441 // previous CHR scope, which dominates this scope, it's safe to stop 1442 // hoisting there. 1443 return; 1444 if (HoistedSet.count(I)) 1445 // Already hoisted, return. 1446 return; 1447 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1448 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1449 assert(DT.getNode(HoistPoint->getParent()) && 1450 "DT must contain HoistPoint block"); 1451 if (DT.dominates(I, HoistPoint)) 1452 // We are already above the hoist point. Stop here. This may be necessary 1453 // when multiple scopes would independently hoist the same 1454 // instruction. Since an outer (dominating) scope would hoist it to its 1455 // entry before an inner (dominated) scope would to its entry, the inner 1456 // scope may see the instruction already hoisted, in which case it 1457 // potentially wrong for the inner scope to hoist it and could cause bad 1458 // IR (non-dominating def), but safe to skip hoisting it instead because 1459 // it's already in a block that dominates the inner scope. 1460 return; 1461 for (Value *Op : I->operands()) { 1462 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1463 } 1464 I->moveBefore(HoistPoint); 1465 HoistedSet.insert(I); 1466 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1467 } 1468 } 1469 1470 // Hoist the dependent condition values of the branches and the selects in the 1471 // scope to the insert point. 1472 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1473 DenseSet<PHINode *> &TrivialPHIs, 1474 DominatorTree &DT) { 1475 DenseSet<Instruction *> HoistedSet; 1476 for (const RegInfo &RI : Scope->CHRRegions) { 1477 Region *R = RI.R; 1478 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1479 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1480 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1481 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1482 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1483 HoistedSet, TrivialPHIs, DT); 1484 } 1485 for (SelectInst *SI : RI.Selects) { 1486 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1487 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1488 if (!(IsTrueBiased || IsFalseBiased)) 1489 continue; 1490 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1491 HoistedSet, TrivialPHIs, DT); 1492 } 1493 } 1494 } 1495 1496 // Negate the predicate if an ICmp if it's used only by branches or selects by 1497 // swapping the operands of the branches or the selects. Returns true if success. 1498 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1499 Instruction *ExcludedUser, 1500 CHRScope *Scope) { 1501 for (User *U : ICmp->users()) { 1502 if (U == ExcludedUser) 1503 continue; 1504 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1505 continue; 1506 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1507 continue; 1508 return false; 1509 } 1510 for (User *U : ICmp->users()) { 1511 if (U == ExcludedUser) 1512 continue; 1513 if (auto *BI = dyn_cast<BranchInst>(U)) { 1514 assert(BI->isConditional() && "Must be conditional"); 1515 BI->swapSuccessors(); 1516 // Don't need to swap this in terms of 1517 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1518 // mean whehter the branch is likely go into the if-then rather than 1519 // successor0/successor1 and because we can tell which edge is the then or 1520 // the else one by comparing the destination to the region exit block. 1521 continue; 1522 } 1523 if (auto *SI = dyn_cast<SelectInst>(U)) { 1524 // Swap operands 1525 Value *TrueValue = SI->getTrueValue(); 1526 Value *FalseValue = SI->getFalseValue(); 1527 SI->setTrueValue(FalseValue); 1528 SI->setFalseValue(TrueValue); 1529 SI->swapProfMetadata(); 1530 if (Scope->TrueBiasedSelects.count(SI)) { 1531 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1532 "Must not be already in"); 1533 Scope->FalseBiasedSelects.insert(SI); 1534 } else if (Scope->FalseBiasedSelects.count(SI)) { 1535 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1536 "Must not be already in"); 1537 Scope->TrueBiasedSelects.insert(SI); 1538 } 1539 continue; 1540 } 1541 llvm_unreachable("Must be a branch or a select"); 1542 } 1543 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1544 return true; 1545 } 1546 1547 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1548 // for a value that's defined in the scope but used outside it (meaning it's 1549 // alive at the exit block). 1550 static void insertTrivialPHIs(CHRScope *Scope, 1551 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1552 DenseSet<PHINode *> &TrivialPHIs) { 1553 DenseSet<BasicBlock *> BlocksInScopeSet; 1554 SmallVector<BasicBlock *, 8> BlocksInScopeVec; 1555 for (RegInfo &RI : Scope->RegInfos) { 1556 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1557 // sub-Scopes. 1558 BlocksInScopeSet.insert(BB); 1559 BlocksInScopeVec.push_back(BB); 1560 } 1561 } 1562 CHR_DEBUG( 1563 dbgs() << "Inserting redudant phis\n"; 1564 for (BasicBlock *BB : BlocksInScopeVec) { 1565 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1566 }); 1567 for (BasicBlock *BB : BlocksInScopeVec) { 1568 for (Instruction &I : *BB) { 1569 SmallVector<Instruction *, 8> Users; 1570 for (User *U : I.users()) { 1571 if (auto *UI = dyn_cast<Instruction>(U)) { 1572 if (BlocksInScopeSet.count(UI->getParent()) == 0 && 1573 // Unless there's already a phi for I at the exit block. 1574 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1575 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1576 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1577 Users.push_back(UI); 1578 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1579 // There's a loop backedge from a block that's dominated by this 1580 // scope to the entry block. 1581 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1582 CHR_DEBUG(dbgs() 1583 << "Used at entry block (for a back edge) by a phi user " 1584 << *UI << "\n"); 1585 Users.push_back(UI); 1586 } 1587 } 1588 } 1589 if (Users.size() > 0) { 1590 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1591 // ExitBlock. Replace I with the new phi in UI unless UI is another 1592 // phi at ExitBlock. 1593 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1594 pred_end(ExitBlock)); 1595 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1596 &ExitBlock->front()); 1597 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1598 PN->addIncoming(&I, Pred); 1599 } 1600 TrivialPHIs.insert(PN); 1601 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1602 for (Instruction *UI : Users) { 1603 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1604 if (UI->getOperand(J) == &I) { 1605 UI->setOperand(J, PN); 1606 } 1607 } 1608 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1609 } 1610 } 1611 } 1612 } 1613 } 1614 1615 // Assert that all the CHR regions of the scope have a biased branch or select. 1616 static void LLVM_ATTRIBUTE_UNUSED 1617 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1618 #ifndef NDEBUG 1619 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1620 if (Scope->TrueBiasedRegions.count(RI.R) || 1621 Scope->FalseBiasedRegions.count(RI.R)) 1622 return true; 1623 for (SelectInst *SI : RI.Selects) 1624 if (Scope->TrueBiasedSelects.count(SI) || 1625 Scope->FalseBiasedSelects.count(SI)) 1626 return true; 1627 return false; 1628 }; 1629 for (RegInfo &RI : Scope->CHRRegions) { 1630 assert(HasBiasedBranchOrSelect(RI, Scope) && 1631 "Must have biased branch or select"); 1632 } 1633 #endif 1634 } 1635 1636 // Assert that all the condition values of the biased branches and selects have 1637 // been hoisted to the pre-entry block or outside of the scope. 1638 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1639 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1640 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1641 for (RegInfo &RI : Scope->CHRRegions) { 1642 Region *R = RI.R; 1643 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1644 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1645 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1646 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1647 Value *V = BI->getCondition(); 1648 CHR_DEBUG(dbgs() << *V << "\n"); 1649 if (auto *I = dyn_cast<Instruction>(V)) { 1650 (void)(I); // Unused in release build. 1651 assert((I->getParent() == PreEntryBlock || 1652 !Scope->contains(I)) && 1653 "Must have been hoisted to PreEntryBlock or outside the scope"); 1654 } 1655 } 1656 for (SelectInst *SI : RI.Selects) { 1657 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1658 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1659 if (!(IsTrueBiased || IsFalseBiased)) 1660 continue; 1661 Value *V = SI->getCondition(); 1662 CHR_DEBUG(dbgs() << *V << "\n"); 1663 if (auto *I = dyn_cast<Instruction>(V)) { 1664 (void)(I); // Unused in release build. 1665 assert((I->getParent() == PreEntryBlock || 1666 !Scope->contains(I)) && 1667 "Must have been hoisted to PreEntryBlock or outside the scope"); 1668 } 1669 } 1670 } 1671 } 1672 1673 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1674 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1675 1676 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1677 Region *FirstRegion = Scope->RegInfos[0].R; 1678 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1679 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1680 BasicBlock *ExitBlock = LastRegion->getExit(); 1681 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1682 1683 if (ExitBlock) { 1684 // Insert a trivial phi at the exit block (where the CHR hot path and the 1685 // cold path merges) for a value that's defined in the scope but used 1686 // outside it (meaning it's alive at the exit block). We will add the 1687 // incoming values for the CHR cold paths to it below. Without this, we'd 1688 // miss updating phi's for such values unless there happens to already be a 1689 // phi for that value there. 1690 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1691 } 1692 1693 // Split the entry block of the first region. The new block becomes the new 1694 // entry block of the first region. The old entry block becomes the block to 1695 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1696 // through the split, we update the entry of the first region after the split, 1697 // and Region only points to the entry and the exit blocks, rather than 1698 // keeping everything in a list or set, the blocks membership and the 1699 // entry/exit blocks of the region are still valid after the split. 1700 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1701 << " at " << *Scope->BranchInsertPoint << "\n"); 1702 BasicBlock *NewEntryBlock = 1703 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1704 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1705 "NewEntryBlock's only pred must be EntryBlock"); 1706 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1707 BasicBlock *PreEntryBlock = EntryBlock; 1708 1709 ValueToValueMapTy VMap; 1710 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1711 // hot path (originals) and a cold path (clones) and update the PHIs at the 1712 // exit block. 1713 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1714 1715 // Replace the old (placeholder) branch with the new (merged) conditional 1716 // branch. 1717 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1718 NewEntryBlock, VMap); 1719 1720 #ifndef NDEBUG 1721 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1722 #endif 1723 1724 // Hoist the conditional values of the branches/selects. 1725 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1726 1727 #ifndef NDEBUG 1728 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1729 #endif 1730 1731 // Create the combined branch condition and constant-fold the branches/selects 1732 // in the hot path. 1733 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1734 ProfileCount ? ProfileCount.getValue() : 0); 1735 } 1736 1737 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1738 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1739 // at the exit block. 1740 void CHR::cloneScopeBlocks(CHRScope *Scope, 1741 BasicBlock *PreEntryBlock, 1742 BasicBlock *ExitBlock, 1743 Region *LastRegion, 1744 ValueToValueMapTy &VMap) { 1745 // Clone all the blocks. The original blocks will be the hot-path 1746 // CHR-optimized code and the cloned blocks will be the original unoptimized 1747 // code. This is so that the block pointers from the 1748 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1749 // which CHR should apply to. 1750 SmallVector<BasicBlock*, 8> NewBlocks; 1751 for (RegInfo &RI : Scope->RegInfos) 1752 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1753 // sub-Scopes. 1754 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1755 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1756 NewBlocks.push_back(NewBB); 1757 VMap[BB] = NewBB; 1758 } 1759 1760 // Place the cloned blocks right after the original blocks (right before the 1761 // exit block of.) 1762 if (ExitBlock) 1763 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1764 F.getBasicBlockList(), 1765 NewBlocks[0]->getIterator(), F.end()); 1766 1767 // Update the cloned blocks/instructions to refer to themselves. 1768 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1769 for (Instruction &I : *NewBlocks[i]) 1770 RemapInstruction(&I, VMap, 1771 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1772 1773 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1774 // the top-level region but we don't need to add PHIs. The trivial PHIs 1775 // inserted above will be updated here. 1776 if (ExitBlock) 1777 for (PHINode &PN : ExitBlock->phis()) 1778 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1779 ++I) { 1780 BasicBlock *Pred = PN.getIncomingBlock(I); 1781 if (LastRegion->contains(Pred)) { 1782 Value *V = PN.getIncomingValue(I); 1783 auto It = VMap.find(V); 1784 if (It != VMap.end()) V = It->second; 1785 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1786 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1787 } 1788 } 1789 } 1790 1791 // A helper for transformScope. Replace the old (placeholder) branch with the 1792 // new (merged) conditional branch. 1793 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1794 BasicBlock *EntryBlock, 1795 BasicBlock *NewEntryBlock, 1796 ValueToValueMapTy &VMap) { 1797 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1798 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1799 "SplitBlock did not work correctly!"); 1800 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1801 "NewEntryBlock's only pred must be EntryBlock"); 1802 assert(VMap.find(NewEntryBlock) != VMap.end() && 1803 "NewEntryBlock must have been copied"); 1804 OldBR->dropAllReferences(); 1805 OldBR->eraseFromParent(); 1806 // The true predicate is a placeholder. It will be replaced later in 1807 // fixupBranchesAndSelects(). 1808 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1809 cast<BasicBlock>(VMap[NewEntryBlock]), 1810 ConstantInt::getTrue(F.getContext())); 1811 PreEntryBlock->getInstList().push_back(NewBR); 1812 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1813 "NewEntryBlock's only pred must be EntryBlock"); 1814 return NewBR; 1815 } 1816 1817 // A helper for transformScopes. Create the combined branch condition and 1818 // constant-fold the branches/selects in the hot path. 1819 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1820 BasicBlock *PreEntryBlock, 1821 BranchInst *MergedBR, 1822 uint64_t ProfileCount) { 1823 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1824 BranchProbability CHRBranchBias(1, 1); 1825 uint64_t NumCHRedBranches = 0; 1826 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1827 for (RegInfo &RI : Scope->CHRRegions) { 1828 Region *R = RI.R; 1829 if (RI.HasBranch) { 1830 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1831 ++NumCHRedBranches; 1832 } 1833 for (SelectInst *SI : RI.Selects) { 1834 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1835 ++NumCHRedBranches; 1836 } 1837 } 1838 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1839 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1840 ORE.emit([&]() { 1841 return OptimizationRemark(DEBUG_TYPE, 1842 "CHR", 1843 // Refer to the hot (original) path 1844 MergedBR->getSuccessor(0)->getTerminator()) 1845 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1846 << " branches or selects"; 1847 }); 1848 MergedBR->setCondition(MergedCondition); 1849 SmallVector<uint32_t, 2> Weights; 1850 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); 1851 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); 1852 MDBuilder MDB(F.getContext()); 1853 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1854 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1855 << "\n"); 1856 } 1857 1858 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1859 // and constant-fold a branch in the hot path. 1860 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1861 IRBuilder<> &IRB, 1862 Value *&MergedCondition, 1863 BranchProbability &CHRBranchBias) { 1864 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1865 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1866 "Must be truthy or falsy"); 1867 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1868 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1869 "Must be in the bias map"); 1870 BranchProbability Bias = BranchBiasMap[R]; 1871 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1872 // Take the min. 1873 if (CHRBranchBias > Bias) 1874 CHRBranchBias = Bias; 1875 BasicBlock *IfThen = BI->getSuccessor(1); 1876 BasicBlock *IfElse = BI->getSuccessor(0); 1877 BasicBlock *RegionExitBlock = R->getExit(); 1878 assert(RegionExitBlock && "Null ExitBlock"); 1879 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1880 IfThen != IfElse && "Invariant from findScopes"); 1881 if (IfThen == RegionExitBlock) { 1882 // Swap them so that IfThen means going into it and IfElse means skipping 1883 // it. 1884 std::swap(IfThen, IfElse); 1885 } 1886 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1887 << " IfElse " << IfElse->getName() << "\n"); 1888 Value *Cond = BI->getCondition(); 1889 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1890 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1891 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1892 MergedCondition); 1893 // Constant-fold the branch at ClonedEntryBlock. 1894 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1895 "The successor shouldn't change"); 1896 Value *NewCondition = ConditionTrue ? 1897 ConstantInt::getTrue(F.getContext()) : 1898 ConstantInt::getFalse(F.getContext()); 1899 BI->setCondition(NewCondition); 1900 } 1901 1902 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1903 // and constant-fold a select in the hot path. 1904 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1905 IRBuilder<> &IRB, 1906 Value *&MergedCondition, 1907 BranchProbability &CHRBranchBias) { 1908 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1909 assert((IsTrueBiased || 1910 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1911 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1912 "Must be in the bias map"); 1913 BranchProbability Bias = SelectBiasMap[SI]; 1914 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1915 // Take the min. 1916 if (CHRBranchBias > Bias) 1917 CHRBranchBias = Bias; 1918 Value *Cond = SI->getCondition(); 1919 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1920 MergedCondition); 1921 Value *NewCondition = IsTrueBiased ? 1922 ConstantInt::getTrue(F.getContext()) : 1923 ConstantInt::getFalse(F.getContext()); 1924 SI->setCondition(NewCondition); 1925 } 1926 1927 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1928 // condition. 1929 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1930 Instruction *BranchOrSelect, 1931 CHRScope *Scope, 1932 IRBuilder<> &IRB, 1933 Value *&MergedCondition) { 1934 if (IsTrueBiased) { 1935 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1936 } else { 1937 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1938 // branch, negate the icmp predicate and swap the branch targets and avoid 1939 // inserting an Xor to negate Cond. 1940 bool Done = false; 1941 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1942 if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1943 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1944 Done = true; 1945 } 1946 if (!Done) { 1947 Value *Negate = IRB.CreateXor( 1948 ConstantInt::getTrue(F.getContext()), Cond); 1949 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1950 } 1951 } 1952 } 1953 1954 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1955 unsigned I = 0; 1956 DenseSet<PHINode *> TrivialPHIs; 1957 for (CHRScope *Scope : CHRScopes) { 1958 transformScopes(Scope, TrivialPHIs); 1959 CHR_DEBUG( 1960 std::ostringstream oss; 1961 oss << " after transformScopes " << I++; 1962 dumpIR(F, oss.str().c_str(), nullptr)); 1963 (void)I; 1964 } 1965 } 1966 1967 static void LLVM_ATTRIBUTE_UNUSED 1968 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1969 dbgs() << Label << " " << Scopes.size() << "\n"; 1970 for (CHRScope *Scope : Scopes) { 1971 dbgs() << *Scope << "\n"; 1972 } 1973 } 1974 1975 bool CHR::run() { 1976 if (!shouldApply(F, PSI)) 1977 return false; 1978 1979 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1980 1981 bool Changed = false; 1982 { 1983 CHR_DEBUG( 1984 dbgs() << "RegionInfo:\n"; 1985 RI.print(dbgs())); 1986 1987 // Recursively traverse the region tree and find regions that have biased 1988 // branches and/or selects and create scopes. 1989 SmallVector<CHRScope *, 8> AllScopes; 1990 findScopes(AllScopes); 1991 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 1992 1993 // Split the scopes if 1) the conditiona values of the biased 1994 // branches/selects of the inner/lower scope can't be hoisted up to the 1995 // outermost/uppermost scope entry, or 2) the condition values of the biased 1996 // branches/selects in a scope (including subscopes) don't share at least 1997 // one common value. 1998 SmallVector<CHRScope *, 8> SplitScopes; 1999 splitScopes(AllScopes, SplitScopes); 2000 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2001 2002 // After splitting, set the biased regions and selects of a scope (a tree 2003 // root) that include those of the subscopes. 2004 classifyBiasedScopes(SplitScopes); 2005 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2006 2007 // Filter out the scopes that has only one biased region or select (CHR 2008 // isn't useful in such a case). 2009 SmallVector<CHRScope *, 8> FilteredScopes; 2010 filterScopes(SplitScopes, FilteredScopes); 2011 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2012 2013 // Set the regions to be CHR'ed and their hoist stops for each scope. 2014 SmallVector<CHRScope *, 8> SetScopes; 2015 setCHRRegions(FilteredScopes, SetScopes); 2016 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2017 2018 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2019 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2020 // to the hot path, rather than both hot and cold paths. 2021 SmallVector<CHRScope *, 8> SortedScopes; 2022 sortScopes(SetScopes, SortedScopes); 2023 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2024 2025 CHR_DEBUG( 2026 dbgs() << "RegionInfo:\n"; 2027 RI.print(dbgs())); 2028 2029 // Apply the CHR transformation. 2030 if (!SortedScopes.empty()) { 2031 transformScopes(SortedScopes); 2032 Changed = true; 2033 } 2034 } 2035 2036 if (Changed) { 2037 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2038 ORE.emit([&]() { 2039 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2040 << ore::NV("Function", &F) << " " 2041 << "Reduced the number of branches in hot paths by " 2042 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2043 << " (static) and " 2044 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2045 << " (weighted by PGO count)"; 2046 }); 2047 } 2048 2049 return Changed; 2050 } 2051 2052 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 2053 BlockFrequencyInfo &BFI = 2054 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 2055 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 2056 ProfileSummaryInfo &PSI = 2057 getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 2058 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 2059 std::unique_ptr<OptimizationRemarkEmitter> OwnedORE = 2060 llvm::make_unique<OptimizationRemarkEmitter>(&F); 2061 return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run(); 2062 } 2063 2064 namespace llvm { 2065 2066 ControlHeightReductionPass::ControlHeightReductionPass() { 2067 parseCHRFilterFiles(); 2068 } 2069 2070 PreservedAnalyses ControlHeightReductionPass::run( 2071 Function &F, 2072 FunctionAnalysisManager &FAM) { 2073 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2074 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2075 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2076 auto &MAM = MAMProxy.getManager(); 2077 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2078 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2079 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2080 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2081 if (!Changed) 2082 return PreservedAnalyses::all(); 2083 auto PA = PreservedAnalyses(); 2084 PA.preserve<GlobalsAA>(); 2085 return PA; 2086 } 2087 2088 } // namespace llvm 2089