1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass merges conditional blocks of code and reduces the number of 11 // conditional branches in the hot paths based on profiles. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/Analysis/BlockFrequencyInfo.h" 21 #include "llvm/Analysis/GlobalsModRef.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/Support/BranchProbability.h" 31 #include "llvm/Support/MemoryBuffer.h" 32 #include "llvm/Transforms/Utils.h" 33 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 34 #include "llvm/Transforms/Utils/Cloning.h" 35 #include "llvm/Transforms/Utils/ValueMapper.h" 36 37 #include <set> 38 #include <sstream> 39 40 using namespace llvm; 41 42 #define DEBUG_TYPE "chr" 43 44 #define CHR_DEBUG(X) LLVM_DEBUG(X) 45 46 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 47 cl::desc("Apply CHR for all functions")); 48 49 static cl::opt<double> CHRBiasThreshold( 50 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 51 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 52 53 static cl::opt<unsigned> CHRMergeThreshold( 54 "chr-merge-threshold", cl::init(2), cl::Hidden, 55 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 56 57 static cl::opt<std::string> CHRModuleList( 58 "chr-module-list", cl::init(""), cl::Hidden, 59 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 60 61 static cl::opt<std::string> CHRFunctionList( 62 "chr-function-list", cl::init(""), cl::Hidden, 63 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 64 65 static StringSet<> CHRModules; 66 static StringSet<> CHRFunctions; 67 68 static void parseCHRFilterFiles() { 69 if (!CHRModuleList.empty()) { 70 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 71 if (!FileOrErr) { 72 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 73 std::exit(1); 74 } 75 StringRef Buf = FileOrErr->get()->getBuffer(); 76 SmallVector<StringRef, 0> Lines; 77 Buf.split(Lines, '\n'); 78 for (StringRef Line : Lines) { 79 Line = Line.trim(); 80 if (!Line.empty()) 81 CHRModules.insert(Line); 82 } 83 } 84 if (!CHRFunctionList.empty()) { 85 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 86 if (!FileOrErr) { 87 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 88 std::exit(1); 89 } 90 StringRef Buf = FileOrErr->get()->getBuffer(); 91 SmallVector<StringRef, 0> Lines; 92 Buf.split(Lines, '\n'); 93 for (StringRef Line : Lines) { 94 Line = Line.trim(); 95 if (!Line.empty()) 96 CHRFunctions.insert(Line); 97 } 98 } 99 } 100 101 namespace { 102 class ControlHeightReductionLegacyPass : public FunctionPass { 103 public: 104 static char ID; 105 106 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 107 initializeControlHeightReductionLegacyPassPass( 108 *PassRegistry::getPassRegistry()); 109 parseCHRFilterFiles(); 110 } 111 112 bool runOnFunction(Function &F) override; 113 void getAnalysisUsage(AnalysisUsage &AU) const override { 114 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 115 AU.addRequired<DominatorTreeWrapperPass>(); 116 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 117 AU.addRequired<RegionInfoPass>(); 118 AU.addPreserved<GlobalsAAWrapperPass>(); 119 } 120 }; 121 } // end anonymous namespace 122 123 char ControlHeightReductionLegacyPass::ID = 0; 124 125 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 126 "chr", 127 "Reduce control height in the hot paths", 128 false, false) 129 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 132 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 133 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 134 "chr", 135 "Reduce control height in the hot paths", 136 false, false) 137 138 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 139 return new ControlHeightReductionLegacyPass(); 140 } 141 142 namespace { 143 144 struct CHRStats { 145 CHRStats() : NumBranches(0), NumBranchesDelta(0), 146 WeightedNumBranchesDelta(0) {} 147 void print(raw_ostream &OS) const { 148 OS << "CHRStats: NumBranches " << NumBranches 149 << " NumBranchesDelta " << NumBranchesDelta 150 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 151 } 152 uint64_t NumBranches; // The original number of conditional branches / 153 // selects 154 uint64_t NumBranchesDelta; // The decrease of the number of conditional 155 // branches / selects in the hot paths due to CHR. 156 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 157 // count at the scope entry. 158 }; 159 160 // RegInfo - some properties of a Region. 161 struct RegInfo { 162 RegInfo() : R(nullptr), HasBranch(false) {} 163 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 164 Region *R; 165 bool HasBranch; 166 SmallVector<SelectInst *, 8> Selects; 167 }; 168 169 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 170 171 // CHRScope - a sequence of regions to CHR together. It corresponds to a 172 // sequence of conditional blocks. It can have subscopes which correspond to 173 // nested conditional blocks. Nested CHRScopes form a tree. 174 class CHRScope { 175 public: 176 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 177 assert(RI.R && "Null RegionIn"); 178 RegInfos.push_back(RI); 179 } 180 181 Region *getParentRegion() { 182 assert(RegInfos.size() > 0 && "Empty CHRScope"); 183 Region *Parent = RegInfos[0].R->getParent(); 184 assert(Parent && "Unexpected to call this on the top-level region"); 185 return Parent; 186 } 187 188 BasicBlock *getEntryBlock() { 189 assert(RegInfos.size() > 0 && "Empty CHRScope"); 190 return RegInfos.front().R->getEntry(); 191 } 192 193 BasicBlock *getExitBlock() { 194 assert(RegInfos.size() > 0 && "Empty CHRScope"); 195 return RegInfos.back().R->getExit(); 196 } 197 198 bool appendable(CHRScope *Next) { 199 // The next scope is appendable only if this scope is directly connected to 200 // it (which implies it post-dominates this scope) and this scope dominates 201 // it (no edge to the next scope outside this scope). 202 BasicBlock *NextEntry = Next->getEntryBlock(); 203 if (getExitBlock() != NextEntry) 204 // Not directly connected. 205 return false; 206 Region *LastRegion = RegInfos.back().R; 207 for (BasicBlock *Pred : predecessors(NextEntry)) 208 if (!LastRegion->contains(Pred)) 209 // There's an edge going into the entry of the next scope from outside 210 // of this scope. 211 return false; 212 return true; 213 } 214 215 void append(CHRScope *Next) { 216 assert(RegInfos.size() > 0 && "Empty CHRScope"); 217 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 218 assert(getParentRegion() == Next->getParentRegion() && 219 "Must be siblings"); 220 assert(getExitBlock() == Next->getEntryBlock() && 221 "Must be adjacent"); 222 for (RegInfo &RI : Next->RegInfos) 223 RegInfos.push_back(RI); 224 for (CHRScope *Sub : Next->Subs) 225 Subs.push_back(Sub); 226 } 227 228 void addSub(CHRScope *SubIn) { 229 #ifndef NDEBUG 230 bool IsChild = false; 231 for (RegInfo &RI : RegInfos) 232 if (RI.R == SubIn->getParentRegion()) { 233 IsChild = true; 234 break; 235 } 236 assert(IsChild && "Must be a child"); 237 #endif 238 Subs.push_back(SubIn); 239 } 240 241 // Split this scope at the boundary region into two, which will belong to the 242 // tail and returns the tail. 243 CHRScope *split(Region *Boundary) { 244 assert(Boundary && "Boundary null"); 245 assert(RegInfos.begin()->R != Boundary && 246 "Can't be split at beginning"); 247 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), 248 [&Boundary](const RegInfo& RI) { 249 return Boundary == RI.R; 250 }); 251 if (BoundaryIt == RegInfos.end()) 252 return nullptr; 253 SmallVector<RegInfo, 8> TailRegInfos; 254 SmallVector<CHRScope *, 8> TailSubs; 255 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); 256 RegInfos.resize(BoundaryIt - RegInfos.begin()); 257 DenseSet<Region *> TailRegionSet; 258 for (RegInfo &RI : TailRegInfos) 259 TailRegionSet.insert(RI.R); 260 for (auto It = Subs.begin(); It != Subs.end(); ) { 261 CHRScope *Sub = *It; 262 assert(Sub && "null Sub"); 263 Region *Parent = Sub->getParentRegion(); 264 if (TailRegionSet.count(Parent)) { 265 TailSubs.push_back(Sub); 266 It = Subs.erase(It); 267 } else { 268 assert(std::find_if(RegInfos.begin(), RegInfos.end(), 269 [&Parent](const RegInfo& RI) { 270 return Parent == RI.R; 271 }) != RegInfos.end() && 272 "Must be in head"); 273 ++It; 274 } 275 } 276 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 277 return new CHRScope(TailRegInfos, TailSubs); 278 } 279 280 bool contains(Instruction *I) const { 281 BasicBlock *Parent = I->getParent(); 282 for (const RegInfo &RI : RegInfos) 283 if (RI.R->contains(Parent)) 284 return true; 285 return false; 286 } 287 288 void print(raw_ostream &OS) const; 289 290 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 291 SmallVector<CHRScope *, 8> Subs; // Subscopes. 292 293 // The instruction at which to insert the CHR conditional branch (and hoist 294 // the dependent condition values). 295 Instruction *BranchInsertPoint; 296 297 // True-biased and false-biased regions (conditional blocks), 298 // respectively. Used only for the outermost scope and includes regions in 299 // subscopes. The rest are unbiased. 300 DenseSet<Region *> TrueBiasedRegions; 301 DenseSet<Region *> FalseBiasedRegions; 302 // Among the biased regions, the regions that get CHRed. 303 SmallVector<RegInfo, 8> CHRRegions; 304 305 // True-biased and false-biased selects, respectively. Used only for the 306 // outermost scope and includes ones in subscopes. 307 DenseSet<SelectInst *> TrueBiasedSelects; 308 DenseSet<SelectInst *> FalseBiasedSelects; 309 310 // Map from one of the above regions to the instructions to stop 311 // hoisting instructions at through use-def chains. 312 HoistStopMapTy HoistStopMap; 313 314 private: 315 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, 316 SmallVector<CHRScope *, 8> &SubsIn) 317 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 318 }; 319 320 class CHR { 321 public: 322 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 323 ProfileSummaryInfo &PSIin, RegionInfo &RIin) 324 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {} 325 326 ~CHR() { 327 for (CHRScope *Scope : Scopes) { 328 delete Scope; 329 } 330 } 331 332 bool run(); 333 334 private: 335 // See the comments in CHR::run() for the high level flow of the algorithm and 336 // what the following functions do. 337 338 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 339 Region *R = RI.getTopLevelRegion(); 340 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); 341 if (Scope) { 342 Output.push_back(Scope); 343 } 344 } 345 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 346 SmallVectorImpl<CHRScope *> &Scopes); 347 CHRScope *findScope(Region *R); 348 void checkScopeHoistable(CHRScope *Scope); 349 350 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 351 SmallVectorImpl<CHRScope *> &Output); 352 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 353 CHRScope *Outer, 354 DenseSet<Value *> *OuterConditionValues, 355 Instruction *OuterInsertPoint, 356 SmallVectorImpl<CHRScope *> &Output, 357 DenseSet<Instruction *> &Unhoistables); 358 359 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 360 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 361 362 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 363 SmallVectorImpl<CHRScope *> &Output); 364 365 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 366 SmallVectorImpl<CHRScope *> &Output); 367 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 368 369 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 370 SmallVectorImpl<CHRScope *> &Output); 371 372 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 373 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 374 void cloneScopeBlocks(CHRScope *Scope, 375 BasicBlock *PreEntryBlock, 376 BasicBlock *ExitBlock, 377 Region *LastRegion, 378 ValueToValueMapTy &VMap); 379 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 380 BasicBlock *EntryBlock, 381 BasicBlock *NewEntryBlock, 382 ValueToValueMapTy &VMap); 383 void fixupBranchesAndSelects(CHRScope *Scope, 384 BasicBlock *PreEntryBlock, 385 BranchInst *MergedBR, 386 uint64_t ProfileCount); 387 void fixupBranch(Region *R, 388 CHRScope *Scope, 389 IRBuilder<> &IRB, 390 Value *&MergedCondition, BranchProbability &CHRBranchBias); 391 void fixupSelect(SelectInst* SI, 392 CHRScope *Scope, 393 IRBuilder<> &IRB, 394 Value *&MergedCondition, BranchProbability &CHRBranchBias); 395 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 396 Instruction *BranchOrSelect, 397 CHRScope *Scope, 398 IRBuilder<> &IRB, 399 Value *&MergedCondition); 400 401 Function &F; 402 BlockFrequencyInfo &BFI; 403 DominatorTree &DT; 404 ProfileSummaryInfo &PSI; 405 RegionInfo &RI; 406 CHRStats Stats; 407 408 // All the true-biased regions in the function 409 DenseSet<Region *> TrueBiasedRegionsGlobal; 410 // All the false-biased regions in the function 411 DenseSet<Region *> FalseBiasedRegionsGlobal; 412 // All the true-biased selects in the function 413 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 414 // All the false-biased selects in the function 415 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 416 // A map from biased regions to their branch bias 417 DenseMap<Region *, BranchProbability> BranchBiasMap; 418 // A map from biased selects to their branch bias 419 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 420 // All the scopes. 421 DenseSet<CHRScope *> Scopes; 422 }; 423 424 } // end anonymous namespace 425 426 static inline 427 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 428 const CHRStats &Stats) { 429 Stats.print(OS); 430 return OS; 431 } 432 433 static inline 434 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 435 Scope.print(OS); 436 return OS; 437 } 438 439 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 440 if (ForceCHR) 441 return true; 442 443 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 444 if (CHRModules.count(F.getParent()->getName())) 445 return true; 446 return CHRFunctions.count(F.getName()); 447 } 448 449 assert(PSI.hasProfileSummary() && "Empty PSI?"); 450 return PSI.isFunctionEntryHot(&F); 451 } 452 453 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 454 CHRStats *Stats) { 455 StringRef FuncName = F.getName(); 456 StringRef ModuleName = F.getParent()->getName(); 457 (void)(FuncName); // Unused in release build. 458 (void)(ModuleName); // Unused in release build. 459 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 460 << FuncName); 461 if (Stats) 462 CHR_DEBUG(dbgs() << " " << *Stats); 463 CHR_DEBUG(dbgs() << "\n"); 464 CHR_DEBUG(F.dump()); 465 } 466 467 void CHRScope::print(raw_ostream &OS) const { 468 assert(RegInfos.size() > 0 && "Empty CHRScope"); 469 OS << "CHRScope["; 470 OS << RegInfos.size() << ", Regions["; 471 for (const RegInfo &RI : RegInfos) { 472 OS << RI.R->getNameStr(); 473 if (RI.HasBranch) 474 OS << " B"; 475 if (RI.Selects.size() > 0) 476 OS << " S" << RI.Selects.size(); 477 OS << ", "; 478 } 479 if (RegInfos[0].R->getParent()) { 480 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 481 } else { 482 // top level region 483 OS << "]"; 484 } 485 OS << ", Subs["; 486 for (CHRScope *Sub : Subs) { 487 OS << *Sub << ", "; 488 } 489 OS << "]]"; 490 } 491 492 // Return true if the given instruction type can be hoisted by CHR. 493 static bool isHoistableInstructionType(Instruction *I) { 494 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 495 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 496 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 497 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 498 isa<InsertValueInst>(I); 499 } 500 501 // Return true if the given instruction can be hoisted by CHR. 502 static bool isHoistable(Instruction *I, DominatorTree &DT) { 503 if (!isHoistableInstructionType(I)) 504 return false; 505 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 506 } 507 508 // Recursively traverse the use-def chains of the given value and return a set 509 // of the unhoistable base values defined within the scope (excluding the 510 // first-region entry block) or the (hoistable or unhoistable) base values that 511 // are defined outside (including the first-region entry block) of the 512 // scope. The returned set doesn't include constants. 513 static std::set<Value *> getBaseValues(Value *V, 514 DominatorTree &DT) { 515 std::set<Value *> Result; 516 if (auto *I = dyn_cast<Instruction>(V)) { 517 // We don't stop at a block that's not in the Scope because we would miss some 518 // instructions that are based on the same base values if we stop there. 519 if (!isHoistable(I, DT)) { 520 Result.insert(I); 521 return Result; 522 } 523 // I is hoistable above the Scope. 524 for (Value *Op : I->operands()) { 525 std::set<Value *> OpResult = getBaseValues(Op, DT); 526 Result.insert(OpResult.begin(), OpResult.end()); 527 } 528 return Result; 529 } 530 if (isa<Argument>(V)) { 531 Result.insert(V); 532 return Result; 533 } 534 // We don't include others like constants because those won't lead to any 535 // chance of folding of conditions (eg two bit checks merged into one check) 536 // after CHR. 537 return Result; // empty 538 } 539 540 // Return true if V is already hoisted or can be hoisted (along with its 541 // operands) above the insert point. When it returns true and HoistStops is 542 // non-null, the instructions to stop hoisting at through the use-def chains are 543 // inserted into HoistStops. 544 static bool 545 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 546 DenseSet<Instruction *> &Unhoistables, 547 DenseSet<Instruction *> *HoistStops) { 548 assert(InsertPoint && "Null InsertPoint"); 549 if (auto *I = dyn_cast<Instruction>(V)) { 550 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 551 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 552 if (Unhoistables.count(I)) { 553 // Don't hoist if they are not to be hoisted. 554 return false; 555 } 556 if (DT.dominates(I, InsertPoint)) { 557 // We are already above the insert point. Stop here. 558 if (HoistStops) 559 HoistStops->insert(I); 560 return true; 561 } 562 // We aren't not above the insert point, check if we can hoist it above the 563 // insert point. 564 if (isHoistable(I, DT)) { 565 // Check operands first. 566 DenseSet<Instruction *> OpsHoistStops; 567 bool AllOpsHoisted = true; 568 for (Value *Op : I->operands()) { 569 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) { 570 AllOpsHoisted = false; 571 break; 572 } 573 } 574 if (AllOpsHoisted) { 575 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 576 if (HoistStops) 577 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 578 return true; 579 } 580 } 581 return false; 582 } 583 // Non-instructions are considered hoistable. 584 return true; 585 } 586 587 // Returns true and sets the true probability and false probability of an 588 // MD_prof metadata if it's well-formed. 589 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 590 BranchProbability &FalseProb) { 591 if (!MD) return false; 592 MDString *MDName = cast<MDString>(MD->getOperand(0)); 593 if (MDName->getString() != "branch_weights" || 594 MD->getNumOperands() != 3) 595 return false; 596 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 597 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 598 if (!TrueWeight || !FalseWeight) 599 return false; 600 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 601 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 602 uint64_t SumWt = TrueWt + FalseWt; 603 604 assert(SumWt >= TrueWt && SumWt >= FalseWt && 605 "Overflow calculating branch probabilities."); 606 607 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 608 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 609 return true; 610 } 611 612 static BranchProbability getCHRBiasThreshold() { 613 return BranchProbability::getBranchProbability( 614 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 615 } 616 617 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 618 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 619 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 620 // false. 621 template<typename K, typename S, typename M> 622 bool checkBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, 623 S &TrueSet, S &FalseSet, M &BiasMap) { 624 BranchProbability Threshold = getCHRBiasThreshold(); 625 if (TrueProb >= Threshold) { 626 TrueSet.insert(Key); 627 BiasMap[Key] = TrueProb; 628 return true; 629 } else if (FalseProb >= Threshold) { 630 FalseSet.insert(Key); 631 BiasMap[Key] = FalseProb; 632 return true; 633 } 634 return false; 635 } 636 637 // Returns true and insert a region into the right biased set and the map if the 638 // branch of the region is biased. 639 static bool checkBiasedBranch(BranchInst *BI, Region *R, 640 DenseSet<Region *> &TrueBiasedRegionsGlobal, 641 DenseSet<Region *> &FalseBiasedRegionsGlobal, 642 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 643 if (!BI->isConditional()) 644 return false; 645 BranchProbability ThenProb, ElseProb; 646 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 647 ThenProb, ElseProb)) 648 return false; 649 BasicBlock *IfThen = BI->getSuccessor(0); 650 BasicBlock *IfElse = BI->getSuccessor(1); 651 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 652 IfThen != IfElse && 653 "Invariant from findScopes"); 654 if (IfThen == R->getExit()) { 655 // Swap them so that IfThen/ThenProb means going into the conditional code 656 // and IfElse/ElseProb means skipping it. 657 std::swap(IfThen, IfElse); 658 std::swap(ThenProb, ElseProb); 659 } 660 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 661 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 662 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 663 return checkBias(R, ThenProb, ElseProb, 664 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 665 BranchBiasMap); 666 } 667 668 // Returns true and insert a select into the right biased set and the map if the 669 // select is biased. 670 static bool checkBiasedSelect( 671 SelectInst *SI, Region *R, 672 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 673 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 674 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 675 BranchProbability TrueProb, FalseProb; 676 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 677 TrueProb, FalseProb)) 678 return false; 679 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 680 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 681 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 682 return checkBias(SI, TrueProb, FalseProb, 683 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 684 SelectBiasMap); 685 } 686 687 // Returns the instruction at which to hoist the dependent condition values and 688 // insert the CHR branch for a region. This is the terminator branch in the 689 // entry block or the first select in the entry block, if any. 690 static Instruction* getBranchInsertPoint(RegInfo &RI) { 691 Region *R = RI.R; 692 BasicBlock *EntryBB = R->getEntry(); 693 // The hoist point is by default the terminator of the entry block, which is 694 // the same as the branch instruction if RI.HasBranch is true. 695 Instruction *HoistPoint = EntryBB->getTerminator(); 696 for (SelectInst *SI : RI.Selects) { 697 if (SI->getParent() == EntryBB) { 698 // Pick the first select in Selects in the entry block. Note Selects is 699 // sorted in the instruction order within a block (asserted below). 700 HoistPoint = SI; 701 break; 702 } 703 } 704 assert(HoistPoint && "Null HoistPoint"); 705 #ifndef NDEBUG 706 // Check that HoistPoint is the first one in Selects in the entry block, 707 // if any. 708 DenseSet<Instruction *> EntryBlockSelectSet; 709 for (SelectInst *SI : RI.Selects) { 710 if (SI->getParent() == EntryBB) { 711 EntryBlockSelectSet.insert(SI); 712 } 713 } 714 for (Instruction &I : *EntryBB) { 715 if (EntryBlockSelectSet.count(&I) > 0) { 716 assert(&I == HoistPoint && 717 "HoistPoint must be the first one in Selects"); 718 break; 719 } 720 } 721 #endif 722 return HoistPoint; 723 } 724 725 // Find a CHR scope in the given region. 726 CHRScope * CHR::findScope(Region *R) { 727 CHRScope *Result = nullptr; 728 BasicBlock *Entry = R->getEntry(); 729 BasicBlock *Exit = R->getExit(); // null if top level. 730 assert(Entry && "Entry must not be null"); 731 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 732 "Only top level region has a null exit"); 733 if (Entry) 734 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 735 else 736 CHR_DEBUG(dbgs() << "Entry null\n"); 737 if (Exit) 738 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 739 else 740 CHR_DEBUG(dbgs() << "Exit null\n"); 741 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 742 // to this region). 743 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 744 if (EntryInSubregion) 745 return nullptr; 746 // Exclude loops 747 for (BasicBlock *Pred : predecessors(Entry)) 748 if (R->contains(Pred)) 749 return nullptr; 750 if (Exit) { 751 // Try to find an if-then block (check if R is an if-then). 752 // if (cond) { 753 // ... 754 // } 755 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 756 if (BI) 757 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 758 else 759 CHR_DEBUG(dbgs() << "BI null\n"); 760 if (BI && BI->isConditional()) { 761 BasicBlock *S0 = BI->getSuccessor(0); 762 BasicBlock *S1 = BI->getSuccessor(1); 763 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 764 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 765 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 766 RegInfo RI(R); 767 RI.HasBranch = checkBiasedBranch( 768 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 769 BranchBiasMap); 770 Result = new CHRScope(RI); 771 Scopes.insert(Result); 772 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 773 ++Stats.NumBranches; 774 } 775 } 776 } 777 { 778 // Try to look for selects in the direct child blocks (as opposed to in 779 // subregions) of R. 780 // ... 781 // if (..) { // Some subregion 782 // ... 783 // } 784 // if (..) { // Some subregion 785 // ... 786 // } 787 // ... 788 // a = cond ? b : c; 789 // ... 790 SmallVector<SelectInst *, 8> Selects; 791 for (RegionNode *E : R->elements()) { 792 if (E->isSubRegion()) 793 continue; 794 // This returns the basic block of E if E is a direct child of R (not a 795 // subregion.) 796 BasicBlock *BB = E->getEntry(); 797 // Need to push in the order to make it easier to find the first Select 798 // later. 799 for (Instruction &I : *BB) { 800 if (auto *SI = dyn_cast<SelectInst>(&I)) { 801 Selects.push_back(SI); 802 ++Stats.NumBranches; 803 } 804 } 805 } 806 if (Selects.size() > 0) { 807 auto AddSelects = [&](RegInfo &RI) { 808 for (auto *SI : Selects) 809 if (checkBiasedSelect(SI, RI.R, 810 TrueBiasedSelectsGlobal, 811 FalseBiasedSelectsGlobal, 812 SelectBiasMap)) 813 RI.Selects.push_back(SI); 814 }; 815 if (!Result) { 816 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 817 RegInfo RI(R); 818 AddSelects(RI); 819 Result = new CHRScope(RI); 820 Scopes.insert(Result); 821 } else { 822 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 823 AddSelects(Result->RegInfos[0]); 824 } 825 } 826 } 827 828 if (Result) { 829 checkScopeHoistable(Result); 830 } 831 return Result; 832 } 833 834 // Check that any of the branch and the selects in the region could be 835 // hoisted above the the CHR branch insert point (the most dominating of 836 // them, either the branch (at the end of the first block) or the first 837 // select in the first block). If the branch can't be hoisted, drop the 838 // selects in the first blocks. 839 // 840 // For example, for the following scope/region with selects, we want to insert 841 // the merged branch right before the first select in the first/entry block by 842 // hoisting c1, c2, c3, and c4. 843 // 844 // // Branch insert point here. 845 // a = c1 ? b : c; // Select 1 846 // d = c2 ? e : f; // Select 2 847 // if (c3) { // Branch 848 // ... 849 // c4 = foo() // A call. 850 // g = c4 ? h : i; // Select 3 851 // } 852 // 853 // But suppose we can't hoist c4 because it's dependent on the preceding 854 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 855 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 856 void CHR::checkScopeHoistable(CHRScope *Scope) { 857 RegInfo &RI = Scope->RegInfos[0]; 858 Region *R = RI.R; 859 BasicBlock *EntryBB = R->getEntry(); 860 auto *Branch = RI.HasBranch ? 861 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 862 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 863 if (RI.HasBranch || !Selects.empty()) { 864 Instruction *InsertPoint = getBranchInsertPoint(RI); 865 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 866 // Avoid a data dependence from a select or a branch to a(nother) 867 // select. Note no instruction can't data-depend on a branch (a branch 868 // instruction doesn't produce a value). 869 DenseSet<Instruction *> Unhoistables; 870 // Initialize Unhoistables with the selects. 871 for (SelectInst *SI : Selects) { 872 Unhoistables.insert(SI); 873 } 874 // Remove Selects that can't be hoisted. 875 for (auto it = Selects.begin(); it != Selects.end(); ) { 876 SelectInst *SI = *it; 877 if (SI == InsertPoint) { 878 ++it; 879 continue; 880 } 881 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 882 DT, Unhoistables, nullptr); 883 if (!IsHoistable) { 884 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 885 it = Selects.erase(it); 886 // Since we are dropping the select here, we also drop it from 887 // Unhoistables. 888 Unhoistables.erase(SI); 889 } else 890 ++it; 891 } 892 // Update InsertPoint after potentially removing selects. 893 InsertPoint = getBranchInsertPoint(RI); 894 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 895 if (RI.HasBranch && InsertPoint != Branch) { 896 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 897 DT, Unhoistables, nullptr); 898 if (!IsHoistable) { 899 // If the branch isn't hoistable, drop the selects in the entry 900 // block, preferring the branch, which makes the branch the hoist 901 // point. 902 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 903 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 904 CHR_DEBUG( 905 for (SelectInst *SI : Selects) { 906 dbgs() << "SI " << *SI << "\n"; 907 }); 908 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 909 [EntryBB](SelectInst *SI) { 910 return SI->getParent() == EntryBB; 911 }), Selects.end()); 912 Unhoistables.clear(); 913 InsertPoint = Branch; 914 } 915 } 916 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 917 #ifndef NDEBUG 918 if (RI.HasBranch) { 919 assert(!DT.dominates(Branch, InsertPoint) && 920 "Branch can't be already above the hoist point"); 921 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 922 DT, Unhoistables, nullptr) && 923 "checkHoistValue for branch"); 924 } 925 for (auto *SI : Selects) { 926 assert(!DT.dominates(SI, InsertPoint) && 927 "SI can't be already above the hoist point"); 928 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 929 Unhoistables, nullptr) && 930 "checkHoistValue for selects"); 931 } 932 CHR_DEBUG(dbgs() << "Result\n"); 933 if (RI.HasBranch) { 934 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 935 } 936 for (auto *SI : Selects) { 937 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 938 } 939 #endif 940 } 941 } 942 943 // Traverse the region tree, find all nested scopes and merge them if possible. 944 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 945 SmallVectorImpl<CHRScope *> &Scopes) { 946 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 947 CHRScope *Result = findScope(R); 948 // Visit subscopes. 949 CHRScope *ConsecutiveSubscope = nullptr; 950 SmallVector<CHRScope *, 8> Subscopes; 951 for (auto It = R->begin(); It != R->end(); ++It) { 952 const std::unique_ptr<Region> &SubR = *It; 953 auto NextIt = std::next(It); 954 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 955 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 956 << "\n"); 957 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 958 if (SubCHRScope) { 959 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 960 } else { 961 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 962 } 963 if (SubCHRScope) { 964 if (!ConsecutiveSubscope) 965 ConsecutiveSubscope = SubCHRScope; 966 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 967 Subscopes.push_back(ConsecutiveSubscope); 968 ConsecutiveSubscope = SubCHRScope; 969 } else 970 ConsecutiveSubscope->append(SubCHRScope); 971 } else { 972 if (ConsecutiveSubscope) { 973 Subscopes.push_back(ConsecutiveSubscope); 974 } 975 ConsecutiveSubscope = nullptr; 976 } 977 } 978 if (ConsecutiveSubscope) { 979 Subscopes.push_back(ConsecutiveSubscope); 980 } 981 for (CHRScope *Sub : Subscopes) { 982 if (Result) { 983 // Combine it with the parent. 984 Result->addSub(Sub); 985 } else { 986 // Push Subscopes as they won't be combined with the parent. 987 Scopes.push_back(Sub); 988 } 989 } 990 return Result; 991 } 992 993 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 994 DenseSet<Value *> ConditionValues; 995 if (RI.HasBranch) { 996 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 997 ConditionValues.insert(BI->getCondition()); 998 } 999 for (SelectInst *SI : RI.Selects) { 1000 ConditionValues.insert(SI->getCondition()); 1001 } 1002 return ConditionValues; 1003 } 1004 1005 1006 // Determine whether to split a scope depending on the sets of the branch 1007 // condition values of the previous region and the current region. We split 1008 // (return true) it if 1) the condition values of the inner/lower scope can't be 1009 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1010 // values have an empty intersection (because the combined branch conditions 1011 // won't probably lead to a simpler combined condition). 1012 static bool shouldSplit(Instruction *InsertPoint, 1013 DenseSet<Value *> &PrevConditionValues, 1014 DenseSet<Value *> &ConditionValues, 1015 DominatorTree &DT, 1016 DenseSet<Instruction *> &Unhoistables) { 1017 CHR_DEBUG( 1018 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1019 for (Value *V : PrevConditionValues) { 1020 dbgs() << *V << ", "; 1021 } 1022 dbgs() << " ConditionValues "; 1023 for (Value *V : ConditionValues) { 1024 dbgs() << *V << ", "; 1025 } 1026 dbgs() << "\n"); 1027 assert(InsertPoint && "Null InsertPoint"); 1028 // If any of Bases isn't hoistable to the hoist point, split. 1029 for (Value *V : ConditionValues) { 1030 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) { 1031 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1032 return true; // Not hoistable, split. 1033 } 1034 } 1035 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1036 // unnecessary splits at scopes with no branch/selects. If 1037 // PrevConditionValues and ConditionValues don't intersect at all, split. 1038 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1039 // Use std::set as DenseSet doesn't work with set_intersection. 1040 std::set<Value *> PrevBases, Bases; 1041 for (Value *V : PrevConditionValues) { 1042 std::set<Value *> BaseValues = getBaseValues(V, DT); 1043 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1044 } 1045 for (Value *V : ConditionValues) { 1046 std::set<Value *> BaseValues = getBaseValues(V, DT); 1047 Bases.insert(BaseValues.begin(), BaseValues.end()); 1048 } 1049 CHR_DEBUG( 1050 dbgs() << "PrevBases "; 1051 for (Value *V : PrevBases) { 1052 dbgs() << *V << ", "; 1053 } 1054 dbgs() << " Bases "; 1055 for (Value *V : Bases) { 1056 dbgs() << *V << ", "; 1057 } 1058 dbgs() << "\n"); 1059 std::set<Value *> Intersection; 1060 std::set_intersection(PrevBases.begin(), PrevBases.end(), 1061 Bases.begin(), Bases.end(), 1062 std::inserter(Intersection, Intersection.begin())); 1063 if (Intersection.empty()) { 1064 // Empty intersection, split. 1065 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1066 return true; 1067 } 1068 } 1069 CHR_DEBUG(dbgs() << "No split\n"); 1070 return false; // Don't split. 1071 } 1072 1073 static void getSelectsInScope(CHRScope *Scope, 1074 DenseSet<Instruction *> &Output) { 1075 for (RegInfo &RI : Scope->RegInfos) 1076 for (SelectInst *SI : RI.Selects) 1077 Output.insert(SI); 1078 for (CHRScope *Sub : Scope->Subs) 1079 getSelectsInScope(Sub, Output); 1080 } 1081 1082 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1083 SmallVectorImpl<CHRScope *> &Output) { 1084 for (CHRScope *Scope : Input) { 1085 assert(!Scope->BranchInsertPoint && 1086 "BranchInsertPoint must not be set"); 1087 DenseSet<Instruction *> Unhoistables; 1088 getSelectsInScope(Scope, Unhoistables); 1089 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1090 } 1091 #ifndef NDEBUG 1092 for (CHRScope *Scope : Output) { 1093 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1094 } 1095 #endif 1096 } 1097 1098 SmallVector<CHRScope *, 8> CHR::splitScope( 1099 CHRScope *Scope, 1100 CHRScope *Outer, 1101 DenseSet<Value *> *OuterConditionValues, 1102 Instruction *OuterInsertPoint, 1103 SmallVectorImpl<CHRScope *> &Output, 1104 DenseSet<Instruction *> &Unhoistables) { 1105 if (Outer) { 1106 assert(OuterConditionValues && "Null OuterConditionValues"); 1107 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1108 } 1109 bool PrevSplitFromOuter = true; 1110 DenseSet<Value *> PrevConditionValues; 1111 Instruction *PrevInsertPoint = nullptr; 1112 SmallVector<CHRScope *, 8> Splits; 1113 SmallVector<bool, 8> SplitsSplitFromOuter; 1114 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1115 SmallVector<Instruction *, 8> SplitsInsertPoints; 1116 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1117 for (RegInfo &RI : RegInfos) { 1118 Instruction *InsertPoint = getBranchInsertPoint(RI); 1119 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1120 CHR_DEBUG( 1121 dbgs() << "ConditionValues "; 1122 for (Value *V : ConditionValues) { 1123 dbgs() << *V << ", "; 1124 } 1125 dbgs() << "\n"); 1126 if (RI.R == RegInfos[0].R) { 1127 // First iteration. Check to see if we should split from the outer. 1128 if (Outer) { 1129 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1130 CHR_DEBUG(dbgs() << "Should split from outer at " 1131 << RI.R->getNameStr() << "\n"); 1132 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1133 ConditionValues, DT, Unhoistables)) { 1134 PrevConditionValues = ConditionValues; 1135 PrevInsertPoint = InsertPoint; 1136 } else { 1137 // Not splitting from the outer. Use the outer bases and insert 1138 // point. Union the bases. 1139 PrevSplitFromOuter = false; 1140 PrevConditionValues = *OuterConditionValues; 1141 PrevConditionValues.insert(ConditionValues.begin(), 1142 ConditionValues.end()); 1143 PrevInsertPoint = OuterInsertPoint; 1144 } 1145 } else { 1146 CHR_DEBUG(dbgs() << "Outer null\n"); 1147 PrevConditionValues = ConditionValues; 1148 PrevInsertPoint = InsertPoint; 1149 } 1150 } else { 1151 CHR_DEBUG(dbgs() << "Should split from prev at " 1152 << RI.R->getNameStr() << "\n"); 1153 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1154 DT, Unhoistables)) { 1155 CHRScope *Tail = Scope->split(RI.R); 1156 Scopes.insert(Tail); 1157 Splits.push_back(Scope); 1158 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1159 SplitsConditionValues.push_back(PrevConditionValues); 1160 SplitsInsertPoints.push_back(PrevInsertPoint); 1161 Scope = Tail; 1162 PrevConditionValues = ConditionValues; 1163 PrevInsertPoint = InsertPoint; 1164 PrevSplitFromOuter = true; 1165 } else { 1166 // Not splitting. Union the bases. Keep the hoist point. 1167 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1168 } 1169 } 1170 } 1171 Splits.push_back(Scope); 1172 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1173 SplitsConditionValues.push_back(PrevConditionValues); 1174 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1175 SplitsInsertPoints.push_back(PrevInsertPoint); 1176 assert(Splits.size() == SplitsConditionValues.size() && 1177 Splits.size() == SplitsSplitFromOuter.size() && 1178 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1179 for (size_t I = 0; I < Splits.size(); ++I) { 1180 CHRScope *Split = Splits[I]; 1181 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1182 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1183 SmallVector<CHRScope *, 8> NewSubs; 1184 DenseSet<Instruction *> SplitUnhoistables; 1185 getSelectsInScope(Split, SplitUnhoistables); 1186 for (CHRScope *Sub : Split->Subs) { 1187 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1188 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1189 SplitUnhoistables); 1190 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1191 } 1192 Split->Subs = NewSubs; 1193 } 1194 SmallVector<CHRScope *, 8> Result; 1195 for (size_t I = 0; I < Splits.size(); ++I) { 1196 CHRScope *Split = Splits[I]; 1197 if (SplitsSplitFromOuter[I]) { 1198 // Split from the outer. 1199 Output.push_back(Split); 1200 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1201 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1202 << "\n"); 1203 } else { 1204 // Connected to the outer. 1205 Result.push_back(Split); 1206 } 1207 } 1208 if (!Outer) 1209 assert(Result.empty() && 1210 "If no outer (top-level), must return no nested ones"); 1211 return Result; 1212 } 1213 1214 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1215 for (CHRScope *Scope : Scopes) { 1216 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1217 classifyBiasedScopes(Scope, Scope); 1218 CHR_DEBUG( 1219 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1220 dbgs() << "TrueBiasedRegions "; 1221 for (Region *R : Scope->TrueBiasedRegions) { 1222 dbgs() << R->getNameStr() << ", "; 1223 } 1224 dbgs() << "\n"; 1225 dbgs() << "FalseBiasedRegions "; 1226 for (Region *R : Scope->FalseBiasedRegions) { 1227 dbgs() << R->getNameStr() << ", "; 1228 } 1229 dbgs() << "\n"; 1230 dbgs() << "TrueBiasedSelects "; 1231 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1232 dbgs() << *SI << ", "; 1233 } 1234 dbgs() << "\n"; 1235 dbgs() << "FalseBiasedSelects "; 1236 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1237 dbgs() << *SI << ", "; 1238 } 1239 dbgs() << "\n";); 1240 } 1241 } 1242 1243 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1244 for (RegInfo &RI : Scope->RegInfos) { 1245 if (RI.HasBranch) { 1246 Region *R = RI.R; 1247 if (TrueBiasedRegionsGlobal.count(R) > 0) 1248 OutermostScope->TrueBiasedRegions.insert(R); 1249 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1250 OutermostScope->FalseBiasedRegions.insert(R); 1251 else 1252 llvm_unreachable("Must be biased"); 1253 } 1254 for (SelectInst *SI : RI.Selects) { 1255 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1256 OutermostScope->TrueBiasedSelects.insert(SI); 1257 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1258 OutermostScope->FalseBiasedSelects.insert(SI); 1259 else 1260 llvm_unreachable("Must be biased"); 1261 } 1262 } 1263 for (CHRScope *Sub : Scope->Subs) { 1264 classifyBiasedScopes(Sub, OutermostScope); 1265 } 1266 } 1267 1268 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1269 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1270 Scope->FalseBiasedRegions.size() + 1271 Scope->TrueBiasedSelects.size() + 1272 Scope->FalseBiasedSelects.size(); 1273 return NumBiased >= CHRMergeThreshold; 1274 } 1275 1276 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1277 SmallVectorImpl<CHRScope *> &Output) { 1278 for (CHRScope *Scope : Input) { 1279 // Filter out the ones with only one region and no subs. 1280 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1281 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1282 << Scope->TrueBiasedRegions.size() 1283 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1284 << " true-selects " << Scope->TrueBiasedSelects.size() 1285 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1286 continue; 1287 } 1288 Output.push_back(Scope); 1289 } 1290 } 1291 1292 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1293 SmallVectorImpl<CHRScope *> &Output) { 1294 for (CHRScope *Scope : Input) { 1295 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1296 "Empty"); 1297 setCHRRegions(Scope, Scope); 1298 Output.push_back(Scope); 1299 CHR_DEBUG( 1300 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1301 for (auto pair : Scope->HoistStopMap) { 1302 Region *R = pair.first; 1303 dbgs() << "Region " << R->getNameStr() << "\n"; 1304 for (Instruction *I : pair.second) { 1305 dbgs() << "HoistStop " << *I << "\n"; 1306 } 1307 } 1308 dbgs() << "CHRRegions" << "\n"; 1309 for (RegInfo &RI : Scope->CHRRegions) { 1310 dbgs() << RI.R->getNameStr() << "\n"; 1311 }); 1312 } 1313 } 1314 1315 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1316 DenseSet<Instruction *> Unhoistables; 1317 // Put the biased selects in Unhoistables because they should stay where they 1318 // are and constant-folded after CHR (in case one biased select or a branch 1319 // can depend on another biased select.) 1320 for (RegInfo &RI : Scope->RegInfos) { 1321 for (SelectInst *SI : RI.Selects) { 1322 Unhoistables.insert(SI); 1323 } 1324 } 1325 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1326 for (RegInfo &RI : Scope->RegInfos) { 1327 Region *R = RI.R; 1328 DenseSet<Instruction *> HoistStops; 1329 bool IsHoisted = false; 1330 if (RI.HasBranch) { 1331 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1332 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1333 "Must be truthy or falsy"); 1334 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1335 // Note checkHoistValue fills in HoistStops. 1336 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1337 Unhoistables, &HoistStops); 1338 assert(IsHoistable && "Must be hoistable"); 1339 (void)(IsHoistable); // Unused in release build 1340 IsHoisted = true; 1341 } 1342 for (SelectInst *SI : RI.Selects) { 1343 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1344 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1345 "Must be true or false biased"); 1346 // Note checkHoistValue fills in HoistStops. 1347 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1348 Unhoistables, &HoistStops); 1349 assert(IsHoistable && "Must be hoistable"); 1350 (void)(IsHoistable); // Unused in release build 1351 IsHoisted = true; 1352 } 1353 if (IsHoisted) { 1354 OutermostScope->CHRRegions.push_back(RI); 1355 OutermostScope->HoistStopMap[R] = HoistStops; 1356 } 1357 } 1358 for (CHRScope *Sub : Scope->Subs) 1359 setCHRRegions(Sub, OutermostScope); 1360 } 1361 1362 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1363 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1364 } 1365 1366 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1367 SmallVectorImpl<CHRScope *> &Output) { 1368 Output.resize(Input.size()); 1369 std::copy(Input.begin(), Input.end(), Output.begin()); 1370 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter); 1371 } 1372 1373 // Return true if V is already hoisted or was hoisted (along with its operands) 1374 // to the insert point. 1375 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1376 HoistStopMapTy &HoistStopMap, 1377 DenseSet<Instruction *> &HoistedSet, 1378 DenseSet<PHINode *> &TrivialPHIs) { 1379 auto IT = HoistStopMap.find(R); 1380 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1381 DenseSet<Instruction *> &HoistStops = IT->second; 1382 if (auto *I = dyn_cast<Instruction>(V)) { 1383 if (I == HoistPoint) 1384 return; 1385 if (HoistStops.count(I)) 1386 return; 1387 if (auto *PN = dyn_cast<PHINode>(I)) 1388 if (TrivialPHIs.count(PN)) 1389 // The trivial phi inserted by the previous CHR scope could replace a 1390 // non-phi in HoistStops. Note that since this phi is at the exit of a 1391 // previous CHR scope, which dominates this scope, it's safe to stop 1392 // hoisting there. 1393 return; 1394 if (HoistedSet.count(I)) 1395 // Already hoisted, return. 1396 return; 1397 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1398 for (Value *Op : I->operands()) { 1399 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs); 1400 } 1401 I->moveBefore(HoistPoint); 1402 HoistedSet.insert(I); 1403 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1404 } 1405 } 1406 1407 // Hoist the dependent condition values of the branches and the selects in the 1408 // scope to the insert point. 1409 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1410 DenseSet<PHINode *> &TrivialPHIs) { 1411 DenseSet<Instruction *> HoistedSet; 1412 for (const RegInfo &RI : Scope->CHRRegions) { 1413 Region *R = RI.R; 1414 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1415 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1416 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1417 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1418 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1419 HoistedSet, TrivialPHIs); 1420 } 1421 for (SelectInst *SI : RI.Selects) { 1422 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1423 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1424 if (!(IsTrueBiased || IsFalseBiased)) 1425 continue; 1426 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1427 HoistedSet, TrivialPHIs); 1428 } 1429 } 1430 } 1431 1432 // Negate the predicate if an ICmp if it's used only by branches or selects by 1433 // swapping the operands of the branches or the selects. Returns true if success. 1434 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1435 Instruction *ExcludedUser, 1436 CHRScope *Scope) { 1437 for (User *U : ICmp->users()) { 1438 if (U == ExcludedUser) 1439 continue; 1440 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1441 continue; 1442 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1443 continue; 1444 return false; 1445 } 1446 for (User *U : ICmp->users()) { 1447 if (U == ExcludedUser) 1448 continue; 1449 if (auto *BI = dyn_cast<BranchInst>(U)) { 1450 assert(BI->isConditional() && "Must be conditional"); 1451 BI->swapSuccessors(); 1452 // Don't need to swap this in terms of 1453 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1454 // mean whehter the branch is likely go into the if-then rather than 1455 // successor0/successor1 and because we can tell which edge is the then or 1456 // the else one by comparing the destination to the region exit block. 1457 continue; 1458 } 1459 if (auto *SI = dyn_cast<SelectInst>(U)) { 1460 // Swap operands 1461 Value *TrueValue = SI->getTrueValue(); 1462 Value *FalseValue = SI->getFalseValue(); 1463 SI->setTrueValue(FalseValue); 1464 SI->setFalseValue(TrueValue); 1465 SI->swapProfMetadata(); 1466 if (Scope->TrueBiasedSelects.count(SI)) { 1467 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1468 "Must not be already in"); 1469 Scope->FalseBiasedSelects.insert(SI); 1470 } else if (Scope->FalseBiasedSelects.count(SI)) { 1471 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1472 "Must not be already in"); 1473 Scope->TrueBiasedSelects.insert(SI); 1474 } 1475 continue; 1476 } 1477 llvm_unreachable("Must be a branch or a select"); 1478 } 1479 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1480 return true; 1481 } 1482 1483 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1484 // for a value that's defined in the scope but used outside it (meaning it's 1485 // alive at the exit block). 1486 static void insertTrivialPHIs(CHRScope *Scope, 1487 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1488 DenseSet<PHINode *> &TrivialPHIs) { 1489 DenseSet<BasicBlock *> BlocksInScopeSet; 1490 SmallVector<BasicBlock *, 8> BlocksInScopeVec; 1491 for (RegInfo &RI : Scope->RegInfos) { 1492 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1493 // sub-Scopes. 1494 BlocksInScopeSet.insert(BB); 1495 BlocksInScopeVec.push_back(BB); 1496 } 1497 } 1498 CHR_DEBUG( 1499 dbgs() << "Inserting redudant phis\n"; 1500 for (BasicBlock *BB : BlocksInScopeVec) { 1501 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1502 }); 1503 for (BasicBlock *BB : BlocksInScopeVec) { 1504 for (Instruction &I : *BB) { 1505 SmallVector<Instruction *, 8> Users; 1506 for (User *U : I.users()) { 1507 if (auto *UI = dyn_cast<Instruction>(U)) { 1508 if (BlocksInScopeSet.count(UI->getParent()) == 0 && 1509 // Unless there's already a phi for I at the exit block. 1510 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1511 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1512 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1513 Users.push_back(UI); 1514 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1515 // There's a loop backedge from a block that's dominated by this 1516 // scope to the entry block. 1517 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1518 CHR_DEBUG(dbgs() 1519 << "Used at entry block (for a back edge) by a phi user " 1520 << *UI << "\n"); 1521 Users.push_back(UI); 1522 } 1523 } 1524 } 1525 if (Users.size() > 0) { 1526 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1527 // ExitBlock. Replace I with the new phi in UI unless UI is another 1528 // phi at ExitBlock. 1529 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1530 pred_end(ExitBlock)); 1531 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1532 &ExitBlock->front()); 1533 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1534 PN->addIncoming(&I, Pred); 1535 } 1536 TrivialPHIs.insert(PN); 1537 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1538 for (Instruction *UI : Users) { 1539 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1540 if (UI->getOperand(J) == &I) { 1541 UI->setOperand(J, PN); 1542 } 1543 } 1544 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1545 } 1546 } 1547 } 1548 } 1549 } 1550 1551 // Assert that all the CHR regions of the scope have a biased branch or select. 1552 static void LLVM_ATTRIBUTE_UNUSED 1553 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1554 #ifndef NDEBUG 1555 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1556 if (Scope->TrueBiasedRegions.count(RI.R) || 1557 Scope->FalseBiasedRegions.count(RI.R)) 1558 return true; 1559 for (SelectInst *SI : RI.Selects) 1560 if (Scope->TrueBiasedSelects.count(SI) || 1561 Scope->FalseBiasedSelects.count(SI)) 1562 return true; 1563 return false; 1564 }; 1565 for (RegInfo &RI : Scope->CHRRegions) { 1566 assert(HasBiasedBranchOrSelect(RI, Scope) && 1567 "Must have biased branch or select"); 1568 } 1569 #endif 1570 } 1571 1572 // Assert that all the condition values of the biased branches and selects have 1573 // been hoisted to the pre-entry block or outside of the scope. 1574 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1575 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1576 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1577 for (RegInfo &RI : Scope->CHRRegions) { 1578 Region *R = RI.R; 1579 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1580 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1581 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1582 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1583 Value *V = BI->getCondition(); 1584 CHR_DEBUG(dbgs() << *V << "\n"); 1585 if (auto *I = dyn_cast<Instruction>(V)) { 1586 (void)(I); // Unused in release build. 1587 assert((I->getParent() == PreEntryBlock || 1588 !Scope->contains(I)) && 1589 "Must have been hoisted to PreEntryBlock or outside the scope"); 1590 } 1591 } 1592 for (SelectInst *SI : RI.Selects) { 1593 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1594 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1595 if (!(IsTrueBiased || IsFalseBiased)) 1596 continue; 1597 Value *V = SI->getCondition(); 1598 CHR_DEBUG(dbgs() << *V << "\n"); 1599 if (auto *I = dyn_cast<Instruction>(V)) { 1600 (void)(I); // Unused in release build. 1601 assert((I->getParent() == PreEntryBlock || 1602 !Scope->contains(I)) && 1603 "Must have been hoisted to PreEntryBlock or outside the scope"); 1604 } 1605 } 1606 } 1607 } 1608 1609 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1610 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1611 1612 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1613 Region *FirstRegion = Scope->RegInfos[0].R; 1614 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1615 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1616 BasicBlock *ExitBlock = LastRegion->getExit(); 1617 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1618 1619 if (ExitBlock) { 1620 // Insert a trivial phi at the exit block (where the CHR hot path and the 1621 // cold path merges) for a value that's defined in the scope but used 1622 // outside it (meaning it's alive at the exit block). We will add the 1623 // incoming values for the CHR cold paths to it below. Without this, we'd 1624 // miss updating phi's for such values unless there happens to already be a 1625 // phi for that value there. 1626 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1627 } 1628 1629 // Split the entry block of the first region. The new block becomes the new 1630 // entry block of the first region. The old entry block becomes the block to 1631 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1632 // through the split, we update the entry of the first region after the split, 1633 // and Region only points to the entry and the exit blocks, rather than 1634 // keeping everything in a list or set, the blocks membership and the 1635 // entry/exit blocks of the region are still valid after the split. 1636 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1637 << " at " << *Scope->BranchInsertPoint << "\n"); 1638 BasicBlock *NewEntryBlock = 1639 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1640 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1641 "NewEntryBlock's only pred must be EntryBlock"); 1642 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1643 BasicBlock *PreEntryBlock = EntryBlock; 1644 1645 ValueToValueMapTy VMap; 1646 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1647 // hot path (originals) and a cold path (clones) and update the PHIs at the 1648 // exit block. 1649 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1650 1651 // Replace the old (placeholder) branch with the new (merged) conditional 1652 // branch. 1653 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1654 NewEntryBlock, VMap); 1655 1656 #ifndef NDEBUG 1657 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1658 #endif 1659 1660 // Hoist the conditional values of the branches/selects. 1661 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs); 1662 1663 #ifndef NDEBUG 1664 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1665 #endif 1666 1667 // Create the combined branch condition and constant-fold the branches/selects 1668 // in the hot path. 1669 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1670 ProfileCount ? ProfileCount.getValue() : 0); 1671 } 1672 1673 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1674 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1675 // at the exit block. 1676 void CHR::cloneScopeBlocks(CHRScope *Scope, 1677 BasicBlock *PreEntryBlock, 1678 BasicBlock *ExitBlock, 1679 Region *LastRegion, 1680 ValueToValueMapTy &VMap) { 1681 // Clone all the blocks. The original blocks will be the hot-path 1682 // CHR-optimized code and the cloned blocks will be the original unoptimized 1683 // code. This is so that the block pointers from the 1684 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1685 // which CHR should apply to. 1686 SmallVector<BasicBlock*, 8> NewBlocks; 1687 for (RegInfo &RI : Scope->RegInfos) 1688 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1689 // sub-Scopes. 1690 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1691 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1692 NewBlocks.push_back(NewBB); 1693 VMap[BB] = NewBB; 1694 } 1695 1696 // Place the cloned blocks right after the original blocks (right before the 1697 // exit block of.) 1698 if (ExitBlock) 1699 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1700 F.getBasicBlockList(), 1701 NewBlocks[0]->getIterator(), F.end()); 1702 1703 // Update the cloned blocks/instructions to refer to themselves. 1704 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1705 for (Instruction &I : *NewBlocks[i]) 1706 RemapInstruction(&I, VMap, 1707 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1708 1709 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1710 // the top-level region but we don't need to add PHIs. The trivial PHIs 1711 // inserted above will be updated here. 1712 if (ExitBlock) 1713 for (PHINode &PN : ExitBlock->phis()) 1714 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1715 ++I) { 1716 BasicBlock *Pred = PN.getIncomingBlock(I); 1717 if (LastRegion->contains(Pred)) { 1718 Value *V = PN.getIncomingValue(I); 1719 auto It = VMap.find(V); 1720 if (It != VMap.end()) V = It->second; 1721 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1722 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1723 } 1724 } 1725 } 1726 1727 // A helper for transformScope. Replace the old (placeholder) branch with the 1728 // new (merged) conditional branch. 1729 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1730 BasicBlock *EntryBlock, 1731 BasicBlock *NewEntryBlock, 1732 ValueToValueMapTy &VMap) { 1733 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1734 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1735 "SplitBlock did not work correctly!"); 1736 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1737 "NewEntryBlock's only pred must be EntryBlock"); 1738 assert(VMap.find(NewEntryBlock) != VMap.end() && 1739 "NewEntryBlock must have been copied"); 1740 OldBR->dropAllReferences(); 1741 OldBR->eraseFromParent(); 1742 // The true predicate is a placeholder. It will be replaced later in 1743 // fixupBranchesAndSelects(). 1744 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1745 cast<BasicBlock>(VMap[NewEntryBlock]), 1746 ConstantInt::getTrue(F.getContext())); 1747 PreEntryBlock->getInstList().push_back(NewBR); 1748 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1749 "NewEntryBlock's only pred must be EntryBlock"); 1750 return NewBR; 1751 } 1752 1753 // A helper for transformScopes. Create the combined branch condition and 1754 // constant-fold the branches/selects in the hot path. 1755 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1756 BasicBlock *PreEntryBlock, 1757 BranchInst *MergedBR, 1758 uint64_t ProfileCount) { 1759 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1760 BranchProbability CHRBranchBias(1, 1); 1761 uint64_t NumCHRedBranches = 0; 1762 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1763 for (RegInfo &RI : Scope->CHRRegions) { 1764 Region *R = RI.R; 1765 if (RI.HasBranch) { 1766 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1767 ++NumCHRedBranches; 1768 } 1769 for (SelectInst *SI : RI.Selects) { 1770 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1771 ++NumCHRedBranches; 1772 } 1773 } 1774 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1775 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1776 MergedBR->setCondition(MergedCondition); 1777 SmallVector<uint32_t, 2> Weights; 1778 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); 1779 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); 1780 MDBuilder MDB(F.getContext()); 1781 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1782 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1783 << "\n"); 1784 } 1785 1786 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1787 // and constant-fold a branch in the hot path. 1788 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1789 IRBuilder<> &IRB, 1790 Value *&MergedCondition, 1791 BranchProbability &CHRBranchBias) { 1792 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1793 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1794 "Must be truthy or falsy"); 1795 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1796 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1797 "Must be in the bias map"); 1798 BranchProbability Bias = BranchBiasMap[R]; 1799 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1800 // Take the min. 1801 if (CHRBranchBias > Bias) 1802 CHRBranchBias = Bias; 1803 BasicBlock *IfThen = BI->getSuccessor(1); 1804 BasicBlock *IfElse = BI->getSuccessor(0); 1805 BasicBlock *RegionExitBlock = R->getExit(); 1806 assert(RegionExitBlock && "Null ExitBlock"); 1807 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1808 IfThen != IfElse && "Invariant from findScopes"); 1809 if (IfThen == RegionExitBlock) { 1810 // Swap them so that IfThen means going into it and IfElse means skipping 1811 // it. 1812 std::swap(IfThen, IfElse); 1813 } 1814 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1815 << " IfElse " << IfElse->getName() << "\n"); 1816 Value *Cond = BI->getCondition(); 1817 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1818 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1819 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1820 MergedCondition); 1821 // Constant-fold the branch at ClonedEntryBlock. 1822 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1823 "The successor shouldn't change"); 1824 Value *NewCondition = ConditionTrue ? 1825 ConstantInt::getTrue(F.getContext()) : 1826 ConstantInt::getFalse(F.getContext()); 1827 BI->setCondition(NewCondition); 1828 } 1829 1830 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1831 // and constant-fold a select in the hot path. 1832 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1833 IRBuilder<> &IRB, 1834 Value *&MergedCondition, 1835 BranchProbability &CHRBranchBias) { 1836 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1837 assert((IsTrueBiased || 1838 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1839 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1840 "Must be in the bias map"); 1841 BranchProbability Bias = SelectBiasMap[SI]; 1842 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1843 // Take the min. 1844 if (CHRBranchBias > Bias) 1845 CHRBranchBias = Bias; 1846 Value *Cond = SI->getCondition(); 1847 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1848 MergedCondition); 1849 Value *NewCondition = IsTrueBiased ? 1850 ConstantInt::getTrue(F.getContext()) : 1851 ConstantInt::getFalse(F.getContext()); 1852 SI->setCondition(NewCondition); 1853 } 1854 1855 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1856 // condition. 1857 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1858 Instruction *BranchOrSelect, 1859 CHRScope *Scope, 1860 IRBuilder<> &IRB, 1861 Value *&MergedCondition) { 1862 if (IsTrueBiased) { 1863 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1864 } else { 1865 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1866 // branch, negate the icmp predicate and swap the branch targets and avoid 1867 // inserting an Xor to negate Cond. 1868 bool Done = false; 1869 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1870 if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1871 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1872 Done = true; 1873 } 1874 if (!Done) { 1875 Value *Negate = IRB.CreateXor( 1876 ConstantInt::getTrue(F.getContext()), Cond); 1877 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1878 } 1879 } 1880 } 1881 1882 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1883 unsigned I = 0; 1884 DenseSet<PHINode *> TrivialPHIs; 1885 for (CHRScope *Scope : CHRScopes) { 1886 transformScopes(Scope, TrivialPHIs); 1887 CHR_DEBUG( 1888 std::ostringstream oss; 1889 oss << " after transformScopes " << I++; 1890 dumpIR(F, oss.str().c_str(), nullptr)); 1891 (void)I; 1892 } 1893 } 1894 1895 static void LLVM_ATTRIBUTE_UNUSED 1896 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1897 dbgs() << Label << " " << Scopes.size() << "\n"; 1898 for (CHRScope *Scope : Scopes) { 1899 dbgs() << *Scope << "\n"; 1900 } 1901 } 1902 1903 bool CHR::run() { 1904 if (!shouldApply(F, PSI)) 1905 return false; 1906 1907 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1908 1909 bool Changed = false; 1910 { 1911 CHR_DEBUG( 1912 dbgs() << "RegionInfo:\n"; 1913 RI.print(dbgs())); 1914 1915 // Recursively traverse the region tree and find regions that have biased 1916 // branches and/or selects and create scopes. 1917 SmallVector<CHRScope *, 8> AllScopes; 1918 findScopes(AllScopes); 1919 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 1920 1921 // Split the scopes if 1) the conditiona values of the biased 1922 // branches/selects of the inner/lower scope can't be hoisted up to the 1923 // outermost/uppermost scope entry, or 2) the condition values of the biased 1924 // branches/selects in a scope (including subscopes) don't share at least 1925 // one common value. 1926 SmallVector<CHRScope *, 8> SplitScopes; 1927 splitScopes(AllScopes, SplitScopes); 1928 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 1929 1930 // After splitting, set the biased regions and selects of a scope (a tree 1931 // root) that include those of the subscopes. 1932 classifyBiasedScopes(SplitScopes); 1933 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 1934 1935 // Filter out the scopes that has only one biased region or select (CHR 1936 // isn't useful in such a case). 1937 SmallVector<CHRScope *, 8> FilteredScopes; 1938 filterScopes(SplitScopes, FilteredScopes); 1939 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 1940 1941 // Set the regions to be CHR'ed and their hoist stops for each scope. 1942 SmallVector<CHRScope *, 8> SetScopes; 1943 setCHRRegions(FilteredScopes, SetScopes); 1944 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 1945 1946 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 1947 // ones. We need to apply CHR from outer to inner so that we apply CHR only 1948 // to the hot path, rather than both hot and cold paths. 1949 SmallVector<CHRScope *, 8> SortedScopes; 1950 sortScopes(SetScopes, SortedScopes); 1951 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 1952 1953 CHR_DEBUG( 1954 dbgs() << "RegionInfo:\n"; 1955 RI.print(dbgs())); 1956 1957 // Apply the CHR transformation. 1958 if (!SortedScopes.empty()) { 1959 transformScopes(SortedScopes); 1960 Changed = true; 1961 } 1962 } 1963 1964 if (Changed) 1965 CHR_DEBUG(dumpIR(F, "after", &Stats)); 1966 1967 return Changed; 1968 } 1969 1970 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 1971 BlockFrequencyInfo &BFI = 1972 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 1973 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1974 ProfileSummaryInfo &PSI = 1975 *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1976 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1977 return CHR(F, BFI, DT, PSI, RI).run(); 1978 } 1979 1980 namespace llvm { 1981 1982 ControlHeightReductionPass::ControlHeightReductionPass() { 1983 parseCHRFilterFiles(); 1984 } 1985 1986 PreservedAnalyses ControlHeightReductionPass::run( 1987 Function &F, 1988 FunctionAnalysisManager &FAM) { 1989 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 1990 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1991 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 1992 auto &MAM = MAMProxy.getManager(); 1993 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 1994 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 1995 bool Changed = CHR(F, BFI, DT, PSI, RI).run(); 1996 if (!Changed) 1997 return PreservedAnalyses::all(); 1998 auto PA = PreservedAnalyses(); 1999 PA.preserve<GlobalsAA>(); 2000 return PA; 2001 } 2002 2003 } // namespace llvm 2004