1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass merges conditional blocks of code and reduces the number of 11 // conditional branches in the hot paths based on profiles. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/Analysis/BlockFrequencyInfo.h" 21 #include "llvm/Analysis/GlobalsModRef.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/Support/BranchProbability.h" 31 #include "llvm/Support/MemoryBuffer.h" 32 #include "llvm/Transforms/Utils.h" 33 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 34 #include "llvm/Transforms/Utils/Cloning.h" 35 #include "llvm/Transforms/Utils/ValueMapper.h" 36 37 #include <set> 38 #include <sstream> 39 40 using namespace llvm; 41 42 #define DEBUG_TYPE "chr" 43 44 #define CHR_DEBUG(X) LLVM_DEBUG(X) 45 46 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 47 cl::desc("Apply CHR for all functions")); 48 49 static cl::opt<double> CHRBiasThreshold( 50 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 51 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 52 53 static cl::opt<unsigned> CHRMergeThreshold( 54 "chr-merge-threshold", cl::init(2), cl::Hidden, 55 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 56 57 static cl::opt<std::string> CHRModuleList( 58 "chr-module-list", cl::init(""), cl::Hidden, 59 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 60 61 static cl::opt<std::string> CHRFunctionList( 62 "chr-function-list", cl::init(""), cl::Hidden, 63 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 64 65 static StringSet<> CHRModules; 66 static StringSet<> CHRFunctions; 67 68 static void ParseCHRFilterFiles() { 69 if (!CHRModuleList.empty()) { 70 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 71 if (!FileOrErr) { 72 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 73 std::exit(1); 74 } 75 StringRef Buf = FileOrErr->get()->getBuffer(); 76 SmallVector<StringRef, 0> Lines; 77 Buf.split(Lines, '\n'); 78 for (StringRef Line : Lines) { 79 Line = Line.trim(); 80 if (!Line.empty()) 81 CHRModules.insert(Line); 82 } 83 } 84 if (!CHRFunctionList.empty()) { 85 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 86 if (!FileOrErr) { 87 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 88 std::exit(1); 89 } 90 StringRef Buf = FileOrErr->get()->getBuffer(); 91 SmallVector<StringRef, 0> Lines; 92 Buf.split(Lines, '\n'); 93 for (StringRef Line : Lines) { 94 Line = Line.trim(); 95 if (!Line.empty()) 96 CHRFunctions.insert(Line); 97 } 98 } 99 } 100 101 namespace { 102 class ControlHeightReductionLegacyPass : public FunctionPass { 103 public: 104 static char ID; 105 106 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 107 initializeControlHeightReductionLegacyPassPass( 108 *PassRegistry::getPassRegistry()); 109 ParseCHRFilterFiles(); 110 } 111 112 bool runOnFunction(Function &F) override; 113 void getAnalysisUsage(AnalysisUsage &AU) const override { 114 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 115 AU.addRequired<DominatorTreeWrapperPass>(); 116 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 117 AU.addRequired<RegionInfoPass>(); 118 AU.addPreserved<GlobalsAAWrapperPass>(); 119 } 120 }; 121 } // end anonymous namespace 122 123 char ControlHeightReductionLegacyPass::ID = 0; 124 125 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 126 "chr", 127 "Reduce control height in the hot paths", 128 false, false) 129 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 132 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 133 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 134 "chr", 135 "Reduce control height in the hot paths", 136 false, false) 137 138 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 139 return new ControlHeightReductionLegacyPass(); 140 } 141 142 namespace { 143 144 struct CHRStats { 145 CHRStats() : NumBranches(0), NumBranchesDelta(0), 146 WeightedNumBranchesDelta(0) {} 147 void print(raw_ostream &OS) const { 148 OS << "CHRStats: NumBranches " << NumBranches 149 << " NumBranchesDelta " << NumBranchesDelta 150 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 151 } 152 uint64_t NumBranches; // The original number of conditional branches / 153 // selects 154 uint64_t NumBranchesDelta; // The decrease of the number of conditional 155 // branches / selects in the hot paths due to CHR. 156 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 157 // count at the scope entry. 158 }; 159 160 // RegInfo - some properties of a Region. 161 struct RegInfo { 162 RegInfo() : R(nullptr), HasBranch(false) {} 163 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 164 Region *R; 165 bool HasBranch; 166 SmallVector<SelectInst *, 8> Selects; 167 }; 168 169 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 170 171 // CHRScope - a sequence of regions to CHR together. It corresponds to a 172 // sequence of conditional blocks. It can have subscopes which correspond to 173 // nested conditional blocks. Nested CHRScopes form a tree. 174 class CHRScope { 175 public: 176 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 177 assert(RI.R && "Null RegionIn"); 178 RegInfos.push_back(RI); 179 } 180 181 Region *getParentRegion() { 182 assert(RegInfos.size() > 0 && "Empty CHRScope"); 183 Region *Parent = RegInfos[0].R->getParent(); 184 assert(Parent && "Unexpected to call this on the top-level region"); 185 return Parent; 186 } 187 188 BasicBlock *getEntryBlock() { 189 assert(RegInfos.size() > 0 && "Empty CHRScope"); 190 return RegInfos.front().R->getEntry(); 191 } 192 193 BasicBlock *getExitBlock() { 194 assert(RegInfos.size() > 0 && "Empty CHRScope"); 195 return RegInfos.back().R->getExit(); 196 } 197 198 bool appendable(CHRScope *Next) { 199 // The next scope is appendable only if this scope is directly connected to 200 // it (which implies it post-dominates this scope) and this scope dominates 201 // it (no edge to the next scope outside this scope). 202 BasicBlock *NextEntry = Next->getEntryBlock(); 203 if (getExitBlock() != NextEntry) 204 // Not directly connected. 205 return false; 206 Region *LastRegion = RegInfos.back().R; 207 for (BasicBlock *Pred : predecessors(NextEntry)) 208 if (!LastRegion->contains(Pred)) 209 // There's an edge going into the entry of the next scope from outside 210 // of this scope. 211 return false; 212 return true; 213 } 214 215 void append(CHRScope *Next) { 216 assert(RegInfos.size() > 0 && "Empty CHRScope"); 217 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 218 assert(getParentRegion() == Next->getParentRegion() && 219 "Must be siblings"); 220 assert(getExitBlock() == Next->getEntryBlock() && 221 "Must be adjacent"); 222 for (RegInfo &RI : Next->RegInfos) 223 RegInfos.push_back(RI); 224 for (CHRScope *Sub : Next->Subs) 225 Subs.push_back(Sub); 226 } 227 228 void addSub(CHRScope *SubIn) { 229 #ifndef NDEBUG 230 bool is_child = false; 231 for (RegInfo &RI : RegInfos) 232 if (RI.R == SubIn->getParentRegion()) { 233 is_child = true; 234 break; 235 } 236 assert(is_child && "Must be a child"); 237 #endif 238 Subs.push_back(SubIn); 239 } 240 241 // Split this scope at the boundary region into two, which will belong to the 242 // tail and returns the tail. 243 CHRScope *split(Region *Boundary) { 244 assert(Boundary && "Boundary null"); 245 assert(RegInfos.begin()->R != Boundary && 246 "Can't be split at beginning"); 247 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), 248 [&Boundary](const RegInfo& RI) { 249 return Boundary == RI.R; 250 }); 251 if (BoundaryIt == RegInfos.end()) 252 return nullptr; 253 SmallVector<RegInfo, 8> TailRegInfos; 254 SmallVector<CHRScope *, 8> TailSubs; 255 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); 256 RegInfos.resize(BoundaryIt - RegInfos.begin()); 257 DenseSet<Region *> TailRegionSet; 258 for (RegInfo &RI : TailRegInfos) 259 TailRegionSet.insert(RI.R); 260 for (auto It = Subs.begin(); It != Subs.end(); ) { 261 CHRScope *Sub = *It; 262 assert(Sub && "null Sub"); 263 Region *Parent = Sub->getParentRegion(); 264 if (TailRegionSet.count(Parent)) { 265 TailSubs.push_back(Sub); 266 It = Subs.erase(It); 267 } else { 268 assert(std::find_if(RegInfos.begin(), RegInfos.end(), 269 [&Parent](const RegInfo& RI) { 270 return Parent == RI.R; 271 }) != RegInfos.end() && 272 "Must be in head"); 273 ++It; 274 } 275 } 276 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 277 return new CHRScope(TailRegInfos, TailSubs); 278 } 279 280 bool contains(Instruction *I) const { 281 BasicBlock *Parent = I->getParent(); 282 for (const RegInfo &RI : RegInfos) 283 if (RI.R->contains(Parent)) 284 return true; 285 return false; 286 } 287 288 void print(raw_ostream &OS) const; 289 290 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 291 SmallVector<CHRScope *, 8> Subs; // Subscopes. 292 293 // The instruction at which to insert the CHR conditional branch (and hoist 294 // the dependent condition values). 295 Instruction *BranchInsertPoint; 296 297 // True-biased and false-biased regions (conditional blocks), 298 // respectively. Used only for the outermost scope and includes regions in 299 // subscopes. The rest are unbiased. 300 DenseSet<Region *> TrueBiasedRegions; 301 DenseSet<Region *> FalseBiasedRegions; 302 // Among the biased regions, the regions that get CHRed. 303 SmallVector<RegInfo, 8> CHRRegions; 304 305 // True-biased and false-biased selects, respectively. Used only for the 306 // outermost scope and includes ones in subscopes. 307 DenseSet<SelectInst *> TrueBiasedSelects; 308 DenseSet<SelectInst *> FalseBiasedSelects; 309 310 // Map from one of the above regions to the instructions to stop 311 // hoisting instructions at through use-def chains. 312 HoistStopMapTy HoistStopMap; 313 314 private: 315 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, 316 SmallVector<CHRScope *, 8> &SubsIn) 317 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 318 }; 319 320 class CHR { 321 public: 322 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 323 ProfileSummaryInfo &PSIin, RegionInfo &RIin) 324 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {} 325 326 ~CHR() { 327 for (CHRScope *Scope : Scopes) { 328 delete Scope; 329 } 330 } 331 332 bool run(); 333 334 private: 335 // See the comments in CHR::run() for the high level flow of the algorithm and 336 // what the following functions do. 337 338 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 339 Region *R = RI.getTopLevelRegion(); 340 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); 341 if (Scope) { 342 Output.push_back(Scope); 343 } 344 } 345 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 346 SmallVectorImpl<CHRScope *> &Scopes); 347 CHRScope *findScope(Region *R); 348 void checkScopeHoistable(CHRScope *Scope); 349 350 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 351 SmallVectorImpl<CHRScope *> &Output); 352 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 353 CHRScope *Outer, 354 DenseSet<Value *> *OuterConditionValues, 355 Instruction *OuterInsertPoint, 356 SmallVectorImpl<CHRScope *> &Output, 357 DenseSet<Instruction *> &Unhoistables); 358 359 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 360 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 361 362 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 363 SmallVectorImpl<CHRScope *> &Output); 364 365 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 366 SmallVectorImpl<CHRScope *> &Output); 367 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 368 369 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 370 SmallVectorImpl<CHRScope *> &Output); 371 372 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 373 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 374 void cloneScopeBlocks(CHRScope *Scope, 375 BasicBlock *PreEntryBlock, 376 BasicBlock *ExitBlock, 377 Region *LastRegion, 378 ValueToValueMapTy &VMap); 379 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 380 BasicBlock *EntryBlock, 381 BasicBlock *NewEntryBlock, 382 ValueToValueMapTy &VMap); 383 void fixupBranchesAndSelects(CHRScope *Scope, 384 BasicBlock *PreEntryBlock, 385 BranchInst *MergedBR, 386 uint64_t ProfileCount); 387 void fixupBranch(Region *R, 388 CHRScope *Scope, 389 IRBuilder<> &IRB, 390 Value *&MergedCondition, BranchProbability &CHRBranchBias); 391 void fixupSelect(SelectInst* SI, 392 CHRScope *Scope, 393 IRBuilder<> &IRB, 394 Value *&MergedCondition, BranchProbability &CHRBranchBias); 395 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 396 Instruction *BranchOrSelect, 397 CHRScope *Scope, 398 IRBuilder<> &IRB, 399 Value *&MergedCondition); 400 401 Function &F; 402 BlockFrequencyInfo &BFI; 403 DominatorTree &DT; 404 ProfileSummaryInfo &PSI; 405 RegionInfo &RI; 406 CHRStats Stats; 407 408 // All the true-biased regions in the function 409 DenseSet<Region *> TrueBiasedRegionsGlobal; 410 // All the false-biased regions in the function 411 DenseSet<Region *> FalseBiasedRegionsGlobal; 412 // All the true-biased selects in the function 413 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 414 // All the false-biased selects in the function 415 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 416 // A map from biased regions to their branch bias 417 DenseMap<Region *, BranchProbability> BranchBiasMap; 418 // A map from biased selects to their branch bias 419 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 420 // All the scopes. 421 DenseSet<CHRScope *> Scopes; 422 }; 423 424 } // end anonymous namespace 425 426 static inline 427 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 428 const CHRStats &Stats) { 429 Stats.print(OS); 430 return OS; 431 } 432 433 static inline 434 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 435 Scope.print(OS); 436 return OS; 437 } 438 439 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 440 if (ForceCHR) 441 return true; 442 443 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 444 if (CHRModules.count(F.getParent()->getName())) 445 return true; 446 return CHRFunctions.count(F.getName()); 447 } 448 449 assert(PSI.hasProfileSummary() && "Empty PSI?"); 450 return PSI.isFunctionEntryHot(&F); 451 } 452 453 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 454 CHRStats *Stats) { 455 StringRef FuncName = F.getName(); 456 StringRef ModuleName = F.getParent()->getName(); 457 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 458 << FuncName); 459 if (Stats) 460 CHR_DEBUG(dbgs() << " " << *Stats); 461 CHR_DEBUG(dbgs() << "\n"); 462 CHR_DEBUG(F.dump()); 463 } 464 465 void CHRScope::print(raw_ostream &OS) const { 466 assert(RegInfos.size() > 0 && "Empty CHRScope"); 467 OS << "CHRScope["; 468 OS << RegInfos.size() << ", Regions["; 469 for (const RegInfo &RI : RegInfos) { 470 OS << RI.R->getNameStr(); 471 if (RI.HasBranch) 472 OS << " B"; 473 if (RI.Selects.size() > 0) 474 OS << " S" << RI.Selects.size(); 475 OS << ", "; 476 } 477 if (RegInfos[0].R->getParent()) { 478 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 479 } else { 480 // top level region 481 OS << "]"; 482 } 483 OS << ", Subs["; 484 for (CHRScope *Sub : Subs) { 485 OS << *Sub << ", "; 486 } 487 OS << "]]"; 488 } 489 490 // Return true if the given instruction type can be hoisted by CHR. 491 static bool isHoistableInstructionType(Instruction *I) { 492 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 493 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 494 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 495 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 496 isa<InsertValueInst>(I); 497 } 498 499 // Return true if the given instruction can be hoisted by CHR. 500 static bool isHoistable(Instruction *I, DominatorTree &DT) { 501 if (!isHoistableInstructionType(I)) 502 return false; 503 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 504 } 505 506 // Recursively traverse the use-def chains of the given value and return a set 507 // of the unhoistable base values defined within the scope (excluding the 508 // first-region entry block) or the (hoistable or unhoistable) base values that 509 // are defined outside (including the first-region entry block) of the 510 // scope. The returned set doesn't include constants. 511 static std::set<Value *> getBaseValues(Value *V, 512 DominatorTree &DT) { 513 std::set<Value *> Result; 514 if (auto *I = dyn_cast<Instruction>(V)) { 515 // We don't stop at a block that's not in the Scope because we would miss some 516 // instructions that are based on the same base values if we stop there. 517 if (!isHoistable(I, DT)) { 518 Result.insert(I); 519 return Result; 520 } 521 // I is hoistable above the Scope. 522 for (Value *Op : I->operands()) { 523 std::set<Value *> OpResult = getBaseValues(Op, DT); 524 Result.insert(OpResult.begin(), OpResult.end()); 525 } 526 return Result; 527 } 528 if (isa<Argument>(V)) { 529 Result.insert(V); 530 return Result; 531 } 532 // We don't include others like constants because those won't lead to any 533 // chance of folding of conditions (eg two bit checks merged into one check) 534 // after CHR. 535 return Result; // empty 536 } 537 538 // Return true if V is already hoisted or can be hoisted (along with its 539 // operands) above the insert point. When it returns true and HoistStops is 540 // non-null, the instructions to stop hoisting at through the use-def chains are 541 // inserted into HoistStops. 542 static bool 543 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 544 DenseSet<Instruction *> &Unhoistables, 545 DenseSet<Instruction *> *HoistStops) { 546 assert(InsertPoint && "Null InsertPoint"); 547 if (auto *I = dyn_cast<Instruction>(V)) { 548 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 549 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 550 if (Unhoistables.count(I)) { 551 // Don't hoist if they are not to be hoisted. 552 return false; 553 } 554 if (DT.dominates(I, InsertPoint)) { 555 // We are already above the insert point. Stop here. 556 if (HoistStops) 557 HoistStops->insert(I); 558 return true; 559 } 560 // We aren't not above the insert point, check if we can hoist it above the 561 // insert point. 562 if (isHoistable(I, DT)) { 563 // Check operands first. 564 DenseSet<Instruction *> OpsHoistStops; 565 bool AllOpsHoisted = true; 566 for (Value *Op : I->operands()) { 567 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) { 568 AllOpsHoisted = false; 569 break; 570 } 571 } 572 if (AllOpsHoisted) { 573 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 574 if (HoistStops) 575 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 576 return true; 577 } 578 } 579 return false; 580 } 581 // Non-instructions are considered hoistable. 582 return true; 583 } 584 585 // Returns true and sets the true probability and false probability of an 586 // MD_prof metadata if it's well-formed. 587 static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb, 588 BranchProbability &FalseProb) { 589 if (!MD) return false; 590 MDString *MDName = cast<MDString>(MD->getOperand(0)); 591 if (MDName->getString() != "branch_weights" || 592 MD->getNumOperands() != 3) 593 return false; 594 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 595 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 596 if (!TrueWeight || !FalseWeight) 597 return false; 598 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 599 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 600 uint64_t SumWt = TrueWt + FalseWt; 601 602 assert(SumWt >= TrueWt && SumWt >= FalseWt && 603 "Overflow calculating branch probabilities."); 604 605 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 606 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 607 return true; 608 } 609 610 static BranchProbability getCHRBiasThreshold() { 611 return BranchProbability::getBranchProbability( 612 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 613 } 614 615 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 616 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 617 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 618 // false. 619 template<typename K, typename S, typename M> 620 bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, 621 S &TrueSet, S &FalseSet, M &BiasMap) { 622 BranchProbability Threshold = getCHRBiasThreshold(); 623 if (TrueProb >= Threshold) { 624 TrueSet.insert(Key); 625 BiasMap[Key] = TrueProb; 626 return true; 627 } else if (FalseProb >= Threshold) { 628 FalseSet.insert(Key); 629 BiasMap[Key] = FalseProb; 630 return true; 631 } 632 return false; 633 } 634 635 // Returns true and insert a region into the right biased set and the map if the 636 // branch of the region is biased. 637 static bool CheckBiasedBranch(BranchInst *BI, Region *R, 638 DenseSet<Region *> &TrueBiasedRegionsGlobal, 639 DenseSet<Region *> &FalseBiasedRegionsGlobal, 640 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 641 if (!BI->isConditional()) 642 return false; 643 BranchProbability ThenProb, ElseProb; 644 if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof), 645 ThenProb, ElseProb)) 646 return false; 647 BasicBlock *IfThen = BI->getSuccessor(0); 648 BasicBlock *IfElse = BI->getSuccessor(1); 649 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 650 IfThen != IfElse && 651 "Invariant from findScopes"); 652 if (IfThen == R->getExit()) { 653 // Swap them so that IfThen/ThenProb means going into the conditional code 654 // and IfElse/ElseProb means skipping it. 655 std::swap(IfThen, IfElse); 656 std::swap(ThenProb, ElseProb); 657 } 658 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 659 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 660 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 661 return CheckBias(R, ThenProb, ElseProb, 662 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 663 BranchBiasMap); 664 } 665 666 // Returns true and insert a select into the right biased set and the map if the 667 // select is biased. 668 static bool CheckBiasedSelect( 669 SelectInst *SI, Region *R, 670 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 671 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 672 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 673 BranchProbability TrueProb, FalseProb; 674 if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof), 675 TrueProb, FalseProb)) 676 return false; 677 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 678 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 679 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 680 return CheckBias(SI, TrueProb, FalseProb, 681 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 682 SelectBiasMap); 683 } 684 685 // Returns the instruction at which to hoist the dependent condition values and 686 // insert the CHR branch for a region. This is the terminator branch in the 687 // entry block or the first select in the entry block, if any. 688 static Instruction* getBranchInsertPoint(RegInfo &RI) { 689 Region *R = RI.R; 690 BasicBlock *EntryBB = R->getEntry(); 691 // The hoist point is by default the terminator of the entry block, which is 692 // the same as the branch instruction if RI.HasBranch is true. 693 Instruction *HoistPoint = EntryBB->getTerminator(); 694 for (SelectInst *SI : RI.Selects) { 695 if (SI->getParent() == EntryBB) { 696 // Pick the first select in Selects in the entry block. Note Selects is 697 // sorted in the instruction order within a block (asserted below). 698 HoistPoint = SI; 699 break; 700 } 701 } 702 assert(HoistPoint && "Null HoistPoint"); 703 #ifndef NDEBUG 704 // Check that HoistPoint is the first one in Selects in the entry block, 705 // if any. 706 DenseSet<Instruction *> EntryBlockSelectSet; 707 for (SelectInst *SI : RI.Selects) { 708 if (SI->getParent() == EntryBB) { 709 EntryBlockSelectSet.insert(SI); 710 } 711 } 712 for (Instruction &I : *EntryBB) { 713 if (EntryBlockSelectSet.count(&I) > 0) { 714 assert(&I == HoistPoint && 715 "HoistPoint must be the first one in Selects"); 716 break; 717 } 718 } 719 #endif 720 return HoistPoint; 721 } 722 723 // Find a CHR scope in the given region. 724 CHRScope * CHR::findScope(Region *R) { 725 CHRScope *Result = nullptr; 726 BasicBlock *Entry = R->getEntry(); 727 BasicBlock *Exit = R->getExit(); // null if top level. 728 assert(Entry && "Entry must not be null"); 729 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 730 "Only top level region has a null exit"); 731 if (Entry) 732 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 733 else 734 CHR_DEBUG(dbgs() << "Entry null\n"); 735 if (Exit) 736 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 737 else 738 CHR_DEBUG(dbgs() << "Exit null\n"); 739 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 740 // to this region). 741 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 742 if (EntryInSubregion) 743 return nullptr; 744 // Exclude loops 745 for (BasicBlock *Pred : predecessors(Entry)) 746 if (R->contains(Pred)) 747 return nullptr; 748 if (Exit) { 749 // Try to find an if-then block (check if R is an if-then). 750 // if (cond) { 751 // ... 752 // } 753 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 754 if (BI) 755 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 756 else 757 CHR_DEBUG(dbgs() << "BI null\n"); 758 if (BI && BI->isConditional()) { 759 BasicBlock *S0 = BI->getSuccessor(0); 760 BasicBlock *S1 = BI->getSuccessor(1); 761 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 762 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 763 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 764 RegInfo RI(R); 765 RI.HasBranch = CheckBiasedBranch( 766 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 767 BranchBiasMap); 768 Result = new CHRScope(RI); 769 Scopes.insert(Result); 770 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 771 ++Stats.NumBranches; 772 } 773 } 774 } 775 { 776 // Try to look for selects in the direct child blocks (as opposed to in 777 // subregions) of R. 778 // ... 779 // if (..) { // Some subregion 780 // ... 781 // } 782 // if (..) { // Some subregion 783 // ... 784 // } 785 // ... 786 // a = cond ? b : c; 787 // ... 788 SmallVector<SelectInst *, 8> Selects; 789 for (RegionNode *E : R->elements()) { 790 if (E->isSubRegion()) 791 continue; 792 // This returns the basic block of E if E is a direct child of R (not a 793 // subregion.) 794 BasicBlock *BB = E->getEntry(); 795 // Need to push in the order to make it easier to find the first Select 796 // later. 797 for (Instruction &I : *BB) { 798 if (auto *SI = dyn_cast<SelectInst>(&I)) { 799 Selects.push_back(SI); 800 ++Stats.NumBranches; 801 } 802 } 803 } 804 if (Selects.size() > 0) { 805 auto AddSelects = [&](RegInfo &RI) { 806 for (auto *SI : Selects) 807 if (CheckBiasedSelect(SI, RI.R, 808 TrueBiasedSelectsGlobal, 809 FalseBiasedSelectsGlobal, 810 SelectBiasMap)) 811 RI.Selects.push_back(SI); 812 }; 813 if (!Result) { 814 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 815 RegInfo RI(R); 816 AddSelects(RI); 817 Result = new CHRScope(RI); 818 Scopes.insert(Result); 819 } else { 820 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 821 AddSelects(Result->RegInfos[0]); 822 } 823 } 824 } 825 826 if (Result) { 827 checkScopeHoistable(Result); 828 } 829 return Result; 830 } 831 832 // Check that any of the branch and the selects in the region could be 833 // hoisted above the the CHR branch insert point (the most dominating of 834 // them, either the branch (at the end of the first block) or the first 835 // select in the first block). If the branch can't be hoisted, drop the 836 // selects in the first blocks. 837 // 838 // For example, for the following scope/region with selects, we want to insert 839 // the merged branch right before the first select in the first/entry block by 840 // hoisting c1, c2, c3, and c4. 841 // 842 // // Branch insert point here. 843 // a = c1 ? b : c; // Select 1 844 // d = c2 ? e : f; // Select 2 845 // if (c3) { // Branch 846 // ... 847 // c4 = foo() // A call. 848 // g = c4 ? h : i; // Select 3 849 // } 850 // 851 // But suppose we can't hoist c4 because it's dependent on the preceding 852 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 853 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 854 void CHR::checkScopeHoistable(CHRScope *Scope) { 855 RegInfo &RI = Scope->RegInfos[0]; 856 Region *R = RI.R; 857 BasicBlock *EntryBB = R->getEntry(); 858 auto *Branch = RI.HasBranch ? 859 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 860 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 861 if (RI.HasBranch || !Selects.empty()) { 862 Instruction *InsertPoint = getBranchInsertPoint(RI); 863 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 864 // Avoid a data dependence from a select or a branch to a(nother) 865 // select. Note no instruction can't data-depend on a branch (a branch 866 // instruction doesn't produce a value). 867 DenseSet<Instruction *> Unhoistables; 868 // Initialize Unhoistables with the selects. 869 for (SelectInst *SI : Selects) { 870 Unhoistables.insert(SI); 871 } 872 // Remove Selects that can't be hoisted. 873 for (auto it = Selects.begin(); it != Selects.end(); ) { 874 SelectInst *SI = *it; 875 if (SI == InsertPoint) { 876 ++it; 877 continue; 878 } 879 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 880 DT, Unhoistables, nullptr); 881 if (!IsHoistable) { 882 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 883 it = Selects.erase(it); 884 // Since we are dropping the select here, we also drop it from 885 // Unhoistables. 886 Unhoistables.erase(SI); 887 } else 888 ++it; 889 } 890 // Update InsertPoint after potentially removing selects. 891 InsertPoint = getBranchInsertPoint(RI); 892 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 893 if (RI.HasBranch && InsertPoint != Branch) { 894 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 895 DT, Unhoistables, nullptr); 896 if (!IsHoistable) { 897 // If the branch isn't hoistable, drop the selects in the entry 898 // block, preferring the branch, which makes the branch the hoist 899 // point. 900 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 901 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 902 CHR_DEBUG( 903 for (SelectInst *SI : Selects) { 904 dbgs() << "SI " << *SI << "\n"; 905 }); 906 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 907 [EntryBB](SelectInst *SI) { 908 return SI->getParent() == EntryBB; 909 }), Selects.end()); 910 Unhoistables.clear(); 911 InsertPoint = Branch; 912 } 913 } 914 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 915 #ifndef NDEBUG 916 if (RI.HasBranch) { 917 assert(!DT.dominates(Branch, InsertPoint) && 918 "Branch can't be already above the hoist point"); 919 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 920 DT, Unhoistables, nullptr) && 921 "checkHoistValue for branch"); 922 } 923 for (auto *SI : Selects) { 924 assert(!DT.dominates(SI, InsertPoint) && 925 "SI can't be already above the hoist point"); 926 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 927 Unhoistables, nullptr) && 928 "checkHoistValue for selects"); 929 } 930 CHR_DEBUG(dbgs() << "Result\n"); 931 if (RI.HasBranch) { 932 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 933 } 934 for (auto *SI : Selects) { 935 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 936 } 937 #endif 938 } 939 } 940 941 // Traverse the region tree, find all nested scopes and merge them if possible. 942 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 943 SmallVectorImpl<CHRScope *> &Scopes) { 944 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 945 CHRScope *Result = findScope(R); 946 // Visit subscopes. 947 CHRScope *ConsecutiveSubscope = nullptr; 948 SmallVector<CHRScope *, 8> Subscopes; 949 for (auto It = R->begin(); It != R->end(); ++It) { 950 const std::unique_ptr<Region> &SubR = *It; 951 auto Next_It = std::next(It); 952 Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr; 953 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 954 << "\n"); 955 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 956 if (SubCHRScope) { 957 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 958 } else { 959 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 960 } 961 if (SubCHRScope) { 962 if (!ConsecutiveSubscope) 963 ConsecutiveSubscope = SubCHRScope; 964 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 965 Subscopes.push_back(ConsecutiveSubscope); 966 ConsecutiveSubscope = SubCHRScope; 967 } else 968 ConsecutiveSubscope->append(SubCHRScope); 969 } else { 970 if (ConsecutiveSubscope) { 971 Subscopes.push_back(ConsecutiveSubscope); 972 } 973 ConsecutiveSubscope = nullptr; 974 } 975 } 976 if (ConsecutiveSubscope) { 977 Subscopes.push_back(ConsecutiveSubscope); 978 } 979 for (CHRScope *Sub : Subscopes) { 980 if (Result) { 981 // Combine it with the parent. 982 Result->addSub(Sub); 983 } else { 984 // Push Subscopes as they won't be combined with the parent. 985 Scopes.push_back(Sub); 986 } 987 } 988 return Result; 989 } 990 991 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 992 DenseSet<Value *> ConditionValues; 993 if (RI.HasBranch) { 994 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 995 ConditionValues.insert(BI->getCondition()); 996 } 997 for (SelectInst *SI : RI.Selects) { 998 ConditionValues.insert(SI->getCondition()); 999 } 1000 return ConditionValues; 1001 } 1002 1003 1004 // Determine whether to split a scope depending on the sets of the branch 1005 // condition values of the previous region and the current region. We split 1006 // (return true) it if 1) the condition values of the inner/lower scope can't be 1007 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1008 // values have an empty intersection (because the combined branch conditions 1009 // won't probably lead to a simpler combined condition). 1010 static bool shouldSplit(Instruction *InsertPoint, 1011 DenseSet<Value *> &PrevConditionValues, 1012 DenseSet<Value *> &ConditionValues, 1013 DominatorTree &DT, 1014 DenseSet<Instruction *> &Unhoistables) { 1015 CHR_DEBUG( 1016 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1017 for (Value *V : PrevConditionValues) { 1018 dbgs() << *V << ", "; 1019 } 1020 dbgs() << " ConditionValues "; 1021 for (Value *V : ConditionValues) { 1022 dbgs() << *V << ", "; 1023 } 1024 dbgs() << "\n"); 1025 assert(InsertPoint && "Null InsertPoint"); 1026 // If any of Bases isn't hoistable to the hoist point, split. 1027 for (Value *V : ConditionValues) { 1028 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) { 1029 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1030 return true; // Not hoistable, split. 1031 } 1032 } 1033 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1034 // unnecessary splits at scopes with no branch/selects. If 1035 // PrevConditionValues and ConditionValues don't intersect at all, split. 1036 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1037 // Use std::set as DenseSet doesn't work with set_intersection. 1038 std::set<Value *> PrevBases, Bases; 1039 for (Value *V : PrevConditionValues) { 1040 std::set<Value *> BaseValues = getBaseValues(V, DT); 1041 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1042 } 1043 for (Value *V : ConditionValues) { 1044 std::set<Value *> BaseValues = getBaseValues(V, DT); 1045 Bases.insert(BaseValues.begin(), BaseValues.end()); 1046 } 1047 CHR_DEBUG( 1048 dbgs() << "PrevBases "; 1049 for (Value *V : PrevBases) { 1050 dbgs() << *V << ", "; 1051 } 1052 dbgs() << " Bases "; 1053 for (Value *V : Bases) { 1054 dbgs() << *V << ", "; 1055 } 1056 dbgs() << "\n"); 1057 std::set<Value *> Intersection; 1058 std::set_intersection(PrevBases.begin(), PrevBases.end(), 1059 Bases.begin(), Bases.end(), 1060 std::inserter(Intersection, Intersection.begin())); 1061 if (Intersection.empty()) { 1062 // Empty intersection, split. 1063 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1064 return true; 1065 } 1066 } 1067 CHR_DEBUG(dbgs() << "No split\n"); 1068 return false; // Don't split. 1069 } 1070 1071 static void GetSelectsInScope(CHRScope *Scope, 1072 DenseSet<Instruction *> &Output) { 1073 for (RegInfo &RI : Scope->RegInfos) { 1074 for (SelectInst *SI : RI.Selects) { 1075 Output.insert(SI); 1076 } 1077 } 1078 for (CHRScope *Sub : Scope->Subs) { 1079 GetSelectsInScope(Sub, Output); 1080 } 1081 } 1082 1083 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1084 SmallVectorImpl<CHRScope *> &Output) { 1085 for (CHRScope *Scope : Input) { 1086 assert(!Scope->BranchInsertPoint && 1087 "BranchInsertPoint must not be set"); 1088 DenseSet<Instruction *> Unhoistables; 1089 GetSelectsInScope(Scope, Unhoistables); 1090 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1091 } 1092 #ifndef NDEBUG 1093 for (CHRScope *Scope : Output) { 1094 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1095 } 1096 #endif 1097 } 1098 1099 SmallVector<CHRScope *, 8> CHR::splitScope( 1100 CHRScope *Scope, 1101 CHRScope *Outer, 1102 DenseSet<Value *> *OuterConditionValues, 1103 Instruction *OuterInsertPoint, 1104 SmallVectorImpl<CHRScope *> &Output, 1105 DenseSet<Instruction *> &Unhoistables) { 1106 if (Outer) { 1107 assert(OuterConditionValues && "Null OuterConditionValues"); 1108 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1109 } 1110 bool PrevSplitFromOuter = true; 1111 DenseSet<Value *> PrevConditionValues; 1112 Instruction *PrevInsertPoint = nullptr; 1113 SmallVector<CHRScope *, 8> Splits; 1114 SmallVector<bool, 8> SplitsSplitFromOuter; 1115 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1116 SmallVector<Instruction *, 8> SplitsInsertPoints; 1117 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1118 for (RegInfo &RI : RegInfos) { 1119 Instruction *InsertPoint = getBranchInsertPoint(RI); 1120 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1121 CHR_DEBUG( 1122 dbgs() << "ConditionValues "; 1123 for (Value *V : ConditionValues) { 1124 dbgs() << *V << ", "; 1125 } 1126 dbgs() << "\n"); 1127 if (RI.R == RegInfos[0].R) { 1128 // First iteration. Check to see if we should split from the outer. 1129 if (Outer) { 1130 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1131 CHR_DEBUG(dbgs() << "Should split from outer at " 1132 << RI.R->getNameStr() << "\n"); 1133 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1134 ConditionValues, DT, Unhoistables)) { 1135 PrevConditionValues = ConditionValues; 1136 PrevInsertPoint = InsertPoint; 1137 } else { 1138 // Not splitting from the outer. Use the outer bases and insert 1139 // point. Union the bases. 1140 PrevSplitFromOuter = false; 1141 PrevConditionValues = *OuterConditionValues; 1142 PrevConditionValues.insert(ConditionValues.begin(), 1143 ConditionValues.end()); 1144 PrevInsertPoint = OuterInsertPoint; 1145 } 1146 } else { 1147 CHR_DEBUG(dbgs() << "Outer null\n"); 1148 PrevConditionValues = ConditionValues; 1149 PrevInsertPoint = InsertPoint; 1150 } 1151 } else { 1152 CHR_DEBUG(dbgs() << "Should split from prev at " 1153 << RI.R->getNameStr() << "\n"); 1154 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1155 DT, Unhoistables)) { 1156 CHRScope *Tail = Scope->split(RI.R); 1157 Scopes.insert(Tail); 1158 Splits.push_back(Scope); 1159 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1160 SplitsConditionValues.push_back(PrevConditionValues); 1161 SplitsInsertPoints.push_back(PrevInsertPoint); 1162 Scope = Tail; 1163 PrevConditionValues = ConditionValues; 1164 PrevInsertPoint = InsertPoint; 1165 PrevSplitFromOuter = true; 1166 } else { 1167 // Not splitting. Union the bases. Keep the hoist point. 1168 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1169 } 1170 } 1171 } 1172 Splits.push_back(Scope); 1173 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1174 SplitsConditionValues.push_back(PrevConditionValues); 1175 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1176 SplitsInsertPoints.push_back(PrevInsertPoint); 1177 assert(Splits.size() == SplitsConditionValues.size() && 1178 Splits.size() == SplitsSplitFromOuter.size() && 1179 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1180 for (size_t I = 0; I < Splits.size(); ++I) { 1181 CHRScope *Split = Splits[I]; 1182 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1183 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1184 SmallVector<CHRScope *, 8> NewSubs; 1185 DenseSet<Instruction *> SplitUnhoistables; 1186 GetSelectsInScope(Split, SplitUnhoistables); 1187 for (CHRScope *Sub : Split->Subs) { 1188 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1189 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1190 SplitUnhoistables); 1191 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1192 } 1193 Split->Subs = NewSubs; 1194 } 1195 SmallVector<CHRScope *, 8> Result; 1196 for (size_t I = 0; I < Splits.size(); ++I) { 1197 CHRScope *Split = Splits[I]; 1198 if (SplitsSplitFromOuter[I]) { 1199 // Split from the outer. 1200 Output.push_back(Split); 1201 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1202 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1203 << "\n"); 1204 } else { 1205 // Connected to the outer. 1206 Result.push_back(Split); 1207 } 1208 } 1209 if (!Outer) 1210 assert(Result.empty() && 1211 "If no outer (top-level), must return no nested ones"); 1212 return Result; 1213 } 1214 1215 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1216 for (CHRScope *Scope : Scopes) { 1217 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1218 classifyBiasedScopes(Scope, Scope); 1219 CHR_DEBUG( 1220 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1221 dbgs() << "TrueBiasedRegions "; 1222 for (Region *R : Scope->TrueBiasedRegions) { 1223 dbgs() << R->getNameStr() << ", "; 1224 } 1225 dbgs() << "\n"; 1226 dbgs() << "FalseBiasedRegions "; 1227 for (Region *R : Scope->FalseBiasedRegions) { 1228 dbgs() << R->getNameStr() << ", "; 1229 } 1230 dbgs() << "\n"; 1231 dbgs() << "TrueBiasedSelects "; 1232 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1233 dbgs() << *SI << ", "; 1234 } 1235 dbgs() << "\n"; 1236 dbgs() << "FalseBiasedSelects "; 1237 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1238 dbgs() << *SI << ", "; 1239 } 1240 dbgs() << "\n";); 1241 } 1242 } 1243 1244 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1245 for (RegInfo &RI : Scope->RegInfos) { 1246 if (RI.HasBranch) { 1247 Region *R = RI.R; 1248 if (TrueBiasedRegionsGlobal.count(R) > 0) 1249 OutermostScope->TrueBiasedRegions.insert(R); 1250 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1251 OutermostScope->FalseBiasedRegions.insert(R); 1252 else 1253 llvm_unreachable("Must be biased"); 1254 } 1255 for (SelectInst *SI : RI.Selects) { 1256 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1257 OutermostScope->TrueBiasedSelects.insert(SI); 1258 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1259 OutermostScope->FalseBiasedSelects.insert(SI); 1260 else 1261 llvm_unreachable("Must be biased"); 1262 } 1263 } 1264 for (CHRScope *Sub : Scope->Subs) { 1265 classifyBiasedScopes(Sub, OutermostScope); 1266 } 1267 } 1268 1269 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1270 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1271 Scope->FalseBiasedRegions.size() + 1272 Scope->TrueBiasedSelects.size() + 1273 Scope->FalseBiasedSelects.size(); 1274 return NumBiased >= CHRMergeThreshold; 1275 } 1276 1277 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1278 SmallVectorImpl<CHRScope *> &Output) { 1279 for (CHRScope *Scope : Input) { 1280 // Filter out the ones with only one region and no subs. 1281 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1282 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1283 << Scope->TrueBiasedRegions.size() 1284 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1285 << " true-selects " << Scope->TrueBiasedSelects.size() 1286 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1287 continue; 1288 } 1289 Output.push_back(Scope); 1290 } 1291 } 1292 1293 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1294 SmallVectorImpl<CHRScope *> &Output) { 1295 for (CHRScope *Scope : Input) { 1296 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1297 "Empty"); 1298 setCHRRegions(Scope, Scope); 1299 Output.push_back(Scope); 1300 CHR_DEBUG( 1301 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1302 for (auto pair : Scope->HoistStopMap) { 1303 Region *R = pair.first; 1304 dbgs() << "Region " << R->getNameStr() << "\n"; 1305 for (Instruction *I : pair.second) { 1306 dbgs() << "HoistStop " << *I << "\n"; 1307 } 1308 } 1309 dbgs() << "CHRRegions" << "\n"; 1310 for (RegInfo &RI : Scope->CHRRegions) { 1311 dbgs() << RI.R->getNameStr() << "\n"; 1312 }); 1313 } 1314 } 1315 1316 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1317 DenseSet<Instruction *> Unhoistables; 1318 // Put the biased selects in Unhoistables because they should stay where they 1319 // are and constant-folded after CHR (in case one biased select or a branch 1320 // can depend on another biased select.) 1321 for (RegInfo &RI : Scope->RegInfos) { 1322 for (SelectInst *SI : RI.Selects) { 1323 Unhoistables.insert(SI); 1324 } 1325 } 1326 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1327 for (RegInfo &RI : Scope->RegInfos) { 1328 Region *R = RI.R; 1329 DenseSet<Instruction *> HoistStops; 1330 bool IsHoisted = false; 1331 if (RI.HasBranch) { 1332 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1333 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1334 "Must be truthy or falsy"); 1335 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1336 // Note checkHoistValue fills in HoistStops. 1337 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1338 Unhoistables, &HoistStops); 1339 assert(IsHoistable && "Must be hoistable"); 1340 (void)(IsHoistable); // Unused in release build 1341 IsHoisted = true; 1342 } 1343 for (SelectInst *SI : RI.Selects) { 1344 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1345 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1346 "Must be true or false biased"); 1347 // Note checkHoistValue fills in HoistStops. 1348 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1349 Unhoistables, &HoistStops); 1350 assert(IsHoistable && "Must be hoistable"); 1351 (void)(IsHoistable); // Unused in release build 1352 IsHoisted = true; 1353 } 1354 if (IsHoisted) { 1355 OutermostScope->CHRRegions.push_back(RI); 1356 OutermostScope->HoistStopMap[R] = HoistStops; 1357 } 1358 } 1359 for (CHRScope *Sub : Scope->Subs) 1360 setCHRRegions(Sub, OutermostScope); 1361 } 1362 1363 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1364 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1365 } 1366 1367 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1368 SmallVectorImpl<CHRScope *> &Output) { 1369 Output.resize(Input.size()); 1370 std::copy(Input.begin(), Input.end(), Output.begin()); 1371 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter); 1372 } 1373 1374 // Return true if V is already hoisted or was hoisted (along with its operands) 1375 // to the insert point. 1376 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1377 HoistStopMapTy &HoistStopMap, 1378 DenseSet<Instruction *> &HoistedSet, 1379 DenseSet<PHINode *> &TrivialPHIs) { 1380 auto IT = HoistStopMap.find(R); 1381 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1382 DenseSet<Instruction *> &HoistStops = IT->second; 1383 if (auto *I = dyn_cast<Instruction>(V)) { 1384 if (I == HoistPoint) 1385 return; 1386 if (HoistStops.count(I)) 1387 return; 1388 if (auto *PN = dyn_cast<PHINode>(I)) 1389 if (TrivialPHIs.count(PN)) 1390 // The trivial phi inserted by the previous CHR scope could replace a 1391 // non-phi in HoistStops. Note that since this phi is at the exit of a 1392 // previous CHR scope, which dominates this scope, it's safe to stop 1393 // hoisting there. 1394 return; 1395 if (HoistedSet.count(I)) 1396 // Already hoisted, return. 1397 return; 1398 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1399 for (Value *Op : I->operands()) { 1400 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs); 1401 } 1402 I->moveBefore(HoistPoint); 1403 HoistedSet.insert(I); 1404 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1405 } 1406 } 1407 1408 // Hoist the dependent condition values of the branches and the selects in the 1409 // scope to the insert point. 1410 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1411 DenseSet<PHINode *> &TrivialPHIs) { 1412 DenseSet<Instruction *> HoistedSet; 1413 for (const RegInfo &RI : Scope->CHRRegions) { 1414 Region *R = RI.R; 1415 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1416 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1417 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1418 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1419 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1420 HoistedSet, TrivialPHIs); 1421 } 1422 for (SelectInst *SI : RI.Selects) { 1423 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1424 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1425 if (!(IsTrueBiased || IsFalseBiased)) 1426 continue; 1427 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1428 HoistedSet, TrivialPHIs); 1429 } 1430 } 1431 } 1432 1433 // Negate the predicate if an ICmp if it's used only by branches or selects by 1434 // swapping the operands of the branches or the selects. Returns true if success. 1435 static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1436 Instruction *ExcludedUser, 1437 CHRScope *Scope) { 1438 for (User *U : ICmp->users()) { 1439 if (U == ExcludedUser) 1440 continue; 1441 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1442 continue; 1443 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1444 continue; 1445 return false; 1446 } 1447 for (User *U : ICmp->users()) { 1448 if (U == ExcludedUser) 1449 continue; 1450 if (auto *BI = dyn_cast<BranchInst>(U)) { 1451 assert(BI->isConditional() && "Must be conditional"); 1452 BI->swapSuccessors(); 1453 // Don't need to swap this in terms of 1454 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1455 // mean whehter the branch is likely go into the if-then rather than 1456 // successor0/successor1 and because we can tell which edge is the then or 1457 // the else one by comparing the destination to the region exit block. 1458 continue; 1459 } 1460 if (auto *SI = dyn_cast<SelectInst>(U)) { 1461 // Swap operands 1462 Value *TrueValue = SI->getTrueValue(); 1463 Value *FalseValue = SI->getFalseValue(); 1464 SI->setTrueValue(FalseValue); 1465 SI->setFalseValue(TrueValue); 1466 SI->swapProfMetadata(); 1467 if (Scope->TrueBiasedSelects.count(SI)) { 1468 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1469 "Must not be already in"); 1470 Scope->FalseBiasedSelects.insert(SI); 1471 } else if (Scope->FalseBiasedSelects.count(SI)) { 1472 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1473 "Must not be already in"); 1474 Scope->TrueBiasedSelects.insert(SI); 1475 } 1476 continue; 1477 } 1478 llvm_unreachable("Must be a branch or a select"); 1479 } 1480 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1481 return true; 1482 } 1483 1484 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1485 // for a value that's defined in the scope but used outside it (meaning it's 1486 // alive at the exit block). 1487 static void insertTrivialPHIs(CHRScope *Scope, 1488 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1489 DenseSet<PHINode *> &TrivialPHIs) { 1490 DenseSet<BasicBlock *> BlocksInScopeSet; 1491 SmallVector<BasicBlock *, 8> BlocksInScopeVec; 1492 for (RegInfo &RI : Scope->RegInfos) { 1493 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1494 // sub-Scopes. 1495 BlocksInScopeSet.insert(BB); 1496 BlocksInScopeVec.push_back(BB); 1497 } 1498 } 1499 CHR_DEBUG( 1500 dbgs() << "Inserting redudant phis\n"; 1501 for (BasicBlock *BB : BlocksInScopeVec) { 1502 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1503 }); 1504 for (BasicBlock *BB : BlocksInScopeVec) { 1505 for (Instruction &I : *BB) { 1506 SmallVector<Instruction *, 8> Users; 1507 for (User *U : I.users()) { 1508 if (auto *UI = dyn_cast<Instruction>(U)) { 1509 if (BlocksInScopeSet.count(UI->getParent()) == 0 && 1510 // Unless there's already a phi for I at the exit block. 1511 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1512 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1513 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1514 Users.push_back(UI); 1515 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1516 // There's a loop backedge from a block that's dominated by this 1517 // scope to the entry block. 1518 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1519 CHR_DEBUG(dbgs() 1520 << "Used at entry block (for a back edge) by a phi user " 1521 << *UI << "\n"); 1522 Users.push_back(UI); 1523 } 1524 } 1525 } 1526 if (Users.size() > 0) { 1527 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1528 // ExitBlock. Replace I with the new phi in UI unless UI is another 1529 // phi at ExitBlock. 1530 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1531 pred_end(ExitBlock)); 1532 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1533 &ExitBlock->front()); 1534 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1535 PN->addIncoming(&I, Pred); 1536 } 1537 TrivialPHIs.insert(PN); 1538 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1539 for (Instruction *UI : Users) { 1540 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1541 if (UI->getOperand(J) == &I) { 1542 UI->setOperand(J, PN); 1543 } 1544 } 1545 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1546 } 1547 } 1548 } 1549 } 1550 } 1551 1552 // Assert that all the CHR regions of the scope have a biased branch or select. 1553 static void LLVM_ATTRIBUTE_UNUSED 1554 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1555 #ifndef NDEBUG 1556 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1557 if (Scope->TrueBiasedRegions.count(RI.R) || 1558 Scope->FalseBiasedRegions.count(RI.R)) 1559 return true; 1560 for (SelectInst *SI : RI.Selects) 1561 if (Scope->TrueBiasedSelects.count(SI) || 1562 Scope->FalseBiasedSelects.count(SI)) 1563 return true; 1564 return false; 1565 }; 1566 for (RegInfo &RI : Scope->CHRRegions) { 1567 assert(HasBiasedBranchOrSelect(RI, Scope) && 1568 "Must have biased branch or select"); 1569 } 1570 #endif 1571 } 1572 1573 // Assert that all the condition values of the biased branches and selects have 1574 // been hoisted to the pre-entry block or outside of the scope. 1575 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1576 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1577 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1578 for (RegInfo &RI : Scope->CHRRegions) { 1579 Region *R = RI.R; 1580 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1581 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1582 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1583 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1584 Value *V = BI->getCondition(); 1585 CHR_DEBUG(dbgs() << *V << "\n"); 1586 if (auto *I = dyn_cast<Instruction>(V)) { 1587 (void)(I); // Unused in release build. 1588 assert((I->getParent() == PreEntryBlock || 1589 !Scope->contains(I)) && 1590 "Must have been hoisted to PreEntryBlock or outside the scope"); 1591 } 1592 } 1593 for (SelectInst *SI : RI.Selects) { 1594 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1595 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1596 if (!(IsTrueBiased || IsFalseBiased)) 1597 continue; 1598 Value *V = SI->getCondition(); 1599 CHR_DEBUG(dbgs() << *V << "\n"); 1600 if (auto *I = dyn_cast<Instruction>(V)) { 1601 (void)(I); // Unused in release build. 1602 assert((I->getParent() == PreEntryBlock || 1603 !Scope->contains(I)) && 1604 "Must have been hoisted to PreEntryBlock or outside the scope"); 1605 } 1606 } 1607 } 1608 } 1609 1610 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1611 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1612 1613 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1614 Region *FirstRegion = Scope->RegInfos[0].R; 1615 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1616 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1617 BasicBlock *ExitBlock = LastRegion->getExit(); 1618 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1619 1620 if (ExitBlock) { 1621 // Insert a trivial phi at the exit block (where the CHR hot path and the 1622 // cold path merges) for a value that's defined in the scope but used 1623 // outside it (meaning it's alive at the exit block). We will add the 1624 // incoming values for the CHR cold paths to it below. Without this, we'd 1625 // miss updating phi's for such values unless there happens to already be a 1626 // phi for that value there. 1627 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1628 } 1629 1630 // Split the entry block of the first region. The new block becomes the new 1631 // entry block of the first region. The old entry block becomes the block to 1632 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1633 // through the split, we update the entry of the first region after the split, 1634 // and Region only points to the entry and the exit blocks, rather than 1635 // keeping everything in a list or set, the blocks membership and the 1636 // entry/exit blocks of the region are still valid after the split. 1637 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1638 << " at " << *Scope->BranchInsertPoint << "\n"); 1639 BasicBlock *NewEntryBlock = 1640 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1641 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1642 "NewEntryBlock's only pred must be EntryBlock"); 1643 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1644 BasicBlock *PreEntryBlock = EntryBlock; 1645 1646 ValueToValueMapTy VMap; 1647 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1648 // hot path (originals) and a cold path (clones) and update the PHIs at the 1649 // exit block. 1650 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1651 1652 // Replace the old (placeholder) branch with the new (merged) conditional 1653 // branch. 1654 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1655 NewEntryBlock, VMap); 1656 1657 #ifndef NDEBUG 1658 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1659 #endif 1660 1661 // Hoist the conditional values of the branches/selects. 1662 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs); 1663 1664 #ifndef NDEBUG 1665 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1666 #endif 1667 1668 // Create the combined branch condition and constant-fold the branches/selects 1669 // in the hot path. 1670 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1671 ProfileCount ? ProfileCount.getValue() : 0); 1672 } 1673 1674 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1675 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1676 // at the exit block. 1677 void CHR::cloneScopeBlocks(CHRScope *Scope, 1678 BasicBlock *PreEntryBlock, 1679 BasicBlock *ExitBlock, 1680 Region *LastRegion, 1681 ValueToValueMapTy &VMap) { 1682 // Clone all the blocks. The original blocks will be the hot-path 1683 // CHR-optimized code and the cloned blocks will be the original unoptimized 1684 // code. This is so that the block pointers from the 1685 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1686 // which CHR should apply to. 1687 SmallVector<BasicBlock*, 8> NewBlocks; 1688 for (RegInfo &RI : Scope->RegInfos) 1689 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1690 // sub-Scopes. 1691 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1692 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1693 NewBlocks.push_back(NewBB); 1694 VMap[BB] = NewBB; 1695 } 1696 1697 // Place the cloned blocks right after the original blocks (right before the 1698 // exit block of.) 1699 if (ExitBlock) 1700 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1701 F.getBasicBlockList(), 1702 NewBlocks[0]->getIterator(), F.end()); 1703 1704 // Update the cloned blocks/instructions to refer to themselves. 1705 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1706 for (Instruction &I : *NewBlocks[i]) 1707 RemapInstruction(&I, VMap, 1708 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1709 1710 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1711 // the top-level region but we don't need to add PHIs. The trivial PHIs 1712 // inserted above will be updated here. 1713 if (ExitBlock) 1714 for (PHINode &PN : ExitBlock->phis()) 1715 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1716 ++I) { 1717 BasicBlock *Pred = PN.getIncomingBlock(I); 1718 if (LastRegion->contains(Pred)) { 1719 Value *V = PN.getIncomingValue(I); 1720 auto It = VMap.find(V); 1721 if (It != VMap.end()) V = It->second; 1722 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1723 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1724 } 1725 } 1726 } 1727 1728 // A helper for transformScope. Replace the old (placeholder) branch with the 1729 // new (merged) conditional branch. 1730 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1731 BasicBlock *EntryBlock, 1732 BasicBlock *NewEntryBlock, 1733 ValueToValueMapTy &VMap) { 1734 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1735 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1736 "SplitBlock did not work correctly!"); 1737 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1738 "NewEntryBlock's only pred must be EntryBlock"); 1739 assert(VMap.find(NewEntryBlock) != VMap.end() && 1740 "NewEntryBlock must have been copied"); 1741 OldBR->dropAllReferences(); 1742 OldBR->eraseFromParent(); 1743 // The true predicate is a placeholder. It will be replaced later in 1744 // fixupBranchesAndSelects(). 1745 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1746 cast<BasicBlock>(VMap[NewEntryBlock]), 1747 ConstantInt::getTrue(F.getContext())); 1748 PreEntryBlock->getInstList().push_back(NewBR); 1749 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1750 "NewEntryBlock's only pred must be EntryBlock"); 1751 return NewBR; 1752 } 1753 1754 // A helper for transformScopes. Create the combined branch condition and 1755 // constant-fold the branches/selects in the hot path. 1756 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1757 BasicBlock *PreEntryBlock, 1758 BranchInst *MergedBR, 1759 uint64_t ProfileCount) { 1760 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1761 BranchProbability CHRBranchBias(1, 1); 1762 uint64_t NumCHRedBranches = 0; 1763 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1764 for (RegInfo &RI : Scope->CHRRegions) { 1765 Region *R = RI.R; 1766 if (RI.HasBranch) { 1767 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1768 ++NumCHRedBranches; 1769 } 1770 for (SelectInst *SI : RI.Selects) { 1771 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1772 ++NumCHRedBranches; 1773 } 1774 } 1775 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1776 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1777 MergedBR->setCondition(MergedCondition); 1778 SmallVector<uint32_t, 2> Weights; 1779 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); 1780 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); 1781 MDBuilder MDB(F.getContext()); 1782 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1783 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1784 << "\n"); 1785 } 1786 1787 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1788 // and constant-fold a branch in the hot path. 1789 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1790 IRBuilder<> &IRB, 1791 Value *&MergedCondition, 1792 BranchProbability &CHRBranchBias) { 1793 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1794 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1795 "Must be truthy or falsy"); 1796 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1797 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1798 "Must be in the bias map"); 1799 BranchProbability Bias = BranchBiasMap[R]; 1800 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1801 // Take the min. 1802 if (CHRBranchBias > Bias) 1803 CHRBranchBias = Bias; 1804 BasicBlock *IfThen = BI->getSuccessor(1); 1805 BasicBlock *IfElse = BI->getSuccessor(0); 1806 BasicBlock *RegionExitBlock = R->getExit(); 1807 assert(RegionExitBlock && "Null ExitBlock"); 1808 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1809 IfThen != IfElse && "Invariant from findScopes"); 1810 if (IfThen == RegionExitBlock) { 1811 // Swap them so that IfThen means going into it and IfElse means skipping 1812 // it. 1813 std::swap(IfThen, IfElse); 1814 } 1815 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1816 << " IfElse " << IfElse->getName() << "\n"); 1817 Value *Cond = BI->getCondition(); 1818 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1819 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1820 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1821 MergedCondition); 1822 // Constant-fold the branch at ClonedEntryBlock. 1823 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1824 "The successor shouldn't change"); 1825 Value *NewCondition = ConditionTrue ? 1826 ConstantInt::getTrue(F.getContext()) : 1827 ConstantInt::getFalse(F.getContext()); 1828 BI->setCondition(NewCondition); 1829 } 1830 1831 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1832 // and constant-fold a select in the hot path. 1833 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1834 IRBuilder<> &IRB, 1835 Value *&MergedCondition, 1836 BranchProbability &CHRBranchBias) { 1837 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1838 assert((IsTrueBiased || 1839 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1840 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1841 "Must be in the bias map"); 1842 BranchProbability Bias = SelectBiasMap[SI]; 1843 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1844 // Take the min. 1845 if (CHRBranchBias > Bias) 1846 CHRBranchBias = Bias; 1847 Value *Cond = SI->getCondition(); 1848 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1849 MergedCondition); 1850 Value *NewCondition = IsTrueBiased ? 1851 ConstantInt::getTrue(F.getContext()) : 1852 ConstantInt::getFalse(F.getContext()); 1853 SI->setCondition(NewCondition); 1854 } 1855 1856 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1857 // condition. 1858 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1859 Instruction *BranchOrSelect, 1860 CHRScope *Scope, 1861 IRBuilder<> &IRB, 1862 Value *&MergedCondition) { 1863 if (IsTrueBiased) { 1864 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1865 } else { 1866 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1867 // branch, negate the icmp predicate and swap the branch targets and avoid 1868 // inserting an Xor to negate Cond. 1869 bool Done = false; 1870 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1871 if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1872 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1873 Done = true; 1874 } 1875 if (!Done) { 1876 Value *Negate = IRB.CreateXor( 1877 ConstantInt::getTrue(F.getContext()), Cond); 1878 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1879 } 1880 } 1881 } 1882 1883 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1884 unsigned i = 0; 1885 (void)(i); // Unused in release build. 1886 DenseSet<PHINode *> TrivialPHIs; 1887 for (CHRScope *Scope : CHRScopes) { 1888 transformScopes(Scope, TrivialPHIs); 1889 CHR_DEBUG( 1890 std::ostringstream oss; 1891 oss << " after transformScopes " << i++; 1892 dumpIR(F, oss.str().c_str(), nullptr)); 1893 } 1894 } 1895 1896 static void LLVM_ATTRIBUTE_UNUSED 1897 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1898 dbgs() << Label << " " << Scopes.size() << "\n"; 1899 for (CHRScope *Scope : Scopes) { 1900 dbgs() << *Scope << "\n"; 1901 } 1902 } 1903 1904 bool CHR::run() { 1905 if (!shouldApply(F, PSI)) 1906 return false; 1907 1908 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1909 1910 bool Changed = false; 1911 { 1912 CHR_DEBUG( 1913 dbgs() << "RegionInfo:\n"; 1914 RI.print(dbgs())); 1915 1916 // Recursively traverse the region tree and find regions that have biased 1917 // branches and/or selects and create scopes. 1918 SmallVector<CHRScope *, 8> AllScopes; 1919 findScopes(AllScopes); 1920 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 1921 1922 // Split the scopes if 1) the conditiona values of the biased 1923 // branches/selects of the inner/lower scope can't be hoisted up to the 1924 // outermost/uppermost scope entry, or 2) the condition values of the biased 1925 // branches/selects in a scope (including subscopes) don't share at least 1926 // one common value. 1927 SmallVector<CHRScope *, 8> SplitScopes; 1928 splitScopes(AllScopes, SplitScopes); 1929 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 1930 1931 // After splitting, set the biased regions and selects of a scope (a tree 1932 // root) that include those of the subscopes. 1933 classifyBiasedScopes(SplitScopes); 1934 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 1935 1936 // Filter out the scopes that has only one biased region or select (CHR 1937 // isn't useful in such a case). 1938 SmallVector<CHRScope *, 8> FilteredScopes; 1939 filterScopes(SplitScopes, FilteredScopes); 1940 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 1941 1942 // Set the regions to be CHR'ed and their hoist stops for each scope. 1943 SmallVector<CHRScope *, 8> SetScopes; 1944 setCHRRegions(FilteredScopes, SetScopes); 1945 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 1946 1947 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 1948 // ones. We need to apply CHR from outer to inner so that we apply CHR only 1949 // to the hot path, rather than both hot and cold paths. 1950 SmallVector<CHRScope *, 8> SortedScopes; 1951 sortScopes(SetScopes, SortedScopes); 1952 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 1953 1954 CHR_DEBUG( 1955 dbgs() << "RegionInfo:\n"; 1956 RI.print(dbgs())); 1957 1958 // Apply the CHR transformation. 1959 if (!SortedScopes.empty()) { 1960 transformScopes(SortedScopes); 1961 Changed = true; 1962 } 1963 } 1964 1965 if (Changed) 1966 CHR_DEBUG(dumpIR(F, "after", &Stats)); 1967 1968 return Changed; 1969 } 1970 1971 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 1972 BlockFrequencyInfo &BFI = 1973 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 1974 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1975 ProfileSummaryInfo &PSI = 1976 *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1977 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1978 return CHR(F, BFI, DT, PSI, RI).run(); 1979 } 1980 1981 namespace llvm { 1982 1983 ControlHeightReductionPass::ControlHeightReductionPass() { 1984 ParseCHRFilterFiles(); 1985 } 1986 1987 PreservedAnalyses ControlHeightReductionPass::run( 1988 Function &F, 1989 FunctionAnalysisManager &FAM) { 1990 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 1991 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1992 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 1993 auto &MAM = MAMProxy.getManager(); 1994 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 1995 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 1996 bool Changed = CHR(F, BFI, DT, PSI, RI).run(); 1997 if (!Changed) 1998 return PreservedAnalyses::all(); 1999 auto PA = PreservedAnalyses(); 2000 PA.preserve<GlobalsAA>(); 2001 return PA; 2002 } 2003 2004 } // namespace llvm 2005