1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass merges conditional blocks of code and reduces the number of 11 // conditional branches in the hot paths based on profiles. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/Analysis/BlockFrequencyInfo.h" 21 #include "llvm/Analysis/GlobalsModRef.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/Support/BranchProbability.h" 31 #include "llvm/Support/MemoryBuffer.h" 32 #include "llvm/Transforms/Utils.h" 33 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 34 #include "llvm/Transforms/Utils/Cloning.h" 35 #include "llvm/Transforms/Utils/ValueMapper.h" 36 37 #include <set> 38 #include <sstream> 39 40 using namespace llvm; 41 42 #define DEBUG_TYPE "chr" 43 44 #define CHR_DEBUG(X) LLVM_DEBUG(X) 45 46 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 47 cl::desc("Apply CHR for all functions")); 48 49 static cl::opt<double> CHRBiasThreshold( 50 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 51 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 52 53 static cl::opt<unsigned> CHRMergeThreshold( 54 "chr-merge-threshold", cl::init(2), cl::Hidden, 55 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 56 57 static cl::opt<std::string> CHRModuleList( 58 "chr-module-list", cl::init(""), cl::Hidden, 59 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 60 61 static cl::opt<std::string> CHRFunctionList( 62 "chr-function-list", cl::init(""), cl::Hidden, 63 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 64 65 static StringSet<> CHRModules; 66 static StringSet<> CHRFunctions; 67 68 static void ParseCHRFilterFiles() { 69 if (!CHRModuleList.empty()) { 70 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 71 if (!FileOrErr) { 72 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 73 std::exit(1); 74 } 75 StringRef Buf = FileOrErr->get()->getBuffer(); 76 SmallVector<StringRef, 0> Lines; 77 Buf.split(Lines, '\n'); 78 for (StringRef Line : Lines) { 79 Line = Line.trim(); 80 if (!Line.empty()) 81 CHRModules.insert(Line); 82 } 83 } 84 if (!CHRFunctionList.empty()) { 85 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 86 if (!FileOrErr) { 87 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 88 std::exit(1); 89 } 90 StringRef Buf = FileOrErr->get()->getBuffer(); 91 SmallVector<StringRef, 0> Lines; 92 Buf.split(Lines, '\n'); 93 for (StringRef Line : Lines) { 94 Line = Line.trim(); 95 if (!Line.empty()) 96 CHRFunctions.insert(Line); 97 } 98 } 99 } 100 101 namespace { 102 class ControlHeightReductionLegacyPass : public FunctionPass { 103 public: 104 static char ID; 105 106 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 107 initializeControlHeightReductionLegacyPassPass( 108 *PassRegistry::getPassRegistry()); 109 ParseCHRFilterFiles(); 110 } 111 112 bool runOnFunction(Function &F) override; 113 void getAnalysisUsage(AnalysisUsage &AU) const override { 114 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 115 AU.addRequired<DominatorTreeWrapperPass>(); 116 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 117 AU.addRequired<RegionInfoPass>(); 118 AU.addPreserved<GlobalsAAWrapperPass>(); 119 } 120 }; 121 } // end anonymous namespace 122 123 char ControlHeightReductionLegacyPass::ID = 0; 124 125 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 126 "chr", 127 "Reduce control height in the hot paths", 128 false, false) 129 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 130 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 132 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 133 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 134 "chr", 135 "Reduce control height in the hot paths", 136 false, false) 137 138 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 139 return new ControlHeightReductionLegacyPass(); 140 } 141 142 namespace { 143 144 struct CHRStats { 145 CHRStats() : NumBranches(0), NumBranchesDelta(0), 146 WeightedNumBranchesDelta(0) {} 147 void print(raw_ostream &OS) const { 148 OS << "CHRStats: NumBranches " << NumBranches 149 << " NumBranchesDelta " << NumBranchesDelta 150 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 151 } 152 uint64_t NumBranches; // The original number of conditional branches / 153 // selects 154 uint64_t NumBranchesDelta; // The decrease of the number of conditional 155 // branches / selects in the hot paths due to CHR. 156 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 157 // count at the scope entry. 158 }; 159 160 // RegInfo - some properties of a Region. 161 struct RegInfo { 162 RegInfo() : R(nullptr), HasBranch(false) {} 163 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 164 Region *R; 165 bool HasBranch; 166 SmallVector<SelectInst *, 8> Selects; 167 }; 168 169 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 170 171 // CHRScope - a sequence of regions to CHR together. It corresponds to a 172 // sequence of conditional blocks. It can have subscopes which correspond to 173 // nested conditional blocks. Nested CHRScopes form a tree. 174 class CHRScope { 175 public: 176 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 177 assert(RI.R && "Null RegionIn"); 178 RegInfos.push_back(RI); 179 } 180 181 Region *getParentRegion() { 182 assert(RegInfos.size() > 0 && "Empty CHRScope"); 183 Region *Parent = RegInfos[0].R->getParent(); 184 assert(Parent && "Unexpected to call this on the top-level region"); 185 return Parent; 186 } 187 188 BasicBlock *getEntryBlock() { 189 assert(RegInfos.size() > 0 && "Empty CHRScope"); 190 return RegInfos.front().R->getEntry(); 191 } 192 193 BasicBlock *getExitBlock() { 194 assert(RegInfos.size() > 0 && "Empty CHRScope"); 195 return RegInfos.back().R->getExit(); 196 } 197 198 bool appendable(CHRScope *Next) { 199 // The next scope is appendable only if this scope is directly connected to 200 // it (which implies it post-dominates this scope) and this scope dominates 201 // it (no edge to the next scope outside this scope). 202 BasicBlock *NextEntry = Next->getEntryBlock(); 203 if (getExitBlock() != NextEntry) 204 // Not directly connected. 205 return false; 206 Region *LastRegion = RegInfos.back().R; 207 for (BasicBlock *Pred : predecessors(NextEntry)) 208 if (!LastRegion->contains(Pred)) 209 // There's an edge going into the entry of the next scope from outside 210 // of this scope. 211 return false; 212 return true; 213 } 214 215 void append(CHRScope *Next) { 216 assert(RegInfos.size() > 0 && "Empty CHRScope"); 217 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 218 assert(getParentRegion() == Next->getParentRegion() && 219 "Must be siblings"); 220 assert(getExitBlock() == Next->getEntryBlock() && 221 "Must be adjacent"); 222 for (RegInfo &RI : Next->RegInfos) 223 RegInfos.push_back(RI); 224 for (CHRScope *Sub : Next->Subs) 225 Subs.push_back(Sub); 226 } 227 228 void addSub(CHRScope *SubIn) { 229 #ifndef NDEBUG 230 bool is_child = false; 231 for (RegInfo &RI : RegInfos) 232 if (RI.R == SubIn->getParentRegion()) { 233 is_child = true; 234 break; 235 } 236 assert(is_child && "Must be a child"); 237 #endif 238 Subs.push_back(SubIn); 239 } 240 241 // Split this scope at the boundary region into two, which will belong to the 242 // tail and returns the tail. 243 CHRScope *split(Region *Boundary) { 244 assert(Boundary && "Boundary null"); 245 assert(RegInfos.begin()->R != Boundary && 246 "Can't be split at beginning"); 247 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), 248 [&Boundary](const RegInfo& RI) { 249 return Boundary == RI.R; 250 }); 251 if (BoundaryIt == RegInfos.end()) 252 return nullptr; 253 SmallVector<RegInfo, 8> TailRegInfos; 254 SmallVector<CHRScope *, 8> TailSubs; 255 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); 256 RegInfos.resize(BoundaryIt - RegInfos.begin()); 257 DenseSet<Region *> TailRegionSet; 258 for (RegInfo &RI : TailRegInfos) 259 TailRegionSet.insert(RI.R); 260 for (auto It = Subs.begin(); It != Subs.end(); ) { 261 CHRScope *Sub = *It; 262 assert(Sub && "null Sub"); 263 Region *Parent = Sub->getParentRegion(); 264 if (TailRegionSet.count(Parent)) { 265 TailSubs.push_back(Sub); 266 It = Subs.erase(It); 267 } else { 268 assert(std::find_if(RegInfos.begin(), RegInfos.end(), 269 [&Parent](const RegInfo& RI) { 270 return Parent == RI.R; 271 }) != RegInfos.end() && 272 "Must be in head"); 273 ++It; 274 } 275 } 276 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 277 return new CHRScope(TailRegInfos, TailSubs); 278 } 279 280 bool contains(Instruction *I) const { 281 BasicBlock *Parent = I->getParent(); 282 for (const RegInfo &RI : RegInfos) 283 if (RI.R->contains(Parent)) 284 return true; 285 return false; 286 } 287 288 void print(raw_ostream &OS) const; 289 290 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 291 SmallVector<CHRScope *, 8> Subs; // Subscopes. 292 293 // The instruction at which to insert the CHR conditional branch (and hoist 294 // the dependent condition values). 295 Instruction *BranchInsertPoint; 296 297 // True-biased and false-biased regions (conditional blocks), 298 // respectively. Used only for the outermost scope and includes regions in 299 // subscopes. The rest are unbiased. 300 DenseSet<Region *> TrueBiasedRegions; 301 DenseSet<Region *> FalseBiasedRegions; 302 // Among the biased regions, the regions that get CHRed. 303 SmallVector<RegInfo, 8> CHRRegions; 304 305 // True-biased and false-biased selects, respectively. Used only for the 306 // outermost scope and includes ones in subscopes. 307 DenseSet<SelectInst *> TrueBiasedSelects; 308 DenseSet<SelectInst *> FalseBiasedSelects; 309 310 // Map from one of the above regions to the instructions to stop 311 // hoisting instructions at through use-def chains. 312 HoistStopMapTy HoistStopMap; 313 314 private: 315 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, 316 SmallVector<CHRScope *, 8> &SubsIn) 317 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 318 }; 319 320 class CHR { 321 public: 322 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 323 ProfileSummaryInfo &PSIin, RegionInfo &RIin) 324 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {} 325 326 ~CHR() { 327 for (CHRScope *Scope : Scopes) { 328 delete Scope; 329 } 330 } 331 332 bool run(); 333 334 private: 335 // See the comments in CHR::run() for the high level flow of the algorithm and 336 // what the following functions do. 337 338 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 339 Region *R = RI.getTopLevelRegion(); 340 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); 341 if (Scope) { 342 Output.push_back(Scope); 343 } 344 } 345 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 346 SmallVectorImpl<CHRScope *> &Scopes); 347 CHRScope *findScope(Region *R); 348 void checkScopeHoistable(CHRScope *Scope); 349 350 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 351 SmallVectorImpl<CHRScope *> &Output); 352 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 353 CHRScope *Outer, 354 DenseSet<Value *> *OuterConditionValues, 355 Instruction *OuterInsertPoint, 356 SmallVectorImpl<CHRScope *> &Output, 357 DenseSet<Instruction *> &Unhoistables); 358 359 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 360 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 361 362 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 363 SmallVectorImpl<CHRScope *> &Output); 364 365 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 366 SmallVectorImpl<CHRScope *> &Output); 367 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 368 369 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 370 SmallVectorImpl<CHRScope *> &Output); 371 372 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 373 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 374 void cloneScopeBlocks(CHRScope *Scope, 375 BasicBlock *PreEntryBlock, 376 BasicBlock *ExitBlock, 377 Region *LastRegion, 378 ValueToValueMapTy &VMap); 379 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 380 BasicBlock *EntryBlock, 381 BasicBlock *NewEntryBlock, 382 ValueToValueMapTy &VMap); 383 void fixupBranchesAndSelects(CHRScope *Scope, 384 BasicBlock *PreEntryBlock, 385 BranchInst *MergedBR, 386 uint64_t ProfileCount); 387 void fixupBranch(Region *R, 388 CHRScope *Scope, 389 IRBuilder<> &IRB, 390 Value *&MergedCondition, BranchProbability &CHRBranchBias); 391 void fixupSelect(SelectInst* SI, 392 CHRScope *Scope, 393 IRBuilder<> &IRB, 394 Value *&MergedCondition, BranchProbability &CHRBranchBias); 395 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 396 Instruction *BranchOrSelect, 397 CHRScope *Scope, 398 IRBuilder<> &IRB, 399 Value *&MergedCondition); 400 401 Function &F; 402 BlockFrequencyInfo &BFI; 403 DominatorTree &DT; 404 ProfileSummaryInfo &PSI; 405 RegionInfo &RI; 406 CHRStats Stats; 407 408 // All the true-biased regions in the function 409 DenseSet<Region *> TrueBiasedRegionsGlobal; 410 // All the false-biased regions in the function 411 DenseSet<Region *> FalseBiasedRegionsGlobal; 412 // All the true-biased selects in the function 413 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 414 // All the false-biased selects in the function 415 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 416 // A map from biased regions to their branch bias 417 DenseMap<Region *, BranchProbability> BranchBiasMap; 418 // A map from biased selects to their branch bias 419 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 420 // All the scopes. 421 DenseSet<CHRScope *> Scopes; 422 }; 423 424 } // end anonymous namespace 425 426 static inline 427 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 428 const CHRStats &Stats) { 429 Stats.print(OS); 430 return OS; 431 } 432 433 static inline 434 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 435 Scope.print(OS); 436 return OS; 437 } 438 439 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 440 if (ForceCHR) 441 return true; 442 443 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 444 if (CHRModules.count(F.getParent()->getName())) 445 return true; 446 return CHRFunctions.count(F.getName()); 447 } 448 449 assert(PSI.hasProfileSummary() && "Empty PSI?"); 450 return PSI.isFunctionEntryHot(&F); 451 } 452 453 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 454 CHRStats *Stats) { 455 StringRef FuncName = F.getName(); 456 StringRef ModuleName = F.getParent()->getName(); 457 (void)(FuncName); // Unused in release build. 458 (void)(ModuleName); // Unused in release build. 459 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 460 << FuncName); 461 if (Stats) 462 CHR_DEBUG(dbgs() << " " << *Stats); 463 CHR_DEBUG(dbgs() << "\n"); 464 CHR_DEBUG(F.dump()); 465 } 466 467 void CHRScope::print(raw_ostream &OS) const { 468 assert(RegInfos.size() > 0 && "Empty CHRScope"); 469 OS << "CHRScope["; 470 OS << RegInfos.size() << ", Regions["; 471 for (const RegInfo &RI : RegInfos) { 472 OS << RI.R->getNameStr(); 473 if (RI.HasBranch) 474 OS << " B"; 475 if (RI.Selects.size() > 0) 476 OS << " S" << RI.Selects.size(); 477 OS << ", "; 478 } 479 if (RegInfos[0].R->getParent()) { 480 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 481 } else { 482 // top level region 483 OS << "]"; 484 } 485 OS << ", Subs["; 486 for (CHRScope *Sub : Subs) { 487 OS << *Sub << ", "; 488 } 489 OS << "]]"; 490 } 491 492 // Return true if the given instruction type can be hoisted by CHR. 493 static bool isHoistableInstructionType(Instruction *I) { 494 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 495 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 496 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 497 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 498 isa<InsertValueInst>(I); 499 } 500 501 // Return true if the given instruction can be hoisted by CHR. 502 static bool isHoistable(Instruction *I, DominatorTree &DT) { 503 if (!isHoistableInstructionType(I)) 504 return false; 505 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 506 } 507 508 // Recursively traverse the use-def chains of the given value and return a set 509 // of the unhoistable base values defined within the scope (excluding the 510 // first-region entry block) or the (hoistable or unhoistable) base values that 511 // are defined outside (including the first-region entry block) of the 512 // scope. The returned set doesn't include constants. 513 static std::set<Value *> getBaseValues(Value *V, 514 DominatorTree &DT) { 515 std::set<Value *> Result; 516 if (auto *I = dyn_cast<Instruction>(V)) { 517 // We don't stop at a block that's not in the Scope because we would miss some 518 // instructions that are based on the same base values if we stop there. 519 if (!isHoistable(I, DT)) { 520 Result.insert(I); 521 return Result; 522 } 523 // I is hoistable above the Scope. 524 for (Value *Op : I->operands()) { 525 std::set<Value *> OpResult = getBaseValues(Op, DT); 526 Result.insert(OpResult.begin(), OpResult.end()); 527 } 528 return Result; 529 } 530 if (isa<Argument>(V)) { 531 Result.insert(V); 532 return Result; 533 } 534 // We don't include others like constants because those won't lead to any 535 // chance of folding of conditions (eg two bit checks merged into one check) 536 // after CHR. 537 return Result; // empty 538 } 539 540 // Return true if V is already hoisted or can be hoisted (along with its 541 // operands) above the insert point. When it returns true and HoistStops is 542 // non-null, the instructions to stop hoisting at through the use-def chains are 543 // inserted into HoistStops. 544 static bool 545 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 546 DenseSet<Instruction *> &Unhoistables, 547 DenseSet<Instruction *> *HoistStops) { 548 assert(InsertPoint && "Null InsertPoint"); 549 if (auto *I = dyn_cast<Instruction>(V)) { 550 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 551 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 552 if (Unhoistables.count(I)) { 553 // Don't hoist if they are not to be hoisted. 554 return false; 555 } 556 if (DT.dominates(I, InsertPoint)) { 557 // We are already above the insert point. Stop here. 558 if (HoistStops) 559 HoistStops->insert(I); 560 return true; 561 } 562 // We aren't not above the insert point, check if we can hoist it above the 563 // insert point. 564 if (isHoistable(I, DT)) { 565 // Check operands first. 566 DenseSet<Instruction *> OpsHoistStops; 567 bool AllOpsHoisted = true; 568 for (Value *Op : I->operands()) { 569 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) { 570 AllOpsHoisted = false; 571 break; 572 } 573 } 574 if (AllOpsHoisted) { 575 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 576 if (HoistStops) 577 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 578 return true; 579 } 580 } 581 return false; 582 } 583 // Non-instructions are considered hoistable. 584 return true; 585 } 586 587 // Returns true and sets the true probability and false probability of an 588 // MD_prof metadata if it's well-formed. 589 static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb, 590 BranchProbability &FalseProb) { 591 if (!MD) return false; 592 MDString *MDName = cast<MDString>(MD->getOperand(0)); 593 if (MDName->getString() != "branch_weights" || 594 MD->getNumOperands() != 3) 595 return false; 596 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 597 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 598 if (!TrueWeight || !FalseWeight) 599 return false; 600 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 601 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 602 uint64_t SumWt = TrueWt + FalseWt; 603 604 assert(SumWt >= TrueWt && SumWt >= FalseWt && 605 "Overflow calculating branch probabilities."); 606 607 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 608 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 609 return true; 610 } 611 612 static BranchProbability getCHRBiasThreshold() { 613 return BranchProbability::getBranchProbability( 614 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 615 } 616 617 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 618 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 619 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 620 // false. 621 template<typename K, typename S, typename M> 622 bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, 623 S &TrueSet, S &FalseSet, M &BiasMap) { 624 BranchProbability Threshold = getCHRBiasThreshold(); 625 if (TrueProb >= Threshold) { 626 TrueSet.insert(Key); 627 BiasMap[Key] = TrueProb; 628 return true; 629 } else if (FalseProb >= Threshold) { 630 FalseSet.insert(Key); 631 BiasMap[Key] = FalseProb; 632 return true; 633 } 634 return false; 635 } 636 637 // Returns true and insert a region into the right biased set and the map if the 638 // branch of the region is biased. 639 static bool CheckBiasedBranch(BranchInst *BI, Region *R, 640 DenseSet<Region *> &TrueBiasedRegionsGlobal, 641 DenseSet<Region *> &FalseBiasedRegionsGlobal, 642 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 643 if (!BI->isConditional()) 644 return false; 645 BranchProbability ThenProb, ElseProb; 646 if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof), 647 ThenProb, ElseProb)) 648 return false; 649 BasicBlock *IfThen = BI->getSuccessor(0); 650 BasicBlock *IfElse = BI->getSuccessor(1); 651 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 652 IfThen != IfElse && 653 "Invariant from findScopes"); 654 if (IfThen == R->getExit()) { 655 // Swap them so that IfThen/ThenProb means going into the conditional code 656 // and IfElse/ElseProb means skipping it. 657 std::swap(IfThen, IfElse); 658 std::swap(ThenProb, ElseProb); 659 } 660 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 661 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 662 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 663 return CheckBias(R, ThenProb, ElseProb, 664 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 665 BranchBiasMap); 666 } 667 668 // Returns true and insert a select into the right biased set and the map if the 669 // select is biased. 670 static bool CheckBiasedSelect( 671 SelectInst *SI, Region *R, 672 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 673 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 674 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 675 BranchProbability TrueProb, FalseProb; 676 if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof), 677 TrueProb, FalseProb)) 678 return false; 679 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 680 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 681 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 682 return CheckBias(SI, TrueProb, FalseProb, 683 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 684 SelectBiasMap); 685 } 686 687 // Returns the instruction at which to hoist the dependent condition values and 688 // insert the CHR branch for a region. This is the terminator branch in the 689 // entry block or the first select in the entry block, if any. 690 static Instruction* getBranchInsertPoint(RegInfo &RI) { 691 Region *R = RI.R; 692 BasicBlock *EntryBB = R->getEntry(); 693 // The hoist point is by default the terminator of the entry block, which is 694 // the same as the branch instruction if RI.HasBranch is true. 695 Instruction *HoistPoint = EntryBB->getTerminator(); 696 for (SelectInst *SI : RI.Selects) { 697 if (SI->getParent() == EntryBB) { 698 // Pick the first select in Selects in the entry block. Note Selects is 699 // sorted in the instruction order within a block (asserted below). 700 HoistPoint = SI; 701 break; 702 } 703 } 704 assert(HoistPoint && "Null HoistPoint"); 705 #ifndef NDEBUG 706 // Check that HoistPoint is the first one in Selects in the entry block, 707 // if any. 708 DenseSet<Instruction *> EntryBlockSelectSet; 709 for (SelectInst *SI : RI.Selects) { 710 if (SI->getParent() == EntryBB) { 711 EntryBlockSelectSet.insert(SI); 712 } 713 } 714 for (Instruction &I : *EntryBB) { 715 if (EntryBlockSelectSet.count(&I) > 0) { 716 assert(&I == HoistPoint && 717 "HoistPoint must be the first one in Selects"); 718 break; 719 } 720 } 721 #endif 722 return HoistPoint; 723 } 724 725 // Find a CHR scope in the given region. 726 CHRScope * CHR::findScope(Region *R) { 727 CHRScope *Result = nullptr; 728 BasicBlock *Entry = R->getEntry(); 729 BasicBlock *Exit = R->getExit(); // null if top level. 730 assert(Entry && "Entry must not be null"); 731 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 732 "Only top level region has a null exit"); 733 if (Entry) 734 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 735 else 736 CHR_DEBUG(dbgs() << "Entry null\n"); 737 if (Exit) 738 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 739 else 740 CHR_DEBUG(dbgs() << "Exit null\n"); 741 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 742 // to this region). 743 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 744 if (EntryInSubregion) 745 return nullptr; 746 // Exclude loops 747 for (BasicBlock *Pred : predecessors(Entry)) 748 if (R->contains(Pred)) 749 return nullptr; 750 if (Exit) { 751 // Try to find an if-then block (check if R is an if-then). 752 // if (cond) { 753 // ... 754 // } 755 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 756 if (BI) 757 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 758 else 759 CHR_DEBUG(dbgs() << "BI null\n"); 760 if (BI && BI->isConditional()) { 761 BasicBlock *S0 = BI->getSuccessor(0); 762 BasicBlock *S1 = BI->getSuccessor(1); 763 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 764 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 765 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 766 RegInfo RI(R); 767 RI.HasBranch = CheckBiasedBranch( 768 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 769 BranchBiasMap); 770 Result = new CHRScope(RI); 771 Scopes.insert(Result); 772 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 773 ++Stats.NumBranches; 774 } 775 } 776 } 777 { 778 // Try to look for selects in the direct child blocks (as opposed to in 779 // subregions) of R. 780 // ... 781 // if (..) { // Some subregion 782 // ... 783 // } 784 // if (..) { // Some subregion 785 // ... 786 // } 787 // ... 788 // a = cond ? b : c; 789 // ... 790 SmallVector<SelectInst *, 8> Selects; 791 for (RegionNode *E : R->elements()) { 792 if (E->isSubRegion()) 793 continue; 794 // This returns the basic block of E if E is a direct child of R (not a 795 // subregion.) 796 BasicBlock *BB = E->getEntry(); 797 // Need to push in the order to make it easier to find the first Select 798 // later. 799 for (Instruction &I : *BB) { 800 if (auto *SI = dyn_cast<SelectInst>(&I)) { 801 Selects.push_back(SI); 802 ++Stats.NumBranches; 803 } 804 } 805 } 806 if (Selects.size() > 0) { 807 auto AddSelects = [&](RegInfo &RI) { 808 for (auto *SI : Selects) 809 if (CheckBiasedSelect(SI, RI.R, 810 TrueBiasedSelectsGlobal, 811 FalseBiasedSelectsGlobal, 812 SelectBiasMap)) 813 RI.Selects.push_back(SI); 814 }; 815 if (!Result) { 816 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 817 RegInfo RI(R); 818 AddSelects(RI); 819 Result = new CHRScope(RI); 820 Scopes.insert(Result); 821 } else { 822 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 823 AddSelects(Result->RegInfos[0]); 824 } 825 } 826 } 827 828 if (Result) { 829 checkScopeHoistable(Result); 830 } 831 return Result; 832 } 833 834 // Check that any of the branch and the selects in the region could be 835 // hoisted above the the CHR branch insert point (the most dominating of 836 // them, either the branch (at the end of the first block) or the first 837 // select in the first block). If the branch can't be hoisted, drop the 838 // selects in the first blocks. 839 // 840 // For example, for the following scope/region with selects, we want to insert 841 // the merged branch right before the first select in the first/entry block by 842 // hoisting c1, c2, c3, and c4. 843 // 844 // // Branch insert point here. 845 // a = c1 ? b : c; // Select 1 846 // d = c2 ? e : f; // Select 2 847 // if (c3) { // Branch 848 // ... 849 // c4 = foo() // A call. 850 // g = c4 ? h : i; // Select 3 851 // } 852 // 853 // But suppose we can't hoist c4 because it's dependent on the preceding 854 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 855 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 856 void CHR::checkScopeHoistable(CHRScope *Scope) { 857 RegInfo &RI = Scope->RegInfos[0]; 858 Region *R = RI.R; 859 BasicBlock *EntryBB = R->getEntry(); 860 auto *Branch = RI.HasBranch ? 861 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 862 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 863 if (RI.HasBranch || !Selects.empty()) { 864 Instruction *InsertPoint = getBranchInsertPoint(RI); 865 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 866 // Avoid a data dependence from a select or a branch to a(nother) 867 // select. Note no instruction can't data-depend on a branch (a branch 868 // instruction doesn't produce a value). 869 DenseSet<Instruction *> Unhoistables; 870 // Initialize Unhoistables with the selects. 871 for (SelectInst *SI : Selects) { 872 Unhoistables.insert(SI); 873 } 874 // Remove Selects that can't be hoisted. 875 for (auto it = Selects.begin(); it != Selects.end(); ) { 876 SelectInst *SI = *it; 877 if (SI == InsertPoint) { 878 ++it; 879 continue; 880 } 881 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 882 DT, Unhoistables, nullptr); 883 if (!IsHoistable) { 884 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 885 it = Selects.erase(it); 886 // Since we are dropping the select here, we also drop it from 887 // Unhoistables. 888 Unhoistables.erase(SI); 889 } else 890 ++it; 891 } 892 // Update InsertPoint after potentially removing selects. 893 InsertPoint = getBranchInsertPoint(RI); 894 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 895 if (RI.HasBranch && InsertPoint != Branch) { 896 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 897 DT, Unhoistables, nullptr); 898 if (!IsHoistable) { 899 // If the branch isn't hoistable, drop the selects in the entry 900 // block, preferring the branch, which makes the branch the hoist 901 // point. 902 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 903 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 904 CHR_DEBUG( 905 for (SelectInst *SI : Selects) { 906 dbgs() << "SI " << *SI << "\n"; 907 }); 908 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 909 [EntryBB](SelectInst *SI) { 910 return SI->getParent() == EntryBB; 911 }), Selects.end()); 912 Unhoistables.clear(); 913 InsertPoint = Branch; 914 } 915 } 916 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 917 #ifndef NDEBUG 918 if (RI.HasBranch) { 919 assert(!DT.dominates(Branch, InsertPoint) && 920 "Branch can't be already above the hoist point"); 921 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 922 DT, Unhoistables, nullptr) && 923 "checkHoistValue for branch"); 924 } 925 for (auto *SI : Selects) { 926 assert(!DT.dominates(SI, InsertPoint) && 927 "SI can't be already above the hoist point"); 928 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 929 Unhoistables, nullptr) && 930 "checkHoistValue for selects"); 931 } 932 CHR_DEBUG(dbgs() << "Result\n"); 933 if (RI.HasBranch) { 934 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 935 } 936 for (auto *SI : Selects) { 937 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 938 } 939 #endif 940 } 941 } 942 943 // Traverse the region tree, find all nested scopes and merge them if possible. 944 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 945 SmallVectorImpl<CHRScope *> &Scopes) { 946 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 947 CHRScope *Result = findScope(R); 948 // Visit subscopes. 949 CHRScope *ConsecutiveSubscope = nullptr; 950 SmallVector<CHRScope *, 8> Subscopes; 951 for (auto It = R->begin(); It != R->end(); ++It) { 952 const std::unique_ptr<Region> &SubR = *It; 953 auto Next_It = std::next(It); 954 Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr; 955 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 956 << "\n"); 957 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 958 if (SubCHRScope) { 959 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 960 } else { 961 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 962 } 963 if (SubCHRScope) { 964 if (!ConsecutiveSubscope) 965 ConsecutiveSubscope = SubCHRScope; 966 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 967 Subscopes.push_back(ConsecutiveSubscope); 968 ConsecutiveSubscope = SubCHRScope; 969 } else 970 ConsecutiveSubscope->append(SubCHRScope); 971 } else { 972 if (ConsecutiveSubscope) { 973 Subscopes.push_back(ConsecutiveSubscope); 974 } 975 ConsecutiveSubscope = nullptr; 976 } 977 } 978 if (ConsecutiveSubscope) { 979 Subscopes.push_back(ConsecutiveSubscope); 980 } 981 for (CHRScope *Sub : Subscopes) { 982 if (Result) { 983 // Combine it with the parent. 984 Result->addSub(Sub); 985 } else { 986 // Push Subscopes as they won't be combined with the parent. 987 Scopes.push_back(Sub); 988 } 989 } 990 return Result; 991 } 992 993 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 994 DenseSet<Value *> ConditionValues; 995 if (RI.HasBranch) { 996 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 997 ConditionValues.insert(BI->getCondition()); 998 } 999 for (SelectInst *SI : RI.Selects) { 1000 ConditionValues.insert(SI->getCondition()); 1001 } 1002 return ConditionValues; 1003 } 1004 1005 1006 // Determine whether to split a scope depending on the sets of the branch 1007 // condition values of the previous region and the current region. We split 1008 // (return true) it if 1) the condition values of the inner/lower scope can't be 1009 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1010 // values have an empty intersection (because the combined branch conditions 1011 // won't probably lead to a simpler combined condition). 1012 static bool shouldSplit(Instruction *InsertPoint, 1013 DenseSet<Value *> &PrevConditionValues, 1014 DenseSet<Value *> &ConditionValues, 1015 DominatorTree &DT, 1016 DenseSet<Instruction *> &Unhoistables) { 1017 CHR_DEBUG( 1018 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1019 for (Value *V : PrevConditionValues) { 1020 dbgs() << *V << ", "; 1021 } 1022 dbgs() << " ConditionValues "; 1023 for (Value *V : ConditionValues) { 1024 dbgs() << *V << ", "; 1025 } 1026 dbgs() << "\n"); 1027 assert(InsertPoint && "Null InsertPoint"); 1028 // If any of Bases isn't hoistable to the hoist point, split. 1029 for (Value *V : ConditionValues) { 1030 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) { 1031 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1032 return true; // Not hoistable, split. 1033 } 1034 } 1035 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1036 // unnecessary splits at scopes with no branch/selects. If 1037 // PrevConditionValues and ConditionValues don't intersect at all, split. 1038 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1039 // Use std::set as DenseSet doesn't work with set_intersection. 1040 std::set<Value *> PrevBases, Bases; 1041 for (Value *V : PrevConditionValues) { 1042 std::set<Value *> BaseValues = getBaseValues(V, DT); 1043 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1044 } 1045 for (Value *V : ConditionValues) { 1046 std::set<Value *> BaseValues = getBaseValues(V, DT); 1047 Bases.insert(BaseValues.begin(), BaseValues.end()); 1048 } 1049 CHR_DEBUG( 1050 dbgs() << "PrevBases "; 1051 for (Value *V : PrevBases) { 1052 dbgs() << *V << ", "; 1053 } 1054 dbgs() << " Bases "; 1055 for (Value *V : Bases) { 1056 dbgs() << *V << ", "; 1057 } 1058 dbgs() << "\n"); 1059 std::set<Value *> Intersection; 1060 std::set_intersection(PrevBases.begin(), PrevBases.end(), 1061 Bases.begin(), Bases.end(), 1062 std::inserter(Intersection, Intersection.begin())); 1063 if (Intersection.empty()) { 1064 // Empty intersection, split. 1065 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1066 return true; 1067 } 1068 } 1069 CHR_DEBUG(dbgs() << "No split\n"); 1070 return false; // Don't split. 1071 } 1072 1073 static void GetSelectsInScope(CHRScope *Scope, 1074 DenseSet<Instruction *> &Output) { 1075 for (RegInfo &RI : Scope->RegInfos) { 1076 for (SelectInst *SI : RI.Selects) { 1077 Output.insert(SI); 1078 } 1079 } 1080 for (CHRScope *Sub : Scope->Subs) { 1081 GetSelectsInScope(Sub, Output); 1082 } 1083 } 1084 1085 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1086 SmallVectorImpl<CHRScope *> &Output) { 1087 for (CHRScope *Scope : Input) { 1088 assert(!Scope->BranchInsertPoint && 1089 "BranchInsertPoint must not be set"); 1090 DenseSet<Instruction *> Unhoistables; 1091 GetSelectsInScope(Scope, Unhoistables); 1092 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1093 } 1094 #ifndef NDEBUG 1095 for (CHRScope *Scope : Output) { 1096 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1097 } 1098 #endif 1099 } 1100 1101 SmallVector<CHRScope *, 8> CHR::splitScope( 1102 CHRScope *Scope, 1103 CHRScope *Outer, 1104 DenseSet<Value *> *OuterConditionValues, 1105 Instruction *OuterInsertPoint, 1106 SmallVectorImpl<CHRScope *> &Output, 1107 DenseSet<Instruction *> &Unhoistables) { 1108 if (Outer) { 1109 assert(OuterConditionValues && "Null OuterConditionValues"); 1110 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1111 } 1112 bool PrevSplitFromOuter = true; 1113 DenseSet<Value *> PrevConditionValues; 1114 Instruction *PrevInsertPoint = nullptr; 1115 SmallVector<CHRScope *, 8> Splits; 1116 SmallVector<bool, 8> SplitsSplitFromOuter; 1117 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1118 SmallVector<Instruction *, 8> SplitsInsertPoints; 1119 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1120 for (RegInfo &RI : RegInfos) { 1121 Instruction *InsertPoint = getBranchInsertPoint(RI); 1122 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1123 CHR_DEBUG( 1124 dbgs() << "ConditionValues "; 1125 for (Value *V : ConditionValues) { 1126 dbgs() << *V << ", "; 1127 } 1128 dbgs() << "\n"); 1129 if (RI.R == RegInfos[0].R) { 1130 // First iteration. Check to see if we should split from the outer. 1131 if (Outer) { 1132 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1133 CHR_DEBUG(dbgs() << "Should split from outer at " 1134 << RI.R->getNameStr() << "\n"); 1135 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1136 ConditionValues, DT, Unhoistables)) { 1137 PrevConditionValues = ConditionValues; 1138 PrevInsertPoint = InsertPoint; 1139 } else { 1140 // Not splitting from the outer. Use the outer bases and insert 1141 // point. Union the bases. 1142 PrevSplitFromOuter = false; 1143 PrevConditionValues = *OuterConditionValues; 1144 PrevConditionValues.insert(ConditionValues.begin(), 1145 ConditionValues.end()); 1146 PrevInsertPoint = OuterInsertPoint; 1147 } 1148 } else { 1149 CHR_DEBUG(dbgs() << "Outer null\n"); 1150 PrevConditionValues = ConditionValues; 1151 PrevInsertPoint = InsertPoint; 1152 } 1153 } else { 1154 CHR_DEBUG(dbgs() << "Should split from prev at " 1155 << RI.R->getNameStr() << "\n"); 1156 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1157 DT, Unhoistables)) { 1158 CHRScope *Tail = Scope->split(RI.R); 1159 Scopes.insert(Tail); 1160 Splits.push_back(Scope); 1161 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1162 SplitsConditionValues.push_back(PrevConditionValues); 1163 SplitsInsertPoints.push_back(PrevInsertPoint); 1164 Scope = Tail; 1165 PrevConditionValues = ConditionValues; 1166 PrevInsertPoint = InsertPoint; 1167 PrevSplitFromOuter = true; 1168 } else { 1169 // Not splitting. Union the bases. Keep the hoist point. 1170 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1171 } 1172 } 1173 } 1174 Splits.push_back(Scope); 1175 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1176 SplitsConditionValues.push_back(PrevConditionValues); 1177 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1178 SplitsInsertPoints.push_back(PrevInsertPoint); 1179 assert(Splits.size() == SplitsConditionValues.size() && 1180 Splits.size() == SplitsSplitFromOuter.size() && 1181 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1182 for (size_t I = 0; I < Splits.size(); ++I) { 1183 CHRScope *Split = Splits[I]; 1184 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1185 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1186 SmallVector<CHRScope *, 8> NewSubs; 1187 DenseSet<Instruction *> SplitUnhoistables; 1188 GetSelectsInScope(Split, SplitUnhoistables); 1189 for (CHRScope *Sub : Split->Subs) { 1190 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1191 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1192 SplitUnhoistables); 1193 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1194 } 1195 Split->Subs = NewSubs; 1196 } 1197 SmallVector<CHRScope *, 8> Result; 1198 for (size_t I = 0; I < Splits.size(); ++I) { 1199 CHRScope *Split = Splits[I]; 1200 if (SplitsSplitFromOuter[I]) { 1201 // Split from the outer. 1202 Output.push_back(Split); 1203 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1204 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1205 << "\n"); 1206 } else { 1207 // Connected to the outer. 1208 Result.push_back(Split); 1209 } 1210 } 1211 if (!Outer) 1212 assert(Result.empty() && 1213 "If no outer (top-level), must return no nested ones"); 1214 return Result; 1215 } 1216 1217 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1218 for (CHRScope *Scope : Scopes) { 1219 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1220 classifyBiasedScopes(Scope, Scope); 1221 CHR_DEBUG( 1222 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1223 dbgs() << "TrueBiasedRegions "; 1224 for (Region *R : Scope->TrueBiasedRegions) { 1225 dbgs() << R->getNameStr() << ", "; 1226 } 1227 dbgs() << "\n"; 1228 dbgs() << "FalseBiasedRegions "; 1229 for (Region *R : Scope->FalseBiasedRegions) { 1230 dbgs() << R->getNameStr() << ", "; 1231 } 1232 dbgs() << "\n"; 1233 dbgs() << "TrueBiasedSelects "; 1234 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1235 dbgs() << *SI << ", "; 1236 } 1237 dbgs() << "\n"; 1238 dbgs() << "FalseBiasedSelects "; 1239 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1240 dbgs() << *SI << ", "; 1241 } 1242 dbgs() << "\n";); 1243 } 1244 } 1245 1246 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1247 for (RegInfo &RI : Scope->RegInfos) { 1248 if (RI.HasBranch) { 1249 Region *R = RI.R; 1250 if (TrueBiasedRegionsGlobal.count(R) > 0) 1251 OutermostScope->TrueBiasedRegions.insert(R); 1252 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1253 OutermostScope->FalseBiasedRegions.insert(R); 1254 else 1255 llvm_unreachable("Must be biased"); 1256 } 1257 for (SelectInst *SI : RI.Selects) { 1258 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1259 OutermostScope->TrueBiasedSelects.insert(SI); 1260 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1261 OutermostScope->FalseBiasedSelects.insert(SI); 1262 else 1263 llvm_unreachable("Must be biased"); 1264 } 1265 } 1266 for (CHRScope *Sub : Scope->Subs) { 1267 classifyBiasedScopes(Sub, OutermostScope); 1268 } 1269 } 1270 1271 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1272 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1273 Scope->FalseBiasedRegions.size() + 1274 Scope->TrueBiasedSelects.size() + 1275 Scope->FalseBiasedSelects.size(); 1276 return NumBiased >= CHRMergeThreshold; 1277 } 1278 1279 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1280 SmallVectorImpl<CHRScope *> &Output) { 1281 for (CHRScope *Scope : Input) { 1282 // Filter out the ones with only one region and no subs. 1283 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1284 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1285 << Scope->TrueBiasedRegions.size() 1286 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1287 << " true-selects " << Scope->TrueBiasedSelects.size() 1288 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1289 continue; 1290 } 1291 Output.push_back(Scope); 1292 } 1293 } 1294 1295 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1296 SmallVectorImpl<CHRScope *> &Output) { 1297 for (CHRScope *Scope : Input) { 1298 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1299 "Empty"); 1300 setCHRRegions(Scope, Scope); 1301 Output.push_back(Scope); 1302 CHR_DEBUG( 1303 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1304 for (auto pair : Scope->HoistStopMap) { 1305 Region *R = pair.first; 1306 dbgs() << "Region " << R->getNameStr() << "\n"; 1307 for (Instruction *I : pair.second) { 1308 dbgs() << "HoistStop " << *I << "\n"; 1309 } 1310 } 1311 dbgs() << "CHRRegions" << "\n"; 1312 for (RegInfo &RI : Scope->CHRRegions) { 1313 dbgs() << RI.R->getNameStr() << "\n"; 1314 }); 1315 } 1316 } 1317 1318 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1319 DenseSet<Instruction *> Unhoistables; 1320 // Put the biased selects in Unhoistables because they should stay where they 1321 // are and constant-folded after CHR (in case one biased select or a branch 1322 // can depend on another biased select.) 1323 for (RegInfo &RI : Scope->RegInfos) { 1324 for (SelectInst *SI : RI.Selects) { 1325 Unhoistables.insert(SI); 1326 } 1327 } 1328 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1329 for (RegInfo &RI : Scope->RegInfos) { 1330 Region *R = RI.R; 1331 DenseSet<Instruction *> HoistStops; 1332 bool IsHoisted = false; 1333 if (RI.HasBranch) { 1334 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1335 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1336 "Must be truthy or falsy"); 1337 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1338 // Note checkHoistValue fills in HoistStops. 1339 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1340 Unhoistables, &HoistStops); 1341 assert(IsHoistable && "Must be hoistable"); 1342 (void)(IsHoistable); // Unused in release build 1343 IsHoisted = true; 1344 } 1345 for (SelectInst *SI : RI.Selects) { 1346 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1347 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1348 "Must be true or false biased"); 1349 // Note checkHoistValue fills in HoistStops. 1350 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1351 Unhoistables, &HoistStops); 1352 assert(IsHoistable && "Must be hoistable"); 1353 (void)(IsHoistable); // Unused in release build 1354 IsHoisted = true; 1355 } 1356 if (IsHoisted) { 1357 OutermostScope->CHRRegions.push_back(RI); 1358 OutermostScope->HoistStopMap[R] = HoistStops; 1359 } 1360 } 1361 for (CHRScope *Sub : Scope->Subs) 1362 setCHRRegions(Sub, OutermostScope); 1363 } 1364 1365 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1366 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1367 } 1368 1369 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1370 SmallVectorImpl<CHRScope *> &Output) { 1371 Output.resize(Input.size()); 1372 std::copy(Input.begin(), Input.end(), Output.begin()); 1373 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter); 1374 } 1375 1376 // Return true if V is already hoisted or was hoisted (along with its operands) 1377 // to the insert point. 1378 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1379 HoistStopMapTy &HoistStopMap, 1380 DenseSet<Instruction *> &HoistedSet, 1381 DenseSet<PHINode *> &TrivialPHIs) { 1382 auto IT = HoistStopMap.find(R); 1383 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1384 DenseSet<Instruction *> &HoistStops = IT->second; 1385 if (auto *I = dyn_cast<Instruction>(V)) { 1386 if (I == HoistPoint) 1387 return; 1388 if (HoistStops.count(I)) 1389 return; 1390 if (auto *PN = dyn_cast<PHINode>(I)) 1391 if (TrivialPHIs.count(PN)) 1392 // The trivial phi inserted by the previous CHR scope could replace a 1393 // non-phi in HoistStops. Note that since this phi is at the exit of a 1394 // previous CHR scope, which dominates this scope, it's safe to stop 1395 // hoisting there. 1396 return; 1397 if (HoistedSet.count(I)) 1398 // Already hoisted, return. 1399 return; 1400 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1401 for (Value *Op : I->operands()) { 1402 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs); 1403 } 1404 I->moveBefore(HoistPoint); 1405 HoistedSet.insert(I); 1406 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1407 } 1408 } 1409 1410 // Hoist the dependent condition values of the branches and the selects in the 1411 // scope to the insert point. 1412 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1413 DenseSet<PHINode *> &TrivialPHIs) { 1414 DenseSet<Instruction *> HoistedSet; 1415 for (const RegInfo &RI : Scope->CHRRegions) { 1416 Region *R = RI.R; 1417 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1418 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1419 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1420 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1421 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1422 HoistedSet, TrivialPHIs); 1423 } 1424 for (SelectInst *SI : RI.Selects) { 1425 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1426 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1427 if (!(IsTrueBiased || IsFalseBiased)) 1428 continue; 1429 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1430 HoistedSet, TrivialPHIs); 1431 } 1432 } 1433 } 1434 1435 // Negate the predicate if an ICmp if it's used only by branches or selects by 1436 // swapping the operands of the branches or the selects. Returns true if success. 1437 static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1438 Instruction *ExcludedUser, 1439 CHRScope *Scope) { 1440 for (User *U : ICmp->users()) { 1441 if (U == ExcludedUser) 1442 continue; 1443 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1444 continue; 1445 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1446 continue; 1447 return false; 1448 } 1449 for (User *U : ICmp->users()) { 1450 if (U == ExcludedUser) 1451 continue; 1452 if (auto *BI = dyn_cast<BranchInst>(U)) { 1453 assert(BI->isConditional() && "Must be conditional"); 1454 BI->swapSuccessors(); 1455 // Don't need to swap this in terms of 1456 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1457 // mean whehter the branch is likely go into the if-then rather than 1458 // successor0/successor1 and because we can tell which edge is the then or 1459 // the else one by comparing the destination to the region exit block. 1460 continue; 1461 } 1462 if (auto *SI = dyn_cast<SelectInst>(U)) { 1463 // Swap operands 1464 Value *TrueValue = SI->getTrueValue(); 1465 Value *FalseValue = SI->getFalseValue(); 1466 SI->setTrueValue(FalseValue); 1467 SI->setFalseValue(TrueValue); 1468 SI->swapProfMetadata(); 1469 if (Scope->TrueBiasedSelects.count(SI)) { 1470 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1471 "Must not be already in"); 1472 Scope->FalseBiasedSelects.insert(SI); 1473 } else if (Scope->FalseBiasedSelects.count(SI)) { 1474 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1475 "Must not be already in"); 1476 Scope->TrueBiasedSelects.insert(SI); 1477 } 1478 continue; 1479 } 1480 llvm_unreachable("Must be a branch or a select"); 1481 } 1482 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1483 return true; 1484 } 1485 1486 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1487 // for a value that's defined in the scope but used outside it (meaning it's 1488 // alive at the exit block). 1489 static void insertTrivialPHIs(CHRScope *Scope, 1490 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1491 DenseSet<PHINode *> &TrivialPHIs) { 1492 DenseSet<BasicBlock *> BlocksInScopeSet; 1493 SmallVector<BasicBlock *, 8> BlocksInScopeVec; 1494 for (RegInfo &RI : Scope->RegInfos) { 1495 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1496 // sub-Scopes. 1497 BlocksInScopeSet.insert(BB); 1498 BlocksInScopeVec.push_back(BB); 1499 } 1500 } 1501 CHR_DEBUG( 1502 dbgs() << "Inserting redudant phis\n"; 1503 for (BasicBlock *BB : BlocksInScopeVec) { 1504 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1505 }); 1506 for (BasicBlock *BB : BlocksInScopeVec) { 1507 for (Instruction &I : *BB) { 1508 SmallVector<Instruction *, 8> Users; 1509 for (User *U : I.users()) { 1510 if (auto *UI = dyn_cast<Instruction>(U)) { 1511 if (BlocksInScopeSet.count(UI->getParent()) == 0 && 1512 // Unless there's already a phi for I at the exit block. 1513 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1514 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1515 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1516 Users.push_back(UI); 1517 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1518 // There's a loop backedge from a block that's dominated by this 1519 // scope to the entry block. 1520 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1521 CHR_DEBUG(dbgs() 1522 << "Used at entry block (for a back edge) by a phi user " 1523 << *UI << "\n"); 1524 Users.push_back(UI); 1525 } 1526 } 1527 } 1528 if (Users.size() > 0) { 1529 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1530 // ExitBlock. Replace I with the new phi in UI unless UI is another 1531 // phi at ExitBlock. 1532 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1533 pred_end(ExitBlock)); 1534 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1535 &ExitBlock->front()); 1536 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1537 PN->addIncoming(&I, Pred); 1538 } 1539 TrivialPHIs.insert(PN); 1540 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1541 for (Instruction *UI : Users) { 1542 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1543 if (UI->getOperand(J) == &I) { 1544 UI->setOperand(J, PN); 1545 } 1546 } 1547 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1548 } 1549 } 1550 } 1551 } 1552 } 1553 1554 // Assert that all the CHR regions of the scope have a biased branch or select. 1555 static void LLVM_ATTRIBUTE_UNUSED 1556 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1557 #ifndef NDEBUG 1558 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1559 if (Scope->TrueBiasedRegions.count(RI.R) || 1560 Scope->FalseBiasedRegions.count(RI.R)) 1561 return true; 1562 for (SelectInst *SI : RI.Selects) 1563 if (Scope->TrueBiasedSelects.count(SI) || 1564 Scope->FalseBiasedSelects.count(SI)) 1565 return true; 1566 return false; 1567 }; 1568 for (RegInfo &RI : Scope->CHRRegions) { 1569 assert(HasBiasedBranchOrSelect(RI, Scope) && 1570 "Must have biased branch or select"); 1571 } 1572 #endif 1573 } 1574 1575 // Assert that all the condition values of the biased branches and selects have 1576 // been hoisted to the pre-entry block or outside of the scope. 1577 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1578 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1579 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1580 for (RegInfo &RI : Scope->CHRRegions) { 1581 Region *R = RI.R; 1582 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1583 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1584 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1585 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1586 Value *V = BI->getCondition(); 1587 CHR_DEBUG(dbgs() << *V << "\n"); 1588 if (auto *I = dyn_cast<Instruction>(V)) { 1589 (void)(I); // Unused in release build. 1590 assert((I->getParent() == PreEntryBlock || 1591 !Scope->contains(I)) && 1592 "Must have been hoisted to PreEntryBlock or outside the scope"); 1593 } 1594 } 1595 for (SelectInst *SI : RI.Selects) { 1596 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1597 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1598 if (!(IsTrueBiased || IsFalseBiased)) 1599 continue; 1600 Value *V = SI->getCondition(); 1601 CHR_DEBUG(dbgs() << *V << "\n"); 1602 if (auto *I = dyn_cast<Instruction>(V)) { 1603 (void)(I); // Unused in release build. 1604 assert((I->getParent() == PreEntryBlock || 1605 !Scope->contains(I)) && 1606 "Must have been hoisted to PreEntryBlock or outside the scope"); 1607 } 1608 } 1609 } 1610 } 1611 1612 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1613 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1614 1615 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1616 Region *FirstRegion = Scope->RegInfos[0].R; 1617 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1618 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1619 BasicBlock *ExitBlock = LastRegion->getExit(); 1620 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1621 1622 if (ExitBlock) { 1623 // Insert a trivial phi at the exit block (where the CHR hot path and the 1624 // cold path merges) for a value that's defined in the scope but used 1625 // outside it (meaning it's alive at the exit block). We will add the 1626 // incoming values for the CHR cold paths to it below. Without this, we'd 1627 // miss updating phi's for such values unless there happens to already be a 1628 // phi for that value there. 1629 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1630 } 1631 1632 // Split the entry block of the first region. The new block becomes the new 1633 // entry block of the first region. The old entry block becomes the block to 1634 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1635 // through the split, we update the entry of the first region after the split, 1636 // and Region only points to the entry and the exit blocks, rather than 1637 // keeping everything in a list or set, the blocks membership and the 1638 // entry/exit blocks of the region are still valid after the split. 1639 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1640 << " at " << *Scope->BranchInsertPoint << "\n"); 1641 BasicBlock *NewEntryBlock = 1642 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1643 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1644 "NewEntryBlock's only pred must be EntryBlock"); 1645 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1646 BasicBlock *PreEntryBlock = EntryBlock; 1647 1648 ValueToValueMapTy VMap; 1649 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1650 // hot path (originals) and a cold path (clones) and update the PHIs at the 1651 // exit block. 1652 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1653 1654 // Replace the old (placeholder) branch with the new (merged) conditional 1655 // branch. 1656 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1657 NewEntryBlock, VMap); 1658 1659 #ifndef NDEBUG 1660 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1661 #endif 1662 1663 // Hoist the conditional values of the branches/selects. 1664 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs); 1665 1666 #ifndef NDEBUG 1667 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1668 #endif 1669 1670 // Create the combined branch condition and constant-fold the branches/selects 1671 // in the hot path. 1672 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1673 ProfileCount ? ProfileCount.getValue() : 0); 1674 } 1675 1676 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1677 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1678 // at the exit block. 1679 void CHR::cloneScopeBlocks(CHRScope *Scope, 1680 BasicBlock *PreEntryBlock, 1681 BasicBlock *ExitBlock, 1682 Region *LastRegion, 1683 ValueToValueMapTy &VMap) { 1684 // Clone all the blocks. The original blocks will be the hot-path 1685 // CHR-optimized code and the cloned blocks will be the original unoptimized 1686 // code. This is so that the block pointers from the 1687 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1688 // which CHR should apply to. 1689 SmallVector<BasicBlock*, 8> NewBlocks; 1690 for (RegInfo &RI : Scope->RegInfos) 1691 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1692 // sub-Scopes. 1693 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1694 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1695 NewBlocks.push_back(NewBB); 1696 VMap[BB] = NewBB; 1697 } 1698 1699 // Place the cloned blocks right after the original blocks (right before the 1700 // exit block of.) 1701 if (ExitBlock) 1702 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1703 F.getBasicBlockList(), 1704 NewBlocks[0]->getIterator(), F.end()); 1705 1706 // Update the cloned blocks/instructions to refer to themselves. 1707 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1708 for (Instruction &I : *NewBlocks[i]) 1709 RemapInstruction(&I, VMap, 1710 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1711 1712 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1713 // the top-level region but we don't need to add PHIs. The trivial PHIs 1714 // inserted above will be updated here. 1715 if (ExitBlock) 1716 for (PHINode &PN : ExitBlock->phis()) 1717 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1718 ++I) { 1719 BasicBlock *Pred = PN.getIncomingBlock(I); 1720 if (LastRegion->contains(Pred)) { 1721 Value *V = PN.getIncomingValue(I); 1722 auto It = VMap.find(V); 1723 if (It != VMap.end()) V = It->second; 1724 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1725 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1726 } 1727 } 1728 } 1729 1730 // A helper for transformScope. Replace the old (placeholder) branch with the 1731 // new (merged) conditional branch. 1732 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1733 BasicBlock *EntryBlock, 1734 BasicBlock *NewEntryBlock, 1735 ValueToValueMapTy &VMap) { 1736 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1737 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1738 "SplitBlock did not work correctly!"); 1739 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1740 "NewEntryBlock's only pred must be EntryBlock"); 1741 assert(VMap.find(NewEntryBlock) != VMap.end() && 1742 "NewEntryBlock must have been copied"); 1743 OldBR->dropAllReferences(); 1744 OldBR->eraseFromParent(); 1745 // The true predicate is a placeholder. It will be replaced later in 1746 // fixupBranchesAndSelects(). 1747 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1748 cast<BasicBlock>(VMap[NewEntryBlock]), 1749 ConstantInt::getTrue(F.getContext())); 1750 PreEntryBlock->getInstList().push_back(NewBR); 1751 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1752 "NewEntryBlock's only pred must be EntryBlock"); 1753 return NewBR; 1754 } 1755 1756 // A helper for transformScopes. Create the combined branch condition and 1757 // constant-fold the branches/selects in the hot path. 1758 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1759 BasicBlock *PreEntryBlock, 1760 BranchInst *MergedBR, 1761 uint64_t ProfileCount) { 1762 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1763 BranchProbability CHRBranchBias(1, 1); 1764 uint64_t NumCHRedBranches = 0; 1765 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1766 for (RegInfo &RI : Scope->CHRRegions) { 1767 Region *R = RI.R; 1768 if (RI.HasBranch) { 1769 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1770 ++NumCHRedBranches; 1771 } 1772 for (SelectInst *SI : RI.Selects) { 1773 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1774 ++NumCHRedBranches; 1775 } 1776 } 1777 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1778 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1779 MergedBR->setCondition(MergedCondition); 1780 SmallVector<uint32_t, 2> Weights; 1781 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); 1782 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); 1783 MDBuilder MDB(F.getContext()); 1784 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1785 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1786 << "\n"); 1787 } 1788 1789 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1790 // and constant-fold a branch in the hot path. 1791 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1792 IRBuilder<> &IRB, 1793 Value *&MergedCondition, 1794 BranchProbability &CHRBranchBias) { 1795 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1796 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1797 "Must be truthy or falsy"); 1798 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1799 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1800 "Must be in the bias map"); 1801 BranchProbability Bias = BranchBiasMap[R]; 1802 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1803 // Take the min. 1804 if (CHRBranchBias > Bias) 1805 CHRBranchBias = Bias; 1806 BasicBlock *IfThen = BI->getSuccessor(1); 1807 BasicBlock *IfElse = BI->getSuccessor(0); 1808 BasicBlock *RegionExitBlock = R->getExit(); 1809 assert(RegionExitBlock && "Null ExitBlock"); 1810 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1811 IfThen != IfElse && "Invariant from findScopes"); 1812 if (IfThen == RegionExitBlock) { 1813 // Swap them so that IfThen means going into it and IfElse means skipping 1814 // it. 1815 std::swap(IfThen, IfElse); 1816 } 1817 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1818 << " IfElse " << IfElse->getName() << "\n"); 1819 Value *Cond = BI->getCondition(); 1820 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1821 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1822 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1823 MergedCondition); 1824 // Constant-fold the branch at ClonedEntryBlock. 1825 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1826 "The successor shouldn't change"); 1827 Value *NewCondition = ConditionTrue ? 1828 ConstantInt::getTrue(F.getContext()) : 1829 ConstantInt::getFalse(F.getContext()); 1830 BI->setCondition(NewCondition); 1831 } 1832 1833 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1834 // and constant-fold a select in the hot path. 1835 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1836 IRBuilder<> &IRB, 1837 Value *&MergedCondition, 1838 BranchProbability &CHRBranchBias) { 1839 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1840 assert((IsTrueBiased || 1841 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1842 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1843 "Must be in the bias map"); 1844 BranchProbability Bias = SelectBiasMap[SI]; 1845 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1846 // Take the min. 1847 if (CHRBranchBias > Bias) 1848 CHRBranchBias = Bias; 1849 Value *Cond = SI->getCondition(); 1850 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1851 MergedCondition); 1852 Value *NewCondition = IsTrueBiased ? 1853 ConstantInt::getTrue(F.getContext()) : 1854 ConstantInt::getFalse(F.getContext()); 1855 SI->setCondition(NewCondition); 1856 } 1857 1858 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1859 // condition. 1860 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1861 Instruction *BranchOrSelect, 1862 CHRScope *Scope, 1863 IRBuilder<> &IRB, 1864 Value *&MergedCondition) { 1865 if (IsTrueBiased) { 1866 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1867 } else { 1868 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1869 // branch, negate the icmp predicate and swap the branch targets and avoid 1870 // inserting an Xor to negate Cond. 1871 bool Done = false; 1872 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1873 if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1874 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1875 Done = true; 1876 } 1877 if (!Done) { 1878 Value *Negate = IRB.CreateXor( 1879 ConstantInt::getTrue(F.getContext()), Cond); 1880 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1881 } 1882 } 1883 } 1884 1885 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1886 unsigned i = 0; 1887 (void)(i); // Unused in release build. 1888 DenseSet<PHINode *> TrivialPHIs; 1889 for (CHRScope *Scope : CHRScopes) { 1890 transformScopes(Scope, TrivialPHIs); 1891 CHR_DEBUG( 1892 std::ostringstream oss; 1893 oss << " after transformScopes " << i++; 1894 dumpIR(F, oss.str().c_str(), nullptr)); 1895 } 1896 } 1897 1898 static void LLVM_ATTRIBUTE_UNUSED 1899 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1900 dbgs() << Label << " " << Scopes.size() << "\n"; 1901 for (CHRScope *Scope : Scopes) { 1902 dbgs() << *Scope << "\n"; 1903 } 1904 } 1905 1906 bool CHR::run() { 1907 if (!shouldApply(F, PSI)) 1908 return false; 1909 1910 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1911 1912 bool Changed = false; 1913 { 1914 CHR_DEBUG( 1915 dbgs() << "RegionInfo:\n"; 1916 RI.print(dbgs())); 1917 1918 // Recursively traverse the region tree and find regions that have biased 1919 // branches and/or selects and create scopes. 1920 SmallVector<CHRScope *, 8> AllScopes; 1921 findScopes(AllScopes); 1922 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 1923 1924 // Split the scopes if 1) the conditiona values of the biased 1925 // branches/selects of the inner/lower scope can't be hoisted up to the 1926 // outermost/uppermost scope entry, or 2) the condition values of the biased 1927 // branches/selects in a scope (including subscopes) don't share at least 1928 // one common value. 1929 SmallVector<CHRScope *, 8> SplitScopes; 1930 splitScopes(AllScopes, SplitScopes); 1931 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 1932 1933 // After splitting, set the biased regions and selects of a scope (a tree 1934 // root) that include those of the subscopes. 1935 classifyBiasedScopes(SplitScopes); 1936 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 1937 1938 // Filter out the scopes that has only one biased region or select (CHR 1939 // isn't useful in such a case). 1940 SmallVector<CHRScope *, 8> FilteredScopes; 1941 filterScopes(SplitScopes, FilteredScopes); 1942 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 1943 1944 // Set the regions to be CHR'ed and their hoist stops for each scope. 1945 SmallVector<CHRScope *, 8> SetScopes; 1946 setCHRRegions(FilteredScopes, SetScopes); 1947 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 1948 1949 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 1950 // ones. We need to apply CHR from outer to inner so that we apply CHR only 1951 // to the hot path, rather than both hot and cold paths. 1952 SmallVector<CHRScope *, 8> SortedScopes; 1953 sortScopes(SetScopes, SortedScopes); 1954 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 1955 1956 CHR_DEBUG( 1957 dbgs() << "RegionInfo:\n"; 1958 RI.print(dbgs())); 1959 1960 // Apply the CHR transformation. 1961 if (!SortedScopes.empty()) { 1962 transformScopes(SortedScopes); 1963 Changed = true; 1964 } 1965 } 1966 1967 if (Changed) 1968 CHR_DEBUG(dumpIR(F, "after", &Stats)); 1969 1970 return Changed; 1971 } 1972 1973 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 1974 BlockFrequencyInfo &BFI = 1975 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 1976 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1977 ProfileSummaryInfo &PSI = 1978 *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1979 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1980 return CHR(F, BFI, DT, PSI, RI).run(); 1981 } 1982 1983 namespace llvm { 1984 1985 ControlHeightReductionPass::ControlHeightReductionPass() { 1986 ParseCHRFilterFiles(); 1987 } 1988 1989 PreservedAnalyses ControlHeightReductionPass::run( 1990 Function &F, 1991 FunctionAnalysisManager &FAM) { 1992 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 1993 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1994 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 1995 auto &MAM = MAMProxy.getManager(); 1996 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 1997 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 1998 bool Changed = CHR(F, BFI, DT, PSI, RI).run(); 1999 if (!Changed) 2000 return PreservedAnalyses::all(); 2001 auto PA = PreservedAnalyses(); 2002 PA.preserve<GlobalsAA>(); 2003 return PA; 2004 } 2005 2006 } // namespace llvm 2007