1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass merges conditional blocks of code and reduces the number of 11 // conditional branches in the hot paths based on profiles. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/ADT/DenseSet.h" 18 #include "llvm/ADT/SmallVector.h" 19 #include "llvm/ADT/StringSet.h" 20 #include "llvm/Analysis/BlockFrequencyInfo.h" 21 #include "llvm/Analysis/GlobalsModRef.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/MDBuilder.h" 30 #include "llvm/Support/BranchProbability.h" 31 #include "llvm/Support/MemoryBuffer.h" 32 #include "llvm/Transforms/Utils.h" 33 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 34 #include "llvm/Transforms/Utils/Cloning.h" 35 #include "llvm/Transforms/Utils/ValueMapper.h" 36 37 #if !defined(_MSC_VER) 38 #include <cxxabi.h> 39 #endif 40 #include <set> 41 #include <sstream> 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "chr" 46 47 #define CHR_DEBUG(X) LLVM_DEBUG(X) 48 49 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 50 cl::desc("Apply CHR for all functions")); 51 52 static cl::opt<double> CHRBiasThreshold( 53 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 54 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 55 56 static cl::opt<unsigned> CHRMergeThreshold( 57 "chr-merge-threshold", cl::init(2), cl::Hidden, 58 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 59 60 static cl::opt<std::string> CHRModuleList( 61 "chr-module-list", cl::init(""), cl::Hidden, 62 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 63 64 static cl::opt<std::string> CHRFunctionList( 65 "chr-function-list", cl::init(""), cl::Hidden, 66 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 67 68 static StringSet<> CHRModules; 69 static StringSet<> CHRFunctions; 70 71 static void ParseCHRFilterFiles() { 72 if (!CHRModuleList.empty()) { 73 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 74 if (!FileOrErr) { 75 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 76 std::exit(1); 77 } 78 StringRef Buf = FileOrErr->get()->getBuffer(); 79 SmallVector<StringRef, 0> Lines; 80 Buf.split(Lines, '\n'); 81 for (StringRef Line : Lines) { 82 Line = Line.trim(); 83 if (!Line.empty()) 84 CHRModules.insert(Line); 85 } 86 } 87 if (!CHRFunctionList.empty()) { 88 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 89 if (!FileOrErr) { 90 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 91 std::exit(1); 92 } 93 StringRef Buf = FileOrErr->get()->getBuffer(); 94 SmallVector<StringRef, 0> Lines; 95 Buf.split(Lines, '\n'); 96 for (StringRef Line : Lines) { 97 Line = Line.trim(); 98 if (!Line.empty()) 99 CHRFunctions.insert(Line); 100 } 101 } 102 } 103 104 namespace { 105 class ControlHeightReductionLegacyPass : public FunctionPass { 106 public: 107 static char ID; 108 109 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 110 initializeControlHeightReductionLegacyPassPass( 111 *PassRegistry::getPassRegistry()); 112 ParseCHRFilterFiles(); 113 } 114 115 bool runOnFunction(Function &F) override; 116 void getAnalysisUsage(AnalysisUsage &AU) const override { 117 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 118 AU.addRequired<DominatorTreeWrapperPass>(); 119 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 120 AU.addRequired<RegionInfoPass>(); 121 AU.addPreserved<GlobalsAAWrapperPass>(); 122 } 123 }; 124 } // end anonymous namespace 125 126 char ControlHeightReductionLegacyPass::ID = 0; 127 128 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 129 "chr", 130 "Reduce control height in the hot paths", 131 false, false) 132 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 133 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 134 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 135 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 136 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 137 "chr", 138 "Reduce control height in the hot paths", 139 false, false) 140 141 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 142 return new ControlHeightReductionLegacyPass(); 143 } 144 145 namespace { 146 147 struct CHRStats { 148 CHRStats() : NumBranches(0), NumBranchesDelta(0), 149 WeightedNumBranchesDelta(0) {} 150 void print(raw_ostream &OS) const { 151 OS << "CHRStats: NumBranches " << NumBranches 152 << " NumBranchesDelta " << NumBranchesDelta 153 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 154 } 155 uint64_t NumBranches; // The original number of conditional branches / 156 // selects 157 uint64_t NumBranchesDelta; // The decrease of the number of conditional 158 // branches / selects in the hot paths due to CHR. 159 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 160 // count at the scope entry. 161 }; 162 163 inline raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 164 const CHRStats &Stats) { 165 Stats.print(OS); 166 return OS; 167 } 168 169 // RegInfo - some properties of a Region. 170 struct RegInfo { 171 RegInfo() : R(nullptr), HasBranch(false) {} 172 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 173 Region *R; 174 bool HasBranch; 175 SmallVector<SelectInst *, 8> Selects; 176 }; 177 178 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 179 180 // CHRScope - a sequence of regions to CHR together. It corresponds to a 181 // sequence of conditional blocks. It can have subscopes which correspond to 182 // nested conditional blocks. Nested CHRScopes form a tree. 183 class CHRScope { 184 public: 185 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 186 assert(RI.R && "Null RegionIn"); 187 RegInfos.push_back(RI); 188 } 189 190 Region *getParentRegion() { 191 assert(RegInfos.size() > 0 && "Empty CHRScope"); 192 Region *Parent = RegInfos[0].R->getParent(); 193 assert(Parent && "Unexpected to call this on the top-level region"); 194 return Parent; 195 } 196 197 BasicBlock *getEntryBlock() { 198 assert(RegInfos.size() > 0 && "Empty CHRScope"); 199 return RegInfos.front().R->getEntry(); 200 } 201 202 BasicBlock *getExitBlock() { 203 assert(RegInfos.size() > 0 && "Empty CHRScope"); 204 return RegInfos.back().R->getExit(); 205 } 206 207 bool appendable(CHRScope *Next) { 208 // The next scope is appendable only if this scope is directly connected to 209 // it (which implies it post-dominates this scope) and this scope dominates 210 // it (no edge to the next scope outside this scope). 211 BasicBlock *NextEntry = Next->getEntryBlock(); 212 if (getExitBlock() != NextEntry) 213 // Not directly connected. 214 return false; 215 Region *LastRegion = RegInfos.back().R; 216 for (BasicBlock *Pred : predecessors(NextEntry)) 217 if (!LastRegion->contains(Pred)) 218 // There's an edge going into the entry of the next scope from outside 219 // of this scope. 220 return false; 221 return true; 222 } 223 224 void append(CHRScope *Next) { 225 assert(RegInfos.size() > 0 && "Empty CHRScope"); 226 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 227 assert(getParentRegion() == Next->getParentRegion() && 228 "Must be siblings"); 229 assert(getExitBlock() == Next->getEntryBlock() && 230 "Must be adjacent"); 231 for (RegInfo &RI : Next->RegInfos) 232 RegInfos.push_back(RI); 233 for (CHRScope *Sub : Next->Subs) 234 Subs.push_back(Sub); 235 } 236 237 void addSub(CHRScope *SubIn) { 238 #ifndef NDEBUG 239 bool is_child = false; 240 for (RegInfo &RI : RegInfos) 241 if (RI.R == SubIn->getParentRegion()) { 242 is_child = true; 243 break; 244 } 245 assert(is_child && "Must be a child"); 246 #endif 247 Subs.push_back(SubIn); 248 } 249 250 // Split this scope at the boundary region into two, which will belong to the 251 // tail and returns the tail. 252 CHRScope *split(Region *Boundary) { 253 assert(Boundary && "Boundary null"); 254 assert(RegInfos.begin()->R != Boundary && 255 "Can't be split at beginning"); 256 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), 257 [&Boundary](const RegInfo& RI) { 258 return Boundary == RI.R; 259 }); 260 if (BoundaryIt == RegInfos.end()) 261 return nullptr; 262 SmallVector<RegInfo, 8> TailRegInfos; 263 SmallVector<CHRScope *, 8> TailSubs; 264 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); 265 RegInfos.resize(BoundaryIt - RegInfos.begin()); 266 DenseSet<Region *> TailRegionSet; 267 for (RegInfo &RI : TailRegInfos) 268 TailRegionSet.insert(RI.R); 269 for (auto It = Subs.begin(); It != Subs.end(); ) { 270 CHRScope *Sub = *It; 271 assert(Sub && "null Sub"); 272 Region *Parent = Sub->getParentRegion(); 273 if (TailRegionSet.count(Parent)) { 274 TailSubs.push_back(Sub); 275 It = Subs.erase(It); 276 } else { 277 assert(std::find_if(RegInfos.begin(), RegInfos.end(), 278 [&Parent](const RegInfo& RI) { 279 return Parent == RI.R; 280 }) != RegInfos.end() && 281 "Must be in head"); 282 ++It; 283 } 284 } 285 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 286 return new CHRScope(TailRegInfos, TailSubs); 287 } 288 289 bool contains(Instruction *I) const { 290 BasicBlock *Parent = I->getParent(); 291 for (const RegInfo &RI : RegInfos) 292 if (RI.R->contains(Parent)) 293 return true; 294 return false; 295 } 296 297 void print(raw_ostream &OS) const; 298 299 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 300 SmallVector<CHRScope *, 8> Subs; // Subscopes. 301 302 // The instruction at which to insert the CHR conditional branch (and hoist 303 // the dependent condition values). 304 Instruction *BranchInsertPoint; 305 306 // True-biased and false-biased regions (conditional blocks), 307 // respectively. Used only for the outermost scope and includes regions in 308 // subscopes. The rest are unbiased. 309 DenseSet<Region *> TrueBiasedRegions; 310 DenseSet<Region *> FalseBiasedRegions; 311 // Among the biased regions, the regions that get CHRed. 312 SmallVector<RegInfo, 8> CHRRegions; 313 314 // True-biased and false-biased selects, respectively. Used only for the 315 // outermost scope and includes ones in subscopes. 316 DenseSet<SelectInst *> TrueBiasedSelects; 317 DenseSet<SelectInst *> FalseBiasedSelects; 318 319 // Map from one of the above regions to the instructions to stop 320 // hoisting instructions at through use-def chains. 321 HoistStopMapTy HoistStopMap; 322 323 private: 324 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, 325 SmallVector<CHRScope *, 8> &SubsIn) 326 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 327 }; 328 329 inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 330 Scope.print(OS); 331 return OS; 332 } 333 334 class CHR { 335 public: 336 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 337 ProfileSummaryInfo &PSIin, RegionInfo &RIin) 338 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {} 339 340 ~CHR() { 341 for (CHRScope *Scope : Scopes) { 342 delete Scope; 343 } 344 } 345 346 bool run(); 347 348 private: 349 // See the comments in CHR::run() for the high level flow of the algorithm and 350 // what the following functions do. 351 352 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 353 Region *R = RI.getTopLevelRegion(); 354 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); 355 if (Scope) { 356 Output.push_back(Scope); 357 } 358 } 359 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 360 SmallVectorImpl<CHRScope *> &Scopes); 361 CHRScope *findScope(Region *R); 362 void checkScopeHoistable(CHRScope *Scope); 363 364 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 365 SmallVectorImpl<CHRScope *> &Output); 366 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 367 CHRScope *Outer, 368 DenseSet<Value *> *OuterConditionValues, 369 Instruction *OuterInsertPoint, 370 SmallVectorImpl<CHRScope *> &Output, 371 DenseSet<Instruction *> &Unhoistables); 372 373 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 374 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 375 376 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 377 SmallVectorImpl<CHRScope *> &Output); 378 379 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 380 SmallVectorImpl<CHRScope *> &Output); 381 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 382 383 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 384 SmallVectorImpl<CHRScope *> &Output); 385 386 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 387 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 388 void cloneScopeBlocks(CHRScope *Scope, 389 BasicBlock *PreEntryBlock, 390 BasicBlock *ExitBlock, 391 Region *LastRegion, 392 ValueToValueMapTy &VMap); 393 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 394 BasicBlock *EntryBlock, 395 BasicBlock *NewEntryBlock, 396 ValueToValueMapTy &VMap); 397 void fixupBranchesAndSelects(CHRScope *Scope, 398 BasicBlock *PreEntryBlock, 399 BranchInst *MergedBR, 400 uint64_t ProfileCount); 401 void fixupBranch(Region *R, 402 CHRScope *Scope, 403 IRBuilder<> &IRB, 404 Value *&MergedCondition, BranchProbability &CHRBranchBias); 405 void fixupSelect(SelectInst* SI, 406 CHRScope *Scope, 407 IRBuilder<> &IRB, 408 Value *&MergedCondition, BranchProbability &CHRBranchBias); 409 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 410 Instruction *BranchOrSelect, 411 CHRScope *Scope, 412 IRBuilder<> &IRB, 413 Value *&MergedCondition); 414 415 Function &F; 416 BlockFrequencyInfo &BFI; 417 DominatorTree &DT; 418 ProfileSummaryInfo &PSI; 419 RegionInfo &RI; 420 CHRStats Stats; 421 422 // All the true-biased regions in the function 423 DenseSet<Region *> TrueBiasedRegionsGlobal; 424 // All the false-biased regions in the function 425 DenseSet<Region *> FalseBiasedRegionsGlobal; 426 // All the true-biased selects in the function 427 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 428 // All the false-biased selects in the function 429 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 430 // A map from biased regions to their branch bias 431 DenseMap<Region *, BranchProbability> BranchBiasMap; 432 // A map from biased selects to their branch bias 433 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 434 // All the scopes. 435 DenseSet<CHRScope *> Scopes; 436 }; 437 438 } // end anonymous namespace 439 440 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 441 if (ForceCHR) 442 return true; 443 444 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 445 if (CHRModules.count(F.getParent()->getName())) 446 return true; 447 StringRef Name = F.getName(); 448 if (CHRFunctions.count(Name)) 449 return true; 450 const char* DemangledName = nullptr; 451 #if !defined(_MSC_VER) 452 int Status = -1; 453 DemangledName = abi::__cxa_demangle(Name.str().c_str(), 454 nullptr, nullptr, &Status); 455 #endif 456 return DemangledName && CHRFunctions.count(DemangledName); 457 } 458 459 assert(PSI.hasProfileSummary() && "Empty PSI?"); 460 return PSI.isFunctionEntryHot(&F); 461 } 462 463 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 464 CHRStats *Stats) { 465 std::string Name = F.getName().str(); 466 const char *DemangledName = nullptr; 467 #if !defined(_MSC_VER) 468 int Status = -1; 469 DemangledName = abi::__cxa_demangle(Name.c_str(), 470 nullptr, nullptr, &Status); 471 #endif 472 if (DemangledName == nullptr) { 473 DemangledName = "<NOT-MANGLED>"; 474 } 475 std::string ModuleName = F.getParent()->getName().str(); 476 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 477 << Name); 478 if (Stats) 479 CHR_DEBUG(dbgs() << " " << *Stats); 480 CHR_DEBUG(dbgs() << "\n"); 481 CHR_DEBUG(F.dump()); 482 } 483 484 void CHRScope::print(raw_ostream &OS) const { 485 assert(RegInfos.size() > 0 && "Empty CHRScope"); 486 OS << "CHRScope["; 487 OS << RegInfos.size() << ", Regions["; 488 for (const RegInfo &RI : RegInfos) { 489 OS << RI.R->getNameStr(); 490 if (RI.HasBranch) 491 OS << " B"; 492 if (RI.Selects.size() > 0) 493 OS << " S" << RI.Selects.size(); 494 OS << ", "; 495 } 496 if (RegInfos[0].R->getParent()) { 497 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 498 } else { 499 // top level region 500 OS << "]"; 501 } 502 OS << ", Subs["; 503 for (CHRScope *Sub : Subs) { 504 OS << *Sub << ", "; 505 } 506 OS << "]]"; 507 } 508 509 // Return true if the given instruction type can be hoisted by CHR. 510 static bool isHoistableInstructionType(Instruction *I) { 511 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 512 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 513 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 514 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 515 isa<InsertValueInst>(I); 516 } 517 518 // Return true if the given instruction can be hoisted by CHR. 519 static bool isHoistable(Instruction *I, DominatorTree &DT) { 520 if (!isHoistableInstructionType(I)) 521 return false; 522 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 523 } 524 525 // Recursively traverse the use-def chains of the given value and return a set 526 // of the unhoistable base values defined within the scope (excluding the 527 // first-region entry block) or the (hoistable or unhoistable) base values that 528 // are defined outside (including the first-region entry block) of the 529 // scope. The returned set doesn't include constants. 530 static std::set<Value *> getBaseValues(Value *V, 531 DominatorTree &DT) { 532 std::set<Value *> Result; 533 if (auto *I = dyn_cast<Instruction>(V)) { 534 // We don't stop at a block that's not in the Scope because we would miss some 535 // instructions that are based on the same base values if we stop there. 536 if (!isHoistable(I, DT)) { 537 Result.insert(I); 538 return Result; 539 } 540 // I is hoistable above the Scope. 541 for (Value *Op : I->operands()) { 542 std::set<Value *> OpResult = getBaseValues(Op, DT); 543 Result.insert(OpResult.begin(), OpResult.end()); 544 } 545 return Result; 546 } 547 if (isa<Argument>(V)) { 548 Result.insert(V); 549 return Result; 550 } 551 // We don't include others like constants because those won't lead to any 552 // chance of folding of conditions (eg two bit checks merged into one check) 553 // after CHR. 554 return Result; // empty 555 } 556 557 // Return true if V is already hoisted or can be hoisted (along with its 558 // operands) above the insert point. When it returns true and HoistStops is 559 // non-null, the instructions to stop hoisting at through the use-def chains are 560 // inserted into HoistStops. 561 static bool 562 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 563 DenseSet<Instruction *> &Unhoistables, 564 DenseSet<Instruction *> *HoistStops) { 565 assert(InsertPoint && "Null InsertPoint"); 566 if (auto *I = dyn_cast<Instruction>(V)) { 567 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 568 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 569 if (Unhoistables.count(I)) { 570 // Don't hoist if they are not to be hoisted. 571 return false; 572 } 573 if (DT.dominates(I, InsertPoint)) { 574 // We are already above the insert point. Stop here. 575 if (HoistStops) 576 HoistStops->insert(I); 577 return true; 578 } 579 // We aren't not above the insert point, check if we can hoist it above the 580 // insert point. 581 if (isHoistable(I, DT)) { 582 // Check operands first. 583 DenseSet<Instruction *> OpsHoistStops; 584 bool AllOpsHoisted = true; 585 for (Value *Op : I->operands()) { 586 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) { 587 AllOpsHoisted = false; 588 break; 589 } 590 } 591 if (AllOpsHoisted) { 592 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 593 if (HoistStops) 594 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 595 return true; 596 } 597 } 598 return false; 599 } 600 // Non-instructions are considered hoistable. 601 return true; 602 } 603 604 // Returns true and sets the true probability and false probability of an 605 // MD_prof metadata if it's well-formed. 606 static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb, 607 BranchProbability &FalseProb) { 608 if (!MD) return false; 609 MDString *MDName = cast<MDString>(MD->getOperand(0)); 610 if (MDName->getString() != "branch_weights" || 611 MD->getNumOperands() != 3) 612 return false; 613 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 614 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 615 if (!TrueWeight || !FalseWeight) 616 return false; 617 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 618 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 619 uint64_t SumWt = TrueWt + FalseWt; 620 621 assert(SumWt >= TrueWt && SumWt >= FalseWt && 622 "Overflow calculating branch probabilities."); 623 624 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 625 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 626 return true; 627 } 628 629 static BranchProbability getCHRBiasThreshold() { 630 return BranchProbability::getBranchProbability( 631 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 632 } 633 634 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 635 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 636 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 637 // false. 638 template<typename K, typename S, typename M> 639 bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, 640 S &TrueSet, S &FalseSet, M &BiasMap) { 641 BranchProbability Threshold = getCHRBiasThreshold(); 642 if (TrueProb >= Threshold) { 643 TrueSet.insert(Key); 644 BiasMap[Key] = TrueProb; 645 return true; 646 } else if (FalseProb >= Threshold) { 647 FalseSet.insert(Key); 648 BiasMap[Key] = FalseProb; 649 return true; 650 } 651 return false; 652 } 653 654 // Returns true and insert a region into the right biased set and the map if the 655 // branch of the region is biased. 656 static bool CheckBiasedBranch(BranchInst *BI, Region *R, 657 DenseSet<Region *> &TrueBiasedRegionsGlobal, 658 DenseSet<Region *> &FalseBiasedRegionsGlobal, 659 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 660 if (!BI->isConditional()) 661 return false; 662 BranchProbability ThenProb, ElseProb; 663 if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof), 664 ThenProb, ElseProb)) 665 return false; 666 BasicBlock *IfThen = BI->getSuccessor(0); 667 BasicBlock *IfElse = BI->getSuccessor(1); 668 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 669 IfThen != IfElse && 670 "Invariant from findScopes"); 671 if (IfThen == R->getExit()) { 672 // Swap them so that IfThen/ThenProb means going into the conditional code 673 // and IfElse/ElseProb means skipping it. 674 std::swap(IfThen, IfElse); 675 std::swap(ThenProb, ElseProb); 676 } 677 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 678 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 679 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 680 return CheckBias(R, ThenProb, ElseProb, 681 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 682 BranchBiasMap); 683 } 684 685 // Returns true and insert a select into the right biased set and the map if the 686 // select is biased. 687 static bool CheckBiasedSelect( 688 SelectInst *SI, Region *R, 689 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 690 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 691 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 692 BranchProbability TrueProb, FalseProb; 693 if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof), 694 TrueProb, FalseProb)) 695 return false; 696 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 697 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 698 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 699 return CheckBias(SI, TrueProb, FalseProb, 700 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 701 SelectBiasMap); 702 } 703 704 // Returns the instruction at which to hoist the dependent condition values and 705 // insert the CHR branch for a region. This is the terminator branch in the 706 // entry block or the first select in the entry block, if any. 707 static Instruction* getBranchInsertPoint(RegInfo &RI) { 708 Region *R = RI.R; 709 BasicBlock *EntryBB = R->getEntry(); 710 // The hoist point is by default the terminator of the entry block, which is 711 // the same as the branch instruction if RI.HasBranch is true. 712 Instruction *HoistPoint = EntryBB->getTerminator(); 713 for (SelectInst *SI : RI.Selects) { 714 if (SI->getParent() == EntryBB) { 715 // Pick the first select in Selects in the entry block. Note Selects is 716 // sorted in the instruction order within a block (asserted below). 717 HoistPoint = SI; 718 break; 719 } 720 } 721 assert(HoistPoint && "Null HoistPoint"); 722 #ifndef NDEBUG 723 // Check that HoistPoint is the first one in Selects in the entry block, 724 // if any. 725 DenseSet<Instruction *> EntryBlockSelectSet; 726 for (SelectInst *SI : RI.Selects) { 727 if (SI->getParent() == EntryBB) { 728 EntryBlockSelectSet.insert(SI); 729 } 730 } 731 for (Instruction &I : *EntryBB) { 732 if (EntryBlockSelectSet.count(&I) > 0) { 733 assert(&I == HoistPoint && 734 "HoistPoint must be the first one in Selects"); 735 break; 736 } 737 } 738 #endif 739 return HoistPoint; 740 } 741 742 // Find a CHR scope in the given region. 743 CHRScope * CHR::findScope(Region *R) { 744 CHRScope *Result = nullptr; 745 BasicBlock *Entry = R->getEntry(); 746 BasicBlock *Exit = R->getExit(); // null if top level. 747 assert(Entry && "Entry must not be null"); 748 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 749 "Only top level region has a null exit"); 750 if (Entry) 751 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 752 else 753 CHR_DEBUG(dbgs() << "Entry null\n"); 754 if (Exit) 755 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 756 else 757 CHR_DEBUG(dbgs() << "Exit null\n"); 758 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 759 // to this region). 760 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 761 if (EntryInSubregion) 762 return nullptr; 763 // Exclude loops 764 for (BasicBlock *Pred : predecessors(Entry)) 765 if (R->contains(Pred)) 766 return nullptr; 767 if (Exit) { 768 // Try to find an if-then block (check if R is an if-then). 769 // if (cond) { 770 // ... 771 // } 772 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 773 if (BI) 774 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 775 else 776 CHR_DEBUG(dbgs() << "BI null\n"); 777 if (BI && BI->isConditional()) { 778 BasicBlock *S0 = BI->getSuccessor(0); 779 BasicBlock *S1 = BI->getSuccessor(1); 780 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 781 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 782 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 783 RegInfo RI(R); 784 RI.HasBranch = CheckBiasedBranch( 785 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 786 BranchBiasMap); 787 Result = new CHRScope(RI); 788 Scopes.insert(Result); 789 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 790 ++Stats.NumBranches; 791 } 792 } 793 } 794 { 795 // Try to look for selects in the direct child blocks (as opposed to in 796 // subregions) of R. 797 // ... 798 // if (..) { // Some subregion 799 // ... 800 // } 801 // if (..) { // Some subregion 802 // ... 803 // } 804 // ... 805 // a = cond ? b : c; 806 // ... 807 SmallVector<SelectInst *, 8> Selects; 808 for (RegionNode *E : R->elements()) { 809 if (E->isSubRegion()) 810 continue; 811 // This returns the basic block of E if E is a direct child of R (not a 812 // subregion.) 813 BasicBlock *BB = E->getEntry(); 814 // Need to push in the order to make it easier to find the first Select 815 // later. 816 for (Instruction &I : *BB) { 817 if (auto *SI = dyn_cast<SelectInst>(&I)) { 818 Selects.push_back(SI); 819 ++Stats.NumBranches; 820 } 821 } 822 } 823 if (Selects.size() > 0) { 824 auto AddSelects = [&](RegInfo &RI) { 825 for (auto *SI : Selects) 826 if (CheckBiasedSelect(SI, RI.R, 827 TrueBiasedSelectsGlobal, 828 FalseBiasedSelectsGlobal, 829 SelectBiasMap)) 830 RI.Selects.push_back(SI); 831 }; 832 if (!Result) { 833 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 834 RegInfo RI(R); 835 AddSelects(RI); 836 Result = new CHRScope(RI); 837 Scopes.insert(Result); 838 } else { 839 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 840 AddSelects(Result->RegInfos[0]); 841 } 842 } 843 } 844 845 if (Result) { 846 checkScopeHoistable(Result); 847 } 848 return Result; 849 } 850 851 // Check that any of the branch and the selects in the region could be 852 // hoisted above the the CHR branch insert point (the most dominating of 853 // them, either the branch (at the end of the first block) or the first 854 // select in the first block). If the branch can't be hoisted, drop the 855 // selects in the first blocks. 856 // 857 // For example, for the following scope/region with selects, we want to insert 858 // the merged branch right before the first select in the first/entry block by 859 // hoisting c1, c2, c3, and c4. 860 // 861 // // Branch insert point here. 862 // a = c1 ? b : c; // Select 1 863 // d = c2 ? e : f; // Select 2 864 // if (c3) { // Branch 865 // ... 866 // c4 = foo() // A call. 867 // g = c4 ? h : i; // Select 3 868 // } 869 // 870 // But suppose we can't hoist c4 because it's dependent on the preceding 871 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 872 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 873 void CHR::checkScopeHoistable(CHRScope *Scope) { 874 RegInfo &RI = Scope->RegInfos[0]; 875 Region *R = RI.R; 876 BasicBlock *EntryBB = R->getEntry(); 877 auto *Branch = RI.HasBranch ? 878 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 879 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 880 if (RI.HasBranch || !Selects.empty()) { 881 Instruction *InsertPoint = getBranchInsertPoint(RI); 882 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 883 // Avoid a data dependence from a select or a branch to a(nother) 884 // select. Note no instruction can't data-depend on a branch (a branch 885 // instruction doesn't produce a value). 886 DenseSet<Instruction *> Unhoistables; 887 // Initialize Unhoistables with the selects. 888 for (SelectInst *SI : Selects) { 889 Unhoistables.insert(SI); 890 } 891 // Remove Selects that can't be hoisted. 892 for (auto it = Selects.begin(); it != Selects.end(); ) { 893 SelectInst *SI = *it; 894 if (SI == InsertPoint) { 895 ++it; 896 continue; 897 } 898 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 899 DT, Unhoistables, nullptr); 900 if (!IsHoistable) { 901 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 902 it = Selects.erase(it); 903 // Since we are dropping the select here, we also drop it from 904 // Unhoistables. 905 Unhoistables.erase(SI); 906 } else 907 ++it; 908 } 909 // Update InsertPoint after potentially removing selects. 910 InsertPoint = getBranchInsertPoint(RI); 911 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 912 if (RI.HasBranch && InsertPoint != Branch) { 913 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 914 DT, Unhoistables, nullptr); 915 if (!IsHoistable) { 916 // If the branch isn't hoistable, drop the selects in the entry 917 // block, preferring the branch, which makes the branch the hoist 918 // point. 919 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 920 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 921 CHR_DEBUG( 922 for (SelectInst *SI : Selects) { 923 dbgs() << "SI " << *SI << "\n"; 924 }); 925 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 926 [EntryBB](SelectInst *SI) { 927 return SI->getParent() == EntryBB; 928 }), Selects.end()); 929 Unhoistables.clear(); 930 InsertPoint = Branch; 931 } 932 } 933 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 934 #ifndef NDEBUG 935 if (RI.HasBranch) { 936 assert(!DT.dominates(Branch, InsertPoint) && 937 "Branch can't be already above the hoist point"); 938 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 939 DT, Unhoistables, nullptr) && 940 "checkHoistValue for branch"); 941 } 942 for (auto *SI : Selects) { 943 assert(!DT.dominates(SI, InsertPoint) && 944 "SI can't be already above the hoist point"); 945 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 946 Unhoistables, nullptr) && 947 "checkHoistValue for selects"); 948 } 949 CHR_DEBUG(dbgs() << "Result\n"); 950 if (RI.HasBranch) { 951 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 952 } 953 for (auto *SI : Selects) { 954 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 955 } 956 #endif 957 } 958 } 959 960 // Traverse the region tree, find all nested scopes and merge them if possible. 961 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 962 SmallVectorImpl<CHRScope *> &Scopes) { 963 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 964 CHRScope *Result = findScope(R); 965 // Visit subscopes. 966 CHRScope *ConsecutiveSubscope = nullptr; 967 SmallVector<CHRScope *, 8> Subscopes; 968 for (auto It = R->begin(); It != R->end(); ++It) { 969 const std::unique_ptr<Region> &SubR = *It; 970 auto Next_It = std::next(It); 971 Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr; 972 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 973 << "\n"); 974 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 975 if (SubCHRScope) { 976 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 977 } else { 978 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 979 } 980 if (SubCHRScope) { 981 if (!ConsecutiveSubscope) 982 ConsecutiveSubscope = SubCHRScope; 983 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 984 Subscopes.push_back(ConsecutiveSubscope); 985 ConsecutiveSubscope = SubCHRScope; 986 } else 987 ConsecutiveSubscope->append(SubCHRScope); 988 } else { 989 if (ConsecutiveSubscope) { 990 Subscopes.push_back(ConsecutiveSubscope); 991 } 992 ConsecutiveSubscope = nullptr; 993 } 994 } 995 if (ConsecutiveSubscope) { 996 Subscopes.push_back(ConsecutiveSubscope); 997 } 998 for (CHRScope *Sub : Subscopes) { 999 if (Result) { 1000 // Combine it with the parent. 1001 Result->addSub(Sub); 1002 } else { 1003 // Push Subscopes as they won't be combined with the parent. 1004 Scopes.push_back(Sub); 1005 } 1006 } 1007 return Result; 1008 } 1009 1010 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1011 DenseSet<Value *> ConditionValues; 1012 if (RI.HasBranch) { 1013 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1014 ConditionValues.insert(BI->getCondition()); 1015 } 1016 for (SelectInst *SI : RI.Selects) { 1017 ConditionValues.insert(SI->getCondition()); 1018 } 1019 return ConditionValues; 1020 } 1021 1022 1023 // Determine whether to split a scope depending on the sets of the branch 1024 // condition values of the previous region and the current region. We split 1025 // (return true) it if 1) the condition values of the inner/lower scope can't be 1026 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1027 // values have an empty intersection (because the combined branch conditions 1028 // won't probably lead to a simpler combined condition). 1029 static bool shouldSplit(Instruction *InsertPoint, 1030 DenseSet<Value *> &PrevConditionValues, 1031 DenseSet<Value *> &ConditionValues, 1032 DominatorTree &DT, 1033 DenseSet<Instruction *> &Unhoistables) { 1034 CHR_DEBUG( 1035 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1036 for (Value *V : PrevConditionValues) { 1037 dbgs() << *V << ", "; 1038 } 1039 dbgs() << " ConditionValues "; 1040 for (Value *V : ConditionValues) { 1041 dbgs() << *V << ", "; 1042 } 1043 dbgs() << "\n"); 1044 assert(InsertPoint && "Null InsertPoint"); 1045 // If any of Bases isn't hoistable to the hoist point, split. 1046 for (Value *V : ConditionValues) { 1047 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) { 1048 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1049 return true; // Not hoistable, split. 1050 } 1051 } 1052 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1053 // unnecessary splits at scopes with no branch/selects. If 1054 // PrevConditionValues and ConditionValues don't intersect at all, split. 1055 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1056 // Use std::set as DenseSet doesn't work with set_intersection. 1057 std::set<Value *> PrevBases, Bases; 1058 for (Value *V : PrevConditionValues) { 1059 std::set<Value *> BaseValues = getBaseValues(V, DT); 1060 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1061 } 1062 for (Value *V : ConditionValues) { 1063 std::set<Value *> BaseValues = getBaseValues(V, DT); 1064 Bases.insert(BaseValues.begin(), BaseValues.end()); 1065 } 1066 CHR_DEBUG( 1067 dbgs() << "PrevBases "; 1068 for (Value *V : PrevBases) { 1069 dbgs() << *V << ", "; 1070 } 1071 dbgs() << " Bases "; 1072 for (Value *V : Bases) { 1073 dbgs() << *V << ", "; 1074 } 1075 dbgs() << "\n"); 1076 std::set<Value *> Intersection; 1077 std::set_intersection(PrevBases.begin(), PrevBases.end(), 1078 Bases.begin(), Bases.end(), 1079 std::inserter(Intersection, Intersection.begin())); 1080 if (Intersection.empty()) { 1081 // Empty intersection, split. 1082 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1083 return true; 1084 } 1085 } 1086 CHR_DEBUG(dbgs() << "No split\n"); 1087 return false; // Don't split. 1088 } 1089 1090 static void GetSelectsInScope(CHRScope *Scope, 1091 DenseSet<Instruction *> &Output) { 1092 for (RegInfo &RI : Scope->RegInfos) { 1093 for (SelectInst *SI : RI.Selects) { 1094 Output.insert(SI); 1095 } 1096 } 1097 for (CHRScope *Sub : Scope->Subs) { 1098 GetSelectsInScope(Sub, Output); 1099 } 1100 } 1101 1102 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1103 SmallVectorImpl<CHRScope *> &Output) { 1104 for (CHRScope *Scope : Input) { 1105 assert(!Scope->BranchInsertPoint && 1106 "BranchInsertPoint must not be set"); 1107 DenseSet<Instruction *> Unhoistables; 1108 GetSelectsInScope(Scope, Unhoistables); 1109 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1110 } 1111 #ifndef NDEBUG 1112 for (CHRScope *Scope : Output) { 1113 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1114 } 1115 #endif 1116 } 1117 1118 SmallVector<CHRScope *, 8> CHR::splitScope( 1119 CHRScope *Scope, 1120 CHRScope *Outer, 1121 DenseSet<Value *> *OuterConditionValues, 1122 Instruction *OuterInsertPoint, 1123 SmallVectorImpl<CHRScope *> &Output, 1124 DenseSet<Instruction *> &Unhoistables) { 1125 if (Outer) { 1126 assert(OuterConditionValues && "Null OuterConditionValues"); 1127 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1128 } 1129 bool PrevSplitFromOuter = true; 1130 DenseSet<Value *> PrevConditionValues; 1131 Instruction *PrevInsertPoint = nullptr; 1132 SmallVector<CHRScope *, 8> Splits; 1133 SmallVector<bool, 8> SplitsSplitFromOuter; 1134 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1135 SmallVector<Instruction *, 8> SplitsInsertPoints; 1136 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1137 for (RegInfo &RI : RegInfos) { 1138 Instruction *InsertPoint = getBranchInsertPoint(RI); 1139 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1140 CHR_DEBUG( 1141 dbgs() << "ConditionValues "; 1142 for (Value *V : ConditionValues) { 1143 dbgs() << *V << ", "; 1144 } 1145 dbgs() << "\n"); 1146 if (RI.R == RegInfos[0].R) { 1147 // First iteration. Check to see if we should split from the outer. 1148 if (Outer) { 1149 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1150 CHR_DEBUG(dbgs() << "Should split from outer at " 1151 << RI.R->getNameStr() << "\n"); 1152 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1153 ConditionValues, DT, Unhoistables)) { 1154 PrevConditionValues = ConditionValues; 1155 PrevInsertPoint = InsertPoint; 1156 } else { 1157 // Not splitting from the outer. Use the outer bases and insert 1158 // point. Union the bases. 1159 PrevSplitFromOuter = false; 1160 PrevConditionValues = *OuterConditionValues; 1161 PrevConditionValues.insert(ConditionValues.begin(), 1162 ConditionValues.end()); 1163 PrevInsertPoint = OuterInsertPoint; 1164 } 1165 } else { 1166 CHR_DEBUG(dbgs() << "Outer null\n"); 1167 PrevConditionValues = ConditionValues; 1168 PrevInsertPoint = InsertPoint; 1169 } 1170 } else { 1171 CHR_DEBUG(dbgs() << "Should split from prev at " 1172 << RI.R->getNameStr() << "\n"); 1173 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1174 DT, Unhoistables)) { 1175 CHRScope *Tail = Scope->split(RI.R); 1176 Scopes.insert(Tail); 1177 Splits.push_back(Scope); 1178 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1179 SplitsConditionValues.push_back(PrevConditionValues); 1180 SplitsInsertPoints.push_back(PrevInsertPoint); 1181 Scope = Tail; 1182 PrevConditionValues = ConditionValues; 1183 PrevInsertPoint = InsertPoint; 1184 PrevSplitFromOuter = true; 1185 } else { 1186 // Not splitting. Union the bases. Keep the hoist point. 1187 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1188 } 1189 } 1190 } 1191 Splits.push_back(Scope); 1192 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1193 SplitsConditionValues.push_back(PrevConditionValues); 1194 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1195 SplitsInsertPoints.push_back(PrevInsertPoint); 1196 assert(Splits.size() == SplitsConditionValues.size() && 1197 Splits.size() == SplitsSplitFromOuter.size() && 1198 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1199 for (size_t I = 0; I < Splits.size(); ++I) { 1200 CHRScope *Split = Splits[I]; 1201 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1202 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1203 SmallVector<CHRScope *, 8> NewSubs; 1204 DenseSet<Instruction *> SplitUnhoistables; 1205 GetSelectsInScope(Split, SplitUnhoistables); 1206 for (CHRScope *Sub : Split->Subs) { 1207 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1208 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1209 SplitUnhoistables); 1210 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1211 } 1212 Split->Subs = NewSubs; 1213 } 1214 SmallVector<CHRScope *, 8> Result; 1215 for (size_t I = 0; I < Splits.size(); ++I) { 1216 CHRScope *Split = Splits[I]; 1217 if (SplitsSplitFromOuter[I]) { 1218 // Split from the outer. 1219 Output.push_back(Split); 1220 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1221 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1222 << "\n"); 1223 } else { 1224 // Connected to the outer. 1225 Result.push_back(Split); 1226 } 1227 } 1228 if (!Outer) 1229 assert(Result.empty() && 1230 "If no outer (top-level), must return no nested ones"); 1231 return Result; 1232 } 1233 1234 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1235 for (CHRScope *Scope : Scopes) { 1236 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1237 classifyBiasedScopes(Scope, Scope); 1238 CHR_DEBUG( 1239 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1240 dbgs() << "TrueBiasedRegions "; 1241 for (Region *R : Scope->TrueBiasedRegions) { 1242 dbgs() << R->getNameStr() << ", "; 1243 } 1244 dbgs() << "\n"; 1245 dbgs() << "FalseBiasedRegions "; 1246 for (Region *R : Scope->FalseBiasedRegions) { 1247 dbgs() << R->getNameStr() << ", "; 1248 } 1249 dbgs() << "\n"; 1250 dbgs() << "TrueBiasedSelects "; 1251 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1252 dbgs() << *SI << ", "; 1253 } 1254 dbgs() << "\n"; 1255 dbgs() << "FalseBiasedSelects "; 1256 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1257 dbgs() << *SI << ", "; 1258 } 1259 dbgs() << "\n";); 1260 } 1261 } 1262 1263 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1264 for (RegInfo &RI : Scope->RegInfos) { 1265 if (RI.HasBranch) { 1266 Region *R = RI.R; 1267 if (TrueBiasedRegionsGlobal.count(R) > 0) 1268 OutermostScope->TrueBiasedRegions.insert(R); 1269 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1270 OutermostScope->FalseBiasedRegions.insert(R); 1271 else 1272 llvm_unreachable("Must be biased"); 1273 } 1274 for (SelectInst *SI : RI.Selects) { 1275 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1276 OutermostScope->TrueBiasedSelects.insert(SI); 1277 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1278 OutermostScope->FalseBiasedSelects.insert(SI); 1279 else 1280 llvm_unreachable("Must be biased"); 1281 } 1282 } 1283 for (CHRScope *Sub : Scope->Subs) { 1284 classifyBiasedScopes(Sub, OutermostScope); 1285 } 1286 } 1287 1288 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1289 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1290 Scope->FalseBiasedRegions.size() + 1291 Scope->TrueBiasedSelects.size() + 1292 Scope->FalseBiasedSelects.size(); 1293 return NumBiased >= CHRMergeThreshold; 1294 } 1295 1296 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1297 SmallVectorImpl<CHRScope *> &Output) { 1298 for (CHRScope *Scope : Input) { 1299 // Filter out the ones with only one region and no subs. 1300 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1301 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1302 << Scope->TrueBiasedRegions.size() 1303 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1304 << " true-selects " << Scope->TrueBiasedSelects.size() 1305 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1306 continue; 1307 } 1308 Output.push_back(Scope); 1309 } 1310 } 1311 1312 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1313 SmallVectorImpl<CHRScope *> &Output) { 1314 for (CHRScope *Scope : Input) { 1315 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1316 "Empty"); 1317 setCHRRegions(Scope, Scope); 1318 Output.push_back(Scope); 1319 CHR_DEBUG( 1320 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1321 for (auto pair : Scope->HoistStopMap) { 1322 Region *R = pair.first; 1323 dbgs() << "Region " << R->getNameStr() << "\n"; 1324 for (Instruction *I : pair.second) { 1325 dbgs() << "HoistStop " << *I << "\n"; 1326 } 1327 } 1328 dbgs() << "CHRRegions" << "\n"; 1329 for (RegInfo &RI : Scope->CHRRegions) { 1330 dbgs() << RI.R->getNameStr() << "\n"; 1331 }); 1332 } 1333 } 1334 1335 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1336 DenseSet<Instruction *> Unhoistables; 1337 // Put the biased selects in Unhoistables because they should stay where they 1338 // are and constant-folded after CHR (in case one biased select or a branch 1339 // can depend on another biased select.) 1340 for (RegInfo &RI : Scope->RegInfos) { 1341 for (SelectInst *SI : RI.Selects) { 1342 Unhoistables.insert(SI); 1343 } 1344 } 1345 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1346 for (RegInfo &RI : Scope->RegInfos) { 1347 Region *R = RI.R; 1348 DenseSet<Instruction *> HoistStops; 1349 bool IsHoisted = false; 1350 if (RI.HasBranch) { 1351 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1352 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1353 "Must be truthy or falsy"); 1354 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1355 // Note checkHoistValue fills in HoistStops. 1356 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1357 Unhoistables, &HoistStops); 1358 assert(IsHoistable && "Must be hoistable"); 1359 (void)(IsHoistable); // Unused in release build 1360 IsHoisted = true; 1361 } 1362 for (SelectInst *SI : RI.Selects) { 1363 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1364 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1365 "Must be true or false biased"); 1366 // Note checkHoistValue fills in HoistStops. 1367 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1368 Unhoistables, &HoistStops); 1369 assert(IsHoistable && "Must be hoistable"); 1370 (void)(IsHoistable); // Unused in release build 1371 IsHoisted = true; 1372 } 1373 if (IsHoisted) { 1374 OutermostScope->CHRRegions.push_back(RI); 1375 OutermostScope->HoistStopMap[R] = HoistStops; 1376 } 1377 } 1378 for (CHRScope *Sub : Scope->Subs) 1379 setCHRRegions(Sub, OutermostScope); 1380 } 1381 1382 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1383 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1384 } 1385 1386 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1387 SmallVectorImpl<CHRScope *> &Output) { 1388 Output.resize(Input.size()); 1389 std::copy(Input.begin(), Input.end(), Output.begin()); 1390 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter); 1391 } 1392 1393 // Return true if V is already hoisted or was hoisted (along with its operands) 1394 // to the insert point. 1395 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1396 HoistStopMapTy &HoistStopMap, 1397 DenseSet<Instruction *> &HoistedSet, 1398 DenseSet<PHINode *> &TrivialPHIs) { 1399 auto IT = HoistStopMap.find(R); 1400 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1401 DenseSet<Instruction *> &HoistStops = IT->second; 1402 if (auto *I = dyn_cast<Instruction>(V)) { 1403 if (I == HoistPoint) 1404 return; 1405 if (HoistStops.count(I)) 1406 return; 1407 if (auto *PN = dyn_cast<PHINode>(I)) 1408 if (TrivialPHIs.count(PN)) 1409 // The trivial phi inserted by the previous CHR scope could replace a 1410 // non-phi in HoistStops. Note that since this phi is at the exit of a 1411 // previous CHR scope, which dominates this scope, it's safe to stop 1412 // hoisting there. 1413 return; 1414 if (HoistedSet.count(I)) 1415 // Already hoisted, return. 1416 return; 1417 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1418 for (Value *Op : I->operands()) { 1419 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs); 1420 } 1421 I->moveBefore(HoistPoint); 1422 HoistedSet.insert(I); 1423 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1424 } 1425 } 1426 1427 // Hoist the dependent condition values of the branches and the selects in the 1428 // scope to the insert point. 1429 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1430 DenseSet<PHINode *> &TrivialPHIs) { 1431 DenseSet<Instruction *> HoistedSet; 1432 for (const RegInfo &RI : Scope->CHRRegions) { 1433 Region *R = RI.R; 1434 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1435 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1436 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1437 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1438 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1439 HoistedSet, TrivialPHIs); 1440 } 1441 for (SelectInst *SI : RI.Selects) { 1442 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1443 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1444 if (!(IsTrueBiased || IsFalseBiased)) 1445 continue; 1446 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1447 HoistedSet, TrivialPHIs); 1448 } 1449 } 1450 } 1451 1452 // Negate the predicate if an ICmp if it's used only by branches or selects by 1453 // swapping the operands of the branches or the selects. Returns true if success. 1454 static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1455 Instruction *ExcludedUser, 1456 CHRScope *Scope) { 1457 for (User *U : ICmp->users()) { 1458 if (U == ExcludedUser) 1459 continue; 1460 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1461 continue; 1462 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1463 continue; 1464 return false; 1465 } 1466 for (User *U : ICmp->users()) { 1467 if (U == ExcludedUser) 1468 continue; 1469 if (auto *BI = dyn_cast<BranchInst>(U)) { 1470 assert(BI->isConditional() && "Must be conditional"); 1471 BI->swapSuccessors(); 1472 // Don't need to swap this in terms of 1473 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1474 // mean whehter the branch is likely go into the if-then rather than 1475 // successor0/successor1 and because we can tell which edge is the then or 1476 // the else one by comparing the destination to the region exit block. 1477 continue; 1478 } 1479 if (auto *SI = dyn_cast<SelectInst>(U)) { 1480 // Swap operands 1481 Value *TrueValue = SI->getTrueValue(); 1482 Value *FalseValue = SI->getFalseValue(); 1483 SI->setTrueValue(FalseValue); 1484 SI->setFalseValue(TrueValue); 1485 SI->swapProfMetadata(); 1486 if (Scope->TrueBiasedSelects.count(SI)) { 1487 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1488 "Must not be already in"); 1489 Scope->FalseBiasedSelects.insert(SI); 1490 } else if (Scope->FalseBiasedSelects.count(SI)) { 1491 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1492 "Must not be already in"); 1493 Scope->TrueBiasedSelects.insert(SI); 1494 } 1495 continue; 1496 } 1497 llvm_unreachable("Must be a branch or a select"); 1498 } 1499 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1500 return true; 1501 } 1502 1503 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1504 // for a value that's defined in the scope but used outside it (meaning it's 1505 // alive at the exit block). 1506 static void insertTrivialPHIs(CHRScope *Scope, 1507 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1508 DenseSet<PHINode *> &TrivialPHIs) { 1509 DenseSet<BasicBlock *> BlocksInScopeSet; 1510 SmallVector<BasicBlock *, 8> BlocksInScopeVec; 1511 for (RegInfo &RI : Scope->RegInfos) { 1512 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1513 // sub-Scopes. 1514 BlocksInScopeSet.insert(BB); 1515 BlocksInScopeVec.push_back(BB); 1516 } 1517 } 1518 CHR_DEBUG( 1519 dbgs() << "Inserting redudant phis\n"; 1520 for (BasicBlock *BB : BlocksInScopeVec) { 1521 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1522 }); 1523 for (BasicBlock *BB : BlocksInScopeVec) { 1524 for (Instruction &I : *BB) { 1525 SmallVector<Instruction *, 8> Users; 1526 for (User *U : I.users()) { 1527 if (auto *UI = dyn_cast<Instruction>(U)) { 1528 if (BlocksInScopeSet.count(UI->getParent()) == 0 && 1529 // Unless there's already a phi for I at the exit block. 1530 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1531 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1532 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1533 Users.push_back(UI); 1534 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1535 // There's a loop backedge from a block that's dominated by this 1536 // scope to the entry block. 1537 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1538 CHR_DEBUG(dbgs() 1539 << "Used at entry block (for a back edge) by a phi user " 1540 << *UI << "\n"); 1541 Users.push_back(UI); 1542 } 1543 } 1544 } 1545 if (Users.size() > 0) { 1546 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1547 // ExitBlock. Replace I with the new phi in UI unless UI is another 1548 // phi at ExitBlock. 1549 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1550 pred_end(ExitBlock)); 1551 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1552 &ExitBlock->front()); 1553 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1554 PN->addIncoming(&I, Pred); 1555 } 1556 TrivialPHIs.insert(PN); 1557 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1558 for (Instruction *UI : Users) { 1559 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1560 if (UI->getOperand(J) == &I) { 1561 UI->setOperand(J, PN); 1562 } 1563 } 1564 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1565 } 1566 } 1567 } 1568 } 1569 } 1570 1571 // Assert that all the CHR regions of the scope have a biased branch or select. 1572 static void LLVM_ATTRIBUTE_UNUSED 1573 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1574 #ifndef NDEBUG 1575 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1576 if (Scope->TrueBiasedRegions.count(RI.R) || 1577 Scope->FalseBiasedRegions.count(RI.R)) 1578 return true; 1579 for (SelectInst *SI : RI.Selects) 1580 if (Scope->TrueBiasedSelects.count(SI) || 1581 Scope->FalseBiasedSelects.count(SI)) 1582 return true; 1583 return false; 1584 }; 1585 for (RegInfo &RI : Scope->CHRRegions) { 1586 assert(HasBiasedBranchOrSelect(RI, Scope) && 1587 "Must have biased branch or select"); 1588 } 1589 #endif 1590 } 1591 1592 // Assert that all the condition values of the biased branches and selects have 1593 // been hoisted to the pre-entry block or outside of the scope. 1594 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1595 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1596 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1597 for (RegInfo &RI : Scope->CHRRegions) { 1598 Region *R = RI.R; 1599 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1600 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1601 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1602 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1603 Value *V = BI->getCondition(); 1604 CHR_DEBUG(dbgs() << *V << "\n"); 1605 if (auto *I = dyn_cast<Instruction>(V)) { 1606 (void)(I); // Unused in release build. 1607 assert((I->getParent() == PreEntryBlock || 1608 !Scope->contains(I)) && 1609 "Must have been hoisted to PreEntryBlock or outside the scope"); 1610 } 1611 } 1612 for (SelectInst *SI : RI.Selects) { 1613 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1614 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1615 if (!(IsTrueBiased || IsFalseBiased)) 1616 continue; 1617 Value *V = SI->getCondition(); 1618 CHR_DEBUG(dbgs() << *V << "\n"); 1619 if (auto *I = dyn_cast<Instruction>(V)) { 1620 (void)(I); // Unused in release build. 1621 assert((I->getParent() == PreEntryBlock || 1622 !Scope->contains(I)) && 1623 "Must have been hoisted to PreEntryBlock or outside the scope"); 1624 } 1625 } 1626 } 1627 } 1628 1629 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1630 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1631 1632 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1633 Region *FirstRegion = Scope->RegInfos[0].R; 1634 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1635 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1636 BasicBlock *ExitBlock = LastRegion->getExit(); 1637 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1638 1639 if (ExitBlock) { 1640 // Insert a trivial phi at the exit block (where the CHR hot path and the 1641 // cold path merges) for a value that's defined in the scope but used 1642 // outside it (meaning it's alive at the exit block). We will add the 1643 // incoming values for the CHR cold paths to it below. Without this, we'd 1644 // miss updating phi's for such values unless there happens to already be a 1645 // phi for that value there. 1646 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1647 } 1648 1649 // Split the entry block of the first region. The new block becomes the new 1650 // entry block of the first region. The old entry block becomes the block to 1651 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1652 // through the split, we update the entry of the first region after the split, 1653 // and Region only points to the entry and the exit blocks, rather than 1654 // keeping everything in a list or set, the blocks membership and the 1655 // entry/exit blocks of the region are still valid after the split. 1656 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1657 << " at " << *Scope->BranchInsertPoint << "\n"); 1658 BasicBlock *NewEntryBlock = 1659 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1660 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1661 "NewEntryBlock's only pred must be EntryBlock"); 1662 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1663 BasicBlock *PreEntryBlock = EntryBlock; 1664 1665 ValueToValueMapTy VMap; 1666 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1667 // hot path (originals) and a cold path (clones) and update the PHIs at the 1668 // exit block. 1669 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1670 1671 // Replace the old (placeholder) branch with the new (merged) conditional 1672 // branch. 1673 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1674 NewEntryBlock, VMap); 1675 1676 #ifndef NDEBUG 1677 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1678 #endif 1679 1680 // Hoist the conditional values of the branches/selects. 1681 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs); 1682 1683 #ifndef NDEBUG 1684 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1685 #endif 1686 1687 // Create the combined branch condition and constant-fold the branches/selects 1688 // in the hot path. 1689 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1690 ProfileCount ? ProfileCount.getValue() : 0); 1691 } 1692 1693 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1694 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1695 // at the exit block. 1696 void CHR::cloneScopeBlocks(CHRScope *Scope, 1697 BasicBlock *PreEntryBlock, 1698 BasicBlock *ExitBlock, 1699 Region *LastRegion, 1700 ValueToValueMapTy &VMap) { 1701 // Clone all the blocks. The original blocks will be the hot-path 1702 // CHR-optimized code and the cloned blocks will be the original unoptimized 1703 // code. This is so that the block pointers from the 1704 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1705 // which CHR should apply to. 1706 SmallVector<BasicBlock*, 8> NewBlocks; 1707 for (RegInfo &RI : Scope->RegInfos) 1708 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1709 // sub-Scopes. 1710 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1711 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1712 NewBlocks.push_back(NewBB); 1713 VMap[BB] = NewBB; 1714 } 1715 1716 // Place the cloned blocks right after the original blocks (right before the 1717 // exit block of.) 1718 if (ExitBlock) 1719 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1720 F.getBasicBlockList(), 1721 NewBlocks[0]->getIterator(), F.end()); 1722 1723 // Update the cloned blocks/instructions to refer to themselves. 1724 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1725 for (Instruction &I : *NewBlocks[i]) 1726 RemapInstruction(&I, VMap, 1727 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1728 1729 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1730 // the top-level region but we don't need to add PHIs. The trivial PHIs 1731 // inserted above will be updated here. 1732 if (ExitBlock) 1733 for (PHINode &PN : ExitBlock->phis()) 1734 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1735 ++I) { 1736 BasicBlock *Pred = PN.getIncomingBlock(I); 1737 if (LastRegion->contains(Pred)) { 1738 Value *V = PN.getIncomingValue(I); 1739 auto It = VMap.find(V); 1740 if (It != VMap.end()) V = It->second; 1741 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1742 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1743 } 1744 } 1745 } 1746 1747 // A helper for transformScope. Replace the old (placeholder) branch with the 1748 // new (merged) conditional branch. 1749 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1750 BasicBlock *EntryBlock, 1751 BasicBlock *NewEntryBlock, 1752 ValueToValueMapTy &VMap) { 1753 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1754 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1755 "SplitBlock did not work correctly!"); 1756 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1757 "NewEntryBlock's only pred must be EntryBlock"); 1758 assert(VMap.find(NewEntryBlock) != VMap.end() && 1759 "NewEntryBlock must have been copied"); 1760 OldBR->dropAllReferences(); 1761 OldBR->eraseFromParent(); 1762 // The true predicate is a placeholder. It will be replaced later in 1763 // fixupBranchesAndSelects(). 1764 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1765 cast<BasicBlock>(VMap[NewEntryBlock]), 1766 ConstantInt::getTrue(F.getContext())); 1767 PreEntryBlock->getInstList().push_back(NewBR); 1768 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1769 "NewEntryBlock's only pred must be EntryBlock"); 1770 return NewBR; 1771 } 1772 1773 // A helper for transformScopes. Create the combined branch condition and 1774 // constant-fold the branches/selects in the hot path. 1775 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1776 BasicBlock *PreEntryBlock, 1777 BranchInst *MergedBR, 1778 uint64_t ProfileCount) { 1779 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1780 BranchProbability CHRBranchBias(1, 1); 1781 uint64_t NumCHRedBranches = 0; 1782 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1783 for (RegInfo &RI : Scope->CHRRegions) { 1784 Region *R = RI.R; 1785 if (RI.HasBranch) { 1786 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1787 ++NumCHRedBranches; 1788 } 1789 for (SelectInst *SI : RI.Selects) { 1790 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1791 ++NumCHRedBranches; 1792 } 1793 } 1794 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1795 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1796 MergedBR->setCondition(MergedCondition); 1797 SmallVector<uint32_t, 2> Weights; 1798 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); 1799 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); 1800 MDBuilder MDB(F.getContext()); 1801 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1802 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1803 << "\n"); 1804 } 1805 1806 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1807 // and constant-fold a branch in the hot path. 1808 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1809 IRBuilder<> &IRB, 1810 Value *&MergedCondition, 1811 BranchProbability &CHRBranchBias) { 1812 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1813 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1814 "Must be truthy or falsy"); 1815 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1816 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1817 "Must be in the bias map"); 1818 BranchProbability Bias = BranchBiasMap[R]; 1819 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1820 // Take the min. 1821 if (CHRBranchBias > Bias) 1822 CHRBranchBias = Bias; 1823 BasicBlock *IfThen = BI->getSuccessor(1); 1824 BasicBlock *IfElse = BI->getSuccessor(0); 1825 BasicBlock *RegionExitBlock = R->getExit(); 1826 assert(RegionExitBlock && "Null ExitBlock"); 1827 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1828 IfThen != IfElse && "Invariant from findScopes"); 1829 if (IfThen == RegionExitBlock) { 1830 // Swap them so that IfThen means going into it and IfElse means skipping 1831 // it. 1832 std::swap(IfThen, IfElse); 1833 } 1834 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1835 << " IfElse " << IfElse->getName() << "\n"); 1836 Value *Cond = BI->getCondition(); 1837 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1838 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1839 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1840 MergedCondition); 1841 // Constant-fold the branch at ClonedEntryBlock. 1842 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1843 "The successor shouldn't change"); 1844 Value *NewCondition = ConditionTrue ? 1845 ConstantInt::getTrue(F.getContext()) : 1846 ConstantInt::getFalse(F.getContext()); 1847 BI->setCondition(NewCondition); 1848 } 1849 1850 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1851 // and constant-fold a select in the hot path. 1852 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1853 IRBuilder<> &IRB, 1854 Value *&MergedCondition, 1855 BranchProbability &CHRBranchBias) { 1856 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1857 assert((IsTrueBiased || 1858 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1859 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1860 "Must be in the bias map"); 1861 BranchProbability Bias = SelectBiasMap[SI]; 1862 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1863 // Take the min. 1864 if (CHRBranchBias > Bias) 1865 CHRBranchBias = Bias; 1866 Value *Cond = SI->getCondition(); 1867 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1868 MergedCondition); 1869 Value *NewCondition = IsTrueBiased ? 1870 ConstantInt::getTrue(F.getContext()) : 1871 ConstantInt::getFalse(F.getContext()); 1872 SI->setCondition(NewCondition); 1873 } 1874 1875 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1876 // condition. 1877 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1878 Instruction *BranchOrSelect, 1879 CHRScope *Scope, 1880 IRBuilder<> &IRB, 1881 Value *&MergedCondition) { 1882 if (IsTrueBiased) { 1883 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1884 } else { 1885 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1886 // branch, negate the icmp predicate and swap the branch targets and avoid 1887 // inserting an Xor to negate Cond. 1888 bool Done = false; 1889 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1890 if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1891 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1892 Done = true; 1893 } 1894 if (!Done) { 1895 Value *Negate = IRB.CreateXor( 1896 ConstantInt::getTrue(F.getContext()), Cond); 1897 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1898 } 1899 } 1900 } 1901 1902 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1903 unsigned i = 0; 1904 (void)(i); // Unused in release build. 1905 DenseSet<PHINode *> TrivialPHIs; 1906 for (CHRScope *Scope : CHRScopes) { 1907 transformScopes(Scope, TrivialPHIs); 1908 CHR_DEBUG( 1909 std::ostringstream oss; 1910 oss << " after transformScopes " << i++; 1911 dumpIR(F, oss.str().c_str(), nullptr)); 1912 } 1913 } 1914 1915 static void LLVM_ATTRIBUTE_UNUSED 1916 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1917 dbgs() << Label << " " << Scopes.size() << "\n"; 1918 for (CHRScope *Scope : Scopes) { 1919 dbgs() << *Scope << "\n"; 1920 } 1921 } 1922 1923 bool CHR::run() { 1924 if (!shouldApply(F, PSI)) 1925 return false; 1926 1927 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1928 1929 bool Changed = false; 1930 { 1931 CHR_DEBUG( 1932 dbgs() << "RegionInfo:\n"; 1933 RI.print(dbgs())); 1934 1935 // Recursively traverse the region tree and find regions that have biased 1936 // branches and/or selects and create scopes. 1937 SmallVector<CHRScope *, 8> AllScopes; 1938 findScopes(AllScopes); 1939 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 1940 1941 // Split the scopes if 1) the conditiona values of the biased 1942 // branches/selects of the inner/lower scope can't be hoisted up to the 1943 // outermost/uppermost scope entry, or 2) the condition values of the biased 1944 // branches/selects in a scope (including subscopes) don't share at least 1945 // one common value. 1946 SmallVector<CHRScope *, 8> SplitScopes; 1947 splitScopes(AllScopes, SplitScopes); 1948 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 1949 1950 // After splitting, set the biased regions and selects of a scope (a tree 1951 // root) that include those of the subscopes. 1952 classifyBiasedScopes(SplitScopes); 1953 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 1954 1955 // Filter out the scopes that has only one biased region or select (CHR 1956 // isn't useful in such a case). 1957 SmallVector<CHRScope *, 8> FilteredScopes; 1958 filterScopes(SplitScopes, FilteredScopes); 1959 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 1960 1961 // Set the regions to be CHR'ed and their hoist stops for each scope. 1962 SmallVector<CHRScope *, 8> SetScopes; 1963 setCHRRegions(FilteredScopes, SetScopes); 1964 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 1965 1966 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 1967 // ones. We need to apply CHR from outer to inner so that we apply CHR only 1968 // to the hot path, rather than both hot and cold paths. 1969 SmallVector<CHRScope *, 8> SortedScopes; 1970 sortScopes(SetScopes, SortedScopes); 1971 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 1972 1973 CHR_DEBUG( 1974 dbgs() << "RegionInfo:\n"; 1975 RI.print(dbgs())); 1976 1977 // Apply the CHR transformation. 1978 if (!SortedScopes.empty()) { 1979 transformScopes(SortedScopes); 1980 Changed = true; 1981 } 1982 } 1983 1984 if (Changed) 1985 CHR_DEBUG(dumpIR(F, "after", &Stats)); 1986 1987 return Changed; 1988 } 1989 1990 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 1991 BlockFrequencyInfo &BFI = 1992 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 1993 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1994 ProfileSummaryInfo &PSI = 1995 *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1996 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1997 return CHR(F, BFI, DT, PSI, RI).run(); 1998 } 1999 2000 namespace llvm { 2001 2002 ControlHeightReductionPass::ControlHeightReductionPass() { 2003 ParseCHRFilterFiles(); 2004 } 2005 2006 PreservedAnalyses ControlHeightReductionPass::run( 2007 Function &F, 2008 FunctionAnalysisManager &FAM) { 2009 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2010 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2011 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2012 auto &MAM = MAMProxy.getManager(); 2013 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2014 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2015 bool Changed = CHR(F, BFI, DT, PSI, RI).run(); 2016 if (!Changed) 2017 return PreservedAnalyses::all(); 2018 auto PA = PreservedAnalyses(); 2019 PA.preserve<GlobalsAA>(); 2020 return PA; 2021 } 2022 2023 } // namespace llvm 2024