1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass merges conditional blocks of code and reduces the number of 11 // conditional branches in the hot paths based on profiles. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 16 #include "llvm/Transforms/Utils.h" 17 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 18 #include "llvm/Transforms/Utils/Cloning.h" 19 #include "llvm/Transforms/Utils/ValueMapper.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/DenseSet.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/StringSet.h" 24 #include "llvm/Analysis/BlockFrequencyInfo.h" 25 #include "llvm/Analysis/ProfileSummaryInfo.h" 26 #include "llvm/Analysis/RegionInfo.h" 27 #include "llvm/Analysis/RegionIterator.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/CFG.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/IRBuilder.h" 32 #include "llvm/IR/MDBuilder.h" 33 #include "llvm/Support/BranchProbability.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Transforms/Scalar.h" 36 37 #if !defined(_MSC_VER) 38 #include <cxxabi.h> 39 #endif 40 #include <set> 41 #include <sstream> 42 43 using namespace llvm; 44 45 #define DEBUG_TYPE "chr" 46 47 #define CHR_DEBUG(X) LLVM_DEBUG(X) 48 49 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 50 cl::desc("Apply CHR for all functions")); 51 52 static cl::opt<double> CHRBiasThreshold( 53 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 54 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 55 56 static cl::opt<unsigned> CHRMergeThreshold( 57 "chr-merge-threshold", cl::init(2), cl::Hidden, 58 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 59 60 static cl::opt<std::string> CHRModuleList( 61 "chr-module-list", cl::init(""), cl::Hidden, 62 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 63 64 static cl::opt<std::string> CHRFunctionList( 65 "chr-function-list", cl::init(""), cl::Hidden, 66 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 67 68 static StringSet<> CHRModules; 69 static StringSet<> CHRFunctions; 70 71 static void ParseCHRFilterFiles() { 72 if (!CHRModuleList.empty()) { 73 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 74 if (!FileOrErr) { 75 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 76 std::exit(1); 77 } 78 StringRef Buf = FileOrErr->get()->getBuffer(); 79 SmallVector<StringRef, 0> Lines; 80 Buf.split(Lines, '\n'); 81 for (StringRef Line : Lines) { 82 Line = Line.trim(); 83 if (!Line.empty()) 84 CHRModules.insert(Line); 85 } 86 } 87 if (!CHRFunctionList.empty()) { 88 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 89 if (!FileOrErr) { 90 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 91 std::exit(1); 92 } 93 StringRef Buf = FileOrErr->get()->getBuffer(); 94 SmallVector<StringRef, 0> Lines; 95 Buf.split(Lines, '\n'); 96 for (StringRef Line : Lines) { 97 Line = Line.trim(); 98 if (!Line.empty()) 99 CHRFunctions.insert(Line); 100 } 101 } 102 } 103 104 namespace { 105 class ControlHeightReductionLegacyPass : public FunctionPass { 106 public: 107 static char ID; 108 109 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 110 initializeControlHeightReductionLegacyPassPass( 111 *PassRegistry::getPassRegistry()); 112 ParseCHRFilterFiles(); 113 } 114 115 bool runOnFunction(Function &F) override; 116 void getAnalysisUsage(AnalysisUsage &AU) const override { 117 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 118 AU.addRequired<DominatorTreeWrapperPass>(); 119 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 120 AU.addRequired<RegionInfoPass>(); 121 AU.addPreserved<GlobalsAAWrapperPass>(); 122 } 123 }; 124 } // end anonymous namespace 125 126 char ControlHeightReductionLegacyPass::ID = 0; 127 128 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 129 "chr", 130 "Reduce control height in the hot paths", 131 false, false) 132 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 133 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 134 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 135 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 136 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 137 "chr", 138 "Reduce control height in the hot paths", 139 false, false) 140 141 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 142 return new ControlHeightReductionLegacyPass(); 143 } 144 145 namespace { 146 147 struct CHRStats { 148 CHRStats() : NumBranches(0), NumBranchesDelta(0), 149 WeightedNumBranchesDelta(0) {} 150 void print(raw_ostream &OS) const { 151 OS << "CHRStats: NumBranches " << NumBranches 152 << " NumBranchesDelta " << NumBranchesDelta 153 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 154 } 155 uint64_t NumBranches; // The original number of conditional branches / 156 // selects 157 uint64_t NumBranchesDelta; // The decrease of the number of conditional 158 // branches / selects in the hot paths due to CHR. 159 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 160 // count at the scope entry. 161 }; 162 163 inline raw_ostream &operator<<(raw_ostream &OS, const CHRStats &Stats) { 164 Stats.print(OS); 165 return OS; 166 } 167 168 // RegInfo - some properties of a Region. 169 struct RegInfo { 170 RegInfo() : R(nullptr), HasBranch(false) {} 171 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 172 Region *R; 173 bool HasBranch; 174 SmallVector<SelectInst *, 8> Selects; 175 }; 176 177 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 178 179 // CHRScope - a sequence of regions to CHR together. It corresponds to a 180 // sequence of conditional blocks. It can have subscopes which correspond to 181 // nested conditional blocks. Nested CHRScopes form a tree. 182 class CHRScope { 183 public: 184 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 185 assert(RI.R && "Null RegionIn"); 186 RegInfos.push_back(RI); 187 } 188 189 Region *getParentRegion() { 190 assert(RegInfos.size() > 0 && "Empty CHRScope"); 191 Region *Parent = RegInfos[0].R->getParent(); 192 assert(Parent && "Unexpected to call this on the top-level region"); 193 return Parent; 194 } 195 196 BasicBlock *getEntryBlock() { 197 assert(RegInfos.size() > 0 && "Empty CHRScope"); 198 return RegInfos.front().R->getEntry(); 199 } 200 201 BasicBlock *getExitBlock() { 202 assert(RegInfos.size() > 0 && "Empty CHRScope"); 203 return RegInfos.back().R->getExit(); 204 } 205 206 bool appendable(CHRScope *Next) { 207 // The next scope is appendable only if this scope is directly connected to 208 // it (which implies it post-dominates this scope) and this scope dominates 209 // it (no edge to the next scope outside this scope). 210 BasicBlock *NextEntry = Next->getEntryBlock(); 211 if (getExitBlock() != NextEntry) 212 // Not directly connected. 213 return false; 214 Region *LastRegion = RegInfos.back().R; 215 for (BasicBlock *Pred : predecessors(NextEntry)) 216 if (!LastRegion->contains(Pred)) 217 // There's an edge going into the entry of the next scope from outside 218 // of this scope. 219 return false; 220 return true; 221 } 222 223 void append(CHRScope *Next) { 224 assert(RegInfos.size() > 0 && "Empty CHRScope"); 225 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 226 assert(getParentRegion() == Next->getParentRegion() && 227 "Must be siblings"); 228 assert(getExitBlock() == Next->getEntryBlock() && 229 "Must be adjacent"); 230 for (RegInfo &RI : Next->RegInfos) 231 RegInfos.push_back(RI); 232 for (CHRScope *Sub : Next->Subs) 233 Subs.push_back(Sub); 234 } 235 236 void addSub(CHRScope *SubIn) { 237 #ifndef NDEBUG 238 bool is_child = false; 239 for (RegInfo &RI : RegInfos) 240 if (RI.R == SubIn->getParentRegion()) { 241 is_child = true; 242 break; 243 } 244 assert(is_child && "Must be a child"); 245 #endif 246 Subs.push_back(SubIn); 247 } 248 249 // Split this scope at the boundary region into two, which will belong to the 250 // tail and returns the tail. 251 CHRScope *split(Region *Boundary) { 252 assert(Boundary && "Boundary null"); 253 assert(RegInfos.begin()->R != Boundary && 254 "Can't be split at beginning"); 255 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), 256 [&Boundary](const RegInfo& RI) { 257 return Boundary == RI.R; 258 }); 259 if (BoundaryIt == RegInfos.end()) 260 return nullptr; 261 SmallVector<RegInfo, 8> TailRegInfos; 262 SmallVector<CHRScope *, 8> TailSubs; 263 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); 264 RegInfos.resize(BoundaryIt - RegInfos.begin()); 265 DenseSet<Region *> TailRegionSet; 266 for (RegInfo &RI : TailRegInfos) 267 TailRegionSet.insert(RI.R); 268 for (auto It = Subs.begin(); It != Subs.end(); ) { 269 CHRScope *Sub = *It; 270 assert(Sub && "null Sub"); 271 Region *Parent = Sub->getParentRegion(); 272 if (TailRegionSet.count(Parent)) { 273 TailSubs.push_back(Sub); 274 It = Subs.erase(It); 275 } else { 276 assert(std::find_if(RegInfos.begin(), RegInfos.end(), 277 [&Parent](const RegInfo& RI) { 278 return Parent == RI.R; 279 }) != RegInfos.end() && 280 "Must be in head"); 281 ++It; 282 } 283 } 284 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 285 return new CHRScope(TailRegInfos, TailSubs); 286 } 287 288 bool contains(Instruction *I) const { 289 BasicBlock *Parent = I->getParent(); 290 for (const RegInfo &RI : RegInfos) 291 if (RI.R->contains(Parent)) 292 return true; 293 return false; 294 } 295 296 void print(raw_ostream &OS) const; 297 298 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 299 SmallVector<CHRScope *, 8> Subs; // Subscopes. 300 301 // The instruction at which to insert the CHR conditional branch (and hoist 302 // the dependent condition values). 303 Instruction *BranchInsertPoint; 304 305 // True-biased and false-biased regions (conditional blocks), 306 // respectively. Used only for the outermost scope and includes regions in 307 // subscopes. The rest are unbiased. 308 DenseSet<Region *> TrueBiasedRegions; 309 DenseSet<Region *> FalseBiasedRegions; 310 // Among the biased regions, the regions that get CHRed. 311 SmallVector<RegInfo, 8> CHRRegions; 312 313 // True-biased and false-biased selects, respectively. Used only for the 314 // outermost scope and includes ones in subscopes. 315 DenseSet<SelectInst *> TrueBiasedSelects; 316 DenseSet<SelectInst *> FalseBiasedSelects; 317 318 // Map from one of the above regions to the instructions to stop 319 // hoisting instructions at through use-def chains. 320 HoistStopMapTy HoistStopMap; 321 322 private: 323 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, 324 SmallVector<CHRScope *, 8> &SubsIn) 325 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 326 }; 327 328 inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 329 Scope.print(OS); 330 return OS; 331 } 332 333 class CHR { 334 public: 335 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 336 ProfileSummaryInfo &PSIin, RegionInfo &RIin) 337 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {} 338 339 ~CHR() { 340 for (CHRScope *Scope : Scopes) { 341 delete Scope; 342 } 343 } 344 345 bool run(); 346 347 private: 348 // See the comments in CHR::run() for the high level flow of the algorithm and 349 // what the following functions do. 350 351 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 352 Region *R = RI.getTopLevelRegion(); 353 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); 354 if (Scope) { 355 Output.push_back(Scope); 356 } 357 } 358 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 359 SmallVectorImpl<CHRScope *> &Scopes); 360 CHRScope *findScope(Region *R); 361 void checkScopeHoistable(CHRScope *Scope); 362 363 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 364 SmallVectorImpl<CHRScope *> &Output); 365 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 366 CHRScope *Outer, 367 DenseSet<Value *> *OuterConditionValues, 368 Instruction *OuterInsertPoint, 369 SmallVectorImpl<CHRScope *> &Output, 370 DenseSet<Instruction *> &Unhoistables); 371 372 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 373 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 374 375 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 376 SmallVectorImpl<CHRScope *> &Output); 377 378 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 379 SmallVectorImpl<CHRScope *> &Output); 380 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 381 382 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 383 SmallVectorImpl<CHRScope *> &Output); 384 385 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 386 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 387 void cloneScopeBlocks(CHRScope *Scope, 388 BasicBlock *PreEntryBlock, 389 BasicBlock *ExitBlock, 390 Region *LastRegion, 391 ValueToValueMapTy &VMap); 392 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 393 BasicBlock *EntryBlock, 394 BasicBlock *NewEntryBlock, 395 ValueToValueMapTy &VMap); 396 void fixupBranchesAndSelects(CHRScope *Scope, 397 BasicBlock *PreEntryBlock, 398 BranchInst *MergedBR, 399 uint64_t ProfileCount); 400 void fixupBranch(Region *R, 401 CHRScope *Scope, 402 IRBuilder<> &IRB, 403 Value *&MergedCondition, BranchProbability &CHRBranchBias); 404 void fixupSelect(SelectInst* SI, 405 CHRScope *Scope, 406 IRBuilder<> &IRB, 407 Value *&MergedCondition, BranchProbability &CHRBranchBias); 408 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 409 Instruction *BranchOrSelect, 410 CHRScope *Scope, 411 IRBuilder<> &IRB, 412 Value *&MergedCondition); 413 414 Function &F; 415 BlockFrequencyInfo &BFI; 416 DominatorTree &DT; 417 ProfileSummaryInfo &PSI; 418 RegionInfo &RI; 419 CHRStats Stats; 420 421 // All the true-biased regions in the function 422 DenseSet<Region *> TrueBiasedRegionsGlobal; 423 // All the false-biased regions in the function 424 DenseSet<Region *> FalseBiasedRegionsGlobal; 425 // All the true-biased selects in the function 426 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 427 // All the false-biased selects in the function 428 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 429 // A map from biased regions to their branch bias 430 DenseMap<Region *, BranchProbability> BranchBiasMap; 431 // A map from biased selects to their branch bias 432 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 433 // All the scopes. 434 DenseSet<CHRScope *> Scopes; 435 }; 436 437 } // end anonymous namespace 438 439 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 440 if (ForceCHR) 441 return true; 442 443 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 444 if (CHRModules.count(F.getParent()->getName())) 445 return true; 446 StringRef Name = F.getName(); 447 if (CHRFunctions.count(Name)) 448 return true; 449 const char* DemangledName = nullptr; 450 int Status = -1; 451 #if !defined(_MSC_VER) 452 DemangledName = abi::__cxa_demangle(Name.str().c_str(), 453 nullptr, nullptr, &Status); 454 #endif 455 return DemangledName && CHRFunctions.count(DemangledName); 456 } 457 458 assert(PSI.hasProfileSummary() && "Empty PSI?"); 459 return PSI.isFunctionEntryHot(&F); 460 } 461 462 static void dumpIR(Function &F, const char *Label, CHRStats *Stats) { 463 std::string Name = F.getName().str(); 464 const char *DemangledName = nullptr; 465 int Status = -1; 466 #if !defined(_MSC_VER) 467 DemangledName = abi::__cxa_demangle(Name.c_str(), 468 nullptr, nullptr, &Status); 469 #endif 470 if (DemangledName == nullptr) { 471 DemangledName = "<NOT-MANGLED>"; 472 } 473 std::string ModuleName = F.getParent()->getName().str(); 474 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 475 << Name); 476 if (Stats) 477 CHR_DEBUG(dbgs() << " " << *Stats); 478 CHR_DEBUG(dbgs() << "\n"); 479 CHR_DEBUG(F.dump()); 480 } 481 482 483 void CHRScope::print(raw_ostream &OS) const { 484 assert(RegInfos.size() > 0 && "Empty CHRScope"); 485 OS << "CHRScope["; 486 OS << RegInfos.size() << ", Regions["; 487 for (const RegInfo &RI : RegInfos) { 488 OS << RI.R->getNameStr(); 489 if (RI.HasBranch) 490 OS << " B"; 491 if (RI.Selects.size() > 0) 492 OS << " S" << RI.Selects.size(); 493 OS << ", "; 494 } 495 if (RegInfos[0].R->getParent()) { 496 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 497 } else { 498 // top level region 499 OS << "]"; 500 } 501 OS << ", Subs["; 502 for (CHRScope *Sub : Subs) { 503 OS << *Sub << ", "; 504 } 505 OS << "]]"; 506 } 507 508 // Return true if the given instruction type can be hoisted by CHR. 509 static bool isHoistableInstructionType(Instruction *I) { 510 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 511 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 512 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 513 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 514 isa<InsertValueInst>(I); 515 } 516 517 // Return true if the given instruction can be hoisted by CHR. 518 static bool isHoistable(Instruction *I, DominatorTree &DT) { 519 if (!isHoistableInstructionType(I)) 520 return false; 521 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 522 } 523 524 // Recursively traverse the use-def chains of the given value and return a set 525 // of the unhoistable base values defined within the scope (excluding the 526 // first-region entry block) or the (hoistable or unhoistable) base values that 527 // are defined outside (including the first-region entry block) of the 528 // scope. The returned set doesn't include constants. 529 static std::set<Value *> getBaseValues(Value *V, 530 DominatorTree &DT) { 531 std::set<Value *> Result; 532 if (auto *I = dyn_cast<Instruction>(V)) { 533 // We don't stop at a block that's not in the Scope because we would miss some 534 // instructions that are based on the same base values if we stop there. 535 if (!isHoistable(I, DT)) { 536 Result.insert(I); 537 return Result; 538 } 539 // I is hoistable above the Scope. 540 for (Value *Op : I->operands()) { 541 std::set<Value *> OpResult = getBaseValues(Op, DT); 542 Result.insert(OpResult.begin(), OpResult.end()); 543 } 544 return Result; 545 } 546 if (isa<Argument>(V)) { 547 Result.insert(V); 548 return Result; 549 } 550 // We don't include others like constants because those won't lead to any 551 // chance of folding of conditions (eg two bit checks merged into one check) 552 // after CHR. 553 return Result; // empty 554 } 555 556 // Return true if V is already hoisted or can be hoisted (along with its 557 // operands) above the insert point. When it returns true and HoistStops is 558 // non-null, the instructions to stop hoisting at through the use-def chains are 559 // inserted into HoistStops. 560 static bool 561 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 562 DenseSet<Instruction *> &Unhoistables, 563 DenseSet<Instruction *> *HoistStops) { 564 assert(InsertPoint && "Null InsertPoint"); 565 if (auto *I = dyn_cast<Instruction>(V)) { 566 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 567 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 568 if (Unhoistables.count(I)) { 569 // Don't hoist if they are not to be hoisted. 570 return false; 571 } 572 if (DT.dominates(I, InsertPoint)) { 573 // We are already above the insert point. Stop here. 574 if (HoistStops) 575 HoistStops->insert(I); 576 return true; 577 } 578 // We aren't not above the insert point, check if we can hoist it above the 579 // insert point. 580 if (isHoistable(I, DT)) { 581 // Check operands first. 582 DenseSet<Instruction *> OpsHoistStops; 583 bool AllOpsHoisted = true; 584 for (Value *Op : I->operands()) { 585 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) { 586 AllOpsHoisted = false; 587 break; 588 } 589 } 590 if (AllOpsHoisted) { 591 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 592 if (HoistStops) 593 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 594 return true; 595 } 596 } 597 return false; 598 } 599 // Non-instructions are considered hoistable. 600 return true; 601 } 602 603 // Returns true and sets the true probability and false probability of an 604 // MD_prof metadata if it's well-formed. 605 static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb, 606 BranchProbability &FalseProb) { 607 if (!MD) return false; 608 MDString *MDName = cast<MDString>(MD->getOperand(0)); 609 if (MDName->getString() != "branch_weights" || 610 MD->getNumOperands() != 3) 611 return false; 612 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 613 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 614 if (!TrueWeight || !FalseWeight) 615 return false; 616 APInt TrueWt = TrueWeight->getValue(); 617 APInt FalseWt = FalseWeight->getValue(); 618 APInt SumWt = TrueWt + FalseWt; 619 TrueProb = BranchProbability::getBranchProbability(TrueWt.getZExtValue(), 620 SumWt.getZExtValue()); 621 FalseProb = BranchProbability::getBranchProbability(FalseWt.getZExtValue(), 622 SumWt.getZExtValue()); 623 return true; 624 } 625 626 static BranchProbability getCHRBiasThreshold() { 627 return BranchProbability::getBranchProbability( 628 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 629 } 630 631 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 632 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 633 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 634 // false. 635 template<typename K, typename S, typename M> 636 bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, 637 S &TrueSet, S &FalseSet, M &BiasMap) { 638 BranchProbability Threshold = getCHRBiasThreshold(); 639 if (TrueProb >= Threshold) { 640 TrueSet.insert(Key); 641 BiasMap[Key] = TrueProb; 642 return true; 643 } else if (FalseProb >= Threshold) { 644 FalseSet.insert(Key); 645 BiasMap[Key] = FalseProb; 646 return true; 647 } 648 return false; 649 } 650 651 // Returns true and insert a region into the right biased set and the map if the 652 // branch of the region is biased. 653 static bool CheckBiasedBranch(BranchInst *BI, Region *R, 654 DenseSet<Region *> &TrueBiasedRegionsGlobal, 655 DenseSet<Region *> &FalseBiasedRegionsGlobal, 656 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 657 if (!BI->isConditional()) 658 return false; 659 BranchProbability ThenProb, ElseProb; 660 if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof), 661 ThenProb, ElseProb)) 662 return false; 663 BasicBlock *IfThen = BI->getSuccessor(0); 664 BasicBlock *IfElse = BI->getSuccessor(1); 665 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 666 IfThen != IfElse && 667 "Invariant from findScopes"); 668 if (IfThen == R->getExit()) { 669 // Swap them so that IfThen/ThenProb means going into the conditional code 670 // and IfElse/ElseProb means skipping it. 671 std::swap(IfThen, IfElse); 672 std::swap(ThenProb, ElseProb); 673 } 674 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 675 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 676 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 677 return CheckBias(R, ThenProb, ElseProb, 678 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 679 BranchBiasMap); 680 } 681 682 // Returns true and insert a select into the right biased set and the map if the 683 // select is biased. 684 static bool CheckBiasedSelect( 685 SelectInst *SI, Region *R, 686 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 687 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 688 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 689 BranchProbability TrueProb, FalseProb; 690 if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof), 691 TrueProb, FalseProb)) 692 return false; 693 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 694 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 695 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 696 return CheckBias(SI, TrueProb, FalseProb, 697 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 698 SelectBiasMap); 699 } 700 701 // Returns the instruction at which to hoist the dependent condition values and 702 // insert the CHR branch for a region. This is the terminator branch in the 703 // entry block or the first select in the entry block, if any. 704 static Instruction* getBranchInsertPoint(RegInfo &RI) { 705 Region *R = RI.R; 706 BasicBlock *EntryBB = R->getEntry(); 707 // The hoist point is by default the terminator of the entry block, which is 708 // the same as the branch instruction if RI.HasBranch is true. 709 Instruction *HoistPoint = EntryBB->getTerminator(); 710 for (SelectInst *SI : RI.Selects) { 711 if (SI->getParent() == EntryBB) { 712 // Pick the first select in Selects in the entry block. Note Selects is 713 // sorted in the instruction order within a block (asserted below). 714 HoistPoint = SI; 715 break; 716 } 717 } 718 assert(HoistPoint && "Null HoistPoint"); 719 #ifndef NDEBUG 720 // Check that HoistPoint is the first one in Selects in the entry block, 721 // if any. 722 DenseSet<Instruction *> EntryBlockSelectSet; 723 for (SelectInst *SI : RI.Selects) { 724 if (SI->getParent() == EntryBB) { 725 EntryBlockSelectSet.insert(SI); 726 } 727 } 728 for (Instruction &I : *EntryBB) { 729 if (EntryBlockSelectSet.count(&I) > 0) { 730 assert(&I == HoistPoint && 731 "HoistPoint must be the first one in Selects"); 732 break; 733 } 734 } 735 #endif 736 return HoistPoint; 737 } 738 739 // Find a CHR scope in the given region. 740 CHRScope * CHR::findScope(Region *R) { 741 CHRScope *Result = nullptr; 742 BasicBlock *Entry = R->getEntry(); 743 BasicBlock *Exit = R->getExit(); // null if top level. 744 assert(Entry && "Entry must not be null"); 745 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 746 "Only top level region has a null exit"); 747 if (Entry) 748 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 749 else 750 CHR_DEBUG(dbgs() << "Entry null\n"); 751 if (Exit) 752 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 753 else 754 CHR_DEBUG(dbgs() << "Exit null\n"); 755 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 756 // to this region). 757 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 758 if (EntryInSubregion) 759 return nullptr; 760 // Exclude loops 761 for (BasicBlock *Pred : predecessors(Entry)) 762 if (R->contains(Pred)) 763 return nullptr; 764 if (Exit) { 765 // Try to find an if-then block (check if R is an if-then). 766 // if (cond) { 767 // ... 768 // } 769 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 770 if (BI) 771 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 772 else 773 CHR_DEBUG(dbgs() << "BI null\n"); 774 if (BI && BI->isConditional()) { 775 BasicBlock *S0 = BI->getSuccessor(0); 776 BasicBlock *S1 = BI->getSuccessor(1); 777 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 778 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 779 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 780 RegInfo RI(R); 781 RI.HasBranch = CheckBiasedBranch( 782 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 783 BranchBiasMap); 784 Result = new CHRScope(RI); 785 Scopes.insert(Result); 786 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 787 ++Stats.NumBranches; 788 } 789 } 790 } 791 { 792 // Try to look for selects in the direct child blocks (as opposed to in 793 // subregions) of R. 794 // ... 795 // if (..) { // Some subregion 796 // ... 797 // } 798 // if (..) { // Some subregion 799 // ... 800 // } 801 // ... 802 // a = cond ? b : c; 803 // ... 804 SmallVector<SelectInst *, 8> Selects; 805 for (RegionNode *E : R->elements()) { 806 if (E->isSubRegion()) 807 continue; 808 // This returns the basic block of E if E is a direct child of R (not a 809 // subregion.) 810 BasicBlock *BB = E->getEntry(); 811 // Need to push in the order to make it easier to find the first Select 812 // later. 813 for (Instruction &I : *BB) { 814 if (auto *SI = dyn_cast<SelectInst>(&I)) { 815 Selects.push_back(SI); 816 ++Stats.NumBranches; 817 } 818 } 819 } 820 if (Selects.size() > 0) { 821 auto AddSelects = [&](RegInfo &RI) { 822 for (auto *SI : Selects) 823 if (CheckBiasedSelect(SI, RI.R, 824 TrueBiasedSelectsGlobal, 825 FalseBiasedSelectsGlobal, 826 SelectBiasMap)) 827 RI.Selects.push_back(SI); 828 }; 829 if (!Result) { 830 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 831 RegInfo RI(R); 832 AddSelects(RI); 833 Result = new CHRScope(RI); 834 Scopes.insert(Result); 835 } else { 836 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 837 AddSelects(Result->RegInfos[0]); 838 } 839 } 840 } 841 842 if (Result) { 843 checkScopeHoistable(Result); 844 } 845 return Result; 846 } 847 848 // Check that any of the branch and the selects in the region could be 849 // hoisted above the the CHR branch insert point (the most dominating of 850 // them, either the branch (at the end of the first block) or the first 851 // select in the first block). If the branch can't be hoisted, drop the 852 // selects in the first blocks. 853 // 854 // For example, for the following scope/region with selects, we want to insert 855 // the merged branch right before the first select in the first/entry block by 856 // hoisting c1, c2, c3, and c4. 857 // 858 // // Branch insert point here. 859 // a = c1 ? b : c; // Select 1 860 // d = c2 ? e : f; // Select 2 861 // if (c3) { // Branch 862 // ... 863 // c4 = foo() // A call. 864 // g = c4 ? h : i; // Select 3 865 // } 866 // 867 // But suppose we can't hoist c4 because it's dependent on the preceding 868 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 869 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 870 void CHR::checkScopeHoistable(CHRScope *Scope) { 871 RegInfo &RI = Scope->RegInfos[0]; 872 Region *R = RI.R; 873 BasicBlock *EntryBB = R->getEntry(); 874 auto *Branch = RI.HasBranch ? 875 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 876 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 877 if (RI.HasBranch || !Selects.empty()) { 878 Instruction *InsertPoint = getBranchInsertPoint(RI); 879 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 880 // Avoid a data dependence from a select or a branch to a(nother) 881 // select. Note no instruction can't data-depend on a branch (a branch 882 // instruction doesn't produce a value). 883 DenseSet<Instruction *> Unhoistables; 884 // Initialize Unhoistables with the selects. 885 for (SelectInst *SI : Selects) { 886 Unhoistables.insert(SI); 887 } 888 // Remove Selects that can't be hoisted. 889 for (auto it = Selects.begin(); it != Selects.end(); ) { 890 SelectInst *SI = *it; 891 if (SI == InsertPoint) { 892 ++it; 893 continue; 894 } 895 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 896 DT, Unhoistables, nullptr); 897 if (!IsHoistable) { 898 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 899 it = Selects.erase(it); 900 // Since we are dropping the select here, we also drop it from 901 // Unhoistables. 902 Unhoistables.erase(SI); 903 } else 904 ++it; 905 } 906 // Update InsertPoint after potentially removing selects. 907 InsertPoint = getBranchInsertPoint(RI); 908 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 909 if (RI.HasBranch && InsertPoint != Branch) { 910 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 911 DT, Unhoistables, nullptr); 912 if (!IsHoistable) { 913 // If the branch isn't hoistable, drop the selects in the entry 914 // block, preferring the branch, which makes the branch the hoist 915 // point. 916 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 917 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 918 CHR_DEBUG( 919 for (SelectInst *SI : Selects) { 920 dbgs() << "SI " << *SI << "\n"; 921 }); 922 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 923 [EntryBB](SelectInst *SI) { 924 return SI->getParent() == EntryBB; 925 }), Selects.end()); 926 Unhoistables.clear(); 927 InsertPoint = Branch; 928 } 929 } 930 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 931 #ifndef NDEBUG 932 if (RI.HasBranch) { 933 assert(!DT.dominates(Branch, InsertPoint) && 934 "Branch can't be already above the hoist point"); 935 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 936 DT, Unhoistables, nullptr) && 937 "checkHoistValue for branch"); 938 } 939 for (auto *SI : Selects) { 940 assert(!DT.dominates(SI, InsertPoint) && 941 "SI can't be already above the hoist point"); 942 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 943 Unhoistables, nullptr) && 944 "checkHoistValue for selects"); 945 } 946 CHR_DEBUG(dbgs() << "Result\n"); 947 if (RI.HasBranch) { 948 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 949 } 950 for (auto *SI : Selects) { 951 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 952 } 953 #endif 954 } 955 } 956 957 // Traverse the region tree, find all nested scopes and merge them if possible. 958 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 959 SmallVectorImpl<CHRScope *> &Scopes) { 960 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 961 CHRScope *Result = findScope(R); 962 // Visit subscopes. 963 CHRScope *ConsecutiveSubscope = nullptr; 964 SmallVector<CHRScope *, 8> Subscopes; 965 for (auto It = R->begin(); It != R->end(); ++It) { 966 const std::unique_ptr<Region> &SubR = *It; 967 auto Next_It = std::next(It); 968 Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr; 969 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 970 << "\n"); 971 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 972 if (SubCHRScope) { 973 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 974 } else { 975 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 976 } 977 if (SubCHRScope) { 978 if (!ConsecutiveSubscope) 979 ConsecutiveSubscope = SubCHRScope; 980 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 981 Subscopes.push_back(ConsecutiveSubscope); 982 ConsecutiveSubscope = SubCHRScope; 983 } else 984 ConsecutiveSubscope->append(SubCHRScope); 985 } else { 986 if (ConsecutiveSubscope) { 987 Subscopes.push_back(ConsecutiveSubscope); 988 } 989 ConsecutiveSubscope = nullptr; 990 } 991 } 992 if (ConsecutiveSubscope) { 993 Subscopes.push_back(ConsecutiveSubscope); 994 } 995 for (CHRScope *Sub : Subscopes) { 996 if (Result) { 997 // Combine it with the parent. 998 Result->addSub(Sub); 999 } else { 1000 // Push Subscopes as they won't be combined with the parent. 1001 Scopes.push_back(Sub); 1002 } 1003 } 1004 return Result; 1005 } 1006 1007 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1008 DenseSet<Value *> ConditionValues; 1009 if (RI.HasBranch) { 1010 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1011 ConditionValues.insert(BI->getCondition()); 1012 } 1013 for (SelectInst *SI : RI.Selects) { 1014 ConditionValues.insert(SI->getCondition()); 1015 } 1016 return ConditionValues; 1017 } 1018 1019 1020 // Determine whether to split a scope depending on the sets of the branch 1021 // condition values of the previous region and the current region. We split 1022 // (return true) it if 1) the condition values of the inner/lower scope can't be 1023 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1024 // values have an empty intersection (because the combined branch conditions 1025 // won't probably lead to a simpler combined condition). 1026 static bool shouldSplit(Instruction *InsertPoint, 1027 DenseSet<Value *> &PrevConditionValues, 1028 DenseSet<Value *> &ConditionValues, 1029 DominatorTree &DT, 1030 DenseSet<Instruction *> &Unhoistables) { 1031 CHR_DEBUG( 1032 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1033 for (Value *V : PrevConditionValues) { 1034 dbgs() << *V << ", "; 1035 } 1036 dbgs() << " ConditionValues "; 1037 for (Value *V : ConditionValues) { 1038 dbgs() << *V << ", "; 1039 } 1040 dbgs() << "\n"); 1041 assert(InsertPoint && "Null InsertPoint"); 1042 // If any of Bases isn't hoistable to the hoist point, split. 1043 for (Value *V : ConditionValues) { 1044 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) { 1045 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1046 return true; // Not hoistable, split. 1047 } 1048 } 1049 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1050 // unnecessary splits at scopes with no branch/selects. If 1051 // PrevConditionValues and ConditionValues don't intersect at all, split. 1052 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1053 // Use std::set as DenseSet doesn't work with set_intersection. 1054 std::set<Value *> PrevBases, Bases; 1055 for (Value *V : PrevConditionValues) { 1056 std::set<Value *> BaseValues = getBaseValues(V, DT); 1057 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1058 } 1059 for (Value *V : ConditionValues) { 1060 std::set<Value *> BaseValues = getBaseValues(V, DT); 1061 Bases.insert(BaseValues.begin(), BaseValues.end()); 1062 } 1063 CHR_DEBUG( 1064 dbgs() << "PrevBases "; 1065 for (Value *V : PrevBases) { 1066 dbgs() << *V << ", "; 1067 } 1068 dbgs() << " Bases "; 1069 for (Value *V : Bases) { 1070 dbgs() << *V << ", "; 1071 } 1072 dbgs() << "\n"); 1073 std::set<Value *> Intersection; 1074 std::set_intersection(PrevBases.begin(), PrevBases.end(), 1075 Bases.begin(), Bases.end(), 1076 std::inserter(Intersection, Intersection.begin())); 1077 if (Intersection.empty()) { 1078 // Empty intersection, split. 1079 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1080 return true; 1081 } 1082 } 1083 CHR_DEBUG(dbgs() << "No split\n"); 1084 return false; // Don't split. 1085 } 1086 1087 static void GetSelectsInScope(CHRScope *Scope, 1088 DenseSet<Instruction *> &Output) { 1089 for (RegInfo &RI : Scope->RegInfos) { 1090 for (SelectInst *SI : RI.Selects) { 1091 Output.insert(SI); 1092 } 1093 } 1094 for (CHRScope *Sub : Scope->Subs) { 1095 GetSelectsInScope(Sub, Output); 1096 } 1097 } 1098 1099 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1100 SmallVectorImpl<CHRScope *> &Output) { 1101 for (CHRScope *Scope : Input) { 1102 assert(!Scope->BranchInsertPoint && 1103 "BranchInsertPoint must not be set"); 1104 DenseSet<Instruction *> Unhoistables; 1105 GetSelectsInScope(Scope, Unhoistables); 1106 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1107 } 1108 #ifndef NDEBUG 1109 for (CHRScope *Scope : Output) { 1110 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1111 } 1112 #endif 1113 } 1114 1115 SmallVector<CHRScope *, 8> CHR::splitScope( 1116 CHRScope *Scope, 1117 CHRScope *Outer, 1118 DenseSet<Value *> *OuterConditionValues, 1119 Instruction *OuterInsertPoint, 1120 SmallVectorImpl<CHRScope *> &Output, 1121 DenseSet<Instruction *> &Unhoistables) { 1122 if (Outer) { 1123 assert(OuterConditionValues && "Null OuterConditionValues"); 1124 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1125 } 1126 bool PrevSplitFromOuter = true; 1127 DenseSet<Value *> PrevConditionValues; 1128 Instruction *PrevInsertPoint = nullptr; 1129 SmallVector<CHRScope *, 8> Splits; 1130 SmallVector<bool, 8> SplitsSplitFromOuter; 1131 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1132 SmallVector<Instruction *, 8> SplitsInsertPoints; 1133 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1134 for (RegInfo &RI : RegInfos) { 1135 Instruction *InsertPoint = getBranchInsertPoint(RI); 1136 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1137 CHR_DEBUG( 1138 dbgs() << "ConditionValues "; 1139 for (Value *V : ConditionValues) { 1140 dbgs() << *V << ", "; 1141 } 1142 dbgs() << "\n"); 1143 if (RI.R == RegInfos[0].R) { 1144 // First iteration. Check to see if we should split from the outer. 1145 if (Outer) { 1146 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1147 CHR_DEBUG(dbgs() << "Should split from outer at " 1148 << RI.R->getNameStr() << "\n"); 1149 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1150 ConditionValues, DT, Unhoistables)) { 1151 PrevConditionValues = ConditionValues; 1152 PrevInsertPoint = InsertPoint; 1153 } else { 1154 // Not splitting from the outer. Use the outer bases and insert 1155 // point. Union the bases. 1156 PrevSplitFromOuter = false; 1157 PrevConditionValues = *OuterConditionValues; 1158 PrevConditionValues.insert(ConditionValues.begin(), 1159 ConditionValues.end()); 1160 PrevInsertPoint = OuterInsertPoint; 1161 } 1162 } else { 1163 CHR_DEBUG(dbgs() << "Outer null\n"); 1164 PrevConditionValues = ConditionValues; 1165 PrevInsertPoint = InsertPoint; 1166 } 1167 } else { 1168 CHR_DEBUG(dbgs() << "Should split from prev at " 1169 << RI.R->getNameStr() << "\n"); 1170 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1171 DT, Unhoistables)) { 1172 CHRScope *Tail = Scope->split(RI.R); 1173 Scopes.insert(Tail); 1174 Splits.push_back(Scope); 1175 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1176 SplitsConditionValues.push_back(PrevConditionValues); 1177 SplitsInsertPoints.push_back(PrevInsertPoint); 1178 Scope = Tail; 1179 PrevConditionValues = ConditionValues; 1180 PrevInsertPoint = InsertPoint; 1181 PrevSplitFromOuter = true; 1182 } else { 1183 // Not splitting. Union the bases. Keep the hoist point. 1184 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1185 } 1186 } 1187 } 1188 Splits.push_back(Scope); 1189 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1190 SplitsConditionValues.push_back(PrevConditionValues); 1191 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1192 SplitsInsertPoints.push_back(PrevInsertPoint); 1193 assert(Splits.size() == SplitsConditionValues.size() && 1194 Splits.size() == SplitsSplitFromOuter.size() && 1195 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1196 for (size_t I = 0; I < Splits.size(); ++I) { 1197 CHRScope *Split = Splits[I]; 1198 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1199 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1200 SmallVector<CHRScope *, 8> NewSubs; 1201 DenseSet<Instruction *> SplitUnhoistables; 1202 GetSelectsInScope(Split, SplitUnhoistables); 1203 for (CHRScope *Sub : Split->Subs) { 1204 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1205 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1206 SplitUnhoistables); 1207 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1208 } 1209 Split->Subs = NewSubs; 1210 } 1211 SmallVector<CHRScope *, 8> Result; 1212 for (size_t I = 0; I < Splits.size(); ++I) { 1213 CHRScope *Split = Splits[I]; 1214 if (SplitsSplitFromOuter[I]) { 1215 // Split from the outer. 1216 Output.push_back(Split); 1217 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1218 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1219 << "\n"); 1220 } else { 1221 // Connected to the outer. 1222 Result.push_back(Split); 1223 } 1224 } 1225 if (!Outer) 1226 assert(Result.empty() && 1227 "If no outer (top-level), must return no nested ones"); 1228 return Result; 1229 } 1230 1231 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1232 for (CHRScope *Scope : Scopes) { 1233 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1234 classifyBiasedScopes(Scope, Scope); 1235 CHR_DEBUG( 1236 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1237 dbgs() << "TrueBiasedRegions "; 1238 for (Region *R : Scope->TrueBiasedRegions) { 1239 dbgs() << R->getNameStr() << ", "; 1240 } 1241 dbgs() << "\n"; 1242 dbgs() << "FalseBiasedRegions "; 1243 for (Region *R : Scope->FalseBiasedRegions) { 1244 dbgs() << R->getNameStr() << ", "; 1245 } 1246 dbgs() << "\n"; 1247 dbgs() << "TrueBiasedSelects "; 1248 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1249 dbgs() << *SI << ", "; 1250 } 1251 dbgs() << "\n"; 1252 dbgs() << "FalseBiasedSelects "; 1253 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1254 dbgs() << *SI << ", "; 1255 } 1256 dbgs() << "\n";); 1257 } 1258 } 1259 1260 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1261 for (RegInfo &RI : Scope->RegInfos) { 1262 if (RI.HasBranch) { 1263 Region *R = RI.R; 1264 if (TrueBiasedRegionsGlobal.count(R) > 0) 1265 OutermostScope->TrueBiasedRegions.insert(R); 1266 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1267 OutermostScope->FalseBiasedRegions.insert(R); 1268 else 1269 llvm_unreachable("Must be biased"); 1270 } 1271 for (SelectInst *SI : RI.Selects) { 1272 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1273 OutermostScope->TrueBiasedSelects.insert(SI); 1274 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1275 OutermostScope->FalseBiasedSelects.insert(SI); 1276 else 1277 llvm_unreachable("Must be biased"); 1278 } 1279 } 1280 for (CHRScope *Sub : Scope->Subs) { 1281 classifyBiasedScopes(Sub, OutermostScope); 1282 } 1283 } 1284 1285 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1286 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1287 Scope->FalseBiasedRegions.size() + 1288 Scope->TrueBiasedSelects.size() + 1289 Scope->FalseBiasedSelects.size(); 1290 return NumBiased >= CHRMergeThreshold; 1291 } 1292 1293 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1294 SmallVectorImpl<CHRScope *> &Output) { 1295 for (CHRScope *Scope : Input) { 1296 // Filter out the ones with only one region and no subs. 1297 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1298 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1299 << Scope->TrueBiasedRegions.size() 1300 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1301 << " true-selects " << Scope->TrueBiasedSelects.size() 1302 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1303 continue; 1304 } 1305 Output.push_back(Scope); 1306 } 1307 } 1308 1309 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1310 SmallVectorImpl<CHRScope *> &Output) { 1311 for (CHRScope *Scope : Input) { 1312 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1313 "Empty"); 1314 setCHRRegions(Scope, Scope); 1315 Output.push_back(Scope); 1316 CHR_DEBUG( 1317 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1318 for (auto pair : Scope->HoistStopMap) { 1319 Region *R = pair.first; 1320 dbgs() << "Region " << R->getNameStr() << "\n"; 1321 for (Instruction *I : pair.second) { 1322 dbgs() << "HoistStop " << *I << "\n"; 1323 } 1324 } 1325 dbgs() << "CHRRegions" << "\n"; 1326 for (RegInfo &RI : Scope->CHRRegions) { 1327 dbgs() << RI.R->getNameStr() << "\n"; 1328 }); 1329 } 1330 } 1331 1332 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1333 DenseSet<Instruction *> Unhoistables; 1334 // Put the biased selects in Unhoistables because they should stay where they 1335 // are and constant-folded after CHR (in case one biased select or a branch 1336 // can depend on another biased select.) 1337 for (RegInfo &RI : Scope->RegInfos) { 1338 for (SelectInst *SI : RI.Selects) { 1339 Unhoistables.insert(SI); 1340 } 1341 } 1342 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1343 for (RegInfo &RI : Scope->RegInfos) { 1344 Region *R = RI.R; 1345 DenseSet<Instruction *> HoistStops; 1346 bool IsHoisted = false; 1347 if (RI.HasBranch) { 1348 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1349 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1350 "Must be truthy or falsy"); 1351 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1352 // Note checkHoistValue fills in HoistStops. 1353 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1354 Unhoistables, &HoistStops); 1355 assert(IsHoistable && "Must be hoistable"); 1356 (void)(IsHoistable); // Unused in release build 1357 IsHoisted = true; 1358 } 1359 for (SelectInst *SI : RI.Selects) { 1360 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1361 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1362 "Must be true or false biased"); 1363 // Note checkHoistValue fills in HoistStops. 1364 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1365 Unhoistables, &HoistStops); 1366 assert(IsHoistable && "Must be hoistable"); 1367 (void)(IsHoistable); // Unused in release build 1368 IsHoisted = true; 1369 } 1370 if (IsHoisted) { 1371 OutermostScope->CHRRegions.push_back(RI); 1372 OutermostScope->HoistStopMap[R] = HoistStops; 1373 } 1374 } 1375 for (CHRScope *Sub : Scope->Subs) 1376 setCHRRegions(Sub, OutermostScope); 1377 } 1378 1379 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1380 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1381 } 1382 1383 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1384 SmallVectorImpl<CHRScope *> &Output) { 1385 Output.resize(Input.size()); 1386 std::copy(Input.begin(), Input.end(), Output.begin()); 1387 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter); 1388 } 1389 1390 // Return true if V is already hoisted or was hoisted (along with its operands) 1391 // to the insert point. 1392 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1393 HoistStopMapTy &HoistStopMap, 1394 DenseSet<Instruction *> &HoistedSet, 1395 DenseSet<PHINode *> &TrivialPHIs) { 1396 auto IT = HoistStopMap.find(R); 1397 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1398 DenseSet<Instruction *> &HoistStops = IT->second; 1399 if (auto *I = dyn_cast<Instruction>(V)) { 1400 if (I == HoistPoint) 1401 return; 1402 if (HoistStops.count(I)) 1403 return; 1404 if (auto *PN = dyn_cast<PHINode>(I)) 1405 if (TrivialPHIs.count(PN)) 1406 // The trivial phi inserted by the previous CHR scope could replace a 1407 // non-phi in HoistStops. Note that since this phi is at the exit of a 1408 // previous CHR scope, which dominates this scope, it's safe to stop 1409 // hoisting there. 1410 return; 1411 if (HoistedSet.count(I)) 1412 // Already hoisted, return. 1413 return; 1414 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1415 for (Value *Op : I->operands()) { 1416 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs); 1417 } 1418 I->moveBefore(HoistPoint); 1419 HoistedSet.insert(I); 1420 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1421 } 1422 } 1423 1424 // Hoist the dependent condition values of the branches and the selects in the 1425 // scope to the insert point. 1426 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1427 DenseSet<PHINode *> &TrivialPHIs) { 1428 DenseSet<Instruction *> HoistedSet; 1429 for (const RegInfo &RI : Scope->CHRRegions) { 1430 Region *R = RI.R; 1431 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1432 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1433 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1434 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1435 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1436 HoistedSet, TrivialPHIs); 1437 } 1438 for (SelectInst *SI : RI.Selects) { 1439 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1440 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1441 if (!(IsTrueBiased || IsFalseBiased)) 1442 continue; 1443 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1444 HoistedSet, TrivialPHIs); 1445 } 1446 } 1447 } 1448 1449 // Negate the predicate if an ICmp if it's used only by branches or selects by 1450 // swapping the operands of the branches or the selects. Returns true if success. 1451 static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1452 Instruction *ExcludedUser, 1453 CHRScope *Scope) { 1454 for (User *U : ICmp->users()) { 1455 if (U == ExcludedUser) 1456 continue; 1457 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1458 continue; 1459 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1460 continue; 1461 return false; 1462 } 1463 for (User *U : ICmp->users()) { 1464 if (U == ExcludedUser) 1465 continue; 1466 if (auto *BI = dyn_cast<BranchInst>(U)) { 1467 assert(BI->isConditional() && "Must be conditional"); 1468 BI->swapSuccessors(); 1469 // Don't need to swap this in terms of 1470 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1471 // mean whehter the branch is likely go into the if-then rather than 1472 // successor0/successor1 and because we can tell which edge is the then or 1473 // the else one by comparing the destination to the region exit block. 1474 continue; 1475 } 1476 if (auto *SI = dyn_cast<SelectInst>(U)) { 1477 // Swap operands 1478 Value *TrueValue = SI->getTrueValue(); 1479 Value *FalseValue = SI->getFalseValue(); 1480 SI->setTrueValue(FalseValue); 1481 SI->setFalseValue(TrueValue); 1482 SI->swapProfMetadata(); 1483 if (Scope->TrueBiasedSelects.count(SI)) { 1484 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1485 "Must not be already in"); 1486 Scope->FalseBiasedSelects.insert(SI); 1487 } else if (Scope->FalseBiasedSelects.count(SI)) { 1488 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1489 "Must not be already in"); 1490 Scope->TrueBiasedSelects.insert(SI); 1491 } 1492 continue; 1493 } 1494 llvm_unreachable("Must be a branch or a select"); 1495 } 1496 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1497 return true; 1498 } 1499 1500 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1501 // for a value that's defined in the scope but used outside it (meaning it's 1502 // alive at the exit block). 1503 static void insertTrivialPHIs(CHRScope *Scope, 1504 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1505 DenseSet<PHINode *> &TrivialPHIs) { 1506 DenseSet<BasicBlock *> BlocksInScopeSet; 1507 SmallVector<BasicBlock *, 8> BlocksInScopeVec; 1508 for (RegInfo &RI : Scope->RegInfos) { 1509 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1510 // sub-Scopes. 1511 BlocksInScopeSet.insert(BB); 1512 BlocksInScopeVec.push_back(BB); 1513 } 1514 } 1515 CHR_DEBUG( 1516 dbgs() << "Inserting redudant phis\n"; 1517 for (BasicBlock *BB : BlocksInScopeVec) { 1518 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1519 }); 1520 for (BasicBlock *BB : BlocksInScopeVec) { 1521 for (Instruction &I : *BB) { 1522 SmallVector<Instruction *, 8> Users; 1523 for (User *U : I.users()) { 1524 if (auto *UI = dyn_cast<Instruction>(U)) { 1525 if (BlocksInScopeSet.count(UI->getParent()) == 0 && 1526 // Unless there's already a phi for I at the exit block. 1527 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1528 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1529 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1530 Users.push_back(UI); 1531 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1532 // There's a loop backedge from a block that's dominated by this 1533 // scope to the entry block. 1534 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1535 CHR_DEBUG(dbgs() 1536 << "Used at entry block (for a back edge) by a phi user " 1537 << *UI << "\n"); 1538 Users.push_back(UI); 1539 } 1540 } 1541 } 1542 if (Users.size() > 0) { 1543 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1544 // ExitBlock. Replace I with the new phi in UI unless UI is another 1545 // phi at ExitBlock. 1546 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1547 pred_end(ExitBlock)); 1548 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1549 &ExitBlock->front()); 1550 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1551 PN->addIncoming(&I, Pred); 1552 } 1553 TrivialPHIs.insert(PN); 1554 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1555 for (Instruction *UI : Users) { 1556 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1557 if (UI->getOperand(J) == &I) { 1558 UI->setOperand(J, PN); 1559 } 1560 } 1561 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1562 } 1563 } 1564 } 1565 } 1566 } 1567 1568 // Assert that all the CHR regions of the scope have a biased branch or select. 1569 static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1570 #ifndef NDEBUG 1571 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1572 if (Scope->TrueBiasedRegions.count(RI.R) || 1573 Scope->FalseBiasedRegions.count(RI.R)) 1574 return true; 1575 for (SelectInst *SI : RI.Selects) 1576 if (Scope->TrueBiasedSelects.count(SI) || 1577 Scope->FalseBiasedSelects.count(SI)) 1578 return true; 1579 return false; 1580 }; 1581 for (RegInfo &RI : Scope->CHRRegions) { 1582 assert(HasBiasedBranchOrSelect(RI, Scope) && 1583 "Must have biased branch or select"); 1584 } 1585 #endif 1586 } 1587 1588 // Assert that all the condition values of the biased branches and selects have 1589 // been hoisted to the pre-entry block or outside of the scope. 1590 static void assertBranchOrSelectConditionHoisted(CHRScope *Scope, 1591 BasicBlock *PreEntryBlock) { 1592 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1593 for (RegInfo &RI : Scope->CHRRegions) { 1594 Region *R = RI.R; 1595 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1596 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1597 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1598 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1599 Value *V = BI->getCondition(); 1600 CHR_DEBUG(dbgs() << *V << "\n"); 1601 if (auto *I = dyn_cast<Instruction>(V)) { 1602 (void)(I); // Unused in release build. 1603 assert((I->getParent() == PreEntryBlock || 1604 !Scope->contains(I)) && 1605 "Must have been hoisted to PreEntryBlock or outside the scope"); 1606 } 1607 } 1608 for (SelectInst *SI : RI.Selects) { 1609 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1610 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1611 if (!(IsTrueBiased || IsFalseBiased)) 1612 continue; 1613 Value *V = SI->getCondition(); 1614 CHR_DEBUG(dbgs() << *V << "\n"); 1615 if (auto *I = dyn_cast<Instruction>(V)) { 1616 (void)(I); // Unused in release build. 1617 assert((I->getParent() == PreEntryBlock || 1618 !Scope->contains(I)) && 1619 "Must have been hoisted to PreEntryBlock or outside the scope"); 1620 } 1621 } 1622 } 1623 } 1624 1625 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1626 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1627 1628 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1629 Region *FirstRegion = Scope->RegInfos[0].R; 1630 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1631 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1632 BasicBlock *ExitBlock = LastRegion->getExit(); 1633 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1634 1635 if (ExitBlock) { 1636 // Insert a trivial phi at the exit block (where the CHR hot path and the 1637 // cold path merges) for a value that's defined in the scope but used 1638 // outside it (meaning it's alive at the exit block). We will add the 1639 // incoming values for the CHR cold paths to it below. Without this, we'd 1640 // miss updating phi's for such values unless there happens to already be a 1641 // phi for that value there. 1642 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1643 } 1644 1645 // Split the entry block of the first region. The new block becomes the new 1646 // entry block of the first region. The old entry block becomes the block to 1647 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1648 // through the split, we update the entry of the first region after the split, 1649 // and Region only points to the entry and the exit blocks, rather than 1650 // keeping everything in a list or set, the blocks membership and the 1651 // entry/exit blocks of the region are still valid after the split. 1652 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1653 << " at " << *Scope->BranchInsertPoint << "\n"); 1654 BasicBlock *NewEntryBlock = 1655 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1656 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1657 "NewEntryBlock's only pred must be EntryBlock"); 1658 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1659 BasicBlock *PreEntryBlock = EntryBlock; 1660 1661 ValueToValueMapTy VMap; 1662 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1663 // hot path (originals) and a cold path (clones) and update the PHIs at the 1664 // exit block. 1665 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1666 1667 // Replace the old (placeholder) branch with the new (merged) conditional 1668 // branch. 1669 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1670 NewEntryBlock, VMap); 1671 1672 #ifndef NDEBUG 1673 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1674 #endif 1675 1676 // Hoist the conditional values of the branches/selects. 1677 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs); 1678 1679 #ifndef NDEBUG 1680 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1681 #endif 1682 1683 // Create the combined branch condition and constant-fold the branches/selects 1684 // in the hot path. 1685 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1686 ProfileCount ? ProfileCount.getValue() : 0); 1687 } 1688 1689 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1690 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1691 // at the exit block. 1692 void CHR::cloneScopeBlocks(CHRScope *Scope, 1693 BasicBlock *PreEntryBlock, 1694 BasicBlock *ExitBlock, 1695 Region *LastRegion, 1696 ValueToValueMapTy &VMap) { 1697 // Clone all the blocks. The original blocks will be the hot-path 1698 // CHR-optimized code and the cloned blocks will be the original unoptimized 1699 // code. This is so that the block pointers from the 1700 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1701 // which CHR should apply to. 1702 SmallVector<BasicBlock*, 8> NewBlocks; 1703 for (RegInfo &RI : Scope->RegInfos) 1704 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1705 // sub-Scopes. 1706 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1707 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1708 NewBlocks.push_back(NewBB); 1709 VMap[BB] = NewBB; 1710 } 1711 1712 // Place the cloned blocks right after the original blocks (right before the 1713 // exit block of.) 1714 if (ExitBlock) 1715 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1716 F.getBasicBlockList(), 1717 NewBlocks[0]->getIterator(), F.end()); 1718 1719 // Update the cloned blocks/instructions to refer to themselves. 1720 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1721 for (Instruction &I : *NewBlocks[i]) 1722 RemapInstruction(&I, VMap, 1723 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1724 1725 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1726 // the top-level region but we don't need to add PHIs. The trivial PHIs 1727 // inserted above will be updated here. 1728 if (ExitBlock) 1729 for (PHINode &PN : ExitBlock->phis()) 1730 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1731 ++I) { 1732 BasicBlock *Pred = PN.getIncomingBlock(I); 1733 if (LastRegion->contains(Pred)) { 1734 Value *V = PN.getIncomingValue(I); 1735 auto It = VMap.find(V); 1736 if (It != VMap.end()) V = It->second; 1737 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1738 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1739 } 1740 } 1741 } 1742 1743 // A helper for transformScope. Replace the old (placeholder) branch with the 1744 // new (merged) conditional branch. 1745 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1746 BasicBlock *EntryBlock, 1747 BasicBlock *NewEntryBlock, 1748 ValueToValueMapTy &VMap) { 1749 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1750 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1751 "SplitBlock did not work correctly!"); 1752 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1753 "NewEntryBlock's only pred must be EntryBlock"); 1754 assert(VMap.find(NewEntryBlock) != VMap.end() && 1755 "NewEntryBlock must have been copied"); 1756 OldBR->removeFromParent(); 1757 OldBR->dropAllReferences(); 1758 // The true predicate is a placeholder. It will be replaced later in 1759 // fixupBranchesAndSelects(). 1760 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1761 cast<BasicBlock>(VMap[NewEntryBlock]), 1762 ConstantInt::getTrue(F.getContext())); 1763 PreEntryBlock->getInstList().push_back(NewBR); 1764 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1765 "NewEntryBlock's only pred must be EntryBlock"); 1766 return NewBR; 1767 } 1768 1769 // A helper for transformScopes. Create the combined branch condition and 1770 // constant-fold the branches/selects in the hot path. 1771 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1772 BasicBlock *PreEntryBlock, 1773 BranchInst *MergedBR, 1774 uint64_t ProfileCount) { 1775 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1776 BranchProbability CHRBranchBias(1, 1); 1777 uint64_t NumCHRedBranches = 0; 1778 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1779 for (RegInfo &RI : Scope->CHRRegions) { 1780 Region *R = RI.R; 1781 if (RI.HasBranch) { 1782 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1783 ++NumCHRedBranches; 1784 } 1785 for (SelectInst *SI : RI.Selects) { 1786 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1787 ++NumCHRedBranches; 1788 } 1789 } 1790 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1791 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1792 MergedBR->setCondition(MergedCondition); 1793 SmallVector<uint32_t, 2> Weights; 1794 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); 1795 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); 1796 MDBuilder MDB(F.getContext()); 1797 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1798 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1799 << "\n"); 1800 } 1801 1802 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1803 // and constant-fold a branch in the hot path. 1804 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1805 IRBuilder<> &IRB, 1806 Value *&MergedCondition, 1807 BranchProbability &CHRBranchBias) { 1808 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1809 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1810 "Must be truthy or falsy"); 1811 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1812 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1813 "Must be in the bias map"); 1814 BranchProbability Bias = BranchBiasMap[R]; 1815 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1816 // Take the min. 1817 if (CHRBranchBias > Bias) 1818 CHRBranchBias = Bias; 1819 BasicBlock *IfThen = BI->getSuccessor(1); 1820 BasicBlock *IfElse = BI->getSuccessor(0); 1821 BasicBlock *RegionExitBlock = R->getExit(); 1822 assert(RegionExitBlock && "Null ExitBlock"); 1823 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1824 IfThen != IfElse && "Invariant from findScopes"); 1825 if (IfThen == RegionExitBlock) { 1826 // Swap them so that IfThen means going into it and IfElse means skipping 1827 // it. 1828 std::swap(IfThen, IfElse); 1829 } 1830 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1831 << " IfElse " << IfElse->getName() << "\n"); 1832 Value *Cond = BI->getCondition(); 1833 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1834 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1835 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1836 MergedCondition); 1837 // Constant-fold the branch at ClonedEntryBlock. 1838 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1839 "The successor shouldn't change"); 1840 Value *NewCondition = ConditionTrue ? 1841 ConstantInt::getTrue(F.getContext()) : 1842 ConstantInt::getFalse(F.getContext()); 1843 BI->setCondition(NewCondition); 1844 } 1845 1846 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1847 // and constant-fold a select in the hot path. 1848 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1849 IRBuilder<> &IRB, 1850 Value *&MergedCondition, 1851 BranchProbability &CHRBranchBias) { 1852 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1853 assert((IsTrueBiased || 1854 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1855 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1856 "Must be in the bias map"); 1857 BranchProbability Bias = SelectBiasMap[SI]; 1858 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1859 // Take the min. 1860 if (CHRBranchBias > Bias) 1861 CHRBranchBias = Bias; 1862 Value *Cond = SI->getCondition(); 1863 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1864 MergedCondition); 1865 Value *NewCondition = IsTrueBiased ? 1866 ConstantInt::getTrue(F.getContext()) : 1867 ConstantInt::getFalse(F.getContext()); 1868 SI->setCondition(NewCondition); 1869 } 1870 1871 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1872 // condition. 1873 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1874 Instruction *BranchOrSelect, 1875 CHRScope *Scope, 1876 IRBuilder<> &IRB, 1877 Value *&MergedCondition) { 1878 if (IsTrueBiased) { 1879 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1880 } else { 1881 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1882 // branch, negate the icmp predicate and swap the branch targets and avoid 1883 // inserting an Xor to negate Cond. 1884 bool Done = false; 1885 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1886 if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1887 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1888 Done = true; 1889 } 1890 if (!Done) { 1891 Value *Negate = IRB.CreateXor( 1892 ConstantInt::getTrue(F.getContext()), Cond); 1893 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1894 } 1895 } 1896 } 1897 1898 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1899 unsigned i = 0; 1900 (void)(i); // Unused in release build. 1901 DenseSet<PHINode *> TrivialPHIs; 1902 for (CHRScope *Scope : CHRScopes) { 1903 transformScopes(Scope, TrivialPHIs); 1904 CHR_DEBUG( 1905 std::ostringstream oss; 1906 oss << " after transformScopes " << i++; 1907 dumpIR(F, oss.str().c_str(), nullptr)); 1908 } 1909 } 1910 1911 static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char * Label) { 1912 dbgs() << Label << " " << Scopes.size() << "\n"; 1913 for (CHRScope *Scope : Scopes) { 1914 dbgs() << *Scope << "\n"; 1915 } 1916 } 1917 1918 bool CHR::run() { 1919 if (!shouldApply(F, PSI)) 1920 return false; 1921 1922 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1923 1924 bool Changed = false; 1925 { 1926 CHR_DEBUG( 1927 dbgs() << "RegionInfo:\n"; 1928 RI.print(dbgs())); 1929 1930 // Recursively traverse the region tree and find regions that have biased 1931 // branches and/or selects and create scopes. 1932 SmallVector<CHRScope *, 8> AllScopes; 1933 findScopes(AllScopes); 1934 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 1935 1936 // Split the scopes if 1) the conditiona values of the biased 1937 // branches/selects of the inner/lower scope can't be hoisted up to the 1938 // outermost/uppermost scope entry, or 2) the condition values of the biased 1939 // branches/selects in a scope (including subscopes) don't share at least 1940 // one common value. 1941 SmallVector<CHRScope *, 8> SplitScopes; 1942 splitScopes(AllScopes, SplitScopes); 1943 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 1944 1945 // After splitting, set the biased regions and selects of a scope (a tree 1946 // root) that include those of the subscopes. 1947 classifyBiasedScopes(SplitScopes); 1948 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 1949 1950 // Filter out the scopes that has only one biased region or select (CHR 1951 // isn't useful in such a case). 1952 SmallVector<CHRScope *, 8> FilteredScopes; 1953 filterScopes(SplitScopes, FilteredScopes); 1954 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 1955 1956 // Set the regions to be CHR'ed and their hoist stops for each scope. 1957 SmallVector<CHRScope *, 8> SetScopes; 1958 setCHRRegions(FilteredScopes, SetScopes); 1959 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 1960 1961 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 1962 // ones. We need to apply CHR from outer to inner so that we apply CHR only 1963 // to the hot path, rather than both hot and cold paths. 1964 SmallVector<CHRScope *, 8> SortedScopes; 1965 sortScopes(SetScopes, SortedScopes); 1966 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 1967 1968 CHR_DEBUG( 1969 dbgs() << "RegionInfo:\n"; 1970 RI.print(dbgs())); 1971 1972 // Apply the CHR transformation. 1973 if (!SortedScopes.empty()) { 1974 transformScopes(SortedScopes); 1975 Changed = true; 1976 } 1977 } 1978 1979 if (Changed) 1980 CHR_DEBUG(dumpIR(F, "after", &Stats)); 1981 1982 return Changed; 1983 } 1984 1985 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 1986 BlockFrequencyInfo &BFI = 1987 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 1988 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1989 ProfileSummaryInfo &PSI = 1990 *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1991 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1992 return CHR(F, BFI, DT, PSI, RI).run(); 1993 } 1994 1995 namespace llvm { 1996 1997 ControlHeightReductionPass::ControlHeightReductionPass() { 1998 ParseCHRFilterFiles(); 1999 } 2000 2001 PreservedAnalyses ControlHeightReductionPass::run( 2002 Function &F, 2003 FunctionAnalysisManager &FAM) { 2004 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2005 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2006 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2007 auto &MAM = MAMProxy.getManager(); 2008 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2009 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2010 bool Changed = CHR(F, BFI, DT, PSI, RI).run(); 2011 if (!Changed) 2012 return PreservedAnalyses::all(); 2013 auto PA = PreservedAnalyses(); 2014 PA.preserve<GlobalsAA>(); 2015 return PA; 2016 } 2017 2018 } // namespace llvm 2019