1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass merges conditional blocks of code and reduces the number of 11 // conditional branches in the hot paths based on profiles. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 16 #include "llvm/Transforms/Utils.h" 17 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 18 #include "llvm/Transforms/Utils/Cloning.h" 19 #include "llvm/Transforms/Utils/ValueMapper.h" 20 #include "llvm/ADT/DenseMap.h" 21 #include "llvm/ADT/DenseSet.h" 22 #include "llvm/ADT/SmallVector.h" 23 #include "llvm/ADT/StringSet.h" 24 #include "llvm/Analysis/BlockFrequencyInfo.h" 25 #include "llvm/Analysis/ProfileSummaryInfo.h" 26 #include "llvm/Analysis/RegionInfo.h" 27 #include "llvm/Analysis/RegionIterator.h" 28 #include "llvm/Analysis/ValueTracking.h" 29 #include "llvm/IR/CFG.h" 30 #include "llvm/IR/Dominators.h" 31 #include "llvm/IR/IRBuilder.h" 32 #include "llvm/IR/MDBuilder.h" 33 #include "llvm/Support/BranchProbability.h" 34 #include "llvm/Support/MemoryBuffer.h" 35 #include "llvm/Transforms/Scalar.h" 36 37 #include <cxxabi.h> 38 #include <set> 39 #include <sstream> 40 41 using namespace llvm; 42 43 #define DEBUG_TYPE "chr" 44 45 #define CHR_DEBUG(X) LLVM_DEBUG(X) 46 47 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 48 cl::desc("Apply CHR for all functions")); 49 50 static cl::opt<double> CHRBiasThreshold( 51 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 52 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 53 54 static cl::opt<unsigned> CHRMergeThreshold( 55 "chr-merge-threshold", cl::init(2), cl::Hidden, 56 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 57 58 static cl::opt<std::string> CHRModuleList( 59 "chr-module-list", cl::init(""), cl::Hidden, 60 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 61 62 static cl::opt<std::string> CHRFunctionList( 63 "chr-function-list", cl::init(""), cl::Hidden, 64 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 65 66 static StringSet<> CHRModules; 67 static StringSet<> CHRFunctions; 68 69 static void ParseCHRFilterFiles() { 70 if (!CHRModuleList.empty()) { 71 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 72 if (!FileOrErr) { 73 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 74 std::exit(1); 75 } 76 StringRef Buf = FileOrErr->get()->getBuffer(); 77 SmallVector<StringRef, 0> Lines; 78 Buf.split(Lines, '\n'); 79 for (StringRef Line : Lines) { 80 Line = Line.trim(); 81 if (!Line.empty()) 82 CHRModules.insert(Line); 83 } 84 } 85 if (!CHRFunctionList.empty()) { 86 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 87 if (!FileOrErr) { 88 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 89 std::exit(1); 90 } 91 StringRef Buf = FileOrErr->get()->getBuffer(); 92 SmallVector<StringRef, 0> Lines; 93 Buf.split(Lines, '\n'); 94 for (StringRef Line : Lines) { 95 Line = Line.trim(); 96 if (!Line.empty()) 97 CHRFunctions.insert(Line); 98 } 99 } 100 } 101 102 namespace { 103 class ControlHeightReductionLegacyPass : public FunctionPass { 104 public: 105 static char ID; 106 107 ControlHeightReductionLegacyPass() : FunctionPass(ID) { 108 initializeControlHeightReductionLegacyPassPass( 109 *PassRegistry::getPassRegistry()); 110 ParseCHRFilterFiles(); 111 } 112 113 bool runOnFunction(Function &F) override; 114 void getAnalysisUsage(AnalysisUsage &AU) const override { 115 AU.addRequired<BlockFrequencyInfoWrapperPass>(); 116 AU.addRequired<DominatorTreeWrapperPass>(); 117 AU.addRequired<ProfileSummaryInfoWrapperPass>(); 118 AU.addRequired<RegionInfoPass>(); 119 AU.addPreserved<GlobalsAAWrapperPass>(); 120 } 121 }; 122 } // end anonymous namespace 123 124 char ControlHeightReductionLegacyPass::ID = 0; 125 126 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass, 127 "chr", 128 "Reduce control height in the hot paths", 129 false, false) 130 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 131 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 132 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 133 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass) 134 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass, 135 "chr", 136 "Reduce control height in the hot paths", 137 false, false) 138 139 FunctionPass *llvm::createControlHeightReductionLegacyPass() { 140 return new ControlHeightReductionLegacyPass(); 141 } 142 143 namespace { 144 145 struct CHRStats { 146 CHRStats() : NumBranches(0), NumBranchesDelta(0), 147 WeightedNumBranchesDelta(0) {} 148 void print(raw_ostream &OS) const { 149 OS << "CHRStats: NumBranches " << NumBranches 150 << " NumBranchesDelta " << NumBranchesDelta 151 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 152 } 153 uint64_t NumBranches; // The original number of conditional branches / 154 // selects 155 uint64_t NumBranchesDelta; // The decrease of the number of conditional 156 // branches / selects in the hot paths due to CHR. 157 uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile 158 // count at the scope entry. 159 }; 160 161 inline raw_ostream &operator<<(raw_ostream &OS, const CHRStats &Stats) { 162 Stats.print(OS); 163 return OS; 164 } 165 166 // RegInfo - some properties of a Region. 167 struct RegInfo { 168 RegInfo() : R(nullptr), HasBranch(false) {} 169 RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {} 170 Region *R; 171 bool HasBranch; 172 SmallVector<SelectInst *, 8> Selects; 173 }; 174 175 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 176 177 // CHRScope - a sequence of regions to CHR together. It corresponds to a 178 // sequence of conditional blocks. It can have subscopes which correspond to 179 // nested conditional blocks. Nested CHRScopes form a tree. 180 class CHRScope { 181 public: 182 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 183 assert(RI.R && "Null RegionIn"); 184 RegInfos.push_back(RI); 185 } 186 187 Region *getParentRegion() { 188 assert(RegInfos.size() > 0 && "Empty CHRScope"); 189 Region *Parent = RegInfos[0].R->getParent(); 190 assert(Parent && "Unexpected to call this on the top-level region"); 191 return Parent; 192 } 193 194 BasicBlock *getEntryBlock() { 195 assert(RegInfos.size() > 0 && "Empty CHRScope"); 196 return RegInfos.front().R->getEntry(); 197 } 198 199 BasicBlock *getExitBlock() { 200 assert(RegInfos.size() > 0 && "Empty CHRScope"); 201 return RegInfos.back().R->getExit(); 202 } 203 204 bool appendable(CHRScope *Next) { 205 // The next scope is appendable only if this scope is directly connected to 206 // it (which implies it post-dominates this scope) and this scope dominates 207 // it (no edge to the next scope outside this scope). 208 BasicBlock *NextEntry = Next->getEntryBlock(); 209 if (getExitBlock() != NextEntry) 210 // Not directly connected. 211 return false; 212 Region *LastRegion = RegInfos.back().R; 213 for (BasicBlock *Pred : predecessors(NextEntry)) 214 if (!LastRegion->contains(Pred)) 215 // There's an edge going into the entry of the next scope from outside 216 // of this scope. 217 return false; 218 return true; 219 } 220 221 void append(CHRScope *Next) { 222 assert(RegInfos.size() > 0 && "Empty CHRScope"); 223 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 224 assert(getParentRegion() == Next->getParentRegion() && 225 "Must be siblings"); 226 assert(getExitBlock() == Next->getEntryBlock() && 227 "Must be adjacent"); 228 for (RegInfo &RI : Next->RegInfos) 229 RegInfos.push_back(RI); 230 for (CHRScope *Sub : Next->Subs) 231 Subs.push_back(Sub); 232 } 233 234 void addSub(CHRScope *SubIn) { 235 #ifndef NDEBUG 236 bool is_child = false; 237 for (RegInfo &RI : RegInfos) 238 if (RI.R == SubIn->getParentRegion()) { 239 is_child = true; 240 break; 241 } 242 assert(is_child && "Must be a child"); 243 #endif 244 Subs.push_back(SubIn); 245 } 246 247 // Split this scope at the boundary region into two, which will belong to the 248 // tail and returns the tail. 249 CHRScope *split(Region *Boundary) { 250 assert(Boundary && "Boundary null"); 251 assert(RegInfos.begin()->R != Boundary && 252 "Can't be split at beginning"); 253 auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(), 254 [&Boundary](const RegInfo& RI) { 255 return Boundary == RI.R; 256 }); 257 if (BoundaryIt == RegInfos.end()) 258 return nullptr; 259 SmallVector<RegInfo, 8> TailRegInfos; 260 SmallVector<CHRScope *, 8> TailSubs; 261 TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end()); 262 RegInfos.resize(BoundaryIt - RegInfos.begin()); 263 DenseSet<Region *> TailRegionSet; 264 for (RegInfo &RI : TailRegInfos) 265 TailRegionSet.insert(RI.R); 266 for (auto It = Subs.begin(); It != Subs.end(); ) { 267 CHRScope *Sub = *It; 268 assert(Sub && "null Sub"); 269 Region *Parent = Sub->getParentRegion(); 270 if (TailRegionSet.count(Parent)) { 271 TailSubs.push_back(Sub); 272 It = Subs.erase(It); 273 } else { 274 assert(std::find_if(RegInfos.begin(), RegInfos.end(), 275 [&Parent](const RegInfo& RI) { 276 return Parent == RI.R; 277 }) != RegInfos.end() && 278 "Must be in head"); 279 ++It; 280 } 281 } 282 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 283 return new CHRScope(TailRegInfos, TailSubs); 284 } 285 286 bool contains(Instruction *I) const { 287 BasicBlock *Parent = I->getParent(); 288 for (const RegInfo &RI : RegInfos) 289 if (RI.R->contains(Parent)) 290 return true; 291 return false; 292 } 293 294 void print(raw_ostream &OS) const; 295 296 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 297 SmallVector<CHRScope *, 8> Subs; // Subscopes. 298 299 // The instruction at which to insert the CHR conditional branch (and hoist 300 // the dependent condition values). 301 Instruction *BranchInsertPoint; 302 303 // True-biased and false-biased regions (conditional blocks), 304 // respectively. Used only for the outermost scope and includes regions in 305 // subscopes. The rest are unbiased. 306 DenseSet<Region *> TrueBiasedRegions; 307 DenseSet<Region *> FalseBiasedRegions; 308 // Among the biased regions, the regions that get CHRed. 309 SmallVector<RegInfo, 8> CHRRegions; 310 311 // True-biased and false-biased selects, respectively. Used only for the 312 // outermost scope and includes ones in subscopes. 313 DenseSet<SelectInst *> TrueBiasedSelects; 314 DenseSet<SelectInst *> FalseBiasedSelects; 315 316 // Map from one of the above regions to the instructions to stop 317 // hoisting instructions at through use-def chains. 318 HoistStopMapTy HoistStopMap; 319 320 private: 321 CHRScope(SmallVector<RegInfo, 8> &RegInfosIn, 322 SmallVector<CHRScope *, 8> &SubsIn) 323 : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {} 324 }; 325 326 inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 327 Scope.print(OS); 328 return OS; 329 } 330 331 class CHR { 332 public: 333 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 334 ProfileSummaryInfo &PSIin, RegionInfo &RIin) 335 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {} 336 337 ~CHR() { 338 for (CHRScope *Scope : Scopes) { 339 delete Scope; 340 } 341 } 342 343 bool run(); 344 345 private: 346 // See the comments in CHR::run() for the high level flow of the algorithm and 347 // what the following functions do. 348 349 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 350 Region *R = RI.getTopLevelRegion(); 351 CHRScope *Scope = findScopes(R, nullptr, nullptr, Output); 352 if (Scope) { 353 Output.push_back(Scope); 354 } 355 } 356 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 357 SmallVectorImpl<CHRScope *> &Scopes); 358 CHRScope *findScope(Region *R); 359 void checkScopeHoistable(CHRScope *Scope); 360 361 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 362 SmallVectorImpl<CHRScope *> &Output); 363 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 364 CHRScope *Outer, 365 DenseSet<Value *> *OuterConditionValues, 366 Instruction *OuterInsertPoint, 367 SmallVectorImpl<CHRScope *> &Output, 368 DenseSet<Instruction *> &Unhoistables); 369 370 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 371 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 372 373 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 374 SmallVectorImpl<CHRScope *> &Output); 375 376 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 377 SmallVectorImpl<CHRScope *> &Output); 378 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 379 380 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 381 SmallVectorImpl<CHRScope *> &Output); 382 383 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 384 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 385 void cloneScopeBlocks(CHRScope *Scope, 386 BasicBlock *PreEntryBlock, 387 BasicBlock *ExitBlock, 388 Region *LastRegion, 389 ValueToValueMapTy &VMap); 390 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 391 BasicBlock *EntryBlock, 392 BasicBlock *NewEntryBlock, 393 ValueToValueMapTy &VMap); 394 void fixupBranchesAndSelects(CHRScope *Scope, 395 BasicBlock *PreEntryBlock, 396 BranchInst *MergedBR, 397 uint64_t ProfileCount); 398 void fixupBranch(Region *R, 399 CHRScope *Scope, 400 IRBuilder<> &IRB, 401 Value *&MergedCondition, BranchProbability &CHRBranchBias); 402 void fixupSelect(SelectInst* SI, 403 CHRScope *Scope, 404 IRBuilder<> &IRB, 405 Value *&MergedCondition, BranchProbability &CHRBranchBias); 406 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 407 Instruction *BranchOrSelect, 408 CHRScope *Scope, 409 IRBuilder<> &IRB, 410 Value *&MergedCondition); 411 412 Function &F; 413 BlockFrequencyInfo &BFI; 414 DominatorTree &DT; 415 ProfileSummaryInfo &PSI; 416 RegionInfo &RI; 417 CHRStats Stats; 418 419 // All the true-biased regions in the function 420 DenseSet<Region *> TrueBiasedRegionsGlobal; 421 // All the false-biased regions in the function 422 DenseSet<Region *> FalseBiasedRegionsGlobal; 423 // All the true-biased selects in the function 424 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 425 // All the false-biased selects in the function 426 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 427 // A map from biased regions to their branch bias 428 DenseMap<Region *, BranchProbability> BranchBiasMap; 429 // A map from biased selects to their branch bias 430 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 431 // All the scopes. 432 DenseSet<CHRScope *> Scopes; 433 }; 434 435 } // end anonymous namespace 436 437 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) { 438 if (ForceCHR) 439 return true; 440 441 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 442 if (CHRModules.count(F.getParent()->getName())) 443 return true; 444 StringRef Name = F.getName(); 445 if (CHRFunctions.count(Name)) 446 return true; 447 const char* DemangledName = nullptr; 448 int Status = -1; 449 DemangledName = abi::__cxa_demangle(Name.str().c_str(), 450 nullptr, nullptr, &Status); 451 return DemangledName && CHRFunctions.count(DemangledName); 452 } 453 454 assert(PSI.hasProfileSummary() && "Empty PSI?"); 455 return PSI.isFunctionEntryHot(&F); 456 } 457 458 static void dumpIR(Function &F, const char *Label, CHRStats *Stats) { 459 std::string Name = F.getName().str(); 460 const char *DemangledName = nullptr; 461 int Status = -1; 462 DemangledName = abi::__cxa_demangle(Name.c_str(), 463 nullptr, nullptr, &Status); 464 if (DemangledName == nullptr) { 465 DemangledName = "<NOT-MANGLED>"; 466 } 467 std::string ModuleName = F.getParent()->getName().str(); 468 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 469 << Name); 470 if (Stats) 471 CHR_DEBUG(dbgs() << " " << *Stats); 472 CHR_DEBUG(dbgs() << "\n"); 473 CHR_DEBUG(F.dump()); 474 } 475 476 477 void CHRScope::print(raw_ostream &OS) const { 478 assert(RegInfos.size() > 0 && "Empty CHRScope"); 479 OS << "CHRScope["; 480 OS << RegInfos.size() << ", Regions["; 481 for (const RegInfo &RI : RegInfos) { 482 OS << RI.R->getNameStr(); 483 if (RI.HasBranch) 484 OS << " B"; 485 if (RI.Selects.size() > 0) 486 OS << " S" << RI.Selects.size(); 487 OS << ", "; 488 } 489 if (RegInfos[0].R->getParent()) { 490 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 491 } else { 492 // top level region 493 OS << "]"; 494 } 495 OS << ", Subs["; 496 for (CHRScope *Sub : Subs) { 497 OS << *Sub << ", "; 498 } 499 OS << "]]"; 500 } 501 502 // Return true if the given instruction type can be hoisted by CHR. 503 static bool isHoistableInstructionType(Instruction *I) { 504 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 505 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 506 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 507 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 508 isa<InsertValueInst>(I); 509 } 510 511 // Return true if the given instruction can be hoisted by CHR. 512 static bool isHoistable(Instruction *I, DominatorTree &DT) { 513 if (!isHoistableInstructionType(I)) 514 return false; 515 return isSafeToSpeculativelyExecute(I, nullptr, &DT); 516 } 517 518 // Recursively traverse the use-def chains of the given value and return a set 519 // of the unhoistable base values defined within the scope (excluding the 520 // first-region entry block) or the (hoistable or unhoistable) base values that 521 // are defined outside (including the first-region entry block) of the 522 // scope. The returned set doesn't include constants. 523 static std::set<Value *> getBaseValues(Value *V, 524 DominatorTree &DT) { 525 std::set<Value *> Result; 526 if (auto *I = dyn_cast<Instruction>(V)) { 527 // We don't stop at a block that's not in the Scope because we would miss some 528 // instructions that are based on the same base values if we stop there. 529 if (!isHoistable(I, DT)) { 530 Result.insert(I); 531 return Result; 532 } 533 // I is hoistable above the Scope. 534 for (Value *Op : I->operands()) { 535 std::set<Value *> OpResult = getBaseValues(Op, DT); 536 Result.insert(OpResult.begin(), OpResult.end()); 537 } 538 return Result; 539 } 540 if (isa<Argument>(V)) { 541 Result.insert(V); 542 return Result; 543 } 544 // We don't include others like constants because those won't lead to any 545 // chance of folding of conditions (eg two bit checks merged into one check) 546 // after CHR. 547 return Result; // empty 548 } 549 550 // Return true if V is already hoisted or can be hoisted (along with its 551 // operands) above the insert point. When it returns true and HoistStops is 552 // non-null, the instructions to stop hoisting at through the use-def chains are 553 // inserted into HoistStops. 554 static bool 555 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 556 DenseSet<Instruction *> &Unhoistables, 557 DenseSet<Instruction *> *HoistStops) { 558 assert(InsertPoint && "Null InsertPoint"); 559 if (auto *I = dyn_cast<Instruction>(V)) { 560 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 561 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 562 if (Unhoistables.count(I)) { 563 // Don't hoist if they are not to be hoisted. 564 return false; 565 } 566 if (DT.dominates(I, InsertPoint)) { 567 // We are already above the insert point. Stop here. 568 if (HoistStops) 569 HoistStops->insert(I); 570 return true; 571 } 572 // We aren't not above the insert point, check if we can hoist it above the 573 // insert point. 574 if (isHoistable(I, DT)) { 575 // Check operands first. 576 DenseSet<Instruction *> OpsHoistStops; 577 bool AllOpsHoisted = true; 578 for (Value *Op : I->operands()) { 579 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) { 580 AllOpsHoisted = false; 581 break; 582 } 583 } 584 if (AllOpsHoisted) { 585 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 586 if (HoistStops) 587 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 588 return true; 589 } 590 } 591 return false; 592 } 593 // Non-instructions are considered hoistable. 594 return true; 595 } 596 597 // Returns true and sets the true probability and false probability of an 598 // MD_prof metadata if it's well-formed. 599 static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb, 600 BranchProbability &FalseProb) { 601 if (!MD) return false; 602 MDString *MDName = cast<MDString>(MD->getOperand(0)); 603 if (MDName->getString() != "branch_weights" || 604 MD->getNumOperands() != 3) 605 return false; 606 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 607 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 608 if (!TrueWeight || !FalseWeight) 609 return false; 610 APInt TrueWt = TrueWeight->getValue(); 611 APInt FalseWt = FalseWeight->getValue(); 612 APInt SumWt = TrueWt + FalseWt; 613 TrueProb = BranchProbability::getBranchProbability(TrueWt.getZExtValue(), 614 SumWt.getZExtValue()); 615 FalseProb = BranchProbability::getBranchProbability(FalseWt.getZExtValue(), 616 SumWt.getZExtValue()); 617 return true; 618 } 619 620 static BranchProbability getCHRBiasThreshold() { 621 return BranchProbability::getBranchProbability( 622 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 623 } 624 625 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 626 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 627 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 628 // false. 629 template<typename K, typename S, typename M> 630 bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb, 631 S &TrueSet, S &FalseSet, M &BiasMap) { 632 BranchProbability Threshold = getCHRBiasThreshold(); 633 if (TrueProb >= Threshold) { 634 TrueSet.insert(Key); 635 BiasMap[Key] = TrueProb; 636 return true; 637 } else if (FalseProb >= Threshold) { 638 FalseSet.insert(Key); 639 BiasMap[Key] = FalseProb; 640 return true; 641 } 642 return false; 643 } 644 645 // Returns true and insert a region into the right biased set and the map if the 646 // branch of the region is biased. 647 static bool CheckBiasedBranch(BranchInst *BI, Region *R, 648 DenseSet<Region *> &TrueBiasedRegionsGlobal, 649 DenseSet<Region *> &FalseBiasedRegionsGlobal, 650 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 651 if (!BI->isConditional()) 652 return false; 653 BranchProbability ThenProb, ElseProb; 654 if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof), 655 ThenProb, ElseProb)) 656 return false; 657 BasicBlock *IfThen = BI->getSuccessor(0); 658 BasicBlock *IfElse = BI->getSuccessor(1); 659 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 660 IfThen != IfElse && 661 "Invariant from findScopes"); 662 if (IfThen == R->getExit()) { 663 // Swap them so that IfThen/ThenProb means going into the conditional code 664 // and IfElse/ElseProb means skipping it. 665 std::swap(IfThen, IfElse); 666 std::swap(ThenProb, ElseProb); 667 } 668 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 669 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 670 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 671 return CheckBias(R, ThenProb, ElseProb, 672 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 673 BranchBiasMap); 674 } 675 676 // Returns true and insert a select into the right biased set and the map if the 677 // select is biased. 678 static bool CheckBiasedSelect( 679 SelectInst *SI, Region *R, 680 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 681 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 682 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 683 BranchProbability TrueProb, FalseProb; 684 if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof), 685 TrueProb, FalseProb)) 686 return false; 687 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 688 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 689 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 690 return CheckBias(SI, TrueProb, FalseProb, 691 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 692 SelectBiasMap); 693 } 694 695 // Returns the instruction at which to hoist the dependent condition values and 696 // insert the CHR branch for a region. This is the terminator branch in the 697 // entry block or the first select in the entry block, if any. 698 static Instruction* getBranchInsertPoint(RegInfo &RI) { 699 Region *R = RI.R; 700 BasicBlock *EntryBB = R->getEntry(); 701 // The hoist point is by default the terminator of the entry block, which is 702 // the same as the branch instruction if RI.HasBranch is true. 703 Instruction *HoistPoint = EntryBB->getTerminator(); 704 for (SelectInst *SI : RI.Selects) { 705 if (SI->getParent() == EntryBB) { 706 // Pick the first select in Selects in the entry block. Note Selects is 707 // sorted in the instruction order within a block (asserted below). 708 HoistPoint = SI; 709 break; 710 } 711 } 712 assert(HoistPoint && "Null HoistPoint"); 713 #ifndef NDEBUG 714 // Check that HoistPoint is the first one in Selects in the entry block, 715 // if any. 716 DenseSet<Instruction *> EntryBlockSelectSet; 717 for (SelectInst *SI : RI.Selects) { 718 if (SI->getParent() == EntryBB) { 719 EntryBlockSelectSet.insert(SI); 720 } 721 } 722 for (Instruction &I : *EntryBB) { 723 if (EntryBlockSelectSet.count(&I) > 0) { 724 assert(&I == HoistPoint && 725 "HoistPoint must be the first one in Selects"); 726 break; 727 } 728 } 729 #endif 730 return HoistPoint; 731 } 732 733 // Find a CHR scope in the given region. 734 CHRScope * CHR::findScope(Region *R) { 735 CHRScope *Result = nullptr; 736 BasicBlock *Entry = R->getEntry(); 737 BasicBlock *Exit = R->getExit(); // null if top level. 738 assert(Entry && "Entry must not be null"); 739 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 740 "Only top level region has a null exit"); 741 if (Entry) 742 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 743 else 744 CHR_DEBUG(dbgs() << "Entry null\n"); 745 if (Exit) 746 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 747 else 748 CHR_DEBUG(dbgs() << "Exit null\n"); 749 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 750 // to this region). 751 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 752 if (EntryInSubregion) 753 return nullptr; 754 // Exclude loops 755 for (BasicBlock *Pred : predecessors(Entry)) 756 if (R->contains(Pred)) 757 return nullptr; 758 if (Exit) { 759 // Try to find an if-then block (check if R is an if-then). 760 // if (cond) { 761 // ... 762 // } 763 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 764 if (BI) 765 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 766 else 767 CHR_DEBUG(dbgs() << "BI null\n"); 768 if (BI && BI->isConditional()) { 769 BasicBlock *S0 = BI->getSuccessor(0); 770 BasicBlock *S1 = BI->getSuccessor(1); 771 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 772 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 773 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 774 RegInfo RI(R); 775 RI.HasBranch = CheckBiasedBranch( 776 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 777 BranchBiasMap); 778 Result = new CHRScope(RI); 779 Scopes.insert(Result); 780 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 781 ++Stats.NumBranches; 782 } 783 } 784 } 785 { 786 // Try to look for selects in the direct child blocks (as opposed to in 787 // subregions) of R. 788 // ... 789 // if (..) { // Some subregion 790 // ... 791 // } 792 // if (..) { // Some subregion 793 // ... 794 // } 795 // ... 796 // a = cond ? b : c; 797 // ... 798 SmallVector<SelectInst *, 8> Selects; 799 for (RegionNode *E : R->elements()) { 800 if (E->isSubRegion()) 801 continue; 802 // This returns the basic block of E if E is a direct child of R (not a 803 // subregion.) 804 BasicBlock *BB = E->getEntry(); 805 // Need to push in the order to make it easier to find the first Select 806 // later. 807 for (Instruction &I : *BB) { 808 if (auto *SI = dyn_cast<SelectInst>(&I)) { 809 Selects.push_back(SI); 810 ++Stats.NumBranches; 811 } 812 } 813 } 814 if (Selects.size() > 0) { 815 auto AddSelects = [&](RegInfo &RI) { 816 for (auto *SI : Selects) 817 if (CheckBiasedSelect(SI, RI.R, 818 TrueBiasedSelectsGlobal, 819 FalseBiasedSelectsGlobal, 820 SelectBiasMap)) 821 RI.Selects.push_back(SI); 822 }; 823 if (!Result) { 824 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 825 RegInfo RI(R); 826 AddSelects(RI); 827 Result = new CHRScope(RI); 828 Scopes.insert(Result); 829 } else { 830 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 831 AddSelects(Result->RegInfos[0]); 832 } 833 } 834 } 835 836 if (Result) { 837 checkScopeHoistable(Result); 838 } 839 return Result; 840 } 841 842 // Check that any of the branch and the selects in the region could be 843 // hoisted above the the CHR branch insert point (the most dominating of 844 // them, either the branch (at the end of the first block) or the first 845 // select in the first block). If the branch can't be hoisted, drop the 846 // selects in the first blocks. 847 // 848 // For example, for the following scope/region with selects, we want to insert 849 // the merged branch right before the first select in the first/entry block by 850 // hoisting c1, c2, c3, and c4. 851 // 852 // // Branch insert point here. 853 // a = c1 ? b : c; // Select 1 854 // d = c2 ? e : f; // Select 2 855 // if (c3) { // Branch 856 // ... 857 // c4 = foo() // A call. 858 // g = c4 ? h : i; // Select 3 859 // } 860 // 861 // But suppose we can't hoist c4 because it's dependent on the preceding 862 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 863 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 864 void CHR::checkScopeHoistable(CHRScope *Scope) { 865 RegInfo &RI = Scope->RegInfos[0]; 866 Region *R = RI.R; 867 BasicBlock *EntryBB = R->getEntry(); 868 auto *Branch = RI.HasBranch ? 869 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 870 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 871 if (RI.HasBranch || !Selects.empty()) { 872 Instruction *InsertPoint = getBranchInsertPoint(RI); 873 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 874 // Avoid a data dependence from a select or a branch to a(nother) 875 // select. Note no instruction can't data-depend on a branch (a branch 876 // instruction doesn't produce a value). 877 DenseSet<Instruction *> Unhoistables; 878 // Initialize Unhoistables with the selects. 879 for (SelectInst *SI : Selects) { 880 Unhoistables.insert(SI); 881 } 882 // Remove Selects that can't be hoisted. 883 for (auto it = Selects.begin(); it != Selects.end(); ) { 884 SelectInst *SI = *it; 885 if (SI == InsertPoint) { 886 ++it; 887 continue; 888 } 889 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 890 DT, Unhoistables, nullptr); 891 if (!IsHoistable) { 892 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 893 it = Selects.erase(it); 894 // Since we are dropping the select here, we also drop it from 895 // Unhoistables. 896 Unhoistables.erase(SI); 897 } else 898 ++it; 899 } 900 // Update InsertPoint after potentially removing selects. 901 InsertPoint = getBranchInsertPoint(RI); 902 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 903 if (RI.HasBranch && InsertPoint != Branch) { 904 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 905 DT, Unhoistables, nullptr); 906 if (!IsHoistable) { 907 // If the branch isn't hoistable, drop the selects in the entry 908 // block, preferring the branch, which makes the branch the hoist 909 // point. 910 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 911 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 912 CHR_DEBUG( 913 for (SelectInst *SI : Selects) { 914 dbgs() << "SI " << *SI << "\n"; 915 }); 916 Selects.erase(std::remove_if(Selects.begin(), Selects.end(), 917 [EntryBB](SelectInst *SI) { 918 return SI->getParent() == EntryBB; 919 }), Selects.end()); 920 Unhoistables.clear(); 921 InsertPoint = Branch; 922 } 923 } 924 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 925 #ifndef NDEBUG 926 if (RI.HasBranch) { 927 assert(!DT.dominates(Branch, InsertPoint) && 928 "Branch can't be already above the hoist point"); 929 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 930 DT, Unhoistables, nullptr) && 931 "checkHoistValue for branch"); 932 } 933 for (auto *SI : Selects) { 934 assert(!DT.dominates(SI, InsertPoint) && 935 "SI can't be already above the hoist point"); 936 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 937 Unhoistables, nullptr) && 938 "checkHoistValue for selects"); 939 } 940 CHR_DEBUG(dbgs() << "Result\n"); 941 if (RI.HasBranch) { 942 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 943 } 944 for (auto *SI : Selects) { 945 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 946 } 947 #endif 948 } 949 } 950 951 // Traverse the region tree, find all nested scopes and merge them if possible. 952 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 953 SmallVectorImpl<CHRScope *> &Scopes) { 954 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 955 CHRScope *Result = findScope(R); 956 // Visit subscopes. 957 CHRScope *ConsecutiveSubscope = nullptr; 958 SmallVector<CHRScope *, 8> Subscopes; 959 for (auto It = R->begin(); It != R->end(); ++It) { 960 const std::unique_ptr<Region> &SubR = *It; 961 auto Next_It = std::next(It); 962 Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr; 963 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 964 << "\n"); 965 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 966 if (SubCHRScope) { 967 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 968 } else { 969 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 970 } 971 if (SubCHRScope) { 972 if (!ConsecutiveSubscope) 973 ConsecutiveSubscope = SubCHRScope; 974 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 975 Subscopes.push_back(ConsecutiveSubscope); 976 ConsecutiveSubscope = SubCHRScope; 977 } else 978 ConsecutiveSubscope->append(SubCHRScope); 979 } else { 980 if (ConsecutiveSubscope) { 981 Subscopes.push_back(ConsecutiveSubscope); 982 } 983 ConsecutiveSubscope = nullptr; 984 } 985 } 986 if (ConsecutiveSubscope) { 987 Subscopes.push_back(ConsecutiveSubscope); 988 } 989 for (CHRScope *Sub : Subscopes) { 990 if (Result) { 991 // Combine it with the parent. 992 Result->addSub(Sub); 993 } else { 994 // Push Subscopes as they won't be combined with the parent. 995 Scopes.push_back(Sub); 996 } 997 } 998 return Result; 999 } 1000 1001 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1002 DenseSet<Value *> ConditionValues; 1003 if (RI.HasBranch) { 1004 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1005 ConditionValues.insert(BI->getCondition()); 1006 } 1007 for (SelectInst *SI : RI.Selects) { 1008 ConditionValues.insert(SI->getCondition()); 1009 } 1010 return ConditionValues; 1011 } 1012 1013 1014 // Determine whether to split a scope depending on the sets of the branch 1015 // condition values of the previous region and the current region. We split 1016 // (return true) it if 1) the condition values of the inner/lower scope can't be 1017 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1018 // values have an empty intersection (because the combined branch conditions 1019 // won't probably lead to a simpler combined condition). 1020 static bool shouldSplit(Instruction *InsertPoint, 1021 DenseSet<Value *> &PrevConditionValues, 1022 DenseSet<Value *> &ConditionValues, 1023 DominatorTree &DT, 1024 DenseSet<Instruction *> &Unhoistables) { 1025 CHR_DEBUG( 1026 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1027 for (Value *V : PrevConditionValues) { 1028 dbgs() << *V << ", "; 1029 } 1030 dbgs() << " ConditionValues "; 1031 for (Value *V : ConditionValues) { 1032 dbgs() << *V << ", "; 1033 } 1034 dbgs() << "\n"); 1035 assert(InsertPoint && "Null InsertPoint"); 1036 // If any of Bases isn't hoistable to the hoist point, split. 1037 for (Value *V : ConditionValues) { 1038 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) { 1039 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1040 return true; // Not hoistable, split. 1041 } 1042 } 1043 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1044 // unnecessary splits at scopes with no branch/selects. If 1045 // PrevConditionValues and ConditionValues don't intersect at all, split. 1046 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1047 // Use std::set as DenseSet doesn't work with set_intersection. 1048 std::set<Value *> PrevBases, Bases; 1049 for (Value *V : PrevConditionValues) { 1050 std::set<Value *> BaseValues = getBaseValues(V, DT); 1051 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1052 } 1053 for (Value *V : ConditionValues) { 1054 std::set<Value *> BaseValues = getBaseValues(V, DT); 1055 Bases.insert(BaseValues.begin(), BaseValues.end()); 1056 } 1057 CHR_DEBUG( 1058 dbgs() << "PrevBases "; 1059 for (Value *V : PrevBases) { 1060 dbgs() << *V << ", "; 1061 } 1062 dbgs() << " Bases "; 1063 for (Value *V : Bases) { 1064 dbgs() << *V << ", "; 1065 } 1066 dbgs() << "\n"); 1067 std::set<Value *> Intersection; 1068 std::set_intersection(PrevBases.begin(), PrevBases.end(), 1069 Bases.begin(), Bases.end(), 1070 std::inserter(Intersection, Intersection.begin())); 1071 if (Intersection.empty()) { 1072 // Empty intersection, split. 1073 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1074 return true; 1075 } 1076 } 1077 CHR_DEBUG(dbgs() << "No split\n"); 1078 return false; // Don't split. 1079 } 1080 1081 static void GetSelectsInScope(CHRScope *Scope, 1082 DenseSet<Instruction *> &Output) { 1083 for (RegInfo &RI : Scope->RegInfos) { 1084 for (SelectInst *SI : RI.Selects) { 1085 Output.insert(SI); 1086 } 1087 } 1088 for (CHRScope *Sub : Scope->Subs) { 1089 GetSelectsInScope(Sub, Output); 1090 } 1091 } 1092 1093 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1094 SmallVectorImpl<CHRScope *> &Output) { 1095 for (CHRScope *Scope : Input) { 1096 assert(!Scope->BranchInsertPoint && 1097 "BranchInsertPoint must not be set"); 1098 DenseSet<Instruction *> Unhoistables; 1099 GetSelectsInScope(Scope, Unhoistables); 1100 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1101 } 1102 #ifndef NDEBUG 1103 for (CHRScope *Scope : Output) { 1104 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1105 } 1106 #endif 1107 } 1108 1109 SmallVector<CHRScope *, 8> CHR::splitScope( 1110 CHRScope *Scope, 1111 CHRScope *Outer, 1112 DenseSet<Value *> *OuterConditionValues, 1113 Instruction *OuterInsertPoint, 1114 SmallVectorImpl<CHRScope *> &Output, 1115 DenseSet<Instruction *> &Unhoistables) { 1116 if (Outer) { 1117 assert(OuterConditionValues && "Null OuterConditionValues"); 1118 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1119 } 1120 bool PrevSplitFromOuter = true; 1121 DenseSet<Value *> PrevConditionValues; 1122 Instruction *PrevInsertPoint = nullptr; 1123 SmallVector<CHRScope *, 8> Splits; 1124 SmallVector<bool, 8> SplitsSplitFromOuter; 1125 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1126 SmallVector<Instruction *, 8> SplitsInsertPoints; 1127 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1128 for (RegInfo &RI : RegInfos) { 1129 Instruction *InsertPoint = getBranchInsertPoint(RI); 1130 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1131 CHR_DEBUG( 1132 dbgs() << "ConditionValues "; 1133 for (Value *V : ConditionValues) { 1134 dbgs() << *V << ", "; 1135 } 1136 dbgs() << "\n"); 1137 if (RI.R == RegInfos[0].R) { 1138 // First iteration. Check to see if we should split from the outer. 1139 if (Outer) { 1140 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1141 CHR_DEBUG(dbgs() << "Should split from outer at " 1142 << RI.R->getNameStr() << "\n"); 1143 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1144 ConditionValues, DT, Unhoistables)) { 1145 PrevConditionValues = ConditionValues; 1146 PrevInsertPoint = InsertPoint; 1147 } else { 1148 // Not splitting from the outer. Use the outer bases and insert 1149 // point. Union the bases. 1150 PrevSplitFromOuter = false; 1151 PrevConditionValues = *OuterConditionValues; 1152 PrevConditionValues.insert(ConditionValues.begin(), 1153 ConditionValues.end()); 1154 PrevInsertPoint = OuterInsertPoint; 1155 } 1156 } else { 1157 CHR_DEBUG(dbgs() << "Outer null\n"); 1158 PrevConditionValues = ConditionValues; 1159 PrevInsertPoint = InsertPoint; 1160 } 1161 } else { 1162 CHR_DEBUG(dbgs() << "Should split from prev at " 1163 << RI.R->getNameStr() << "\n"); 1164 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1165 DT, Unhoistables)) { 1166 CHRScope *Tail = Scope->split(RI.R); 1167 Scopes.insert(Tail); 1168 Splits.push_back(Scope); 1169 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1170 SplitsConditionValues.push_back(PrevConditionValues); 1171 SplitsInsertPoints.push_back(PrevInsertPoint); 1172 Scope = Tail; 1173 PrevConditionValues = ConditionValues; 1174 PrevInsertPoint = InsertPoint; 1175 PrevSplitFromOuter = true; 1176 } else { 1177 // Not splitting. Union the bases. Keep the hoist point. 1178 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1179 } 1180 } 1181 } 1182 Splits.push_back(Scope); 1183 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1184 SplitsConditionValues.push_back(PrevConditionValues); 1185 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1186 SplitsInsertPoints.push_back(PrevInsertPoint); 1187 assert(Splits.size() == SplitsConditionValues.size() && 1188 Splits.size() == SplitsSplitFromOuter.size() && 1189 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1190 for (size_t I = 0; I < Splits.size(); ++I) { 1191 CHRScope *Split = Splits[I]; 1192 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1193 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1194 SmallVector<CHRScope *, 8> NewSubs; 1195 DenseSet<Instruction *> SplitUnhoistables; 1196 GetSelectsInScope(Split, SplitUnhoistables); 1197 for (CHRScope *Sub : Split->Subs) { 1198 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1199 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1200 SplitUnhoistables); 1201 NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end()); 1202 } 1203 Split->Subs = NewSubs; 1204 } 1205 SmallVector<CHRScope *, 8> Result; 1206 for (size_t I = 0; I < Splits.size(); ++I) { 1207 CHRScope *Split = Splits[I]; 1208 if (SplitsSplitFromOuter[I]) { 1209 // Split from the outer. 1210 Output.push_back(Split); 1211 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1212 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1213 << "\n"); 1214 } else { 1215 // Connected to the outer. 1216 Result.push_back(Split); 1217 } 1218 } 1219 if (!Outer) 1220 assert(Result.empty() && 1221 "If no outer (top-level), must return no nested ones"); 1222 return Result; 1223 } 1224 1225 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1226 for (CHRScope *Scope : Scopes) { 1227 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1228 classifyBiasedScopes(Scope, Scope); 1229 CHR_DEBUG( 1230 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1231 dbgs() << "TrueBiasedRegions "; 1232 for (Region *R : Scope->TrueBiasedRegions) { 1233 dbgs() << R->getNameStr() << ", "; 1234 } 1235 dbgs() << "\n"; 1236 dbgs() << "FalseBiasedRegions "; 1237 for (Region *R : Scope->FalseBiasedRegions) { 1238 dbgs() << R->getNameStr() << ", "; 1239 } 1240 dbgs() << "\n"; 1241 dbgs() << "TrueBiasedSelects "; 1242 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1243 dbgs() << *SI << ", "; 1244 } 1245 dbgs() << "\n"; 1246 dbgs() << "FalseBiasedSelects "; 1247 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1248 dbgs() << *SI << ", "; 1249 } 1250 dbgs() << "\n";); 1251 } 1252 } 1253 1254 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1255 for (RegInfo &RI : Scope->RegInfos) { 1256 if (RI.HasBranch) { 1257 Region *R = RI.R; 1258 if (TrueBiasedRegionsGlobal.count(R) > 0) 1259 OutermostScope->TrueBiasedRegions.insert(R); 1260 else if (FalseBiasedRegionsGlobal.count(R) > 0) 1261 OutermostScope->FalseBiasedRegions.insert(R); 1262 else 1263 llvm_unreachable("Must be biased"); 1264 } 1265 for (SelectInst *SI : RI.Selects) { 1266 if (TrueBiasedSelectsGlobal.count(SI) > 0) 1267 OutermostScope->TrueBiasedSelects.insert(SI); 1268 else if (FalseBiasedSelectsGlobal.count(SI) > 0) 1269 OutermostScope->FalseBiasedSelects.insert(SI); 1270 else 1271 llvm_unreachable("Must be biased"); 1272 } 1273 } 1274 for (CHRScope *Sub : Scope->Subs) { 1275 classifyBiasedScopes(Sub, OutermostScope); 1276 } 1277 } 1278 1279 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1280 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1281 Scope->FalseBiasedRegions.size() + 1282 Scope->TrueBiasedSelects.size() + 1283 Scope->FalseBiasedSelects.size(); 1284 return NumBiased >= CHRMergeThreshold; 1285 } 1286 1287 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1288 SmallVectorImpl<CHRScope *> &Output) { 1289 for (CHRScope *Scope : Input) { 1290 // Filter out the ones with only one region and no subs. 1291 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1292 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1293 << Scope->TrueBiasedRegions.size() 1294 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1295 << " true-selects " << Scope->TrueBiasedSelects.size() 1296 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1297 continue; 1298 } 1299 Output.push_back(Scope); 1300 } 1301 } 1302 1303 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1304 SmallVectorImpl<CHRScope *> &Output) { 1305 for (CHRScope *Scope : Input) { 1306 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1307 "Empty"); 1308 setCHRRegions(Scope, Scope); 1309 Output.push_back(Scope); 1310 CHR_DEBUG( 1311 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1312 for (auto pair : Scope->HoistStopMap) { 1313 Region *R = pair.first; 1314 dbgs() << "Region " << R->getNameStr() << "\n"; 1315 for (Instruction *I : pair.second) { 1316 dbgs() << "HoistStop " << *I << "\n"; 1317 } 1318 } 1319 dbgs() << "CHRRegions" << "\n"; 1320 for (RegInfo &RI : Scope->CHRRegions) { 1321 dbgs() << RI.R->getNameStr() << "\n"; 1322 }); 1323 } 1324 } 1325 1326 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1327 DenseSet<Instruction *> Unhoistables; 1328 // Put the biased selects in Unhoistables because they should stay where they 1329 // are and constant-folded after CHR (in case one biased select or a branch 1330 // can depend on another biased select.) 1331 for (RegInfo &RI : Scope->RegInfos) { 1332 for (SelectInst *SI : RI.Selects) { 1333 Unhoistables.insert(SI); 1334 } 1335 } 1336 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1337 for (RegInfo &RI : Scope->RegInfos) { 1338 Region *R = RI.R; 1339 DenseSet<Instruction *> HoistStops; 1340 bool IsHoisted = false; 1341 if (RI.HasBranch) { 1342 assert((OutermostScope->TrueBiasedRegions.count(R) > 0 || 1343 OutermostScope->FalseBiasedRegions.count(R) > 0) && 1344 "Must be truthy or falsy"); 1345 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1346 // Note checkHoistValue fills in HoistStops. 1347 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1348 Unhoistables, &HoistStops); 1349 assert(IsHoistable && "Must be hoistable"); 1350 (void)(IsHoistable); // Unused in release build 1351 IsHoisted = true; 1352 } 1353 for (SelectInst *SI : RI.Selects) { 1354 assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 || 1355 OutermostScope->FalseBiasedSelects.count(SI) > 0) && 1356 "Must be true or false biased"); 1357 // Note checkHoistValue fills in HoistStops. 1358 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1359 Unhoistables, &HoistStops); 1360 assert(IsHoistable && "Must be hoistable"); 1361 (void)(IsHoistable); // Unused in release build 1362 IsHoisted = true; 1363 } 1364 if (IsHoisted) { 1365 OutermostScope->CHRRegions.push_back(RI); 1366 OutermostScope->HoistStopMap[R] = HoistStops; 1367 } 1368 } 1369 for (CHRScope *Sub : Scope->Subs) 1370 setCHRRegions(Sub, OutermostScope); 1371 } 1372 1373 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1374 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1375 } 1376 1377 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1378 SmallVectorImpl<CHRScope *> &Output) { 1379 Output.resize(Input.size()); 1380 std::copy(Input.begin(), Input.end(), Output.begin()); 1381 std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter); 1382 } 1383 1384 // Return true if V is already hoisted or was hoisted (along with its operands) 1385 // to the insert point. 1386 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1387 HoistStopMapTy &HoistStopMap, 1388 DenseSet<Instruction *> &HoistedSet, 1389 DenseSet<PHINode *> &TrivialPHIs) { 1390 auto IT = HoistStopMap.find(R); 1391 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1392 DenseSet<Instruction *> &HoistStops = IT->second; 1393 if (auto *I = dyn_cast<Instruction>(V)) { 1394 if (I == HoistPoint) 1395 return; 1396 if (HoistStops.count(I)) 1397 return; 1398 if (auto *PN = dyn_cast<PHINode>(I)) 1399 if (TrivialPHIs.count(PN)) 1400 // The trivial phi inserted by the previous CHR scope could replace a 1401 // non-phi in HoistStops. Note that since this phi is at the exit of a 1402 // previous CHR scope, which dominates this scope, it's safe to stop 1403 // hoisting there. 1404 return; 1405 if (HoistedSet.count(I)) 1406 // Already hoisted, return. 1407 return; 1408 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1409 for (Value *Op : I->operands()) { 1410 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs); 1411 } 1412 I->moveBefore(HoistPoint); 1413 HoistedSet.insert(I); 1414 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1415 } 1416 } 1417 1418 // Hoist the dependent condition values of the branches and the selects in the 1419 // scope to the insert point. 1420 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1421 DenseSet<PHINode *> &TrivialPHIs) { 1422 DenseSet<Instruction *> HoistedSet; 1423 for (const RegInfo &RI : Scope->CHRRegions) { 1424 Region *R = RI.R; 1425 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1426 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1427 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1428 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1429 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1430 HoistedSet, TrivialPHIs); 1431 } 1432 for (SelectInst *SI : RI.Selects) { 1433 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1434 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1435 if (!(IsTrueBiased || IsFalseBiased)) 1436 continue; 1437 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1438 HoistedSet, TrivialPHIs); 1439 } 1440 } 1441 } 1442 1443 // Negate the predicate if an ICmp if it's used only by branches or selects by 1444 // swapping the operands of the branches or the selects. Returns true if success. 1445 static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1446 Instruction *ExcludedUser, 1447 CHRScope *Scope) { 1448 for (User *U : ICmp->users()) { 1449 if (U == ExcludedUser) 1450 continue; 1451 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1452 continue; 1453 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1454 continue; 1455 return false; 1456 } 1457 for (User *U : ICmp->users()) { 1458 if (U == ExcludedUser) 1459 continue; 1460 if (auto *BI = dyn_cast<BranchInst>(U)) { 1461 assert(BI->isConditional() && "Must be conditional"); 1462 BI->swapSuccessors(); 1463 // Don't need to swap this in terms of 1464 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1465 // mean whehter the branch is likely go into the if-then rather than 1466 // successor0/successor1 and because we can tell which edge is the then or 1467 // the else one by comparing the destination to the region exit block. 1468 continue; 1469 } 1470 if (auto *SI = dyn_cast<SelectInst>(U)) { 1471 // Swap operands 1472 Value *TrueValue = SI->getTrueValue(); 1473 Value *FalseValue = SI->getFalseValue(); 1474 SI->setTrueValue(FalseValue); 1475 SI->setFalseValue(TrueValue); 1476 SI->swapProfMetadata(); 1477 if (Scope->TrueBiasedSelects.count(SI)) { 1478 assert(Scope->FalseBiasedSelects.count(SI) == 0 && 1479 "Must not be already in"); 1480 Scope->FalseBiasedSelects.insert(SI); 1481 } else if (Scope->FalseBiasedSelects.count(SI)) { 1482 assert(Scope->TrueBiasedSelects.count(SI) == 0 && 1483 "Must not be already in"); 1484 Scope->TrueBiasedSelects.insert(SI); 1485 } 1486 continue; 1487 } 1488 llvm_unreachable("Must be a branch or a select"); 1489 } 1490 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1491 return true; 1492 } 1493 1494 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1495 // for a value that's defined in the scope but used outside it (meaning it's 1496 // alive at the exit block). 1497 static void insertTrivialPHIs(CHRScope *Scope, 1498 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1499 DenseSet<PHINode *> &TrivialPHIs) { 1500 DenseSet<BasicBlock *> BlocksInScopeSet; 1501 SmallVector<BasicBlock *, 8> BlocksInScopeVec; 1502 for (RegInfo &RI : Scope->RegInfos) { 1503 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1504 // sub-Scopes. 1505 BlocksInScopeSet.insert(BB); 1506 BlocksInScopeVec.push_back(BB); 1507 } 1508 } 1509 CHR_DEBUG( 1510 dbgs() << "Inserting redudant phis\n"; 1511 for (BasicBlock *BB : BlocksInScopeVec) { 1512 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1513 }); 1514 for (BasicBlock *BB : BlocksInScopeVec) { 1515 for (Instruction &I : *BB) { 1516 SmallVector<Instruction *, 8> Users; 1517 for (User *U : I.users()) { 1518 if (auto *UI = dyn_cast<Instruction>(U)) { 1519 if (BlocksInScopeSet.count(UI->getParent()) == 0 && 1520 // Unless there's already a phi for I at the exit block. 1521 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1522 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1523 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1524 Users.push_back(UI); 1525 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1526 // There's a loop backedge from a block that's dominated by this 1527 // scope to the entry block. 1528 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1529 CHR_DEBUG(dbgs() 1530 << "Used at entry block (for a back edge) by a phi user " 1531 << *UI << "\n"); 1532 Users.push_back(UI); 1533 } 1534 } 1535 } 1536 if (Users.size() > 0) { 1537 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1538 // ExitBlock. Replace I with the new phi in UI unless UI is another 1539 // phi at ExitBlock. 1540 unsigned PredCount = std::distance(pred_begin(ExitBlock), 1541 pred_end(ExitBlock)); 1542 PHINode *PN = PHINode::Create(I.getType(), PredCount, "", 1543 &ExitBlock->front()); 1544 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1545 PN->addIncoming(&I, Pred); 1546 } 1547 TrivialPHIs.insert(PN); 1548 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1549 for (Instruction *UI : Users) { 1550 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1551 if (UI->getOperand(J) == &I) { 1552 UI->setOperand(J, PN); 1553 } 1554 } 1555 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1556 } 1557 } 1558 } 1559 } 1560 } 1561 1562 // Assert that all the CHR regions of the scope have a biased branch or select. 1563 static void assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1564 #ifndef NDEBUG 1565 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1566 if (Scope->TrueBiasedRegions.count(RI.R) || 1567 Scope->FalseBiasedRegions.count(RI.R)) 1568 return true; 1569 for (SelectInst *SI : RI.Selects) 1570 if (Scope->TrueBiasedSelects.count(SI) || 1571 Scope->FalseBiasedSelects.count(SI)) 1572 return true; 1573 return false; 1574 }; 1575 for (RegInfo &RI : Scope->CHRRegions) { 1576 assert(HasBiasedBranchOrSelect(RI, Scope) && 1577 "Must have biased branch or select"); 1578 } 1579 #endif 1580 } 1581 1582 // Assert that all the condition values of the biased branches and selects have 1583 // been hoisted to the pre-entry block or outside of the scope. 1584 static void assertBranchOrSelectConditionHoisted(CHRScope *Scope, 1585 BasicBlock *PreEntryBlock) { 1586 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1587 for (RegInfo &RI : Scope->CHRRegions) { 1588 Region *R = RI.R; 1589 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1590 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1591 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1592 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1593 Value *V = BI->getCondition(); 1594 CHR_DEBUG(dbgs() << *V << "\n"); 1595 if (auto *I = dyn_cast<Instruction>(V)) { 1596 assert((I->getParent() == PreEntryBlock || 1597 !Scope->contains(I)) && 1598 "Must have been hoisted to PreEntryBlock or outside the scope"); 1599 } 1600 } 1601 for (SelectInst *SI : RI.Selects) { 1602 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1603 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1604 if (!(IsTrueBiased || IsFalseBiased)) 1605 continue; 1606 Value *V = SI->getCondition(); 1607 CHR_DEBUG(dbgs() << *V << "\n"); 1608 if (auto *I = dyn_cast<Instruction>(V)) { 1609 assert((I->getParent() == PreEntryBlock || 1610 !Scope->contains(I)) && 1611 "Must have been hoisted to PreEntryBlock or outside the scope"); 1612 } 1613 } 1614 } 1615 } 1616 1617 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1618 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1619 1620 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1621 Region *FirstRegion = Scope->RegInfos[0].R; 1622 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1623 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1624 BasicBlock *ExitBlock = LastRegion->getExit(); 1625 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1626 1627 if (ExitBlock) { 1628 // Insert a trivial phi at the exit block (where the CHR hot path and the 1629 // cold path merges) for a value that's defined in the scope but used 1630 // outside it (meaning it's alive at the exit block). We will add the 1631 // incoming values for the CHR cold paths to it below. Without this, we'd 1632 // miss updating phi's for such values unless there happens to already be a 1633 // phi for that value there. 1634 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1635 } 1636 1637 // Split the entry block of the first region. The new block becomes the new 1638 // entry block of the first region. The old entry block becomes the block to 1639 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1640 // through the split, we update the entry of the first region after the split, 1641 // and Region only points to the entry and the exit blocks, rather than 1642 // keeping everything in a list or set, the blocks membership and the 1643 // entry/exit blocks of the region are still valid after the split. 1644 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1645 << " at " << *Scope->BranchInsertPoint << "\n"); 1646 BasicBlock *NewEntryBlock = 1647 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1648 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1649 "NewEntryBlock's only pred must be EntryBlock"); 1650 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1651 BasicBlock *PreEntryBlock = EntryBlock; 1652 1653 ValueToValueMapTy VMap; 1654 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1655 // hot path (originals) and a cold path (clones) and update the PHIs at the 1656 // exit block. 1657 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1658 1659 // Replace the old (placeholder) branch with the new (merged) conditional 1660 // branch. 1661 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1662 NewEntryBlock, VMap); 1663 1664 #ifndef NDEBUG 1665 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1666 #endif 1667 1668 // Hoist the conditional values of the branches/selects. 1669 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs); 1670 1671 #ifndef NDEBUG 1672 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1673 #endif 1674 1675 // Create the combined branch condition and constant-fold the branches/selects 1676 // in the hot path. 1677 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1678 ProfileCount ? ProfileCount.getValue() : 0); 1679 } 1680 1681 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1682 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1683 // at the exit block. 1684 void CHR::cloneScopeBlocks(CHRScope *Scope, 1685 BasicBlock *PreEntryBlock, 1686 BasicBlock *ExitBlock, 1687 Region *LastRegion, 1688 ValueToValueMapTy &VMap) { 1689 // Clone all the blocks. The original blocks will be the hot-path 1690 // CHR-optimized code and the cloned blocks will be the original unoptimized 1691 // code. This is so that the block pointers from the 1692 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1693 // which CHR should apply to. 1694 SmallVector<BasicBlock*, 8> NewBlocks; 1695 for (RegInfo &RI : Scope->RegInfos) 1696 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1697 // sub-Scopes. 1698 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1699 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1700 NewBlocks.push_back(NewBB); 1701 VMap[BB] = NewBB; 1702 } 1703 1704 // Place the cloned blocks right after the original blocks (right before the 1705 // exit block of.) 1706 if (ExitBlock) 1707 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1708 F.getBasicBlockList(), 1709 NewBlocks[0]->getIterator(), F.end()); 1710 1711 // Update the cloned blocks/instructions to refer to themselves. 1712 for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i) 1713 for (Instruction &I : *NewBlocks[i]) 1714 RemapInstruction(&I, VMap, 1715 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1716 1717 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1718 // the top-level region but we don't need to add PHIs. The trivial PHIs 1719 // inserted above will be updated here. 1720 if (ExitBlock) 1721 for (PHINode &PN : ExitBlock->phis()) 1722 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1723 ++I) { 1724 BasicBlock *Pred = PN.getIncomingBlock(I); 1725 if (LastRegion->contains(Pred)) { 1726 Value *V = PN.getIncomingValue(I); 1727 auto It = VMap.find(V); 1728 if (It != VMap.end()) V = It->second; 1729 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1730 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1731 } 1732 } 1733 } 1734 1735 // A helper for transformScope. Replace the old (placeholder) branch with the 1736 // new (merged) conditional branch. 1737 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1738 BasicBlock *EntryBlock, 1739 BasicBlock *NewEntryBlock, 1740 ValueToValueMapTy &VMap) { 1741 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1742 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1743 "SplitBlock did not work correctly!"); 1744 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1745 "NewEntryBlock's only pred must be EntryBlock"); 1746 assert(VMap.find(NewEntryBlock) != VMap.end() && 1747 "NewEntryBlock must have been copied"); 1748 OldBR->removeFromParent(); 1749 OldBR->dropAllReferences(); 1750 // The true predicate is a placeholder. It will be replaced later in 1751 // fixupBranchesAndSelects(). 1752 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1753 cast<BasicBlock>(VMap[NewEntryBlock]), 1754 ConstantInt::getTrue(F.getContext())); 1755 PreEntryBlock->getInstList().push_back(NewBR); 1756 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1757 "NewEntryBlock's only pred must be EntryBlock"); 1758 return NewBR; 1759 } 1760 1761 // A helper for transformScopes. Create the combined branch condition and 1762 // constant-fold the branches/selects in the hot path. 1763 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1764 BasicBlock *PreEntryBlock, 1765 BranchInst *MergedBR, 1766 uint64_t ProfileCount) { 1767 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1768 BranchProbability CHRBranchBias(1, 1); 1769 uint64_t NumCHRedBranches = 0; 1770 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1771 for (RegInfo &RI : Scope->CHRRegions) { 1772 Region *R = RI.R; 1773 if (RI.HasBranch) { 1774 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1775 ++NumCHRedBranches; 1776 } 1777 for (SelectInst *SI : RI.Selects) { 1778 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1779 ++NumCHRedBranches; 1780 } 1781 } 1782 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1783 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1784 MergedBR->setCondition(MergedCondition); 1785 SmallVector<uint32_t, 2> Weights; 1786 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000))); 1787 Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000))); 1788 MDBuilder MDB(F.getContext()); 1789 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1790 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1791 << "\n"); 1792 } 1793 1794 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1795 // and constant-fold a branch in the hot path. 1796 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1797 IRBuilder<> &IRB, 1798 Value *&MergedCondition, 1799 BranchProbability &CHRBranchBias) { 1800 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1801 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1802 "Must be truthy or falsy"); 1803 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1804 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1805 "Must be in the bias map"); 1806 BranchProbability Bias = BranchBiasMap[R]; 1807 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1808 // Take the min. 1809 if (CHRBranchBias > Bias) 1810 CHRBranchBias = Bias; 1811 BasicBlock *IfThen = BI->getSuccessor(1); 1812 BasicBlock *IfElse = BI->getSuccessor(0); 1813 BasicBlock *RegionExitBlock = R->getExit(); 1814 assert(RegionExitBlock && "Null ExitBlock"); 1815 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1816 IfThen != IfElse && "Invariant from findScopes"); 1817 if (IfThen == RegionExitBlock) { 1818 // Swap them so that IfThen means going into it and IfElse means skipping 1819 // it. 1820 std::swap(IfThen, IfElse); 1821 } 1822 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1823 << " IfElse " << IfElse->getName() << "\n"); 1824 Value *Cond = BI->getCondition(); 1825 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1826 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1827 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1828 MergedCondition); 1829 // Constant-fold the branch at ClonedEntryBlock. 1830 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1831 "The successor shouldn't change"); 1832 Value *NewCondition = ConditionTrue ? 1833 ConstantInt::getTrue(F.getContext()) : 1834 ConstantInt::getFalse(F.getContext()); 1835 BI->setCondition(NewCondition); 1836 } 1837 1838 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1839 // and constant-fold a select in the hot path. 1840 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1841 IRBuilder<> &IRB, 1842 Value *&MergedCondition, 1843 BranchProbability &CHRBranchBias) { 1844 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1845 assert((IsTrueBiased || 1846 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1847 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1848 "Must be in the bias map"); 1849 BranchProbability Bias = SelectBiasMap[SI]; 1850 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1851 // Take the min. 1852 if (CHRBranchBias > Bias) 1853 CHRBranchBias = Bias; 1854 Value *Cond = SI->getCondition(); 1855 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1856 MergedCondition); 1857 Value *NewCondition = IsTrueBiased ? 1858 ConstantInt::getTrue(F.getContext()) : 1859 ConstantInt::getFalse(F.getContext()); 1860 SI->setCondition(NewCondition); 1861 } 1862 1863 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1864 // condition. 1865 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1866 Instruction *BranchOrSelect, 1867 CHRScope *Scope, 1868 IRBuilder<> &IRB, 1869 Value *&MergedCondition) { 1870 if (IsTrueBiased) { 1871 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1872 } else { 1873 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1874 // branch, negate the icmp predicate and swap the branch targets and avoid 1875 // inserting an Xor to negate Cond. 1876 bool Done = false; 1877 if (auto *ICmp = dyn_cast<ICmpInst>(Cond)) 1878 if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) { 1879 MergedCondition = IRB.CreateAnd(MergedCondition, Cond); 1880 Done = true; 1881 } 1882 if (!Done) { 1883 Value *Negate = IRB.CreateXor( 1884 ConstantInt::getTrue(F.getContext()), Cond); 1885 MergedCondition = IRB.CreateAnd(MergedCondition, Negate); 1886 } 1887 } 1888 } 1889 1890 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1891 unsigned i = 0; 1892 (void)(i); // Unused in release build. 1893 DenseSet<PHINode *> TrivialPHIs; 1894 for (CHRScope *Scope : CHRScopes) { 1895 transformScopes(Scope, TrivialPHIs); 1896 CHR_DEBUG( 1897 std::ostringstream oss; 1898 oss << " after transformScopes " << i++; 1899 dumpIR(F, oss.str().c_str(), nullptr)); 1900 } 1901 } 1902 1903 static void dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char * Label) { 1904 dbgs() << Label << " " << Scopes.size() << "\n"; 1905 for (CHRScope *Scope : Scopes) { 1906 dbgs() << *Scope << "\n"; 1907 } 1908 } 1909 1910 bool CHR::run() { 1911 if (!shouldApply(F, PSI)) 1912 return false; 1913 1914 CHR_DEBUG(dumpIR(F, "before", nullptr)); 1915 1916 bool Changed = false; 1917 { 1918 CHR_DEBUG( 1919 dbgs() << "RegionInfo:\n"; 1920 RI.print(dbgs())); 1921 1922 // Recursively traverse the region tree and find regions that have biased 1923 // branches and/or selects and create scopes. 1924 SmallVector<CHRScope *, 8> AllScopes; 1925 findScopes(AllScopes); 1926 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 1927 1928 // Split the scopes if 1) the conditiona values of the biased 1929 // branches/selects of the inner/lower scope can't be hoisted up to the 1930 // outermost/uppermost scope entry, or 2) the condition values of the biased 1931 // branches/selects in a scope (including subscopes) don't share at least 1932 // one common value. 1933 SmallVector<CHRScope *, 8> SplitScopes; 1934 splitScopes(AllScopes, SplitScopes); 1935 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 1936 1937 // After splitting, set the biased regions and selects of a scope (a tree 1938 // root) that include those of the subscopes. 1939 classifyBiasedScopes(SplitScopes); 1940 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 1941 1942 // Filter out the scopes that has only one biased region or select (CHR 1943 // isn't useful in such a case). 1944 SmallVector<CHRScope *, 8> FilteredScopes; 1945 filterScopes(SplitScopes, FilteredScopes); 1946 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 1947 1948 // Set the regions to be CHR'ed and their hoist stops for each scope. 1949 SmallVector<CHRScope *, 8> SetScopes; 1950 setCHRRegions(FilteredScopes, SetScopes); 1951 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 1952 1953 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 1954 // ones. We need to apply CHR from outer to inner so that we apply CHR only 1955 // to the hot path, rather than both hot and cold paths. 1956 SmallVector<CHRScope *, 8> SortedScopes; 1957 sortScopes(SetScopes, SortedScopes); 1958 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 1959 1960 CHR_DEBUG( 1961 dbgs() << "RegionInfo:\n"; 1962 RI.print(dbgs())); 1963 1964 // Apply the CHR transformation. 1965 if (!SortedScopes.empty()) { 1966 transformScopes(SortedScopes); 1967 Changed = true; 1968 } 1969 } 1970 1971 if (Changed) 1972 CHR_DEBUG(dumpIR(F, "after", &Stats)); 1973 1974 return Changed; 1975 } 1976 1977 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) { 1978 BlockFrequencyInfo &BFI = 1979 getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 1980 DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1981 ProfileSummaryInfo &PSI = 1982 *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 1983 RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo(); 1984 return CHR(F, BFI, DT, PSI, RI).run(); 1985 } 1986 1987 namespace llvm { 1988 1989 ControlHeightReductionPass::ControlHeightReductionPass() { 1990 ParseCHRFilterFiles(); 1991 } 1992 1993 PreservedAnalyses ControlHeightReductionPass::run( 1994 Function &F, 1995 FunctionAnalysisManager &FAM) { 1996 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 1997 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 1998 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 1999 auto &MAM = MAMProxy.getManager(); 2000 auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2001 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2002 bool Changed = CHR(F, BFI, DT, PSI, RI).run(); 2003 if (!Changed) 2004 return PreservedAnalyses::all(); 2005 auto PA = PreservedAnalyses(); 2006 PA.preserve<GlobalsAA>(); 2007 return PA; 2008 } 2009 2010 } // namespace llvm 2011