1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This pass merges conditional blocks of code and reduces the number of 10 // conditional branches in the hot paths based on profiles. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h" 15 #include "llvm/ADT/DenseMap.h" 16 #include "llvm/ADT/DenseSet.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/StringSet.h" 19 #include "llvm/Analysis/BlockFrequencyInfo.h" 20 #include "llvm/Analysis/GlobalsModRef.h" 21 #include "llvm/Analysis/OptimizationRemarkEmitter.h" 22 #include "llvm/Analysis/ProfileSummaryInfo.h" 23 #include "llvm/Analysis/RegionInfo.h" 24 #include "llvm/Analysis/RegionIterator.h" 25 #include "llvm/Analysis/ValueTracking.h" 26 #include "llvm/IR/CFG.h" 27 #include "llvm/IR/Dominators.h" 28 #include "llvm/IR/IRBuilder.h" 29 #include "llvm/IR/IntrinsicInst.h" 30 #include "llvm/IR/MDBuilder.h" 31 #include "llvm/IR/PassManager.h" 32 #include "llvm/InitializePasses.h" 33 #include "llvm/Support/BranchProbability.h" 34 #include "llvm/Support/CommandLine.h" 35 #include "llvm/Support/MemoryBuffer.h" 36 #include "llvm/Transforms/Utils.h" 37 #include "llvm/Transforms/Utils/BasicBlockUtils.h" 38 #include "llvm/Transforms/Utils/Cloning.h" 39 #include "llvm/Transforms/Utils/ValueMapper.h" 40 41 #include <set> 42 #include <sstream> 43 44 using namespace llvm; 45 46 #define DEBUG_TYPE "chr" 47 48 #define CHR_DEBUG(X) LLVM_DEBUG(X) 49 50 static cl::opt<bool> DisableCHR("disable-chr", cl::init(false), cl::Hidden, 51 cl::desc("Disable CHR for all functions")); 52 53 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden, 54 cl::desc("Apply CHR for all functions")); 55 56 static cl::opt<double> CHRBiasThreshold( 57 "chr-bias-threshold", cl::init(0.99), cl::Hidden, 58 cl::desc("CHR considers a branch bias greater than this ratio as biased")); 59 60 static cl::opt<unsigned> CHRMergeThreshold( 61 "chr-merge-threshold", cl::init(2), cl::Hidden, 62 cl::desc("CHR merges a group of N branches/selects where N >= this value")); 63 64 static cl::opt<std::string> CHRModuleList( 65 "chr-module-list", cl::init(""), cl::Hidden, 66 cl::desc("Specify file to retrieve the list of modules to apply CHR to")); 67 68 static cl::opt<std::string> CHRFunctionList( 69 "chr-function-list", cl::init(""), cl::Hidden, 70 cl::desc("Specify file to retrieve the list of functions to apply CHR to")); 71 72 static cl::opt<unsigned> CHRDupThreshsold( 73 "chr-dup-threshold", cl::init(3), cl::Hidden, 74 cl::desc("Max number of duplications by CHR for a region")); 75 76 static StringSet<> CHRModules; 77 static StringSet<> CHRFunctions; 78 79 static void parseCHRFilterFiles() { 80 if (!CHRModuleList.empty()) { 81 auto FileOrErr = MemoryBuffer::getFile(CHRModuleList); 82 if (!FileOrErr) { 83 errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n"; 84 std::exit(1); 85 } 86 StringRef Buf = FileOrErr->get()->getBuffer(); 87 SmallVector<StringRef, 0> Lines; 88 Buf.split(Lines, '\n'); 89 for (StringRef Line : Lines) { 90 Line = Line.trim(); 91 if (!Line.empty()) 92 CHRModules.insert(Line); 93 } 94 } 95 if (!CHRFunctionList.empty()) { 96 auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList); 97 if (!FileOrErr) { 98 errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n"; 99 std::exit(1); 100 } 101 StringRef Buf = FileOrErr->get()->getBuffer(); 102 SmallVector<StringRef, 0> Lines; 103 Buf.split(Lines, '\n'); 104 for (StringRef Line : Lines) { 105 Line = Line.trim(); 106 if (!Line.empty()) 107 CHRFunctions.insert(Line); 108 } 109 } 110 } 111 112 namespace { 113 114 struct CHRStats { 115 CHRStats() = default; 116 void print(raw_ostream &OS) const { 117 OS << "CHRStats: NumBranches " << NumBranches 118 << " NumBranchesDelta " << NumBranchesDelta 119 << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta; 120 } 121 // The original number of conditional branches / selects 122 uint64_t NumBranches = 0; 123 // The decrease of the number of conditional branches / selects in the hot 124 // paths due to CHR. 125 uint64_t NumBranchesDelta = 0; 126 // NumBranchesDelta weighted by the profile count at the scope entry. 127 uint64_t WeightedNumBranchesDelta = 0; 128 }; 129 130 // RegInfo - some properties of a Region. 131 struct RegInfo { 132 RegInfo() = default; 133 RegInfo(Region *RegionIn) : R(RegionIn) {} 134 Region *R = nullptr; 135 bool HasBranch = false; 136 SmallVector<SelectInst *, 8> Selects; 137 }; 138 139 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy; 140 141 // CHRScope - a sequence of regions to CHR together. It corresponds to a 142 // sequence of conditional blocks. It can have subscopes which correspond to 143 // nested conditional blocks. Nested CHRScopes form a tree. 144 class CHRScope { 145 public: 146 CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) { 147 assert(RI.R && "Null RegionIn"); 148 RegInfos.push_back(RI); 149 } 150 151 Region *getParentRegion() { 152 assert(RegInfos.size() > 0 && "Empty CHRScope"); 153 Region *Parent = RegInfos[0].R->getParent(); 154 assert(Parent && "Unexpected to call this on the top-level region"); 155 return Parent; 156 } 157 158 BasicBlock *getEntryBlock() { 159 assert(RegInfos.size() > 0 && "Empty CHRScope"); 160 return RegInfos.front().R->getEntry(); 161 } 162 163 BasicBlock *getExitBlock() { 164 assert(RegInfos.size() > 0 && "Empty CHRScope"); 165 return RegInfos.back().R->getExit(); 166 } 167 168 bool appendable(CHRScope *Next) { 169 // The next scope is appendable only if this scope is directly connected to 170 // it (which implies it post-dominates this scope) and this scope dominates 171 // it (no edge to the next scope outside this scope). 172 BasicBlock *NextEntry = Next->getEntryBlock(); 173 if (getExitBlock() != NextEntry) 174 // Not directly connected. 175 return false; 176 Region *LastRegion = RegInfos.back().R; 177 for (BasicBlock *Pred : predecessors(NextEntry)) 178 if (!LastRegion->contains(Pred)) 179 // There's an edge going into the entry of the next scope from outside 180 // of this scope. 181 return false; 182 return true; 183 } 184 185 void append(CHRScope *Next) { 186 assert(RegInfos.size() > 0 && "Empty CHRScope"); 187 assert(Next->RegInfos.size() > 0 && "Empty CHRScope"); 188 assert(getParentRegion() == Next->getParentRegion() && 189 "Must be siblings"); 190 assert(getExitBlock() == Next->getEntryBlock() && 191 "Must be adjacent"); 192 RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end()); 193 Subs.append(Next->Subs.begin(), Next->Subs.end()); 194 } 195 196 void addSub(CHRScope *SubIn) { 197 #ifndef NDEBUG 198 bool IsChild = false; 199 for (RegInfo &RI : RegInfos) 200 if (RI.R == SubIn->getParentRegion()) { 201 IsChild = true; 202 break; 203 } 204 assert(IsChild && "Must be a child"); 205 #endif 206 Subs.push_back(SubIn); 207 } 208 209 // Split this scope at the boundary region into two, which will belong to the 210 // tail and returns the tail. 211 CHRScope *split(Region *Boundary) { 212 assert(Boundary && "Boundary null"); 213 assert(RegInfos.begin()->R != Boundary && 214 "Can't be split at beginning"); 215 auto BoundaryIt = llvm::find_if( 216 RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; }); 217 if (BoundaryIt == RegInfos.end()) 218 return nullptr; 219 ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end()); 220 DenseSet<Region *> TailRegionSet; 221 for (const RegInfo &RI : TailRegInfos) 222 TailRegionSet.insert(RI.R); 223 224 auto TailIt = 225 std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) { 226 assert(Sub && "null Sub"); 227 Region *Parent = Sub->getParentRegion(); 228 if (TailRegionSet.count(Parent)) 229 return false; 230 231 assert(llvm::any_of( 232 RegInfos, 233 [&Parent](const RegInfo &RI) { return Parent == RI.R; }) && 234 "Must be in head"); 235 return true; 236 }); 237 ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end()); 238 239 assert(HoistStopMap.empty() && "MapHoistStops must be empty"); 240 auto *Scope = new CHRScope(TailRegInfos, TailSubs); 241 RegInfos.erase(BoundaryIt, RegInfos.end()); 242 Subs.erase(TailIt, Subs.end()); 243 return Scope; 244 } 245 246 bool contains(Instruction *I) const { 247 BasicBlock *Parent = I->getParent(); 248 for (const RegInfo &RI : RegInfos) 249 if (RI.R->contains(Parent)) 250 return true; 251 return false; 252 } 253 254 void print(raw_ostream &OS) const; 255 256 SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope 257 SmallVector<CHRScope *, 8> Subs; // Subscopes. 258 259 // The instruction at which to insert the CHR conditional branch (and hoist 260 // the dependent condition values). 261 Instruction *BranchInsertPoint; 262 263 // True-biased and false-biased regions (conditional blocks), 264 // respectively. Used only for the outermost scope and includes regions in 265 // subscopes. The rest are unbiased. 266 DenseSet<Region *> TrueBiasedRegions; 267 DenseSet<Region *> FalseBiasedRegions; 268 // Among the biased regions, the regions that get CHRed. 269 SmallVector<RegInfo, 8> CHRRegions; 270 271 // True-biased and false-biased selects, respectively. Used only for the 272 // outermost scope and includes ones in subscopes. 273 DenseSet<SelectInst *> TrueBiasedSelects; 274 DenseSet<SelectInst *> FalseBiasedSelects; 275 276 // Map from one of the above regions to the instructions to stop 277 // hoisting instructions at through use-def chains. 278 HoistStopMapTy HoistStopMap; 279 280 private: 281 CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn) 282 : RegInfos(RegInfosIn.begin(), RegInfosIn.end()), 283 Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {} 284 }; 285 286 class CHR { 287 public: 288 CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin, 289 ProfileSummaryInfo &PSIin, RegionInfo &RIin, 290 OptimizationRemarkEmitter &OREin) 291 : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {} 292 293 ~CHR() { 294 for (CHRScope *Scope : Scopes) { 295 delete Scope; 296 } 297 } 298 299 bool run(); 300 301 private: 302 // See the comments in CHR::run() for the high level flow of the algorithm and 303 // what the following functions do. 304 305 void findScopes(SmallVectorImpl<CHRScope *> &Output) { 306 Region *R = RI.getTopLevelRegion(); 307 if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) { 308 Output.push_back(Scope); 309 } 310 } 311 CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 312 SmallVectorImpl<CHRScope *> &Scopes); 313 CHRScope *findScope(Region *R); 314 void checkScopeHoistable(CHRScope *Scope); 315 316 void splitScopes(SmallVectorImpl<CHRScope *> &Input, 317 SmallVectorImpl<CHRScope *> &Output); 318 SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope, 319 CHRScope *Outer, 320 DenseSet<Value *> *OuterConditionValues, 321 Instruction *OuterInsertPoint, 322 SmallVectorImpl<CHRScope *> &Output, 323 DenseSet<Instruction *> &Unhoistables); 324 325 void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes); 326 void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope); 327 328 void filterScopes(SmallVectorImpl<CHRScope *> &Input, 329 SmallVectorImpl<CHRScope *> &Output); 330 331 void setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 332 SmallVectorImpl<CHRScope *> &Output); 333 void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope); 334 335 void sortScopes(SmallVectorImpl<CHRScope *> &Input, 336 SmallVectorImpl<CHRScope *> &Output); 337 338 void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes); 339 void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs); 340 void cloneScopeBlocks(CHRScope *Scope, 341 BasicBlock *PreEntryBlock, 342 BasicBlock *ExitBlock, 343 Region *LastRegion, 344 ValueToValueMapTy &VMap); 345 BranchInst *createMergedBranch(BasicBlock *PreEntryBlock, 346 BasicBlock *EntryBlock, 347 BasicBlock *NewEntryBlock, 348 ValueToValueMapTy &VMap); 349 void fixupBranchesAndSelects(CHRScope *Scope, BasicBlock *PreEntryBlock, 350 BranchInst *MergedBR, uint64_t ProfileCount); 351 void fixupBranch(Region *R, CHRScope *Scope, IRBuilder<> &IRB, 352 Value *&MergedCondition, BranchProbability &CHRBranchBias); 353 void fixupSelect(SelectInst *SI, CHRScope *Scope, IRBuilder<> &IRB, 354 Value *&MergedCondition, BranchProbability &CHRBranchBias); 355 void addToMergedCondition(bool IsTrueBiased, Value *Cond, 356 Instruction *BranchOrSelect, CHRScope *Scope, 357 IRBuilder<> &IRB, Value *&MergedCondition); 358 unsigned getRegionDuplicationCount(const Region *R) { 359 unsigned Count = 0; 360 // Find out how many times region R is cloned. Note that if the parent 361 // of R is cloned, R is also cloned, but R's clone count is not updated 362 // from the clone of the parent. We need to accumlate all the counts 363 // from the ancestors to get the clone count. 364 while (R) { 365 Count += DuplicationCount[R]; 366 R = R->getParent(); 367 } 368 return Count; 369 } 370 371 Function &F; 372 BlockFrequencyInfo &BFI; 373 DominatorTree &DT; 374 ProfileSummaryInfo &PSI; 375 RegionInfo &RI; 376 OptimizationRemarkEmitter &ORE; 377 CHRStats Stats; 378 379 // All the true-biased regions in the function 380 DenseSet<Region *> TrueBiasedRegionsGlobal; 381 // All the false-biased regions in the function 382 DenseSet<Region *> FalseBiasedRegionsGlobal; 383 // All the true-biased selects in the function 384 DenseSet<SelectInst *> TrueBiasedSelectsGlobal; 385 // All the false-biased selects in the function 386 DenseSet<SelectInst *> FalseBiasedSelectsGlobal; 387 // A map from biased regions to their branch bias 388 DenseMap<Region *, BranchProbability> BranchBiasMap; 389 // A map from biased selects to their branch bias 390 DenseMap<SelectInst *, BranchProbability> SelectBiasMap; 391 // All the scopes. 392 DenseSet<CHRScope *> Scopes; 393 // This maps records how many times this region is cloned. 394 DenseMap<const Region *, unsigned> DuplicationCount; 395 }; 396 397 } // end anonymous namespace 398 399 static inline 400 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS, 401 const CHRStats &Stats) { 402 Stats.print(OS); 403 return OS; 404 } 405 406 static inline 407 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) { 408 Scope.print(OS); 409 return OS; 410 } 411 412 static bool shouldApply(Function &F, ProfileSummaryInfo &PSI) { 413 if (DisableCHR) 414 return false; 415 416 if (ForceCHR) 417 return true; 418 419 if (!CHRModuleList.empty() || !CHRFunctionList.empty()) { 420 if (CHRModules.count(F.getParent()->getName())) 421 return true; 422 return CHRFunctions.count(F.getName()); 423 } 424 425 return PSI.isFunctionEntryHot(&F); 426 } 427 428 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label, 429 CHRStats *Stats) { 430 StringRef FuncName = F.getName(); 431 StringRef ModuleName = F.getParent()->getName(); 432 (void)(FuncName); // Unused in release build. 433 (void)(ModuleName); // Unused in release build. 434 CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " " 435 << FuncName); 436 if (Stats) 437 CHR_DEBUG(dbgs() << " " << *Stats); 438 CHR_DEBUG(dbgs() << "\n"); 439 CHR_DEBUG(F.dump()); 440 } 441 442 void CHRScope::print(raw_ostream &OS) const { 443 assert(RegInfos.size() > 0 && "Empty CHRScope"); 444 OS << "CHRScope["; 445 OS << RegInfos.size() << ", Regions["; 446 for (const RegInfo &RI : RegInfos) { 447 OS << RI.R->getNameStr(); 448 if (RI.HasBranch) 449 OS << " B"; 450 if (RI.Selects.size() > 0) 451 OS << " S" << RI.Selects.size(); 452 OS << ", "; 453 } 454 if (RegInfos[0].R->getParent()) { 455 OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr(); 456 } else { 457 // top level region 458 OS << "]"; 459 } 460 OS << ", Subs["; 461 for (CHRScope *Sub : Subs) { 462 OS << *Sub << ", "; 463 } 464 OS << "]]"; 465 } 466 467 // Return true if the given instruction type can be hoisted by CHR. 468 static bool isHoistableInstructionType(Instruction *I) { 469 return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) || 470 isa<GetElementPtrInst>(I) || isa<CmpInst>(I) || 471 isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) || 472 isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) || 473 isa<InsertValueInst>(I); 474 } 475 476 // Return true if the given instruction can be hoisted by CHR. 477 static bool isHoistable(Instruction *I, DominatorTree &DT) { 478 if (!isHoistableInstructionType(I)) 479 return false; 480 return isSafeToSpeculativelyExecute(I, nullptr, nullptr, &DT); 481 } 482 483 // Recursively traverse the use-def chains of the given value and return a set 484 // of the unhoistable base values defined within the scope (excluding the 485 // first-region entry block) or the (hoistable or unhoistable) base values that 486 // are defined outside (including the first-region entry block) of the 487 // scope. The returned set doesn't include constants. 488 static const std::set<Value *> & 489 getBaseValues(Value *V, DominatorTree &DT, 490 DenseMap<Value *, std::set<Value *>> &Visited) { 491 auto It = Visited.find(V); 492 if (It != Visited.end()) { 493 return It->second; 494 } 495 std::set<Value *> Result; 496 if (auto *I = dyn_cast<Instruction>(V)) { 497 // We don't stop at a block that's not in the Scope because we would miss 498 // some instructions that are based on the same base values if we stop 499 // there. 500 if (!isHoistable(I, DT)) { 501 Result.insert(I); 502 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 503 } 504 // I is hoistable above the Scope. 505 for (Value *Op : I->operands()) { 506 const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited); 507 Result.insert(OpResult.begin(), OpResult.end()); 508 } 509 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 510 } 511 if (isa<Argument>(V)) { 512 Result.insert(V); 513 } 514 // We don't include others like constants because those won't lead to any 515 // chance of folding of conditions (eg two bit checks merged into one check) 516 // after CHR. 517 return Visited.insert(std::make_pair(V, std::move(Result))).first->second; 518 } 519 520 // Return true if V is already hoisted or can be hoisted (along with its 521 // operands) above the insert point. When it returns true and HoistStops is 522 // non-null, the instructions to stop hoisting at through the use-def chains are 523 // inserted into HoistStops. 524 static bool 525 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT, 526 DenseSet<Instruction *> &Unhoistables, 527 DenseSet<Instruction *> *HoistStops, 528 DenseMap<Instruction *, bool> &Visited) { 529 assert(InsertPoint && "Null InsertPoint"); 530 if (auto *I = dyn_cast<Instruction>(V)) { 531 auto It = Visited.find(I); 532 if (It != Visited.end()) { 533 return It->second; 534 } 535 assert(DT.getNode(I->getParent()) && "DT must contain I's parent block"); 536 assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination"); 537 if (Unhoistables.count(I)) { 538 // Don't hoist if they are not to be hoisted. 539 Visited[I] = false; 540 return false; 541 } 542 if (DT.dominates(I, InsertPoint)) { 543 // We are already above the insert point. Stop here. 544 if (HoistStops) 545 HoistStops->insert(I); 546 Visited[I] = true; 547 return true; 548 } 549 // We aren't not above the insert point, check if we can hoist it above the 550 // insert point. 551 if (isHoistable(I, DT)) { 552 // Check operands first. 553 DenseSet<Instruction *> OpsHoistStops; 554 bool AllOpsHoisted = true; 555 for (Value *Op : I->operands()) { 556 if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops, 557 Visited)) { 558 AllOpsHoisted = false; 559 break; 560 } 561 } 562 if (AllOpsHoisted) { 563 CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n"); 564 if (HoistStops) 565 HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end()); 566 Visited[I] = true; 567 return true; 568 } 569 } 570 Visited[I] = false; 571 return false; 572 } 573 // Non-instructions are considered hoistable. 574 return true; 575 } 576 577 // Returns true and sets the true probability and false probability of an 578 // MD_prof metadata if it's well-formed. 579 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb, 580 BranchProbability &FalseProb) { 581 if (!MD) return false; 582 MDString *MDName = cast<MDString>(MD->getOperand(0)); 583 if (MDName->getString() != "branch_weights" || 584 MD->getNumOperands() != 3) 585 return false; 586 ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1)); 587 ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2)); 588 if (!TrueWeight || !FalseWeight) 589 return false; 590 uint64_t TrueWt = TrueWeight->getValue().getZExtValue(); 591 uint64_t FalseWt = FalseWeight->getValue().getZExtValue(); 592 uint64_t SumWt = TrueWt + FalseWt; 593 594 assert(SumWt >= TrueWt && SumWt >= FalseWt && 595 "Overflow calculating branch probabilities."); 596 597 // Guard against 0-to-0 branch weights to avoid a division-by-zero crash. 598 if (SumWt == 0) 599 return false; 600 601 TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt); 602 FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt); 603 return true; 604 } 605 606 static BranchProbability getCHRBiasThreshold() { 607 return BranchProbability::getBranchProbability( 608 static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000); 609 } 610 611 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >= 612 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >= 613 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return 614 // false. 615 template <typename K, typename S, typename M> 616 static bool checkBias(K *Key, BranchProbability TrueProb, 617 BranchProbability FalseProb, S &TrueSet, S &FalseSet, 618 M &BiasMap) { 619 BranchProbability Threshold = getCHRBiasThreshold(); 620 if (TrueProb >= Threshold) { 621 TrueSet.insert(Key); 622 BiasMap[Key] = TrueProb; 623 return true; 624 } else if (FalseProb >= Threshold) { 625 FalseSet.insert(Key); 626 BiasMap[Key] = FalseProb; 627 return true; 628 } 629 return false; 630 } 631 632 // Returns true and insert a region into the right biased set and the map if the 633 // branch of the region is biased. 634 static bool checkBiasedBranch(BranchInst *BI, Region *R, 635 DenseSet<Region *> &TrueBiasedRegionsGlobal, 636 DenseSet<Region *> &FalseBiasedRegionsGlobal, 637 DenseMap<Region *, BranchProbability> &BranchBiasMap) { 638 if (!BI->isConditional()) 639 return false; 640 BranchProbability ThenProb, ElseProb; 641 if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof), 642 ThenProb, ElseProb)) 643 return false; 644 BasicBlock *IfThen = BI->getSuccessor(0); 645 BasicBlock *IfElse = BI->getSuccessor(1); 646 assert((IfThen == R->getExit() || IfElse == R->getExit()) && 647 IfThen != IfElse && 648 "Invariant from findScopes"); 649 if (IfThen == R->getExit()) { 650 // Swap them so that IfThen/ThenProb means going into the conditional code 651 // and IfElse/ElseProb means skipping it. 652 std::swap(IfThen, IfElse); 653 std::swap(ThenProb, ElseProb); 654 } 655 CHR_DEBUG(dbgs() << "BI " << *BI << " "); 656 CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " "); 657 CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n"); 658 return checkBias(R, ThenProb, ElseProb, 659 TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 660 BranchBiasMap); 661 } 662 663 // Returns true and insert a select into the right biased set and the map if the 664 // select is biased. 665 static bool checkBiasedSelect( 666 SelectInst *SI, Region *R, 667 DenseSet<SelectInst *> &TrueBiasedSelectsGlobal, 668 DenseSet<SelectInst *> &FalseBiasedSelectsGlobal, 669 DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) { 670 BranchProbability TrueProb, FalseProb; 671 if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof), 672 TrueProb, FalseProb)) 673 return false; 674 CHR_DEBUG(dbgs() << "SI " << *SI << " "); 675 CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " "); 676 CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n"); 677 return checkBias(SI, TrueProb, FalseProb, 678 TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal, 679 SelectBiasMap); 680 } 681 682 // Returns the instruction at which to hoist the dependent condition values and 683 // insert the CHR branch for a region. This is the terminator branch in the 684 // entry block or the first select in the entry block, if any. 685 static Instruction* getBranchInsertPoint(RegInfo &RI) { 686 Region *R = RI.R; 687 BasicBlock *EntryBB = R->getEntry(); 688 // The hoist point is by default the terminator of the entry block, which is 689 // the same as the branch instruction if RI.HasBranch is true. 690 Instruction *HoistPoint = EntryBB->getTerminator(); 691 for (SelectInst *SI : RI.Selects) { 692 if (SI->getParent() == EntryBB) { 693 // Pick the first select in Selects in the entry block. Note Selects is 694 // sorted in the instruction order within a block (asserted below). 695 HoistPoint = SI; 696 break; 697 } 698 } 699 assert(HoistPoint && "Null HoistPoint"); 700 #ifndef NDEBUG 701 // Check that HoistPoint is the first one in Selects in the entry block, 702 // if any. 703 DenseSet<Instruction *> EntryBlockSelectSet; 704 for (SelectInst *SI : RI.Selects) { 705 if (SI->getParent() == EntryBB) { 706 EntryBlockSelectSet.insert(SI); 707 } 708 } 709 for (Instruction &I : *EntryBB) { 710 if (EntryBlockSelectSet.contains(&I)) { 711 assert(&I == HoistPoint && 712 "HoistPoint must be the first one in Selects"); 713 break; 714 } 715 } 716 #endif 717 return HoistPoint; 718 } 719 720 // Find a CHR scope in the given region. 721 CHRScope * CHR::findScope(Region *R) { 722 CHRScope *Result = nullptr; 723 BasicBlock *Entry = R->getEntry(); 724 BasicBlock *Exit = R->getExit(); // null if top level. 725 assert(Entry && "Entry must not be null"); 726 assert((Exit == nullptr) == (R->isTopLevelRegion()) && 727 "Only top level region has a null exit"); 728 if (Entry) 729 CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n"); 730 else 731 CHR_DEBUG(dbgs() << "Entry null\n"); 732 if (Exit) 733 CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n"); 734 else 735 CHR_DEBUG(dbgs() << "Exit null\n"); 736 // Exclude cases where Entry is part of a subregion (hence it doesn't belong 737 // to this region). 738 bool EntryInSubregion = RI.getRegionFor(Entry) != R; 739 if (EntryInSubregion) 740 return nullptr; 741 // Exclude loops 742 for (BasicBlock *Pred : predecessors(Entry)) 743 if (R->contains(Pred)) 744 return nullptr; 745 // If any of the basic blocks have address taken, we must skip this region 746 // because we cannot clone basic blocks that have address taken. 747 for (BasicBlock *BB : R->blocks()) { 748 if (BB->hasAddressTaken()) 749 return nullptr; 750 // If we encounter llvm.coro.id, skip this region because if the basic block 751 // is cloned, we end up inserting a token type PHI node to the block with 752 // llvm.coro.begin. 753 // FIXME: This could lead to less optimal codegen, because the region is 754 // excluded, it can prevent CHR from merging adjacent regions into bigger 755 // scope and hoisting more branches. 756 for (Instruction &I : *BB) 757 if (auto *II = dyn_cast<IntrinsicInst>(&I)) 758 if (II->getIntrinsicID() == Intrinsic::coro_id) 759 return nullptr; 760 } 761 762 if (Exit) { 763 // Try to find an if-then block (check if R is an if-then). 764 // if (cond) { 765 // ... 766 // } 767 auto *BI = dyn_cast<BranchInst>(Entry->getTerminator()); 768 if (BI) 769 CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n"); 770 else 771 CHR_DEBUG(dbgs() << "BI null\n"); 772 if (BI && BI->isConditional()) { 773 BasicBlock *S0 = BI->getSuccessor(0); 774 BasicBlock *S1 = BI->getSuccessor(1); 775 CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n"); 776 CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n"); 777 if (S0 != S1 && (S0 == Exit || S1 == Exit)) { 778 RegInfo RI(R); 779 RI.HasBranch = checkBiasedBranch( 780 BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal, 781 BranchBiasMap); 782 Result = new CHRScope(RI); 783 Scopes.insert(Result); 784 CHR_DEBUG(dbgs() << "Found a region with a branch\n"); 785 ++Stats.NumBranches; 786 if (!RI.HasBranch) { 787 ORE.emit([&]() { 788 return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI) 789 << "Branch not biased"; 790 }); 791 } 792 } 793 } 794 } 795 { 796 // Try to look for selects in the direct child blocks (as opposed to in 797 // subregions) of R. 798 // ... 799 // if (..) { // Some subregion 800 // ... 801 // } 802 // if (..) { // Some subregion 803 // ... 804 // } 805 // ... 806 // a = cond ? b : c; 807 // ... 808 SmallVector<SelectInst *, 8> Selects; 809 for (RegionNode *E : R->elements()) { 810 if (E->isSubRegion()) 811 continue; 812 // This returns the basic block of E if E is a direct child of R (not a 813 // subregion.) 814 BasicBlock *BB = E->getEntry(); 815 // Need to push in the order to make it easier to find the first Select 816 // later. 817 for (Instruction &I : *BB) { 818 if (auto *SI = dyn_cast<SelectInst>(&I)) { 819 Selects.push_back(SI); 820 ++Stats.NumBranches; 821 } 822 } 823 } 824 if (Selects.size() > 0) { 825 auto AddSelects = [&](RegInfo &RI) { 826 for (auto *SI : Selects) 827 if (checkBiasedSelect(SI, RI.R, 828 TrueBiasedSelectsGlobal, 829 FalseBiasedSelectsGlobal, 830 SelectBiasMap)) 831 RI.Selects.push_back(SI); 832 else 833 ORE.emit([&]() { 834 return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI) 835 << "Select not biased"; 836 }); 837 }; 838 if (!Result) { 839 CHR_DEBUG(dbgs() << "Found a select-only region\n"); 840 RegInfo RI(R); 841 AddSelects(RI); 842 Result = new CHRScope(RI); 843 Scopes.insert(Result); 844 } else { 845 CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n"); 846 AddSelects(Result->RegInfos[0]); 847 } 848 } 849 } 850 851 if (Result) { 852 checkScopeHoistable(Result); 853 } 854 return Result; 855 } 856 857 // Check that any of the branch and the selects in the region could be 858 // hoisted above the the CHR branch insert point (the most dominating of 859 // them, either the branch (at the end of the first block) or the first 860 // select in the first block). If the branch can't be hoisted, drop the 861 // selects in the first blocks. 862 // 863 // For example, for the following scope/region with selects, we want to insert 864 // the merged branch right before the first select in the first/entry block by 865 // hoisting c1, c2, c3, and c4. 866 // 867 // // Branch insert point here. 868 // a = c1 ? b : c; // Select 1 869 // d = c2 ? e : f; // Select 2 870 // if (c3) { // Branch 871 // ... 872 // c4 = foo() // A call. 873 // g = c4 ? h : i; // Select 3 874 // } 875 // 876 // But suppose we can't hoist c4 because it's dependent on the preceding 877 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop 878 // Select 2. If we can't hoist c3, we drop Selects 1 & 2. 879 void CHR::checkScopeHoistable(CHRScope *Scope) { 880 RegInfo &RI = Scope->RegInfos[0]; 881 Region *R = RI.R; 882 BasicBlock *EntryBB = R->getEntry(); 883 auto *Branch = RI.HasBranch ? 884 cast<BranchInst>(EntryBB->getTerminator()) : nullptr; 885 SmallVector<SelectInst *, 8> &Selects = RI.Selects; 886 if (RI.HasBranch || !Selects.empty()) { 887 Instruction *InsertPoint = getBranchInsertPoint(RI); 888 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 889 // Avoid a data dependence from a select or a branch to a(nother) 890 // select. Note no instruction can't data-depend on a branch (a branch 891 // instruction doesn't produce a value). 892 DenseSet<Instruction *> Unhoistables; 893 // Initialize Unhoistables with the selects. 894 for (SelectInst *SI : Selects) { 895 Unhoistables.insert(SI); 896 } 897 // Remove Selects that can't be hoisted. 898 for (auto it = Selects.begin(); it != Selects.end(); ) { 899 SelectInst *SI = *it; 900 if (SI == InsertPoint) { 901 ++it; 902 continue; 903 } 904 DenseMap<Instruction *, bool> Visited; 905 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, 906 DT, Unhoistables, nullptr, Visited); 907 if (!IsHoistable) { 908 CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n"); 909 ORE.emit([&]() { 910 return OptimizationRemarkMissed(DEBUG_TYPE, 911 "DropUnhoistableSelect", SI) 912 << "Dropped unhoistable select"; 913 }); 914 it = Selects.erase(it); 915 // Since we are dropping the select here, we also drop it from 916 // Unhoistables. 917 Unhoistables.erase(SI); 918 } else 919 ++it; 920 } 921 // Update InsertPoint after potentially removing selects. 922 InsertPoint = getBranchInsertPoint(RI); 923 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 924 if (RI.HasBranch && InsertPoint != Branch) { 925 DenseMap<Instruction *, bool> Visited; 926 bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint, 927 DT, Unhoistables, nullptr, Visited); 928 if (!IsHoistable) { 929 // If the branch isn't hoistable, drop the selects in the entry 930 // block, preferring the branch, which makes the branch the hoist 931 // point. 932 assert(InsertPoint != Branch && "Branch must not be the hoist point"); 933 CHR_DEBUG(dbgs() << "Dropping selects in entry block \n"); 934 CHR_DEBUG( 935 for (SelectInst *SI : Selects) { 936 dbgs() << "SI " << *SI << "\n"; 937 }); 938 for (SelectInst *SI : Selects) { 939 ORE.emit([&]() { 940 return OptimizationRemarkMissed(DEBUG_TYPE, 941 "DropSelectUnhoistableBranch", SI) 942 << "Dropped select due to unhoistable branch"; 943 }); 944 } 945 llvm::erase_if(Selects, [EntryBB](SelectInst *SI) { 946 return SI->getParent() == EntryBB; 947 }); 948 Unhoistables.clear(); 949 InsertPoint = Branch; 950 } 951 } 952 CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n"); 953 #ifndef NDEBUG 954 if (RI.HasBranch) { 955 assert(!DT.dominates(Branch, InsertPoint) && 956 "Branch can't be already above the hoist point"); 957 DenseMap<Instruction *, bool> Visited; 958 assert(checkHoistValue(Branch->getCondition(), InsertPoint, 959 DT, Unhoistables, nullptr, Visited) && 960 "checkHoistValue for branch"); 961 } 962 for (auto *SI : Selects) { 963 assert(!DT.dominates(SI, InsertPoint) && 964 "SI can't be already above the hoist point"); 965 DenseMap<Instruction *, bool> Visited; 966 assert(checkHoistValue(SI->getCondition(), InsertPoint, DT, 967 Unhoistables, nullptr, Visited) && 968 "checkHoistValue for selects"); 969 } 970 CHR_DEBUG(dbgs() << "Result\n"); 971 if (RI.HasBranch) { 972 CHR_DEBUG(dbgs() << "BI " << *Branch << "\n"); 973 } 974 for (auto *SI : Selects) { 975 CHR_DEBUG(dbgs() << "SI " << *SI << "\n"); 976 } 977 #endif 978 } 979 } 980 981 // Traverse the region tree, find all nested scopes and merge them if possible. 982 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion, 983 SmallVectorImpl<CHRScope *> &Scopes) { 984 CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n"); 985 CHRScope *Result = findScope(R); 986 // Visit subscopes. 987 CHRScope *ConsecutiveSubscope = nullptr; 988 SmallVector<CHRScope *, 8> Subscopes; 989 for (auto It = R->begin(); It != R->end(); ++It) { 990 const std::unique_ptr<Region> &SubR = *It; 991 auto NextIt = std::next(It); 992 Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr; 993 CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr() 994 << "\n"); 995 CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes); 996 if (SubCHRScope) { 997 CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n"); 998 } else { 999 CHR_DEBUG(dbgs() << "Subregion Scope null\n"); 1000 } 1001 if (SubCHRScope) { 1002 if (!ConsecutiveSubscope) 1003 ConsecutiveSubscope = SubCHRScope; 1004 else if (!ConsecutiveSubscope->appendable(SubCHRScope)) { 1005 Subscopes.push_back(ConsecutiveSubscope); 1006 ConsecutiveSubscope = SubCHRScope; 1007 } else 1008 ConsecutiveSubscope->append(SubCHRScope); 1009 } else { 1010 if (ConsecutiveSubscope) { 1011 Subscopes.push_back(ConsecutiveSubscope); 1012 } 1013 ConsecutiveSubscope = nullptr; 1014 } 1015 } 1016 if (ConsecutiveSubscope) { 1017 Subscopes.push_back(ConsecutiveSubscope); 1018 } 1019 for (CHRScope *Sub : Subscopes) { 1020 if (Result) { 1021 // Combine it with the parent. 1022 Result->addSub(Sub); 1023 } else { 1024 // Push Subscopes as they won't be combined with the parent. 1025 Scopes.push_back(Sub); 1026 } 1027 } 1028 return Result; 1029 } 1030 1031 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) { 1032 DenseSet<Value *> ConditionValues; 1033 if (RI.HasBranch) { 1034 auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator()); 1035 ConditionValues.insert(BI->getCondition()); 1036 } 1037 for (SelectInst *SI : RI.Selects) { 1038 ConditionValues.insert(SI->getCondition()); 1039 } 1040 return ConditionValues; 1041 } 1042 1043 1044 // Determine whether to split a scope depending on the sets of the branch 1045 // condition values of the previous region and the current region. We split 1046 // (return true) it if 1) the condition values of the inner/lower scope can't be 1047 // hoisted up to the outer/upper scope, or 2) the two sets of the condition 1048 // values have an empty intersection (because the combined branch conditions 1049 // won't probably lead to a simpler combined condition). 1050 static bool shouldSplit(Instruction *InsertPoint, 1051 DenseSet<Value *> &PrevConditionValues, 1052 DenseSet<Value *> &ConditionValues, 1053 DominatorTree &DT, 1054 DenseSet<Instruction *> &Unhoistables) { 1055 assert(InsertPoint && "Null InsertPoint"); 1056 CHR_DEBUG( 1057 dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues "; 1058 for (Value *V : PrevConditionValues) { 1059 dbgs() << *V << ", "; 1060 } 1061 dbgs() << " ConditionValues "; 1062 for (Value *V : ConditionValues) { 1063 dbgs() << *V << ", "; 1064 } 1065 dbgs() << "\n"); 1066 // If any of Bases isn't hoistable to the hoist point, split. 1067 for (Value *V : ConditionValues) { 1068 DenseMap<Instruction *, bool> Visited; 1069 if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) { 1070 CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n"); 1071 return true; // Not hoistable, split. 1072 } 1073 } 1074 // If PrevConditionValues or ConditionValues is empty, don't split to avoid 1075 // unnecessary splits at scopes with no branch/selects. If 1076 // PrevConditionValues and ConditionValues don't intersect at all, split. 1077 if (!PrevConditionValues.empty() && !ConditionValues.empty()) { 1078 // Use std::set as DenseSet doesn't work with set_intersection. 1079 std::set<Value *> PrevBases, Bases; 1080 DenseMap<Value *, std::set<Value *>> Visited; 1081 for (Value *V : PrevConditionValues) { 1082 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1083 PrevBases.insert(BaseValues.begin(), BaseValues.end()); 1084 } 1085 for (Value *V : ConditionValues) { 1086 const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited); 1087 Bases.insert(BaseValues.begin(), BaseValues.end()); 1088 } 1089 CHR_DEBUG( 1090 dbgs() << "PrevBases "; 1091 for (Value *V : PrevBases) { 1092 dbgs() << *V << ", "; 1093 } 1094 dbgs() << " Bases "; 1095 for (Value *V : Bases) { 1096 dbgs() << *V << ", "; 1097 } 1098 dbgs() << "\n"); 1099 std::vector<Value *> Intersection; 1100 std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(), 1101 Bases.end(), std::back_inserter(Intersection)); 1102 if (Intersection.empty()) { 1103 // Empty intersection, split. 1104 CHR_DEBUG(dbgs() << "Split. Intersection empty\n"); 1105 return true; 1106 } 1107 } 1108 CHR_DEBUG(dbgs() << "No split\n"); 1109 return false; // Don't split. 1110 } 1111 1112 static void getSelectsInScope(CHRScope *Scope, 1113 DenseSet<Instruction *> &Output) { 1114 for (RegInfo &RI : Scope->RegInfos) 1115 for (SelectInst *SI : RI.Selects) 1116 Output.insert(SI); 1117 for (CHRScope *Sub : Scope->Subs) 1118 getSelectsInScope(Sub, Output); 1119 } 1120 1121 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input, 1122 SmallVectorImpl<CHRScope *> &Output) { 1123 for (CHRScope *Scope : Input) { 1124 assert(!Scope->BranchInsertPoint && 1125 "BranchInsertPoint must not be set"); 1126 DenseSet<Instruction *> Unhoistables; 1127 getSelectsInScope(Scope, Unhoistables); 1128 splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables); 1129 } 1130 #ifndef NDEBUG 1131 for (CHRScope *Scope : Output) { 1132 assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set"); 1133 } 1134 #endif 1135 } 1136 1137 SmallVector<CHRScope *, 8> CHR::splitScope( 1138 CHRScope *Scope, 1139 CHRScope *Outer, 1140 DenseSet<Value *> *OuterConditionValues, 1141 Instruction *OuterInsertPoint, 1142 SmallVectorImpl<CHRScope *> &Output, 1143 DenseSet<Instruction *> &Unhoistables) { 1144 if (Outer) { 1145 assert(OuterConditionValues && "Null OuterConditionValues"); 1146 assert(OuterInsertPoint && "Null OuterInsertPoint"); 1147 } 1148 bool PrevSplitFromOuter = true; 1149 DenseSet<Value *> PrevConditionValues; 1150 Instruction *PrevInsertPoint = nullptr; 1151 SmallVector<CHRScope *, 8> Splits; 1152 SmallVector<bool, 8> SplitsSplitFromOuter; 1153 SmallVector<DenseSet<Value *>, 8> SplitsConditionValues; 1154 SmallVector<Instruction *, 8> SplitsInsertPoints; 1155 SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos); // Copy 1156 for (RegInfo &RI : RegInfos) { 1157 Instruction *InsertPoint = getBranchInsertPoint(RI); 1158 DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI); 1159 CHR_DEBUG( 1160 dbgs() << "ConditionValues "; 1161 for (Value *V : ConditionValues) { 1162 dbgs() << *V << ", "; 1163 } 1164 dbgs() << "\n"); 1165 if (RI.R == RegInfos[0].R) { 1166 // First iteration. Check to see if we should split from the outer. 1167 if (Outer) { 1168 CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n"); 1169 CHR_DEBUG(dbgs() << "Should split from outer at " 1170 << RI.R->getNameStr() << "\n"); 1171 if (shouldSplit(OuterInsertPoint, *OuterConditionValues, 1172 ConditionValues, DT, Unhoistables)) { 1173 PrevConditionValues = ConditionValues; 1174 PrevInsertPoint = InsertPoint; 1175 ORE.emit([&]() { 1176 return OptimizationRemarkMissed(DEBUG_TYPE, 1177 "SplitScopeFromOuter", 1178 RI.R->getEntry()->getTerminator()) 1179 << "Split scope from outer due to unhoistable branch/select " 1180 << "and/or lack of common condition values"; 1181 }); 1182 } else { 1183 // Not splitting from the outer. Use the outer bases and insert 1184 // point. Union the bases. 1185 PrevSplitFromOuter = false; 1186 PrevConditionValues = *OuterConditionValues; 1187 PrevConditionValues.insert(ConditionValues.begin(), 1188 ConditionValues.end()); 1189 PrevInsertPoint = OuterInsertPoint; 1190 } 1191 } else { 1192 CHR_DEBUG(dbgs() << "Outer null\n"); 1193 PrevConditionValues = ConditionValues; 1194 PrevInsertPoint = InsertPoint; 1195 } 1196 } else { 1197 CHR_DEBUG(dbgs() << "Should split from prev at " 1198 << RI.R->getNameStr() << "\n"); 1199 if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues, 1200 DT, Unhoistables)) { 1201 CHRScope *Tail = Scope->split(RI.R); 1202 Scopes.insert(Tail); 1203 Splits.push_back(Scope); 1204 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1205 SplitsConditionValues.push_back(PrevConditionValues); 1206 SplitsInsertPoints.push_back(PrevInsertPoint); 1207 Scope = Tail; 1208 PrevConditionValues = ConditionValues; 1209 PrevInsertPoint = InsertPoint; 1210 PrevSplitFromOuter = true; 1211 ORE.emit([&]() { 1212 return OptimizationRemarkMissed(DEBUG_TYPE, 1213 "SplitScopeFromPrev", 1214 RI.R->getEntry()->getTerminator()) 1215 << "Split scope from previous due to unhoistable branch/select " 1216 << "and/or lack of common condition values"; 1217 }); 1218 } else { 1219 // Not splitting. Union the bases. Keep the hoist point. 1220 PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end()); 1221 } 1222 } 1223 } 1224 Splits.push_back(Scope); 1225 SplitsSplitFromOuter.push_back(PrevSplitFromOuter); 1226 SplitsConditionValues.push_back(PrevConditionValues); 1227 assert(PrevInsertPoint && "Null PrevInsertPoint"); 1228 SplitsInsertPoints.push_back(PrevInsertPoint); 1229 assert(Splits.size() == SplitsConditionValues.size() && 1230 Splits.size() == SplitsSplitFromOuter.size() && 1231 Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes"); 1232 for (size_t I = 0; I < Splits.size(); ++I) { 1233 CHRScope *Split = Splits[I]; 1234 DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I]; 1235 Instruction *SplitInsertPoint = SplitsInsertPoints[I]; 1236 SmallVector<CHRScope *, 8> NewSubs; 1237 DenseSet<Instruction *> SplitUnhoistables; 1238 getSelectsInScope(Split, SplitUnhoistables); 1239 for (CHRScope *Sub : Split->Subs) { 1240 SmallVector<CHRScope *, 8> SubSplits = splitScope( 1241 Sub, Split, &SplitConditionValues, SplitInsertPoint, Output, 1242 SplitUnhoistables); 1243 llvm::append_range(NewSubs, SubSplits); 1244 } 1245 Split->Subs = NewSubs; 1246 } 1247 SmallVector<CHRScope *, 8> Result; 1248 for (size_t I = 0; I < Splits.size(); ++I) { 1249 CHRScope *Split = Splits[I]; 1250 if (SplitsSplitFromOuter[I]) { 1251 // Split from the outer. 1252 Output.push_back(Split); 1253 Split->BranchInsertPoint = SplitsInsertPoints[I]; 1254 CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I] 1255 << "\n"); 1256 } else { 1257 // Connected to the outer. 1258 Result.push_back(Split); 1259 } 1260 } 1261 if (!Outer) 1262 assert(Result.empty() && 1263 "If no outer (top-level), must return no nested ones"); 1264 return Result; 1265 } 1266 1267 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) { 1268 for (CHRScope *Scope : Scopes) { 1269 assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty"); 1270 classifyBiasedScopes(Scope, Scope); 1271 CHR_DEBUG( 1272 dbgs() << "classifyBiasedScopes " << *Scope << "\n"; 1273 dbgs() << "TrueBiasedRegions "; 1274 for (Region *R : Scope->TrueBiasedRegions) { 1275 dbgs() << R->getNameStr() << ", "; 1276 } 1277 dbgs() << "\n"; 1278 dbgs() << "FalseBiasedRegions "; 1279 for (Region *R : Scope->FalseBiasedRegions) { 1280 dbgs() << R->getNameStr() << ", "; 1281 } 1282 dbgs() << "\n"; 1283 dbgs() << "TrueBiasedSelects "; 1284 for (SelectInst *SI : Scope->TrueBiasedSelects) { 1285 dbgs() << *SI << ", "; 1286 } 1287 dbgs() << "\n"; 1288 dbgs() << "FalseBiasedSelects "; 1289 for (SelectInst *SI : Scope->FalseBiasedSelects) { 1290 dbgs() << *SI << ", "; 1291 } 1292 dbgs() << "\n";); 1293 } 1294 } 1295 1296 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) { 1297 for (RegInfo &RI : Scope->RegInfos) { 1298 if (RI.HasBranch) { 1299 Region *R = RI.R; 1300 if (TrueBiasedRegionsGlobal.contains(R)) 1301 OutermostScope->TrueBiasedRegions.insert(R); 1302 else if (FalseBiasedRegionsGlobal.contains(R)) 1303 OutermostScope->FalseBiasedRegions.insert(R); 1304 else 1305 llvm_unreachable("Must be biased"); 1306 } 1307 for (SelectInst *SI : RI.Selects) { 1308 if (TrueBiasedSelectsGlobal.contains(SI)) 1309 OutermostScope->TrueBiasedSelects.insert(SI); 1310 else if (FalseBiasedSelectsGlobal.contains(SI)) 1311 OutermostScope->FalseBiasedSelects.insert(SI); 1312 else 1313 llvm_unreachable("Must be biased"); 1314 } 1315 } 1316 for (CHRScope *Sub : Scope->Subs) { 1317 classifyBiasedScopes(Sub, OutermostScope); 1318 } 1319 } 1320 1321 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) { 1322 unsigned NumBiased = Scope->TrueBiasedRegions.size() + 1323 Scope->FalseBiasedRegions.size() + 1324 Scope->TrueBiasedSelects.size() + 1325 Scope->FalseBiasedSelects.size(); 1326 return NumBiased >= CHRMergeThreshold; 1327 } 1328 1329 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input, 1330 SmallVectorImpl<CHRScope *> &Output) { 1331 for (CHRScope *Scope : Input) { 1332 // Filter out the ones with only one region and no subs. 1333 if (!hasAtLeastTwoBiasedBranches(Scope)) { 1334 CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions " 1335 << Scope->TrueBiasedRegions.size() 1336 << " falsy-regions " << Scope->FalseBiasedRegions.size() 1337 << " true-selects " << Scope->TrueBiasedSelects.size() 1338 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n"); 1339 ORE.emit([&]() { 1340 return OptimizationRemarkMissed( 1341 DEBUG_TYPE, 1342 "DropScopeWithOneBranchOrSelect", 1343 Scope->RegInfos[0].R->getEntry()->getTerminator()) 1344 << "Drop scope with < " 1345 << ore::NV("CHRMergeThreshold", CHRMergeThreshold) 1346 << " biased branch(es) or select(s)"; 1347 }); 1348 continue; 1349 } 1350 Output.push_back(Scope); 1351 } 1352 } 1353 1354 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input, 1355 SmallVectorImpl<CHRScope *> &Output) { 1356 for (CHRScope *Scope : Input) { 1357 assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() && 1358 "Empty"); 1359 setCHRRegions(Scope, Scope); 1360 Output.push_back(Scope); 1361 CHR_DEBUG( 1362 dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n"; 1363 for (auto pair : Scope->HoistStopMap) { 1364 Region *R = pair.first; 1365 dbgs() << "Region " << R->getNameStr() << "\n"; 1366 for (Instruction *I : pair.second) { 1367 dbgs() << "HoistStop " << *I << "\n"; 1368 } 1369 } 1370 dbgs() << "CHRRegions" << "\n"; 1371 for (RegInfo &RI : Scope->CHRRegions) { 1372 dbgs() << RI.R->getNameStr() << "\n"; 1373 }); 1374 } 1375 } 1376 1377 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) { 1378 DenseSet<Instruction *> Unhoistables; 1379 // Put the biased selects in Unhoistables because they should stay where they 1380 // are and constant-folded after CHR (in case one biased select or a branch 1381 // can depend on another biased select.) 1382 for (RegInfo &RI : Scope->RegInfos) { 1383 for (SelectInst *SI : RI.Selects) { 1384 Unhoistables.insert(SI); 1385 } 1386 } 1387 Instruction *InsertPoint = OutermostScope->BranchInsertPoint; 1388 for (RegInfo &RI : Scope->RegInfos) { 1389 Region *R = RI.R; 1390 DenseSet<Instruction *> HoistStops; 1391 bool IsHoisted = false; 1392 if (RI.HasBranch) { 1393 assert((OutermostScope->TrueBiasedRegions.contains(R) || 1394 OutermostScope->FalseBiasedRegions.contains(R)) && 1395 "Must be truthy or falsy"); 1396 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1397 // Note checkHoistValue fills in HoistStops. 1398 DenseMap<Instruction *, bool> Visited; 1399 bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT, 1400 Unhoistables, &HoistStops, Visited); 1401 assert(IsHoistable && "Must be hoistable"); 1402 (void)(IsHoistable); // Unused in release build 1403 IsHoisted = true; 1404 } 1405 for (SelectInst *SI : RI.Selects) { 1406 assert((OutermostScope->TrueBiasedSelects.contains(SI) || 1407 OutermostScope->FalseBiasedSelects.contains(SI)) && 1408 "Must be true or false biased"); 1409 // Note checkHoistValue fills in HoistStops. 1410 DenseMap<Instruction *, bool> Visited; 1411 bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT, 1412 Unhoistables, &HoistStops, Visited); 1413 assert(IsHoistable && "Must be hoistable"); 1414 (void)(IsHoistable); // Unused in release build 1415 IsHoisted = true; 1416 } 1417 if (IsHoisted) { 1418 OutermostScope->CHRRegions.push_back(RI); 1419 OutermostScope->HoistStopMap[R] = HoistStops; 1420 } 1421 } 1422 for (CHRScope *Sub : Scope->Subs) 1423 setCHRRegions(Sub, OutermostScope); 1424 } 1425 1426 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) { 1427 return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth(); 1428 } 1429 1430 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input, 1431 SmallVectorImpl<CHRScope *> &Output) { 1432 Output.resize(Input.size()); 1433 llvm::copy(Input, Output.begin()); 1434 llvm::stable_sort(Output, CHRScopeSorter); 1435 } 1436 1437 // Return true if V is already hoisted or was hoisted (along with its operands) 1438 // to the insert point. 1439 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R, 1440 HoistStopMapTy &HoistStopMap, 1441 DenseSet<Instruction *> &HoistedSet, 1442 DenseSet<PHINode *> &TrivialPHIs, 1443 DominatorTree &DT) { 1444 auto IT = HoistStopMap.find(R); 1445 assert(IT != HoistStopMap.end() && "Region must be in hoist stop map"); 1446 DenseSet<Instruction *> &HoistStops = IT->second; 1447 if (auto *I = dyn_cast<Instruction>(V)) { 1448 if (I == HoistPoint) 1449 return; 1450 if (HoistStops.count(I)) 1451 return; 1452 if (auto *PN = dyn_cast<PHINode>(I)) 1453 if (TrivialPHIs.count(PN)) 1454 // The trivial phi inserted by the previous CHR scope could replace a 1455 // non-phi in HoistStops. Note that since this phi is at the exit of a 1456 // previous CHR scope, which dominates this scope, it's safe to stop 1457 // hoisting there. 1458 return; 1459 if (HoistedSet.count(I)) 1460 // Already hoisted, return. 1461 return; 1462 assert(isHoistableInstructionType(I) && "Unhoistable instruction type"); 1463 assert(DT.getNode(I->getParent()) && "DT must contain I's block"); 1464 assert(DT.getNode(HoistPoint->getParent()) && 1465 "DT must contain HoistPoint block"); 1466 if (DT.dominates(I, HoistPoint)) 1467 // We are already above the hoist point. Stop here. This may be necessary 1468 // when multiple scopes would independently hoist the same 1469 // instruction. Since an outer (dominating) scope would hoist it to its 1470 // entry before an inner (dominated) scope would to its entry, the inner 1471 // scope may see the instruction already hoisted, in which case it 1472 // potentially wrong for the inner scope to hoist it and could cause bad 1473 // IR (non-dominating def), but safe to skip hoisting it instead because 1474 // it's already in a block that dominates the inner scope. 1475 return; 1476 for (Value *Op : I->operands()) { 1477 hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT); 1478 } 1479 I->moveBefore(HoistPoint); 1480 HoistedSet.insert(I); 1481 CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n"); 1482 } 1483 } 1484 1485 // Hoist the dependent condition values of the branches and the selects in the 1486 // scope to the insert point. 1487 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint, 1488 DenseSet<PHINode *> &TrivialPHIs, 1489 DominatorTree &DT) { 1490 DenseSet<Instruction *> HoistedSet; 1491 for (const RegInfo &RI : Scope->CHRRegions) { 1492 Region *R = RI.R; 1493 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1494 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1495 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1496 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1497 hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1498 HoistedSet, TrivialPHIs, DT); 1499 } 1500 for (SelectInst *SI : RI.Selects) { 1501 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1502 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1503 if (!(IsTrueBiased || IsFalseBiased)) 1504 continue; 1505 hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap, 1506 HoistedSet, TrivialPHIs, DT); 1507 } 1508 } 1509 } 1510 1511 // Negate the predicate if an ICmp if it's used only by branches or selects by 1512 // swapping the operands of the branches or the selects. Returns true if success. 1513 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp, 1514 Instruction *ExcludedUser, 1515 CHRScope *Scope) { 1516 for (User *U : ICmp->users()) { 1517 if (U == ExcludedUser) 1518 continue; 1519 if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional()) 1520 continue; 1521 if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp) 1522 continue; 1523 return false; 1524 } 1525 for (User *U : ICmp->users()) { 1526 if (U == ExcludedUser) 1527 continue; 1528 if (auto *BI = dyn_cast<BranchInst>(U)) { 1529 assert(BI->isConditional() && "Must be conditional"); 1530 BI->swapSuccessors(); 1531 // Don't need to swap this in terms of 1532 // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based 1533 // mean whehter the branch is likely go into the if-then rather than 1534 // successor0/successor1 and because we can tell which edge is the then or 1535 // the else one by comparing the destination to the region exit block. 1536 continue; 1537 } 1538 if (auto *SI = dyn_cast<SelectInst>(U)) { 1539 // Swap operands 1540 SI->swapValues(); 1541 SI->swapProfMetadata(); 1542 if (Scope->TrueBiasedSelects.count(SI)) { 1543 assert(!Scope->FalseBiasedSelects.contains(SI) && 1544 "Must not be already in"); 1545 Scope->FalseBiasedSelects.insert(SI); 1546 } else if (Scope->FalseBiasedSelects.count(SI)) { 1547 assert(!Scope->TrueBiasedSelects.contains(SI) && 1548 "Must not be already in"); 1549 Scope->TrueBiasedSelects.insert(SI); 1550 } 1551 continue; 1552 } 1553 llvm_unreachable("Must be a branch or a select"); 1554 } 1555 ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate())); 1556 return true; 1557 } 1558 1559 // A helper for transformScopes. Insert a trivial phi at the scope exit block 1560 // for a value that's defined in the scope but used outside it (meaning it's 1561 // alive at the exit block). 1562 static void insertTrivialPHIs(CHRScope *Scope, 1563 BasicBlock *EntryBlock, BasicBlock *ExitBlock, 1564 DenseSet<PHINode *> &TrivialPHIs) { 1565 SmallSetVector<BasicBlock *, 8> BlocksInScope; 1566 for (RegInfo &RI : Scope->RegInfos) { 1567 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1568 // sub-Scopes. 1569 BlocksInScope.insert(BB); 1570 } 1571 } 1572 CHR_DEBUG({ 1573 dbgs() << "Inserting redundant phis\n"; 1574 for (BasicBlock *BB : BlocksInScope) 1575 dbgs() << "BlockInScope " << BB->getName() << "\n"; 1576 }); 1577 for (BasicBlock *BB : BlocksInScope) { 1578 for (Instruction &I : *BB) { 1579 SmallVector<Instruction *, 8> Users; 1580 for (User *U : I.users()) { 1581 if (auto *UI = dyn_cast<Instruction>(U)) { 1582 if (!BlocksInScope.contains(UI->getParent()) && 1583 // Unless there's already a phi for I at the exit block. 1584 !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) { 1585 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1586 CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n"); 1587 Users.push_back(UI); 1588 } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) { 1589 // There's a loop backedge from a block that's dominated by this 1590 // scope to the entry block. 1591 CHR_DEBUG(dbgs() << "V " << I << "\n"); 1592 CHR_DEBUG(dbgs() 1593 << "Used at entry block (for a back edge) by a phi user " 1594 << *UI << "\n"); 1595 Users.push_back(UI); 1596 } 1597 } 1598 } 1599 if (Users.size() > 0) { 1600 // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at 1601 // ExitBlock. Replace I with the new phi in UI unless UI is another 1602 // phi at ExitBlock. 1603 PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "", 1604 &ExitBlock->front()); 1605 for (BasicBlock *Pred : predecessors(ExitBlock)) { 1606 PN->addIncoming(&I, Pred); 1607 } 1608 TrivialPHIs.insert(PN); 1609 CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n"); 1610 for (Instruction *UI : Users) { 1611 for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) { 1612 if (UI->getOperand(J) == &I) { 1613 UI->setOperand(J, PN); 1614 } 1615 } 1616 CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n"); 1617 } 1618 } 1619 } 1620 } 1621 } 1622 1623 // Assert that all the CHR regions of the scope have a biased branch or select. 1624 static void LLVM_ATTRIBUTE_UNUSED 1625 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) { 1626 #ifndef NDEBUG 1627 auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) { 1628 if (Scope->TrueBiasedRegions.count(RI.R) || 1629 Scope->FalseBiasedRegions.count(RI.R)) 1630 return true; 1631 for (SelectInst *SI : RI.Selects) 1632 if (Scope->TrueBiasedSelects.count(SI) || 1633 Scope->FalseBiasedSelects.count(SI)) 1634 return true; 1635 return false; 1636 }; 1637 for (RegInfo &RI : Scope->CHRRegions) { 1638 assert(HasBiasedBranchOrSelect(RI, Scope) && 1639 "Must have biased branch or select"); 1640 } 1641 #endif 1642 } 1643 1644 // Assert that all the condition values of the biased branches and selects have 1645 // been hoisted to the pre-entry block or outside of the scope. 1646 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted( 1647 CHRScope *Scope, BasicBlock *PreEntryBlock) { 1648 CHR_DEBUG(dbgs() << "Biased regions condition values \n"); 1649 for (RegInfo &RI : Scope->CHRRegions) { 1650 Region *R = RI.R; 1651 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1652 bool IsFalseBiased = Scope->FalseBiasedRegions.count(R); 1653 if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) { 1654 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1655 Value *V = BI->getCondition(); 1656 CHR_DEBUG(dbgs() << *V << "\n"); 1657 if (auto *I = dyn_cast<Instruction>(V)) { 1658 (void)(I); // Unused in release build. 1659 assert((I->getParent() == PreEntryBlock || 1660 !Scope->contains(I)) && 1661 "Must have been hoisted to PreEntryBlock or outside the scope"); 1662 } 1663 } 1664 for (SelectInst *SI : RI.Selects) { 1665 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1666 bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI); 1667 if (!(IsTrueBiased || IsFalseBiased)) 1668 continue; 1669 Value *V = SI->getCondition(); 1670 CHR_DEBUG(dbgs() << *V << "\n"); 1671 if (auto *I = dyn_cast<Instruction>(V)) { 1672 (void)(I); // Unused in release build. 1673 assert((I->getParent() == PreEntryBlock || 1674 !Scope->contains(I)) && 1675 "Must have been hoisted to PreEntryBlock or outside the scope"); 1676 } 1677 } 1678 } 1679 } 1680 1681 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) { 1682 CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n"); 1683 1684 assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region"); 1685 1686 for (RegInfo &RI : Scope->RegInfos) { 1687 const Region *R = RI.R; 1688 unsigned Duplication = getRegionDuplicationCount(R); 1689 dbgs() << "Dup count for R=" << R << " is " << Duplication << "\n"; 1690 if (Duplication >= CHRDupThreshsold) { 1691 CHR_DEBUG(dbgs() << "Reached the dup threshold of " << Duplication 1692 << " for this region"); 1693 ORE.emit([&]() { 1694 return OptimizationRemarkMissed(DEBUG_TYPE, "DupThresholdReached", 1695 R->getEntry()->getTerminator()) 1696 << "Reached the duplication threshold for the region"; 1697 }); 1698 return; 1699 } 1700 } 1701 for (RegInfo &RI : Scope->RegInfos) { 1702 DuplicationCount[RI.R]++; 1703 } 1704 1705 Region *FirstRegion = Scope->RegInfos[0].R; 1706 BasicBlock *EntryBlock = FirstRegion->getEntry(); 1707 Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R; 1708 BasicBlock *ExitBlock = LastRegion->getExit(); 1709 Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock); 1710 1711 if (ExitBlock) { 1712 // Insert a trivial phi at the exit block (where the CHR hot path and the 1713 // cold path merges) for a value that's defined in the scope but used 1714 // outside it (meaning it's alive at the exit block). We will add the 1715 // incoming values for the CHR cold paths to it below. Without this, we'd 1716 // miss updating phi's for such values unless there happens to already be a 1717 // phi for that value there. 1718 insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs); 1719 } 1720 1721 // Split the entry block of the first region. The new block becomes the new 1722 // entry block of the first region. The old entry block becomes the block to 1723 // insert the CHR branch into. Note DT gets updated. Since DT gets updated 1724 // through the split, we update the entry of the first region after the split, 1725 // and Region only points to the entry and the exit blocks, rather than 1726 // keeping everything in a list or set, the blocks membership and the 1727 // entry/exit blocks of the region are still valid after the split. 1728 CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName() 1729 << " at " << *Scope->BranchInsertPoint << "\n"); 1730 BasicBlock *NewEntryBlock = 1731 SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT); 1732 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1733 "NewEntryBlock's only pred must be EntryBlock"); 1734 FirstRegion->replaceEntryRecursive(NewEntryBlock); 1735 BasicBlock *PreEntryBlock = EntryBlock; 1736 1737 ValueToValueMapTy VMap; 1738 // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a 1739 // hot path (originals) and a cold path (clones) and update the PHIs at the 1740 // exit block. 1741 cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap); 1742 1743 // Replace the old (placeholder) branch with the new (merged) conditional 1744 // branch. 1745 BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock, 1746 NewEntryBlock, VMap); 1747 1748 #ifndef NDEBUG 1749 assertCHRRegionsHaveBiasedBranchOrSelect(Scope); 1750 #endif 1751 1752 // Hoist the conditional values of the branches/selects. 1753 hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT); 1754 1755 #ifndef NDEBUG 1756 assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock); 1757 #endif 1758 1759 // Create the combined branch condition and constant-fold the branches/selects 1760 // in the hot path. 1761 fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr, 1762 ProfileCount.value_or(0)); 1763 } 1764 1765 // A helper for transformScopes. Clone the blocks in the scope (excluding the 1766 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs 1767 // at the exit block. 1768 void CHR::cloneScopeBlocks(CHRScope *Scope, 1769 BasicBlock *PreEntryBlock, 1770 BasicBlock *ExitBlock, 1771 Region *LastRegion, 1772 ValueToValueMapTy &VMap) { 1773 // Clone all the blocks. The original blocks will be the hot-path 1774 // CHR-optimized code and the cloned blocks will be the original unoptimized 1775 // code. This is so that the block pointers from the 1776 // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code 1777 // which CHR should apply to. 1778 SmallVector<BasicBlock*, 8> NewBlocks; 1779 for (RegInfo &RI : Scope->RegInfos) 1780 for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the 1781 // sub-Scopes. 1782 assert(BB != PreEntryBlock && "Don't copy the preetntry block"); 1783 BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F); 1784 NewBlocks.push_back(NewBB); 1785 VMap[BB] = NewBB; 1786 } 1787 1788 // Place the cloned blocks right after the original blocks (right before the 1789 // exit block of.) 1790 if (ExitBlock) 1791 F.getBasicBlockList().splice(ExitBlock->getIterator(), 1792 F.getBasicBlockList(), 1793 NewBlocks[0]->getIterator(), F.end()); 1794 1795 // Update the cloned blocks/instructions to refer to themselves. 1796 for (BasicBlock *NewBB : NewBlocks) 1797 for (Instruction &I : *NewBB) 1798 RemapInstruction(&I, VMap, 1799 RF_NoModuleLevelChanges | RF_IgnoreMissingLocals); 1800 1801 // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for 1802 // the top-level region but we don't need to add PHIs. The trivial PHIs 1803 // inserted above will be updated here. 1804 if (ExitBlock) 1805 for (PHINode &PN : ExitBlock->phis()) 1806 for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps; 1807 ++I) { 1808 BasicBlock *Pred = PN.getIncomingBlock(I); 1809 if (LastRegion->contains(Pred)) { 1810 Value *V = PN.getIncomingValue(I); 1811 auto It = VMap.find(V); 1812 if (It != VMap.end()) V = It->second; 1813 assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned"); 1814 PN.addIncoming(V, cast<BasicBlock>(VMap[Pred])); 1815 } 1816 } 1817 } 1818 1819 // A helper for transformScope. Replace the old (placeholder) branch with the 1820 // new (merged) conditional branch. 1821 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock, 1822 BasicBlock *EntryBlock, 1823 BasicBlock *NewEntryBlock, 1824 ValueToValueMapTy &VMap) { 1825 BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator()); 1826 assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock && 1827 "SplitBlock did not work correctly!"); 1828 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1829 "NewEntryBlock's only pred must be EntryBlock"); 1830 assert(VMap.find(NewEntryBlock) != VMap.end() && 1831 "NewEntryBlock must have been copied"); 1832 OldBR->dropAllReferences(); 1833 OldBR->eraseFromParent(); 1834 // The true predicate is a placeholder. It will be replaced later in 1835 // fixupBranchesAndSelects(). 1836 BranchInst *NewBR = BranchInst::Create(NewEntryBlock, 1837 cast<BasicBlock>(VMap[NewEntryBlock]), 1838 ConstantInt::getTrue(F.getContext())); 1839 PreEntryBlock->getInstList().push_back(NewBR); 1840 assert(NewEntryBlock->getSinglePredecessor() == EntryBlock && 1841 "NewEntryBlock's only pred must be EntryBlock"); 1842 return NewBR; 1843 } 1844 1845 // A helper for transformScopes. Create the combined branch condition and 1846 // constant-fold the branches/selects in the hot path. 1847 void CHR::fixupBranchesAndSelects(CHRScope *Scope, 1848 BasicBlock *PreEntryBlock, 1849 BranchInst *MergedBR, 1850 uint64_t ProfileCount) { 1851 Value *MergedCondition = ConstantInt::getTrue(F.getContext()); 1852 BranchProbability CHRBranchBias(1, 1); 1853 uint64_t NumCHRedBranches = 0; 1854 IRBuilder<> IRB(PreEntryBlock->getTerminator()); 1855 for (RegInfo &RI : Scope->CHRRegions) { 1856 Region *R = RI.R; 1857 if (RI.HasBranch) { 1858 fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias); 1859 ++NumCHRedBranches; 1860 } 1861 for (SelectInst *SI : RI.Selects) { 1862 fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias); 1863 ++NumCHRedBranches; 1864 } 1865 } 1866 Stats.NumBranchesDelta += NumCHRedBranches - 1; 1867 Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount; 1868 ORE.emit([&]() { 1869 return OptimizationRemark(DEBUG_TYPE, 1870 "CHR", 1871 // Refer to the hot (original) path 1872 MergedBR->getSuccessor(0)->getTerminator()) 1873 << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches) 1874 << " branches or selects"; 1875 }); 1876 MergedBR->setCondition(MergedCondition); 1877 uint32_t Weights[] = { 1878 static_cast<uint32_t>(CHRBranchBias.scale(1000)), 1879 static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)), 1880 }; 1881 MDBuilder MDB(F.getContext()); 1882 MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights)); 1883 CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1] 1884 << "\n"); 1885 } 1886 1887 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1888 // and constant-fold a branch in the hot path. 1889 void CHR::fixupBranch(Region *R, CHRScope *Scope, 1890 IRBuilder<> &IRB, 1891 Value *&MergedCondition, 1892 BranchProbability &CHRBranchBias) { 1893 bool IsTrueBiased = Scope->TrueBiasedRegions.count(R); 1894 assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) && 1895 "Must be truthy or falsy"); 1896 auto *BI = cast<BranchInst>(R->getEntry()->getTerminator()); 1897 assert(BranchBiasMap.find(R) != BranchBiasMap.end() && 1898 "Must be in the bias map"); 1899 BranchProbability Bias = BranchBiasMap[R]; 1900 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1901 // Take the min. 1902 if (CHRBranchBias > Bias) 1903 CHRBranchBias = Bias; 1904 BasicBlock *IfThen = BI->getSuccessor(1); 1905 BasicBlock *IfElse = BI->getSuccessor(0); 1906 BasicBlock *RegionExitBlock = R->getExit(); 1907 assert(RegionExitBlock && "Null ExitBlock"); 1908 assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) && 1909 IfThen != IfElse && "Invariant from findScopes"); 1910 if (IfThen == RegionExitBlock) { 1911 // Swap them so that IfThen means going into it and IfElse means skipping 1912 // it. 1913 std::swap(IfThen, IfElse); 1914 } 1915 CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName() 1916 << " IfElse " << IfElse->getName() << "\n"); 1917 Value *Cond = BI->getCondition(); 1918 BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse; 1919 bool ConditionTrue = HotTarget == BI->getSuccessor(0); 1920 addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB, 1921 MergedCondition); 1922 // Constant-fold the branch at ClonedEntryBlock. 1923 assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) && 1924 "The successor shouldn't change"); 1925 Value *NewCondition = ConditionTrue ? 1926 ConstantInt::getTrue(F.getContext()) : 1927 ConstantInt::getFalse(F.getContext()); 1928 BI->setCondition(NewCondition); 1929 } 1930 1931 // A helper for fixupBranchesAndSelects. Add to the combined branch condition 1932 // and constant-fold a select in the hot path. 1933 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope, 1934 IRBuilder<> &IRB, 1935 Value *&MergedCondition, 1936 BranchProbability &CHRBranchBias) { 1937 bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI); 1938 assert((IsTrueBiased || 1939 Scope->FalseBiasedSelects.count(SI)) && "Must be biased"); 1940 assert(SelectBiasMap.find(SI) != SelectBiasMap.end() && 1941 "Must be in the bias map"); 1942 BranchProbability Bias = SelectBiasMap[SI]; 1943 assert(Bias >= getCHRBiasThreshold() && "Must be highly biased"); 1944 // Take the min. 1945 if (CHRBranchBias > Bias) 1946 CHRBranchBias = Bias; 1947 Value *Cond = SI->getCondition(); 1948 addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB, 1949 MergedCondition); 1950 Value *NewCondition = IsTrueBiased ? 1951 ConstantInt::getTrue(F.getContext()) : 1952 ConstantInt::getFalse(F.getContext()); 1953 SI->setCondition(NewCondition); 1954 } 1955 1956 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged 1957 // condition. 1958 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond, 1959 Instruction *BranchOrSelect, CHRScope *Scope, 1960 IRBuilder<> &IRB, Value *&MergedCondition) { 1961 if (!IsTrueBiased) { 1962 // If Cond is an icmp and all users of V except for BranchOrSelect is a 1963 // branch, negate the icmp predicate and swap the branch targets and avoid 1964 // inserting an Xor to negate Cond. 1965 auto *ICmp = dyn_cast<ICmpInst>(Cond); 1966 if (!ICmp || 1967 !negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) 1968 Cond = IRB.CreateXor(ConstantInt::getTrue(F.getContext()), Cond); 1969 } 1970 1971 // Select conditions can be poison, while branching on poison is immediate 1972 // undefined behavior. As such, we need to freeze potentially poisonous 1973 // conditions derived from selects. 1974 if (isa<SelectInst>(BranchOrSelect) && 1975 !isGuaranteedNotToBeUndefOrPoison(Cond)) 1976 Cond = IRB.CreateFreeze(Cond); 1977 1978 // Use logical and to avoid propagating poison from later conditions. 1979 MergedCondition = IRB.CreateLogicalAnd(MergedCondition, Cond); 1980 } 1981 1982 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) { 1983 unsigned I = 0; 1984 DenseSet<PHINode *> TrivialPHIs; 1985 for (CHRScope *Scope : CHRScopes) { 1986 transformScopes(Scope, TrivialPHIs); 1987 CHR_DEBUG( 1988 std::ostringstream oss; 1989 oss << " after transformScopes " << I++; 1990 dumpIR(F, oss.str().c_str(), nullptr)); 1991 (void)I; 1992 } 1993 } 1994 1995 static void LLVM_ATTRIBUTE_UNUSED 1996 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) { 1997 dbgs() << Label << " " << Scopes.size() << "\n"; 1998 for (CHRScope *Scope : Scopes) { 1999 dbgs() << *Scope << "\n"; 2000 } 2001 } 2002 2003 bool CHR::run() { 2004 if (!shouldApply(F, PSI)) 2005 return false; 2006 2007 CHR_DEBUG(dumpIR(F, "before", nullptr)); 2008 2009 bool Changed = false; 2010 { 2011 CHR_DEBUG( 2012 dbgs() << "RegionInfo:\n"; 2013 RI.print(dbgs())); 2014 2015 // Recursively traverse the region tree and find regions that have biased 2016 // branches and/or selects and create scopes. 2017 SmallVector<CHRScope *, 8> AllScopes; 2018 findScopes(AllScopes); 2019 CHR_DEBUG(dumpScopes(AllScopes, "All scopes")); 2020 2021 // Split the scopes if 1) the conditional values of the biased 2022 // branches/selects of the inner/lower scope can't be hoisted up to the 2023 // outermost/uppermost scope entry, or 2) the condition values of the biased 2024 // branches/selects in a scope (including subscopes) don't share at least 2025 // one common value. 2026 SmallVector<CHRScope *, 8> SplitScopes; 2027 splitScopes(AllScopes, SplitScopes); 2028 CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes")); 2029 2030 // After splitting, set the biased regions and selects of a scope (a tree 2031 // root) that include those of the subscopes. 2032 classifyBiasedScopes(SplitScopes); 2033 CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n"); 2034 2035 // Filter out the scopes that has only one biased region or select (CHR 2036 // isn't useful in such a case). 2037 SmallVector<CHRScope *, 8> FilteredScopes; 2038 filterScopes(SplitScopes, FilteredScopes); 2039 CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes")); 2040 2041 // Set the regions to be CHR'ed and their hoist stops for each scope. 2042 SmallVector<CHRScope *, 8> SetScopes; 2043 setCHRRegions(FilteredScopes, SetScopes); 2044 CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions")); 2045 2046 // Sort CHRScopes by the depth so that outer CHRScopes comes before inner 2047 // ones. We need to apply CHR from outer to inner so that we apply CHR only 2048 // to the hot path, rather than both hot and cold paths. 2049 SmallVector<CHRScope *, 8> SortedScopes; 2050 sortScopes(SetScopes, SortedScopes); 2051 CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes")); 2052 2053 CHR_DEBUG( 2054 dbgs() << "RegionInfo:\n"; 2055 RI.print(dbgs())); 2056 2057 // Apply the CHR transformation. 2058 if (!SortedScopes.empty()) { 2059 transformScopes(SortedScopes); 2060 Changed = true; 2061 } 2062 } 2063 2064 if (Changed) { 2065 CHR_DEBUG(dumpIR(F, "after", &Stats)); 2066 ORE.emit([&]() { 2067 return OptimizationRemark(DEBUG_TYPE, "Stats", &F) 2068 << ore::NV("Function", &F) << " " 2069 << "Reduced the number of branches in hot paths by " 2070 << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta) 2071 << " (static) and " 2072 << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta) 2073 << " (weighted by PGO count)"; 2074 }); 2075 } 2076 2077 return Changed; 2078 } 2079 2080 namespace llvm { 2081 2082 ControlHeightReductionPass::ControlHeightReductionPass() { 2083 parseCHRFilterFiles(); 2084 } 2085 2086 PreservedAnalyses ControlHeightReductionPass::run( 2087 Function &F, 2088 FunctionAnalysisManager &FAM) { 2089 auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F); 2090 auto &DT = FAM.getResult<DominatorTreeAnalysis>(F); 2091 auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F); 2092 auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 2093 auto &RI = FAM.getResult<RegionInfoAnalysis>(F); 2094 auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 2095 bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run(); 2096 if (!Changed) 2097 return PreservedAnalyses::all(); 2098 return PreservedAnalyses::none(); 2099 } 2100 2101 } // namespace llvm 2102