xref: /llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp (revision 9abad4814dca14bc9eee63f1c7f23b092442826b)
1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass merges conditional blocks of code and reduces the number of
11 // conditional branches in the hot paths based on profiles.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/DenseSet.h"
18 #include "llvm/ADT/SmallVector.h"
19 #include "llvm/ADT/StringSet.h"
20 #include "llvm/Analysis/BlockFrequencyInfo.h"
21 #include "llvm/Analysis/GlobalsModRef.h"
22 #include "llvm/Analysis/ProfileSummaryInfo.h"
23 #include "llvm/Analysis/RegionInfo.h"
24 #include "llvm/Analysis/RegionIterator.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/Support/BranchProbability.h"
31 #include "llvm/Support/MemoryBuffer.h"
32 #include "llvm/Transforms/Utils.h"
33 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
34 #include "llvm/Transforms/Utils/Cloning.h"
35 #include "llvm/Transforms/Utils/ValueMapper.h"
36 
37 #if !defined(_MSC_VER)
38 #include <cxxabi.h>
39 #endif
40 #include <set>
41 #include <sstream>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "chr"
46 
47 #define CHR_DEBUG(X) LLVM_DEBUG(X)
48 
49 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
50                               cl::desc("Apply CHR for all functions"));
51 
52 static cl::opt<double> CHRBiasThreshold(
53     "chr-bias-threshold", cl::init(0.99), cl::Hidden,
54     cl::desc("CHR considers a branch bias greater than this ratio as biased"));
55 
56 static cl::opt<unsigned> CHRMergeThreshold(
57     "chr-merge-threshold", cl::init(2), cl::Hidden,
58     cl::desc("CHR merges a group of N branches/selects where N >= this value"));
59 
60 static cl::opt<std::string> CHRModuleList(
61     "chr-module-list", cl::init(""), cl::Hidden,
62     cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
63 
64 static cl::opt<std::string> CHRFunctionList(
65     "chr-function-list", cl::init(""), cl::Hidden,
66     cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
67 
68 static StringSet<> CHRModules;
69 static StringSet<> CHRFunctions;
70 
71 static void ParseCHRFilterFiles() {
72   if (!CHRModuleList.empty()) {
73     auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
74     if (!FileOrErr) {
75       errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
76       std::exit(1);
77     }
78     StringRef Buf = FileOrErr->get()->getBuffer();
79     SmallVector<StringRef, 0> Lines;
80     Buf.split(Lines, '\n');
81     for (StringRef Line : Lines) {
82       Line = Line.trim();
83       if (!Line.empty())
84         CHRModules.insert(Line);
85     }
86   }
87   if (!CHRFunctionList.empty()) {
88     auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
89     if (!FileOrErr) {
90       errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
91       std::exit(1);
92     }
93     StringRef Buf = FileOrErr->get()->getBuffer();
94     SmallVector<StringRef, 0> Lines;
95     Buf.split(Lines, '\n');
96     for (StringRef Line : Lines) {
97       Line = Line.trim();
98       if (!Line.empty())
99         CHRFunctions.insert(Line);
100     }
101   }
102 }
103 
104 namespace {
105 class ControlHeightReductionLegacyPass : public FunctionPass {
106 public:
107   static char ID;
108 
109   ControlHeightReductionLegacyPass() : FunctionPass(ID) {
110     initializeControlHeightReductionLegacyPassPass(
111         *PassRegistry::getPassRegistry());
112     ParseCHRFilterFiles();
113   }
114 
115   bool runOnFunction(Function &F) override;
116   void getAnalysisUsage(AnalysisUsage &AU) const override {
117     AU.addRequired<BlockFrequencyInfoWrapperPass>();
118     AU.addRequired<DominatorTreeWrapperPass>();
119     AU.addRequired<ProfileSummaryInfoWrapperPass>();
120     AU.addRequired<RegionInfoPass>();
121     AU.addPreserved<GlobalsAAWrapperPass>();
122   }
123 };
124 } // end anonymous namespace
125 
126 char ControlHeightReductionLegacyPass::ID = 0;
127 
128 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
129                       "chr",
130                       "Reduce control height in the hot paths",
131                       false, false)
132 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
133 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
134 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
135 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
136 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
137                     "chr",
138                     "Reduce control height in the hot paths",
139                     false, false)
140 
141 FunctionPass *llvm::createControlHeightReductionLegacyPass() {
142   return new ControlHeightReductionLegacyPass();
143 }
144 
145 namespace {
146 
147 struct CHRStats {
148   CHRStats() : NumBranches(0), NumBranchesDelta(0),
149                WeightedNumBranchesDelta(0) {}
150   void print(raw_ostream &OS) const {
151     OS << "CHRStats: NumBranches " << NumBranches
152        << " NumBranchesDelta " << NumBranchesDelta
153        << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
154   }
155   uint64_t NumBranches;       // The original number of conditional branches /
156                               // selects
157   uint64_t NumBranchesDelta;  // The decrease of the number of conditional
158                               // branches / selects in the hot paths due to CHR.
159   uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
160                                      // count at the scope entry.
161 };
162 
163 inline raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
164                                                      const CHRStats &Stats) {
165   Stats.print(OS);
166   return OS;
167 }
168 
169 // RegInfo - some properties of a Region.
170 struct RegInfo {
171   RegInfo() : R(nullptr), HasBranch(false) {}
172   RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
173   Region *R;
174   bool HasBranch;
175   SmallVector<SelectInst *, 8> Selects;
176 };
177 
178 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
179 
180 // CHRScope - a sequence of regions to CHR together. It corresponds to a
181 // sequence of conditional blocks. It can have subscopes which correspond to
182 // nested conditional blocks. Nested CHRScopes form a tree.
183 class CHRScope {
184  public:
185   CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
186     assert(RI.R && "Null RegionIn");
187     RegInfos.push_back(RI);
188   }
189 
190   Region *getParentRegion() {
191     assert(RegInfos.size() > 0 && "Empty CHRScope");
192     Region *Parent = RegInfos[0].R->getParent();
193     assert(Parent && "Unexpected to call this on the top-level region");
194     return Parent;
195   }
196 
197   BasicBlock *getEntryBlock() {
198     assert(RegInfos.size() > 0 && "Empty CHRScope");
199     return RegInfos.front().R->getEntry();
200   }
201 
202   BasicBlock *getExitBlock() {
203     assert(RegInfos.size() > 0 && "Empty CHRScope");
204     return RegInfos.back().R->getExit();
205   }
206 
207   bool appendable(CHRScope *Next) {
208     // The next scope is appendable only if this scope is directly connected to
209     // it (which implies it post-dominates this scope) and this scope dominates
210     // it (no edge to the next scope outside this scope).
211     BasicBlock *NextEntry = Next->getEntryBlock();
212     if (getExitBlock() != NextEntry)
213       // Not directly connected.
214       return false;
215     Region *LastRegion = RegInfos.back().R;
216     for (BasicBlock *Pred : predecessors(NextEntry))
217       if (!LastRegion->contains(Pred))
218         // There's an edge going into the entry of the next scope from outside
219         // of this scope.
220         return false;
221     return true;
222   }
223 
224   void append(CHRScope *Next) {
225     assert(RegInfos.size() > 0 && "Empty CHRScope");
226     assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
227     assert(getParentRegion() == Next->getParentRegion() &&
228            "Must be siblings");
229     assert(getExitBlock() == Next->getEntryBlock() &&
230            "Must be adjacent");
231     for (RegInfo &RI : Next->RegInfos)
232       RegInfos.push_back(RI);
233     for (CHRScope *Sub : Next->Subs)
234       Subs.push_back(Sub);
235   }
236 
237   void addSub(CHRScope *SubIn) {
238 #ifndef NDEBUG
239     bool is_child = false;
240     for (RegInfo &RI : RegInfos)
241       if (RI.R == SubIn->getParentRegion()) {
242         is_child = true;
243         break;
244       }
245     assert(is_child && "Must be a child");
246 #endif
247     Subs.push_back(SubIn);
248   }
249 
250   // Split this scope at the boundary region into two, which will belong to the
251   // tail and returns the tail.
252   CHRScope *split(Region *Boundary) {
253     assert(Boundary && "Boundary null");
254     assert(RegInfos.begin()->R != Boundary &&
255            "Can't be split at beginning");
256     auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
257                                    [&Boundary](const RegInfo& RI) {
258                                      return Boundary == RI.R;
259                                    });
260     if (BoundaryIt == RegInfos.end())
261       return nullptr;
262     SmallVector<RegInfo, 8> TailRegInfos;
263     SmallVector<CHRScope *, 8> TailSubs;
264     TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
265     RegInfos.resize(BoundaryIt - RegInfos.begin());
266     DenseSet<Region *> TailRegionSet;
267     for (RegInfo &RI : TailRegInfos)
268       TailRegionSet.insert(RI.R);
269     for (auto It = Subs.begin(); It != Subs.end(); ) {
270       CHRScope *Sub = *It;
271       assert(Sub && "null Sub");
272       Region *Parent = Sub->getParentRegion();
273       if (TailRegionSet.count(Parent)) {
274         TailSubs.push_back(Sub);
275         It = Subs.erase(It);
276       } else {
277         assert(std::find_if(RegInfos.begin(), RegInfos.end(),
278                             [&Parent](const RegInfo& RI) {
279                               return Parent == RI.R;
280                             }) != RegInfos.end() &&
281                "Must be in head");
282         ++It;
283       }
284     }
285     assert(HoistStopMap.empty() && "MapHoistStops must be empty");
286     return new CHRScope(TailRegInfos, TailSubs);
287   }
288 
289   bool contains(Instruction *I) const {
290     BasicBlock *Parent = I->getParent();
291     for (const RegInfo &RI : RegInfos)
292       if (RI.R->contains(Parent))
293         return true;
294     return false;
295   }
296 
297   void print(raw_ostream &OS) const;
298 
299   SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
300   SmallVector<CHRScope *, 8> Subs;  // Subscopes.
301 
302   // The instruction at which to insert the CHR conditional branch (and hoist
303   // the dependent condition values).
304   Instruction *BranchInsertPoint;
305 
306   // True-biased and false-biased regions (conditional blocks),
307   // respectively. Used only for the outermost scope and includes regions in
308   // subscopes. The rest are unbiased.
309   DenseSet<Region *> TrueBiasedRegions;
310   DenseSet<Region *> FalseBiasedRegions;
311   // Among the biased regions, the regions that get CHRed.
312   SmallVector<RegInfo, 8> CHRRegions;
313 
314   // True-biased and false-biased selects, respectively. Used only for the
315   // outermost scope and includes ones in subscopes.
316   DenseSet<SelectInst *> TrueBiasedSelects;
317   DenseSet<SelectInst *> FalseBiasedSelects;
318 
319   // Map from one of the above regions to the instructions to stop
320   // hoisting instructions at through use-def chains.
321   HoistStopMapTy HoistStopMap;
322 
323  private:
324   CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
325            SmallVector<CHRScope *, 8> &SubsIn)
326     : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
327 };
328 
329 inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
330   Scope.print(OS);
331   return OS;
332 }
333 
334 class CHR {
335  public:
336   CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
337       ProfileSummaryInfo &PSIin, RegionInfo &RIin)
338       : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {}
339 
340   ~CHR() {
341     for (CHRScope *Scope : Scopes) {
342       delete Scope;
343     }
344   }
345 
346   bool run();
347 
348  private:
349   // See the comments in CHR::run() for the high level flow of the algorithm and
350   // what the following functions do.
351 
352   void findScopes(SmallVectorImpl<CHRScope *> &Output) {
353     Region *R = RI.getTopLevelRegion();
354     CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
355     if (Scope) {
356       Output.push_back(Scope);
357     }
358   }
359   CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
360                         SmallVectorImpl<CHRScope *> &Scopes);
361   CHRScope *findScope(Region *R);
362   void checkScopeHoistable(CHRScope *Scope);
363 
364   void splitScopes(SmallVectorImpl<CHRScope *> &Input,
365                    SmallVectorImpl<CHRScope *> &Output);
366   SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
367                                         CHRScope *Outer,
368                                         DenseSet<Value *> *OuterConditionValues,
369                                         Instruction *OuterInsertPoint,
370                                         SmallVectorImpl<CHRScope *> &Output,
371                                         DenseSet<Instruction *> &Unhoistables);
372 
373   void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
374   void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
375 
376   void filterScopes(SmallVectorImpl<CHRScope *> &Input,
377                     SmallVectorImpl<CHRScope *> &Output);
378 
379   void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
380                      SmallVectorImpl<CHRScope *> &Output);
381   void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
382 
383   void sortScopes(SmallVectorImpl<CHRScope *> &Input,
384                   SmallVectorImpl<CHRScope *> &Output);
385 
386   void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
387   void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
388   void cloneScopeBlocks(CHRScope *Scope,
389                         BasicBlock *PreEntryBlock,
390                         BasicBlock *ExitBlock,
391                         Region *LastRegion,
392                         ValueToValueMapTy &VMap);
393   BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
394                                  BasicBlock *EntryBlock,
395                                  BasicBlock *NewEntryBlock,
396                                  ValueToValueMapTy &VMap);
397   void fixupBranchesAndSelects(CHRScope *Scope,
398                                BasicBlock *PreEntryBlock,
399                                BranchInst *MergedBR,
400                                uint64_t ProfileCount);
401   void fixupBranch(Region *R,
402                    CHRScope *Scope,
403                    IRBuilder<> &IRB,
404                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
405   void fixupSelect(SelectInst* SI,
406                    CHRScope *Scope,
407                    IRBuilder<> &IRB,
408                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
409   void addToMergedCondition(bool IsTrueBiased, Value *Cond,
410                             Instruction *BranchOrSelect,
411                             CHRScope *Scope,
412                             IRBuilder<> &IRB,
413                             Value *&MergedCondition);
414 
415   Function &F;
416   BlockFrequencyInfo &BFI;
417   DominatorTree &DT;
418   ProfileSummaryInfo &PSI;
419   RegionInfo &RI;
420   CHRStats Stats;
421 
422   // All the true-biased regions in the function
423   DenseSet<Region *> TrueBiasedRegionsGlobal;
424   // All the false-biased regions in the function
425   DenseSet<Region *> FalseBiasedRegionsGlobal;
426   // All the true-biased selects in the function
427   DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
428   // All the false-biased selects in the function
429   DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
430   // A map from biased regions to their branch bias
431   DenseMap<Region *, BranchProbability> BranchBiasMap;
432   // A map from biased selects to their branch bias
433   DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
434   // All the scopes.
435   DenseSet<CHRScope *> Scopes;
436 };
437 
438 } // end anonymous namespace
439 
440 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
441   if (ForceCHR)
442     return true;
443 
444   if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
445     if (CHRModules.count(F.getParent()->getName()))
446       return true;
447     StringRef Name = F.getName();
448     if (CHRFunctions.count(Name))
449       return true;
450     const char* DemangledName = nullptr;
451 #if !defined(_MSC_VER)
452     int Status = -1;
453     DemangledName = abi::__cxa_demangle(Name.str().c_str(),
454                                         nullptr, nullptr, &Status);
455 #endif
456     return DemangledName && CHRFunctions.count(DemangledName);
457   }
458 
459   assert(PSI.hasProfileSummary() && "Empty PSI?");
460   return PSI.isFunctionEntryHot(&F);
461 }
462 
463 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
464                                          CHRStats *Stats) {
465   std::string Name = F.getName().str();
466   const char *DemangledName = nullptr;
467 #if !defined(_MSC_VER)
468   int Status = -1;
469   DemangledName = abi::__cxa_demangle(Name.c_str(),
470                                       nullptr, nullptr, &Status);
471 #endif
472   if (DemangledName == nullptr) {
473     DemangledName = "<NOT-MANGLED>";
474   }
475   std::string ModuleName = F.getParent()->getName().str();
476   CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
477             << Name);
478   if (Stats)
479     CHR_DEBUG(dbgs() << " " << *Stats);
480   CHR_DEBUG(dbgs() << "\n");
481   CHR_DEBUG(F.dump());
482 }
483 
484 void CHRScope::print(raw_ostream &OS) const {
485   assert(RegInfos.size() > 0 && "Empty CHRScope");
486   OS << "CHRScope[";
487   OS << RegInfos.size() << ", Regions[";
488   for (const RegInfo &RI : RegInfos) {
489     OS << RI.R->getNameStr();
490     if (RI.HasBranch)
491       OS << " B";
492     if (RI.Selects.size() > 0)
493       OS << " S" << RI.Selects.size();
494     OS << ", ";
495   }
496   if (RegInfos[0].R->getParent()) {
497     OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
498   } else {
499     // top level region
500     OS << "]";
501   }
502   OS << ", Subs[";
503   for (CHRScope *Sub : Subs) {
504     OS << *Sub << ", ";
505   }
506   OS << "]]";
507 }
508 
509 // Return true if the given instruction type can be hoisted by CHR.
510 static bool isHoistableInstructionType(Instruction *I) {
511   return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
512       isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
513       isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
514       isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
515       isa<InsertValueInst>(I);
516 }
517 
518 // Return true if the given instruction can be hoisted by CHR.
519 static bool isHoistable(Instruction *I, DominatorTree &DT) {
520   if (!isHoistableInstructionType(I))
521     return false;
522   return isSafeToSpeculativelyExecute(I, nullptr, &DT);
523 }
524 
525 // Recursively traverse the use-def chains of the given value and return a set
526 // of the unhoistable base values defined within the scope (excluding the
527 // first-region entry block) or the (hoistable or unhoistable) base values that
528 // are defined outside (including the first-region entry block) of the
529 // scope. The returned set doesn't include constants.
530 static std::set<Value *> getBaseValues(Value *V,
531                                        DominatorTree &DT) {
532   std::set<Value *> Result;
533   if (auto *I = dyn_cast<Instruction>(V)) {
534     // We don't stop at a block that's not in the Scope because we would miss some
535     // instructions that are based on the same base values if we stop there.
536     if (!isHoistable(I, DT)) {
537       Result.insert(I);
538       return Result;
539     }
540     // I is hoistable above the Scope.
541     for (Value *Op : I->operands()) {
542       std::set<Value *> OpResult = getBaseValues(Op, DT);
543       Result.insert(OpResult.begin(), OpResult.end());
544     }
545     return Result;
546   }
547   if (isa<Argument>(V)) {
548     Result.insert(V);
549     return Result;
550   }
551   // We don't include others like constants because those won't lead to any
552   // chance of folding of conditions (eg two bit checks merged into one check)
553   // after CHR.
554   return Result;  // empty
555 }
556 
557 // Return true if V is already hoisted or can be hoisted (along with its
558 // operands) above the insert point. When it returns true and HoistStops is
559 // non-null, the instructions to stop hoisting at through the use-def chains are
560 // inserted into HoistStops.
561 static bool
562 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
563                 DenseSet<Instruction *> &Unhoistables,
564                 DenseSet<Instruction *> *HoistStops) {
565   assert(InsertPoint && "Null InsertPoint");
566   if (auto *I = dyn_cast<Instruction>(V)) {
567     assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
568     assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
569     if (Unhoistables.count(I)) {
570       // Don't hoist if they are not to be hoisted.
571       return false;
572     }
573     if (DT.dominates(I, InsertPoint)) {
574       // We are already above the insert point. Stop here.
575       if (HoistStops)
576         HoistStops->insert(I);
577       return true;
578     }
579     // We aren't not above the insert point, check if we can hoist it above the
580     // insert point.
581     if (isHoistable(I, DT)) {
582       // Check operands first.
583       DenseSet<Instruction *> OpsHoistStops;
584       bool AllOpsHoisted = true;
585       for (Value *Op : I->operands()) {
586         if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
587           AllOpsHoisted = false;
588           break;
589         }
590       }
591       if (AllOpsHoisted) {
592         CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
593         if (HoistStops)
594           HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
595         return true;
596       }
597     }
598     return false;
599   }
600   // Non-instructions are considered hoistable.
601   return true;
602 }
603 
604 // Returns true and sets the true probability and false probability of an
605 // MD_prof metadata if it's well-formed.
606 static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb,
607                         BranchProbability &FalseProb) {
608   if (!MD) return false;
609   MDString *MDName = cast<MDString>(MD->getOperand(0));
610   if (MDName->getString() != "branch_weights" ||
611       MD->getNumOperands() != 3)
612     return false;
613   ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
614   ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
615   if (!TrueWeight || !FalseWeight)
616     return false;
617   uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
618   uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
619   uint64_t SumWt = TrueWt + FalseWt;
620 
621   assert(SumWt >= TrueWt && SumWt >= FalseWt &&
622          "Overflow calculating branch probabilities.");
623 
624   TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
625   FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
626   return true;
627 }
628 
629 static BranchProbability getCHRBiasThreshold() {
630   return BranchProbability::getBranchProbability(
631       static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
632 }
633 
634 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
635 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
636 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
637 // false.
638 template<typename K, typename S, typename M>
639 bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb,
640                S &TrueSet, S &FalseSet, M &BiasMap) {
641   BranchProbability Threshold = getCHRBiasThreshold();
642   if (TrueProb >= Threshold) {
643     TrueSet.insert(Key);
644     BiasMap[Key] = TrueProb;
645     return true;
646   } else if (FalseProb >= Threshold) {
647     FalseSet.insert(Key);
648     BiasMap[Key] = FalseProb;
649     return true;
650   }
651   return false;
652 }
653 
654 // Returns true and insert a region into the right biased set and the map if the
655 // branch of the region is biased.
656 static bool CheckBiasedBranch(BranchInst *BI, Region *R,
657                               DenseSet<Region *> &TrueBiasedRegionsGlobal,
658                               DenseSet<Region *> &FalseBiasedRegionsGlobal,
659                               DenseMap<Region *, BranchProbability> &BranchBiasMap) {
660   if (!BI->isConditional())
661     return false;
662   BranchProbability ThenProb, ElseProb;
663   if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof),
664                    ThenProb, ElseProb))
665     return false;
666   BasicBlock *IfThen = BI->getSuccessor(0);
667   BasicBlock *IfElse = BI->getSuccessor(1);
668   assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
669          IfThen != IfElse &&
670          "Invariant from findScopes");
671   if (IfThen == R->getExit()) {
672     // Swap them so that IfThen/ThenProb means going into the conditional code
673     // and IfElse/ElseProb means skipping it.
674     std::swap(IfThen, IfElse);
675     std::swap(ThenProb, ElseProb);
676   }
677   CHR_DEBUG(dbgs() << "BI " << *BI << " ");
678   CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
679   CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
680   return CheckBias(R, ThenProb, ElseProb,
681                    TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
682                    BranchBiasMap);
683 }
684 
685 // Returns true and insert a select into the right biased set and the map if the
686 // select is biased.
687 static bool CheckBiasedSelect(
688     SelectInst *SI, Region *R,
689     DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
690     DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
691     DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
692   BranchProbability TrueProb, FalseProb;
693   if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof),
694                    TrueProb, FalseProb))
695     return false;
696   CHR_DEBUG(dbgs() << "SI " << *SI << " ");
697   CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
698   CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
699   return CheckBias(SI, TrueProb, FalseProb,
700                    TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
701                    SelectBiasMap);
702 }
703 
704 // Returns the instruction at which to hoist the dependent condition values and
705 // insert the CHR branch for a region. This is the terminator branch in the
706 // entry block or the first select in the entry block, if any.
707 static Instruction* getBranchInsertPoint(RegInfo &RI) {
708   Region *R = RI.R;
709   BasicBlock *EntryBB = R->getEntry();
710   // The hoist point is by default the terminator of the entry block, which is
711   // the same as the branch instruction if RI.HasBranch is true.
712   Instruction *HoistPoint = EntryBB->getTerminator();
713   for (SelectInst *SI : RI.Selects) {
714     if (SI->getParent() == EntryBB) {
715       // Pick the first select in Selects in the entry block.  Note Selects is
716       // sorted in the instruction order within a block (asserted below).
717       HoistPoint = SI;
718       break;
719     }
720   }
721   assert(HoistPoint && "Null HoistPoint");
722 #ifndef NDEBUG
723   // Check that HoistPoint is the first one in Selects in the entry block,
724   // if any.
725   DenseSet<Instruction *> EntryBlockSelectSet;
726   for (SelectInst *SI : RI.Selects) {
727     if (SI->getParent() == EntryBB) {
728       EntryBlockSelectSet.insert(SI);
729     }
730   }
731   for (Instruction &I : *EntryBB) {
732     if (EntryBlockSelectSet.count(&I) > 0) {
733       assert(&I == HoistPoint &&
734              "HoistPoint must be the first one in Selects");
735       break;
736     }
737   }
738 #endif
739   return HoistPoint;
740 }
741 
742 // Find a CHR scope in the given region.
743 CHRScope * CHR::findScope(Region *R) {
744   CHRScope *Result = nullptr;
745   BasicBlock *Entry = R->getEntry();
746   BasicBlock *Exit = R->getExit();  // null if top level.
747   assert(Entry && "Entry must not be null");
748   assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
749          "Only top level region has a null exit");
750   if (Entry)
751     CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
752   else
753     CHR_DEBUG(dbgs() << "Entry null\n");
754   if (Exit)
755     CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
756   else
757     CHR_DEBUG(dbgs() << "Exit null\n");
758   // Exclude cases where Entry is part of a subregion (hence it doesn't belong
759   // to this region).
760   bool EntryInSubregion = RI.getRegionFor(Entry) != R;
761   if (EntryInSubregion)
762     return nullptr;
763   // Exclude loops
764   for (BasicBlock *Pred : predecessors(Entry))
765     if (R->contains(Pred))
766       return nullptr;
767   if (Exit) {
768     // Try to find an if-then block (check if R is an if-then).
769     // if (cond) {
770     //  ...
771     // }
772     auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
773     if (BI)
774       CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
775     else
776       CHR_DEBUG(dbgs() << "BI null\n");
777     if (BI && BI->isConditional()) {
778       BasicBlock *S0 = BI->getSuccessor(0);
779       BasicBlock *S1 = BI->getSuccessor(1);
780       CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
781       CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
782       if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
783         RegInfo RI(R);
784         RI.HasBranch = CheckBiasedBranch(
785             BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
786             BranchBiasMap);
787         Result = new CHRScope(RI);
788         Scopes.insert(Result);
789         CHR_DEBUG(dbgs() << "Found a region with a branch\n");
790         ++Stats.NumBranches;
791       }
792     }
793   }
794   {
795     // Try to look for selects in the direct child blocks (as opposed to in
796     // subregions) of R.
797     // ...
798     // if (..) { // Some subregion
799     //   ...
800     // }
801     // if (..) { // Some subregion
802     //   ...
803     // }
804     // ...
805     // a = cond ? b : c;
806     // ...
807     SmallVector<SelectInst *, 8> Selects;
808     for (RegionNode *E : R->elements()) {
809       if (E->isSubRegion())
810         continue;
811       // This returns the basic block of E if E is a direct child of R (not a
812       // subregion.)
813       BasicBlock *BB = E->getEntry();
814       // Need to push in the order to make it easier to find the first Select
815       // later.
816       for (Instruction &I : *BB) {
817         if (auto *SI = dyn_cast<SelectInst>(&I)) {
818           Selects.push_back(SI);
819           ++Stats.NumBranches;
820         }
821       }
822     }
823     if (Selects.size() > 0) {
824       auto AddSelects = [&](RegInfo &RI) {
825         for (auto *SI : Selects)
826           if (CheckBiasedSelect(SI, RI.R,
827                                 TrueBiasedSelectsGlobal,
828                                 FalseBiasedSelectsGlobal,
829                                 SelectBiasMap))
830             RI.Selects.push_back(SI);
831       };
832       if (!Result) {
833         CHR_DEBUG(dbgs() << "Found a select-only region\n");
834         RegInfo RI(R);
835         AddSelects(RI);
836         Result = new CHRScope(RI);
837         Scopes.insert(Result);
838       } else {
839         CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
840         AddSelects(Result->RegInfos[0]);
841       }
842     }
843   }
844 
845   if (Result) {
846     checkScopeHoistable(Result);
847   }
848   return Result;
849 }
850 
851 // Check that any of the branch and the selects in the region could be
852 // hoisted above the the CHR branch insert point (the most dominating of
853 // them, either the branch (at the end of the first block) or the first
854 // select in the first block). If the branch can't be hoisted, drop the
855 // selects in the first blocks.
856 //
857 // For example, for the following scope/region with selects, we want to insert
858 // the merged branch right before the first select in the first/entry block by
859 // hoisting c1, c2, c3, and c4.
860 //
861 // // Branch insert point here.
862 // a = c1 ? b : c; // Select 1
863 // d = c2 ? e : f; // Select 2
864 // if (c3) { // Branch
865 //   ...
866 //   c4 = foo() // A call.
867 //   g = c4 ? h : i; // Select 3
868 // }
869 //
870 // But suppose we can't hoist c4 because it's dependent on the preceding
871 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
872 // Select 2. If we can't hoist c3, we drop Selects 1 & 2.
873 void CHR::checkScopeHoistable(CHRScope *Scope) {
874   RegInfo &RI = Scope->RegInfos[0];
875   Region *R = RI.R;
876   BasicBlock *EntryBB = R->getEntry();
877   auto *Branch = RI.HasBranch ?
878                  cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
879   SmallVector<SelectInst *, 8> &Selects = RI.Selects;
880   if (RI.HasBranch || !Selects.empty()) {
881     Instruction *InsertPoint = getBranchInsertPoint(RI);
882     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
883     // Avoid a data dependence from a select or a branch to a(nother)
884     // select. Note no instruction can't data-depend on a branch (a branch
885     // instruction doesn't produce a value).
886     DenseSet<Instruction *> Unhoistables;
887     // Initialize Unhoistables with the selects.
888     for (SelectInst *SI : Selects) {
889       Unhoistables.insert(SI);
890     }
891     // Remove Selects that can't be hoisted.
892     for (auto it = Selects.begin(); it != Selects.end(); ) {
893       SelectInst *SI = *it;
894       if (SI == InsertPoint) {
895         ++it;
896         continue;
897       }
898       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
899                                          DT, Unhoistables, nullptr);
900       if (!IsHoistable) {
901         CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
902         it = Selects.erase(it);
903         // Since we are dropping the select here, we also drop it from
904         // Unhoistables.
905         Unhoistables.erase(SI);
906       } else
907         ++it;
908     }
909     // Update InsertPoint after potentially removing selects.
910     InsertPoint = getBranchInsertPoint(RI);
911     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
912     if (RI.HasBranch && InsertPoint != Branch) {
913       bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
914                                          DT, Unhoistables, nullptr);
915       if (!IsHoistable) {
916         // If the branch isn't hoistable, drop the selects in the entry
917         // block, preferring the branch, which makes the branch the hoist
918         // point.
919         assert(InsertPoint != Branch && "Branch must not be the hoist point");
920         CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
921         CHR_DEBUG(
922             for (SelectInst *SI : Selects) {
923               dbgs() << "SI " << *SI << "\n";
924             });
925         Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
926                                      [EntryBB](SelectInst *SI) {
927                                        return SI->getParent() == EntryBB;
928                                      }), Selects.end());
929         Unhoistables.clear();
930         InsertPoint = Branch;
931       }
932     }
933     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
934 #ifndef NDEBUG
935     if (RI.HasBranch) {
936       assert(!DT.dominates(Branch, InsertPoint) &&
937              "Branch can't be already above the hoist point");
938       assert(checkHoistValue(Branch->getCondition(), InsertPoint,
939                              DT, Unhoistables, nullptr) &&
940              "checkHoistValue for branch");
941     }
942     for (auto *SI : Selects) {
943       assert(!DT.dominates(SI, InsertPoint) &&
944              "SI can't be already above the hoist point");
945       assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
946                              Unhoistables, nullptr) &&
947              "checkHoistValue for selects");
948     }
949     CHR_DEBUG(dbgs() << "Result\n");
950     if (RI.HasBranch) {
951       CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
952     }
953     for (auto *SI : Selects) {
954       CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
955     }
956 #endif
957   }
958 }
959 
960 // Traverse the region tree, find all nested scopes and merge them if possible.
961 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
962                            SmallVectorImpl<CHRScope *> &Scopes) {
963   CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
964   CHRScope *Result = findScope(R);
965   // Visit subscopes.
966   CHRScope *ConsecutiveSubscope = nullptr;
967   SmallVector<CHRScope *, 8> Subscopes;
968   for (auto It = R->begin(); It != R->end(); ++It) {
969     const std::unique_ptr<Region> &SubR = *It;
970     auto Next_It = std::next(It);
971     Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr;
972     CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
973               << "\n");
974     CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
975     if (SubCHRScope) {
976       CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
977     } else {
978       CHR_DEBUG(dbgs() << "Subregion Scope null\n");
979     }
980     if (SubCHRScope) {
981       if (!ConsecutiveSubscope)
982         ConsecutiveSubscope = SubCHRScope;
983       else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
984         Subscopes.push_back(ConsecutiveSubscope);
985         ConsecutiveSubscope = SubCHRScope;
986       } else
987         ConsecutiveSubscope->append(SubCHRScope);
988     } else {
989       if (ConsecutiveSubscope) {
990         Subscopes.push_back(ConsecutiveSubscope);
991       }
992       ConsecutiveSubscope = nullptr;
993     }
994   }
995   if (ConsecutiveSubscope) {
996     Subscopes.push_back(ConsecutiveSubscope);
997   }
998   for (CHRScope *Sub : Subscopes) {
999     if (Result) {
1000       // Combine it with the parent.
1001       Result->addSub(Sub);
1002     } else {
1003       // Push Subscopes as they won't be combined with the parent.
1004       Scopes.push_back(Sub);
1005     }
1006   }
1007   return Result;
1008 }
1009 
1010 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1011   DenseSet<Value *> ConditionValues;
1012   if (RI.HasBranch) {
1013     auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1014     ConditionValues.insert(BI->getCondition());
1015   }
1016   for (SelectInst *SI : RI.Selects) {
1017     ConditionValues.insert(SI->getCondition());
1018   }
1019   return ConditionValues;
1020 }
1021 
1022 
1023 // Determine whether to split a scope depending on the sets of the branch
1024 // condition values of the previous region and the current region. We split
1025 // (return true) it if 1) the condition values of the inner/lower scope can't be
1026 // hoisted up to the outer/upper scope, or 2) the two sets of the condition
1027 // values have an empty intersection (because the combined branch conditions
1028 // won't probably lead to a simpler combined condition).
1029 static bool shouldSplit(Instruction *InsertPoint,
1030                         DenseSet<Value *> &PrevConditionValues,
1031                         DenseSet<Value *> &ConditionValues,
1032                         DominatorTree &DT,
1033                         DenseSet<Instruction *> &Unhoistables) {
1034   CHR_DEBUG(
1035       dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1036       for (Value *V : PrevConditionValues) {
1037         dbgs() << *V << ", ";
1038       }
1039       dbgs() << " ConditionValues ";
1040       for (Value *V : ConditionValues) {
1041         dbgs() << *V << ", ";
1042       }
1043       dbgs() << "\n");
1044   assert(InsertPoint && "Null InsertPoint");
1045   // If any of Bases isn't hoistable to the hoist point, split.
1046   for (Value *V : ConditionValues) {
1047     if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
1048       CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1049       return true; // Not hoistable, split.
1050     }
1051   }
1052   // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1053   // unnecessary splits at scopes with no branch/selects.  If
1054   // PrevConditionValues and ConditionValues don't intersect at all, split.
1055   if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1056     // Use std::set as DenseSet doesn't work with set_intersection.
1057     std::set<Value *> PrevBases, Bases;
1058     for (Value *V : PrevConditionValues) {
1059       std::set<Value *> BaseValues = getBaseValues(V, DT);
1060       PrevBases.insert(BaseValues.begin(), BaseValues.end());
1061     }
1062     for (Value *V : ConditionValues) {
1063       std::set<Value *> BaseValues = getBaseValues(V, DT);
1064       Bases.insert(BaseValues.begin(), BaseValues.end());
1065     }
1066     CHR_DEBUG(
1067         dbgs() << "PrevBases ";
1068         for (Value *V : PrevBases) {
1069           dbgs() << *V << ", ";
1070         }
1071         dbgs() << " Bases ";
1072         for (Value *V : Bases) {
1073           dbgs() << *V << ", ";
1074         }
1075         dbgs() << "\n");
1076     std::set<Value *> Intersection;
1077     std::set_intersection(PrevBases.begin(), PrevBases.end(),
1078                           Bases.begin(), Bases.end(),
1079                           std::inserter(Intersection, Intersection.begin()));
1080     if (Intersection.empty()) {
1081       // Empty intersection, split.
1082       CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1083       return true;
1084     }
1085   }
1086   CHR_DEBUG(dbgs() << "No split\n");
1087   return false;  // Don't split.
1088 }
1089 
1090 static void GetSelectsInScope(CHRScope *Scope,
1091                               DenseSet<Instruction *> &Output) {
1092   for (RegInfo &RI : Scope->RegInfos) {
1093     for (SelectInst *SI : RI.Selects) {
1094       Output.insert(SI);
1095     }
1096   }
1097   for (CHRScope *Sub : Scope->Subs) {
1098     GetSelectsInScope(Sub, Output);
1099   }
1100 }
1101 
1102 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1103                       SmallVectorImpl<CHRScope *> &Output) {
1104   for (CHRScope *Scope : Input) {
1105     assert(!Scope->BranchInsertPoint &&
1106            "BranchInsertPoint must not be set");
1107     DenseSet<Instruction *> Unhoistables;
1108     GetSelectsInScope(Scope, Unhoistables);
1109     splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1110   }
1111 #ifndef NDEBUG
1112   for (CHRScope *Scope : Output) {
1113     assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1114   }
1115 #endif
1116 }
1117 
1118 SmallVector<CHRScope *, 8> CHR::splitScope(
1119     CHRScope *Scope,
1120     CHRScope *Outer,
1121     DenseSet<Value *> *OuterConditionValues,
1122     Instruction *OuterInsertPoint,
1123     SmallVectorImpl<CHRScope *> &Output,
1124     DenseSet<Instruction *> &Unhoistables) {
1125   if (Outer) {
1126     assert(OuterConditionValues && "Null OuterConditionValues");
1127     assert(OuterInsertPoint && "Null OuterInsertPoint");
1128   }
1129   bool PrevSplitFromOuter = true;
1130   DenseSet<Value *> PrevConditionValues;
1131   Instruction *PrevInsertPoint = nullptr;
1132   SmallVector<CHRScope *, 8> Splits;
1133   SmallVector<bool, 8> SplitsSplitFromOuter;
1134   SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1135   SmallVector<Instruction *, 8> SplitsInsertPoints;
1136   SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos);  // Copy
1137   for (RegInfo &RI : RegInfos) {
1138     Instruction *InsertPoint = getBranchInsertPoint(RI);
1139     DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1140     CHR_DEBUG(
1141         dbgs() << "ConditionValues ";
1142         for (Value *V : ConditionValues) {
1143           dbgs() << *V << ", ";
1144         }
1145         dbgs() << "\n");
1146     if (RI.R == RegInfos[0].R) {
1147       // First iteration. Check to see if we should split from the outer.
1148       if (Outer) {
1149         CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1150         CHR_DEBUG(dbgs() << "Should split from outer at "
1151                   << RI.R->getNameStr() << "\n");
1152         if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1153                         ConditionValues, DT, Unhoistables)) {
1154           PrevConditionValues = ConditionValues;
1155           PrevInsertPoint = InsertPoint;
1156         } else {
1157           // Not splitting from the outer. Use the outer bases and insert
1158           // point. Union the bases.
1159           PrevSplitFromOuter = false;
1160           PrevConditionValues = *OuterConditionValues;
1161           PrevConditionValues.insert(ConditionValues.begin(),
1162                                      ConditionValues.end());
1163           PrevInsertPoint = OuterInsertPoint;
1164         }
1165       } else {
1166         CHR_DEBUG(dbgs() << "Outer null\n");
1167         PrevConditionValues = ConditionValues;
1168         PrevInsertPoint = InsertPoint;
1169       }
1170     } else {
1171       CHR_DEBUG(dbgs() << "Should split from prev at "
1172                 << RI.R->getNameStr() << "\n");
1173       if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1174                       DT, Unhoistables)) {
1175         CHRScope *Tail = Scope->split(RI.R);
1176         Scopes.insert(Tail);
1177         Splits.push_back(Scope);
1178         SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1179         SplitsConditionValues.push_back(PrevConditionValues);
1180         SplitsInsertPoints.push_back(PrevInsertPoint);
1181         Scope = Tail;
1182         PrevConditionValues = ConditionValues;
1183         PrevInsertPoint = InsertPoint;
1184         PrevSplitFromOuter = true;
1185       } else {
1186         // Not splitting. Union the bases. Keep the hoist point.
1187         PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1188       }
1189     }
1190   }
1191   Splits.push_back(Scope);
1192   SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1193   SplitsConditionValues.push_back(PrevConditionValues);
1194   assert(PrevInsertPoint && "Null PrevInsertPoint");
1195   SplitsInsertPoints.push_back(PrevInsertPoint);
1196   assert(Splits.size() == SplitsConditionValues.size() &&
1197          Splits.size() == SplitsSplitFromOuter.size() &&
1198          Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1199   for (size_t I = 0; I < Splits.size(); ++I) {
1200     CHRScope *Split = Splits[I];
1201     DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1202     Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1203     SmallVector<CHRScope *, 8> NewSubs;
1204     DenseSet<Instruction *> SplitUnhoistables;
1205     GetSelectsInScope(Split, SplitUnhoistables);
1206     for (CHRScope *Sub : Split->Subs) {
1207       SmallVector<CHRScope *, 8> SubSplits = splitScope(
1208           Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1209           SplitUnhoistables);
1210       NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
1211     }
1212     Split->Subs = NewSubs;
1213   }
1214   SmallVector<CHRScope *, 8> Result;
1215   for (size_t I = 0; I < Splits.size(); ++I) {
1216     CHRScope *Split = Splits[I];
1217     if (SplitsSplitFromOuter[I]) {
1218       // Split from the outer.
1219       Output.push_back(Split);
1220       Split->BranchInsertPoint = SplitsInsertPoints[I];
1221       CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1222                 << "\n");
1223     } else {
1224       // Connected to the outer.
1225       Result.push_back(Split);
1226     }
1227   }
1228   if (!Outer)
1229     assert(Result.empty() &&
1230            "If no outer (top-level), must return no nested ones");
1231   return Result;
1232 }
1233 
1234 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1235   for (CHRScope *Scope : Scopes) {
1236     assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1237     classifyBiasedScopes(Scope, Scope);
1238     CHR_DEBUG(
1239         dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1240         dbgs() << "TrueBiasedRegions ";
1241         for (Region *R : Scope->TrueBiasedRegions) {
1242           dbgs() << R->getNameStr() << ", ";
1243         }
1244         dbgs() << "\n";
1245         dbgs() << "FalseBiasedRegions ";
1246         for (Region *R : Scope->FalseBiasedRegions) {
1247           dbgs() << R->getNameStr() << ", ";
1248         }
1249         dbgs() << "\n";
1250         dbgs() << "TrueBiasedSelects ";
1251         for (SelectInst *SI : Scope->TrueBiasedSelects) {
1252           dbgs() << *SI << ", ";
1253         }
1254         dbgs() << "\n";
1255         dbgs() << "FalseBiasedSelects ";
1256         for (SelectInst *SI : Scope->FalseBiasedSelects) {
1257           dbgs() << *SI << ", ";
1258         }
1259         dbgs() << "\n";);
1260   }
1261 }
1262 
1263 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1264   for (RegInfo &RI : Scope->RegInfos) {
1265     if (RI.HasBranch) {
1266       Region *R = RI.R;
1267       if (TrueBiasedRegionsGlobal.count(R) > 0)
1268         OutermostScope->TrueBiasedRegions.insert(R);
1269       else if (FalseBiasedRegionsGlobal.count(R) > 0)
1270         OutermostScope->FalseBiasedRegions.insert(R);
1271       else
1272         llvm_unreachable("Must be biased");
1273     }
1274     for (SelectInst *SI : RI.Selects) {
1275       if (TrueBiasedSelectsGlobal.count(SI) > 0)
1276         OutermostScope->TrueBiasedSelects.insert(SI);
1277       else if (FalseBiasedSelectsGlobal.count(SI) > 0)
1278         OutermostScope->FalseBiasedSelects.insert(SI);
1279       else
1280         llvm_unreachable("Must be biased");
1281     }
1282   }
1283   for (CHRScope *Sub : Scope->Subs) {
1284     classifyBiasedScopes(Sub, OutermostScope);
1285   }
1286 }
1287 
1288 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1289   unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1290                        Scope->FalseBiasedRegions.size() +
1291                        Scope->TrueBiasedSelects.size() +
1292                        Scope->FalseBiasedSelects.size();
1293   return NumBiased >= CHRMergeThreshold;
1294 }
1295 
1296 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1297                        SmallVectorImpl<CHRScope *> &Output) {
1298   for (CHRScope *Scope : Input) {
1299     // Filter out the ones with only one region and no subs.
1300     if (!hasAtLeastTwoBiasedBranches(Scope)) {
1301       CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1302                 << Scope->TrueBiasedRegions.size()
1303                 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1304                 << " true-selects " << Scope->TrueBiasedSelects.size()
1305                 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1306       continue;
1307     }
1308     Output.push_back(Scope);
1309   }
1310 }
1311 
1312 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1313                         SmallVectorImpl<CHRScope *> &Output) {
1314   for (CHRScope *Scope : Input) {
1315     assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1316            "Empty");
1317     setCHRRegions(Scope, Scope);
1318     Output.push_back(Scope);
1319     CHR_DEBUG(
1320         dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1321         for (auto pair : Scope->HoistStopMap) {
1322           Region *R = pair.first;
1323           dbgs() << "Region " << R->getNameStr() << "\n";
1324           for (Instruction *I : pair.second) {
1325             dbgs() << "HoistStop " << *I << "\n";
1326           }
1327         }
1328         dbgs() << "CHRRegions" << "\n";
1329         for (RegInfo &RI : Scope->CHRRegions) {
1330           dbgs() << RI.R->getNameStr() << "\n";
1331         });
1332   }
1333 }
1334 
1335 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1336   DenseSet<Instruction *> Unhoistables;
1337   // Put the biased selects in Unhoistables because they should stay where they
1338   // are and constant-folded after CHR (in case one biased select or a branch
1339   // can depend on another biased select.)
1340   for (RegInfo &RI : Scope->RegInfos) {
1341     for (SelectInst *SI : RI.Selects) {
1342       Unhoistables.insert(SI);
1343     }
1344   }
1345   Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1346   for (RegInfo &RI : Scope->RegInfos) {
1347     Region *R = RI.R;
1348     DenseSet<Instruction *> HoistStops;
1349     bool IsHoisted = false;
1350     if (RI.HasBranch) {
1351       assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
1352               OutermostScope->FalseBiasedRegions.count(R) > 0) &&
1353              "Must be truthy or falsy");
1354       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1355       // Note checkHoistValue fills in HoistStops.
1356       bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1357                                          Unhoistables, &HoistStops);
1358       assert(IsHoistable && "Must be hoistable");
1359       (void)(IsHoistable);  // Unused in release build
1360       IsHoisted = true;
1361     }
1362     for (SelectInst *SI : RI.Selects) {
1363       assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
1364               OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
1365              "Must be true or false biased");
1366       // Note checkHoistValue fills in HoistStops.
1367       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1368                                          Unhoistables, &HoistStops);
1369       assert(IsHoistable && "Must be hoistable");
1370       (void)(IsHoistable);  // Unused in release build
1371       IsHoisted = true;
1372     }
1373     if (IsHoisted) {
1374       OutermostScope->CHRRegions.push_back(RI);
1375       OutermostScope->HoistStopMap[R] = HoistStops;
1376     }
1377   }
1378   for (CHRScope *Sub : Scope->Subs)
1379     setCHRRegions(Sub, OutermostScope);
1380 }
1381 
1382 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1383   return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1384 }
1385 
1386 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1387                      SmallVectorImpl<CHRScope *> &Output) {
1388   Output.resize(Input.size());
1389   std::copy(Input.begin(), Input.end(), Output.begin());
1390   std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
1391 }
1392 
1393 // Return true if V is already hoisted or was hoisted (along with its operands)
1394 // to the insert point.
1395 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1396                        HoistStopMapTy &HoistStopMap,
1397                        DenseSet<Instruction *> &HoistedSet,
1398                        DenseSet<PHINode *> &TrivialPHIs) {
1399   auto IT = HoistStopMap.find(R);
1400   assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1401   DenseSet<Instruction *> &HoistStops = IT->second;
1402   if (auto *I = dyn_cast<Instruction>(V)) {
1403     if (I == HoistPoint)
1404       return;
1405     if (HoistStops.count(I))
1406       return;
1407     if (auto *PN = dyn_cast<PHINode>(I))
1408       if (TrivialPHIs.count(PN))
1409         // The trivial phi inserted by the previous CHR scope could replace a
1410         // non-phi in HoistStops. Note that since this phi is at the exit of a
1411         // previous CHR scope, which dominates this scope, it's safe to stop
1412         // hoisting there.
1413         return;
1414     if (HoistedSet.count(I))
1415       // Already hoisted, return.
1416       return;
1417     assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1418     for (Value *Op : I->operands()) {
1419       hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
1420     }
1421     I->moveBefore(HoistPoint);
1422     HoistedSet.insert(I);
1423     CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1424   }
1425 }
1426 
1427 // Hoist the dependent condition values of the branches and the selects in the
1428 // scope to the insert point.
1429 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1430                                  DenseSet<PHINode *> &TrivialPHIs) {
1431   DenseSet<Instruction *> HoistedSet;
1432   for (const RegInfo &RI : Scope->CHRRegions) {
1433     Region *R = RI.R;
1434     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1435     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1436     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1437       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1438       hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1439                  HoistedSet, TrivialPHIs);
1440     }
1441     for (SelectInst *SI : RI.Selects) {
1442       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1443       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1444       if (!(IsTrueBiased || IsFalseBiased))
1445         continue;
1446       hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1447                  HoistedSet, TrivialPHIs);
1448     }
1449   }
1450 }
1451 
1452 // Negate the predicate if an ICmp if it's used only by branches or selects by
1453 // swapping the operands of the branches or the selects. Returns true if success.
1454 static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1455                                                  Instruction *ExcludedUser,
1456                                                  CHRScope *Scope) {
1457   for (User *U : ICmp->users()) {
1458     if (U == ExcludedUser)
1459       continue;
1460     if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1461       continue;
1462     if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1463       continue;
1464     return false;
1465   }
1466   for (User *U : ICmp->users()) {
1467     if (U == ExcludedUser)
1468       continue;
1469     if (auto *BI = dyn_cast<BranchInst>(U)) {
1470       assert(BI->isConditional() && "Must be conditional");
1471       BI->swapSuccessors();
1472       // Don't need to swap this in terms of
1473       // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1474       // mean whehter the branch is likely go into the if-then rather than
1475       // successor0/successor1 and because we can tell which edge is the then or
1476       // the else one by comparing the destination to the region exit block.
1477       continue;
1478     }
1479     if (auto *SI = dyn_cast<SelectInst>(U)) {
1480       // Swap operands
1481       Value *TrueValue = SI->getTrueValue();
1482       Value *FalseValue = SI->getFalseValue();
1483       SI->setTrueValue(FalseValue);
1484       SI->setFalseValue(TrueValue);
1485       SI->swapProfMetadata();
1486       if (Scope->TrueBiasedSelects.count(SI)) {
1487         assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1488                "Must not be already in");
1489         Scope->FalseBiasedSelects.insert(SI);
1490       } else if (Scope->FalseBiasedSelects.count(SI)) {
1491         assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1492                "Must not be already in");
1493         Scope->TrueBiasedSelects.insert(SI);
1494       }
1495       continue;
1496     }
1497     llvm_unreachable("Must be a branch or a select");
1498   }
1499   ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1500   return true;
1501 }
1502 
1503 // A helper for transformScopes. Insert a trivial phi at the scope exit block
1504 // for a value that's defined in the scope but used outside it (meaning it's
1505 // alive at the exit block).
1506 static void insertTrivialPHIs(CHRScope *Scope,
1507                               BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1508                               DenseSet<PHINode *> &TrivialPHIs) {
1509   DenseSet<BasicBlock *> BlocksInScopeSet;
1510   SmallVector<BasicBlock *, 8> BlocksInScopeVec;
1511   for (RegInfo &RI : Scope->RegInfos) {
1512     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1513                                             // sub-Scopes.
1514       BlocksInScopeSet.insert(BB);
1515       BlocksInScopeVec.push_back(BB);
1516     }
1517   }
1518   CHR_DEBUG(
1519       dbgs() << "Inserting redudant phis\n";
1520       for (BasicBlock *BB : BlocksInScopeVec) {
1521         dbgs() << "BlockInScope " << BB->getName() << "\n";
1522       });
1523   for (BasicBlock *BB : BlocksInScopeVec) {
1524     for (Instruction &I : *BB) {
1525       SmallVector<Instruction *, 8> Users;
1526       for (User *U : I.users()) {
1527         if (auto *UI = dyn_cast<Instruction>(U)) {
1528           if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
1529               // Unless there's already a phi for I at the exit block.
1530               !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1531             CHR_DEBUG(dbgs() << "V " << I << "\n");
1532             CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1533             Users.push_back(UI);
1534           } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1535             // There's a loop backedge from a block that's dominated by this
1536             // scope to the entry block.
1537             CHR_DEBUG(dbgs() << "V " << I << "\n");
1538             CHR_DEBUG(dbgs()
1539                       << "Used at entry block (for a back edge) by a phi user "
1540                       << *UI << "\n");
1541             Users.push_back(UI);
1542           }
1543         }
1544       }
1545       if (Users.size() > 0) {
1546         // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1547         // ExitBlock. Replace I with the new phi in UI unless UI is another
1548         // phi at ExitBlock.
1549         unsigned PredCount = std::distance(pred_begin(ExitBlock),
1550                                            pred_end(ExitBlock));
1551         PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
1552                                       &ExitBlock->front());
1553         for (BasicBlock *Pred : predecessors(ExitBlock)) {
1554           PN->addIncoming(&I, Pred);
1555         }
1556         TrivialPHIs.insert(PN);
1557         CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1558         for (Instruction *UI : Users) {
1559           for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1560             if (UI->getOperand(J) == &I) {
1561               UI->setOperand(J, PN);
1562             }
1563           }
1564           CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1565         }
1566       }
1567     }
1568   }
1569 }
1570 
1571 // Assert that all the CHR regions of the scope have a biased branch or select.
1572 static void LLVM_ATTRIBUTE_UNUSED
1573 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1574 #ifndef NDEBUG
1575   auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1576     if (Scope->TrueBiasedRegions.count(RI.R) ||
1577         Scope->FalseBiasedRegions.count(RI.R))
1578       return true;
1579     for (SelectInst *SI : RI.Selects)
1580       if (Scope->TrueBiasedSelects.count(SI) ||
1581           Scope->FalseBiasedSelects.count(SI))
1582         return true;
1583     return false;
1584   };
1585   for (RegInfo &RI : Scope->CHRRegions) {
1586     assert(HasBiasedBranchOrSelect(RI, Scope) &&
1587            "Must have biased branch or select");
1588   }
1589 #endif
1590 }
1591 
1592 // Assert that all the condition values of the biased branches and selects have
1593 // been hoisted to the pre-entry block or outside of the scope.
1594 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
1595     CHRScope *Scope, BasicBlock *PreEntryBlock) {
1596   CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1597   for (RegInfo &RI : Scope->CHRRegions) {
1598     Region *R = RI.R;
1599     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1600     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1601     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1602       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1603       Value *V = BI->getCondition();
1604       CHR_DEBUG(dbgs() << *V << "\n");
1605       if (auto *I = dyn_cast<Instruction>(V)) {
1606         (void)(I); // Unused in release build.
1607         assert((I->getParent() == PreEntryBlock ||
1608                 !Scope->contains(I)) &&
1609                "Must have been hoisted to PreEntryBlock or outside the scope");
1610       }
1611     }
1612     for (SelectInst *SI : RI.Selects) {
1613       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1614       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1615       if (!(IsTrueBiased || IsFalseBiased))
1616         continue;
1617       Value *V = SI->getCondition();
1618       CHR_DEBUG(dbgs() << *V << "\n");
1619       if (auto *I = dyn_cast<Instruction>(V)) {
1620         (void)(I); // Unused in release build.
1621         assert((I->getParent() == PreEntryBlock ||
1622                 !Scope->contains(I)) &&
1623                "Must have been hoisted to PreEntryBlock or outside the scope");
1624       }
1625     }
1626   }
1627 }
1628 
1629 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1630   CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1631 
1632   assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1633   Region *FirstRegion = Scope->RegInfos[0].R;
1634   BasicBlock *EntryBlock = FirstRegion->getEntry();
1635   Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1636   BasicBlock *ExitBlock = LastRegion->getExit();
1637   Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1638 
1639   if (ExitBlock) {
1640     // Insert a trivial phi at the exit block (where the CHR hot path and the
1641     // cold path merges) for a value that's defined in the scope but used
1642     // outside it (meaning it's alive at the exit block). We will add the
1643     // incoming values for the CHR cold paths to it below. Without this, we'd
1644     // miss updating phi's for such values unless there happens to already be a
1645     // phi for that value there.
1646     insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1647   }
1648 
1649   // Split the entry block of the first region. The new block becomes the new
1650   // entry block of the first region. The old entry block becomes the block to
1651   // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1652   // through the split, we update the entry of the first region after the split,
1653   // and Region only points to the entry and the exit blocks, rather than
1654   // keeping everything in a list or set, the blocks membership and the
1655   // entry/exit blocks of the region are still valid after the split.
1656   CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1657             << " at " << *Scope->BranchInsertPoint << "\n");
1658   BasicBlock *NewEntryBlock =
1659       SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1660   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1661          "NewEntryBlock's only pred must be EntryBlock");
1662   FirstRegion->replaceEntryRecursive(NewEntryBlock);
1663   BasicBlock *PreEntryBlock = EntryBlock;
1664 
1665   ValueToValueMapTy VMap;
1666   // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1667   // hot path (originals) and a cold path (clones) and update the PHIs at the
1668   // exit block.
1669   cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1670 
1671   // Replace the old (placeholder) branch with the new (merged) conditional
1672   // branch.
1673   BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1674                                             NewEntryBlock, VMap);
1675 
1676 #ifndef NDEBUG
1677   assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1678 #endif
1679 
1680   // Hoist the conditional values of the branches/selects.
1681   hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
1682 
1683 #ifndef NDEBUG
1684   assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1685 #endif
1686 
1687   // Create the combined branch condition and constant-fold the branches/selects
1688   // in the hot path.
1689   fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1690                           ProfileCount ? ProfileCount.getValue() : 0);
1691 }
1692 
1693 // A helper for transformScopes. Clone the blocks in the scope (excluding the
1694 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1695 // at the exit block.
1696 void CHR::cloneScopeBlocks(CHRScope *Scope,
1697                            BasicBlock *PreEntryBlock,
1698                            BasicBlock *ExitBlock,
1699                            Region *LastRegion,
1700                            ValueToValueMapTy &VMap) {
1701   // Clone all the blocks. The original blocks will be the hot-path
1702   // CHR-optimized code and the cloned blocks will be the original unoptimized
1703   // code. This is so that the block pointers from the
1704   // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1705   // which CHR should apply to.
1706   SmallVector<BasicBlock*, 8> NewBlocks;
1707   for (RegInfo &RI : Scope->RegInfos)
1708     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1709                                             // sub-Scopes.
1710       assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1711       BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1712       NewBlocks.push_back(NewBB);
1713       VMap[BB] = NewBB;
1714     }
1715 
1716   // Place the cloned blocks right after the original blocks (right before the
1717   // exit block of.)
1718   if (ExitBlock)
1719     F.getBasicBlockList().splice(ExitBlock->getIterator(),
1720                                  F.getBasicBlockList(),
1721                                  NewBlocks[0]->getIterator(), F.end());
1722 
1723   // Update the cloned blocks/instructions to refer to themselves.
1724   for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1725     for (Instruction &I : *NewBlocks[i])
1726       RemapInstruction(&I, VMap,
1727                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1728 
1729   // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1730   // the top-level region but we don't need to add PHIs. The trivial PHIs
1731   // inserted above will be updated here.
1732   if (ExitBlock)
1733     for (PHINode &PN : ExitBlock->phis())
1734       for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1735            ++I) {
1736         BasicBlock *Pred = PN.getIncomingBlock(I);
1737         if (LastRegion->contains(Pred)) {
1738           Value *V = PN.getIncomingValue(I);
1739           auto It = VMap.find(V);
1740           if (It != VMap.end()) V = It->second;
1741           assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1742           PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1743         }
1744       }
1745 }
1746 
1747 // A helper for transformScope. Replace the old (placeholder) branch with the
1748 // new (merged) conditional branch.
1749 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1750                                     BasicBlock *EntryBlock,
1751                                     BasicBlock *NewEntryBlock,
1752                                     ValueToValueMapTy &VMap) {
1753   BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1754   assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1755          "SplitBlock did not work correctly!");
1756   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1757          "NewEntryBlock's only pred must be EntryBlock");
1758   assert(VMap.find(NewEntryBlock) != VMap.end() &&
1759          "NewEntryBlock must have been copied");
1760   OldBR->dropAllReferences();
1761   OldBR->eraseFromParent();
1762   // The true predicate is a placeholder. It will be replaced later in
1763   // fixupBranchesAndSelects().
1764   BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1765                                          cast<BasicBlock>(VMap[NewEntryBlock]),
1766                                          ConstantInt::getTrue(F.getContext()));
1767   PreEntryBlock->getInstList().push_back(NewBR);
1768   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1769          "NewEntryBlock's only pred must be EntryBlock");
1770   return NewBR;
1771 }
1772 
1773 // A helper for transformScopes. Create the combined branch condition and
1774 // constant-fold the branches/selects in the hot path.
1775 void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1776                                   BasicBlock *PreEntryBlock,
1777                                   BranchInst *MergedBR,
1778                                   uint64_t ProfileCount) {
1779   Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1780   BranchProbability CHRBranchBias(1, 1);
1781   uint64_t NumCHRedBranches = 0;
1782   IRBuilder<> IRB(PreEntryBlock->getTerminator());
1783   for (RegInfo &RI : Scope->CHRRegions) {
1784     Region *R = RI.R;
1785     if (RI.HasBranch) {
1786       fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1787       ++NumCHRedBranches;
1788     }
1789     for (SelectInst *SI : RI.Selects) {
1790       fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1791       ++NumCHRedBranches;
1792     }
1793   }
1794   Stats.NumBranchesDelta += NumCHRedBranches - 1;
1795   Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1796   MergedBR->setCondition(MergedCondition);
1797   SmallVector<uint32_t, 2> Weights;
1798   Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
1799   Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
1800   MDBuilder MDB(F.getContext());
1801   MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1802   CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1803             << "\n");
1804 }
1805 
1806 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1807 // and constant-fold a branch in the hot path.
1808 void CHR::fixupBranch(Region *R, CHRScope *Scope,
1809                       IRBuilder<> &IRB,
1810                       Value *&MergedCondition,
1811                       BranchProbability &CHRBranchBias) {
1812   bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1813   assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1814          "Must be truthy or falsy");
1815   auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1816   assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1817          "Must be in the bias map");
1818   BranchProbability Bias = BranchBiasMap[R];
1819   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1820   // Take the min.
1821   if (CHRBranchBias > Bias)
1822     CHRBranchBias = Bias;
1823   BasicBlock *IfThen = BI->getSuccessor(1);
1824   BasicBlock *IfElse = BI->getSuccessor(0);
1825   BasicBlock *RegionExitBlock = R->getExit();
1826   assert(RegionExitBlock && "Null ExitBlock");
1827   assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1828          IfThen != IfElse && "Invariant from findScopes");
1829   if (IfThen == RegionExitBlock) {
1830     // Swap them so that IfThen means going into it and IfElse means skipping
1831     // it.
1832     std::swap(IfThen, IfElse);
1833   }
1834   CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1835             << " IfElse " << IfElse->getName() << "\n");
1836   Value *Cond = BI->getCondition();
1837   BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1838   bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1839   addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1840                        MergedCondition);
1841   // Constant-fold the branch at ClonedEntryBlock.
1842   assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1843          "The successor shouldn't change");
1844   Value *NewCondition = ConditionTrue ?
1845                         ConstantInt::getTrue(F.getContext()) :
1846                         ConstantInt::getFalse(F.getContext());
1847   BI->setCondition(NewCondition);
1848 }
1849 
1850 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1851 // and constant-fold a select in the hot path.
1852 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1853                       IRBuilder<> &IRB,
1854                       Value *&MergedCondition,
1855                       BranchProbability &CHRBranchBias) {
1856   bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1857   assert((IsTrueBiased ||
1858           Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1859   assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1860          "Must be in the bias map");
1861   BranchProbability Bias = SelectBiasMap[SI];
1862   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1863   // Take the min.
1864   if (CHRBranchBias > Bias)
1865     CHRBranchBias = Bias;
1866   Value *Cond = SI->getCondition();
1867   addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1868                        MergedCondition);
1869   Value *NewCondition = IsTrueBiased ?
1870                         ConstantInt::getTrue(F.getContext()) :
1871                         ConstantInt::getFalse(F.getContext());
1872   SI->setCondition(NewCondition);
1873 }
1874 
1875 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1876 // condition.
1877 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1878                                Instruction *BranchOrSelect,
1879                                CHRScope *Scope,
1880                                IRBuilder<> &IRB,
1881                                Value *&MergedCondition) {
1882   if (IsTrueBiased) {
1883     MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1884   } else {
1885     // If Cond is an icmp and all users of V except for BranchOrSelect is a
1886     // branch, negate the icmp predicate and swap the branch targets and avoid
1887     // inserting an Xor to negate Cond.
1888     bool Done = false;
1889     if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1890       if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1891         MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1892         Done = true;
1893       }
1894     if (!Done) {
1895       Value *Negate = IRB.CreateXor(
1896           ConstantInt::getTrue(F.getContext()), Cond);
1897       MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1898     }
1899   }
1900 }
1901 
1902 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1903   unsigned i = 0;
1904   (void)(i); // Unused in release build.
1905   DenseSet<PHINode *> TrivialPHIs;
1906   for (CHRScope *Scope : CHRScopes) {
1907     transformScopes(Scope, TrivialPHIs);
1908     CHR_DEBUG(
1909         std::ostringstream oss;
1910         oss << " after transformScopes " << i++;
1911         dumpIR(F, oss.str().c_str(), nullptr));
1912   }
1913 }
1914 
1915 static void LLVM_ATTRIBUTE_UNUSED
1916 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1917   dbgs() << Label << " " << Scopes.size() << "\n";
1918   for (CHRScope *Scope : Scopes) {
1919     dbgs() << *Scope << "\n";
1920   }
1921 }
1922 
1923 bool CHR::run() {
1924   if (!shouldApply(F, PSI))
1925     return false;
1926 
1927   CHR_DEBUG(dumpIR(F, "before", nullptr));
1928 
1929   bool Changed = false;
1930   {
1931     CHR_DEBUG(
1932         dbgs() << "RegionInfo:\n";
1933         RI.print(dbgs()));
1934 
1935     // Recursively traverse the region tree and find regions that have biased
1936     // branches and/or selects and create scopes.
1937     SmallVector<CHRScope *, 8> AllScopes;
1938     findScopes(AllScopes);
1939     CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
1940 
1941     // Split the scopes if 1) the conditiona values of the biased
1942     // branches/selects of the inner/lower scope can't be hoisted up to the
1943     // outermost/uppermost scope entry, or 2) the condition values of the biased
1944     // branches/selects in a scope (including subscopes) don't share at least
1945     // one common value.
1946     SmallVector<CHRScope *, 8> SplitScopes;
1947     splitScopes(AllScopes, SplitScopes);
1948     CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
1949 
1950     // After splitting, set the biased regions and selects of a scope (a tree
1951     // root) that include those of the subscopes.
1952     classifyBiasedScopes(SplitScopes);
1953     CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
1954 
1955     // Filter out the scopes that has only one biased region or select (CHR
1956     // isn't useful in such a case).
1957     SmallVector<CHRScope *, 8> FilteredScopes;
1958     filterScopes(SplitScopes, FilteredScopes);
1959     CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
1960 
1961     // Set the regions to be CHR'ed and their hoist stops for each scope.
1962     SmallVector<CHRScope *, 8> SetScopes;
1963     setCHRRegions(FilteredScopes, SetScopes);
1964     CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
1965 
1966     // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
1967     // ones. We need to apply CHR from outer to inner so that we apply CHR only
1968     // to the hot path, rather than both hot and cold paths.
1969     SmallVector<CHRScope *, 8> SortedScopes;
1970     sortScopes(SetScopes, SortedScopes);
1971     CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
1972 
1973     CHR_DEBUG(
1974         dbgs() << "RegionInfo:\n";
1975         RI.print(dbgs()));
1976 
1977     // Apply the CHR transformation.
1978     if (!SortedScopes.empty()) {
1979       transformScopes(SortedScopes);
1980       Changed = true;
1981     }
1982   }
1983 
1984   if (Changed)
1985     CHR_DEBUG(dumpIR(F, "after", &Stats));
1986 
1987   return Changed;
1988 }
1989 
1990 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
1991   BlockFrequencyInfo &BFI =
1992       getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
1993   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1994   ProfileSummaryInfo &PSI =
1995       *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1996   RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1997   return CHR(F, BFI, DT, PSI, RI).run();
1998 }
1999 
2000 namespace llvm {
2001 
2002 ControlHeightReductionPass::ControlHeightReductionPass() {
2003   ParseCHRFilterFiles();
2004 }
2005 
2006 PreservedAnalyses ControlHeightReductionPass::run(
2007     Function &F,
2008     FunctionAnalysisManager &FAM) {
2009   auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2010   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2011   auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2012   auto &MAM = MAMProxy.getManager();
2013   auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2014   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2015   bool Changed = CHR(F, BFI, DT, PSI, RI).run();
2016   if (!Changed)
2017     return PreservedAnalyses::all();
2018   auto PA = PreservedAnalyses();
2019   PA.preserve<GlobalsAA>();
2020   return PA;
2021 }
2022 
2023 } // namespace llvm
2024