xref: /llvm-project/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp (revision c8f348cba7035286ab49d3a2e5d82439ffd814ec)
1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2 //
3 //                     The LLVM Compiler Infrastructure
4 //
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
7 //
8 //===----------------------------------------------------------------------===//
9 //
10 // This pass merges conditional blocks of code and reduces the number of
11 // conditional branches in the hot paths based on profiles.
12 //
13 //===----------------------------------------------------------------------===//
14 
15 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
16 #include "llvm/Transforms/Utils.h"
17 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
18 #include "llvm/Transforms/Utils/Cloning.h"
19 #include "llvm/Transforms/Utils/ValueMapper.h"
20 #include "llvm/ADT/DenseMap.h"
21 #include "llvm/ADT/DenseSet.h"
22 #include "llvm/ADT/SmallVector.h"
23 #include "llvm/ADT/StringSet.h"
24 #include "llvm/Analysis/BlockFrequencyInfo.h"
25 #include "llvm/Analysis/ProfileSummaryInfo.h"
26 #include "llvm/Analysis/RegionInfo.h"
27 #include "llvm/Analysis/RegionIterator.h"
28 #include "llvm/Analysis/ValueTracking.h"
29 #include "llvm/IR/CFG.h"
30 #include "llvm/IR/Dominators.h"
31 #include "llvm/IR/IRBuilder.h"
32 #include "llvm/IR/MDBuilder.h"
33 #include "llvm/Support/BranchProbability.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Transforms/Scalar.h"
36 
37 #if !defined(_MSC_VER)
38 #include <cxxabi.h>
39 #endif
40 #include <set>
41 #include <sstream>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "chr"
46 
47 #define CHR_DEBUG(X) LLVM_DEBUG(X)
48 
49 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
50                               cl::desc("Apply CHR for all functions"));
51 
52 static cl::opt<double> CHRBiasThreshold(
53     "chr-bias-threshold", cl::init(0.99), cl::Hidden,
54     cl::desc("CHR considers a branch bias greater than this ratio as biased"));
55 
56 static cl::opt<unsigned> CHRMergeThreshold(
57     "chr-merge-threshold", cl::init(2), cl::Hidden,
58     cl::desc("CHR merges a group of N branches/selects where N >= this value"));
59 
60 static cl::opt<std::string> CHRModuleList(
61     "chr-module-list", cl::init(""), cl::Hidden,
62     cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
63 
64 static cl::opt<std::string> CHRFunctionList(
65     "chr-function-list", cl::init(""), cl::Hidden,
66     cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
67 
68 static StringSet<> CHRModules;
69 static StringSet<> CHRFunctions;
70 
71 static void ParseCHRFilterFiles() {
72   if (!CHRModuleList.empty()) {
73     auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
74     if (!FileOrErr) {
75       errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
76       std::exit(1);
77     }
78     StringRef Buf = FileOrErr->get()->getBuffer();
79     SmallVector<StringRef, 0> Lines;
80     Buf.split(Lines, '\n');
81     for (StringRef Line : Lines) {
82       Line = Line.trim();
83       if (!Line.empty())
84         CHRModules.insert(Line);
85     }
86   }
87   if (!CHRFunctionList.empty()) {
88     auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
89     if (!FileOrErr) {
90       errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
91       std::exit(1);
92     }
93     StringRef Buf = FileOrErr->get()->getBuffer();
94     SmallVector<StringRef, 0> Lines;
95     Buf.split(Lines, '\n');
96     for (StringRef Line : Lines) {
97       Line = Line.trim();
98       if (!Line.empty())
99         CHRFunctions.insert(Line);
100     }
101   }
102 }
103 
104 namespace {
105 class ControlHeightReductionLegacyPass : public FunctionPass {
106 public:
107   static char ID;
108 
109   ControlHeightReductionLegacyPass() : FunctionPass(ID) {
110     initializeControlHeightReductionLegacyPassPass(
111         *PassRegistry::getPassRegistry());
112     ParseCHRFilterFiles();
113   }
114 
115   bool runOnFunction(Function &F) override;
116   void getAnalysisUsage(AnalysisUsage &AU) const override {
117     AU.addRequired<BlockFrequencyInfoWrapperPass>();
118     AU.addRequired<DominatorTreeWrapperPass>();
119     AU.addRequired<ProfileSummaryInfoWrapperPass>();
120     AU.addRequired<RegionInfoPass>();
121     AU.addPreserved<GlobalsAAWrapperPass>();
122   }
123 };
124 } // end anonymous namespace
125 
126 char ControlHeightReductionLegacyPass::ID = 0;
127 
128 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
129                       "chr",
130                       "Reduce control height in the hot paths",
131                       false, false)
132 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
133 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
134 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
135 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
136 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
137                     "chr",
138                     "Reduce control height in the hot paths",
139                     false, false)
140 
141 FunctionPass *llvm::createControlHeightReductionLegacyPass() {
142   return new ControlHeightReductionLegacyPass();
143 }
144 
145 namespace {
146 
147 struct CHRStats {
148   CHRStats() : NumBranches(0), NumBranchesDelta(0),
149                WeightedNumBranchesDelta(0) {}
150   void print(raw_ostream &OS) const {
151     OS << "CHRStats: NumBranches " << NumBranches
152        << " NumBranchesDelta " << NumBranchesDelta
153        << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
154   }
155   uint64_t NumBranches;       // The original number of conditional branches /
156                               // selects
157   uint64_t NumBranchesDelta;  // The decrease of the number of conditional
158                               // branches / selects in the hot paths due to CHR.
159   uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
160                                      // count at the scope entry.
161 };
162 
163 inline raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
164                                                      const CHRStats &Stats) {
165   Stats.print(OS);
166   return OS;
167 }
168 
169 // RegInfo - some properties of a Region.
170 struct RegInfo {
171   RegInfo() : R(nullptr), HasBranch(false) {}
172   RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
173   Region *R;
174   bool HasBranch;
175   SmallVector<SelectInst *, 8> Selects;
176 };
177 
178 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
179 
180 // CHRScope - a sequence of regions to CHR together. It corresponds to a
181 // sequence of conditional blocks. It can have subscopes which correspond to
182 // nested conditional blocks. Nested CHRScopes form a tree.
183 class CHRScope {
184  public:
185   CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
186     assert(RI.R && "Null RegionIn");
187     RegInfos.push_back(RI);
188   }
189 
190   Region *getParentRegion() {
191     assert(RegInfos.size() > 0 && "Empty CHRScope");
192     Region *Parent = RegInfos[0].R->getParent();
193     assert(Parent && "Unexpected to call this on the top-level region");
194     return Parent;
195   }
196 
197   BasicBlock *getEntryBlock() {
198     assert(RegInfos.size() > 0 && "Empty CHRScope");
199     return RegInfos.front().R->getEntry();
200   }
201 
202   BasicBlock *getExitBlock() {
203     assert(RegInfos.size() > 0 && "Empty CHRScope");
204     return RegInfos.back().R->getExit();
205   }
206 
207   bool appendable(CHRScope *Next) {
208     // The next scope is appendable only if this scope is directly connected to
209     // it (which implies it post-dominates this scope) and this scope dominates
210     // it (no edge to the next scope outside this scope).
211     BasicBlock *NextEntry = Next->getEntryBlock();
212     if (getExitBlock() != NextEntry)
213       // Not directly connected.
214       return false;
215     Region *LastRegion = RegInfos.back().R;
216     for (BasicBlock *Pred : predecessors(NextEntry))
217       if (!LastRegion->contains(Pred))
218         // There's an edge going into the entry of the next scope from outside
219         // of this scope.
220         return false;
221     return true;
222   }
223 
224   void append(CHRScope *Next) {
225     assert(RegInfos.size() > 0 && "Empty CHRScope");
226     assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
227     assert(getParentRegion() == Next->getParentRegion() &&
228            "Must be siblings");
229     assert(getExitBlock() == Next->getEntryBlock() &&
230            "Must be adjacent");
231     for (RegInfo &RI : Next->RegInfos)
232       RegInfos.push_back(RI);
233     for (CHRScope *Sub : Next->Subs)
234       Subs.push_back(Sub);
235   }
236 
237   void addSub(CHRScope *SubIn) {
238 #ifndef NDEBUG
239     bool is_child = false;
240     for (RegInfo &RI : RegInfos)
241       if (RI.R == SubIn->getParentRegion()) {
242         is_child = true;
243         break;
244       }
245     assert(is_child && "Must be a child");
246 #endif
247     Subs.push_back(SubIn);
248   }
249 
250   // Split this scope at the boundary region into two, which will belong to the
251   // tail and returns the tail.
252   CHRScope *split(Region *Boundary) {
253     assert(Boundary && "Boundary null");
254     assert(RegInfos.begin()->R != Boundary &&
255            "Can't be split at beginning");
256     auto BoundaryIt = std::find_if(RegInfos.begin(), RegInfos.end(),
257                                    [&Boundary](const RegInfo& RI) {
258                                      return Boundary == RI.R;
259                                    });
260     if (BoundaryIt == RegInfos.end())
261       return nullptr;
262     SmallVector<RegInfo, 8> TailRegInfos;
263     SmallVector<CHRScope *, 8> TailSubs;
264     TailRegInfos.insert(TailRegInfos.begin(), BoundaryIt, RegInfos.end());
265     RegInfos.resize(BoundaryIt - RegInfos.begin());
266     DenseSet<Region *> TailRegionSet;
267     for (RegInfo &RI : TailRegInfos)
268       TailRegionSet.insert(RI.R);
269     for (auto It = Subs.begin(); It != Subs.end(); ) {
270       CHRScope *Sub = *It;
271       assert(Sub && "null Sub");
272       Region *Parent = Sub->getParentRegion();
273       if (TailRegionSet.count(Parent)) {
274         TailSubs.push_back(Sub);
275         It = Subs.erase(It);
276       } else {
277         assert(std::find_if(RegInfos.begin(), RegInfos.end(),
278                             [&Parent](const RegInfo& RI) {
279                               return Parent == RI.R;
280                             }) != RegInfos.end() &&
281                "Must be in head");
282         ++It;
283       }
284     }
285     assert(HoistStopMap.empty() && "MapHoistStops must be empty");
286     return new CHRScope(TailRegInfos, TailSubs);
287   }
288 
289   bool contains(Instruction *I) const {
290     BasicBlock *Parent = I->getParent();
291     for (const RegInfo &RI : RegInfos)
292       if (RI.R->contains(Parent))
293         return true;
294     return false;
295   }
296 
297   void print(raw_ostream &OS) const;
298 
299   SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
300   SmallVector<CHRScope *, 8> Subs;  // Subscopes.
301 
302   // The instruction at which to insert the CHR conditional branch (and hoist
303   // the dependent condition values).
304   Instruction *BranchInsertPoint;
305 
306   // True-biased and false-biased regions (conditional blocks),
307   // respectively. Used only for the outermost scope and includes regions in
308   // subscopes. The rest are unbiased.
309   DenseSet<Region *> TrueBiasedRegions;
310   DenseSet<Region *> FalseBiasedRegions;
311   // Among the biased regions, the regions that get CHRed.
312   SmallVector<RegInfo, 8> CHRRegions;
313 
314   // True-biased and false-biased selects, respectively. Used only for the
315   // outermost scope and includes ones in subscopes.
316   DenseSet<SelectInst *> TrueBiasedSelects;
317   DenseSet<SelectInst *> FalseBiasedSelects;
318 
319   // Map from one of the above regions to the instructions to stop
320   // hoisting instructions at through use-def chains.
321   HoistStopMapTy HoistStopMap;
322 
323  private:
324   CHRScope(SmallVector<RegInfo, 8> &RegInfosIn,
325            SmallVector<CHRScope *, 8> &SubsIn)
326     : RegInfos(RegInfosIn), Subs(SubsIn), BranchInsertPoint(nullptr) {}
327 };
328 
329 inline raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
330   Scope.print(OS);
331   return OS;
332 }
333 
334 class CHR {
335  public:
336   CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
337       ProfileSummaryInfo &PSIin, RegionInfo &RIin)
338       : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin) {}
339 
340   ~CHR() {
341     for (CHRScope *Scope : Scopes) {
342       delete Scope;
343     }
344   }
345 
346   bool run();
347 
348  private:
349   // See the comments in CHR::run() for the high level flow of the algorithm and
350   // what the following functions do.
351 
352   void findScopes(SmallVectorImpl<CHRScope *> &Output) {
353     Region *R = RI.getTopLevelRegion();
354     CHRScope *Scope = findScopes(R, nullptr, nullptr, Output);
355     if (Scope) {
356       Output.push_back(Scope);
357     }
358   }
359   CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
360                         SmallVectorImpl<CHRScope *> &Scopes);
361   CHRScope *findScope(Region *R);
362   void checkScopeHoistable(CHRScope *Scope);
363 
364   void splitScopes(SmallVectorImpl<CHRScope *> &Input,
365                    SmallVectorImpl<CHRScope *> &Output);
366   SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
367                                         CHRScope *Outer,
368                                         DenseSet<Value *> *OuterConditionValues,
369                                         Instruction *OuterInsertPoint,
370                                         SmallVectorImpl<CHRScope *> &Output,
371                                         DenseSet<Instruction *> &Unhoistables);
372 
373   void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
374   void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
375 
376   void filterScopes(SmallVectorImpl<CHRScope *> &Input,
377                     SmallVectorImpl<CHRScope *> &Output);
378 
379   void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
380                      SmallVectorImpl<CHRScope *> &Output);
381   void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
382 
383   void sortScopes(SmallVectorImpl<CHRScope *> &Input,
384                   SmallVectorImpl<CHRScope *> &Output);
385 
386   void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
387   void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
388   void cloneScopeBlocks(CHRScope *Scope,
389                         BasicBlock *PreEntryBlock,
390                         BasicBlock *ExitBlock,
391                         Region *LastRegion,
392                         ValueToValueMapTy &VMap);
393   BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
394                                  BasicBlock *EntryBlock,
395                                  BasicBlock *NewEntryBlock,
396                                  ValueToValueMapTy &VMap);
397   void fixupBranchesAndSelects(CHRScope *Scope,
398                                BasicBlock *PreEntryBlock,
399                                BranchInst *MergedBR,
400                                uint64_t ProfileCount);
401   void fixupBranch(Region *R,
402                    CHRScope *Scope,
403                    IRBuilder<> &IRB,
404                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
405   void fixupSelect(SelectInst* SI,
406                    CHRScope *Scope,
407                    IRBuilder<> &IRB,
408                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
409   void addToMergedCondition(bool IsTrueBiased, Value *Cond,
410                             Instruction *BranchOrSelect,
411                             CHRScope *Scope,
412                             IRBuilder<> &IRB,
413                             Value *&MergedCondition);
414 
415   Function &F;
416   BlockFrequencyInfo &BFI;
417   DominatorTree &DT;
418   ProfileSummaryInfo &PSI;
419   RegionInfo &RI;
420   CHRStats Stats;
421 
422   // All the true-biased regions in the function
423   DenseSet<Region *> TrueBiasedRegionsGlobal;
424   // All the false-biased regions in the function
425   DenseSet<Region *> FalseBiasedRegionsGlobal;
426   // All the true-biased selects in the function
427   DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
428   // All the false-biased selects in the function
429   DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
430   // A map from biased regions to their branch bias
431   DenseMap<Region *, BranchProbability> BranchBiasMap;
432   // A map from biased selects to their branch bias
433   DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
434   // All the scopes.
435   DenseSet<CHRScope *> Scopes;
436 };
437 
438 } // end anonymous namespace
439 
440 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
441   if (ForceCHR)
442     return true;
443 
444   if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
445     if (CHRModules.count(F.getParent()->getName()))
446       return true;
447     StringRef Name = F.getName();
448     if (CHRFunctions.count(Name))
449       return true;
450     const char* DemangledName = nullptr;
451 #if !defined(_MSC_VER)
452     int Status = -1;
453     DemangledName = abi::__cxa_demangle(Name.str().c_str(),
454                                         nullptr, nullptr, &Status);
455 #endif
456     return DemangledName && CHRFunctions.count(DemangledName);
457   }
458 
459   assert(PSI.hasProfileSummary() && "Empty PSI?");
460   return PSI.isFunctionEntryHot(&F);
461 }
462 
463 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
464                                          CHRStats *Stats) {
465   std::string Name = F.getName().str();
466   const char *DemangledName = nullptr;
467 #if !defined(_MSC_VER)
468   int Status = -1;
469   DemangledName = abi::__cxa_demangle(Name.c_str(),
470                                       nullptr, nullptr, &Status);
471 #endif
472   if (DemangledName == nullptr) {
473     DemangledName = "<NOT-MANGLED>";
474   }
475   std::string ModuleName = F.getParent()->getName().str();
476   CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
477             << Name);
478   if (Stats)
479     CHR_DEBUG(dbgs() << " " << *Stats);
480   CHR_DEBUG(dbgs() << "\n");
481   CHR_DEBUG(F.dump());
482 }
483 
484 void CHRScope::print(raw_ostream &OS) const {
485   assert(RegInfos.size() > 0 && "Empty CHRScope");
486   OS << "CHRScope[";
487   OS << RegInfos.size() << ", Regions[";
488   for (const RegInfo &RI : RegInfos) {
489     OS << RI.R->getNameStr();
490     if (RI.HasBranch)
491       OS << " B";
492     if (RI.Selects.size() > 0)
493       OS << " S" << RI.Selects.size();
494     OS << ", ";
495   }
496   if (RegInfos[0].R->getParent()) {
497     OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
498   } else {
499     // top level region
500     OS << "]";
501   }
502   OS << ", Subs[";
503   for (CHRScope *Sub : Subs) {
504     OS << *Sub << ", ";
505   }
506   OS << "]]";
507 }
508 
509 // Return true if the given instruction type can be hoisted by CHR.
510 static bool isHoistableInstructionType(Instruction *I) {
511   return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
512       isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
513       isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
514       isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
515       isa<InsertValueInst>(I);
516 }
517 
518 // Return true if the given instruction can be hoisted by CHR.
519 static bool isHoistable(Instruction *I, DominatorTree &DT) {
520   if (!isHoistableInstructionType(I))
521     return false;
522   return isSafeToSpeculativelyExecute(I, nullptr, &DT);
523 }
524 
525 // Recursively traverse the use-def chains of the given value and return a set
526 // of the unhoistable base values defined within the scope (excluding the
527 // first-region entry block) or the (hoistable or unhoistable) base values that
528 // are defined outside (including the first-region entry block) of the
529 // scope. The returned set doesn't include constants.
530 static std::set<Value *> getBaseValues(Value *V,
531                                        DominatorTree &DT) {
532   std::set<Value *> Result;
533   if (auto *I = dyn_cast<Instruction>(V)) {
534     // We don't stop at a block that's not in the Scope because we would miss some
535     // instructions that are based on the same base values if we stop there.
536     if (!isHoistable(I, DT)) {
537       Result.insert(I);
538       return Result;
539     }
540     // I is hoistable above the Scope.
541     for (Value *Op : I->operands()) {
542       std::set<Value *> OpResult = getBaseValues(Op, DT);
543       Result.insert(OpResult.begin(), OpResult.end());
544     }
545     return Result;
546   }
547   if (isa<Argument>(V)) {
548     Result.insert(V);
549     return Result;
550   }
551   // We don't include others like constants because those won't lead to any
552   // chance of folding of conditions (eg two bit checks merged into one check)
553   // after CHR.
554   return Result;  // empty
555 }
556 
557 // Return true if V is already hoisted or can be hoisted (along with its
558 // operands) above the insert point. When it returns true and HoistStops is
559 // non-null, the instructions to stop hoisting at through the use-def chains are
560 // inserted into HoistStops.
561 static bool
562 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
563                 DenseSet<Instruction *> &Unhoistables,
564                 DenseSet<Instruction *> *HoistStops) {
565   assert(InsertPoint && "Null InsertPoint");
566   if (auto *I = dyn_cast<Instruction>(V)) {
567     assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
568     assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
569     if (Unhoistables.count(I)) {
570       // Don't hoist if they are not to be hoisted.
571       return false;
572     }
573     if (DT.dominates(I, InsertPoint)) {
574       // We are already above the insert point. Stop here.
575       if (HoistStops)
576         HoistStops->insert(I);
577       return true;
578     }
579     // We aren't not above the insert point, check if we can hoist it above the
580     // insert point.
581     if (isHoistable(I, DT)) {
582       // Check operands first.
583       DenseSet<Instruction *> OpsHoistStops;
584       bool AllOpsHoisted = true;
585       for (Value *Op : I->operands()) {
586         if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops)) {
587           AllOpsHoisted = false;
588           break;
589         }
590       }
591       if (AllOpsHoisted) {
592         CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
593         if (HoistStops)
594           HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
595         return true;
596       }
597     }
598     return false;
599   }
600   // Non-instructions are considered hoistable.
601   return true;
602 }
603 
604 // Returns true and sets the true probability and false probability of an
605 // MD_prof metadata if it's well-formed.
606 static bool CheckMDProf(MDNode *MD, BranchProbability &TrueProb,
607                         BranchProbability &FalseProb) {
608   if (!MD) return false;
609   MDString *MDName = cast<MDString>(MD->getOperand(0));
610   if (MDName->getString() != "branch_weights" ||
611       MD->getNumOperands() != 3)
612     return false;
613   ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
614   ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
615   if (!TrueWeight || !FalseWeight)
616     return false;
617   APInt TrueWt = TrueWeight->getValue();
618   APInt FalseWt = FalseWeight->getValue();
619   APInt SumWt = TrueWt + FalseWt;
620   TrueProb = BranchProbability::getBranchProbability(TrueWt.getZExtValue(),
621                                                      SumWt.getZExtValue());
622   FalseProb = BranchProbability::getBranchProbability(FalseWt.getZExtValue(),
623                                                       SumWt.getZExtValue());
624   return true;
625 }
626 
627 static BranchProbability getCHRBiasThreshold() {
628   return BranchProbability::getBranchProbability(
629       static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
630 }
631 
632 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
633 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
634 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
635 // false.
636 template<typename K, typename S, typename M>
637 bool CheckBias(K *Key, BranchProbability TrueProb, BranchProbability FalseProb,
638                S &TrueSet, S &FalseSet, M &BiasMap) {
639   BranchProbability Threshold = getCHRBiasThreshold();
640   if (TrueProb >= Threshold) {
641     TrueSet.insert(Key);
642     BiasMap[Key] = TrueProb;
643     return true;
644   } else if (FalseProb >= Threshold) {
645     FalseSet.insert(Key);
646     BiasMap[Key] = FalseProb;
647     return true;
648   }
649   return false;
650 }
651 
652 // Returns true and insert a region into the right biased set and the map if the
653 // branch of the region is biased.
654 static bool CheckBiasedBranch(BranchInst *BI, Region *R,
655                               DenseSet<Region *> &TrueBiasedRegionsGlobal,
656                               DenseSet<Region *> &FalseBiasedRegionsGlobal,
657                               DenseMap<Region *, BranchProbability> &BranchBiasMap) {
658   if (!BI->isConditional())
659     return false;
660   BranchProbability ThenProb, ElseProb;
661   if (!CheckMDProf(BI->getMetadata(LLVMContext::MD_prof),
662                    ThenProb, ElseProb))
663     return false;
664   BasicBlock *IfThen = BI->getSuccessor(0);
665   BasicBlock *IfElse = BI->getSuccessor(1);
666   assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
667          IfThen != IfElse &&
668          "Invariant from findScopes");
669   if (IfThen == R->getExit()) {
670     // Swap them so that IfThen/ThenProb means going into the conditional code
671     // and IfElse/ElseProb means skipping it.
672     std::swap(IfThen, IfElse);
673     std::swap(ThenProb, ElseProb);
674   }
675   CHR_DEBUG(dbgs() << "BI " << *BI << " ");
676   CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
677   CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
678   return CheckBias(R, ThenProb, ElseProb,
679                    TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
680                    BranchBiasMap);
681 }
682 
683 // Returns true and insert a select into the right biased set and the map if the
684 // select is biased.
685 static bool CheckBiasedSelect(
686     SelectInst *SI, Region *R,
687     DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
688     DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
689     DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
690   BranchProbability TrueProb, FalseProb;
691   if (!CheckMDProf(SI->getMetadata(LLVMContext::MD_prof),
692                    TrueProb, FalseProb))
693     return false;
694   CHR_DEBUG(dbgs() << "SI " << *SI << " ");
695   CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
696   CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
697   return CheckBias(SI, TrueProb, FalseProb,
698                    TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
699                    SelectBiasMap);
700 }
701 
702 // Returns the instruction at which to hoist the dependent condition values and
703 // insert the CHR branch for a region. This is the terminator branch in the
704 // entry block or the first select in the entry block, if any.
705 static Instruction* getBranchInsertPoint(RegInfo &RI) {
706   Region *R = RI.R;
707   BasicBlock *EntryBB = R->getEntry();
708   // The hoist point is by default the terminator of the entry block, which is
709   // the same as the branch instruction if RI.HasBranch is true.
710   Instruction *HoistPoint = EntryBB->getTerminator();
711   for (SelectInst *SI : RI.Selects) {
712     if (SI->getParent() == EntryBB) {
713       // Pick the first select in Selects in the entry block.  Note Selects is
714       // sorted in the instruction order within a block (asserted below).
715       HoistPoint = SI;
716       break;
717     }
718   }
719   assert(HoistPoint && "Null HoistPoint");
720 #ifndef NDEBUG
721   // Check that HoistPoint is the first one in Selects in the entry block,
722   // if any.
723   DenseSet<Instruction *> EntryBlockSelectSet;
724   for (SelectInst *SI : RI.Selects) {
725     if (SI->getParent() == EntryBB) {
726       EntryBlockSelectSet.insert(SI);
727     }
728   }
729   for (Instruction &I : *EntryBB) {
730     if (EntryBlockSelectSet.count(&I) > 0) {
731       assert(&I == HoistPoint &&
732              "HoistPoint must be the first one in Selects");
733       break;
734     }
735   }
736 #endif
737   return HoistPoint;
738 }
739 
740 // Find a CHR scope in the given region.
741 CHRScope * CHR::findScope(Region *R) {
742   CHRScope *Result = nullptr;
743   BasicBlock *Entry = R->getEntry();
744   BasicBlock *Exit = R->getExit();  // null if top level.
745   assert(Entry && "Entry must not be null");
746   assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
747          "Only top level region has a null exit");
748   if (Entry)
749     CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
750   else
751     CHR_DEBUG(dbgs() << "Entry null\n");
752   if (Exit)
753     CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
754   else
755     CHR_DEBUG(dbgs() << "Exit null\n");
756   // Exclude cases where Entry is part of a subregion (hence it doesn't belong
757   // to this region).
758   bool EntryInSubregion = RI.getRegionFor(Entry) != R;
759   if (EntryInSubregion)
760     return nullptr;
761   // Exclude loops
762   for (BasicBlock *Pred : predecessors(Entry))
763     if (R->contains(Pred))
764       return nullptr;
765   if (Exit) {
766     // Try to find an if-then block (check if R is an if-then).
767     // if (cond) {
768     //  ...
769     // }
770     auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
771     if (BI)
772       CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
773     else
774       CHR_DEBUG(dbgs() << "BI null\n");
775     if (BI && BI->isConditional()) {
776       BasicBlock *S0 = BI->getSuccessor(0);
777       BasicBlock *S1 = BI->getSuccessor(1);
778       CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
779       CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
780       if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
781         RegInfo RI(R);
782         RI.HasBranch = CheckBiasedBranch(
783             BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
784             BranchBiasMap);
785         Result = new CHRScope(RI);
786         Scopes.insert(Result);
787         CHR_DEBUG(dbgs() << "Found a region with a branch\n");
788         ++Stats.NumBranches;
789       }
790     }
791   }
792   {
793     // Try to look for selects in the direct child blocks (as opposed to in
794     // subregions) of R.
795     // ...
796     // if (..) { // Some subregion
797     //   ...
798     // }
799     // if (..) { // Some subregion
800     //   ...
801     // }
802     // ...
803     // a = cond ? b : c;
804     // ...
805     SmallVector<SelectInst *, 8> Selects;
806     for (RegionNode *E : R->elements()) {
807       if (E->isSubRegion())
808         continue;
809       // This returns the basic block of E if E is a direct child of R (not a
810       // subregion.)
811       BasicBlock *BB = E->getEntry();
812       // Need to push in the order to make it easier to find the first Select
813       // later.
814       for (Instruction &I : *BB) {
815         if (auto *SI = dyn_cast<SelectInst>(&I)) {
816           Selects.push_back(SI);
817           ++Stats.NumBranches;
818         }
819       }
820     }
821     if (Selects.size() > 0) {
822       auto AddSelects = [&](RegInfo &RI) {
823         for (auto *SI : Selects)
824           if (CheckBiasedSelect(SI, RI.R,
825                                 TrueBiasedSelectsGlobal,
826                                 FalseBiasedSelectsGlobal,
827                                 SelectBiasMap))
828             RI.Selects.push_back(SI);
829       };
830       if (!Result) {
831         CHR_DEBUG(dbgs() << "Found a select-only region\n");
832         RegInfo RI(R);
833         AddSelects(RI);
834         Result = new CHRScope(RI);
835         Scopes.insert(Result);
836       } else {
837         CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
838         AddSelects(Result->RegInfos[0]);
839       }
840     }
841   }
842 
843   if (Result) {
844     checkScopeHoistable(Result);
845   }
846   return Result;
847 }
848 
849 // Check that any of the branch and the selects in the region could be
850 // hoisted above the the CHR branch insert point (the most dominating of
851 // them, either the branch (at the end of the first block) or the first
852 // select in the first block). If the branch can't be hoisted, drop the
853 // selects in the first blocks.
854 //
855 // For example, for the following scope/region with selects, we want to insert
856 // the merged branch right before the first select in the first/entry block by
857 // hoisting c1, c2, c3, and c4.
858 //
859 // // Branch insert point here.
860 // a = c1 ? b : c; // Select 1
861 // d = c2 ? e : f; // Select 2
862 // if (c3) { // Branch
863 //   ...
864 //   c4 = foo() // A call.
865 //   g = c4 ? h : i; // Select 3
866 // }
867 //
868 // But suppose we can't hoist c4 because it's dependent on the preceding
869 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
870 // Select 2. If we can't hoist c3, we drop Selects 1 & 2.
871 void CHR::checkScopeHoistable(CHRScope *Scope) {
872   RegInfo &RI = Scope->RegInfos[0];
873   Region *R = RI.R;
874   BasicBlock *EntryBB = R->getEntry();
875   auto *Branch = RI.HasBranch ?
876                  cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
877   SmallVector<SelectInst *, 8> &Selects = RI.Selects;
878   if (RI.HasBranch || !Selects.empty()) {
879     Instruction *InsertPoint = getBranchInsertPoint(RI);
880     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
881     // Avoid a data dependence from a select or a branch to a(nother)
882     // select. Note no instruction can't data-depend on a branch (a branch
883     // instruction doesn't produce a value).
884     DenseSet<Instruction *> Unhoistables;
885     // Initialize Unhoistables with the selects.
886     for (SelectInst *SI : Selects) {
887       Unhoistables.insert(SI);
888     }
889     // Remove Selects that can't be hoisted.
890     for (auto it = Selects.begin(); it != Selects.end(); ) {
891       SelectInst *SI = *it;
892       if (SI == InsertPoint) {
893         ++it;
894         continue;
895       }
896       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
897                                          DT, Unhoistables, nullptr);
898       if (!IsHoistable) {
899         CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
900         it = Selects.erase(it);
901         // Since we are dropping the select here, we also drop it from
902         // Unhoistables.
903         Unhoistables.erase(SI);
904       } else
905         ++it;
906     }
907     // Update InsertPoint after potentially removing selects.
908     InsertPoint = getBranchInsertPoint(RI);
909     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
910     if (RI.HasBranch && InsertPoint != Branch) {
911       bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
912                                          DT, Unhoistables, nullptr);
913       if (!IsHoistable) {
914         // If the branch isn't hoistable, drop the selects in the entry
915         // block, preferring the branch, which makes the branch the hoist
916         // point.
917         assert(InsertPoint != Branch && "Branch must not be the hoist point");
918         CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
919         CHR_DEBUG(
920             for (SelectInst *SI : Selects) {
921               dbgs() << "SI " << *SI << "\n";
922             });
923         Selects.erase(std::remove_if(Selects.begin(), Selects.end(),
924                                      [EntryBB](SelectInst *SI) {
925                                        return SI->getParent() == EntryBB;
926                                      }), Selects.end());
927         Unhoistables.clear();
928         InsertPoint = Branch;
929       }
930     }
931     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
932 #ifndef NDEBUG
933     if (RI.HasBranch) {
934       assert(!DT.dominates(Branch, InsertPoint) &&
935              "Branch can't be already above the hoist point");
936       assert(checkHoistValue(Branch->getCondition(), InsertPoint,
937                              DT, Unhoistables, nullptr) &&
938              "checkHoistValue for branch");
939     }
940     for (auto *SI : Selects) {
941       assert(!DT.dominates(SI, InsertPoint) &&
942              "SI can't be already above the hoist point");
943       assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
944                              Unhoistables, nullptr) &&
945              "checkHoistValue for selects");
946     }
947     CHR_DEBUG(dbgs() << "Result\n");
948     if (RI.HasBranch) {
949       CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
950     }
951     for (auto *SI : Selects) {
952       CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
953     }
954 #endif
955   }
956 }
957 
958 // Traverse the region tree, find all nested scopes and merge them if possible.
959 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
960                            SmallVectorImpl<CHRScope *> &Scopes) {
961   CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
962   CHRScope *Result = findScope(R);
963   // Visit subscopes.
964   CHRScope *ConsecutiveSubscope = nullptr;
965   SmallVector<CHRScope *, 8> Subscopes;
966   for (auto It = R->begin(); It != R->end(); ++It) {
967     const std::unique_ptr<Region> &SubR = *It;
968     auto Next_It = std::next(It);
969     Region *NextSubR = Next_It != R->end() ? Next_It->get() : nullptr;
970     CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
971               << "\n");
972     CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
973     if (SubCHRScope) {
974       CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
975     } else {
976       CHR_DEBUG(dbgs() << "Subregion Scope null\n");
977     }
978     if (SubCHRScope) {
979       if (!ConsecutiveSubscope)
980         ConsecutiveSubscope = SubCHRScope;
981       else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
982         Subscopes.push_back(ConsecutiveSubscope);
983         ConsecutiveSubscope = SubCHRScope;
984       } else
985         ConsecutiveSubscope->append(SubCHRScope);
986     } else {
987       if (ConsecutiveSubscope) {
988         Subscopes.push_back(ConsecutiveSubscope);
989       }
990       ConsecutiveSubscope = nullptr;
991     }
992   }
993   if (ConsecutiveSubscope) {
994     Subscopes.push_back(ConsecutiveSubscope);
995   }
996   for (CHRScope *Sub : Subscopes) {
997     if (Result) {
998       // Combine it with the parent.
999       Result->addSub(Sub);
1000     } else {
1001       // Push Subscopes as they won't be combined with the parent.
1002       Scopes.push_back(Sub);
1003     }
1004   }
1005   return Result;
1006 }
1007 
1008 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1009   DenseSet<Value *> ConditionValues;
1010   if (RI.HasBranch) {
1011     auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1012     ConditionValues.insert(BI->getCondition());
1013   }
1014   for (SelectInst *SI : RI.Selects) {
1015     ConditionValues.insert(SI->getCondition());
1016   }
1017   return ConditionValues;
1018 }
1019 
1020 
1021 // Determine whether to split a scope depending on the sets of the branch
1022 // condition values of the previous region and the current region. We split
1023 // (return true) it if 1) the condition values of the inner/lower scope can't be
1024 // hoisted up to the outer/upper scope, or 2) the two sets of the condition
1025 // values have an empty intersection (because the combined branch conditions
1026 // won't probably lead to a simpler combined condition).
1027 static bool shouldSplit(Instruction *InsertPoint,
1028                         DenseSet<Value *> &PrevConditionValues,
1029                         DenseSet<Value *> &ConditionValues,
1030                         DominatorTree &DT,
1031                         DenseSet<Instruction *> &Unhoistables) {
1032   CHR_DEBUG(
1033       dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1034       for (Value *V : PrevConditionValues) {
1035         dbgs() << *V << ", ";
1036       }
1037       dbgs() << " ConditionValues ";
1038       for (Value *V : ConditionValues) {
1039         dbgs() << *V << ", ";
1040       }
1041       dbgs() << "\n");
1042   assert(InsertPoint && "Null InsertPoint");
1043   // If any of Bases isn't hoistable to the hoist point, split.
1044   for (Value *V : ConditionValues) {
1045     if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr)) {
1046       CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1047       return true; // Not hoistable, split.
1048     }
1049   }
1050   // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1051   // unnecessary splits at scopes with no branch/selects.  If
1052   // PrevConditionValues and ConditionValues don't intersect at all, split.
1053   if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1054     // Use std::set as DenseSet doesn't work with set_intersection.
1055     std::set<Value *> PrevBases, Bases;
1056     for (Value *V : PrevConditionValues) {
1057       std::set<Value *> BaseValues = getBaseValues(V, DT);
1058       PrevBases.insert(BaseValues.begin(), BaseValues.end());
1059     }
1060     for (Value *V : ConditionValues) {
1061       std::set<Value *> BaseValues = getBaseValues(V, DT);
1062       Bases.insert(BaseValues.begin(), BaseValues.end());
1063     }
1064     CHR_DEBUG(
1065         dbgs() << "PrevBases ";
1066         for (Value *V : PrevBases) {
1067           dbgs() << *V << ", ";
1068         }
1069         dbgs() << " Bases ";
1070         for (Value *V : Bases) {
1071           dbgs() << *V << ", ";
1072         }
1073         dbgs() << "\n");
1074     std::set<Value *> Intersection;
1075     std::set_intersection(PrevBases.begin(), PrevBases.end(),
1076                           Bases.begin(), Bases.end(),
1077                           std::inserter(Intersection, Intersection.begin()));
1078     if (Intersection.empty()) {
1079       // Empty intersection, split.
1080       CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1081       return true;
1082     }
1083   }
1084   CHR_DEBUG(dbgs() << "No split\n");
1085   return false;  // Don't split.
1086 }
1087 
1088 static void GetSelectsInScope(CHRScope *Scope,
1089                               DenseSet<Instruction *> &Output) {
1090   for (RegInfo &RI : Scope->RegInfos) {
1091     for (SelectInst *SI : RI.Selects) {
1092       Output.insert(SI);
1093     }
1094   }
1095   for (CHRScope *Sub : Scope->Subs) {
1096     GetSelectsInScope(Sub, Output);
1097   }
1098 }
1099 
1100 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1101                       SmallVectorImpl<CHRScope *> &Output) {
1102   for (CHRScope *Scope : Input) {
1103     assert(!Scope->BranchInsertPoint &&
1104            "BranchInsertPoint must not be set");
1105     DenseSet<Instruction *> Unhoistables;
1106     GetSelectsInScope(Scope, Unhoistables);
1107     splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1108   }
1109 #ifndef NDEBUG
1110   for (CHRScope *Scope : Output) {
1111     assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1112   }
1113 #endif
1114 }
1115 
1116 SmallVector<CHRScope *, 8> CHR::splitScope(
1117     CHRScope *Scope,
1118     CHRScope *Outer,
1119     DenseSet<Value *> *OuterConditionValues,
1120     Instruction *OuterInsertPoint,
1121     SmallVectorImpl<CHRScope *> &Output,
1122     DenseSet<Instruction *> &Unhoistables) {
1123   if (Outer) {
1124     assert(OuterConditionValues && "Null OuterConditionValues");
1125     assert(OuterInsertPoint && "Null OuterInsertPoint");
1126   }
1127   bool PrevSplitFromOuter = true;
1128   DenseSet<Value *> PrevConditionValues;
1129   Instruction *PrevInsertPoint = nullptr;
1130   SmallVector<CHRScope *, 8> Splits;
1131   SmallVector<bool, 8> SplitsSplitFromOuter;
1132   SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1133   SmallVector<Instruction *, 8> SplitsInsertPoints;
1134   SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos);  // Copy
1135   for (RegInfo &RI : RegInfos) {
1136     Instruction *InsertPoint = getBranchInsertPoint(RI);
1137     DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1138     CHR_DEBUG(
1139         dbgs() << "ConditionValues ";
1140         for (Value *V : ConditionValues) {
1141           dbgs() << *V << ", ";
1142         }
1143         dbgs() << "\n");
1144     if (RI.R == RegInfos[0].R) {
1145       // First iteration. Check to see if we should split from the outer.
1146       if (Outer) {
1147         CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1148         CHR_DEBUG(dbgs() << "Should split from outer at "
1149                   << RI.R->getNameStr() << "\n");
1150         if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1151                         ConditionValues, DT, Unhoistables)) {
1152           PrevConditionValues = ConditionValues;
1153           PrevInsertPoint = InsertPoint;
1154         } else {
1155           // Not splitting from the outer. Use the outer bases and insert
1156           // point. Union the bases.
1157           PrevSplitFromOuter = false;
1158           PrevConditionValues = *OuterConditionValues;
1159           PrevConditionValues.insert(ConditionValues.begin(),
1160                                      ConditionValues.end());
1161           PrevInsertPoint = OuterInsertPoint;
1162         }
1163       } else {
1164         CHR_DEBUG(dbgs() << "Outer null\n");
1165         PrevConditionValues = ConditionValues;
1166         PrevInsertPoint = InsertPoint;
1167       }
1168     } else {
1169       CHR_DEBUG(dbgs() << "Should split from prev at "
1170                 << RI.R->getNameStr() << "\n");
1171       if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1172                       DT, Unhoistables)) {
1173         CHRScope *Tail = Scope->split(RI.R);
1174         Scopes.insert(Tail);
1175         Splits.push_back(Scope);
1176         SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1177         SplitsConditionValues.push_back(PrevConditionValues);
1178         SplitsInsertPoints.push_back(PrevInsertPoint);
1179         Scope = Tail;
1180         PrevConditionValues = ConditionValues;
1181         PrevInsertPoint = InsertPoint;
1182         PrevSplitFromOuter = true;
1183       } else {
1184         // Not splitting. Union the bases. Keep the hoist point.
1185         PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1186       }
1187     }
1188   }
1189   Splits.push_back(Scope);
1190   SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1191   SplitsConditionValues.push_back(PrevConditionValues);
1192   assert(PrevInsertPoint && "Null PrevInsertPoint");
1193   SplitsInsertPoints.push_back(PrevInsertPoint);
1194   assert(Splits.size() == SplitsConditionValues.size() &&
1195          Splits.size() == SplitsSplitFromOuter.size() &&
1196          Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1197   for (size_t I = 0; I < Splits.size(); ++I) {
1198     CHRScope *Split = Splits[I];
1199     DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1200     Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1201     SmallVector<CHRScope *, 8> NewSubs;
1202     DenseSet<Instruction *> SplitUnhoistables;
1203     GetSelectsInScope(Split, SplitUnhoistables);
1204     for (CHRScope *Sub : Split->Subs) {
1205       SmallVector<CHRScope *, 8> SubSplits = splitScope(
1206           Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1207           SplitUnhoistables);
1208       NewSubs.insert(NewSubs.end(), SubSplits.begin(), SubSplits.end());
1209     }
1210     Split->Subs = NewSubs;
1211   }
1212   SmallVector<CHRScope *, 8> Result;
1213   for (size_t I = 0; I < Splits.size(); ++I) {
1214     CHRScope *Split = Splits[I];
1215     if (SplitsSplitFromOuter[I]) {
1216       // Split from the outer.
1217       Output.push_back(Split);
1218       Split->BranchInsertPoint = SplitsInsertPoints[I];
1219       CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1220                 << "\n");
1221     } else {
1222       // Connected to the outer.
1223       Result.push_back(Split);
1224     }
1225   }
1226   if (!Outer)
1227     assert(Result.empty() &&
1228            "If no outer (top-level), must return no nested ones");
1229   return Result;
1230 }
1231 
1232 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1233   for (CHRScope *Scope : Scopes) {
1234     assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1235     classifyBiasedScopes(Scope, Scope);
1236     CHR_DEBUG(
1237         dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1238         dbgs() << "TrueBiasedRegions ";
1239         for (Region *R : Scope->TrueBiasedRegions) {
1240           dbgs() << R->getNameStr() << ", ";
1241         }
1242         dbgs() << "\n";
1243         dbgs() << "FalseBiasedRegions ";
1244         for (Region *R : Scope->FalseBiasedRegions) {
1245           dbgs() << R->getNameStr() << ", ";
1246         }
1247         dbgs() << "\n";
1248         dbgs() << "TrueBiasedSelects ";
1249         for (SelectInst *SI : Scope->TrueBiasedSelects) {
1250           dbgs() << *SI << ", ";
1251         }
1252         dbgs() << "\n";
1253         dbgs() << "FalseBiasedSelects ";
1254         for (SelectInst *SI : Scope->FalseBiasedSelects) {
1255           dbgs() << *SI << ", ";
1256         }
1257         dbgs() << "\n";);
1258   }
1259 }
1260 
1261 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1262   for (RegInfo &RI : Scope->RegInfos) {
1263     if (RI.HasBranch) {
1264       Region *R = RI.R;
1265       if (TrueBiasedRegionsGlobal.count(R) > 0)
1266         OutermostScope->TrueBiasedRegions.insert(R);
1267       else if (FalseBiasedRegionsGlobal.count(R) > 0)
1268         OutermostScope->FalseBiasedRegions.insert(R);
1269       else
1270         llvm_unreachable("Must be biased");
1271     }
1272     for (SelectInst *SI : RI.Selects) {
1273       if (TrueBiasedSelectsGlobal.count(SI) > 0)
1274         OutermostScope->TrueBiasedSelects.insert(SI);
1275       else if (FalseBiasedSelectsGlobal.count(SI) > 0)
1276         OutermostScope->FalseBiasedSelects.insert(SI);
1277       else
1278         llvm_unreachable("Must be biased");
1279     }
1280   }
1281   for (CHRScope *Sub : Scope->Subs) {
1282     classifyBiasedScopes(Sub, OutermostScope);
1283   }
1284 }
1285 
1286 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1287   unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1288                        Scope->FalseBiasedRegions.size() +
1289                        Scope->TrueBiasedSelects.size() +
1290                        Scope->FalseBiasedSelects.size();
1291   return NumBiased >= CHRMergeThreshold;
1292 }
1293 
1294 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1295                        SmallVectorImpl<CHRScope *> &Output) {
1296   for (CHRScope *Scope : Input) {
1297     // Filter out the ones with only one region and no subs.
1298     if (!hasAtLeastTwoBiasedBranches(Scope)) {
1299       CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1300                 << Scope->TrueBiasedRegions.size()
1301                 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1302                 << " true-selects " << Scope->TrueBiasedSelects.size()
1303                 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1304       continue;
1305     }
1306     Output.push_back(Scope);
1307   }
1308 }
1309 
1310 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1311                         SmallVectorImpl<CHRScope *> &Output) {
1312   for (CHRScope *Scope : Input) {
1313     assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1314            "Empty");
1315     setCHRRegions(Scope, Scope);
1316     Output.push_back(Scope);
1317     CHR_DEBUG(
1318         dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1319         for (auto pair : Scope->HoistStopMap) {
1320           Region *R = pair.first;
1321           dbgs() << "Region " << R->getNameStr() << "\n";
1322           for (Instruction *I : pair.second) {
1323             dbgs() << "HoistStop " << *I << "\n";
1324           }
1325         }
1326         dbgs() << "CHRRegions" << "\n";
1327         for (RegInfo &RI : Scope->CHRRegions) {
1328           dbgs() << RI.R->getNameStr() << "\n";
1329         });
1330   }
1331 }
1332 
1333 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1334   DenseSet<Instruction *> Unhoistables;
1335   // Put the biased selects in Unhoistables because they should stay where they
1336   // are and constant-folded after CHR (in case one biased select or a branch
1337   // can depend on another biased select.)
1338   for (RegInfo &RI : Scope->RegInfos) {
1339     for (SelectInst *SI : RI.Selects) {
1340       Unhoistables.insert(SI);
1341     }
1342   }
1343   Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1344   for (RegInfo &RI : Scope->RegInfos) {
1345     Region *R = RI.R;
1346     DenseSet<Instruction *> HoistStops;
1347     bool IsHoisted = false;
1348     if (RI.HasBranch) {
1349       assert((OutermostScope->TrueBiasedRegions.count(R) > 0 ||
1350               OutermostScope->FalseBiasedRegions.count(R) > 0) &&
1351              "Must be truthy or falsy");
1352       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1353       // Note checkHoistValue fills in HoistStops.
1354       bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1355                                          Unhoistables, &HoistStops);
1356       assert(IsHoistable && "Must be hoistable");
1357       (void)(IsHoistable);  // Unused in release build
1358       IsHoisted = true;
1359     }
1360     for (SelectInst *SI : RI.Selects) {
1361       assert((OutermostScope->TrueBiasedSelects.count(SI) > 0 ||
1362               OutermostScope->FalseBiasedSelects.count(SI) > 0) &&
1363              "Must be true or false biased");
1364       // Note checkHoistValue fills in HoistStops.
1365       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1366                                          Unhoistables, &HoistStops);
1367       assert(IsHoistable && "Must be hoistable");
1368       (void)(IsHoistable);  // Unused in release build
1369       IsHoisted = true;
1370     }
1371     if (IsHoisted) {
1372       OutermostScope->CHRRegions.push_back(RI);
1373       OutermostScope->HoistStopMap[R] = HoistStops;
1374     }
1375   }
1376   for (CHRScope *Sub : Scope->Subs)
1377     setCHRRegions(Sub, OutermostScope);
1378 }
1379 
1380 bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1381   return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1382 }
1383 
1384 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1385                      SmallVectorImpl<CHRScope *> &Output) {
1386   Output.resize(Input.size());
1387   std::copy(Input.begin(), Input.end(), Output.begin());
1388   std::stable_sort(Output.begin(), Output.end(), CHRScopeSorter);
1389 }
1390 
1391 // Return true if V is already hoisted or was hoisted (along with its operands)
1392 // to the insert point.
1393 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1394                        HoistStopMapTy &HoistStopMap,
1395                        DenseSet<Instruction *> &HoistedSet,
1396                        DenseSet<PHINode *> &TrivialPHIs) {
1397   auto IT = HoistStopMap.find(R);
1398   assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1399   DenseSet<Instruction *> &HoistStops = IT->second;
1400   if (auto *I = dyn_cast<Instruction>(V)) {
1401     if (I == HoistPoint)
1402       return;
1403     if (HoistStops.count(I))
1404       return;
1405     if (auto *PN = dyn_cast<PHINode>(I))
1406       if (TrivialPHIs.count(PN))
1407         // The trivial phi inserted by the previous CHR scope could replace a
1408         // non-phi in HoistStops. Note that since this phi is at the exit of a
1409         // previous CHR scope, which dominates this scope, it's safe to stop
1410         // hoisting there.
1411         return;
1412     if (HoistedSet.count(I))
1413       // Already hoisted, return.
1414       return;
1415     assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1416     for (Value *Op : I->operands()) {
1417       hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs);
1418     }
1419     I->moveBefore(HoistPoint);
1420     HoistedSet.insert(I);
1421     CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1422   }
1423 }
1424 
1425 // Hoist the dependent condition values of the branches and the selects in the
1426 // scope to the insert point.
1427 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1428                                  DenseSet<PHINode *> &TrivialPHIs) {
1429   DenseSet<Instruction *> HoistedSet;
1430   for (const RegInfo &RI : Scope->CHRRegions) {
1431     Region *R = RI.R;
1432     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1433     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1434     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1435       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1436       hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1437                  HoistedSet, TrivialPHIs);
1438     }
1439     for (SelectInst *SI : RI.Selects) {
1440       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1441       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1442       if (!(IsTrueBiased || IsFalseBiased))
1443         continue;
1444       hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1445                  HoistedSet, TrivialPHIs);
1446     }
1447   }
1448 }
1449 
1450 // Negate the predicate if an ICmp if it's used only by branches or selects by
1451 // swapping the operands of the branches or the selects. Returns true if success.
1452 static bool NegateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1453                                                  Instruction *ExcludedUser,
1454                                                  CHRScope *Scope) {
1455   for (User *U : ICmp->users()) {
1456     if (U == ExcludedUser)
1457       continue;
1458     if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1459       continue;
1460     if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1461       continue;
1462     return false;
1463   }
1464   for (User *U : ICmp->users()) {
1465     if (U == ExcludedUser)
1466       continue;
1467     if (auto *BI = dyn_cast<BranchInst>(U)) {
1468       assert(BI->isConditional() && "Must be conditional");
1469       BI->swapSuccessors();
1470       // Don't need to swap this in terms of
1471       // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1472       // mean whehter the branch is likely go into the if-then rather than
1473       // successor0/successor1 and because we can tell which edge is the then or
1474       // the else one by comparing the destination to the region exit block.
1475       continue;
1476     }
1477     if (auto *SI = dyn_cast<SelectInst>(U)) {
1478       // Swap operands
1479       Value *TrueValue = SI->getTrueValue();
1480       Value *FalseValue = SI->getFalseValue();
1481       SI->setTrueValue(FalseValue);
1482       SI->setFalseValue(TrueValue);
1483       SI->swapProfMetadata();
1484       if (Scope->TrueBiasedSelects.count(SI)) {
1485         assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1486                "Must not be already in");
1487         Scope->FalseBiasedSelects.insert(SI);
1488       } else if (Scope->FalseBiasedSelects.count(SI)) {
1489         assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1490                "Must not be already in");
1491         Scope->TrueBiasedSelects.insert(SI);
1492       }
1493       continue;
1494     }
1495     llvm_unreachable("Must be a branch or a select");
1496   }
1497   ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1498   return true;
1499 }
1500 
1501 // A helper for transformScopes. Insert a trivial phi at the scope exit block
1502 // for a value that's defined in the scope but used outside it (meaning it's
1503 // alive at the exit block).
1504 static void insertTrivialPHIs(CHRScope *Scope,
1505                               BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1506                               DenseSet<PHINode *> &TrivialPHIs) {
1507   DenseSet<BasicBlock *> BlocksInScopeSet;
1508   SmallVector<BasicBlock *, 8> BlocksInScopeVec;
1509   for (RegInfo &RI : Scope->RegInfos) {
1510     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1511                                             // sub-Scopes.
1512       BlocksInScopeSet.insert(BB);
1513       BlocksInScopeVec.push_back(BB);
1514     }
1515   }
1516   CHR_DEBUG(
1517       dbgs() << "Inserting redudant phis\n";
1518       for (BasicBlock *BB : BlocksInScopeVec) {
1519         dbgs() << "BlockInScope " << BB->getName() << "\n";
1520       });
1521   for (BasicBlock *BB : BlocksInScopeVec) {
1522     for (Instruction &I : *BB) {
1523       SmallVector<Instruction *, 8> Users;
1524       for (User *U : I.users()) {
1525         if (auto *UI = dyn_cast<Instruction>(U)) {
1526           if (BlocksInScopeSet.count(UI->getParent()) == 0 &&
1527               // Unless there's already a phi for I at the exit block.
1528               !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1529             CHR_DEBUG(dbgs() << "V " << I << "\n");
1530             CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1531             Users.push_back(UI);
1532           } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1533             // There's a loop backedge from a block that's dominated by this
1534             // scope to the entry block.
1535             CHR_DEBUG(dbgs() << "V " << I << "\n");
1536             CHR_DEBUG(dbgs()
1537                       << "Used at entry block (for a back edge) by a phi user "
1538                       << *UI << "\n");
1539             Users.push_back(UI);
1540           }
1541         }
1542       }
1543       if (Users.size() > 0) {
1544         // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1545         // ExitBlock. Replace I with the new phi in UI unless UI is another
1546         // phi at ExitBlock.
1547         unsigned PredCount = std::distance(pred_begin(ExitBlock),
1548                                            pred_end(ExitBlock));
1549         PHINode *PN = PHINode::Create(I.getType(), PredCount, "",
1550                                       &ExitBlock->front());
1551         for (BasicBlock *Pred : predecessors(ExitBlock)) {
1552           PN->addIncoming(&I, Pred);
1553         }
1554         TrivialPHIs.insert(PN);
1555         CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1556         for (Instruction *UI : Users) {
1557           for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1558             if (UI->getOperand(J) == &I) {
1559               UI->setOperand(J, PN);
1560             }
1561           }
1562           CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1563         }
1564       }
1565     }
1566   }
1567 }
1568 
1569 // Assert that all the CHR regions of the scope have a biased branch or select.
1570 static void LLVM_ATTRIBUTE_UNUSED
1571 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1572 #ifndef NDEBUG
1573   auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1574     if (Scope->TrueBiasedRegions.count(RI.R) ||
1575         Scope->FalseBiasedRegions.count(RI.R))
1576       return true;
1577     for (SelectInst *SI : RI.Selects)
1578       if (Scope->TrueBiasedSelects.count(SI) ||
1579           Scope->FalseBiasedSelects.count(SI))
1580         return true;
1581     return false;
1582   };
1583   for (RegInfo &RI : Scope->CHRRegions) {
1584     assert(HasBiasedBranchOrSelect(RI, Scope) &&
1585            "Must have biased branch or select");
1586   }
1587 #endif
1588 }
1589 
1590 // Assert that all the condition values of the biased branches and selects have
1591 // been hoisted to the pre-entry block or outside of the scope.
1592 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
1593     CHRScope *Scope, BasicBlock *PreEntryBlock) {
1594   CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1595   for (RegInfo &RI : Scope->CHRRegions) {
1596     Region *R = RI.R;
1597     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1598     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1599     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1600       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1601       Value *V = BI->getCondition();
1602       CHR_DEBUG(dbgs() << *V << "\n");
1603       if (auto *I = dyn_cast<Instruction>(V)) {
1604         (void)(I); // Unused in release build.
1605         assert((I->getParent() == PreEntryBlock ||
1606                 !Scope->contains(I)) &&
1607                "Must have been hoisted to PreEntryBlock or outside the scope");
1608       }
1609     }
1610     for (SelectInst *SI : RI.Selects) {
1611       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1612       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1613       if (!(IsTrueBiased || IsFalseBiased))
1614         continue;
1615       Value *V = SI->getCondition();
1616       CHR_DEBUG(dbgs() << *V << "\n");
1617       if (auto *I = dyn_cast<Instruction>(V)) {
1618         (void)(I); // Unused in release build.
1619         assert((I->getParent() == PreEntryBlock ||
1620                 !Scope->contains(I)) &&
1621                "Must have been hoisted to PreEntryBlock or outside the scope");
1622       }
1623     }
1624   }
1625 }
1626 
1627 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1628   CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1629 
1630   assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1631   Region *FirstRegion = Scope->RegInfos[0].R;
1632   BasicBlock *EntryBlock = FirstRegion->getEntry();
1633   Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1634   BasicBlock *ExitBlock = LastRegion->getExit();
1635   Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1636 
1637   if (ExitBlock) {
1638     // Insert a trivial phi at the exit block (where the CHR hot path and the
1639     // cold path merges) for a value that's defined in the scope but used
1640     // outside it (meaning it's alive at the exit block). We will add the
1641     // incoming values for the CHR cold paths to it below. Without this, we'd
1642     // miss updating phi's for such values unless there happens to already be a
1643     // phi for that value there.
1644     insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1645   }
1646 
1647   // Split the entry block of the first region. The new block becomes the new
1648   // entry block of the first region. The old entry block becomes the block to
1649   // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1650   // through the split, we update the entry of the first region after the split,
1651   // and Region only points to the entry and the exit blocks, rather than
1652   // keeping everything in a list or set, the blocks membership and the
1653   // entry/exit blocks of the region are still valid after the split.
1654   CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1655             << " at " << *Scope->BranchInsertPoint << "\n");
1656   BasicBlock *NewEntryBlock =
1657       SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1658   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1659          "NewEntryBlock's only pred must be EntryBlock");
1660   FirstRegion->replaceEntryRecursive(NewEntryBlock);
1661   BasicBlock *PreEntryBlock = EntryBlock;
1662 
1663   ValueToValueMapTy VMap;
1664   // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1665   // hot path (originals) and a cold path (clones) and update the PHIs at the
1666   // exit block.
1667   cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1668 
1669   // Replace the old (placeholder) branch with the new (merged) conditional
1670   // branch.
1671   BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1672                                             NewEntryBlock, VMap);
1673 
1674 #ifndef NDEBUG
1675   assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1676 #endif
1677 
1678   // Hoist the conditional values of the branches/selects.
1679   hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs);
1680 
1681 #ifndef NDEBUG
1682   assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1683 #endif
1684 
1685   // Create the combined branch condition and constant-fold the branches/selects
1686   // in the hot path.
1687   fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1688                           ProfileCount ? ProfileCount.getValue() : 0);
1689 }
1690 
1691 // A helper for transformScopes. Clone the blocks in the scope (excluding the
1692 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1693 // at the exit block.
1694 void CHR::cloneScopeBlocks(CHRScope *Scope,
1695                            BasicBlock *PreEntryBlock,
1696                            BasicBlock *ExitBlock,
1697                            Region *LastRegion,
1698                            ValueToValueMapTy &VMap) {
1699   // Clone all the blocks. The original blocks will be the hot-path
1700   // CHR-optimized code and the cloned blocks will be the original unoptimized
1701   // code. This is so that the block pointers from the
1702   // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1703   // which CHR should apply to.
1704   SmallVector<BasicBlock*, 8> NewBlocks;
1705   for (RegInfo &RI : Scope->RegInfos)
1706     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1707                                             // sub-Scopes.
1708       assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1709       BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1710       NewBlocks.push_back(NewBB);
1711       VMap[BB] = NewBB;
1712     }
1713 
1714   // Place the cloned blocks right after the original blocks (right before the
1715   // exit block of.)
1716   if (ExitBlock)
1717     F.getBasicBlockList().splice(ExitBlock->getIterator(),
1718                                  F.getBasicBlockList(),
1719                                  NewBlocks[0]->getIterator(), F.end());
1720 
1721   // Update the cloned blocks/instructions to refer to themselves.
1722   for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1723     for (Instruction &I : *NewBlocks[i])
1724       RemapInstruction(&I, VMap,
1725                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1726 
1727   // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1728   // the top-level region but we don't need to add PHIs. The trivial PHIs
1729   // inserted above will be updated here.
1730   if (ExitBlock)
1731     for (PHINode &PN : ExitBlock->phis())
1732       for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1733            ++I) {
1734         BasicBlock *Pred = PN.getIncomingBlock(I);
1735         if (LastRegion->contains(Pred)) {
1736           Value *V = PN.getIncomingValue(I);
1737           auto It = VMap.find(V);
1738           if (It != VMap.end()) V = It->second;
1739           assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1740           PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1741         }
1742       }
1743 }
1744 
1745 // A helper for transformScope. Replace the old (placeholder) branch with the
1746 // new (merged) conditional branch.
1747 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1748                                     BasicBlock *EntryBlock,
1749                                     BasicBlock *NewEntryBlock,
1750                                     ValueToValueMapTy &VMap) {
1751   BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1752   assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1753          "SplitBlock did not work correctly!");
1754   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1755          "NewEntryBlock's only pred must be EntryBlock");
1756   assert(VMap.find(NewEntryBlock) != VMap.end() &&
1757          "NewEntryBlock must have been copied");
1758   OldBR->dropAllReferences();
1759   OldBR->eraseFromParent();
1760   // The true predicate is a placeholder. It will be replaced later in
1761   // fixupBranchesAndSelects().
1762   BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1763                                          cast<BasicBlock>(VMap[NewEntryBlock]),
1764                                          ConstantInt::getTrue(F.getContext()));
1765   PreEntryBlock->getInstList().push_back(NewBR);
1766   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1767          "NewEntryBlock's only pred must be EntryBlock");
1768   return NewBR;
1769 }
1770 
1771 // A helper for transformScopes. Create the combined branch condition and
1772 // constant-fold the branches/selects in the hot path.
1773 void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1774                                   BasicBlock *PreEntryBlock,
1775                                   BranchInst *MergedBR,
1776                                   uint64_t ProfileCount) {
1777   Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1778   BranchProbability CHRBranchBias(1, 1);
1779   uint64_t NumCHRedBranches = 0;
1780   IRBuilder<> IRB(PreEntryBlock->getTerminator());
1781   for (RegInfo &RI : Scope->CHRRegions) {
1782     Region *R = RI.R;
1783     if (RI.HasBranch) {
1784       fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1785       ++NumCHRedBranches;
1786     }
1787     for (SelectInst *SI : RI.Selects) {
1788       fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1789       ++NumCHRedBranches;
1790     }
1791   }
1792   Stats.NumBranchesDelta += NumCHRedBranches - 1;
1793   Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1794   MergedBR->setCondition(MergedCondition);
1795   SmallVector<uint32_t, 2> Weights;
1796   Weights.push_back(static_cast<uint32_t>(CHRBranchBias.scale(1000)));
1797   Weights.push_back(static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)));
1798   MDBuilder MDB(F.getContext());
1799   MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1800   CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1801             << "\n");
1802 }
1803 
1804 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1805 // and constant-fold a branch in the hot path.
1806 void CHR::fixupBranch(Region *R, CHRScope *Scope,
1807                       IRBuilder<> &IRB,
1808                       Value *&MergedCondition,
1809                       BranchProbability &CHRBranchBias) {
1810   bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1811   assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1812          "Must be truthy or falsy");
1813   auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1814   assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1815          "Must be in the bias map");
1816   BranchProbability Bias = BranchBiasMap[R];
1817   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1818   // Take the min.
1819   if (CHRBranchBias > Bias)
1820     CHRBranchBias = Bias;
1821   BasicBlock *IfThen = BI->getSuccessor(1);
1822   BasicBlock *IfElse = BI->getSuccessor(0);
1823   BasicBlock *RegionExitBlock = R->getExit();
1824   assert(RegionExitBlock && "Null ExitBlock");
1825   assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1826          IfThen != IfElse && "Invariant from findScopes");
1827   if (IfThen == RegionExitBlock) {
1828     // Swap them so that IfThen means going into it and IfElse means skipping
1829     // it.
1830     std::swap(IfThen, IfElse);
1831   }
1832   CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1833             << " IfElse " << IfElse->getName() << "\n");
1834   Value *Cond = BI->getCondition();
1835   BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1836   bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1837   addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1838                        MergedCondition);
1839   // Constant-fold the branch at ClonedEntryBlock.
1840   assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1841          "The successor shouldn't change");
1842   Value *NewCondition = ConditionTrue ?
1843                         ConstantInt::getTrue(F.getContext()) :
1844                         ConstantInt::getFalse(F.getContext());
1845   BI->setCondition(NewCondition);
1846 }
1847 
1848 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1849 // and constant-fold a select in the hot path.
1850 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1851                       IRBuilder<> &IRB,
1852                       Value *&MergedCondition,
1853                       BranchProbability &CHRBranchBias) {
1854   bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1855   assert((IsTrueBiased ||
1856           Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1857   assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1858          "Must be in the bias map");
1859   BranchProbability Bias = SelectBiasMap[SI];
1860   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1861   // Take the min.
1862   if (CHRBranchBias > Bias)
1863     CHRBranchBias = Bias;
1864   Value *Cond = SI->getCondition();
1865   addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1866                        MergedCondition);
1867   Value *NewCondition = IsTrueBiased ?
1868                         ConstantInt::getTrue(F.getContext()) :
1869                         ConstantInt::getFalse(F.getContext());
1870   SI->setCondition(NewCondition);
1871 }
1872 
1873 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1874 // condition.
1875 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1876                                Instruction *BranchOrSelect,
1877                                CHRScope *Scope,
1878                                IRBuilder<> &IRB,
1879                                Value *&MergedCondition) {
1880   if (IsTrueBiased) {
1881     MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1882   } else {
1883     // If Cond is an icmp and all users of V except for BranchOrSelect is a
1884     // branch, negate the icmp predicate and swap the branch targets and avoid
1885     // inserting an Xor to negate Cond.
1886     bool Done = false;
1887     if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1888       if (NegateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1889         MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1890         Done = true;
1891       }
1892     if (!Done) {
1893       Value *Negate = IRB.CreateXor(
1894           ConstantInt::getTrue(F.getContext()), Cond);
1895       MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1896     }
1897   }
1898 }
1899 
1900 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1901   unsigned i = 0;
1902   (void)(i); // Unused in release build.
1903   DenseSet<PHINode *> TrivialPHIs;
1904   for (CHRScope *Scope : CHRScopes) {
1905     transformScopes(Scope, TrivialPHIs);
1906     CHR_DEBUG(
1907         std::ostringstream oss;
1908         oss << " after transformScopes " << i++;
1909         dumpIR(F, oss.str().c_str(), nullptr));
1910   }
1911 }
1912 
1913 static void LLVM_ATTRIBUTE_UNUSED
1914 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1915   dbgs() << Label << " " << Scopes.size() << "\n";
1916   for (CHRScope *Scope : Scopes) {
1917     dbgs() << *Scope << "\n";
1918   }
1919 }
1920 
1921 bool CHR::run() {
1922   if (!shouldApply(F, PSI))
1923     return false;
1924 
1925   CHR_DEBUG(dumpIR(F, "before", nullptr));
1926 
1927   bool Changed = false;
1928   {
1929     CHR_DEBUG(
1930         dbgs() << "RegionInfo:\n";
1931         RI.print(dbgs()));
1932 
1933     // Recursively traverse the region tree and find regions that have biased
1934     // branches and/or selects and create scopes.
1935     SmallVector<CHRScope *, 8> AllScopes;
1936     findScopes(AllScopes);
1937     CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
1938 
1939     // Split the scopes if 1) the conditiona values of the biased
1940     // branches/selects of the inner/lower scope can't be hoisted up to the
1941     // outermost/uppermost scope entry, or 2) the condition values of the biased
1942     // branches/selects in a scope (including subscopes) don't share at least
1943     // one common value.
1944     SmallVector<CHRScope *, 8> SplitScopes;
1945     splitScopes(AllScopes, SplitScopes);
1946     CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
1947 
1948     // After splitting, set the biased regions and selects of a scope (a tree
1949     // root) that include those of the subscopes.
1950     classifyBiasedScopes(SplitScopes);
1951     CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
1952 
1953     // Filter out the scopes that has only one biased region or select (CHR
1954     // isn't useful in such a case).
1955     SmallVector<CHRScope *, 8> FilteredScopes;
1956     filterScopes(SplitScopes, FilteredScopes);
1957     CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
1958 
1959     // Set the regions to be CHR'ed and their hoist stops for each scope.
1960     SmallVector<CHRScope *, 8> SetScopes;
1961     setCHRRegions(FilteredScopes, SetScopes);
1962     CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
1963 
1964     // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
1965     // ones. We need to apply CHR from outer to inner so that we apply CHR only
1966     // to the hot path, rather than both hot and cold paths.
1967     SmallVector<CHRScope *, 8> SortedScopes;
1968     sortScopes(SetScopes, SortedScopes);
1969     CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
1970 
1971     CHR_DEBUG(
1972         dbgs() << "RegionInfo:\n";
1973         RI.print(dbgs()));
1974 
1975     // Apply the CHR transformation.
1976     if (!SortedScopes.empty()) {
1977       transformScopes(SortedScopes);
1978       Changed = true;
1979     }
1980   }
1981 
1982   if (Changed)
1983     CHR_DEBUG(dumpIR(F, "after", &Stats));
1984 
1985   return Changed;
1986 }
1987 
1988 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
1989   BlockFrequencyInfo &BFI =
1990       getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
1991   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1992   ProfileSummaryInfo &PSI =
1993       *getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
1994   RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
1995   return CHR(F, BFI, DT, PSI, RI).run();
1996 }
1997 
1998 namespace llvm {
1999 
2000 ControlHeightReductionPass::ControlHeightReductionPass() {
2001   ParseCHRFilterFiles();
2002 }
2003 
2004 PreservedAnalyses ControlHeightReductionPass::run(
2005     Function &F,
2006     FunctionAnalysisManager &FAM) {
2007   auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2008   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2009   auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2010   auto &MAM = MAMProxy.getManager();
2011   auto &PSI = *MAM.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2012   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2013   bool Changed = CHR(F, BFI, DT, PSI, RI).run();
2014   if (!Changed)
2015     return PreservedAnalyses::all();
2016   auto PA = PreservedAnalyses();
2017   PA.preserve<GlobalsAA>();
2018   return PA;
2019 }
2020 
2021 } // namespace llvm
2022