xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Transforms/Instrumentation/ControlHeightReduction.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===-- ControlHeightReduction.cpp - Control Height Reduction -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This pass merges conditional blocks of code and reduces the number of
10 // conditional branches in the hot paths based on profiles.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Transforms/Instrumentation/ControlHeightReduction.h"
15 #include "llvm/ADT/DenseMap.h"
16 #include "llvm/ADT/DenseSet.h"
17 #include "llvm/ADT/SmallVector.h"
18 #include "llvm/ADT/StringSet.h"
19 #include "llvm/Analysis/BlockFrequencyInfo.h"
20 #include "llvm/Analysis/GlobalsModRef.h"
21 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
22 #include "llvm/Analysis/ProfileSummaryInfo.h"
23 #include "llvm/Analysis/RegionInfo.h"
24 #include "llvm/Analysis/RegionIterator.h"
25 #include "llvm/Analysis/ValueTracking.h"
26 #include "llvm/IR/CFG.h"
27 #include "llvm/IR/Dominators.h"
28 #include "llvm/IR/IRBuilder.h"
29 #include "llvm/IR/MDBuilder.h"
30 #include "llvm/IR/PassManager.h"
31 #include "llvm/InitializePasses.h"
32 #include "llvm/Support/BranchProbability.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/MemoryBuffer.h"
35 #include "llvm/Transforms/Utils.h"
36 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
37 #include "llvm/Transforms/Utils/Cloning.h"
38 #include "llvm/Transforms/Utils/ValueMapper.h"
39 
40 #include <set>
41 #include <sstream>
42 
43 using namespace llvm;
44 
45 #define DEBUG_TYPE "chr"
46 
47 #define CHR_DEBUG(X) LLVM_DEBUG(X)
48 
49 static cl::opt<bool> ForceCHR("force-chr", cl::init(false), cl::Hidden,
50                               cl::desc("Apply CHR for all functions"));
51 
52 static cl::opt<double> CHRBiasThreshold(
53     "chr-bias-threshold", cl::init(0.99), cl::Hidden,
54     cl::desc("CHR considers a branch bias greater than this ratio as biased"));
55 
56 static cl::opt<unsigned> CHRMergeThreshold(
57     "chr-merge-threshold", cl::init(2), cl::Hidden,
58     cl::desc("CHR merges a group of N branches/selects where N >= this value"));
59 
60 static cl::opt<std::string> CHRModuleList(
61     "chr-module-list", cl::init(""), cl::Hidden,
62     cl::desc("Specify file to retrieve the list of modules to apply CHR to"));
63 
64 static cl::opt<std::string> CHRFunctionList(
65     "chr-function-list", cl::init(""), cl::Hidden,
66     cl::desc("Specify file to retrieve the list of functions to apply CHR to"));
67 
68 static StringSet<> CHRModules;
69 static StringSet<> CHRFunctions;
70 
parseCHRFilterFiles()71 static void parseCHRFilterFiles() {
72   if (!CHRModuleList.empty()) {
73     auto FileOrErr = MemoryBuffer::getFile(CHRModuleList);
74     if (!FileOrErr) {
75       errs() << "Error: Couldn't read the chr-module-list file " << CHRModuleList << "\n";
76       std::exit(1);
77     }
78     StringRef Buf = FileOrErr->get()->getBuffer();
79     SmallVector<StringRef, 0> Lines;
80     Buf.split(Lines, '\n');
81     for (StringRef Line : Lines) {
82       Line = Line.trim();
83       if (!Line.empty())
84         CHRModules.insert(Line);
85     }
86   }
87   if (!CHRFunctionList.empty()) {
88     auto FileOrErr = MemoryBuffer::getFile(CHRFunctionList);
89     if (!FileOrErr) {
90       errs() << "Error: Couldn't read the chr-function-list file " << CHRFunctionList << "\n";
91       std::exit(1);
92     }
93     StringRef Buf = FileOrErr->get()->getBuffer();
94     SmallVector<StringRef, 0> Lines;
95     Buf.split(Lines, '\n');
96     for (StringRef Line : Lines) {
97       Line = Line.trim();
98       if (!Line.empty())
99         CHRFunctions.insert(Line);
100     }
101   }
102 }
103 
104 namespace {
105 class ControlHeightReductionLegacyPass : public FunctionPass {
106 public:
107   static char ID;
108 
ControlHeightReductionLegacyPass()109   ControlHeightReductionLegacyPass() : FunctionPass(ID) {
110     initializeControlHeightReductionLegacyPassPass(
111         *PassRegistry::getPassRegistry());
112     parseCHRFilterFiles();
113   }
114 
115   bool runOnFunction(Function &F) override;
getAnalysisUsage(AnalysisUsage & AU) const116   void getAnalysisUsage(AnalysisUsage &AU) const override {
117     AU.addRequired<BlockFrequencyInfoWrapperPass>();
118     AU.addRequired<DominatorTreeWrapperPass>();
119     AU.addRequired<ProfileSummaryInfoWrapperPass>();
120     AU.addRequired<RegionInfoPass>();
121     AU.addPreserved<GlobalsAAWrapperPass>();
122   }
123 };
124 } // end anonymous namespace
125 
126 char ControlHeightReductionLegacyPass::ID = 0;
127 
128 INITIALIZE_PASS_BEGIN(ControlHeightReductionLegacyPass,
129                       "chr",
130                       "Reduce control height in the hot paths",
131                       false, false)
INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)132 INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
133 INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
134 INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
135 INITIALIZE_PASS_DEPENDENCY(RegionInfoPass)
136 INITIALIZE_PASS_END(ControlHeightReductionLegacyPass,
137                     "chr",
138                     "Reduce control height in the hot paths",
139                     false, false)
140 
141 FunctionPass *llvm::createControlHeightReductionLegacyPass() {
142   return new ControlHeightReductionLegacyPass();
143 }
144 
145 namespace {
146 
147 struct CHRStats {
CHRStats__anon566c75c30211::CHRStats148   CHRStats() : NumBranches(0), NumBranchesDelta(0),
149                WeightedNumBranchesDelta(0) {}
print__anon566c75c30211::CHRStats150   void print(raw_ostream &OS) const {
151     OS << "CHRStats: NumBranches " << NumBranches
152        << " NumBranchesDelta " << NumBranchesDelta
153        << " WeightedNumBranchesDelta " << WeightedNumBranchesDelta;
154   }
155   uint64_t NumBranches;       // The original number of conditional branches /
156                               // selects
157   uint64_t NumBranchesDelta;  // The decrease of the number of conditional
158                               // branches / selects in the hot paths due to CHR.
159   uint64_t WeightedNumBranchesDelta; // NumBranchesDelta weighted by the profile
160                                      // count at the scope entry.
161 };
162 
163 // RegInfo - some properties of a Region.
164 struct RegInfo {
RegInfo__anon566c75c30211::RegInfo165   RegInfo() : R(nullptr), HasBranch(false) {}
RegInfo__anon566c75c30211::RegInfo166   RegInfo(Region *RegionIn) : R(RegionIn), HasBranch(false) {}
167   Region *R;
168   bool HasBranch;
169   SmallVector<SelectInst *, 8> Selects;
170 };
171 
172 typedef DenseMap<Region *, DenseSet<Instruction *>> HoistStopMapTy;
173 
174 // CHRScope - a sequence of regions to CHR together. It corresponds to a
175 // sequence of conditional blocks. It can have subscopes which correspond to
176 // nested conditional blocks. Nested CHRScopes form a tree.
177 class CHRScope {
178  public:
CHRScope(RegInfo RI)179   CHRScope(RegInfo RI) : BranchInsertPoint(nullptr) {
180     assert(RI.R && "Null RegionIn");
181     RegInfos.push_back(RI);
182   }
183 
getParentRegion()184   Region *getParentRegion() {
185     assert(RegInfos.size() > 0 && "Empty CHRScope");
186     Region *Parent = RegInfos[0].R->getParent();
187     assert(Parent && "Unexpected to call this on the top-level region");
188     return Parent;
189   }
190 
getEntryBlock()191   BasicBlock *getEntryBlock() {
192     assert(RegInfos.size() > 0 && "Empty CHRScope");
193     return RegInfos.front().R->getEntry();
194   }
195 
getExitBlock()196   BasicBlock *getExitBlock() {
197     assert(RegInfos.size() > 0 && "Empty CHRScope");
198     return RegInfos.back().R->getExit();
199   }
200 
appendable(CHRScope * Next)201   bool appendable(CHRScope *Next) {
202     // The next scope is appendable only if this scope is directly connected to
203     // it (which implies it post-dominates this scope) and this scope dominates
204     // it (no edge to the next scope outside this scope).
205     BasicBlock *NextEntry = Next->getEntryBlock();
206     if (getExitBlock() != NextEntry)
207       // Not directly connected.
208       return false;
209     Region *LastRegion = RegInfos.back().R;
210     for (BasicBlock *Pred : predecessors(NextEntry))
211       if (!LastRegion->contains(Pred))
212         // There's an edge going into the entry of the next scope from outside
213         // of this scope.
214         return false;
215     return true;
216   }
217 
append(CHRScope * Next)218   void append(CHRScope *Next) {
219     assert(RegInfos.size() > 0 && "Empty CHRScope");
220     assert(Next->RegInfos.size() > 0 && "Empty CHRScope");
221     assert(getParentRegion() == Next->getParentRegion() &&
222            "Must be siblings");
223     assert(getExitBlock() == Next->getEntryBlock() &&
224            "Must be adjacent");
225     RegInfos.append(Next->RegInfos.begin(), Next->RegInfos.end());
226     Subs.append(Next->Subs.begin(), Next->Subs.end());
227   }
228 
addSub(CHRScope * SubIn)229   void addSub(CHRScope *SubIn) {
230 #ifndef NDEBUG
231     bool IsChild = false;
232     for (RegInfo &RI : RegInfos)
233       if (RI.R == SubIn->getParentRegion()) {
234         IsChild = true;
235         break;
236       }
237     assert(IsChild && "Must be a child");
238 #endif
239     Subs.push_back(SubIn);
240   }
241 
242   // Split this scope at the boundary region into two, which will belong to the
243   // tail and returns the tail.
split(Region * Boundary)244   CHRScope *split(Region *Boundary) {
245     assert(Boundary && "Boundary null");
246     assert(RegInfos.begin()->R != Boundary &&
247            "Can't be split at beginning");
248     auto BoundaryIt = llvm::find_if(
249         RegInfos, [&Boundary](const RegInfo &RI) { return Boundary == RI.R; });
250     if (BoundaryIt == RegInfos.end())
251       return nullptr;
252     ArrayRef<RegInfo> TailRegInfos(BoundaryIt, RegInfos.end());
253     DenseSet<Region *> TailRegionSet;
254     for (const RegInfo &RI : TailRegInfos)
255       TailRegionSet.insert(RI.R);
256 
257     auto TailIt =
258         std::stable_partition(Subs.begin(), Subs.end(), [&](CHRScope *Sub) {
259           assert(Sub && "null Sub");
260           Region *Parent = Sub->getParentRegion();
261           if (TailRegionSet.count(Parent))
262             return false;
263 
264           assert(llvm::any_of(
265                      RegInfos,
266                      [&Parent](const RegInfo &RI) { return Parent == RI.R; }) &&
267                  "Must be in head");
268           return true;
269         });
270     ArrayRef<CHRScope *> TailSubs(TailIt, Subs.end());
271 
272     assert(HoistStopMap.empty() && "MapHoistStops must be empty");
273     auto *Scope = new CHRScope(TailRegInfos, TailSubs);
274     RegInfos.erase(BoundaryIt, RegInfos.end());
275     Subs.erase(TailIt, Subs.end());
276     return Scope;
277   }
278 
contains(Instruction * I) const279   bool contains(Instruction *I) const {
280     BasicBlock *Parent = I->getParent();
281     for (const RegInfo &RI : RegInfos)
282       if (RI.R->contains(Parent))
283         return true;
284     return false;
285   }
286 
287   void print(raw_ostream &OS) const;
288 
289   SmallVector<RegInfo, 8> RegInfos; // Regions that belong to this scope
290   SmallVector<CHRScope *, 8> Subs;  // Subscopes.
291 
292   // The instruction at which to insert the CHR conditional branch (and hoist
293   // the dependent condition values).
294   Instruction *BranchInsertPoint;
295 
296   // True-biased and false-biased regions (conditional blocks),
297   // respectively. Used only for the outermost scope and includes regions in
298   // subscopes. The rest are unbiased.
299   DenseSet<Region *> TrueBiasedRegions;
300   DenseSet<Region *> FalseBiasedRegions;
301   // Among the biased regions, the regions that get CHRed.
302   SmallVector<RegInfo, 8> CHRRegions;
303 
304   // True-biased and false-biased selects, respectively. Used only for the
305   // outermost scope and includes ones in subscopes.
306   DenseSet<SelectInst *> TrueBiasedSelects;
307   DenseSet<SelectInst *> FalseBiasedSelects;
308 
309   // Map from one of the above regions to the instructions to stop
310   // hoisting instructions at through use-def chains.
311   HoistStopMapTy HoistStopMap;
312 
313  private:
CHRScope(ArrayRef<RegInfo> RegInfosIn,ArrayRef<CHRScope * > SubsIn)314    CHRScope(ArrayRef<RegInfo> RegInfosIn, ArrayRef<CHRScope *> SubsIn)
315        : RegInfos(RegInfosIn.begin(), RegInfosIn.end()),
316          Subs(SubsIn.begin(), SubsIn.end()), BranchInsertPoint(nullptr) {}
317 };
318 
319 class CHR {
320  public:
CHR(Function & Fin,BlockFrequencyInfo & BFIin,DominatorTree & DTin,ProfileSummaryInfo & PSIin,RegionInfo & RIin,OptimizationRemarkEmitter & OREin)321   CHR(Function &Fin, BlockFrequencyInfo &BFIin, DominatorTree &DTin,
322       ProfileSummaryInfo &PSIin, RegionInfo &RIin,
323       OptimizationRemarkEmitter &OREin)
324       : F(Fin), BFI(BFIin), DT(DTin), PSI(PSIin), RI(RIin), ORE(OREin) {}
325 
~CHR()326   ~CHR() {
327     for (CHRScope *Scope : Scopes) {
328       delete Scope;
329     }
330   }
331 
332   bool run();
333 
334  private:
335   // See the comments in CHR::run() for the high level flow of the algorithm and
336   // what the following functions do.
337 
findScopes(SmallVectorImpl<CHRScope * > & Output)338   void findScopes(SmallVectorImpl<CHRScope *> &Output) {
339     Region *R = RI.getTopLevelRegion();
340     if (CHRScope *Scope = findScopes(R, nullptr, nullptr, Output)) {
341       Output.push_back(Scope);
342     }
343   }
344   CHRScope *findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
345                         SmallVectorImpl<CHRScope *> &Scopes);
346   CHRScope *findScope(Region *R);
347   void checkScopeHoistable(CHRScope *Scope);
348 
349   void splitScopes(SmallVectorImpl<CHRScope *> &Input,
350                    SmallVectorImpl<CHRScope *> &Output);
351   SmallVector<CHRScope *, 8> splitScope(CHRScope *Scope,
352                                         CHRScope *Outer,
353                                         DenseSet<Value *> *OuterConditionValues,
354                                         Instruction *OuterInsertPoint,
355                                         SmallVectorImpl<CHRScope *> &Output,
356                                         DenseSet<Instruction *> &Unhoistables);
357 
358   void classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes);
359   void classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope);
360 
361   void filterScopes(SmallVectorImpl<CHRScope *> &Input,
362                     SmallVectorImpl<CHRScope *> &Output);
363 
364   void setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
365                      SmallVectorImpl<CHRScope *> &Output);
366   void setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope);
367 
368   void sortScopes(SmallVectorImpl<CHRScope *> &Input,
369                   SmallVectorImpl<CHRScope *> &Output);
370 
371   void transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes);
372   void transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs);
373   void cloneScopeBlocks(CHRScope *Scope,
374                         BasicBlock *PreEntryBlock,
375                         BasicBlock *ExitBlock,
376                         Region *LastRegion,
377                         ValueToValueMapTy &VMap);
378   BranchInst *createMergedBranch(BasicBlock *PreEntryBlock,
379                                  BasicBlock *EntryBlock,
380                                  BasicBlock *NewEntryBlock,
381                                  ValueToValueMapTy &VMap);
382   void fixupBranchesAndSelects(CHRScope *Scope,
383                                BasicBlock *PreEntryBlock,
384                                BranchInst *MergedBR,
385                                uint64_t ProfileCount);
386   void fixupBranch(Region *R,
387                    CHRScope *Scope,
388                    IRBuilder<> &IRB,
389                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
390   void fixupSelect(SelectInst* SI,
391                    CHRScope *Scope,
392                    IRBuilder<> &IRB,
393                    Value *&MergedCondition, BranchProbability &CHRBranchBias);
394   void addToMergedCondition(bool IsTrueBiased, Value *Cond,
395                             Instruction *BranchOrSelect,
396                             CHRScope *Scope,
397                             IRBuilder<> &IRB,
398                             Value *&MergedCondition);
399 
400   Function &F;
401   BlockFrequencyInfo &BFI;
402   DominatorTree &DT;
403   ProfileSummaryInfo &PSI;
404   RegionInfo &RI;
405   OptimizationRemarkEmitter &ORE;
406   CHRStats Stats;
407 
408   // All the true-biased regions in the function
409   DenseSet<Region *> TrueBiasedRegionsGlobal;
410   // All the false-biased regions in the function
411   DenseSet<Region *> FalseBiasedRegionsGlobal;
412   // All the true-biased selects in the function
413   DenseSet<SelectInst *> TrueBiasedSelectsGlobal;
414   // All the false-biased selects in the function
415   DenseSet<SelectInst *> FalseBiasedSelectsGlobal;
416   // A map from biased regions to their branch bias
417   DenseMap<Region *, BranchProbability> BranchBiasMap;
418   // A map from biased selects to their branch bias
419   DenseMap<SelectInst *, BranchProbability> SelectBiasMap;
420   // All the scopes.
421   DenseSet<CHRScope *> Scopes;
422 };
423 
424 } // end anonymous namespace
425 
426 static inline
operator <<(raw_ostream & OS,const CHRStats & Stats)427 raw_ostream LLVM_ATTRIBUTE_UNUSED &operator<<(raw_ostream &OS,
428                                               const CHRStats &Stats) {
429   Stats.print(OS);
430   return OS;
431 }
432 
433 static inline
operator <<(raw_ostream & OS,const CHRScope & Scope)434 raw_ostream &operator<<(raw_ostream &OS, const CHRScope &Scope) {
435   Scope.print(OS);
436   return OS;
437 }
438 
shouldApply(Function & F,ProfileSummaryInfo & PSI)439 static bool shouldApply(Function &F, ProfileSummaryInfo& PSI) {
440   if (ForceCHR)
441     return true;
442 
443   if (!CHRModuleList.empty() || !CHRFunctionList.empty()) {
444     if (CHRModules.count(F.getParent()->getName()))
445       return true;
446     return CHRFunctions.count(F.getName());
447   }
448 
449   assert(PSI.hasProfileSummary() && "Empty PSI?");
450   return PSI.isFunctionEntryHot(&F);
451 }
452 
dumpIR(Function & F,const char * Label,CHRStats * Stats)453 static void LLVM_ATTRIBUTE_UNUSED dumpIR(Function &F, const char *Label,
454                                          CHRStats *Stats) {
455   StringRef FuncName = F.getName();
456   StringRef ModuleName = F.getParent()->getName();
457   (void)(FuncName); // Unused in release build.
458   (void)(ModuleName); // Unused in release build.
459   CHR_DEBUG(dbgs() << "CHR IR dump " << Label << " " << ModuleName << " "
460             << FuncName);
461   if (Stats)
462     CHR_DEBUG(dbgs() << " " << *Stats);
463   CHR_DEBUG(dbgs() << "\n");
464   CHR_DEBUG(F.dump());
465 }
466 
print(raw_ostream & OS) const467 void CHRScope::print(raw_ostream &OS) const {
468   assert(RegInfos.size() > 0 && "Empty CHRScope");
469   OS << "CHRScope[";
470   OS << RegInfos.size() << ", Regions[";
471   for (const RegInfo &RI : RegInfos) {
472     OS << RI.R->getNameStr();
473     if (RI.HasBranch)
474       OS << " B";
475     if (RI.Selects.size() > 0)
476       OS << " S" << RI.Selects.size();
477     OS << ", ";
478   }
479   if (RegInfos[0].R->getParent()) {
480     OS << "], Parent " << RegInfos[0].R->getParent()->getNameStr();
481   } else {
482     // top level region
483     OS << "]";
484   }
485   OS << ", Subs[";
486   for (CHRScope *Sub : Subs) {
487     OS << *Sub << ", ";
488   }
489   OS << "]]";
490 }
491 
492 // Return true if the given instruction type can be hoisted by CHR.
isHoistableInstructionType(Instruction * I)493 static bool isHoistableInstructionType(Instruction *I) {
494   return isa<BinaryOperator>(I) || isa<CastInst>(I) || isa<SelectInst>(I) ||
495       isa<GetElementPtrInst>(I) || isa<CmpInst>(I) ||
496       isa<InsertElementInst>(I) || isa<ExtractElementInst>(I) ||
497       isa<ShuffleVectorInst>(I) || isa<ExtractValueInst>(I) ||
498       isa<InsertValueInst>(I);
499 }
500 
501 // Return true if the given instruction can be hoisted by CHR.
isHoistable(Instruction * I,DominatorTree & DT)502 static bool isHoistable(Instruction *I, DominatorTree &DT) {
503   if (!isHoistableInstructionType(I))
504     return false;
505   return isSafeToSpeculativelyExecute(I, nullptr, &DT);
506 }
507 
508 // Recursively traverse the use-def chains of the given value and return a set
509 // of the unhoistable base values defined within the scope (excluding the
510 // first-region entry block) or the (hoistable or unhoistable) base values that
511 // are defined outside (including the first-region entry block) of the
512 // scope. The returned set doesn't include constants.
513 static const std::set<Value *> &
getBaseValues(Value * V,DominatorTree & DT,DenseMap<Value *,std::set<Value * >> & Visited)514 getBaseValues(Value *V, DominatorTree &DT,
515               DenseMap<Value *, std::set<Value *>> &Visited) {
516   auto It = Visited.find(V);
517   if (It != Visited.end()) {
518     return It->second;
519   }
520   std::set<Value *> Result;
521   if (auto *I = dyn_cast<Instruction>(V)) {
522     // We don't stop at a block that's not in the Scope because we would miss
523     // some instructions that are based on the same base values if we stop
524     // there.
525     if (!isHoistable(I, DT)) {
526       Result.insert(I);
527       return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
528     }
529     // I is hoistable above the Scope.
530     for (Value *Op : I->operands()) {
531       const std::set<Value *> &OpResult = getBaseValues(Op, DT, Visited);
532       Result.insert(OpResult.begin(), OpResult.end());
533     }
534     return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
535   }
536   if (isa<Argument>(V)) {
537     Result.insert(V);
538   }
539   // We don't include others like constants because those won't lead to any
540   // chance of folding of conditions (eg two bit checks merged into one check)
541   // after CHR.
542   return Visited.insert(std::make_pair(V, std::move(Result))).first->second;
543 }
544 
545 // Return true if V is already hoisted or can be hoisted (along with its
546 // operands) above the insert point. When it returns true and HoistStops is
547 // non-null, the instructions to stop hoisting at through the use-def chains are
548 // inserted into HoistStops.
549 static bool
checkHoistValue(Value * V,Instruction * InsertPoint,DominatorTree & DT,DenseSet<Instruction * > & Unhoistables,DenseSet<Instruction * > * HoistStops,DenseMap<Instruction *,bool> & Visited)550 checkHoistValue(Value *V, Instruction *InsertPoint, DominatorTree &DT,
551                 DenseSet<Instruction *> &Unhoistables,
552                 DenseSet<Instruction *> *HoistStops,
553                 DenseMap<Instruction *, bool> &Visited) {
554   assert(InsertPoint && "Null InsertPoint");
555   if (auto *I = dyn_cast<Instruction>(V)) {
556     auto It = Visited.find(I);
557     if (It != Visited.end()) {
558       return It->second;
559     }
560     assert(DT.getNode(I->getParent()) && "DT must contain I's parent block");
561     assert(DT.getNode(InsertPoint->getParent()) && "DT must contain Destination");
562     if (Unhoistables.count(I)) {
563       // Don't hoist if they are not to be hoisted.
564       Visited[I] = false;
565       return false;
566     }
567     if (DT.dominates(I, InsertPoint)) {
568       // We are already above the insert point. Stop here.
569       if (HoistStops)
570         HoistStops->insert(I);
571       Visited[I] = true;
572       return true;
573     }
574     // We aren't not above the insert point, check if we can hoist it above the
575     // insert point.
576     if (isHoistable(I, DT)) {
577       // Check operands first.
578       DenseSet<Instruction *> OpsHoistStops;
579       bool AllOpsHoisted = true;
580       for (Value *Op : I->operands()) {
581         if (!checkHoistValue(Op, InsertPoint, DT, Unhoistables, &OpsHoistStops,
582                              Visited)) {
583           AllOpsHoisted = false;
584           break;
585         }
586       }
587       if (AllOpsHoisted) {
588         CHR_DEBUG(dbgs() << "checkHoistValue " << *I << "\n");
589         if (HoistStops)
590           HoistStops->insert(OpsHoistStops.begin(), OpsHoistStops.end());
591         Visited[I] = true;
592         return true;
593       }
594     }
595     Visited[I] = false;
596     return false;
597   }
598   // Non-instructions are considered hoistable.
599   return true;
600 }
601 
602 // Returns true and sets the true probability and false probability of an
603 // MD_prof metadata if it's well-formed.
checkMDProf(MDNode * MD,BranchProbability & TrueProb,BranchProbability & FalseProb)604 static bool checkMDProf(MDNode *MD, BranchProbability &TrueProb,
605                         BranchProbability &FalseProb) {
606   if (!MD) return false;
607   MDString *MDName = cast<MDString>(MD->getOperand(0));
608   if (MDName->getString() != "branch_weights" ||
609       MD->getNumOperands() != 3)
610     return false;
611   ConstantInt *TrueWeight = mdconst::extract<ConstantInt>(MD->getOperand(1));
612   ConstantInt *FalseWeight = mdconst::extract<ConstantInt>(MD->getOperand(2));
613   if (!TrueWeight || !FalseWeight)
614     return false;
615   uint64_t TrueWt = TrueWeight->getValue().getZExtValue();
616   uint64_t FalseWt = FalseWeight->getValue().getZExtValue();
617   uint64_t SumWt = TrueWt + FalseWt;
618 
619   assert(SumWt >= TrueWt && SumWt >= FalseWt &&
620          "Overflow calculating branch probabilities.");
621 
622   // Guard against 0-to-0 branch weights to avoid a division-by-zero crash.
623   if (SumWt == 0)
624     return false;
625 
626   TrueProb = BranchProbability::getBranchProbability(TrueWt, SumWt);
627   FalseProb = BranchProbability::getBranchProbability(FalseWt, SumWt);
628   return true;
629 }
630 
getCHRBiasThreshold()631 static BranchProbability getCHRBiasThreshold() {
632   return BranchProbability::getBranchProbability(
633       static_cast<uint64_t>(CHRBiasThreshold * 1000000), 1000000);
634 }
635 
636 // A helper for CheckBiasedBranch and CheckBiasedSelect. If TrueProb >=
637 // CHRBiasThreshold, put Key into TrueSet and return true. If FalseProb >=
638 // CHRBiasThreshold, put Key into FalseSet and return true. Otherwise, return
639 // false.
640 template <typename K, typename S, typename M>
checkBias(K * Key,BranchProbability TrueProb,BranchProbability FalseProb,S & TrueSet,S & FalseSet,M & BiasMap)641 static bool checkBias(K *Key, BranchProbability TrueProb,
642                       BranchProbability FalseProb, S &TrueSet, S &FalseSet,
643                       M &BiasMap) {
644   BranchProbability Threshold = getCHRBiasThreshold();
645   if (TrueProb >= Threshold) {
646     TrueSet.insert(Key);
647     BiasMap[Key] = TrueProb;
648     return true;
649   } else if (FalseProb >= Threshold) {
650     FalseSet.insert(Key);
651     BiasMap[Key] = FalseProb;
652     return true;
653   }
654   return false;
655 }
656 
657 // Returns true and insert a region into the right biased set and the map if the
658 // branch of the region is biased.
checkBiasedBranch(BranchInst * BI,Region * R,DenseSet<Region * > & TrueBiasedRegionsGlobal,DenseSet<Region * > & FalseBiasedRegionsGlobal,DenseMap<Region *,BranchProbability> & BranchBiasMap)659 static bool checkBiasedBranch(BranchInst *BI, Region *R,
660                               DenseSet<Region *> &TrueBiasedRegionsGlobal,
661                               DenseSet<Region *> &FalseBiasedRegionsGlobal,
662                               DenseMap<Region *, BranchProbability> &BranchBiasMap) {
663   if (!BI->isConditional())
664     return false;
665   BranchProbability ThenProb, ElseProb;
666   if (!checkMDProf(BI->getMetadata(LLVMContext::MD_prof),
667                    ThenProb, ElseProb))
668     return false;
669   BasicBlock *IfThen = BI->getSuccessor(0);
670   BasicBlock *IfElse = BI->getSuccessor(1);
671   assert((IfThen == R->getExit() || IfElse == R->getExit()) &&
672          IfThen != IfElse &&
673          "Invariant from findScopes");
674   if (IfThen == R->getExit()) {
675     // Swap them so that IfThen/ThenProb means going into the conditional code
676     // and IfElse/ElseProb means skipping it.
677     std::swap(IfThen, IfElse);
678     std::swap(ThenProb, ElseProb);
679   }
680   CHR_DEBUG(dbgs() << "BI " << *BI << " ");
681   CHR_DEBUG(dbgs() << "ThenProb " << ThenProb << " ");
682   CHR_DEBUG(dbgs() << "ElseProb " << ElseProb << "\n");
683   return checkBias(R, ThenProb, ElseProb,
684                    TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
685                    BranchBiasMap);
686 }
687 
688 // Returns true and insert a select into the right biased set and the map if the
689 // select is biased.
checkBiasedSelect(SelectInst * SI,Region * R,DenseSet<SelectInst * > & TrueBiasedSelectsGlobal,DenseSet<SelectInst * > & FalseBiasedSelectsGlobal,DenseMap<SelectInst *,BranchProbability> & SelectBiasMap)690 static bool checkBiasedSelect(
691     SelectInst *SI, Region *R,
692     DenseSet<SelectInst *> &TrueBiasedSelectsGlobal,
693     DenseSet<SelectInst *> &FalseBiasedSelectsGlobal,
694     DenseMap<SelectInst *, BranchProbability> &SelectBiasMap) {
695   BranchProbability TrueProb, FalseProb;
696   if (!checkMDProf(SI->getMetadata(LLVMContext::MD_prof),
697                    TrueProb, FalseProb))
698     return false;
699   CHR_DEBUG(dbgs() << "SI " << *SI << " ");
700   CHR_DEBUG(dbgs() << "TrueProb " << TrueProb << " ");
701   CHR_DEBUG(dbgs() << "FalseProb " << FalseProb << "\n");
702   return checkBias(SI, TrueProb, FalseProb,
703                    TrueBiasedSelectsGlobal, FalseBiasedSelectsGlobal,
704                    SelectBiasMap);
705 }
706 
707 // Returns the instruction at which to hoist the dependent condition values and
708 // insert the CHR branch for a region. This is the terminator branch in the
709 // entry block or the first select in the entry block, if any.
getBranchInsertPoint(RegInfo & RI)710 static Instruction* getBranchInsertPoint(RegInfo &RI) {
711   Region *R = RI.R;
712   BasicBlock *EntryBB = R->getEntry();
713   // The hoist point is by default the terminator of the entry block, which is
714   // the same as the branch instruction if RI.HasBranch is true.
715   Instruction *HoistPoint = EntryBB->getTerminator();
716   for (SelectInst *SI : RI.Selects) {
717     if (SI->getParent() == EntryBB) {
718       // Pick the first select in Selects in the entry block.  Note Selects is
719       // sorted in the instruction order within a block (asserted below).
720       HoistPoint = SI;
721       break;
722     }
723   }
724   assert(HoistPoint && "Null HoistPoint");
725 #ifndef NDEBUG
726   // Check that HoistPoint is the first one in Selects in the entry block,
727   // if any.
728   DenseSet<Instruction *> EntryBlockSelectSet;
729   for (SelectInst *SI : RI.Selects) {
730     if (SI->getParent() == EntryBB) {
731       EntryBlockSelectSet.insert(SI);
732     }
733   }
734   for (Instruction &I : *EntryBB) {
735     if (EntryBlockSelectSet.contains(&I)) {
736       assert(&I == HoistPoint &&
737              "HoistPoint must be the first one in Selects");
738       break;
739     }
740   }
741 #endif
742   return HoistPoint;
743 }
744 
745 // Find a CHR scope in the given region.
findScope(Region * R)746 CHRScope * CHR::findScope(Region *R) {
747   CHRScope *Result = nullptr;
748   BasicBlock *Entry = R->getEntry();
749   BasicBlock *Exit = R->getExit();  // null if top level.
750   assert(Entry && "Entry must not be null");
751   assert((Exit == nullptr) == (R->isTopLevelRegion()) &&
752          "Only top level region has a null exit");
753   if (Entry)
754     CHR_DEBUG(dbgs() << "Entry " << Entry->getName() << "\n");
755   else
756     CHR_DEBUG(dbgs() << "Entry null\n");
757   if (Exit)
758     CHR_DEBUG(dbgs() << "Exit " << Exit->getName() << "\n");
759   else
760     CHR_DEBUG(dbgs() << "Exit null\n");
761   // Exclude cases where Entry is part of a subregion (hence it doesn't belong
762   // to this region).
763   bool EntryInSubregion = RI.getRegionFor(Entry) != R;
764   if (EntryInSubregion)
765     return nullptr;
766   // Exclude loops
767   for (BasicBlock *Pred : predecessors(Entry))
768     if (R->contains(Pred))
769       return nullptr;
770   if (Exit) {
771     // Try to find an if-then block (check if R is an if-then).
772     // if (cond) {
773     //  ...
774     // }
775     auto *BI = dyn_cast<BranchInst>(Entry->getTerminator());
776     if (BI)
777       CHR_DEBUG(dbgs() << "BI.isConditional " << BI->isConditional() << "\n");
778     else
779       CHR_DEBUG(dbgs() << "BI null\n");
780     if (BI && BI->isConditional()) {
781       BasicBlock *S0 = BI->getSuccessor(0);
782       BasicBlock *S1 = BI->getSuccessor(1);
783       CHR_DEBUG(dbgs() << "S0 " << S0->getName() << "\n");
784       CHR_DEBUG(dbgs() << "S1 " << S1->getName() << "\n");
785       if (S0 != S1 && (S0 == Exit || S1 == Exit)) {
786         RegInfo RI(R);
787         RI.HasBranch = checkBiasedBranch(
788             BI, R, TrueBiasedRegionsGlobal, FalseBiasedRegionsGlobal,
789             BranchBiasMap);
790         Result = new CHRScope(RI);
791         Scopes.insert(Result);
792         CHR_DEBUG(dbgs() << "Found a region with a branch\n");
793         ++Stats.NumBranches;
794         if (!RI.HasBranch) {
795           ORE.emit([&]() {
796             return OptimizationRemarkMissed(DEBUG_TYPE, "BranchNotBiased", BI)
797                 << "Branch not biased";
798           });
799         }
800       }
801     }
802   }
803   {
804     // Try to look for selects in the direct child blocks (as opposed to in
805     // subregions) of R.
806     // ...
807     // if (..) { // Some subregion
808     //   ...
809     // }
810     // if (..) { // Some subregion
811     //   ...
812     // }
813     // ...
814     // a = cond ? b : c;
815     // ...
816     SmallVector<SelectInst *, 8> Selects;
817     for (RegionNode *E : R->elements()) {
818       if (E->isSubRegion())
819         continue;
820       // This returns the basic block of E if E is a direct child of R (not a
821       // subregion.)
822       BasicBlock *BB = E->getEntry();
823       // Need to push in the order to make it easier to find the first Select
824       // later.
825       for (Instruction &I : *BB) {
826         if (auto *SI = dyn_cast<SelectInst>(&I)) {
827           Selects.push_back(SI);
828           ++Stats.NumBranches;
829         }
830       }
831     }
832     if (Selects.size() > 0) {
833       auto AddSelects = [&](RegInfo &RI) {
834         for (auto *SI : Selects)
835           if (checkBiasedSelect(SI, RI.R,
836                                 TrueBiasedSelectsGlobal,
837                                 FalseBiasedSelectsGlobal,
838                                 SelectBiasMap))
839             RI.Selects.push_back(SI);
840           else
841             ORE.emit([&]() {
842               return OptimizationRemarkMissed(DEBUG_TYPE, "SelectNotBiased", SI)
843                   << "Select not biased";
844             });
845       };
846       if (!Result) {
847         CHR_DEBUG(dbgs() << "Found a select-only region\n");
848         RegInfo RI(R);
849         AddSelects(RI);
850         Result = new CHRScope(RI);
851         Scopes.insert(Result);
852       } else {
853         CHR_DEBUG(dbgs() << "Found select(s) in a region with a branch\n");
854         AddSelects(Result->RegInfos[0]);
855       }
856     }
857   }
858 
859   if (Result) {
860     checkScopeHoistable(Result);
861   }
862   return Result;
863 }
864 
865 // Check that any of the branch and the selects in the region could be
866 // hoisted above the the CHR branch insert point (the most dominating of
867 // them, either the branch (at the end of the first block) or the first
868 // select in the first block). If the branch can't be hoisted, drop the
869 // selects in the first blocks.
870 //
871 // For example, for the following scope/region with selects, we want to insert
872 // the merged branch right before the first select in the first/entry block by
873 // hoisting c1, c2, c3, and c4.
874 //
875 // // Branch insert point here.
876 // a = c1 ? b : c; // Select 1
877 // d = c2 ? e : f; // Select 2
878 // if (c3) { // Branch
879 //   ...
880 //   c4 = foo() // A call.
881 //   g = c4 ? h : i; // Select 3
882 // }
883 //
884 // But suppose we can't hoist c4 because it's dependent on the preceding
885 // call. Then, we drop Select 3. Furthermore, if we can't hoist c2, we also drop
886 // Select 2. If we can't hoist c3, we drop Selects 1 & 2.
checkScopeHoistable(CHRScope * Scope)887 void CHR::checkScopeHoistable(CHRScope *Scope) {
888   RegInfo &RI = Scope->RegInfos[0];
889   Region *R = RI.R;
890   BasicBlock *EntryBB = R->getEntry();
891   auto *Branch = RI.HasBranch ?
892                  cast<BranchInst>(EntryBB->getTerminator()) : nullptr;
893   SmallVector<SelectInst *, 8> &Selects = RI.Selects;
894   if (RI.HasBranch || !Selects.empty()) {
895     Instruction *InsertPoint = getBranchInsertPoint(RI);
896     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
897     // Avoid a data dependence from a select or a branch to a(nother)
898     // select. Note no instruction can't data-depend on a branch (a branch
899     // instruction doesn't produce a value).
900     DenseSet<Instruction *> Unhoistables;
901     // Initialize Unhoistables with the selects.
902     for (SelectInst *SI : Selects) {
903       Unhoistables.insert(SI);
904     }
905     // Remove Selects that can't be hoisted.
906     for (auto it = Selects.begin(); it != Selects.end(); ) {
907       SelectInst *SI = *it;
908       if (SI == InsertPoint) {
909         ++it;
910         continue;
911       }
912       DenseMap<Instruction *, bool> Visited;
913       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint,
914                                          DT, Unhoistables, nullptr, Visited);
915       if (!IsHoistable) {
916         CHR_DEBUG(dbgs() << "Dropping select " << *SI << "\n");
917         ORE.emit([&]() {
918           return OptimizationRemarkMissed(DEBUG_TYPE,
919                                           "DropUnhoistableSelect", SI)
920               << "Dropped unhoistable select";
921         });
922         it = Selects.erase(it);
923         // Since we are dropping the select here, we also drop it from
924         // Unhoistables.
925         Unhoistables.erase(SI);
926       } else
927         ++it;
928     }
929     // Update InsertPoint after potentially removing selects.
930     InsertPoint = getBranchInsertPoint(RI);
931     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
932     if (RI.HasBranch && InsertPoint != Branch) {
933       DenseMap<Instruction *, bool> Visited;
934       bool IsHoistable = checkHoistValue(Branch->getCondition(), InsertPoint,
935                                          DT, Unhoistables, nullptr, Visited);
936       if (!IsHoistable) {
937         // If the branch isn't hoistable, drop the selects in the entry
938         // block, preferring the branch, which makes the branch the hoist
939         // point.
940         assert(InsertPoint != Branch && "Branch must not be the hoist point");
941         CHR_DEBUG(dbgs() << "Dropping selects in entry block \n");
942         CHR_DEBUG(
943             for (SelectInst *SI : Selects) {
944               dbgs() << "SI " << *SI << "\n";
945             });
946         for (SelectInst *SI : Selects) {
947           ORE.emit([&]() {
948             return OptimizationRemarkMissed(DEBUG_TYPE,
949                                             "DropSelectUnhoistableBranch", SI)
950                 << "Dropped select due to unhoistable branch";
951           });
952         }
953         llvm::erase_if(Selects, [EntryBB](SelectInst *SI) {
954           return SI->getParent() == EntryBB;
955         });
956         Unhoistables.clear();
957         InsertPoint = Branch;
958       }
959     }
960     CHR_DEBUG(dbgs() << "InsertPoint " << *InsertPoint << "\n");
961 #ifndef NDEBUG
962     if (RI.HasBranch) {
963       assert(!DT.dominates(Branch, InsertPoint) &&
964              "Branch can't be already above the hoist point");
965       DenseMap<Instruction *, bool> Visited;
966       assert(checkHoistValue(Branch->getCondition(), InsertPoint,
967                              DT, Unhoistables, nullptr, Visited) &&
968              "checkHoistValue for branch");
969     }
970     for (auto *SI : Selects) {
971       assert(!DT.dominates(SI, InsertPoint) &&
972              "SI can't be already above the hoist point");
973       DenseMap<Instruction *, bool> Visited;
974       assert(checkHoistValue(SI->getCondition(), InsertPoint, DT,
975                              Unhoistables, nullptr, Visited) &&
976              "checkHoistValue for selects");
977     }
978     CHR_DEBUG(dbgs() << "Result\n");
979     if (RI.HasBranch) {
980       CHR_DEBUG(dbgs() << "BI " << *Branch << "\n");
981     }
982     for (auto *SI : Selects) {
983       CHR_DEBUG(dbgs() << "SI " << *SI << "\n");
984     }
985 #endif
986   }
987 }
988 
989 // Traverse the region tree, find all nested scopes and merge them if possible.
findScopes(Region * R,Region * NextRegion,Region * ParentRegion,SmallVectorImpl<CHRScope * > & Scopes)990 CHRScope * CHR::findScopes(Region *R, Region *NextRegion, Region *ParentRegion,
991                            SmallVectorImpl<CHRScope *> &Scopes) {
992   CHR_DEBUG(dbgs() << "findScopes " << R->getNameStr() << "\n");
993   CHRScope *Result = findScope(R);
994   // Visit subscopes.
995   CHRScope *ConsecutiveSubscope = nullptr;
996   SmallVector<CHRScope *, 8> Subscopes;
997   for (auto It = R->begin(); It != R->end(); ++It) {
998     const std::unique_ptr<Region> &SubR = *It;
999     auto NextIt = std::next(It);
1000     Region *NextSubR = NextIt != R->end() ? NextIt->get() : nullptr;
1001     CHR_DEBUG(dbgs() << "Looking at subregion " << SubR.get()->getNameStr()
1002               << "\n");
1003     CHRScope *SubCHRScope = findScopes(SubR.get(), NextSubR, R, Scopes);
1004     if (SubCHRScope) {
1005       CHR_DEBUG(dbgs() << "Subregion Scope " << *SubCHRScope << "\n");
1006     } else {
1007       CHR_DEBUG(dbgs() << "Subregion Scope null\n");
1008     }
1009     if (SubCHRScope) {
1010       if (!ConsecutiveSubscope)
1011         ConsecutiveSubscope = SubCHRScope;
1012       else if (!ConsecutiveSubscope->appendable(SubCHRScope)) {
1013         Subscopes.push_back(ConsecutiveSubscope);
1014         ConsecutiveSubscope = SubCHRScope;
1015       } else
1016         ConsecutiveSubscope->append(SubCHRScope);
1017     } else {
1018       if (ConsecutiveSubscope) {
1019         Subscopes.push_back(ConsecutiveSubscope);
1020       }
1021       ConsecutiveSubscope = nullptr;
1022     }
1023   }
1024   if (ConsecutiveSubscope) {
1025     Subscopes.push_back(ConsecutiveSubscope);
1026   }
1027   for (CHRScope *Sub : Subscopes) {
1028     if (Result) {
1029       // Combine it with the parent.
1030       Result->addSub(Sub);
1031     } else {
1032       // Push Subscopes as they won't be combined with the parent.
1033       Scopes.push_back(Sub);
1034     }
1035   }
1036   return Result;
1037 }
1038 
getCHRConditionValuesForRegion(RegInfo & RI)1039 static DenseSet<Value *> getCHRConditionValuesForRegion(RegInfo &RI) {
1040   DenseSet<Value *> ConditionValues;
1041   if (RI.HasBranch) {
1042     auto *BI = cast<BranchInst>(RI.R->getEntry()->getTerminator());
1043     ConditionValues.insert(BI->getCondition());
1044   }
1045   for (SelectInst *SI : RI.Selects) {
1046     ConditionValues.insert(SI->getCondition());
1047   }
1048   return ConditionValues;
1049 }
1050 
1051 
1052 // Determine whether to split a scope depending on the sets of the branch
1053 // condition values of the previous region and the current region. We split
1054 // (return true) it if 1) the condition values of the inner/lower scope can't be
1055 // hoisted up to the outer/upper scope, or 2) the two sets of the condition
1056 // values have an empty intersection (because the combined branch conditions
1057 // won't probably lead to a simpler combined condition).
shouldSplit(Instruction * InsertPoint,DenseSet<Value * > & PrevConditionValues,DenseSet<Value * > & ConditionValues,DominatorTree & DT,DenseSet<Instruction * > & Unhoistables)1058 static bool shouldSplit(Instruction *InsertPoint,
1059                         DenseSet<Value *> &PrevConditionValues,
1060                         DenseSet<Value *> &ConditionValues,
1061                         DominatorTree &DT,
1062                         DenseSet<Instruction *> &Unhoistables) {
1063   assert(InsertPoint && "Null InsertPoint");
1064   CHR_DEBUG(
1065       dbgs() << "shouldSplit " << *InsertPoint << " PrevConditionValues ";
1066       for (Value *V : PrevConditionValues) {
1067         dbgs() << *V << ", ";
1068       }
1069       dbgs() << " ConditionValues ";
1070       for (Value *V : ConditionValues) {
1071         dbgs() << *V << ", ";
1072       }
1073       dbgs() << "\n");
1074   // If any of Bases isn't hoistable to the hoist point, split.
1075   for (Value *V : ConditionValues) {
1076     DenseMap<Instruction *, bool> Visited;
1077     if (!checkHoistValue(V, InsertPoint, DT, Unhoistables, nullptr, Visited)) {
1078       CHR_DEBUG(dbgs() << "Split. checkHoistValue false " << *V << "\n");
1079       return true; // Not hoistable, split.
1080     }
1081   }
1082   // If PrevConditionValues or ConditionValues is empty, don't split to avoid
1083   // unnecessary splits at scopes with no branch/selects.  If
1084   // PrevConditionValues and ConditionValues don't intersect at all, split.
1085   if (!PrevConditionValues.empty() && !ConditionValues.empty()) {
1086     // Use std::set as DenseSet doesn't work with set_intersection.
1087     std::set<Value *> PrevBases, Bases;
1088     DenseMap<Value *, std::set<Value *>> Visited;
1089     for (Value *V : PrevConditionValues) {
1090       const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1091       PrevBases.insert(BaseValues.begin(), BaseValues.end());
1092     }
1093     for (Value *V : ConditionValues) {
1094       const std::set<Value *> &BaseValues = getBaseValues(V, DT, Visited);
1095       Bases.insert(BaseValues.begin(), BaseValues.end());
1096     }
1097     CHR_DEBUG(
1098         dbgs() << "PrevBases ";
1099         for (Value *V : PrevBases) {
1100           dbgs() << *V << ", ";
1101         }
1102         dbgs() << " Bases ";
1103         for (Value *V : Bases) {
1104           dbgs() << *V << ", ";
1105         }
1106         dbgs() << "\n");
1107     std::vector<Value *> Intersection;
1108     std::set_intersection(PrevBases.begin(), PrevBases.end(), Bases.begin(),
1109                           Bases.end(), std::back_inserter(Intersection));
1110     if (Intersection.empty()) {
1111       // Empty intersection, split.
1112       CHR_DEBUG(dbgs() << "Split. Intersection empty\n");
1113       return true;
1114     }
1115   }
1116   CHR_DEBUG(dbgs() << "No split\n");
1117   return false;  // Don't split.
1118 }
1119 
getSelectsInScope(CHRScope * Scope,DenseSet<Instruction * > & Output)1120 static void getSelectsInScope(CHRScope *Scope,
1121                               DenseSet<Instruction *> &Output) {
1122   for (RegInfo &RI : Scope->RegInfos)
1123     for (SelectInst *SI : RI.Selects)
1124       Output.insert(SI);
1125   for (CHRScope *Sub : Scope->Subs)
1126     getSelectsInScope(Sub, Output);
1127 }
1128 
splitScopes(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1129 void CHR::splitScopes(SmallVectorImpl<CHRScope *> &Input,
1130                       SmallVectorImpl<CHRScope *> &Output) {
1131   for (CHRScope *Scope : Input) {
1132     assert(!Scope->BranchInsertPoint &&
1133            "BranchInsertPoint must not be set");
1134     DenseSet<Instruction *> Unhoistables;
1135     getSelectsInScope(Scope, Unhoistables);
1136     splitScope(Scope, nullptr, nullptr, nullptr, Output, Unhoistables);
1137   }
1138 #ifndef NDEBUG
1139   for (CHRScope *Scope : Output) {
1140     assert(Scope->BranchInsertPoint && "BranchInsertPoint must be set");
1141   }
1142 #endif
1143 }
1144 
splitScope(CHRScope * Scope,CHRScope * Outer,DenseSet<Value * > * OuterConditionValues,Instruction * OuterInsertPoint,SmallVectorImpl<CHRScope * > & Output,DenseSet<Instruction * > & Unhoistables)1145 SmallVector<CHRScope *, 8> CHR::splitScope(
1146     CHRScope *Scope,
1147     CHRScope *Outer,
1148     DenseSet<Value *> *OuterConditionValues,
1149     Instruction *OuterInsertPoint,
1150     SmallVectorImpl<CHRScope *> &Output,
1151     DenseSet<Instruction *> &Unhoistables) {
1152   if (Outer) {
1153     assert(OuterConditionValues && "Null OuterConditionValues");
1154     assert(OuterInsertPoint && "Null OuterInsertPoint");
1155   }
1156   bool PrevSplitFromOuter = true;
1157   DenseSet<Value *> PrevConditionValues;
1158   Instruction *PrevInsertPoint = nullptr;
1159   SmallVector<CHRScope *, 8> Splits;
1160   SmallVector<bool, 8> SplitsSplitFromOuter;
1161   SmallVector<DenseSet<Value *>, 8> SplitsConditionValues;
1162   SmallVector<Instruction *, 8> SplitsInsertPoints;
1163   SmallVector<RegInfo, 8> RegInfos(Scope->RegInfos);  // Copy
1164   for (RegInfo &RI : RegInfos) {
1165     Instruction *InsertPoint = getBranchInsertPoint(RI);
1166     DenseSet<Value *> ConditionValues = getCHRConditionValuesForRegion(RI);
1167     CHR_DEBUG(
1168         dbgs() << "ConditionValues ";
1169         for (Value *V : ConditionValues) {
1170           dbgs() << *V << ", ";
1171         }
1172         dbgs() << "\n");
1173     if (RI.R == RegInfos[0].R) {
1174       // First iteration. Check to see if we should split from the outer.
1175       if (Outer) {
1176         CHR_DEBUG(dbgs() << "Outer " << *Outer << "\n");
1177         CHR_DEBUG(dbgs() << "Should split from outer at "
1178                   << RI.R->getNameStr() << "\n");
1179         if (shouldSplit(OuterInsertPoint, *OuterConditionValues,
1180                         ConditionValues, DT, Unhoistables)) {
1181           PrevConditionValues = ConditionValues;
1182           PrevInsertPoint = InsertPoint;
1183           ORE.emit([&]() {
1184             return OptimizationRemarkMissed(DEBUG_TYPE,
1185                                             "SplitScopeFromOuter",
1186                                             RI.R->getEntry()->getTerminator())
1187                 << "Split scope from outer due to unhoistable branch/select "
1188                 << "and/or lack of common condition values";
1189           });
1190         } else {
1191           // Not splitting from the outer. Use the outer bases and insert
1192           // point. Union the bases.
1193           PrevSplitFromOuter = false;
1194           PrevConditionValues = *OuterConditionValues;
1195           PrevConditionValues.insert(ConditionValues.begin(),
1196                                      ConditionValues.end());
1197           PrevInsertPoint = OuterInsertPoint;
1198         }
1199       } else {
1200         CHR_DEBUG(dbgs() << "Outer null\n");
1201         PrevConditionValues = ConditionValues;
1202         PrevInsertPoint = InsertPoint;
1203       }
1204     } else {
1205       CHR_DEBUG(dbgs() << "Should split from prev at "
1206                 << RI.R->getNameStr() << "\n");
1207       if (shouldSplit(PrevInsertPoint, PrevConditionValues, ConditionValues,
1208                       DT, Unhoistables)) {
1209         CHRScope *Tail = Scope->split(RI.R);
1210         Scopes.insert(Tail);
1211         Splits.push_back(Scope);
1212         SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1213         SplitsConditionValues.push_back(PrevConditionValues);
1214         SplitsInsertPoints.push_back(PrevInsertPoint);
1215         Scope = Tail;
1216         PrevConditionValues = ConditionValues;
1217         PrevInsertPoint = InsertPoint;
1218         PrevSplitFromOuter = true;
1219         ORE.emit([&]() {
1220           return OptimizationRemarkMissed(DEBUG_TYPE,
1221                                           "SplitScopeFromPrev",
1222                                           RI.R->getEntry()->getTerminator())
1223               << "Split scope from previous due to unhoistable branch/select "
1224               << "and/or lack of common condition values";
1225         });
1226       } else {
1227         // Not splitting. Union the bases. Keep the hoist point.
1228         PrevConditionValues.insert(ConditionValues.begin(), ConditionValues.end());
1229       }
1230     }
1231   }
1232   Splits.push_back(Scope);
1233   SplitsSplitFromOuter.push_back(PrevSplitFromOuter);
1234   SplitsConditionValues.push_back(PrevConditionValues);
1235   assert(PrevInsertPoint && "Null PrevInsertPoint");
1236   SplitsInsertPoints.push_back(PrevInsertPoint);
1237   assert(Splits.size() == SplitsConditionValues.size() &&
1238          Splits.size() == SplitsSplitFromOuter.size() &&
1239          Splits.size() == SplitsInsertPoints.size() && "Mismatching sizes");
1240   for (size_t I = 0; I < Splits.size(); ++I) {
1241     CHRScope *Split = Splits[I];
1242     DenseSet<Value *> &SplitConditionValues = SplitsConditionValues[I];
1243     Instruction *SplitInsertPoint = SplitsInsertPoints[I];
1244     SmallVector<CHRScope *, 8> NewSubs;
1245     DenseSet<Instruction *> SplitUnhoistables;
1246     getSelectsInScope(Split, SplitUnhoistables);
1247     for (CHRScope *Sub : Split->Subs) {
1248       SmallVector<CHRScope *, 8> SubSplits = splitScope(
1249           Sub, Split, &SplitConditionValues, SplitInsertPoint, Output,
1250           SplitUnhoistables);
1251       llvm::append_range(NewSubs, SubSplits);
1252     }
1253     Split->Subs = NewSubs;
1254   }
1255   SmallVector<CHRScope *, 8> Result;
1256   for (size_t I = 0; I < Splits.size(); ++I) {
1257     CHRScope *Split = Splits[I];
1258     if (SplitsSplitFromOuter[I]) {
1259       // Split from the outer.
1260       Output.push_back(Split);
1261       Split->BranchInsertPoint = SplitsInsertPoints[I];
1262       CHR_DEBUG(dbgs() << "BranchInsertPoint " << *SplitsInsertPoints[I]
1263                 << "\n");
1264     } else {
1265       // Connected to the outer.
1266       Result.push_back(Split);
1267     }
1268   }
1269   if (!Outer)
1270     assert(Result.empty() &&
1271            "If no outer (top-level), must return no nested ones");
1272   return Result;
1273 }
1274 
classifyBiasedScopes(SmallVectorImpl<CHRScope * > & Scopes)1275 void CHR::classifyBiasedScopes(SmallVectorImpl<CHRScope *> &Scopes) {
1276   for (CHRScope *Scope : Scopes) {
1277     assert(Scope->TrueBiasedRegions.empty() && Scope->FalseBiasedRegions.empty() && "Empty");
1278     classifyBiasedScopes(Scope, Scope);
1279     CHR_DEBUG(
1280         dbgs() << "classifyBiasedScopes " << *Scope << "\n";
1281         dbgs() << "TrueBiasedRegions ";
1282         for (Region *R : Scope->TrueBiasedRegions) {
1283           dbgs() << R->getNameStr() << ", ";
1284         }
1285         dbgs() << "\n";
1286         dbgs() << "FalseBiasedRegions ";
1287         for (Region *R : Scope->FalseBiasedRegions) {
1288           dbgs() << R->getNameStr() << ", ";
1289         }
1290         dbgs() << "\n";
1291         dbgs() << "TrueBiasedSelects ";
1292         for (SelectInst *SI : Scope->TrueBiasedSelects) {
1293           dbgs() << *SI << ", ";
1294         }
1295         dbgs() << "\n";
1296         dbgs() << "FalseBiasedSelects ";
1297         for (SelectInst *SI : Scope->FalseBiasedSelects) {
1298           dbgs() << *SI << ", ";
1299         }
1300         dbgs() << "\n";);
1301   }
1302 }
1303 
classifyBiasedScopes(CHRScope * Scope,CHRScope * OutermostScope)1304 void CHR::classifyBiasedScopes(CHRScope *Scope, CHRScope *OutermostScope) {
1305   for (RegInfo &RI : Scope->RegInfos) {
1306     if (RI.HasBranch) {
1307       Region *R = RI.R;
1308       if (TrueBiasedRegionsGlobal.contains(R))
1309         OutermostScope->TrueBiasedRegions.insert(R);
1310       else if (FalseBiasedRegionsGlobal.contains(R))
1311         OutermostScope->FalseBiasedRegions.insert(R);
1312       else
1313         llvm_unreachable("Must be biased");
1314     }
1315     for (SelectInst *SI : RI.Selects) {
1316       if (TrueBiasedSelectsGlobal.contains(SI))
1317         OutermostScope->TrueBiasedSelects.insert(SI);
1318       else if (FalseBiasedSelectsGlobal.contains(SI))
1319         OutermostScope->FalseBiasedSelects.insert(SI);
1320       else
1321         llvm_unreachable("Must be biased");
1322     }
1323   }
1324   for (CHRScope *Sub : Scope->Subs) {
1325     classifyBiasedScopes(Sub, OutermostScope);
1326   }
1327 }
1328 
hasAtLeastTwoBiasedBranches(CHRScope * Scope)1329 static bool hasAtLeastTwoBiasedBranches(CHRScope *Scope) {
1330   unsigned NumBiased = Scope->TrueBiasedRegions.size() +
1331                        Scope->FalseBiasedRegions.size() +
1332                        Scope->TrueBiasedSelects.size() +
1333                        Scope->FalseBiasedSelects.size();
1334   return NumBiased >= CHRMergeThreshold;
1335 }
1336 
filterScopes(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1337 void CHR::filterScopes(SmallVectorImpl<CHRScope *> &Input,
1338                        SmallVectorImpl<CHRScope *> &Output) {
1339   for (CHRScope *Scope : Input) {
1340     // Filter out the ones with only one region and no subs.
1341     if (!hasAtLeastTwoBiasedBranches(Scope)) {
1342       CHR_DEBUG(dbgs() << "Filtered out by biased branches truthy-regions "
1343                 << Scope->TrueBiasedRegions.size()
1344                 << " falsy-regions " << Scope->FalseBiasedRegions.size()
1345                 << " true-selects " << Scope->TrueBiasedSelects.size()
1346                 << " false-selects " << Scope->FalseBiasedSelects.size() << "\n");
1347       ORE.emit([&]() {
1348         return OptimizationRemarkMissed(
1349             DEBUG_TYPE,
1350             "DropScopeWithOneBranchOrSelect",
1351             Scope->RegInfos[0].R->getEntry()->getTerminator())
1352             << "Drop scope with < "
1353             << ore::NV("CHRMergeThreshold", CHRMergeThreshold)
1354             << " biased branch(es) or select(s)";
1355       });
1356       continue;
1357     }
1358     Output.push_back(Scope);
1359   }
1360 }
1361 
setCHRRegions(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1362 void CHR::setCHRRegions(SmallVectorImpl<CHRScope *> &Input,
1363                         SmallVectorImpl<CHRScope *> &Output) {
1364   for (CHRScope *Scope : Input) {
1365     assert(Scope->HoistStopMap.empty() && Scope->CHRRegions.empty() &&
1366            "Empty");
1367     setCHRRegions(Scope, Scope);
1368     Output.push_back(Scope);
1369     CHR_DEBUG(
1370         dbgs() << "setCHRRegions HoistStopMap " << *Scope << "\n";
1371         for (auto pair : Scope->HoistStopMap) {
1372           Region *R = pair.first;
1373           dbgs() << "Region " << R->getNameStr() << "\n";
1374           for (Instruction *I : pair.second) {
1375             dbgs() << "HoistStop " << *I << "\n";
1376           }
1377         }
1378         dbgs() << "CHRRegions" << "\n";
1379         for (RegInfo &RI : Scope->CHRRegions) {
1380           dbgs() << RI.R->getNameStr() << "\n";
1381         });
1382   }
1383 }
1384 
setCHRRegions(CHRScope * Scope,CHRScope * OutermostScope)1385 void CHR::setCHRRegions(CHRScope *Scope, CHRScope *OutermostScope) {
1386   DenseSet<Instruction *> Unhoistables;
1387   // Put the biased selects in Unhoistables because they should stay where they
1388   // are and constant-folded after CHR (in case one biased select or a branch
1389   // can depend on another biased select.)
1390   for (RegInfo &RI : Scope->RegInfos) {
1391     for (SelectInst *SI : RI.Selects) {
1392       Unhoistables.insert(SI);
1393     }
1394   }
1395   Instruction *InsertPoint = OutermostScope->BranchInsertPoint;
1396   for (RegInfo &RI : Scope->RegInfos) {
1397     Region *R = RI.R;
1398     DenseSet<Instruction *> HoistStops;
1399     bool IsHoisted = false;
1400     if (RI.HasBranch) {
1401       assert((OutermostScope->TrueBiasedRegions.contains(R) ||
1402               OutermostScope->FalseBiasedRegions.contains(R)) &&
1403              "Must be truthy or falsy");
1404       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1405       // Note checkHoistValue fills in HoistStops.
1406       DenseMap<Instruction *, bool> Visited;
1407       bool IsHoistable = checkHoistValue(BI->getCondition(), InsertPoint, DT,
1408                                          Unhoistables, &HoistStops, Visited);
1409       assert(IsHoistable && "Must be hoistable");
1410       (void)(IsHoistable);  // Unused in release build
1411       IsHoisted = true;
1412     }
1413     for (SelectInst *SI : RI.Selects) {
1414       assert((OutermostScope->TrueBiasedSelects.contains(SI) ||
1415               OutermostScope->FalseBiasedSelects.contains(SI)) &&
1416              "Must be true or false biased");
1417       // Note checkHoistValue fills in HoistStops.
1418       DenseMap<Instruction *, bool> Visited;
1419       bool IsHoistable = checkHoistValue(SI->getCondition(), InsertPoint, DT,
1420                                          Unhoistables, &HoistStops, Visited);
1421       assert(IsHoistable && "Must be hoistable");
1422       (void)(IsHoistable);  // Unused in release build
1423       IsHoisted = true;
1424     }
1425     if (IsHoisted) {
1426       OutermostScope->CHRRegions.push_back(RI);
1427       OutermostScope->HoistStopMap[R] = HoistStops;
1428     }
1429   }
1430   for (CHRScope *Sub : Scope->Subs)
1431     setCHRRegions(Sub, OutermostScope);
1432 }
1433 
CHRScopeSorter(CHRScope * Scope1,CHRScope * Scope2)1434 static bool CHRScopeSorter(CHRScope *Scope1, CHRScope *Scope2) {
1435   return Scope1->RegInfos[0].R->getDepth() < Scope2->RegInfos[0].R->getDepth();
1436 }
1437 
sortScopes(SmallVectorImpl<CHRScope * > & Input,SmallVectorImpl<CHRScope * > & Output)1438 void CHR::sortScopes(SmallVectorImpl<CHRScope *> &Input,
1439                      SmallVectorImpl<CHRScope *> &Output) {
1440   Output.resize(Input.size());
1441   llvm::copy(Input, Output.begin());
1442   llvm::stable_sort(Output, CHRScopeSorter);
1443 }
1444 
1445 // Return true if V is already hoisted or was hoisted (along with its operands)
1446 // to the insert point.
hoistValue(Value * V,Instruction * HoistPoint,Region * R,HoistStopMapTy & HoistStopMap,DenseSet<Instruction * > & HoistedSet,DenseSet<PHINode * > & TrivialPHIs,DominatorTree & DT)1447 static void hoistValue(Value *V, Instruction *HoistPoint, Region *R,
1448                        HoistStopMapTy &HoistStopMap,
1449                        DenseSet<Instruction *> &HoistedSet,
1450                        DenseSet<PHINode *> &TrivialPHIs,
1451                        DominatorTree &DT) {
1452   auto IT = HoistStopMap.find(R);
1453   assert(IT != HoistStopMap.end() && "Region must be in hoist stop map");
1454   DenseSet<Instruction *> &HoistStops = IT->second;
1455   if (auto *I = dyn_cast<Instruction>(V)) {
1456     if (I == HoistPoint)
1457       return;
1458     if (HoistStops.count(I))
1459       return;
1460     if (auto *PN = dyn_cast<PHINode>(I))
1461       if (TrivialPHIs.count(PN))
1462         // The trivial phi inserted by the previous CHR scope could replace a
1463         // non-phi in HoistStops. Note that since this phi is at the exit of a
1464         // previous CHR scope, which dominates this scope, it's safe to stop
1465         // hoisting there.
1466         return;
1467     if (HoistedSet.count(I))
1468       // Already hoisted, return.
1469       return;
1470     assert(isHoistableInstructionType(I) && "Unhoistable instruction type");
1471     assert(DT.getNode(I->getParent()) && "DT must contain I's block");
1472     assert(DT.getNode(HoistPoint->getParent()) &&
1473            "DT must contain HoistPoint block");
1474     if (DT.dominates(I, HoistPoint))
1475       // We are already above the hoist point. Stop here. This may be necessary
1476       // when multiple scopes would independently hoist the same
1477       // instruction. Since an outer (dominating) scope would hoist it to its
1478       // entry before an inner (dominated) scope would to its entry, the inner
1479       // scope may see the instruction already hoisted, in which case it
1480       // potentially wrong for the inner scope to hoist it and could cause bad
1481       // IR (non-dominating def), but safe to skip hoisting it instead because
1482       // it's already in a block that dominates the inner scope.
1483       return;
1484     for (Value *Op : I->operands()) {
1485       hoistValue(Op, HoistPoint, R, HoistStopMap, HoistedSet, TrivialPHIs, DT);
1486     }
1487     I->moveBefore(HoistPoint);
1488     HoistedSet.insert(I);
1489     CHR_DEBUG(dbgs() << "hoistValue " << *I << "\n");
1490   }
1491 }
1492 
1493 // Hoist the dependent condition values of the branches and the selects in the
1494 // scope to the insert point.
hoistScopeConditions(CHRScope * Scope,Instruction * HoistPoint,DenseSet<PHINode * > & TrivialPHIs,DominatorTree & DT)1495 static void hoistScopeConditions(CHRScope *Scope, Instruction *HoistPoint,
1496                                  DenseSet<PHINode *> &TrivialPHIs,
1497                                  DominatorTree &DT) {
1498   DenseSet<Instruction *> HoistedSet;
1499   for (const RegInfo &RI : Scope->CHRRegions) {
1500     Region *R = RI.R;
1501     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1502     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1503     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1504       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1505       hoistValue(BI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1506                  HoistedSet, TrivialPHIs, DT);
1507     }
1508     for (SelectInst *SI : RI.Selects) {
1509       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1510       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1511       if (!(IsTrueBiased || IsFalseBiased))
1512         continue;
1513       hoistValue(SI->getCondition(), HoistPoint, R, Scope->HoistStopMap,
1514                  HoistedSet, TrivialPHIs, DT);
1515     }
1516   }
1517 }
1518 
1519 // Negate the predicate if an ICmp if it's used only by branches or selects by
1520 // swapping the operands of the branches or the selects. Returns true if success.
negateICmpIfUsedByBranchOrSelectOnly(ICmpInst * ICmp,Instruction * ExcludedUser,CHRScope * Scope)1521 static bool negateICmpIfUsedByBranchOrSelectOnly(ICmpInst *ICmp,
1522                                                  Instruction *ExcludedUser,
1523                                                  CHRScope *Scope) {
1524   for (User *U : ICmp->users()) {
1525     if (U == ExcludedUser)
1526       continue;
1527     if (isa<BranchInst>(U) && cast<BranchInst>(U)->isConditional())
1528       continue;
1529     if (isa<SelectInst>(U) && cast<SelectInst>(U)->getCondition() == ICmp)
1530       continue;
1531     return false;
1532   }
1533   for (User *U : ICmp->users()) {
1534     if (U == ExcludedUser)
1535       continue;
1536     if (auto *BI = dyn_cast<BranchInst>(U)) {
1537       assert(BI->isConditional() && "Must be conditional");
1538       BI->swapSuccessors();
1539       // Don't need to swap this in terms of
1540       // TrueBiasedRegions/FalseBiasedRegions because true-based/false-based
1541       // mean whehter the branch is likely go into the if-then rather than
1542       // successor0/successor1 and because we can tell which edge is the then or
1543       // the else one by comparing the destination to the region exit block.
1544       continue;
1545     }
1546     if (auto *SI = dyn_cast<SelectInst>(U)) {
1547       // Swap operands
1548       SI->swapValues();
1549       SI->swapProfMetadata();
1550       if (Scope->TrueBiasedSelects.count(SI)) {
1551         assert(Scope->FalseBiasedSelects.count(SI) == 0 &&
1552                "Must not be already in");
1553         Scope->FalseBiasedSelects.insert(SI);
1554       } else if (Scope->FalseBiasedSelects.count(SI)) {
1555         assert(Scope->TrueBiasedSelects.count(SI) == 0 &&
1556                "Must not be already in");
1557         Scope->TrueBiasedSelects.insert(SI);
1558       }
1559       continue;
1560     }
1561     llvm_unreachable("Must be a branch or a select");
1562   }
1563   ICmp->setPredicate(CmpInst::getInversePredicate(ICmp->getPredicate()));
1564   return true;
1565 }
1566 
1567 // A helper for transformScopes. Insert a trivial phi at the scope exit block
1568 // for a value that's defined in the scope but used outside it (meaning it's
1569 // alive at the exit block).
insertTrivialPHIs(CHRScope * Scope,BasicBlock * EntryBlock,BasicBlock * ExitBlock,DenseSet<PHINode * > & TrivialPHIs)1570 static void insertTrivialPHIs(CHRScope *Scope,
1571                               BasicBlock *EntryBlock, BasicBlock *ExitBlock,
1572                               DenseSet<PHINode *> &TrivialPHIs) {
1573   SmallSetVector<BasicBlock *, 8> BlocksInScope;
1574   for (RegInfo &RI : Scope->RegInfos) {
1575     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1576                                             // sub-Scopes.
1577       BlocksInScope.insert(BB);
1578     }
1579   }
1580   CHR_DEBUG({
1581     dbgs() << "Inserting redundant phis\n";
1582     for (BasicBlock *BB : BlocksInScope)
1583       dbgs() << "BlockInScope " << BB->getName() << "\n";
1584   });
1585   for (BasicBlock *BB : BlocksInScope) {
1586     for (Instruction &I : *BB) {
1587       SmallVector<Instruction *, 8> Users;
1588       for (User *U : I.users()) {
1589         if (auto *UI = dyn_cast<Instruction>(U)) {
1590           if (BlocksInScope.count(UI->getParent()) == 0 &&
1591               // Unless there's already a phi for I at the exit block.
1592               !(isa<PHINode>(UI) && UI->getParent() == ExitBlock)) {
1593             CHR_DEBUG(dbgs() << "V " << I << "\n");
1594             CHR_DEBUG(dbgs() << "Used outside scope by user " << *UI << "\n");
1595             Users.push_back(UI);
1596           } else if (UI->getParent() == EntryBlock && isa<PHINode>(UI)) {
1597             // There's a loop backedge from a block that's dominated by this
1598             // scope to the entry block.
1599             CHR_DEBUG(dbgs() << "V " << I << "\n");
1600             CHR_DEBUG(dbgs()
1601                       << "Used at entry block (for a back edge) by a phi user "
1602                       << *UI << "\n");
1603             Users.push_back(UI);
1604           }
1605         }
1606       }
1607       if (Users.size() > 0) {
1608         // Insert a trivial phi for I (phi [&I, P0], [&I, P1], ...) at
1609         // ExitBlock. Replace I with the new phi in UI unless UI is another
1610         // phi at ExitBlock.
1611         PHINode *PN = PHINode::Create(I.getType(), pred_size(ExitBlock), "",
1612                                       &ExitBlock->front());
1613         for (BasicBlock *Pred : predecessors(ExitBlock)) {
1614           PN->addIncoming(&I, Pred);
1615         }
1616         TrivialPHIs.insert(PN);
1617         CHR_DEBUG(dbgs() << "Insert phi " << *PN << "\n");
1618         for (Instruction *UI : Users) {
1619           for (unsigned J = 0, NumOps = UI->getNumOperands(); J < NumOps; ++J) {
1620             if (UI->getOperand(J) == &I) {
1621               UI->setOperand(J, PN);
1622             }
1623           }
1624           CHR_DEBUG(dbgs() << "Updated user " << *UI << "\n");
1625         }
1626       }
1627     }
1628   }
1629 }
1630 
1631 // Assert that all the CHR regions of the scope have a biased branch or select.
1632 static void LLVM_ATTRIBUTE_UNUSED
assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope * Scope)1633 assertCHRRegionsHaveBiasedBranchOrSelect(CHRScope *Scope) {
1634 #ifndef NDEBUG
1635   auto HasBiasedBranchOrSelect = [](RegInfo &RI, CHRScope *Scope) {
1636     if (Scope->TrueBiasedRegions.count(RI.R) ||
1637         Scope->FalseBiasedRegions.count(RI.R))
1638       return true;
1639     for (SelectInst *SI : RI.Selects)
1640       if (Scope->TrueBiasedSelects.count(SI) ||
1641           Scope->FalseBiasedSelects.count(SI))
1642         return true;
1643     return false;
1644   };
1645   for (RegInfo &RI : Scope->CHRRegions) {
1646     assert(HasBiasedBranchOrSelect(RI, Scope) &&
1647            "Must have biased branch or select");
1648   }
1649 #endif
1650 }
1651 
1652 // Assert that all the condition values of the biased branches and selects have
1653 // been hoisted to the pre-entry block or outside of the scope.
assertBranchOrSelectConditionHoisted(CHRScope * Scope,BasicBlock * PreEntryBlock)1654 static void LLVM_ATTRIBUTE_UNUSED assertBranchOrSelectConditionHoisted(
1655     CHRScope *Scope, BasicBlock *PreEntryBlock) {
1656   CHR_DEBUG(dbgs() << "Biased regions condition values \n");
1657   for (RegInfo &RI : Scope->CHRRegions) {
1658     Region *R = RI.R;
1659     bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1660     bool IsFalseBiased = Scope->FalseBiasedRegions.count(R);
1661     if (RI.HasBranch && (IsTrueBiased || IsFalseBiased)) {
1662       auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1663       Value *V = BI->getCondition();
1664       CHR_DEBUG(dbgs() << *V << "\n");
1665       if (auto *I = dyn_cast<Instruction>(V)) {
1666         (void)(I); // Unused in release build.
1667         assert((I->getParent() == PreEntryBlock ||
1668                 !Scope->contains(I)) &&
1669                "Must have been hoisted to PreEntryBlock or outside the scope");
1670       }
1671     }
1672     for (SelectInst *SI : RI.Selects) {
1673       bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1674       bool IsFalseBiased = Scope->FalseBiasedSelects.count(SI);
1675       if (!(IsTrueBiased || IsFalseBiased))
1676         continue;
1677       Value *V = SI->getCondition();
1678       CHR_DEBUG(dbgs() << *V << "\n");
1679       if (auto *I = dyn_cast<Instruction>(V)) {
1680         (void)(I); // Unused in release build.
1681         assert((I->getParent() == PreEntryBlock ||
1682                 !Scope->contains(I)) &&
1683                "Must have been hoisted to PreEntryBlock or outside the scope");
1684       }
1685     }
1686   }
1687 }
1688 
transformScopes(CHRScope * Scope,DenseSet<PHINode * > & TrivialPHIs)1689 void CHR::transformScopes(CHRScope *Scope, DenseSet<PHINode *> &TrivialPHIs) {
1690   CHR_DEBUG(dbgs() << "transformScopes " << *Scope << "\n");
1691 
1692   assert(Scope->RegInfos.size() >= 1 && "Should have at least one Region");
1693   Region *FirstRegion = Scope->RegInfos[0].R;
1694   BasicBlock *EntryBlock = FirstRegion->getEntry();
1695   Region *LastRegion = Scope->RegInfos[Scope->RegInfos.size() - 1].R;
1696   BasicBlock *ExitBlock = LastRegion->getExit();
1697   Optional<uint64_t> ProfileCount = BFI.getBlockProfileCount(EntryBlock);
1698 
1699   if (ExitBlock) {
1700     // Insert a trivial phi at the exit block (where the CHR hot path and the
1701     // cold path merges) for a value that's defined in the scope but used
1702     // outside it (meaning it's alive at the exit block). We will add the
1703     // incoming values for the CHR cold paths to it below. Without this, we'd
1704     // miss updating phi's for such values unless there happens to already be a
1705     // phi for that value there.
1706     insertTrivialPHIs(Scope, EntryBlock, ExitBlock, TrivialPHIs);
1707   }
1708 
1709   // Split the entry block of the first region. The new block becomes the new
1710   // entry block of the first region. The old entry block becomes the block to
1711   // insert the CHR branch into. Note DT gets updated. Since DT gets updated
1712   // through the split, we update the entry of the first region after the split,
1713   // and Region only points to the entry and the exit blocks, rather than
1714   // keeping everything in a list or set, the blocks membership and the
1715   // entry/exit blocks of the region are still valid after the split.
1716   CHR_DEBUG(dbgs() << "Splitting entry block " << EntryBlock->getName()
1717             << " at " << *Scope->BranchInsertPoint << "\n");
1718   BasicBlock *NewEntryBlock =
1719       SplitBlock(EntryBlock, Scope->BranchInsertPoint, &DT);
1720   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1721          "NewEntryBlock's only pred must be EntryBlock");
1722   FirstRegion->replaceEntryRecursive(NewEntryBlock);
1723   BasicBlock *PreEntryBlock = EntryBlock;
1724 
1725   ValueToValueMapTy VMap;
1726   // Clone the blocks in the scope (excluding the PreEntryBlock) to split into a
1727   // hot path (originals) and a cold path (clones) and update the PHIs at the
1728   // exit block.
1729   cloneScopeBlocks(Scope, PreEntryBlock, ExitBlock, LastRegion, VMap);
1730 
1731   // Replace the old (placeholder) branch with the new (merged) conditional
1732   // branch.
1733   BranchInst *MergedBr = createMergedBranch(PreEntryBlock, EntryBlock,
1734                                             NewEntryBlock, VMap);
1735 
1736 #ifndef NDEBUG
1737   assertCHRRegionsHaveBiasedBranchOrSelect(Scope);
1738 #endif
1739 
1740   // Hoist the conditional values of the branches/selects.
1741   hoistScopeConditions(Scope, PreEntryBlock->getTerminator(), TrivialPHIs, DT);
1742 
1743 #ifndef NDEBUG
1744   assertBranchOrSelectConditionHoisted(Scope, PreEntryBlock);
1745 #endif
1746 
1747   // Create the combined branch condition and constant-fold the branches/selects
1748   // in the hot path.
1749   fixupBranchesAndSelects(Scope, PreEntryBlock, MergedBr,
1750                           ProfileCount ? ProfileCount.getValue() : 0);
1751 }
1752 
1753 // A helper for transformScopes. Clone the blocks in the scope (excluding the
1754 // PreEntryBlock) to split into a hot path and a cold path and update the PHIs
1755 // at the exit block.
cloneScopeBlocks(CHRScope * Scope,BasicBlock * PreEntryBlock,BasicBlock * ExitBlock,Region * LastRegion,ValueToValueMapTy & VMap)1756 void CHR::cloneScopeBlocks(CHRScope *Scope,
1757                            BasicBlock *PreEntryBlock,
1758                            BasicBlock *ExitBlock,
1759                            Region *LastRegion,
1760                            ValueToValueMapTy &VMap) {
1761   // Clone all the blocks. The original blocks will be the hot-path
1762   // CHR-optimized code and the cloned blocks will be the original unoptimized
1763   // code. This is so that the block pointers from the
1764   // CHRScope/Region/RegionInfo can stay valid in pointing to the hot-path code
1765   // which CHR should apply to.
1766   SmallVector<BasicBlock*, 8> NewBlocks;
1767   for (RegInfo &RI : Scope->RegInfos)
1768     for (BasicBlock *BB : RI.R->blocks()) { // This includes the blocks in the
1769                                             // sub-Scopes.
1770       assert(BB != PreEntryBlock && "Don't copy the preetntry block");
1771       BasicBlock *NewBB = CloneBasicBlock(BB, VMap, ".nonchr", &F);
1772       NewBlocks.push_back(NewBB);
1773       VMap[BB] = NewBB;
1774     }
1775 
1776   // Place the cloned blocks right after the original blocks (right before the
1777   // exit block of.)
1778   if (ExitBlock)
1779     F.getBasicBlockList().splice(ExitBlock->getIterator(),
1780                                  F.getBasicBlockList(),
1781                                  NewBlocks[0]->getIterator(), F.end());
1782 
1783   // Update the cloned blocks/instructions to refer to themselves.
1784   for (unsigned i = 0, e = NewBlocks.size(); i != e; ++i)
1785     for (Instruction &I : *NewBlocks[i])
1786       RemapInstruction(&I, VMap,
1787                        RF_NoModuleLevelChanges | RF_IgnoreMissingLocals);
1788 
1789   // Add the cloned blocks to the PHIs of the exit blocks. ExitBlock is null for
1790   // the top-level region but we don't need to add PHIs. The trivial PHIs
1791   // inserted above will be updated here.
1792   if (ExitBlock)
1793     for (PHINode &PN : ExitBlock->phis())
1794       for (unsigned I = 0, NumOps = PN.getNumIncomingValues(); I < NumOps;
1795            ++I) {
1796         BasicBlock *Pred = PN.getIncomingBlock(I);
1797         if (LastRegion->contains(Pred)) {
1798           Value *V = PN.getIncomingValue(I);
1799           auto It = VMap.find(V);
1800           if (It != VMap.end()) V = It->second;
1801           assert(VMap.find(Pred) != VMap.end() && "Pred must have been cloned");
1802           PN.addIncoming(V, cast<BasicBlock>(VMap[Pred]));
1803         }
1804       }
1805 }
1806 
1807 // A helper for transformScope. Replace the old (placeholder) branch with the
1808 // new (merged) conditional branch.
createMergedBranch(BasicBlock * PreEntryBlock,BasicBlock * EntryBlock,BasicBlock * NewEntryBlock,ValueToValueMapTy & VMap)1809 BranchInst *CHR::createMergedBranch(BasicBlock *PreEntryBlock,
1810                                     BasicBlock *EntryBlock,
1811                                     BasicBlock *NewEntryBlock,
1812                                     ValueToValueMapTy &VMap) {
1813   BranchInst *OldBR = cast<BranchInst>(PreEntryBlock->getTerminator());
1814   assert(OldBR->isUnconditional() && OldBR->getSuccessor(0) == NewEntryBlock &&
1815          "SplitBlock did not work correctly!");
1816   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1817          "NewEntryBlock's only pred must be EntryBlock");
1818   assert(VMap.find(NewEntryBlock) != VMap.end() &&
1819          "NewEntryBlock must have been copied");
1820   OldBR->dropAllReferences();
1821   OldBR->eraseFromParent();
1822   // The true predicate is a placeholder. It will be replaced later in
1823   // fixupBranchesAndSelects().
1824   BranchInst *NewBR = BranchInst::Create(NewEntryBlock,
1825                                          cast<BasicBlock>(VMap[NewEntryBlock]),
1826                                          ConstantInt::getTrue(F.getContext()));
1827   PreEntryBlock->getInstList().push_back(NewBR);
1828   assert(NewEntryBlock->getSinglePredecessor() == EntryBlock &&
1829          "NewEntryBlock's only pred must be EntryBlock");
1830   return NewBR;
1831 }
1832 
1833 // A helper for transformScopes. Create the combined branch condition and
1834 // constant-fold the branches/selects in the hot path.
fixupBranchesAndSelects(CHRScope * Scope,BasicBlock * PreEntryBlock,BranchInst * MergedBR,uint64_t ProfileCount)1835 void CHR::fixupBranchesAndSelects(CHRScope *Scope,
1836                                   BasicBlock *PreEntryBlock,
1837                                   BranchInst *MergedBR,
1838                                   uint64_t ProfileCount) {
1839   Value *MergedCondition = ConstantInt::getTrue(F.getContext());
1840   BranchProbability CHRBranchBias(1, 1);
1841   uint64_t NumCHRedBranches = 0;
1842   IRBuilder<> IRB(PreEntryBlock->getTerminator());
1843   for (RegInfo &RI : Scope->CHRRegions) {
1844     Region *R = RI.R;
1845     if (RI.HasBranch) {
1846       fixupBranch(R, Scope, IRB, MergedCondition, CHRBranchBias);
1847       ++NumCHRedBranches;
1848     }
1849     for (SelectInst *SI : RI.Selects) {
1850       fixupSelect(SI, Scope, IRB, MergedCondition, CHRBranchBias);
1851       ++NumCHRedBranches;
1852     }
1853   }
1854   Stats.NumBranchesDelta += NumCHRedBranches - 1;
1855   Stats.WeightedNumBranchesDelta += (NumCHRedBranches - 1) * ProfileCount;
1856   ORE.emit([&]() {
1857     return OptimizationRemark(DEBUG_TYPE,
1858                               "CHR",
1859                               // Refer to the hot (original) path
1860                               MergedBR->getSuccessor(0)->getTerminator())
1861         << "Merged " << ore::NV("NumCHRedBranches", NumCHRedBranches)
1862         << " branches or selects";
1863   });
1864   MergedBR->setCondition(MergedCondition);
1865   uint32_t Weights[] = {
1866       static_cast<uint32_t>(CHRBranchBias.scale(1000)),
1867       static_cast<uint32_t>(CHRBranchBias.getCompl().scale(1000)),
1868   };
1869   MDBuilder MDB(F.getContext());
1870   MergedBR->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(Weights));
1871   CHR_DEBUG(dbgs() << "CHR branch bias " << Weights[0] << ":" << Weights[1]
1872             << "\n");
1873 }
1874 
1875 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1876 // and constant-fold a branch in the hot path.
fixupBranch(Region * R,CHRScope * Scope,IRBuilder<> & IRB,Value * & MergedCondition,BranchProbability & CHRBranchBias)1877 void CHR::fixupBranch(Region *R, CHRScope *Scope,
1878                       IRBuilder<> &IRB,
1879                       Value *&MergedCondition,
1880                       BranchProbability &CHRBranchBias) {
1881   bool IsTrueBiased = Scope->TrueBiasedRegions.count(R);
1882   assert((IsTrueBiased || Scope->FalseBiasedRegions.count(R)) &&
1883          "Must be truthy or falsy");
1884   auto *BI = cast<BranchInst>(R->getEntry()->getTerminator());
1885   assert(BranchBiasMap.find(R) != BranchBiasMap.end() &&
1886          "Must be in the bias map");
1887   BranchProbability Bias = BranchBiasMap[R];
1888   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1889   // Take the min.
1890   if (CHRBranchBias > Bias)
1891     CHRBranchBias = Bias;
1892   BasicBlock *IfThen = BI->getSuccessor(1);
1893   BasicBlock *IfElse = BI->getSuccessor(0);
1894   BasicBlock *RegionExitBlock = R->getExit();
1895   assert(RegionExitBlock && "Null ExitBlock");
1896   assert((IfThen == RegionExitBlock || IfElse == RegionExitBlock) &&
1897          IfThen != IfElse && "Invariant from findScopes");
1898   if (IfThen == RegionExitBlock) {
1899     // Swap them so that IfThen means going into it and IfElse means skipping
1900     // it.
1901     std::swap(IfThen, IfElse);
1902   }
1903   CHR_DEBUG(dbgs() << "IfThen " << IfThen->getName()
1904             << " IfElse " << IfElse->getName() << "\n");
1905   Value *Cond = BI->getCondition();
1906   BasicBlock *HotTarget = IsTrueBiased ? IfThen : IfElse;
1907   bool ConditionTrue = HotTarget == BI->getSuccessor(0);
1908   addToMergedCondition(ConditionTrue, Cond, BI, Scope, IRB,
1909                        MergedCondition);
1910   // Constant-fold the branch at ClonedEntryBlock.
1911   assert(ConditionTrue == (HotTarget == BI->getSuccessor(0)) &&
1912          "The successor shouldn't change");
1913   Value *NewCondition = ConditionTrue ?
1914                         ConstantInt::getTrue(F.getContext()) :
1915                         ConstantInt::getFalse(F.getContext());
1916   BI->setCondition(NewCondition);
1917 }
1918 
1919 // A helper for fixupBranchesAndSelects. Add to the combined branch condition
1920 // and constant-fold a select in the hot path.
fixupSelect(SelectInst * SI,CHRScope * Scope,IRBuilder<> & IRB,Value * & MergedCondition,BranchProbability & CHRBranchBias)1921 void CHR::fixupSelect(SelectInst *SI, CHRScope *Scope,
1922                       IRBuilder<> &IRB,
1923                       Value *&MergedCondition,
1924                       BranchProbability &CHRBranchBias) {
1925   bool IsTrueBiased = Scope->TrueBiasedSelects.count(SI);
1926   assert((IsTrueBiased ||
1927           Scope->FalseBiasedSelects.count(SI)) && "Must be biased");
1928   assert(SelectBiasMap.find(SI) != SelectBiasMap.end() &&
1929          "Must be in the bias map");
1930   BranchProbability Bias = SelectBiasMap[SI];
1931   assert(Bias >= getCHRBiasThreshold() && "Must be highly biased");
1932   // Take the min.
1933   if (CHRBranchBias > Bias)
1934     CHRBranchBias = Bias;
1935   Value *Cond = SI->getCondition();
1936   addToMergedCondition(IsTrueBiased, Cond, SI, Scope, IRB,
1937                        MergedCondition);
1938   Value *NewCondition = IsTrueBiased ?
1939                         ConstantInt::getTrue(F.getContext()) :
1940                         ConstantInt::getFalse(F.getContext());
1941   SI->setCondition(NewCondition);
1942 }
1943 
1944 // A helper for fixupBranch/fixupSelect. Add a branch condition to the merged
1945 // condition.
addToMergedCondition(bool IsTrueBiased,Value * Cond,Instruction * BranchOrSelect,CHRScope * Scope,IRBuilder<> & IRB,Value * & MergedCondition)1946 void CHR::addToMergedCondition(bool IsTrueBiased, Value *Cond,
1947                                Instruction *BranchOrSelect,
1948                                CHRScope *Scope,
1949                                IRBuilder<> &IRB,
1950                                Value *&MergedCondition) {
1951   if (IsTrueBiased) {
1952     MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1953   } else {
1954     // If Cond is an icmp and all users of V except for BranchOrSelect is a
1955     // branch, negate the icmp predicate and swap the branch targets and avoid
1956     // inserting an Xor to negate Cond.
1957     bool Done = false;
1958     if (auto *ICmp = dyn_cast<ICmpInst>(Cond))
1959       if (negateICmpIfUsedByBranchOrSelectOnly(ICmp, BranchOrSelect, Scope)) {
1960         MergedCondition = IRB.CreateAnd(MergedCondition, Cond);
1961         Done = true;
1962       }
1963     if (!Done) {
1964       Value *Negate = IRB.CreateXor(
1965           ConstantInt::getTrue(F.getContext()), Cond);
1966       MergedCondition = IRB.CreateAnd(MergedCondition, Negate);
1967     }
1968   }
1969 }
1970 
transformScopes(SmallVectorImpl<CHRScope * > & CHRScopes)1971 void CHR::transformScopes(SmallVectorImpl<CHRScope *> &CHRScopes) {
1972   unsigned I = 0;
1973   DenseSet<PHINode *> TrivialPHIs;
1974   for (CHRScope *Scope : CHRScopes) {
1975     transformScopes(Scope, TrivialPHIs);
1976     CHR_DEBUG(
1977         std::ostringstream oss;
1978         oss << " after transformScopes " << I++;
1979         dumpIR(F, oss.str().c_str(), nullptr));
1980     (void)I;
1981   }
1982 }
1983 
1984 static void LLVM_ATTRIBUTE_UNUSED
dumpScopes(SmallVectorImpl<CHRScope * > & Scopes,const char * Label)1985 dumpScopes(SmallVectorImpl<CHRScope *> &Scopes, const char *Label) {
1986   dbgs() << Label << " " << Scopes.size() << "\n";
1987   for (CHRScope *Scope : Scopes) {
1988     dbgs() << *Scope << "\n";
1989   }
1990 }
1991 
run()1992 bool CHR::run() {
1993   if (!shouldApply(F, PSI))
1994     return false;
1995 
1996   CHR_DEBUG(dumpIR(F, "before", nullptr));
1997 
1998   bool Changed = false;
1999   {
2000     CHR_DEBUG(
2001         dbgs() << "RegionInfo:\n";
2002         RI.print(dbgs()));
2003 
2004     // Recursively traverse the region tree and find regions that have biased
2005     // branches and/or selects and create scopes.
2006     SmallVector<CHRScope *, 8> AllScopes;
2007     findScopes(AllScopes);
2008     CHR_DEBUG(dumpScopes(AllScopes, "All scopes"));
2009 
2010     // Split the scopes if 1) the conditiona values of the biased
2011     // branches/selects of the inner/lower scope can't be hoisted up to the
2012     // outermost/uppermost scope entry, or 2) the condition values of the biased
2013     // branches/selects in a scope (including subscopes) don't share at least
2014     // one common value.
2015     SmallVector<CHRScope *, 8> SplitScopes;
2016     splitScopes(AllScopes, SplitScopes);
2017     CHR_DEBUG(dumpScopes(SplitScopes, "Split scopes"));
2018 
2019     // After splitting, set the biased regions and selects of a scope (a tree
2020     // root) that include those of the subscopes.
2021     classifyBiasedScopes(SplitScopes);
2022     CHR_DEBUG(dbgs() << "Set per-scope bias " << SplitScopes.size() << "\n");
2023 
2024     // Filter out the scopes that has only one biased region or select (CHR
2025     // isn't useful in such a case).
2026     SmallVector<CHRScope *, 8> FilteredScopes;
2027     filterScopes(SplitScopes, FilteredScopes);
2028     CHR_DEBUG(dumpScopes(FilteredScopes, "Filtered scopes"));
2029 
2030     // Set the regions to be CHR'ed and their hoist stops for each scope.
2031     SmallVector<CHRScope *, 8> SetScopes;
2032     setCHRRegions(FilteredScopes, SetScopes);
2033     CHR_DEBUG(dumpScopes(SetScopes, "Set CHR regions"));
2034 
2035     // Sort CHRScopes by the depth so that outer CHRScopes comes before inner
2036     // ones. We need to apply CHR from outer to inner so that we apply CHR only
2037     // to the hot path, rather than both hot and cold paths.
2038     SmallVector<CHRScope *, 8> SortedScopes;
2039     sortScopes(SetScopes, SortedScopes);
2040     CHR_DEBUG(dumpScopes(SortedScopes, "Sorted scopes"));
2041 
2042     CHR_DEBUG(
2043         dbgs() << "RegionInfo:\n";
2044         RI.print(dbgs()));
2045 
2046     // Apply the CHR transformation.
2047     if (!SortedScopes.empty()) {
2048       transformScopes(SortedScopes);
2049       Changed = true;
2050     }
2051   }
2052 
2053   if (Changed) {
2054     CHR_DEBUG(dumpIR(F, "after", &Stats));
2055     ORE.emit([&]() {
2056       return OptimizationRemark(DEBUG_TYPE, "Stats", &F)
2057           << ore::NV("Function", &F) << " "
2058           << "Reduced the number of branches in hot paths by "
2059           << ore::NV("NumBranchesDelta", Stats.NumBranchesDelta)
2060           << " (static) and "
2061           << ore::NV("WeightedNumBranchesDelta", Stats.WeightedNumBranchesDelta)
2062           << " (weighted by PGO count)";
2063     });
2064   }
2065 
2066   return Changed;
2067 }
2068 
runOnFunction(Function & F)2069 bool ControlHeightReductionLegacyPass::runOnFunction(Function &F) {
2070   BlockFrequencyInfo &BFI =
2071       getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
2072   DominatorTree &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
2073   ProfileSummaryInfo &PSI =
2074       getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
2075   RegionInfo &RI = getAnalysis<RegionInfoPass>().getRegionInfo();
2076   std::unique_ptr<OptimizationRemarkEmitter> OwnedORE =
2077       std::make_unique<OptimizationRemarkEmitter>(&F);
2078   return CHR(F, BFI, DT, PSI, RI, *OwnedORE.get()).run();
2079 }
2080 
2081 namespace llvm {
2082 
ControlHeightReductionPass()2083 ControlHeightReductionPass::ControlHeightReductionPass() {
2084   parseCHRFilterFiles();
2085 }
2086 
run(Function & F,FunctionAnalysisManager & FAM)2087 PreservedAnalyses ControlHeightReductionPass::run(
2088     Function &F,
2089     FunctionAnalysisManager &FAM) {
2090   auto &BFI = FAM.getResult<BlockFrequencyAnalysis>(F);
2091   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
2092   auto &MAMProxy = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F);
2093   auto &PSI = *MAMProxy.getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
2094   auto &RI = FAM.getResult<RegionInfoAnalysis>(F);
2095   auto &ORE = FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
2096   bool Changed = CHR(F, BFI, DT, PSI, RI, ORE).run();
2097   if (!Changed)
2098     return PreservedAnalyses::all();
2099   return PreservedAnalyses::none();
2100 }
2101 
2102 } // namespace llvm
2103