xref: /llvm-project/llvm/lib/CodeGen/SelectOptimize.cpp (revision 1d5ce614a7cd266909169bc251d7b1aee743e5a3)
1ca7c307dSSotiris Apostolakis //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===//
2ca7c307dSSotiris Apostolakis //
3ca7c307dSSotiris Apostolakis // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4ca7c307dSSotiris Apostolakis // See https://llvm.org/LICENSE.txt for license information.
5ca7c307dSSotiris Apostolakis // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6ca7c307dSSotiris Apostolakis //
7ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===//
8ca7c307dSSotiris Apostolakis //
9ca7c307dSSotiris Apostolakis // This pass converts selects to conditional jumps when profitable.
10ca7c307dSSotiris Apostolakis //
11ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===//
12ca7c307dSSotiris Apostolakis 
13ce08c7eeSpaperchalice #include "llvm/CodeGen/SelectOptimize.h"
14e909c0ccSIgor Kirillov #include "llvm/ADT/SetVector.h"
1597c3ef5cSSotiris Apostolakis #include "llvm/ADT/SmallVector.h"
1697c3ef5cSSotiris Apostolakis #include "llvm/ADT/Statistic.h"
1797c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BlockFrequencyInfo.h"
1897c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BranchProbabilityInfo.h"
1997c3ef5cSSotiris Apostolakis #include "llvm/Analysis/LoopInfo.h"
208b42bc56SSotiris Apostolakis #include "llvm/Analysis/OptimizationRemarkEmitter.h"
218b42bc56SSotiris Apostolakis #include "llvm/Analysis/ProfileSummaryInfo.h"
228b42bc56SSotiris Apostolakis #include "llvm/Analysis/TargetTransformInfo.h"
23ca7c307dSSotiris Apostolakis #include "llvm/CodeGen/Passes.h"
2497c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetLowering.h"
2597c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetPassConfig.h"
2697c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSchedule.h"
2797c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSubtargetInfo.h"
2897c3ef5cSSotiris Apostolakis #include "llvm/IR/BasicBlock.h"
298b42bc56SSotiris Apostolakis #include "llvm/IR/Dominators.h"
30ca7c307dSSotiris Apostolakis #include "llvm/IR/Function.h"
3197c3ef5cSSotiris Apostolakis #include "llvm/IR/IRBuilder.h"
3297c3ef5cSSotiris Apostolakis #include "llvm/IR/Instruction.h"
33ac73c48eSElliot Goodrich #include "llvm/IR/PatternMatch.h"
34d434e40fSPaul Kirth #include "llvm/IR/ProfDataUtils.h"
35ca7c307dSSotiris Apostolakis #include "llvm/InitializePasses.h"
36ca7c307dSSotiris Apostolakis #include "llvm/Pass.h"
37d7ebb746SSotiris Apostolakis #include "llvm/Support/ScaledNumber.h"
3897c3ef5cSSotiris Apostolakis #include "llvm/Target/TargetMachine.h"
398b42bc56SSotiris Apostolakis #include "llvm/Transforms/Utils/SizeOpts.h"
408b42bc56SSotiris Apostolakis #include <algorithm>
418b42bc56SSotiris Apostolakis #include <queue>
428b42bc56SSotiris Apostolakis #include <stack>
43ca7c307dSSotiris Apostolakis 
44ca7c307dSSotiris Apostolakis using namespace llvm;
45a2d68b4bSDavid Green using namespace llvm::PatternMatch;
46ca7c307dSSotiris Apostolakis 
4797c3ef5cSSotiris Apostolakis #define DEBUG_TYPE "select-optimize"
4897c3ef5cSSotiris Apostolakis 
498b42bc56SSotiris Apostolakis STATISTIC(NumSelectOptAnalyzed,
508b42bc56SSotiris Apostolakis           "Number of select groups considered for conversion to branch");
518b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedExpColdOperand,
528b42bc56SSotiris Apostolakis           "Number of select groups converted due to expensive cold operand");
538b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedHighPred,
548b42bc56SSotiris Apostolakis           "Number of select groups converted due to high-predictability");
558b42bc56SSotiris Apostolakis STATISTIC(NumSelectUnPred,
568b42bc56SSotiris Apostolakis           "Number of select groups not converted due to unpredictability");
578b42bc56SSotiris Apostolakis STATISTIC(NumSelectColdBB,
588b42bc56SSotiris Apostolakis           "Number of select groups not converted due to cold basic block");
59d7ebb746SSotiris Apostolakis STATISTIC(NumSelectConvertedLoop,
60d7ebb746SSotiris Apostolakis           "Number of select groups converted due to loop-level analysis");
6197c3ef5cSSotiris Apostolakis STATISTIC(NumSelectsConverted, "Number of selects converted");
6297c3ef5cSSotiris Apostolakis 
638b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandThreshold(
648b42bc56SSotiris Apostolakis     "cold-operand-threshold",
658b42bc56SSotiris Apostolakis     cl::desc("Maximum frequency of path for an operand to be considered cold."),
668b42bc56SSotiris Apostolakis     cl::init(20), cl::Hidden);
678b42bc56SSotiris Apostolakis 
688b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandMaxCostMultiplier(
698b42bc56SSotiris Apostolakis     "cold-operand-max-cost-multiplier",
708b42bc56SSotiris Apostolakis     cl::desc("Maximum cost multiplier of TCC_expensive for the dependence "
718b42bc56SSotiris Apostolakis              "slice of a cold operand to be considered inexpensive."),
728b42bc56SSotiris Apostolakis     cl::init(1), cl::Hidden);
738b42bc56SSotiris Apostolakis 
74d7ebb746SSotiris Apostolakis static cl::opt<unsigned>
75d7ebb746SSotiris Apostolakis     GainGradientThreshold("select-opti-loop-gradient-gain-threshold",
76d7ebb746SSotiris Apostolakis                           cl::desc("Gradient gain threshold (%)."),
77d7ebb746SSotiris Apostolakis                           cl::init(25), cl::Hidden);
78d7ebb746SSotiris Apostolakis 
79d7ebb746SSotiris Apostolakis static cl::opt<unsigned>
80d7ebb746SSotiris Apostolakis     GainCycleThreshold("select-opti-loop-cycle-gain-threshold",
81d7ebb746SSotiris Apostolakis                        cl::desc("Minimum gain per loop (in cycles) threshold."),
82d7ebb746SSotiris Apostolakis                        cl::init(4), cl::Hidden);
83d7ebb746SSotiris Apostolakis 
84d7ebb746SSotiris Apostolakis static cl::opt<unsigned> GainRelativeThreshold(
85d7ebb746SSotiris Apostolakis     "select-opti-loop-relative-gain-threshold",
86d7ebb746SSotiris Apostolakis     cl::desc(
87d7ebb746SSotiris Apostolakis         "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"),
88d7ebb746SSotiris Apostolakis     cl::init(8), cl::Hidden);
89d7ebb746SSotiris Apostolakis 
90d7ebb746SSotiris Apostolakis static cl::opt<unsigned> MispredictDefaultRate(
91d7ebb746SSotiris Apostolakis     "mispredict-default-rate", cl::Hidden, cl::init(25),
92d7ebb746SSotiris Apostolakis     cl::desc("Default mispredict rate (initialized to 25%)."));
93d7ebb746SSotiris Apostolakis 
94d7ebb746SSotiris Apostolakis static cl::opt<bool>
95d7ebb746SSotiris Apostolakis     DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden,
96d7ebb746SSotiris Apostolakis                                cl::init(false),
97d7ebb746SSotiris Apostolakis                                cl::desc("Disable loop-level heuristics."));
98d7ebb746SSotiris Apostolakis 
99ca7c307dSSotiris Apostolakis namespace {
100ca7c307dSSotiris Apostolakis 
101ce08c7eeSpaperchalice class SelectOptimizeImpl {
10297c3ef5cSSotiris Apostolakis   const TargetMachine *TM = nullptr;
1038bf7f86dSAkshay Khadse   const TargetSubtargetInfo *TSI = nullptr;
10497c3ef5cSSotiris Apostolakis   const TargetLowering *TLI = nullptr;
1058b42bc56SSotiris Apostolakis   const TargetTransformInfo *TTI = nullptr;
1068bf7f86dSAkshay Khadse   const LoopInfo *LI = nullptr;
107ce08c7eeSpaperchalice   BlockFrequencyInfo *BFI;
1088bf7f86dSAkshay Khadse   ProfileSummaryInfo *PSI = nullptr;
1098bf7f86dSAkshay Khadse   OptimizationRemarkEmitter *ORE = nullptr;
110d7ebb746SSotiris Apostolakis   TargetSchedModel TSchedModel;
11197c3ef5cSSotiris Apostolakis 
112ca7c307dSSotiris Apostolakis public:
113ce08c7eeSpaperchalice   SelectOptimizeImpl() = default;
114ce08c7eeSpaperchalice   SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){};
115ce08c7eeSpaperchalice   PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM);
116ce08c7eeSpaperchalice   bool runOnFunction(Function &F, Pass &P);
11797c3ef5cSSotiris Apostolakis 
118d7ebb746SSotiris Apostolakis   using Scaled64 = ScaledNumber<uint64_t>;
119d7ebb746SSotiris Apostolakis 
120d7ebb746SSotiris Apostolakis   struct CostInfo {
121d7ebb746SSotiris Apostolakis     /// Predicated cost (with selects as conditional moves).
122d7ebb746SSotiris Apostolakis     Scaled64 PredCost;
123d7ebb746SSotiris Apostolakis     /// Non-predicated cost (with selects converted to branches).
124d7ebb746SSotiris Apostolakis     Scaled64 NonPredCost;
125d7ebb746SSotiris Apostolakis   };
126d7ebb746SSotiris Apostolakis 
127a2d68b4bSDavid Green   /// SelectLike is an abstraction over SelectInst and other operations that can
128a2d68b4bSDavid Green   /// act like selects. For example Or(Zext(icmp), X) can be treated like
129a2d68b4bSDavid Green   /// select(icmp, X|1, X).
130a2d68b4bSDavid Green   class SelectLike {
1310a62a99aSDavid Green     /// The select (/or) instruction.
132a2d68b4bSDavid Green     Instruction *I;
1330a62a99aSDavid Green     /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as
1340a62a99aSDavid Green     /// opposed to the original condition.
1350a62a99aSDavid Green     bool Inverted = false;
136a2d68b4bSDavid Green 
137e874c8fcSIgor Kirillov     /// The index of the operand that depends on condition. Only for select-like
138e874c8fcSIgor Kirillov     /// instruction such as Or/Add.
139e874c8fcSIgor Kirillov     unsigned CondIdx;
140e874c8fcSIgor Kirillov 
141a2d68b4bSDavid Green   public:
142e874c8fcSIgor Kirillov     SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0)
143e874c8fcSIgor Kirillov         : I(I), Inverted(Inverted), CondIdx(CondIdx) {}
1440a62a99aSDavid Green 
145a2d68b4bSDavid Green     Instruction *getI() { return I; }
146a2d68b4bSDavid Green     const Instruction *getI() const { return I; }
147a2d68b4bSDavid Green 
148a2d68b4bSDavid Green     Type *getType() const { return I->getType(); }
149a2d68b4bSDavid Green 
150e874c8fcSIgor Kirillov     unsigned getConditionOpIndex() { return CondIdx; };
1510a62a99aSDavid Green 
152a2d68b4bSDavid Green     /// Return the true value for the SelectLike instruction. Note this may not
153a2d68b4bSDavid Green     /// exist for all SelectLike instructions. For example, for `or(zext(c), x)`
154a2d68b4bSDavid Green     /// the true value would be `or(x,1)`. As this value does not exist, nullptr
155a2d68b4bSDavid Green     /// is returned.
1560a62a99aSDavid Green     Value *getTrueValue(bool HonorInverts = true) const {
1570a62a99aSDavid Green       if (Inverted && HonorInverts)
1580a62a99aSDavid Green         return getFalseValue(/*HonorInverts=*/false);
159a2d68b4bSDavid Green       if (auto *Sel = dyn_cast<SelectInst>(I))
160a2d68b4bSDavid Green         return Sel->getTrueValue();
161a2d68b4bSDavid Green       // Or(zext) case - The true value is Or(X), so return nullptr as the value
162a2d68b4bSDavid Green       // does not yet exist.
163a2d68b4bSDavid Green       if (isa<BinaryOperator>(I))
164a2d68b4bSDavid Green         return nullptr;
165a2d68b4bSDavid Green 
166a2d68b4bSDavid Green       llvm_unreachable("Unhandled case in getTrueValue");
167a2d68b4bSDavid Green     }
168a2d68b4bSDavid Green 
169a2d68b4bSDavid Green     /// Return the false value for the SelectLike instruction. For example the
170a2d68b4bSDavid Green     /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is
171a2d68b4bSDavid Green     /// `select(c, x|1, x)`)
1720a62a99aSDavid Green     Value *getFalseValue(bool HonorInverts = true) const {
1730a62a99aSDavid Green       if (Inverted && HonorInverts)
1740a62a99aSDavid Green         return getTrueValue(/*HonorInverts=*/false);
175a2d68b4bSDavid Green       if (auto *Sel = dyn_cast<SelectInst>(I))
176a2d68b4bSDavid Green         return Sel->getFalseValue();
177e874c8fcSIgor Kirillov       // We are on the branch where the condition is zero, which means BinOp
178e874c8fcSIgor Kirillov       // does not perform any computation, and we can simply return the operand
179e874c8fcSIgor Kirillov       // that is not related to the condition
180e874c8fcSIgor Kirillov       if (auto *BO = dyn_cast<BinaryOperator>(I))
181e874c8fcSIgor Kirillov         return BO->getOperand(1 - CondIdx);
182a2d68b4bSDavid Green 
183a2d68b4bSDavid Green       llvm_unreachable("Unhandled case in getFalseValue");
184a2d68b4bSDavid Green     }
185a2d68b4bSDavid Green 
186e874c8fcSIgor Kirillov     /// Return the NonPredCost cost of the op on \p isTrue branch, given the
187e874c8fcSIgor Kirillov     /// costs in \p InstCostMap. This may need to be generated for select-like
188e874c8fcSIgor Kirillov     /// instructions.
189e874c8fcSIgor Kirillov     Scaled64 getOpCostOnBranch(
190e874c8fcSIgor Kirillov         bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap,
191a2d68b4bSDavid Green         const TargetTransformInfo *TTI) {
192e874c8fcSIgor Kirillov       auto *V = IsTrue ? getTrueValue() : getFalseValue();
193e874c8fcSIgor Kirillov       if (V) {
194e874c8fcSIgor Kirillov         if (auto *IV = dyn_cast<Instruction>(V)) {
195e874c8fcSIgor Kirillov           auto It = InstCostMap.find(IV);
196186dc9a4SKazu Hirata           return It != InstCostMap.end() ? It->second.NonPredCost
197a2d68b4bSDavid Green                                          : Scaled64::getZero();
198186dc9a4SKazu Hirata         }
199e874c8fcSIgor Kirillov         return Scaled64::getZero();
200e874c8fcSIgor Kirillov       }
201e874c8fcSIgor Kirillov       // If getTrue(False)Value() return nullptr, it means we are dealing with
202e874c8fcSIgor Kirillov       // select-like instructions on the branch where the actual computation is
203e874c8fcSIgor Kirillov       // happening. In that case the cost is equal to the cost of computation +
204e874c8fcSIgor Kirillov       // cost of non-dependant on condition operand
205e874c8fcSIgor Kirillov       InstructionCost Cost = TTI->getArithmeticInstrCost(
206e874c8fcSIgor Kirillov           getI()->getOpcode(), I->getType(), TargetTransformInfo::TCK_Latency,
207e874c8fcSIgor Kirillov           {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None},
2084a7a27cbSIgor Kirillov           {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2});
209e874c8fcSIgor Kirillov       auto TotalCost = Scaled64::get(*Cost.getValue());
210e874c8fcSIgor Kirillov       if (auto *OpI = dyn_cast<Instruction>(I->getOperand(1 - CondIdx))) {
211e874c8fcSIgor Kirillov         auto It = InstCostMap.find(OpI);
212e874c8fcSIgor Kirillov         if (It != InstCostMap.end())
213e874c8fcSIgor Kirillov           TotalCost += It->second.NonPredCost;
2144a7a27cbSIgor Kirillov       }
215e874c8fcSIgor Kirillov       return TotalCost;
216a2d68b4bSDavid Green     }
217a2d68b4bSDavid Green   };
218a2d68b4bSDavid Green 
219a2d68b4bSDavid Green private:
220e874c8fcSIgor Kirillov   // Select groups consist of consecutive select-like instructions with the same
221e874c8fcSIgor Kirillov   // condition. Between select-likes could be any number of auxiliary
222e909c0ccSIgor Kirillov   // instructions related to the condition like not, zext, ashr/lshr
223e874c8fcSIgor Kirillov   struct SelectGroup {
224e874c8fcSIgor Kirillov     Value *Condition;
225e874c8fcSIgor Kirillov     SmallVector<SelectLike, 2> Selects;
226e874c8fcSIgor Kirillov   };
227a2d68b4bSDavid Green   using SelectGroups = SmallVector<SelectGroup, 2>;
228a2d68b4bSDavid Green 
2298b42bc56SSotiris Apostolakis   // Converts select instructions of a function to conditional jumps when deemed
2308b42bc56SSotiris Apostolakis   // profitable. Returns true if at least one select was converted.
23197c3ef5cSSotiris Apostolakis   bool optimizeSelects(Function &F);
2328b42bc56SSotiris Apostolakis 
2338b42bc56SSotiris Apostolakis   // Heuristics for determining which select instructions can be profitably
2348b42bc56SSotiris Apostolakis   // conveted to branches. Separate heuristics for selects in inner-most loops
2358b42bc56SSotiris Apostolakis   // and the rest of code regions (base heuristics for non-inner-most loop
2368b42bc56SSotiris Apostolakis   // regions).
2378b42bc56SSotiris Apostolakis   void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups);
2388b42bc56SSotiris Apostolakis   void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups);
2398b42bc56SSotiris Apostolakis 
2408b42bc56SSotiris Apostolakis   // Converts to branches the select groups that were deemed
2418b42bc56SSotiris Apostolakis   // profitable-to-convert.
24297c3ef5cSSotiris Apostolakis   void convertProfitableSIGroups(SelectGroups &ProfSIGroups);
2438b42bc56SSotiris Apostolakis 
2448b42bc56SSotiris Apostolakis   // Splits selects of a given basic block into select groups.
24597c3ef5cSSotiris Apostolakis   void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups);
2468b42bc56SSotiris Apostolakis 
2478b42bc56SSotiris Apostolakis   // Determines for which select groups it is profitable converting to branches
248d7ebb746SSotiris Apostolakis   // (base and inner-most-loop heuristics).
2498b42bc56SSotiris Apostolakis   void findProfitableSIGroupsBase(SelectGroups &SIGroups,
2508b42bc56SSotiris Apostolakis                                   SelectGroups &ProfSIGroups);
251d7ebb746SSotiris Apostolakis   void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups,
252d7ebb746SSotiris Apostolakis                                         SelectGroups &ProfSIGroups);
253d7ebb746SSotiris Apostolakis 
2548b42bc56SSotiris Apostolakis   // Determines if a select group should be converted to a branch (base
2558b42bc56SSotiris Apostolakis   // heuristics).
256a2d68b4bSDavid Green   bool isConvertToBranchProfitableBase(const SelectGroup &ASI);
2578b42bc56SSotiris Apostolakis 
2588b42bc56SSotiris Apostolakis   // Returns true if there are expensive instructions in the cold value
2598b42bc56SSotiris Apostolakis   // operand's (if any) dependence slice of any of the selects of the given
2608b42bc56SSotiris Apostolakis   // group.
261a2d68b4bSDavid Green   bool hasExpensiveColdOperand(const SelectGroup &ASI);
2628b42bc56SSotiris Apostolakis 
2638b42bc56SSotiris Apostolakis   // For a given source instruction, collect its backwards dependence slice
2648b42bc56SSotiris Apostolakis   // consisting of instructions exclusively computed for producing the operands
2658b42bc56SSotiris Apostolakis   // of the source instruction.
26667be40dfSSotiris Apostolakis   void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice,
267b827e7c6SSotiris Apostolakis                              Instruction *SI, bool ForSinking = false);
2688b42bc56SSotiris Apostolakis 
2698b42bc56SSotiris Apostolakis   // Returns true if the condition of the select is highly predictable.
270a2d68b4bSDavid Green   bool isSelectHighlyPredictable(const SelectLike SI);
2718b42bc56SSotiris Apostolakis 
272d7ebb746SSotiris Apostolakis   // Loop-level checks to determine if a non-predicated version (with branches)
273d7ebb746SSotiris Apostolakis   // of the given loop is more profitable than its predicated version.
274d7ebb746SSotiris Apostolakis   bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]);
275d7ebb746SSotiris Apostolakis 
276d7ebb746SSotiris Apostolakis   // Computes instruction and loop-critical-path costs for both the predicated
277d7ebb746SSotiris Apostolakis   // and non-predicated version of the given loop.
278d7ebb746SSotiris Apostolakis   bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups,
279d7ebb746SSotiris Apostolakis                         DenseMap<const Instruction *, CostInfo> &InstCostMap,
280d7ebb746SSotiris Apostolakis                         CostInfo *LoopCost);
281d7ebb746SSotiris Apostolakis 
282d7ebb746SSotiris Apostolakis   // Returns a set of all the select instructions in the given select groups.
283a2d68b4bSDavid Green   SmallDenseMap<const Instruction *, SelectLike, 2>
284a2d68b4bSDavid Green   getSImap(const SelectGroups &SIGroups);
285d7ebb746SSotiris Apostolakis 
286e874c8fcSIgor Kirillov   // Returns a map from select-like instructions to the corresponding select
287e874c8fcSIgor Kirillov   // group.
288e874c8fcSIgor Kirillov   SmallDenseMap<const Instruction *, const SelectGroup *, 2>
289e874c8fcSIgor Kirillov   getSGmap(const SelectGroups &SIGroups);
290e874c8fcSIgor Kirillov 
291d7ebb746SSotiris Apostolakis   // Returns the latency cost of a given instruction.
29267819a72SFangrui Song   std::optional<uint64_t> computeInstCost(const Instruction *I);
293d7ebb746SSotiris Apostolakis 
294d7ebb746SSotiris Apostolakis   // Returns the misprediction cost of a given select when converted to branch.
295a2d68b4bSDavid Green   Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost);
296d7ebb746SSotiris Apostolakis 
297d7ebb746SSotiris Apostolakis   // Returns the cost of a branch when the prediction is correct.
298d7ebb746SSotiris Apostolakis   Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
299a2d68b4bSDavid Green                                 const SelectLike SI);
300d7ebb746SSotiris Apostolakis 
3018b42bc56SSotiris Apostolakis   // Returns true if the target architecture supports lowering a given select.
302a2d68b4bSDavid Green   bool isSelectKindSupported(const SelectLike SI);
303ca7c307dSSotiris Apostolakis };
304ce08c7eeSpaperchalice 
305ce08c7eeSpaperchalice class SelectOptimize : public FunctionPass {
306ce08c7eeSpaperchalice   SelectOptimizeImpl Impl;
307ce08c7eeSpaperchalice 
308ce08c7eeSpaperchalice public:
309ce08c7eeSpaperchalice   static char ID;
310ce08c7eeSpaperchalice 
311ce08c7eeSpaperchalice   SelectOptimize() : FunctionPass(ID) {
312ce08c7eeSpaperchalice     initializeSelectOptimizePass(*PassRegistry::getPassRegistry());
313ce08c7eeSpaperchalice   }
314ce08c7eeSpaperchalice 
315ce08c7eeSpaperchalice   bool runOnFunction(Function &F) override {
316ce08c7eeSpaperchalice     return Impl.runOnFunction(F, *this);
317ce08c7eeSpaperchalice   }
318ce08c7eeSpaperchalice 
319ce08c7eeSpaperchalice   void getAnalysisUsage(AnalysisUsage &AU) const override {
320ce08c7eeSpaperchalice     AU.addRequired<ProfileSummaryInfoWrapperPass>();
321ce08c7eeSpaperchalice     AU.addRequired<TargetPassConfig>();
322ce08c7eeSpaperchalice     AU.addRequired<TargetTransformInfoWrapperPass>();
323ce08c7eeSpaperchalice     AU.addRequired<LoopInfoWrapperPass>();
324ce08c7eeSpaperchalice     AU.addRequired<BlockFrequencyInfoWrapperPass>();
325ce08c7eeSpaperchalice     AU.addRequired<OptimizationRemarkEmitterWrapperPass>();
326ce08c7eeSpaperchalice   }
327ce08c7eeSpaperchalice };
328ce08c7eeSpaperchalice 
329ca7c307dSSotiris Apostolakis } // namespace
330ca7c307dSSotiris Apostolakis 
331ce08c7eeSpaperchalice PreservedAnalyses SelectOptimizePass::run(Function &F,
332ce08c7eeSpaperchalice                                           FunctionAnalysisManager &FAM) {
333ce08c7eeSpaperchalice   SelectOptimizeImpl Impl(TM);
334ce08c7eeSpaperchalice   return Impl.run(F, FAM);
335ce08c7eeSpaperchalice }
336ce08c7eeSpaperchalice 
337ca7c307dSSotiris Apostolakis char SelectOptimize::ID = 0;
33897c3ef5cSSotiris Apostolakis 
33997c3ef5cSSotiris Apostolakis INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
34097c3ef5cSSotiris Apostolakis                       false)
34197c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
3428b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass)
34397c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetPassConfig)
3448b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
345ce08c7eeSpaperchalice INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass)
3468b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass)
34797c3ef5cSSotiris Apostolakis INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false,
348ca7c307dSSotiris Apostolakis                     false)
349ca7c307dSSotiris Apostolakis 
350ca7c307dSSotiris Apostolakis FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); }
351ca7c307dSSotiris Apostolakis 
352ce08c7eeSpaperchalice PreservedAnalyses SelectOptimizeImpl::run(Function &F,
353ce08c7eeSpaperchalice                                           FunctionAnalysisManager &FAM) {
35497c3ef5cSSotiris Apostolakis   TSI = TM->getSubtargetImpl(F);
35597c3ef5cSSotiris Apostolakis   TLI = TSI->getTargetLowering();
3568b42bc56SSotiris Apostolakis 
357ce08c7eeSpaperchalice   // If none of the select types are supported then skip this pass.
358ce08c7eeSpaperchalice   // This is an optimization pass. Legality issues will be handled by
359ce08c7eeSpaperchalice   // instruction selection.
360ce08c7eeSpaperchalice   if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
361ce08c7eeSpaperchalice       !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
362ce08c7eeSpaperchalice       !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
363ce08c7eeSpaperchalice     return PreservedAnalyses::all();
364ce08c7eeSpaperchalice 
365ce08c7eeSpaperchalice   TTI = &FAM.getResult<TargetIRAnalysis>(F);
366ce08c7eeSpaperchalice   if (!TTI->enableSelectOptimize())
367ce08c7eeSpaperchalice     return PreservedAnalyses::all();
368ce08c7eeSpaperchalice 
369ce08c7eeSpaperchalice   PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F)
370ce08c7eeSpaperchalice             .getCachedResult<ProfileSummaryAnalysis>(*F.getParent());
371ce08c7eeSpaperchalice   assert(PSI && "This pass requires module analysis pass `profile-summary`!");
372ce08c7eeSpaperchalice   BFI = &FAM.getResult<BlockFrequencyAnalysis>(F);
373ce08c7eeSpaperchalice 
374ce08c7eeSpaperchalice   // When optimizing for size, selects are preferable over branches.
3756ab26eabSEllis Hoag   if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
376ce08c7eeSpaperchalice     return PreservedAnalyses::all();
377ce08c7eeSpaperchalice 
378ce08c7eeSpaperchalice   LI = &FAM.getResult<LoopAnalysis>(F);
379ce08c7eeSpaperchalice   ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F);
380ce08c7eeSpaperchalice   TSchedModel.init(TSI);
381ce08c7eeSpaperchalice 
382ce08c7eeSpaperchalice   bool Changed = optimizeSelects(F);
383ce08c7eeSpaperchalice   return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all();
384ce08c7eeSpaperchalice }
385ce08c7eeSpaperchalice 
386ce08c7eeSpaperchalice bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) {
387ce08c7eeSpaperchalice   TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>();
388ce08c7eeSpaperchalice   TSI = TM->getSubtargetImpl(F);
389ce08c7eeSpaperchalice   TLI = TSI->getTargetLowering();
390ce08c7eeSpaperchalice 
391ce08c7eeSpaperchalice   // If none of the select types are supported then skip this pass.
3928b42bc56SSotiris Apostolakis   // This is an optimization pass. Legality issues will be handled by
3938b42bc56SSotiris Apostolakis   // instruction selection.
3948b42bc56SSotiris Apostolakis   if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) &&
3958b42bc56SSotiris Apostolakis       !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) &&
3968b42bc56SSotiris Apostolakis       !TLI->isSelectSupported(TargetLowering::VectorMaskSelect))
3978b42bc56SSotiris Apostolakis     return false;
3988b42bc56SSotiris Apostolakis 
399ce08c7eeSpaperchalice   TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
40016a72a0fSDavid Green 
40116a72a0fSDavid Green   if (!TTI->enableSelectOptimize())
40216a72a0fSDavid Green     return false;
40316a72a0fSDavid Green 
404ce08c7eeSpaperchalice   LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
405ce08c7eeSpaperchalice   BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI();
406ce08c7eeSpaperchalice   PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI();
407ce08c7eeSpaperchalice   ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE();
408d7ebb746SSotiris Apostolakis   TSchedModel.init(TSI);
4098b42bc56SSotiris Apostolakis 
4108b42bc56SSotiris Apostolakis   // When optimizing for size, selects are preferable over branches.
4116ab26eabSEllis Hoag   if (llvm::shouldOptimizeForSize(&F, PSI, BFI))
4128b42bc56SSotiris Apostolakis     return false;
41397c3ef5cSSotiris Apostolakis 
41497c3ef5cSSotiris Apostolakis   return optimizeSelects(F);
41597c3ef5cSSotiris Apostolakis }
41697c3ef5cSSotiris Apostolakis 
417ce08c7eeSpaperchalice bool SelectOptimizeImpl::optimizeSelects(Function &F) {
41897c3ef5cSSotiris Apostolakis   // Determine for which select groups it is profitable converting to branches.
41997c3ef5cSSotiris Apostolakis   SelectGroups ProfSIGroups;
4208b42bc56SSotiris Apostolakis   // Base heuristics apply only to non-loops and outer loops.
4218b42bc56SSotiris Apostolakis   optimizeSelectsBase(F, ProfSIGroups);
4228b42bc56SSotiris Apostolakis   // Separate heuristics for inner-most loops.
4238b42bc56SSotiris Apostolakis   optimizeSelectsInnerLoops(F, ProfSIGroups);
42497c3ef5cSSotiris Apostolakis 
42597c3ef5cSSotiris Apostolakis   // Convert to branches the select groups that were deemed
42697c3ef5cSSotiris Apostolakis   // profitable-to-convert.
42797c3ef5cSSotiris Apostolakis   convertProfitableSIGroups(ProfSIGroups);
42897c3ef5cSSotiris Apostolakis 
42997c3ef5cSSotiris Apostolakis   // Code modified if at least one select group was converted.
43097c3ef5cSSotiris Apostolakis   return !ProfSIGroups.empty();
43197c3ef5cSSotiris Apostolakis }
43297c3ef5cSSotiris Apostolakis 
433ce08c7eeSpaperchalice void SelectOptimizeImpl::optimizeSelectsBase(Function &F,
4348b42bc56SSotiris Apostolakis                                              SelectGroups &ProfSIGroups) {
4358b42bc56SSotiris Apostolakis   // Collect all the select groups.
4368b42bc56SSotiris Apostolakis   SelectGroups SIGroups;
4378b42bc56SSotiris Apostolakis   for (BasicBlock &BB : F) {
4388b42bc56SSotiris Apostolakis     // Base heuristics apply only to non-loops and outer loops.
4398b42bc56SSotiris Apostolakis     Loop *L = LI->getLoopFor(&BB);
4408b42bc56SSotiris Apostolakis     if (L && L->isInnermost())
4418b42bc56SSotiris Apostolakis       continue;
4428b42bc56SSotiris Apostolakis     collectSelectGroups(BB, SIGroups);
4438b42bc56SSotiris Apostolakis   }
4448b42bc56SSotiris Apostolakis 
4458b42bc56SSotiris Apostolakis   // Determine for which select groups it is profitable converting to branches.
4468b42bc56SSotiris Apostolakis   findProfitableSIGroupsBase(SIGroups, ProfSIGroups);
4478b42bc56SSotiris Apostolakis }
4488b42bc56SSotiris Apostolakis 
449ce08c7eeSpaperchalice void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F,
450d7ebb746SSotiris Apostolakis                                                    SelectGroups &ProfSIGroups) {
451d7ebb746SSotiris Apostolakis   SmallVector<Loop *, 4> Loops(LI->begin(), LI->end());
452d7ebb746SSotiris Apostolakis   // Need to check size on each iteration as we accumulate child loops.
453d7ebb746SSotiris Apostolakis   for (unsigned long i = 0; i < Loops.size(); ++i)
454d7ebb746SSotiris Apostolakis     for (Loop *ChildL : Loops[i]->getSubLoops())
455d7ebb746SSotiris Apostolakis       Loops.push_back(ChildL);
456d7ebb746SSotiris Apostolakis 
457d7ebb746SSotiris Apostolakis   for (Loop *L : Loops) {
458d7ebb746SSotiris Apostolakis     if (!L->isInnermost())
459d7ebb746SSotiris Apostolakis       continue;
460d7ebb746SSotiris Apostolakis 
461d7ebb746SSotiris Apostolakis     SelectGroups SIGroups;
462d7ebb746SSotiris Apostolakis     for (BasicBlock *BB : L->getBlocks())
463d7ebb746SSotiris Apostolakis       collectSelectGroups(*BB, SIGroups);
464d7ebb746SSotiris Apostolakis 
465d7ebb746SSotiris Apostolakis     findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups);
466d7ebb746SSotiris Apostolakis   }
467d7ebb746SSotiris Apostolakis }
4688b42bc56SSotiris Apostolakis 
469e874c8fcSIgor Kirillov /// Returns optimised value on \p IsTrue branch. For SelectInst that would be
470e874c8fcSIgor Kirillov /// either True or False value. For (BinaryOperator) instructions, where the
471e874c8fcSIgor Kirillov /// condition may be skipped, the operation will use a non-conditional operand.
472e874c8fcSIgor Kirillov /// For example, for `or(V,zext(cond))` this function would return V.
473e874c8fcSIgor Kirillov /// However, if the conditional operand on \p IsTrue branch matters, we create a
474e874c8fcSIgor Kirillov /// clone of instruction at the end of that branch \p B and replace the
475e874c8fcSIgor Kirillov /// condition operand with a constant.
476e874c8fcSIgor Kirillov ///
477e874c8fcSIgor Kirillov /// Also /p OptSelects contains previously optimised select-like instructions.
478e874c8fcSIgor Kirillov /// If the current value uses one of the optimised values, we can optimise it
479e874c8fcSIgor Kirillov /// further by replacing it with the corresponding value on the given branch
480e874c8fcSIgor Kirillov static Value *getTrueOrFalseValue(
481e874c8fcSIgor Kirillov     SelectOptimizeImpl::SelectLike &SI, bool isTrue,
482e874c8fcSIgor Kirillov     SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> &OptSelects,
483e874c8fcSIgor Kirillov     BasicBlock *B) {
484e874c8fcSIgor Kirillov   Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue();
485e874c8fcSIgor Kirillov   if (V) {
486*1d5ce614SKazu Hirata     if (auto *IV = dyn_cast<Instruction>(V))
487*1d5ce614SKazu Hirata       if (auto It = OptSelects.find(IV); It != OptSelects.end())
488*1d5ce614SKazu Hirata         return isTrue ? It->second.first : It->second.second;
4894a7a27cbSIgor Kirillov     return V;
490b5a11d37SIgor Kirillov   }
491b5a11d37SIgor Kirillov 
492e874c8fcSIgor Kirillov   auto *BO = cast<BinaryOperator>(SI.getI());
4939a0f2515SFlorian Hahn   assert((BO->getOpcode() == Instruction::Add ||
4949a0f2515SFlorian Hahn           BO->getOpcode() == Instruction::Or ||
4959a0f2515SFlorian Hahn           BO->getOpcode() == Instruction::Sub) &&
4969a0f2515SFlorian Hahn          "Only currently handling Add, Or and Sub binary operators.");
497e874c8fcSIgor Kirillov 
498e874c8fcSIgor Kirillov   auto *CBO = BO->clone();
499e874c8fcSIgor Kirillov   auto CondIdx = SI.getConditionOpIndex();
500e909c0ccSIgor Kirillov   auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx));
501e909c0ccSIgor Kirillov   if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) {
502e874c8fcSIgor Kirillov     CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1));
503e909c0ccSIgor Kirillov   } else {
504c1f5937eSFlorian Hahn     assert((isa<AShrOperator>(AuxI) || isa<SExtInst>(AuxI)) &&
505c1f5937eSFlorian Hahn            "Unexpected opcode");
506e909c0ccSIgor Kirillov     CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1));
507e909c0ccSIgor Kirillov   }
508e874c8fcSIgor Kirillov 
509e874c8fcSIgor Kirillov   unsigned OtherIdx = 1 - CondIdx;
510e874c8fcSIgor Kirillov   if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) {
511*1d5ce614SKazu Hirata     if (auto It = OptSelects.find(IV); It != OptSelects.end())
512*1d5ce614SKazu Hirata       CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second);
513e874c8fcSIgor Kirillov   }
5148e702735SJeremy Morse   CBO->insertBefore(B->getTerminator()->getIterator());
515e874c8fcSIgor Kirillov   return CBO;
516e874c8fcSIgor Kirillov }
517e874c8fcSIgor Kirillov 
518ce08c7eeSpaperchalice void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) {
51997c3ef5cSSotiris Apostolakis   for (SelectGroup &ASI : ProfSIGroups) {
52067be40dfSSotiris Apostolakis     // The code transformation here is a modified version of the sinking
52167be40dfSSotiris Apostolakis     // transformation in CodeGenPrepare::optimizeSelectInst with a more
52267be40dfSSotiris Apostolakis     // aggressive strategy of which instructions to sink.
52367be40dfSSotiris Apostolakis     //
52497c3ef5cSSotiris Apostolakis     // TODO: eliminate the redundancy of logic transforming selects to branches
52597c3ef5cSSotiris Apostolakis     // by removing CodeGenPrepare::optimizeSelectInst and optimizing here
52697c3ef5cSSotiris Apostolakis     // selects for all cases (with and without profile information).
52797c3ef5cSSotiris Apostolakis 
52897c3ef5cSSotiris Apostolakis     // Transform a sequence like this:
52997c3ef5cSSotiris Apostolakis     //    start:
53097c3ef5cSSotiris Apostolakis     //       %cmp = cmp uge i32 %a, %b
53197c3ef5cSSotiris Apostolakis     //       %sel = select i1 %cmp, i32 %c, i32 %d
53297c3ef5cSSotiris Apostolakis     //
53397c3ef5cSSotiris Apostolakis     // Into:
53497c3ef5cSSotiris Apostolakis     //    start:
53597c3ef5cSSotiris Apostolakis     //       %cmp = cmp uge i32 %a, %b
53697c3ef5cSSotiris Apostolakis     //       %cmp.frozen = freeze %cmp
53767be40dfSSotiris Apostolakis     //       br i1 %cmp.frozen, label %select.true, label %select.false
53867be40dfSSotiris Apostolakis     //    select.true:
53967be40dfSSotiris Apostolakis     //       br label %select.end
54097c3ef5cSSotiris Apostolakis     //    select.false:
54197c3ef5cSSotiris Apostolakis     //       br label %select.end
54297c3ef5cSSotiris Apostolakis     //    select.end:
54367be40dfSSotiris Apostolakis     //       %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ]
54497c3ef5cSSotiris Apostolakis     //
54597c3ef5cSSotiris Apostolakis     // %cmp should be frozen, otherwise it may introduce undefined behavior.
54667be40dfSSotiris Apostolakis     // In addition, we may sink instructions that produce %c or %d into the
54767be40dfSSotiris Apostolakis     // destination(s) of the new branch.
54867be40dfSSotiris Apostolakis     // If the true or false blocks do not contain a sunken instruction, that
54967be40dfSSotiris Apostolakis     // block and its branch may be optimized away. In that case, one side of the
55067be40dfSSotiris Apostolakis     // first branch will point directly to select.end, and the corresponding PHI
55167be40dfSSotiris Apostolakis     // predecessor block will be the start block.
55267be40dfSSotiris Apostolakis 
55367be40dfSSotiris Apostolakis     // Find all the instructions that can be soundly sunk to the true/false
55467be40dfSSotiris Apostolakis     // blocks. These are instructions that are computed solely for producing the
55567be40dfSSotiris Apostolakis     // operands of the select instructions in the group and can be sunk without
55667be40dfSSotiris Apostolakis     // breaking the semantics of the LLVM IR (e.g., cannot sink instructions
55767be40dfSSotiris Apostolakis     // with side effects).
55867be40dfSSotiris Apostolakis     SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices;
55967be40dfSSotiris Apostolakis     typedef std::stack<Instruction *>::size_type StackSizeType;
56067be40dfSSotiris Apostolakis     StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0;
561e874c8fcSIgor Kirillov     for (SelectLike &SI : ASI.Selects) {
562e874c8fcSIgor Kirillov       if (!isa<SelectInst>(SI.getI()))
563e874c8fcSIgor Kirillov         continue;
56467be40dfSSotiris Apostolakis       // For each select, compute the sinkable dependence chains of the true and
56567be40dfSSotiris Apostolakis       // false operands.
566a2d68b4bSDavid Green       if (auto *TI = dyn_cast_or_null<Instruction>(SI.getTrueValue())) {
56767be40dfSSotiris Apostolakis         std::stack<Instruction *> TrueSlice;
568a2d68b4bSDavid Green         getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true);
56967be40dfSSotiris Apostolakis         maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size());
57067be40dfSSotiris Apostolakis         TrueSlices.push_back(TrueSlice);
57167be40dfSSotiris Apostolakis       }
572a2d68b4bSDavid Green       if (auto *FI = dyn_cast_or_null<Instruction>(SI.getFalseValue())) {
573a2d68b4bSDavid Green         if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) {
57467be40dfSSotiris Apostolakis           std::stack<Instruction *> FalseSlice;
575a2d68b4bSDavid Green           getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true);
57667be40dfSSotiris Apostolakis           maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size());
57767be40dfSSotiris Apostolakis           FalseSlices.push_back(FalseSlice);
57867be40dfSSotiris Apostolakis         }
57967be40dfSSotiris Apostolakis       }
580a2d68b4bSDavid Green     }
58167be40dfSSotiris Apostolakis     // In the case of multiple select instructions in the same group, the order
58267be40dfSSotiris Apostolakis     // of non-dependent instructions (instructions of different dependence
58367be40dfSSotiris Apostolakis     // slices) in the true/false blocks appears to affect performance.
58467be40dfSSotiris Apostolakis     // Interleaving the slices seems to experimentally be the optimal approach.
58567be40dfSSotiris Apostolakis     // This interleaving scheduling allows for more ILP (with a natural downside
58667be40dfSSotiris Apostolakis     // of increasing a bit register pressure) compared to a simple ordering of
58767be40dfSSotiris Apostolakis     // one whole chain after another. One would expect that this ordering would
58867be40dfSSotiris Apostolakis     // not matter since the scheduling in the backend of the compiler  would
58967be40dfSSotiris Apostolakis     // take care of it, but apparently the scheduler fails to deliver optimal
59067be40dfSSotiris Apostolakis     // ILP with a naive ordering here.
59167be40dfSSotiris Apostolakis     SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved;
59267be40dfSSotiris Apostolakis     for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) {
59367be40dfSSotiris Apostolakis       for (auto &S : TrueSlices) {
59467be40dfSSotiris Apostolakis         if (!S.empty()) {
59567be40dfSSotiris Apostolakis           TrueSlicesInterleaved.push_back(S.top());
59667be40dfSSotiris Apostolakis           S.pop();
59767be40dfSSotiris Apostolakis         }
59867be40dfSSotiris Apostolakis       }
59967be40dfSSotiris Apostolakis     }
60067be40dfSSotiris Apostolakis     for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) {
60167be40dfSSotiris Apostolakis       for (auto &S : FalseSlices) {
60267be40dfSSotiris Apostolakis         if (!S.empty()) {
60367be40dfSSotiris Apostolakis           FalseSlicesInterleaved.push_back(S.top());
60467be40dfSSotiris Apostolakis           S.pop();
60567be40dfSSotiris Apostolakis         }
60667be40dfSSotiris Apostolakis       }
60767be40dfSSotiris Apostolakis     }
60897c3ef5cSSotiris Apostolakis 
60997c3ef5cSSotiris Apostolakis     // We split the block containing the select(s) into two blocks.
610e874c8fcSIgor Kirillov     SelectLike &SI = ASI.Selects.front();
611e874c8fcSIgor Kirillov     SelectLike &LastSI = ASI.Selects.back();
612a2d68b4bSDavid Green     BasicBlock *StartBlock = SI.getI()->getParent();
613a2d68b4bSDavid Green     BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI()));
614a50bd0d7SOrlando Cazalet-Hyams     // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With
615a50bd0d7SOrlando Cazalet-Hyams     // RemoveDIs turned on, SplitPt would instead point to the next
616a50bd0d7SOrlando Cazalet-Hyams     // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs,
617ffd08c77SStephen Tozer     // tell splitBasicBlock that we want to include any DbgVariableRecords
618ffd08c77SStephen Tozer     // attached to SplitPt in the splice.
619a50bd0d7SOrlando Cazalet-Hyams     SplitPt.setHeadBit(true);
62097c3ef5cSSotiris Apostolakis     BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end");
6215181156bSMatthias Braun     BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock));
62297c3ef5cSSotiris Apostolakis     // Delete the unconditional branch that was just created by the split.
62397c3ef5cSSotiris Apostolakis     StartBlock->getTerminator()->eraseFromParent();
62497c3ef5cSSotiris Apostolakis 
625e874c8fcSIgor Kirillov     // Move any debug/pseudo and auxiliary instructions that were in-between the
6260a62a99aSDavid Green     // select group to the newly-created end block.
6270a62a99aSDavid Green     SmallVector<Instruction *, 2> SinkInstrs;
628a2d68b4bSDavid Green     auto DIt = SI.getI()->getIterator();
629e874c8fcSIgor Kirillov     auto NIt = ASI.Selects.begin();
630a2d68b4bSDavid Green     while (&*DIt != LastSI.getI()) {
631e874c8fcSIgor Kirillov       if (NIt != ASI.Selects.end() && &*DIt == NIt->getI())
632e874c8fcSIgor Kirillov         ++NIt;
633e874c8fcSIgor Kirillov       else
6340a62a99aSDavid Green         SinkInstrs.push_back(&*DIt);
63597c3ef5cSSotiris Apostolakis       DIt++;
63697c3ef5cSSotiris Apostolakis     }
637e874c8fcSIgor Kirillov     auto InsertionPoint = EndBlock->getFirstInsertionPt();
6380a62a99aSDavid Green     for (auto *DI : SinkInstrs)
6398e702735SJeremy Morse       DI->moveBeforePreserving(InsertionPoint);
64097c3ef5cSSotiris Apostolakis 
641360da838SStephen Tozer     // Duplicate implementation for DbgRecords, the non-instruction debug-info
642360da838SStephen Tozer     // format. Helper lambda for moving DbgRecords to the end block.
643360da838SStephen Tozer     auto TransferDbgRecords = [&](Instruction &I) {
644360da838SStephen Tozer       for (auto &DbgRecord :
645360da838SStephen Tozer            llvm::make_early_inc_range(I.getDbgRecordRange())) {
646360da838SStephen Tozer         DbgRecord.removeFromParent();
647360da838SStephen Tozer         EndBlock->insertDbgRecordBefore(&DbgRecord,
648d7fb9eb8SJeremy Morse                                         EndBlock->getFirstInsertionPt());
649d7fb9eb8SJeremy Morse       }
650d7fb9eb8SJeremy Morse     };
651d7fb9eb8SJeremy Morse 
652d7fb9eb8SJeremy Morse     // Iterate over all instructions in between SI and LastSI, not including
653d7fb9eb8SJeremy Morse     // SI itself. These are all the variable assignments that happen "in the
654d7fb9eb8SJeremy Morse     // middle" of the select group.
655a2d68b4bSDavid Green     auto R = make_range(std::next(SI.getI()->getIterator()),
656a2d68b4bSDavid Green                         std::next(LastSI.getI()->getIterator()));
657360da838SStephen Tozer     llvm::for_each(R, TransferDbgRecords);
658d7fb9eb8SJeremy Morse 
65997c3ef5cSSotiris Apostolakis     // These are the new basic blocks for the conditional branch.
66067be40dfSSotiris Apostolakis     // At least one will become an actual new basic block.
66197c3ef5cSSotiris Apostolakis     BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr;
66267be40dfSSotiris Apostolakis     BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr;
663e874c8fcSIgor Kirillov     // Checks if select-like instruction would materialise on the given branch
664e874c8fcSIgor Kirillov     auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) {
665e874c8fcSIgor Kirillov       for (auto &SL : SG.Selects) {
666e874c8fcSIgor Kirillov         if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr)
667e874c8fcSIgor Kirillov           return true;
668e874c8fcSIgor Kirillov       }
669e874c8fcSIgor Kirillov       return false;
670e874c8fcSIgor Kirillov     };
671e874c8fcSIgor Kirillov     if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) {
672a2d68b4bSDavid Green       TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink",
67367be40dfSSotiris Apostolakis                                      EndBlock->getParent(), EndBlock);
67467be40dfSSotiris Apostolakis       TrueBranch = BranchInst::Create(EndBlock, TrueBlock);
675a2d68b4bSDavid Green       TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
67667be40dfSSotiris Apostolakis       for (Instruction *TrueInst : TrueSlicesInterleaved)
6778e702735SJeremy Morse         TrueInst->moveBefore(TrueBranch->getIterator());
67867be40dfSSotiris Apostolakis     }
679e874c8fcSIgor Kirillov     if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) {
680a2d68b4bSDavid Green       FalseBlock =
681a2d68b4bSDavid Green           BasicBlock::Create(EndBlock->getContext(), "select.false.sink",
68267be40dfSSotiris Apostolakis                              EndBlock->getParent(), EndBlock);
68367be40dfSSotiris Apostolakis       FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
684a2d68b4bSDavid Green       FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc());
68567be40dfSSotiris Apostolakis       for (Instruction *FalseInst : FalseSlicesInterleaved)
6868e702735SJeremy Morse         FalseInst->moveBefore(FalseBranch->getIterator());
68767be40dfSSotiris Apostolakis     }
68867be40dfSSotiris Apostolakis     // If there was nothing to sink, then arbitrarily choose the 'false' side
68967be40dfSSotiris Apostolakis     // for a new input value to the PHI.
69067be40dfSSotiris Apostolakis     if (TrueBlock == FalseBlock) {
69167be40dfSSotiris Apostolakis       assert(TrueBlock == nullptr &&
69267be40dfSSotiris Apostolakis              "Unexpected basic block transform while optimizing select");
69397c3ef5cSSotiris Apostolakis 
694a2d68b4bSDavid Green       FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false",
69597c3ef5cSSotiris Apostolakis                                       EndBlock->getParent(), EndBlock);
69697c3ef5cSSotiris Apostolakis       auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock);
697a2d68b4bSDavid Green       FalseBranch->setDebugLoc(SI.getI()->getDebugLoc());
69867be40dfSSotiris Apostolakis     }
69997c3ef5cSSotiris Apostolakis 
70097c3ef5cSSotiris Apostolakis     // Insert the real conditional branch based on the original condition.
70167be40dfSSotiris Apostolakis     // If we did not create a new block for one of the 'true' or 'false' paths
70267be40dfSSotiris Apostolakis     // of the condition, it means that side of the branch goes to the end block
70367be40dfSSotiris Apostolakis     // directly and the path originates from the start block from the point of
70467be40dfSSotiris Apostolakis     // view of the new PHI.
70597c3ef5cSSotiris Apostolakis     BasicBlock *TT, *FT;
70667be40dfSSotiris Apostolakis     if (TrueBlock == nullptr) {
70797c3ef5cSSotiris Apostolakis       TT = EndBlock;
70897c3ef5cSSotiris Apostolakis       FT = FalseBlock;
70967be40dfSSotiris Apostolakis       TrueBlock = StartBlock;
71067be40dfSSotiris Apostolakis     } else if (FalseBlock == nullptr) {
71167be40dfSSotiris Apostolakis       TT = TrueBlock;
71267be40dfSSotiris Apostolakis       FT = EndBlock;
71367be40dfSSotiris Apostolakis       FalseBlock = StartBlock;
71467be40dfSSotiris Apostolakis     } else {
71567be40dfSSotiris Apostolakis       TT = TrueBlock;
71667be40dfSSotiris Apostolakis       FT = FalseBlock;
71767be40dfSSotiris Apostolakis     }
718a2d68b4bSDavid Green     IRBuilder<> IB(SI.getI());
719e874c8fcSIgor Kirillov     auto *CondFr =
720e874c8fcSIgor Kirillov         IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen");
72197c3ef5cSSotiris Apostolakis 
722e874c8fcSIgor Kirillov     SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> INS;
723a2d68b4bSDavid Green 
72497c3ef5cSSotiris Apostolakis     // Use reverse iterator because later select may use the value of the
72597c3ef5cSSotiris Apostolakis     // earlier select, and we need to propagate value through earlier select
72697c3ef5cSSotiris Apostolakis     // to get the PHI operand.
727e874c8fcSIgor Kirillov     InsertionPoint = EndBlock->begin();
728e874c8fcSIgor Kirillov     for (SelectLike &SI : ASI.Selects) {
72997c3ef5cSSotiris Apostolakis       // The select itself is replaced with a PHI Node.
730a2d68b4bSDavid Green       PHINode *PN = PHINode::Create(SI.getType(), 2, "");
731e874c8fcSIgor Kirillov       PN->insertBefore(InsertionPoint);
732a2d68b4bSDavid Green       PN->takeName(SI.getI());
733e874c8fcSIgor Kirillov       // Current instruction might be a condition of some other group, so we
734e874c8fcSIgor Kirillov       // need to replace it there to avoid dangling pointer
735e874c8fcSIgor Kirillov       if (PN->getType()->isIntegerTy(1)) {
736e874c8fcSIgor Kirillov         for (auto &SG : ProfSIGroups) {
737e874c8fcSIgor Kirillov           if (SG.Condition == SI.getI())
738e874c8fcSIgor Kirillov             SG.Condition = PN;
739e874c8fcSIgor Kirillov         }
740e874c8fcSIgor Kirillov       }
7414a7a27cbSIgor Kirillov       SI.getI()->replaceAllUsesWith(PN);
742e874c8fcSIgor Kirillov       auto *TV = getTrueOrFalseValue(SI, true, INS, TrueBlock);
743e874c8fcSIgor Kirillov       auto *FV = getTrueOrFalseValue(SI, false, INS, FalseBlock);
744e874c8fcSIgor Kirillov       INS[PN] = {TV, FV};
745e874c8fcSIgor Kirillov       PN->addIncoming(TV, TrueBlock);
746e874c8fcSIgor Kirillov       PN->addIncoming(FV, FalseBlock);
747e874c8fcSIgor Kirillov       PN->setDebugLoc(SI.getI()->getDebugLoc());
74897c3ef5cSSotiris Apostolakis       ++NumSelectsConverted;
74997c3ef5cSSotiris Apostolakis     }
750a2d68b4bSDavid Green     IB.CreateCondBr(CondFr, TT, FT, SI.getI());
751a2d68b4bSDavid Green 
752a2d68b4bSDavid Green     // Remove the old select instructions, now that they are not longer used.
753e874c8fcSIgor Kirillov     for (SelectLike &SI : ASI.Selects)
754a2d68b4bSDavid Green       SI.getI()->eraseFromParent();
75597c3ef5cSSotiris Apostolakis   }
75697c3ef5cSSotiris Apostolakis }
75797c3ef5cSSotiris Apostolakis 
758ce08c7eeSpaperchalice void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB,
75997c3ef5cSSotiris Apostolakis                                              SelectGroups &SIGroups) {
760e874c8fcSIgor Kirillov   // Represents something that can be considered as select instruction.
761e874c8fcSIgor Kirillov   // Auxiliary instruction are instructions that depends on a condition and have
762e874c8fcSIgor Kirillov   // zero or some constant value on True/False branch, such as:
763e874c8fcSIgor Kirillov   // * ZExt(1bit)
764c1f5937eSFlorian Hahn   // * SExt(1bit)
765e874c8fcSIgor Kirillov   // * Not(1bit)
766e909c0ccSIgor Kirillov   // * A(L)Shr(Val), ValBitSize - 1, where there is a condition like `Val <= 0`
767e909c0ccSIgor Kirillov   // earlier in the BB. For conditions that check the sign of the Val compiler
768e909c0ccSIgor Kirillov   // may generate shifts instead of ZExt/SExt.
769e874c8fcSIgor Kirillov   struct SelectLikeInfo {
770e874c8fcSIgor Kirillov     Value *Cond;
771e874c8fcSIgor Kirillov     bool IsAuxiliary;
772e874c8fcSIgor Kirillov     bool IsInverted;
773e874c8fcSIgor Kirillov     unsigned ConditionIdx;
774e874c8fcSIgor Kirillov   };
775e874c8fcSIgor Kirillov 
776e874c8fcSIgor Kirillov   DenseMap<Value *, SelectLikeInfo> SelectInfo;
777e909c0ccSIgor Kirillov   // Keeps visited comparisons to help identify AShr/LShr variants of auxiliary
778e909c0ccSIgor Kirillov   // instructions.
779e909c0ccSIgor Kirillov   SmallSetVector<CmpInst *, 4> SeenCmp;
780e874c8fcSIgor Kirillov 
781e874c8fcSIgor Kirillov   // Check if the instruction is SelectLike or might be part of SelectLike
782e874c8fcSIgor Kirillov   // expression, put information into SelectInfo and return the iterator to the
783e874c8fcSIgor Kirillov   // inserted position.
784e909c0ccSIgor Kirillov   auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) {
785e909c0ccSIgor Kirillov     if (auto *Cmp = dyn_cast<CmpInst>(I)) {
786e909c0ccSIgor Kirillov       SeenCmp.insert(Cmp);
787e909c0ccSIgor Kirillov       return SelectInfo.end();
788e909c0ccSIgor Kirillov     }
789e909c0ccSIgor Kirillov 
790e874c8fcSIgor Kirillov     Value *Cond;
791c1f5937eSFlorian Hahn     if (match(I, m_OneUse(m_ZExtOrSExt(m_Value(Cond)))) &&
792e874c8fcSIgor Kirillov         Cond->getType()->isIntegerTy(1)) {
793e874c8fcSIgor Kirillov       bool Inverted = match(Cond, m_Not(m_Value(Cond)));
794e874c8fcSIgor Kirillov       return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first;
795e874c8fcSIgor Kirillov     }
796e874c8fcSIgor Kirillov 
797e874c8fcSIgor Kirillov     if (match(I, m_Not(m_Value(Cond)))) {
798e874c8fcSIgor Kirillov       return SelectInfo.insert({I, {Cond, true, true, 0}}).first;
799e874c8fcSIgor Kirillov     }
800e874c8fcSIgor Kirillov 
801e874c8fcSIgor Kirillov     // Select instruction are what we are usually looking for.
802e874c8fcSIgor Kirillov     if (match(I, m_Select(m_Value(Cond), m_Value(), m_Value()))) {
803e874c8fcSIgor Kirillov       bool Inverted = match(Cond, m_Not(m_Value(Cond)));
804e874c8fcSIgor Kirillov       return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first;
805e874c8fcSIgor Kirillov     }
806e909c0ccSIgor Kirillov     Value *Val;
807e909c0ccSIgor Kirillov     ConstantInt *Shift;
808e909c0ccSIgor Kirillov     if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) &&
809e909c0ccSIgor Kirillov         I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) {
810e909c0ccSIgor Kirillov       for (auto *CmpI : SeenCmp) {
811e909c0ccSIgor Kirillov         auto Pred = CmpI->getPredicate();
812e909c0ccSIgor Kirillov         if (Val != CmpI->getOperand(0))
813e909c0ccSIgor Kirillov           continue;
814e909c0ccSIgor Kirillov         if ((Pred == CmpInst::ICMP_SGT &&
815e909c0ccSIgor Kirillov              match(CmpI->getOperand(1), m_ConstantInt<-1>())) ||
816e909c0ccSIgor Kirillov             (Pred == CmpInst::ICMP_SGE &&
817e909c0ccSIgor Kirillov              match(CmpI->getOperand(1), m_Zero())) ||
818e909c0ccSIgor Kirillov             (Pred == CmpInst::ICMP_SLT &&
819e909c0ccSIgor Kirillov              match(CmpI->getOperand(1), m_Zero())) ||
820e909c0ccSIgor Kirillov             (Pred == CmpInst::ICMP_SLE &&
821e909c0ccSIgor Kirillov              match(CmpI->getOperand(1), m_ConstantInt<-1>()))) {
822e909c0ccSIgor Kirillov           bool Inverted =
823e909c0ccSIgor Kirillov               Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE;
824e909c0ccSIgor Kirillov           return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first;
825e909c0ccSIgor Kirillov         }
826e909c0ccSIgor Kirillov       }
827e909c0ccSIgor Kirillov       return SelectInfo.end();
828e909c0ccSIgor Kirillov     }
829e874c8fcSIgor Kirillov 
830e909c0ccSIgor Kirillov     // An BinOp(Aux(X), Y) can also be treated like a select, with condition X
831e874c8fcSIgor Kirillov     // and values Y|1 and Y.
832c1f5937eSFlorian Hahn     // `Aux` can be either `ZExt(1bit)`, `SExt(1bit)` or `XShr(Val), ValBitSize
833c1f5937eSFlorian Hahn     // - 1` `BinOp` can be Add, Sub, Or
8349a0f2515SFlorian Hahn     Value *X;
835c1f5937eSFlorian Hahn     auto MatchZExtOrSExtPattern =
836c1f5937eSFlorian Hahn         m_c_BinOp(m_Value(), m_OneUse(m_ZExtOrSExt(m_Value(X))));
837e909c0ccSIgor Kirillov     auto MatchShiftPattern =
838e909c0ccSIgor Kirillov         m_c_BinOp(m_Value(), m_OneUse(m_Shr(m_Value(X), m_ConstantInt(Shift))));
839e909c0ccSIgor Kirillov 
840e909c0ccSIgor Kirillov     // This check is unnecessary, but it prevents costly access to the
841e909c0ccSIgor Kirillov     // SelectInfo map.
842c1f5937eSFlorian Hahn     if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) ||
843e909c0ccSIgor Kirillov         (match(I, MatchShiftPattern) &&
844e909c0ccSIgor Kirillov          X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) {
845e909c0ccSIgor Kirillov       if (I->getOpcode() != Instruction::Add &&
846e909c0ccSIgor Kirillov           I->getOpcode() != Instruction::Sub &&
847e909c0ccSIgor Kirillov           I->getOpcode() != Instruction::Or)
8489a0f2515SFlorian Hahn         return SelectInfo.end();
849e909c0ccSIgor Kirillov 
850e909c0ccSIgor Kirillov       if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1))
851e874c8fcSIgor Kirillov         return SelectInfo.end();
852e874c8fcSIgor Kirillov 
853444e53f6SIgor Kirillov       // Iterate through operands and find dependant on recognised sign
854444e53f6SIgor Kirillov       // extending auxiliary select-like instructions. The operand index does
855444e53f6SIgor Kirillov       // not matter for Add and Or. However, for Sub, we can only safely
856444e53f6SIgor Kirillov       // transform when the operand is second.
857e909c0ccSIgor Kirillov       unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0;
858444e53f6SIgor Kirillov       for (; Idx < 2; Idx++) {
859e909c0ccSIgor Kirillov         auto *Op = I->getOperand(Idx);
860e874c8fcSIgor Kirillov         auto It = SelectInfo.find(Op);
861e874c8fcSIgor Kirillov         if (It != SelectInfo.end() && It->second.IsAuxiliary) {
862e874c8fcSIgor Kirillov           Cond = It->second.Cond;
863e874c8fcSIgor Kirillov           bool Inverted = It->second.IsInverted;
864e874c8fcSIgor Kirillov           return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first;
865e874c8fcSIgor Kirillov         }
866e874c8fcSIgor Kirillov       }
867e874c8fcSIgor Kirillov     }
868e874c8fcSIgor Kirillov     return SelectInfo.end();
869e874c8fcSIgor Kirillov   };
870e874c8fcSIgor Kirillov 
871e874c8fcSIgor Kirillov   bool AlreadyProcessed = false;
87297c3ef5cSSotiris Apostolakis   BasicBlock::iterator BBIt = BB.begin();
873e874c8fcSIgor Kirillov   DenseMap<Value *, SelectLikeInfo>::iterator It;
87497c3ef5cSSotiris Apostolakis   while (BBIt != BB.end()) {
87597c3ef5cSSotiris Apostolakis     Instruction *I = &*BBIt++;
876e874c8fcSIgor Kirillov     if (I->isDebugOrPseudoInst())
877e874c8fcSIgor Kirillov       continue;
878e874c8fcSIgor Kirillov 
879e874c8fcSIgor Kirillov     if (!AlreadyProcessed)
880e874c8fcSIgor Kirillov       It = ProcessSelectInfo(I);
881e874c8fcSIgor Kirillov     else
882e874c8fcSIgor Kirillov       AlreadyProcessed = false;
883e874c8fcSIgor Kirillov 
884e874c8fcSIgor Kirillov     if (It == SelectInfo.end() || It->second.IsAuxiliary)
885e874c8fcSIgor Kirillov       continue;
886e874c8fcSIgor Kirillov 
887a2d68b4bSDavid Green     if (!TTI->shouldTreatInstructionLikeSelect(I))
888ca78b560SDavid Green       continue;
889ca78b560SDavid Green 
890e874c8fcSIgor Kirillov     Value *Cond = It->second.Cond;
891e874c8fcSIgor Kirillov     // Vector conditions are not supported.
892e874c8fcSIgor Kirillov     if (!Cond->getType()->isIntegerTy(1))
893e874c8fcSIgor Kirillov       continue;
894e874c8fcSIgor Kirillov 
895e874c8fcSIgor Kirillov     SelectGroup SIGroup = {Cond, {}};
896e874c8fcSIgor Kirillov     SIGroup.Selects.emplace_back(I, It->second.IsInverted,
897e874c8fcSIgor Kirillov                                  It->second.ConditionIdx);
898e874c8fcSIgor Kirillov 
899e874c8fcSIgor Kirillov     // If the select type is not supported, no point optimizing it.
900e874c8fcSIgor Kirillov     // Instruction selection will take care of it.
901e874c8fcSIgor Kirillov     if (!isSelectKindSupported(SIGroup.Selects.front()))
902e874c8fcSIgor Kirillov       continue;
903e874c8fcSIgor Kirillov 
90497c3ef5cSSotiris Apostolakis     while (BBIt != BB.end()) {
90597c3ef5cSSotiris Apostolakis       Instruction *NI = &*BBIt;
90697c3ef5cSSotiris Apostolakis       // Debug/pseudo instructions should be skipped and not prevent the
90797c3ef5cSSotiris Apostolakis       // formation of a select group.
908a2d68b4bSDavid Green       if (NI->isDebugOrPseudoInst()) {
909a2d68b4bSDavid Green         ++BBIt;
910a2d68b4bSDavid Green         continue;
91197c3ef5cSSotiris Apostolakis       }
9120a62a99aSDavid Green 
913e874c8fcSIgor Kirillov       It = ProcessSelectInfo(NI);
914e874c8fcSIgor Kirillov       if (It == SelectInfo.end()) {
915e874c8fcSIgor Kirillov         AlreadyProcessed = true;
916e874c8fcSIgor Kirillov         break;
9170a62a99aSDavid Green       }
9180a62a99aSDavid Green 
919e874c8fcSIgor Kirillov       // Auxiliary with same condition
920e874c8fcSIgor Kirillov       auto [CurrCond, IsAux, IsRev, CondIdx] = It->second;
921e874c8fcSIgor Kirillov       if (Cond != CurrCond) {
922e874c8fcSIgor Kirillov         AlreadyProcessed = true;
923a2d68b4bSDavid Green         break;
924b5a11d37SIgor Kirillov       }
9254a7a27cbSIgor Kirillov 
926e874c8fcSIgor Kirillov       if (!IsAux)
927e874c8fcSIgor Kirillov         SIGroup.Selects.emplace_back(NI, IsRev, CondIdx);
928e874c8fcSIgor Kirillov       ++BBIt;
929e874c8fcSIgor Kirillov     }
9300a62a99aSDavid Green     LLVM_DEBUG({
931e874c8fcSIgor Kirillov       dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n";
932e874c8fcSIgor Kirillov       for (auto &SI : SIGroup.Selects)
9330a62a99aSDavid Green         dbgs() << "  " << *SI.getI() << "\n";
9340a62a99aSDavid Green     });
9350a62a99aSDavid Green 
93697c3ef5cSSotiris Apostolakis     SIGroups.push_back(SIGroup);
93797c3ef5cSSotiris Apostolakis   }
93897c3ef5cSSotiris Apostolakis }
93997c3ef5cSSotiris Apostolakis 
940ce08c7eeSpaperchalice void SelectOptimizeImpl::findProfitableSIGroupsBase(
941ce08c7eeSpaperchalice     SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
9428b42bc56SSotiris Apostolakis   for (SelectGroup &ASI : SIGroups) {
9438b42bc56SSotiris Apostolakis     ++NumSelectOptAnalyzed;
9448b42bc56SSotiris Apostolakis     if (isConvertToBranchProfitableBase(ASI))
9458b42bc56SSotiris Apostolakis       ProfSIGroups.push_back(ASI);
9468b42bc56SSotiris Apostolakis   }
9478b42bc56SSotiris Apostolakis }
9488b42bc56SSotiris Apostolakis 
9497d098988SDavid Green static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE,
9507d098988SDavid Green                                DiagnosticInfoOptimizationBase &Rem) {
9517d098988SDavid Green   LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n");
9527d098988SDavid Green   ORE->emit(Rem);
9537d098988SDavid Green }
9547d098988SDavid Green 
955ce08c7eeSpaperchalice void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops(
956d7ebb746SSotiris Apostolakis     const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) {
957d7ebb746SSotiris Apostolakis   NumSelectOptAnalyzed += SIGroups.size();
958d7ebb746SSotiris Apostolakis   // For each select group in an inner-most loop,
959d7ebb746SSotiris Apostolakis   // a branch is more preferable than a select/conditional-move if:
960d7ebb746SSotiris Apostolakis   // i) conversion to branches for all the select groups of the loop satisfies
961d7ebb746SSotiris Apostolakis   //    loop-level heuristics including reducing the loop's critical path by
962ce08c7eeSpaperchalice   //    some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and
963d7ebb746SSotiris Apostolakis   // ii) the total cost of the select group is cheaper with a branch compared
964d7ebb746SSotiris Apostolakis   //     to its predicated version. The cost is in terms of latency and the cost
965d7ebb746SSotiris Apostolakis   //     of a select group is the cost of its most expensive select instruction
966d7ebb746SSotiris Apostolakis   //     (assuming infinite resources and thus fully leveraging available ILP).
967d7ebb746SSotiris Apostolakis 
968d7ebb746SSotiris Apostolakis   DenseMap<const Instruction *, CostInfo> InstCostMap;
969d7ebb746SSotiris Apostolakis   CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()},
970d7ebb746SSotiris Apostolakis                           {Scaled64::getZero(), Scaled64::getZero()}};
971d7ebb746SSotiris Apostolakis   if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) ||
972d7ebb746SSotiris Apostolakis       !checkLoopHeuristics(L, LoopCost)) {
973d7ebb746SSotiris Apostolakis     return;
974d7ebb746SSotiris Apostolakis   }
975d7ebb746SSotiris Apostolakis 
976d7ebb746SSotiris Apostolakis   for (SelectGroup &ASI : SIGroups) {
977d7ebb746SSotiris Apostolakis     // Assuming infinite resources, the cost of a group of instructions is the
978d7ebb746SSotiris Apostolakis     // cost of the most expensive instruction of the group.
979d7ebb746SSotiris Apostolakis     Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero();
980e874c8fcSIgor Kirillov     for (SelectLike &SI : ASI.Selects) {
981a2d68b4bSDavid Green       SelectCost = std::max(SelectCost, InstCostMap[SI.getI()].PredCost);
982a2d68b4bSDavid Green       BranchCost = std::max(BranchCost, InstCostMap[SI.getI()].NonPredCost);
983d7ebb746SSotiris Apostolakis     }
984d7ebb746SSotiris Apostolakis     if (BranchCost < SelectCost) {
985e874c8fcSIgor Kirillov       OptimizationRemark OR(DEBUG_TYPE, "SelectOpti",
986e874c8fcSIgor Kirillov                             ASI.Selects.front().getI());
987d7ebb746SSotiris Apostolakis       OR << "Profitable to convert to branch (loop analysis). BranchCost="
988d7ebb746SSotiris Apostolakis          << BranchCost.toString() << ", SelectCost=" << SelectCost.toString()
989d7ebb746SSotiris Apostolakis          << ". ";
9907d098988SDavid Green       EmitAndPrintRemark(ORE, OR);
991d7ebb746SSotiris Apostolakis       ++NumSelectConvertedLoop;
992d7ebb746SSotiris Apostolakis       ProfSIGroups.push_back(ASI);
993d7ebb746SSotiris Apostolakis     } else {
994a2d68b4bSDavid Green       OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
995e874c8fcSIgor Kirillov                                       ASI.Selects.front().getI());
996d7ebb746SSotiris Apostolakis       ORmiss << "Select is more profitable (loop analysis). BranchCost="
997d7ebb746SSotiris Apostolakis              << BranchCost.toString()
998d7ebb746SSotiris Apostolakis              << ", SelectCost=" << SelectCost.toString() << ". ";
9997d098988SDavid Green       EmitAndPrintRemark(ORE, ORmiss);
1000d7ebb746SSotiris Apostolakis     }
1001d7ebb746SSotiris Apostolakis   }
1002d7ebb746SSotiris Apostolakis }
1003d7ebb746SSotiris Apostolakis 
1004ce08c7eeSpaperchalice bool SelectOptimizeImpl::isConvertToBranchProfitableBase(
1005a2d68b4bSDavid Green     const SelectGroup &ASI) {
1006e874c8fcSIgor Kirillov   const SelectLike &SI = ASI.Selects.front();
1007995d21bcSwangpc   LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI()
1008a2d68b4bSDavid Green                     << "\n");
1009a2d68b4bSDavid Green   OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI());
1010a2d68b4bSDavid Green   OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI());
10118b42bc56SSotiris Apostolakis 
10128b42bc56SSotiris Apostolakis   // Skip cold basic blocks. Better to optimize for size for cold blocks.
1013a2d68b4bSDavid Green   if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) {
10148b42bc56SSotiris Apostolakis     ++NumSelectColdBB;
10158b42bc56SSotiris Apostolakis     ORmiss << "Not converted to branch because of cold basic block. ";
10167d098988SDavid Green     EmitAndPrintRemark(ORE, ORmiss);
10178b42bc56SSotiris Apostolakis     return false;
10188b42bc56SSotiris Apostolakis   }
10198b42bc56SSotiris Apostolakis 
10208b42bc56SSotiris Apostolakis   // If unpredictable, branch form is less profitable.
1021a2d68b4bSDavid Green   if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) {
10228b42bc56SSotiris Apostolakis     ++NumSelectUnPred;
10238b42bc56SSotiris Apostolakis     ORmiss << "Not converted to branch because of unpredictable branch. ";
10247d098988SDavid Green     EmitAndPrintRemark(ORE, ORmiss);
10258b42bc56SSotiris Apostolakis     return false;
10268b42bc56SSotiris Apostolakis   }
10278b42bc56SSotiris Apostolakis 
10288b42bc56SSotiris Apostolakis   // If highly predictable, branch form is more profitable, unless a
10298b42bc56SSotiris Apostolakis   // predictable select is inexpensive in the target architecture.
10308b42bc56SSotiris Apostolakis   if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) {
10318b42bc56SSotiris Apostolakis     ++NumSelectConvertedHighPred;
10328b42bc56SSotiris Apostolakis     OR << "Converted to branch because of highly predictable branch. ";
10337d098988SDavid Green     EmitAndPrintRemark(ORE, OR);
10348b42bc56SSotiris Apostolakis     return true;
10358b42bc56SSotiris Apostolakis   }
10368b42bc56SSotiris Apostolakis 
10378b42bc56SSotiris Apostolakis   // Look for expensive instructions in the cold operand's (if any) dependence
10388b42bc56SSotiris Apostolakis   // slice of any of the selects in the group.
10398b42bc56SSotiris Apostolakis   if (hasExpensiveColdOperand(ASI)) {
10408b42bc56SSotiris Apostolakis     ++NumSelectConvertedExpColdOperand;
10418b42bc56SSotiris Apostolakis     OR << "Converted to branch because of expensive cold operand.";
10427d098988SDavid Green     EmitAndPrintRemark(ORE, OR);
10438b42bc56SSotiris Apostolakis     return true;
10448b42bc56SSotiris Apostolakis   }
10458b42bc56SSotiris Apostolakis 
10463469996dSIgor Kirillov   // If latch has a select group with several elements, it is usually profitable
10473469996dSIgor Kirillov   // to convert it to branches. We let `optimizeSelectsInnerLoops` decide if
10483469996dSIgor Kirillov   // conversion is profitable for innermost loops.
10493469996dSIgor Kirillov   auto *BB = SI.getI()->getParent();
10503469996dSIgor Kirillov   auto *L = LI->getLoopFor(BB);
10513469996dSIgor Kirillov   if (L && !L->isInnermost() && L->getLoopLatch() == BB &&
10523469996dSIgor Kirillov       ASI.Selects.size() >= 3) {
10533469996dSIgor Kirillov     OR << "Converted to branch because select group in the latch block is big.";
10543469996dSIgor Kirillov     EmitAndPrintRemark(ORE, OR);
10553469996dSIgor Kirillov     return true;
10563469996dSIgor Kirillov   }
10573469996dSIgor Kirillov 
10588b42bc56SSotiris Apostolakis   ORmiss << "Not profitable to convert to branch (base heuristic).";
10597d098988SDavid Green   EmitAndPrintRemark(ORE, ORmiss);
10608b42bc56SSotiris Apostolakis   return false;
10618b42bc56SSotiris Apostolakis }
10628b42bc56SSotiris Apostolakis 
10638b42bc56SSotiris Apostolakis static InstructionCost divideNearest(InstructionCost Numerator,
10648b42bc56SSotiris Apostolakis                                      uint64_t Denominator) {
10658b42bc56SSotiris Apostolakis   return (Numerator + (Denominator / 2)) / Denominator;
10668b42bc56SSotiris Apostolakis }
10678b42bc56SSotiris Apostolakis 
1068a2d68b4bSDavid Green static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI,
1069a2d68b4bSDavid Green                                  uint64_t &TrueVal, uint64_t &FalseVal) {
1070a2d68b4bSDavid Green   if (isa<SelectInst>(SI.getI()))
1071a2d68b4bSDavid Green     return extractBranchWeights(*SI.getI(), TrueVal, FalseVal);
1072a2d68b4bSDavid Green   return false;
1073a2d68b4bSDavid Green }
1074a2d68b4bSDavid Green 
1075a2d68b4bSDavid Green bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) {
10768b42bc56SSotiris Apostolakis   bool ColdOperand = false;
10778b42bc56SSotiris Apostolakis   uint64_t TrueWeight, FalseWeight, TotalWeight;
1078e874c8fcSIgor Kirillov   if (extractBranchWeights(ASI.Selects.front(), TrueWeight, FalseWeight)) {
10798b42bc56SSotiris Apostolakis     uint64_t MinWeight = std::min(TrueWeight, FalseWeight);
10808b42bc56SSotiris Apostolakis     TotalWeight = TrueWeight + FalseWeight;
10818b42bc56SSotiris Apostolakis     // Is there a path with frequency <ColdOperandThreshold% (default:20%) ?
10828b42bc56SSotiris Apostolakis     ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight;
10838b42bc56SSotiris Apostolakis   } else if (PSI->hasProfileSummary()) {
1084a2d68b4bSDavid Green     OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti",
1085e874c8fcSIgor Kirillov                                     ASI.Selects.front().getI());
10868b42bc56SSotiris Apostolakis     ORmiss << "Profile data available but missing branch-weights metadata for "
10878b42bc56SSotiris Apostolakis               "select instruction. ";
10887d098988SDavid Green     EmitAndPrintRemark(ORE, ORmiss);
10898b42bc56SSotiris Apostolakis   }
10908b42bc56SSotiris Apostolakis   if (!ColdOperand)
10918b42bc56SSotiris Apostolakis     return false;
10928b42bc56SSotiris Apostolakis   // Check if the cold path's dependence slice is expensive for any of the
10938b42bc56SSotiris Apostolakis   // selects of the group.
1094e874c8fcSIgor Kirillov   for (SelectLike SI : ASI.Selects) {
10958b42bc56SSotiris Apostolakis     Instruction *ColdI = nullptr;
10968b42bc56SSotiris Apostolakis     uint64_t HotWeight;
10978b42bc56SSotiris Apostolakis     if (TrueWeight < FalseWeight) {
1098a2d68b4bSDavid Green       ColdI = dyn_cast_or_null<Instruction>(SI.getTrueValue());
10998b42bc56SSotiris Apostolakis       HotWeight = FalseWeight;
11008b42bc56SSotiris Apostolakis     } else {
1101a2d68b4bSDavid Green       ColdI = dyn_cast_or_null<Instruction>(SI.getFalseValue());
11028b42bc56SSotiris Apostolakis       HotWeight = TrueWeight;
11038b42bc56SSotiris Apostolakis     }
11048b42bc56SSotiris Apostolakis     if (ColdI) {
110567be40dfSSotiris Apostolakis       std::stack<Instruction *> ColdSlice;
1106a2d68b4bSDavid Green       getExclBackwardsSlice(ColdI, ColdSlice, SI.getI());
11078b42bc56SSotiris Apostolakis       InstructionCost SliceCost = 0;
110867be40dfSSotiris Apostolakis       while (!ColdSlice.empty()) {
110967be40dfSSotiris Apostolakis         SliceCost += TTI->getInstructionCost(ColdSlice.top(),
111067be40dfSSotiris Apostolakis                                              TargetTransformInfo::TCK_Latency);
111167be40dfSSotiris Apostolakis         ColdSlice.pop();
11128b42bc56SSotiris Apostolakis       }
11138b42bc56SSotiris Apostolakis       // The colder the cold value operand of the select is the more expensive
11148b42bc56SSotiris Apostolakis       // the cmov becomes for computing the cold value operand every time. Thus,
11158b42bc56SSotiris Apostolakis       // the colder the cold operand is the more its cost counts.
11168b42bc56SSotiris Apostolakis       // Get nearest integer cost adjusted for coldness.
11178b42bc56SSotiris Apostolakis       InstructionCost AdjSliceCost =
11188b42bc56SSotiris Apostolakis           divideNearest(SliceCost * HotWeight, TotalWeight);
11198b42bc56SSotiris Apostolakis       if (AdjSliceCost >=
11208b42bc56SSotiris Apostolakis           ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive)
11218b42bc56SSotiris Apostolakis         return true;
11228b42bc56SSotiris Apostolakis     }
11238b42bc56SSotiris Apostolakis   }
11248b42bc56SSotiris Apostolakis   return false;
11258b42bc56SSotiris Apostolakis }
11268b42bc56SSotiris Apostolakis 
1127b827e7c6SSotiris Apostolakis // Check if it is safe to move LoadI next to the SI.
1128b827e7c6SSotiris Apostolakis // Conservatively assume it is safe only if there is no instruction
1129b827e7c6SSotiris Apostolakis // modifying memory in-between the load and the select instruction.
1130b827e7c6SSotiris Apostolakis static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) {
1131b827e7c6SSotiris Apostolakis   // Assume loads from different basic blocks are unsafe to move.
1132b827e7c6SSotiris Apostolakis   if (LoadI->getParent() != SI->getParent())
1133b827e7c6SSotiris Apostolakis     return false;
1134b827e7c6SSotiris Apostolakis   auto It = LoadI->getIterator();
1135b827e7c6SSotiris Apostolakis   while (&*It != SI) {
1136b827e7c6SSotiris Apostolakis     if (It->mayWriteToMemory())
1137b827e7c6SSotiris Apostolakis       return false;
1138b827e7c6SSotiris Apostolakis     It++;
1139b827e7c6SSotiris Apostolakis   }
1140b827e7c6SSotiris Apostolakis   return true;
1141b827e7c6SSotiris Apostolakis }
1142b827e7c6SSotiris Apostolakis 
11438b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice
11448b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for the purpose of producing
11458b42bc56SSotiris Apostolakis // the operands of the source instruction. As an approximation
11468b42bc56SSotiris Apostolakis // (sufficiently-accurate in practice), we populate this set with the
11478b42bc56SSotiris Apostolakis // instructions of the backwards dependence slice that only have one-use and
11488b42bc56SSotiris Apostolakis // form an one-use chain that leads to the source instruction.
1149ce08c7eeSpaperchalice void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I,
115067be40dfSSotiris Apostolakis                                                std::stack<Instruction *> &Slice,
1151ce08c7eeSpaperchalice                                                Instruction *SI,
1152ce08c7eeSpaperchalice                                                bool ForSinking) {
11538b42bc56SSotiris Apostolakis   SmallPtrSet<Instruction *, 2> Visited;
11548b42bc56SSotiris Apostolakis   std::queue<Instruction *> Worklist;
11558b42bc56SSotiris Apostolakis   Worklist.push(I);
11568b42bc56SSotiris Apostolakis   while (!Worklist.empty()) {
11578b42bc56SSotiris Apostolakis     Instruction *II = Worklist.front();
11588b42bc56SSotiris Apostolakis     Worklist.pop();
11598b42bc56SSotiris Apostolakis 
11608b42bc56SSotiris Apostolakis     // Avoid cycles.
1161b254d671SKazu Hirata     if (!Visited.insert(II).second)
11628b42bc56SSotiris Apostolakis       continue;
11638b42bc56SSotiris Apostolakis 
11648b42bc56SSotiris Apostolakis     if (!II->hasOneUse())
11658b42bc56SSotiris Apostolakis       continue;
11668b42bc56SSotiris Apostolakis 
116767be40dfSSotiris Apostolakis     // Cannot soundly sink instructions with side-effects.
116867be40dfSSotiris Apostolakis     // Terminator or phi instructions cannot be sunk.
116967be40dfSSotiris Apostolakis     // Avoid sinking other select instructions (should be handled separetely).
117067be40dfSSotiris Apostolakis     if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() ||
117167be40dfSSotiris Apostolakis                        isa<SelectInst>(II) || isa<PHINode>(II)))
117267be40dfSSotiris Apostolakis       continue;
117367be40dfSSotiris Apostolakis 
1174b827e7c6SSotiris Apostolakis     // Avoid sinking loads in order not to skip state-modifying instructions,
1175b827e7c6SSotiris Apostolakis     // that may alias with the loaded address.
1176b827e7c6SSotiris Apostolakis     // Only allow sinking of loads within the same basic block that are
1177b827e7c6SSotiris Apostolakis     // conservatively proven to be safe.
1178b827e7c6SSotiris Apostolakis     if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI))
1179b827e7c6SSotiris Apostolakis       continue;
1180b827e7c6SSotiris Apostolakis 
11818b42bc56SSotiris Apostolakis     // Avoid considering instructions with less frequency than the source
11828b42bc56SSotiris Apostolakis     // instruction (i.e., avoid colder code regions of the dependence slice).
11838b42bc56SSotiris Apostolakis     if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent()))
11848b42bc56SSotiris Apostolakis       continue;
11858b42bc56SSotiris Apostolakis 
11868b42bc56SSotiris Apostolakis     // Eligible one-use instruction added to the dependence slice.
118767be40dfSSotiris Apostolakis     Slice.push(II);
11888b42bc56SSotiris Apostolakis 
11898b42bc56SSotiris Apostolakis     // Explore all the operands of the current instruction to expand the slice.
119066cd2e0fSKazu Hirata     for (Value *Op : II->operand_values())
119166cd2e0fSKazu Hirata       if (auto *OpI = dyn_cast<Instruction>(Op))
11928b42bc56SSotiris Apostolakis         Worklist.push(OpI);
11938b42bc56SSotiris Apostolakis   }
11948b42bc56SSotiris Apostolakis }
11958b42bc56SSotiris Apostolakis 
1196a2d68b4bSDavid Green bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) {
11978b42bc56SSotiris Apostolakis   uint64_t TrueWeight, FalseWeight;
1198a2d68b4bSDavid Green   if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
11998b42bc56SSotiris Apostolakis     uint64_t Max = std::max(TrueWeight, FalseWeight);
12008b42bc56SSotiris Apostolakis     uint64_t Sum = TrueWeight + FalseWeight;
12018b42bc56SSotiris Apostolakis     if (Sum != 0) {
12028b42bc56SSotiris Apostolakis       auto Probability = BranchProbability::getBranchProbability(Max, Sum);
12038b42bc56SSotiris Apostolakis       if (Probability > TTI->getPredictableBranchThreshold())
12048b42bc56SSotiris Apostolakis         return true;
12058b42bc56SSotiris Apostolakis     }
12068b42bc56SSotiris Apostolakis   }
12078b42bc56SSotiris Apostolakis   return false;
12088b42bc56SSotiris Apostolakis }
12098b42bc56SSotiris Apostolakis 
1210ce08c7eeSpaperchalice bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L,
1211d7ebb746SSotiris Apostolakis                                              const CostInfo LoopCost[2]) {
1212d7ebb746SSotiris Apostolakis   // Loop-level checks to determine if a non-predicated version (with branches)
1213d7ebb746SSotiris Apostolakis   // of the loop is more profitable than its predicated version.
1214d7ebb746SSotiris Apostolakis 
1215d7ebb746SSotiris Apostolakis   if (DisableLoopLevelHeuristics)
1216d7ebb746SSotiris Apostolakis     return true;
1217d7ebb746SSotiris Apostolakis 
1218d7ebb746SSotiris Apostolakis   OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti",
12196292a808SJeremy Morse                                    &*L->getHeader()->getFirstNonPHIIt());
1220d7ebb746SSotiris Apostolakis 
1221d7ebb746SSotiris Apostolakis   if (LoopCost[0].NonPredCost > LoopCost[0].PredCost ||
1222d7ebb746SSotiris Apostolakis       LoopCost[1].NonPredCost >= LoopCost[1].PredCost) {
1223d7ebb746SSotiris Apostolakis     ORmissL << "No select conversion in the loop due to no reduction of loop's "
1224d7ebb746SSotiris Apostolakis                "critical path. ";
12257d098988SDavid Green     EmitAndPrintRemark(ORE, ORmissL);
1226d7ebb746SSotiris Apostolakis     return false;
1227d7ebb746SSotiris Apostolakis   }
1228d7ebb746SSotiris Apostolakis 
1229d7ebb746SSotiris Apostolakis   Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost,
1230d7ebb746SSotiris Apostolakis                       LoopCost[1].PredCost - LoopCost[1].NonPredCost};
1231d7ebb746SSotiris Apostolakis 
1232d7ebb746SSotiris Apostolakis   // Profitably converting to branches need to reduce the loop's critical path
1233d7ebb746SSotiris Apostolakis   // by at least some threshold (absolute gain of GainCycleThreshold cycles and
1234d7ebb746SSotiris Apostolakis   // relative gain of 12.5%).
1235d7ebb746SSotiris Apostolakis   if (Gain[1] < Scaled64::get(GainCycleThreshold) ||
1236d7ebb746SSotiris Apostolakis       Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) {
1237d7ebb746SSotiris Apostolakis     Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost;
1238d7ebb746SSotiris Apostolakis     ORmissL << "No select conversion in the loop due to small reduction of "
1239d7ebb746SSotiris Apostolakis                "loop's critical path. Gain="
1240d7ebb746SSotiris Apostolakis             << Gain[1].toString()
1241d7ebb746SSotiris Apostolakis             << ", RelativeGain=" << RelativeGain.toString() << "%. ";
12427d098988SDavid Green     EmitAndPrintRemark(ORE, ORmissL);
1243d7ebb746SSotiris Apostolakis     return false;
1244d7ebb746SSotiris Apostolakis   }
1245d7ebb746SSotiris Apostolakis 
1246d7ebb746SSotiris Apostolakis   // If the loop's critical path involves loop-carried dependences, the gradient
1247d7ebb746SSotiris Apostolakis   // of the gain needs to be at least GainGradientThreshold% (defaults to 25%).
1248d7ebb746SSotiris Apostolakis   // This check ensures that the latency reduction for the loop's critical path
1249d7ebb746SSotiris Apostolakis   // keeps decreasing with sufficient rate beyond the two analyzed loop
1250d7ebb746SSotiris Apostolakis   // iterations.
1251d7ebb746SSotiris Apostolakis   if (Gain[1] > Gain[0]) {
1252d7ebb746SSotiris Apostolakis     Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) /
1253d7ebb746SSotiris Apostolakis                             (LoopCost[1].PredCost - LoopCost[0].PredCost);
1254d7ebb746SSotiris Apostolakis     if (GradientGain < Scaled64::get(GainGradientThreshold)) {
1255d7ebb746SSotiris Apostolakis       ORmissL << "No select conversion in the loop due to small gradient gain. "
1256d7ebb746SSotiris Apostolakis                  "GradientGain="
1257d7ebb746SSotiris Apostolakis               << GradientGain.toString() << "%. ";
12587d098988SDavid Green       EmitAndPrintRemark(ORE, ORmissL);
1259d7ebb746SSotiris Apostolakis       return false;
1260d7ebb746SSotiris Apostolakis     }
1261d7ebb746SSotiris Apostolakis   }
1262d7ebb746SSotiris Apostolakis   // If the gain decreases it is not profitable to convert.
1263d7ebb746SSotiris Apostolakis   else if (Gain[1] < Gain[0]) {
1264d7ebb746SSotiris Apostolakis     ORmissL
1265d7ebb746SSotiris Apostolakis         << "No select conversion in the loop due to negative gradient gain. ";
12667d098988SDavid Green     EmitAndPrintRemark(ORE, ORmissL);
1267d7ebb746SSotiris Apostolakis     return false;
1268d7ebb746SSotiris Apostolakis   }
1269d7ebb746SSotiris Apostolakis 
1270d7ebb746SSotiris Apostolakis   // Non-predicated version of the loop is more profitable than its
1271d7ebb746SSotiris Apostolakis   // predicated version.
1272d7ebb746SSotiris Apostolakis   return true;
1273d7ebb746SSotiris Apostolakis }
1274d7ebb746SSotiris Apostolakis 
1275d7ebb746SSotiris Apostolakis // Computes instruction and loop-critical-path costs for both the predicated
1276d7ebb746SSotiris Apostolakis // and non-predicated version of the given loop.
1277d7ebb746SSotiris Apostolakis // Returns false if unable to compute these costs due to invalid cost of loop
1278d7ebb746SSotiris Apostolakis // instruction(s).
1279ce08c7eeSpaperchalice bool SelectOptimizeImpl::computeLoopCosts(
1280d7ebb746SSotiris Apostolakis     const Loop *L, const SelectGroups &SIGroups,
1281d7ebb746SSotiris Apostolakis     DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) {
12827d098988SDavid Green   LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop "
12837d098988SDavid Green                     << L->getHeader()->getName() << "\n");
1284e874c8fcSIgor Kirillov   const auto SImap = getSImap(SIGroups);
1285e874c8fcSIgor Kirillov   const auto SGmap = getSGmap(SIGroups);
1286d7ebb746SSotiris Apostolakis   // Compute instruction and loop-critical-path costs across two iterations for
1287d7ebb746SSotiris Apostolakis   // both predicated and non-predicated version.
1288d7ebb746SSotiris Apostolakis   const unsigned Iterations = 2;
1289d7ebb746SSotiris Apostolakis   for (unsigned Iter = 0; Iter < Iterations; ++Iter) {
1290d7ebb746SSotiris Apostolakis     // Cost of the loop's critical path.
1291d7ebb746SSotiris Apostolakis     CostInfo &MaxCost = LoopCost[Iter];
1292d7ebb746SSotiris Apostolakis     for (BasicBlock *BB : L->getBlocks()) {
1293d7ebb746SSotiris Apostolakis       for (const Instruction &I : *BB) {
1294d7ebb746SSotiris Apostolakis         if (I.isDebugOrPseudoInst())
1295d7ebb746SSotiris Apostolakis           continue;
1296d7ebb746SSotiris Apostolakis         // Compute the predicated and non-predicated cost of the instruction.
1297d7ebb746SSotiris Apostolakis         Scaled64 IPredCost = Scaled64::getZero(),
1298d7ebb746SSotiris Apostolakis                  INonPredCost = Scaled64::getZero();
1299d7ebb746SSotiris Apostolakis 
1300d7ebb746SSotiris Apostolakis         // Assume infinite resources that allow to fully exploit the available
1301d7ebb746SSotiris Apostolakis         // instruction-level parallelism.
1302d7ebb746SSotiris Apostolakis         // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost)
1303d7ebb746SSotiris Apostolakis         for (const Use &U : I.operands()) {
1304d7ebb746SSotiris Apostolakis           auto UI = dyn_cast<Instruction>(U.get());
1305d7ebb746SSotiris Apostolakis           if (!UI)
1306d7ebb746SSotiris Apostolakis             continue;
1307*1d5ce614SKazu Hirata           if (auto It = InstCostMap.find(UI); It != InstCostMap.end()) {
1308*1d5ce614SKazu Hirata             IPredCost = std::max(IPredCost, It->second.PredCost);
1309*1d5ce614SKazu Hirata             INonPredCost = std::max(INonPredCost, It->second.NonPredCost);
1310d7ebb746SSotiris Apostolakis           }
1311d7ebb746SSotiris Apostolakis         }
1312d7ebb746SSotiris Apostolakis         auto ILatency = computeInstCost(&I);
1313e0e687a6SKazu Hirata         if (!ILatency) {
1314d7ebb746SSotiris Apostolakis           OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I);
1315d7ebb746SSotiris Apostolakis           ORmissL << "Invalid instruction cost preventing analysis and "
1316d7ebb746SSotiris Apostolakis                      "optimization of the inner-most loop containing this "
1317d7ebb746SSotiris Apostolakis                      "instruction. ";
13187d098988SDavid Green           EmitAndPrintRemark(ORE, ORmissL);
1319d7ebb746SSotiris Apostolakis           return false;
1320d7ebb746SSotiris Apostolakis         }
132151b68573SFangrui Song         IPredCost += Scaled64::get(*ILatency);
132251b68573SFangrui Song         INonPredCost += Scaled64::get(*ILatency);
1323d7ebb746SSotiris Apostolakis 
1324d7ebb746SSotiris Apostolakis         // For a select that can be converted to branch,
1325d7ebb746SSotiris Apostolakis         // compute its cost as a branch (non-predicated cost).
1326d7ebb746SSotiris Apostolakis         //
1327d7ebb746SSotiris Apostolakis         // BranchCost = PredictedPathCost + MispredictCost
1328d7ebb746SSotiris Apostolakis         // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb
1329d7ebb746SSotiris Apostolakis         // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate
1330a2d68b4bSDavid Green         if (SImap.contains(&I)) {
1331a2d68b4bSDavid Green           auto SI = SImap.at(&I);
1332e874c8fcSIgor Kirillov           const auto *SG = SGmap.at(&I);
1333e874c8fcSIgor Kirillov           Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI);
1334e874c8fcSIgor Kirillov           Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI);
1335d7ebb746SSotiris Apostolakis           Scaled64 PredictedPathCost =
1336d7ebb746SSotiris Apostolakis               getPredictedPathCost(TrueOpCost, FalseOpCost, SI);
1337d7ebb746SSotiris Apostolakis 
1338d7ebb746SSotiris Apostolakis           Scaled64 CondCost = Scaled64::getZero();
1339e874c8fcSIgor Kirillov           if (auto *CI = dyn_cast<Instruction>(SG->Condition))
1340*1d5ce614SKazu Hirata             if (auto It = InstCostMap.find(CI); It != InstCostMap.end())
1341*1d5ce614SKazu Hirata               CondCost = It->second.NonPredCost;
1342d7ebb746SSotiris Apostolakis           Scaled64 MispredictCost = getMispredictionCost(SI, CondCost);
1343d7ebb746SSotiris Apostolakis 
1344d7ebb746SSotiris Apostolakis           INonPredCost = PredictedPathCost + MispredictCost;
1345d7ebb746SSotiris Apostolakis         }
13467d098988SDavid Green         LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/"
13477d098988SDavid Green                           << INonPredCost << " for " << I << "\n");
1348d7ebb746SSotiris Apostolakis 
1349d7ebb746SSotiris Apostolakis         InstCostMap[&I] = {IPredCost, INonPredCost};
1350d7ebb746SSotiris Apostolakis         MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost);
1351d7ebb746SSotiris Apostolakis         MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost);
1352d7ebb746SSotiris Apostolakis       }
1353d7ebb746SSotiris Apostolakis     }
13547d098988SDavid Green     LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1
13557d098988SDavid Green                       << " MaxCost = " << MaxCost.PredCost << " "
13567d098988SDavid Green                       << MaxCost.NonPredCost << "\n");
1357d7ebb746SSotiris Apostolakis   }
1358d7ebb746SSotiris Apostolakis   return true;
1359d7ebb746SSotiris Apostolakis }
1360d7ebb746SSotiris Apostolakis 
1361a2d68b4bSDavid Green SmallDenseMap<const Instruction *, SelectOptimizeImpl::SelectLike, 2>
1362a2d68b4bSDavid Green SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) {
1363a2d68b4bSDavid Green   SmallDenseMap<const Instruction *, SelectLike, 2> SImap;
1364d7ebb746SSotiris Apostolakis   for (const SelectGroup &ASI : SIGroups)
1365e874c8fcSIgor Kirillov     for (const SelectLike &SI : ASI.Selects)
1366a2d68b4bSDavid Green       SImap.try_emplace(SI.getI(), SI);
1367a2d68b4bSDavid Green   return SImap;
1368d7ebb746SSotiris Apostolakis }
1369d7ebb746SSotiris Apostolakis 
1370e874c8fcSIgor Kirillov SmallDenseMap<const Instruction *, const SelectOptimizeImpl::SelectGroup *, 2>
1371e874c8fcSIgor Kirillov SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) {
1372e874c8fcSIgor Kirillov   SmallDenseMap<const Instruction *, const SelectGroup *, 2> SImap;
1373e874c8fcSIgor Kirillov   for (const SelectGroup &ASI : SIGroups)
1374e874c8fcSIgor Kirillov     for (const SelectLike &SI : ASI.Selects)
1375e874c8fcSIgor Kirillov       SImap.try_emplace(SI.getI(), &ASI);
1376e874c8fcSIgor Kirillov   return SImap;
1377e874c8fcSIgor Kirillov }
1378e874c8fcSIgor Kirillov 
1379ce08c7eeSpaperchalice std::optional<uint64_t>
1380ce08c7eeSpaperchalice SelectOptimizeImpl::computeInstCost(const Instruction *I) {
1381d7ebb746SSotiris Apostolakis   InstructionCost ICost =
1382d7ebb746SSotiris Apostolakis       TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency);
1383d7ebb746SSotiris Apostolakis   if (auto OC = ICost.getValue())
138467819a72SFangrui Song     return std::optional<uint64_t>(*OC);
1385998960eeSKazu Hirata   return std::nullopt;
1386d7ebb746SSotiris Apostolakis }
1387d7ebb746SSotiris Apostolakis 
1388d7ebb746SSotiris Apostolakis ScaledNumber<uint64_t>
1389a2d68b4bSDavid Green SelectOptimizeImpl::getMispredictionCost(const SelectLike SI,
1390d7ebb746SSotiris Apostolakis                                          const Scaled64 CondCost) {
1391d7ebb746SSotiris Apostolakis   uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty;
1392d7ebb746SSotiris Apostolakis 
1393d7ebb746SSotiris Apostolakis   // Account for the default misprediction rate when using a branch
1394d7ebb746SSotiris Apostolakis   // (conservatively set to 25% by default).
1395d7ebb746SSotiris Apostolakis   uint64_t MispredictRate = MispredictDefaultRate;
1396d7ebb746SSotiris Apostolakis   // If the select condition is obviously predictable, then the misprediction
1397d7ebb746SSotiris Apostolakis   // rate is zero.
1398d7ebb746SSotiris Apostolakis   if (isSelectHighlyPredictable(SI))
1399d7ebb746SSotiris Apostolakis     MispredictRate = 0;
1400d7ebb746SSotiris Apostolakis 
1401d7ebb746SSotiris Apostolakis   // CondCost is included to account for cases where the computation of the
1402d7ebb746SSotiris Apostolakis   // condition is part of a long dependence chain (potentially loop-carried)
1403d7ebb746SSotiris Apostolakis   // that would delay detection of a misprediction and increase its cost.
1404d7ebb746SSotiris Apostolakis   Scaled64 MispredictCost =
1405d7ebb746SSotiris Apostolakis       std::max(Scaled64::get(MispredictPenalty), CondCost) *
1406d7ebb746SSotiris Apostolakis       Scaled64::get(MispredictRate);
1407d7ebb746SSotiris Apostolakis   MispredictCost /= Scaled64::get(100);
1408d7ebb746SSotiris Apostolakis 
1409d7ebb746SSotiris Apostolakis   return MispredictCost;
1410d7ebb746SSotiris Apostolakis }
1411d7ebb746SSotiris Apostolakis 
1412d7ebb746SSotiris Apostolakis // Returns the cost of a branch when the prediction is correct.
1413d7ebb746SSotiris Apostolakis // TrueCost * TrueProbability + FalseCost * FalseProbability.
1414d7ebb746SSotiris Apostolakis ScaledNumber<uint64_t>
1415ce08c7eeSpaperchalice SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost,
1416a2d68b4bSDavid Green                                          const SelectLike SI) {
1417d7ebb746SSotiris Apostolakis   Scaled64 PredPathCost;
1418d7ebb746SSotiris Apostolakis   uint64_t TrueWeight, FalseWeight;
1419a2d68b4bSDavid Green   if (extractBranchWeights(SI, TrueWeight, FalseWeight)) {
1420d7ebb746SSotiris Apostolakis     uint64_t SumWeight = TrueWeight + FalseWeight;
1421d7ebb746SSotiris Apostolakis     if (SumWeight != 0) {
1422d7ebb746SSotiris Apostolakis       PredPathCost = TrueCost * Scaled64::get(TrueWeight) +
1423d7ebb746SSotiris Apostolakis                      FalseCost * Scaled64::get(FalseWeight);
1424d7ebb746SSotiris Apostolakis       PredPathCost /= Scaled64::get(SumWeight);
1425d7ebb746SSotiris Apostolakis       return PredPathCost;
1426d7ebb746SSotiris Apostolakis     }
1427d7ebb746SSotiris Apostolakis   }
1428d7ebb746SSotiris Apostolakis   // Without branch weight metadata, we assume 75% for the one path and 25% for
1429d7ebb746SSotiris Apostolakis   // the other, and pick the result with the biggest cost.
1430d7ebb746SSotiris Apostolakis   PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost,
1431d7ebb746SSotiris Apostolakis                           FalseCost * Scaled64::get(3) + TrueCost);
1432d7ebb746SSotiris Apostolakis   PredPathCost /= Scaled64::get(4);
1433d7ebb746SSotiris Apostolakis   return PredPathCost;
1434d7ebb746SSotiris Apostolakis }
1435d7ebb746SSotiris Apostolakis 
1436a2d68b4bSDavid Green bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) {
143797c3ef5cSSotiris Apostolakis   TargetLowering::SelectSupportKind SelectKind;
1438a2d68b4bSDavid Green   if (SI.getType()->isVectorTy())
143997c3ef5cSSotiris Apostolakis     SelectKind = TargetLowering::ScalarCondVectorVal;
144097c3ef5cSSotiris Apostolakis   else
144197c3ef5cSSotiris Apostolakis     SelectKind = TargetLowering::ScalarValSelect;
144297c3ef5cSSotiris Apostolakis   return TLI->isSelectSupported(SelectKind);
1443ca7c307dSSotiris Apostolakis }
1444