181ad6265SDimitry Andric //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===// 281ad6265SDimitry Andric // 381ad6265SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 481ad6265SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 581ad6265SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 681ad6265SDimitry Andric // 781ad6265SDimitry Andric //===----------------------------------------------------------------------===// 881ad6265SDimitry Andric // 981ad6265SDimitry Andric // This pass converts selects to conditional jumps when profitable. 1081ad6265SDimitry Andric // 1181ad6265SDimitry Andric //===----------------------------------------------------------------------===// 1281ad6265SDimitry Andric 135f757f3fSDimitry Andric #include "llvm/CodeGen/SelectOptimize.h" 1481ad6265SDimitry Andric #include "llvm/ADT/SmallVector.h" 1581ad6265SDimitry Andric #include "llvm/ADT/Statistic.h" 1681ad6265SDimitry Andric #include "llvm/Analysis/BlockFrequencyInfo.h" 1781ad6265SDimitry Andric #include "llvm/Analysis/BranchProbabilityInfo.h" 1881ad6265SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 1981ad6265SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 2081ad6265SDimitry Andric #include "llvm/Analysis/ProfileSummaryInfo.h" 2181ad6265SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 2281ad6265SDimitry Andric #include "llvm/CodeGen/Passes.h" 2381ad6265SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 2481ad6265SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 2581ad6265SDimitry Andric #include "llvm/CodeGen/TargetSchedule.h" 2681ad6265SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 2781ad6265SDimitry Andric #include "llvm/IR/BasicBlock.h" 2881ad6265SDimitry Andric #include "llvm/IR/Dominators.h" 2981ad6265SDimitry Andric #include "llvm/IR/Function.h" 3081ad6265SDimitry Andric #include "llvm/IR/IRBuilder.h" 3181ad6265SDimitry Andric #include "llvm/IR/Instruction.h" 3206c3fb27SDimitry Andric #include "llvm/IR/PatternMatch.h" 33bdd1243dSDimitry Andric #include "llvm/IR/ProfDataUtils.h" 3481ad6265SDimitry Andric #include "llvm/InitializePasses.h" 3581ad6265SDimitry Andric #include "llvm/Pass.h" 3681ad6265SDimitry Andric #include "llvm/Support/ScaledNumber.h" 3781ad6265SDimitry Andric #include "llvm/Target/TargetMachine.h" 3881ad6265SDimitry Andric #include "llvm/Transforms/Utils/SizeOpts.h" 3981ad6265SDimitry Andric #include <algorithm> 4081ad6265SDimitry Andric #include <memory> 4181ad6265SDimitry Andric #include <queue> 4281ad6265SDimitry Andric #include <stack> 4381ad6265SDimitry Andric 4481ad6265SDimitry Andric using namespace llvm; 457a6dacacSDimitry Andric using namespace llvm::PatternMatch; 4681ad6265SDimitry Andric 4781ad6265SDimitry Andric #define DEBUG_TYPE "select-optimize" 4881ad6265SDimitry Andric 4981ad6265SDimitry Andric STATISTIC(NumSelectOptAnalyzed, 5081ad6265SDimitry Andric "Number of select groups considered for conversion to branch"); 5181ad6265SDimitry Andric STATISTIC(NumSelectConvertedExpColdOperand, 5281ad6265SDimitry Andric "Number of select groups converted due to expensive cold operand"); 5381ad6265SDimitry Andric STATISTIC(NumSelectConvertedHighPred, 5481ad6265SDimitry Andric "Number of select groups converted due to high-predictability"); 5581ad6265SDimitry Andric STATISTIC(NumSelectUnPred, 5681ad6265SDimitry Andric "Number of select groups not converted due to unpredictability"); 5781ad6265SDimitry Andric STATISTIC(NumSelectColdBB, 5881ad6265SDimitry Andric "Number of select groups not converted due to cold basic block"); 5981ad6265SDimitry Andric STATISTIC(NumSelectConvertedLoop, 6081ad6265SDimitry Andric "Number of select groups converted due to loop-level analysis"); 6181ad6265SDimitry Andric STATISTIC(NumSelectsConverted, "Number of selects converted"); 6281ad6265SDimitry Andric 6381ad6265SDimitry Andric static cl::opt<unsigned> ColdOperandThreshold( 6481ad6265SDimitry Andric "cold-operand-threshold", 6581ad6265SDimitry Andric cl::desc("Maximum frequency of path for an operand to be considered cold."), 6681ad6265SDimitry Andric cl::init(20), cl::Hidden); 6781ad6265SDimitry Andric 6881ad6265SDimitry Andric static cl::opt<unsigned> ColdOperandMaxCostMultiplier( 6981ad6265SDimitry Andric "cold-operand-max-cost-multiplier", 7081ad6265SDimitry Andric cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " 7181ad6265SDimitry Andric "slice of a cold operand to be considered inexpensive."), 7281ad6265SDimitry Andric cl::init(1), cl::Hidden); 7381ad6265SDimitry Andric 7481ad6265SDimitry Andric static cl::opt<unsigned> 7581ad6265SDimitry Andric GainGradientThreshold("select-opti-loop-gradient-gain-threshold", 7681ad6265SDimitry Andric cl::desc("Gradient gain threshold (%)."), 7781ad6265SDimitry Andric cl::init(25), cl::Hidden); 7881ad6265SDimitry Andric 7981ad6265SDimitry Andric static cl::opt<unsigned> 8081ad6265SDimitry Andric GainCycleThreshold("select-opti-loop-cycle-gain-threshold", 8181ad6265SDimitry Andric cl::desc("Minimum gain per loop (in cycles) threshold."), 8281ad6265SDimitry Andric cl::init(4), cl::Hidden); 8381ad6265SDimitry Andric 8481ad6265SDimitry Andric static cl::opt<unsigned> GainRelativeThreshold( 8581ad6265SDimitry Andric "select-opti-loop-relative-gain-threshold", 8681ad6265SDimitry Andric cl::desc( 8781ad6265SDimitry Andric "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), 8881ad6265SDimitry Andric cl::init(8), cl::Hidden); 8981ad6265SDimitry Andric 9081ad6265SDimitry Andric static cl::opt<unsigned> MispredictDefaultRate( 9181ad6265SDimitry Andric "mispredict-default-rate", cl::Hidden, cl::init(25), 9281ad6265SDimitry Andric cl::desc("Default mispredict rate (initialized to 25%).")); 9381ad6265SDimitry Andric 9481ad6265SDimitry Andric static cl::opt<bool> 9581ad6265SDimitry Andric DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, 9681ad6265SDimitry Andric cl::init(false), 9781ad6265SDimitry Andric cl::desc("Disable loop-level heuristics.")); 9881ad6265SDimitry Andric 9981ad6265SDimitry Andric namespace { 10081ad6265SDimitry Andric 1015f757f3fSDimitry Andric class SelectOptimizeImpl { 10281ad6265SDimitry Andric const TargetMachine *TM = nullptr; 10306c3fb27SDimitry Andric const TargetSubtargetInfo *TSI = nullptr; 10481ad6265SDimitry Andric const TargetLowering *TLI = nullptr; 10581ad6265SDimitry Andric const TargetTransformInfo *TTI = nullptr; 10606c3fb27SDimitry Andric const LoopInfo *LI = nullptr; 1075f757f3fSDimitry Andric BlockFrequencyInfo *BFI; 10806c3fb27SDimitry Andric ProfileSummaryInfo *PSI = nullptr; 10906c3fb27SDimitry Andric OptimizationRemarkEmitter *ORE = nullptr; 11081ad6265SDimitry Andric TargetSchedModel TSchedModel; 11181ad6265SDimitry Andric 11281ad6265SDimitry Andric public: 1135f757f3fSDimitry Andric SelectOptimizeImpl() = default; 1145f757f3fSDimitry Andric SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){}; 1155f757f3fSDimitry Andric PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); 1165f757f3fSDimitry Andric bool runOnFunction(Function &F, Pass &P); 11781ad6265SDimitry Andric 11881ad6265SDimitry Andric using Scaled64 = ScaledNumber<uint64_t>; 11981ad6265SDimitry Andric 12081ad6265SDimitry Andric struct CostInfo { 12181ad6265SDimitry Andric /// Predicated cost (with selects as conditional moves). 12281ad6265SDimitry Andric Scaled64 PredCost; 12381ad6265SDimitry Andric /// Non-predicated cost (with selects converted to branches). 12481ad6265SDimitry Andric Scaled64 NonPredCost; 12581ad6265SDimitry Andric }; 12681ad6265SDimitry Andric 1277a6dacacSDimitry Andric /// SelectLike is an abstraction over SelectInst and other operations that can 1287a6dacacSDimitry Andric /// act like selects. For example Or(Zext(icmp), X) can be treated like 1297a6dacacSDimitry Andric /// select(icmp, X|1, X). 1307a6dacacSDimitry Andric class SelectLike { 1317a6dacacSDimitry Andric SelectLike(Instruction *I) : I(I) {} 1327a6dacacSDimitry Andric 133*0fca6ea1SDimitry Andric /// The select (/or) instruction. 1347a6dacacSDimitry Andric Instruction *I; 135*0fca6ea1SDimitry Andric /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as 136*0fca6ea1SDimitry Andric /// opposed to the original condition. 137*0fca6ea1SDimitry Andric bool Inverted = false; 1387a6dacacSDimitry Andric 1397a6dacacSDimitry Andric public: 1407a6dacacSDimitry Andric /// Match a select or select-like instruction, returning a SelectLike. 1417a6dacacSDimitry Andric static SelectLike match(Instruction *I) { 1427a6dacacSDimitry Andric // Select instruction are what we are usually looking for. 1437a6dacacSDimitry Andric if (isa<SelectInst>(I)) 1447a6dacacSDimitry Andric return SelectLike(I); 1457a6dacacSDimitry Andric 1467a6dacacSDimitry Andric // An Or(zext(i1 X), Y) can also be treated like a select, with condition 1477a6dacacSDimitry Andric // C and values Y|1 and Y. 1487a6dacacSDimitry Andric Value *X; 1497a6dacacSDimitry Andric if (PatternMatch::match( 1507a6dacacSDimitry Andric I, m_c_Or(m_OneUse(m_ZExt(m_Value(X))), m_Value())) && 1517a6dacacSDimitry Andric X->getType()->isIntegerTy(1)) 1527a6dacacSDimitry Andric return SelectLike(I); 1537a6dacacSDimitry Andric 1547a6dacacSDimitry Andric return SelectLike(nullptr); 1557a6dacacSDimitry Andric } 1567a6dacacSDimitry Andric 1577a6dacacSDimitry Andric bool isValid() { return I; } 1587a6dacacSDimitry Andric operator bool() { return isValid(); } 1597a6dacacSDimitry Andric 160*0fca6ea1SDimitry Andric /// Invert the select by inverting the condition and switching the operands. 161*0fca6ea1SDimitry Andric void setInverted() { 162*0fca6ea1SDimitry Andric assert(!Inverted && "Trying to invert an inverted SelectLike"); 163*0fca6ea1SDimitry Andric assert(isa<Instruction>(getCondition()) && 164*0fca6ea1SDimitry Andric cast<Instruction>(getCondition())->getOpcode() == 165*0fca6ea1SDimitry Andric Instruction::Xor); 166*0fca6ea1SDimitry Andric Inverted = true; 167*0fca6ea1SDimitry Andric } 168*0fca6ea1SDimitry Andric bool isInverted() const { return Inverted; } 169*0fca6ea1SDimitry Andric 1707a6dacacSDimitry Andric Instruction *getI() { return I; } 1717a6dacacSDimitry Andric const Instruction *getI() const { return I; } 1727a6dacacSDimitry Andric 1737a6dacacSDimitry Andric Type *getType() const { return I->getType(); } 1747a6dacacSDimitry Andric 175*0fca6ea1SDimitry Andric Value *getNonInvertedCondition() const { 1767a6dacacSDimitry Andric if (auto *Sel = dyn_cast<SelectInst>(I)) 1777a6dacacSDimitry Andric return Sel->getCondition(); 1787a6dacacSDimitry Andric // Or(zext) case 1797a6dacacSDimitry Andric if (auto *BO = dyn_cast<BinaryOperator>(I)) { 1807a6dacacSDimitry Andric Value *X; 1817a6dacacSDimitry Andric if (PatternMatch::match(BO->getOperand(0), 1827a6dacacSDimitry Andric m_OneUse(m_ZExt(m_Value(X))))) 1837a6dacacSDimitry Andric return X; 1847a6dacacSDimitry Andric if (PatternMatch::match(BO->getOperand(1), 1857a6dacacSDimitry Andric m_OneUse(m_ZExt(m_Value(X))))) 1867a6dacacSDimitry Andric return X; 1877a6dacacSDimitry Andric } 1887a6dacacSDimitry Andric 1897a6dacacSDimitry Andric llvm_unreachable("Unhandled case in getCondition"); 1907a6dacacSDimitry Andric } 1917a6dacacSDimitry Andric 192*0fca6ea1SDimitry Andric /// Return the condition for the SelectLike instruction. For example the 193*0fca6ea1SDimitry Andric /// condition of a select or c in `or(zext(c), x)` 194*0fca6ea1SDimitry Andric Value *getCondition() const { 195*0fca6ea1SDimitry Andric Value *CC = getNonInvertedCondition(); 196*0fca6ea1SDimitry Andric // For inverted conditions the CC is checked when created to be a not 197*0fca6ea1SDimitry Andric // (xor) instruction. 198*0fca6ea1SDimitry Andric if (Inverted) 199*0fca6ea1SDimitry Andric return cast<Instruction>(CC)->getOperand(0); 200*0fca6ea1SDimitry Andric return CC; 201*0fca6ea1SDimitry Andric } 202*0fca6ea1SDimitry Andric 2037a6dacacSDimitry Andric /// Return the true value for the SelectLike instruction. Note this may not 2047a6dacacSDimitry Andric /// exist for all SelectLike instructions. For example, for `or(zext(c), x)` 2057a6dacacSDimitry Andric /// the true value would be `or(x,1)`. As this value does not exist, nullptr 2067a6dacacSDimitry Andric /// is returned. 207*0fca6ea1SDimitry Andric Value *getTrueValue(bool HonorInverts = true) const { 208*0fca6ea1SDimitry Andric if (Inverted && HonorInverts) 209*0fca6ea1SDimitry Andric return getFalseValue(/*HonorInverts=*/false); 2107a6dacacSDimitry Andric if (auto *Sel = dyn_cast<SelectInst>(I)) 2117a6dacacSDimitry Andric return Sel->getTrueValue(); 2127a6dacacSDimitry Andric // Or(zext) case - The true value is Or(X), so return nullptr as the value 2137a6dacacSDimitry Andric // does not yet exist. 2147a6dacacSDimitry Andric if (isa<BinaryOperator>(I)) 2157a6dacacSDimitry Andric return nullptr; 2167a6dacacSDimitry Andric 2177a6dacacSDimitry Andric llvm_unreachable("Unhandled case in getTrueValue"); 2187a6dacacSDimitry Andric } 2197a6dacacSDimitry Andric 2207a6dacacSDimitry Andric /// Return the false value for the SelectLike instruction. For example the 2217a6dacacSDimitry Andric /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is 2227a6dacacSDimitry Andric /// `select(c, x|1, x)`) 223*0fca6ea1SDimitry Andric Value *getFalseValue(bool HonorInverts = true) const { 224*0fca6ea1SDimitry Andric if (Inverted && HonorInverts) 225*0fca6ea1SDimitry Andric return getTrueValue(/*HonorInverts=*/false); 2267a6dacacSDimitry Andric if (auto *Sel = dyn_cast<SelectInst>(I)) 2277a6dacacSDimitry Andric return Sel->getFalseValue(); 2287a6dacacSDimitry Andric // Or(zext) case - return the operand which is not the zext. 2297a6dacacSDimitry Andric if (auto *BO = dyn_cast<BinaryOperator>(I)) { 2307a6dacacSDimitry Andric Value *X; 2317a6dacacSDimitry Andric if (PatternMatch::match(BO->getOperand(0), 2327a6dacacSDimitry Andric m_OneUse(m_ZExt(m_Value(X))))) 2337a6dacacSDimitry Andric return BO->getOperand(1); 2347a6dacacSDimitry Andric if (PatternMatch::match(BO->getOperand(1), 2357a6dacacSDimitry Andric m_OneUse(m_ZExt(m_Value(X))))) 2367a6dacacSDimitry Andric return BO->getOperand(0); 2377a6dacacSDimitry Andric } 2387a6dacacSDimitry Andric 2397a6dacacSDimitry Andric llvm_unreachable("Unhandled case in getFalseValue"); 2407a6dacacSDimitry Andric } 2417a6dacacSDimitry Andric 2427a6dacacSDimitry Andric /// Return the NonPredCost cost of the true op, given the costs in 2437a6dacacSDimitry Andric /// InstCostMap. This may need to be generated for select-like instructions. 2447a6dacacSDimitry Andric Scaled64 getTrueOpCost(DenseMap<const Instruction *, CostInfo> &InstCostMap, 2457a6dacacSDimitry Andric const TargetTransformInfo *TTI) { 246*0fca6ea1SDimitry Andric if (isa<SelectInst>(I)) 247*0fca6ea1SDimitry Andric if (auto *I = dyn_cast<Instruction>(getTrueValue())) 2487a6dacacSDimitry Andric return InstCostMap.contains(I) ? InstCostMap[I].NonPredCost 2497a6dacacSDimitry Andric : Scaled64::getZero(); 2507a6dacacSDimitry Andric 2517a6dacacSDimitry Andric // Or case - add the cost of an extra Or to the cost of the False case. 2527a6dacacSDimitry Andric if (isa<BinaryOperator>(I)) 2537a6dacacSDimitry Andric if (auto I = dyn_cast<Instruction>(getFalseValue())) 2547a6dacacSDimitry Andric if (InstCostMap.contains(I)) { 2557a6dacacSDimitry Andric InstructionCost OrCost = TTI->getArithmeticInstrCost( 2567a6dacacSDimitry Andric Instruction::Or, I->getType(), TargetTransformInfo::TCK_Latency, 2577a6dacacSDimitry Andric {TargetTransformInfo::OK_AnyValue, 2587a6dacacSDimitry Andric TargetTransformInfo::OP_None}, 2597a6dacacSDimitry Andric {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2}); 2607a6dacacSDimitry Andric return InstCostMap[I].NonPredCost + 2617a6dacacSDimitry Andric Scaled64::get(*OrCost.getValue()); 2627a6dacacSDimitry Andric } 2637a6dacacSDimitry Andric 2647a6dacacSDimitry Andric return Scaled64::getZero(); 2657a6dacacSDimitry Andric } 2667a6dacacSDimitry Andric 2677a6dacacSDimitry Andric /// Return the NonPredCost cost of the false op, given the costs in 2687a6dacacSDimitry Andric /// InstCostMap. This may need to be generated for select-like instructions. 2697a6dacacSDimitry Andric Scaled64 2707a6dacacSDimitry Andric getFalseOpCost(DenseMap<const Instruction *, CostInfo> &InstCostMap, 2717a6dacacSDimitry Andric const TargetTransformInfo *TTI) { 272*0fca6ea1SDimitry Andric if (isa<SelectInst>(I)) 273*0fca6ea1SDimitry Andric if (auto *I = dyn_cast<Instruction>(getFalseValue())) 2747a6dacacSDimitry Andric return InstCostMap.contains(I) ? InstCostMap[I].NonPredCost 2757a6dacacSDimitry Andric : Scaled64::getZero(); 2767a6dacacSDimitry Andric 2777a6dacacSDimitry Andric // Or case - return the cost of the false case 2787a6dacacSDimitry Andric if (isa<BinaryOperator>(I)) 2797a6dacacSDimitry Andric if (auto I = dyn_cast<Instruction>(getFalseValue())) 2807a6dacacSDimitry Andric if (InstCostMap.contains(I)) 2817a6dacacSDimitry Andric return InstCostMap[I].NonPredCost; 2827a6dacacSDimitry Andric 2837a6dacacSDimitry Andric return Scaled64::getZero(); 2847a6dacacSDimitry Andric } 2857a6dacacSDimitry Andric }; 2867a6dacacSDimitry Andric 2877a6dacacSDimitry Andric private: 2887a6dacacSDimitry Andric // Select groups consist of consecutive select instructions with the same 2897a6dacacSDimitry Andric // condition. 2907a6dacacSDimitry Andric using SelectGroup = SmallVector<SelectLike, 2>; 2917a6dacacSDimitry Andric using SelectGroups = SmallVector<SelectGroup, 2>; 2927a6dacacSDimitry Andric 29381ad6265SDimitry Andric // Converts select instructions of a function to conditional jumps when deemed 29481ad6265SDimitry Andric // profitable. Returns true if at least one select was converted. 29581ad6265SDimitry Andric bool optimizeSelects(Function &F); 29681ad6265SDimitry Andric 29781ad6265SDimitry Andric // Heuristics for determining which select instructions can be profitably 29881ad6265SDimitry Andric // conveted to branches. Separate heuristics for selects in inner-most loops 29981ad6265SDimitry Andric // and the rest of code regions (base heuristics for non-inner-most loop 30081ad6265SDimitry Andric // regions). 30181ad6265SDimitry Andric void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups); 30281ad6265SDimitry Andric void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups); 30381ad6265SDimitry Andric 30481ad6265SDimitry Andric // Converts to branches the select groups that were deemed 30581ad6265SDimitry Andric // profitable-to-convert. 30681ad6265SDimitry Andric void convertProfitableSIGroups(SelectGroups &ProfSIGroups); 30781ad6265SDimitry Andric 30881ad6265SDimitry Andric // Splits selects of a given basic block into select groups. 30981ad6265SDimitry Andric void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); 31081ad6265SDimitry Andric 31181ad6265SDimitry Andric // Determines for which select groups it is profitable converting to branches 31281ad6265SDimitry Andric // (base and inner-most-loop heuristics). 31381ad6265SDimitry Andric void findProfitableSIGroupsBase(SelectGroups &SIGroups, 31481ad6265SDimitry Andric SelectGroups &ProfSIGroups); 31581ad6265SDimitry Andric void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups, 31681ad6265SDimitry Andric SelectGroups &ProfSIGroups); 31781ad6265SDimitry Andric 31881ad6265SDimitry Andric // Determines if a select group should be converted to a branch (base 31981ad6265SDimitry Andric // heuristics). 3207a6dacacSDimitry Andric bool isConvertToBranchProfitableBase(const SelectGroup &ASI); 32181ad6265SDimitry Andric 32281ad6265SDimitry Andric // Returns true if there are expensive instructions in the cold value 32381ad6265SDimitry Andric // operand's (if any) dependence slice of any of the selects of the given 32481ad6265SDimitry Andric // group. 3257a6dacacSDimitry Andric bool hasExpensiveColdOperand(const SelectGroup &ASI); 32681ad6265SDimitry Andric 32781ad6265SDimitry Andric // For a given source instruction, collect its backwards dependence slice 32881ad6265SDimitry Andric // consisting of instructions exclusively computed for producing the operands 32981ad6265SDimitry Andric // of the source instruction. 33081ad6265SDimitry Andric void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, 331bdd1243dSDimitry Andric Instruction *SI, bool ForSinking = false); 33281ad6265SDimitry Andric 33381ad6265SDimitry Andric // Returns true if the condition of the select is highly predictable. 3347a6dacacSDimitry Andric bool isSelectHighlyPredictable(const SelectLike SI); 33581ad6265SDimitry Andric 33681ad6265SDimitry Andric // Loop-level checks to determine if a non-predicated version (with branches) 33781ad6265SDimitry Andric // of the given loop is more profitable than its predicated version. 33881ad6265SDimitry Andric bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]); 33981ad6265SDimitry Andric 34081ad6265SDimitry Andric // Computes instruction and loop-critical-path costs for both the predicated 34181ad6265SDimitry Andric // and non-predicated version of the given loop. 34281ad6265SDimitry Andric bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups, 34381ad6265SDimitry Andric DenseMap<const Instruction *, CostInfo> &InstCostMap, 34481ad6265SDimitry Andric CostInfo *LoopCost); 34581ad6265SDimitry Andric 34681ad6265SDimitry Andric // Returns a set of all the select instructions in the given select groups. 3477a6dacacSDimitry Andric SmallDenseMap<const Instruction *, SelectLike, 2> 3487a6dacacSDimitry Andric getSImap(const SelectGroups &SIGroups); 34981ad6265SDimitry Andric 35081ad6265SDimitry Andric // Returns the latency cost of a given instruction. 351bdd1243dSDimitry Andric std::optional<uint64_t> computeInstCost(const Instruction *I); 35281ad6265SDimitry Andric 35381ad6265SDimitry Andric // Returns the misprediction cost of a given select when converted to branch. 3547a6dacacSDimitry Andric Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost); 35581ad6265SDimitry Andric 35681ad6265SDimitry Andric // Returns the cost of a branch when the prediction is correct. 35781ad6265SDimitry Andric Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, 3587a6dacacSDimitry Andric const SelectLike SI); 35981ad6265SDimitry Andric 36081ad6265SDimitry Andric // Returns true if the target architecture supports lowering a given select. 3617a6dacacSDimitry Andric bool isSelectKindSupported(const SelectLike SI); 36281ad6265SDimitry Andric }; 3635f757f3fSDimitry Andric 3645f757f3fSDimitry Andric class SelectOptimize : public FunctionPass { 3655f757f3fSDimitry Andric SelectOptimizeImpl Impl; 3665f757f3fSDimitry Andric 3675f757f3fSDimitry Andric public: 3685f757f3fSDimitry Andric static char ID; 3695f757f3fSDimitry Andric 3705f757f3fSDimitry Andric SelectOptimize() : FunctionPass(ID) { 3715f757f3fSDimitry Andric initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); 3725f757f3fSDimitry Andric } 3735f757f3fSDimitry Andric 3745f757f3fSDimitry Andric bool runOnFunction(Function &F) override { 3755f757f3fSDimitry Andric return Impl.runOnFunction(F, *this); 3765f757f3fSDimitry Andric } 3775f757f3fSDimitry Andric 3785f757f3fSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 3795f757f3fSDimitry Andric AU.addRequired<ProfileSummaryInfoWrapperPass>(); 3805f757f3fSDimitry Andric AU.addRequired<TargetPassConfig>(); 3815f757f3fSDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 3825f757f3fSDimitry Andric AU.addRequired<LoopInfoWrapperPass>(); 3835f757f3fSDimitry Andric AU.addRequired<BlockFrequencyInfoWrapperPass>(); 3845f757f3fSDimitry Andric AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 3855f757f3fSDimitry Andric } 3865f757f3fSDimitry Andric }; 3875f757f3fSDimitry Andric 38881ad6265SDimitry Andric } // namespace 38981ad6265SDimitry Andric 3905f757f3fSDimitry Andric PreservedAnalyses SelectOptimizePass::run(Function &F, 3915f757f3fSDimitry Andric FunctionAnalysisManager &FAM) { 3925f757f3fSDimitry Andric SelectOptimizeImpl Impl(TM); 3935f757f3fSDimitry Andric return Impl.run(F, FAM); 3945f757f3fSDimitry Andric } 3955f757f3fSDimitry Andric 39681ad6265SDimitry Andric char SelectOptimize::ID = 0; 39781ad6265SDimitry Andric 39881ad6265SDimitry Andric INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 39981ad6265SDimitry Andric false) 40081ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 40181ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 40281ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 40381ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 4045f757f3fSDimitry Andric INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 40581ad6265SDimitry Andric INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 40681ad6265SDimitry Andric INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 40781ad6265SDimitry Andric false) 40881ad6265SDimitry Andric 40981ad6265SDimitry Andric FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } 41081ad6265SDimitry Andric 4115f757f3fSDimitry Andric PreservedAnalyses SelectOptimizeImpl::run(Function &F, 4125f757f3fSDimitry Andric FunctionAnalysisManager &FAM) { 41381ad6265SDimitry Andric TSI = TM->getSubtargetImpl(F); 41481ad6265SDimitry Andric TLI = TSI->getTargetLowering(); 41581ad6265SDimitry Andric 4165f757f3fSDimitry Andric // If none of the select types are supported then skip this pass. 4175f757f3fSDimitry Andric // This is an optimization pass. Legality issues will be handled by 4185f757f3fSDimitry Andric // instruction selection. 4195f757f3fSDimitry Andric if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && 4205f757f3fSDimitry Andric !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && 4215f757f3fSDimitry Andric !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) 4225f757f3fSDimitry Andric return PreservedAnalyses::all(); 4235f757f3fSDimitry Andric 4245f757f3fSDimitry Andric TTI = &FAM.getResult<TargetIRAnalysis>(F); 4255f757f3fSDimitry Andric if (!TTI->enableSelectOptimize()) 4265f757f3fSDimitry Andric return PreservedAnalyses::all(); 4275f757f3fSDimitry Andric 4285f757f3fSDimitry Andric PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F) 4295f757f3fSDimitry Andric .getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 4305f757f3fSDimitry Andric assert(PSI && "This pass requires module analysis pass `profile-summary`!"); 4315f757f3fSDimitry Andric BFI = &FAM.getResult<BlockFrequencyAnalysis>(F); 4325f757f3fSDimitry Andric 4335f757f3fSDimitry Andric // When optimizing for size, selects are preferable over branches. 4345f757f3fSDimitry Andric if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI)) 4355f757f3fSDimitry Andric return PreservedAnalyses::all(); 4365f757f3fSDimitry Andric 4375f757f3fSDimitry Andric LI = &FAM.getResult<LoopAnalysis>(F); 4385f757f3fSDimitry Andric ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 4395f757f3fSDimitry Andric TSchedModel.init(TSI); 4405f757f3fSDimitry Andric 4415f757f3fSDimitry Andric bool Changed = optimizeSelects(F); 4425f757f3fSDimitry Andric return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 4435f757f3fSDimitry Andric } 4445f757f3fSDimitry Andric 4455f757f3fSDimitry Andric bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) { 4465f757f3fSDimitry Andric TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); 4475f757f3fSDimitry Andric TSI = TM->getSubtargetImpl(F); 4485f757f3fSDimitry Andric TLI = TSI->getTargetLowering(); 4495f757f3fSDimitry Andric 4505f757f3fSDimitry Andric // If none of the select types are supported then skip this pass. 45181ad6265SDimitry Andric // This is an optimization pass. Legality issues will be handled by 45281ad6265SDimitry Andric // instruction selection. 45381ad6265SDimitry Andric if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && 45481ad6265SDimitry Andric !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && 45581ad6265SDimitry Andric !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) 45681ad6265SDimitry Andric return false; 45781ad6265SDimitry Andric 4585f757f3fSDimitry Andric TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 459bdd1243dSDimitry Andric 460bdd1243dSDimitry Andric if (!TTI->enableSelectOptimize()) 461bdd1243dSDimitry Andric return false; 462bdd1243dSDimitry Andric 4635f757f3fSDimitry Andric LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 4645f757f3fSDimitry Andric BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 4655f757f3fSDimitry Andric PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 4665f757f3fSDimitry Andric ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 46781ad6265SDimitry Andric TSchedModel.init(TSI); 46881ad6265SDimitry Andric 46981ad6265SDimitry Andric // When optimizing for size, selects are preferable over branches. 4705f757f3fSDimitry Andric if (F.hasOptSize() || llvm::shouldOptimizeForSize(&F, PSI, BFI)) 47181ad6265SDimitry Andric return false; 47281ad6265SDimitry Andric 47381ad6265SDimitry Andric return optimizeSelects(F); 47481ad6265SDimitry Andric } 47581ad6265SDimitry Andric 4765f757f3fSDimitry Andric bool SelectOptimizeImpl::optimizeSelects(Function &F) { 47781ad6265SDimitry Andric // Determine for which select groups it is profitable converting to branches. 47881ad6265SDimitry Andric SelectGroups ProfSIGroups; 47981ad6265SDimitry Andric // Base heuristics apply only to non-loops and outer loops. 48081ad6265SDimitry Andric optimizeSelectsBase(F, ProfSIGroups); 48181ad6265SDimitry Andric // Separate heuristics for inner-most loops. 48281ad6265SDimitry Andric optimizeSelectsInnerLoops(F, ProfSIGroups); 48381ad6265SDimitry Andric 48481ad6265SDimitry Andric // Convert to branches the select groups that were deemed 48581ad6265SDimitry Andric // profitable-to-convert. 48681ad6265SDimitry Andric convertProfitableSIGroups(ProfSIGroups); 48781ad6265SDimitry Andric 48881ad6265SDimitry Andric // Code modified if at least one select group was converted. 48981ad6265SDimitry Andric return !ProfSIGroups.empty(); 49081ad6265SDimitry Andric } 49181ad6265SDimitry Andric 4925f757f3fSDimitry Andric void SelectOptimizeImpl::optimizeSelectsBase(Function &F, 49381ad6265SDimitry Andric SelectGroups &ProfSIGroups) { 49481ad6265SDimitry Andric // Collect all the select groups. 49581ad6265SDimitry Andric SelectGroups SIGroups; 49681ad6265SDimitry Andric for (BasicBlock &BB : F) { 49781ad6265SDimitry Andric // Base heuristics apply only to non-loops and outer loops. 49881ad6265SDimitry Andric Loop *L = LI->getLoopFor(&BB); 49981ad6265SDimitry Andric if (L && L->isInnermost()) 50081ad6265SDimitry Andric continue; 50181ad6265SDimitry Andric collectSelectGroups(BB, SIGroups); 50281ad6265SDimitry Andric } 50381ad6265SDimitry Andric 50481ad6265SDimitry Andric // Determine for which select groups it is profitable converting to branches. 50581ad6265SDimitry Andric findProfitableSIGroupsBase(SIGroups, ProfSIGroups); 50681ad6265SDimitry Andric } 50781ad6265SDimitry Andric 5085f757f3fSDimitry Andric void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F, 50981ad6265SDimitry Andric SelectGroups &ProfSIGroups) { 51081ad6265SDimitry Andric SmallVector<Loop *, 4> Loops(LI->begin(), LI->end()); 51181ad6265SDimitry Andric // Need to check size on each iteration as we accumulate child loops. 51281ad6265SDimitry Andric for (unsigned long i = 0; i < Loops.size(); ++i) 51381ad6265SDimitry Andric for (Loop *ChildL : Loops[i]->getSubLoops()) 51481ad6265SDimitry Andric Loops.push_back(ChildL); 51581ad6265SDimitry Andric 51681ad6265SDimitry Andric for (Loop *L : Loops) { 51781ad6265SDimitry Andric if (!L->isInnermost()) 51881ad6265SDimitry Andric continue; 51981ad6265SDimitry Andric 52081ad6265SDimitry Andric SelectGroups SIGroups; 52181ad6265SDimitry Andric for (BasicBlock *BB : L->getBlocks()) 52281ad6265SDimitry Andric collectSelectGroups(*BB, SIGroups); 52381ad6265SDimitry Andric 52481ad6265SDimitry Andric findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups); 52581ad6265SDimitry Andric } 52681ad6265SDimitry Andric } 52781ad6265SDimitry Andric 52881ad6265SDimitry Andric /// If \p isTrue is true, return the true value of \p SI, otherwise return 52981ad6265SDimitry Andric /// false value of \p SI. If the true/false value of \p SI is defined by any 53081ad6265SDimitry Andric /// select instructions in \p Selects, look through the defining select 53181ad6265SDimitry Andric /// instruction until the true/false value is not defined in \p Selects. 53281ad6265SDimitry Andric static Value * 5337a6dacacSDimitry Andric getTrueOrFalseValue(SelectOptimizeImpl::SelectLike SI, bool isTrue, 5347a6dacacSDimitry Andric const SmallPtrSet<const Instruction *, 2> &Selects, 5357a6dacacSDimitry Andric IRBuilder<> &IB) { 53681ad6265SDimitry Andric Value *V = nullptr; 5377a6dacacSDimitry Andric for (SelectInst *DefSI = dyn_cast<SelectInst>(SI.getI()); 5387a6dacacSDimitry Andric DefSI != nullptr && Selects.count(DefSI); 53981ad6265SDimitry Andric DefSI = dyn_cast<SelectInst>(V)) { 540*0fca6ea1SDimitry Andric if (DefSI->getCondition() == SI.getCondition()) 54181ad6265SDimitry Andric V = (isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); 542*0fca6ea1SDimitry Andric else // Handle inverted SI 543*0fca6ea1SDimitry Andric V = (!isTrue ? DefSI->getTrueValue() : DefSI->getFalseValue()); 54481ad6265SDimitry Andric } 5457a6dacacSDimitry Andric 5467a6dacacSDimitry Andric if (isa<BinaryOperator>(SI.getI())) { 5477a6dacacSDimitry Andric assert(SI.getI()->getOpcode() == Instruction::Or && 5487a6dacacSDimitry Andric "Only currently handling Or instructions."); 5497a6dacacSDimitry Andric V = SI.getFalseValue(); 5507a6dacacSDimitry Andric if (isTrue) 5517a6dacacSDimitry Andric V = IB.CreateOr(V, ConstantInt::get(V->getType(), 1)); 5527a6dacacSDimitry Andric } 5537a6dacacSDimitry Andric 55481ad6265SDimitry Andric assert(V && "Failed to get select true/false value"); 55581ad6265SDimitry Andric return V; 55681ad6265SDimitry Andric } 55781ad6265SDimitry Andric 5585f757f3fSDimitry Andric void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { 55981ad6265SDimitry Andric for (SelectGroup &ASI : ProfSIGroups) { 56081ad6265SDimitry Andric // The code transformation here is a modified version of the sinking 56181ad6265SDimitry Andric // transformation in CodeGenPrepare::optimizeSelectInst with a more 56281ad6265SDimitry Andric // aggressive strategy of which instructions to sink. 56381ad6265SDimitry Andric // 56481ad6265SDimitry Andric // TODO: eliminate the redundancy of logic transforming selects to branches 56581ad6265SDimitry Andric // by removing CodeGenPrepare::optimizeSelectInst and optimizing here 56681ad6265SDimitry Andric // selects for all cases (with and without profile information). 56781ad6265SDimitry Andric 56881ad6265SDimitry Andric // Transform a sequence like this: 56981ad6265SDimitry Andric // start: 57081ad6265SDimitry Andric // %cmp = cmp uge i32 %a, %b 57181ad6265SDimitry Andric // %sel = select i1 %cmp, i32 %c, i32 %d 57281ad6265SDimitry Andric // 57381ad6265SDimitry Andric // Into: 57481ad6265SDimitry Andric // start: 57581ad6265SDimitry Andric // %cmp = cmp uge i32 %a, %b 57681ad6265SDimitry Andric // %cmp.frozen = freeze %cmp 57781ad6265SDimitry Andric // br i1 %cmp.frozen, label %select.true, label %select.false 57881ad6265SDimitry Andric // select.true: 57981ad6265SDimitry Andric // br label %select.end 58081ad6265SDimitry Andric // select.false: 58181ad6265SDimitry Andric // br label %select.end 58281ad6265SDimitry Andric // select.end: 58381ad6265SDimitry Andric // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] 58481ad6265SDimitry Andric // 58581ad6265SDimitry Andric // %cmp should be frozen, otherwise it may introduce undefined behavior. 58681ad6265SDimitry Andric // In addition, we may sink instructions that produce %c or %d into the 58781ad6265SDimitry Andric // destination(s) of the new branch. 58881ad6265SDimitry Andric // If the true or false blocks do not contain a sunken instruction, that 58981ad6265SDimitry Andric // block and its branch may be optimized away. In that case, one side of the 59081ad6265SDimitry Andric // first branch will point directly to select.end, and the corresponding PHI 59181ad6265SDimitry Andric // predecessor block will be the start block. 59281ad6265SDimitry Andric 59381ad6265SDimitry Andric // Find all the instructions that can be soundly sunk to the true/false 59481ad6265SDimitry Andric // blocks. These are instructions that are computed solely for producing the 59581ad6265SDimitry Andric // operands of the select instructions in the group and can be sunk without 59681ad6265SDimitry Andric // breaking the semantics of the LLVM IR (e.g., cannot sink instructions 59781ad6265SDimitry Andric // with side effects). 59881ad6265SDimitry Andric SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices; 59981ad6265SDimitry Andric typedef std::stack<Instruction *>::size_type StackSizeType; 60081ad6265SDimitry Andric StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0; 6017a6dacacSDimitry Andric for (SelectLike SI : ASI) { 60281ad6265SDimitry Andric // For each select, compute the sinkable dependence chains of the true and 60381ad6265SDimitry Andric // false operands. 6047a6dacacSDimitry Andric if (auto *TI = dyn_cast_or_null<Instruction>(SI.getTrueValue())) { 60581ad6265SDimitry Andric std::stack<Instruction *> TrueSlice; 6067a6dacacSDimitry Andric getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true); 60781ad6265SDimitry Andric maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); 60881ad6265SDimitry Andric TrueSlices.push_back(TrueSlice); 60981ad6265SDimitry Andric } 6107a6dacacSDimitry Andric if (auto *FI = dyn_cast_or_null<Instruction>(SI.getFalseValue())) { 6117a6dacacSDimitry Andric if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) { 61281ad6265SDimitry Andric std::stack<Instruction *> FalseSlice; 6137a6dacacSDimitry Andric getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true); 61481ad6265SDimitry Andric maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); 61581ad6265SDimitry Andric FalseSlices.push_back(FalseSlice); 61681ad6265SDimitry Andric } 61781ad6265SDimitry Andric } 6187a6dacacSDimitry Andric } 61981ad6265SDimitry Andric // In the case of multiple select instructions in the same group, the order 62081ad6265SDimitry Andric // of non-dependent instructions (instructions of different dependence 62181ad6265SDimitry Andric // slices) in the true/false blocks appears to affect performance. 62281ad6265SDimitry Andric // Interleaving the slices seems to experimentally be the optimal approach. 62381ad6265SDimitry Andric // This interleaving scheduling allows for more ILP (with a natural downside 62481ad6265SDimitry Andric // of increasing a bit register pressure) compared to a simple ordering of 62581ad6265SDimitry Andric // one whole chain after another. One would expect that this ordering would 62681ad6265SDimitry Andric // not matter since the scheduling in the backend of the compiler would 62781ad6265SDimitry Andric // take care of it, but apparently the scheduler fails to deliver optimal 62881ad6265SDimitry Andric // ILP with a naive ordering here. 62981ad6265SDimitry Andric SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved; 63081ad6265SDimitry Andric for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) { 63181ad6265SDimitry Andric for (auto &S : TrueSlices) { 63281ad6265SDimitry Andric if (!S.empty()) { 63381ad6265SDimitry Andric TrueSlicesInterleaved.push_back(S.top()); 63481ad6265SDimitry Andric S.pop(); 63581ad6265SDimitry Andric } 63681ad6265SDimitry Andric } 63781ad6265SDimitry Andric } 63881ad6265SDimitry Andric for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) { 63981ad6265SDimitry Andric for (auto &S : FalseSlices) { 64081ad6265SDimitry Andric if (!S.empty()) { 64181ad6265SDimitry Andric FalseSlicesInterleaved.push_back(S.top()); 64281ad6265SDimitry Andric S.pop(); 64381ad6265SDimitry Andric } 64481ad6265SDimitry Andric } 64581ad6265SDimitry Andric } 64681ad6265SDimitry Andric 64781ad6265SDimitry Andric // We split the block containing the select(s) into two blocks. 6487a6dacacSDimitry Andric SelectLike SI = ASI.front(); 6497a6dacacSDimitry Andric SelectLike LastSI = ASI.back(); 6507a6dacacSDimitry Andric BasicBlock *StartBlock = SI.getI()->getParent(); 6517a6dacacSDimitry Andric BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI())); 652*0fca6ea1SDimitry Andric // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With 653*0fca6ea1SDimitry Andric // RemoveDIs turned on, SplitPt would instead point to the next 654*0fca6ea1SDimitry Andric // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs, 655*0fca6ea1SDimitry Andric // tell splitBasicBlock that we want to include any DbgVariableRecords 656*0fca6ea1SDimitry Andric // attached to SplitPt in the splice. 657*0fca6ea1SDimitry Andric SplitPt.setHeadBit(true); 65881ad6265SDimitry Andric BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 6595f757f3fSDimitry Andric BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock)); 66081ad6265SDimitry Andric // Delete the unconditional branch that was just created by the split. 66181ad6265SDimitry Andric StartBlock->getTerminator()->eraseFromParent(); 66281ad6265SDimitry Andric 663*0fca6ea1SDimitry Andric // Move any debug/pseudo instructions and not's that were in-between the 664*0fca6ea1SDimitry Andric // select group to the newly-created end block. 665*0fca6ea1SDimitry Andric SmallVector<Instruction *, 2> SinkInstrs; 6667a6dacacSDimitry Andric auto DIt = SI.getI()->getIterator(); 6677a6dacacSDimitry Andric while (&*DIt != LastSI.getI()) { 66881ad6265SDimitry Andric if (DIt->isDebugOrPseudoInst()) 669*0fca6ea1SDimitry Andric SinkInstrs.push_back(&*DIt); 670*0fca6ea1SDimitry Andric if (match(&*DIt, m_Not(m_Specific(SI.getCondition())))) 671*0fca6ea1SDimitry Andric SinkInstrs.push_back(&*DIt); 67281ad6265SDimitry Andric DIt++; 67381ad6265SDimitry Andric } 674*0fca6ea1SDimitry Andric for (auto *DI : SinkInstrs) 6755f757f3fSDimitry Andric DI->moveBeforePreserving(&*EndBlock->getFirstInsertionPt()); 67681ad6265SDimitry Andric 677*0fca6ea1SDimitry Andric // Duplicate implementation for DbgRecords, the non-instruction debug-info 678*0fca6ea1SDimitry Andric // format. Helper lambda for moving DbgRecords to the end block. 679*0fca6ea1SDimitry Andric auto TransferDbgRecords = [&](Instruction &I) { 680*0fca6ea1SDimitry Andric for (auto &DbgRecord : 681*0fca6ea1SDimitry Andric llvm::make_early_inc_range(I.getDbgRecordRange())) { 682*0fca6ea1SDimitry Andric DbgRecord.removeFromParent(); 683*0fca6ea1SDimitry Andric EndBlock->insertDbgRecordBefore(&DbgRecord, 6847a6dacacSDimitry Andric EndBlock->getFirstInsertionPt()); 6857a6dacacSDimitry Andric } 6867a6dacacSDimitry Andric }; 6877a6dacacSDimitry Andric 6887a6dacacSDimitry Andric // Iterate over all instructions in between SI and LastSI, not including 6897a6dacacSDimitry Andric // SI itself. These are all the variable assignments that happen "in the 6907a6dacacSDimitry Andric // middle" of the select group. 6917a6dacacSDimitry Andric auto R = make_range(std::next(SI.getI()->getIterator()), 6927a6dacacSDimitry Andric std::next(LastSI.getI()->getIterator())); 693*0fca6ea1SDimitry Andric llvm::for_each(R, TransferDbgRecords); 6947a6dacacSDimitry Andric 69581ad6265SDimitry Andric // These are the new basic blocks for the conditional branch. 69681ad6265SDimitry Andric // At least one will become an actual new basic block. 69781ad6265SDimitry Andric BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; 69881ad6265SDimitry Andric BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr; 69981ad6265SDimitry Andric if (!TrueSlicesInterleaved.empty()) { 7007a6dacacSDimitry Andric TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink", 70181ad6265SDimitry Andric EndBlock->getParent(), EndBlock); 70281ad6265SDimitry Andric TrueBranch = BranchInst::Create(EndBlock, TrueBlock); 7037a6dacacSDimitry Andric TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc()); 70481ad6265SDimitry Andric for (Instruction *TrueInst : TrueSlicesInterleaved) 70581ad6265SDimitry Andric TrueInst->moveBefore(TrueBranch); 70681ad6265SDimitry Andric } 70781ad6265SDimitry Andric if (!FalseSlicesInterleaved.empty()) { 7087a6dacacSDimitry Andric FalseBlock = 7097a6dacacSDimitry Andric BasicBlock::Create(EndBlock->getContext(), "select.false.sink", 71081ad6265SDimitry Andric EndBlock->getParent(), EndBlock); 71181ad6265SDimitry Andric FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 7127a6dacacSDimitry Andric FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc()); 71381ad6265SDimitry Andric for (Instruction *FalseInst : FalseSlicesInterleaved) 71481ad6265SDimitry Andric FalseInst->moveBefore(FalseBranch); 71581ad6265SDimitry Andric } 71681ad6265SDimitry Andric // If there was nothing to sink, then arbitrarily choose the 'false' side 71781ad6265SDimitry Andric // for a new input value to the PHI. 71881ad6265SDimitry Andric if (TrueBlock == FalseBlock) { 71981ad6265SDimitry Andric assert(TrueBlock == nullptr && 72081ad6265SDimitry Andric "Unexpected basic block transform while optimizing select"); 72181ad6265SDimitry Andric 7227a6dacacSDimitry Andric FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false", 72381ad6265SDimitry Andric EndBlock->getParent(), EndBlock); 72481ad6265SDimitry Andric auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 7257a6dacacSDimitry Andric FalseBranch->setDebugLoc(SI.getI()->getDebugLoc()); 72681ad6265SDimitry Andric } 72781ad6265SDimitry Andric 72881ad6265SDimitry Andric // Insert the real conditional branch based on the original condition. 72981ad6265SDimitry Andric // If we did not create a new block for one of the 'true' or 'false' paths 73081ad6265SDimitry Andric // of the condition, it means that side of the branch goes to the end block 73181ad6265SDimitry Andric // directly and the path originates from the start block from the point of 73281ad6265SDimitry Andric // view of the new PHI. 73381ad6265SDimitry Andric BasicBlock *TT, *FT; 73481ad6265SDimitry Andric if (TrueBlock == nullptr) { 73581ad6265SDimitry Andric TT = EndBlock; 73681ad6265SDimitry Andric FT = FalseBlock; 73781ad6265SDimitry Andric TrueBlock = StartBlock; 73881ad6265SDimitry Andric } else if (FalseBlock == nullptr) { 73981ad6265SDimitry Andric TT = TrueBlock; 74081ad6265SDimitry Andric FT = EndBlock; 74181ad6265SDimitry Andric FalseBlock = StartBlock; 74281ad6265SDimitry Andric } else { 74381ad6265SDimitry Andric TT = TrueBlock; 74481ad6265SDimitry Andric FT = FalseBlock; 74581ad6265SDimitry Andric } 7467a6dacacSDimitry Andric IRBuilder<> IB(SI.getI()); 7477a6dacacSDimitry Andric auto *CondFr = IB.CreateFreeze(SI.getCondition(), 7487a6dacacSDimitry Andric SI.getCondition()->getName() + ".frozen"); 74981ad6265SDimitry Andric 75081ad6265SDimitry Andric SmallPtrSet<const Instruction *, 2> INS; 7517a6dacacSDimitry Andric for (auto SI : ASI) 7527a6dacacSDimitry Andric INS.insert(SI.getI()); 7537a6dacacSDimitry Andric 75481ad6265SDimitry Andric // Use reverse iterator because later select may use the value of the 75581ad6265SDimitry Andric // earlier select, and we need to propagate value through earlier select 75681ad6265SDimitry Andric // to get the PHI operand. 75781ad6265SDimitry Andric for (auto It = ASI.rbegin(); It != ASI.rend(); ++It) { 7587a6dacacSDimitry Andric SelectLike SI = *It; 75981ad6265SDimitry Andric // The select itself is replaced with a PHI Node. 7607a6dacacSDimitry Andric PHINode *PN = PHINode::Create(SI.getType(), 2, ""); 7615f757f3fSDimitry Andric PN->insertBefore(EndBlock->begin()); 7627a6dacacSDimitry Andric PN->takeName(SI.getI()); 7637a6dacacSDimitry Andric PN->addIncoming(getTrueOrFalseValue(SI, true, INS, IB), TrueBlock); 7647a6dacacSDimitry Andric PN->addIncoming(getTrueOrFalseValue(SI, false, INS, IB), FalseBlock); 7657a6dacacSDimitry Andric PN->setDebugLoc(SI.getI()->getDebugLoc()); 7667a6dacacSDimitry Andric SI.getI()->replaceAllUsesWith(PN); 7677a6dacacSDimitry Andric INS.erase(SI.getI()); 76881ad6265SDimitry Andric ++NumSelectsConverted; 76981ad6265SDimitry Andric } 7707a6dacacSDimitry Andric IB.CreateCondBr(CondFr, TT, FT, SI.getI()); 7717a6dacacSDimitry Andric 7727a6dacacSDimitry Andric // Remove the old select instructions, now that they are not longer used. 7737a6dacacSDimitry Andric for (auto SI : ASI) 7747a6dacacSDimitry Andric SI.getI()->eraseFromParent(); 77581ad6265SDimitry Andric } 77681ad6265SDimitry Andric } 77781ad6265SDimitry Andric 7785f757f3fSDimitry Andric void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, 77981ad6265SDimitry Andric SelectGroups &SIGroups) { 78081ad6265SDimitry Andric BasicBlock::iterator BBIt = BB.begin(); 78181ad6265SDimitry Andric while (BBIt != BB.end()) { 78281ad6265SDimitry Andric Instruction *I = &*BBIt++; 7837a6dacacSDimitry Andric if (SelectLike SI = SelectLike::match(I)) { 7847a6dacacSDimitry Andric if (!TTI->shouldTreatInstructionLikeSelect(I)) 785bdd1243dSDimitry Andric continue; 786bdd1243dSDimitry Andric 78781ad6265SDimitry Andric SelectGroup SIGroup; 78881ad6265SDimitry Andric SIGroup.push_back(SI); 78981ad6265SDimitry Andric while (BBIt != BB.end()) { 79081ad6265SDimitry Andric Instruction *NI = &*BBIt; 79181ad6265SDimitry Andric // Debug/pseudo instructions should be skipped and not prevent the 79281ad6265SDimitry Andric // formation of a select group. 7937a6dacacSDimitry Andric if (NI->isDebugOrPseudoInst()) { 7947a6dacacSDimitry Andric ++BBIt; 7957a6dacacSDimitry Andric continue; 79681ad6265SDimitry Andric } 797*0fca6ea1SDimitry Andric 798*0fca6ea1SDimitry Andric // Skip not(select(..)), if the not is part of the same select group 799*0fca6ea1SDimitry Andric if (match(NI, m_Not(m_Specific(SI.getCondition())))) { 800*0fca6ea1SDimitry Andric ++BBIt; 801*0fca6ea1SDimitry Andric continue; 802*0fca6ea1SDimitry Andric } 803*0fca6ea1SDimitry Andric 8047a6dacacSDimitry Andric // We only allow selects in the same group, not other select-like 8057a6dacacSDimitry Andric // instructions. 8067a6dacacSDimitry Andric if (!isa<SelectInst>(NI)) 8077a6dacacSDimitry Andric break; 8087a6dacacSDimitry Andric 8097a6dacacSDimitry Andric SelectLike NSI = SelectLike::match(NI); 8107a6dacacSDimitry Andric if (NSI && SI.getCondition() == NSI.getCondition()) { 8117a6dacacSDimitry Andric SIGroup.push_back(NSI); 812*0fca6ea1SDimitry Andric } else if (NSI && match(NSI.getCondition(), 813*0fca6ea1SDimitry Andric m_Not(m_Specific(SI.getCondition())))) { 814*0fca6ea1SDimitry Andric NSI.setInverted(); 815*0fca6ea1SDimitry Andric SIGroup.push_back(NSI); 8167a6dacacSDimitry Andric } else 8177a6dacacSDimitry Andric break; 81881ad6265SDimitry Andric ++BBIt; 81981ad6265SDimitry Andric } 82081ad6265SDimitry Andric 82181ad6265SDimitry Andric // If the select type is not supported, no point optimizing it. 82281ad6265SDimitry Andric // Instruction selection will take care of it. 82381ad6265SDimitry Andric if (!isSelectKindSupported(SI)) 82481ad6265SDimitry Andric continue; 82581ad6265SDimitry Andric 826*0fca6ea1SDimitry Andric LLVM_DEBUG({ 827*0fca6ea1SDimitry Andric dbgs() << "New Select group with\n"; 828*0fca6ea1SDimitry Andric for (auto SI : SIGroup) 829*0fca6ea1SDimitry Andric dbgs() << " " << *SI.getI() << "\n"; 830*0fca6ea1SDimitry Andric }); 831*0fca6ea1SDimitry Andric 83281ad6265SDimitry Andric SIGroups.push_back(SIGroup); 83381ad6265SDimitry Andric } 83481ad6265SDimitry Andric } 83581ad6265SDimitry Andric } 83681ad6265SDimitry Andric 8375f757f3fSDimitry Andric void SelectOptimizeImpl::findProfitableSIGroupsBase( 8385f757f3fSDimitry Andric SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { 83981ad6265SDimitry Andric for (SelectGroup &ASI : SIGroups) { 84081ad6265SDimitry Andric ++NumSelectOptAnalyzed; 84181ad6265SDimitry Andric if (isConvertToBranchProfitableBase(ASI)) 84281ad6265SDimitry Andric ProfSIGroups.push_back(ASI); 84381ad6265SDimitry Andric } 84481ad6265SDimitry Andric } 84581ad6265SDimitry Andric 846bdd1243dSDimitry Andric static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, 847bdd1243dSDimitry Andric DiagnosticInfoOptimizationBase &Rem) { 848bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n"); 849bdd1243dSDimitry Andric ORE->emit(Rem); 850bdd1243dSDimitry Andric } 851bdd1243dSDimitry Andric 8525f757f3fSDimitry Andric void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops( 85381ad6265SDimitry Andric const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { 85481ad6265SDimitry Andric NumSelectOptAnalyzed += SIGroups.size(); 85581ad6265SDimitry Andric // For each select group in an inner-most loop, 85681ad6265SDimitry Andric // a branch is more preferable than a select/conditional-move if: 85781ad6265SDimitry Andric // i) conversion to branches for all the select groups of the loop satisfies 85881ad6265SDimitry Andric // loop-level heuristics including reducing the loop's critical path by 8595f757f3fSDimitry Andric // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and 86081ad6265SDimitry Andric // ii) the total cost of the select group is cheaper with a branch compared 86181ad6265SDimitry Andric // to its predicated version. The cost is in terms of latency and the cost 86281ad6265SDimitry Andric // of a select group is the cost of its most expensive select instruction 86381ad6265SDimitry Andric // (assuming infinite resources and thus fully leveraging available ILP). 86481ad6265SDimitry Andric 86581ad6265SDimitry Andric DenseMap<const Instruction *, CostInfo> InstCostMap; 86681ad6265SDimitry Andric CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()}, 86781ad6265SDimitry Andric {Scaled64::getZero(), Scaled64::getZero()}}; 86881ad6265SDimitry Andric if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) || 86981ad6265SDimitry Andric !checkLoopHeuristics(L, LoopCost)) { 87081ad6265SDimitry Andric return; 87181ad6265SDimitry Andric } 87281ad6265SDimitry Andric 87381ad6265SDimitry Andric for (SelectGroup &ASI : SIGroups) { 87481ad6265SDimitry Andric // Assuming infinite resources, the cost of a group of instructions is the 87581ad6265SDimitry Andric // cost of the most expensive instruction of the group. 87681ad6265SDimitry Andric Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero(); 8777a6dacacSDimitry Andric for (SelectLike SI : ASI) { 8787a6dacacSDimitry Andric SelectCost = std::max(SelectCost, InstCostMap[SI.getI()].PredCost); 8797a6dacacSDimitry Andric BranchCost = std::max(BranchCost, InstCostMap[SI.getI()].NonPredCost); 88081ad6265SDimitry Andric } 88181ad6265SDimitry Andric if (BranchCost < SelectCost) { 8827a6dacacSDimitry Andric OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", ASI.front().getI()); 88381ad6265SDimitry Andric OR << "Profitable to convert to branch (loop analysis). BranchCost=" 88481ad6265SDimitry Andric << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() 88581ad6265SDimitry Andric << ". "; 886bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, OR); 88781ad6265SDimitry Andric ++NumSelectConvertedLoop; 88881ad6265SDimitry Andric ProfSIGroups.push_back(ASI); 88981ad6265SDimitry Andric } else { 8907a6dacacSDimitry Andric OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", 8917a6dacacSDimitry Andric ASI.front().getI()); 89281ad6265SDimitry Andric ORmiss << "Select is more profitable (loop analysis). BranchCost=" 89381ad6265SDimitry Andric << BranchCost.toString() 89481ad6265SDimitry Andric << ", SelectCost=" << SelectCost.toString() << ". "; 895bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmiss); 89681ad6265SDimitry Andric } 89781ad6265SDimitry Andric } 89881ad6265SDimitry Andric } 89981ad6265SDimitry Andric 9005f757f3fSDimitry Andric bool SelectOptimizeImpl::isConvertToBranchProfitableBase( 9017a6dacacSDimitry Andric const SelectGroup &ASI) { 9027a6dacacSDimitry Andric SelectLike SI = ASI.front(); 903*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI() 9047a6dacacSDimitry Andric << "\n"); 9057a6dacacSDimitry Andric OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI()); 9067a6dacacSDimitry Andric OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI()); 90781ad6265SDimitry Andric 90881ad6265SDimitry Andric // Skip cold basic blocks. Better to optimize for size for cold blocks. 9097a6dacacSDimitry Andric if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) { 91081ad6265SDimitry Andric ++NumSelectColdBB; 91181ad6265SDimitry Andric ORmiss << "Not converted to branch because of cold basic block. "; 912bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmiss); 91381ad6265SDimitry Andric return false; 91481ad6265SDimitry Andric } 91581ad6265SDimitry Andric 91681ad6265SDimitry Andric // If unpredictable, branch form is less profitable. 9177a6dacacSDimitry Andric if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) { 91881ad6265SDimitry Andric ++NumSelectUnPred; 91981ad6265SDimitry Andric ORmiss << "Not converted to branch because of unpredictable branch. "; 920bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmiss); 92181ad6265SDimitry Andric return false; 92281ad6265SDimitry Andric } 92381ad6265SDimitry Andric 92481ad6265SDimitry Andric // If highly predictable, branch form is more profitable, unless a 92581ad6265SDimitry Andric // predictable select is inexpensive in the target architecture. 92681ad6265SDimitry Andric if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { 92781ad6265SDimitry Andric ++NumSelectConvertedHighPred; 92881ad6265SDimitry Andric OR << "Converted to branch because of highly predictable branch. "; 929bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, OR); 93081ad6265SDimitry Andric return true; 93181ad6265SDimitry Andric } 93281ad6265SDimitry Andric 93381ad6265SDimitry Andric // Look for expensive instructions in the cold operand's (if any) dependence 93481ad6265SDimitry Andric // slice of any of the selects in the group. 93581ad6265SDimitry Andric if (hasExpensiveColdOperand(ASI)) { 93681ad6265SDimitry Andric ++NumSelectConvertedExpColdOperand; 93781ad6265SDimitry Andric OR << "Converted to branch because of expensive cold operand."; 938bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, OR); 93981ad6265SDimitry Andric return true; 94081ad6265SDimitry Andric } 94181ad6265SDimitry Andric 94281ad6265SDimitry Andric ORmiss << "Not profitable to convert to branch (base heuristic)."; 943bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmiss); 94481ad6265SDimitry Andric return false; 94581ad6265SDimitry Andric } 94681ad6265SDimitry Andric 94781ad6265SDimitry Andric static InstructionCost divideNearest(InstructionCost Numerator, 94881ad6265SDimitry Andric uint64_t Denominator) { 94981ad6265SDimitry Andric return (Numerator + (Denominator / 2)) / Denominator; 95081ad6265SDimitry Andric } 95181ad6265SDimitry Andric 9527a6dacacSDimitry Andric static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI, 9537a6dacacSDimitry Andric uint64_t &TrueVal, uint64_t &FalseVal) { 9547a6dacacSDimitry Andric if (isa<SelectInst>(SI.getI())) 9557a6dacacSDimitry Andric return extractBranchWeights(*SI.getI(), TrueVal, FalseVal); 9567a6dacacSDimitry Andric return false; 9577a6dacacSDimitry Andric } 9587a6dacacSDimitry Andric 9597a6dacacSDimitry Andric bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) { 96081ad6265SDimitry Andric bool ColdOperand = false; 96181ad6265SDimitry Andric uint64_t TrueWeight, FalseWeight, TotalWeight; 9627a6dacacSDimitry Andric if (extractBranchWeights(ASI.front(), TrueWeight, FalseWeight)) { 96381ad6265SDimitry Andric uint64_t MinWeight = std::min(TrueWeight, FalseWeight); 96481ad6265SDimitry Andric TotalWeight = TrueWeight + FalseWeight; 96581ad6265SDimitry Andric // Is there a path with frequency <ColdOperandThreshold% (default:20%) ? 96681ad6265SDimitry Andric ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight; 96781ad6265SDimitry Andric } else if (PSI->hasProfileSummary()) { 9687a6dacacSDimitry Andric OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", 9697a6dacacSDimitry Andric ASI.front().getI()); 97081ad6265SDimitry Andric ORmiss << "Profile data available but missing branch-weights metadata for " 97181ad6265SDimitry Andric "select instruction. "; 972bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmiss); 97381ad6265SDimitry Andric } 97481ad6265SDimitry Andric if (!ColdOperand) 97581ad6265SDimitry Andric return false; 97681ad6265SDimitry Andric // Check if the cold path's dependence slice is expensive for any of the 97781ad6265SDimitry Andric // selects of the group. 9787a6dacacSDimitry Andric for (SelectLike SI : ASI) { 97981ad6265SDimitry Andric Instruction *ColdI = nullptr; 98081ad6265SDimitry Andric uint64_t HotWeight; 98181ad6265SDimitry Andric if (TrueWeight < FalseWeight) { 9827a6dacacSDimitry Andric ColdI = dyn_cast_or_null<Instruction>(SI.getTrueValue()); 98381ad6265SDimitry Andric HotWeight = FalseWeight; 98481ad6265SDimitry Andric } else { 9857a6dacacSDimitry Andric ColdI = dyn_cast_or_null<Instruction>(SI.getFalseValue()); 98681ad6265SDimitry Andric HotWeight = TrueWeight; 98781ad6265SDimitry Andric } 98881ad6265SDimitry Andric if (ColdI) { 98981ad6265SDimitry Andric std::stack<Instruction *> ColdSlice; 9907a6dacacSDimitry Andric getExclBackwardsSlice(ColdI, ColdSlice, SI.getI()); 99181ad6265SDimitry Andric InstructionCost SliceCost = 0; 99281ad6265SDimitry Andric while (!ColdSlice.empty()) { 99381ad6265SDimitry Andric SliceCost += TTI->getInstructionCost(ColdSlice.top(), 99481ad6265SDimitry Andric TargetTransformInfo::TCK_Latency); 99581ad6265SDimitry Andric ColdSlice.pop(); 99681ad6265SDimitry Andric } 99781ad6265SDimitry Andric // The colder the cold value operand of the select is the more expensive 99881ad6265SDimitry Andric // the cmov becomes for computing the cold value operand every time. Thus, 99981ad6265SDimitry Andric // the colder the cold operand is the more its cost counts. 100081ad6265SDimitry Andric // Get nearest integer cost adjusted for coldness. 100181ad6265SDimitry Andric InstructionCost AdjSliceCost = 100281ad6265SDimitry Andric divideNearest(SliceCost * HotWeight, TotalWeight); 100381ad6265SDimitry Andric if (AdjSliceCost >= 100481ad6265SDimitry Andric ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive) 100581ad6265SDimitry Andric return true; 100681ad6265SDimitry Andric } 100781ad6265SDimitry Andric } 100881ad6265SDimitry Andric return false; 100981ad6265SDimitry Andric } 101081ad6265SDimitry Andric 1011bdd1243dSDimitry Andric // Check if it is safe to move LoadI next to the SI. 1012bdd1243dSDimitry Andric // Conservatively assume it is safe only if there is no instruction 1013bdd1243dSDimitry Andric // modifying memory in-between the load and the select instruction. 1014bdd1243dSDimitry Andric static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) { 1015bdd1243dSDimitry Andric // Assume loads from different basic blocks are unsafe to move. 1016bdd1243dSDimitry Andric if (LoadI->getParent() != SI->getParent()) 1017bdd1243dSDimitry Andric return false; 1018bdd1243dSDimitry Andric auto It = LoadI->getIterator(); 1019bdd1243dSDimitry Andric while (&*It != SI) { 1020bdd1243dSDimitry Andric if (It->mayWriteToMemory()) 1021bdd1243dSDimitry Andric return false; 1022bdd1243dSDimitry Andric It++; 1023bdd1243dSDimitry Andric } 1024bdd1243dSDimitry Andric return true; 1025bdd1243dSDimitry Andric } 1026bdd1243dSDimitry Andric 102781ad6265SDimitry Andric // For a given source instruction, collect its backwards dependence slice 102881ad6265SDimitry Andric // consisting of instructions exclusively computed for the purpose of producing 102981ad6265SDimitry Andric // the operands of the source instruction. As an approximation 103081ad6265SDimitry Andric // (sufficiently-accurate in practice), we populate this set with the 103181ad6265SDimitry Andric // instructions of the backwards dependence slice that only have one-use and 103281ad6265SDimitry Andric // form an one-use chain that leads to the source instruction. 10335f757f3fSDimitry Andric void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I, 103481ad6265SDimitry Andric std::stack<Instruction *> &Slice, 10355f757f3fSDimitry Andric Instruction *SI, 10365f757f3fSDimitry Andric bool ForSinking) { 103781ad6265SDimitry Andric SmallPtrSet<Instruction *, 2> Visited; 103881ad6265SDimitry Andric std::queue<Instruction *> Worklist; 103981ad6265SDimitry Andric Worklist.push(I); 104081ad6265SDimitry Andric while (!Worklist.empty()) { 104181ad6265SDimitry Andric Instruction *II = Worklist.front(); 104281ad6265SDimitry Andric Worklist.pop(); 104381ad6265SDimitry Andric 104481ad6265SDimitry Andric // Avoid cycles. 104581ad6265SDimitry Andric if (!Visited.insert(II).second) 104681ad6265SDimitry Andric continue; 104781ad6265SDimitry Andric 104881ad6265SDimitry Andric if (!II->hasOneUse()) 104981ad6265SDimitry Andric continue; 105081ad6265SDimitry Andric 105181ad6265SDimitry Andric // Cannot soundly sink instructions with side-effects. 105281ad6265SDimitry Andric // Terminator or phi instructions cannot be sunk. 105381ad6265SDimitry Andric // Avoid sinking other select instructions (should be handled separetely). 105481ad6265SDimitry Andric if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() || 105581ad6265SDimitry Andric isa<SelectInst>(II) || isa<PHINode>(II))) 105681ad6265SDimitry Andric continue; 105781ad6265SDimitry Andric 1058bdd1243dSDimitry Andric // Avoid sinking loads in order not to skip state-modifying instructions, 1059bdd1243dSDimitry Andric // that may alias with the loaded address. 1060bdd1243dSDimitry Andric // Only allow sinking of loads within the same basic block that are 1061bdd1243dSDimitry Andric // conservatively proven to be safe. 1062bdd1243dSDimitry Andric if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI)) 1063bdd1243dSDimitry Andric continue; 1064bdd1243dSDimitry Andric 106581ad6265SDimitry Andric // Avoid considering instructions with less frequency than the source 106681ad6265SDimitry Andric // instruction (i.e., avoid colder code regions of the dependence slice). 106781ad6265SDimitry Andric if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) 106881ad6265SDimitry Andric continue; 106981ad6265SDimitry Andric 107081ad6265SDimitry Andric // Eligible one-use instruction added to the dependence slice. 107181ad6265SDimitry Andric Slice.push(II); 107281ad6265SDimitry Andric 107381ad6265SDimitry Andric // Explore all the operands of the current instruction to expand the slice. 1074*0fca6ea1SDimitry Andric for (Value *Op : II->operand_values()) 1075*0fca6ea1SDimitry Andric if (auto *OpI = dyn_cast<Instruction>(Op)) 107681ad6265SDimitry Andric Worklist.push(OpI); 107781ad6265SDimitry Andric } 107881ad6265SDimitry Andric } 107981ad6265SDimitry Andric 10807a6dacacSDimitry Andric bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) { 108181ad6265SDimitry Andric uint64_t TrueWeight, FalseWeight; 10827a6dacacSDimitry Andric if (extractBranchWeights(SI, TrueWeight, FalseWeight)) { 108381ad6265SDimitry Andric uint64_t Max = std::max(TrueWeight, FalseWeight); 108481ad6265SDimitry Andric uint64_t Sum = TrueWeight + FalseWeight; 108581ad6265SDimitry Andric if (Sum != 0) { 108681ad6265SDimitry Andric auto Probability = BranchProbability::getBranchProbability(Max, Sum); 108781ad6265SDimitry Andric if (Probability > TTI->getPredictableBranchThreshold()) 108881ad6265SDimitry Andric return true; 108981ad6265SDimitry Andric } 109081ad6265SDimitry Andric } 109181ad6265SDimitry Andric return false; 109281ad6265SDimitry Andric } 109381ad6265SDimitry Andric 10945f757f3fSDimitry Andric bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L, 109581ad6265SDimitry Andric const CostInfo LoopCost[2]) { 109681ad6265SDimitry Andric // Loop-level checks to determine if a non-predicated version (with branches) 109781ad6265SDimitry Andric // of the loop is more profitable than its predicated version. 109881ad6265SDimitry Andric 109981ad6265SDimitry Andric if (DisableLoopLevelHeuristics) 110081ad6265SDimitry Andric return true; 110181ad6265SDimitry Andric 110281ad6265SDimitry Andric OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", 110381ad6265SDimitry Andric L->getHeader()->getFirstNonPHI()); 110481ad6265SDimitry Andric 110581ad6265SDimitry Andric if (LoopCost[0].NonPredCost > LoopCost[0].PredCost || 110681ad6265SDimitry Andric LoopCost[1].NonPredCost >= LoopCost[1].PredCost) { 110781ad6265SDimitry Andric ORmissL << "No select conversion in the loop due to no reduction of loop's " 110881ad6265SDimitry Andric "critical path. "; 1109bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmissL); 111081ad6265SDimitry Andric return false; 111181ad6265SDimitry Andric } 111281ad6265SDimitry Andric 111381ad6265SDimitry Andric Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost, 111481ad6265SDimitry Andric LoopCost[1].PredCost - LoopCost[1].NonPredCost}; 111581ad6265SDimitry Andric 111681ad6265SDimitry Andric // Profitably converting to branches need to reduce the loop's critical path 111781ad6265SDimitry Andric // by at least some threshold (absolute gain of GainCycleThreshold cycles and 111881ad6265SDimitry Andric // relative gain of 12.5%). 111981ad6265SDimitry Andric if (Gain[1] < Scaled64::get(GainCycleThreshold) || 112081ad6265SDimitry Andric Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) { 112181ad6265SDimitry Andric Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost; 112281ad6265SDimitry Andric ORmissL << "No select conversion in the loop due to small reduction of " 112381ad6265SDimitry Andric "loop's critical path. Gain=" 112481ad6265SDimitry Andric << Gain[1].toString() 112581ad6265SDimitry Andric << ", RelativeGain=" << RelativeGain.toString() << "%. "; 1126bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmissL); 112781ad6265SDimitry Andric return false; 112881ad6265SDimitry Andric } 112981ad6265SDimitry Andric 113081ad6265SDimitry Andric // If the loop's critical path involves loop-carried dependences, the gradient 113181ad6265SDimitry Andric // of the gain needs to be at least GainGradientThreshold% (defaults to 25%). 113281ad6265SDimitry Andric // This check ensures that the latency reduction for the loop's critical path 113381ad6265SDimitry Andric // keeps decreasing with sufficient rate beyond the two analyzed loop 113481ad6265SDimitry Andric // iterations. 113581ad6265SDimitry Andric if (Gain[1] > Gain[0]) { 113681ad6265SDimitry Andric Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) / 113781ad6265SDimitry Andric (LoopCost[1].PredCost - LoopCost[0].PredCost); 113881ad6265SDimitry Andric if (GradientGain < Scaled64::get(GainGradientThreshold)) { 113981ad6265SDimitry Andric ORmissL << "No select conversion in the loop due to small gradient gain. " 114081ad6265SDimitry Andric "GradientGain=" 114181ad6265SDimitry Andric << GradientGain.toString() << "%. "; 1142bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmissL); 114381ad6265SDimitry Andric return false; 114481ad6265SDimitry Andric } 114581ad6265SDimitry Andric } 114681ad6265SDimitry Andric // If the gain decreases it is not profitable to convert. 114781ad6265SDimitry Andric else if (Gain[1] < Gain[0]) { 114881ad6265SDimitry Andric ORmissL 114981ad6265SDimitry Andric << "No select conversion in the loop due to negative gradient gain. "; 1150bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmissL); 115181ad6265SDimitry Andric return false; 115281ad6265SDimitry Andric } 115381ad6265SDimitry Andric 115481ad6265SDimitry Andric // Non-predicated version of the loop is more profitable than its 115581ad6265SDimitry Andric // predicated version. 115681ad6265SDimitry Andric return true; 115781ad6265SDimitry Andric } 115881ad6265SDimitry Andric 115981ad6265SDimitry Andric // Computes instruction and loop-critical-path costs for both the predicated 116081ad6265SDimitry Andric // and non-predicated version of the given loop. 116181ad6265SDimitry Andric // Returns false if unable to compute these costs due to invalid cost of loop 116281ad6265SDimitry Andric // instruction(s). 11635f757f3fSDimitry Andric bool SelectOptimizeImpl::computeLoopCosts( 116481ad6265SDimitry Andric const Loop *L, const SelectGroups &SIGroups, 116581ad6265SDimitry Andric DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) { 1166bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop " 1167bdd1243dSDimitry Andric << L->getHeader()->getName() << "\n"); 11687a6dacacSDimitry Andric const auto &SImap = getSImap(SIGroups); 116981ad6265SDimitry Andric // Compute instruction and loop-critical-path costs across two iterations for 117081ad6265SDimitry Andric // both predicated and non-predicated version. 117181ad6265SDimitry Andric const unsigned Iterations = 2; 117281ad6265SDimitry Andric for (unsigned Iter = 0; Iter < Iterations; ++Iter) { 117381ad6265SDimitry Andric // Cost of the loop's critical path. 117481ad6265SDimitry Andric CostInfo &MaxCost = LoopCost[Iter]; 117581ad6265SDimitry Andric for (BasicBlock *BB : L->getBlocks()) { 117681ad6265SDimitry Andric for (const Instruction &I : *BB) { 117781ad6265SDimitry Andric if (I.isDebugOrPseudoInst()) 117881ad6265SDimitry Andric continue; 117981ad6265SDimitry Andric // Compute the predicated and non-predicated cost of the instruction. 118081ad6265SDimitry Andric Scaled64 IPredCost = Scaled64::getZero(), 118181ad6265SDimitry Andric INonPredCost = Scaled64::getZero(); 118281ad6265SDimitry Andric 118381ad6265SDimitry Andric // Assume infinite resources that allow to fully exploit the available 118481ad6265SDimitry Andric // instruction-level parallelism. 118581ad6265SDimitry Andric // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost) 118681ad6265SDimitry Andric for (const Use &U : I.operands()) { 118781ad6265SDimitry Andric auto UI = dyn_cast<Instruction>(U.get()); 118881ad6265SDimitry Andric if (!UI) 118981ad6265SDimitry Andric continue; 119081ad6265SDimitry Andric if (InstCostMap.count(UI)) { 119181ad6265SDimitry Andric IPredCost = std::max(IPredCost, InstCostMap[UI].PredCost); 119281ad6265SDimitry Andric INonPredCost = std::max(INonPredCost, InstCostMap[UI].NonPredCost); 119381ad6265SDimitry Andric } 119481ad6265SDimitry Andric } 119581ad6265SDimitry Andric auto ILatency = computeInstCost(&I); 119681ad6265SDimitry Andric if (!ILatency) { 119781ad6265SDimitry Andric OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I); 119881ad6265SDimitry Andric ORmissL << "Invalid instruction cost preventing analysis and " 119981ad6265SDimitry Andric "optimization of the inner-most loop containing this " 120081ad6265SDimitry Andric "instruction. "; 1201bdd1243dSDimitry Andric EmitAndPrintRemark(ORE, ORmissL); 120281ad6265SDimitry Andric return false; 120381ad6265SDimitry Andric } 1204bdd1243dSDimitry Andric IPredCost += Scaled64::get(*ILatency); 1205bdd1243dSDimitry Andric INonPredCost += Scaled64::get(*ILatency); 120681ad6265SDimitry Andric 120781ad6265SDimitry Andric // For a select that can be converted to branch, 120881ad6265SDimitry Andric // compute its cost as a branch (non-predicated cost). 120981ad6265SDimitry Andric // 121081ad6265SDimitry Andric // BranchCost = PredictedPathCost + MispredictCost 121181ad6265SDimitry Andric // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb 121281ad6265SDimitry Andric // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate 12137a6dacacSDimitry Andric if (SImap.contains(&I)) { 12147a6dacacSDimitry Andric auto SI = SImap.at(&I); 12157a6dacacSDimitry Andric Scaled64 TrueOpCost = SI.getTrueOpCost(InstCostMap, TTI); 12167a6dacacSDimitry Andric Scaled64 FalseOpCost = SI.getFalseOpCost(InstCostMap, TTI); 121781ad6265SDimitry Andric Scaled64 PredictedPathCost = 121881ad6265SDimitry Andric getPredictedPathCost(TrueOpCost, FalseOpCost, SI); 121981ad6265SDimitry Andric 122081ad6265SDimitry Andric Scaled64 CondCost = Scaled64::getZero(); 12217a6dacacSDimitry Andric if (auto *CI = dyn_cast<Instruction>(SI.getCondition())) 122281ad6265SDimitry Andric if (InstCostMap.count(CI)) 122381ad6265SDimitry Andric CondCost = InstCostMap[CI].NonPredCost; 122481ad6265SDimitry Andric Scaled64 MispredictCost = getMispredictionCost(SI, CondCost); 122581ad6265SDimitry Andric 122681ad6265SDimitry Andric INonPredCost = PredictedPathCost + MispredictCost; 122781ad6265SDimitry Andric } 1228bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/" 1229bdd1243dSDimitry Andric << INonPredCost << " for " << I << "\n"); 123081ad6265SDimitry Andric 123181ad6265SDimitry Andric InstCostMap[&I] = {IPredCost, INonPredCost}; 123281ad6265SDimitry Andric MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost); 123381ad6265SDimitry Andric MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost); 123481ad6265SDimitry Andric } 123581ad6265SDimitry Andric } 1236bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1 1237bdd1243dSDimitry Andric << " MaxCost = " << MaxCost.PredCost << " " 1238bdd1243dSDimitry Andric << MaxCost.NonPredCost << "\n"); 123981ad6265SDimitry Andric } 124081ad6265SDimitry Andric return true; 124181ad6265SDimitry Andric } 124281ad6265SDimitry Andric 12437a6dacacSDimitry Andric SmallDenseMap<const Instruction *, SelectOptimizeImpl::SelectLike, 2> 12447a6dacacSDimitry Andric SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) { 12457a6dacacSDimitry Andric SmallDenseMap<const Instruction *, SelectLike, 2> SImap; 124681ad6265SDimitry Andric for (const SelectGroup &ASI : SIGroups) 12477a6dacacSDimitry Andric for (SelectLike SI : ASI) 12487a6dacacSDimitry Andric SImap.try_emplace(SI.getI(), SI); 12497a6dacacSDimitry Andric return SImap; 125081ad6265SDimitry Andric } 125181ad6265SDimitry Andric 12525f757f3fSDimitry Andric std::optional<uint64_t> 12535f757f3fSDimitry Andric SelectOptimizeImpl::computeInstCost(const Instruction *I) { 125481ad6265SDimitry Andric InstructionCost ICost = 125581ad6265SDimitry Andric TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); 125681ad6265SDimitry Andric if (auto OC = ICost.getValue()) 1257bdd1243dSDimitry Andric return std::optional<uint64_t>(*OC); 1258bdd1243dSDimitry Andric return std::nullopt; 125981ad6265SDimitry Andric } 126081ad6265SDimitry Andric 126181ad6265SDimitry Andric ScaledNumber<uint64_t> 12627a6dacacSDimitry Andric SelectOptimizeImpl::getMispredictionCost(const SelectLike SI, 126381ad6265SDimitry Andric const Scaled64 CondCost) { 126481ad6265SDimitry Andric uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; 126581ad6265SDimitry Andric 126681ad6265SDimitry Andric // Account for the default misprediction rate when using a branch 126781ad6265SDimitry Andric // (conservatively set to 25% by default). 126881ad6265SDimitry Andric uint64_t MispredictRate = MispredictDefaultRate; 126981ad6265SDimitry Andric // If the select condition is obviously predictable, then the misprediction 127081ad6265SDimitry Andric // rate is zero. 127181ad6265SDimitry Andric if (isSelectHighlyPredictable(SI)) 127281ad6265SDimitry Andric MispredictRate = 0; 127381ad6265SDimitry Andric 127481ad6265SDimitry Andric // CondCost is included to account for cases where the computation of the 127581ad6265SDimitry Andric // condition is part of a long dependence chain (potentially loop-carried) 127681ad6265SDimitry Andric // that would delay detection of a misprediction and increase its cost. 127781ad6265SDimitry Andric Scaled64 MispredictCost = 127881ad6265SDimitry Andric std::max(Scaled64::get(MispredictPenalty), CondCost) * 127981ad6265SDimitry Andric Scaled64::get(MispredictRate); 128081ad6265SDimitry Andric MispredictCost /= Scaled64::get(100); 128181ad6265SDimitry Andric 128281ad6265SDimitry Andric return MispredictCost; 128381ad6265SDimitry Andric } 128481ad6265SDimitry Andric 128581ad6265SDimitry Andric // Returns the cost of a branch when the prediction is correct. 128681ad6265SDimitry Andric // TrueCost * TrueProbability + FalseCost * FalseProbability. 128781ad6265SDimitry Andric ScaledNumber<uint64_t> 12885f757f3fSDimitry Andric SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, 12897a6dacacSDimitry Andric const SelectLike SI) { 129081ad6265SDimitry Andric Scaled64 PredPathCost; 129181ad6265SDimitry Andric uint64_t TrueWeight, FalseWeight; 12927a6dacacSDimitry Andric if (extractBranchWeights(SI, TrueWeight, FalseWeight)) { 129381ad6265SDimitry Andric uint64_t SumWeight = TrueWeight + FalseWeight; 129481ad6265SDimitry Andric if (SumWeight != 0) { 129581ad6265SDimitry Andric PredPathCost = TrueCost * Scaled64::get(TrueWeight) + 129681ad6265SDimitry Andric FalseCost * Scaled64::get(FalseWeight); 129781ad6265SDimitry Andric PredPathCost /= Scaled64::get(SumWeight); 129881ad6265SDimitry Andric return PredPathCost; 129981ad6265SDimitry Andric } 130081ad6265SDimitry Andric } 130181ad6265SDimitry Andric // Without branch weight metadata, we assume 75% for the one path and 25% for 130281ad6265SDimitry Andric // the other, and pick the result with the biggest cost. 130381ad6265SDimitry Andric PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost, 130481ad6265SDimitry Andric FalseCost * Scaled64::get(3) + TrueCost); 130581ad6265SDimitry Andric PredPathCost /= Scaled64::get(4); 130681ad6265SDimitry Andric return PredPathCost; 130781ad6265SDimitry Andric } 130881ad6265SDimitry Andric 13097a6dacacSDimitry Andric bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) { 13107a6dacacSDimitry Andric bool VectorCond = !SI.getCondition()->getType()->isIntegerTy(1); 131181ad6265SDimitry Andric if (VectorCond) 131281ad6265SDimitry Andric return false; 131381ad6265SDimitry Andric TargetLowering::SelectSupportKind SelectKind; 13147a6dacacSDimitry Andric if (SI.getType()->isVectorTy()) 131581ad6265SDimitry Andric SelectKind = TargetLowering::ScalarCondVectorVal; 131681ad6265SDimitry Andric else 131781ad6265SDimitry Andric SelectKind = TargetLowering::ScalarValSelect; 131881ad6265SDimitry Andric return TLI->isSelectSupported(SelectKind); 131981ad6265SDimitry Andric } 1320