1ca7c307dSSotiris Apostolakis //===--- SelectOptimize.cpp - Convert select to branches if profitable ---===// 2ca7c307dSSotiris Apostolakis // 3ca7c307dSSotiris Apostolakis // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4ca7c307dSSotiris Apostolakis // See https://llvm.org/LICENSE.txt for license information. 5ca7c307dSSotiris Apostolakis // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6ca7c307dSSotiris Apostolakis // 7ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===// 8ca7c307dSSotiris Apostolakis // 9ca7c307dSSotiris Apostolakis // This pass converts selects to conditional jumps when profitable. 10ca7c307dSSotiris Apostolakis // 11ca7c307dSSotiris Apostolakis //===----------------------------------------------------------------------===// 12ca7c307dSSotiris Apostolakis 13ce08c7eeSpaperchalice #include "llvm/CodeGen/SelectOptimize.h" 14e909c0ccSIgor Kirillov #include "llvm/ADT/SetVector.h" 1597c3ef5cSSotiris Apostolakis #include "llvm/ADT/SmallVector.h" 1697c3ef5cSSotiris Apostolakis #include "llvm/ADT/Statistic.h" 1797c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BlockFrequencyInfo.h" 1897c3ef5cSSotiris Apostolakis #include "llvm/Analysis/BranchProbabilityInfo.h" 1997c3ef5cSSotiris Apostolakis #include "llvm/Analysis/LoopInfo.h" 208b42bc56SSotiris Apostolakis #include "llvm/Analysis/OptimizationRemarkEmitter.h" 218b42bc56SSotiris Apostolakis #include "llvm/Analysis/ProfileSummaryInfo.h" 228b42bc56SSotiris Apostolakis #include "llvm/Analysis/TargetTransformInfo.h" 23ca7c307dSSotiris Apostolakis #include "llvm/CodeGen/Passes.h" 2497c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetLowering.h" 2597c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetPassConfig.h" 2697c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSchedule.h" 2797c3ef5cSSotiris Apostolakis #include "llvm/CodeGen/TargetSubtargetInfo.h" 2897c3ef5cSSotiris Apostolakis #include "llvm/IR/BasicBlock.h" 298b42bc56SSotiris Apostolakis #include "llvm/IR/Dominators.h" 30ca7c307dSSotiris Apostolakis #include "llvm/IR/Function.h" 3197c3ef5cSSotiris Apostolakis #include "llvm/IR/IRBuilder.h" 3297c3ef5cSSotiris Apostolakis #include "llvm/IR/Instruction.h" 33ac73c48eSElliot Goodrich #include "llvm/IR/PatternMatch.h" 34d434e40fSPaul Kirth #include "llvm/IR/ProfDataUtils.h" 35ca7c307dSSotiris Apostolakis #include "llvm/InitializePasses.h" 36ca7c307dSSotiris Apostolakis #include "llvm/Pass.h" 37d7ebb746SSotiris Apostolakis #include "llvm/Support/ScaledNumber.h" 3897c3ef5cSSotiris Apostolakis #include "llvm/Target/TargetMachine.h" 398b42bc56SSotiris Apostolakis #include "llvm/Transforms/Utils/SizeOpts.h" 408b42bc56SSotiris Apostolakis #include <algorithm> 418b42bc56SSotiris Apostolakis #include <queue> 428b42bc56SSotiris Apostolakis #include <stack> 43ca7c307dSSotiris Apostolakis 44ca7c307dSSotiris Apostolakis using namespace llvm; 45a2d68b4bSDavid Green using namespace llvm::PatternMatch; 46ca7c307dSSotiris Apostolakis 4797c3ef5cSSotiris Apostolakis #define DEBUG_TYPE "select-optimize" 4897c3ef5cSSotiris Apostolakis 498b42bc56SSotiris Apostolakis STATISTIC(NumSelectOptAnalyzed, 508b42bc56SSotiris Apostolakis "Number of select groups considered for conversion to branch"); 518b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedExpColdOperand, 528b42bc56SSotiris Apostolakis "Number of select groups converted due to expensive cold operand"); 538b42bc56SSotiris Apostolakis STATISTIC(NumSelectConvertedHighPred, 548b42bc56SSotiris Apostolakis "Number of select groups converted due to high-predictability"); 558b42bc56SSotiris Apostolakis STATISTIC(NumSelectUnPred, 568b42bc56SSotiris Apostolakis "Number of select groups not converted due to unpredictability"); 578b42bc56SSotiris Apostolakis STATISTIC(NumSelectColdBB, 588b42bc56SSotiris Apostolakis "Number of select groups not converted due to cold basic block"); 59d7ebb746SSotiris Apostolakis STATISTIC(NumSelectConvertedLoop, 60d7ebb746SSotiris Apostolakis "Number of select groups converted due to loop-level analysis"); 6197c3ef5cSSotiris Apostolakis STATISTIC(NumSelectsConverted, "Number of selects converted"); 6297c3ef5cSSotiris Apostolakis 638b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandThreshold( 648b42bc56SSotiris Apostolakis "cold-operand-threshold", 658b42bc56SSotiris Apostolakis cl::desc("Maximum frequency of path for an operand to be considered cold."), 668b42bc56SSotiris Apostolakis cl::init(20), cl::Hidden); 678b42bc56SSotiris Apostolakis 688b42bc56SSotiris Apostolakis static cl::opt<unsigned> ColdOperandMaxCostMultiplier( 698b42bc56SSotiris Apostolakis "cold-operand-max-cost-multiplier", 708b42bc56SSotiris Apostolakis cl::desc("Maximum cost multiplier of TCC_expensive for the dependence " 718b42bc56SSotiris Apostolakis "slice of a cold operand to be considered inexpensive."), 728b42bc56SSotiris Apostolakis cl::init(1), cl::Hidden); 738b42bc56SSotiris Apostolakis 74d7ebb746SSotiris Apostolakis static cl::opt<unsigned> 75d7ebb746SSotiris Apostolakis GainGradientThreshold("select-opti-loop-gradient-gain-threshold", 76d7ebb746SSotiris Apostolakis cl::desc("Gradient gain threshold (%)."), 77d7ebb746SSotiris Apostolakis cl::init(25), cl::Hidden); 78d7ebb746SSotiris Apostolakis 79d7ebb746SSotiris Apostolakis static cl::opt<unsigned> 80d7ebb746SSotiris Apostolakis GainCycleThreshold("select-opti-loop-cycle-gain-threshold", 81d7ebb746SSotiris Apostolakis cl::desc("Minimum gain per loop (in cycles) threshold."), 82d7ebb746SSotiris Apostolakis cl::init(4), cl::Hidden); 83d7ebb746SSotiris Apostolakis 84d7ebb746SSotiris Apostolakis static cl::opt<unsigned> GainRelativeThreshold( 85d7ebb746SSotiris Apostolakis "select-opti-loop-relative-gain-threshold", 86d7ebb746SSotiris Apostolakis cl::desc( 87d7ebb746SSotiris Apostolakis "Minimum relative gain per loop threshold (1/X). Defaults to 12.5%"), 88d7ebb746SSotiris Apostolakis cl::init(8), cl::Hidden); 89d7ebb746SSotiris Apostolakis 90d7ebb746SSotiris Apostolakis static cl::opt<unsigned> MispredictDefaultRate( 91d7ebb746SSotiris Apostolakis "mispredict-default-rate", cl::Hidden, cl::init(25), 92d7ebb746SSotiris Apostolakis cl::desc("Default mispredict rate (initialized to 25%).")); 93d7ebb746SSotiris Apostolakis 94d7ebb746SSotiris Apostolakis static cl::opt<bool> 95d7ebb746SSotiris Apostolakis DisableLoopLevelHeuristics("disable-loop-level-heuristics", cl::Hidden, 96d7ebb746SSotiris Apostolakis cl::init(false), 97d7ebb746SSotiris Apostolakis cl::desc("Disable loop-level heuristics.")); 98d7ebb746SSotiris Apostolakis 99ca7c307dSSotiris Apostolakis namespace { 100ca7c307dSSotiris Apostolakis 101ce08c7eeSpaperchalice class SelectOptimizeImpl { 10297c3ef5cSSotiris Apostolakis const TargetMachine *TM = nullptr; 1038bf7f86dSAkshay Khadse const TargetSubtargetInfo *TSI = nullptr; 10497c3ef5cSSotiris Apostolakis const TargetLowering *TLI = nullptr; 1058b42bc56SSotiris Apostolakis const TargetTransformInfo *TTI = nullptr; 1068bf7f86dSAkshay Khadse const LoopInfo *LI = nullptr; 107ce08c7eeSpaperchalice BlockFrequencyInfo *BFI; 1088bf7f86dSAkshay Khadse ProfileSummaryInfo *PSI = nullptr; 1098bf7f86dSAkshay Khadse OptimizationRemarkEmitter *ORE = nullptr; 110d7ebb746SSotiris Apostolakis TargetSchedModel TSchedModel; 11197c3ef5cSSotiris Apostolakis 112ca7c307dSSotiris Apostolakis public: 113ce08c7eeSpaperchalice SelectOptimizeImpl() = default; 114ce08c7eeSpaperchalice SelectOptimizeImpl(const TargetMachine *TM) : TM(TM){}; 115ce08c7eeSpaperchalice PreservedAnalyses run(Function &F, FunctionAnalysisManager &FAM); 116ce08c7eeSpaperchalice bool runOnFunction(Function &F, Pass &P); 11797c3ef5cSSotiris Apostolakis 118d7ebb746SSotiris Apostolakis using Scaled64 = ScaledNumber<uint64_t>; 119d7ebb746SSotiris Apostolakis 120d7ebb746SSotiris Apostolakis struct CostInfo { 121d7ebb746SSotiris Apostolakis /// Predicated cost (with selects as conditional moves). 122d7ebb746SSotiris Apostolakis Scaled64 PredCost; 123d7ebb746SSotiris Apostolakis /// Non-predicated cost (with selects converted to branches). 124d7ebb746SSotiris Apostolakis Scaled64 NonPredCost; 125d7ebb746SSotiris Apostolakis }; 126d7ebb746SSotiris Apostolakis 127a2d68b4bSDavid Green /// SelectLike is an abstraction over SelectInst and other operations that can 128a2d68b4bSDavid Green /// act like selects. For example Or(Zext(icmp), X) can be treated like 129a2d68b4bSDavid Green /// select(icmp, X|1, X). 130a2d68b4bSDavid Green class SelectLike { 1310a62a99aSDavid Green /// The select (/or) instruction. 132a2d68b4bSDavid Green Instruction *I; 1330a62a99aSDavid Green /// Whether this select is inverted, "not(cond), FalseVal, TrueVal", as 1340a62a99aSDavid Green /// opposed to the original condition. 1350a62a99aSDavid Green bool Inverted = false; 136a2d68b4bSDavid Green 137e874c8fcSIgor Kirillov /// The index of the operand that depends on condition. Only for select-like 138e874c8fcSIgor Kirillov /// instruction such as Or/Add. 139e874c8fcSIgor Kirillov unsigned CondIdx; 140e874c8fcSIgor Kirillov 141a2d68b4bSDavid Green public: 142e874c8fcSIgor Kirillov SelectLike(Instruction *I, bool Inverted = false, unsigned CondIdx = 0) 143e874c8fcSIgor Kirillov : I(I), Inverted(Inverted), CondIdx(CondIdx) {} 1440a62a99aSDavid Green 145a2d68b4bSDavid Green Instruction *getI() { return I; } 146a2d68b4bSDavid Green const Instruction *getI() const { return I; } 147a2d68b4bSDavid Green 148a2d68b4bSDavid Green Type *getType() const { return I->getType(); } 149a2d68b4bSDavid Green 150e874c8fcSIgor Kirillov unsigned getConditionOpIndex() { return CondIdx; }; 1510a62a99aSDavid Green 152a2d68b4bSDavid Green /// Return the true value for the SelectLike instruction. Note this may not 153a2d68b4bSDavid Green /// exist for all SelectLike instructions. For example, for `or(zext(c), x)` 154a2d68b4bSDavid Green /// the true value would be `or(x,1)`. As this value does not exist, nullptr 155a2d68b4bSDavid Green /// is returned. 1560a62a99aSDavid Green Value *getTrueValue(bool HonorInverts = true) const { 1570a62a99aSDavid Green if (Inverted && HonorInverts) 1580a62a99aSDavid Green return getFalseValue(/*HonorInverts=*/false); 159a2d68b4bSDavid Green if (auto *Sel = dyn_cast<SelectInst>(I)) 160a2d68b4bSDavid Green return Sel->getTrueValue(); 161a2d68b4bSDavid Green // Or(zext) case - The true value is Or(X), so return nullptr as the value 162a2d68b4bSDavid Green // does not yet exist. 163a2d68b4bSDavid Green if (isa<BinaryOperator>(I)) 164a2d68b4bSDavid Green return nullptr; 165a2d68b4bSDavid Green 166a2d68b4bSDavid Green llvm_unreachable("Unhandled case in getTrueValue"); 167a2d68b4bSDavid Green } 168a2d68b4bSDavid Green 169a2d68b4bSDavid Green /// Return the false value for the SelectLike instruction. For example the 170a2d68b4bSDavid Green /// getFalseValue of a select or `x` in `or(zext(c), x)` (which is 171a2d68b4bSDavid Green /// `select(c, x|1, x)`) 1720a62a99aSDavid Green Value *getFalseValue(bool HonorInverts = true) const { 1730a62a99aSDavid Green if (Inverted && HonorInverts) 1740a62a99aSDavid Green return getTrueValue(/*HonorInverts=*/false); 175a2d68b4bSDavid Green if (auto *Sel = dyn_cast<SelectInst>(I)) 176a2d68b4bSDavid Green return Sel->getFalseValue(); 177e874c8fcSIgor Kirillov // We are on the branch where the condition is zero, which means BinOp 178e874c8fcSIgor Kirillov // does not perform any computation, and we can simply return the operand 179e874c8fcSIgor Kirillov // that is not related to the condition 180e874c8fcSIgor Kirillov if (auto *BO = dyn_cast<BinaryOperator>(I)) 181e874c8fcSIgor Kirillov return BO->getOperand(1 - CondIdx); 182a2d68b4bSDavid Green 183a2d68b4bSDavid Green llvm_unreachable("Unhandled case in getFalseValue"); 184a2d68b4bSDavid Green } 185a2d68b4bSDavid Green 186e874c8fcSIgor Kirillov /// Return the NonPredCost cost of the op on \p isTrue branch, given the 187e874c8fcSIgor Kirillov /// costs in \p InstCostMap. This may need to be generated for select-like 188e874c8fcSIgor Kirillov /// instructions. 189e874c8fcSIgor Kirillov Scaled64 getOpCostOnBranch( 190e874c8fcSIgor Kirillov bool IsTrue, const DenseMap<const Instruction *, CostInfo> &InstCostMap, 191a2d68b4bSDavid Green const TargetTransformInfo *TTI) { 192e874c8fcSIgor Kirillov auto *V = IsTrue ? getTrueValue() : getFalseValue(); 193e874c8fcSIgor Kirillov if (V) { 194e874c8fcSIgor Kirillov if (auto *IV = dyn_cast<Instruction>(V)) { 195e874c8fcSIgor Kirillov auto It = InstCostMap.find(IV); 196186dc9a4SKazu Hirata return It != InstCostMap.end() ? It->second.NonPredCost 197a2d68b4bSDavid Green : Scaled64::getZero(); 198186dc9a4SKazu Hirata } 199e874c8fcSIgor Kirillov return Scaled64::getZero(); 200e874c8fcSIgor Kirillov } 201e874c8fcSIgor Kirillov // If getTrue(False)Value() return nullptr, it means we are dealing with 202e874c8fcSIgor Kirillov // select-like instructions on the branch where the actual computation is 203e874c8fcSIgor Kirillov // happening. In that case the cost is equal to the cost of computation + 204e874c8fcSIgor Kirillov // cost of non-dependant on condition operand 205e874c8fcSIgor Kirillov InstructionCost Cost = TTI->getArithmeticInstrCost( 206e874c8fcSIgor Kirillov getI()->getOpcode(), I->getType(), TargetTransformInfo::TCK_Latency, 207e874c8fcSIgor Kirillov {TargetTransformInfo::OK_AnyValue, TargetTransformInfo::OP_None}, 2084a7a27cbSIgor Kirillov {TTI::OK_UniformConstantValue, TTI::OP_PowerOf2}); 209e874c8fcSIgor Kirillov auto TotalCost = Scaled64::get(*Cost.getValue()); 210e874c8fcSIgor Kirillov if (auto *OpI = dyn_cast<Instruction>(I->getOperand(1 - CondIdx))) { 211e874c8fcSIgor Kirillov auto It = InstCostMap.find(OpI); 212e874c8fcSIgor Kirillov if (It != InstCostMap.end()) 213e874c8fcSIgor Kirillov TotalCost += It->second.NonPredCost; 2144a7a27cbSIgor Kirillov } 215e874c8fcSIgor Kirillov return TotalCost; 216a2d68b4bSDavid Green } 217a2d68b4bSDavid Green }; 218a2d68b4bSDavid Green 219a2d68b4bSDavid Green private: 220e874c8fcSIgor Kirillov // Select groups consist of consecutive select-like instructions with the same 221e874c8fcSIgor Kirillov // condition. Between select-likes could be any number of auxiliary 222e909c0ccSIgor Kirillov // instructions related to the condition like not, zext, ashr/lshr 223e874c8fcSIgor Kirillov struct SelectGroup { 224e874c8fcSIgor Kirillov Value *Condition; 225e874c8fcSIgor Kirillov SmallVector<SelectLike, 2> Selects; 226e874c8fcSIgor Kirillov }; 227a2d68b4bSDavid Green using SelectGroups = SmallVector<SelectGroup, 2>; 228a2d68b4bSDavid Green 2298b42bc56SSotiris Apostolakis // Converts select instructions of a function to conditional jumps when deemed 2308b42bc56SSotiris Apostolakis // profitable. Returns true if at least one select was converted. 23197c3ef5cSSotiris Apostolakis bool optimizeSelects(Function &F); 2328b42bc56SSotiris Apostolakis 2338b42bc56SSotiris Apostolakis // Heuristics for determining which select instructions can be profitably 2348b42bc56SSotiris Apostolakis // conveted to branches. Separate heuristics for selects in inner-most loops 2358b42bc56SSotiris Apostolakis // and the rest of code regions (base heuristics for non-inner-most loop 2368b42bc56SSotiris Apostolakis // regions). 2378b42bc56SSotiris Apostolakis void optimizeSelectsBase(Function &F, SelectGroups &ProfSIGroups); 2388b42bc56SSotiris Apostolakis void optimizeSelectsInnerLoops(Function &F, SelectGroups &ProfSIGroups); 2398b42bc56SSotiris Apostolakis 2408b42bc56SSotiris Apostolakis // Converts to branches the select groups that were deemed 2418b42bc56SSotiris Apostolakis // profitable-to-convert. 24297c3ef5cSSotiris Apostolakis void convertProfitableSIGroups(SelectGroups &ProfSIGroups); 2438b42bc56SSotiris Apostolakis 2448b42bc56SSotiris Apostolakis // Splits selects of a given basic block into select groups. 24597c3ef5cSSotiris Apostolakis void collectSelectGroups(BasicBlock &BB, SelectGroups &SIGroups); 2468b42bc56SSotiris Apostolakis 2478b42bc56SSotiris Apostolakis // Determines for which select groups it is profitable converting to branches 248d7ebb746SSotiris Apostolakis // (base and inner-most-loop heuristics). 2498b42bc56SSotiris Apostolakis void findProfitableSIGroupsBase(SelectGroups &SIGroups, 2508b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups); 251d7ebb746SSotiris Apostolakis void findProfitableSIGroupsInnerLoops(const Loop *L, SelectGroups &SIGroups, 252d7ebb746SSotiris Apostolakis SelectGroups &ProfSIGroups); 253d7ebb746SSotiris Apostolakis 2548b42bc56SSotiris Apostolakis // Determines if a select group should be converted to a branch (base 2558b42bc56SSotiris Apostolakis // heuristics). 256a2d68b4bSDavid Green bool isConvertToBranchProfitableBase(const SelectGroup &ASI); 2578b42bc56SSotiris Apostolakis 2588b42bc56SSotiris Apostolakis // Returns true if there are expensive instructions in the cold value 2598b42bc56SSotiris Apostolakis // operand's (if any) dependence slice of any of the selects of the given 2608b42bc56SSotiris Apostolakis // group. 261a2d68b4bSDavid Green bool hasExpensiveColdOperand(const SelectGroup &ASI); 2628b42bc56SSotiris Apostolakis 2638b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice 2648b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for producing the operands 2658b42bc56SSotiris Apostolakis // of the source instruction. 26667be40dfSSotiris Apostolakis void getExclBackwardsSlice(Instruction *I, std::stack<Instruction *> &Slice, 267b827e7c6SSotiris Apostolakis Instruction *SI, bool ForSinking = false); 2688b42bc56SSotiris Apostolakis 2698b42bc56SSotiris Apostolakis // Returns true if the condition of the select is highly predictable. 270a2d68b4bSDavid Green bool isSelectHighlyPredictable(const SelectLike SI); 2718b42bc56SSotiris Apostolakis 272d7ebb746SSotiris Apostolakis // Loop-level checks to determine if a non-predicated version (with branches) 273d7ebb746SSotiris Apostolakis // of the given loop is more profitable than its predicated version. 274d7ebb746SSotiris Apostolakis bool checkLoopHeuristics(const Loop *L, const CostInfo LoopDepth[2]); 275d7ebb746SSotiris Apostolakis 276d7ebb746SSotiris Apostolakis // Computes instruction and loop-critical-path costs for both the predicated 277d7ebb746SSotiris Apostolakis // and non-predicated version of the given loop. 278d7ebb746SSotiris Apostolakis bool computeLoopCosts(const Loop *L, const SelectGroups &SIGroups, 279d7ebb746SSotiris Apostolakis DenseMap<const Instruction *, CostInfo> &InstCostMap, 280d7ebb746SSotiris Apostolakis CostInfo *LoopCost); 281d7ebb746SSotiris Apostolakis 282d7ebb746SSotiris Apostolakis // Returns a set of all the select instructions in the given select groups. 283a2d68b4bSDavid Green SmallDenseMap<const Instruction *, SelectLike, 2> 284a2d68b4bSDavid Green getSImap(const SelectGroups &SIGroups); 285d7ebb746SSotiris Apostolakis 286e874c8fcSIgor Kirillov // Returns a map from select-like instructions to the corresponding select 287e874c8fcSIgor Kirillov // group. 288e874c8fcSIgor Kirillov SmallDenseMap<const Instruction *, const SelectGroup *, 2> 289e874c8fcSIgor Kirillov getSGmap(const SelectGroups &SIGroups); 290e874c8fcSIgor Kirillov 291d7ebb746SSotiris Apostolakis // Returns the latency cost of a given instruction. 29267819a72SFangrui Song std::optional<uint64_t> computeInstCost(const Instruction *I); 293d7ebb746SSotiris Apostolakis 294d7ebb746SSotiris Apostolakis // Returns the misprediction cost of a given select when converted to branch. 295a2d68b4bSDavid Green Scaled64 getMispredictionCost(const SelectLike SI, const Scaled64 CondCost); 296d7ebb746SSotiris Apostolakis 297d7ebb746SSotiris Apostolakis // Returns the cost of a branch when the prediction is correct. 298d7ebb746SSotiris Apostolakis Scaled64 getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, 299a2d68b4bSDavid Green const SelectLike SI); 300d7ebb746SSotiris Apostolakis 3018b42bc56SSotiris Apostolakis // Returns true if the target architecture supports lowering a given select. 302a2d68b4bSDavid Green bool isSelectKindSupported(const SelectLike SI); 303ca7c307dSSotiris Apostolakis }; 304ce08c7eeSpaperchalice 305ce08c7eeSpaperchalice class SelectOptimize : public FunctionPass { 306ce08c7eeSpaperchalice SelectOptimizeImpl Impl; 307ce08c7eeSpaperchalice 308ce08c7eeSpaperchalice public: 309ce08c7eeSpaperchalice static char ID; 310ce08c7eeSpaperchalice 311ce08c7eeSpaperchalice SelectOptimize() : FunctionPass(ID) { 312ce08c7eeSpaperchalice initializeSelectOptimizePass(*PassRegistry::getPassRegistry()); 313ce08c7eeSpaperchalice } 314ce08c7eeSpaperchalice 315ce08c7eeSpaperchalice bool runOnFunction(Function &F) override { 316ce08c7eeSpaperchalice return Impl.runOnFunction(F, *this); 317ce08c7eeSpaperchalice } 318ce08c7eeSpaperchalice 319ce08c7eeSpaperchalice void getAnalysisUsage(AnalysisUsage &AU) const override { 320ce08c7eeSpaperchalice AU.addRequired<ProfileSummaryInfoWrapperPass>(); 321ce08c7eeSpaperchalice AU.addRequired<TargetPassConfig>(); 322ce08c7eeSpaperchalice AU.addRequired<TargetTransformInfoWrapperPass>(); 323ce08c7eeSpaperchalice AU.addRequired<LoopInfoWrapperPass>(); 324ce08c7eeSpaperchalice AU.addRequired<BlockFrequencyInfoWrapperPass>(); 325ce08c7eeSpaperchalice AU.addRequired<OptimizationRemarkEmitterWrapperPass>(); 326ce08c7eeSpaperchalice } 327ce08c7eeSpaperchalice }; 328ce08c7eeSpaperchalice 329ca7c307dSSotiris Apostolakis } // namespace 330ca7c307dSSotiris Apostolakis 331ce08c7eeSpaperchalice PreservedAnalyses SelectOptimizePass::run(Function &F, 332ce08c7eeSpaperchalice FunctionAnalysisManager &FAM) { 333ce08c7eeSpaperchalice SelectOptimizeImpl Impl(TM); 334ce08c7eeSpaperchalice return Impl.run(F, FAM); 335ce08c7eeSpaperchalice } 336ce08c7eeSpaperchalice 337ca7c307dSSotiris Apostolakis char SelectOptimize::ID = 0; 33897c3ef5cSSotiris Apostolakis 33997c3ef5cSSotiris Apostolakis INITIALIZE_PASS_BEGIN(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 34097c3ef5cSSotiris Apostolakis false) 34197c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 3428b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(ProfileSummaryInfoWrapperPass) 34397c3ef5cSSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) 3448b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 345ce08c7eeSpaperchalice INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) 3468b42bc56SSotiris Apostolakis INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) 34797c3ef5cSSotiris Apostolakis INITIALIZE_PASS_END(SelectOptimize, DEBUG_TYPE, "Optimize selects", false, 348ca7c307dSSotiris Apostolakis false) 349ca7c307dSSotiris Apostolakis 350ca7c307dSSotiris Apostolakis FunctionPass *llvm::createSelectOptimizePass() { return new SelectOptimize(); } 351ca7c307dSSotiris Apostolakis 352ce08c7eeSpaperchalice PreservedAnalyses SelectOptimizeImpl::run(Function &F, 353ce08c7eeSpaperchalice FunctionAnalysisManager &FAM) { 35497c3ef5cSSotiris Apostolakis TSI = TM->getSubtargetImpl(F); 35597c3ef5cSSotiris Apostolakis TLI = TSI->getTargetLowering(); 3568b42bc56SSotiris Apostolakis 357ce08c7eeSpaperchalice // If none of the select types are supported then skip this pass. 358ce08c7eeSpaperchalice // This is an optimization pass. Legality issues will be handled by 359ce08c7eeSpaperchalice // instruction selection. 360ce08c7eeSpaperchalice if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && 361ce08c7eeSpaperchalice !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && 362ce08c7eeSpaperchalice !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) 363ce08c7eeSpaperchalice return PreservedAnalyses::all(); 364ce08c7eeSpaperchalice 365ce08c7eeSpaperchalice TTI = &FAM.getResult<TargetIRAnalysis>(F); 366ce08c7eeSpaperchalice if (!TTI->enableSelectOptimize()) 367ce08c7eeSpaperchalice return PreservedAnalyses::all(); 368ce08c7eeSpaperchalice 369ce08c7eeSpaperchalice PSI = FAM.getResult<ModuleAnalysisManagerFunctionProxy>(F) 370ce08c7eeSpaperchalice .getCachedResult<ProfileSummaryAnalysis>(*F.getParent()); 371ce08c7eeSpaperchalice assert(PSI && "This pass requires module analysis pass `profile-summary`!"); 372ce08c7eeSpaperchalice BFI = &FAM.getResult<BlockFrequencyAnalysis>(F); 373ce08c7eeSpaperchalice 374ce08c7eeSpaperchalice // When optimizing for size, selects are preferable over branches. 3756ab26eabSEllis Hoag if (llvm::shouldOptimizeForSize(&F, PSI, BFI)) 376ce08c7eeSpaperchalice return PreservedAnalyses::all(); 377ce08c7eeSpaperchalice 378ce08c7eeSpaperchalice LI = &FAM.getResult<LoopAnalysis>(F); 379ce08c7eeSpaperchalice ORE = &FAM.getResult<OptimizationRemarkEmitterAnalysis>(F); 380ce08c7eeSpaperchalice TSchedModel.init(TSI); 381ce08c7eeSpaperchalice 382ce08c7eeSpaperchalice bool Changed = optimizeSelects(F); 383ce08c7eeSpaperchalice return Changed ? PreservedAnalyses::none() : PreservedAnalyses::all(); 384ce08c7eeSpaperchalice } 385ce08c7eeSpaperchalice 386ce08c7eeSpaperchalice bool SelectOptimizeImpl::runOnFunction(Function &F, Pass &P) { 387ce08c7eeSpaperchalice TM = &P.getAnalysis<TargetPassConfig>().getTM<TargetMachine>(); 388ce08c7eeSpaperchalice TSI = TM->getSubtargetImpl(F); 389ce08c7eeSpaperchalice TLI = TSI->getTargetLowering(); 390ce08c7eeSpaperchalice 391ce08c7eeSpaperchalice // If none of the select types are supported then skip this pass. 3928b42bc56SSotiris Apostolakis // This is an optimization pass. Legality issues will be handled by 3938b42bc56SSotiris Apostolakis // instruction selection. 3948b42bc56SSotiris Apostolakis if (!TLI->isSelectSupported(TargetLowering::ScalarValSelect) && 3958b42bc56SSotiris Apostolakis !TLI->isSelectSupported(TargetLowering::ScalarCondVectorVal) && 3968b42bc56SSotiris Apostolakis !TLI->isSelectSupported(TargetLowering::VectorMaskSelect)) 3978b42bc56SSotiris Apostolakis return false; 3988b42bc56SSotiris Apostolakis 399ce08c7eeSpaperchalice TTI = &P.getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 40016a72a0fSDavid Green 40116a72a0fSDavid Green if (!TTI->enableSelectOptimize()) 40216a72a0fSDavid Green return false; 40316a72a0fSDavid Green 404ce08c7eeSpaperchalice LI = &P.getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 405ce08c7eeSpaperchalice BFI = &P.getAnalysis<BlockFrequencyInfoWrapperPass>().getBFI(); 406ce08c7eeSpaperchalice PSI = &P.getAnalysis<ProfileSummaryInfoWrapperPass>().getPSI(); 407ce08c7eeSpaperchalice ORE = &P.getAnalysis<OptimizationRemarkEmitterWrapperPass>().getORE(); 408d7ebb746SSotiris Apostolakis TSchedModel.init(TSI); 4098b42bc56SSotiris Apostolakis 4108b42bc56SSotiris Apostolakis // When optimizing for size, selects are preferable over branches. 4116ab26eabSEllis Hoag if (llvm::shouldOptimizeForSize(&F, PSI, BFI)) 4128b42bc56SSotiris Apostolakis return false; 41397c3ef5cSSotiris Apostolakis 41497c3ef5cSSotiris Apostolakis return optimizeSelects(F); 41597c3ef5cSSotiris Apostolakis } 41697c3ef5cSSotiris Apostolakis 417ce08c7eeSpaperchalice bool SelectOptimizeImpl::optimizeSelects(Function &F) { 41897c3ef5cSSotiris Apostolakis // Determine for which select groups it is profitable converting to branches. 41997c3ef5cSSotiris Apostolakis SelectGroups ProfSIGroups; 4208b42bc56SSotiris Apostolakis // Base heuristics apply only to non-loops and outer loops. 4218b42bc56SSotiris Apostolakis optimizeSelectsBase(F, ProfSIGroups); 4228b42bc56SSotiris Apostolakis // Separate heuristics for inner-most loops. 4238b42bc56SSotiris Apostolakis optimizeSelectsInnerLoops(F, ProfSIGroups); 42497c3ef5cSSotiris Apostolakis 42597c3ef5cSSotiris Apostolakis // Convert to branches the select groups that were deemed 42697c3ef5cSSotiris Apostolakis // profitable-to-convert. 42797c3ef5cSSotiris Apostolakis convertProfitableSIGroups(ProfSIGroups); 42897c3ef5cSSotiris Apostolakis 42997c3ef5cSSotiris Apostolakis // Code modified if at least one select group was converted. 43097c3ef5cSSotiris Apostolakis return !ProfSIGroups.empty(); 43197c3ef5cSSotiris Apostolakis } 43297c3ef5cSSotiris Apostolakis 433ce08c7eeSpaperchalice void SelectOptimizeImpl::optimizeSelectsBase(Function &F, 4348b42bc56SSotiris Apostolakis SelectGroups &ProfSIGroups) { 4358b42bc56SSotiris Apostolakis // Collect all the select groups. 4368b42bc56SSotiris Apostolakis SelectGroups SIGroups; 4378b42bc56SSotiris Apostolakis for (BasicBlock &BB : F) { 4388b42bc56SSotiris Apostolakis // Base heuristics apply only to non-loops and outer loops. 4398b42bc56SSotiris Apostolakis Loop *L = LI->getLoopFor(&BB); 4408b42bc56SSotiris Apostolakis if (L && L->isInnermost()) 4418b42bc56SSotiris Apostolakis continue; 4428b42bc56SSotiris Apostolakis collectSelectGroups(BB, SIGroups); 4438b42bc56SSotiris Apostolakis } 4448b42bc56SSotiris Apostolakis 4458b42bc56SSotiris Apostolakis // Determine for which select groups it is profitable converting to branches. 4468b42bc56SSotiris Apostolakis findProfitableSIGroupsBase(SIGroups, ProfSIGroups); 4478b42bc56SSotiris Apostolakis } 4488b42bc56SSotiris Apostolakis 449ce08c7eeSpaperchalice void SelectOptimizeImpl::optimizeSelectsInnerLoops(Function &F, 450d7ebb746SSotiris Apostolakis SelectGroups &ProfSIGroups) { 451d7ebb746SSotiris Apostolakis SmallVector<Loop *, 4> Loops(LI->begin(), LI->end()); 452d7ebb746SSotiris Apostolakis // Need to check size on each iteration as we accumulate child loops. 453d7ebb746SSotiris Apostolakis for (unsigned long i = 0; i < Loops.size(); ++i) 454d7ebb746SSotiris Apostolakis for (Loop *ChildL : Loops[i]->getSubLoops()) 455d7ebb746SSotiris Apostolakis Loops.push_back(ChildL); 456d7ebb746SSotiris Apostolakis 457d7ebb746SSotiris Apostolakis for (Loop *L : Loops) { 458d7ebb746SSotiris Apostolakis if (!L->isInnermost()) 459d7ebb746SSotiris Apostolakis continue; 460d7ebb746SSotiris Apostolakis 461d7ebb746SSotiris Apostolakis SelectGroups SIGroups; 462d7ebb746SSotiris Apostolakis for (BasicBlock *BB : L->getBlocks()) 463d7ebb746SSotiris Apostolakis collectSelectGroups(*BB, SIGroups); 464d7ebb746SSotiris Apostolakis 465d7ebb746SSotiris Apostolakis findProfitableSIGroupsInnerLoops(L, SIGroups, ProfSIGroups); 466d7ebb746SSotiris Apostolakis } 467d7ebb746SSotiris Apostolakis } 4688b42bc56SSotiris Apostolakis 469e874c8fcSIgor Kirillov /// Returns optimised value on \p IsTrue branch. For SelectInst that would be 470e874c8fcSIgor Kirillov /// either True or False value. For (BinaryOperator) instructions, where the 471e874c8fcSIgor Kirillov /// condition may be skipped, the operation will use a non-conditional operand. 472e874c8fcSIgor Kirillov /// For example, for `or(V,zext(cond))` this function would return V. 473e874c8fcSIgor Kirillov /// However, if the conditional operand on \p IsTrue branch matters, we create a 474e874c8fcSIgor Kirillov /// clone of instruction at the end of that branch \p B and replace the 475e874c8fcSIgor Kirillov /// condition operand with a constant. 476e874c8fcSIgor Kirillov /// 477e874c8fcSIgor Kirillov /// Also /p OptSelects contains previously optimised select-like instructions. 478e874c8fcSIgor Kirillov /// If the current value uses one of the optimised values, we can optimise it 479e874c8fcSIgor Kirillov /// further by replacing it with the corresponding value on the given branch 480e874c8fcSIgor Kirillov static Value *getTrueOrFalseValue( 481e874c8fcSIgor Kirillov SelectOptimizeImpl::SelectLike &SI, bool isTrue, 482e874c8fcSIgor Kirillov SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> &OptSelects, 483e874c8fcSIgor Kirillov BasicBlock *B) { 484e874c8fcSIgor Kirillov Value *V = isTrue ? SI.getTrueValue() : SI.getFalseValue(); 485e874c8fcSIgor Kirillov if (V) { 486*1d5ce614SKazu Hirata if (auto *IV = dyn_cast<Instruction>(V)) 487*1d5ce614SKazu Hirata if (auto It = OptSelects.find(IV); It != OptSelects.end()) 488*1d5ce614SKazu Hirata return isTrue ? It->second.first : It->second.second; 4894a7a27cbSIgor Kirillov return V; 490b5a11d37SIgor Kirillov } 491b5a11d37SIgor Kirillov 492e874c8fcSIgor Kirillov auto *BO = cast<BinaryOperator>(SI.getI()); 4939a0f2515SFlorian Hahn assert((BO->getOpcode() == Instruction::Add || 4949a0f2515SFlorian Hahn BO->getOpcode() == Instruction::Or || 4959a0f2515SFlorian Hahn BO->getOpcode() == Instruction::Sub) && 4969a0f2515SFlorian Hahn "Only currently handling Add, Or and Sub binary operators."); 497e874c8fcSIgor Kirillov 498e874c8fcSIgor Kirillov auto *CBO = BO->clone(); 499e874c8fcSIgor Kirillov auto CondIdx = SI.getConditionOpIndex(); 500e909c0ccSIgor Kirillov auto *AuxI = cast<Instruction>(CBO->getOperand(CondIdx)); 501e909c0ccSIgor Kirillov if (isa<ZExtInst>(AuxI) || isa<LShrOperator>(AuxI)) { 502e874c8fcSIgor Kirillov CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), 1)); 503e909c0ccSIgor Kirillov } else { 504c1f5937eSFlorian Hahn assert((isa<AShrOperator>(AuxI) || isa<SExtInst>(AuxI)) && 505c1f5937eSFlorian Hahn "Unexpected opcode"); 506e909c0ccSIgor Kirillov CBO->setOperand(CondIdx, ConstantInt::get(CBO->getType(), -1)); 507e909c0ccSIgor Kirillov } 508e874c8fcSIgor Kirillov 509e874c8fcSIgor Kirillov unsigned OtherIdx = 1 - CondIdx; 510e874c8fcSIgor Kirillov if (auto *IV = dyn_cast<Instruction>(CBO->getOperand(OtherIdx))) { 511*1d5ce614SKazu Hirata if (auto It = OptSelects.find(IV); It != OptSelects.end()) 512*1d5ce614SKazu Hirata CBO->setOperand(OtherIdx, isTrue ? It->second.first : It->second.second); 513e874c8fcSIgor Kirillov } 5148e702735SJeremy Morse CBO->insertBefore(B->getTerminator()->getIterator()); 515e874c8fcSIgor Kirillov return CBO; 516e874c8fcSIgor Kirillov } 517e874c8fcSIgor Kirillov 518ce08c7eeSpaperchalice void SelectOptimizeImpl::convertProfitableSIGroups(SelectGroups &ProfSIGroups) { 51997c3ef5cSSotiris Apostolakis for (SelectGroup &ASI : ProfSIGroups) { 52067be40dfSSotiris Apostolakis // The code transformation here is a modified version of the sinking 52167be40dfSSotiris Apostolakis // transformation in CodeGenPrepare::optimizeSelectInst with a more 52267be40dfSSotiris Apostolakis // aggressive strategy of which instructions to sink. 52367be40dfSSotiris Apostolakis // 52497c3ef5cSSotiris Apostolakis // TODO: eliminate the redundancy of logic transforming selects to branches 52597c3ef5cSSotiris Apostolakis // by removing CodeGenPrepare::optimizeSelectInst and optimizing here 52697c3ef5cSSotiris Apostolakis // selects for all cases (with and without profile information). 52797c3ef5cSSotiris Apostolakis 52897c3ef5cSSotiris Apostolakis // Transform a sequence like this: 52997c3ef5cSSotiris Apostolakis // start: 53097c3ef5cSSotiris Apostolakis // %cmp = cmp uge i32 %a, %b 53197c3ef5cSSotiris Apostolakis // %sel = select i1 %cmp, i32 %c, i32 %d 53297c3ef5cSSotiris Apostolakis // 53397c3ef5cSSotiris Apostolakis // Into: 53497c3ef5cSSotiris Apostolakis // start: 53597c3ef5cSSotiris Apostolakis // %cmp = cmp uge i32 %a, %b 53697c3ef5cSSotiris Apostolakis // %cmp.frozen = freeze %cmp 53767be40dfSSotiris Apostolakis // br i1 %cmp.frozen, label %select.true, label %select.false 53867be40dfSSotiris Apostolakis // select.true: 53967be40dfSSotiris Apostolakis // br label %select.end 54097c3ef5cSSotiris Apostolakis // select.false: 54197c3ef5cSSotiris Apostolakis // br label %select.end 54297c3ef5cSSotiris Apostolakis // select.end: 54367be40dfSSotiris Apostolakis // %sel = phi i32 [ %c, %select.true ], [ %d, %select.false ] 54497c3ef5cSSotiris Apostolakis // 54597c3ef5cSSotiris Apostolakis // %cmp should be frozen, otherwise it may introduce undefined behavior. 54667be40dfSSotiris Apostolakis // In addition, we may sink instructions that produce %c or %d into the 54767be40dfSSotiris Apostolakis // destination(s) of the new branch. 54867be40dfSSotiris Apostolakis // If the true or false blocks do not contain a sunken instruction, that 54967be40dfSSotiris Apostolakis // block and its branch may be optimized away. In that case, one side of the 55067be40dfSSotiris Apostolakis // first branch will point directly to select.end, and the corresponding PHI 55167be40dfSSotiris Apostolakis // predecessor block will be the start block. 55267be40dfSSotiris Apostolakis 55367be40dfSSotiris Apostolakis // Find all the instructions that can be soundly sunk to the true/false 55467be40dfSSotiris Apostolakis // blocks. These are instructions that are computed solely for producing the 55567be40dfSSotiris Apostolakis // operands of the select instructions in the group and can be sunk without 55667be40dfSSotiris Apostolakis // breaking the semantics of the LLVM IR (e.g., cannot sink instructions 55767be40dfSSotiris Apostolakis // with side effects). 55867be40dfSSotiris Apostolakis SmallVector<std::stack<Instruction *>, 2> TrueSlices, FalseSlices; 55967be40dfSSotiris Apostolakis typedef std::stack<Instruction *>::size_type StackSizeType; 56067be40dfSSotiris Apostolakis StackSizeType maxTrueSliceLen = 0, maxFalseSliceLen = 0; 561e874c8fcSIgor Kirillov for (SelectLike &SI : ASI.Selects) { 562e874c8fcSIgor Kirillov if (!isa<SelectInst>(SI.getI())) 563e874c8fcSIgor Kirillov continue; 56467be40dfSSotiris Apostolakis // For each select, compute the sinkable dependence chains of the true and 56567be40dfSSotiris Apostolakis // false operands. 566a2d68b4bSDavid Green if (auto *TI = dyn_cast_or_null<Instruction>(SI.getTrueValue())) { 56767be40dfSSotiris Apostolakis std::stack<Instruction *> TrueSlice; 568a2d68b4bSDavid Green getExclBackwardsSlice(TI, TrueSlice, SI.getI(), true); 56967be40dfSSotiris Apostolakis maxTrueSliceLen = std::max(maxTrueSliceLen, TrueSlice.size()); 57067be40dfSSotiris Apostolakis TrueSlices.push_back(TrueSlice); 57167be40dfSSotiris Apostolakis } 572a2d68b4bSDavid Green if (auto *FI = dyn_cast_or_null<Instruction>(SI.getFalseValue())) { 573a2d68b4bSDavid Green if (isa<SelectInst>(SI.getI()) || !FI->hasOneUse()) { 57467be40dfSSotiris Apostolakis std::stack<Instruction *> FalseSlice; 575a2d68b4bSDavid Green getExclBackwardsSlice(FI, FalseSlice, SI.getI(), true); 57667be40dfSSotiris Apostolakis maxFalseSliceLen = std::max(maxFalseSliceLen, FalseSlice.size()); 57767be40dfSSotiris Apostolakis FalseSlices.push_back(FalseSlice); 57867be40dfSSotiris Apostolakis } 57967be40dfSSotiris Apostolakis } 580a2d68b4bSDavid Green } 58167be40dfSSotiris Apostolakis // In the case of multiple select instructions in the same group, the order 58267be40dfSSotiris Apostolakis // of non-dependent instructions (instructions of different dependence 58367be40dfSSotiris Apostolakis // slices) in the true/false blocks appears to affect performance. 58467be40dfSSotiris Apostolakis // Interleaving the slices seems to experimentally be the optimal approach. 58567be40dfSSotiris Apostolakis // This interleaving scheduling allows for more ILP (with a natural downside 58667be40dfSSotiris Apostolakis // of increasing a bit register pressure) compared to a simple ordering of 58767be40dfSSotiris Apostolakis // one whole chain after another. One would expect that this ordering would 58867be40dfSSotiris Apostolakis // not matter since the scheduling in the backend of the compiler would 58967be40dfSSotiris Apostolakis // take care of it, but apparently the scheduler fails to deliver optimal 59067be40dfSSotiris Apostolakis // ILP with a naive ordering here. 59167be40dfSSotiris Apostolakis SmallVector<Instruction *, 2> TrueSlicesInterleaved, FalseSlicesInterleaved; 59267be40dfSSotiris Apostolakis for (StackSizeType IS = 0; IS < maxTrueSliceLen; ++IS) { 59367be40dfSSotiris Apostolakis for (auto &S : TrueSlices) { 59467be40dfSSotiris Apostolakis if (!S.empty()) { 59567be40dfSSotiris Apostolakis TrueSlicesInterleaved.push_back(S.top()); 59667be40dfSSotiris Apostolakis S.pop(); 59767be40dfSSotiris Apostolakis } 59867be40dfSSotiris Apostolakis } 59967be40dfSSotiris Apostolakis } 60067be40dfSSotiris Apostolakis for (StackSizeType IS = 0; IS < maxFalseSliceLen; ++IS) { 60167be40dfSSotiris Apostolakis for (auto &S : FalseSlices) { 60267be40dfSSotiris Apostolakis if (!S.empty()) { 60367be40dfSSotiris Apostolakis FalseSlicesInterleaved.push_back(S.top()); 60467be40dfSSotiris Apostolakis S.pop(); 60567be40dfSSotiris Apostolakis } 60667be40dfSSotiris Apostolakis } 60767be40dfSSotiris Apostolakis } 60897c3ef5cSSotiris Apostolakis 60997c3ef5cSSotiris Apostolakis // We split the block containing the select(s) into two blocks. 610e874c8fcSIgor Kirillov SelectLike &SI = ASI.Selects.front(); 611e874c8fcSIgor Kirillov SelectLike &LastSI = ASI.Selects.back(); 612a2d68b4bSDavid Green BasicBlock *StartBlock = SI.getI()->getParent(); 613a2d68b4bSDavid Green BasicBlock::iterator SplitPt = ++(BasicBlock::iterator(LastSI.getI())); 614a50bd0d7SOrlando Cazalet-Hyams // With RemoveDIs turned off, SplitPt can be a dbg.* intrinsic. With 615a50bd0d7SOrlando Cazalet-Hyams // RemoveDIs turned on, SplitPt would instead point to the next 616a50bd0d7SOrlando Cazalet-Hyams // instruction. To match existing dbg.* intrinsic behaviour with RemoveDIs, 617ffd08c77SStephen Tozer // tell splitBasicBlock that we want to include any DbgVariableRecords 618ffd08c77SStephen Tozer // attached to SplitPt in the splice. 619a50bd0d7SOrlando Cazalet-Hyams SplitPt.setHeadBit(true); 62097c3ef5cSSotiris Apostolakis BasicBlock *EndBlock = StartBlock->splitBasicBlock(SplitPt, "select.end"); 6215181156bSMatthias Braun BFI->setBlockFreq(EndBlock, BFI->getBlockFreq(StartBlock)); 62297c3ef5cSSotiris Apostolakis // Delete the unconditional branch that was just created by the split. 62397c3ef5cSSotiris Apostolakis StartBlock->getTerminator()->eraseFromParent(); 62497c3ef5cSSotiris Apostolakis 625e874c8fcSIgor Kirillov // Move any debug/pseudo and auxiliary instructions that were in-between the 6260a62a99aSDavid Green // select group to the newly-created end block. 6270a62a99aSDavid Green SmallVector<Instruction *, 2> SinkInstrs; 628a2d68b4bSDavid Green auto DIt = SI.getI()->getIterator(); 629e874c8fcSIgor Kirillov auto NIt = ASI.Selects.begin(); 630a2d68b4bSDavid Green while (&*DIt != LastSI.getI()) { 631e874c8fcSIgor Kirillov if (NIt != ASI.Selects.end() && &*DIt == NIt->getI()) 632e874c8fcSIgor Kirillov ++NIt; 633e874c8fcSIgor Kirillov else 6340a62a99aSDavid Green SinkInstrs.push_back(&*DIt); 63597c3ef5cSSotiris Apostolakis DIt++; 63697c3ef5cSSotiris Apostolakis } 637e874c8fcSIgor Kirillov auto InsertionPoint = EndBlock->getFirstInsertionPt(); 6380a62a99aSDavid Green for (auto *DI : SinkInstrs) 6398e702735SJeremy Morse DI->moveBeforePreserving(InsertionPoint); 64097c3ef5cSSotiris Apostolakis 641360da838SStephen Tozer // Duplicate implementation for DbgRecords, the non-instruction debug-info 642360da838SStephen Tozer // format. Helper lambda for moving DbgRecords to the end block. 643360da838SStephen Tozer auto TransferDbgRecords = [&](Instruction &I) { 644360da838SStephen Tozer for (auto &DbgRecord : 645360da838SStephen Tozer llvm::make_early_inc_range(I.getDbgRecordRange())) { 646360da838SStephen Tozer DbgRecord.removeFromParent(); 647360da838SStephen Tozer EndBlock->insertDbgRecordBefore(&DbgRecord, 648d7fb9eb8SJeremy Morse EndBlock->getFirstInsertionPt()); 649d7fb9eb8SJeremy Morse } 650d7fb9eb8SJeremy Morse }; 651d7fb9eb8SJeremy Morse 652d7fb9eb8SJeremy Morse // Iterate over all instructions in between SI and LastSI, not including 653d7fb9eb8SJeremy Morse // SI itself. These are all the variable assignments that happen "in the 654d7fb9eb8SJeremy Morse // middle" of the select group. 655a2d68b4bSDavid Green auto R = make_range(std::next(SI.getI()->getIterator()), 656a2d68b4bSDavid Green std::next(LastSI.getI()->getIterator())); 657360da838SStephen Tozer llvm::for_each(R, TransferDbgRecords); 658d7fb9eb8SJeremy Morse 65997c3ef5cSSotiris Apostolakis // These are the new basic blocks for the conditional branch. 66067be40dfSSotiris Apostolakis // At least one will become an actual new basic block. 66197c3ef5cSSotiris Apostolakis BasicBlock *TrueBlock = nullptr, *FalseBlock = nullptr; 66267be40dfSSotiris Apostolakis BranchInst *TrueBranch = nullptr, *FalseBranch = nullptr; 663e874c8fcSIgor Kirillov // Checks if select-like instruction would materialise on the given branch 664e874c8fcSIgor Kirillov auto HasSelectLike = [](SelectGroup &SG, bool IsTrue) { 665e874c8fcSIgor Kirillov for (auto &SL : SG.Selects) { 666e874c8fcSIgor Kirillov if ((IsTrue ? SL.getTrueValue() : SL.getFalseValue()) == nullptr) 667e874c8fcSIgor Kirillov return true; 668e874c8fcSIgor Kirillov } 669e874c8fcSIgor Kirillov return false; 670e874c8fcSIgor Kirillov }; 671e874c8fcSIgor Kirillov if (!TrueSlicesInterleaved.empty() || HasSelectLike(ASI, true)) { 672a2d68b4bSDavid Green TrueBlock = BasicBlock::Create(EndBlock->getContext(), "select.true.sink", 67367be40dfSSotiris Apostolakis EndBlock->getParent(), EndBlock); 67467be40dfSSotiris Apostolakis TrueBranch = BranchInst::Create(EndBlock, TrueBlock); 675a2d68b4bSDavid Green TrueBranch->setDebugLoc(LastSI.getI()->getDebugLoc()); 67667be40dfSSotiris Apostolakis for (Instruction *TrueInst : TrueSlicesInterleaved) 6778e702735SJeremy Morse TrueInst->moveBefore(TrueBranch->getIterator()); 67867be40dfSSotiris Apostolakis } 679e874c8fcSIgor Kirillov if (!FalseSlicesInterleaved.empty() || HasSelectLike(ASI, false)) { 680a2d68b4bSDavid Green FalseBlock = 681a2d68b4bSDavid Green BasicBlock::Create(EndBlock->getContext(), "select.false.sink", 68267be40dfSSotiris Apostolakis EndBlock->getParent(), EndBlock); 68367be40dfSSotiris Apostolakis FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 684a2d68b4bSDavid Green FalseBranch->setDebugLoc(LastSI.getI()->getDebugLoc()); 68567be40dfSSotiris Apostolakis for (Instruction *FalseInst : FalseSlicesInterleaved) 6868e702735SJeremy Morse FalseInst->moveBefore(FalseBranch->getIterator()); 68767be40dfSSotiris Apostolakis } 68867be40dfSSotiris Apostolakis // If there was nothing to sink, then arbitrarily choose the 'false' side 68967be40dfSSotiris Apostolakis // for a new input value to the PHI. 69067be40dfSSotiris Apostolakis if (TrueBlock == FalseBlock) { 69167be40dfSSotiris Apostolakis assert(TrueBlock == nullptr && 69267be40dfSSotiris Apostolakis "Unexpected basic block transform while optimizing select"); 69397c3ef5cSSotiris Apostolakis 694a2d68b4bSDavid Green FalseBlock = BasicBlock::Create(StartBlock->getContext(), "select.false", 69597c3ef5cSSotiris Apostolakis EndBlock->getParent(), EndBlock); 69697c3ef5cSSotiris Apostolakis auto *FalseBranch = BranchInst::Create(EndBlock, FalseBlock); 697a2d68b4bSDavid Green FalseBranch->setDebugLoc(SI.getI()->getDebugLoc()); 69867be40dfSSotiris Apostolakis } 69997c3ef5cSSotiris Apostolakis 70097c3ef5cSSotiris Apostolakis // Insert the real conditional branch based on the original condition. 70167be40dfSSotiris Apostolakis // If we did not create a new block for one of the 'true' or 'false' paths 70267be40dfSSotiris Apostolakis // of the condition, it means that side of the branch goes to the end block 70367be40dfSSotiris Apostolakis // directly and the path originates from the start block from the point of 70467be40dfSSotiris Apostolakis // view of the new PHI. 70597c3ef5cSSotiris Apostolakis BasicBlock *TT, *FT; 70667be40dfSSotiris Apostolakis if (TrueBlock == nullptr) { 70797c3ef5cSSotiris Apostolakis TT = EndBlock; 70897c3ef5cSSotiris Apostolakis FT = FalseBlock; 70967be40dfSSotiris Apostolakis TrueBlock = StartBlock; 71067be40dfSSotiris Apostolakis } else if (FalseBlock == nullptr) { 71167be40dfSSotiris Apostolakis TT = TrueBlock; 71267be40dfSSotiris Apostolakis FT = EndBlock; 71367be40dfSSotiris Apostolakis FalseBlock = StartBlock; 71467be40dfSSotiris Apostolakis } else { 71567be40dfSSotiris Apostolakis TT = TrueBlock; 71667be40dfSSotiris Apostolakis FT = FalseBlock; 71767be40dfSSotiris Apostolakis } 718a2d68b4bSDavid Green IRBuilder<> IB(SI.getI()); 719e874c8fcSIgor Kirillov auto *CondFr = 720e874c8fcSIgor Kirillov IB.CreateFreeze(ASI.Condition, ASI.Condition->getName() + ".frozen"); 72197c3ef5cSSotiris Apostolakis 722e874c8fcSIgor Kirillov SmallDenseMap<Instruction *, std::pair<Value *, Value *>, 2> INS; 723a2d68b4bSDavid Green 72497c3ef5cSSotiris Apostolakis // Use reverse iterator because later select may use the value of the 72597c3ef5cSSotiris Apostolakis // earlier select, and we need to propagate value through earlier select 72697c3ef5cSSotiris Apostolakis // to get the PHI operand. 727e874c8fcSIgor Kirillov InsertionPoint = EndBlock->begin(); 728e874c8fcSIgor Kirillov for (SelectLike &SI : ASI.Selects) { 72997c3ef5cSSotiris Apostolakis // The select itself is replaced with a PHI Node. 730a2d68b4bSDavid Green PHINode *PN = PHINode::Create(SI.getType(), 2, ""); 731e874c8fcSIgor Kirillov PN->insertBefore(InsertionPoint); 732a2d68b4bSDavid Green PN->takeName(SI.getI()); 733e874c8fcSIgor Kirillov // Current instruction might be a condition of some other group, so we 734e874c8fcSIgor Kirillov // need to replace it there to avoid dangling pointer 735e874c8fcSIgor Kirillov if (PN->getType()->isIntegerTy(1)) { 736e874c8fcSIgor Kirillov for (auto &SG : ProfSIGroups) { 737e874c8fcSIgor Kirillov if (SG.Condition == SI.getI()) 738e874c8fcSIgor Kirillov SG.Condition = PN; 739e874c8fcSIgor Kirillov } 740e874c8fcSIgor Kirillov } 7414a7a27cbSIgor Kirillov SI.getI()->replaceAllUsesWith(PN); 742e874c8fcSIgor Kirillov auto *TV = getTrueOrFalseValue(SI, true, INS, TrueBlock); 743e874c8fcSIgor Kirillov auto *FV = getTrueOrFalseValue(SI, false, INS, FalseBlock); 744e874c8fcSIgor Kirillov INS[PN] = {TV, FV}; 745e874c8fcSIgor Kirillov PN->addIncoming(TV, TrueBlock); 746e874c8fcSIgor Kirillov PN->addIncoming(FV, FalseBlock); 747e874c8fcSIgor Kirillov PN->setDebugLoc(SI.getI()->getDebugLoc()); 74897c3ef5cSSotiris Apostolakis ++NumSelectsConverted; 74997c3ef5cSSotiris Apostolakis } 750a2d68b4bSDavid Green IB.CreateCondBr(CondFr, TT, FT, SI.getI()); 751a2d68b4bSDavid Green 752a2d68b4bSDavid Green // Remove the old select instructions, now that they are not longer used. 753e874c8fcSIgor Kirillov for (SelectLike &SI : ASI.Selects) 754a2d68b4bSDavid Green SI.getI()->eraseFromParent(); 75597c3ef5cSSotiris Apostolakis } 75697c3ef5cSSotiris Apostolakis } 75797c3ef5cSSotiris Apostolakis 758ce08c7eeSpaperchalice void SelectOptimizeImpl::collectSelectGroups(BasicBlock &BB, 75997c3ef5cSSotiris Apostolakis SelectGroups &SIGroups) { 760e874c8fcSIgor Kirillov // Represents something that can be considered as select instruction. 761e874c8fcSIgor Kirillov // Auxiliary instruction are instructions that depends on a condition and have 762e874c8fcSIgor Kirillov // zero or some constant value on True/False branch, such as: 763e874c8fcSIgor Kirillov // * ZExt(1bit) 764c1f5937eSFlorian Hahn // * SExt(1bit) 765e874c8fcSIgor Kirillov // * Not(1bit) 766e909c0ccSIgor Kirillov // * A(L)Shr(Val), ValBitSize - 1, where there is a condition like `Val <= 0` 767e909c0ccSIgor Kirillov // earlier in the BB. For conditions that check the sign of the Val compiler 768e909c0ccSIgor Kirillov // may generate shifts instead of ZExt/SExt. 769e874c8fcSIgor Kirillov struct SelectLikeInfo { 770e874c8fcSIgor Kirillov Value *Cond; 771e874c8fcSIgor Kirillov bool IsAuxiliary; 772e874c8fcSIgor Kirillov bool IsInverted; 773e874c8fcSIgor Kirillov unsigned ConditionIdx; 774e874c8fcSIgor Kirillov }; 775e874c8fcSIgor Kirillov 776e874c8fcSIgor Kirillov DenseMap<Value *, SelectLikeInfo> SelectInfo; 777e909c0ccSIgor Kirillov // Keeps visited comparisons to help identify AShr/LShr variants of auxiliary 778e909c0ccSIgor Kirillov // instructions. 779e909c0ccSIgor Kirillov SmallSetVector<CmpInst *, 4> SeenCmp; 780e874c8fcSIgor Kirillov 781e874c8fcSIgor Kirillov // Check if the instruction is SelectLike or might be part of SelectLike 782e874c8fcSIgor Kirillov // expression, put information into SelectInfo and return the iterator to the 783e874c8fcSIgor Kirillov // inserted position. 784e909c0ccSIgor Kirillov auto ProcessSelectInfo = [&SelectInfo, &SeenCmp](Instruction *I) { 785e909c0ccSIgor Kirillov if (auto *Cmp = dyn_cast<CmpInst>(I)) { 786e909c0ccSIgor Kirillov SeenCmp.insert(Cmp); 787e909c0ccSIgor Kirillov return SelectInfo.end(); 788e909c0ccSIgor Kirillov } 789e909c0ccSIgor Kirillov 790e874c8fcSIgor Kirillov Value *Cond; 791c1f5937eSFlorian Hahn if (match(I, m_OneUse(m_ZExtOrSExt(m_Value(Cond)))) && 792e874c8fcSIgor Kirillov Cond->getType()->isIntegerTy(1)) { 793e874c8fcSIgor Kirillov bool Inverted = match(Cond, m_Not(m_Value(Cond))); 794e874c8fcSIgor Kirillov return SelectInfo.insert({I, {Cond, true, Inverted, 0}}).first; 795e874c8fcSIgor Kirillov } 796e874c8fcSIgor Kirillov 797e874c8fcSIgor Kirillov if (match(I, m_Not(m_Value(Cond)))) { 798e874c8fcSIgor Kirillov return SelectInfo.insert({I, {Cond, true, true, 0}}).first; 799e874c8fcSIgor Kirillov } 800e874c8fcSIgor Kirillov 801e874c8fcSIgor Kirillov // Select instruction are what we are usually looking for. 802e874c8fcSIgor Kirillov if (match(I, m_Select(m_Value(Cond), m_Value(), m_Value()))) { 803e874c8fcSIgor Kirillov bool Inverted = match(Cond, m_Not(m_Value(Cond))); 804e874c8fcSIgor Kirillov return SelectInfo.insert({I, {Cond, false, Inverted, 0}}).first; 805e874c8fcSIgor Kirillov } 806e909c0ccSIgor Kirillov Value *Val; 807e909c0ccSIgor Kirillov ConstantInt *Shift; 808e909c0ccSIgor Kirillov if (match(I, m_Shr(m_Value(Val), m_ConstantInt(Shift))) && 809e909c0ccSIgor Kirillov I->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1) { 810e909c0ccSIgor Kirillov for (auto *CmpI : SeenCmp) { 811e909c0ccSIgor Kirillov auto Pred = CmpI->getPredicate(); 812e909c0ccSIgor Kirillov if (Val != CmpI->getOperand(0)) 813e909c0ccSIgor Kirillov continue; 814e909c0ccSIgor Kirillov if ((Pred == CmpInst::ICMP_SGT && 815e909c0ccSIgor Kirillov match(CmpI->getOperand(1), m_ConstantInt<-1>())) || 816e909c0ccSIgor Kirillov (Pred == CmpInst::ICMP_SGE && 817e909c0ccSIgor Kirillov match(CmpI->getOperand(1), m_Zero())) || 818e909c0ccSIgor Kirillov (Pred == CmpInst::ICMP_SLT && 819e909c0ccSIgor Kirillov match(CmpI->getOperand(1), m_Zero())) || 820e909c0ccSIgor Kirillov (Pred == CmpInst::ICMP_SLE && 821e909c0ccSIgor Kirillov match(CmpI->getOperand(1), m_ConstantInt<-1>()))) { 822e909c0ccSIgor Kirillov bool Inverted = 823e909c0ccSIgor Kirillov Pred == CmpInst::ICMP_SGT || Pred == CmpInst::ICMP_SGE; 824e909c0ccSIgor Kirillov return SelectInfo.insert({I, {CmpI, true, Inverted, 0}}).first; 825e909c0ccSIgor Kirillov } 826e909c0ccSIgor Kirillov } 827e909c0ccSIgor Kirillov return SelectInfo.end(); 828e909c0ccSIgor Kirillov } 829e874c8fcSIgor Kirillov 830e909c0ccSIgor Kirillov // An BinOp(Aux(X), Y) can also be treated like a select, with condition X 831e874c8fcSIgor Kirillov // and values Y|1 and Y. 832c1f5937eSFlorian Hahn // `Aux` can be either `ZExt(1bit)`, `SExt(1bit)` or `XShr(Val), ValBitSize 833c1f5937eSFlorian Hahn // - 1` `BinOp` can be Add, Sub, Or 8349a0f2515SFlorian Hahn Value *X; 835c1f5937eSFlorian Hahn auto MatchZExtOrSExtPattern = 836c1f5937eSFlorian Hahn m_c_BinOp(m_Value(), m_OneUse(m_ZExtOrSExt(m_Value(X)))); 837e909c0ccSIgor Kirillov auto MatchShiftPattern = 838e909c0ccSIgor Kirillov m_c_BinOp(m_Value(), m_OneUse(m_Shr(m_Value(X), m_ConstantInt(Shift)))); 839e909c0ccSIgor Kirillov 840e909c0ccSIgor Kirillov // This check is unnecessary, but it prevents costly access to the 841e909c0ccSIgor Kirillov // SelectInfo map. 842c1f5937eSFlorian Hahn if ((match(I, MatchZExtOrSExtPattern) && X->getType()->isIntegerTy(1)) || 843e909c0ccSIgor Kirillov (match(I, MatchShiftPattern) && 844e909c0ccSIgor Kirillov X->getType()->getIntegerBitWidth() == Shift->getZExtValue() + 1)) { 845e909c0ccSIgor Kirillov if (I->getOpcode() != Instruction::Add && 846e909c0ccSIgor Kirillov I->getOpcode() != Instruction::Sub && 847e909c0ccSIgor Kirillov I->getOpcode() != Instruction::Or) 8489a0f2515SFlorian Hahn return SelectInfo.end(); 849e909c0ccSIgor Kirillov 850e909c0ccSIgor Kirillov if (I->getOpcode() == Instruction::Or && I->getType()->isIntegerTy(1)) 851e874c8fcSIgor Kirillov return SelectInfo.end(); 852e874c8fcSIgor Kirillov 853444e53f6SIgor Kirillov // Iterate through operands and find dependant on recognised sign 854444e53f6SIgor Kirillov // extending auxiliary select-like instructions. The operand index does 855444e53f6SIgor Kirillov // not matter for Add and Or. However, for Sub, we can only safely 856444e53f6SIgor Kirillov // transform when the operand is second. 857e909c0ccSIgor Kirillov unsigned Idx = I->getOpcode() == Instruction::Sub ? 1 : 0; 858444e53f6SIgor Kirillov for (; Idx < 2; Idx++) { 859e909c0ccSIgor Kirillov auto *Op = I->getOperand(Idx); 860e874c8fcSIgor Kirillov auto It = SelectInfo.find(Op); 861e874c8fcSIgor Kirillov if (It != SelectInfo.end() && It->second.IsAuxiliary) { 862e874c8fcSIgor Kirillov Cond = It->second.Cond; 863e874c8fcSIgor Kirillov bool Inverted = It->second.IsInverted; 864e874c8fcSIgor Kirillov return SelectInfo.insert({I, {Cond, false, Inverted, Idx}}).first; 865e874c8fcSIgor Kirillov } 866e874c8fcSIgor Kirillov } 867e874c8fcSIgor Kirillov } 868e874c8fcSIgor Kirillov return SelectInfo.end(); 869e874c8fcSIgor Kirillov }; 870e874c8fcSIgor Kirillov 871e874c8fcSIgor Kirillov bool AlreadyProcessed = false; 87297c3ef5cSSotiris Apostolakis BasicBlock::iterator BBIt = BB.begin(); 873e874c8fcSIgor Kirillov DenseMap<Value *, SelectLikeInfo>::iterator It; 87497c3ef5cSSotiris Apostolakis while (BBIt != BB.end()) { 87597c3ef5cSSotiris Apostolakis Instruction *I = &*BBIt++; 876e874c8fcSIgor Kirillov if (I->isDebugOrPseudoInst()) 877e874c8fcSIgor Kirillov continue; 878e874c8fcSIgor Kirillov 879e874c8fcSIgor Kirillov if (!AlreadyProcessed) 880e874c8fcSIgor Kirillov It = ProcessSelectInfo(I); 881e874c8fcSIgor Kirillov else 882e874c8fcSIgor Kirillov AlreadyProcessed = false; 883e874c8fcSIgor Kirillov 884e874c8fcSIgor Kirillov if (It == SelectInfo.end() || It->second.IsAuxiliary) 885e874c8fcSIgor Kirillov continue; 886e874c8fcSIgor Kirillov 887a2d68b4bSDavid Green if (!TTI->shouldTreatInstructionLikeSelect(I)) 888ca78b560SDavid Green continue; 889ca78b560SDavid Green 890e874c8fcSIgor Kirillov Value *Cond = It->second.Cond; 891e874c8fcSIgor Kirillov // Vector conditions are not supported. 892e874c8fcSIgor Kirillov if (!Cond->getType()->isIntegerTy(1)) 893e874c8fcSIgor Kirillov continue; 894e874c8fcSIgor Kirillov 895e874c8fcSIgor Kirillov SelectGroup SIGroup = {Cond, {}}; 896e874c8fcSIgor Kirillov SIGroup.Selects.emplace_back(I, It->second.IsInverted, 897e874c8fcSIgor Kirillov It->second.ConditionIdx); 898e874c8fcSIgor Kirillov 899e874c8fcSIgor Kirillov // If the select type is not supported, no point optimizing it. 900e874c8fcSIgor Kirillov // Instruction selection will take care of it. 901e874c8fcSIgor Kirillov if (!isSelectKindSupported(SIGroup.Selects.front())) 902e874c8fcSIgor Kirillov continue; 903e874c8fcSIgor Kirillov 90497c3ef5cSSotiris Apostolakis while (BBIt != BB.end()) { 90597c3ef5cSSotiris Apostolakis Instruction *NI = &*BBIt; 90697c3ef5cSSotiris Apostolakis // Debug/pseudo instructions should be skipped and not prevent the 90797c3ef5cSSotiris Apostolakis // formation of a select group. 908a2d68b4bSDavid Green if (NI->isDebugOrPseudoInst()) { 909a2d68b4bSDavid Green ++BBIt; 910a2d68b4bSDavid Green continue; 91197c3ef5cSSotiris Apostolakis } 9120a62a99aSDavid Green 913e874c8fcSIgor Kirillov It = ProcessSelectInfo(NI); 914e874c8fcSIgor Kirillov if (It == SelectInfo.end()) { 915e874c8fcSIgor Kirillov AlreadyProcessed = true; 916e874c8fcSIgor Kirillov break; 9170a62a99aSDavid Green } 9180a62a99aSDavid Green 919e874c8fcSIgor Kirillov // Auxiliary with same condition 920e874c8fcSIgor Kirillov auto [CurrCond, IsAux, IsRev, CondIdx] = It->second; 921e874c8fcSIgor Kirillov if (Cond != CurrCond) { 922e874c8fcSIgor Kirillov AlreadyProcessed = true; 923a2d68b4bSDavid Green break; 924b5a11d37SIgor Kirillov } 9254a7a27cbSIgor Kirillov 926e874c8fcSIgor Kirillov if (!IsAux) 927e874c8fcSIgor Kirillov SIGroup.Selects.emplace_back(NI, IsRev, CondIdx); 928e874c8fcSIgor Kirillov ++BBIt; 929e874c8fcSIgor Kirillov } 9300a62a99aSDavid Green LLVM_DEBUG({ 931e874c8fcSIgor Kirillov dbgs() << "New Select group (" << SIGroup.Selects.size() << ") with\n"; 932e874c8fcSIgor Kirillov for (auto &SI : SIGroup.Selects) 9330a62a99aSDavid Green dbgs() << " " << *SI.getI() << "\n"; 9340a62a99aSDavid Green }); 9350a62a99aSDavid Green 93697c3ef5cSSotiris Apostolakis SIGroups.push_back(SIGroup); 93797c3ef5cSSotiris Apostolakis } 93897c3ef5cSSotiris Apostolakis } 93997c3ef5cSSotiris Apostolakis 940ce08c7eeSpaperchalice void SelectOptimizeImpl::findProfitableSIGroupsBase( 941ce08c7eeSpaperchalice SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { 9428b42bc56SSotiris Apostolakis for (SelectGroup &ASI : SIGroups) { 9438b42bc56SSotiris Apostolakis ++NumSelectOptAnalyzed; 9448b42bc56SSotiris Apostolakis if (isConvertToBranchProfitableBase(ASI)) 9458b42bc56SSotiris Apostolakis ProfSIGroups.push_back(ASI); 9468b42bc56SSotiris Apostolakis } 9478b42bc56SSotiris Apostolakis } 9488b42bc56SSotiris Apostolakis 9497d098988SDavid Green static void EmitAndPrintRemark(OptimizationRemarkEmitter *ORE, 9507d098988SDavid Green DiagnosticInfoOptimizationBase &Rem) { 9517d098988SDavid Green LLVM_DEBUG(dbgs() << Rem.getMsg() << "\n"); 9527d098988SDavid Green ORE->emit(Rem); 9537d098988SDavid Green } 9547d098988SDavid Green 955ce08c7eeSpaperchalice void SelectOptimizeImpl::findProfitableSIGroupsInnerLoops( 956d7ebb746SSotiris Apostolakis const Loop *L, SelectGroups &SIGroups, SelectGroups &ProfSIGroups) { 957d7ebb746SSotiris Apostolakis NumSelectOptAnalyzed += SIGroups.size(); 958d7ebb746SSotiris Apostolakis // For each select group in an inner-most loop, 959d7ebb746SSotiris Apostolakis // a branch is more preferable than a select/conditional-move if: 960d7ebb746SSotiris Apostolakis // i) conversion to branches for all the select groups of the loop satisfies 961d7ebb746SSotiris Apostolakis // loop-level heuristics including reducing the loop's critical path by 962ce08c7eeSpaperchalice // some threshold (see SelectOptimizeImpl::checkLoopHeuristics); and 963d7ebb746SSotiris Apostolakis // ii) the total cost of the select group is cheaper with a branch compared 964d7ebb746SSotiris Apostolakis // to its predicated version. The cost is in terms of latency and the cost 965d7ebb746SSotiris Apostolakis // of a select group is the cost of its most expensive select instruction 966d7ebb746SSotiris Apostolakis // (assuming infinite resources and thus fully leveraging available ILP). 967d7ebb746SSotiris Apostolakis 968d7ebb746SSotiris Apostolakis DenseMap<const Instruction *, CostInfo> InstCostMap; 969d7ebb746SSotiris Apostolakis CostInfo LoopCost[2] = {{Scaled64::getZero(), Scaled64::getZero()}, 970d7ebb746SSotiris Apostolakis {Scaled64::getZero(), Scaled64::getZero()}}; 971d7ebb746SSotiris Apostolakis if (!computeLoopCosts(L, SIGroups, InstCostMap, LoopCost) || 972d7ebb746SSotiris Apostolakis !checkLoopHeuristics(L, LoopCost)) { 973d7ebb746SSotiris Apostolakis return; 974d7ebb746SSotiris Apostolakis } 975d7ebb746SSotiris Apostolakis 976d7ebb746SSotiris Apostolakis for (SelectGroup &ASI : SIGroups) { 977d7ebb746SSotiris Apostolakis // Assuming infinite resources, the cost of a group of instructions is the 978d7ebb746SSotiris Apostolakis // cost of the most expensive instruction of the group. 979d7ebb746SSotiris Apostolakis Scaled64 SelectCost = Scaled64::getZero(), BranchCost = Scaled64::getZero(); 980e874c8fcSIgor Kirillov for (SelectLike &SI : ASI.Selects) { 981a2d68b4bSDavid Green SelectCost = std::max(SelectCost, InstCostMap[SI.getI()].PredCost); 982a2d68b4bSDavid Green BranchCost = std::max(BranchCost, InstCostMap[SI.getI()].NonPredCost); 983d7ebb746SSotiris Apostolakis } 984d7ebb746SSotiris Apostolakis if (BranchCost < SelectCost) { 985e874c8fcSIgor Kirillov OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", 986e874c8fcSIgor Kirillov ASI.Selects.front().getI()); 987d7ebb746SSotiris Apostolakis OR << "Profitable to convert to branch (loop analysis). BranchCost=" 988d7ebb746SSotiris Apostolakis << BranchCost.toString() << ", SelectCost=" << SelectCost.toString() 989d7ebb746SSotiris Apostolakis << ". "; 9907d098988SDavid Green EmitAndPrintRemark(ORE, OR); 991d7ebb746SSotiris Apostolakis ++NumSelectConvertedLoop; 992d7ebb746SSotiris Apostolakis ProfSIGroups.push_back(ASI); 993d7ebb746SSotiris Apostolakis } else { 994a2d68b4bSDavid Green OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", 995e874c8fcSIgor Kirillov ASI.Selects.front().getI()); 996d7ebb746SSotiris Apostolakis ORmiss << "Select is more profitable (loop analysis). BranchCost=" 997d7ebb746SSotiris Apostolakis << BranchCost.toString() 998d7ebb746SSotiris Apostolakis << ", SelectCost=" << SelectCost.toString() << ". "; 9997d098988SDavid Green EmitAndPrintRemark(ORE, ORmiss); 1000d7ebb746SSotiris Apostolakis } 1001d7ebb746SSotiris Apostolakis } 1002d7ebb746SSotiris Apostolakis } 1003d7ebb746SSotiris Apostolakis 1004ce08c7eeSpaperchalice bool SelectOptimizeImpl::isConvertToBranchProfitableBase( 1005a2d68b4bSDavid Green const SelectGroup &ASI) { 1006e874c8fcSIgor Kirillov const SelectLike &SI = ASI.Selects.front(); 1007995d21bcSwangpc LLVM_DEBUG(dbgs() << "Analyzing select group containing " << *SI.getI() 1008a2d68b4bSDavid Green << "\n"); 1009a2d68b4bSDavid Green OptimizationRemark OR(DEBUG_TYPE, "SelectOpti", SI.getI()); 1010a2d68b4bSDavid Green OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", SI.getI()); 10118b42bc56SSotiris Apostolakis 10128b42bc56SSotiris Apostolakis // Skip cold basic blocks. Better to optimize for size for cold blocks. 1013a2d68b4bSDavid Green if (PSI->isColdBlock(SI.getI()->getParent(), BFI)) { 10148b42bc56SSotiris Apostolakis ++NumSelectColdBB; 10158b42bc56SSotiris Apostolakis ORmiss << "Not converted to branch because of cold basic block. "; 10167d098988SDavid Green EmitAndPrintRemark(ORE, ORmiss); 10178b42bc56SSotiris Apostolakis return false; 10188b42bc56SSotiris Apostolakis } 10198b42bc56SSotiris Apostolakis 10208b42bc56SSotiris Apostolakis // If unpredictable, branch form is less profitable. 1021a2d68b4bSDavid Green if (SI.getI()->getMetadata(LLVMContext::MD_unpredictable)) { 10228b42bc56SSotiris Apostolakis ++NumSelectUnPred; 10238b42bc56SSotiris Apostolakis ORmiss << "Not converted to branch because of unpredictable branch. "; 10247d098988SDavid Green EmitAndPrintRemark(ORE, ORmiss); 10258b42bc56SSotiris Apostolakis return false; 10268b42bc56SSotiris Apostolakis } 10278b42bc56SSotiris Apostolakis 10288b42bc56SSotiris Apostolakis // If highly predictable, branch form is more profitable, unless a 10298b42bc56SSotiris Apostolakis // predictable select is inexpensive in the target architecture. 10308b42bc56SSotiris Apostolakis if (isSelectHighlyPredictable(SI) && TLI->isPredictableSelectExpensive()) { 10318b42bc56SSotiris Apostolakis ++NumSelectConvertedHighPred; 10328b42bc56SSotiris Apostolakis OR << "Converted to branch because of highly predictable branch. "; 10337d098988SDavid Green EmitAndPrintRemark(ORE, OR); 10348b42bc56SSotiris Apostolakis return true; 10358b42bc56SSotiris Apostolakis } 10368b42bc56SSotiris Apostolakis 10378b42bc56SSotiris Apostolakis // Look for expensive instructions in the cold operand's (if any) dependence 10388b42bc56SSotiris Apostolakis // slice of any of the selects in the group. 10398b42bc56SSotiris Apostolakis if (hasExpensiveColdOperand(ASI)) { 10408b42bc56SSotiris Apostolakis ++NumSelectConvertedExpColdOperand; 10418b42bc56SSotiris Apostolakis OR << "Converted to branch because of expensive cold operand."; 10427d098988SDavid Green EmitAndPrintRemark(ORE, OR); 10438b42bc56SSotiris Apostolakis return true; 10448b42bc56SSotiris Apostolakis } 10458b42bc56SSotiris Apostolakis 10463469996dSIgor Kirillov // If latch has a select group with several elements, it is usually profitable 10473469996dSIgor Kirillov // to convert it to branches. We let `optimizeSelectsInnerLoops` decide if 10483469996dSIgor Kirillov // conversion is profitable for innermost loops. 10493469996dSIgor Kirillov auto *BB = SI.getI()->getParent(); 10503469996dSIgor Kirillov auto *L = LI->getLoopFor(BB); 10513469996dSIgor Kirillov if (L && !L->isInnermost() && L->getLoopLatch() == BB && 10523469996dSIgor Kirillov ASI.Selects.size() >= 3) { 10533469996dSIgor Kirillov OR << "Converted to branch because select group in the latch block is big."; 10543469996dSIgor Kirillov EmitAndPrintRemark(ORE, OR); 10553469996dSIgor Kirillov return true; 10563469996dSIgor Kirillov } 10573469996dSIgor Kirillov 10588b42bc56SSotiris Apostolakis ORmiss << "Not profitable to convert to branch (base heuristic)."; 10597d098988SDavid Green EmitAndPrintRemark(ORE, ORmiss); 10608b42bc56SSotiris Apostolakis return false; 10618b42bc56SSotiris Apostolakis } 10628b42bc56SSotiris Apostolakis 10638b42bc56SSotiris Apostolakis static InstructionCost divideNearest(InstructionCost Numerator, 10648b42bc56SSotiris Apostolakis uint64_t Denominator) { 10658b42bc56SSotiris Apostolakis return (Numerator + (Denominator / 2)) / Denominator; 10668b42bc56SSotiris Apostolakis } 10678b42bc56SSotiris Apostolakis 1068a2d68b4bSDavid Green static bool extractBranchWeights(const SelectOptimizeImpl::SelectLike SI, 1069a2d68b4bSDavid Green uint64_t &TrueVal, uint64_t &FalseVal) { 1070a2d68b4bSDavid Green if (isa<SelectInst>(SI.getI())) 1071a2d68b4bSDavid Green return extractBranchWeights(*SI.getI(), TrueVal, FalseVal); 1072a2d68b4bSDavid Green return false; 1073a2d68b4bSDavid Green } 1074a2d68b4bSDavid Green 1075a2d68b4bSDavid Green bool SelectOptimizeImpl::hasExpensiveColdOperand(const SelectGroup &ASI) { 10768b42bc56SSotiris Apostolakis bool ColdOperand = false; 10778b42bc56SSotiris Apostolakis uint64_t TrueWeight, FalseWeight, TotalWeight; 1078e874c8fcSIgor Kirillov if (extractBranchWeights(ASI.Selects.front(), TrueWeight, FalseWeight)) { 10798b42bc56SSotiris Apostolakis uint64_t MinWeight = std::min(TrueWeight, FalseWeight); 10808b42bc56SSotiris Apostolakis TotalWeight = TrueWeight + FalseWeight; 10818b42bc56SSotiris Apostolakis // Is there a path with frequency <ColdOperandThreshold% (default:20%) ? 10828b42bc56SSotiris Apostolakis ColdOperand = TotalWeight * ColdOperandThreshold > 100 * MinWeight; 10838b42bc56SSotiris Apostolakis } else if (PSI->hasProfileSummary()) { 1084a2d68b4bSDavid Green OptimizationRemarkMissed ORmiss(DEBUG_TYPE, "SelectOpti", 1085e874c8fcSIgor Kirillov ASI.Selects.front().getI()); 10868b42bc56SSotiris Apostolakis ORmiss << "Profile data available but missing branch-weights metadata for " 10878b42bc56SSotiris Apostolakis "select instruction. "; 10887d098988SDavid Green EmitAndPrintRemark(ORE, ORmiss); 10898b42bc56SSotiris Apostolakis } 10908b42bc56SSotiris Apostolakis if (!ColdOperand) 10918b42bc56SSotiris Apostolakis return false; 10928b42bc56SSotiris Apostolakis // Check if the cold path's dependence slice is expensive for any of the 10938b42bc56SSotiris Apostolakis // selects of the group. 1094e874c8fcSIgor Kirillov for (SelectLike SI : ASI.Selects) { 10958b42bc56SSotiris Apostolakis Instruction *ColdI = nullptr; 10968b42bc56SSotiris Apostolakis uint64_t HotWeight; 10978b42bc56SSotiris Apostolakis if (TrueWeight < FalseWeight) { 1098a2d68b4bSDavid Green ColdI = dyn_cast_or_null<Instruction>(SI.getTrueValue()); 10998b42bc56SSotiris Apostolakis HotWeight = FalseWeight; 11008b42bc56SSotiris Apostolakis } else { 1101a2d68b4bSDavid Green ColdI = dyn_cast_or_null<Instruction>(SI.getFalseValue()); 11028b42bc56SSotiris Apostolakis HotWeight = TrueWeight; 11038b42bc56SSotiris Apostolakis } 11048b42bc56SSotiris Apostolakis if (ColdI) { 110567be40dfSSotiris Apostolakis std::stack<Instruction *> ColdSlice; 1106a2d68b4bSDavid Green getExclBackwardsSlice(ColdI, ColdSlice, SI.getI()); 11078b42bc56SSotiris Apostolakis InstructionCost SliceCost = 0; 110867be40dfSSotiris Apostolakis while (!ColdSlice.empty()) { 110967be40dfSSotiris Apostolakis SliceCost += TTI->getInstructionCost(ColdSlice.top(), 111067be40dfSSotiris Apostolakis TargetTransformInfo::TCK_Latency); 111167be40dfSSotiris Apostolakis ColdSlice.pop(); 11128b42bc56SSotiris Apostolakis } 11138b42bc56SSotiris Apostolakis // The colder the cold value operand of the select is the more expensive 11148b42bc56SSotiris Apostolakis // the cmov becomes for computing the cold value operand every time. Thus, 11158b42bc56SSotiris Apostolakis // the colder the cold operand is the more its cost counts. 11168b42bc56SSotiris Apostolakis // Get nearest integer cost adjusted for coldness. 11178b42bc56SSotiris Apostolakis InstructionCost AdjSliceCost = 11188b42bc56SSotiris Apostolakis divideNearest(SliceCost * HotWeight, TotalWeight); 11198b42bc56SSotiris Apostolakis if (AdjSliceCost >= 11208b42bc56SSotiris Apostolakis ColdOperandMaxCostMultiplier * TargetTransformInfo::TCC_Expensive) 11218b42bc56SSotiris Apostolakis return true; 11228b42bc56SSotiris Apostolakis } 11238b42bc56SSotiris Apostolakis } 11248b42bc56SSotiris Apostolakis return false; 11258b42bc56SSotiris Apostolakis } 11268b42bc56SSotiris Apostolakis 1127b827e7c6SSotiris Apostolakis // Check if it is safe to move LoadI next to the SI. 1128b827e7c6SSotiris Apostolakis // Conservatively assume it is safe only if there is no instruction 1129b827e7c6SSotiris Apostolakis // modifying memory in-between the load and the select instruction. 1130b827e7c6SSotiris Apostolakis static bool isSafeToSinkLoad(Instruction *LoadI, Instruction *SI) { 1131b827e7c6SSotiris Apostolakis // Assume loads from different basic blocks are unsafe to move. 1132b827e7c6SSotiris Apostolakis if (LoadI->getParent() != SI->getParent()) 1133b827e7c6SSotiris Apostolakis return false; 1134b827e7c6SSotiris Apostolakis auto It = LoadI->getIterator(); 1135b827e7c6SSotiris Apostolakis while (&*It != SI) { 1136b827e7c6SSotiris Apostolakis if (It->mayWriteToMemory()) 1137b827e7c6SSotiris Apostolakis return false; 1138b827e7c6SSotiris Apostolakis It++; 1139b827e7c6SSotiris Apostolakis } 1140b827e7c6SSotiris Apostolakis return true; 1141b827e7c6SSotiris Apostolakis } 1142b827e7c6SSotiris Apostolakis 11438b42bc56SSotiris Apostolakis // For a given source instruction, collect its backwards dependence slice 11448b42bc56SSotiris Apostolakis // consisting of instructions exclusively computed for the purpose of producing 11458b42bc56SSotiris Apostolakis // the operands of the source instruction. As an approximation 11468b42bc56SSotiris Apostolakis // (sufficiently-accurate in practice), we populate this set with the 11478b42bc56SSotiris Apostolakis // instructions of the backwards dependence slice that only have one-use and 11488b42bc56SSotiris Apostolakis // form an one-use chain that leads to the source instruction. 1149ce08c7eeSpaperchalice void SelectOptimizeImpl::getExclBackwardsSlice(Instruction *I, 115067be40dfSSotiris Apostolakis std::stack<Instruction *> &Slice, 1151ce08c7eeSpaperchalice Instruction *SI, 1152ce08c7eeSpaperchalice bool ForSinking) { 11538b42bc56SSotiris Apostolakis SmallPtrSet<Instruction *, 2> Visited; 11548b42bc56SSotiris Apostolakis std::queue<Instruction *> Worklist; 11558b42bc56SSotiris Apostolakis Worklist.push(I); 11568b42bc56SSotiris Apostolakis while (!Worklist.empty()) { 11578b42bc56SSotiris Apostolakis Instruction *II = Worklist.front(); 11588b42bc56SSotiris Apostolakis Worklist.pop(); 11598b42bc56SSotiris Apostolakis 11608b42bc56SSotiris Apostolakis // Avoid cycles. 1161b254d671SKazu Hirata if (!Visited.insert(II).second) 11628b42bc56SSotiris Apostolakis continue; 11638b42bc56SSotiris Apostolakis 11648b42bc56SSotiris Apostolakis if (!II->hasOneUse()) 11658b42bc56SSotiris Apostolakis continue; 11668b42bc56SSotiris Apostolakis 116767be40dfSSotiris Apostolakis // Cannot soundly sink instructions with side-effects. 116867be40dfSSotiris Apostolakis // Terminator or phi instructions cannot be sunk. 116967be40dfSSotiris Apostolakis // Avoid sinking other select instructions (should be handled separetely). 117067be40dfSSotiris Apostolakis if (ForSinking && (II->isTerminator() || II->mayHaveSideEffects() || 117167be40dfSSotiris Apostolakis isa<SelectInst>(II) || isa<PHINode>(II))) 117267be40dfSSotiris Apostolakis continue; 117367be40dfSSotiris Apostolakis 1174b827e7c6SSotiris Apostolakis // Avoid sinking loads in order not to skip state-modifying instructions, 1175b827e7c6SSotiris Apostolakis // that may alias with the loaded address. 1176b827e7c6SSotiris Apostolakis // Only allow sinking of loads within the same basic block that are 1177b827e7c6SSotiris Apostolakis // conservatively proven to be safe. 1178b827e7c6SSotiris Apostolakis if (ForSinking && II->mayReadFromMemory() && !isSafeToSinkLoad(II, SI)) 1179b827e7c6SSotiris Apostolakis continue; 1180b827e7c6SSotiris Apostolakis 11818b42bc56SSotiris Apostolakis // Avoid considering instructions with less frequency than the source 11828b42bc56SSotiris Apostolakis // instruction (i.e., avoid colder code regions of the dependence slice). 11838b42bc56SSotiris Apostolakis if (BFI->getBlockFreq(II->getParent()) < BFI->getBlockFreq(I->getParent())) 11848b42bc56SSotiris Apostolakis continue; 11858b42bc56SSotiris Apostolakis 11868b42bc56SSotiris Apostolakis // Eligible one-use instruction added to the dependence slice. 118767be40dfSSotiris Apostolakis Slice.push(II); 11888b42bc56SSotiris Apostolakis 11898b42bc56SSotiris Apostolakis // Explore all the operands of the current instruction to expand the slice. 119066cd2e0fSKazu Hirata for (Value *Op : II->operand_values()) 119166cd2e0fSKazu Hirata if (auto *OpI = dyn_cast<Instruction>(Op)) 11928b42bc56SSotiris Apostolakis Worklist.push(OpI); 11938b42bc56SSotiris Apostolakis } 11948b42bc56SSotiris Apostolakis } 11958b42bc56SSotiris Apostolakis 1196a2d68b4bSDavid Green bool SelectOptimizeImpl::isSelectHighlyPredictable(const SelectLike SI) { 11978b42bc56SSotiris Apostolakis uint64_t TrueWeight, FalseWeight; 1198a2d68b4bSDavid Green if (extractBranchWeights(SI, TrueWeight, FalseWeight)) { 11998b42bc56SSotiris Apostolakis uint64_t Max = std::max(TrueWeight, FalseWeight); 12008b42bc56SSotiris Apostolakis uint64_t Sum = TrueWeight + FalseWeight; 12018b42bc56SSotiris Apostolakis if (Sum != 0) { 12028b42bc56SSotiris Apostolakis auto Probability = BranchProbability::getBranchProbability(Max, Sum); 12038b42bc56SSotiris Apostolakis if (Probability > TTI->getPredictableBranchThreshold()) 12048b42bc56SSotiris Apostolakis return true; 12058b42bc56SSotiris Apostolakis } 12068b42bc56SSotiris Apostolakis } 12078b42bc56SSotiris Apostolakis return false; 12088b42bc56SSotiris Apostolakis } 12098b42bc56SSotiris Apostolakis 1210ce08c7eeSpaperchalice bool SelectOptimizeImpl::checkLoopHeuristics(const Loop *L, 1211d7ebb746SSotiris Apostolakis const CostInfo LoopCost[2]) { 1212d7ebb746SSotiris Apostolakis // Loop-level checks to determine if a non-predicated version (with branches) 1213d7ebb746SSotiris Apostolakis // of the loop is more profitable than its predicated version. 1214d7ebb746SSotiris Apostolakis 1215d7ebb746SSotiris Apostolakis if (DisableLoopLevelHeuristics) 1216d7ebb746SSotiris Apostolakis return true; 1217d7ebb746SSotiris Apostolakis 1218d7ebb746SSotiris Apostolakis OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", 12196292a808SJeremy Morse &*L->getHeader()->getFirstNonPHIIt()); 1220d7ebb746SSotiris Apostolakis 1221d7ebb746SSotiris Apostolakis if (LoopCost[0].NonPredCost > LoopCost[0].PredCost || 1222d7ebb746SSotiris Apostolakis LoopCost[1].NonPredCost >= LoopCost[1].PredCost) { 1223d7ebb746SSotiris Apostolakis ORmissL << "No select conversion in the loop due to no reduction of loop's " 1224d7ebb746SSotiris Apostolakis "critical path. "; 12257d098988SDavid Green EmitAndPrintRemark(ORE, ORmissL); 1226d7ebb746SSotiris Apostolakis return false; 1227d7ebb746SSotiris Apostolakis } 1228d7ebb746SSotiris Apostolakis 1229d7ebb746SSotiris Apostolakis Scaled64 Gain[2] = {LoopCost[0].PredCost - LoopCost[0].NonPredCost, 1230d7ebb746SSotiris Apostolakis LoopCost[1].PredCost - LoopCost[1].NonPredCost}; 1231d7ebb746SSotiris Apostolakis 1232d7ebb746SSotiris Apostolakis // Profitably converting to branches need to reduce the loop's critical path 1233d7ebb746SSotiris Apostolakis // by at least some threshold (absolute gain of GainCycleThreshold cycles and 1234d7ebb746SSotiris Apostolakis // relative gain of 12.5%). 1235d7ebb746SSotiris Apostolakis if (Gain[1] < Scaled64::get(GainCycleThreshold) || 1236d7ebb746SSotiris Apostolakis Gain[1] * Scaled64::get(GainRelativeThreshold) < LoopCost[1].PredCost) { 1237d7ebb746SSotiris Apostolakis Scaled64 RelativeGain = Scaled64::get(100) * Gain[1] / LoopCost[1].PredCost; 1238d7ebb746SSotiris Apostolakis ORmissL << "No select conversion in the loop due to small reduction of " 1239d7ebb746SSotiris Apostolakis "loop's critical path. Gain=" 1240d7ebb746SSotiris Apostolakis << Gain[1].toString() 1241d7ebb746SSotiris Apostolakis << ", RelativeGain=" << RelativeGain.toString() << "%. "; 12427d098988SDavid Green EmitAndPrintRemark(ORE, ORmissL); 1243d7ebb746SSotiris Apostolakis return false; 1244d7ebb746SSotiris Apostolakis } 1245d7ebb746SSotiris Apostolakis 1246d7ebb746SSotiris Apostolakis // If the loop's critical path involves loop-carried dependences, the gradient 1247d7ebb746SSotiris Apostolakis // of the gain needs to be at least GainGradientThreshold% (defaults to 25%). 1248d7ebb746SSotiris Apostolakis // This check ensures that the latency reduction for the loop's critical path 1249d7ebb746SSotiris Apostolakis // keeps decreasing with sufficient rate beyond the two analyzed loop 1250d7ebb746SSotiris Apostolakis // iterations. 1251d7ebb746SSotiris Apostolakis if (Gain[1] > Gain[0]) { 1252d7ebb746SSotiris Apostolakis Scaled64 GradientGain = Scaled64::get(100) * (Gain[1] - Gain[0]) / 1253d7ebb746SSotiris Apostolakis (LoopCost[1].PredCost - LoopCost[0].PredCost); 1254d7ebb746SSotiris Apostolakis if (GradientGain < Scaled64::get(GainGradientThreshold)) { 1255d7ebb746SSotiris Apostolakis ORmissL << "No select conversion in the loop due to small gradient gain. " 1256d7ebb746SSotiris Apostolakis "GradientGain=" 1257d7ebb746SSotiris Apostolakis << GradientGain.toString() << "%. "; 12587d098988SDavid Green EmitAndPrintRemark(ORE, ORmissL); 1259d7ebb746SSotiris Apostolakis return false; 1260d7ebb746SSotiris Apostolakis } 1261d7ebb746SSotiris Apostolakis } 1262d7ebb746SSotiris Apostolakis // If the gain decreases it is not profitable to convert. 1263d7ebb746SSotiris Apostolakis else if (Gain[1] < Gain[0]) { 1264d7ebb746SSotiris Apostolakis ORmissL 1265d7ebb746SSotiris Apostolakis << "No select conversion in the loop due to negative gradient gain. "; 12667d098988SDavid Green EmitAndPrintRemark(ORE, ORmissL); 1267d7ebb746SSotiris Apostolakis return false; 1268d7ebb746SSotiris Apostolakis } 1269d7ebb746SSotiris Apostolakis 1270d7ebb746SSotiris Apostolakis // Non-predicated version of the loop is more profitable than its 1271d7ebb746SSotiris Apostolakis // predicated version. 1272d7ebb746SSotiris Apostolakis return true; 1273d7ebb746SSotiris Apostolakis } 1274d7ebb746SSotiris Apostolakis 1275d7ebb746SSotiris Apostolakis // Computes instruction and loop-critical-path costs for both the predicated 1276d7ebb746SSotiris Apostolakis // and non-predicated version of the given loop. 1277d7ebb746SSotiris Apostolakis // Returns false if unable to compute these costs due to invalid cost of loop 1278d7ebb746SSotiris Apostolakis // instruction(s). 1279ce08c7eeSpaperchalice bool SelectOptimizeImpl::computeLoopCosts( 1280d7ebb746SSotiris Apostolakis const Loop *L, const SelectGroups &SIGroups, 1281d7ebb746SSotiris Apostolakis DenseMap<const Instruction *, CostInfo> &InstCostMap, CostInfo *LoopCost) { 12827d098988SDavid Green LLVM_DEBUG(dbgs() << "Calculating Latency / IPredCost / INonPredCost of loop " 12837d098988SDavid Green << L->getHeader()->getName() << "\n"); 1284e874c8fcSIgor Kirillov const auto SImap = getSImap(SIGroups); 1285e874c8fcSIgor Kirillov const auto SGmap = getSGmap(SIGroups); 1286d7ebb746SSotiris Apostolakis // Compute instruction and loop-critical-path costs across two iterations for 1287d7ebb746SSotiris Apostolakis // both predicated and non-predicated version. 1288d7ebb746SSotiris Apostolakis const unsigned Iterations = 2; 1289d7ebb746SSotiris Apostolakis for (unsigned Iter = 0; Iter < Iterations; ++Iter) { 1290d7ebb746SSotiris Apostolakis // Cost of the loop's critical path. 1291d7ebb746SSotiris Apostolakis CostInfo &MaxCost = LoopCost[Iter]; 1292d7ebb746SSotiris Apostolakis for (BasicBlock *BB : L->getBlocks()) { 1293d7ebb746SSotiris Apostolakis for (const Instruction &I : *BB) { 1294d7ebb746SSotiris Apostolakis if (I.isDebugOrPseudoInst()) 1295d7ebb746SSotiris Apostolakis continue; 1296d7ebb746SSotiris Apostolakis // Compute the predicated and non-predicated cost of the instruction. 1297d7ebb746SSotiris Apostolakis Scaled64 IPredCost = Scaled64::getZero(), 1298d7ebb746SSotiris Apostolakis INonPredCost = Scaled64::getZero(); 1299d7ebb746SSotiris Apostolakis 1300d7ebb746SSotiris Apostolakis // Assume infinite resources that allow to fully exploit the available 1301d7ebb746SSotiris Apostolakis // instruction-level parallelism. 1302d7ebb746SSotiris Apostolakis // InstCost = InstLatency + max(Op1Cost, Op2Cost, … OpNCost) 1303d7ebb746SSotiris Apostolakis for (const Use &U : I.operands()) { 1304d7ebb746SSotiris Apostolakis auto UI = dyn_cast<Instruction>(U.get()); 1305d7ebb746SSotiris Apostolakis if (!UI) 1306d7ebb746SSotiris Apostolakis continue; 1307*1d5ce614SKazu Hirata if (auto It = InstCostMap.find(UI); It != InstCostMap.end()) { 1308*1d5ce614SKazu Hirata IPredCost = std::max(IPredCost, It->second.PredCost); 1309*1d5ce614SKazu Hirata INonPredCost = std::max(INonPredCost, It->second.NonPredCost); 1310d7ebb746SSotiris Apostolakis } 1311d7ebb746SSotiris Apostolakis } 1312d7ebb746SSotiris Apostolakis auto ILatency = computeInstCost(&I); 1313e0e687a6SKazu Hirata if (!ILatency) { 1314d7ebb746SSotiris Apostolakis OptimizationRemarkMissed ORmissL(DEBUG_TYPE, "SelectOpti", &I); 1315d7ebb746SSotiris Apostolakis ORmissL << "Invalid instruction cost preventing analysis and " 1316d7ebb746SSotiris Apostolakis "optimization of the inner-most loop containing this " 1317d7ebb746SSotiris Apostolakis "instruction. "; 13187d098988SDavid Green EmitAndPrintRemark(ORE, ORmissL); 1319d7ebb746SSotiris Apostolakis return false; 1320d7ebb746SSotiris Apostolakis } 132151b68573SFangrui Song IPredCost += Scaled64::get(*ILatency); 132251b68573SFangrui Song INonPredCost += Scaled64::get(*ILatency); 1323d7ebb746SSotiris Apostolakis 1324d7ebb746SSotiris Apostolakis // For a select that can be converted to branch, 1325d7ebb746SSotiris Apostolakis // compute its cost as a branch (non-predicated cost). 1326d7ebb746SSotiris Apostolakis // 1327d7ebb746SSotiris Apostolakis // BranchCost = PredictedPathCost + MispredictCost 1328d7ebb746SSotiris Apostolakis // PredictedPathCost = TrueOpCost * TrueProb + FalseOpCost * FalseProb 1329d7ebb746SSotiris Apostolakis // MispredictCost = max(MispredictPenalty, CondCost) * MispredictRate 1330a2d68b4bSDavid Green if (SImap.contains(&I)) { 1331a2d68b4bSDavid Green auto SI = SImap.at(&I); 1332e874c8fcSIgor Kirillov const auto *SG = SGmap.at(&I); 1333e874c8fcSIgor Kirillov Scaled64 TrueOpCost = SI.getOpCostOnBranch(true, InstCostMap, TTI); 1334e874c8fcSIgor Kirillov Scaled64 FalseOpCost = SI.getOpCostOnBranch(false, InstCostMap, TTI); 1335d7ebb746SSotiris Apostolakis Scaled64 PredictedPathCost = 1336d7ebb746SSotiris Apostolakis getPredictedPathCost(TrueOpCost, FalseOpCost, SI); 1337d7ebb746SSotiris Apostolakis 1338d7ebb746SSotiris Apostolakis Scaled64 CondCost = Scaled64::getZero(); 1339e874c8fcSIgor Kirillov if (auto *CI = dyn_cast<Instruction>(SG->Condition)) 1340*1d5ce614SKazu Hirata if (auto It = InstCostMap.find(CI); It != InstCostMap.end()) 1341*1d5ce614SKazu Hirata CondCost = It->second.NonPredCost; 1342d7ebb746SSotiris Apostolakis Scaled64 MispredictCost = getMispredictionCost(SI, CondCost); 1343d7ebb746SSotiris Apostolakis 1344d7ebb746SSotiris Apostolakis INonPredCost = PredictedPathCost + MispredictCost; 1345d7ebb746SSotiris Apostolakis } 13467d098988SDavid Green LLVM_DEBUG(dbgs() << " " << ILatency << "/" << IPredCost << "/" 13477d098988SDavid Green << INonPredCost << " for " << I << "\n"); 1348d7ebb746SSotiris Apostolakis 1349d7ebb746SSotiris Apostolakis InstCostMap[&I] = {IPredCost, INonPredCost}; 1350d7ebb746SSotiris Apostolakis MaxCost.PredCost = std::max(MaxCost.PredCost, IPredCost); 1351d7ebb746SSotiris Apostolakis MaxCost.NonPredCost = std::max(MaxCost.NonPredCost, INonPredCost); 1352d7ebb746SSotiris Apostolakis } 1353d7ebb746SSotiris Apostolakis } 13547d098988SDavid Green LLVM_DEBUG(dbgs() << "Iteration " << Iter + 1 13557d098988SDavid Green << " MaxCost = " << MaxCost.PredCost << " " 13567d098988SDavid Green << MaxCost.NonPredCost << "\n"); 1357d7ebb746SSotiris Apostolakis } 1358d7ebb746SSotiris Apostolakis return true; 1359d7ebb746SSotiris Apostolakis } 1360d7ebb746SSotiris Apostolakis 1361a2d68b4bSDavid Green SmallDenseMap<const Instruction *, SelectOptimizeImpl::SelectLike, 2> 1362a2d68b4bSDavid Green SelectOptimizeImpl::getSImap(const SelectGroups &SIGroups) { 1363a2d68b4bSDavid Green SmallDenseMap<const Instruction *, SelectLike, 2> SImap; 1364d7ebb746SSotiris Apostolakis for (const SelectGroup &ASI : SIGroups) 1365e874c8fcSIgor Kirillov for (const SelectLike &SI : ASI.Selects) 1366a2d68b4bSDavid Green SImap.try_emplace(SI.getI(), SI); 1367a2d68b4bSDavid Green return SImap; 1368d7ebb746SSotiris Apostolakis } 1369d7ebb746SSotiris Apostolakis 1370e874c8fcSIgor Kirillov SmallDenseMap<const Instruction *, const SelectOptimizeImpl::SelectGroup *, 2> 1371e874c8fcSIgor Kirillov SelectOptimizeImpl::getSGmap(const SelectGroups &SIGroups) { 1372e874c8fcSIgor Kirillov SmallDenseMap<const Instruction *, const SelectGroup *, 2> SImap; 1373e874c8fcSIgor Kirillov for (const SelectGroup &ASI : SIGroups) 1374e874c8fcSIgor Kirillov for (const SelectLike &SI : ASI.Selects) 1375e874c8fcSIgor Kirillov SImap.try_emplace(SI.getI(), &ASI); 1376e874c8fcSIgor Kirillov return SImap; 1377e874c8fcSIgor Kirillov } 1378e874c8fcSIgor Kirillov 1379ce08c7eeSpaperchalice std::optional<uint64_t> 1380ce08c7eeSpaperchalice SelectOptimizeImpl::computeInstCost(const Instruction *I) { 1381d7ebb746SSotiris Apostolakis InstructionCost ICost = 1382d7ebb746SSotiris Apostolakis TTI->getInstructionCost(I, TargetTransformInfo::TCK_Latency); 1383d7ebb746SSotiris Apostolakis if (auto OC = ICost.getValue()) 138467819a72SFangrui Song return std::optional<uint64_t>(*OC); 1385998960eeSKazu Hirata return std::nullopt; 1386d7ebb746SSotiris Apostolakis } 1387d7ebb746SSotiris Apostolakis 1388d7ebb746SSotiris Apostolakis ScaledNumber<uint64_t> 1389a2d68b4bSDavid Green SelectOptimizeImpl::getMispredictionCost(const SelectLike SI, 1390d7ebb746SSotiris Apostolakis const Scaled64 CondCost) { 1391d7ebb746SSotiris Apostolakis uint64_t MispredictPenalty = TSchedModel.getMCSchedModel()->MispredictPenalty; 1392d7ebb746SSotiris Apostolakis 1393d7ebb746SSotiris Apostolakis // Account for the default misprediction rate when using a branch 1394d7ebb746SSotiris Apostolakis // (conservatively set to 25% by default). 1395d7ebb746SSotiris Apostolakis uint64_t MispredictRate = MispredictDefaultRate; 1396d7ebb746SSotiris Apostolakis // If the select condition is obviously predictable, then the misprediction 1397d7ebb746SSotiris Apostolakis // rate is zero. 1398d7ebb746SSotiris Apostolakis if (isSelectHighlyPredictable(SI)) 1399d7ebb746SSotiris Apostolakis MispredictRate = 0; 1400d7ebb746SSotiris Apostolakis 1401d7ebb746SSotiris Apostolakis // CondCost is included to account for cases where the computation of the 1402d7ebb746SSotiris Apostolakis // condition is part of a long dependence chain (potentially loop-carried) 1403d7ebb746SSotiris Apostolakis // that would delay detection of a misprediction and increase its cost. 1404d7ebb746SSotiris Apostolakis Scaled64 MispredictCost = 1405d7ebb746SSotiris Apostolakis std::max(Scaled64::get(MispredictPenalty), CondCost) * 1406d7ebb746SSotiris Apostolakis Scaled64::get(MispredictRate); 1407d7ebb746SSotiris Apostolakis MispredictCost /= Scaled64::get(100); 1408d7ebb746SSotiris Apostolakis 1409d7ebb746SSotiris Apostolakis return MispredictCost; 1410d7ebb746SSotiris Apostolakis } 1411d7ebb746SSotiris Apostolakis 1412d7ebb746SSotiris Apostolakis // Returns the cost of a branch when the prediction is correct. 1413d7ebb746SSotiris Apostolakis // TrueCost * TrueProbability + FalseCost * FalseProbability. 1414d7ebb746SSotiris Apostolakis ScaledNumber<uint64_t> 1415ce08c7eeSpaperchalice SelectOptimizeImpl::getPredictedPathCost(Scaled64 TrueCost, Scaled64 FalseCost, 1416a2d68b4bSDavid Green const SelectLike SI) { 1417d7ebb746SSotiris Apostolakis Scaled64 PredPathCost; 1418d7ebb746SSotiris Apostolakis uint64_t TrueWeight, FalseWeight; 1419a2d68b4bSDavid Green if (extractBranchWeights(SI, TrueWeight, FalseWeight)) { 1420d7ebb746SSotiris Apostolakis uint64_t SumWeight = TrueWeight + FalseWeight; 1421d7ebb746SSotiris Apostolakis if (SumWeight != 0) { 1422d7ebb746SSotiris Apostolakis PredPathCost = TrueCost * Scaled64::get(TrueWeight) + 1423d7ebb746SSotiris Apostolakis FalseCost * Scaled64::get(FalseWeight); 1424d7ebb746SSotiris Apostolakis PredPathCost /= Scaled64::get(SumWeight); 1425d7ebb746SSotiris Apostolakis return PredPathCost; 1426d7ebb746SSotiris Apostolakis } 1427d7ebb746SSotiris Apostolakis } 1428d7ebb746SSotiris Apostolakis // Without branch weight metadata, we assume 75% for the one path and 25% for 1429d7ebb746SSotiris Apostolakis // the other, and pick the result with the biggest cost. 1430d7ebb746SSotiris Apostolakis PredPathCost = std::max(TrueCost * Scaled64::get(3) + FalseCost, 1431d7ebb746SSotiris Apostolakis FalseCost * Scaled64::get(3) + TrueCost); 1432d7ebb746SSotiris Apostolakis PredPathCost /= Scaled64::get(4); 1433d7ebb746SSotiris Apostolakis return PredPathCost; 1434d7ebb746SSotiris Apostolakis } 1435d7ebb746SSotiris Apostolakis 1436a2d68b4bSDavid Green bool SelectOptimizeImpl::isSelectKindSupported(const SelectLike SI) { 143797c3ef5cSSotiris Apostolakis TargetLowering::SelectSupportKind SelectKind; 1438a2d68b4bSDavid Green if (SI.getType()->isVectorTy()) 143997c3ef5cSSotiris Apostolakis SelectKind = TargetLowering::ScalarCondVectorVal; 144097c3ef5cSSotiris Apostolakis else 144197c3ef5cSSotiris Apostolakis SelectKind = TargetLowering::ScalarValSelect; 144297c3ef5cSSotiris Apostolakis return TLI->isSelectSupported(SelectKind); 1443ca7c307dSSotiris Apostolakis } 1444