//===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This transformation analyzes and transforms the induction variables (and
// computations derived from them) into forms suitable for efficient execution
// on the target.
//
// This pass performs a strength reduction on array references inside loops that
// have as one or more of their components the loop induction variable; it
// rewrites expressions to take advantage of scaled-index addressing modes
// available on the target, and it performs a variety of other optimizations
// related to loop induction variables.
//
// Terminology note: this code has a lot of handling for "post-increment" or
// "post-inc" users. This is not talking about post-increment addressing modes;
// it is instead talking about code like this:
//
//   %i = phi [ 0, %entry ], [ %i.next, %latch ]
//   ...
//   %i.next = add %i, 1
//   %c = icmp eq %i.next, %n
//
// The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>; however,
// it's useful to think about these as the same register, with some uses using
// the value of the register before the add and some using it after. In this
// example, the icmp is a post-increment user, since it uses %i.next, which is
// the value of the induction variable after the increment. The other common
// case of post-increment users is users outside the loop.
//
// TODO: More sophistication in the way Formulae are generated and filtered.
//
// TODO: Handle multiple loops at a time.
//
// TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
//       of a GlobalValue?
//
// TODO: When truncation is free, truncate ICmp users' operands to make it a
//       smaller encoding (on x86 at least).
//
// TODO: When a negated register is used by an add (such as in a list of
//       multiple base registers, or as the increment expression in an addrec),
//       we may not actually need both reg and (-1 * reg) in registers; the
//       negation can be implemented by using a sub instead of an add. The
//       lack of support for taking this into consideration when making
//       register pressure decisions is partly worked around by the "Special"
//       use kind.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/Hashing.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/DomTreeUpdater.h"
#include "llvm/Analysis/IVUsers.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/MemorySSA.h"
#include "llvm/Analysis/MemorySSAUpdater.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/ScalarEvolutionNormalization.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/BinaryFormat/Dwarf.h"
#include "llvm/Config/llvm-config.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Use.h"
#include "llvm/IR/User.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Scalar.h"
#include "llvm/Transforms/Utils.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/LoopUtils.h"
#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include <algorithm>
#include <cassert>
#include <cstddef>
#include <cstdint>
#include <iterator>
#include <limits>
#include <map>
#include <numeric>
#include <optional>
#include <utility>

using namespace llvm;

#define DEBUG_TYPE "loop-reduce"

/// MaxIVUsers is an arbitrary threshold that provides an early opportunity to
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

/// Limit the size of expression that SCEV-based salvaging will attempt to
/// translate into a DIExpression.
/// Choose a maximum size such that debuginfo is not excessively increased and
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;

// Clean up congruent phis after LSR phi expansion.
static cl::opt<bool> EnablePhiElim(
  "enable-lsr-phielim", cl::Hidden, cl::init(true),
  cl::desc("Enable LSR phi elimination"));

// This flag adds instruction count to the solution cost comparison.
static cl::opt<bool> InsnsCost(
  "lsr-insns-cost", cl::Hidden, cl::init(true),
  cl::desc("Add instruction count to an LSR cost model"));

// Flag to choose how to narrow a complex LSR solution.
static cl::opt<bool> LSRExpNarrow(
  "lsr-exp-narrow", cl::Hidden, cl::init(false),
  cl::desc("Narrow LSR complex solution using the expected"
           " number of registers"));

// Flag to narrow search space by filtering non-optimal formulae with
// the same ScaledReg and Scale.
static cl::opt<bool> FilterSameScaledReg(
    "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
  "lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),
   cl::desc("A flag that overrides the target's preferred addressing mode."),
   cl::values(clEnumValN(TTI::AMK_None,
                         "none",
                         "Don't prefer any addressing mode"),
              clEnumValN(TTI::AMK_PreIndexed,
                         "preindexed",
                         "Prefer pre-indexed addressing mode"),
              clEnumValN(TTI::AMK_PostIndexed,
                         "postindexed",
                         "Prefer post-indexed addressing mode")));

static cl::opt<unsigned> ComplexityLimit(
  "lsr-complexity-limit", cl::Hidden,
  cl::init(std::numeric_limits<uint16_t>::max()),
  cl::desc("LSR search space complexity limit"));

static cl::opt<unsigned> SetupCostDepthLimit(
    "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
    cl::desc("The limit on recursion depth for LSR's setup cost"));

static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
    "lsr-term-fold", cl::Hidden,
    cl::desc("Attempt to replace primary IV with other IV."));

static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
    "lsr-drop-solution", cl::Hidden,
    cl::desc("Attempt to drop solution if it is less profitable"));

static cl::opt<bool> EnableVScaleImmediates(
    "lsr-enable-vscale-immediates", cl::Hidden, cl::init(true),
    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

static cl::opt<bool> DropScaledForVScale(
    "lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),
    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

STATISTIC(NumTermFold,
          "Number of terminating condition folds recognized and performed");

#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
  "stress-ivchain", cl::Hidden, cl::init(false),
  cl::desc("Stress test LSR IV chains"));
#else
static bool StressIVChain = false;
#endif

namespace {

struct MemAccessTy {
  /// Used in situations where the accessed memory type is unknown.
  static const unsigned UnknownAddressSpace =
      std::numeric_limits<unsigned>::max();

  Type *MemTy = nullptr;
  unsigned AddrSpace = UnknownAddressSpace;

  MemAccessTy() = default;
  MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}

  bool operator==(MemAccessTy Other) const {
    return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
  }

  bool operator!=(MemAccessTy Other) const { return !(*this == Other); }

  static MemAccessTy getUnknown(LLVMContext &Ctx,
                                unsigned AS = UnknownAddressSpace) {
    return MemAccessTy(Type::getVoidTy(Ctx), AS);
  }

  Type *getType() { return MemTy; }
};

/// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
  /// This represents the set of LSRUse indices which reference
  /// a particular register.
  SmallBitVector UsedByIndices;

  void print(raw_ostream &OS) const;
  void dump() const;
};

// An offset from an address that is either scalable or fixed. Used for
// per-target optimizations of addressing modes.
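// For example, Immediate::getFixed(16) represents a constant offset of 16,
// while Immediate::getScalable(16) represents an offset of 16 * vscale; the
// getSCEV helper below materializes the latter as (16 * vscale).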
class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

public:
  constexpr Immediate() = delete;

  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  // Arithmetic 'operators' that cast to unsigned types first.
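  // (Casting to uint64_t gives the arithmetic well-defined wraparound
  // semantics; overflowing the signed Quantity directly would be UB.)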
  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  // Scale the quantity by a constant without caring about runtime scalability.
  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};
  }

  // Helpers for generating SCEVs with vscale terms where needed.
  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *S = SE.getConstant(Ty, Quantity);
    if (Scalable)
      S = SE.getMulExpr(S, SE.getVScale(S->getType()));
    return S;
  }

  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);
    if (Scalable)
      NegS = SE.getMulExpr(NegS, SE.getVScale(NegS->getType()));
    return NegS;
  }

  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *SU = SE.getUnknown(ConstantInt::getSigned(Ty, Quantity));
    if (Scalable)
      SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));
    return SU;
  }
};

// This is needed for the Compare type of std::map when Immediate is used
// as a key. We don't need it to be fully correct against any value of vscale,
// just to make sure that vscale-related terms in the map are considered against
// each other rather than being mixed up and potentially missing opportunities.
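// The resulting order is: all fixed immediates sort before all scalable ones,
// and within each group immediates are ordered by their known minimum value.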
struct KeyOrderTargetImmediate {
  bool operator()(const Immediate &LHS, const Immediate &RHS) const {
    if (LHS.isScalable() && !RHS.isScalable())
      return false;
    if (!LHS.isScalable() && RHS.isScalable())
      return true;
    return LHS.getKnownMinValue() < RHS.getKnownMinValue();
  }
};

// This would be nicer if we could be generic instead of directly using size_t,
// but there doesn't seem to be a type trait for is_orderable or
// is_lessthan_comparable or similar.
struct KeyOrderSizeTAndImmediate {
  bool operator()(const std::pair<size_t, Immediate> &LHS,
                  const std::pair<size_t, Immediate> &RHS) const {
    size_t LSize = LHS.first;
    size_t RSize = RHS.first;
    if (LSize != RSize)
      return LSize < RSize;
    return KeyOrderTargetImmediate()(LHS.second, RHS.second);
  }
};
} // end anonymous namespace

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << UsedByIndices.count() << ']';
}

LLVM_DUMP_METHOD void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}
#endif

namespace {

/// Map register candidates to information about how they are used.
class RegUseTracker {
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  RegUsesTy RegUsesMap;
  SmallVector<const SCEV *, 16> RegSequence;

public:
  void countRegister(const SCEV *Reg, size_t LUIdx);
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

  void clear();

  using iterator = SmallVectorImpl<const SCEV *>::iterator;
  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;

  iterator begin() { return RegSequence.begin(); }
  iterator end() { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
  const_iterator end() const { return RegSequence.end(); }
};

} // end anonymous namespace

void
RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
  std::pair<RegUsesTy::iterator, bool> Pair =
    RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
  RegSortData &RSD = Pair.first->second;
  if (Pair.second)
    RegSequence.push_back(Reg);
  RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
  RSD.UsedByIndices.set(LUIdx);
}

void
RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
  RegUsesTy::iterator It = RegUsesMap.find(Reg);
  assert(It != RegUsesMap.end());
  RegSortData &RSD = It->second;
  assert(RSD.UsedByIndices.size() > LUIdx);
  RSD.UsedByIndices.reset(LUIdx);
}

void
RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
  assert(LUIdx <= LastLUIdx);

  // Update RegUses. The data structure is not optimized for this purpose;
  // we must iterate through it and update each of the bit vectors.
  for (auto &Pair : RegUsesMap) {
    SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
    if (LUIdx < UsedByIndices.size())
      UsedByIndices[LUIdx] =
          LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
    UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
  }
}

bool
RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  if (I == RegUsesMap.end())
    return false;
  const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
  int i = UsedByIndices.find_first();
  if (i == -1) return false;
  if ((size_t)i != LUIdx) return true;
  return UsedByIndices.find_next(i) != -1;
}

const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
  RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
  assert(I != RegUsesMap.end() && "Unknown register!");
  return I->second.UsedByIndices;
}

void RegUseTracker::clear() {
  RegUsesMap.clear();
  RegSequence.clear();
}

namespace {

/// This class holds information that describes a formula for computing a
/// value that satisfies a use. It may include broken-out immediates and
/// scaled registers.
struct Formula {
  /// Global base address used for complex addressing.
  GlobalValue *BaseGV = nullptr;

  /// Base offset for complex addressing.
  Immediate BaseOffset = Immediate::getZero();

  /// Whether any complex addressing has a base register.
  bool HasBaseReg = false;

  /// The scale of any complex addressing.
  int64_t Scale = 0;

  /// The list of "base" registers for this use. When this is non-empty, the
  /// canonical representation of a formula is
  /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
  /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
  /// 3. The reg containing the recurrent expr related to the current loop in
  ///    the formula should be put in the ScaledReg.
  /// #1 enforces that the scaled register is always used when at least two
  /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
  /// #2 enforces that 1 * reg is reg.
  /// #3 ensures invariant regs with respect to current loop can be combined
  ///    together in LSR codegen.
  /// This invariant can be temporarily broken while building a formula.
  /// However, every formula inserted into the LSRInstance must be in canonical
  /// form.
  SmallVector<const SCEV *, 4> BaseRegs;

  /// The 'scaled' register for this use. This should be non-null when Scale is
  /// not zero.
  const SCEV *ScaledReg = nullptr;

  /// An additional constant offset which is added near the use. This requires
  /// a temporary register, but the offset itself can live in an add immediate
  /// field rather than a register.
  Immediate UnfoldedOffset = Immediate::getZero();

  Formula() = default;

  void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);

  bool isCanonical(const Loop &L) const;

  void canonicalize(const Loop &L);

  bool unscale();

  bool hasZeroEnd() const;

  size_t getNumRegs() const;
  Type *getType() const;

  void deleteBaseReg(const SCEV *&S);

  bool referencesReg(const SCEV *S) const;
  bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                  const RegUseTracker &RegUses) const;

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace

/// Recursion helper for initialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
                           SmallVectorImpl<const SCEV *> &Good,
                           SmallVectorImpl<const SCEV *> &Bad,
                           ScalarEvolution &SE) {
  // Collect expressions which properly dominate the loop header.
  if (SE.properlyDominates(S, L->getHeader())) {
    Good.push_back(S);
    return;
  }

  // Look at add operands.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
    for (const SCEV *S : Add->operands())
      DoInitialMatch(S, L, Good, Bad, SE);
    return;
  }

  // Look at addrec operands.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
    if (!AR->getStart()->isZero() && AR->isAffine()) {
      DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
      DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
                                      AR->getStepRecurrence(SE),
                                      // FIXME: AR->getNoWrapFlags()
                                      AR->getLoop(), SCEV::FlagAnyWrap),
                     L, Good, Bad, SE);
      return;
    }

  // Handle a multiplication by -1 (negation) if it didn't fold.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
    if (Mul->getOperand(0)->isAllOnesValue()) {
      SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
      const SCEV *NewMul = SE.getMulExpr(Ops);

      SmallVector<const SCEV *, 4> MyGood;
      SmallVector<const SCEV *, 4> MyBad;
      DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
      const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
        SE.getEffectiveSCEVType(NewMul->getType())));
      for (const SCEV *S : MyGood)
        Good.push_back(SE.getMulExpr(NegOne, S));
      for (const SCEV *S : MyBad)
        Bad.push_back(SE.getMulExpr(NegOne, S));
      return;
    }

  // Ok, we can't do anything interesting. Just stuff the whole thing into a
  // register and hope for the best.
  Bad.push_back(S);
}

/// Incorporate loop-variant parts of S into this Formula, attempting to keep
/// all loop-invariant and loop-computable values in a single base register.
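/// For example, given S = {(%a + %b),+,1}<%L>, the loop-invariant start
/// (%a + %b) is collected into one base register and the remaining
/// loop-variant part {0,+,1}<%L> into another.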
void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
  SmallVector<const SCEV *, 4> Good;
  SmallVector<const SCEV *, 4> Bad;
  DoInitialMatch(S, L, Good, Bad, SE);
  if (!Good.empty()) {
    const SCEV *Sum = SE.getAddExpr(Good);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  if (!Bad.empty()) {
    const SCEV *Sum = SE.getAddExpr(Bad);
    if (!Sum->isZero())
      BaseRegs.push_back(Sum);
    HasBaseReg = true;
  }
  canonicalize(*L);
}

static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {
  return SCEVExprContains(S, [&L](const SCEV *S) {
    return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
  });
}

/// Check whether or not this formula satisfies the canonical
/// representation.
/// \see Formula::BaseRegs.
bool Formula::isCanonical(const Loop &L) const {
  if (!ScaledReg)
    return BaseRegs.size() <= 1;

  if (Scale != 1)
    return true;

  if (Scale == 1 && BaseRegs.empty())
    return false;

  if (containsAddRecDependentOnLoop(ScaledReg, L))
    return true;

  // If ScaledReg is not a recurrent expr, or it is one whose loop is not the
  // current loop, while BaseRegs contains a recurrent expr related to the
  // current loop, we want to swap that reg in BaseRegs with ScaledReg.
  return none_of(BaseRegs, [&L](const SCEV *S) {
    return containsAddRecDependentOnLoop(S, L);
  });
}

/// Helper method to morph a formula into its canonical representation.
/// \see Formula::BaseRegs.
/// Every formula having more than one base register must use the ScaledReg
/// field. Otherwise, we would have to do special cases everywhere in LSR
/// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
/// On the other hand, 1*reg should be canonicalized into reg.
void Formula::canonicalize(const Loop &L) {
  if (isCanonical(L))
    return;

  if (BaseRegs.empty()) {
    // No base reg? Use scale reg with scale = 1 as such.
    assert(ScaledReg && "Expected 1*reg => reg");
    assert(Scale == 1 && "Expected 1*reg => reg");
    BaseRegs.push_back(ScaledReg);
    Scale = 0;
    ScaledReg = nullptr;
    return;
  }

  // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
  if (!ScaledReg) {
    ScaledReg = BaseRegs.pop_back_val();
    Scale = 1;
  }

  // If ScaledReg is an invariant with respect to L, find the reg from
  // BaseRegs containing the recurrent expr related to Loop L. Swap the
  // reg with ScaledReg.
  if (!containsAddRecDependentOnLoop(ScaledReg, L)) {
    auto I = find_if(BaseRegs, [&L](const SCEV *S) {
      return containsAddRecDependentOnLoop(S, L);
    });
    if (I != BaseRegs.end())
      std::swap(ScaledReg, *I);
  }
  assert(isCanonical(L) && "Failed to canonicalize?");
}

/// Get rid of the scale in the formula.
/// In other words, this method morphs reg1 + 1*reg2 into reg1 + reg2.
/// \return true if it was possible to get rid of the scale, false otherwise.
/// \note After this operation the formula may not be in the canonical form.
bool Formula::unscale() {
  if (Scale != 1)
    return false;
  Scale = 0;
  BaseRegs.push_back(ScaledReg);
  ScaledReg = nullptr;
  return true;
}

bool Formula::hasZeroEnd() const {
  if (UnfoldedOffset || BaseOffset)
    return false;
  if (BaseRegs.size() != 1 || ScaledReg)
    return false;
  return true;
}

/// Return the total number of register operands used by this formula. This does
/// not include register uses implied by non-constant addrec strides.
size_t Formula::getNumRegs() const {
  return !!ScaledReg + BaseRegs.size();
}

/// Return the type of this formula, if it has one, or null otherwise. This type
/// is meaningless except for the bit size.
Type *Formula::getType() const {
  return !BaseRegs.empty() ? BaseRegs.front()->getType() :
         ScaledReg ? ScaledReg->getType() :
         BaseGV ? BaseGV->getType() :
         nullptr;
}

/// Delete the given base reg from the BaseRegs list.
void Formula::deleteBaseReg(const SCEV *&S) {
  if (&S != &BaseRegs.back())
    std::swap(S, BaseRegs.back());
  BaseRegs.pop_back();
}

/// Test if this formula references the given register.
bool Formula::referencesReg(const SCEV *S) const {
  return S == ScaledReg || is_contained(BaseRegs, S);
}

/// Test whether this formula uses registers which are used by uses other than
/// the use with the given index.
bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
                                         const RegUseTracker &RegUses) const {
  if (ScaledReg)
    if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
      return true;
  for (const SCEV *BaseReg : BaseRegs)
    if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
      return true;
  return false;
}

#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
void Formula::print(raw_ostream &OS) const {
  bool First = true;
  if (BaseGV) {
    if (!First) OS << " + "; else First = false;
    BaseGV->printAsOperand(OS, /*PrintType=*/false);
  }
  if (BaseOffset.isNonZero()) {
    if (!First) OS << " + "; else First = false;
    OS << BaseOffset;
  }
  for (const SCEV *BaseReg : BaseRegs) {
    if (!First) OS << " + "; else First = false;
    OS << "reg(" << *BaseReg << ')';
  }
  if (HasBaseReg && BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: HasBaseReg**";
  } else if (!HasBaseReg && !BaseRegs.empty()) {
    if (!First) OS << " + "; else First = false;
    OS << "**error: !HasBaseReg**";
  }
  if (Scale != 0) {
    if (!First) OS << " + "; else First = false;
    OS << Scale << "*reg(";
    if (ScaledReg)
      OS << *ScaledReg;
    else
      OS << "<unknown>";
    OS << ')';
  }
  if (UnfoldedOffset.isNonZero()) {
    if (!First) OS << " + ";
    OS << "imm(" << UnfoldedOffset << ')';
  }
}

LLVM_DUMP_METHOD void Formula::dump() const {
  print(errs()); errs() << '\n';
}
#endif

/// Return true if the given addrec can be sign-extended without changing its
/// value.
static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
  return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
}

/// Return true if the given add can be sign-extended without changing its
/// value.
static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
  return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
}

/// Return true if the given mul can be sign-extended without changing its
/// value.
static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
  Type *WideTy =
    IntegerType::get(SE.getContext(),
                     SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
  return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
}

/// Return an expression for LHS /s RHS, if it can be determined and if the
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that
/// the multiplication may overflow, which is useful when the result will be
/// used in a context where the most significant bits are ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
                                ScalarEvolution &SE,
                                bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getConstant(LHS->getType(), 1);

  // Handle a few RHS special cases.
  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
  if (RC) {
    const APInt &RA = RC->getAPInt();
    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
    // some folding.
    if (RA.isAllOnes()) {
      if (LHS->getType()->isPointerTy())
        return nullptr;
      return SE.getMulExpr(LHS, RC);
    }
    // Handle x /s 1 as x.
    if (RA == 1)
      return LHS;
  }

  // Check for a division of a constant by a constant.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    if (!RC)
      return nullptr;
    const APInt &LA = C->getAPInt();
    const APInt &RA = RC->getAPInt();
    if (LA.srem(RA) != 0)
      return nullptr;
    return SE.getConstant(LA.sdiv(RA));
  }

  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
      const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;
      // FlagNW is independent of the start value and step direction, and is
      // preserved with smaller magnitude steps.
      // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
    }
    return nullptr;
  }

  // Distribute the sdiv over add operands, if the add doesn't overflow.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
      SmallVector<const SCEV *, 8> Ops;
      for (const SCEV *S : Add->operands()) {
        const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
        if (!Op) return nullptr;
        Ops.push_back(Op);
      }
      return SE.getAddExpr(Ops);
    }
    return nullptr;
  }

  // Check for a multiply operand that we can pull RHS out of.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
      // Handle special case C1*X*Y /s C2*X*Y.
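      // That is, when both multiplies share the same non-constant factors,
      // the division reduces to dividing the leading constants: C1 /s C2.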
      if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(RHS)) {
        if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {
          const SCEVConstant *LC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
          const SCEVConstant *RC =
              dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
          if (LC && RC) {
            SmallVector<const SCEV *, 4> LOps(drop_begin(Mul->operands()));
            SmallVector<const SCEV *, 4> ROps(drop_begin(MulRHS->operands()));
            if (LOps == ROps)
              return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);
          }
        }
      }

      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      for (const SCEV *S : Mul->operands()) {
        if (!Found)
          if (const SCEV *Q = getExactSDiv(S, RHS, SE,
                                           IgnoreSignificantBits)) {
            S = Q;
            Found = true;
          }
        Ops.push_back(S);
      }
      return Found ? SE.getMulExpr(Ops) : nullptr;
    }
    return nullptr;
  }

  // Otherwise we don't know.
  return nullptr;
}

/// If S involves the addition of a constant integer value, return that integer
/// value, and mutate S to point to a new SCEV with that value excluded.
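/// For example, given (4 + %x) this returns a fixed immediate of 4 and leaves
/// S pointing at %x; given (4 * vscale), with vscale immediates enabled, it
/// returns a scalable immediate of 4 and leaves S as zero.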
9290fca6ea1SDimitry Andric static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) { 9300b57cec5SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { 93106c3fb27SDimitry Andric if (C->getAPInt().getSignificantBits() <= 64) { 9320b57cec5SDimitry Andric S = SE.getConstant(C->getType(), 0); 9330fca6ea1SDimitry Andric return Immediate::getFixed(C->getValue()->getSExtValue()); 9340b57cec5SDimitry Andric } 9350b57cec5SDimitry Andric } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { 936e8d8bef9SDimitry Andric SmallVector<const SCEV *, 8> NewOps(Add->operands()); 9370fca6ea1SDimitry Andric Immediate Result = ExtractImmediate(NewOps.front(), SE); 9380fca6ea1SDimitry Andric if (Result.isNonZero()) 9390b57cec5SDimitry Andric S = SE.getAddExpr(NewOps); 9400b57cec5SDimitry Andric return Result; 9410b57cec5SDimitry Andric } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { 942e8d8bef9SDimitry Andric SmallVector<const SCEV *, 8> NewOps(AR->operands()); 9430fca6ea1SDimitry Andric Immediate Result = ExtractImmediate(NewOps.front(), SE); 9440fca6ea1SDimitry Andric if (Result.isNonZero()) 9450b57cec5SDimitry Andric S = SE.getAddRecExpr(NewOps, AR->getLoop(), 9460b57cec5SDimitry Andric // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 9470b57cec5SDimitry Andric SCEV::FlagAnyWrap); 9480b57cec5SDimitry Andric return Result; 949*36b606aeSDimitry Andric } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) { 950*36b606aeSDimitry Andric if (EnableVScaleImmediates && M->getNumOperands() == 2) { 9510fca6ea1SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0))) 9520fca6ea1SDimitry Andric if (isa<SCEVVScale>(M->getOperand(1))) { 9530fca6ea1SDimitry Andric S = SE.getConstant(M->getType(), 0); 9540fca6ea1SDimitry Andric return Immediate::getScalable(C->getValue()->getSExtValue()); 9550b57cec5SDimitry Andric } 956*36b606aeSDimitry Andric } 957*36b606aeSDimitry Andric } 9580fca6ea1SDimitry Andric return Immediate::getZero(); 9590b57cec5SDimitry Andric } 9600b57cec5SDimitry Andric 9610b57cec5SDimitry Andric /// If S involves the addition of a GlobalValue address, return that symbol, and 9620b57cec5SDimitry Andric /// mutate S to point to a new SCEV with that value excluded. 
9630b57cec5SDimitry Andric static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) { 9640b57cec5SDimitry Andric if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { 9650b57cec5SDimitry Andric if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) { 9660b57cec5SDimitry Andric S = SE.getConstant(GV->getType(), 0); 9670b57cec5SDimitry Andric return GV; 9680b57cec5SDimitry Andric } 9690b57cec5SDimitry Andric } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { 970e8d8bef9SDimitry Andric SmallVector<const SCEV *, 8> NewOps(Add->operands()); 9710b57cec5SDimitry Andric GlobalValue *Result = ExtractSymbol(NewOps.back(), SE); 9720b57cec5SDimitry Andric if (Result) 9730b57cec5SDimitry Andric S = SE.getAddExpr(NewOps); 9740b57cec5SDimitry Andric return Result; 9750b57cec5SDimitry Andric } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { 976e8d8bef9SDimitry Andric SmallVector<const SCEV *, 8> NewOps(AR->operands()); 9770b57cec5SDimitry Andric GlobalValue *Result = ExtractSymbol(NewOps.front(), SE); 9780b57cec5SDimitry Andric if (Result) 9790b57cec5SDimitry Andric S = SE.getAddRecExpr(NewOps, AR->getLoop(), 9800b57cec5SDimitry Andric // FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 9810b57cec5SDimitry Andric SCEV::FlagAnyWrap); 9820b57cec5SDimitry Andric return Result; 9830b57cec5SDimitry Andric } 9840b57cec5SDimitry Andric return nullptr; 9850b57cec5SDimitry Andric } 9860b57cec5SDimitry Andric 9870b57cec5SDimitry Andric /// Returns true if the specified instruction is using the specified value as an 9880b57cec5SDimitry Andric /// address. 9890b57cec5SDimitry Andric static bool isAddressUse(const TargetTransformInfo &TTI, 9900b57cec5SDimitry Andric Instruction *Inst, Value *OperandVal) { 9910b57cec5SDimitry Andric bool isAddress = isa<LoadInst>(Inst); 9920b57cec5SDimitry Andric if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 9930b57cec5SDimitry Andric if (SI->getPointerOperand() == OperandVal) 9940b57cec5SDimitry Andric isAddress = true; 9950b57cec5SDimitry Andric } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 9960b57cec5SDimitry Andric // Addressing modes can also be folded into prefetches and a variety 9970b57cec5SDimitry Andric // of intrinsics. 
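    // (Commentary added for exposition): which operand is the address varies
    // by intrinsic; memset, prefetch, and masked_load take it as argument 0,
    // masked_store as argument 1, memmove/memcpy may use either argument 0
    // or 1, and unrecognized target intrinsics are queried via
    // TTI.getTgtMemIntrinsic below.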
9980b57cec5SDimitry Andric switch (II->getIntrinsicID()) { 9990b57cec5SDimitry Andric case Intrinsic::memset: 10000b57cec5SDimitry Andric case Intrinsic::prefetch: 10015ffd83dbSDimitry Andric case Intrinsic::masked_load: 10020b57cec5SDimitry Andric if (II->getArgOperand(0) == OperandVal) 10030b57cec5SDimitry Andric isAddress = true; 10040b57cec5SDimitry Andric break; 10055ffd83dbSDimitry Andric case Intrinsic::masked_store: 10065ffd83dbSDimitry Andric if (II->getArgOperand(1) == OperandVal) 10075ffd83dbSDimitry Andric isAddress = true; 10085ffd83dbSDimitry Andric break; 10090b57cec5SDimitry Andric case Intrinsic::memmove: 10100b57cec5SDimitry Andric case Intrinsic::memcpy: 10110b57cec5SDimitry Andric if (II->getArgOperand(0) == OperandVal || 10120b57cec5SDimitry Andric II->getArgOperand(1) == OperandVal) 10130b57cec5SDimitry Andric isAddress = true; 10140b57cec5SDimitry Andric break; 10150b57cec5SDimitry Andric default: { 10160b57cec5SDimitry Andric MemIntrinsicInfo IntrInfo; 10170b57cec5SDimitry Andric if (TTI.getTgtMemIntrinsic(II, IntrInfo)) { 10180b57cec5SDimitry Andric if (IntrInfo.PtrVal == OperandVal) 10190b57cec5SDimitry Andric isAddress = true; 10200b57cec5SDimitry Andric } 10210b57cec5SDimitry Andric } 10220b57cec5SDimitry Andric } 10230b57cec5SDimitry Andric } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) { 10240b57cec5SDimitry Andric if (RMW->getPointerOperand() == OperandVal) 10250b57cec5SDimitry Andric isAddress = true; 10260b57cec5SDimitry Andric } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) { 10270b57cec5SDimitry Andric if (CmpX->getPointerOperand() == OperandVal) 10280b57cec5SDimitry Andric isAddress = true; 10290b57cec5SDimitry Andric } 10300b57cec5SDimitry Andric return isAddress; 10310b57cec5SDimitry Andric } 10320b57cec5SDimitry Andric 10330b57cec5SDimitry Andric /// Return the type of the memory being accessed. 10340b57cec5SDimitry Andric static MemAccessTy getAccessType(const TargetTransformInfo &TTI, 10350b57cec5SDimitry Andric Instruction *Inst, Value *OperandVal) { 103606c3fb27SDimitry Andric MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext()); 103706c3fb27SDimitry Andric 103806c3fb27SDimitry Andric // First get the type of memory being accessed. 103906c3fb27SDimitry Andric if (Type *Ty = Inst->getAccessType()) 104006c3fb27SDimitry Andric AccessTy.MemTy = Ty; 104106c3fb27SDimitry Andric 104206c3fb27SDimitry Andric // Then get the pointer address space. 
10430b57cec5SDimitry Andric if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) { 10440b57cec5SDimitry Andric AccessTy.AddrSpace = SI->getPointerAddressSpace(); 10450b57cec5SDimitry Andric } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) { 10460b57cec5SDimitry Andric AccessTy.AddrSpace = LI->getPointerAddressSpace(); 10470b57cec5SDimitry Andric } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) { 10480b57cec5SDimitry Andric AccessTy.AddrSpace = RMW->getPointerAddressSpace(); 10490b57cec5SDimitry Andric } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) { 10500b57cec5SDimitry Andric AccessTy.AddrSpace = CmpX->getPointerAddressSpace(); 10510b57cec5SDimitry Andric } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) { 10520b57cec5SDimitry Andric switch (II->getIntrinsicID()) { 10530b57cec5SDimitry Andric case Intrinsic::prefetch: 10540b57cec5SDimitry Andric case Intrinsic::memset: 10550b57cec5SDimitry Andric AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace(); 10560b57cec5SDimitry Andric AccessTy.MemTy = OperandVal->getType(); 10570b57cec5SDimitry Andric break; 10580b57cec5SDimitry Andric case Intrinsic::memmove: 10590b57cec5SDimitry Andric case Intrinsic::memcpy: 10600b57cec5SDimitry Andric AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace(); 10610b57cec5SDimitry Andric AccessTy.MemTy = OperandVal->getType(); 10620b57cec5SDimitry Andric break; 10635ffd83dbSDimitry Andric case Intrinsic::masked_load: 10645ffd83dbSDimitry Andric AccessTy.AddrSpace = 10655ffd83dbSDimitry Andric II->getArgOperand(0)->getType()->getPointerAddressSpace(); 10665ffd83dbSDimitry Andric break; 10675ffd83dbSDimitry Andric case Intrinsic::masked_store: 10685ffd83dbSDimitry Andric AccessTy.AddrSpace = 10695ffd83dbSDimitry Andric II->getArgOperand(1)->getType()->getPointerAddressSpace(); 10705ffd83dbSDimitry Andric break; 10710b57cec5SDimitry Andric default: { 10720b57cec5SDimitry Andric MemIntrinsicInfo IntrInfo; 10730b57cec5SDimitry Andric if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) { 10740b57cec5SDimitry Andric AccessTy.AddrSpace 10750b57cec5SDimitry Andric = IntrInfo.PtrVal->getType()->getPointerAddressSpace(); 10760b57cec5SDimitry Andric } 10770b57cec5SDimitry Andric 10780b57cec5SDimitry Andric break; 10790b57cec5SDimitry Andric } 10800b57cec5SDimitry Andric } 10810b57cec5SDimitry Andric } 10820b57cec5SDimitry Andric 10830b57cec5SDimitry Andric return AccessTy; 10840b57cec5SDimitry Andric } 10850b57cec5SDimitry Andric 10860b57cec5SDimitry Andric /// Return true if this AddRec is already a phi in its loop. 10870b57cec5SDimitry Andric static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) { 10880b57cec5SDimitry Andric for (PHINode &PN : AR->getLoop()->getHeader()->phis()) { 10890b57cec5SDimitry Andric if (SE.isSCEVable(PN.getType()) && 10900b57cec5SDimitry Andric (SE.getEffectiveSCEVType(PN.getType()) == 10910b57cec5SDimitry Andric SE.getEffectiveSCEVType(AR->getType())) && 10920b57cec5SDimitry Andric SE.getSCEV(&PN) == AR) 10930b57cec5SDimitry Andric return true; 10940b57cec5SDimitry Andric } 10950b57cec5SDimitry Andric return false; 10960b57cec5SDimitry Andric } 10970b57cec5SDimitry Andric 10980b57cec5SDimitry Andric /// Check if expanding this expression is likely to incur significant cost. This 10990b57cec5SDimitry Andric /// is tricky because SCEV doesn't track which expressions are actually computed 11000b57cec5SDimitry Andric /// by the current IR. 
11010b57cec5SDimitry Andric /// 11020b57cec5SDimitry Andric /// We currently allow expansion of IV increments that involve adds, 11030b57cec5SDimitry Andric /// multiplication by constants, and AddRecs from existing phis. 11040b57cec5SDimitry Andric /// 11050b57cec5SDimitry Andric /// TODO: Allow UDivExpr if we can find an existing IV increment that is an 11060b57cec5SDimitry Andric /// obvious multiple of the UDivExpr. 11070b57cec5SDimitry Andric static bool isHighCostExpansion(const SCEV *S, 11080b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV*> &Processed, 11090b57cec5SDimitry Andric ScalarEvolution &SE) { 11100b57cec5SDimitry Andric // Zero/One operand expressions 11110b57cec5SDimitry Andric switch (S->getSCEVType()) { 11120b57cec5SDimitry Andric case scUnknown: 11130b57cec5SDimitry Andric case scConstant: 111406c3fb27SDimitry Andric case scVScale: 11150b57cec5SDimitry Andric return false; 11160b57cec5SDimitry Andric case scTruncate: 11170b57cec5SDimitry Andric return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(), 11180b57cec5SDimitry Andric Processed, SE); 11190b57cec5SDimitry Andric case scZeroExtend: 11200b57cec5SDimitry Andric return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(), 11210b57cec5SDimitry Andric Processed, SE); 11220b57cec5SDimitry Andric case scSignExtend: 11230b57cec5SDimitry Andric return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(), 11240b57cec5SDimitry Andric Processed, SE); 1125e8d8bef9SDimitry Andric default: 1126e8d8bef9SDimitry Andric break; 11270b57cec5SDimitry Andric } 11280b57cec5SDimitry Andric 11290b57cec5SDimitry Andric if (!Processed.insert(S).second) 11300b57cec5SDimitry Andric return false; 11310b57cec5SDimitry Andric 11320b57cec5SDimitry Andric if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { 11330b57cec5SDimitry Andric for (const SCEV *S : Add->operands()) { 11340b57cec5SDimitry Andric if (isHighCostExpansion(S, Processed, SE)) 11350b57cec5SDimitry Andric return true; 11360b57cec5SDimitry Andric } 11370b57cec5SDimitry Andric return false; 11380b57cec5SDimitry Andric } 11390b57cec5SDimitry Andric 11400b57cec5SDimitry Andric if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { 11410b57cec5SDimitry Andric if (Mul->getNumOperands() == 2) { 11420b57cec5SDimitry Andric // Multiplication by a constant is ok 11430b57cec5SDimitry Andric if (isa<SCEVConstant>(Mul->getOperand(0))) 11440b57cec5SDimitry Andric return isHighCostExpansion(Mul->getOperand(1), Processed, SE); 11450b57cec5SDimitry Andric 11460b57cec5SDimitry Andric // If we have the value of one operand, check if an existing 11470b57cec5SDimitry Andric // multiplication already generates this expression. 11480b57cec5SDimitry Andric if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) { 11490b57cec5SDimitry Andric Value *UVal = U->getValue(); 11500b57cec5SDimitry Andric for (User *UR : UVal->users()) { 11510b57cec5SDimitry Andric // If U is a constant, it may be used by a ConstantExpr. 
11520b57cec5SDimitry Andric           Instruction *UI = dyn_cast<Instruction>(UR);
11530b57cec5SDimitry Andric           if (UI && UI->getOpcode() == Instruction::Mul &&
11540b57cec5SDimitry Andric               SE.isSCEVable(UI->getType())) {
11550b57cec5SDimitry Andric             return SE.getSCEV(UI) == Mul;
11560b57cec5SDimitry Andric           }
11570b57cec5SDimitry Andric         }
11580b57cec5SDimitry Andric       }
11590b57cec5SDimitry Andric     }
11600b57cec5SDimitry Andric   }
11610b57cec5SDimitry Andric 
11620b57cec5SDimitry Andric   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
11630b57cec5SDimitry Andric     if (isExistingPhi(AR, SE))
11640b57cec5SDimitry Andric       return false;
11650b57cec5SDimitry Andric   }
11660b57cec5SDimitry Andric 
11670b57cec5SDimitry Andric   // For now, consider any other type of expression (div/mul/min/max) high cost.
11680b57cec5SDimitry Andric   return true;
11690b57cec5SDimitry Andric }
11700b57cec5SDimitry Andric 
11710b57cec5SDimitry Andric namespace {
11720b57cec5SDimitry Andric 
11730b57cec5SDimitry Andric class LSRUse;
11740b57cec5SDimitry Andric 
11750b57cec5SDimitry Andric } // end anonymous namespace
11760b57cec5SDimitry Andric 
11770b57cec5SDimitry Andric /// Check if the addressing mode defined by \p F is completely
11780b57cec5SDimitry Andric /// folded in \p LU at isel time.
11790b57cec5SDimitry Andric /// This includes address-mode folding and special icmp tricks.
11800b57cec5SDimitry Andric /// This function returns true if \p LU can accommodate what \p F
11810b57cec5SDimitry Andric /// defines and up to 1 base + 1 scaled + offset.
11820b57cec5SDimitry Andric /// In other words, if \p F has several base registers, this function may
11830b57cec5SDimitry Andric /// still return true. Therefore, users still need to account for
11840b57cec5SDimitry Andric /// additional base registers and/or unfolded offsets to derive an
11850b57cec5SDimitry Andric /// accurate cost model.
11860b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
11870b57cec5SDimitry Andric                                  const LSRUse &LU, const Formula &F);
11880b57cec5SDimitry Andric 
11890b57cec5SDimitry Andric // Get the cost of the scaling factor used in F for LU.
1190fe6060f1SDimitry Andric static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
11910b57cec5SDimitry Andric                                             const LSRUse &LU, const Formula &F,
11920b57cec5SDimitry Andric                                             const Loop &L);
11930b57cec5SDimitry Andric 
11940b57cec5SDimitry Andric namespace {
11950b57cec5SDimitry Andric 
11960b57cec5SDimitry Andric /// This class is used to measure and compare candidate formulae.
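// Added note (not in the original source): Cost wraps the TTI::LSRCost
// metrics. When the InsnsCost flag is explicitly set on the command line,
// isLess() compares C.Insns first, so e.g. {Insns=2, NumRegs=5} beats
// {Insns=3, NumRegs=2}; otherwise the comparison is delegated entirely to
// the target hook TTI::isLSRCostLess.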
11970b57cec5SDimitry Andric class Cost { 11980b57cec5SDimitry Andric const Loop *L = nullptr; 11990b57cec5SDimitry Andric ScalarEvolution *SE = nullptr; 12000b57cec5SDimitry Andric const TargetTransformInfo *TTI = nullptr; 12010b57cec5SDimitry Andric TargetTransformInfo::LSRCost C; 1202fe6060f1SDimitry Andric TTI::AddressingModeKind AMK = TTI::AMK_None; 12030b57cec5SDimitry Andric 12040b57cec5SDimitry Andric public: 12050b57cec5SDimitry Andric Cost() = delete; 1206fe6060f1SDimitry Andric Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI, 1207fe6060f1SDimitry Andric TTI::AddressingModeKind AMK) : 1208fe6060f1SDimitry Andric L(L), SE(&SE), TTI(&TTI), AMK(AMK) { 12090b57cec5SDimitry Andric C.Insns = 0; 12100b57cec5SDimitry Andric C.NumRegs = 0; 12110b57cec5SDimitry Andric C.AddRecCost = 0; 12120b57cec5SDimitry Andric C.NumIVMuls = 0; 12130b57cec5SDimitry Andric C.NumBaseAdds = 0; 12140b57cec5SDimitry Andric C.ImmCost = 0; 12150b57cec5SDimitry Andric C.SetupCost = 0; 12160b57cec5SDimitry Andric C.ScaleCost = 0; 12170b57cec5SDimitry Andric } 12180b57cec5SDimitry Andric 1219bdd1243dSDimitry Andric bool isLess(const Cost &Other) const; 12200b57cec5SDimitry Andric 12210b57cec5SDimitry Andric void Lose(); 12220b57cec5SDimitry Andric 12230b57cec5SDimitry Andric #ifndef NDEBUG 12240b57cec5SDimitry Andric // Once any of the metrics loses, they must all remain losers. 12250b57cec5SDimitry Andric bool isValid() { 12260b57cec5SDimitry Andric return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds 12270b57cec5SDimitry Andric | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u) 12280b57cec5SDimitry Andric || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds 12290b57cec5SDimitry Andric & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u); 12300b57cec5SDimitry Andric } 12310b57cec5SDimitry Andric #endif 12320b57cec5SDimitry Andric 12330b57cec5SDimitry Andric bool isLoser() { 12340b57cec5SDimitry Andric assert(isValid() && "invalid cost"); 12350b57cec5SDimitry Andric return C.NumRegs == ~0u; 12360b57cec5SDimitry Andric } 12370b57cec5SDimitry Andric 12380b57cec5SDimitry Andric void RateFormula(const Formula &F, 12390b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs, 12400b57cec5SDimitry Andric const DenseSet<const SCEV *> &VisitedRegs, 12410b57cec5SDimitry Andric const LSRUse &LU, 12420b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr); 12430b57cec5SDimitry Andric 12440b57cec5SDimitry Andric void print(raw_ostream &OS) const; 12450b57cec5SDimitry Andric void dump() const; 12460b57cec5SDimitry Andric 12470b57cec5SDimitry Andric private: 12480b57cec5SDimitry Andric void RateRegister(const Formula &F, const SCEV *Reg, 12490b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs); 12500b57cec5SDimitry Andric void RatePrimaryRegister(const Formula &F, const SCEV *Reg, 12510b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> &Regs, 12520b57cec5SDimitry Andric SmallPtrSetImpl<const SCEV *> *LoserRegs); 12530b57cec5SDimitry Andric }; 12540b57cec5SDimitry Andric 12550b57cec5SDimitry Andric /// An operand value in an instruction which is to be replaced with some 12560b57cec5SDimitry Andric /// equivalent, possibly strength-reduced, replacement. 12570b57cec5SDimitry Andric struct LSRFixup { 12580b57cec5SDimitry Andric /// The instruction which will be updated. 12590b57cec5SDimitry Andric Instruction *UserInst = nullptr; 12600b57cec5SDimitry Andric 12610b57cec5SDimitry Andric /// The operand of the instruction which will be replaced. 
The operand may be 12620b57cec5SDimitry Andric /// used more than once; every instance will be replaced. 12630b57cec5SDimitry Andric Value *OperandValToReplace = nullptr; 12640b57cec5SDimitry Andric 12650b57cec5SDimitry Andric /// If this user is to use the post-incremented value of an induction 12660b57cec5SDimitry Andric /// variable, this set is non-empty and holds the loops associated with the 12670b57cec5SDimitry Andric /// induction variable. 12680b57cec5SDimitry Andric PostIncLoopSet PostIncLoops; 12690b57cec5SDimitry Andric 12700b57cec5SDimitry Andric /// A constant offset to be added to the LSRUse expression. This allows 12710b57cec5SDimitry Andric /// multiple fixups to share the same LSRUse with different offsets, for 12720b57cec5SDimitry Andric /// example in an unrolled loop. 12730fca6ea1SDimitry Andric Immediate Offset = Immediate::getZero(); 12740b57cec5SDimitry Andric 12750b57cec5SDimitry Andric LSRFixup() = default; 12760b57cec5SDimitry Andric 12770b57cec5SDimitry Andric bool isUseFullyOutsideLoop(const Loop *L) const; 12780b57cec5SDimitry Andric 12790b57cec5SDimitry Andric void print(raw_ostream &OS) const; 12800b57cec5SDimitry Andric void dump() const; 12810b57cec5SDimitry Andric }; 12820b57cec5SDimitry Andric 12830b57cec5SDimitry Andric /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted 12840b57cec5SDimitry Andric /// SmallVectors of const SCEV*. 12850b57cec5SDimitry Andric struct UniquifierDenseMapInfo { 12860b57cec5SDimitry Andric static SmallVector<const SCEV *, 4> getEmptyKey() { 12870b57cec5SDimitry Andric SmallVector<const SCEV *, 4> V; 12880b57cec5SDimitry Andric V.push_back(reinterpret_cast<const SCEV *>(-1)); 12890b57cec5SDimitry Andric return V; 12900b57cec5SDimitry Andric } 12910b57cec5SDimitry Andric 12920b57cec5SDimitry Andric static SmallVector<const SCEV *, 4> getTombstoneKey() { 12930b57cec5SDimitry Andric SmallVector<const SCEV *, 4> V; 12940b57cec5SDimitry Andric V.push_back(reinterpret_cast<const SCEV *>(-2)); 12950b57cec5SDimitry Andric return V; 12960b57cec5SDimitry Andric } 12970b57cec5SDimitry Andric 12980b57cec5SDimitry Andric static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) { 12990b57cec5SDimitry Andric return static_cast<unsigned>(hash_combine_range(V.begin(), V.end())); 13000b57cec5SDimitry Andric } 13010b57cec5SDimitry Andric 13020b57cec5SDimitry Andric static bool isEqual(const SmallVector<const SCEV *, 4> &LHS, 13030b57cec5SDimitry Andric const SmallVector<const SCEV *, 4> &RHS) { 13040b57cec5SDimitry Andric return LHS == RHS; 13050b57cec5SDimitry Andric } 13060b57cec5SDimitry Andric }; 13070b57cec5SDimitry Andric 13080b57cec5SDimitry Andric /// This class holds the state that LSR keeps for each use in IVUsers, as well 13090b57cec5SDimitry Andric /// as uses invented by LSR itself. It includes information about what kinds of 13100b57cec5SDimitry Andric /// things can be folded into the user, information about the user itself, and 13110b57cec5SDimitry Andric /// information about how the use may be satisfied. TODO: Represent multiple 13120b57cec5SDimitry Andric /// users of the same expression in common? 13130b57cec5SDimitry Andric class LSRUse { 13140b57cec5SDimitry Andric DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier; 13150b57cec5SDimitry Andric 13160b57cec5SDimitry Andric public: 13170b57cec5SDimitry Andric /// An enum for a kind of use, indicating what types of scaled and immediate 13180b57cec5SDimitry Andric /// operands it might support. 
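// Illustrative mapping (added commentary): the pointer operand of a load or
// store typically becomes an Address use; an induction variable compared
// against a loop-invariant bound can become an ICmpZero use, where the
// comparison is rewritten so everything folds into one operand compared
// against zero; other in-loop arithmetic lands in Basic, or Special when a
// -1 scale must be allowed.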
13190b57cec5SDimitry Andric enum KindType { 13200b57cec5SDimitry Andric Basic, ///< A normal use, with no folding. 13210b57cec5SDimitry Andric Special, ///< A special case of basic, allowing -1 scales. 13220b57cec5SDimitry Andric Address, ///< An address use; folding according to TargetLowering 13230b57cec5SDimitry Andric ICmpZero ///< An equality icmp with both operands folded into one. 13240b57cec5SDimitry Andric // TODO: Add a generic icmp too? 13250b57cec5SDimitry Andric }; 13260b57cec5SDimitry Andric 13270b57cec5SDimitry Andric using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>; 13280b57cec5SDimitry Andric 13290b57cec5SDimitry Andric KindType Kind; 13300b57cec5SDimitry Andric MemAccessTy AccessTy; 13310b57cec5SDimitry Andric 13320b57cec5SDimitry Andric /// The list of operands which are to be replaced. 13330b57cec5SDimitry Andric SmallVector<LSRFixup, 8> Fixups; 13340b57cec5SDimitry Andric 13350b57cec5SDimitry Andric /// Keep track of the min and max offsets of the fixups. 13360fca6ea1SDimitry Andric Immediate MinOffset = Immediate::getFixedMax(); 13370fca6ea1SDimitry Andric Immediate MaxOffset = Immediate::getFixedMin(); 13380b57cec5SDimitry Andric 13390b57cec5SDimitry Andric /// This records whether all of the fixups using this LSRUse are outside of 13400b57cec5SDimitry Andric /// the loop, in which case some special-case heuristics may be used. 13410b57cec5SDimitry Andric bool AllFixupsOutsideLoop = true; 13420b57cec5SDimitry Andric 13430b57cec5SDimitry Andric /// RigidFormula is set to true to guarantee that this use will be associated 13440b57cec5SDimitry Andric /// with a single formula--the one that initially matched. Some SCEV 13450b57cec5SDimitry Andric /// expressions cannot be expanded. This allows LSR to consider the registers 13460b57cec5SDimitry Andric /// used by those expressions without the need to expand them later after 13470b57cec5SDimitry Andric /// changing the formula. 13480b57cec5SDimitry Andric bool RigidFormula = false; 13490b57cec5SDimitry Andric 13500b57cec5SDimitry Andric /// This records the widest use type for any fixup using this 13510b57cec5SDimitry Andric /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max 13520b57cec5SDimitry Andric /// fixup widths to be equivalent, because the narrower one may be relying on 13530b57cec5SDimitry Andric /// the implicit truncation to truncate away bogus bits. 13540b57cec5SDimitry Andric Type *WidestFixupType = nullptr; 13550b57cec5SDimitry Andric 13560b57cec5SDimitry Andric /// A list of ways to build a value that can satisfy this user. After the 13570b57cec5SDimitry Andric /// list is populated, one of these is selected heuristically and used to 13580b57cec5SDimitry Andric /// formulate a replacement for OperandValToReplace in UserInst. 13590b57cec5SDimitry Andric SmallVector<Formula, 12> Formulae; 13600b57cec5SDimitry Andric 13610b57cec5SDimitry Andric /// The set of register candidates used by all formulae in this LSRUse. 
13620b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 4> Regs; 13630b57cec5SDimitry Andric 13640b57cec5SDimitry Andric LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {} 13650b57cec5SDimitry Andric 13660b57cec5SDimitry Andric LSRFixup &getNewFixup() { 13670b57cec5SDimitry Andric Fixups.push_back(LSRFixup()); 13680b57cec5SDimitry Andric return Fixups.back(); 13690b57cec5SDimitry Andric } 13700b57cec5SDimitry Andric 13710b57cec5SDimitry Andric void pushFixup(LSRFixup &f) { 13720b57cec5SDimitry Andric Fixups.push_back(f); 13730fca6ea1SDimitry Andric if (Immediate::isKnownGT(f.Offset, MaxOffset)) 13740b57cec5SDimitry Andric MaxOffset = f.Offset; 13750fca6ea1SDimitry Andric if (Immediate::isKnownLT(f.Offset, MinOffset)) 13760b57cec5SDimitry Andric MinOffset = f.Offset; 13770b57cec5SDimitry Andric } 13780b57cec5SDimitry Andric 13790b57cec5SDimitry Andric bool HasFormulaWithSameRegs(const Formula &F) const; 13800b57cec5SDimitry Andric float getNotSelectedProbability(const SCEV *Reg) const; 13810b57cec5SDimitry Andric bool InsertFormula(const Formula &F, const Loop &L); 13820b57cec5SDimitry Andric void DeleteFormula(Formula &F); 13830b57cec5SDimitry Andric void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses); 13840b57cec5SDimitry Andric 13850b57cec5SDimitry Andric void print(raw_ostream &OS) const; 13860b57cec5SDimitry Andric void dump() const; 13870b57cec5SDimitry Andric }; 13880b57cec5SDimitry Andric 13890b57cec5SDimitry Andric } // end anonymous namespace 13900b57cec5SDimitry Andric 13910b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, 13920b57cec5SDimitry Andric LSRUse::KindType Kind, MemAccessTy AccessTy, 13930fca6ea1SDimitry Andric GlobalValue *BaseGV, Immediate BaseOffset, 13940b57cec5SDimitry Andric bool HasBaseReg, int64_t Scale, 13950b57cec5SDimitry Andric Instruction *Fixup = nullptr); 13960b57cec5SDimitry Andric 13970b57cec5SDimitry Andric static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) { 13980b57cec5SDimitry Andric if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg)) 13990b57cec5SDimitry Andric return 1; 14000b57cec5SDimitry Andric if (Depth == 0) 14010b57cec5SDimitry Andric return 0; 14020b57cec5SDimitry Andric if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg)) 14030b57cec5SDimitry Andric return getSetupCost(S->getStart(), Depth - 1); 1404e8d8bef9SDimitry Andric if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg)) 14050b57cec5SDimitry Andric return getSetupCost(S->getOperand(), Depth - 1); 14060b57cec5SDimitry Andric if (auto S = dyn_cast<SCEVNAryExpr>(Reg)) 1407bdd1243dSDimitry Andric return std::accumulate(S->operands().begin(), S->operands().end(), 0, 14080b57cec5SDimitry Andric [&](unsigned i, const SCEV *Reg) { 14090b57cec5SDimitry Andric return i + getSetupCost(Reg, Depth - 1); 14100b57cec5SDimitry Andric }); 14110b57cec5SDimitry Andric if (auto S = dyn_cast<SCEVUDivExpr>(Reg)) 14120b57cec5SDimitry Andric return getSetupCost(S->getLHS(), Depth - 1) + 14130b57cec5SDimitry Andric getSetupCost(S->getRHS(), Depth - 1); 14140b57cec5SDimitry Andric return 0; 14150b57cec5SDimitry Andric } 14160b57cec5SDimitry Andric 14170b57cec5SDimitry Andric /// Tally up interesting quantities from the given register. 
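// Sketch of the behavior (added, not from the source): for an addrec of the
// current loop, RateRegister charges one register plus an AddRecCost of 1
// (or 0 when the target's pre-/post-indexed addressing can absorb the
// increment); an addrec of a sibling loop, i.e. one that does not contain
// L, makes the whole formula Lose() outright.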
14180b57cec5SDimitry Andric void Cost::RateRegister(const Formula &F, const SCEV *Reg,
14190b57cec5SDimitry Andric                         SmallPtrSetImpl<const SCEV *> &Regs) {
14200b57cec5SDimitry Andric   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
14210b57cec5SDimitry Andric     // If this is an addrec for another loop, it should be an invariant
14220b57cec5SDimitry Andric     // with respect to L since L is the innermost loop (at least
14230b57cec5SDimitry Andric     // for now LSR only handles innermost loops).
14240b57cec5SDimitry Andric     if (AR->getLoop() != L) {
14250b57cec5SDimitry Andric       // If the AddRec exists, consider its register free and leave it alone.
1426fe6060f1SDimitry Andric       if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed)
14270b57cec5SDimitry Andric         return;
14280b57cec5SDimitry Andric 
14290b57cec5SDimitry Andric       // It is bad to allow LSR for the current loop to add induction variables
14300b57cec5SDimitry Andric       // for its sibling loops.
14310b57cec5SDimitry Andric       if (!AR->getLoop()->contains(L)) {
14320b57cec5SDimitry Andric         Lose();
14330b57cec5SDimitry Andric         return;
14340b57cec5SDimitry Andric       }
14350b57cec5SDimitry Andric 
14360b57cec5SDimitry Andric       // Otherwise, it will be an invariant with respect to Loop L.
14370b57cec5SDimitry Andric       ++C.NumRegs;
14380b57cec5SDimitry Andric       return;
14390b57cec5SDimitry Andric     }
14400b57cec5SDimitry Andric 
14410b57cec5SDimitry Andric     unsigned LoopCost = 1;
14420b57cec5SDimitry Andric     if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
14430b57cec5SDimitry Andric         TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
14440b57cec5SDimitry Andric 
14450b57cec5SDimitry Andric       // If the step size matches the base offset, we could use pre-indexed
14460b57cec5SDimitry Andric       // addressing.
14470fca6ea1SDimitry Andric       if (AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed()) {
14480b57cec5SDimitry Andric         if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
14490fca6ea1SDimitry Andric           if (Step->getAPInt() == F.BaseOffset.getFixedValue())
14500b57cec5SDimitry Andric             LoopCost = 0;
1451fe6060f1SDimitry Andric       } else if (AMK == TTI::AMK_PostIndexed) {
14520b57cec5SDimitry Andric         const SCEV *LoopStep = AR->getStepRecurrence(*SE);
14530b57cec5SDimitry Andric         if (isa<SCEVConstant>(LoopStep)) {
14540b57cec5SDimitry Andric           const SCEV *LoopStart = AR->getStart();
14550b57cec5SDimitry Andric           if (!isa<SCEVConstant>(LoopStart) &&
14560b57cec5SDimitry Andric               SE->isLoopInvariant(LoopStart, L))
14570b57cec5SDimitry Andric             LoopCost = 0;
14580b57cec5SDimitry Andric         }
14590b57cec5SDimitry Andric       }
14600b57cec5SDimitry Andric     }
14610b57cec5SDimitry Andric     C.AddRecCost += LoopCost;
14620b57cec5SDimitry Andric 
14630b57cec5SDimitry Andric     // Add the step value register, if it needs one.
14640b57cec5SDimitry Andric     // TODO: The non-affine case isn't precisely modeled here.
14650b57cec5SDimitry Andric     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
14660b57cec5SDimitry Andric       if (!Regs.count(AR->getOperand(1))) {
14670b57cec5SDimitry Andric         RateRegister(F, AR->getOperand(1), Regs);
14680b57cec5SDimitry Andric         if (isLoser())
14690b57cec5SDimitry Andric           return;
14700b57cec5SDimitry Andric       }
14710b57cec5SDimitry Andric     }
14720b57cec5SDimitry Andric   }
14730b57cec5SDimitry Andric   ++C.NumRegs;
14740b57cec5SDimitry Andric 
14750b57cec5SDimitry Andric   // Rough heuristic; favor registers which don't require extra setup
14760b57cec5SDimitry Andric   // instructions in the preheader.
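  // Added illustration: getSetupCost counts the leaf values a register
  // expansion would materialize, so reg((%a + %b)) scores 2 while
  // reg({%a,+,%b}<%L>) scores 1, since only the start value needs setup
  // code in the preheader.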
14770b57cec5SDimitry Andric   C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
14780b57cec5SDimitry Andric   // Ensure we don't, even with the recursion limit, produce invalid costs.
14790b57cec5SDimitry Andric   C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
14800b57cec5SDimitry Andric 
14810b57cec5SDimitry Andric   C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
14820b57cec5SDimitry Andric                  SE->hasComputableLoopEvolution(Reg, L);
14830b57cec5SDimitry Andric }
14840b57cec5SDimitry Andric 
14850b57cec5SDimitry Andric /// Record this register in the set. If we haven't seen it before, rate
14860b57cec5SDimitry Andric /// it. Optional LoserRegs provides a way to declare any formula that refers to
14870b57cec5SDimitry Andric /// one of those regs an instant loser.
14880b57cec5SDimitry Andric void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
14890b57cec5SDimitry Andric                                SmallPtrSetImpl<const SCEV *> &Regs,
14900b57cec5SDimitry Andric                                SmallPtrSetImpl<const SCEV *> *LoserRegs) {
14910b57cec5SDimitry Andric   if (LoserRegs && LoserRegs->count(Reg)) {
14920b57cec5SDimitry Andric     Lose();
14930b57cec5SDimitry Andric     return;
14940b57cec5SDimitry Andric   }
14950b57cec5SDimitry Andric   if (Regs.insert(Reg).second) {
14960b57cec5SDimitry Andric     RateRegister(F, Reg, Regs);
14970b57cec5SDimitry Andric     if (LoserRegs && isLoser())
14980b57cec5SDimitry Andric       LoserRegs->insert(Reg);
14990b57cec5SDimitry Andric   }
15000b57cec5SDimitry Andric }
15010b57cec5SDimitry Andric 
15020b57cec5SDimitry Andric void Cost::RateFormula(const Formula &F,
15030b57cec5SDimitry Andric                        SmallPtrSetImpl<const SCEV *> &Regs,
15040b57cec5SDimitry Andric                        const DenseSet<const SCEV *> &VisitedRegs,
15050b57cec5SDimitry Andric                        const LSRUse &LU,
15060b57cec5SDimitry Andric                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
150781ad6265SDimitry Andric   if (isLoser())
150881ad6265SDimitry Andric     return;
15090b57cec5SDimitry Andric   assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
15100b57cec5SDimitry Andric   // Tally up the registers.
15110b57cec5SDimitry Andric   unsigned PrevAddRecCost = C.AddRecCost;
15120b57cec5SDimitry Andric   unsigned PrevNumRegs = C.NumRegs;
15130b57cec5SDimitry Andric   unsigned PrevNumBaseAdds = C.NumBaseAdds;
15140b57cec5SDimitry Andric   if (const SCEV *ScaledReg = F.ScaledReg) {
15150b57cec5SDimitry Andric     if (VisitedRegs.count(ScaledReg)) {
15160b57cec5SDimitry Andric       Lose();
15170b57cec5SDimitry Andric       return;
15180b57cec5SDimitry Andric     }
15190b57cec5SDimitry Andric     RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
15200b57cec5SDimitry Andric     if (isLoser())
15210b57cec5SDimitry Andric       return;
15220b57cec5SDimitry Andric   }
15230b57cec5SDimitry Andric   for (const SCEV *BaseReg : F.BaseRegs) {
15240b57cec5SDimitry Andric     if (VisitedRegs.count(BaseReg)) {
15250b57cec5SDimitry Andric       Lose();
15260b57cec5SDimitry Andric       return;
15270b57cec5SDimitry Andric     }
15280b57cec5SDimitry Andric     RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);
15290b57cec5SDimitry Andric     if (isLoser())
15300b57cec5SDimitry Andric       return;
15310b57cec5SDimitry Andric   }
15320b57cec5SDimitry Andric 
15330b57cec5SDimitry Andric   // Determine how many (unfolded) adds we'll need inside the loop.
15340b57cec5SDimitry Andric   size_t NumBaseParts = F.getNumRegs();
15350b57cec5SDimitry Andric   if (NumBaseParts > 1)
15360b57cec5SDimitry Andric     // Do not count the base and a possible second register if the target
15370b57cec5SDimitry Andric     // allows folding 2 registers.
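    // Worked example (added): with NumBaseParts == 3 and a scale the target
    // can fold, this charges 3 - (1 + 1) = 1 unfolded add; with no foldable
    // scale it charges 3 - 1 = 2.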
15380b57cec5SDimitry Andric     C.NumBaseAdds +=
15390b57cec5SDimitry Andric         NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
15400fca6ea1SDimitry Andric   C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());
15410b57cec5SDimitry Andric 
15420b57cec5SDimitry Andric   // Accumulate non-free scaling amounts.
1543fe6060f1SDimitry Andric   C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue();
15440b57cec5SDimitry Andric 
15450b57cec5SDimitry Andric   // Tally up the non-zero immediates.
15460b57cec5SDimitry Andric   for (const LSRFixup &Fixup : LU.Fixups) {
15470fca6ea1SDimitry Andric     if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
15480fca6ea1SDimitry Andric       Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
15490b57cec5SDimitry Andric       if (F.BaseGV)
15500b57cec5SDimitry Andric         C.ImmCost += 64; // Handle symbolic values conservatively.
15510b57cec5SDimitry Andric                          // TODO: This should probably be the pointer size.
15520fca6ea1SDimitry Andric       else if (Offset.isNonZero())
15530fca6ea1SDimitry Andric         C.ImmCost +=
15540fca6ea1SDimitry Andric             APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();
15550b57cec5SDimitry Andric 
15560b57cec5SDimitry Andric       // Check with target if this offset with this instruction is
15570b57cec5SDimitry Andric       // specifically not supported.
15580fca6ea1SDimitry Andric       if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
15590b57cec5SDimitry Andric           !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
15600b57cec5SDimitry Andric                                 Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
15610b57cec5SDimitry Andric         C.NumBaseAdds++;
15620fca6ea1SDimitry Andric     } else {
15630fca6ea1SDimitry Andric       // Incompatible immediate type; increase the cost to avoid using it.
15640fca6ea1SDimitry Andric       C.ImmCost += 2048;
15650fca6ea1SDimitry Andric     }
15660b57cec5SDimitry Andric   }
15670b57cec5SDimitry Andric 
15680b57cec5SDimitry Andric   // If we don't count instruction cost, exit here.
15690b57cec5SDimitry Andric   if (!InsnsCost) {
15700b57cec5SDimitry Andric     assert(isValid() && "invalid cost");
15710b57cec5SDimitry Andric     return;
15720b57cec5SDimitry Andric   }
15730b57cec5SDimitry Andric 
15740b57cec5SDimitry Andric   // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as an
15750b57cec5SDimitry Andric   // additional instruction (at least a fill).
15768bcb0991SDimitry Andric   // TODO: Need to distinguish register classes?
15778bcb0991SDimitry Andric   unsigned TTIRegNum = TTI->getNumberOfRegisters(
15788bcb0991SDimitry Andric       TTI->getRegisterClassForType(false, F.getType())) - 1;
15790b57cec5SDimitry Andric   if (C.NumRegs > TTIRegNum) {
15800b57cec5SDimitry Andric     // If the cost already exceeded TTIRegNum, only newly added registers can
15810b57cec5SDimitry Andric     // add new instructions.
15820b57cec5SDimitry Andric     if (PrevNumRegs > TTIRegNum)
15830b57cec5SDimitry Andric       C.Insns += (C.NumRegs - PrevNumRegs);
15840b57cec5SDimitry Andric     else
15850b57cec5SDimitry Andric       C.Insns += (C.NumRegs - TTIRegNum);
15860b57cec5SDimitry Andric   }
15870b57cec5SDimitry Andric 
15880b57cec5SDimitry Andric   // If an ICmpZero formula doesn't end with 0, it cannot be replaced by just
15890b57cec5SDimitry Andric   // an add or sub. We'll need to compare the final result of the AddRec.
15900b57cec5SDimitry Andric   // That means we'll need an additional instruction. But if the target can
15910b57cec5SDimitry Andric   // macro-fuse a compare with a branch, don't count this extra instruction.
15920b57cec5SDimitry Andric // For -10 + {0, +, 1}: 15930b57cec5SDimitry Andric // i = i + 1; 15940b57cec5SDimitry Andric // cmp i, 10 15950b57cec5SDimitry Andric // 15960b57cec5SDimitry Andric // For {-10, +, 1}: 15970b57cec5SDimitry Andric // i = i + 1; 15980b57cec5SDimitry Andric if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() && 15990b57cec5SDimitry Andric !TTI->canMacroFuseCmp()) 16000b57cec5SDimitry Andric C.Insns++; 16010b57cec5SDimitry Andric // Each new AddRec adds 1 instruction to calculation. 16020b57cec5SDimitry Andric C.Insns += (C.AddRecCost - PrevAddRecCost); 16030b57cec5SDimitry Andric 16040b57cec5SDimitry Andric // BaseAdds adds instructions for unfolded registers. 16050b57cec5SDimitry Andric if (LU.Kind != LSRUse::ICmpZero) 16060b57cec5SDimitry Andric C.Insns += C.NumBaseAdds - PrevNumBaseAdds; 16070b57cec5SDimitry Andric assert(isValid() && "invalid cost"); 16080b57cec5SDimitry Andric } 16090b57cec5SDimitry Andric 16100b57cec5SDimitry Andric /// Set this cost to a losing value. 16110b57cec5SDimitry Andric void Cost::Lose() { 16120b57cec5SDimitry Andric C.Insns = std::numeric_limits<unsigned>::max(); 16130b57cec5SDimitry Andric C.NumRegs = std::numeric_limits<unsigned>::max(); 16140b57cec5SDimitry Andric C.AddRecCost = std::numeric_limits<unsigned>::max(); 16150b57cec5SDimitry Andric C.NumIVMuls = std::numeric_limits<unsigned>::max(); 16160b57cec5SDimitry Andric C.NumBaseAdds = std::numeric_limits<unsigned>::max(); 16170b57cec5SDimitry Andric C.ImmCost = std::numeric_limits<unsigned>::max(); 16180b57cec5SDimitry Andric C.SetupCost = std::numeric_limits<unsigned>::max(); 16190b57cec5SDimitry Andric C.ScaleCost = std::numeric_limits<unsigned>::max(); 16200b57cec5SDimitry Andric } 16210b57cec5SDimitry Andric 16220b57cec5SDimitry Andric /// Choose the lower cost. 1623bdd1243dSDimitry Andric bool Cost::isLess(const Cost &Other) const { 16240b57cec5SDimitry Andric if (InsnsCost.getNumOccurrences() > 0 && InsnsCost && 16250b57cec5SDimitry Andric C.Insns != Other.C.Insns) 16260b57cec5SDimitry Andric return C.Insns < Other.C.Insns; 16270b57cec5SDimitry Andric return TTI->isLSRCostLess(C, Other.C); 16280b57cec5SDimitry Andric } 16290b57cec5SDimitry Andric 16300b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 16310b57cec5SDimitry Andric void Cost::print(raw_ostream &OS) const { 16320b57cec5SDimitry Andric if (InsnsCost) 16330b57cec5SDimitry Andric OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s "); 16340b57cec5SDimitry Andric OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s"); 16350b57cec5SDimitry Andric if (C.AddRecCost != 0) 16360b57cec5SDimitry Andric OS << ", with addrec cost " << C.AddRecCost; 16370b57cec5SDimitry Andric if (C.NumIVMuls != 0) 16380b57cec5SDimitry Andric OS << ", plus " << C.NumIVMuls << " IV mul" 16390b57cec5SDimitry Andric << (C.NumIVMuls == 1 ? "" : "s"); 16400b57cec5SDimitry Andric if (C.NumBaseAdds != 0) 16410b57cec5SDimitry Andric OS << ", plus " << C.NumBaseAdds << " base add" 16420b57cec5SDimitry Andric << (C.NumBaseAdds == 1 ? 
"" : "s"); 16430b57cec5SDimitry Andric if (C.ScaleCost != 0) 16440b57cec5SDimitry Andric OS << ", plus " << C.ScaleCost << " scale cost"; 16450b57cec5SDimitry Andric if (C.ImmCost != 0) 16460b57cec5SDimitry Andric OS << ", plus " << C.ImmCost << " imm cost"; 16470b57cec5SDimitry Andric if (C.SetupCost != 0) 16480b57cec5SDimitry Andric OS << ", plus " << C.SetupCost << " setup cost"; 16490b57cec5SDimitry Andric } 16500b57cec5SDimitry Andric 16510b57cec5SDimitry Andric LLVM_DUMP_METHOD void Cost::dump() const { 16520b57cec5SDimitry Andric print(errs()); errs() << '\n'; 16530b57cec5SDimitry Andric } 16540b57cec5SDimitry Andric #endif 16550b57cec5SDimitry Andric 16560b57cec5SDimitry Andric /// Test whether this fixup always uses its value outside of the given loop. 16570b57cec5SDimitry Andric bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const { 16580b57cec5SDimitry Andric // PHI nodes use their value in their incoming blocks. 16590b57cec5SDimitry Andric if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) { 16600b57cec5SDimitry Andric for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 16610b57cec5SDimitry Andric if (PN->getIncomingValue(i) == OperandValToReplace && 16620b57cec5SDimitry Andric L->contains(PN->getIncomingBlock(i))) 16630b57cec5SDimitry Andric return false; 16640b57cec5SDimitry Andric return true; 16650b57cec5SDimitry Andric } 16660b57cec5SDimitry Andric 16670b57cec5SDimitry Andric return !L->contains(UserInst); 16680b57cec5SDimitry Andric } 16690b57cec5SDimitry Andric 16700b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 16710b57cec5SDimitry Andric void LSRFixup::print(raw_ostream &OS) const { 16720b57cec5SDimitry Andric OS << "UserInst="; 16730b57cec5SDimitry Andric // Store is common and interesting enough to be worth special-casing. 16740b57cec5SDimitry Andric if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) { 16750b57cec5SDimitry Andric OS << "store "; 16760b57cec5SDimitry Andric Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false); 16770b57cec5SDimitry Andric } else if (UserInst->getType()->isVoidTy()) 16780b57cec5SDimitry Andric OS << UserInst->getOpcodeName(); 16790b57cec5SDimitry Andric else 16800b57cec5SDimitry Andric UserInst->printAsOperand(OS, /*PrintType=*/false); 16810b57cec5SDimitry Andric 16820b57cec5SDimitry Andric OS << ", OperandValToReplace="; 16830b57cec5SDimitry Andric OperandValToReplace->printAsOperand(OS, /*PrintType=*/false); 16840b57cec5SDimitry Andric 16850b57cec5SDimitry Andric for (const Loop *PIL : PostIncLoops) { 16860b57cec5SDimitry Andric OS << ", PostIncLoop="; 16870b57cec5SDimitry Andric PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false); 16880b57cec5SDimitry Andric } 16890b57cec5SDimitry Andric 16900fca6ea1SDimitry Andric if (Offset.isNonZero()) 16910b57cec5SDimitry Andric OS << ", Offset=" << Offset; 16920b57cec5SDimitry Andric } 16930b57cec5SDimitry Andric 16940b57cec5SDimitry Andric LLVM_DUMP_METHOD void LSRFixup::dump() const { 16950b57cec5SDimitry Andric print(errs()); errs() << '\n'; 16960b57cec5SDimitry Andric } 16970b57cec5SDimitry Andric #endif 16980b57cec5SDimitry Andric 16990b57cec5SDimitry Andric /// Test whether this use as a formula which has the same registers as the given 17000b57cec5SDimitry Andric /// formula. 
17010b57cec5SDimitry Andric bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
17020b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
17030b57cec5SDimitry Andric   if (F.ScaledReg) Key.push_back(F.ScaledReg);
17040b57cec5SDimitry Andric   // Unstable sort by host order ok, because this is only used for uniquifying.
17050b57cec5SDimitry Andric   llvm::sort(Key);
17060b57cec5SDimitry Andric   return Uniquifier.count(Key);
17070b57cec5SDimitry Andric }
17080b57cec5SDimitry Andric 
17090b57cec5SDimitry Andric /// Returns the probability of selecting a formula that does not reference Reg.
17100b57cec5SDimitry Andric float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
17110b57cec5SDimitry Andric   unsigned FNum = 0;
17120b57cec5SDimitry Andric   for (const Formula &F : Formulae)
17130b57cec5SDimitry Andric     if (F.referencesReg(Reg))
17140b57cec5SDimitry Andric       FNum++;
17150b57cec5SDimitry Andric   return ((float)(Formulae.size() - FNum)) / Formulae.size();
17160b57cec5SDimitry Andric }
17170b57cec5SDimitry Andric 
17180b57cec5SDimitry Andric /// If the given formula has not yet been inserted, add it to the list, and
17190b57cec5SDimitry Andric /// return true. Return false otherwise. The formula must be in canonical form.
17200b57cec5SDimitry Andric bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
17210b57cec5SDimitry Andric   assert(F.isCanonical(L) && "Invalid canonical representation");
17220b57cec5SDimitry Andric 
17230b57cec5SDimitry Andric   if (!Formulae.empty() && RigidFormula)
17240b57cec5SDimitry Andric     return false;
17250b57cec5SDimitry Andric 
17260b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
17270b57cec5SDimitry Andric   if (F.ScaledReg) Key.push_back(F.ScaledReg);
17280b57cec5SDimitry Andric   // Unstable sort by host order ok, because this is only used for uniquifying.
17290b57cec5SDimitry Andric   llvm::sort(Key);
17300b57cec5SDimitry Andric 
17310b57cec5SDimitry Andric   if (!Uniquifier.insert(Key).second)
17320b57cec5SDimitry Andric     return false;
17330b57cec5SDimitry Andric 
17340b57cec5SDimitry Andric   // Using a register to hold the value of 0 is not profitable.
17350b57cec5SDimitry Andric   assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
17360b57cec5SDimitry Andric          "Zero allocated in a scaled register!");
17370b57cec5SDimitry Andric #ifndef NDEBUG
17380b57cec5SDimitry Andric   for (const SCEV *BaseReg : F.BaseRegs)
17390b57cec5SDimitry Andric     assert(!BaseReg->isZero() && "Zero allocated in a base register!");
17400b57cec5SDimitry Andric #endif
17410b57cec5SDimitry Andric 
17420b57cec5SDimitry Andric   // Add the formula to the list.
17430b57cec5SDimitry Andric   Formulae.push_back(F);
17440b57cec5SDimitry Andric 
17450b57cec5SDimitry Andric   // Record registers now being used by this use.
17460b57cec5SDimitry Andric   Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
17470b57cec5SDimitry Andric   if (F.ScaledReg)
17480b57cec5SDimitry Andric     Regs.insert(F.ScaledReg);
17490b57cec5SDimitry Andric 
17500b57cec5SDimitry Andric   return true;
17510b57cec5SDimitry Andric }
17520b57cec5SDimitry Andric 
17530b57cec5SDimitry Andric /// Remove the given formula from this use's list.
17540b57cec5SDimitry Andric void LSRUse::DeleteFormula(Formula &F) {
17550b57cec5SDimitry Andric   if (&F != &Formulae.back())
17560b57cec5SDimitry Andric     std::swap(F, Formulae.back());
17570b57cec5SDimitry Andric   Formulae.pop_back();
17580b57cec5SDimitry Andric }
17590b57cec5SDimitry Andric 
17600b57cec5SDimitry Andric /// Recompute the Regs field, and update RegUses.
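// Added summary: after formulae have been deleted, this rebuilds Regs from
// the surviving formulae and drops this use's index from RegUses for any
// register that is no longer referenced, keeping the shared register-use
// bookkeeping consistent.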
17610b57cec5SDimitry Andric void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) { 17620b57cec5SDimitry Andric // Now that we've filtered out some formulae, recompute the Regs set. 17630b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs); 17640b57cec5SDimitry Andric Regs.clear(); 17650b57cec5SDimitry Andric for (const Formula &F : Formulae) { 17660b57cec5SDimitry Andric if (F.ScaledReg) Regs.insert(F.ScaledReg); 17670b57cec5SDimitry Andric Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end()); 17680b57cec5SDimitry Andric } 17690b57cec5SDimitry Andric 17700b57cec5SDimitry Andric // Update the RegTracker. 17710b57cec5SDimitry Andric for (const SCEV *S : OldRegs) 17720b57cec5SDimitry Andric if (!Regs.count(S)) 17730b57cec5SDimitry Andric RegUses.dropRegister(S, LUIdx); 17740b57cec5SDimitry Andric } 17750b57cec5SDimitry Andric 17760b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 17770b57cec5SDimitry Andric void LSRUse::print(raw_ostream &OS) const { 17780b57cec5SDimitry Andric OS << "LSR Use: Kind="; 17790b57cec5SDimitry Andric switch (Kind) { 17800b57cec5SDimitry Andric case Basic: OS << "Basic"; break; 17810b57cec5SDimitry Andric case Special: OS << "Special"; break; 17820b57cec5SDimitry Andric case ICmpZero: OS << "ICmpZero"; break; 17830b57cec5SDimitry Andric case Address: 17840b57cec5SDimitry Andric OS << "Address of "; 17850b57cec5SDimitry Andric if (AccessTy.MemTy->isPointerTy()) 17860b57cec5SDimitry Andric OS << "pointer"; // the full pointer type could be really verbose 17870b57cec5SDimitry Andric else { 17880b57cec5SDimitry Andric OS << *AccessTy.MemTy; 17890b57cec5SDimitry Andric } 17900b57cec5SDimitry Andric 17910b57cec5SDimitry Andric OS << " in addrspace(" << AccessTy.AddrSpace << ')'; 17920b57cec5SDimitry Andric } 17930b57cec5SDimitry Andric 17940b57cec5SDimitry Andric OS << ", Offsets={"; 17950b57cec5SDimitry Andric bool NeedComma = false; 17960b57cec5SDimitry Andric for (const LSRFixup &Fixup : Fixups) { 17970b57cec5SDimitry Andric if (NeedComma) OS << ','; 17980b57cec5SDimitry Andric OS << Fixup.Offset; 17990b57cec5SDimitry Andric NeedComma = true; 18000b57cec5SDimitry Andric } 18010b57cec5SDimitry Andric OS << '}'; 18020b57cec5SDimitry Andric 18030b57cec5SDimitry Andric if (AllFixupsOutsideLoop) 18040b57cec5SDimitry Andric OS << ", all-fixups-outside-loop"; 18050b57cec5SDimitry Andric 18060b57cec5SDimitry Andric if (WidestFixupType) 18070b57cec5SDimitry Andric OS << ", widest fixup type: " << *WidestFixupType; 18080b57cec5SDimitry Andric } 18090b57cec5SDimitry Andric 18100b57cec5SDimitry Andric LLVM_DUMP_METHOD void LSRUse::dump() const { 18110b57cec5SDimitry Andric print(errs()); errs() << '\n'; 18120b57cec5SDimitry Andric } 18130b57cec5SDimitry Andric #endif 18140b57cec5SDimitry Andric 18150b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, 18160b57cec5SDimitry Andric LSRUse::KindType Kind, MemAccessTy AccessTy, 18170fca6ea1SDimitry Andric GlobalValue *BaseGV, Immediate BaseOffset, 18180b57cec5SDimitry Andric bool HasBaseReg, int64_t Scale, 18190b57cec5SDimitry Andric Instruction *Fixup /* = nullptr */) { 18200b57cec5SDimitry Andric switch (Kind) { 18210fca6ea1SDimitry Andric case LSRUse::Address: { 18220fca6ea1SDimitry Andric int64_t FixedOffset = 18230fca6ea1SDimitry Andric BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue(); 18240fca6ea1SDimitry Andric int64_t ScalableOffset = 18250fca6ea1SDimitry Andric BaseOffset.isScalable() ? 
BaseOffset.getKnownMinValue() : 0; 18260fca6ea1SDimitry Andric return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset, 18270fca6ea1SDimitry Andric HasBaseReg, Scale, AccessTy.AddrSpace, 18280fca6ea1SDimitry Andric Fixup, ScalableOffset); 18290fca6ea1SDimitry Andric } 18300b57cec5SDimitry Andric case LSRUse::ICmpZero: 18310b57cec5SDimitry Andric // There's not even a target hook for querying whether it would be legal to 18320b57cec5SDimitry Andric // fold a GV into an ICmp. 18330b57cec5SDimitry Andric if (BaseGV) 18340b57cec5SDimitry Andric return false; 18350b57cec5SDimitry Andric 18360b57cec5SDimitry Andric // ICmp only has two operands; don't allow more than two non-trivial parts. 18370fca6ea1SDimitry Andric if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero()) 18380b57cec5SDimitry Andric return false; 18390b57cec5SDimitry Andric 18400b57cec5SDimitry Andric // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by 18410b57cec5SDimitry Andric // putting the scaled register in the other operand of the icmp. 18420b57cec5SDimitry Andric if (Scale != 0 && Scale != -1) 18430b57cec5SDimitry Andric return false; 18440b57cec5SDimitry Andric 18450b57cec5SDimitry Andric // If we have low-level target information, ask the target if it can fold an 18460b57cec5SDimitry Andric // integer immediate on an icmp. 18470fca6ea1SDimitry Andric if (BaseOffset.isNonZero()) { 18480fca6ea1SDimitry Andric // We don't have an interface to query whether the target supports 18490fca6ea1SDimitry Andric // icmpzero against scalable quantities yet. 18500fca6ea1SDimitry Andric if (BaseOffset.isScalable()) 18510fca6ea1SDimitry Andric return false; 18520fca6ea1SDimitry Andric 18530b57cec5SDimitry Andric // We have one of: 18540b57cec5SDimitry Andric // ICmpZero BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset 18550b57cec5SDimitry Andric // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset 18560b57cec5SDimitry Andric // Offs is the ICmp immediate. 18570b57cec5SDimitry Andric if (Scale == 0) 18580b57cec5SDimitry Andric // The cast does the right thing with 18590b57cec5SDimitry Andric // std::numeric_limits<int64_t>::min(). 18600fca6ea1SDimitry Andric BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue()); 18610fca6ea1SDimitry Andric return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue()); 18620b57cec5SDimitry Andric } 18630b57cec5SDimitry Andric 18640b57cec5SDimitry Andric // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg 18650b57cec5SDimitry Andric return true; 18660b57cec5SDimitry Andric 18670b57cec5SDimitry Andric case LSRUse::Basic: 18680b57cec5SDimitry Andric // Only handle single-register values. 18690fca6ea1SDimitry Andric return !BaseGV && Scale == 0 && BaseOffset.isZero(); 18700b57cec5SDimitry Andric 18710b57cec5SDimitry Andric case LSRUse::Special: 18720b57cec5SDimitry Andric // Special case Basic to handle -1 scales. 
18730fca6ea1SDimitry Andric     return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
18740b57cec5SDimitry Andric   }
18750b57cec5SDimitry Andric 
18760b57cec5SDimitry Andric   llvm_unreachable("Invalid LSRUse Kind!");
18770b57cec5SDimitry Andric }
18780b57cec5SDimitry Andric 
18790b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
18800fca6ea1SDimitry Andric                                  Immediate MinOffset, Immediate MaxOffset,
18810b57cec5SDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
18820fca6ea1SDimitry Andric                                  GlobalValue *BaseGV, Immediate BaseOffset,
18830b57cec5SDimitry Andric                                  bool HasBaseReg, int64_t Scale) {
18840fca6ea1SDimitry Andric   if (BaseOffset.isNonZero() &&
18850fca6ea1SDimitry Andric       (BaseOffset.isScalable() != MinOffset.isScalable() ||
18860fca6ea1SDimitry Andric        BaseOffset.isScalable() != MaxOffset.isScalable()))
18870fca6ea1SDimitry Andric     return false;
18880b57cec5SDimitry Andric   // Check for overflow.
18890fca6ea1SDimitry Andric   int64_t Base = BaseOffset.getKnownMinValue();
18900fca6ea1SDimitry Andric   int64_t Min = MinOffset.getKnownMinValue();
18910fca6ea1SDimitry Andric   int64_t Max = MaxOffset.getKnownMinValue();
18920fca6ea1SDimitry Andric   if (((int64_t)((uint64_t)Base + Min) > Base) != (Min > 0))
18930b57cec5SDimitry Andric     return false;
18940fca6ea1SDimitry Andric   MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
18950fca6ea1SDimitry Andric   if (((int64_t)((uint64_t)Base + Max) > Base) != (Max > 0))
18960b57cec5SDimitry Andric     return false;
18970fca6ea1SDimitry Andric   MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());
18980b57cec5SDimitry Andric 
18990b57cec5SDimitry Andric   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
19000b57cec5SDimitry Andric                               HasBaseReg, Scale) &&
19010b57cec5SDimitry Andric          isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
19020b57cec5SDimitry Andric                               HasBaseReg, Scale);
19030b57cec5SDimitry Andric }
19040b57cec5SDimitry Andric 
19050b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
19060fca6ea1SDimitry Andric                                  Immediate MinOffset, Immediate MaxOffset,
19070b57cec5SDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
19080b57cec5SDimitry Andric                                  const Formula &F, const Loop &L) {
19090b57cec5SDimitry Andric   // For the purpose of isAMCompletelyFolded, either having a canonical formula
19100b57cec5SDimitry Andric   // or a scale not equal to zero is correct.
19110b57cec5SDimitry Andric   // Problems may arise from non-canonical formulae having a scale == 0.
19120b57cec5SDimitry Andric   // Strictly speaking, it would be best to just rely on canonical formulae.
19130b57cec5SDimitry Andric   // However, when we generate the scaled formulae, we first check that the
19140b57cec5SDimitry Andric   // scaling factor is profitable before computing the actual ScaledReg, for
19150b57cec5SDimitry Andric   // compile time's sake.
19160b57cec5SDimitry Andric   assert((F.isCanonical(L) || F.Scale != 0));
19170b57cec5SDimitry Andric   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
19180b57cec5SDimitry Andric                               F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
19190b57cec5SDimitry Andric }
19200b57cec5SDimitry Andric 
19210b57cec5SDimitry Andric /// Test whether we know how to expand the current formula.
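// Added reading of the logic below: a use is legal either when the
// addressing mode folds completely for the whole offset range
// [MinOffset, MaxOffset], or when Scale == 1 and the scaled register can
// instead be folded as just another base register (HasBaseReg = true,
// Scale = 0).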
19220fca6ea1SDimitry Andric static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, 19230fca6ea1SDimitry Andric Immediate MaxOffset, LSRUse::KindType Kind, 19240b57cec5SDimitry Andric MemAccessTy AccessTy, GlobalValue *BaseGV, 19250fca6ea1SDimitry Andric Immediate BaseOffset, bool HasBaseReg, int64_t Scale) { 19260b57cec5SDimitry Andric // We know how to expand completely foldable formulae. 19270b57cec5SDimitry Andric return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, 19280b57cec5SDimitry Andric BaseOffset, HasBaseReg, Scale) || 19290b57cec5SDimitry Andric // Or formulae that use a base register produced by a sum of base 19300b57cec5SDimitry Andric // registers. 19310b57cec5SDimitry Andric (Scale == 1 && 19320b57cec5SDimitry Andric isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, 19330b57cec5SDimitry Andric BaseGV, BaseOffset, true, 0)); 19340b57cec5SDimitry Andric } 19350b57cec5SDimitry Andric 19360fca6ea1SDimitry Andric static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset, 19370fca6ea1SDimitry Andric Immediate MaxOffset, LSRUse::KindType Kind, 19380b57cec5SDimitry Andric MemAccessTy AccessTy, const Formula &F) { 19390b57cec5SDimitry Andric return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV, 19400b57cec5SDimitry Andric F.BaseOffset, F.HasBaseReg, F.Scale); 19410b57cec5SDimitry Andric } 19420b57cec5SDimitry Andric 19430fca6ea1SDimitry Andric static bool isLegalAddImmediate(const TargetTransformInfo &TTI, 19440fca6ea1SDimitry Andric Immediate Offset) { 19450fca6ea1SDimitry Andric if (Offset.isScalable()) 19460fca6ea1SDimitry Andric return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue()); 19470fca6ea1SDimitry Andric 19480fca6ea1SDimitry Andric return TTI.isLegalAddImmediate(Offset.getFixedValue()); 19490fca6ea1SDimitry Andric } 19500fca6ea1SDimitry Andric 19510b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, 19520b57cec5SDimitry Andric const LSRUse &LU, const Formula &F) { 19530b57cec5SDimitry Andric // Target may want to look at the user instructions. 19540b57cec5SDimitry Andric if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) { 19550b57cec5SDimitry Andric for (const LSRFixup &Fixup : LU.Fixups) 19560b57cec5SDimitry Andric if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV, 19570b57cec5SDimitry Andric (F.BaseOffset + Fixup.Offset), F.HasBaseReg, 19580b57cec5SDimitry Andric F.Scale, Fixup.UserInst)) 19590b57cec5SDimitry Andric return false; 19600b57cec5SDimitry Andric return true; 19610b57cec5SDimitry Andric } 19620b57cec5SDimitry Andric 19630b57cec5SDimitry Andric return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, 19640b57cec5SDimitry Andric LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg, 19650b57cec5SDimitry Andric F.Scale); 19660b57cec5SDimitry Andric } 19670b57cec5SDimitry Andric 1968fe6060f1SDimitry Andric static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI, 19690b57cec5SDimitry Andric const LSRUse &LU, const Formula &F, 19700b57cec5SDimitry Andric const Loop &L) { 19710b57cec5SDimitry Andric if (!F.Scale) 19720b57cec5SDimitry Andric return 0; 19730b57cec5SDimitry Andric 19740b57cec5SDimitry Andric // If the use is not completely folded in that instruction, we will have to 19750b57cec5SDimitry Andric // pay an extra cost only for scale != 1. 
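  // A sketch of the reasoning (not a statement about any particular target):
  // if a formula like BaseReg + 2*ScaledReg cannot be folded into the user's
  // addressing mode, materializing it needs a separate shift or multiply plus
  // an add, whereas BaseReg + 1*ScaledReg is just a plain add. Hence the cost
  // of 1 charged below for an unfoldable Scale != 1, and 0 for Scale == 1.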
19760b57cec5SDimitry Andric if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, 19770b57cec5SDimitry Andric LU.AccessTy, F, L)) 19780b57cec5SDimitry Andric return F.Scale != 1; 19790b57cec5SDimitry Andric 19800b57cec5SDimitry Andric switch (LU.Kind) { 19810b57cec5SDimitry Andric case LSRUse::Address: { 19820b57cec5SDimitry Andric // Check the scaling factor cost with both the min and max offsets. 19830fca6ea1SDimitry Andric int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0; 19840fca6ea1SDimitry Andric if (F.BaseOffset.isScalable()) { 19850fca6ea1SDimitry Andric ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue(); 19860fca6ea1SDimitry Andric ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue(); 19870fca6ea1SDimitry Andric } else { 19880fca6ea1SDimitry Andric FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue(); 19890fca6ea1SDimitry Andric FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue(); 19900fca6ea1SDimitry Andric } 1991fe6060f1SDimitry Andric InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost( 19920fca6ea1SDimitry Andric LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMin, ScalableMin), 19930fca6ea1SDimitry Andric F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace); 1994fe6060f1SDimitry Andric InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost( 19950fca6ea1SDimitry Andric LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMax, ScalableMax), 19960fca6ea1SDimitry Andric F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace); 19970b57cec5SDimitry Andric 1998fe6060f1SDimitry Andric assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() && 19990b57cec5SDimitry Andric "Legal addressing mode has an illegal cost!"); 20000b57cec5SDimitry Andric return std::max(ScaleCostMinOffset, ScaleCostMaxOffset); 20010b57cec5SDimitry Andric } 20020b57cec5SDimitry Andric case LSRUse::ICmpZero: 20030b57cec5SDimitry Andric case LSRUse::Basic: 20040b57cec5SDimitry Andric case LSRUse::Special: 20050b57cec5SDimitry Andric // The use is completely folded, i.e., everything is folded into the 20060b57cec5SDimitry Andric // instruction. 20070b57cec5SDimitry Andric return 0; 20080b57cec5SDimitry Andric } 20090b57cec5SDimitry Andric 20100b57cec5SDimitry Andric llvm_unreachable("Invalid LSRUse Kind!"); 20110b57cec5SDimitry Andric } 20120b57cec5SDimitry Andric 20130b57cec5SDimitry Andric static bool isAlwaysFoldable(const TargetTransformInfo &TTI, 20140b57cec5SDimitry Andric LSRUse::KindType Kind, MemAccessTy AccessTy, 20150fca6ea1SDimitry Andric GlobalValue *BaseGV, Immediate BaseOffset, 20160b57cec5SDimitry Andric bool HasBaseReg) { 20170b57cec5SDimitry Andric // Fast-path: zero is always foldable. 20180fca6ea1SDimitry Andric if (BaseOffset.isZero() && !BaseGV) 20190fca6ea1SDimitry Andric return true; 20200b57cec5SDimitry Andric 20210b57cec5SDimitry Andric // Conservatively, create an address with an immediate and a 20220b57cec5SDimitry Andric // base and a scale. 20230b57cec5SDimitry Andric int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; 20240b57cec5SDimitry Andric 20250b57cec5SDimitry Andric // Canonicalize a scale of 1 to a base register if the formula doesn't 20260b57cec5SDimitry Andric // already have a base register. 20270b57cec5SDimitry Andric if (!HasBaseReg && Scale == 1) { 20280b57cec5SDimitry Andric Scale = 0; 20290b57cec5SDimitry Andric HasBaseReg = true; 20300b57cec5SDimitry Andric } 20310b57cec5SDimitry Andric 20320fca6ea1SDimitry Andric // FIXME: Try with + without a scale? Maybe based on TTI? 
20330fca6ea1SDimitry Andric // I think basereg + scaledreg + immediateoffset isn't a good 'conservative' 20340fca6ea1SDimitry Andric // default for many architectures, not just AArch64 SVE. More investigation 20350fca6ea1SDimitry Andric // needed later to determine if this should be used more widely than just 20360fca6ea1SDimitry Andric // on scalable types. 20370fca6ea1SDimitry Andric if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero && 20380fca6ea1SDimitry Andric AccessTy.MemTy && AccessTy.MemTy->isScalableTy() && DropScaledForVScale) 20390fca6ea1SDimitry Andric Scale = 0; 20400fca6ea1SDimitry Andric 20410b57cec5SDimitry Andric return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset, 20420b57cec5SDimitry Andric HasBaseReg, Scale); 20430b57cec5SDimitry Andric } 20440b57cec5SDimitry Andric 20450b57cec5SDimitry Andric static bool isAlwaysFoldable(const TargetTransformInfo &TTI, 20460fca6ea1SDimitry Andric ScalarEvolution &SE, Immediate MinOffset, 20470fca6ea1SDimitry Andric Immediate MaxOffset, LSRUse::KindType Kind, 20480b57cec5SDimitry Andric MemAccessTy AccessTy, const SCEV *S, 20490b57cec5SDimitry Andric bool HasBaseReg) { 20500b57cec5SDimitry Andric // Fast-path: zero is always foldable. 20510b57cec5SDimitry Andric if (S->isZero()) return true; 20520b57cec5SDimitry Andric 20530b57cec5SDimitry Andric // Conservatively, create an address with an immediate and a 20540b57cec5SDimitry Andric // base and a scale. 20550fca6ea1SDimitry Andric Immediate BaseOffset = ExtractImmediate(S, SE); 20560b57cec5SDimitry Andric GlobalValue *BaseGV = ExtractSymbol(S, SE); 20570b57cec5SDimitry Andric 20580b57cec5SDimitry Andric // If there's anything else involved, it's not foldable. 20590b57cec5SDimitry Andric if (!S->isZero()) return false; 20600b57cec5SDimitry Andric 20610b57cec5SDimitry Andric // Fast-path: zero is always foldable. 20620fca6ea1SDimitry Andric if (BaseOffset.isZero() && !BaseGV) 20630fca6ea1SDimitry Andric return true; 20640fca6ea1SDimitry Andric 20650fca6ea1SDimitry Andric if (BaseOffset.isScalable()) 20660fca6ea1SDimitry Andric return false; 20670b57cec5SDimitry Andric 20680b57cec5SDimitry Andric // Conservatively, create an address with an immediate and a 20690b57cec5SDimitry Andric // base and a scale. 20700b57cec5SDimitry Andric int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1; 20710b57cec5SDimitry Andric 20720b57cec5SDimitry Andric return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV, 20730b57cec5SDimitry Andric BaseOffset, HasBaseReg, Scale); 20740b57cec5SDimitry Andric } 20750b57cec5SDimitry Andric 20760b57cec5SDimitry Andric namespace { 20770b57cec5SDimitry Andric 20780b57cec5SDimitry Andric /// An individual increment in a Chain of IV increments. Relate an IV user to 20790b57cec5SDimitry Andric /// an expression that computes the IV it uses from the IV used by the previous 20800b57cec5SDimitry Andric /// link in the Chain. 20810b57cec5SDimitry Andric /// 20820b57cec5SDimitry Andric /// For the head of a chain, IncExpr holds the absolute SCEV expression for the 20830b57cec5SDimitry Andric /// original IVOperand. The head of the chain's IVOperand is only valid during 20840b57cec5SDimitry Andric /// chain collection, before LSR replaces IV users. During chain generation, 20850b57cec5SDimitry Andric /// IncExpr can be used to find the new IVOperand that computes the same 20860b57cec5SDimitry Andric /// expression. 
20870b57cec5SDimitry Andric struct IVInc { 20880b57cec5SDimitry Andric Instruction *UserInst; 20890b57cec5SDimitry Andric Value* IVOperand; 20900b57cec5SDimitry Andric const SCEV *IncExpr; 20910b57cec5SDimitry Andric 20920b57cec5SDimitry Andric IVInc(Instruction *U, Value *O, const SCEV *E) 20930b57cec5SDimitry Andric : UserInst(U), IVOperand(O), IncExpr(E) {} 20940b57cec5SDimitry Andric }; 20950b57cec5SDimitry Andric 20960b57cec5SDimitry Andric // The list of IV increments in program order. We typically add the head of a 20970b57cec5SDimitry Andric // chain without finding subsequent links. 20980b57cec5SDimitry Andric struct IVChain { 20990b57cec5SDimitry Andric SmallVector<IVInc, 1> Incs; 21000b57cec5SDimitry Andric const SCEV *ExprBase = nullptr; 21010b57cec5SDimitry Andric 21020b57cec5SDimitry Andric IVChain() = default; 21030b57cec5SDimitry Andric IVChain(const IVInc &Head, const SCEV *Base) 21040b57cec5SDimitry Andric : Incs(1, Head), ExprBase(Base) {} 21050b57cec5SDimitry Andric 21060b57cec5SDimitry Andric using const_iterator = SmallVectorImpl<IVInc>::const_iterator; 21070b57cec5SDimitry Andric 21080b57cec5SDimitry Andric // Return the first increment in the chain. 21090b57cec5SDimitry Andric const_iterator begin() const { 21100b57cec5SDimitry Andric assert(!Incs.empty()); 21110b57cec5SDimitry Andric return std::next(Incs.begin()); 21120b57cec5SDimitry Andric } 21130b57cec5SDimitry Andric const_iterator end() const { 21140b57cec5SDimitry Andric return Incs.end(); 21150b57cec5SDimitry Andric } 21160b57cec5SDimitry Andric 21170b57cec5SDimitry Andric // Returns true if this chain contains any increments. 21180b57cec5SDimitry Andric bool hasIncs() const { return Incs.size() >= 2; } 21190b57cec5SDimitry Andric 21200b57cec5SDimitry Andric // Add an IVInc to the end of this chain. 21210b57cec5SDimitry Andric void add(const IVInc &X) { Incs.push_back(X); } 21220b57cec5SDimitry Andric 21230b57cec5SDimitry Andric // Returns the last UserInst in the chain. 21240b57cec5SDimitry Andric Instruction *tailUserInst() const { return Incs.back().UserInst; } 21250b57cec5SDimitry Andric 21260b57cec5SDimitry Andric // Returns true if IncExpr can be profitably added to this chain. 21270b57cec5SDimitry Andric bool isProfitableIncrement(const SCEV *OperExpr, 21280b57cec5SDimitry Andric const SCEV *IncExpr, 21290b57cec5SDimitry Andric ScalarEvolution&); 21300b57cec5SDimitry Andric }; 21310b57cec5SDimitry Andric 21320b57cec5SDimitry Andric /// Helper for CollectChains to track multiple IV increment uses. Distinguish 21330b57cec5SDimitry Andric /// between FarUsers that definitely cross IV increments and NearUsers that may 21340b57cec5SDimitry Andric /// be used between IV increments. 21350b57cec5SDimitry Andric struct ChainUsers { 21360b57cec5SDimitry Andric SmallPtrSet<Instruction*, 4> FarUsers; 21370b57cec5SDimitry Andric SmallPtrSet<Instruction*, 4> NearUsers; 21380b57cec5SDimitry Andric }; 21390b57cec5SDimitry Andric 21400b57cec5SDimitry Andric /// This class holds state for the main loop strength reduction logic. 
21410b57cec5SDimitry Andric class LSRInstance {
21420b57cec5SDimitry Andric   IVUsers &IU;
21430b57cec5SDimitry Andric   ScalarEvolution &SE;
21440b57cec5SDimitry Andric   DominatorTree &DT;
21450b57cec5SDimitry Andric   LoopInfo &LI;
21460b57cec5SDimitry Andric   AssumptionCache &AC;
21475ffd83dbSDimitry Andric   TargetLibraryInfo &TLI;
21480b57cec5SDimitry Andric   const TargetTransformInfo &TTI;
21490b57cec5SDimitry Andric   Loop *const L;
21505ffd83dbSDimitry Andric   MemorySSAUpdater *MSSAU;
2151fe6060f1SDimitry Andric   TTI::AddressingModeKind AMK;
2152fcaf7f86SDimitry Andric   mutable SCEVExpander Rewriter;
21530b57cec5SDimitry Andric   bool Changed = false;
21540b57cec5SDimitry Andric 
21550b57cec5SDimitry Andric   /// This is the insert position at which the current loop's induction
21560b57cec5SDimitry Andric   /// variable increment should be placed. In simple loops, this is the latch
21570b57cec5SDimitry Andric   /// block's terminator. But in more complicated cases, this is a position
21580b57cec5SDimitry Andric   /// which will dominate all the in-loop post-increment users.
21590b57cec5SDimitry Andric   Instruction *IVIncInsertPos = nullptr;
21600b57cec5SDimitry Andric 
21610b57cec5SDimitry Andric   /// Interesting factors between use strides.
21620b57cec5SDimitry Andric   ///
21630b57cec5SDimitry Andric   /// We explicitly use a SetVector which contains a SmallSet, instead of the
21640b57cec5SDimitry Andric   /// default, a SmallDenseSet, because we need to use the full range of
21650b57cec5SDimitry Andric   /// int64_ts, and there's currently no good way of doing that with
21660b57cec5SDimitry Andric   /// SmallDenseSet.
21670b57cec5SDimitry Andric   SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;
21680b57cec5SDimitry Andric 
2169bdd1243dSDimitry Andric   /// The cost of the current SCEV; the best solution found by LSR will be
2170bdd1243dSDimitry Andric   /// dropped if that solution is not profitable.
2171bdd1243dSDimitry Andric   Cost BaselineCost;
2172bdd1243dSDimitry Andric 
21730b57cec5SDimitry Andric   /// Interesting use types, to facilitate truncation reuse.
21740b57cec5SDimitry Andric   SmallSetVector<Type *, 4> Types;
21750b57cec5SDimitry Andric 
21760b57cec5SDimitry Andric   /// The list of interesting uses.
21770b57cec5SDimitry Andric   mutable SmallVector<LSRUse, 16> Uses;
21780b57cec5SDimitry Andric 
21790b57cec5SDimitry Andric   /// Track which uses use which register candidates.
21800b57cec5SDimitry Andric   RegUseTracker RegUses;
21810b57cec5SDimitry Andric 
21820b57cec5SDimitry Andric   // Limit the number of chains to avoid quadratic behavior. We don't expect to
21830b57cec5SDimitry Andric   // have more than a few IV increment chains in a loop. Missing a Chain falls
21840b57cec5SDimitry Andric   // back to normal LSR behavior for those uses.
21850b57cec5SDimitry Andric   static const unsigned MaxChains = 8;
21860b57cec5SDimitry Andric 
21870b57cec5SDimitry Andric   /// IV users can form a chain of IV increments.
21880b57cec5SDimitry Andric   SmallVector<IVChain, MaxChains> IVChainVec;
21890b57cec5SDimitry Andric 
21900b57cec5SDimitry Andric   /// IV users that belong to profitable IVChains.
21910b57cec5SDimitry Andric   SmallPtrSet<Use*, MaxChains> IVIncSet;
21920b57cec5SDimitry Andric 
2193fe6060f1SDimitry Andric   /// Induction variables that were generated and inserted by the SCEV Expander.
2194fe6060f1SDimitry Andric SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs; 2195fe6060f1SDimitry Andric 21960b57cec5SDimitry Andric void OptimizeShadowIV(); 21970b57cec5SDimitry Andric bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse); 21980b57cec5SDimitry Andric ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse); 21990b57cec5SDimitry Andric void OptimizeLoopTermCond(); 22000b57cec5SDimitry Andric 22010b57cec5SDimitry Andric void ChainInstruction(Instruction *UserInst, Instruction *IVOper, 22020b57cec5SDimitry Andric SmallVectorImpl<ChainUsers> &ChainUsersVec); 22030b57cec5SDimitry Andric void FinalizeChain(IVChain &Chain); 22040b57cec5SDimitry Andric void CollectChains(); 2205fcaf7f86SDimitry Andric void GenerateIVChain(const IVChain &Chain, 22060b57cec5SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts); 22070b57cec5SDimitry Andric 22080b57cec5SDimitry Andric void CollectInterestingTypesAndFactors(); 22090b57cec5SDimitry Andric void CollectFixupsAndInitialFormulae(); 22100b57cec5SDimitry Andric 22110b57cec5SDimitry Andric // Support for sharing of LSRUses between LSRFixups. 22120b57cec5SDimitry Andric using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>; 22130b57cec5SDimitry Andric UseMapTy UseMap; 22140b57cec5SDimitry Andric 22150fca6ea1SDimitry Andric bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg, 22160b57cec5SDimitry Andric LSRUse::KindType Kind, MemAccessTy AccessTy); 22170b57cec5SDimitry Andric 22180fca6ea1SDimitry Andric std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind, 22190b57cec5SDimitry Andric MemAccessTy AccessTy); 22200b57cec5SDimitry Andric 22210b57cec5SDimitry Andric void DeleteUse(LSRUse &LU, size_t LUIdx); 22220b57cec5SDimitry Andric 22230b57cec5SDimitry Andric LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU); 22240b57cec5SDimitry Andric 22250b57cec5SDimitry Andric void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); 22260b57cec5SDimitry Andric void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx); 22270b57cec5SDimitry Andric void CountRegisters(const Formula &F, size_t LUIdx); 22280b57cec5SDimitry Andric bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F); 22290b57cec5SDimitry Andric 22300b57cec5SDimitry Andric void CollectLoopInvariantFixupsAndFormulae(); 22310b57cec5SDimitry Andric 22320b57cec5SDimitry Andric void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base, 22330b57cec5SDimitry Andric unsigned Depth = 0); 22340b57cec5SDimitry Andric 22350b57cec5SDimitry Andric void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx, 22360b57cec5SDimitry Andric const Formula &Base, unsigned Depth, 22370b57cec5SDimitry Andric size_t Idx, bool IsScaledReg = false); 22380b57cec5SDimitry Andric void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base); 22390b57cec5SDimitry Andric void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, 22400b57cec5SDimitry Andric const Formula &Base, size_t Idx, 22410b57cec5SDimitry Andric bool IsScaledReg = false); 22420b57cec5SDimitry Andric void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base); 22430b57cec5SDimitry Andric void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx, 22440b57cec5SDimitry Andric const Formula &Base, 22450fca6ea1SDimitry Andric const SmallVectorImpl<Immediate> &Worklist, 22460b57cec5SDimitry Andric size_t Idx, bool IsScaledReg = false); 22470b57cec5SDimitry Andric void GenerateConstantOffsets(LSRUse &LU, 
unsigned LUIdx, Formula Base); 22480b57cec5SDimitry Andric void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base); 22490b57cec5SDimitry Andric void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base); 22500b57cec5SDimitry Andric void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base); 22510b57cec5SDimitry Andric void GenerateCrossUseConstantOffsets(); 22520b57cec5SDimitry Andric void GenerateAllReuseFormulae(); 22530b57cec5SDimitry Andric 22540b57cec5SDimitry Andric void FilterOutUndesirableDedicatedRegisters(); 22550b57cec5SDimitry Andric 22560b57cec5SDimitry Andric size_t EstimateSearchSpaceComplexity() const; 22570b57cec5SDimitry Andric void NarrowSearchSpaceByDetectingSupersets(); 22580b57cec5SDimitry Andric void NarrowSearchSpaceByCollapsingUnrolledCode(); 22590b57cec5SDimitry Andric void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(); 22600b57cec5SDimitry Andric void NarrowSearchSpaceByFilterFormulaWithSameScaledReg(); 22615ffd83dbSDimitry Andric void NarrowSearchSpaceByFilterPostInc(); 22620b57cec5SDimitry Andric void NarrowSearchSpaceByDeletingCostlyFormulas(); 22630b57cec5SDimitry Andric void NarrowSearchSpaceByPickingWinnerRegs(); 22640b57cec5SDimitry Andric void NarrowSearchSpaceUsingHeuristics(); 22650b57cec5SDimitry Andric 22660b57cec5SDimitry Andric void SolveRecurse(SmallVectorImpl<const Formula *> &Solution, 22670b57cec5SDimitry Andric Cost &SolutionCost, 22680b57cec5SDimitry Andric SmallVectorImpl<const Formula *> &Workspace, 22690b57cec5SDimitry Andric const Cost &CurCost, 22700b57cec5SDimitry Andric const SmallPtrSet<const SCEV *, 16> &CurRegs, 22710b57cec5SDimitry Andric DenseSet<const SCEV *> &VisitedRegs) const; 22720b57cec5SDimitry Andric void Solve(SmallVectorImpl<const Formula *> &Solution) const; 22730b57cec5SDimitry Andric 22740b57cec5SDimitry Andric BasicBlock::iterator 22750b57cec5SDimitry Andric HoistInsertPosition(BasicBlock::iterator IP, 22760b57cec5SDimitry Andric const SmallVectorImpl<Instruction *> &Inputs) const; 2277fcaf7f86SDimitry Andric BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP, 22780b57cec5SDimitry Andric const LSRFixup &LF, 2279fcaf7f86SDimitry Andric const LSRUse &LU) const; 22800b57cec5SDimitry Andric 22810b57cec5SDimitry Andric Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F, 2282fcaf7f86SDimitry Andric BasicBlock::iterator IP, 22830b57cec5SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; 22840b57cec5SDimitry Andric void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF, 2285fcaf7f86SDimitry Andric const Formula &F, 22860b57cec5SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; 22870b57cec5SDimitry Andric void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F, 22880b57cec5SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) const; 22890b57cec5SDimitry Andric void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution); 22900b57cec5SDimitry Andric 22910b57cec5SDimitry Andric public: 22920b57cec5SDimitry Andric LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT, 22930b57cec5SDimitry Andric LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC, 22945ffd83dbSDimitry Andric TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU); 22950b57cec5SDimitry Andric 22960b57cec5SDimitry Andric bool getChanged() const { return Changed; } 2297fe6060f1SDimitry Andric const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const { 2298fe6060f1SDimitry Andric 
return ScalarEvolutionIVs;
2299fe6060f1SDimitry Andric   }
23000b57cec5SDimitry Andric 
23010b57cec5SDimitry Andric   void print_factors_and_types(raw_ostream &OS) const;
23020b57cec5SDimitry Andric   void print_fixups(raw_ostream &OS) const;
23030b57cec5SDimitry Andric   void print_uses(raw_ostream &OS) const;
23040b57cec5SDimitry Andric   void print(raw_ostream &OS) const;
23050b57cec5SDimitry Andric   void dump() const;
23060b57cec5SDimitry Andric };
23070b57cec5SDimitry Andric 
23080b57cec5SDimitry Andric } // end anonymous namespace
23090b57cec5SDimitry Andric 
23100b57cec5SDimitry Andric /// If IV is used in an int-to-float cast inside the loop then try to eliminate
23110b57cec5SDimitry Andric /// the cast operation.
23120b57cec5SDimitry Andric void LSRInstance::OptimizeShadowIV() {
23130b57cec5SDimitry Andric   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
23140b57cec5SDimitry Andric   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
23150b57cec5SDimitry Andric     return;
23160b57cec5SDimitry Andric 
23170b57cec5SDimitry Andric   for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
23180b57cec5SDimitry Andric        UI != E; /* empty */) {
23190b57cec5SDimitry Andric     IVUsers::const_iterator CandidateUI = UI;
23200b57cec5SDimitry Andric     ++UI;
23210b57cec5SDimitry Andric     Instruction *ShadowUse = CandidateUI->getUser();
23220b57cec5SDimitry Andric     Type *DestTy = nullptr;
23230b57cec5SDimitry Andric     bool IsSigned = false;
23240b57cec5SDimitry Andric 
23250b57cec5SDimitry Andric     /* If shadow use is an int->float cast then insert a second IV
23260b57cec5SDimitry Andric        to eliminate this cast.
23270b57cec5SDimitry Andric 
23280b57cec5SDimitry Andric          for (unsigned i = 0; i < n; ++i)
23290b57cec5SDimitry Andric            foo((double)i);
23300b57cec5SDimitry Andric 
23310b57cec5SDimitry Andric        is transformed into
23320b57cec5SDimitry Andric 
23330b57cec5SDimitry Andric          double d = 0.0;
23340b57cec5SDimitry Andric          for (unsigned i = 0; i < n; ++i, ++d)
23350b57cec5SDimitry Andric            foo(d);
23360b57cec5SDimitry Andric     */
23370b57cec5SDimitry Andric     if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
23380b57cec5SDimitry Andric       IsSigned = false;
23390b57cec5SDimitry Andric       DestTy = UCast->getDestTy();
23400b57cec5SDimitry Andric     }
23410b57cec5SDimitry Andric     else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
23420b57cec5SDimitry Andric       IsSigned = true;
23430b57cec5SDimitry Andric       DestTy = SCast->getDestTy();
23440b57cec5SDimitry Andric     }
23450b57cec5SDimitry Andric     if (!DestTy) continue;
23460b57cec5SDimitry Andric 
23470b57cec5SDimitry Andric     // If target does not support DestTy natively then do not apply
23480b57cec5SDimitry Andric     // this transformation.
23490b57cec5SDimitry Andric     if (!TTI.isTypeLegal(DestTy)) continue;
23500b57cec5SDimitry Andric 
23510b57cec5SDimitry Andric     PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
23520b57cec5SDimitry Andric     if (!PH) continue;
23530b57cec5SDimitry Andric     if (PH->getNumIncomingValues() != 2) continue;
23540b57cec5SDimitry Andric 
23550b57cec5SDimitry Andric     // If the calculation in integers overflows, the result in FP type will
23560b57cec5SDimitry Andric     // differ.
So we can only do this transformation if we are guaranteed not to
23570b57cec5SDimitry Andric     // deal with overflowing values.
23580b57cec5SDimitry Andric     const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
23590b57cec5SDimitry Andric     if (!AR) continue;
23600b57cec5SDimitry Andric     if (IsSigned && !AR->hasNoSignedWrap()) continue;
23610b57cec5SDimitry Andric     if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;
23620b57cec5SDimitry Andric 
23630b57cec5SDimitry Andric     Type *SrcTy = PH->getType();
23640b57cec5SDimitry Andric     int Mantissa = DestTy->getFPMantissaWidth();
23650b57cec5SDimitry Andric     if (Mantissa == -1) continue;
23660b57cec5SDimitry Andric     if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
23670b57cec5SDimitry Andric       continue;
23680b57cec5SDimitry Andric 
23690b57cec5SDimitry Andric     unsigned Entry, Latch;
23700b57cec5SDimitry Andric     if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
23710b57cec5SDimitry Andric       Entry = 0;
23720b57cec5SDimitry Andric       Latch = 1;
23730b57cec5SDimitry Andric     } else {
23740b57cec5SDimitry Andric       Entry = 1;
23750b57cec5SDimitry Andric       Latch = 0;
23760b57cec5SDimitry Andric     }
23770b57cec5SDimitry Andric 
23780b57cec5SDimitry Andric     ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
23790b57cec5SDimitry Andric     if (!Init) continue;
23800b57cec5SDimitry Andric     Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
23810b57cec5SDimitry Andric                                         (double)Init->getSExtValue() :
23820b57cec5SDimitry Andric                                         (double)Init->getZExtValue());
23830b57cec5SDimitry Andric 
23840b57cec5SDimitry Andric     BinaryOperator *Incr =
23850b57cec5SDimitry Andric       dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
23860b57cec5SDimitry Andric     if (!Incr) continue;
23870b57cec5SDimitry Andric     if (Incr->getOpcode() != Instruction::Add
23880b57cec5SDimitry Andric         && Incr->getOpcode() != Instruction::Sub)
23890b57cec5SDimitry Andric       continue;
23900b57cec5SDimitry Andric 
23910b57cec5SDimitry Andric     /* Initialize new IV, double d = 0.0 in above example. */
23920b57cec5SDimitry Andric     ConstantInt *C = nullptr;
23930b57cec5SDimitry Andric     if (Incr->getOperand(0) == PH)
23940b57cec5SDimitry Andric       C = dyn_cast<ConstantInt>(Incr->getOperand(1));
23950b57cec5SDimitry Andric     else if (Incr->getOperand(1) == PH)
23960b57cec5SDimitry Andric       C = dyn_cast<ConstantInt>(Incr->getOperand(0));
23970b57cec5SDimitry Andric     else
23980b57cec5SDimitry Andric       continue;
23990b57cec5SDimitry Andric 
24000b57cec5SDimitry Andric     if (!C) continue;
24010b57cec5SDimitry Andric 
24020b57cec5SDimitry Andric     // Ignore negative constants, as the code below doesn't handle them
24030b57cec5SDimitry Andric     // correctly. TODO: Remove this restriction.
24040fca6ea1SDimitry Andric     if (!C->getValue().isStrictlyPositive())
24050fca6ea1SDimitry Andric       continue;
24060b57cec5SDimitry Andric 
24070b57cec5SDimitry Andric     /* Add new PHINode. */
24080fca6ea1SDimitry Andric     PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH->getIterator());
24090fca6ea1SDimitry Andric     NewPH->setDebugLoc(PH->getDebugLoc());
24100b57cec5SDimitry Andric 
24110b57cec5SDimitry Andric     /* Create new increment. '++d' in above example. */
24120b57cec5SDimitry Andric     Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
24130fca6ea1SDimitry Andric     BinaryOperator *NewIncr = BinaryOperator::Create(
24140fca6ea1SDimitry Andric         Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd 24150fca6ea1SDimitry Andric : Instruction::FSub, 24160fca6ea1SDimitry Andric NewPH, CFP, "IV.S.next.", Incr->getIterator()); 24170fca6ea1SDimitry Andric NewIncr->setDebugLoc(Incr->getDebugLoc()); 24180b57cec5SDimitry Andric 24190b57cec5SDimitry Andric NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry)); 24200b57cec5SDimitry Andric NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch)); 24210b57cec5SDimitry Andric 24220b57cec5SDimitry Andric /* Remove cast operation */ 24230b57cec5SDimitry Andric ShadowUse->replaceAllUsesWith(NewPH); 24240b57cec5SDimitry Andric ShadowUse->eraseFromParent(); 24250b57cec5SDimitry Andric Changed = true; 24260b57cec5SDimitry Andric break; 24270b57cec5SDimitry Andric } 24280b57cec5SDimitry Andric } 24290b57cec5SDimitry Andric 24300b57cec5SDimitry Andric /// If Cond has an operand that is an expression of an IV, set the IV user and 24310b57cec5SDimitry Andric /// stride information and return true, otherwise return false. 24320b57cec5SDimitry Andric bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) { 24330b57cec5SDimitry Andric for (IVStrideUse &U : IU) 24340b57cec5SDimitry Andric if (U.getUser() == Cond) { 24350b57cec5SDimitry Andric // NOTE: we could handle setcc instructions with multiple uses here, but 24360b57cec5SDimitry Andric // InstCombine does it as well for simple uses, it's not clear that it 24370b57cec5SDimitry Andric // occurs enough in real life to handle. 24380b57cec5SDimitry Andric CondUse = &U; 24390b57cec5SDimitry Andric return true; 24400b57cec5SDimitry Andric } 24410b57cec5SDimitry Andric return false; 24420b57cec5SDimitry Andric } 24430b57cec5SDimitry Andric 24440b57cec5SDimitry Andric /// Rewrite the loop's terminating condition if it uses a max computation. 24450b57cec5SDimitry Andric /// 24460b57cec5SDimitry Andric /// This is a narrow solution to a specific, but acute, problem. For loops 24470b57cec5SDimitry Andric /// like this: 24480b57cec5SDimitry Andric /// 24490b57cec5SDimitry Andric /// i = 0; 24500b57cec5SDimitry Andric /// do { 24510b57cec5SDimitry Andric /// p[i] = 0.0; 24520b57cec5SDimitry Andric /// } while (++i < n); 24530b57cec5SDimitry Andric /// 24540b57cec5SDimitry Andric /// the trip count isn't just 'n', because 'n' might not be positive. And 24550b57cec5SDimitry Andric /// unfortunately this can come up even for loops where the user didn't use 24560b57cec5SDimitry Andric /// a C do-while loop. For example, seemingly well-behaved top-test loops 24570b57cec5SDimitry Andric /// will commonly be lowered like this: 24580b57cec5SDimitry Andric /// 24590b57cec5SDimitry Andric /// if (n > 0) { 24600b57cec5SDimitry Andric /// i = 0; 24610b57cec5SDimitry Andric /// do { 24620b57cec5SDimitry Andric /// p[i] = 0.0; 24630b57cec5SDimitry Andric /// } while (++i < n); 24640b57cec5SDimitry Andric /// } 24650b57cec5SDimitry Andric /// 24660b57cec5SDimitry Andric /// and then it's possible for subsequent optimization to obscure the if 24670b57cec5SDimitry Andric /// test in such a way that indvars can't find it. 24680b57cec5SDimitry Andric /// 24690b57cec5SDimitry Andric /// When indvars can't find the if test in loops like this, it creates a 24700b57cec5SDimitry Andric /// max expression, which allows it to give the loop a canonical 24710b57cec5SDimitry Andric /// induction variable: 24720b57cec5SDimitry Andric /// 24730b57cec5SDimitry Andric /// i = 0; 24740b57cec5SDimitry Andric /// max = n < 1 ? 
1 : n;
24750b57cec5SDimitry Andric ///   do {
24760b57cec5SDimitry Andric ///     p[i] = 0.0;
24770b57cec5SDimitry Andric ///   } while (++i != max);
24780b57cec5SDimitry Andric ///
24790b57cec5SDimitry Andric /// Canonical induction variables are necessary because the loop passes
24800b57cec5SDimitry Andric /// are designed around them. The most obvious example of this is the
24810b57cec5SDimitry Andric /// LoopInfo analysis, which doesn't remember trip count values. It
24820b57cec5SDimitry Andric /// expects to be able to rediscover the trip count each time it is
24830b57cec5SDimitry Andric /// needed, and it does this using a simple analysis that only succeeds if
24840b57cec5SDimitry Andric /// the loop has a canonical induction variable.
24850b57cec5SDimitry Andric ///
24860b57cec5SDimitry Andric /// However, when it comes time to generate code, the maximum operation
24870b57cec5SDimitry Andric /// can be quite costly, especially if it's inside of an outer loop.
24880b57cec5SDimitry Andric ///
24890b57cec5SDimitry Andric /// This function solves this problem by detecting loops of this type and
24900b57cec5SDimitry Andric /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
24910b57cec5SDimitry Andric /// the instructions for the maximum computation.
24920b57cec5SDimitry Andric ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
24930b57cec5SDimitry Andric   // Check that the loop matches the pattern we're looking for.
24940b57cec5SDimitry Andric   if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
24950b57cec5SDimitry Andric       Cond->getPredicate() != CmpInst::ICMP_NE)
24960b57cec5SDimitry Andric     return Cond;
24970b57cec5SDimitry Andric 
24980b57cec5SDimitry Andric   SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
24990b57cec5SDimitry Andric   if (!Sel || !Sel->hasOneUse()) return Cond;
25000b57cec5SDimitry Andric 
25010b57cec5SDimitry Andric   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
25020b57cec5SDimitry Andric   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
25030b57cec5SDimitry Andric     return Cond;
25040b57cec5SDimitry Andric   const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
25050b57cec5SDimitry Andric 
25060b57cec5SDimitry Andric   // Add one to the backedge-taken count to get the trip count.
25070b57cec5SDimitry Andric   const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
25080b57cec5SDimitry Andric   if (IterationCount != SE.getSCEV(Sel)) return Cond;
25090b57cec5SDimitry Andric 
25100b57cec5SDimitry Andric   // Check for a max calculation that matches the pattern. There's no check
25110b57cec5SDimitry Andric   // for ICMP_ULE here because the comparison would be with zero, which
25120b57cec5SDimitry Andric   // isn't interesting.
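  // For illustration only (the exact SCEV shapes vary by loop): for the
  // example above, the trip count may be smax(n, 1). When IterationCount is
  // the smax, we compare with SLT below; when the backedge-taken count
  // itself is an smax, we use SLE; and a umax selects the unsigned variant,
  // ULT.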
25130b57cec5SDimitry Andric CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE; 25140b57cec5SDimitry Andric const SCEVNAryExpr *Max = nullptr; 25150b57cec5SDimitry Andric if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) { 25160b57cec5SDimitry Andric Pred = ICmpInst::ICMP_SLE; 25170b57cec5SDimitry Andric Max = S; 25180b57cec5SDimitry Andric } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) { 25190b57cec5SDimitry Andric Pred = ICmpInst::ICMP_SLT; 25200b57cec5SDimitry Andric Max = S; 25210b57cec5SDimitry Andric } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) { 25220b57cec5SDimitry Andric Pred = ICmpInst::ICMP_ULT; 25230b57cec5SDimitry Andric Max = U; 25240b57cec5SDimitry Andric } else { 25250b57cec5SDimitry Andric // No match; bail. 25260b57cec5SDimitry Andric return Cond; 25270b57cec5SDimitry Andric } 25280b57cec5SDimitry Andric 25290b57cec5SDimitry Andric // To handle a max with more than two operands, this optimization would 25300b57cec5SDimitry Andric // require additional checking and setup. 25310b57cec5SDimitry Andric if (Max->getNumOperands() != 2) 25320b57cec5SDimitry Andric return Cond; 25330b57cec5SDimitry Andric 25340b57cec5SDimitry Andric const SCEV *MaxLHS = Max->getOperand(0); 25350b57cec5SDimitry Andric const SCEV *MaxRHS = Max->getOperand(1); 25360b57cec5SDimitry Andric 25370b57cec5SDimitry Andric // ScalarEvolution canonicalizes constants to the left. For < and >, look 25380b57cec5SDimitry Andric // for a comparison with 1. For <= and >=, a comparison with zero. 25390b57cec5SDimitry Andric if (!MaxLHS || 25400b57cec5SDimitry Andric (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One))) 25410b57cec5SDimitry Andric return Cond; 25420b57cec5SDimitry Andric 25430b57cec5SDimitry Andric // Check the relevant induction variable for conformance to 25440b57cec5SDimitry Andric // the pattern. 25450b57cec5SDimitry Andric const SCEV *IV = SE.getSCEV(Cond->getOperand(0)); 25460b57cec5SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV); 25470b57cec5SDimitry Andric if (!AR || !AR->isAffine() || 25480b57cec5SDimitry Andric AR->getStart() != One || 25490b57cec5SDimitry Andric AR->getStepRecurrence(SE) != One) 25500b57cec5SDimitry Andric return Cond; 25510b57cec5SDimitry Andric 25520b57cec5SDimitry Andric assert(AR->getLoop() == L && 25530b57cec5SDimitry Andric "Loop condition operand is an addrec in a different loop!"); 25540b57cec5SDimitry Andric 25550b57cec5SDimitry Andric // Check the right operand of the select, and remember it, as it will 25560b57cec5SDimitry Andric // be used in the new comparison instruction. 25570b57cec5SDimitry Andric Value *NewRHS = nullptr; 25580b57cec5SDimitry Andric if (ICmpInst::isTrueWhenEqual(Pred)) { 25590b57cec5SDimitry Andric // Look for n+1, and grab n. 
25600b57cec5SDimitry Andric if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1))) 25610b57cec5SDimitry Andric if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) 25620b57cec5SDimitry Andric if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) 25630b57cec5SDimitry Andric NewRHS = BO->getOperand(0); 25640b57cec5SDimitry Andric if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2))) 25650b57cec5SDimitry Andric if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1))) 25660b57cec5SDimitry Andric if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS) 25670b57cec5SDimitry Andric NewRHS = BO->getOperand(0); 25680b57cec5SDimitry Andric if (!NewRHS) 25690b57cec5SDimitry Andric return Cond; 25700b57cec5SDimitry Andric } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS) 25710b57cec5SDimitry Andric NewRHS = Sel->getOperand(1); 25720b57cec5SDimitry Andric else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS) 25730b57cec5SDimitry Andric NewRHS = Sel->getOperand(2); 25740b57cec5SDimitry Andric else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS)) 25750b57cec5SDimitry Andric NewRHS = SU->getValue(); 25760b57cec5SDimitry Andric else 25770b57cec5SDimitry Andric // Max doesn't match expected pattern. 25780b57cec5SDimitry Andric return Cond; 25790b57cec5SDimitry Andric 25800b57cec5SDimitry Andric // Determine the new comparison opcode. It may be signed or unsigned, 25810b57cec5SDimitry Andric // and the original comparison may be either equality or inequality. 25820b57cec5SDimitry Andric if (Cond->getPredicate() == CmpInst::ICMP_EQ) 25830b57cec5SDimitry Andric Pred = CmpInst::getInversePredicate(Pred); 25840b57cec5SDimitry Andric 25850b57cec5SDimitry Andric // Ok, everything looks ok to change the condition into an SLT or SGE and 25860b57cec5SDimitry Andric // delete the max calculation. 25870fca6ea1SDimitry Andric ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred, 25880fca6ea1SDimitry Andric Cond->getOperand(0), NewRHS, "scmp"); 25890b57cec5SDimitry Andric 25900b57cec5SDimitry Andric // Delete the max calculation instructions. 2591fe6060f1SDimitry Andric NewCond->setDebugLoc(Cond->getDebugLoc()); 25920b57cec5SDimitry Andric Cond->replaceAllUsesWith(NewCond); 25930b57cec5SDimitry Andric CondUse->setUser(NewCond); 25940b57cec5SDimitry Andric Instruction *Cmp = cast<Instruction>(Sel->getOperand(0)); 25950b57cec5SDimitry Andric Cond->eraseFromParent(); 25960b57cec5SDimitry Andric Sel->eraseFromParent(); 25970b57cec5SDimitry Andric if (Cmp->use_empty()) 25980b57cec5SDimitry Andric Cmp->eraseFromParent(); 25990b57cec5SDimitry Andric return NewCond; 26000b57cec5SDimitry Andric } 26010b57cec5SDimitry Andric 26020b57cec5SDimitry Andric /// Change loop terminating condition to use the postinc iv when possible. 26030b57cec5SDimitry Andric void 26040b57cec5SDimitry Andric LSRInstance::OptimizeLoopTermCond() { 26050b57cec5SDimitry Andric SmallPtrSet<Instruction *, 4> PostIncs; 26060b57cec5SDimitry Andric 26070b57cec5SDimitry Andric // We need a different set of heuristics for rotated and non-rotated loops. 26080b57cec5SDimitry Andric // If a loop is rotated then the latch is also the backedge, so inserting 26090b57cec5SDimitry Andric // post-inc expressions just before the latch is ideal. To reduce live ranges 26100b57cec5SDimitry Andric // it also makes sense to rewrite terminating conditions to use post-inc 26110b57cec5SDimitry Andric // expressions. 
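  // Schematically, a rotated loop looks like this (names are illustrative):
  //
  //   latch:                               ; also the exiting block
  //     %i.next = add i64 %i, 1
  //     %c = icmp slt i64 %i.next, %n
  //     br i1 %c, label %latch, label %exit
  //
  // Here the increment and the exit test live in the same block, so a
  // post-inc test keeps only %i.next live across the backedge.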
26120b57cec5SDimitry Andric // 26130b57cec5SDimitry Andric // If the loop is not rotated then the latch is not a backedge; the latch 26140b57cec5SDimitry Andric // check is done in the loop head. Adding post-inc expressions before the 26150b57cec5SDimitry Andric // latch will cause overlapping live-ranges of pre-inc and post-inc expressions 26160b57cec5SDimitry Andric // in the loop body. In this case we do *not* want to use post-inc expressions 26170b57cec5SDimitry Andric // in the latch check, and we want to insert post-inc expressions before 26180b57cec5SDimitry Andric // the backedge. 26190b57cec5SDimitry Andric BasicBlock *LatchBlock = L->getLoopLatch(); 26200b57cec5SDimitry Andric SmallVector<BasicBlock*, 8> ExitingBlocks; 26210b57cec5SDimitry Andric L->getExitingBlocks(ExitingBlocks); 2622bdd1243dSDimitry Andric if (!llvm::is_contained(ExitingBlocks, LatchBlock)) { 26230b57cec5SDimitry Andric // The backedge doesn't exit the loop; treat this as a head-tested loop. 26240b57cec5SDimitry Andric IVIncInsertPos = LatchBlock->getTerminator(); 26250b57cec5SDimitry Andric return; 26260b57cec5SDimitry Andric } 26270b57cec5SDimitry Andric 26280b57cec5SDimitry Andric // Otherwise treat this as a rotated loop. 26290b57cec5SDimitry Andric for (BasicBlock *ExitingBlock : ExitingBlocks) { 26300b57cec5SDimitry Andric // Get the terminating condition for the loop if possible. If we 26310b57cec5SDimitry Andric // can, we want to change it to use a post-incremented version of its 26320b57cec5SDimitry Andric // induction variable, to allow coalescing the live ranges for the IV into 26330b57cec5SDimitry Andric // one register value. 26340b57cec5SDimitry Andric 26350b57cec5SDimitry Andric BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator()); 26360b57cec5SDimitry Andric if (!TermBr) 26370b57cec5SDimitry Andric continue; 26380b57cec5SDimitry Andric // FIXME: Overly conservative, termination condition could be an 'or' etc.. 26390b57cec5SDimitry Andric if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition())) 26400b57cec5SDimitry Andric continue; 26410b57cec5SDimitry Andric 26420b57cec5SDimitry Andric // Search IVUsesByStride to find Cond's IVUse if there is one. 26430b57cec5SDimitry Andric IVStrideUse *CondUse = nullptr; 26440b57cec5SDimitry Andric ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition()); 26450b57cec5SDimitry Andric if (!FindIVUserForCond(Cond, CondUse)) 26460b57cec5SDimitry Andric continue; 26470b57cec5SDimitry Andric 26480b57cec5SDimitry Andric // If the trip count is computed in terms of a max (due to ScalarEvolution 26490b57cec5SDimitry Andric // being unable to find a sufficient guard, for example), change the loop 26500b57cec5SDimitry Andric // comparison to use SLT or ULT instead of NE. 26510b57cec5SDimitry Andric // One consequence of doing this now is that it disrupts the count-down 26520b57cec5SDimitry Andric // optimization. That's not always a bad thing though, because in such 26530b57cec5SDimitry Andric // cases it may still be worthwhile to avoid a max. 26540b57cec5SDimitry Andric Cond = OptimizeMax(Cond, CondUse); 26550b57cec5SDimitry Andric 26560b57cec5SDimitry Andric // If this exiting block dominates the latch block, it may also use 26570b57cec5SDimitry Andric // the post-inc value if it won't be shared with other uses. 26580b57cec5SDimitry Andric // Check for dominance. 
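    // A rough intuition for the dominance requirement: if the exiting block
    // dominates the latch, every backedge traversal passes through it, so an
    // increment placed to dominate this block's compare executes exactly
    // once per iteration and the compare can safely test the post-inc value.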
26590b57cec5SDimitry Andric     if (!DT.dominates(ExitingBlock, LatchBlock))
26600b57cec5SDimitry Andric       continue;
26610b57cec5SDimitry Andric 
26620b57cec5SDimitry Andric     // Conservatively avoid trying to use the post-inc value in non-latch
26630b57cec5SDimitry Andric     // exits if there may be pre-inc users in intervening blocks.
26640b57cec5SDimitry Andric     if (LatchBlock != ExitingBlock)
26650b57cec5SDimitry Andric       for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
26660b57cec5SDimitry Andric         // Test if the use is reachable from the exiting block. This dominator
26670b57cec5SDimitry Andric         // query is a conservative approximation of reachability.
26680b57cec5SDimitry Andric         if (&*UI != CondUse &&
26690b57cec5SDimitry Andric             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
26700b57cec5SDimitry Andric           // Conservatively assume there may be reuse if the quotient of their
26710b57cec5SDimitry Andric           // strides could be a legal scale.
26720b57cec5SDimitry Andric           const SCEV *A = IU.getStride(*CondUse, L);
26730b57cec5SDimitry Andric           const SCEV *B = IU.getStride(*UI, L);
26740b57cec5SDimitry Andric           if (!A || !B) continue;
26750b57cec5SDimitry Andric           if (SE.getTypeSizeInBits(A->getType()) !=
26760b57cec5SDimitry Andric               SE.getTypeSizeInBits(B->getType())) {
26770b57cec5SDimitry Andric             if (SE.getTypeSizeInBits(A->getType()) >
26780b57cec5SDimitry Andric                 SE.getTypeSizeInBits(B->getType()))
26790b57cec5SDimitry Andric               B = SE.getSignExtendExpr(B, A->getType());
26800b57cec5SDimitry Andric             else
26810b57cec5SDimitry Andric               A = SE.getSignExtendExpr(A, B->getType());
26820b57cec5SDimitry Andric           }
26830b57cec5SDimitry Andric           if (const SCEVConstant *D =
26840b57cec5SDimitry Andric                 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
26850b57cec5SDimitry Andric             const ConstantInt *C = D->getValue();
26860b57cec5SDimitry Andric             // Stride of one or negative one can have reuse with non-addresses.
26870b57cec5SDimitry Andric             if (C->isOne() || C->isMinusOne())
26880b57cec5SDimitry Andric               goto decline_post_inc;
26890b57cec5SDimitry Andric             // Avoid weird situations: quotients that need 64 or more
26900b57cec5SDimitry Andric             // significant bits, or INT64_MIN, whose negation below overflows.
26910b57cec5SDimitry Andric             if (C->getValue().getSignificantBits() >= 64 ||
26920b57cec5SDimitry Andric                 C->getValue().isMinSignedValue())
26930b57cec5SDimitry Andric               goto decline_post_inc;
26940b57cec5SDimitry Andric             // Check for possible scaled-address reuse.
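            // Illustrative example (target-dependent, hence the TTI queries
            // below): if the condition's IV strides by 4 and this other use
            // strides by 8, the quotient 2 (or its negation) may be usable
            // as an addressing-mode scale, base + 2*index; sharing one
            // register for both strides is then attractive, so we decline
            // the post-inc transformation.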
26940b57cec5SDimitry Andric         if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
26950b57cec5SDimitry Andric           MemAccessTy AccessTy = getAccessType(
26960b57cec5SDimitry Andric               TTI, UI->getUser(), UI->getOperandValToReplace());
26970b57cec5SDimitry Andric           int64_t Scale = C->getSExtValue();
26980b57cec5SDimitry Andric           if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
26990b57cec5SDimitry Andric                                         /*BaseOffset=*/0,
270006c3fb27SDimitry Andric                                         /*HasBaseReg=*/true, Scale,
27010b57cec5SDimitry Andric                                         AccessTy.AddrSpace))
27020b57cec5SDimitry Andric             goto decline_post_inc;
27030b57cec5SDimitry Andric           Scale = -Scale;
27040b57cec5SDimitry Andric           if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
27050b57cec5SDimitry Andric                                         /*BaseOffset=*/0,
270606c3fb27SDimitry Andric                                         /*HasBaseReg=*/true, Scale,
27070b57cec5SDimitry Andric                                         AccessTy.AddrSpace))
27080b57cec5SDimitry Andric             goto decline_post_inc;
27090b57cec5SDimitry Andric         }
27100b57cec5SDimitry Andric       }
27110b57cec5SDimitry Andric     }
27120b57cec5SDimitry Andric 
27130b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
27140b57cec5SDimitry Andric                       << *Cond << '\n');
27150b57cec5SDimitry Andric 
27160b57cec5SDimitry Andric     // It's possible for the icmp instruction to be anywhere in the loop, and
27170b57cec5SDimitry Andric     // possible for it to have multiple users. If it is not immediately before
27180b57cec5SDimitry Andric     // the exiting block branch, move it.
2719fe6060f1SDimitry Andric     if (Cond->getNextNonDebugInstruction() != TermBr) {
27200b57cec5SDimitry Andric       if (Cond->hasOneUse()) {
27210b57cec5SDimitry Andric         Cond->moveBefore(TermBr);
27220b57cec5SDimitry Andric       } else {
27230b57cec5SDimitry Andric         // Clone the terminating condition and insert into the loop end.
27240b57cec5SDimitry Andric         ICmpInst *OldCond = Cond;
27250b57cec5SDimitry Andric         Cond = cast<ICmpInst>(Cond->clone());
27260b57cec5SDimitry Andric         Cond->setName(L->getHeader()->getName() + ".termcond");
2727bdd1243dSDimitry Andric         Cond->insertInto(ExitingBlock, TermBr->getIterator());
27280b57cec5SDimitry Andric 
27290b57cec5SDimitry Andric         // Clone the IVUse, as the old use still exists!
27300b57cec5SDimitry Andric         CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
27310b57cec5SDimitry Andric         TermBr->replaceUsesOfWith(OldCond, Cond);
27320b57cec5SDimitry Andric       }
27330b57cec5SDimitry Andric     }
27340b57cec5SDimitry Andric 
27350b57cec5SDimitry Andric     // If we get to here, we know that we can transform the icmp instruction to
27360b57cec5SDimitry Andric     // use the post-incremented version of the IV, allowing us to coalesce the
27370b57cec5SDimitry Andric     // live ranges for the IV correctly.
27380b57cec5SDimitry Andric     CondUse->transformToPostInc(L);
27390b57cec5SDimitry Andric     Changed = true;
27400b57cec5SDimitry Andric 
27410b57cec5SDimitry Andric     PostIncs.insert(Cond);
27420b57cec5SDimitry Andric   decline_post_inc:;
27430b57cec5SDimitry Andric   }
27440b57cec5SDimitry Andric 
27450b57cec5SDimitry Andric   // Determine an insertion point for the loop induction variable increment. It
27460b57cec5SDimitry Andric   // must dominate all the post-inc comparisons we just set up, and it must
27470b57cec5SDimitry Andric   // dominate the loop latch edge.
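  // E.g. (a sketch): starting from the latch terminator, each rewritten
  // compare pulls the insertion point up to the nearest common dominator of
  // the two; in a simple rotated loop with a single exiting latch this stays
  // the latch terminator itself.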
27480b57cec5SDimitry Andric   IVIncInsertPos = L->getLoopLatch()->getTerminator();
2749bdd1243dSDimitry Andric   for (Instruction *Inst : PostIncs)
2750bdd1243dSDimitry Andric     IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
27510b57cec5SDimitry Andric }
27520b57cec5SDimitry Andric 
27530b57cec5SDimitry Andric /// Determine if the given use can accommodate a fixup at the given offset and
27540b57cec5SDimitry Andric /// other details. If so, update the use and return true.
27550fca6ea1SDimitry Andric bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
27560b57cec5SDimitry Andric                                      bool HasBaseReg, LSRUse::KindType Kind,
27570b57cec5SDimitry Andric                                      MemAccessTy AccessTy) {
27580fca6ea1SDimitry Andric   Immediate NewMinOffset = LU.MinOffset;
27590fca6ea1SDimitry Andric   Immediate NewMaxOffset = LU.MaxOffset;
27600b57cec5SDimitry Andric   MemAccessTy NewAccessTy = AccessTy;
27610b57cec5SDimitry Andric 
27620b57cec5SDimitry Andric   // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
27630b57cec5SDimitry Andric   // something conservative; however, this can pessimize when, for example, one
27640b57cec5SDimitry Andric   // of the uses has all of its users outside the loop.
27650b57cec5SDimitry Andric   if (LU.Kind != Kind)
27660b57cec5SDimitry Andric     return false;
27670b57cec5SDimitry Andric 
27680b57cec5SDimitry Andric   // Check for a mismatched access type, and fall back conservatively as needed.
27690b57cec5SDimitry Andric   // TODO: Be less conservative when the type is similar and can use the same
27700b57cec5SDimitry Andric   // addressing modes.
27710b57cec5SDimitry Andric   if (Kind == LSRUse::Address) {
27720b57cec5SDimitry Andric     if (AccessTy.MemTy != LU.AccessTy.MemTy) {
27730b57cec5SDimitry Andric       NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
27740b57cec5SDimitry Andric                                             AccessTy.AddrSpace);
27750b57cec5SDimitry Andric     }
27760b57cec5SDimitry Andric   }
27770b57cec5SDimitry Andric 
27780b57cec5SDimitry Andric   // Conservatively assume HasBaseReg is true for now.
27790fca6ea1SDimitry Andric   if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
27800b57cec5SDimitry Andric     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
27810b57cec5SDimitry Andric                           LU.MaxOffset - NewOffset, HasBaseReg))
27820b57cec5SDimitry Andric       return false;
27830b57cec5SDimitry Andric     NewMinOffset = NewOffset;
27840fca6ea1SDimitry Andric   } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
27850b57cec5SDimitry Andric     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
27860b57cec5SDimitry Andric                           NewOffset - LU.MinOffset, HasBaseReg))
27870b57cec5SDimitry Andric       return false;
27880b57cec5SDimitry Andric     NewMaxOffset = NewOffset;
27890b57cec5SDimitry Andric   }
27900b57cec5SDimitry Andric 
27910fca6ea1SDimitry Andric   // FIXME: We should be able to handle some level of scalable offset support
27920fca6ea1SDimitry Andric   // for 'void', but in order to get basic support up and running this is
27930fca6ea1SDimitry Andric   // being left out.
27940fca6ea1SDimitry Andric   if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
27950fca6ea1SDimitry Andric       (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
27960fca6ea1SDimitry Andric     return false;
27970fca6ea1SDimitry Andric 
27980b57cec5SDimitry Andric   // Update the use.
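  // Worked example (hypothetical numbers): a use spanning offsets [0, 8]
  // reconciling a new fixup at offset -4 must show that a combined reach of
  // 12 (= 8 - (-4)) is still foldable; if so, MinOffset below becomes -4 and
  // the range widens to [-4, 8].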
27990b57cec5SDimitry Andric   LU.MinOffset = NewMinOffset;
28000b57cec5SDimitry Andric   LU.MaxOffset = NewMaxOffset;
28010b57cec5SDimitry Andric   LU.AccessTy = NewAccessTy;
28020b57cec5SDimitry Andric   return true;
28030b57cec5SDimitry Andric }
28040b57cec5SDimitry Andric 
28050b57cec5SDimitry Andric /// Return an LSRUse index and an offset value for a fixup which needs the given
28060b57cec5SDimitry Andric /// expression, with the given kind and optional access type. Either reuse an
28070b57cec5SDimitry Andric /// existing use or create a new one, as needed.
28080fca6ea1SDimitry Andric std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
28090b57cec5SDimitry Andric                                                  LSRUse::KindType Kind,
28100b57cec5SDimitry Andric                                                  MemAccessTy AccessTy) {
28110b57cec5SDimitry Andric   const SCEV *Copy = Expr;
28120fca6ea1SDimitry Andric   Immediate Offset = ExtractImmediate(Expr, SE);
28130b57cec5SDimitry Andric 
28140b57cec5SDimitry Andric   // Basic uses can't accept any offset, for example.
28150b57cec5SDimitry Andric   if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
28160b57cec5SDimitry Andric                         Offset, /*HasBaseReg=*/ true)) {
28170b57cec5SDimitry Andric     Expr = Copy;
28180fca6ea1SDimitry Andric     Offset = Immediate::getFixed(0);
28190b57cec5SDimitry Andric   }
28200b57cec5SDimitry Andric 
28210b57cec5SDimitry Andric   std::pair<UseMapTy::iterator, bool> P =
28220b57cec5SDimitry Andric     UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
28230b57cec5SDimitry Andric   if (!P.second) {
28240b57cec5SDimitry Andric     // A use already existed with this base.
28250b57cec5SDimitry Andric     size_t LUIdx = P.first->second;
28260b57cec5SDimitry Andric     LSRUse &LU = Uses[LUIdx];
28270b57cec5SDimitry Andric     if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
28280b57cec5SDimitry Andric       // Reuse this use.
28290b57cec5SDimitry Andric       return std::make_pair(LUIdx, Offset);
28300b57cec5SDimitry Andric   }
28310b57cec5SDimitry Andric 
28320b57cec5SDimitry Andric   // Create a new use.
28330b57cec5SDimitry Andric   size_t LUIdx = Uses.size();
28340b57cec5SDimitry Andric   P.first->second = LUIdx;
28350b57cec5SDimitry Andric   Uses.push_back(LSRUse(Kind, AccessTy));
28360b57cec5SDimitry Andric   LSRUse &LU = Uses[LUIdx];
28370b57cec5SDimitry Andric 
28380b57cec5SDimitry Andric   LU.MinOffset = Offset;
28390b57cec5SDimitry Andric   LU.MaxOffset = Offset;
28400b57cec5SDimitry Andric   return std::make_pair(LUIdx, Offset);
28410b57cec5SDimitry Andric }
28420b57cec5SDimitry Andric 
28430b57cec5SDimitry Andric /// Delete the given use from the Uses list.
28440b57cec5SDimitry Andric void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
28450b57cec5SDimitry Andric   if (&LU != &Uses.back())
28460b57cec5SDimitry Andric     std::swap(LU, Uses.back());
28470b57cec5SDimitry Andric   Uses.pop_back();
28480b57cec5SDimitry Andric 
28490b57cec5SDimitry Andric   // Update RegUses.
28500b57cec5SDimitry Andric   RegUses.swapAndDropUse(LUIdx, Uses.size());
28510b57cec5SDimitry Andric }
28520b57cec5SDimitry Andric 
28530b57cec5SDimitry Andric /// Look for a use distinct from OrigLU which has a formula that has the same
28540b57cec5SDimitry Andric /// registers as the given formula.
28550b57cec5SDimitry Andric LSRUse *
28560b57cec5SDimitry Andric LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
28570b57cec5SDimitry Andric                                        const LSRUse &OrigLU) {
28580b57cec5SDimitry Andric   // Search all uses for the formula. This could be more clever.
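  // What "similar" means here, by example (the registers are illustrative):
  // a formula like reg(%iv) + reg(%a) + 4 in another use matches OrigF when
  // it has exactly the same base registers, scaled register, symbol, scale,
  // and unfolded offset, differing at most in the base offset; only a match
  // with a zero base offset is actually returned below.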
285906c3fb27SDimitry Andric for (LSRUse &LU : Uses) { 28600b57cec5SDimitry Andric // Check whether this use is close enough to OrigLU, to see whether it's 28610b57cec5SDimitry Andric // worthwhile looking through its formulae. 28620b57cec5SDimitry Andric // Ignore ICmpZero uses because they may contain formulae generated by 28630b57cec5SDimitry Andric // GenerateICmpZeroScales, in which case adding fixup offsets may 28640b57cec5SDimitry Andric // be invalid. 28650b57cec5SDimitry Andric if (&LU != &OrigLU && 28660b57cec5SDimitry Andric LU.Kind != LSRUse::ICmpZero && 28670b57cec5SDimitry Andric LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy && 28680b57cec5SDimitry Andric LU.WidestFixupType == OrigLU.WidestFixupType && 28690b57cec5SDimitry Andric LU.HasFormulaWithSameRegs(OrigF)) { 28700b57cec5SDimitry Andric // Scan through this use's formulae. 28710b57cec5SDimitry Andric for (const Formula &F : LU.Formulae) { 28720b57cec5SDimitry Andric // Check to see if this formula has the same registers and symbols 28730b57cec5SDimitry Andric // as OrigF. 28740b57cec5SDimitry Andric if (F.BaseRegs == OrigF.BaseRegs && 28750b57cec5SDimitry Andric F.ScaledReg == OrigF.ScaledReg && 28760b57cec5SDimitry Andric F.BaseGV == OrigF.BaseGV && 28770b57cec5SDimitry Andric F.Scale == OrigF.Scale && 28780b57cec5SDimitry Andric F.UnfoldedOffset == OrigF.UnfoldedOffset) { 28790fca6ea1SDimitry Andric if (F.BaseOffset.isZero()) 28800b57cec5SDimitry Andric return &LU; 28810b57cec5SDimitry Andric // This is the formula where all the registers and symbols matched; 28820b57cec5SDimitry Andric // there aren't going to be any others. Since we declined it, we 28830b57cec5SDimitry Andric // can skip the rest of the formulae and proceed to the next LSRUse. 28840b57cec5SDimitry Andric break; 28850b57cec5SDimitry Andric } 28860b57cec5SDimitry Andric } 28870b57cec5SDimitry Andric } 28880b57cec5SDimitry Andric } 28890b57cec5SDimitry Andric 28900b57cec5SDimitry Andric // Nothing looked good. 28910b57cec5SDimitry Andric return nullptr; 28920b57cec5SDimitry Andric } 28930b57cec5SDimitry Andric 28940b57cec5SDimitry Andric void LSRInstance::CollectInterestingTypesAndFactors() { 28950b57cec5SDimitry Andric SmallSetVector<const SCEV *, 4> Strides; 28960b57cec5SDimitry Andric 28970b57cec5SDimitry Andric // Collect interesting types and strides. 28980b57cec5SDimitry Andric SmallVector<const SCEV *, 4> Worklist; 28990b57cec5SDimitry Andric for (const IVStrideUse &U : IU) { 29000b57cec5SDimitry Andric const SCEV *Expr = IU.getExpr(U); 290106c3fb27SDimitry Andric if (!Expr) 290206c3fb27SDimitry Andric continue; 29030b57cec5SDimitry Andric 29040b57cec5SDimitry Andric // Collect interesting types. 29050b57cec5SDimitry Andric Types.insert(SE.getEffectiveSCEVType(Expr->getType())); 29060b57cec5SDimitry Andric 29070b57cec5SDimitry Andric // Add strides for mentioned loops. 
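  // Illustrative example (hypothetical expression): for
  // Expr = {(%a + %b),+,4}<%L>, the walk below records the step 4 as an
  // interesting stride and pushes the start (%a + %b); its add operands %a
  // and %b are neither addrecs nor adds, so they are dropped.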
29080b57cec5SDimitry Andric Worklist.push_back(Expr); 29090b57cec5SDimitry Andric do { 29100b57cec5SDimitry Andric const SCEV *S = Worklist.pop_back_val(); 29110b57cec5SDimitry Andric if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { 29120b57cec5SDimitry Andric if (AR->getLoop() == L) 29130b57cec5SDimitry Andric Strides.insert(AR->getStepRecurrence(SE)); 29140b57cec5SDimitry Andric Worklist.push_back(AR->getStart()); 29150b57cec5SDimitry Andric } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { 2916bdd1243dSDimitry Andric append_range(Worklist, Add->operands()); 29170b57cec5SDimitry Andric } 29180b57cec5SDimitry Andric } while (!Worklist.empty()); 29190b57cec5SDimitry Andric } 29200b57cec5SDimitry Andric 29210b57cec5SDimitry Andric // Compute interesting factors from the set of interesting strides. 29220b57cec5SDimitry Andric for (SmallSetVector<const SCEV *, 4>::const_iterator 29230b57cec5SDimitry Andric I = Strides.begin(), E = Strides.end(); I != E; ++I) 29240b57cec5SDimitry Andric for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter = 29250b57cec5SDimitry Andric std::next(I); NewStrideIter != E; ++NewStrideIter) { 29260b57cec5SDimitry Andric const SCEV *OldStride = *I; 29270b57cec5SDimitry Andric const SCEV *NewStride = *NewStrideIter; 29280b57cec5SDimitry Andric 29290b57cec5SDimitry Andric if (SE.getTypeSizeInBits(OldStride->getType()) != 29300b57cec5SDimitry Andric SE.getTypeSizeInBits(NewStride->getType())) { 29310b57cec5SDimitry Andric if (SE.getTypeSizeInBits(OldStride->getType()) > 29320b57cec5SDimitry Andric SE.getTypeSizeInBits(NewStride->getType())) 29330b57cec5SDimitry Andric NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType()); 29340b57cec5SDimitry Andric else 29350b57cec5SDimitry Andric OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType()); 29360b57cec5SDimitry Andric } 29370b57cec5SDimitry Andric if (const SCEVConstant *Factor = 29380b57cec5SDimitry Andric dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride, 29390b57cec5SDimitry Andric SE, true))) { 294006c3fb27SDimitry Andric if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero()) 29410b57cec5SDimitry Andric Factors.insert(Factor->getAPInt().getSExtValue()); 29420b57cec5SDimitry Andric } else if (const SCEVConstant *Factor = 29430b57cec5SDimitry Andric dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride, 29440b57cec5SDimitry Andric NewStride, 29450b57cec5SDimitry Andric SE, true))) { 294606c3fb27SDimitry Andric if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero()) 29470b57cec5SDimitry Andric Factors.insert(Factor->getAPInt().getSExtValue()); 29480b57cec5SDimitry Andric } 29490b57cec5SDimitry Andric } 29500b57cec5SDimitry Andric 29510b57cec5SDimitry Andric // If all uses use the same type, don't bother looking for truncation-based 29520b57cec5SDimitry Andric // reuse. 29530b57cec5SDimitry Andric if (Types.size() == 1) 29540b57cec5SDimitry Andric Types.clear(); 29550b57cec5SDimitry Andric 29560b57cec5SDimitry Andric LLVM_DEBUG(print_factors_and_types(dbgs())); 29570b57cec5SDimitry Andric } 29580b57cec5SDimitry Andric 29590b57cec5SDimitry Andric /// Helper for CollectChains that finds an IV operand (computed by an AddRec in 29600b57cec5SDimitry Andric /// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to 29610b57cec5SDimitry Andric /// IVStrideUses, we could partially skip this. 
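///
/// Illustrative example (hypothetical IR): scanning the operands of
///   store i32 %v, ptr %addr
/// where %addr is an AddRec {%base,+,4}<%L> and %v is loop-invariant,
/// findIVOperand skips %v and stops at the %addr operand.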
29620b57cec5SDimitry Andric static User::op_iterator 29630b57cec5SDimitry Andric findIVOperand(User::op_iterator OI, User::op_iterator OE, 29640b57cec5SDimitry Andric Loop *L, ScalarEvolution &SE) { 29650b57cec5SDimitry Andric for(; OI != OE; ++OI) { 29660b57cec5SDimitry Andric if (Instruction *Oper = dyn_cast<Instruction>(*OI)) { 29670b57cec5SDimitry Andric if (!SE.isSCEVable(Oper->getType())) 29680b57cec5SDimitry Andric continue; 29690b57cec5SDimitry Andric 29700b57cec5SDimitry Andric if (const SCEVAddRecExpr *AR = 29710b57cec5SDimitry Andric dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) { 29720b57cec5SDimitry Andric if (AR->getLoop() == L) 29730b57cec5SDimitry Andric break; 29740b57cec5SDimitry Andric } 29750b57cec5SDimitry Andric } 29760b57cec5SDimitry Andric } 29770b57cec5SDimitry Andric return OI; 29780b57cec5SDimitry Andric } 29790b57cec5SDimitry Andric 29800b57cec5SDimitry Andric /// IVChain logic must consistently peek base TruncInst operands, so wrap it in 29810b57cec5SDimitry Andric /// a convenient helper. 29820b57cec5SDimitry Andric static Value *getWideOperand(Value *Oper) { 29830b57cec5SDimitry Andric if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper)) 29840b57cec5SDimitry Andric return Trunc->getOperand(0); 29850b57cec5SDimitry Andric return Oper; 29860b57cec5SDimitry Andric } 29870b57cec5SDimitry Andric 29880b57cec5SDimitry Andric /// Return an approximation of this SCEV expression's "base", or NULL for any 29890b57cec5SDimitry Andric /// constant. Returning the expression itself is conservative. Returning a 29900b57cec5SDimitry Andric /// deeper subexpression is more precise and valid as long as it isn't less 29910b57cec5SDimitry Andric /// complex than another subexpression. For expressions involving multiple 29920b57cec5SDimitry Andric /// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids 29930b57cec5SDimitry Andric /// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i], 29940b57cec5SDimitry Andric /// IVInc==b-a. 29950b57cec5SDimitry Andric /// 29960b57cec5SDimitry Andric /// Since SCEVUnknown is the rightmost type, and pointers are the rightmost 29970b57cec5SDimitry Andric /// SCEVUnknown, we simply return the rightmost SCEV operand. 29980b57cec5SDimitry Andric static const SCEV *getExprBase(const SCEV *S) { 29990b57cec5SDimitry Andric switch (S->getSCEVType()) { 300006c3fb27SDimitry Andric default: // including scUnknown. 30010b57cec5SDimitry Andric return S; 30020b57cec5SDimitry Andric case scConstant: 300306c3fb27SDimitry Andric case scVScale: 30040b57cec5SDimitry Andric return nullptr; 30050b57cec5SDimitry Andric case scTruncate: 30060b57cec5SDimitry Andric return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand()); 30070b57cec5SDimitry Andric case scZeroExtend: 30080b57cec5SDimitry Andric return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand()); 30090b57cec5SDimitry Andric case scSignExtend: 30100b57cec5SDimitry Andric return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand()); 30110b57cec5SDimitry Andric case scAddExpr: { 30120b57cec5SDimitry Andric // Skip over scaled operands (scMulExpr) to follow add operands as long as 30130b57cec5SDimitry Andric // there's nothing more complex. 30140b57cec5SDimitry Andric // FIXME: not sure if we want to recognize negation. 
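    // Illustrative example (hypothetical operands): for the add
    // (4 + (8 * %a) + %b), the reverse walk below visits %b first; since %b
    // is neither an add nor a mul, it is returned as the approximate base.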
30150b57cec5SDimitry Andric     const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
3016349cc55cSDimitry Andric     for (const SCEV *SubExpr : reverse(Add->operands())) {
30170b57cec5SDimitry Andric       if (SubExpr->getSCEVType() == scAddExpr)
30180b57cec5SDimitry Andric         return getExprBase(SubExpr);
30190b57cec5SDimitry Andric 
30200b57cec5SDimitry Andric       if (SubExpr->getSCEVType() != scMulExpr)
30210b57cec5SDimitry Andric         return SubExpr;
30220b57cec5SDimitry Andric     }
30230b57cec5SDimitry Andric     return S; // all operands are scaled, be conservative.
30240b57cec5SDimitry Andric   }
30250b57cec5SDimitry Andric   case scAddRecExpr:
30260b57cec5SDimitry Andric     return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
30270b57cec5SDimitry Andric   }
3028e8d8bef9SDimitry Andric   llvm_unreachable("Unknown SCEV kind!");
30290b57cec5SDimitry Andric }
30300b57cec5SDimitry Andric 
30310b57cec5SDimitry Andric /// Return true if the chain increment is profitable to expand into a loop
30320b57cec5SDimitry Andric /// invariant value, which may require its own register. A profitable chain
30330b57cec5SDimitry Andric /// increment will be an offset relative to the same base. We allow such offsets
30340b57cec5SDimitry Andric /// to potentially be used as a chain increment as long as it's not obviously
30350b57cec5SDimitry Andric /// expensive to expand using real instructions.
30360b57cec5SDimitry Andric bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
30370b57cec5SDimitry Andric                                     const SCEV *IncExpr,
30380b57cec5SDimitry Andric                                     ScalarEvolution &SE) {
30390b57cec5SDimitry Andric   // Aggressively form chains when -stress-ivchain.
30400b57cec5SDimitry Andric   if (StressIVChain)
30410b57cec5SDimitry Andric     return true;
30420b57cec5SDimitry Andric 
30430b57cec5SDimitry Andric   // Do not replace a constant offset from IV head with a nonconstant IV
30440b57cec5SDimitry Andric   // increment.
30450b57cec5SDimitry Andric   if (!isa<SCEVConstant>(IncExpr)) {
30460b57cec5SDimitry Andric     const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
30470b57cec5SDimitry Andric     if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
30480b57cec5SDimitry Andric       return false;
30490b57cec5SDimitry Andric   }
30500b57cec5SDimitry Andric 
30510b57cec5SDimitry Andric   SmallPtrSet<const SCEV*, 8> Processed;
30520b57cec5SDimitry Andric   return !isHighCostExpansion(IncExpr, Processed, SE);
30530b57cec5SDimitry Andric }
30540b57cec5SDimitry Andric 
30550b57cec5SDimitry Andric /// Return true if the number of registers needed for the chain is estimated to
30560b57cec5SDimitry Andric /// be less than the number required for the individual IV users. First prohibit
30570b57cec5SDimitry Andric /// any IV users that keep the IV live across increments (the Users set should
30580b57cec5SDimitry Andric /// be empty). Next count the number and type of increments in the chain.
30590b57cec5SDimitry Andric ///
30600b57cec5SDimitry Andric /// Chaining IVs can lead to considerable code bloat if ISEL doesn't
30610b57cec5SDimitry Andric /// effectively use postinc addressing modes. Only consider it profitable if the
30620b57cec5SDimitry Andric /// increments can be computed in fewer registers when chained.
30630b57cec5SDimitry Andric ///
30640b57cec5SDimitry Andric /// TODO: Consider IVInc free if it's already used in other chains.
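///
/// Illustrative cost sketch (not normative): a chain of three constant
/// increments that closes back on the header phi starts at cost 1, takes -1
/// for completing the chain and -1 because NumConstIncrements > 1, ending at
/// -1 < 0, i.e. profitable.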
30655ffd83dbSDimitry Andric static bool isProfitableChain(IVChain &Chain,
30665ffd83dbSDimitry Andric                               SmallPtrSetImpl<Instruction *> &Users,
30675ffd83dbSDimitry Andric                               ScalarEvolution &SE,
30685ffd83dbSDimitry Andric                               const TargetTransformInfo &TTI) {
30690b57cec5SDimitry Andric   if (StressIVChain)
30700b57cec5SDimitry Andric     return true;
30710b57cec5SDimitry Andric 
30720b57cec5SDimitry Andric   if (!Chain.hasIncs())
30730b57cec5SDimitry Andric     return false;
30740b57cec5SDimitry Andric 
30750b57cec5SDimitry Andric   if (!Users.empty()) {
30760b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
30770b57cec5SDimitry Andric                for (Instruction *Inst
30780b57cec5SDimitry Andric                     : Users) { dbgs() << " " << *Inst << "\n"; });
30790b57cec5SDimitry Andric     return false;
30800b57cec5SDimitry Andric   }
30810b57cec5SDimitry Andric   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
30820b57cec5SDimitry Andric 
30830b57cec5SDimitry Andric   // The chain itself may require a register, so initialize cost to 1.
30840b57cec5SDimitry Andric   int cost = 1;
30850b57cec5SDimitry Andric 
30860b57cec5SDimitry Andric   // A complete chain likely eliminates the need for keeping the original IV in
30870b57cec5SDimitry Andric   // a register. LSR does not currently know how to form a complete chain unless
30880b57cec5SDimitry Andric   // the header phi already exists.
30890b57cec5SDimitry Andric   if (isa<PHINode>(Chain.tailUserInst())
30900b57cec5SDimitry Andric       && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
30910b57cec5SDimitry Andric     --cost;
30920b57cec5SDimitry Andric   }
30930b57cec5SDimitry Andric   const SCEV *LastIncExpr = nullptr;
30940b57cec5SDimitry Andric   unsigned NumConstIncrements = 0;
30950b57cec5SDimitry Andric   unsigned NumVarIncrements = 0;
30960b57cec5SDimitry Andric   unsigned NumReusedIncrements = 0;
30975ffd83dbSDimitry Andric 
30985ffd83dbSDimitry Andric   if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
30995ffd83dbSDimitry Andric     return true;
31005ffd83dbSDimitry Andric 
31010b57cec5SDimitry Andric   for (const IVInc &Inc : Chain) {
31025ffd83dbSDimitry Andric     if (TTI.isProfitableLSRChainElement(Inc.UserInst))
31035ffd83dbSDimitry Andric       return true;
31040b57cec5SDimitry Andric     if (Inc.IncExpr->isZero())
31050b57cec5SDimitry Andric       continue;
31060b57cec5SDimitry Andric 
31070b57cec5SDimitry Andric     // Incrementing by zero or some constant is neutral. We assume constants can
31080b57cec5SDimitry Andric     // be folded into an addressing mode or an add's immediate operand.
31090b57cec5SDimitry Andric     if (isa<SCEVConstant>(Inc.IncExpr)) {
31100b57cec5SDimitry Andric       ++NumConstIncrements;
31110b57cec5SDimitry Andric       continue;
31120b57cec5SDimitry Andric     }
31130b57cec5SDimitry Andric 
31140b57cec5SDimitry Andric     if (Inc.IncExpr == LastIncExpr)
31150b57cec5SDimitry Andric       ++NumReusedIncrements;
31160b57cec5SDimitry Andric     else
31170b57cec5SDimitry Andric       ++NumVarIncrements;
31180b57cec5SDimitry Andric 
31190b57cec5SDimitry Andric     LastIncExpr = Inc.IncExpr;
31200b57cec5SDimitry Andric   }
31210b57cec5SDimitry Andric   // An IV chain with a single increment is handled by LSR's postinc
31220b57cec5SDimitry Andric   // uses. However, a chain with multiple increments requires keeping the IV's
31230b57cec5SDimitry Andric   // value live longer than it needs to be if chained.
31240b57cec5SDimitry Andric if (NumConstIncrements > 1) 31250b57cec5SDimitry Andric --cost; 31260b57cec5SDimitry Andric 31270b57cec5SDimitry Andric // Materializing increment expressions in the preheader that didn't exist in 31280b57cec5SDimitry Andric // the original code may cost a register. For example, sign-extended array 31290b57cec5SDimitry Andric // indices can produce ridiculous increments like this: 31300b57cec5SDimitry Andric // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64))) 31310b57cec5SDimitry Andric cost += NumVarIncrements; 31320b57cec5SDimitry Andric 31330b57cec5SDimitry Andric // Reusing variable increments likely saves a register to hold the multiple of 31340b57cec5SDimitry Andric // the stride. 31350b57cec5SDimitry Andric cost -= NumReusedIncrements; 31360b57cec5SDimitry Andric 31370b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost 31380b57cec5SDimitry Andric << "\n"); 31390b57cec5SDimitry Andric 31400b57cec5SDimitry Andric return cost < 0; 31410b57cec5SDimitry Andric } 31420b57cec5SDimitry Andric 31430b57cec5SDimitry Andric /// Add this IV user to an existing chain or make it the head of a new chain. 31440b57cec5SDimitry Andric void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper, 31450b57cec5SDimitry Andric SmallVectorImpl<ChainUsers> &ChainUsersVec) { 31460b57cec5SDimitry Andric // When IVs are used as types of varying widths, they are generally converted 31470b57cec5SDimitry Andric // to a wider type with some uses remaining narrow under a (free) trunc. 31480b57cec5SDimitry Andric Value *const NextIV = getWideOperand(IVOper); 31490b57cec5SDimitry Andric const SCEV *const OperExpr = SE.getSCEV(NextIV); 31500b57cec5SDimitry Andric const SCEV *const OperExprBase = getExprBase(OperExpr); 31510b57cec5SDimitry Andric 31520b57cec5SDimitry Andric // Visit all existing chains. Check if its IVOper can be computed as a 31530b57cec5SDimitry Andric // profitable loop invariant increment from the last link in the Chain. 31540b57cec5SDimitry Andric unsigned ChainIdx = 0, NChains = IVChainVec.size(); 31550b57cec5SDimitry Andric const SCEV *LastIncExpr = nullptr; 31560b57cec5SDimitry Andric for (; ChainIdx < NChains; ++ChainIdx) { 31570b57cec5SDimitry Andric IVChain &Chain = IVChainVec[ChainIdx]; 31580b57cec5SDimitry Andric 31590b57cec5SDimitry Andric // Prune the solution space aggressively by checking that both IV operands 31600b57cec5SDimitry Andric // are expressions that operate on the same unscaled SCEVUnknown. This 31610b57cec5SDimitry Andric // "base" will be canceled by the subsequent getMinusSCEV call. Checking 31620b57cec5SDimitry Andric // first avoids creating extra SCEV expressions. 31630b57cec5SDimitry Andric if (!StressIVChain && Chain.ExprBase != OperExprBase) 31640b57cec5SDimitry Andric continue; 31650b57cec5SDimitry Andric 31660b57cec5SDimitry Andric Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand); 31675f757f3fSDimitry Andric if (PrevIV->getType() != NextIV->getType()) 31680b57cec5SDimitry Andric continue; 31690b57cec5SDimitry Andric 31700b57cec5SDimitry Andric // A phi node terminates a chain. 31710b57cec5SDimitry Andric if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst())) 31720b57cec5SDimitry Andric continue; 31730b57cec5SDimitry Andric 31740b57cec5SDimitry Andric // The increment must be loop-invariant so it can be kept in a register. 
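    // Illustrative example (hypothetical values): with
    // PrevExpr = {%base,+,4}<%L> and OperExpr = {(16 + %base),+,4}<%L>, the
    // subtraction below yields IncExpr = 16, which is loop-invariant and may
    // therefore extend the chain.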
31750b57cec5SDimitry Andric     const SCEV *PrevExpr = SE.getSCEV(PrevIV);
31760b57cec5SDimitry Andric     const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
3177fe6060f1SDimitry Andric     if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))
31780b57cec5SDimitry Andric       continue;
31790b57cec5SDimitry Andric 
31800b57cec5SDimitry Andric     if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
31810b57cec5SDimitry Andric       LastIncExpr = IncExpr;
31820b57cec5SDimitry Andric       break;
31830b57cec5SDimitry Andric     }
31840b57cec5SDimitry Andric   }
31850b57cec5SDimitry Andric   // If we haven't found a chain, create a new one, unless we hit the max. Don't
31860b57cec5SDimitry Andric   // bother for phi nodes, because they must be last in the chain.
31870b57cec5SDimitry Andric   if (ChainIdx == NChains) {
31880b57cec5SDimitry Andric     if (isa<PHINode>(UserInst))
31890b57cec5SDimitry Andric       return;
31900b57cec5SDimitry Andric     if (NChains >= MaxChains && !StressIVChain) {
31910b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
31920b57cec5SDimitry Andric       return;
31930b57cec5SDimitry Andric     }
31940b57cec5SDimitry Andric     LastIncExpr = OperExpr;
31950b57cec5SDimitry Andric     // IVUsers may have skipped over sign/zero extensions. We don't currently
31960b57cec5SDimitry Andric     // attempt to form chains involving extensions unless they can be hoisted
31970b57cec5SDimitry Andric     // into this loop's AddRec.
31980b57cec5SDimitry Andric     if (!isa<SCEVAddRecExpr>(LastIncExpr))
31990b57cec5SDimitry Andric       return;
32000b57cec5SDimitry Andric     ++NChains;
32010b57cec5SDimitry Andric     IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
32020b57cec5SDimitry Andric                                  OperExprBase));
32030b57cec5SDimitry Andric     ChainUsersVec.resize(NChains);
32040b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
32050b57cec5SDimitry Andric                       << ") IV=" << *LastIncExpr << "\n");
32060b57cec5SDimitry Andric   } else {
32070b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Inc: (" << *UserInst
32080b57cec5SDimitry Andric                       << ") IV+" << *LastIncExpr << "\n");
32090b57cec5SDimitry Andric     // Add this IV user to the end of the chain.
32100b57cec5SDimitry Andric     IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
32110b57cec5SDimitry Andric   }
32120b57cec5SDimitry Andric   IVChain &Chain = IVChainVec[ChainIdx];
32130b57cec5SDimitry Andric 
32140b57cec5SDimitry Andric   SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
32150b57cec5SDimitry Andric   // This chain's NearUsers become FarUsers.
32160b57cec5SDimitry Andric   if (!LastIncExpr->isZero()) {
32170b57cec5SDimitry Andric     ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
32180b57cec5SDimitry Andric                                             NearUsers.end());
32190b57cec5SDimitry Andric     NearUsers.clear();
32200b57cec5SDimitry Andric   }
32210b57cec5SDimitry Andric 
32220b57cec5SDimitry Andric   // All other uses of IVOperand become near uses of the chain.
32230b57cec5SDimitry Andric   // We currently ignore intermediate values within SCEV expressions, assuming
32240b57cec5SDimitry Andric   // they will eventually be used by the current chain, or can be computed
32250b57cec5SDimitry Andric   // from one of the chain increments. To be more precise we could
32260b57cec5SDimitry Andric   // transitively follow its users and only add leaf IV users to the set.
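  // Illustrative example (hypothetical IR): if IVOper is %iv and the loop
  // also contains "%p = getelementptr i8, ptr %q, i64 %iv" outside the
  // chain, the scan below records that getelementptr as a near user; should
  // a later link advance the IV, those near users migrate to FarUsers above.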
32270b57cec5SDimitry Andric for (User *U : IVOper->users()) { 32280b57cec5SDimitry Andric Instruction *OtherUse = dyn_cast<Instruction>(U); 32290b57cec5SDimitry Andric if (!OtherUse) 32300b57cec5SDimitry Andric continue; 32310b57cec5SDimitry Andric // Uses in the chain will no longer be uses if the chain is formed. 32320b57cec5SDimitry Andric // Include the head of the chain in this iteration (not Chain.begin()). 32330b57cec5SDimitry Andric IVChain::const_iterator IncIter = Chain.Incs.begin(); 32340b57cec5SDimitry Andric IVChain::const_iterator IncEnd = Chain.Incs.end(); 32350b57cec5SDimitry Andric for( ; IncIter != IncEnd; ++IncIter) { 32360b57cec5SDimitry Andric if (IncIter->UserInst == OtherUse) 32370b57cec5SDimitry Andric break; 32380b57cec5SDimitry Andric } 32390b57cec5SDimitry Andric if (IncIter != IncEnd) 32400b57cec5SDimitry Andric continue; 32410b57cec5SDimitry Andric 32420b57cec5SDimitry Andric if (SE.isSCEVable(OtherUse->getType()) 32430b57cec5SDimitry Andric && !isa<SCEVUnknown>(SE.getSCEV(OtherUse)) 32440b57cec5SDimitry Andric && IU.isIVUserOrOperand(OtherUse)) { 32450b57cec5SDimitry Andric continue; 32460b57cec5SDimitry Andric } 32470b57cec5SDimitry Andric NearUsers.insert(OtherUse); 32480b57cec5SDimitry Andric } 32490b57cec5SDimitry Andric 32500b57cec5SDimitry Andric // Since this user is part of the chain, it's no longer considered a use 32510b57cec5SDimitry Andric // of the chain. 32520b57cec5SDimitry Andric ChainUsersVec[ChainIdx].FarUsers.erase(UserInst); 32530b57cec5SDimitry Andric } 32540b57cec5SDimitry Andric 32550b57cec5SDimitry Andric /// Populate the vector of Chains. 32560b57cec5SDimitry Andric /// 32570b57cec5SDimitry Andric /// This decreases ILP at the architecture level. Targets with ample registers, 32580b57cec5SDimitry Andric /// multiple memory ports, and no register renaming probably don't want 32590b57cec5SDimitry Andric /// this. However, such targets should probably disable LSR altogether. 32600b57cec5SDimitry Andric /// 32610b57cec5SDimitry Andric /// The job of LSR is to make a reasonable choice of induction variables across 32620b57cec5SDimitry Andric /// the loop. Subsequent passes can easily "unchain" computation exposing more 32630b57cec5SDimitry Andric /// ILP *within the loop* if the target wants it. 32640b57cec5SDimitry Andric /// 32650b57cec5SDimitry Andric /// Finding the best IV chain is potentially a scheduling problem. Since LSR 32660b57cec5SDimitry Andric /// will not reorder memory operations, it will recognize this as a chain, but 32670b57cec5SDimitry Andric /// will generate redundant IV increments. Ideally this would be corrected later 32680b57cec5SDimitry Andric /// by a smart scheduler: 32690b57cec5SDimitry Andric /// = A[i] 32700b57cec5SDimitry Andric /// = A[i+x] 32710b57cec5SDimitry Andric /// A[i] = 32720b57cec5SDimitry Andric /// A[i+x] = 32730b57cec5SDimitry Andric /// 32740b57cec5SDimitry Andric /// TODO: Walk the entire domtree within this loop, not just the path to the 32750b57cec5SDimitry Andric /// loop latch. This will discover chains on side paths, but requires 32760b57cec5SDimitry Andric /// maintaining multiple copies of the Chains state. 
32770b57cec5SDimitry Andric void LSRInstance::CollectChains() { 32780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n"); 32790b57cec5SDimitry Andric SmallVector<ChainUsers, 8> ChainUsersVec; 32800b57cec5SDimitry Andric 32810b57cec5SDimitry Andric SmallVector<BasicBlock *,8> LatchPath; 32820b57cec5SDimitry Andric BasicBlock *LoopHeader = L->getHeader(); 32830b57cec5SDimitry Andric for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch()); 32840b57cec5SDimitry Andric Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) { 32850b57cec5SDimitry Andric LatchPath.push_back(Rung->getBlock()); 32860b57cec5SDimitry Andric } 32870b57cec5SDimitry Andric LatchPath.push_back(LoopHeader); 32880b57cec5SDimitry Andric 32890b57cec5SDimitry Andric // Walk the instruction stream from the loop header to the loop latch. 32900b57cec5SDimitry Andric for (BasicBlock *BB : reverse(LatchPath)) { 32910b57cec5SDimitry Andric for (Instruction &I : *BB) { 32920b57cec5SDimitry Andric // Skip instructions that weren't seen by IVUsers analysis. 32930b57cec5SDimitry Andric if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I)) 32940b57cec5SDimitry Andric continue; 32950b57cec5SDimitry Andric 32960b57cec5SDimitry Andric // Ignore users that are part of a SCEV expression. This way we only 32970b57cec5SDimitry Andric // consider leaf IV Users. This effectively rediscovers a portion of 32980b57cec5SDimitry Andric // IVUsers analysis but in program order this time. 32990b57cec5SDimitry Andric if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I))) 33000b57cec5SDimitry Andric continue; 33010b57cec5SDimitry Andric 33020b57cec5SDimitry Andric // Remove this instruction from any NearUsers set it may be in. 33030b57cec5SDimitry Andric for (unsigned ChainIdx = 0, NChains = IVChainVec.size(); 33040b57cec5SDimitry Andric ChainIdx < NChains; ++ChainIdx) { 33050b57cec5SDimitry Andric ChainUsersVec[ChainIdx].NearUsers.erase(&I); 33060b57cec5SDimitry Andric } 33070b57cec5SDimitry Andric // Search for operands that can be chained. 33080b57cec5SDimitry Andric SmallPtrSet<Instruction*, 4> UniqueOperands; 33090b57cec5SDimitry Andric User::op_iterator IVOpEnd = I.op_end(); 33100b57cec5SDimitry Andric User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE); 33110b57cec5SDimitry Andric while (IVOpIter != IVOpEnd) { 33120b57cec5SDimitry Andric Instruction *IVOpInst = cast<Instruction>(*IVOpIter); 33130b57cec5SDimitry Andric if (UniqueOperands.insert(IVOpInst).second) 33140b57cec5SDimitry Andric ChainInstruction(&I, IVOpInst, ChainUsersVec); 33150b57cec5SDimitry Andric IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE); 33160b57cec5SDimitry Andric } 33170b57cec5SDimitry Andric } // Continue walking down the instructions. 33180b57cec5SDimitry Andric } // Continue walking down the domtree. 33190b57cec5SDimitry Andric // Visit phi backedges to determine if the chain can generate the IV postinc. 33200b57cec5SDimitry Andric for (PHINode &PN : L->getHeader()->phis()) { 33210b57cec5SDimitry Andric if (!SE.isSCEVable(PN.getType())) 33220b57cec5SDimitry Andric continue; 33230b57cec5SDimitry Andric 33240b57cec5SDimitry Andric Instruction *IncV = 33250b57cec5SDimitry Andric dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch())); 33260b57cec5SDimitry Andric if (IncV) 33270b57cec5SDimitry Andric ChainInstruction(&PN, IncV, ChainUsersVec); 33280b57cec5SDimitry Andric } 33290b57cec5SDimitry Andric // Remove any unprofitable chains. 
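  // The loop below compacts IVChainVec in place, keeping only profitable
  // chains: e.g. (illustrative) if chains {C0, C1, C2} were collected and
  // only C1 is unprofitable, the vector is rewritten to {C0, C2} and then
  // resized to two entries.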
33300b57cec5SDimitry Andric   unsigned ChainIdx = 0;
33310b57cec5SDimitry Andric   for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
33320b57cec5SDimitry Andric        UsersIdx < NChains; ++UsersIdx) {
33330b57cec5SDimitry Andric     if (!isProfitableChain(IVChainVec[UsersIdx],
33345ffd83dbSDimitry Andric                            ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
33350b57cec5SDimitry Andric       continue;
33360b57cec5SDimitry Andric     // Preserve the chain at UsersIdx.
33370b57cec5SDimitry Andric     if (ChainIdx != UsersIdx)
33380b57cec5SDimitry Andric       IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
33390b57cec5SDimitry Andric     FinalizeChain(IVChainVec[ChainIdx]);
33400b57cec5SDimitry Andric     ++ChainIdx;
33410b57cec5SDimitry Andric   }
33420b57cec5SDimitry Andric   IVChainVec.resize(ChainIdx);
33430b57cec5SDimitry Andric }
33440b57cec5SDimitry Andric 
33450b57cec5SDimitry Andric void LSRInstance::FinalizeChain(IVChain &Chain) {
33460b57cec5SDimitry Andric   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
33470b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
33480b57cec5SDimitry Andric 
33490b57cec5SDimitry Andric   for (const IVInc &Inc : Chain) {
33500b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << " Inc: " << *Inc.UserInst << "\n");
33510b57cec5SDimitry Andric     auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
33520b57cec5SDimitry Andric     assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
33530b57cec5SDimitry Andric     IVIncSet.insert(UseI);
33540b57cec5SDimitry Andric   }
33550b57cec5SDimitry Andric }
33560b57cec5SDimitry Andric 
33570b57cec5SDimitry Andric /// Return true if the IVInc can be folded into an addressing mode.
33580b57cec5SDimitry Andric static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
33590b57cec5SDimitry Andric                              Value *Operand, const TargetTransformInfo &TTI) {
33600b57cec5SDimitry Andric   const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
33610fca6ea1SDimitry Andric   Immediate IncOffset = Immediate::getZero();
33620fca6ea1SDimitry Andric   if (IncConst) {
33630fca6ea1SDimitry Andric     if (IncConst->getAPInt().getSignificantBits() > 64)
33640b57cec5SDimitry Andric       return false;
33650fca6ea1SDimitry Andric     IncOffset = Immediate::getFixed(IncConst->getValue()->getSExtValue());
33660fca6ea1SDimitry Andric   } else {
33670fca6ea1SDimitry Andric     // Look for mul(vscale, constant), to detect a scalable offset.
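    // Illustrative example (hypothetical scalable-vector increment): for
    // IncExpr = (16 * vscale), Scale below is the constant 16, and the
    // increment is recorded as Immediate::getScalable(16), an offset that
    // grows linearly with vscale.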
33680fca6ea1SDimitry Andric auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr); 33690fca6ea1SDimitry Andric if (!IncVScale || IncVScale->getNumOperands() != 2 || 33700fca6ea1SDimitry Andric !isa<SCEVVScale>(IncVScale->getOperand(1))) 33710fca6ea1SDimitry Andric return false; 33720fca6ea1SDimitry Andric auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0)); 33730fca6ea1SDimitry Andric if (!Scale || Scale->getType()->getScalarSizeInBits() > 64) 33740fca6ea1SDimitry Andric return false; 33750fca6ea1SDimitry Andric IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue()); 33760fca6ea1SDimitry Andric } 33770b57cec5SDimitry Andric 33780fca6ea1SDimitry Andric if (!isAddressUse(TTI, UserInst, Operand)) 33790b57cec5SDimitry Andric return false; 33800b57cec5SDimitry Andric 33810b57cec5SDimitry Andric MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand); 33820b57cec5SDimitry Andric if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr, 33830b57cec5SDimitry Andric IncOffset, /*HasBaseReg=*/false)) 33840b57cec5SDimitry Andric return false; 33850b57cec5SDimitry Andric 33860b57cec5SDimitry Andric return true; 33870b57cec5SDimitry Andric } 33880b57cec5SDimitry Andric 33890b57cec5SDimitry Andric /// Generate an add or subtract for each IVInc in a chain to materialize the IV 33900b57cec5SDimitry Andric /// user's operand from the previous IV user's operand. 3391fcaf7f86SDimitry Andric void LSRInstance::GenerateIVChain(const IVChain &Chain, 33920b57cec5SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) { 33930b57cec5SDimitry Andric // Find the new IVOperand for the head of the chain. It may have been replaced 33940b57cec5SDimitry Andric // by LSR. 33950b57cec5SDimitry Andric const IVInc &Head = Chain.Incs[0]; 33960b57cec5SDimitry Andric User::op_iterator IVOpEnd = Head.UserInst->op_end(); 33970b57cec5SDimitry Andric // findIVOperand returns IVOpEnd if it can no longer find a valid IV user. 33980b57cec5SDimitry Andric User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(), 33990b57cec5SDimitry Andric IVOpEnd, L, SE); 34000b57cec5SDimitry Andric Value *IVSrc = nullptr; 34010b57cec5SDimitry Andric while (IVOpIter != IVOpEnd) { 34020b57cec5SDimitry Andric IVSrc = getWideOperand(*IVOpIter); 34030b57cec5SDimitry Andric 34040b57cec5SDimitry Andric // If this operand computes the expression that the chain needs, we may use 34050b57cec5SDimitry Andric // it. (Check this after setting IVSrc which is used below.) 34060b57cec5SDimitry Andric // 34070b57cec5SDimitry Andric // Note that if Head.IncExpr is wider than IVSrc, then this phi is too 34080b57cec5SDimitry Andric // narrow for the chain, so we can no longer use it. We do allow using a 34090b57cec5SDimitry Andric // wider phi, assuming the LSR checked for free truncation. In that case we 34100b57cec5SDimitry Andric // should already have a truncate on this operand such that 34110b57cec5SDimitry Andric // getSCEV(IVSrc) == IncExpr. 34120b57cec5SDimitry Andric if (SE.getSCEV(*IVOpIter) == Head.IncExpr 34130b57cec5SDimitry Andric || SE.getSCEV(IVSrc) == Head.IncExpr) { 34140b57cec5SDimitry Andric break; 34150b57cec5SDimitry Andric } 34160b57cec5SDimitry Andric IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE); 34170b57cec5SDimitry Andric } 34180b57cec5SDimitry Andric if (IVOpIter == IVOpEnd) { 34190b57cec5SDimitry Andric // Gracefully give up on this chain. 
34200b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n"); 34210b57cec5SDimitry Andric return; 34220b57cec5SDimitry Andric } 34238bcb0991SDimitry Andric assert(IVSrc && "Failed to find IV chain source"); 34240b57cec5SDimitry Andric 34250b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n"); 34260b57cec5SDimitry Andric Type *IVTy = IVSrc->getType(); 34270b57cec5SDimitry Andric Type *IntTy = SE.getEffectiveSCEVType(IVTy); 34280b57cec5SDimitry Andric const SCEV *LeftOverExpr = nullptr; 34290fca6ea1SDimitry Andric const SCEV *Accum = SE.getZero(IntTy); 34300fca6ea1SDimitry Andric SmallVector<std::pair<const SCEV *, Value *>> Bases; 34310fca6ea1SDimitry Andric Bases.emplace_back(Accum, IVSrc); 34320fca6ea1SDimitry Andric 34330b57cec5SDimitry Andric for (const IVInc &Inc : Chain) { 34340b57cec5SDimitry Andric Instruction *InsertPt = Inc.UserInst; 34350b57cec5SDimitry Andric if (isa<PHINode>(InsertPt)) 34360b57cec5SDimitry Andric InsertPt = L->getLoopLatch()->getTerminator(); 34370b57cec5SDimitry Andric 34380b57cec5SDimitry Andric // IVOper will replace the current IV User's operand. IVSrc is the IV 34390b57cec5SDimitry Andric // value currently held in a register. 34400b57cec5SDimitry Andric Value *IVOper = IVSrc; 34410b57cec5SDimitry Andric if (!Inc.IncExpr->isZero()) { 34420b57cec5SDimitry Andric // IncExpr was the result of subtraction of two narrow values, so must 34430b57cec5SDimitry Andric // be signed. 34440b57cec5SDimitry Andric const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy); 34450fca6ea1SDimitry Andric Accum = SE.getAddExpr(Accum, IncExpr); 34460b57cec5SDimitry Andric LeftOverExpr = LeftOverExpr ? 34470b57cec5SDimitry Andric SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr; 34480b57cec5SDimitry Andric } 34490fca6ea1SDimitry Andric 34500fca6ea1SDimitry Andric // Look through each base to see if any can produce a nice addressing mode. 34510fca6ea1SDimitry Andric bool FoundBase = false; 34520fca6ea1SDimitry Andric for (auto [MapScev, MapIVOper] : reverse(Bases)) { 34530fca6ea1SDimitry Andric const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev); 34540fca6ea1SDimitry Andric if (canFoldIVIncExpr(Remainder, Inc.UserInst, Inc.IVOperand, TTI)) { 34550fca6ea1SDimitry Andric if (!Remainder->isZero()) { 34560fca6ea1SDimitry Andric Rewriter.clearPostInc(); 34570fca6ea1SDimitry Andric Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt); 34580fca6ea1SDimitry Andric const SCEV *IVOperExpr = 34590fca6ea1SDimitry Andric SE.getAddExpr(SE.getUnknown(MapIVOper), SE.getUnknown(IncV)); 34600fca6ea1SDimitry Andric IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt); 34610fca6ea1SDimitry Andric } else { 34620fca6ea1SDimitry Andric IVOper = MapIVOper; 34630fca6ea1SDimitry Andric } 34640fca6ea1SDimitry Andric 34650fca6ea1SDimitry Andric FoundBase = true; 34660fca6ea1SDimitry Andric break; 34670fca6ea1SDimitry Andric } 34680fca6ea1SDimitry Andric } 34690fca6ea1SDimitry Andric if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) { 34700b57cec5SDimitry Andric // Expand the IV increment. 
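      // Illustrative example (hypothetical values): with IVSrc = %iv and
      // LeftOverExpr = 16, the expansion below materializes an add of %iv
      // and 16 at InsertPt and uses the result as the new IVOper.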
34710b57cec5SDimitry Andric Rewriter.clearPostInc(); 34720b57cec5SDimitry Andric Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt); 34730b57cec5SDimitry Andric const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc), 34740b57cec5SDimitry Andric SE.getUnknown(IncV)); 34750b57cec5SDimitry Andric IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt); 34760b57cec5SDimitry Andric 34770b57cec5SDimitry Andric // If an IV increment can't be folded, use it as the next IV value. 34780b57cec5SDimitry Andric if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) { 34790b57cec5SDimitry Andric assert(IVTy == IVOper->getType() && "inconsistent IV increment type"); 34800fca6ea1SDimitry Andric Bases.emplace_back(Accum, IVOper); 34810b57cec5SDimitry Andric IVSrc = IVOper; 34820b57cec5SDimitry Andric LeftOverExpr = nullptr; 34830b57cec5SDimitry Andric } 34840b57cec5SDimitry Andric } 34850b57cec5SDimitry Andric Type *OperTy = Inc.IVOperand->getType(); 34860b57cec5SDimitry Andric if (IVTy != OperTy) { 34870b57cec5SDimitry Andric assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) && 34880b57cec5SDimitry Andric "cannot extend a chained IV"); 34890b57cec5SDimitry Andric IRBuilder<> Builder(InsertPt); 34900b57cec5SDimitry Andric IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain"); 34910b57cec5SDimitry Andric } 34920b57cec5SDimitry Andric Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper); 34935ffd83dbSDimitry Andric if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand)) 34945ffd83dbSDimitry Andric DeadInsts.emplace_back(OperandIsInstr); 34950b57cec5SDimitry Andric } 34960b57cec5SDimitry Andric // If LSR created a new, wider phi, we may also replace its postinc. We only 34970b57cec5SDimitry Andric // do this if we also found a wide value for the head of the chain. 
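  // Illustrative example (hypothetical IR): if some header phi has the same
  // type as IVSrc and its backedge value computes the same SCEV as IVSrc,
  // the loop below redirects that phi's backedge input to the chain's value,
  // letting the redundant increment die.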
34980b57cec5SDimitry Andric if (isa<PHINode>(Chain.tailUserInst())) { 34990b57cec5SDimitry Andric for (PHINode &Phi : L->getHeader()->phis()) { 35005f757f3fSDimitry Andric if (Phi.getType() != IVSrc->getType()) 35010b57cec5SDimitry Andric continue; 35020b57cec5SDimitry Andric Instruction *PostIncV = dyn_cast<Instruction>( 35030b57cec5SDimitry Andric Phi.getIncomingValueForBlock(L->getLoopLatch())); 35040b57cec5SDimitry Andric if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc))) 35050b57cec5SDimitry Andric continue; 35060b57cec5SDimitry Andric Value *IVOper = IVSrc; 35070b57cec5SDimitry Andric Type *PostIncTy = PostIncV->getType(); 35080b57cec5SDimitry Andric if (IVTy != PostIncTy) { 35090b57cec5SDimitry Andric assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types"); 35100b57cec5SDimitry Andric IRBuilder<> Builder(L->getLoopLatch()->getTerminator()); 35110b57cec5SDimitry Andric Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc()); 35120b57cec5SDimitry Andric IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain"); 35130b57cec5SDimitry Andric } 35140b57cec5SDimitry Andric Phi.replaceUsesOfWith(PostIncV, IVOper); 35150b57cec5SDimitry Andric DeadInsts.emplace_back(PostIncV); 35160b57cec5SDimitry Andric } 35170b57cec5SDimitry Andric } 35180b57cec5SDimitry Andric } 35190b57cec5SDimitry Andric 35200b57cec5SDimitry Andric void LSRInstance::CollectFixupsAndInitialFormulae() { 35210b57cec5SDimitry Andric BranchInst *ExitBranch = nullptr; 35225ffd83dbSDimitry Andric bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI); 35230b57cec5SDimitry Andric 3524bdd1243dSDimitry Andric // For calculating baseline cost 3525bdd1243dSDimitry Andric SmallPtrSet<const SCEV *, 16> Regs; 3526bdd1243dSDimitry Andric DenseSet<const SCEV *> VisitedRegs; 3527bdd1243dSDimitry Andric DenseSet<size_t> VisitedLSRUse; 3528bdd1243dSDimitry Andric 35290b57cec5SDimitry Andric for (const IVStrideUse &U : IU) { 35300b57cec5SDimitry Andric Instruction *UserInst = U.getUser(); 35310b57cec5SDimitry Andric // Skip IV users that are part of profitable IV Chains. 35320b57cec5SDimitry Andric User::op_iterator UseI = 35330b57cec5SDimitry Andric find(UserInst->operands(), U.getOperandValToReplace()); 35340b57cec5SDimitry Andric assert(UseI != UserInst->op_end() && "cannot find IV operand"); 35350b57cec5SDimitry Andric if (IVIncSet.count(UseI)) { 35360b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n'); 35370b57cec5SDimitry Andric continue; 35380b57cec5SDimitry Andric } 35390b57cec5SDimitry Andric 35400b57cec5SDimitry Andric LSRUse::KindType Kind = LSRUse::Basic; 35410b57cec5SDimitry Andric MemAccessTy AccessTy; 35420b57cec5SDimitry Andric if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) { 35430b57cec5SDimitry Andric Kind = LSRUse::Address; 35440b57cec5SDimitry Andric AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace()); 35450b57cec5SDimitry Andric } 35460b57cec5SDimitry Andric 35470b57cec5SDimitry Andric const SCEV *S = IU.getExpr(U); 354806c3fb27SDimitry Andric if (!S) 354906c3fb27SDimitry Andric continue; 35500b57cec5SDimitry Andric PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops(); 35510b57cec5SDimitry Andric 35520b57cec5SDimitry Andric // Equality (== and !=) ICmps are special. 
We can rewrite (i == N) as
35530b57cec5SDimitry Andric   // (N - i == 0), and this allows (N - i) to be the expression that we work
35540b57cec5SDimitry Andric   // with rather than just N or i, so we can consider the register
35550b57cec5SDimitry Andric   // requirements for both N and i at the same time. Limiting this code to
35560b57cec5SDimitry Andric   // equality icmps is not a problem because all interesting loops use
35570b57cec5SDimitry Andric   // equality icmps, thanks to IndVarSimplify.
35588bcb0991SDimitry Andric     if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
35590b57cec5SDimitry Andric       // If CI can be saved on some targets, e.g. replaced by a hardware loop
35600b57cec5SDimitry Andric       // on PowerPC, there is no need to generate initial formulae for it.
35610b57cec5SDimitry Andric       if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
35620b57cec5SDimitry Andric         continue;
35638bcb0991SDimitry Andric       if (CI->isEquality()) {
35640b57cec5SDimitry Andric         // Swap the operands if needed to put the OperandValToReplace on the
35650b57cec5SDimitry Andric         // left, for consistency.
35660b57cec5SDimitry Andric         Value *NV = CI->getOperand(1);
35670b57cec5SDimitry Andric         if (NV == U.getOperandValToReplace()) {
35680b57cec5SDimitry Andric           CI->setOperand(1, CI->getOperand(0));
35690b57cec5SDimitry Andric           CI->setOperand(0, NV);
35700b57cec5SDimitry Andric           NV = CI->getOperand(1);
35710b57cec5SDimitry Andric           Changed = true;
35720b57cec5SDimitry Andric         }
35730b57cec5SDimitry Andric 
35740b57cec5SDimitry Andric         // x == y --> x - y == 0
35750b57cec5SDimitry Andric         const SCEV *N = SE.getSCEV(NV);
3576fcaf7f86SDimitry Andric         if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
3577fe6060f1SDimitry Andric             (!NV->getType()->isPointerTy() ||
3578fe6060f1SDimitry Andric              SE.getPointerBase(N) == SE.getPointerBase(S))) {
35790b57cec5SDimitry Andric           // S is normalized, so normalize N before folding it into S
35800b57cec5SDimitry Andric           // to keep the result normalized.
35810b57cec5SDimitry Andric           N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
358206c3fb27SDimitry Andric           if (!N)
358306c3fb27SDimitry Andric             continue;
35840b57cec5SDimitry Andric           Kind = LSRUse::ICmpZero;
35850b57cec5SDimitry Andric           S = SE.getMinusSCEV(N, S);
3586fcaf7f86SDimitry Andric         } else if (L->isLoopInvariant(NV) &&
3587fcaf7f86SDimitry Andric                    (!isa<Instruction>(NV) ||
3588fcaf7f86SDimitry Andric                     DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
3589fcaf7f86SDimitry Andric                    !NV->getType()->isPointerTy()) {
3590fcaf7f86SDimitry Andric           // If we can't generally expand the expression (e.g. it contains
3591fcaf7f86SDimitry Andric           // a divide), but it is already at a loop invariant point before the
3592fcaf7f86SDimitry Andric           // loop, wrap it in an unknown (to prevent the expander from trying
3593fcaf7f86SDimitry Andric           // to re-expand in a potentially unsafe way.) The restriction to
3594fcaf7f86SDimitry Andric           // integer types is required because the unknown hides the base, and
3595fcaf7f86SDimitry Andric           // SCEV can't compute the difference of two unknown pointers.
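          // Illustrative example (hypothetical IR): if NV is
          // "%n = sdiv i64 %a, %b" computed before the loop, N below becomes
          // the opaque SCEVUnknown(%n), and the use still folds into the
          // ICmpZero kind as (%n - S) without re-expanding the divide.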
3596fcaf7f86SDimitry Andric N = SE.getUnknown(NV); 3597fcaf7f86SDimitry Andric N = normalizeForPostIncUse(N, TmpPostIncLoops, SE); 359806c3fb27SDimitry Andric if (!N) 359906c3fb27SDimitry Andric continue; 3600fcaf7f86SDimitry Andric Kind = LSRUse::ICmpZero; 3601fcaf7f86SDimitry Andric S = SE.getMinusSCEV(N, S); 3602fcaf7f86SDimitry Andric assert(!isa<SCEVCouldNotCompute>(S)); 36030b57cec5SDimitry Andric } 36040b57cec5SDimitry Andric 36050b57cec5SDimitry Andric // -1 and the negations of all interesting strides (except the negation 36060b57cec5SDimitry Andric // of -1) are now also interesting. 36070b57cec5SDimitry Andric for (size_t i = 0, e = Factors.size(); i != e; ++i) 36080b57cec5SDimitry Andric if (Factors[i] != -1) 36090b57cec5SDimitry Andric Factors.insert(-(uint64_t)Factors[i]); 36100b57cec5SDimitry Andric Factors.insert(-1); 36110b57cec5SDimitry Andric } 36128bcb0991SDimitry Andric } 36130b57cec5SDimitry Andric 36140b57cec5SDimitry Andric // Get or create an LSRUse. 36150fca6ea1SDimitry Andric std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy); 36160b57cec5SDimitry Andric size_t LUIdx = P.first; 36170fca6ea1SDimitry Andric Immediate Offset = P.second; 36180b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx]; 36190b57cec5SDimitry Andric 36200b57cec5SDimitry Andric // Record the fixup. 36210b57cec5SDimitry Andric LSRFixup &LF = LU.getNewFixup(); 36220b57cec5SDimitry Andric LF.UserInst = UserInst; 36230b57cec5SDimitry Andric LF.OperandValToReplace = U.getOperandValToReplace(); 36240b57cec5SDimitry Andric LF.PostIncLoops = TmpPostIncLoops; 36250b57cec5SDimitry Andric LF.Offset = Offset; 36260b57cec5SDimitry Andric LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); 36270b57cec5SDimitry Andric 3628bdd1243dSDimitry Andric // Create SCEV as Formula for calculating baseline cost 3629bdd1243dSDimitry Andric if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) { 3630bdd1243dSDimitry Andric Formula F; 3631bdd1243dSDimitry Andric F.initialMatch(S, L, SE); 3632bdd1243dSDimitry Andric BaselineCost.RateFormula(F, Regs, VisitedRegs, LU); 3633bdd1243dSDimitry Andric VisitedLSRUse.insert(LUIdx); 3634bdd1243dSDimitry Andric } 3635bdd1243dSDimitry Andric 36360b57cec5SDimitry Andric if (!LU.WidestFixupType || 36370b57cec5SDimitry Andric SE.getTypeSizeInBits(LU.WidestFixupType) < 36380b57cec5SDimitry Andric SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) 36390b57cec5SDimitry Andric LU.WidestFixupType = LF.OperandValToReplace->getType(); 36400b57cec5SDimitry Andric 36410b57cec5SDimitry Andric // If this is the first use of this LSRUse, give it a formula. 36420b57cec5SDimitry Andric if (LU.Formulae.empty()) { 36430b57cec5SDimitry Andric InsertInitialFormula(S, LU, LUIdx); 36440b57cec5SDimitry Andric CountRegisters(LU.Formulae.back(), LUIdx); 36450b57cec5SDimitry Andric } 36460b57cec5SDimitry Andric } 36470b57cec5SDimitry Andric 36480b57cec5SDimitry Andric LLVM_DEBUG(print_fixups(dbgs())); 36490b57cec5SDimitry Andric } 36500b57cec5SDimitry Andric 36510b57cec5SDimitry Andric /// Insert a formula for the given expression into the given use, separating out 36520b57cec5SDimitry Andric /// loop-variant portions from loop-invariant and loop-computable portions. 3653fcaf7f86SDimitry Andric void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, 3654fcaf7f86SDimitry Andric size_t LUIdx) { 36550b57cec5SDimitry Andric // Mark uses whose expressions cannot be expanded. 
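  // Illustrative example: an expression containing a udiv whose divisor is
  // not known to be non-zero may be unsafe for SCEVExpander to
  // rematerialize; such a use is marked rigid below and keeps only its
  // original formula.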
3656fcaf7f86SDimitry Andric if (!Rewriter.isSafeToExpand(S)) 36570b57cec5SDimitry Andric LU.RigidFormula = true; 36580b57cec5SDimitry Andric 36590b57cec5SDimitry Andric Formula F; 36600b57cec5SDimitry Andric F.initialMatch(S, L, SE); 36610b57cec5SDimitry Andric bool Inserted = InsertFormula(LU, LUIdx, F); 36620b57cec5SDimitry Andric assert(Inserted && "Initial formula already exists!"); (void)Inserted; 36630b57cec5SDimitry Andric } 36640b57cec5SDimitry Andric 36650b57cec5SDimitry Andric /// Insert a simple single-register formula for the given expression into the 36660b57cec5SDimitry Andric /// given use. 36670b57cec5SDimitry Andric void 36680b57cec5SDimitry Andric LSRInstance::InsertSupplementalFormula(const SCEV *S, 36690b57cec5SDimitry Andric LSRUse &LU, size_t LUIdx) { 36700b57cec5SDimitry Andric Formula F; 36710b57cec5SDimitry Andric F.BaseRegs.push_back(S); 36720b57cec5SDimitry Andric F.HasBaseReg = true; 36730b57cec5SDimitry Andric bool Inserted = InsertFormula(LU, LUIdx, F); 36740b57cec5SDimitry Andric assert(Inserted && "Supplemental formula already exists!"); (void)Inserted; 36750b57cec5SDimitry Andric } 36760b57cec5SDimitry Andric 36770b57cec5SDimitry Andric /// Note which registers are used by the given formula, updating RegUses. 36780b57cec5SDimitry Andric void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) { 36790b57cec5SDimitry Andric if (F.ScaledReg) 36800b57cec5SDimitry Andric RegUses.countRegister(F.ScaledReg, LUIdx); 36810b57cec5SDimitry Andric for (const SCEV *BaseReg : F.BaseRegs) 36820b57cec5SDimitry Andric RegUses.countRegister(BaseReg, LUIdx); 36830b57cec5SDimitry Andric } 36840b57cec5SDimitry Andric 36850b57cec5SDimitry Andric /// If the given formula has not yet been inserted, add it to the list, and 36860b57cec5SDimitry Andric /// return true. Return false otherwise. 36870b57cec5SDimitry Andric bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) { 36880b57cec5SDimitry Andric // Do not insert formula that we will not be able to expand. 36890b57cec5SDimitry Andric assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) && 36900b57cec5SDimitry Andric "Formula is illegal"); 36910b57cec5SDimitry Andric 36920b57cec5SDimitry Andric if (!LU.InsertFormula(F, *L)) 36930b57cec5SDimitry Andric return false; 36940b57cec5SDimitry Andric 36950b57cec5SDimitry Andric CountRegisters(F, LUIdx); 36960b57cec5SDimitry Andric return true; 36970b57cec5SDimitry Andric } 36980b57cec5SDimitry Andric 36990b57cec5SDimitry Andric /// Check for other uses of loop-invariant values which we're tracking. These 37000b57cec5SDimitry Andric /// other uses will pin these values in registers, making them less profitable 37010b57cec5SDimitry Andric /// for elimination. 37020b57cec5SDimitry Andric /// TODO: This currently misses non-constant addrec step registers. 37030b57cec5SDimitry Andric /// TODO: Should this give more weight to users inside the loop? 37040b57cec5SDimitry Andric void 37050b57cec5SDimitry Andric LSRInstance::CollectLoopInvariantFixupsAndFormulae() { 37060b57cec5SDimitry Andric SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end()); 37070b57cec5SDimitry Andric SmallPtrSet<const SCEV *, 32> Visited; 37080b57cec5SDimitry Andric 37095f757f3fSDimitry Andric // Don't collect outside uses if we are favoring postinc - the instructions in 37105f757f3fSDimitry Andric // the loop are more important than the ones outside of it. 
37115f757f3fSDimitry Andric if (AMK == TTI::AMK_PostIndexed) 37125f757f3fSDimitry Andric return; 37135f757f3fSDimitry Andric 37140b57cec5SDimitry Andric while (!Worklist.empty()) { 37150b57cec5SDimitry Andric const SCEV *S = Worklist.pop_back_val(); 37160b57cec5SDimitry Andric 37170b57cec5SDimitry Andric // Don't process the same SCEV twice 37180b57cec5SDimitry Andric if (!Visited.insert(S).second) 37190b57cec5SDimitry Andric continue; 37200b57cec5SDimitry Andric 37210b57cec5SDimitry Andric if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) 3722bdd1243dSDimitry Andric append_range(Worklist, N->operands()); 3723e8d8bef9SDimitry Andric else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S)) 37240b57cec5SDimitry Andric Worklist.push_back(C->getOperand()); 37250b57cec5SDimitry Andric else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { 37260b57cec5SDimitry Andric Worklist.push_back(D->getLHS()); 37270b57cec5SDimitry Andric Worklist.push_back(D->getRHS()); 37280b57cec5SDimitry Andric } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) { 37290b57cec5SDimitry Andric const Value *V = US->getValue(); 37300b57cec5SDimitry Andric if (const Instruction *Inst = dyn_cast<Instruction>(V)) { 37310b57cec5SDimitry Andric // Look for instructions defined outside the loop. 37320b57cec5SDimitry Andric if (L->contains(Inst)) continue; 373306c3fb27SDimitry Andric } else if (isa<Constant>(V)) 373406c3fb27SDimitry Andric // Constants can be re-materialized. 37350b57cec5SDimitry Andric continue; 37360b57cec5SDimitry Andric for (const Use &U : V->uses()) { 37370b57cec5SDimitry Andric const Instruction *UserInst = dyn_cast<Instruction>(U.getUser()); 37380b57cec5SDimitry Andric // Ignore non-instructions. 37390b57cec5SDimitry Andric if (!UserInst) 37400b57cec5SDimitry Andric continue; 3741fe6060f1SDimitry Andric // Don't bother if the instruction is an EHPad. 3742fe6060f1SDimitry Andric if (UserInst->isEHPad()) 3743fe6060f1SDimitry Andric continue; 37440b57cec5SDimitry Andric // Ignore instructions in other functions (as can happen with 37450b57cec5SDimitry Andric // Constants). 37460b57cec5SDimitry Andric if (UserInst->getParent()->getParent() != L->getHeader()->getParent()) 37470b57cec5SDimitry Andric continue; 37480b57cec5SDimitry Andric // Ignore instructions not dominated by the loop. 37490b57cec5SDimitry Andric const BasicBlock *UseBB = !isa<PHINode>(UserInst) ? 37500b57cec5SDimitry Andric UserInst->getParent() : 37510b57cec5SDimitry Andric cast<PHINode>(UserInst)->getIncomingBlock( 37520b57cec5SDimitry Andric PHINode::getIncomingValueNumForOperand(U.getOperandNo())); 37530b57cec5SDimitry Andric if (!DT.dominates(L->getHeader(), UseBB)) 37540b57cec5SDimitry Andric continue; 37550b57cec5SDimitry Andric // Don't bother if the instruction is in a BB which ends in an EHPad. 37560b57cec5SDimitry Andric if (UseBB->getTerminator()->isEHPad()) 37570b57cec5SDimitry Andric continue; 375804eeddc0SDimitry Andric 375904eeddc0SDimitry Andric // Ignore cases in which the currently-examined value could come from 376004eeddc0SDimitry Andric // a basic block terminated with an EHPad. This checks all incoming 376104eeddc0SDimitry Andric // blocks of the phi node since it is possible that the same incoming 376204eeddc0SDimitry Andric // value comes from multiple basic blocks, only some of which may end 376304eeddc0SDimitry Andric // in an EHPad. 
If any of them do, a subsequent rewrite attempt by this 376404eeddc0SDimitry Andric // pass would try to insert instructions into an EHPad, hitting an 376504eeddc0SDimitry Andric // assertion. 376604eeddc0SDimitry Andric if (isa<PHINode>(UserInst)) { 376704eeddc0SDimitry Andric const auto *PhiNode = cast<PHINode>(UserInst); 376804eeddc0SDimitry Andric bool HasIncompatibleEHPTerminatedBlock = false; 376904eeddc0SDimitry Andric llvm::Value *ExpectedValue = U; 377004eeddc0SDimitry Andric for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) { 377104eeddc0SDimitry Andric if (PhiNode->getIncomingValue(I) == ExpectedValue) { 377204eeddc0SDimitry Andric if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) { 377304eeddc0SDimitry Andric HasIncompatibleEHPTerminatedBlock = true; 377404eeddc0SDimitry Andric break; 377504eeddc0SDimitry Andric } 377604eeddc0SDimitry Andric } 377704eeddc0SDimitry Andric } 377804eeddc0SDimitry Andric if (HasIncompatibleEHPTerminatedBlock) { 377904eeddc0SDimitry Andric continue; 378004eeddc0SDimitry Andric } 378104eeddc0SDimitry Andric } 378204eeddc0SDimitry Andric 37830b57cec5SDimitry Andric // Don't bother rewriting PHIs in catchswitch blocks. 37840b57cec5SDimitry Andric if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator())) 37850b57cec5SDimitry Andric continue; 37860b57cec5SDimitry Andric // Ignore uses which are part of other SCEV expressions, to avoid 37870b57cec5SDimitry Andric // analyzing them multiple times. 37880b57cec5SDimitry Andric if (SE.isSCEVable(UserInst->getType())) { 37890b57cec5SDimitry Andric const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst)); 37900b57cec5SDimitry Andric // If the user is a no-op, look through to its uses. 37910b57cec5SDimitry Andric if (!isa<SCEVUnknown>(UserS)) 37920b57cec5SDimitry Andric continue; 37930b57cec5SDimitry Andric if (UserS == US) { 37940b57cec5SDimitry Andric Worklist.push_back( 37950b57cec5SDimitry Andric SE.getUnknown(const_cast<Instruction *>(UserInst))); 37960b57cec5SDimitry Andric continue; 37970b57cec5SDimitry Andric } 37980b57cec5SDimitry Andric } 37990b57cec5SDimitry Andric // Ignore icmp instructions which are already being analyzed. 
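      // For example, a compare of an affine IV against this loop-invariant
      // value is the common case here; the IV operand's evolution in this
      // loop is computable, so such compares are already covered by the
      // collected fixups and do not need to become new uses.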
38000b57cec5SDimitry Andric if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) { 38010b57cec5SDimitry Andric unsigned OtherIdx = !U.getOperandNo(); 38020b57cec5SDimitry Andric Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx)); 38030b57cec5SDimitry Andric if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L)) 38040b57cec5SDimitry Andric continue; 38050b57cec5SDimitry Andric } 38060b57cec5SDimitry Andric 38070fca6ea1SDimitry Andric std::pair<size_t, Immediate> P = 38080fca6ea1SDimitry Andric getUse(S, LSRUse::Basic, MemAccessTy()); 38090b57cec5SDimitry Andric size_t LUIdx = P.first; 38100fca6ea1SDimitry Andric Immediate Offset = P.second; 38110b57cec5SDimitry Andric LSRUse &LU = Uses[LUIdx]; 38120b57cec5SDimitry Andric LSRFixup &LF = LU.getNewFixup(); 38130b57cec5SDimitry Andric LF.UserInst = const_cast<Instruction *>(UserInst); 38140b57cec5SDimitry Andric LF.OperandValToReplace = U; 38150b57cec5SDimitry Andric LF.Offset = Offset; 38160b57cec5SDimitry Andric LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L); 38170b57cec5SDimitry Andric if (!LU.WidestFixupType || 38180b57cec5SDimitry Andric SE.getTypeSizeInBits(LU.WidestFixupType) < 38190b57cec5SDimitry Andric SE.getTypeSizeInBits(LF.OperandValToReplace->getType())) 38200b57cec5SDimitry Andric LU.WidestFixupType = LF.OperandValToReplace->getType(); 38210b57cec5SDimitry Andric InsertSupplementalFormula(US, LU, LUIdx); 38220b57cec5SDimitry Andric CountRegisters(LU.Formulae.back(), Uses.size() - 1); 38230b57cec5SDimitry Andric break; 38240b57cec5SDimitry Andric } 38250b57cec5SDimitry Andric } 38260b57cec5SDimitry Andric } 38270b57cec5SDimitry Andric } 38280b57cec5SDimitry Andric 38290b57cec5SDimitry Andric /// Split S into subexpressions which can be pulled out into separate 38300b57cec5SDimitry Andric /// registers. If C is non-null, multiply each subexpression by C. 38310b57cec5SDimitry Andric /// 38320b57cec5SDimitry Andric /// Return remainder expression after factoring the subexpressions captured by 38330b57cec5SDimitry Andric /// Ops. If Ops is complete, return NULL. 38340b57cec5SDimitry Andric static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C, 38350b57cec5SDimitry Andric SmallVectorImpl<const SCEV *> &Ops, 38360b57cec5SDimitry Andric const Loop *L, 38370b57cec5SDimitry Andric ScalarEvolution &SE, 38380b57cec5SDimitry Andric unsigned Depth = 0) { 38390b57cec5SDimitry Andric // Arbitrarily cap recursion to protect compile time. 38400b57cec5SDimitry Andric if (Depth >= 3) 38410b57cec5SDimitry Andric return S; 38420b57cec5SDimitry Andric 38430b57cec5SDimitry Andric if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) { 38440b57cec5SDimitry Andric // Break out add operands. 38450b57cec5SDimitry Andric for (const SCEV *S : Add->operands()) { 38460b57cec5SDimitry Andric const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1); 38470b57cec5SDimitry Andric if (Remainder) 38480b57cec5SDimitry Andric Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); 38490b57cec5SDimitry Andric } 38500b57cec5SDimitry Andric return nullptr; 38510b57cec5SDimitry Andric } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { 38520b57cec5SDimitry Andric // Split a non-zero base out of an addrec. 
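    // For example (illustrative SCEVs): {(%a + 4),+,8}<%L> has its start
    // (%a + 4) pulled out and collected into Ops, leaving the rebased
    // recurrence {0,+,8}<%L> to be returned below.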
38530b57cec5SDimitry Andric if (AR->getStart()->isZero() || !AR->isAffine()) 38540b57cec5SDimitry Andric return S; 38550b57cec5SDimitry Andric 38560b57cec5SDimitry Andric const SCEV *Remainder = CollectSubexprs(AR->getStart(), 38570b57cec5SDimitry Andric C, Ops, L, SE, Depth+1); 38580b57cec5SDimitry Andric // Split the non-zero AddRec unless it is part of a nested recurrence that 38590b57cec5SDimitry Andric // does not pertain to this loop. 38600b57cec5SDimitry Andric if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) { 38610b57cec5SDimitry Andric Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder); 38620b57cec5SDimitry Andric Remainder = nullptr; 38630b57cec5SDimitry Andric } 38640b57cec5SDimitry Andric if (Remainder != AR->getStart()) { 38650b57cec5SDimitry Andric if (!Remainder) 38660b57cec5SDimitry Andric Remainder = SE.getConstant(AR->getType(), 0); 38670b57cec5SDimitry Andric return SE.getAddRecExpr(Remainder, 38680b57cec5SDimitry Andric AR->getStepRecurrence(SE), 38690b57cec5SDimitry Andric AR->getLoop(), 38700b57cec5SDimitry Andric //FIXME: AR->getNoWrapFlags(SCEV::FlagNW) 38710b57cec5SDimitry Andric SCEV::FlagAnyWrap); 38720b57cec5SDimitry Andric } 38730b57cec5SDimitry Andric } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) { 38740b57cec5SDimitry Andric // Break (C * (a + b + c)) into C*a + C*b + C*c. 38750b57cec5SDimitry Andric if (Mul->getNumOperands() != 2) 38760b57cec5SDimitry Andric return S; 38770b57cec5SDimitry Andric if (const SCEVConstant *Op0 = 38780b57cec5SDimitry Andric dyn_cast<SCEVConstant>(Mul->getOperand(0))) { 38790b57cec5SDimitry Andric C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0; 38800b57cec5SDimitry Andric const SCEV *Remainder = 38810b57cec5SDimitry Andric CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1); 38820b57cec5SDimitry Andric if (Remainder) 38830b57cec5SDimitry Andric Ops.push_back(SE.getMulExpr(C, Remainder)); 38840b57cec5SDimitry Andric return nullptr; 38850b57cec5SDimitry Andric } 38860b57cec5SDimitry Andric } 38870b57cec5SDimitry Andric return S; 38880b57cec5SDimitry Andric } 38890b57cec5SDimitry Andric 38900b57cec5SDimitry Andric /// Return true if the SCEV represents a value that may end up as a 38910b57cec5SDimitry Andric /// post-increment operation. 38920b57cec5SDimitry Andric static bool mayUsePostIncMode(const TargetTransformInfo &TTI, 38930b57cec5SDimitry Andric LSRUse &LU, const SCEV *S, const Loop *L, 38940b57cec5SDimitry Andric ScalarEvolution &SE) { 38950b57cec5SDimitry Andric if (LU.Kind != LSRUse::Address || 38960b57cec5SDimitry Andric !LU.AccessTy.getType()->isIntOrIntVectorTy()) 38970b57cec5SDimitry Andric return false; 38980b57cec5SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S); 38990b57cec5SDimitry Andric if (!AR) 39000b57cec5SDimitry Andric return false; 39010b57cec5SDimitry Andric const SCEV *LoopStep = AR->getStepRecurrence(SE); 39020b57cec5SDimitry Andric if (!isa<SCEVConstant>(LoopStep)) 39030b57cec5SDimitry Andric return false; 39040b57cec5SDimitry Andric // Check if a post-indexed load/store can be used. 
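  // A post-indexed access updates its base register as a side effect; e.g.
  // AArch64's "ldr x0, [x1], #8" loads from x1 and then advances x1 by 8,
  // so no separate increment instruction is needed in the loop.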
39050b57cec5SDimitry Andric   if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
39060b57cec5SDimitry Andric       TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
39070b57cec5SDimitry Andric     const SCEV *LoopStart = AR->getStart();
39080b57cec5SDimitry Andric     if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
39090b57cec5SDimitry Andric       return true;
39100b57cec5SDimitry Andric   }
39110b57cec5SDimitry Andric   return false;
39120b57cec5SDimitry Andric }
39130b57cec5SDimitry Andric 
39140b57cec5SDimitry Andric /// Helper function for LSRInstance::GenerateReassociations.
39150b57cec5SDimitry Andric void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
39160b57cec5SDimitry Andric                                              const Formula &Base,
39170b57cec5SDimitry Andric                                              unsigned Depth, size_t Idx,
39180b57cec5SDimitry Andric                                              bool IsScaledReg) {
39190b57cec5SDimitry Andric   const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
39200b57cec5SDimitry Andric   // Don't generate reassociations for the base register of a value that
39210b57cec5SDimitry Andric   // may generate a post-increment operator. The reason is that the
39220b57cec5SDimitry Andric   // reassociations cause extra base+register formulae to be created,
39230b57cec5SDimitry Andric   // and possibly chosen, but the post-increment is more efficient.
3924fe6060f1SDimitry Andric   if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
39250b57cec5SDimitry Andric     return;
39260b57cec5SDimitry Andric   SmallVector<const SCEV *, 8> AddOps;
39270b57cec5SDimitry Andric   const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
39280b57cec5SDimitry Andric   if (Remainder)
39290b57cec5SDimitry Andric     AddOps.push_back(Remainder);
39300b57cec5SDimitry Andric 
39310b57cec5SDimitry Andric   if (AddOps.size() == 1)
39320b57cec5SDimitry Andric     return;
39330b57cec5SDimitry Andric 
39340b57cec5SDimitry Andric   for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
39350b57cec5SDimitry Andric                                                      JE = AddOps.end();
39360b57cec5SDimitry Andric        J != JE; ++J) {
39370b57cec5SDimitry Andric     // Loop-variant "unknown" values are uninteresting; we won't be able to
39380b57cec5SDimitry Andric     // do anything meaningful with them.
39390b57cec5SDimitry Andric     if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
39400b57cec5SDimitry Andric       continue;
39410b57cec5SDimitry Andric 
39420b57cec5SDimitry Andric     // Don't pull a constant into a register if the constant could be folded
39430b57cec5SDimitry Andric     // into an immediate field.
39440b57cec5SDimitry Andric     if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
39450b57cec5SDimitry Andric                          LU.AccessTy, *J, Base.getNumRegs() > 1))
39460b57cec5SDimitry Andric       continue;
39470b57cec5SDimitry Andric 
39480b57cec5SDimitry Andric     // Collect all operands except *J.
39490b57cec5SDimitry Andric     SmallVector<const SCEV *, 8> InnerAddOps(
39500b57cec5SDimitry Andric         ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
39510b57cec5SDimitry Andric     InnerAddOps.append(std::next(J),
39520b57cec5SDimitry Andric                        ((const SmallVector<const SCEV *, 8> &)AddOps).end());
39530b57cec5SDimitry Andric 
39540b57cec5SDimitry Andric     // Don't leave just a constant behind in a register if the constant could
39550b57cec5SDimitry Andric     // be folded into an immediate field.
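    // E.g. if AddOps for reg(%a + 4) is {%a, 4}, removing %a would leave just
    // the constant 4 in a register; skip that when the 4 could instead be
    // folded into the addressing mode's immediate field.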
39560b57cec5SDimitry Andric     if (InnerAddOps.size() == 1 &&
39570b57cec5SDimitry Andric         isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
39580b57cec5SDimitry Andric                          LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
39590b57cec5SDimitry Andric       continue;
39600b57cec5SDimitry Andric 
39610b57cec5SDimitry Andric     const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
39620b57cec5SDimitry Andric     if (InnerSum->isZero())
39630b57cec5SDimitry Andric       continue;
39640b57cec5SDimitry Andric     Formula F = Base;
39650b57cec5SDimitry Andric 
39660fca6ea1SDimitry Andric     if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
39670fca6ea1SDimitry Andric       continue;
39680fca6ea1SDimitry Andric 
39690b57cec5SDimitry Andric     // Add the remaining pieces of the add back into the new formula.
39700b57cec5SDimitry Andric     const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
39710b57cec5SDimitry Andric     if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
39720fca6ea1SDimitry Andric         TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
39730b57cec5SDimitry Andric                                 InnerSumSC->getValue()->getZExtValue())) {
39740b57cec5SDimitry Andric       F.UnfoldedOffset =
39750fca6ea1SDimitry Andric           Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
39760fca6ea1SDimitry Andric                               InnerSumSC->getValue()->getZExtValue());
39770b57cec5SDimitry Andric       if (IsScaledReg)
39780b57cec5SDimitry Andric         F.ScaledReg = nullptr;
39790b57cec5SDimitry Andric       else
39800b57cec5SDimitry Andric         F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
39810b57cec5SDimitry Andric     } else if (IsScaledReg)
39820b57cec5SDimitry Andric       F.ScaledReg = InnerSum;
39830b57cec5SDimitry Andric     else
39840b57cec5SDimitry Andric       F.BaseRegs[Idx] = InnerSum;
39850b57cec5SDimitry Andric 
39860b57cec5SDimitry Andric     // Add J as its own register, or an unfolded immediate.
39870b57cec5SDimitry Andric     const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
39880b57cec5SDimitry Andric     if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
39890fca6ea1SDimitry Andric         TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
39900b57cec5SDimitry Andric                                 SC->getValue()->getZExtValue()))
39910b57cec5SDimitry Andric       F.UnfoldedOffset =
39920fca6ea1SDimitry Andric           Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
39930fca6ea1SDimitry Andric                               SC->getValue()->getZExtValue());
39940b57cec5SDimitry Andric     else
39950b57cec5SDimitry Andric       F.BaseRegs.push_back(*J);
39960b57cec5SDimitry Andric     // We may have changed the number of registers in base regs, adjust the
39970b57cec5SDimitry Andric     // formula accordingly.
39980b57cec5SDimitry Andric     F.canonicalize(*L);
39990b57cec5SDimitry Andric 
40000b57cec5SDimitry Andric     if (InsertFormula(LU, LUIdx, F))
40010b57cec5SDimitry Andric       // If that formula hadn't been seen before, recurse to find more like
40020b57cec5SDimitry Andric       // it.
40030b57cec5SDimitry Andric       // Add a check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2 -
40040b57cec5SDimitry Andric       // because Depth alone is not enough to bound compile time.
40050b57cec5SDimitry Andric       // This means that every time AddOps.size() is greater than 16^x we will add
40060b57cec5SDimitry Andric       // x to Depth.
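      // For example, AddOps.size() == 300 gives Log2_32(300) >> 2 == 8 >> 2
      // == 2, so the recursive call below is charged two extra depth levels.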
40070b57cec5SDimitry Andric GenerateReassociations(LU, LUIdx, LU.Formulae.back(), 40080b57cec5SDimitry Andric Depth + 1 + (Log2_32(AddOps.size()) >> 2)); 40090b57cec5SDimitry Andric } 40100b57cec5SDimitry Andric } 40110b57cec5SDimitry Andric 40120b57cec5SDimitry Andric /// Split out subexpressions from adds and the bases of addrecs. 40130b57cec5SDimitry Andric void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx, 40140b57cec5SDimitry Andric Formula Base, unsigned Depth) { 40150b57cec5SDimitry Andric assert(Base.isCanonical(*L) && "Input must be in the canonical form"); 40160b57cec5SDimitry Andric // Arbitrarily cap recursion to protect compile time. 40170b57cec5SDimitry Andric if (Depth >= 3) 40180b57cec5SDimitry Andric return; 40190b57cec5SDimitry Andric 40200b57cec5SDimitry Andric for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) 40210b57cec5SDimitry Andric GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i); 40220b57cec5SDimitry Andric 40230b57cec5SDimitry Andric if (Base.Scale == 1) 40240b57cec5SDimitry Andric GenerateReassociationsImpl(LU, LUIdx, Base, Depth, 40250b57cec5SDimitry Andric /* Idx */ -1, /* IsScaledReg */ true); 40260b57cec5SDimitry Andric } 40270b57cec5SDimitry Andric 40280b57cec5SDimitry Andric /// Generate a formula consisting of all of the loop-dominating registers added 40290b57cec5SDimitry Andric /// into a single register. 40300b57cec5SDimitry Andric void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx, 40310b57cec5SDimitry Andric Formula Base) { 40320b57cec5SDimitry Andric // This method is only interesting on a plurality of registers. 40330b57cec5SDimitry Andric if (Base.BaseRegs.size() + (Base.Scale == 1) + 40340fca6ea1SDimitry Andric (Base.UnfoldedOffset.isNonZero()) <= 40350fca6ea1SDimitry Andric 1) 40360b57cec5SDimitry Andric return; 40370b57cec5SDimitry Andric 40380b57cec5SDimitry Andric // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before 40390b57cec5SDimitry Andric // processing the formula. 40400b57cec5SDimitry Andric Base.unscale(); 40410b57cec5SDimitry Andric SmallVector<const SCEV *, 4> Ops; 40420b57cec5SDimitry Andric Formula NewBase = Base; 40430b57cec5SDimitry Andric NewBase.BaseRegs.clear(); 40440b57cec5SDimitry Andric Type *CombinedIntegerType = nullptr; 40450b57cec5SDimitry Andric for (const SCEV *BaseReg : Base.BaseRegs) { 40460b57cec5SDimitry Andric if (SE.properlyDominates(BaseReg, L->getHeader()) && 40470b57cec5SDimitry Andric !SE.hasComputableLoopEvolution(BaseReg, L)) { 40480b57cec5SDimitry Andric if (!CombinedIntegerType) 40490b57cec5SDimitry Andric CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType()); 40500b57cec5SDimitry Andric Ops.push_back(BaseReg); 40510b57cec5SDimitry Andric } 40520b57cec5SDimitry Andric else 40530b57cec5SDimitry Andric NewBase.BaseRegs.push_back(BaseReg); 40540b57cec5SDimitry Andric } 40550b57cec5SDimitry Andric 40560b57cec5SDimitry Andric // If no register is relevant, we're done. 40570b57cec5SDimitry Andric if (Ops.size() == 0) 40580b57cec5SDimitry Andric return; 40590b57cec5SDimitry Andric 40600b57cec5SDimitry Andric // Utility function for generating the required variants of the combined 40610b57cec5SDimitry Andric // registers. 40620b57cec5SDimitry Andric auto GenerateFormula = [&](const SCEV *Sum) { 40630b57cec5SDimitry Andric Formula F = NewBase; 40640b57cec5SDimitry Andric 40650b57cec5SDimitry Andric // TODO: If Sum is zero, it probably means ScalarEvolution missed an 40660b57cec5SDimitry Andric // opportunity to fold something. 
For now, just ignore such cases 40670b57cec5SDimitry Andric // rather than proceed with zero in a register. 40680b57cec5SDimitry Andric if (Sum->isZero()) 40690b57cec5SDimitry Andric return; 40700b57cec5SDimitry Andric 40710b57cec5SDimitry Andric F.BaseRegs.push_back(Sum); 40720b57cec5SDimitry Andric F.canonicalize(*L); 40730b57cec5SDimitry Andric (void)InsertFormula(LU, LUIdx, F); 40740b57cec5SDimitry Andric }; 40750b57cec5SDimitry Andric 40760b57cec5SDimitry Andric // If we collected at least two registers, generate a formula combining them. 40770b57cec5SDimitry Andric if (Ops.size() > 1) { 40780b57cec5SDimitry Andric SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops. 40790b57cec5SDimitry Andric GenerateFormula(SE.getAddExpr(OpsCopy)); 40800b57cec5SDimitry Andric } 40810b57cec5SDimitry Andric 40820b57cec5SDimitry Andric // If we have an unfolded offset, generate a formula combining it with the 40830b57cec5SDimitry Andric // registers collected. 40840fca6ea1SDimitry Andric if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) { 40850b57cec5SDimitry Andric assert(CombinedIntegerType && "Missing a type for the unfolded offset"); 40860fca6ea1SDimitry Andric Ops.push_back(SE.getConstant(CombinedIntegerType, 40870fca6ea1SDimitry Andric NewBase.UnfoldedOffset.getFixedValue(), true)); 40880fca6ea1SDimitry Andric NewBase.UnfoldedOffset = Immediate::getFixed(0); 40890b57cec5SDimitry Andric GenerateFormula(SE.getAddExpr(Ops)); 40900b57cec5SDimitry Andric } 40910b57cec5SDimitry Andric } 40920b57cec5SDimitry Andric 40930b57cec5SDimitry Andric /// Helper function for LSRInstance::GenerateSymbolicOffsets. 40940b57cec5SDimitry Andric void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx, 40950b57cec5SDimitry Andric const Formula &Base, size_t Idx, 40960b57cec5SDimitry Andric bool IsScaledReg) { 40970b57cec5SDimitry Andric const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx]; 40980b57cec5SDimitry Andric GlobalValue *GV = ExtractSymbol(G, SE); 40990b57cec5SDimitry Andric if (G->isZero() || !GV) 41000b57cec5SDimitry Andric return; 41010b57cec5SDimitry Andric Formula F = Base; 41020b57cec5SDimitry Andric F.BaseGV = GV; 41030b57cec5SDimitry Andric if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) 41040b57cec5SDimitry Andric return; 41050b57cec5SDimitry Andric if (IsScaledReg) 41060b57cec5SDimitry Andric F.ScaledReg = G; 41070b57cec5SDimitry Andric else 41080b57cec5SDimitry Andric F.BaseRegs[Idx] = G; 41090b57cec5SDimitry Andric (void)InsertFormula(LU, LUIdx, F); 41100b57cec5SDimitry Andric } 41110b57cec5SDimitry Andric 41120b57cec5SDimitry Andric /// Generate reuse formulae using symbolic offsets. 41130b57cec5SDimitry Andric void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, 41140b57cec5SDimitry Andric Formula Base) { 41150b57cec5SDimitry Andric // We can't add a symbolic offset if the address already contains one. 41160b57cec5SDimitry Andric if (Base.BaseGV) return; 41170b57cec5SDimitry Andric 41180b57cec5SDimitry Andric for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) 41190b57cec5SDimitry Andric GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i); 41200b57cec5SDimitry Andric if (Base.Scale == 1) 41210b57cec5SDimitry Andric GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1, 41220b57cec5SDimitry Andric /* IsScaledReg */ true); 41230b57cec5SDimitry Andric } 41240b57cec5SDimitry Andric 41250b57cec5SDimitry Andric /// Helper function for LSRInstance::GenerateConstantOffsets. 
41260b57cec5SDimitry Andric void LSRInstance::GenerateConstantOffsetsImpl(
41270b57cec5SDimitry Andric     LSRUse &LU, unsigned LUIdx, const Formula &Base,
41280fca6ea1SDimitry Andric     const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {
41290b57cec5SDimitry Andric 
41300fca6ea1SDimitry Andric   auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
41310b57cec5SDimitry Andric     Formula F = Base;
41320fca6ea1SDimitry Andric     if (!Base.BaseOffset.isCompatibleImmediate(Offset))
41330fca6ea1SDimitry Andric       return;
41340fca6ea1SDimitry Andric     F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);
41350b57cec5SDimitry Andric 
4136fe6060f1SDimitry Andric     if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
41370b57cec5SDimitry Andric       // Add the offset to the base register.
41380fca6ea1SDimitry Andric       const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
41390fca6ea1SDimitry Andric       const SCEV *NewG = SE.getAddExpr(NewOffset, G);
41400b57cec5SDimitry Andric       // If it cancelled out, drop the base register, otherwise update it.
41410b57cec5SDimitry Andric       if (NewG->isZero()) {
41420b57cec5SDimitry Andric         if (IsScaledReg) {
41430b57cec5SDimitry Andric           F.Scale = 0;
41440b57cec5SDimitry Andric           F.ScaledReg = nullptr;
41450b57cec5SDimitry Andric         } else
41460b57cec5SDimitry Andric           F.deleteBaseReg(F.BaseRegs[Idx]);
41470b57cec5SDimitry Andric         F.canonicalize(*L);
41480b57cec5SDimitry Andric       } else if (IsScaledReg)
41490b57cec5SDimitry Andric         F.ScaledReg = NewG;
41500b57cec5SDimitry Andric       else
41510b57cec5SDimitry Andric         F.BaseRegs[Idx] = NewG;
41520b57cec5SDimitry Andric 
41530b57cec5SDimitry Andric       (void)InsertFormula(LU, LUIdx, F);
41540b57cec5SDimitry Andric     }
41550b57cec5SDimitry Andric   };
41560b57cec5SDimitry Andric 
41570b57cec5SDimitry Andric   const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
41580b57cec5SDimitry Andric 
41590b57cec5SDimitry Andric   // With constant offsets and constant steps, we can generate pre-inc
41600b57cec5SDimitry Andric   // accesses by having the offset equal the step. So, for access #0 with a
41610b57cec5SDimitry Andric   // step of 8, we generate a G - 8 base which would require the first access
41620b57cec5SDimitry Andric   // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
41630b57cec5SDimitry Andric   // for itself and hopefully becomes the base for other accesses. This means
41640b57cec5SDimitry Andric   // that a single pre-indexed access can be generated to become the new
41650b57cec5SDimitry Andric   // base pointer for each iteration of the loop, resulting in no extra add/sub
41660b57cec5SDimitry Andric   // instructions for pointer updating.
4167fe6060f1SDimitry Andric   if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
41680b57cec5SDimitry Andric     if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
41690b57cec5SDimitry Andric       if (auto *StepRec =
41700b57cec5SDimitry Andric               dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
41710b57cec5SDimitry Andric         const APInt &StepInt = StepRec->getAPInt();
41720b57cec5SDimitry Andric         int64_t Step = StepInt.isNegative() ?
41730b57cec5SDimitry Andric             StepInt.getSExtValue() : StepInt.getZExtValue();
41740b57cec5SDimitry Andric 
41750fca6ea1SDimitry Andric         for (Immediate Offset : Worklist) {
41760fca6ea1SDimitry Andric           if (Offset.isFixed()) {
41770fca6ea1SDimitry Andric             Offset = Immediate::getFixed(Offset.getFixedValue() - Step);
41780b57cec5SDimitry Andric             GenerateOffset(G, Offset);
41790b57cec5SDimitry Andric           }
41800b57cec5SDimitry Andric         }
41810b57cec5SDimitry Andric       }
41820b57cec5SDimitry Andric     }
41830fca6ea1SDimitry Andric   }
41840fca6ea1SDimitry Andric   for (Immediate Offset : Worklist)
41850b57cec5SDimitry Andric     GenerateOffset(G, Offset);
41860b57cec5SDimitry Andric 
41870fca6ea1SDimitry Andric   Immediate Imm = ExtractImmediate(G, SE);
41880fca6ea1SDimitry Andric   if (G->isZero() || Imm.isZero() ||
41890fca6ea1SDimitry Andric       !Base.BaseOffset.isCompatibleImmediate(Imm))
41900b57cec5SDimitry Andric     return;
41910b57cec5SDimitry Andric   Formula F = Base;
41920fca6ea1SDimitry Andric   F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
41930b57cec5SDimitry Andric   if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
41940b57cec5SDimitry Andric     return;
4195e8d8bef9SDimitry Andric   if (IsScaledReg) {
41960b57cec5SDimitry Andric     F.ScaledReg = G;
4197e8d8bef9SDimitry Andric   } else {
41980b57cec5SDimitry Andric     F.BaseRegs[Idx] = G;
4199e8d8bef9SDimitry Andric     // We may generate a non-canonical Formula if G is a recurrent expr reg
4200e8d8bef9SDimitry Andric     // related to the current loop while F.ScaledReg is not.
4201e8d8bef9SDimitry Andric     F.canonicalize(*L);
4202e8d8bef9SDimitry Andric   }
42030b57cec5SDimitry Andric   (void)InsertFormula(LU, LUIdx, F);
42040b57cec5SDimitry Andric }
42050b57cec5SDimitry Andric 
42060b57cec5SDimitry Andric /// GenerateConstantOffsets - Generate reuse formulae using constant offsets.
42070b57cec5SDimitry Andric void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
42080b57cec5SDimitry Andric                                           Formula Base) {
42090b57cec5SDimitry Andric   // TODO: For now, just add the min and max offset, because it usually isn't
42100b57cec5SDimitry Andric   // worthwhile looking at everything in between.
42110fca6ea1SDimitry Andric   SmallVector<Immediate, 2> Worklist;
42120b57cec5SDimitry Andric   Worklist.push_back(LU.MinOffset);
42130b57cec5SDimitry Andric   if (LU.MaxOffset != LU.MinOffset)
42140b57cec5SDimitry Andric     Worklist.push_back(LU.MaxOffset);
42150b57cec5SDimitry Andric 
42160b57cec5SDimitry Andric   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
42170b57cec5SDimitry Andric     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
42180b57cec5SDimitry Andric   if (Base.Scale == 1)
42190b57cec5SDimitry Andric     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
42200b57cec5SDimitry Andric                                 /* IsScaledReg */ true);
42210b57cec5SDimitry Andric }
42220b57cec5SDimitry Andric 
42230b57cec5SDimitry Andric /// For ICmpZero, check to see if we can scale up the comparison. For example, x
42240b57cec5SDimitry Andric /// == y -> x*c == y*c.
42250b57cec5SDimitry Andric void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
42260b57cec5SDimitry Andric                                          Formula Base) {
42270b57cec5SDimitry Andric   if (LU.Kind != LSRUse::ICmpZero) return;
42280b57cec5SDimitry Andric 
42290b57cec5SDimitry Andric   // Determine the integer type for the base formula.
42300b57cec5SDimitry Andric   Type *IntTy = Base.getType();
42310b57cec5SDimitry Andric   if (!IntTy) return;
42320b57cec5SDimitry Andric   if (SE.getTypeSizeInBits(IntTy) > 64) return;
42330b57cec5SDimitry Andric 
42340b57cec5SDimitry Andric   // Don't do this if there is more than one offset.
42350b57cec5SDimitry Andric   if (LU.MinOffset != LU.MaxOffset) return;
42360b57cec5SDimitry Andric 
42370b57cec5SDimitry Andric   // Check if transformation is valid. It is illegal to multiply a pointer.
42380b57cec5SDimitry Andric   if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
42390b57cec5SDimitry Andric     return;
42400b57cec5SDimitry Andric   for (const SCEV *BaseReg : Base.BaseRegs)
42410b57cec5SDimitry Andric     if (BaseReg->getType()->isPointerTy())
42420b57cec5SDimitry Andric       return;
42430b57cec5SDimitry Andric   assert(!Base.BaseGV && "ICmpZero use is not legal!");
42440b57cec5SDimitry Andric 
42450b57cec5SDimitry Andric   // Check each interesting stride.
42460b57cec5SDimitry Andric   for (int64_t Factor : Factors) {
4247349cc55cSDimitry Andric     // Check that Factor can be represented by IntTy
4248349cc55cSDimitry Andric     if (!ConstantInt::isValueValidForType(IntTy, Factor))
4249349cc55cSDimitry Andric       continue;
42500b57cec5SDimitry Andric     // Check that the multiplication doesn't overflow.
42510fca6ea1SDimitry Andric     if (Base.BaseOffset.isMin() && Factor == -1)
42520b57cec5SDimitry Andric       continue;
42530fca6ea1SDimitry Andric     // Not supporting scalable immediates.
42540fca6ea1SDimitry Andric     if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
42550fca6ea1SDimitry Andric       continue;
42560fca6ea1SDimitry Andric     Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
4257fe6060f1SDimitry Andric     assert(Factor != 0 && "Zero factor not expected!");
42580fca6ea1SDimitry Andric     if (NewBaseOffset.getFixedValue() / Factor !=
42590fca6ea1SDimitry Andric         Base.BaseOffset.getFixedValue())
42600b57cec5SDimitry Andric       continue;
42610b57cec5SDimitry Andric     // If the offset will be truncated at this use, check that it is in bounds.
42620b57cec5SDimitry Andric     if (!IntTy->isPointerTy() &&
42630fca6ea1SDimitry Andric         !ConstantInt::isValueValidForType(IntTy, NewBaseOffset.getFixedValue()))
42640b57cec5SDimitry Andric       continue;
42650b57cec5SDimitry Andric 
42660b57cec5SDimitry Andric     // Check that multiplying with the use offset doesn't overflow.
42670fca6ea1SDimitry Andric     Immediate Offset = LU.MinOffset;
42680fca6ea1SDimitry Andric     if (Offset.isMin() && Factor == -1)
42690b57cec5SDimitry Andric       continue;
42700fca6ea1SDimitry Andric     Offset = Offset.mulUnsigned(Factor);
42710fca6ea1SDimitry Andric     if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
42720b57cec5SDimitry Andric       continue;
42730b57cec5SDimitry Andric     // If the offset will be truncated at this use, check that it is in bounds.
42740b57cec5SDimitry Andric     if (!IntTy->isPointerTy() &&
42750fca6ea1SDimitry Andric         !ConstantInt::isValueValidForType(IntTy, Offset.getFixedValue()))
42760b57cec5SDimitry Andric       continue;
42770b57cec5SDimitry Andric 
42780b57cec5SDimitry Andric     Formula F = Base;
42790b57cec5SDimitry Andric     F.BaseOffset = NewBaseOffset;
42800b57cec5SDimitry Andric 
42810b57cec5SDimitry Andric     // Check that this scale is legal.
42820b57cec5SDimitry Andric     if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
42830b57cec5SDimitry Andric       continue;
42840b57cec5SDimitry Andric 
42850b57cec5SDimitry Andric     // Compensate for the use having MinOffset built into it.
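    // E.g. with Factor == 2 and LU.MinOffset == 4, the use still adds its
    // original offset of 4 at the fixup, so on top of the scaled base offset
    // the formula keeps 2*4 - 4 == 4 to make the scaled comparison balance.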
42860fca6ea1SDimitry Andric     F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);
42870b57cec5SDimitry Andric 
42880b57cec5SDimitry Andric     const SCEV *FactorS = SE.getConstant(IntTy, Factor);
42890b57cec5SDimitry Andric 
42900b57cec5SDimitry Andric     // Check that multiplying with each base register doesn't overflow.
42910b57cec5SDimitry Andric     for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
42920b57cec5SDimitry Andric       F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
42930b57cec5SDimitry Andric       if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
42940b57cec5SDimitry Andric         goto next;
42950b57cec5SDimitry Andric     }
42960b57cec5SDimitry Andric 
42970b57cec5SDimitry Andric     // Check that multiplying with the scaled register doesn't overflow.
42980b57cec5SDimitry Andric     if (F.ScaledReg) {
42990b57cec5SDimitry Andric       F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
43000b57cec5SDimitry Andric       if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
43010b57cec5SDimitry Andric         continue;
43020b57cec5SDimitry Andric     }
43030b57cec5SDimitry Andric 
43040b57cec5SDimitry Andric     // Check that multiplying with the unfolded offset doesn't overflow.
43050fca6ea1SDimitry Andric     if (F.UnfoldedOffset.isNonZero()) {
43060fca6ea1SDimitry Andric       if (F.UnfoldedOffset.isMin() && Factor == -1)
43070b57cec5SDimitry Andric         continue;
43080fca6ea1SDimitry Andric       F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
43090fca6ea1SDimitry Andric       if (F.UnfoldedOffset.getFixedValue() / Factor !=
43100fca6ea1SDimitry Andric           Base.UnfoldedOffset.getFixedValue())
43110b57cec5SDimitry Andric         continue;
43120b57cec5SDimitry Andric       // If the offset will be truncated, check that it is in bounds.
43130fca6ea1SDimitry Andric       if (!IntTy->isPointerTy() && !ConstantInt::isValueValidForType(
43140fca6ea1SDimitry Andric                                        IntTy, F.UnfoldedOffset.getFixedValue()))
43150b57cec5SDimitry Andric         continue;
43160b57cec5SDimitry Andric     }
43170b57cec5SDimitry Andric 
43180b57cec5SDimitry Andric     // If we make it here and it's legal, add it.
43190b57cec5SDimitry Andric     (void)InsertFormula(LU, LUIdx, F);
43200b57cec5SDimitry Andric   next:;
43210b57cec5SDimitry Andric   }
43220b57cec5SDimitry Andric }
43230b57cec5SDimitry Andric 
43240b57cec5SDimitry Andric /// Generate stride factor reuse formulae by making use of scaled-offset address
43250b57cec5SDimitry Andric /// modes, for example.
43260b57cec5SDimitry Andric void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
43270b57cec5SDimitry Andric   // Determine the integer type for the base formula.
43280b57cec5SDimitry Andric   Type *IntTy = Base.getType();
43290b57cec5SDimitry Andric   if (!IntTy) return;
43300b57cec5SDimitry Andric 
43310b57cec5SDimitry Andric   // If this Formula already has a scaled register, we can't add another one.
43320b57cec5SDimitry Andric   // Try to unscale the formula to generate a better scale.
43330b57cec5SDimitry Andric   if (Base.Scale != 0 && !Base.unscale())
43340b57cec5SDimitry Andric     return;
43350b57cec5SDimitry Andric 
43360b57cec5SDimitry Andric   assert(Base.Scale == 0 && "unscale did not do its job!");
43370b57cec5SDimitry Andric 
43380b57cec5SDimitry Andric   // Check each interesting stride.
43390b57cec5SDimitry Andric   for (int64_t Factor : Factors) {
43400b57cec5SDimitry Andric     Base.Scale = Factor;
43410b57cec5SDimitry Andric     Base.HasBaseReg = Base.BaseRegs.size() > 1;
43420b57cec5SDimitry Andric     // Check whether this scale is going to be legal.
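    // E.g. on targets with scaled addressing modes, Factor == 4 may fold into
    // a single [reg + 4*reg] address for Address uses, while the same scale
    // can be rejected for other use kinds.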
43430b57cec5SDimitry Andric     if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
43440b57cec5SDimitry Andric                     Base)) {
43450b57cec5SDimitry Andric       // As a special case, handle out-of-loop Basic users specially.
43460b57cec5SDimitry Andric       // TODO: Reconsider this special case.
43470b57cec5SDimitry Andric       if (LU.Kind == LSRUse::Basic &&
43480b57cec5SDimitry Andric           isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
43490b57cec5SDimitry Andric                      LU.AccessTy, Base) &&
43500b57cec5SDimitry Andric           LU.AllFixupsOutsideLoop)
43510b57cec5SDimitry Andric         LU.Kind = LSRUse::Special;
43520b57cec5SDimitry Andric       else
43530b57cec5SDimitry Andric         continue;
43540b57cec5SDimitry Andric     }
43550b57cec5SDimitry Andric     // For an ICmpZero, negating a solitary base register won't lead to
43560b57cec5SDimitry Andric     // new solutions.
43570fca6ea1SDimitry Andric     if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
43580fca6ea1SDimitry Andric         Base.BaseOffset.isZero() && !Base.BaseGV)
43590b57cec5SDimitry Andric       continue;
43600b57cec5SDimitry Andric     // For each addrec base reg, if its loop is current loop, apply the scale.
43610b57cec5SDimitry Andric     for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
43620b57cec5SDimitry Andric       const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
43630b57cec5SDimitry Andric       if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
43640b57cec5SDimitry Andric         const SCEV *FactorS = SE.getConstant(IntTy, Factor);
43650b57cec5SDimitry Andric         if (FactorS->isZero())
43660b57cec5SDimitry Andric           continue;
43670b57cec5SDimitry Andric         // Divide out the factor, ignoring high bits, since we'll be
43680b57cec5SDimitry Andric         // scaling the value back up in the end.
436981ad6265SDimitry Andric         if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
437081ad6265SDimitry Andric           if (!Quotient->isZero()) {
43710b57cec5SDimitry Andric             // TODO: This could be optimized to avoid all the copying.
43720b57cec5SDimitry Andric             Formula F = Base;
43730b57cec5SDimitry Andric             F.ScaledReg = Quotient;
43740b57cec5SDimitry Andric             F.deleteBaseReg(F.BaseRegs[i]);
43750b57cec5SDimitry Andric             // The canonical representation of 1*reg is reg, which is already in
43760b57cec5SDimitry Andric             // Base. In that case, do not try to insert the formula, it will be
43770b57cec5SDimitry Andric             // rejected anyway.
43780b57cec5SDimitry Andric             if (F.Scale == 1 && (F.BaseRegs.empty() ||
43790b57cec5SDimitry Andric                                  (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
43800b57cec5SDimitry Andric               continue;
43810b57cec5SDimitry Andric             // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
43820b57cec5SDimitry Andric             // a non-canonical Formula with ScaledReg's loop not being L.
43830b57cec5SDimitry Andric             if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
43840b57cec5SDimitry Andric               F.canonicalize(*L);
43850b57cec5SDimitry Andric             (void)InsertFormula(LU, LUIdx, F);
43860b57cec5SDimitry Andric           }
43870b57cec5SDimitry Andric       }
43880b57cec5SDimitry Andric     }
43890b57cec5SDimitry Andric   }
43900b57cec5SDimitry Andric }
43910b57cec5SDimitry Andric 
439206c3fb27SDimitry Andric /// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
439306c3fb27SDimitry Andric /// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
439406c3fb27SDimitry Andric /// perform the extension/truncate and normalize again, as the normalized form
439506c3fb27SDimitry Andric /// can result in folds that are not valid in the post-inc use contexts.
The 439606c3fb27SDimitry Andric /// expressions for all PostIncLoopSets must match, otherwise return nullptr. 439706c3fb27SDimitry Andric static const SCEV * 439806c3fb27SDimitry Andric getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops, 439906c3fb27SDimitry Andric const SCEV *Expr, Type *ToTy, 440006c3fb27SDimitry Andric ScalarEvolution &SE) { 440106c3fb27SDimitry Andric const SCEV *Result = nullptr; 440206c3fb27SDimitry Andric for (auto &L : Loops) { 440306c3fb27SDimitry Andric auto *DenormExpr = denormalizeForPostIncUse(Expr, L, SE); 440406c3fb27SDimitry Andric const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy); 440506c3fb27SDimitry Andric const SCEV *New = normalizeForPostIncUse(NewDenormExpr, L, SE); 440606c3fb27SDimitry Andric if (!New || (Result && New != Result)) 440706c3fb27SDimitry Andric return nullptr; 440806c3fb27SDimitry Andric Result = New; 440906c3fb27SDimitry Andric } 441006c3fb27SDimitry Andric 441106c3fb27SDimitry Andric assert(Result && "failed to create expression"); 441206c3fb27SDimitry Andric return Result; 441306c3fb27SDimitry Andric } 441406c3fb27SDimitry Andric 44150b57cec5SDimitry Andric /// Generate reuse formulae from different IV types. 44160b57cec5SDimitry Andric void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) { 44170b57cec5SDimitry Andric // Don't bother truncating symbolic values. 44180b57cec5SDimitry Andric if (Base.BaseGV) return; 44190b57cec5SDimitry Andric 44200b57cec5SDimitry Andric // Determine the integer type for the base formula. 44210b57cec5SDimitry Andric Type *DstTy = Base.getType(); 44220b57cec5SDimitry Andric if (!DstTy) return; 4423fe6060f1SDimitry Andric if (DstTy->isPointerTy()) 4424fe6060f1SDimitry Andric return; 44250b57cec5SDimitry Andric 4426349cc55cSDimitry Andric // It is invalid to extend a pointer type so exit early if ScaledReg or 4427349cc55cSDimitry Andric // any of the BaseRegs are pointers. 4428349cc55cSDimitry Andric if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy()) 4429349cc55cSDimitry Andric return; 4430349cc55cSDimitry Andric if (any_of(Base.BaseRegs, 4431349cc55cSDimitry Andric [](const SCEV *S) { return S->getType()->isPointerTy(); })) 4432349cc55cSDimitry Andric return; 4433349cc55cSDimitry Andric 443406c3fb27SDimitry Andric SmallVector<PostIncLoopSet> Loops; 443506c3fb27SDimitry Andric for (auto &LF : LU.Fixups) 443606c3fb27SDimitry Andric Loops.push_back(LF.PostIncLoops); 443706c3fb27SDimitry Andric 44380b57cec5SDimitry Andric for (Type *SrcTy : Types) { 44390b57cec5SDimitry Andric if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) { 44400b57cec5SDimitry Andric Formula F = Base; 44410b57cec5SDimitry Andric 44420b57cec5SDimitry Andric // Sometimes SCEV is able to prove zero during ext transform. It may 44430b57cec5SDimitry Andric // happen if SCEV did not do all possible transforms while creating the 44440b57cec5SDimitry Andric // initial node (maybe due to depth limitations), but it can do them while 44450b57cec5SDimitry Andric // taking ext. 
44460b57cec5SDimitry Andric if (F.ScaledReg) { 444706c3fb27SDimitry Andric const SCEV *NewScaledReg = 444806c3fb27SDimitry Andric getAnyExtendConsideringPostIncUses(Loops, F.ScaledReg, SrcTy, SE); 444906c3fb27SDimitry Andric if (!NewScaledReg || NewScaledReg->isZero()) 44500b57cec5SDimitry Andric continue; 44510b57cec5SDimitry Andric F.ScaledReg = NewScaledReg; 44520b57cec5SDimitry Andric } 44530b57cec5SDimitry Andric bool HasZeroBaseReg = false; 44540b57cec5SDimitry Andric for (const SCEV *&BaseReg : F.BaseRegs) { 445506c3fb27SDimitry Andric const SCEV *NewBaseReg = 445606c3fb27SDimitry Andric getAnyExtendConsideringPostIncUses(Loops, BaseReg, SrcTy, SE); 445706c3fb27SDimitry Andric if (!NewBaseReg || NewBaseReg->isZero()) { 44580b57cec5SDimitry Andric HasZeroBaseReg = true; 44590b57cec5SDimitry Andric break; 44600b57cec5SDimitry Andric } 44610b57cec5SDimitry Andric BaseReg = NewBaseReg; 44620b57cec5SDimitry Andric } 44630b57cec5SDimitry Andric if (HasZeroBaseReg) 44640b57cec5SDimitry Andric continue; 44650b57cec5SDimitry Andric 44660b57cec5SDimitry Andric // TODO: This assumes we've done basic processing on all uses and 44670b57cec5SDimitry Andric // have an idea what the register usage is. 44680b57cec5SDimitry Andric if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses)) 44690b57cec5SDimitry Andric continue; 44700b57cec5SDimitry Andric 44710b57cec5SDimitry Andric F.canonicalize(*L); 44720b57cec5SDimitry Andric (void)InsertFormula(LU, LUIdx, F); 44730b57cec5SDimitry Andric } 44740b57cec5SDimitry Andric } 44750b57cec5SDimitry Andric } 44760b57cec5SDimitry Andric 44770b57cec5SDimitry Andric namespace { 44780b57cec5SDimitry Andric 44790b57cec5SDimitry Andric /// Helper class for GenerateCrossUseConstantOffsets. It's used to defer 44800b57cec5SDimitry Andric /// modifications so that the search phase doesn't have to worry about the data 44810b57cec5SDimitry Andric /// structures moving underneath it. 44820b57cec5SDimitry Andric struct WorkItem { 44830b57cec5SDimitry Andric size_t LUIdx; 44840fca6ea1SDimitry Andric Immediate Imm; 44850b57cec5SDimitry Andric const SCEV *OrigReg; 44860b57cec5SDimitry Andric 44870fca6ea1SDimitry Andric WorkItem(size_t LI, Immediate I, const SCEV *R) 44880b57cec5SDimitry Andric : LUIdx(LI), Imm(I), OrigReg(R) {} 44890b57cec5SDimitry Andric 44900b57cec5SDimitry Andric void print(raw_ostream &OS) const; 44910b57cec5SDimitry Andric void dump() const; 44920b57cec5SDimitry Andric }; 44930b57cec5SDimitry Andric 44940b57cec5SDimitry Andric } // end anonymous namespace 44950b57cec5SDimitry Andric 44960b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 44970b57cec5SDimitry Andric void WorkItem::print(raw_ostream &OS) const { 44980b57cec5SDimitry Andric OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx 44990b57cec5SDimitry Andric << " , add offset " << Imm; 45000b57cec5SDimitry Andric } 45010b57cec5SDimitry Andric 45020b57cec5SDimitry Andric LLVM_DUMP_METHOD void WorkItem::dump() const { 45030b57cec5SDimitry Andric print(errs()); errs() << '\n'; 45040b57cec5SDimitry Andric } 45050b57cec5SDimitry Andric #endif 45060b57cec5SDimitry Andric 45070b57cec5SDimitry Andric /// Look for registers which are a constant distance apart and try to form reuse 45080b57cec5SDimitry Andric /// opportunities between them. 45090b57cec5SDimitry Andric void LSRInstance::GenerateCrossUseConstantOffsets() { 45100b57cec5SDimitry Andric // Group the registers by their value without any added constant offset. 
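  // E.g. the registers (%p + 4) and (%p + 20) both map to the key %p, with
  // the immediates 4 and 20 recorded in the inner map alongside each
  // original register.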
45110fca6ea1SDimitry Andric   using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;
45120b57cec5SDimitry Andric 
45130b57cec5SDimitry Andric   DenseMap<const SCEV *, ImmMapTy> Map;
45140b57cec5SDimitry Andric   DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
45150b57cec5SDimitry Andric   SmallVector<const SCEV *, 8> Sequence;
45160b57cec5SDimitry Andric   for (const SCEV *Use : RegUses) {
45170b57cec5SDimitry Andric     const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
45180fca6ea1SDimitry Andric     Immediate Imm = ExtractImmediate(Reg, SE);
45190b57cec5SDimitry Andric     auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
45200b57cec5SDimitry Andric     if (Pair.second)
45210b57cec5SDimitry Andric       Sequence.push_back(Reg);
45220b57cec5SDimitry Andric     Pair.first->second.insert(std::make_pair(Imm, Use));
45230b57cec5SDimitry Andric     UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
45240b57cec5SDimitry Andric   }
45250b57cec5SDimitry Andric 
45260b57cec5SDimitry Andric   // Now examine each set of registers with the same base value. Build up
45270b57cec5SDimitry Andric   // a list of work to do and do the work in a separate step so that we're
45280b57cec5SDimitry Andric   // not adding formulae and register counts while we're searching.
45290b57cec5SDimitry Andric   SmallVector<WorkItem, 32> WorkItems;
45300fca6ea1SDimitry Andric   SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
45310fca6ea1SDimitry Andric       UniqueItems;
45320b57cec5SDimitry Andric   for (const SCEV *Reg : Sequence) {
45330b57cec5SDimitry Andric     const ImmMapTy &Imms = Map.find(Reg)->second;
45340b57cec5SDimitry Andric 
45350b57cec5SDimitry Andric     // It's not worthwhile looking for reuse if there's only one offset.
45360b57cec5SDimitry Andric     if (Imms.size() == 1)
45370b57cec5SDimitry Andric       continue;
45380b57cec5SDimitry Andric 
45390b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
45400b57cec5SDimitry Andric                for (const auto &Entry
45410b57cec5SDimitry Andric                     : Imms) dbgs()
45420b57cec5SDimitry Andric                << ' ' << Entry.first;
45430b57cec5SDimitry Andric                dbgs() << '\n');
45440b57cec5SDimitry Andric 
45450b57cec5SDimitry Andric     // Examine each offset.
45460b57cec5SDimitry Andric     for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
45470b57cec5SDimitry Andric          J != JE; ++J) {
45480b57cec5SDimitry Andric       const SCEV *OrigReg = J->second;
45490b57cec5SDimitry Andric 
45500fca6ea1SDimitry Andric       Immediate JImm = J->first;
45510b57cec5SDimitry Andric       const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);
45520b57cec5SDimitry Andric 
45530b57cec5SDimitry Andric       if (!isa<SCEVConstant>(OrigReg) &&
45540b57cec5SDimitry Andric           UsedByIndicesMap[Reg].count() == 1) {
45550b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
45560b57cec5SDimitry Andric                           << '\n');
45570b57cec5SDimitry Andric         continue;
45580b57cec5SDimitry Andric       }
45590b57cec5SDimitry Andric 
45600b57cec5SDimitry Andric       // Conservatively examine offsets between this orig reg and a few selected
45610b57cec5SDimitry Andric       // other orig regs.
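      // For example (hypothetical fixed offsets): with {-8, 0, 4, 40}, First
      // is -8 and Last is 40, and the overflow-free average below is
      // (-8 & 40) + ((-8 ^ 40) >> 1) == 40 + (-48 >> 1) == 16, matching
      // (First + Last) / 2; only begin, prev(end) and lower_bound(16) are
      // then compared against each J.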
45620fca6ea1SDimitry Andric       Immediate First = Imms.begin()->first;
45630fca6ea1SDimitry Andric       Immediate Last = std::prev(Imms.end())->first;
45640fca6ea1SDimitry Andric       if (!First.isCompatibleImmediate(Last)) {
45650fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
45660fca6ea1SDimitry Andric                           << "\n");
45670fca6ea1SDimitry Andric         continue;
45680fca6ea1SDimitry Andric       }
45690fca6ea1SDimitry Andric       // Only scalable if both terms are scalable, or if one is scalable and
45700fca6ea1SDimitry Andric       // the other is 0.
45710fca6ea1SDimitry Andric       bool Scalable = First.isScalable() || Last.isScalable();
45720fca6ea1SDimitry Andric       int64_t FI = First.getKnownMinValue();
45730fca6ea1SDimitry Andric       int64_t LI = Last.getKnownMinValue();
45740b57cec5SDimitry Andric       // Compute (First + Last) / 2 without overflow using the fact that
45750b57cec5SDimitry Andric       // First + Last = 2 * (First & Last) + (First ^ Last).
45760fca6ea1SDimitry Andric       int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
45770fca6ea1SDimitry Andric       // If the result is negative and FI is odd and LI even (or vice versa),
45780b57cec5SDimitry Andric       // we rounded towards -inf. Add 1 in that case, to round towards 0.
45790fca6ea1SDimitry Andric       Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
45800b57cec5SDimitry Andric       ImmMapTy::const_iterator OtherImms[] = {
45810b57cec5SDimitry Andric           Imms.begin(), std::prev(Imms.end()),
45820fca6ea1SDimitry Andric           Imms.lower_bound(Immediate::get(Avg, Scalable))};
4583bdd1243dSDimitry Andric       for (const auto &M : OtherImms) {
45840b57cec5SDimitry Andric         if (M == J || M == JE) continue;
45850fca6ea1SDimitry Andric         if (!JImm.isCompatibleImmediate(M->first))
45860fca6ea1SDimitry Andric           continue;
45870b57cec5SDimitry Andric 
45880b57cec5SDimitry Andric         // Compute the difference between the two.
45890fca6ea1SDimitry Andric         Immediate Imm = JImm.subUnsigned(M->first);
45900b57cec5SDimitry Andric         for (unsigned LUIdx : UsedByIndices.set_bits())
45910b57cec5SDimitry Andric           // Make a memo of this use, offset, and register tuple.
45920b57cec5SDimitry Andric           if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
45930b57cec5SDimitry Andric             WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
45940b57cec5SDimitry Andric       }
45950b57cec5SDimitry Andric     }
45960b57cec5SDimitry Andric   }
45970b57cec5SDimitry Andric 
45980b57cec5SDimitry Andric   Map.clear();
45990b57cec5SDimitry Andric   Sequence.clear();
46000b57cec5SDimitry Andric   UsedByIndicesMap.clear();
46010b57cec5SDimitry Andric   UniqueItems.clear();
46020b57cec5SDimitry Andric 
46030b57cec5SDimitry Andric   // Now iterate through the worklist and add new formulae.
46040b57cec5SDimitry Andric   for (const WorkItem &WI : WorkItems) {
46050b57cec5SDimitry Andric     size_t LUIdx = WI.LUIdx;
46060b57cec5SDimitry Andric     LSRUse &LU = Uses[LUIdx];
46070fca6ea1SDimitry Andric     Immediate Imm = WI.Imm;
46080b57cec5SDimitry Andric     const SCEV *OrigReg = WI.OrigReg;
46090b57cec5SDimitry Andric 
46100b57cec5SDimitry Andric     Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
46110fca6ea1SDimitry Andric     const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
46120b57cec5SDimitry Andric     unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
46130b57cec5SDimitry Andric 
46140b57cec5SDimitry Andric     // TODO: Use a more targeted data structure.
46150b57cec5SDimitry Andric for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) { 46160b57cec5SDimitry Andric Formula F = LU.Formulae[L]; 46170b57cec5SDimitry Andric // FIXME: The code for the scaled and unscaled registers looks 46180b57cec5SDimitry Andric // very similar but slightly different. Investigate if they 46190b57cec5SDimitry Andric // could be merged. That way, we would not have to unscale the 46200b57cec5SDimitry Andric // Formula. 46210b57cec5SDimitry Andric F.unscale(); 46220b57cec5SDimitry Andric // Use the immediate in the scaled register. 46230b57cec5SDimitry Andric if (F.ScaledReg == OrigReg) { 46240fca6ea1SDimitry Andric if (!F.BaseOffset.isCompatibleImmediate(Imm)) 46250fca6ea1SDimitry Andric continue; 46260fca6ea1SDimitry Andric Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale)); 46270b57cec5SDimitry Andric // Don't create 50 + reg(-50). 46280fca6ea1SDimitry Andric const SCEV *S = Offset.getNegativeSCEV(SE, IntTy); 46290fca6ea1SDimitry Andric if (F.referencesReg(S)) 46300b57cec5SDimitry Andric continue; 46310b57cec5SDimitry Andric Formula NewF = F; 46320b57cec5SDimitry Andric NewF.BaseOffset = Offset; 46330b57cec5SDimitry Andric if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, 46340b57cec5SDimitry Andric NewF)) 46350b57cec5SDimitry Andric continue; 46360b57cec5SDimitry Andric NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg); 46370b57cec5SDimitry Andric 46380b57cec5SDimitry Andric // If the new scale is a constant in a register, and adding the constant 46390b57cec5SDimitry Andric // value to the immediate would produce a value closer to zero than the 46400b57cec5SDimitry Andric // immediate itself, then the formula isn't worthwhile. 46410fca6ea1SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) { 46420fca6ea1SDimitry Andric // FIXME: Do we need to do something for scalable immediates here? 46430fca6ea1SDimitry Andric // A scalable SCEV won't be constant, but we might still have 46440fca6ea1SDimitry Andric // something in the offset? Bail out for now to be safe. 46450fca6ea1SDimitry Andric if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable()) 46460b57cec5SDimitry Andric continue; 46470fca6ea1SDimitry Andric if (C->getValue()->isNegative() != 46480fca6ea1SDimitry Andric (NewF.BaseOffset.isLessThanZero()) && 46490fca6ea1SDimitry Andric (C->getAPInt().abs() * APInt(BitWidth, F.Scale)) 46500fca6ea1SDimitry Andric .ule(std::abs(NewF.BaseOffset.getFixedValue()))) 46510fca6ea1SDimitry Andric continue; 46520fca6ea1SDimitry Andric } 46530b57cec5SDimitry Andric 46540b57cec5SDimitry Andric // OK, looks good. 46550b57cec5SDimitry Andric NewF.canonicalize(*this->L); 46560b57cec5SDimitry Andric (void)InsertFormula(LU, LUIdx, NewF); 46570b57cec5SDimitry Andric } else { 46580b57cec5SDimitry Andric // Use the immediate in a base register. 
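        // E.g. if this use references reg(%p + 16) and another use was seen
        // at reg(%p), the code below rebases this formula onto the shared
        // register and folds the 16 into BaseOffset instead, so both uses
        // can share one register when the offset is foldable.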
46590b57cec5SDimitry Andric for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) { 46600b57cec5SDimitry Andric const SCEV *BaseReg = F.BaseRegs[N]; 46610b57cec5SDimitry Andric if (BaseReg != OrigReg) 46620b57cec5SDimitry Andric continue; 46630b57cec5SDimitry Andric Formula NewF = F; 46640fca6ea1SDimitry Andric if (!NewF.BaseOffset.isCompatibleImmediate(Imm) || 46650fca6ea1SDimitry Andric !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) || 46660fca6ea1SDimitry Andric !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset)) 46670fca6ea1SDimitry Andric continue; 46680fca6ea1SDimitry Andric NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm); 46690b57cec5SDimitry Andric if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, 46700b57cec5SDimitry Andric LU.Kind, LU.AccessTy, NewF)) { 4671fe6060f1SDimitry Andric if (AMK == TTI::AMK_PostIndexed && 46720b57cec5SDimitry Andric mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE)) 46730b57cec5SDimitry Andric continue; 46740fca6ea1SDimitry Andric Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm); 46750fca6ea1SDimitry Andric if (!isLegalAddImmediate(TTI, NewUnfoldedOffset)) 46760b57cec5SDimitry Andric continue; 46770b57cec5SDimitry Andric NewF = F; 46780fca6ea1SDimitry Andric NewF.UnfoldedOffset = NewUnfoldedOffset; 46790b57cec5SDimitry Andric } 46800b57cec5SDimitry Andric NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg); 46810b57cec5SDimitry Andric 46820b57cec5SDimitry Andric // If the new formula has a constant in a register, and adding the 46830b57cec5SDimitry Andric // constant value to the immediate would produce a value closer to 46840b57cec5SDimitry Andric // zero than the immediate itself, then the formula isn't worthwhile. 46850b57cec5SDimitry Andric for (const SCEV *NewReg : NewF.BaseRegs) 46860fca6ea1SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) { 46870fca6ea1SDimitry Andric if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable()) 46880b57cec5SDimitry Andric goto skip_formula; 46890fca6ea1SDimitry Andric if ((C->getAPInt() + NewF.BaseOffset.getFixedValue()) 46900fca6ea1SDimitry Andric .abs() 46910fca6ea1SDimitry Andric .slt(std::abs(NewF.BaseOffset.getFixedValue())) && 46920fca6ea1SDimitry Andric (C->getAPInt() + NewF.BaseOffset.getFixedValue()) 46930fca6ea1SDimitry Andric .countr_zero() >= 46940fca6ea1SDimitry Andric (unsigned)llvm::countr_zero<uint64_t>( 46950fca6ea1SDimitry Andric NewF.BaseOffset.getFixedValue())) 46960fca6ea1SDimitry Andric goto skip_formula; 46970fca6ea1SDimitry Andric } 46980b57cec5SDimitry Andric 46990b57cec5SDimitry Andric // Ok, looks good. 47000b57cec5SDimitry Andric NewF.canonicalize(*this->L); 47010b57cec5SDimitry Andric (void)InsertFormula(LU, LUIdx, NewF); 47020b57cec5SDimitry Andric break; 47030b57cec5SDimitry Andric skip_formula:; 47040b57cec5SDimitry Andric } 47050b57cec5SDimitry Andric } 47060b57cec5SDimitry Andric } 47070b57cec5SDimitry Andric } 47080b57cec5SDimitry Andric } 47090b57cec5SDimitry Andric 47100b57cec5SDimitry Andric /// Generate formulae for each use. 47110b57cec5SDimitry Andric void 47120b57cec5SDimitry Andric LSRInstance::GenerateAllReuseFormulae() { 47130b57cec5SDimitry Andric // This is split into multiple loops so that hasRegsUsedByUsesOtherThan 47140b57cec5SDimitry Andric // queries are more precise. 
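  // In particular, GenerateTruncates runs in a separate pass below so that
  // its hasRegsUsedByUsesOtherThan check sees the registers introduced by
  // all of the other generators first.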
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateScales(LU, LUIdx, LU.Formulae[i]);
  }
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
      GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
  }

  GenerateCrossUseConstantOffsets();

  LLVM_DEBUG(dbgs() << "\n"
                       "After generating reuse formulae:\n";
             print_uses(dbgs()));
}

/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
  bool ChangedFormulae = false;
#endif

  // Collect the best formula for each unique set of shared registers. This
  // is reset for each use.
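  // Illustrative example: formulae reg(a) + reg(b) and reg(a) + reg(c),
  // where only reg(a) is shared with other uses, both map to the key
  // {reg(a)} and compete on cost; only the cheaper one survives.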
  using BestFormulaeTy =
      DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      // Some formulas are instant losers. For example, they may depend on
      // nonexistent AddRecs from other loops. These need to be filtered
      // immediately, otherwise heuristics could choose them over others
      // leading to an unsatisfactory solution. Passing LoserRegs into
      // RateFormula here avoids the need to recompute this information across
      // formulae using the same bad AddRec. Passing LoserRegs is also
      // essential unless we remove the corresponding bad register from the
      // Regs set.
      Cost CostF(L, SE, TTI, AMK);
      Regs.clear();
      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
      if (CostF.isLoser()) {
        // During initial formula generation, undesirable formulae are
        // generated by uses within other loops that have some non-trivial
        // address mode or use the postinc form of the IV. LSR needs to
        // provide these formulae as the basis of rediscovering the desired
        // formula that uses an AddRec corresponding to the existing phi.
        // Once all formulae have been generated, these initial losers may be
        // pruned.
        LLVM_DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
                   dbgs() << "\n");
      }
      else {
        SmallVector<const SCEV *, 4> Key;
        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            Key.push_back(Reg);
        }
        if (F.ScaledReg &&
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);
        // Unstable sort by host order ok, because this is only used for
        // uniquifying.
        llvm::sort(Key);

        std::pair<BestFormulaeTy::const_iterator, bool> P =
            BestFormulae.insert(std::make_pair(Key, FIdx));
        if (P.second)
          continue;

        Formula &Best = LU.Formulae[P.first->second];

        Cost CostBest(L, SE, TTI, AMK);
        Regs.clear();
        CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
        if (CostF.isLess(CostBest))
          std::swap(F, Best);
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n"
                             "    in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');
      }
#ifndef NDEBUG
      ChangedFormulae = true;
#endif
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }

    // Now that we've filtered out some formulae, recompute the Regs set.
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}

/// Estimate the worst-case number of solutions the solver might have to
/// consider. It almost never considers this many solutions because it prunes
/// the search space, but the pruning isn't always sufficient.
size_t LSRInstance::EstimateSearchSpaceComplexity() const {
  size_t Power = 1;
  for (const LSRUse &LU : Uses) {
    size_t FSize = LU.Formulae.size();
    if (FSize >= ComplexityLimit) {
      Power = ComplexityLimit;
      break;
    }
    Power *= FSize;
    if (Power >= ComplexityLimit)
      break;
  }
  return Power;
}

/// When one formula uses a superset of the registers of another formula, it
/// won't help reduce register pressure (though it may not necessarily hurt
/// register pressure); remove it to simplify the system.
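/// For example (illustrative), reg(a) + reg(5) is a register superset of a
/// formula with base register reg(a) that folds the 5 into its immediate
/// offset; if the folded form already exists, the register-hungry form is
/// deleted below.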
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                         "which use a superset of registers used by other "
                         "formulae.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
          continue;
        // Look for a formula with a constant or GV in a register. If the use
        // also has a formula with that same value in an immediate field,
        // delete the one that uses a register.
        for (SmallVectorImpl<const SCEV *>::const_iterator
             I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
            Formula NewF = F;
            // FIXME: Formulas should store bitwidth to do wrapping properly.
            //        See PR41034.
            NewF.BaseOffset =
                Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                    (uint64_t)C->getValue()->getSExtValue());
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                         dbgs() << '\n');
              LU.DeleteFormula(F);
              --i;
              --e;
              Any = true;
              break;
            }
          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
              if (!F.BaseGV) {
                Formula NewF = F;
                NewF.BaseGV = GV;
                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                    (I - F.BaseRegs.begin()));
                if (LU.HasFormulaWithSameRegs(NewF)) {
                  LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                             dbgs() << '\n');
                  LU.DeleteFormula(F);
                  --i;
                  --e;
                  Any = true;
                  break;
                }
              }
          }
        }
      }
      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}

/// When there are many registers for expressions like A, A+1, A+2, etc.,
/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by assuming that uses separated "
                "by a constant offset will use the same registers.\n");

  // This is especially useful for unrolled loops.

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
        continue;

      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
      if (!LUThatHas)
        continue;

      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
                              LU.Kind, LU.AccessTy))
        continue;

      LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs());
                 dbgs() << '\n');

      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

      // Transfer the fixups of LU to LUThatHas.
      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
        LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
      }

      // Delete formulae from the new use which are no longer legal.
      bool Any = false;
      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                     dbgs() << '\n');
          LUThatHas->DeleteFormula(F);
          --i;
          --e;
          Any = true;
        }
      }

      if (Any)
        LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      // Delete the old use.
      DeleteUse(LU, LUIdx);
      --LUIdx;
      --NumUses;
      break;
    }
  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}

/// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
/// we've done more filtering, as it may be able to find more formulae to
/// eliminate.
void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
                         "undesirable dedicated registers.\n");

    FilterOutUndesirableDedicatedRegisters();

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}

/// If an LSRUse has multiple formulae with the same ScaledReg and Scale,
/// pick the best one and delete the others.
/// This narrowing heuristic keeps as many formulae with different
/// Scale and ScaledReg pairs as possible while narrowing the search space.
/// The benefit is that a better solution is more likely to be found in a
/// formula set with more Scale and ScaledReg variations than in one where
/// they are all the same. The winner-reg-picking heuristic will often keep
/// the formulae with the same Scale and ScaledReg and filter out the others,
/// and we want to avoid that if possible.
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by choosing the best Formula "
                "from the Formulae with the same Scale and ScaledReg.\n");

  // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
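  // Illustrative example: three formulae that all scale the same recurrence,
  // say 2*reg({0,+,1}), share the key ({0,+,1}, 2); only the best of them
  // (per IsBetterThan below) is kept.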
  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;

  BestFormulaeTy BestFormulae;
#ifndef NDEBUG
  bool ChangedFormulae = false;
#endif
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    // Return true if Formula FA is better than Formula FB.
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      // First we will try to choose the Formula with fewer new registers.
      // For a register used by the current Formula, the more the register is
      // shared among LSRUses, the less we increase the register number
      // counter of the formula.
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      }
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      }
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      // If the new register numbers are the same, choose the Formula with
      // less Cost.
      Cost CostFA(L, SE, TTI, AMK);
      Cost CostFB(L, SE, TTI, AMK);
      Regs.clear();
      CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
      Regs.clear();
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
      return CostFA.isLess(CostFB);
    };

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (!F.ScaledReg)
        continue;
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      if (P.second)
        continue;

      Formula &Best = LU.Formulae[P.first->second];
      if (IsBetterThan(F, Best))
        std::swap(F, Best);
      LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                 dbgs() << "\n"
                           "    in favor of formula ";
                 Best.print(dbgs()); dbgs() << '\n');
#ifndef NDEBUG
      ChangedFormulae = true;
#endif
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}

/// If we are over the complexity limit, filter any post-inc preferring
/// address uses down to only their lowest-register-count formulae.
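/// For example (illustrative), on a target with post-indexed loads, a
/// one-register formula reg({0,+,4}) for an address use is kept, while
/// reg(base) + reg({0,+,4}) for the same use is deleted, since the extra
/// register works against the post-increment form.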
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
  if (AMK != TTI::AMK_PostIndexed)
    return;
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by choosing the lowest "
                       "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    if (LU.Kind != LSRUse::Address)
      continue;
    if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&
        !TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))
      continue;

    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n");
        LU.DeleteFormula(F);
        --FIdx;
        --NumForms;
        Any = true;
      }
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    if (EstimateSearchSpaceComplexity() < ComplexityLimit)
      break;
  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}

/// This function deletes formulas with a high expected register count.
/// Assuming we don't know which formula will be selected (having already
/// deleted all inefficient ones), generate the probability of not being
/// selected for each register.
/// For example,
/// Use1:
///  reg(a) + reg({0,+,1})
///  reg(a) + reg({-1,+,1}) + 1
///  reg({a,+,1})
/// Use2:
///  reg(b) + reg({0,+,1})
///  reg(b) + reg({-1,+,1}) + 1
///  reg({b,+,1})
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1})
///  reg(c) + reg({b,+,1})
///
/// Probability of not selecting
///                 Use1    Use2    Use3
/// reg(a)         (1/3) *   1   *   1
/// reg(b)           1   * (1/3) * (1/2)
/// reg({0,+,1})   (2/3) * (2/3) * (1/2)
/// reg({-1,+,1})  (2/3) * (2/3) *   1
/// reg({a,+,1})   (2/3) *   1   *   1
/// reg({b,+,1})     1   * (2/3) * (2/3)
/// reg(c)           1   *   1   *   0
///
/// Now compute the mathematical expectation of the register count for each
/// formula. Note that for each use we exclude the probability of not
/// selecting for that use. For example, for Use1 the probability for reg(a)
/// would be just 1 * 1 (excluding the probability 1/3 of not selecting for
/// Use1).
/// Use1:
///  reg(a) + reg({0,+,1})          1 + 1/3       -- to be deleted
///  reg(a) + reg({-1,+,1}) + 1     1 + 4/9       -- to be deleted
///  reg({a,+,1})                   1
/// Use2:
///  reg(b) + reg({0,+,1})          1/2 + 1/3     -- to be deleted
///  reg(b) + reg({-1,+,1}) + 1     1/2 + 2/3     -- to be deleted
///  reg({b,+,1})                   2/3
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
///  reg(c) + reg({b,+,1})          1 + 2/3
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;
  // Ok, we have too many formulae on our hands to conveniently handle.
  // Use a rough heuristic to thin out the list.

  // Set of Regs which will be 100% used in the final solution.
  // Used in each formula of a solution (in the example above this is reg(c)).
  // We can skip them in calculations.
  SmallPtrSet<const SCEV *, 4> UniqRegs;
  LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

  // Map each register to the probability of it not being selected.
  DenseMap<const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    if (UniqRegs.count(Reg))
      continue;
    float PNotSel = 1;
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
        continue;
      float P = LU.getNotSelectedProbability(Reg);
      if (P != 0.0)
        PNotSel *= P;
      else
        UniqRegs.insert(Reg);
    }
    RegNumMap.insert(std::make_pair(Reg, PNotSel));
  }

  LLVM_DEBUG(
      dbgs() << "Narrowing the search space by deleting costly formulas\n");

  // Delete formulas where the expected register count is high.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // If there is nothing to delete, continue.
    if (LU.Formulae.size() < 2)
      continue;
    // This is a temporary solution to test performance. Float should be
    // replaced with a rounding-independent type (based on integers) to avoid
    // different results for different target builds.
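    // For each formula, sum the expected register counts of its registers,
    // tracking AddRec registers separately as a tie-breaker; the formula
    // with the lowest expectation wins and all others are deleted below.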
    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    size_t MinIdx = 0;
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      float FRegNum = 0;
      float FARegNum = 0;
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
          continue;
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
        if (isa<SCEVAddRecExpr>(BaseReg))
          FARegNum +=
              RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      }
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
          FRegNum +=
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
          if (isa<SCEVAddRecExpr>(ScaledReg))
            FARegNum +=
                RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
        }
      }
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;
        MinIdx = i;
      }
    }
    LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());
               dbgs() << " with min reg num " << FMinRegNum << '\n');
    if (MinIdx != 0)
      std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      LLVM_DEBUG(dbgs() << "  Deleting "; LU.Formulae.back().print(dbgs());
                 dbgs() << '\n');
      LU.Formulae.pop_back();
    }
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
    LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');
    // When we choose the formula, the regs become unique.
    UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    if (F.ScaledReg)
      UniqRegs.insert(F.ScaledReg);
  }
  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}

// Check if Best and Reg are SCEVs separated by a constant amount C, and if
// so, whether the addressing offset +C would be legal where the negative
// offset -C is not.
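// Illustrative example: with Best = {x+4,+,1} and Reg = {x,+,1}, C is 4; on
// a target whose addressing modes fold [reg + 4] but not [reg - 4], Reg is
// the simpler base and this returns true.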
static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
                                       ScalarEvolution &SE, const SCEV *Best,
                                       const SCEV *Reg,
                                       MemAccessTy AccessType) {
  if (Best->getType() != Reg->getType() ||
      (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
       cast<SCEVAddRecExpr>(Best)->getLoop() !=
           cast<SCEVAddRecExpr>(Reg)->getLoop()))
    return false;
  const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Best, Reg));
  if (!Diff)
    return false;

  return TTI.isLegalAddressingMode(
             AccessType.MemTy, /*BaseGV=*/nullptr,
             /*BaseOffset=*/Diff->getAPInt().getSExtValue(),
             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&
         !TTI.isLegalAddressingMode(
             AccessType.MemTy, /*BaseGV=*/nullptr,
             /*BaseOffset=*/-Diff->getAPInt().getSExtValue(),
             /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);
}

/// Pick a register which seems likely to be profitable, and then in any use
/// which has any reference to that register, delete all formulae which do not
/// reference that register.
void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
  // With all other options exhausted, loop until the system is simple
  // enough to handle.
  SmallPtrSet<const SCEV *, 4> Taken;
  while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    // Ok, we have too many formulae on our hands to conveniently handle.
    // Use a rough heuristic to thin out the list.
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    // Pick the register which is used by the most LSRUses, which is likely
    // to be a good reuse register candidate.
    const SCEV *Best = nullptr;
    unsigned BestNum = 0;
    for (const SCEV *Reg : RegUses) {
      if (Taken.count(Reg))
        continue;
      if (!Best) {
        Best = Reg;
        BestNum = RegUses.getUsedByIndices(Reg).count();
      } else {
        unsigned Count = RegUses.getUsedByIndices(Reg).count();
        if (Count > BestNum) {
          Best = Reg;
          BestNum = Count;
        }

        // If the scores are the same, but the Reg is simpler for the target
        // (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
        // handle +C but not -C), opt for the simpler formula.
        if (Count == BestNum) {
          int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
          if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
              IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
                                         Uses[LUIdx].AccessTy)) {
            Best = Reg;
            BestNum = Count;
          }
        }
      }
    }
    assert(Best && "Failed to find best LSRUse candidate");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
                      << " will yield profitable reuse.\n");
    Taken.insert(Best);

    // In any use with formulae which reference this register, delete formulae
    // which don't reference it.
    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      if (!LU.Regs.count(Best)) continue;

      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        if (!F.referencesReg(Best)) {
          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                     dbgs() << '\n');
          LU.DeleteFormula(F);
          --e;
          --i;
          Any = true;
          assert(e != 0 &&
                 "Use has no formulae left! Is Regs inconsistent?");
          continue;
        }
      }

      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}

/// If there are an extraordinary number of formulae to choose from, use some
/// rough heuristics to prune down the number of formulae. This keeps the main
/// solver from taking an extraordinary amount of time in some worst-case
/// scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
  NarrowSearchSpaceByDetectingSupersets();
  NarrowSearchSpaceByCollapsingUnrolledCode();
  NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  if (FilterSameScaledReg)
    NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  NarrowSearchSpaceByFilterPostInc();
  if (LSRExpNarrow)
    NarrowSearchSpaceByDeletingCostlyFormulas();
  else
    NarrowSearchSpaceByPickingWinnerRegs();
}

/// This is the recursive solver.
void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                               Cost &SolutionCost,
                               SmallVectorImpl<const Formula *> &Workspace,
                               const Cost &CurCost,
                               const SmallPtrSet<const SCEV *, 16> &CurRegs,
                               DenseSet<const SCEV *> &VisitedRegs) const {
  // Some ideas:
  //  - prune more:
  //    - use more aggressive filtering
  //    - sort the formula so that the most profitable solutions are found
  //      first
  //    - sort the uses too
  //  - search faster:
  //    - don't compute a cost, and then compare. compare while computing a
  //      cost and bail early.
  //    - track register sets with SmallBitVector

  const LSRUse &LU = Uses[Workspace.size()];

  // If this use references any register that's already a part of the
  // in-progress solution, consider it a requirement that a formula must
  // reference that register in order to be considered. This prunes out
  // unprofitable searching.
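  // Illustrative example: if the partial solution already uses reg({0,+,1})
  // and this use can reference it, formulae for this use that do not mention
  // reg({0,+,1}) are skipped, since they could only add register pressure.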
  SmallSetVector<const SCEV *, 4> ReqRegs;
  for (const SCEV *S : CurRegs)
    if (LU.Regs.count(S))
      ReqRegs.insert(S);

  SmallPtrSet<const SCEV *, 16> NewRegs;
  Cost NewCost(L, SE, TTI, AMK);
  for (const Formula &F : LU.Formulae) {
    // Ignore formulae which may not be ideal in terms of register reuse of
    // ReqRegs. The formula should use all required registers before
    // introducing new ones.
    // This can sometimes (notably when trying to favour postinc) lead to
    // sub-optimal decisions. There it is best left to the cost modelling to
    // get right.
    if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) {
      int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
      for (const SCEV *Reg : ReqRegs) {
        if ((F.ScaledReg && F.ScaledReg == Reg) ||
            is_contained(F.BaseRegs, Reg)) {
          --NumReqRegsToFind;
          if (NumReqRegsToFind == 0)
            break;
        }
      }
      if (NumReqRegsToFind != 0) {
        // If none of the formulae satisfied the required registers, then we
        // could clear ReqRegs and try again. Currently, we simply give up in
        // this case.
        continue;
      }
    }

    // Evaluate the cost of the current formula. If it's already worse than
    // the current best, prune the search at that point.
    NewCost = CurCost;
    NewRegs = CurRegs;
    NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
    if (NewCost.isLess(SolutionCost)) {
      Workspace.push_back(&F);
      if (Workspace.size() != Uses.size()) {
        SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
                     NewRegs, VisitedRegs);
        if (F.getNumRegs() == 1 && Workspace.size() == 1)
          VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
      } else {
        LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
                   dbgs() << ".\nRegs:\n";
                   for (const SCEV *S : NewRegs) dbgs()
                      << "- " << *S << "\n";
                   dbgs() << '\n');

        SolutionCost = NewCost;
        Solution = Workspace;
      }
      Workspace.pop_back();
    }
  }
}

/// Choose one formula from each use. Return the results in the given Solution
/// vector.
void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
  SmallVector<const Formula *, 8> Workspace;
  Cost SolutionCost(L, SE, TTI, AMK);
  SolutionCost.Lose();
  Cost CurCost(L, SE, TTI, AMK);
  SmallPtrSet<const SCEV *, 16> CurRegs;
  DenseSet<const SCEV *> VisitedRegs;
  Workspace.reserve(Uses.size());

  // SolveRecurse does all the work.
  SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
               CurRegs, VisitedRegs);
  if (Solution.empty()) {
    LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
    return;
  }

  // Ok, we've now made all our decisions.
  LLVM_DEBUG(dbgs() << "\n"
                       "The chosen solution requires ";
             SolutionCost.print(dbgs()); dbgs() << ":\n";
             for (size_t i = 0, e = Uses.size(); i != e; ++i) {
               dbgs() << "  ";
               Uses[i].print(dbgs());
               dbgs() << "\n"
                         "    ";
               Solution[i]->print(dbgs());
               dbgs() << '\n';
             });

  assert(Solution.size() == Uses.size() && "Malformed solution!");

  const bool EnableDropUnprofitableSolution = [&] {
    switch (AllowDropSolutionIfLessProfitable) {
    case cl::BOU_TRUE:
      return true;
    case cl::BOU_FALSE:
      return false;
    case cl::BOU_UNSET:
      return TTI.shouldDropLSRSolutionIfLessProfitable();
    }
    llvm_unreachable("Unhandled cl::boolOrDefault enum");
  }();

  if (BaselineCost.isLess(SolutionCost)) {
    if (!EnableDropUnprofitableSolution)
      LLVM_DEBUG(
          dbgs() << "Baseline is more profitable than chosen solution, "
                    "add option 'lsr-drop-solution' to drop LSR solution.\n");
    else {
      LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
                           "solution, dropping LSR solution.\n";);
      Solution.clear();
    }
  }
}

/// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree as
/// far as we can go while still being dominated by the input positions. This
/// helps canonicalize the insert position, which encourages sharing.
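/// For example (illustrative), if all inputs dominate a block several levels
/// up the dominator tree, the insert position is hoisted to it, so later
/// expansions of the same expression can reuse the instructions inserted
/// there.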
BasicBlock::iterator
LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
                                 const SmallVectorImpl<Instruction *> &Inputs)
    const {
  Instruction *Tentative = &*IP;
  while (true) {
    bool AllDominate = true;
    Instruction *BetterPos = nullptr;
    // Don't bother attempting to insert before a catchswitch; its basic block
    // cannot have other non-PHI instructions.
    if (isa<CatchSwitchInst>(Tentative))
      return IP;

    for (Instruction *Inst : Inputs) {
      if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
        AllDominate = false;
        break;
      }
      // Attempt to find an insert position in the middle of the block,
      // instead of at the end, so that it can be used for other expansions.
      if (Tentative->getParent() == Inst->getParent() &&
          (!BetterPos || !DT.dominates(Inst, BetterPos)))
        BetterPos = &*std::next(BasicBlock::iterator(Inst));
    }
    if (!AllDominate)
      break;
    if (BetterPos)
      IP = BetterPos->getIterator();
    else
      IP = Tentative->getIterator();

    const Loop *IPLoop = LI.getLoopFor(IP->getParent());
    unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;

    BasicBlock *IDom;
    for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
      if (!Rung) return IP;
      Rung = Rung->getIDom();
      if (!Rung) return IP;
      IDom = Rung->getBlock();

      // Don't climb into a loop though.
      const Loop *IDomLoop = LI.getLoopFor(IDom);
      unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
      if (IDomDepth <= IPLoopDepth &&
          (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
        break;
    }

    Tentative = IDom->getTerminator();
  }

  return IP;
}

/// Determine an input position which will be dominated by the operands and
/// which will dominate the result.
BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
    BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
  // Collect some instructions which must be dominated by the
  // expanding replacement. These must be dominated by any operands that
  // will be required in the expansion.
  SmallVector<Instruction *, 4> Inputs;
  if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
    Inputs.push_back(I);
  if (LU.Kind == LSRUse::ICmpZero)
    if (Instruction *I =
          dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
      Inputs.push_back(I);
  if (LF.PostIncLoops.count(L)) {
    if (LF.isUseFullyOutsideLoop(L))
      Inputs.push_back(L->getLoopLatch()->getTerminator());
    else
      Inputs.push_back(IVIncInsertPos);
  }
  // The expansion must also be dominated by the increment positions of any
  // loops for which it is using post-inc mode.
  for (const Loop *PIL : LF.PostIncLoops) {
    if (PIL == L) continue;

    // Be dominated by the loop exit.
    SmallVector<BasicBlock *, 4> ExitingBlocks;
    PIL->getExitingBlocks(ExitingBlocks);
    if (!ExitingBlocks.empty()) {
      BasicBlock *BB = ExitingBlocks[0];
      for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
        BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
      Inputs.push_back(BB->getTerminator());
    }
  }

  assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
         && !isa<DbgInfoIntrinsic>(LowestIP) &&
         "Insertion point must be a normal instruction");

  // Then, climb up the immediate dominator tree as far as we can go while
  // still being dominated by the input positions.
  BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);

  // Don't insert instructions before PHI nodes.
  while (isa<PHINode>(IP)) ++IP;

  // Ignore landingpad instructions.
  while (IP->isEHPad()) ++IP;

  // Ignore debug intrinsics.
  while (isa<DbgInfoIntrinsic>(IP)) ++IP;

  // Set IP below instructions recently inserted by SCEVExpander. This keeps
  // the IP consistent across expansions and allows the previously inserted
  // instructions to be reused by subsequent expansion.
56760b57cec5SDimitry Andric while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP) 56770b57cec5SDimitry Andric ++IP; 56780b57cec5SDimitry Andric 56790b57cec5SDimitry Andric return IP; 56800b57cec5SDimitry Andric } 56810b57cec5SDimitry Andric 56820b57cec5SDimitry Andric /// Emit instructions for the leading candidate expression for this LSRUse (this 56830b57cec5SDimitry Andric /// is called "expanding"). 56840b57cec5SDimitry Andric Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF, 56850b57cec5SDimitry Andric const Formula &F, BasicBlock::iterator IP, 56860b57cec5SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { 56870b57cec5SDimitry Andric if (LU.RigidFormula) 56880b57cec5SDimitry Andric return LF.OperandValToReplace; 56890b57cec5SDimitry Andric 56900b57cec5SDimitry Andric // Determine an input position which will be dominated by the operands and 56910b57cec5SDimitry Andric // which will dominate the result. 5692fcaf7f86SDimitry Andric IP = AdjustInsertPositionForExpand(IP, LF, LU); 56930b57cec5SDimitry Andric Rewriter.setInsertPoint(&*IP); 56940b57cec5SDimitry Andric 56950b57cec5SDimitry Andric // Inform the Rewriter if we have a post-increment use, so that it can 56960b57cec5SDimitry Andric // perform an advantageous expansion. 56970b57cec5SDimitry Andric Rewriter.setPostInc(LF.PostIncLoops); 56980b57cec5SDimitry Andric 56990b57cec5SDimitry Andric // This is the type that the user actually needs. 57000b57cec5SDimitry Andric Type *OpTy = LF.OperandValToReplace->getType(); 57010b57cec5SDimitry Andric // This will be the type that we'll initially expand to. 57020b57cec5SDimitry Andric Type *Ty = F.getType(); 57030b57cec5SDimitry Andric if (!Ty) 57040b57cec5SDimitry Andric // No type known; just expand directly to the ultimate type. 57050b57cec5SDimitry Andric Ty = OpTy; 57060b57cec5SDimitry Andric else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy)) 57070b57cec5SDimitry Andric // Expand directly to the ultimate type if it's the right size. 57080b57cec5SDimitry Andric Ty = OpTy; 57090b57cec5SDimitry Andric // This is the type to do integer arithmetic in. 57100b57cec5SDimitry Andric Type *IntTy = SE.getEffectiveSCEVType(Ty); 57110b57cec5SDimitry Andric 57120b57cec5SDimitry Andric // Build up a list of operands to add together to form the full base. 57130b57cec5SDimitry Andric SmallVector<const SCEV *, 8> Ops; 57140b57cec5SDimitry Andric 57150b57cec5SDimitry Andric // Expand the BaseRegs portion. 57160b57cec5SDimitry Andric for (const SCEV *Reg : F.BaseRegs) { 57170b57cec5SDimitry Andric assert(!Reg->isZero() && "Zero allocated in a base register!"); 57180b57cec5SDimitry Andric 57190b57cec5SDimitry Andric // If we're expanding for a post-inc user, make the post-inc adjustment. 57200b57cec5SDimitry Andric Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE); 57210b57cec5SDimitry Andric Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr))); 57220b57cec5SDimitry Andric } 57230b57cec5SDimitry Andric 57240b57cec5SDimitry Andric // Expand the ScaledReg portion. 57250b57cec5SDimitry Andric Value *ICmpScaledV = nullptr; 57260b57cec5SDimitry Andric if (F.Scale != 0) { 57270b57cec5SDimitry Andric const SCEV *ScaledS = F.ScaledReg; 57280b57cec5SDimitry Andric 57290b57cec5SDimitry Andric // If we're expanding for a post-inc user, make the post-inc adjustment. 
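    // E.g. a register normalized to {0,+,1}<%L> for a post-increment user is
    // denormalized back to {1,+,1}<%L>, so the expanded code computes the
    // value after the increment (see the post-inc discussion in the file
    // header).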
57300b57cec5SDimitry Andric PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops); 57310b57cec5SDimitry Andric ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE); 57320b57cec5SDimitry Andric 57330b57cec5SDimitry Andric if (LU.Kind == LSRUse::ICmpZero) { 57340b57cec5SDimitry Andric // Expand ScaleReg as if it was part of the base regs. 57350b57cec5SDimitry Andric if (F.Scale == 1) 57360b57cec5SDimitry Andric Ops.push_back( 57370b57cec5SDimitry Andric SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr))); 57380b57cec5SDimitry Andric else { 57390b57cec5SDimitry Andric // An interesting way of "folding" with an icmp is to use a negated 57400b57cec5SDimitry Andric // scale, which we'll implement by inserting it into the other operand 57410b57cec5SDimitry Andric // of the icmp. 57420b57cec5SDimitry Andric assert(F.Scale == -1 && 57430b57cec5SDimitry Andric "The only scale supported by ICmpZero uses is -1!"); 57440b57cec5SDimitry Andric ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr); 57450b57cec5SDimitry Andric } 57460b57cec5SDimitry Andric } else { 57470b57cec5SDimitry Andric // Otherwise just expand the scaled register and an explicit scale, 57480b57cec5SDimitry Andric // which is expected to be matched as part of the address. 57490b57cec5SDimitry Andric 57500b57cec5SDimitry Andric // Flush the operand list to suppress SCEVExpander hoisting address modes. 57510b57cec5SDimitry Andric // Unless the addressing mode will not be folded. 57520b57cec5SDimitry Andric if (!Ops.empty() && LU.Kind == LSRUse::Address && 57530b57cec5SDimitry Andric isAMCompletelyFolded(TTI, LU, F)) { 57540b57cec5SDimitry Andric Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr); 57550b57cec5SDimitry Andric Ops.clear(); 57560b57cec5SDimitry Andric Ops.push_back(SE.getUnknown(FullV)); 57570b57cec5SDimitry Andric } 57580b57cec5SDimitry Andric ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)); 57590b57cec5SDimitry Andric if (F.Scale != 1) 57600b57cec5SDimitry Andric ScaledS = 57610b57cec5SDimitry Andric SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale)); 57620b57cec5SDimitry Andric Ops.push_back(ScaledS); 57630b57cec5SDimitry Andric } 57640b57cec5SDimitry Andric } 57650b57cec5SDimitry Andric 57660b57cec5SDimitry Andric // Expand the GV portion. 57670b57cec5SDimitry Andric if (F.BaseGV) { 57680b57cec5SDimitry Andric // Flush the operand list to suppress SCEVExpander hoisting. 57690b57cec5SDimitry Andric if (!Ops.empty()) { 5770fe6060f1SDimitry Andric Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy); 57710b57cec5SDimitry Andric Ops.clear(); 57720b57cec5SDimitry Andric Ops.push_back(SE.getUnknown(FullV)); 57730b57cec5SDimitry Andric } 57740b57cec5SDimitry Andric Ops.push_back(SE.getUnknown(F.BaseGV)); 57750b57cec5SDimitry Andric } 57760b57cec5SDimitry Andric 57770b57cec5SDimitry Andric // Flush the operand list to suppress SCEVExpander hoisting of both folded and 57780b57cec5SDimitry Andric // unfolded offsets. LSR assumes they both live next to their uses. 57790b57cec5SDimitry Andric if (!Ops.empty()) { 57800b57cec5SDimitry Andric Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty); 57810b57cec5SDimitry Andric Ops.clear(); 57820b57cec5SDimitry Andric Ops.push_back(SE.getUnknown(FullV)); 57830b57cec5SDimitry Andric } 57840b57cec5SDimitry Andric 57850fca6ea1SDimitry Andric // FIXME: Are we sure we won't get a mismatch here? 
Is there a way to bail 57860fca6ea1SDimitry Andric // out at this point, or should we generate a SCEV adding together mixed 57870fca6ea1SDimitry Andric // offsets? 57880fca6ea1SDimitry Andric assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) && 57890fca6ea1SDimitry Andric "Expanding mismatched offsets\n"); 57900b57cec5SDimitry Andric // Expand the immediate portion. 57910fca6ea1SDimitry Andric Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset); 57920fca6ea1SDimitry Andric if (Offset.isNonZero()) { 57930b57cec5SDimitry Andric if (LU.Kind == LSRUse::ICmpZero) { 57940b57cec5SDimitry Andric // The other interesting way of "folding" with an ICmpZero is to use a 57950b57cec5SDimitry Andric // negated immediate. 57960b57cec5SDimitry Andric if (!ICmpScaledV) 57970fca6ea1SDimitry Andric ICmpScaledV = 57980fca6ea1SDimitry Andric ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue()); 57990b57cec5SDimitry Andric else { 58000b57cec5SDimitry Andric Ops.push_back(SE.getUnknown(ICmpScaledV)); 58010fca6ea1SDimitry Andric ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue()); 58020b57cec5SDimitry Andric } 58030b57cec5SDimitry Andric } else { 58040b57cec5SDimitry Andric // Just add the immediate values. These again are expected to be matched 58050b57cec5SDimitry Andric // as part of the address. 58060fca6ea1SDimitry Andric Ops.push_back(Offset.getUnknownSCEV(SE, IntTy)); 58070b57cec5SDimitry Andric } 58080b57cec5SDimitry Andric } 58090b57cec5SDimitry Andric 58100b57cec5SDimitry Andric // Expand the unfolded offset portion. 58110fca6ea1SDimitry Andric Immediate UnfoldedOffset = F.UnfoldedOffset; 58120fca6ea1SDimitry Andric if (UnfoldedOffset.isNonZero()) { 58130b57cec5SDimitry Andric // Just add the immediate values. 58140fca6ea1SDimitry Andric Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy)); 58150b57cec5SDimitry Andric } 58160b57cec5SDimitry Andric 58170b57cec5SDimitry Andric // Emit instructions summing all the operands. 58180b57cec5SDimitry Andric const SCEV *FullS = Ops.empty() ? 58190b57cec5SDimitry Andric SE.getConstant(IntTy, 0) : 58200b57cec5SDimitry Andric SE.getAddExpr(Ops); 58210b57cec5SDimitry Andric Value *FullV = Rewriter.expandCodeFor(FullS, Ty); 58220b57cec5SDimitry Andric 58230b57cec5SDimitry Andric // We're done expanding now, so reset the rewriter. 58240b57cec5SDimitry Andric Rewriter.clearPostInc(); 58250b57cec5SDimitry Andric 58260b57cec5SDimitry Andric // An ICmpZero Formula represents an ICmp which we're handling as a 58270b57cec5SDimitry Andric // comparison against zero. Now that we've expanded an expression for that 58280b57cec5SDimitry Andric // form, update the ICmp's other operand. 
58290b57cec5SDimitry Andric   if (LU.Kind == LSRUse::ICmpZero) {
58300b57cec5SDimitry Andric     ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
58315ffd83dbSDimitry Andric     if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
58325ffd83dbSDimitry Andric       DeadInsts.emplace_back(OperandIsInstr);
58330b57cec5SDimitry Andric     assert(!F.BaseGV && "ICmp does not support folding a global value and "
58340b57cec5SDimitry Andric                         "a scale at the same time!");
58350b57cec5SDimitry Andric     if (F.Scale == -1) {
58360b57cec5SDimitry Andric       if (ICmpScaledV->getType() != OpTy) {
58370fca6ea1SDimitry Andric         Instruction *Cast = CastInst::Create(
58380fca6ea1SDimitry Andric             CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false),
58390fca6ea1SDimitry Andric             ICmpScaledV, OpTy, "tmp", CI->getIterator());
58400b57cec5SDimitry Andric         ICmpScaledV = Cast;
58410b57cec5SDimitry Andric       }
58420b57cec5SDimitry Andric       CI->setOperand(1, ICmpScaledV);
58430b57cec5SDimitry Andric     } else {
58440b57cec5SDimitry Andric       // A scale of 1 means that the scale has been expanded as part of the
58450b57cec5SDimitry Andric       // base regs.
58460b57cec5SDimitry Andric       assert((F.Scale == 0 || F.Scale == 1) &&
58470b57cec5SDimitry Andric              "ICmp does not support folding a global value and "
58480b57cec5SDimitry Andric              "a scale at the same time!");
58490b57cec5SDimitry Andric       Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
58500fca6ea1SDimitry Andric                                            -(uint64_t)Offset.getFixedValue());
58515f757f3fSDimitry Andric       if (C->getType() != OpTy) {
58525f757f3fSDimitry Andric         C = ConstantFoldCastOperand(
58535f757f3fSDimitry Andric             CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy,
58540fca6ea1SDimitry Andric             CI->getDataLayout());
58555f757f3fSDimitry Andric         assert(C && "Cast of ConstantInt should have folded");
58565f757f3fSDimitry Andric       }
58570b57cec5SDimitry Andric 
58580b57cec5SDimitry Andric       CI->setOperand(1, C);
58590b57cec5SDimitry Andric     }
58600b57cec5SDimitry Andric   }
58610b57cec5SDimitry Andric 
58620b57cec5SDimitry Andric   return FullV;
58630b57cec5SDimitry Andric }
58640b57cec5SDimitry Andric 
58650b57cec5SDimitry Andric /// Helper for Rewrite. PHI nodes are special because the use of their operands
58660b57cec5SDimitry Andric /// effectively happens in their predecessor blocks, so the expression may need
58670b57cec5SDimitry Andric /// to be expanded in multiple places.
58680b57cec5SDimitry Andric void LSRInstance::RewriteForPHI(
58690b57cec5SDimitry Andric     PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
5870fcaf7f86SDimitry Andric     SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
58710b57cec5SDimitry Andric   DenseMap<BasicBlock *, Value *> Inserted;
587206c3fb27SDimitry Andric 
587306c3fb27SDimitry Andric   // Inserting instructions in the loop and using them as PHI's input could
587406c3fb27SDimitry Andric   // break LCSSA if the PHI's parent block is not a loop exit (i.e. the
587506c3fb27SDimitry Andric   // corresponding incoming block is not loop exiting). So collect all such
587606c3fb27SDimitry Andric   // instructions to form LCSSA for them later.
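  // Illustrative example: if a value %v defined inside the loop reaches this
  // PHI through an incoming block that is not loop-exiting, LCSSA requires an
  // intermediate PHI such as
  //   %v.lcssa = phi i64 [ %v, %exiting ]
  // in an exit block; formLCSSAForInstructions at the end of this function
  // creates these.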
587706c3fb27SDimitry Andric SmallVector<Instruction *, 4> InsertedNonLCSSAInsts; 587806c3fb27SDimitry Andric 58790b57cec5SDimitry Andric for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) 58800b57cec5SDimitry Andric if (PN->getIncomingValue(i) == LF.OperandValToReplace) { 58810b57cec5SDimitry Andric bool needUpdateFixups = false; 58820b57cec5SDimitry Andric BasicBlock *BB = PN->getIncomingBlock(i); 58830b57cec5SDimitry Andric 58840b57cec5SDimitry Andric // If this is a critical edge, split the edge so that we do not insert 58850b57cec5SDimitry Andric // the code on all predecessor/successor paths. We do this unless this 58860b57cec5SDimitry Andric // is the canonical backedge for this loop, which complicates post-inc 58870b57cec5SDimitry Andric // users. 58880b57cec5SDimitry Andric if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 && 58890b57cec5SDimitry Andric !isa<IndirectBrInst>(BB->getTerminator()) && 58900b57cec5SDimitry Andric !isa<CatchSwitchInst>(BB->getTerminator())) { 58910b57cec5SDimitry Andric BasicBlock *Parent = PN->getParent(); 58920b57cec5SDimitry Andric Loop *PNLoop = LI.getLoopFor(Parent); 58930b57cec5SDimitry Andric if (!PNLoop || Parent != PNLoop->getHeader()) { 58940b57cec5SDimitry Andric // Split the critical edge. 58950b57cec5SDimitry Andric BasicBlock *NewBB = nullptr; 58960b57cec5SDimitry Andric if (!Parent->isLandingPad()) { 5897e8d8bef9SDimitry Andric NewBB = 5898e8d8bef9SDimitry Andric SplitCriticalEdge(BB, Parent, 5899e8d8bef9SDimitry Andric CriticalEdgeSplittingOptions(&DT, &LI, MSSAU) 59000b57cec5SDimitry Andric .setMergeIdenticalEdges() 59010b57cec5SDimitry Andric .setKeepOneInputPHIs()); 59020b57cec5SDimitry Andric } else { 59030b57cec5SDimitry Andric SmallVector<BasicBlock*, 2> NewBBs; 59045f757f3fSDimitry Andric DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager); 59055f757f3fSDimitry Andric SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DTU, &LI); 59060b57cec5SDimitry Andric NewBB = NewBBs[0]; 59070b57cec5SDimitry Andric } 59080b57cec5SDimitry Andric // If NewBB==NULL, then SplitCriticalEdge refused to split because all 59090b57cec5SDimitry Andric // phi predecessors are identical. The simple thing to do is skip 59100b57cec5SDimitry Andric // splitting in this case rather than complicate the API. 59110b57cec5SDimitry Andric if (NewBB) { 59120b57cec5SDimitry Andric // If PN is outside of the loop and BB is in the loop, we want to 59130b57cec5SDimitry Andric // move the block to be immediately before the PHI block, not 59140b57cec5SDimitry Andric // immediately after BB. 59150b57cec5SDimitry Andric if (L->contains(BB) && !L->contains(PN)) 59160b57cec5SDimitry Andric NewBB->moveBefore(PN->getParent()); 59170b57cec5SDimitry Andric 59180b57cec5SDimitry Andric // Splitting the edge can reduce the number of PHI entries we have. 
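          // (With setMergeIdenticalEdges, several identical incoming edges may
          // have been collapsed into one, so the incoming count and index are
          // re-read below.)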
59190b57cec5SDimitry Andric e = PN->getNumIncomingValues(); 59200b57cec5SDimitry Andric BB = NewBB; 59210b57cec5SDimitry Andric i = PN->getBasicBlockIndex(BB); 59220b57cec5SDimitry Andric 59230b57cec5SDimitry Andric needUpdateFixups = true; 59240b57cec5SDimitry Andric } 59250b57cec5SDimitry Andric } 59260b57cec5SDimitry Andric } 59270b57cec5SDimitry Andric 59280b57cec5SDimitry Andric std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair = 59290b57cec5SDimitry Andric Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr))); 59300b57cec5SDimitry Andric if (!Pair.second) 59310b57cec5SDimitry Andric PN->setIncomingValue(i, Pair.first->second); 59320b57cec5SDimitry Andric else { 5933fcaf7f86SDimitry Andric Value *FullV = 5934fcaf7f86SDimitry Andric Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts); 59350b57cec5SDimitry Andric 59360b57cec5SDimitry Andric // If this is reuse-by-noop-cast, insert the noop cast. 59370b57cec5SDimitry Andric Type *OpTy = LF.OperandValToReplace->getType(); 59380b57cec5SDimitry Andric if (FullV->getType() != OpTy) 59390fca6ea1SDimitry Andric FullV = CastInst::Create( 59400fca6ea1SDimitry Andric CastInst::getCastOpcode(FullV, false, OpTy, false), FullV, 59410fca6ea1SDimitry Andric LF.OperandValToReplace->getType(), "tmp", 59420fca6ea1SDimitry Andric BB->getTerminator()->getIterator()); 59430b57cec5SDimitry Andric 594406c3fb27SDimitry Andric // If the incoming block for this value is not in the loop, it means the 594506c3fb27SDimitry Andric // current PHI is not in a loop exit, so we must create a LCSSA PHI for 594606c3fb27SDimitry Andric // the inserted value. 594706c3fb27SDimitry Andric if (auto *I = dyn_cast<Instruction>(FullV)) 594806c3fb27SDimitry Andric if (L->contains(I) && !L->contains(BB)) 594906c3fb27SDimitry Andric InsertedNonLCSSAInsts.push_back(I); 595006c3fb27SDimitry Andric 59510b57cec5SDimitry Andric PN->setIncomingValue(i, FullV); 59520b57cec5SDimitry Andric Pair.first->second = FullV; 59530b57cec5SDimitry Andric } 59540b57cec5SDimitry Andric 59550b57cec5SDimitry Andric // If LSR splits critical edge and phi node has other pending 59560b57cec5SDimitry Andric // fixup operands, we need to update those pending fixups. Otherwise 59570b57cec5SDimitry Andric // formulae will not be implemented completely and some instructions 59580b57cec5SDimitry Andric // will not be eliminated. 59590b57cec5SDimitry Andric if (needUpdateFixups) { 59600fca6ea1SDimitry Andric for (LSRUse &LU : Uses) 59610fca6ea1SDimitry Andric for (LSRFixup &Fixup : LU.Fixups) 59620b57cec5SDimitry Andric // If fixup is supposed to rewrite some operand in the phi 59630b57cec5SDimitry Andric // that was just updated, it may be already moved to 59640b57cec5SDimitry Andric // another phi node. Such fixup requires update. 59650b57cec5SDimitry Andric if (Fixup.UserInst == PN) { 59660b57cec5SDimitry Andric // Check if the operand we try to replace still exists in the 59670b57cec5SDimitry Andric // original phi. 59680b57cec5SDimitry Andric bool foundInOriginalPHI = false; 59690b57cec5SDimitry Andric for (const auto &val : PN->incoming_values()) 59700b57cec5SDimitry Andric if (val == Fixup.OperandValToReplace) { 59710b57cec5SDimitry Andric foundInOriginalPHI = true; 59720b57cec5SDimitry Andric break; 59730b57cec5SDimitry Andric } 59740b57cec5SDimitry Andric 59750b57cec5SDimitry Andric // If fixup operand found in original PHI - nothing to do. 
59760b57cec5SDimitry Andric if (foundInOriginalPHI) 59770b57cec5SDimitry Andric continue; 59780b57cec5SDimitry Andric 59790b57cec5SDimitry Andric // Otherwise it might be moved to another PHI and requires update. 59800b57cec5SDimitry Andric // If fixup operand not found in any of the incoming blocks that 59810b57cec5SDimitry Andric // means we have already rewritten it - nothing to do. 59820b57cec5SDimitry Andric for (const auto &Block : PN->blocks()) 59830b57cec5SDimitry Andric for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I); 59840b57cec5SDimitry Andric ++I) { 59850b57cec5SDimitry Andric PHINode *NewPN = cast<PHINode>(I); 59860b57cec5SDimitry Andric for (const auto &val : NewPN->incoming_values()) 59870b57cec5SDimitry Andric if (val == Fixup.OperandValToReplace) 59880b57cec5SDimitry Andric Fixup.UserInst = NewPN; 59890b57cec5SDimitry Andric } 59900b57cec5SDimitry Andric } 59910b57cec5SDimitry Andric } 59920b57cec5SDimitry Andric } 599306c3fb27SDimitry Andric 599406c3fb27SDimitry Andric formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE); 59950b57cec5SDimitry Andric } 59960b57cec5SDimitry Andric 59970b57cec5SDimitry Andric /// Emit instructions for the leading candidate expression for this LSRUse (this 59980b57cec5SDimitry Andric /// is called "expanding"), and update the UserInst to reference the newly 59990b57cec5SDimitry Andric /// expanded value. 60000b57cec5SDimitry Andric void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF, 6001fcaf7f86SDimitry Andric const Formula &F, 60020b57cec5SDimitry Andric SmallVectorImpl<WeakTrackingVH> &DeadInsts) const { 60030b57cec5SDimitry Andric // First, find an insertion point that dominates UserInst. For PHI nodes, 60040b57cec5SDimitry Andric // find the nearest block which dominates all the relevant uses. 60050b57cec5SDimitry Andric if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) { 6006fcaf7f86SDimitry Andric RewriteForPHI(PN, LU, LF, F, DeadInsts); 60070b57cec5SDimitry Andric } else { 6008fcaf7f86SDimitry Andric Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts); 60090b57cec5SDimitry Andric 60100b57cec5SDimitry Andric // If this is reuse-by-noop-cast, insert the noop cast. 60110b57cec5SDimitry Andric Type *OpTy = LF.OperandValToReplace->getType(); 60120b57cec5SDimitry Andric if (FullV->getType() != OpTy) { 60130b57cec5SDimitry Andric Instruction *Cast = 60140b57cec5SDimitry Andric CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false), 60150fca6ea1SDimitry Andric FullV, OpTy, "tmp", LF.UserInst->getIterator()); 60160b57cec5SDimitry Andric FullV = Cast; 60170b57cec5SDimitry Andric } 60180b57cec5SDimitry Andric 60190b57cec5SDimitry Andric // Update the user. ICmpZero is handled specially here (for now) because 60200b57cec5SDimitry Andric // Expand may have updated one of the operands of the icmp already, and 60210b57cec5SDimitry Andric // its new value may happen to be equal to LF.OperandValToReplace, in 60220b57cec5SDimitry Andric // which case doing replaceUsesOfWith leads to replacing both operands 60230b57cec5SDimitry Andric // with the same value. TODO: Reorganize this. 
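    // Illustrative scenario: if Expand rewrote the icmp's operand 1 to a value
    // that now compares equal to LF.OperandValToReplace, replaceUsesOfWith
    // would clobber both operands, so the ICmpZero case uses setOperand(0, ...)
    // instead.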
60240b57cec5SDimitry Andric     if (LU.Kind == LSRUse::ICmpZero)
60250b57cec5SDimitry Andric       LF.UserInst->setOperand(0, FullV);
60260b57cec5SDimitry Andric     else
60270b57cec5SDimitry Andric       LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
60280b57cec5SDimitry Andric   }
60290b57cec5SDimitry Andric 
60305ffd83dbSDimitry Andric   if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
60315ffd83dbSDimitry Andric     DeadInsts.emplace_back(OperandIsInstr);
60320b57cec5SDimitry Andric }
60330b57cec5SDimitry Andric 
603406c3fb27SDimitry Andric // Try to hoist the IVInc to the loop header if all IVInc users are in
603506c3fb27SDimitry Andric // the loop header. This helps the backend generate post-indexed load/store
603606c3fb27SDimitry Andric // instructions when the latch block is different from the loop header block.
603706c3fb27SDimitry Andric static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
603806c3fb27SDimitry Andric                           const LSRUse &LU, Instruction *IVIncInsertPos,
603906c3fb27SDimitry Andric                           Loop *L) {
604006c3fb27SDimitry Andric   if (LU.Kind != LSRUse::Address)
604106c3fb27SDimitry Andric     return false;
604206c3fb27SDimitry Andric 
604306c3fb27SDimitry Andric   // For now this code does the conservative optimization and only works for
604406c3fb27SDimitry Andric   // the header block. Later we can hoist the IVInc to the block that
604506c3fb27SDimitry Andric   // post-dominates all users.
604606c3fb27SDimitry Andric   BasicBlock *LHeader = L->getHeader();
604706c3fb27SDimitry Andric   if (IVIncInsertPos->getParent() == LHeader)
604806c3fb27SDimitry Andric     return false;
604906c3fb27SDimitry Andric 
605006c3fb27SDimitry Andric   if (!Fixup.OperandValToReplace ||
605106c3fb27SDimitry Andric       any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
605206c3fb27SDimitry Andric         Instruction *UI = cast<Instruction>(U);
605306c3fb27SDimitry Andric         return UI->getParent() != LHeader;
605406c3fb27SDimitry Andric       }))
605506c3fb27SDimitry Andric     return false;
605606c3fb27SDimitry Andric 
605706c3fb27SDimitry Andric   Instruction *I = Fixup.UserInst;
605806c3fb27SDimitry Andric   Type *Ty = I->getType();
605906c3fb27SDimitry Andric   return Ty->isIntegerTy() &&
606006c3fb27SDimitry Andric          ((isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
606106c3fb27SDimitry Andric           (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)));
606206c3fb27SDimitry Andric }
606306c3fb27SDimitry Andric 
60640b57cec5SDimitry Andric /// Rewrite all the fixup locations with new values, following the chosen
60650b57cec5SDimitry Andric /// solution.
60660b57cec5SDimitry Andric void LSRInstance::ImplementSolution(
60670b57cec5SDimitry Andric     const SmallVectorImpl<const Formula *> &Solution) {
60680b57cec5SDimitry Andric   // Keep track of instructions we may have made dead, so that
60690b57cec5SDimitry Andric   // we can remove them after we are done working.
60700b57cec5SDimitry Andric   SmallVector<WeakTrackingVH, 16> DeadInsts;
60710b57cec5SDimitry Andric 
60720b57cec5SDimitry Andric   // Mark phi nodes that terminate chains so the expander tries to reuse them.
60730b57cec5SDimitry Andric   for (const IVChain &Chain : IVChainVec) {
60740b57cec5SDimitry Andric     if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
60750b57cec5SDimitry Andric       Rewriter.setChainedPhi(PN);
60760b57cec5SDimitry Andric   }
60770b57cec5SDimitry Andric 
60780b57cec5SDimitry Andric   // Expand the new value definitions and update the users.
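  // For each fixup, the IV increment is placed either at IVIncInsertPos or,
  // when canHoistIVInc holds, at the header terminator, so targets with
  // post-indexed addressing (e.g. an AArch64 load like "ldr x0, [x1], #8",
  // shown only for illustration) can fold the increment into the access.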
60790b57cec5SDimitry Andric   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
60800b57cec5SDimitry Andric     for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
608106c3fb27SDimitry Andric       Instruction *InsertPos =
608206c3fb27SDimitry Andric           canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
608306c3fb27SDimitry Andric               ? L->getHeader()->getTerminator()
608406c3fb27SDimitry Andric               : IVIncInsertPos;
608506c3fb27SDimitry Andric       Rewriter.setIVIncInsertPos(L, InsertPos);
6086fcaf7f86SDimitry Andric       Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
60870b57cec5SDimitry Andric       Changed = true;
60880b57cec5SDimitry Andric     }
60890b57cec5SDimitry Andric 
60900b57cec5SDimitry Andric   for (const IVChain &Chain : IVChainVec) {
6091fcaf7f86SDimitry Andric     GenerateIVChain(Chain, DeadInsts);
60920b57cec5SDimitry Andric     Changed = true;
60930b57cec5SDimitry Andric   }
6094fe6060f1SDimitry Andric 
6095fe6060f1SDimitry Andric   for (const WeakVH &IV : Rewriter.getInsertedIVs())
6096fe6060f1SDimitry Andric     if (IV && dyn_cast<Instruction>(&*IV)->getParent())
6097fe6060f1SDimitry Andric       ScalarEvolutionIVs.push_back(IV);
6098fe6060f1SDimitry Andric 
60990b57cec5SDimitry Andric   // Clean up after ourselves. This must be done before deleting any
61000b57cec5SDimitry Andric   // instructions.
61010b57cec5SDimitry Andric   Rewriter.clear();
61020b57cec5SDimitry Andric 
61035ffd83dbSDimitry Andric   Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
61045ffd83dbSDimitry Andric                                                                   &TLI, MSSAU);
6105fe6060f1SDimitry Andric 
6106fe6060f1SDimitry Andric   // In our cost analysis above, we assume that each addrec consumes exactly
6107fe6060f1SDimitry Andric   // one register, and arrange to have increments inserted just before the
6108fe6060f1SDimitry Andric   // latch to maximize the chance this is true. However, if we reused
6109fe6060f1SDimitry Andric   // existing IVs, we now need to move the increments to match our
6110fe6060f1SDimitry Andric   // expectations. Otherwise, our cost modeling results in us having
6111fe6060f1SDimitry Andric   // chosen a non-optimal result for the actual schedule. (And yes, this
6112fe6060f1SDimitry Andric   // scheduling decision does impact later codegen.)
6113fe6060f1SDimitry Andric   for (PHINode &PN : L->getHeader()->phis()) {
6114fe6060f1SDimitry Andric     BinaryOperator *BO = nullptr;
6115fe6060f1SDimitry Andric     Value *Start = nullptr, *Step = nullptr;
6116fe6060f1SDimitry Andric     if (!matchSimpleRecurrence(&PN, BO, Start, Step))
6117fe6060f1SDimitry Andric       continue;
6118fe6060f1SDimitry Andric 
6119fe6060f1SDimitry Andric     switch (BO->getOpcode()) {
6120fe6060f1SDimitry Andric     case Instruction::Sub:
6121fe6060f1SDimitry Andric       if (BO->getOperand(0) != &PN)
6122fe6060f1SDimitry Andric         // sub is non-commutative - match handling elsewhere in LSR
6123fe6060f1SDimitry Andric         continue;
6124fe6060f1SDimitry Andric       break;
6125fe6060f1SDimitry Andric     case Instruction::Add:
6126fe6060f1SDimitry Andric       break;
6127fe6060f1SDimitry Andric     default:
6128fe6060f1SDimitry Andric       continue;
6129fe6060f1SDimitry Andric     };
6130fe6060f1SDimitry Andric 
6131fe6060f1SDimitry Andric     if (!isa<Constant>(Step))
6132fe6060f1SDimitry Andric       // If not a constant step, might increase register pressure
6133fe6060f1SDimitry Andric       // (We assume constants have been canonicalized to RHS)
6134fe6060f1SDimitry Andric       continue;
6135fe6060f1SDimitry Andric 
6136fe6060f1SDimitry Andric     if (BO->getParent() == IVIncInsertPos->getParent())
6137fe6060f1SDimitry Andric       // Only bother moving across blocks.
Isel can handle block local case. 6138fe6060f1SDimitry Andric continue; 6139fe6060f1SDimitry Andric 6140fe6060f1SDimitry Andric // Can we legally schedule inc at the desired point? 6141fe6060f1SDimitry Andric if (!llvm::all_of(BO->uses(), 6142fe6060f1SDimitry Andric [&](Use &U) {return DT.dominates(IVIncInsertPos, U);})) 6143fe6060f1SDimitry Andric continue; 6144fe6060f1SDimitry Andric BO->moveBefore(IVIncInsertPos); 6145fe6060f1SDimitry Andric Changed = true; 6146fe6060f1SDimitry Andric } 6147fe6060f1SDimitry Andric 6148fe6060f1SDimitry Andric 61490b57cec5SDimitry Andric } 61500b57cec5SDimitry Andric 61510b57cec5SDimitry Andric LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, 61520b57cec5SDimitry Andric DominatorTree &DT, LoopInfo &LI, 61530b57cec5SDimitry Andric const TargetTransformInfo &TTI, AssumptionCache &AC, 61545ffd83dbSDimitry Andric TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU) 61555ffd83dbSDimitry Andric : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L), 6156fcaf7f86SDimitry Andric MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0 6157fcaf7f86SDimitry Andric ? PreferredAddresingMode 6158fcaf7f86SDimitry Andric : TTI.getPreferredAddressingMode(L, &SE)), 61590fca6ea1SDimitry Andric Rewriter(SE, L->getHeader()->getDataLayout(), "lsr", false), 6160bdd1243dSDimitry Andric BaselineCost(L, SE, TTI, AMK) { 61610b57cec5SDimitry Andric // If LoopSimplify form is not available, stay out of trouble. 61620b57cec5SDimitry Andric if (!L->isLoopSimplifyForm()) 61630b57cec5SDimitry Andric return; 61640b57cec5SDimitry Andric 61650b57cec5SDimitry Andric // If there's no interesting work to be done, bail early. 61660b57cec5SDimitry Andric if (IU.empty()) return; 61670b57cec5SDimitry Andric 61680b57cec5SDimitry Andric // If there's too much analysis to be done, bail early. We won't be able to 61690b57cec5SDimitry Andric // model the problem anyway. 61700b57cec5SDimitry Andric unsigned NumUsers = 0; 61710b57cec5SDimitry Andric for (const IVStrideUse &U : IU) { 61720b57cec5SDimitry Andric if (++NumUsers > MaxIVUsers) { 61730b57cec5SDimitry Andric (void)U; 61740b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U 61750b57cec5SDimitry Andric << "\n"); 61760b57cec5SDimitry Andric return; 61770b57cec5SDimitry Andric } 61780b57cec5SDimitry Andric // Bail out if we have a PHI on an EHPad that gets a value from a 61790b57cec5SDimitry Andric // CatchSwitchInst. Because the CatchSwitchInst cannot be split, there is 61800b57cec5SDimitry Andric // no good place to stick any instructions. 61810b57cec5SDimitry Andric if (auto *PN = dyn_cast<PHINode>(U.getUser())) { 61820b57cec5SDimitry Andric auto *FirstNonPHI = PN->getParent()->getFirstNonPHI(); 61830b57cec5SDimitry Andric if (isa<FuncletPadInst>(FirstNonPHI) || 61840b57cec5SDimitry Andric isa<CatchSwitchInst>(FirstNonPHI)) 61850b57cec5SDimitry Andric for (BasicBlock *PredBB : PN->blocks()) 61860b57cec5SDimitry Andric if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI())) 61870b57cec5SDimitry Andric return; 61880b57cec5SDimitry Andric } 61890b57cec5SDimitry Andric } 61900b57cec5SDimitry Andric 61910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "\nLSR on loop "; 61920b57cec5SDimitry Andric L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false); 61930b57cec5SDimitry Andric dbgs() << ":\n"); 61940b57cec5SDimitry Andric 6195fcaf7f86SDimitry Andric // Configure SCEVExpander already now, so the correct mode is used for 6196fcaf7f86SDimitry Andric // isSafeToExpand() checks. 
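  // (In canonical mode the expander would prefer rewriting expansions in
  // terms of canonical induction variables, which could undo the literal
  // forms LSR has deliberately chosen; both knobs are therefore flipped
  // before any expansion or expansion-safety query runs.)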
6197fcaf7f86SDimitry Andric #ifndef NDEBUG
6198fcaf7f86SDimitry Andric   Rewriter.setDebugType(DEBUG_TYPE);
6199fcaf7f86SDimitry Andric #endif
6200fcaf7f86SDimitry Andric   Rewriter.disableCanonicalMode();
6201fcaf7f86SDimitry Andric   Rewriter.enableLSRMode();
6202fcaf7f86SDimitry Andric 
62030b57cec5SDimitry Andric   // First, perform some low-level loop optimizations.
62040b57cec5SDimitry Andric   OptimizeShadowIV();
62050b57cec5SDimitry Andric   OptimizeLoopTermCond();
62060b57cec5SDimitry Andric 
62070b57cec5SDimitry Andric   // If loop preparation eliminates all interesting IV users, bail.
62080b57cec5SDimitry Andric   if (IU.empty()) return;
62090b57cec5SDimitry Andric 
62100b57cec5SDimitry Andric   // Skip nested loops until we can model them better with formulae.
6211e8d8bef9SDimitry Andric   if (!L->isInnermost()) {
62120b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
62130b57cec5SDimitry Andric     return;
62140b57cec5SDimitry Andric   }
62150b57cec5SDimitry Andric 
62160b57cec5SDimitry Andric   // Start collecting data and preparing for the solver.
6217e8d8bef9SDimitry Andric   // If the number of registers is not the major cost, we cannot benefit from
6218e8d8bef9SDimitry Andric   // the current profitable chain optimization, which is based on the number
6219e8d8bef9SDimitry Andric   // of registers.
6220e8d8bef9SDimitry Andric   // FIXME: add profitable chain optimization for other kinds of major cost,
6221e8d8bef9SDimitry Andric   // for example the number of instructions.
6222e8d8bef9SDimitry Andric   if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)
62230b57cec5SDimitry Andric     CollectChains();
62240b57cec5SDimitry Andric   CollectInterestingTypesAndFactors();
62250b57cec5SDimitry Andric   CollectFixupsAndInitialFormulae();
62260b57cec5SDimitry Andric   CollectLoopInvariantFixupsAndFormulae();
62270b57cec5SDimitry Andric 
62280b57cec5SDimitry Andric   if (Uses.empty())
62290b57cec5SDimitry Andric     return;
62300b57cec5SDimitry Andric 
62310b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
62320b57cec5SDimitry Andric              print_uses(dbgs()));
62330fca6ea1SDimitry Andric   LLVM_DEBUG(dbgs() << "The baseline solution requires ";
62340fca6ea1SDimitry Andric              BaselineCost.print(dbgs()); dbgs() << "\n");
62350b57cec5SDimitry Andric 
62360b57cec5SDimitry Andric   // Now use the reuse data to generate a bunch of interesting ways
62370b57cec5SDimitry Andric   // to formulate the values needed for the uses.
62380b57cec5SDimitry Andric   GenerateAllReuseFormulae();
62390b57cec5SDimitry Andric 
62400b57cec5SDimitry Andric   FilterOutUndesirableDedicatedRegisters();
62410b57cec5SDimitry Andric   NarrowSearchSpaceUsingHeuristics();
62420b57cec5SDimitry Andric 
62430b57cec5SDimitry Andric   SmallVector<const Formula *, 8> Solution;
62440b57cec5SDimitry Andric   Solve(Solution);
62450b57cec5SDimitry Andric 
62460b57cec5SDimitry Andric   // Release memory that is no longer needed.
62470b57cec5SDimitry Andric   Factors.clear();
62480b57cec5SDimitry Andric   Types.clear();
62490b57cec5SDimitry Andric   RegUses.clear();
62500b57cec5SDimitry Andric 
62510b57cec5SDimitry Andric   if (Solution.empty())
62520b57cec5SDimitry Andric     return;
62530b57cec5SDimitry Andric 
62540b57cec5SDimitry Andric #ifndef NDEBUG
62550b57cec5SDimitry Andric   // Formulae should be legal.
62560b57cec5SDimitry Andric for (const LSRUse &LU : Uses) { 62570b57cec5SDimitry Andric for (const Formula &F : LU.Formulae) 62580b57cec5SDimitry Andric assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, 62590b57cec5SDimitry Andric F) && "Illegal formula generated!"); 62600b57cec5SDimitry Andric }; 62610b57cec5SDimitry Andric #endif 62620b57cec5SDimitry Andric 62630b57cec5SDimitry Andric // Now that we've decided what we want, make it so. 62640b57cec5SDimitry Andric ImplementSolution(Solution); 62650b57cec5SDimitry Andric } 62660b57cec5SDimitry Andric 62670b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) 62680b57cec5SDimitry Andric void LSRInstance::print_factors_and_types(raw_ostream &OS) const { 62690b57cec5SDimitry Andric if (Factors.empty() && Types.empty()) return; 62700b57cec5SDimitry Andric 62710b57cec5SDimitry Andric OS << "LSR has identified the following interesting factors and types: "; 62720b57cec5SDimitry Andric bool First = true; 62730b57cec5SDimitry Andric 62740b57cec5SDimitry Andric for (int64_t Factor : Factors) { 62750b57cec5SDimitry Andric if (!First) OS << ", "; 62760b57cec5SDimitry Andric First = false; 62770b57cec5SDimitry Andric OS << '*' << Factor; 62780b57cec5SDimitry Andric } 62790b57cec5SDimitry Andric 62800b57cec5SDimitry Andric for (Type *Ty : Types) { 62810b57cec5SDimitry Andric if (!First) OS << ", "; 62820b57cec5SDimitry Andric First = false; 62830b57cec5SDimitry Andric OS << '(' << *Ty << ')'; 62840b57cec5SDimitry Andric } 62850b57cec5SDimitry Andric OS << '\n'; 62860b57cec5SDimitry Andric } 62870b57cec5SDimitry Andric 62880b57cec5SDimitry Andric void LSRInstance::print_fixups(raw_ostream &OS) const { 62890b57cec5SDimitry Andric OS << "LSR is examining the following fixup sites:\n"; 62900b57cec5SDimitry Andric for (const LSRUse &LU : Uses) 62910b57cec5SDimitry Andric for (const LSRFixup &LF : LU.Fixups) { 62920b57cec5SDimitry Andric dbgs() << " "; 62930b57cec5SDimitry Andric LF.print(OS); 62940b57cec5SDimitry Andric OS << '\n'; 62950b57cec5SDimitry Andric } 62960b57cec5SDimitry Andric } 62970b57cec5SDimitry Andric 62980b57cec5SDimitry Andric void LSRInstance::print_uses(raw_ostream &OS) const { 62990b57cec5SDimitry Andric OS << "LSR is examining the following uses:\n"; 63000b57cec5SDimitry Andric for (const LSRUse &LU : Uses) { 63010b57cec5SDimitry Andric dbgs() << " "; 63020b57cec5SDimitry Andric LU.print(OS); 63030b57cec5SDimitry Andric OS << '\n'; 63040b57cec5SDimitry Andric for (const Formula &F : LU.Formulae) { 63050b57cec5SDimitry Andric OS << " "; 63060b57cec5SDimitry Andric F.print(OS); 63070b57cec5SDimitry Andric OS << '\n'; 63080b57cec5SDimitry Andric } 63090b57cec5SDimitry Andric } 63100b57cec5SDimitry Andric } 63110b57cec5SDimitry Andric 63120b57cec5SDimitry Andric void LSRInstance::print(raw_ostream &OS) const { 63130b57cec5SDimitry Andric print_factors_and_types(OS); 63140b57cec5SDimitry Andric print_fixups(OS); 63150b57cec5SDimitry Andric print_uses(OS); 63160b57cec5SDimitry Andric } 63170b57cec5SDimitry Andric 63180b57cec5SDimitry Andric LLVM_DUMP_METHOD void LSRInstance::dump() const { 63190b57cec5SDimitry Andric print(errs()); errs() << '\n'; 63200b57cec5SDimitry Andric } 63210b57cec5SDimitry Andric #endif 63220b57cec5SDimitry Andric 63230b57cec5SDimitry Andric namespace { 63240b57cec5SDimitry Andric 63250b57cec5SDimitry Andric class LoopStrengthReduce : public LoopPass { 63260b57cec5SDimitry Andric public: 63270b57cec5SDimitry Andric static char ID; // Pass ID, replacement for typeid 
63280b57cec5SDimitry Andric 63290b57cec5SDimitry Andric LoopStrengthReduce(); 63300b57cec5SDimitry Andric 63310b57cec5SDimitry Andric private: 63320b57cec5SDimitry Andric bool runOnLoop(Loop *L, LPPassManager &LPM) override; 63330b57cec5SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override; 63340b57cec5SDimitry Andric }; 63350b57cec5SDimitry Andric 63360b57cec5SDimitry Andric } // end anonymous namespace 63370b57cec5SDimitry Andric 63380b57cec5SDimitry Andric LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) { 63390b57cec5SDimitry Andric initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry()); 63400b57cec5SDimitry Andric } 63410b57cec5SDimitry Andric 63420b57cec5SDimitry Andric void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const { 63430b57cec5SDimitry Andric // We split critical edges, so we change the CFG. However, we do update 63440b57cec5SDimitry Andric // many analyses if they are around. 63450b57cec5SDimitry Andric AU.addPreservedID(LoopSimplifyID); 63460b57cec5SDimitry Andric 63470b57cec5SDimitry Andric AU.addRequired<LoopInfoWrapperPass>(); 63480b57cec5SDimitry Andric AU.addPreserved<LoopInfoWrapperPass>(); 63490b57cec5SDimitry Andric AU.addRequiredID(LoopSimplifyID); 63500b57cec5SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 63510b57cec5SDimitry Andric AU.addPreserved<DominatorTreeWrapperPass>(); 63520b57cec5SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>(); 63530b57cec5SDimitry Andric AU.addPreserved<ScalarEvolutionWrapperPass>(); 63540b57cec5SDimitry Andric AU.addRequired<AssumptionCacheTracker>(); 63550b57cec5SDimitry Andric AU.addRequired<TargetLibraryInfoWrapperPass>(); 63560b57cec5SDimitry Andric // Requiring LoopSimplify a second time here prevents IVUsers from running 63570b57cec5SDimitry Andric // twice, since LoopSimplify was invalidated by running ScalarEvolution. 63580b57cec5SDimitry Andric AU.addRequiredID(LoopSimplifyID); 63590b57cec5SDimitry Andric AU.addRequired<IVUsersWrapperPass>(); 63600b57cec5SDimitry Andric AU.addPreserved<IVUsersWrapperPass>(); 63610b57cec5SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 63625ffd83dbSDimitry Andric AU.addPreserved<MemorySSAWrapperPass>(); 63630b57cec5SDimitry Andric } 63640b57cec5SDimitry Andric 6365349cc55cSDimitry Andric namespace { 636681ad6265SDimitry Andric 636781ad6265SDimitry Andric /// Enables more convenient iteration over a DWARF expression vector. 
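/// For example, it permits range-based iteration such as
///   for (auto Op : ToDwarfOpIter(Expr))
///     if (Op.getOp() == dwarf::DW_OP_LLVM_arg) { ... }
/// instead of decoding the raw uint64_t buffer by hand.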
636881ad6265SDimitry Andric static iterator_range<llvm::DIExpression::expr_op_iterator> 636981ad6265SDimitry Andric ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) { 637081ad6265SDimitry Andric llvm::DIExpression::expr_op_iterator Begin = 637181ad6265SDimitry Andric llvm::DIExpression::expr_op_iterator(Expr.begin()); 637281ad6265SDimitry Andric llvm::DIExpression::expr_op_iterator End = 637381ad6265SDimitry Andric llvm::DIExpression::expr_op_iterator(Expr.end()); 637481ad6265SDimitry Andric return {Begin, End}; 637581ad6265SDimitry Andric } 637681ad6265SDimitry Andric 6377fe6060f1SDimitry Andric struct SCEVDbgValueBuilder { 6378fe6060f1SDimitry Andric SCEVDbgValueBuilder() = default; 637981ad6265SDimitry Andric SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); } 638081ad6265SDimitry Andric 638181ad6265SDimitry Andric void clone(const SCEVDbgValueBuilder &Base) { 638281ad6265SDimitry Andric LocationOps = Base.LocationOps; 6383fe6060f1SDimitry Andric Expr = Base.Expr; 6384fe6060f1SDimitry Andric } 6385e8d8bef9SDimitry Andric 638681ad6265SDimitry Andric void clear() { 638781ad6265SDimitry Andric LocationOps.clear(); 638881ad6265SDimitry Andric Expr.clear(); 638981ad6265SDimitry Andric } 639081ad6265SDimitry Andric 6391fe6060f1SDimitry Andric /// The DIExpression as we translate the SCEV. 6392fe6060f1SDimitry Andric SmallVector<uint64_t, 6> Expr; 6393fe6060f1SDimitry Andric /// The location ops of the DIExpression. 639481ad6265SDimitry Andric SmallVector<Value *, 2> LocationOps; 6395fe6060f1SDimitry Andric 6396fe6060f1SDimitry Andric void pushOperator(uint64_t Op) { Expr.push_back(Op); } 6397fe6060f1SDimitry Andric void pushUInt(uint64_t Operand) { Expr.push_back(Operand); } 6398fe6060f1SDimitry Andric 6399fe6060f1SDimitry Andric /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value 6400fe6060f1SDimitry Andric /// in the set of values referenced by the expression. 640181ad6265SDimitry Andric void pushLocation(llvm::Value *V) { 6402fe6060f1SDimitry Andric Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg); 6403bdd1243dSDimitry Andric auto *It = llvm::find(LocationOps, V); 6404fe6060f1SDimitry Andric unsigned ArgIndex = 0; 640581ad6265SDimitry Andric if (It != LocationOps.end()) { 640681ad6265SDimitry Andric ArgIndex = std::distance(LocationOps.begin(), It); 6407fe6060f1SDimitry Andric } else { 640881ad6265SDimitry Andric ArgIndex = LocationOps.size(); 640981ad6265SDimitry Andric LocationOps.push_back(V); 6410fe6060f1SDimitry Andric } 6411fe6060f1SDimitry Andric Expr.push_back(ArgIndex); 6412fe6060f1SDimitry Andric } 6413fe6060f1SDimitry Andric 6414fe6060f1SDimitry Andric void pushValue(const SCEVUnknown *U) { 6415fe6060f1SDimitry Andric llvm::Value *V = cast<SCEVUnknown>(U)->getValue(); 641681ad6265SDimitry Andric pushLocation(V); 6417fe6060f1SDimitry Andric } 6418fe6060f1SDimitry Andric 64196e75b2fbSDimitry Andric bool pushConst(const SCEVConstant *C) { 642006c3fb27SDimitry Andric if (C->getAPInt().getSignificantBits() > 64) 64216e75b2fbSDimitry Andric return false; 6422fe6060f1SDimitry Andric Expr.push_back(llvm::dwarf::DW_OP_consts); 6423fe6060f1SDimitry Andric Expr.push_back(C->getAPInt().getSExtValue()); 64246e75b2fbSDimitry Andric return true; 6425fe6060f1SDimitry Andric } 6426fe6060f1SDimitry Andric 642781ad6265SDimitry Andric // Iterating the expression as DWARF ops is convenient when updating 642881ad6265SDimitry Andric // DWARF_OP_LLVM_args. 
642981ad6265SDimitry Andric   iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
643081ad6265SDimitry Andric     return ToDwarfOpIter(Expr);
643181ad6265SDimitry Andric   }
643281ad6265SDimitry Andric 
6433fe6060f1SDimitry Andric   /// Several SCEV types are sequences of the same arithmetic operator applied
6434fe6060f1SDimitry Andric   /// to constants and values that may be extended or truncated.
6435fe6060f1SDimitry Andric   bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
6436fe6060f1SDimitry Andric                           uint64_t DwarfOp) {
6437fe6060f1SDimitry Andric     assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6438fe6060f1SDimitry Andric            "Expected arithmetic SCEV type");
6439fe6060f1SDimitry Andric     bool Success = true;
6440fe6060f1SDimitry Andric     unsigned EmitOperator = 0;
6441bdd1243dSDimitry Andric     for (const auto &Op : CommExpr->operands()) {
6442fe6060f1SDimitry Andric       Success &= pushSCEV(Op);
6443fe6060f1SDimitry Andric 
6444fe6060f1SDimitry Andric       if (EmitOperator >= 1)
6445fe6060f1SDimitry Andric         pushOperator(DwarfOp);
6446fe6060f1SDimitry Andric       ++EmitOperator;
6447fe6060f1SDimitry Andric     }
6448fe6060f1SDimitry Andric     return Success;
6449fe6060f1SDimitry Andric   }
6450fe6060f1SDimitry Andric 
6451fe6060f1SDimitry Andric   // TODO: Identify and omit noop casts.
6452fe6060f1SDimitry Andric   bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
6453fe6060f1SDimitry Andric     const llvm::SCEV *Inner = C->getOperand(0);
6454fe6060f1SDimitry Andric     const llvm::Type *Type = C->getType();
6455fe6060f1SDimitry Andric     uint64_t ToWidth = Type->getIntegerBitWidth();
6456fe6060f1SDimitry Andric     bool Success = pushSCEV(Inner);
6457fe6060f1SDimitry Andric     uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
6458fe6060f1SDimitry Andric                           IsSigned ? llvm::dwarf::DW_ATE_signed
6459fe6060f1SDimitry Andric                                    : llvm::dwarf::DW_ATE_unsigned};
6460fe6060f1SDimitry Andric     for (const auto &Op : CastOps)
6461fe6060f1SDimitry Andric       pushOperator(Op);
6462fe6060f1SDimitry Andric     return Success;
6463fe6060f1SDimitry Andric   }
6464fe6060f1SDimitry Andric 
6465fe6060f1SDimitry Andric   // TODO: MinMax - although these haven't been encountered in the test suite.
6466fe6060f1SDimitry Andric   bool pushSCEV(const llvm::SCEV *S) {
6467fe6060f1SDimitry Andric     bool Success = true;
6468fe6060f1SDimitry Andric     if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
64696e75b2fbSDimitry Andric       Success &= pushConst(StartInt);
6470fe6060f1SDimitry Andric 
6471fe6060f1SDimitry Andric     } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6472fe6060f1SDimitry Andric       if (!U->getValue())
6473fe6060f1SDimitry Andric         return false;
647481ad6265SDimitry Andric       pushLocation(U->getValue());
6475fe6060f1SDimitry Andric 
6476fe6060f1SDimitry Andric     } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6477fe6060f1SDimitry Andric       Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6478fe6060f1SDimitry Andric 
6479fe6060f1SDimitry Andric     } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6480fe6060f1SDimitry Andric       Success &= pushSCEV(UDiv->getLHS());
6481fe6060f1SDimitry Andric       Success &= pushSCEV(UDiv->getRHS());
6482fe6060f1SDimitry Andric       pushOperator(llvm::dwarf::DW_OP_div);
6483fe6060f1SDimitry Andric 
6484fe6060f1SDimitry Andric     } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6485fe6060f1SDimitry Andric       // Assert if a new and unknown SCEVCastExpr type is encountered.
6486fe6060f1SDimitry Andric assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) || 6487fe6060f1SDimitry Andric isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) && 6488fe6060f1SDimitry Andric "Unexpected cast type in SCEV."); 6489fe6060f1SDimitry Andric Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast))); 6490fe6060f1SDimitry Andric 6491fe6060f1SDimitry Andric } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) { 6492fe6060f1SDimitry Andric Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus); 6493fe6060f1SDimitry Andric 6494fe6060f1SDimitry Andric } else if (isa<SCEVAddRecExpr>(S)) { 6495fe6060f1SDimitry Andric // Nested SCEVAddRecExpr are generated by nested loops and are currently 6496fe6060f1SDimitry Andric // unsupported. 6497fe6060f1SDimitry Andric return false; 6498fe6060f1SDimitry Andric 6499fe6060f1SDimitry Andric } else { 6500fe6060f1SDimitry Andric return false; 6501fe6060f1SDimitry Andric } 6502fe6060f1SDimitry Andric return Success; 6503fe6060f1SDimitry Andric } 6504fe6060f1SDimitry Andric 6505fe6060f1SDimitry Andric /// Return true if the combination of arithmetic operator and underlying 6506fe6060f1SDimitry Andric /// SCEV constant value is an identity function. 6507fe6060f1SDimitry Andric bool isIdentityFunction(uint64_t Op, const SCEV *S) { 6508fe6060f1SDimitry Andric if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) { 650906c3fb27SDimitry Andric if (C->getAPInt().getSignificantBits() > 64) 65106e75b2fbSDimitry Andric return false; 6511fe6060f1SDimitry Andric int64_t I = C->getAPInt().getSExtValue(); 6512fe6060f1SDimitry Andric switch (Op) { 6513fe6060f1SDimitry Andric case llvm::dwarf::DW_OP_plus: 6514fe6060f1SDimitry Andric case llvm::dwarf::DW_OP_minus: 6515fe6060f1SDimitry Andric return I == 0; 6516fe6060f1SDimitry Andric case llvm::dwarf::DW_OP_mul: 6517fe6060f1SDimitry Andric case llvm::dwarf::DW_OP_div: 6518fe6060f1SDimitry Andric return I == 1; 6519fe6060f1SDimitry Andric } 6520fe6060f1SDimitry Andric } 6521fe6060f1SDimitry Andric return false; 6522fe6060f1SDimitry Andric } 6523fe6060f1SDimitry Andric 6524fe6060f1SDimitry Andric /// Convert a SCEV of a value to a DIExpression that is pushed onto the 6525fe6060f1SDimitry Andric /// builder's expression stack. The stack should already contain an 6526fe6060f1SDimitry Andric /// expression for the iteration count, so that it can be multiplied by 6527fe6060f1SDimitry Andric /// the stride and added to the start. 6528fe6060f1SDimitry Andric /// Components of the expression are omitted if they are an identity function. 6529fe6060f1SDimitry Andric /// Chain (non-affine) SCEVs are not supported. 6530fe6060f1SDimitry Andric bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) { 6531fe6060f1SDimitry Andric assert(SAR.isAffine() && "Expected affine SCEV"); 6532fe6060f1SDimitry Andric // TODO: Is this check needed? 6533fe6060f1SDimitry Andric if (isa<SCEVAddRecExpr>(SAR.getStart())) 6534fe6060f1SDimitry Andric return false; 6535fe6060f1SDimitry Andric 6536fe6060f1SDimitry Andric const SCEV *Start = SAR.getStart(); 6537fe6060f1SDimitry Andric const SCEV *Stride = SAR.getStepRecurrence(SE); 6538fe6060f1SDimitry Andric 6539fe6060f1SDimitry Andric // Skip pushing arithmetic noops. 
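    // E.g. a stride of 1 would only append "1 DW_OP_mul", and a start of 0
    // would only append "0 DW_OP_plus"; isIdentityFunction lets both be
    // dropped.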
6540fe6060f1SDimitry Andric     if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6541fe6060f1SDimitry Andric       if (!pushSCEV(Stride))
6542fe6060f1SDimitry Andric         return false;
6543fe6060f1SDimitry Andric       pushOperator(llvm::dwarf::DW_OP_mul);
6544fe6060f1SDimitry Andric     }
6545fe6060f1SDimitry Andric     if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6546fe6060f1SDimitry Andric       if (!pushSCEV(Start))
6547fe6060f1SDimitry Andric         return false;
6548fe6060f1SDimitry Andric       pushOperator(llvm::dwarf::DW_OP_plus);
6549fe6060f1SDimitry Andric     }
6550fe6060f1SDimitry Andric     return true;
6551fe6060f1SDimitry Andric   }
6552fe6060f1SDimitry Andric 
655381ad6265SDimitry Andric   /// Create an expression that is an offset from a value (usually the IV).
655481ad6265SDimitry Andric   void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
655581ad6265SDimitry Andric     pushLocation(OffsetValue);
655681ad6265SDimitry Andric     DIExpression::appendOffset(Expr, Offset);
655781ad6265SDimitry Andric     LLVM_DEBUG(
655881ad6265SDimitry Andric         dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
655981ad6265SDimitry Andric                << std::to_string(Offset) << "\n");
656081ad6265SDimitry Andric   }
656181ad6265SDimitry Andric 
656281ad6265SDimitry Andric   /// Combine a translation of the SCEV and the IV to create an expression that
656381ad6265SDimitry Andric   /// recovers a location's value.
656481ad6265SDimitry Andric   /// Returns true if an expression was created.
656581ad6265SDimitry Andric   bool createIterCountExpr(const SCEV *S,
656681ad6265SDimitry Andric                            const SCEVDbgValueBuilder &IterationCount,
656781ad6265SDimitry Andric                            ScalarEvolution &SE) {
656881ad6265SDimitry Andric     // SCEVs for SSA values are most frequently of the form
656981ad6265SDimitry Andric     // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
657081ad6265SDimitry Andric     // This is because %a is a PHI node that is not the IV. However, these
657181ad6265SDimitry Andric     // SCEVs have not been observed to result in debuginfo-lossy optimisations,
657281ad6265SDimitry Andric     // so it's not expected this point will be reached.
657381ad6265SDimitry Andric     if (!isa<SCEVAddRecExpr>(S))
657481ad6265SDimitry Andric       return false;
657581ad6265SDimitry Andric 
657681ad6265SDimitry Andric     LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
657781ad6265SDimitry Andric                       << '\n');
657881ad6265SDimitry Andric 
657981ad6265SDimitry Andric     const auto *Rec = cast<SCEVAddRecExpr>(S);
658081ad6265SDimitry Andric     if (!Rec->isAffine())
658181ad6265SDimitry Andric       return false;
658281ad6265SDimitry Andric 
658381ad6265SDimitry Andric     if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
658481ad6265SDimitry Andric       return false;
658581ad6265SDimitry Andric 
658681ad6265SDimitry Andric     // Initialise a new builder with the iteration count expression. In
658781ad6265SDimitry Andric     // combination with the value's SCEV this enables recovery.
658881ad6265SDimitry Andric     clone(IterationCount);
658981ad6265SDimitry Andric     if (!SCEVToValueExpr(*Rec, SE))
659081ad6265SDimitry Andric       return false;
659181ad6265SDimitry Andric 
659281ad6265SDimitry Andric     return true;
659381ad6265SDimitry Andric   }
659481ad6265SDimitry Andric 
6595fe6060f1SDimitry Andric   /// Convert a SCEV of a value to a DIExpression that is pushed onto the
6596fe6060f1SDimitry Andric   /// builder's expression stack.
The stack should already contain an 6597fe6060f1SDimitry Andric /// expression for the iteration count, so that it can be multiplied by 6598fe6060f1SDimitry Andric /// the stride and added to the start. 6599fe6060f1SDimitry Andric /// Components of the expression are omitted if they are an identity function. 6600fe6060f1SDimitry Andric bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR, 6601fe6060f1SDimitry Andric ScalarEvolution &SE) { 6602fe6060f1SDimitry Andric assert(SAR.isAffine() && "Expected affine SCEV"); 6603fe6060f1SDimitry Andric if (isa<SCEVAddRecExpr>(SAR.getStart())) { 6604fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: " 6605fe6060f1SDimitry Andric << SAR << '\n'); 6606fe6060f1SDimitry Andric return false; 6607fe6060f1SDimitry Andric } 6608fe6060f1SDimitry Andric const SCEV *Start = SAR.getStart(); 6609fe6060f1SDimitry Andric const SCEV *Stride = SAR.getStepRecurrence(SE); 6610fe6060f1SDimitry Andric 6611fe6060f1SDimitry Andric // Skip pushing arithmetic noops. 6612fe6060f1SDimitry Andric if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) { 6613fe6060f1SDimitry Andric if (!pushSCEV(Start)) 6614fe6060f1SDimitry Andric return false; 6615fe6060f1SDimitry Andric pushOperator(llvm::dwarf::DW_OP_minus); 6616fe6060f1SDimitry Andric } 6617fe6060f1SDimitry Andric if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) { 6618fe6060f1SDimitry Andric if (!pushSCEV(Stride)) 6619fe6060f1SDimitry Andric return false; 6620fe6060f1SDimitry Andric pushOperator(llvm::dwarf::DW_OP_div); 6621fe6060f1SDimitry Andric } 6622fe6060f1SDimitry Andric return true; 6623fe6060f1SDimitry Andric } 662481ad6265SDimitry Andric 662581ad6265SDimitry Andric // Append the current expression and locations to a location list and an 662681ad6265SDimitry Andric // expression list. Modify the DW_OP_LLVM_arg indexes to account for 662781ad6265SDimitry Andric // the locations already present in the destination list. 662881ad6265SDimitry Andric void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr, 662981ad6265SDimitry Andric SmallVectorImpl<Value *> &DestLocations) { 663081ad6265SDimitry Andric assert(!DestLocations.empty() && 663181ad6265SDimitry Andric "Expected the locations vector to contain the IV"); 663281ad6265SDimitry Andric // The DWARF_OP_LLVM_arg arguments of the expression being appended must be 663381ad6265SDimitry Andric // modified to account for the locations already in the destination vector. 663481ad6265SDimitry Andric // All builders contain the IV as the first location op. 663581ad6265SDimitry Andric assert(!LocationOps.empty() && 663681ad6265SDimitry Andric "Expected the location ops to contain the IV."); 663781ad6265SDimitry Andric // DestIndexMap[n] contains the index in DestLocations for the nth 663881ad6265SDimitry Andric // location in this SCEVDbgValueBuilder. 663981ad6265SDimitry Andric SmallVector<uint64_t, 2> DestIndexMap; 664081ad6265SDimitry Andric for (const auto &Op : LocationOps) { 664181ad6265SDimitry Andric auto It = find(DestLocations, Op); 664281ad6265SDimitry Andric if (It != DestLocations.end()) { 664381ad6265SDimitry Andric // Location already exists in DestLocations, reuse existing ArgIndex. 664481ad6265SDimitry Andric DestIndexMap.push_back(std::distance(DestLocations.begin(), It)); 664581ad6265SDimitry Andric continue; 664681ad6265SDimitry Andric } 664781ad6265SDimitry Andric // Location is not in DestLocations, add it. 
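      // (Its new ArgIndex is DestLocations' current size, recorded so that the
      // DW_OP_LLVM_arg rewrite below can remap references to it.)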
664881ad6265SDimitry Andric DestIndexMap.push_back(DestLocations.size()); 664981ad6265SDimitry Andric DestLocations.push_back(Op); 665081ad6265SDimitry Andric } 665181ad6265SDimitry Andric 665281ad6265SDimitry Andric for (const auto &Op : expr_ops()) { 665381ad6265SDimitry Andric if (Op.getOp() != dwarf::DW_OP_LLVM_arg) { 665481ad6265SDimitry Andric Op.appendToVector(DestExpr); 665581ad6265SDimitry Andric continue; 665681ad6265SDimitry Andric } 665781ad6265SDimitry Andric 665881ad6265SDimitry Andric DestExpr.push_back(dwarf::DW_OP_LLVM_arg); 665981ad6265SDimitry Andric // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV, 666081ad6265SDimitry Andric // DestIndexMap[n] contains its new index in DestLocations. 666181ad6265SDimitry Andric uint64_t NewIndex = DestIndexMap[Op.getArg(0)]; 666281ad6265SDimitry Andric DestExpr.push_back(NewIndex); 666381ad6265SDimitry Andric } 666481ad6265SDimitry Andric } 6665fe6060f1SDimitry Andric }; 6666fe6060f1SDimitry Andric 666781ad6265SDimitry Andric /// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs 666881ad6265SDimitry Andric /// and DIExpression. 6669fe6060f1SDimitry Andric struct DVIRecoveryRec { 667081ad6265SDimitry Andric DVIRecoveryRec(DbgValueInst *DbgValue) 66717a6dacacSDimitry Andric : DbgRef(DbgValue), Expr(DbgValue->getExpression()), 667281ad6265SDimitry Andric HadLocationArgList(false) {} 66730fca6ea1SDimitry Andric DVIRecoveryRec(DbgVariableRecord *DVR) 66740fca6ea1SDimitry Andric : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {} 667581ad6265SDimitry Andric 66760fca6ea1SDimitry Andric PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgRef; 6677fe6060f1SDimitry Andric DIExpression *Expr; 667881ad6265SDimitry Andric bool HadLocationArgList; 667981ad6265SDimitry Andric SmallVector<WeakVH, 2> LocationOps; 668081ad6265SDimitry Andric SmallVector<const llvm::SCEV *, 2> SCEVs; 668181ad6265SDimitry Andric SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs; 668281ad6265SDimitry Andric 668381ad6265SDimitry Andric void clear() { 668481ad6265SDimitry Andric for (auto &RE : RecoveryExprs) 668581ad6265SDimitry Andric RE.reset(); 668681ad6265SDimitry Andric RecoveryExprs.clear(); 668781ad6265SDimitry Andric } 668881ad6265SDimitry Andric 668981ad6265SDimitry Andric ~DVIRecoveryRec() { clear(); } 6690fe6060f1SDimitry Andric }; 6691349cc55cSDimitry Andric } // namespace 6692fe6060f1SDimitry Andric 669381ad6265SDimitry Andric /// Returns the total number of DW_OP_llvm_arg operands in the expression. 669481ad6265SDimitry Andric /// This helps in determining if a DIArglist is necessary or can be omitted from 669581ad6265SDimitry Andric /// the dbg.value. 669681ad6265SDimitry Andric static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) { 669781ad6265SDimitry Andric auto expr_ops = ToDwarfOpIter(Expr); 669881ad6265SDimitry Andric unsigned Count = 0; 669981ad6265SDimitry Andric for (auto Op : expr_ops) 670081ad6265SDimitry Andric if (Op.getOp() == dwarf::DW_OP_LLVM_arg) 670181ad6265SDimitry Andric Count++; 670281ad6265SDimitry Andric return Count; 6703fe6060f1SDimitry Andric } 6704fe6060f1SDimitry Andric 670581ad6265SDimitry Andric /// Overwrites DVI with the location and Ops as the DIExpression. This will 670681ad6265SDimitry Andric /// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands, 670781ad6265SDimitry Andric /// because a DIArglist is not created for the first argument of the dbg.value. 
67087a6dacacSDimitry Andric template <typename T>
67097a6dacacSDimitry Andric static void updateDVIWithLocation(T &DbgVal, Value *Location,
671081ad6265SDimitry Andric                                   SmallVectorImpl<uint64_t> &Ops) {
67117a6dacacSDimitry Andric   assert(numLLVMArgOps(Ops) == 0 && "Expected expression that does not "
67127a6dacacSDimitry Andric                                     "contain any DW_OP_llvm_arg operands.");
67137a6dacacSDimitry Andric   DbgVal.setRawLocation(ValueAsMetadata::get(Location));
67147a6dacacSDimitry Andric   DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6716349cc55cSDimitry Andric }
6717349cc55cSDimitry Andric 
671881ad6265SDimitry Andric /// Overwrite DVI with locations placed into a DIArglist.
67197a6dacacSDimitry Andric template <typename T>
67207a6dacacSDimitry Andric static void updateDVIWithLocations(T &DbgVal,
672181ad6265SDimitry Andric                                    SmallVectorImpl<Value *> &Locations,
672281ad6265SDimitry Andric                                    SmallVectorImpl<uint64_t> &Ops) {
672381ad6265SDimitry Andric   assert(numLLVMArgOps(Ops) != 0 &&
672481ad6265SDimitry Andric          "Expected expression that references DIArglist locations using "
672581ad6265SDimitry Andric          "DW_OP_llvm_arg operands.");
672681ad6265SDimitry Andric   SmallVector<ValueAsMetadata *, 3> MetadataLocs;
672781ad6265SDimitry Andric   for (Value *V : Locations)
672881ad6265SDimitry Andric     MetadataLocs.push_back(ValueAsMetadata::get(V));
672981ad6265SDimitry Andric   auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
67307a6dacacSDimitry Andric   DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef));
67317a6dacacSDimitry Andric   DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
673281ad6265SDimitry Andric }
673381ad6265SDimitry Andric 
673481ad6265SDimitry Andric /// Write the new expression and new location ops for the dbg.value. If
673581ad6265SDimitry Andric /// possible, reduce the size of the dbg.value intrinsic by omitting the
673681ad6265SDimitry Andric /// DIArglist. It can be omitted if:
673781ad6265SDimitry Andric /// 1. There is only a single location, referenced by a single DW_OP_llvm_arg.
673881ad6265SDimitry Andric /// 2. The DW_OP_LLVM_arg is the first operand in the expression.
673981ad6265SDimitry Andric static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
674081ad6265SDimitry Andric                                SmallVectorImpl<Value *> &NewLocationOps,
674181ad6265SDimitry Andric                                SmallVectorImpl<uint64_t> &NewExpr) {
67427a6dacacSDimitry Andric   auto UpdateDbgValueInstImpl = [&](auto *DbgVal) {
674381ad6265SDimitry Andric     unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
674481ad6265SDimitry Andric     if (NumLLVMArgs == 0) {
674581ad6265SDimitry Andric       // Location assumed to be on the stack.
67467a6dacacSDimitry Andric       updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr);
674781ad6265SDimitry Andric     } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
674881ad6265SDimitry Andric       // There is only a single DW_OP_llvm_arg at the start of the expression,
674981ad6265SDimitry Andric       // so it can be omitted along with DIArglist.
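      // Illustrative sketch (hypothetical values): for a single location %iv
      // and NewExpr = {DW_OP_LLVM_arg, 0, DW_OP_plus_uconst, 8,
      // DW_OP_stack_value}, the leading arg is dropped and the dbg.value
      // becomes a plain reference to %iv with
      // !DIExpression(DW_OP_plus_uconst, 8, DW_OP_stack_value).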
675081ad6265SDimitry Andric       assert(NewExpr[1] == 0 &&
675181ad6265SDimitry Andric              "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
675281ad6265SDimitry Andric       llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
67537a6dacacSDimitry Andric       updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps);
675481ad6265SDimitry Andric     } else {
675581ad6265SDimitry Andric       // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
67567a6dacacSDimitry Andric       updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
675781ad6265SDimitry Andric     }
675881ad6265SDimitry Andric 
675981ad6265SDimitry Andric     // If the DIExpression was previously empty then add the stack terminator.
67607a6dacacSDimitry Andric     // Non-empty expressions have only had elements inserted into them and so
67617a6dacacSDimitry Andric     // the terminator should already be present, e.g. stack_value or fragment.
67627a6dacacSDimitry Andric     DIExpression *SalvageExpr = DbgVal->getExpression();
676381ad6265SDimitry Andric     if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
67647a6dacacSDimitry Andric       SalvageExpr =
67657a6dacacSDimitry Andric           DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
67667a6dacacSDimitry Andric       DbgVal->setExpression(SalvageExpr);
676781ad6265SDimitry Andric     }
67687a6dacacSDimitry Andric   };
67697a6dacacSDimitry Andric   if (isa<DbgValueInst *>(DVIRec.DbgRef))
67707a6dacacSDimitry Andric     UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
67717a6dacacSDimitry Andric   else
67720fca6ea1SDimitry Andric     UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
677381ad6265SDimitry Andric }
677481ad6265SDimitry Andric 
677506c3fb27SDimitry Andric /// Cached location ops may be erased during LSR, in which case a poison
677681ad6265SDimitry Andric /// value is required when restoring from the cache. The type of that location
677706c3fb27SDimitry Andric /// is no longer available, so just use int8. The poison will be replaced by
677881ad6265SDimitry Andric /// one or more locations later when a SCEVDbgValueBuilder selects alternative
677981ad6265SDimitry Andric /// locations to use for the salvage.
678006c3fb27SDimitry Andric static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
678106c3fb27SDimitry Andric   return (VH) ? VH : PoisonValue::get(llvm::Type::getInt8Ty(C));
678281ad6265SDimitry Andric }
678381ad6265SDimitry Andric 
678481ad6265SDimitry Andric /// Restore the DVI's pre-LSR arguments. Substitute poison for any erased values.
678581ad6265SDimitry Andric static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
67867a6dacacSDimitry Andric   auto RestorePreTransformStateImpl = [&](auto *DbgVal) {
678781ad6265SDimitry Andric     LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
67887a6dacacSDimitry Andric                       << "scev-salvage: post-LSR: " << *DbgVal << '\n');
678981ad6265SDimitry Andric     assert(DVIRec.Expr && "Expected an expression");
67907a6dacacSDimitry Andric     DbgVal->setExpression(DVIRec.Expr);
679181ad6265SDimitry Andric 
679281ad6265SDimitry Andric     // Even a single location-op may be inside a DIArgList and referenced with
679381ad6265SDimitry Andric     // DW_OP_LLVM_arg, which is valid only with a DIArgList.
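    // Illustrative sketch (hypothetical dbg.value): a record that began as
    //   dbg.value(i64 %a, !DIExpression())
    // and was given a DIArgList form by a failed salvage, e.g.
    //   dbg.value(DIArgList(i64 %a), !DIExpression(DW_OP_LLVM_arg, 0))
    // must be restored to the original single-location form, not the
    // semantically equivalent DIArgList form.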
679481ad6265SDimitry Andric if (!DVIRec.HadLocationArgList) { 679581ad6265SDimitry Andric assert(DVIRec.LocationOps.size() == 1 && 679681ad6265SDimitry Andric "Unexpected number of location ops."); 679781ad6265SDimitry Andric // LSR's unsuccessful salvage attempt may have added DIArgList, which in 67987a6dacacSDimitry Andric // this case was not present before, so force the location back to a 67997a6dacacSDimitry Andric // single uncontained Value. 680081ad6265SDimitry Andric Value *CachedValue = 68017a6dacacSDimitry Andric getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext()); 68027a6dacacSDimitry Andric DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue)); 680381ad6265SDimitry Andric } else { 680481ad6265SDimitry Andric SmallVector<ValueAsMetadata *, 3> MetadataLocs; 680581ad6265SDimitry Andric for (WeakVH VH : DVIRec.LocationOps) { 68067a6dacacSDimitry Andric Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext()); 680781ad6265SDimitry Andric MetadataLocs.push_back(ValueAsMetadata::get(CachedValue)); 680881ad6265SDimitry Andric } 680981ad6265SDimitry Andric auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs); 68107a6dacacSDimitry Andric DbgVal->setRawLocation( 68117a6dacacSDimitry Andric llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef)); 681281ad6265SDimitry Andric } 68137a6dacacSDimitry Andric LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n'); 68147a6dacacSDimitry Andric }; 68157a6dacacSDimitry Andric if (isa<DbgValueInst *>(DVIRec.DbgRef)) 68167a6dacacSDimitry Andric RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef)); 68177a6dacacSDimitry Andric else 68180fca6ea1SDimitry Andric RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef)); 681981ad6265SDimitry Andric } 682081ad6265SDimitry Andric 682181ad6265SDimitry Andric static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE, 682281ad6265SDimitry Andric llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec, 682381ad6265SDimitry Andric const SCEV *SCEVInductionVar, 682481ad6265SDimitry Andric SCEVDbgValueBuilder IterCountExpr) { 68257a6dacacSDimitry Andric 68267a6dacacSDimitry Andric if (isa<DbgValueInst *>(DVIRec.DbgRef) 68277a6dacacSDimitry Andric ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation() 68280fca6ea1SDimitry Andric : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation()) 682981ad6265SDimitry Andric return false; 683081ad6265SDimitry Andric 683181ad6265SDimitry Andric // LSR may have caused several changes to the dbg.value in the failed salvage 683281ad6265SDimitry Andric // attempt. So restore the DIExpression, the location ops and also the 683381ad6265SDimitry Andric // location ops format, which is always DIArglist for multiple ops, but only 683481ad6265SDimitry Andric // sometimes for a single op. 683581ad6265SDimitry Andric restorePreTransformState(DVIRec); 683681ad6265SDimitry Andric 683781ad6265SDimitry Andric // LocationOpIndexMap[i] will store the post-LSR location index of 683881ad6265SDimitry Andric // the non-optimised out location at pre-LSR index i. 
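  // Illustrative sketch (hypothetical values): for pre-LSR location ops
  // [%a, %b] where %b was optimised out but its SCEV survives, the loop below
  // produces NewLocationOps = [LSRInductionVar, %a] and
  // LocationOpIndexMap = [1, -1], and builds RecoveryExprs[1] to re-express
  // %b in terms of the IV.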
683981ad6265SDimitry Andric   SmallVector<int64_t, 2> LocationOpIndexMap;
684081ad6265SDimitry Andric   LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
684181ad6265SDimitry Andric   SmallVector<Value *, 2> NewLocationOps;
684281ad6265SDimitry Andric   NewLocationOps.push_back(LSRInductionVar);
684381ad6265SDimitry Andric 
684481ad6265SDimitry Andric   for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
684581ad6265SDimitry Andric     WeakVH VH = DVIRec.LocationOps[i];
684681ad6265SDimitry Andric     // Place the locations not optimised out in the list first, avoiding
684781ad6265SDimitry Andric     // inserts later. The map is used to update the DIExpression's
684881ad6265SDimitry Andric     // DW_OP_LLVM_arg arguments as the expression is updated.
684981ad6265SDimitry Andric     if (VH && !isa<UndefValue>(VH)) {
685081ad6265SDimitry Andric       NewLocationOps.push_back(VH);
685181ad6265SDimitry Andric       LocationOpIndexMap[i] = NewLocationOps.size() - 1;
685281ad6265SDimitry Andric       LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
685381ad6265SDimitry Andric                         << " now at index " << LocationOpIndexMap[i] << "\n");
685481ad6265SDimitry Andric       continue;
685581ad6265SDimitry Andric     }
685681ad6265SDimitry Andric 
685781ad6265SDimitry Andric     // It's possible that a value referred to in the SCEV may have been
685881ad6265SDimitry Andric     // optimised out by LSR.
685981ad6265SDimitry Andric     if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
686081ad6265SDimitry Andric         SE.containsUndefs(DVIRec.SCEVs[i])) {
686181ad6265SDimitry Andric       LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
686281ad6265SDimitry Andric                         << " refers to a location that is now undef or erased. "
686381ad6265SDimitry Andric                            "Salvage abandoned.\n");
686481ad6265SDimitry Andric       return false;
686581ad6265SDimitry Andric     }
686681ad6265SDimitry Andric 
686781ad6265SDimitry Andric     LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
686881ad6265SDimitry Andric                       << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");
686981ad6265SDimitry Andric 
687081ad6265SDimitry Andric     DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
687181ad6265SDimitry Andric     SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();
687281ad6265SDimitry Andric 
687381ad6265SDimitry Andric     // Create an offset-based salvage expression if possible, as it requires
687481ad6265SDimitry Andric     // fewer DWARF ops than an iteration count-based expression.
6875bdd1243dSDimitry Andric     if (std::optional<APInt> Offset =
687681ad6265SDimitry Andric             SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
687706c3fb27SDimitry Andric       if (Offset->getSignificantBits() <= 64)
6878bdd1243dSDimitry Andric         SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
687981ad6265SDimitry Andric     } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
688081ad6265SDimitry Andric                                                  SE))
688181ad6265SDimitry Andric       return false;
688281ad6265SDimitry Andric   }
688381ad6265SDimitry Andric 
688481ad6265SDimitry Andric   // Merge the DbgValueBuilder generated expressions and the original
688581ad6265SDimitry Andric   // DIExpression, place the result into a new vector.
688681ad6265SDimitry Andric   SmallVector<uint64_t, 3> NewExpr;
688781ad6265SDimitry Andric   if (DVIRec.Expr->getNumElements() == 0) {
688881ad6265SDimitry Andric     assert(DVIRec.RecoveryExprs.size() == 1 &&
688981ad6265SDimitry Andric            "Expected only a single recovery expression for an empty "
689081ad6265SDimitry Andric            "DIExpression.");
689181ad6265SDimitry Andric     assert(DVIRec.RecoveryExprs[0] &&
689281ad6265SDimitry Andric            "Expected a SCEVDbgValueBuilder for location 0");
689381ad6265SDimitry Andric     SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
689481ad6265SDimitry Andric     B->appendToVectors(NewExpr, NewLocationOps);
689581ad6265SDimitry Andric   }
689681ad6265SDimitry Andric   for (const auto &Op : DVIRec.Expr->expr_ops()) {
689781ad6265SDimitry Andric     // Most Ops needn't be updated.
689881ad6265SDimitry Andric     if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
689981ad6265SDimitry Andric       Op.appendToVector(NewExpr);
690081ad6265SDimitry Andric       continue;
690181ad6265SDimitry Andric     }
690281ad6265SDimitry Andric 
690381ad6265SDimitry Andric     uint64_t LocationArgIndex = Op.getArg(0);
690481ad6265SDimitry Andric     SCEVDbgValueBuilder *DbgBuilder =
690581ad6265SDimitry Andric         DVIRec.RecoveryExprs[LocationArgIndex].get();
690681ad6265SDimitry Andric     // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
690781ad6265SDimitry Andric     // optimise it away. So just translate the argument to the updated
690881ad6265SDimitry Andric     // location index.
690981ad6265SDimitry Andric     if (!DbgBuilder) {
691081ad6265SDimitry Andric       NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
691181ad6265SDimitry Andric       assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
691281ad6265SDimitry Andric              "Expected a non-negative index for the location-op position.");
691381ad6265SDimitry Andric       NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
691481ad6265SDimitry Andric       continue;
691581ad6265SDimitry Andric     }
691681ad6265SDimitry Andric     // The location has a recovery expression.
691781ad6265SDimitry Andric     DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
691881ad6265SDimitry Andric   }
691981ad6265SDimitry Andric 
692081ad6265SDimitry Andric   UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
69217a6dacacSDimitry Andric   if (isa<DbgValueInst *>(DVIRec.DbgRef))
69227a6dacacSDimitry Andric     LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
69237a6dacacSDimitry Andric                       << *cast<DbgValueInst *>(DVIRec.DbgRef) << "\n");
69247a6dacacSDimitry Andric   else
69257a6dacacSDimitry Andric     LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
69260fca6ea1SDimitry Andric                       << *cast<DbgVariableRecord *>(DVIRec.DbgRef) << "\n");
692781ad6265SDimitry Andric   return true;
692881ad6265SDimitry Andric }
692981ad6265SDimitry Andric 
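// Illustrative end-to-end sketch (all names and SCEVs are hypothetical):
// before LSR a record might be
//   dbg.value(i64 %a, !DIExpression())        ; SCEV of %a: {%start,+,4}
// and after a successful salvage against a post-LSR IV %lsr.iv = {0,+,1}:
//   dbg.value(DIArgList(i64 %lsr.iv, i64 %start),
//             !DIExpression(DW_OP_LLVM_arg, 0, DW_OP_consts, 4, DW_OP_mul,
//                           DW_OP_LLVM_arg, 1, DW_OP_plus, DW_OP_stack_value))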
693081ad6265SDimitry Andric /// Obtain an expression for the iteration count, then attempt to salvage the
693181ad6265SDimitry Andric /// dbg.value intrinsics.
69327a6dacacSDimitry Andric static void DbgRewriteSalvageableDVIs(
69337a6dacacSDimitry Andric     llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar,
693481ad6265SDimitry Andric     SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6935fe6060f1SDimitry Andric   if (DVIToUpdate.empty())
6936349cc55cSDimitry Andric     return;
6937fe6060f1SDimitry Andric 
6938fe6060f1SDimitry Andric   const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
6939fe6060f1SDimitry Andric   assert(SCEVInductionVar &&
6940fe6060f1SDimitry Andric          "Anticipated a SCEV for the post-LSR induction variable");
6941fe6060f1SDimitry Andric 
6942fe6060f1SDimitry Andric   if (const SCEVAddRecExpr *IVAddRec =
6943fe6060f1SDimitry Andric           dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
69446e75b2fbSDimitry Andric     if (!IVAddRec->isAffine())
6945349cc55cSDimitry Andric       return;
69466e75b2fbSDimitry Andric 
694781ad6265SDimitry Andric     // Prevent translation using excessive resources.
6948349cc55cSDimitry Andric     if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6949349cc55cSDimitry Andric       return;
6950349cc55cSDimitry Andric 
6951349cc55cSDimitry Andric     // The iteration count is required to recover location values.
6952fe6060f1SDimitry Andric     SCEVDbgValueBuilder IterCountExpr;
695381ad6265SDimitry Andric     IterCountExpr.pushLocation(LSRInductionVar);
6954fe6060f1SDimitry Andric     if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6955349cc55cSDimitry Andric       return;
6956fe6060f1SDimitry Andric 
6957fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
6958fe6060f1SDimitry Andric                       << '\n');
6959fe6060f1SDimitry Andric 
6960fe6060f1SDimitry Andric     for (auto &DVIRec : DVIToUpdate) {
696181ad6265SDimitry Andric       SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
696281ad6265SDimitry Andric                  IterCountExpr);
6963fe6060f1SDimitry Andric     }
6964349cc55cSDimitry Andric   }
6965fe6060f1SDimitry Andric }
6966fe6060f1SDimitry Andric 
6967fe6060f1SDimitry Andric /// Identify and cache salvageable DVI locations and expressions along with the
6968349cc55cSDimitry Andric /// corresponding SCEV(s). Also ensure that the DVI is not deleted between
6969349cc55cSDimitry Andric /// caching and salvaging.
697081ad6265SDimitry Andric static void DbgGatherSalvagableDVI(
697181ad6265SDimitry Andric     Loop *L, ScalarEvolution &SE,
697281ad6265SDimitry Andric     SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
6973fe6060f1SDimitry Andric     SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
6974bdd1243dSDimitry Andric   for (const auto &B : L->getBlocks()) {
6975e8d8bef9SDimitry Andric     for (auto &I : *B) {
69767a6dacacSDimitry Andric       auto ProcessDbgValue = [&](auto *DbgVal) -> bool {
697781ad6265SDimitry Andric         // Do not cache the dbg.value if any of its location ops are undef,
697881ad6265SDimitry Andric         // i.e. if it is a kill location.
69797a6dacacSDimitry Andric         if (DbgVal->isKillLocation())
69807a6dacacSDimitry Andric           return false;
6981349cc55cSDimitry Andric 
698281ad6265SDimitry Andric         // Check that the location op SCEVs are suitable for translation to
698381ad6265SDimitry Andric         // DIExpression.
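        // (Illustrative: a location op whose type is not SCEV-able, such as
        // a floating-point value, or whose SCEV contains undef, fails the
        // checks below and the dbg.value is not cached.)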
698481ad6265SDimitry Andric         const auto &HasTranslatableLocationOps =
69857a6dacacSDimitry Andric             [&](const auto *DbgValToTranslate) -> bool {
69867a6dacacSDimitry Andric           for (const auto LocOp : DbgValToTranslate->location_ops()) {
698781ad6265SDimitry Andric             if (!LocOp)
698881ad6265SDimitry Andric               return false;
6989fe6060f1SDimitry Andric 
699081ad6265SDimitry Andric             if (!SE.isSCEVable(LocOp->getType()))
699181ad6265SDimitry Andric               return false;
6992fe6060f1SDimitry Andric 
699381ad6265SDimitry Andric             const SCEV *S = SE.getSCEV(LocOp);
6994349cc55cSDimitry Andric             if (SE.containsUndefs(S))
699581ad6265SDimitry Andric               return false;
699681ad6265SDimitry Andric           }
699781ad6265SDimitry Andric           return true;
699881ad6265SDimitry Andric         };
699981ad6265SDimitry Andric 
70007a6dacacSDimitry Andric         if (!HasTranslatableLocationOps(DbgVal))
70017a6dacacSDimitry Andric           return false;
7002349cc55cSDimitry Andric 
700381ad6265SDimitry Andric         std::unique_ptr<DVIRecoveryRec> NewRec =
70047a6dacacSDimitry Andric             std::make_unique<DVIRecoveryRec>(DbgVal);
70057a6dacacSDimitry Andric         // Each location Op may need a SCEVDbgValueBuilder in order to recover
70067a6dacacSDimitry Andric         // it. Pre-allocating a vector will enable quick lookups of the builder
70077a6dacacSDimitry Andric         // later during the salvage.
70087a6dacacSDimitry Andric         NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
70097a6dacacSDimitry Andric         for (const auto LocOp : DbgVal->location_ops()) {
701081ad6265SDimitry Andric           NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
701181ad6265SDimitry Andric           NewRec->LocationOps.push_back(LocOp);
70127a6dacacSDimitry Andric           NewRec->HadLocationArgList = DbgVal->hasArgList();
701381ad6265SDimitry Andric         }
701481ad6265SDimitry Andric         SalvageableDVISCEVs.push_back(std::move(NewRec));
70157a6dacacSDimitry Andric         return true;
70167a6dacacSDimitry Andric       };
70170fca6ea1SDimitry Andric       for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
70180fca6ea1SDimitry Andric         if (DVR.isDbgValue() || DVR.isDbgAssign())
70190fca6ea1SDimitry Andric           ProcessDbgValue(&DVR);
70207a6dacacSDimitry Andric       }
70217a6dacacSDimitry Andric       auto DVI = dyn_cast<DbgValueInst>(&I);
70227a6dacacSDimitry Andric       if (!DVI)
70237a6dacacSDimitry Andric         continue;
70247a6dacacSDimitry Andric       if (ProcessDbgValue(DVI))
7025fe6060f1SDimitry Andric         DVIHandles.insert(DVI);
7026e8d8bef9SDimitry Andric     }
7027e8d8bef9SDimitry Andric   }
7028e8d8bef9SDimitry Andric }
7029e8d8bef9SDimitry Andric 
7030fe6060f1SDimitry Andric /// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
7031fe6060f1SDimitry Andric /// any PHI from the loop header is usable, but may have a lower chance of
7032fe6060f1SDimitry Andric /// surviving subsequent transforms.
7033fe6060f1SDimitry Andric static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
7034fe6060f1SDimitry Andric                                            const LSRInstance &LSR) {
7035349cc55cSDimitry Andric 
7036349cc55cSDimitry Andric   auto IsSuitableIV = [&](PHINode *P) {
7037349cc55cSDimitry Andric     if (!SE.isSCEVable(P->getType()))
7038349cc55cSDimitry Andric       return false;
7039349cc55cSDimitry Andric     if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
7040349cc55cSDimitry Andric       return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
7041349cc55cSDimitry Andric     return false;
7042349cc55cSDimitry Andric   };
7043349cc55cSDimitry Andric 
7044349cc55cSDimitry Andric   // For now, just pick the first IV that was generated and inserted by
7045349cc55cSDimitry Andric   // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
7046349cc55cSDimitry Andric   // by subsequent transforms.
7047fe6060f1SDimitry Andric   for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
7048fe6060f1SDimitry Andric     if (!IV)
7049e8d8bef9SDimitry Andric       continue;
7050fe6060f1SDimitry Andric 
7051349cc55cSDimitry Andric     // There should only be PHI node IVs.
7052349cc55cSDimitry Andric     PHINode *P = cast<PHINode>(&*IV);
7053349cc55cSDimitry Andric 
7054349cc55cSDimitry Andric     if (IsSuitableIV(P))
7055349cc55cSDimitry Andric       return P;
7056fe6060f1SDimitry Andric   }
7057fe6060f1SDimitry Andric 
7058349cc55cSDimitry Andric   for (PHINode &P : L.getHeader()->phis()) {
7059349cc55cSDimitry Andric     if (IsSuitableIV(&P))
7060349cc55cSDimitry Andric       return &P;
7061e8d8bef9SDimitry Andric   }
7062fe6060f1SDimitry Andric   return nullptr;
7063e8d8bef9SDimitry Andric }
7064e8d8bef9SDimitry Andric 
706506c3fb27SDimitry Andric static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
7066bdd1243dSDimitry Andric canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
70670fca6ea1SDimitry Andric                       const LoopInfo &LI, const TargetTransformInfo &TTI) {
7068bdd1243dSDimitry Andric   if (!L->isInnermost()) {
7069bdd1243dSDimitry Andric     LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
7070bdd1243dSDimitry Andric     return std::nullopt;
7071bdd1243dSDimitry Andric   }
7072bdd1243dSDimitry Andric   // Only inspect loops in loop-simplify form.
7073bdd1243dSDimitry Andric   if (!L->isLoopSimplifyForm()) {
7074bdd1243dSDimitry Andric     LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
7075bdd1243dSDimitry Andric     return std::nullopt;
7076bdd1243dSDimitry Andric   }
7077bdd1243dSDimitry Andric 
7078bdd1243dSDimitry Andric   if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
7079bdd1243dSDimitry Andric     LLVM_DEBUG(dbgs() << "Cannot fold on a loop-variant backedge-taken count\n");
7080bdd1243dSDimitry Andric     return std::nullopt;
7081bdd1243dSDimitry Andric   }
7082bdd1243dSDimitry Andric 
7083bdd1243dSDimitry Andric   BasicBlock *LoopLatch = L->getLoopLatch();
708406c3fb27SDimitry Andric   BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
708506c3fb27SDimitry Andric   if (!BI || BI->isUnconditional())
7086bdd1243dSDimitry Andric     return std::nullopt;
708706c3fb27SDimitry Andric   auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
708806c3fb27SDimitry Andric   if (!TermCond) {
708906c3fb27SDimitry Andric     LLVM_DEBUG(dbgs() << "Cannot fold on branching condition that is not an "
709006c3fb27SDimitry Andric                          "ICmpInst\n");
7091bdd1243dSDimitry Andric     return std::nullopt;
7092bdd1243dSDimitry Andric   }
7093bdd1243dSDimitry Andric   if (!TermCond->hasOneUse()) {
7094bdd1243dSDimitry Andric     LLVM_DEBUG(
7095bdd1243dSDimitry Andric         dbgs()
7096bdd1243dSDimitry Andric         << "Cannot replace terminating condition with more than one use\n");
7097bdd1243dSDimitry Andric     return std::nullopt;
7098bdd1243dSDimitry Andric   }
7099bdd1243dSDimitry Andric 
710006c3fb27SDimitry Andric   BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
710106c3fb27SDimitry Andric   Value *RHS = TermCond->getOperand(1);
710206c3fb27SDimitry Andric   if (!LHS || !L->isLoopInvariant(RHS))
710306c3fb27SDimitry Andric     // We could pattern match the inverse form of the icmp, but that is
710406c3fb27SDimitry Andric     // non-canonical, and this pass is running *very* late in the pipeline.
710506c3fb27SDimitry Andric     return std::nullopt;
710606c3fb27SDimitry Andric 
710706c3fb27SDimitry Andric   // Find the IV used by the current exit condition.
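  // Illustrative sketch (hypothetical IR): given a latch such as
  //   %iv.next = add nuw nsw i64 %iv, 1
  //   %cmp = icmp ult i64 %iv.next, %n
  //   br i1 %cmp, label %loop, label %exit
  // LHS is %iv.next and matchSimpleRecurrence identifies the header phi %iv
  // as ToFold. The transform then re-expresses the exit test in terms of
  // another IV (e.g. a pointer IV created by LSR) so that %iv becomes dead
  // and can be deleted.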
710806c3fb27SDimitry Andric   PHINode *ToFold;
710906c3fb27SDimitry Andric   Value *ToFoldStart, *ToFoldStep;
711006c3fb27SDimitry Andric   if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
711106c3fb27SDimitry Andric     return std::nullopt;
7112bdd1243dSDimitry Andric 
71130fca6ea1SDimitry Andric   // Ensure the simple recurrence is a part of the current loop.
71140fca6ea1SDimitry Andric   if (ToFold->getParent() != L->getHeader())
71150fca6ea1SDimitry Andric     return std::nullopt;
71160fca6ea1SDimitry Andric 
711706c3fb27SDimitry Andric   // If that IV isn't dead after we rewrite the exit condition in terms of
711806c3fb27SDimitry Andric   // another IV, there's no point in doing the transform.
711906c3fb27SDimitry Andric   if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
712006c3fb27SDimitry Andric     return std::nullopt;
7121bdd1243dSDimitry Andric 
71220fca6ea1SDimitry Andric   // Inserting instructions in the preheader has a runtime cost; scale
71230fca6ea1SDimitry Andric   // the allowed cost with the loop's trip count as best we can.
71240fca6ea1SDimitry Andric   const unsigned ExpansionBudget = [&]() {
71250fca6ea1SDimitry Andric     unsigned Budget = 2 * SCEVCheapExpansionBudget;
71260fca6ea1SDimitry Andric     if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
71270fca6ea1SDimitry Andric       return std::min(Budget, SmallTC);
71280fca6ea1SDimitry Andric     if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
71290fca6ea1SDimitry Andric       return std::min(Budget, *SmallTC);
71300fca6ea1SDimitry Andric     // Unknown trip count, assume long running by default.
71310fca6ea1SDimitry Andric     return Budget;
71320fca6ea1SDimitry Andric   }();
71330fca6ea1SDimitry Andric 
7134bdd1243dSDimitry Andric   const SCEV *BECount = SE.getBackedgeTakenCount(L);
71350fca6ea1SDimitry Andric   const DataLayout &DL = L->getHeader()->getDataLayout();
7136bdd1243dSDimitry Andric   SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
7137bdd1243dSDimitry Andric 
7138bdd1243dSDimitry Andric   PHINode *ToHelpFold = nullptr;
7139bdd1243dSDimitry Andric   const SCEV *TermValueS = nullptr;
714006c3fb27SDimitry Andric   bool MustDropPoison = false;
71410fca6ea1SDimitry Andric   auto InsertPt = L->getLoopPreheader()->getTerminator();
7142bdd1243dSDimitry Andric   for (PHINode &PN : L->getHeader()->phis()) {
714306c3fb27SDimitry Andric     if (ToFold == &PN)
714406c3fb27SDimitry Andric       continue;
714506c3fb27SDimitry Andric 
7146bdd1243dSDimitry Andric     if (!SE.isSCEVable(PN.getType())) {
7147bdd1243dSDimitry Andric       LLVM_DEBUG(dbgs() << "IV of phi '" << PN
7148bdd1243dSDimitry Andric                         << "' is not SCEV-able, not qualified for the "
7149bdd1243dSDimitry Andric                            "terminating condition folding.\n");
7150bdd1243dSDimitry Andric       continue;
7151bdd1243dSDimitry Andric     }
715206c3fb27SDimitry Andric     const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
7153bdd1243dSDimitry Andric     // Only speculate on affine AddRecs.
7154bdd1243dSDimitry Andric     if (!AddRec || !AddRec->isAffine()) {
7155bdd1243dSDimitry Andric       LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
7156bdd1243dSDimitry Andric                         << "' is not an affine add recurrence, not qualified "
7157bdd1243dSDimitry Andric                            "for the terminating condition folding.\n");
7158bdd1243dSDimitry Andric       continue;
7159bdd1243dSDimitry Andric     }
7160bdd1243dSDimitry Andric 
716106c3fb27SDimitry Andric     // Check that we can compute the value of AddRec on the exiting iteration
716206c3fb27SDimitry Andric     // without soundness problems. evaluateAtIteration internally needs
716306c3fb27SDimitry Andric     // to multiply the stride by the iteration number - which may wrap around.
716406c3fb27SDimitry Andric     // The issue here is subtle because computing the result accounting for
716506c3fb27SDimitry Andric     // wrap is insufficient. In order to use the result in an exit test, we
716606c3fb27SDimitry Andric     // must also know that AddRec doesn't take the same value on any previous
716706c3fb27SDimitry Andric     // iteration. The simplest case to consider is a candidate IV which is
716806c3fb27SDimitry Andric     // narrower than the trip count (and thus original IV), but this can
716906c3fb27SDimitry Andric     // also happen due to non-unit strides on the candidate IVs.
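    // (Illustrative: an i8 candidate IV {0,+,1} in a loop whose backedge is
    // taken 300 times wraps and revisits values: its post-increment value on
    // the final iteration is 301 mod 256 = 45, which is first produced on
    // iteration 44, so an equality exit test against it would fire early.
    // The no-self-wrap and non-zero-step checks below rule this out.)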
71707a6dacacSDimitry Andric     if (!AddRec->hasNoSelfWrap() ||
71717a6dacacSDimitry Andric         !SE.isKnownNonZero(AddRec->getStepRecurrence(SE)))
717206c3fb27SDimitry Andric       continue;
717306c3fb27SDimitry Andric 
717406c3fb27SDimitry Andric     const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
717506c3fb27SDimitry Andric     const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
717606c3fb27SDimitry Andric     if (!Expander.isSafeToExpand(TermValueSLocal)) {
717706c3fb27SDimitry Andric       LLVM_DEBUG(
717806c3fb27SDimitry Andric           dbgs() << "Is not safe to expand terminating value for phi node" << PN
717906c3fb27SDimitry Andric                  << "\n");
718006c3fb27SDimitry Andric       continue;
718106c3fb27SDimitry Andric     }
7182bdd1243dSDimitry Andric 
71830fca6ea1SDimitry Andric     if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
71840fca6ea1SDimitry Andric                                      &TTI, InsertPt)) {
71850fca6ea1SDimitry Andric       LLVM_DEBUG(
71860fca6ea1SDimitry Andric           dbgs() << "Is too expensive to expand terminating value for phi node"
71870fca6ea1SDimitry Andric                  << PN << "\n");
71880fca6ea1SDimitry Andric       continue;
71890fca6ea1SDimitry Andric     }
71900fca6ea1SDimitry Andric 
719106c3fb27SDimitry Andric     // The candidate IV may have been otherwise dead and poison from the
719206c3fb27SDimitry Andric     // very first iteration. If we can't disprove that, we can't use the IV.
719306c3fb27SDimitry Andric     if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
719406c3fb27SDimitry Andric       LLVM_DEBUG(dbgs() << "Cannot prove poison safety for IV "
719506c3fb27SDimitry Andric                         << PN << "\n");
719606c3fb27SDimitry Andric       continue;
719706c3fb27SDimitry Andric     }
719806c3fb27SDimitry Andric 
719906c3fb27SDimitry Andric     // The candidate IV may become poison on the last iteration. If this
720006c3fb27SDimitry Andric     // value is not branched on, this is a well defined program. We're
720106c3fb27SDimitry Andric     // about to add a new use to this IV, and we have to ensure we don't
720206c3fb27SDimitry Andric     // insert UB which didn't previously exist.
720306c3fb27SDimitry Andric     bool MustDropPoisonLocal = false;
720406c3fb27SDimitry Andric     Instruction *PostIncV =
720506c3fb27SDimitry Andric         cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
720606c3fb27SDimitry Andric     if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
720706c3fb27SDimitry Andric                                        &DT)) {
720806c3fb27SDimitry Andric       LLVM_DEBUG(dbgs() << "Cannot prove poison safety to insert use"
720906c3fb27SDimitry Andric                         << PN << "\n");
721006c3fb27SDimitry Andric 
721106c3fb27SDimitry Andric       // If this is a complex recurrence with multiple instructions computing
721206c3fb27SDimitry Andric       // the backedge value, we might need to strip poison flags from all of
721306c3fb27SDimitry Andric       // them.
721406c3fb27SDimitry Andric       if (PostIncV->getOperand(0) != &PN)
721506c3fb27SDimitry Andric         continue;
721606c3fb27SDimitry Andric 
721706c3fb27SDimitry Andric       // In order to perform the transform, we need to drop the poison
721806c3fb27SDimitry Andric       // generating flags on this instruction (if any).
721906c3fb27SDimitry Andric       MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
722006c3fb27SDimitry Andric     }
722106c3fb27SDimitry Andric 
722206c3fb27SDimitry Andric     // We pick the last legal alternate IV. We could explore choosing an
722306c3fb27SDimitry Andric     // optimal alternate IV if we had a decent heuristic to do so.
722406c3fb27SDimitry Andric     ToHelpFold = &PN;
722506c3fb27SDimitry Andric     TermValueS = TermValueSLocal;
722606c3fb27SDimitry Andric     MustDropPoison = MustDropPoisonLocal;
7227bdd1243dSDimitry Andric   }
7228bdd1243dSDimitry Andric 
7229bdd1243dSDimitry Andric   LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
7230bdd1243dSDimitry Andric              << "Cannot find other AddRec IV to help folding\n";);
7231bdd1243dSDimitry Andric 
7232bdd1243dSDimitry Andric   LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
7233bdd1243dSDimitry Andric              << "\nFound loop that can fold terminating condition\n"
7234bdd1243dSDimitry Andric              << "  BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n"
7235bdd1243dSDimitry Andric              << "  TermCond: " << *TermCond << "\n"
7236bdd1243dSDimitry Andric              << "  BranchInst: " << *BI << "\n"
7237bdd1243dSDimitry Andric              << "  ToFold: " << *ToFold << "\n"
7238bdd1243dSDimitry Andric              << "  ToHelpFold: " << *ToHelpFold << "\n");
7239bdd1243dSDimitry Andric 
7240bdd1243dSDimitry Andric   if (!ToFold || !ToHelpFold)
7241bdd1243dSDimitry Andric     return std::nullopt;
724206c3fb27SDimitry Andric   return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
7243bdd1243dSDimitry Andric }
7244bdd1243dSDimitry Andric 
72450b57cec5SDimitry Andric static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
72460b57cec5SDimitry Andric                                DominatorTree &DT, LoopInfo &LI,
72470b57cec5SDimitry Andric                                const TargetTransformInfo &TTI,
72485ffd83dbSDimitry Andric                                AssumptionCache &AC, TargetLibraryInfo &TLI,
72495ffd83dbSDimitry Andric                                MemorySSA *MSSA) {
72500b57cec5SDimitry Andric 
7251fe6060f1SDimitry Andric   // Debug preservation - before we start removing anything, identify which
7252fe6060f1SDimitry Andric   // DVIs meet the salvageable criteria and store their DIExpression and SCEVs.
725381ad6265SDimitry Andric   SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
7254fe6060f1SDimitry Andric   SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
725581ad6265SDimitry Andric   DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);
7256fe6060f1SDimitry Andric 
72570b57cec5SDimitry Andric   bool Changed = false;
72585ffd83dbSDimitry Andric   std::unique_ptr<MemorySSAUpdater> MSSAU;
72595ffd83dbSDimitry Andric   if (MSSA)
72605ffd83dbSDimitry Andric     MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);
72610b57cec5SDimitry Andric 
72620b57cec5SDimitry Andric   // Run the main LSR transformation.
7263fe6060f1SDimitry Andric   const LSRInstance &Reducer =
7264fe6060f1SDimitry Andric       LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
7265fe6060f1SDimitry Andric   Changed |= Reducer.getChanged();
7266e8d8bef9SDimitry Andric 
72670b57cec5SDimitry Andric   // Remove any extra phis created by processing inner loops.
72685ffd83dbSDimitry Andric Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); 72690b57cec5SDimitry Andric if (EnablePhiElim && L->isLoopSimplifyForm()) { 72700b57cec5SDimitry Andric SmallVector<WeakTrackingVH, 16> DeadInsts; 72710fca6ea1SDimitry Andric const DataLayout &DL = L->getHeader()->getDataLayout(); 7272e8d8bef9SDimitry Andric SCEVExpander Rewriter(SE, DL, "lsr", false); 72730b57cec5SDimitry Andric #ifndef NDEBUG 72740b57cec5SDimitry Andric Rewriter.setDebugType(DEBUG_TYPE); 72750b57cec5SDimitry Andric #endif 72760b57cec5SDimitry Andric unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI); 72770fca6ea1SDimitry Andric Rewriter.clear(); 72780b57cec5SDimitry Andric if (numFolded) { 72790b57cec5SDimitry Andric Changed = true; 72805ffd83dbSDimitry Andric RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI, 72815ffd83dbSDimitry Andric MSSAU.get()); 72825ffd83dbSDimitry Andric DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); 72830b57cec5SDimitry Andric } 72840b57cec5SDimitry Andric } 728581ad6265SDimitry Andric // LSR may at times remove all uses of an induction variable from a loop. 728681ad6265SDimitry Andric // The only remaining use is the PHI in the exit block. 728781ad6265SDimitry Andric // When this is the case, if the exit value of the IV can be calculated using 728881ad6265SDimitry Andric // SCEV, we can replace the exit block PHI with the final value of the IV and 728981ad6265SDimitry Andric // skip the updates in each loop iteration. 7290753f127fSDimitry Andric if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) { 729181ad6265SDimitry Andric SmallVector<WeakTrackingVH, 16> DeadInsts; 72920fca6ea1SDimitry Andric const DataLayout &DL = L->getHeader()->getDataLayout(); 7293bdd1243dSDimitry Andric SCEVExpander Rewriter(SE, DL, "lsr", true); 729481ad6265SDimitry Andric int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT, 7295753f127fSDimitry Andric UnusedIndVarInLoop, DeadInsts); 72960fca6ea1SDimitry Andric Rewriter.clear(); 729781ad6265SDimitry Andric if (Rewrites) { 729881ad6265SDimitry Andric Changed = true; 729981ad6265SDimitry Andric RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI, 730081ad6265SDimitry Andric MSSAU.get()); 730181ad6265SDimitry Andric DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get()); 730281ad6265SDimitry Andric } 730381ad6265SDimitry Andric } 7304e8d8bef9SDimitry Andric 73055f757f3fSDimitry Andric const bool EnableFormTerm = [&] { 73065f757f3fSDimitry Andric switch (AllowTerminatingConditionFoldingAfterLSR) { 73075f757f3fSDimitry Andric case cl::BOU_TRUE: 73085f757f3fSDimitry Andric return true; 73095f757f3fSDimitry Andric case cl::BOU_FALSE: 73105f757f3fSDimitry Andric return false; 73115f757f3fSDimitry Andric case cl::BOU_UNSET: 73125f757f3fSDimitry Andric return TTI.shouldFoldTerminatingConditionAfterLSR(); 73135f757f3fSDimitry Andric } 73145f757f3fSDimitry Andric llvm_unreachable("Unhandled cl::boolOrDefault enum"); 73155f757f3fSDimitry Andric }(); 73165f757f3fSDimitry Andric 73175f757f3fSDimitry Andric if (EnableFormTerm) { 73180fca6ea1SDimitry Andric if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI, TTI)) { 731906c3fb27SDimitry Andric auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt; 7320bdd1243dSDimitry Andric 7321bdd1243dSDimitry Andric Changed = true; 7322bdd1243dSDimitry Andric NumTermFold++; 7323bdd1243dSDimitry Andric 7324bdd1243dSDimitry Andric BasicBlock *LoopPreheader = L->getLoopPreheader(); 7325bdd1243dSDimitry Andric BasicBlock 
*LoopLatch = L->getLoopLatch();
7326bdd1243dSDimitry Andric 
7327bdd1243dSDimitry Andric       (void)ToFold;
7328bdd1243dSDimitry Andric       LLVM_DEBUG(dbgs() << "To fold phi-node:\n"
7329bdd1243dSDimitry Andric                         << *ToFold << "\n"
7330bdd1243dSDimitry Andric                         << "New term-cond phi-node:\n"
7331bdd1243dSDimitry Andric                         << *ToHelpFold << "\n");
7332bdd1243dSDimitry Andric 
7333bdd1243dSDimitry Andric       Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
7334bdd1243dSDimitry Andric       (void)StartValue;
7335bdd1243dSDimitry Andric       Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);
7336bdd1243dSDimitry Andric 
733706c3fb27SDimitry Andric       // See comment in canFoldTermCondOfLoop on why this is sufficient.
733806c3fb27SDimitry Andric       if (MustDrop)
733906c3fb27SDimitry Andric         cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();
734006c3fb27SDimitry Andric 
7341bdd1243dSDimitry Andric       // SCEVExpander for use in both the preheader and the latch.
73420fca6ea1SDimitry Andric       const DataLayout &DL = L->getHeader()->getDataLayout();
7343bdd1243dSDimitry Andric       SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");
7344bdd1243dSDimitry Andric 
7345bdd1243dSDimitry Andric       assert(Expander.isSafeToExpand(TermValueS) &&
7346bdd1243dSDimitry Andric              "Terminating value was checked safe in canFoldTermCondOfLoop");
7347bdd1243dSDimitry Andric 
73487a6dacacSDimitry Andric       // Create the new terminating value in the loop preheader.
7349bdd1243dSDimitry Andric       Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(),
7350bdd1243dSDimitry Andric                                                 LoopPreheader->getTerminator());
7351bdd1243dSDimitry Andric 
7352bdd1243dSDimitry Andric       LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
7353bdd1243dSDimitry Andric                         << *StartValue << "\n"
7354bdd1243dSDimitry Andric                         << "Terminating value of new term-cond phi-node:\n"
7355bdd1243dSDimitry Andric                         << *TermValue << "\n");
7356bdd1243dSDimitry Andric 
7357bdd1243dSDimitry Andric       // Create the new terminating condition at the loop latch.
7358bdd1243dSDimitry Andric       BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
7359bdd1243dSDimitry Andric       ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
7360bdd1243dSDimitry Andric       IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
736106c3fb27SDimitry Andric       Value *NewTermCond =
736206c3fb27SDimitry Andric           LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
7363bdd1243dSDimitry Andric                                   "lsr_fold_term_cond.replaced_term_cond");
736406c3fb27SDimitry Andric       // Swap successors to exit the loop body if the IV equals the new TermValue.
736506c3fb27SDimitry Andric       if (BI->getSuccessor(0) == L->getHeader())
736606c3fb27SDimitry Andric         BI->swapSuccessors();
7367bdd1243dSDimitry Andric 
7368bdd1243dSDimitry Andric       LLVM_DEBUG(dbgs() << "Old term-cond:\n"
7369bdd1243dSDimitry Andric                         << *OldTermCond << "\n"
7370cb14a3feSDimitry Andric                         << "New term-cond:\n" << *NewTermCond << "\n");
7371bdd1243dSDimitry Andric 
7372bdd1243dSDimitry Andric       BI->setCondition(NewTermCond);
7373bdd1243dSDimitry Andric 
73740fca6ea1SDimitry Andric       Expander.clear();
7375bdd1243dSDimitry Andric       OldTermCond->eraseFromParent();
7376bdd1243dSDimitry Andric       DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
7377bdd1243dSDimitry Andric     }
7378bdd1243dSDimitry Andric   }
7379bdd1243dSDimitry Andric 
738081ad6265SDimitry Andric   if (SalvageableDVIRecords.empty())
7381fe6060f1SDimitry Andric     return Changed;
7382e8d8bef9SDimitry Andric 
7383fe6060f1SDimitry Andric   // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
7384fe6060f1SDimitry Andric // expressions composed using the derived iteration count. 7385fe6060f1SDimitry Andric // TODO: Allow for multiple IV references for nested AddRecSCEVs 7386bdd1243dSDimitry Andric for (const auto &L : LI) { 7387fe6060f1SDimitry Andric if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer)) 738881ad6265SDimitry Andric DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords); 7389fe6060f1SDimitry Andric else { 7390fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV " 7391fe6060f1SDimitry Andric "could not be identified.\n"); 7392fe6060f1SDimitry Andric } 7393fe6060f1SDimitry Andric } 7394fe6060f1SDimitry Andric 739581ad6265SDimitry Andric for (auto &Rec : SalvageableDVIRecords) 739681ad6265SDimitry Andric Rec->clear(); 739781ad6265SDimitry Andric SalvageableDVIRecords.clear(); 7398fe6060f1SDimitry Andric DVIHandles.clear(); 73990b57cec5SDimitry Andric return Changed; 74000b57cec5SDimitry Andric } 74010b57cec5SDimitry Andric 74020b57cec5SDimitry Andric bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) { 74030b57cec5SDimitry Andric if (skipLoop(L)) 74040b57cec5SDimitry Andric return false; 74050b57cec5SDimitry Andric 74060b57cec5SDimitry Andric auto &IU = getAnalysis<IVUsersWrapperPass>().getIU(); 74070b57cec5SDimitry Andric auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 74080b57cec5SDimitry Andric auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 74090b57cec5SDimitry Andric auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo(); 74100b57cec5SDimitry Andric const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI( 74110b57cec5SDimitry Andric *L->getHeader()->getParent()); 74120b57cec5SDimitry Andric auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache( 74130b57cec5SDimitry Andric *L->getHeader()->getParent()); 74145ffd83dbSDimitry Andric auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI( 74158bcb0991SDimitry Andric *L->getHeader()->getParent()); 74165ffd83dbSDimitry Andric auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>(); 74175ffd83dbSDimitry Andric MemorySSA *MSSA = nullptr; 74185ffd83dbSDimitry Andric if (MSSAAnalysis) 74195ffd83dbSDimitry Andric MSSA = &MSSAAnalysis->getMSSA(); 74205ffd83dbSDimitry Andric return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA); 74210b57cec5SDimitry Andric } 74220b57cec5SDimitry Andric 74230b57cec5SDimitry Andric PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM, 74240b57cec5SDimitry Andric LoopStandardAnalysisResults &AR, 74250b57cec5SDimitry Andric LPMUpdater &) { 74260b57cec5SDimitry Andric if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE, 74275ffd83dbSDimitry Andric AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA)) 74280b57cec5SDimitry Andric return PreservedAnalyses::all(); 74290b57cec5SDimitry Andric 74305ffd83dbSDimitry Andric auto PA = getLoopPassPreservedAnalyses(); 74315ffd83dbSDimitry Andric if (AR.MSSA) 74325ffd83dbSDimitry Andric PA.preserve<MemorySSAAnalysis>(); 74335ffd83dbSDimitry Andric return PA; 74340b57cec5SDimitry Andric } 74350b57cec5SDimitry Andric 74360b57cec5SDimitry Andric char LoopStrengthReduce::ID = 0; 74370b57cec5SDimitry Andric 74380b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce", 74390b57cec5SDimitry Andric "Loop Strength Reduction", false, false) 74400b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 
74410b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 74420b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 74430b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass) 74440b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass) 74450b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(LoopSimplify) 74460b57cec5SDimitry Andric INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce", 74470b57cec5SDimitry Andric "Loop Strength Reduction", false, false) 74480b57cec5SDimitry Andric 74490b57cec5SDimitry Andric Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); } 7450
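// Illustrative usage note (an assumption, not part of the original file):
// the pass is registered above as "loop-reduce", so it can be exercised in
// isolation with the new pass manager, e.g.:
//   opt -passes=loop-reduce -S input.ll -o output.ll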