xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Scalar/LoopStrengthReduce.cpp (revision 36b606ae6aa4b24061096ba18582e0a08ccd5dba)
10b57cec5SDimitry Andric //===- LoopStrengthReduce.cpp - Strength Reduce IVs in Loops --------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This transformation analyzes and transforms the induction variables (and
100b57cec5SDimitry Andric // computations derived from them) into forms suitable for efficient execution
110b57cec5SDimitry Andric // on the target.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // This pass performs a strength reduction on array references inside loops that
140b57cec5SDimitry Andric // have as one or more of their components the loop induction variable, it
150b57cec5SDimitry Andric // rewrites expressions to take advantage of scaled-index addressing modes
160b57cec5SDimitry Andric // available on the target, and it performs a variety of other optimizations
170b57cec5SDimitry Andric // related to loop induction variables.
180b57cec5SDimitry Andric //
190b57cec5SDimitry Andric // Terminology note: this code has a lot of handling for "post-increment" or
200b57cec5SDimitry Andric // "post-inc" users. This is not talking about post-increment addressing modes;
210b57cec5SDimitry Andric // it is instead talking about code like this:
220b57cec5SDimitry Andric //
230b57cec5SDimitry Andric //   %i = phi [ 0, %entry ], [ %i.next, %latch ]
240b57cec5SDimitry Andric //   ...
250b57cec5SDimitry Andric //   %i.next = add %i, 1
260b57cec5SDimitry Andric //   %c = icmp eq %i.next, %n
270b57cec5SDimitry Andric //
280b57cec5SDimitry Andric // The SCEV for %i is {0,+,1}<%L>. The SCEV for %i.next is {1,+,1}<%L>, however
290b57cec5SDimitry Andric // it's useful to think about these as the same register, with some uses using
300b57cec5SDimitry Andric // the value of the register before the add and some using it after. In this
310b57cec5SDimitry Andric // example, the icmp is a post-increment user, since it uses %i.next, which is
320b57cec5SDimitry Andric // the value of the induction variable after the increment. The other common
330b57cec5SDimitry Andric // case of post-increment users is users outside the loop.
340b57cec5SDimitry Andric //
350b57cec5SDimitry Andric // TODO: More sophistication in the way Formulae are generated and filtered.
360b57cec5SDimitry Andric //
370b57cec5SDimitry Andric // TODO: Handle multiple loops at a time.
380b57cec5SDimitry Andric //
390b57cec5SDimitry Andric // TODO: Should the addressing mode BaseGV be changed to a ConstantExpr instead
400b57cec5SDimitry Andric //       of a GlobalValue?
410b57cec5SDimitry Andric //
420b57cec5SDimitry Andric // TODO: When truncation is free, truncate ICmp users' operands to make it a
430b57cec5SDimitry Andric //       smaller encoding (on x86 at least).
440b57cec5SDimitry Andric //
450b57cec5SDimitry Andric // TODO: When a negated register is used by an add (such as in a list of
460b57cec5SDimitry Andric //       multiple base registers, or as the increment expression in an addrec),
470b57cec5SDimitry Andric //       we may not actually need both reg and (-1 * reg) in registers; the
480b57cec5SDimitry Andric //       negation can be implemented by using a sub instead of an add. The
490b57cec5SDimitry Andric //       lack of support for taking this into consideration when making
500b57cec5SDimitry Andric //       register pressure decisions is partly worked around by the "Special"
510b57cec5SDimitry Andric //       use kind.
520b57cec5SDimitry Andric //
530b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
540b57cec5SDimitry Andric 
550b57cec5SDimitry Andric #include "llvm/Transforms/Scalar/LoopStrengthReduce.h"
560b57cec5SDimitry Andric #include "llvm/ADT/APInt.h"
570b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
580b57cec5SDimitry Andric #include "llvm/ADT/DenseSet.h"
590b57cec5SDimitry Andric #include "llvm/ADT/Hashing.h"
600b57cec5SDimitry Andric #include "llvm/ADT/PointerIntPair.h"
610b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h"
620b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h"
630b57cec5SDimitry Andric #include "llvm/ADT/SmallBitVector.h"
640b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h"
650b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h"
660b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
67bdd1243dSDimitry Andric #include "llvm/ADT/Statistic.h"
680b57cec5SDimitry Andric #include "llvm/ADT/iterator_range.h"
695ffd83dbSDimitry Andric #include "llvm/Analysis/AssumptionCache.h"
705f757f3fSDimitry Andric #include "llvm/Analysis/DomTreeUpdater.h"
710b57cec5SDimitry Andric #include "llvm/Analysis/IVUsers.h"
720b57cec5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h"
730b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h"
740b57cec5SDimitry Andric #include "llvm/Analysis/LoopPass.h"
755ffd83dbSDimitry Andric #include "llvm/Analysis/MemorySSA.h"
765ffd83dbSDimitry Andric #include "llvm/Analysis/MemorySSAUpdater.h"
770b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h"
780b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h"
790b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionNormalization.h"
80e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
810b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
82fe6060f1SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
8381ad6265SDimitry Andric #include "llvm/BinaryFormat/Dwarf.h"
840b57cec5SDimitry Andric #include "llvm/Config/llvm-config.h"
850b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h"
860b57cec5SDimitry Andric #include "llvm/IR/Constant.h"
870b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
88e8d8bef9SDimitry Andric #include "llvm/IR/DebugInfoMetadata.h"
890b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h"
900b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
910b57cec5SDimitry Andric #include "llvm/IR/GlobalValue.h"
920b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
930b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h"
940b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
950b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
960b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
970b57cec5SDimitry Andric #include "llvm/IR/Module.h"
980b57cec5SDimitry Andric #include "llvm/IR/Operator.h"
990b57cec5SDimitry Andric #include "llvm/IR/PassManager.h"
1000b57cec5SDimitry Andric #include "llvm/IR/Type.h"
1010b57cec5SDimitry Andric #include "llvm/IR/Use.h"
1020b57cec5SDimitry Andric #include "llvm/IR/User.h"
1030b57cec5SDimitry Andric #include "llvm/IR/Value.h"
1040b57cec5SDimitry Andric #include "llvm/IR/ValueHandle.h"
105480093f4SDimitry Andric #include "llvm/InitializePasses.h"
1060b57cec5SDimitry Andric #include "llvm/Pass.h"
1070b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
1080b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
1090b57cec5SDimitry Andric #include "llvm/Support/Compiler.h"
1100b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
1110b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h"
1120b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
1130b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
1140b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h"
1150b57cec5SDimitry Andric #include "llvm/Transforms/Utils.h"
1160b57cec5SDimitry Andric #include "llvm/Transforms/Utils/BasicBlockUtils.h"
117480093f4SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
11881ad6265SDimitry Andric #include "llvm/Transforms/Utils/LoopUtils.h"
1195ffd83dbSDimitry Andric #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
1200b57cec5SDimitry Andric #include <algorithm>
1210b57cec5SDimitry Andric #include <cassert>
1220b57cec5SDimitry Andric #include <cstddef>
1230b57cec5SDimitry Andric #include <cstdint>
1240b57cec5SDimitry Andric #include <iterator>
1250b57cec5SDimitry Andric #include <limits>
1260b57cec5SDimitry Andric #include <map>
127480093f4SDimitry Andric #include <numeric>
128bdd1243dSDimitry Andric #include <optional>
1290b57cec5SDimitry Andric #include <utility>
1300b57cec5SDimitry Andric 
1310b57cec5SDimitry Andric using namespace llvm;
1320b57cec5SDimitry Andric 
1330b57cec5SDimitry Andric #define DEBUG_TYPE "loop-reduce"
1340b57cec5SDimitry Andric 
/// MaxIVUsers is an arbitrary threshold that provides an early opportunity for
/// bail out. This threshold is far beyond the number of users that LSR can
/// conceivably solve, so it should not affect generated code, but catches the
/// worst cases before LSR burns too much compile time and stack space.
static const unsigned MaxIVUsers = 200;

/// Limit the size of expression that SCEV-based salvaging will attempt to
/// translate into a DIExpression.
/// Choose a maximum size such that debuginfo is not excessively increased and
/// the salvaging is not too expensive for the compiler.
static const unsigned MaxSCEVSalvageExpressionSize = 64;

// Cleanup congruent phis after LSR phi expansion.
static cl::opt<bool> EnablePhiElim(
  "enable-lsr-phielim", cl::Hidden, cl::init(true),
  cl::desc("Enable LSR phi elimination"));

// The flag adds instruction count to solutions cost comparison.
static cl::opt<bool> InsnsCost(
  "lsr-insns-cost", cl::Hidden, cl::init(true),
  cl::desc("Add instruction count to a LSR cost model"));

// Flag to choose how to narrow complex lsr solution
static cl::opt<bool> LSRExpNarrow(
  "lsr-exp-narrow", cl::Hidden, cl::init(false),
  cl::desc("Narrow LSR complex solution using"
           " expectation of registers number"));

// Flag to narrow search space by filtering non-optimal formulae with
// the same ScaledReg and Scale.
static cl::opt<bool> FilterSameScaledReg(
    "lsr-filter-same-scaled-reg", cl::Hidden, cl::init(true),
    cl::desc("Narrow LSR search space by filtering non-optimal formulae"
             " with the same ScaledReg and Scale"));

// Debugging knob: force a particular addressing-mode preference instead of
// consulting the target. (NOTE(review): identifier spells "Addresing" — a
// long-standing typo; renaming would touch uses outside this chunk.)
static cl::opt<TTI::AddressingModeKind> PreferredAddresingMode(
  "lsr-preferred-addressing-mode", cl::Hidden, cl::init(TTI::AMK_None),
   cl::desc("A flag that overrides the target's preferred addressing mode."),
   cl::values(clEnumValN(TTI::AMK_None,
                         "none",
                         "Don't prefer any addressing mode"),
              clEnumValN(TTI::AMK_PreIndexed,
                         "preindexed",
                         "Prefer pre-indexed addressing mode"),
              clEnumValN(TTI::AMK_PostIndexed,
                         "postindexed",
                         "Prefer post-indexed addressing mode")));

// Upper bound on the size of the formula search space before LSR starts
// pruning it; defaults to 65535.
static cl::opt<unsigned> ComplexityLimit(
  "lsr-complexity-limit", cl::Hidden,
  cl::init(std::numeric_limits<uint16_t>::max()),
  cl::desc("LSR search space complexity limit"));

static cl::opt<unsigned> SetupCostDepthLimit(
    "lsr-setupcost-depth-limit", cl::Hidden, cl::init(7),
    cl::desc("The limit on recursion depth for LSRs setup cost"));

// Tri-state (unset/true/false) so the target's default can apply when the
// flag is not given on the command line.
static cl::opt<cl::boolOrDefault> AllowTerminatingConditionFoldingAfterLSR(
    "lsr-term-fold", cl::Hidden,
    cl::desc("Attempt to replace primary IV with other IV."));

static cl::opt<cl::boolOrDefault> AllowDropSolutionIfLessProfitable(
    "lsr-drop-solution", cl::Hidden,
    cl::desc("Attempt to drop solution if it is less profitable"));

static cl::opt<bool> EnableVScaleImmediates(
    "lsr-enable-vscale-immediates", cl::Hidden, cl::init(true),
    cl::desc("Enable analysis of vscale-relative immediates in LSR"));

static cl::opt<bool> DropScaledForVScale(
    "lsr-drop-scaled-reg-for-vscale", cl::Hidden, cl::init(true),
    cl::desc("Avoid using scaled registers with vscale-relative addressing"));

STATISTIC(NumTermFold,
          "Number of terminating condition fold recognized and performed");

#ifndef NDEBUG
// Stress test IV chain generation.
static cl::opt<bool> StressIVChain(
  "stress-ivchain", cl::Hidden, cl::init(false),
  cl::desc("Stress test LSR IV chains"));
#else
// In release builds the flag degenerates to a compile-time constant so the
// stress path folds away.
static bool StressIVChain = false;
#endif
2190b57cec5SDimitry Andric 
2200b57cec5SDimitry Andric namespace {
2210b57cec5SDimitry Andric 
2220b57cec5SDimitry Andric struct MemAccessTy {
2230b57cec5SDimitry Andric   /// Used in situations where the accessed memory type is unknown.
2240b57cec5SDimitry Andric   static const unsigned UnknownAddressSpace =
2250b57cec5SDimitry Andric       std::numeric_limits<unsigned>::max();
2260b57cec5SDimitry Andric 
2270b57cec5SDimitry Andric   Type *MemTy = nullptr;
2280b57cec5SDimitry Andric   unsigned AddrSpace = UnknownAddressSpace;
2290b57cec5SDimitry Andric 
2300b57cec5SDimitry Andric   MemAccessTy() = default;
2310b57cec5SDimitry Andric   MemAccessTy(Type *Ty, unsigned AS) : MemTy(Ty), AddrSpace(AS) {}
2320b57cec5SDimitry Andric 
2330b57cec5SDimitry Andric   bool operator==(MemAccessTy Other) const {
2340b57cec5SDimitry Andric     return MemTy == Other.MemTy && AddrSpace == Other.AddrSpace;
2350b57cec5SDimitry Andric   }
2360b57cec5SDimitry Andric 
2370b57cec5SDimitry Andric   bool operator!=(MemAccessTy Other) const { return !(*this == Other); }
2380b57cec5SDimitry Andric 
2390b57cec5SDimitry Andric   static MemAccessTy getUnknown(LLVMContext &Ctx,
2400b57cec5SDimitry Andric                                 unsigned AS = UnknownAddressSpace) {
2410b57cec5SDimitry Andric     return MemAccessTy(Type::getVoidTy(Ctx), AS);
2420b57cec5SDimitry Andric   }
2430b57cec5SDimitry Andric 
2440b57cec5SDimitry Andric   Type *getType() { return MemTy; }
2450b57cec5SDimitry Andric };
2460b57cec5SDimitry Andric 
2470b57cec5SDimitry Andric /// This class holds data which is used to order reuse candidates.
class RegSortData {
public:
  /// This represents the set of LSRUse indices which reference
  /// a particular register.
  SmallBitVector UsedByIndices;

  /// Print a short summary (the number of referencing uses) to \p OS.
  void print(raw_ostream &OS) const;
  /// Print the summary to errs(); intended for debugger invocation.
  void dump() const;
};
2570b57cec5SDimitry Andric 
2580fca6ea1SDimitry Andric // An offset from an address that is either scalable or fixed. Used for
2590fca6ea1SDimitry Andric // per-target optimizations of addressing modes.
// An offset from an address that is either scalable or fixed. Used for
// per-target optimizations of addressing modes.
class Immediate : public details::FixedOrScalableQuantity<Immediate, int64_t> {
  // Private: construction goes through the named factories below so every
  // call site states whether the quantity is fixed or vscale-scaled.
  constexpr Immediate(ScalarTy MinVal, bool Scalable)
      : FixedOrScalableQuantity(MinVal, Scalable) {}

  constexpr Immediate(const FixedOrScalableQuantity<Immediate, int64_t> &V)
      : FixedOrScalableQuantity(V) {}

public:
  // There is no meaningful "default" immediate; force explicit creation.
  constexpr Immediate() = delete;

  /// A compile-time-constant offset of \p MinVal bytes.
  static constexpr Immediate getFixed(ScalarTy MinVal) {
    return {MinVal, false};
  }
  /// An offset of \p MinVal * vscale bytes.
  static constexpr Immediate getScalable(ScalarTy MinVal) {
    return {MinVal, true};
  }
  static constexpr Immediate get(ScalarTy MinVal, bool Scalable) {
    return {MinVal, Scalable};
  }
  static constexpr Immediate getZero() { return {0, false}; }
  static constexpr Immediate getFixedMin() {
    return {std::numeric_limits<int64_t>::min(), false};
  }
  static constexpr Immediate getFixedMax() {
    return {std::numeric_limits<int64_t>::max(), false};
  }
  static constexpr Immediate getScalableMin() {
    return {std::numeric_limits<int64_t>::min(), true};
  }
  static constexpr Immediate getScalableMax() {
    return {std::numeric_limits<int64_t>::max(), true};
  }

  constexpr bool isLessThanZero() const { return Quantity < 0; }

  constexpr bool isGreaterThanZero() const { return Quantity > 0; }

  /// Two immediates may be combined when either is zero or both agree on
  /// scalability; mixing a nonzero fixed term with a nonzero scalable term
  /// is not representable in a single Immediate.
  constexpr bool isCompatibleImmediate(const Immediate &Imm) const {
    return isZero() || Imm.isZero() || Imm.Scalable == Scalable;
  }

  constexpr bool isMin() const {
    return Quantity == std::numeric_limits<ScalarTy>::min();
  }

  constexpr bool isMax() const {
    return Quantity == std::numeric_limits<ScalarTy>::max();
  }

  // Arithmetic 'operators' that cast to unsigned types first.
  // The unsigned cast gives well-defined wraparound instead of the UB of
  // signed overflow.
  constexpr Immediate addUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity + RHS.getKnownMinValue();
    // Zero operands are compatible with anything, so the result is scalable
    // if either side was.
    return {Value, Scalable || RHS.isScalable()};
  }

  constexpr Immediate subUnsigned(const Immediate &RHS) const {
    assert(isCompatibleImmediate(RHS) && "Incompatible Immediates");
    ScalarTy Value = (uint64_t)Quantity - RHS.getKnownMinValue();
    return {Value, Scalable || RHS.isScalable()};
  }

  // Scale the quantity by a constant without caring about runtime scalability.
  constexpr Immediate mulUnsigned(const ScalarTy RHS) const {
    ScalarTy Value = (uint64_t)Quantity * RHS;
    return {Value, Scalable};
  }

  // Helpers for generating SCEVs with vscale terms where needed.
  /// Constant SCEV for the quantity; multiplied by vscale when scalable.
  const SCEV *getSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *S = SE.getConstant(Ty, Quantity);
    if (Scalable)
      S = SE.getMulExpr(S, SE.getVScale(S->getType()));
    return S;
  }

  /// SCEV for the negated quantity (negation done in unsigned arithmetic so
  /// INT64_MIN wraps rather than overflowing).
  const SCEV *getNegativeSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *NegS = SE.getConstant(Ty, -(uint64_t)Quantity);
    if (Scalable)
      NegS = SE.getMulExpr(NegS, SE.getVScale(NegS->getType()));
    return NegS;
  }

  /// Like getSCEV but wraps the constant as a SCEVUnknown value.
  const SCEV *getUnknownSCEV(ScalarEvolution &SE, Type *Ty) const {
    const SCEV *SU = SE.getUnknown(ConstantInt::getSigned(Ty, Quantity));
    if (Scalable)
      SU = SE.getMulExpr(SU, SE.getVScale(SU->getType()));
    return SU;
  }
};
3500fca6ea1SDimitry Andric 
3510fca6ea1SDimitry Andric // This is needed for the Compare type of std::map when Immediate is used
3520fca6ea1SDimitry Andric // as a key. We don't need it to be fully correct against any value of vscale,
3530fca6ea1SDimitry Andric // just to make sure that vscale-related terms in the map are considered against
3540fca6ea1SDimitry Andric // each other rather than being mixed up and potentially missing opportunities.
3550fca6ea1SDimitry Andric struct KeyOrderTargetImmediate {
3560fca6ea1SDimitry Andric   bool operator()(const Immediate &LHS, const Immediate &RHS) const {
3570fca6ea1SDimitry Andric     if (LHS.isScalable() && !RHS.isScalable())
3580fca6ea1SDimitry Andric       return false;
3590fca6ea1SDimitry Andric     if (!LHS.isScalable() && RHS.isScalable())
3600fca6ea1SDimitry Andric       return true;
3610fca6ea1SDimitry Andric     return LHS.getKnownMinValue() < RHS.getKnownMinValue();
3620fca6ea1SDimitry Andric   }
3630fca6ea1SDimitry Andric };
3640fca6ea1SDimitry Andric 
3650fca6ea1SDimitry Andric // This would be nicer if we could be generic instead of directly using size_t,
3660fca6ea1SDimitry Andric // but there doesn't seem to be a type trait for is_orderable or
3670fca6ea1SDimitry Andric // is_lessthan_comparable or similar.
3680fca6ea1SDimitry Andric struct KeyOrderSizeTAndImmediate {
3690fca6ea1SDimitry Andric   bool operator()(const std::pair<size_t, Immediate> &LHS,
3700fca6ea1SDimitry Andric                   const std::pair<size_t, Immediate> &RHS) const {
3710fca6ea1SDimitry Andric     size_t LSize = LHS.first;
3720fca6ea1SDimitry Andric     size_t RSize = RHS.first;
3730fca6ea1SDimitry Andric     if (LSize != RSize)
3740fca6ea1SDimitry Andric       return LSize < RSize;
3750fca6ea1SDimitry Andric     return KeyOrderTargetImmediate()(LHS.second, RHS.second);
3760fca6ea1SDimitry Andric   }
3770fca6ea1SDimitry Andric };
3780b57cec5SDimitry Andric } // end anonymous namespace
3790b57cec5SDimitry Andric 
3800b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print a one-token summary: just the count of uses referencing the register.
void RegSortData::print(raw_ostream &OS) const {
  OS << "[NumUses=" << UsedByIndices.count() << ']';
}

// Debugger helper: print to stderr, followed by a newline.
LLVM_DUMP_METHOD void RegSortData::dump() const {
  print(errs()); errs() << '\n';
}
3880b57cec5SDimitry Andric #endif
3890b57cec5SDimitry Andric 
3900b57cec5SDimitry Andric namespace {
3910b57cec5SDimitry Andric 
3920b57cec5SDimitry Andric /// Map register candidates to information about how they are used.
class RegUseTracker {
  using RegUsesTy = DenseMap<const SCEV *, RegSortData>;

  /// Per-register bookkeeping: the set of LSRUse indices referencing each
  /// register.
  RegUsesTy RegUsesMap;
  /// Registers in first-seen order, so iteration is deterministic rather
  /// than following DenseMap's hash order.
  SmallVector<const SCEV *, 16> RegSequence;

public:
  /// Record that the use at index \p LUIdx references \p Reg.
  void countRegister(const SCEV *Reg, size_t LUIdx);
  /// Remove use index \p LUIdx from \p Reg's use set (Reg must be tracked).
  void dropRegister(const SCEV *Reg, size_t LUIdx);
  /// Move the bookkeeping of use \p LastLUIdx into slot \p LUIdx and
  /// truncate every use set, matching a swap-and-pop on the uses list.
  void swapAndDropUse(size_t LUIdx, size_t LastLUIdx);

  /// Return true if \p Reg is referenced by at least one use other than
  /// \p LUIdx.
  bool isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const;

  /// Return the set of use indices referencing \p Reg; asserts that \p Reg
  /// is tracked.
  const SmallBitVector &getUsedByIndices(const SCEV *Reg) const;

  /// Drop all tracked registers and uses.
  void clear();

  // Iteration walks RegSequence, i.e. registers in first-seen order.
  using iterator = SmallVectorImpl<const SCEV *>::iterator;
  using const_iterator = SmallVectorImpl<const SCEV *>::const_iterator;

  iterator begin() { return RegSequence.begin(); }
  iterator end()   { return RegSequence.end(); }
  const_iterator begin() const { return RegSequence.begin(); }
  const_iterator end() const   { return RegSequence.end(); }
};
4180b57cec5SDimitry Andric 
4190b57cec5SDimitry Andric } // end anonymous namespace
4200b57cec5SDimitry Andric 
4210b57cec5SDimitry Andric void
4220b57cec5SDimitry Andric RegUseTracker::countRegister(const SCEV *Reg, size_t LUIdx) {
4230b57cec5SDimitry Andric   std::pair<RegUsesTy::iterator, bool> Pair =
4240b57cec5SDimitry Andric     RegUsesMap.insert(std::make_pair(Reg, RegSortData()));
4250b57cec5SDimitry Andric   RegSortData &RSD = Pair.first->second;
4260b57cec5SDimitry Andric   if (Pair.second)
4270b57cec5SDimitry Andric     RegSequence.push_back(Reg);
4280b57cec5SDimitry Andric   RSD.UsedByIndices.resize(std::max(RSD.UsedByIndices.size(), LUIdx + 1));
4290b57cec5SDimitry Andric   RSD.UsedByIndices.set(LUIdx);
4300b57cec5SDimitry Andric }
4310b57cec5SDimitry Andric 
4320b57cec5SDimitry Andric void
4330b57cec5SDimitry Andric RegUseTracker::dropRegister(const SCEV *Reg, size_t LUIdx) {
4340b57cec5SDimitry Andric   RegUsesTy::iterator It = RegUsesMap.find(Reg);
4350b57cec5SDimitry Andric   assert(It != RegUsesMap.end());
4360b57cec5SDimitry Andric   RegSortData &RSD = It->second;
4370b57cec5SDimitry Andric   assert(RSD.UsedByIndices.size() > LUIdx);
4380b57cec5SDimitry Andric   RSD.UsedByIndices.reset(LUIdx);
4390b57cec5SDimitry Andric }
4400b57cec5SDimitry Andric 
4410b57cec5SDimitry Andric void
4420b57cec5SDimitry Andric RegUseTracker::swapAndDropUse(size_t LUIdx, size_t LastLUIdx) {
4430b57cec5SDimitry Andric   assert(LUIdx <= LastLUIdx);
4440b57cec5SDimitry Andric 
4450b57cec5SDimitry Andric   // Update RegUses. The data structure is not optimized for this purpose;
4460b57cec5SDimitry Andric   // we must iterate through it and update each of the bit vectors.
4470b57cec5SDimitry Andric   for (auto &Pair : RegUsesMap) {
4480b57cec5SDimitry Andric     SmallBitVector &UsedByIndices = Pair.second.UsedByIndices;
4490b57cec5SDimitry Andric     if (LUIdx < UsedByIndices.size())
4500b57cec5SDimitry Andric       UsedByIndices[LUIdx] =
4510b57cec5SDimitry Andric         LastLUIdx < UsedByIndices.size() ? UsedByIndices[LastLUIdx] : false;
4520b57cec5SDimitry Andric     UsedByIndices.resize(std::min(UsedByIndices.size(), LastLUIdx));
4530b57cec5SDimitry Andric   }
4540b57cec5SDimitry Andric }
4550b57cec5SDimitry Andric 
4560b57cec5SDimitry Andric bool
4570b57cec5SDimitry Andric RegUseTracker::isRegUsedByUsesOtherThan(const SCEV *Reg, size_t LUIdx) const {
4580b57cec5SDimitry Andric   RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
4590b57cec5SDimitry Andric   if (I == RegUsesMap.end())
4600b57cec5SDimitry Andric     return false;
4610b57cec5SDimitry Andric   const SmallBitVector &UsedByIndices = I->second.UsedByIndices;
4620b57cec5SDimitry Andric   int i = UsedByIndices.find_first();
4630b57cec5SDimitry Andric   if (i == -1) return false;
4640b57cec5SDimitry Andric   if ((size_t)i != LUIdx) return true;
4650b57cec5SDimitry Andric   return UsedByIndices.find_next(i) != -1;
4660b57cec5SDimitry Andric }
4670b57cec5SDimitry Andric 
4680b57cec5SDimitry Andric const SmallBitVector &RegUseTracker::getUsedByIndices(const SCEV *Reg) const {
4690b57cec5SDimitry Andric   RegUsesTy::const_iterator I = RegUsesMap.find(Reg);
4700b57cec5SDimitry Andric   assert(I != RegUsesMap.end() && "Unknown register!");
4710b57cec5SDimitry Andric   return I->second.UsedByIndices;
4720b57cec5SDimitry Andric }
4730b57cec5SDimitry Andric 
4740b57cec5SDimitry Andric void RegUseTracker::clear() {
4750b57cec5SDimitry Andric   RegUsesMap.clear();
4760b57cec5SDimitry Andric   RegSequence.clear();
4770b57cec5SDimitry Andric }
4780b57cec5SDimitry Andric 
4790b57cec5SDimitry Andric namespace {
4800b57cec5SDimitry Andric 
4810b57cec5SDimitry Andric /// This class holds information that describes a formula for computing
4820b57cec5SDimitry Andric /// satisfying a use. It may include broken-out immediates and scaled registers.
4830b57cec5SDimitry Andric struct Formula {
4840b57cec5SDimitry Andric   /// Global base address used for complex addressing.
4850b57cec5SDimitry Andric   GlobalValue *BaseGV = nullptr;
4860b57cec5SDimitry Andric 
4870b57cec5SDimitry Andric   /// Base offset for complex addressing.
4880fca6ea1SDimitry Andric   Immediate BaseOffset = Immediate::getZero();
4890b57cec5SDimitry Andric 
4900b57cec5SDimitry Andric   /// Whether any complex addressing has a base register.
4910b57cec5SDimitry Andric   bool HasBaseReg = false;
4920b57cec5SDimitry Andric 
4930b57cec5SDimitry Andric   /// The scale of any complex addressing.
4940b57cec5SDimitry Andric   int64_t Scale = 0;
4950b57cec5SDimitry Andric 
4960b57cec5SDimitry Andric   /// The list of "base" registers for this use. When this is non-empty. The
4970b57cec5SDimitry Andric   /// canonical representation of a formula is
4980b57cec5SDimitry Andric   /// 1. BaseRegs.size > 1 implies ScaledReg != NULL and
4990b57cec5SDimitry Andric   /// 2. ScaledReg != NULL implies Scale != 1 || !BaseRegs.empty().
  /// 3. The reg containing recurrent expr related with the current loop in the
5010b57cec5SDimitry Andric   /// formula should be put in the ScaledReg.
5020b57cec5SDimitry Andric   /// #1 enforces that the scaled register is always used when at least two
5030b57cec5SDimitry Andric   /// registers are needed by the formula: e.g., reg1 + reg2 is reg1 + 1 * reg2.
5040b57cec5SDimitry Andric   /// #2 enforces that 1 * reg is reg.
5050b57cec5SDimitry Andric   /// #3 ensures invariant regs with respect to current loop can be combined
5060b57cec5SDimitry Andric   /// together in LSR codegen.
5070b57cec5SDimitry Andric   /// This invariant can be temporarily broken while building a formula.
5080b57cec5SDimitry Andric   /// However, every formula inserted into the LSRInstance must be in canonical
5090b57cec5SDimitry Andric   /// form.
5100b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> BaseRegs;
5110b57cec5SDimitry Andric 
5120b57cec5SDimitry Andric   /// The 'scaled' register for this use. This should be non-null when Scale is
5130b57cec5SDimitry Andric   /// not zero.
5140b57cec5SDimitry Andric   const SCEV *ScaledReg = nullptr;
5150b57cec5SDimitry Andric 
5160b57cec5SDimitry Andric   /// An additional constant offset which added near the use. This requires a
5170b57cec5SDimitry Andric   /// temporary register, but the offset itself can live in an add immediate
5180b57cec5SDimitry Andric   /// field rather than a register.
5190fca6ea1SDimitry Andric   Immediate UnfoldedOffset = Immediate::getZero();
5200b57cec5SDimitry Andric 
5210b57cec5SDimitry Andric   Formula() = default;
5220b57cec5SDimitry Andric 
5230b57cec5SDimitry Andric   void initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE);
5240b57cec5SDimitry Andric 
5250b57cec5SDimitry Andric   bool isCanonical(const Loop &L) const;
5260b57cec5SDimitry Andric 
5270b57cec5SDimitry Andric   void canonicalize(const Loop &L);
5280b57cec5SDimitry Andric 
5290b57cec5SDimitry Andric   bool unscale();
5300b57cec5SDimitry Andric 
5310b57cec5SDimitry Andric   bool hasZeroEnd() const;
5320b57cec5SDimitry Andric 
5330b57cec5SDimitry Andric   size_t getNumRegs() const;
5340b57cec5SDimitry Andric   Type *getType() const;
5350b57cec5SDimitry Andric 
5360b57cec5SDimitry Andric   void deleteBaseReg(const SCEV *&S);
5370b57cec5SDimitry Andric 
5380b57cec5SDimitry Andric   bool referencesReg(const SCEV *S) const;
5390b57cec5SDimitry Andric   bool hasRegsUsedByUsesOtherThan(size_t LUIdx,
5400b57cec5SDimitry Andric                                   const RegUseTracker &RegUses) const;
5410b57cec5SDimitry Andric 
5420b57cec5SDimitry Andric   void print(raw_ostream &OS) const;
5430b57cec5SDimitry Andric   void dump() const;
5440b57cec5SDimitry Andric };
5450b57cec5SDimitry Andric 
5460b57cec5SDimitry Andric } // end anonymous namespace
5470b57cec5SDimitry Andric 
5480b57cec5SDimitry Andric /// Recursion helper for initialMatch.
5490b57cec5SDimitry Andric static void DoInitialMatch(const SCEV *S, Loop *L,
5500b57cec5SDimitry Andric                            SmallVectorImpl<const SCEV *> &Good,
5510b57cec5SDimitry Andric                            SmallVectorImpl<const SCEV *> &Bad,
5520b57cec5SDimitry Andric                            ScalarEvolution &SE) {
5530b57cec5SDimitry Andric   // Collect expressions which properly dominate the loop header.
5540b57cec5SDimitry Andric   if (SE.properlyDominates(S, L->getHeader())) {
5550b57cec5SDimitry Andric     Good.push_back(S);
5560b57cec5SDimitry Andric     return;
5570b57cec5SDimitry Andric   }
5580b57cec5SDimitry Andric 
5590b57cec5SDimitry Andric   // Look at add operands.
5600b57cec5SDimitry Andric   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
5610b57cec5SDimitry Andric     for (const SCEV *S : Add->operands())
5620b57cec5SDimitry Andric       DoInitialMatch(S, L, Good, Bad, SE);
5630b57cec5SDimitry Andric     return;
5640b57cec5SDimitry Andric   }
5650b57cec5SDimitry Andric 
5660b57cec5SDimitry Andric   // Look at addrec operands.
5670b57cec5SDimitry Andric   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
5680b57cec5SDimitry Andric     if (!AR->getStart()->isZero() && AR->isAffine()) {
5690b57cec5SDimitry Andric       DoInitialMatch(AR->getStart(), L, Good, Bad, SE);
5700b57cec5SDimitry Andric       DoInitialMatch(SE.getAddRecExpr(SE.getConstant(AR->getType(), 0),
5710b57cec5SDimitry Andric                                       AR->getStepRecurrence(SE),
5720b57cec5SDimitry Andric                                       // FIXME: AR->getNoWrapFlags()
5730b57cec5SDimitry Andric                                       AR->getLoop(), SCEV::FlagAnyWrap),
5740b57cec5SDimitry Andric                      L, Good, Bad, SE);
5750b57cec5SDimitry Andric       return;
5760b57cec5SDimitry Andric     }
5770b57cec5SDimitry Andric 
5780b57cec5SDimitry Andric   // Handle a multiplication by -1 (negation) if it didn't fold.
5790b57cec5SDimitry Andric   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S))
5800b57cec5SDimitry Andric     if (Mul->getOperand(0)->isAllOnesValue()) {
581e8d8bef9SDimitry Andric       SmallVector<const SCEV *, 4> Ops(drop_begin(Mul->operands()));
5820b57cec5SDimitry Andric       const SCEV *NewMul = SE.getMulExpr(Ops);
5830b57cec5SDimitry Andric 
5840b57cec5SDimitry Andric       SmallVector<const SCEV *, 4> MyGood;
5850b57cec5SDimitry Andric       SmallVector<const SCEV *, 4> MyBad;
5860b57cec5SDimitry Andric       DoInitialMatch(NewMul, L, MyGood, MyBad, SE);
5870b57cec5SDimitry Andric       const SCEV *NegOne = SE.getSCEV(ConstantInt::getAllOnesValue(
5880b57cec5SDimitry Andric         SE.getEffectiveSCEVType(NewMul->getType())));
5890b57cec5SDimitry Andric       for (const SCEV *S : MyGood)
5900b57cec5SDimitry Andric         Good.push_back(SE.getMulExpr(NegOne, S));
5910b57cec5SDimitry Andric       for (const SCEV *S : MyBad)
5920b57cec5SDimitry Andric         Bad.push_back(SE.getMulExpr(NegOne, S));
5930b57cec5SDimitry Andric       return;
5940b57cec5SDimitry Andric     }
5950b57cec5SDimitry Andric 
5960b57cec5SDimitry Andric   // Ok, we can't do anything interesting. Just stuff the whole thing into a
5970b57cec5SDimitry Andric   // register and hope for the best.
5980b57cec5SDimitry Andric   Bad.push_back(S);
5990b57cec5SDimitry Andric }
6000b57cec5SDimitry Andric 
6010b57cec5SDimitry Andric /// Incorporate loop-variant parts of S into this Formula, attempting to keep
6020b57cec5SDimitry Andric /// all loop-invariant and loop-computable values in a single base register.
6030b57cec5SDimitry Andric void Formula::initialMatch(const SCEV *S, Loop *L, ScalarEvolution &SE) {
6040b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> Good;
6050b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> Bad;
6060b57cec5SDimitry Andric   DoInitialMatch(S, L, Good, Bad, SE);
6070b57cec5SDimitry Andric   if (!Good.empty()) {
6080b57cec5SDimitry Andric     const SCEV *Sum = SE.getAddExpr(Good);
6090b57cec5SDimitry Andric     if (!Sum->isZero())
6100b57cec5SDimitry Andric       BaseRegs.push_back(Sum);
6110b57cec5SDimitry Andric     HasBaseReg = true;
6120b57cec5SDimitry Andric   }
6130b57cec5SDimitry Andric   if (!Bad.empty()) {
6140b57cec5SDimitry Andric     const SCEV *Sum = SE.getAddExpr(Bad);
6150b57cec5SDimitry Andric     if (!Sum->isZero())
6160b57cec5SDimitry Andric       BaseRegs.push_back(Sum);
6170b57cec5SDimitry Andric     HasBaseReg = true;
6180b57cec5SDimitry Andric   }
6190b57cec5SDimitry Andric   canonicalize(*L);
6200b57cec5SDimitry Andric }
6210b57cec5SDimitry Andric 
62281ad6265SDimitry Andric static bool containsAddRecDependentOnLoop(const SCEV *S, const Loop &L) {
62381ad6265SDimitry Andric   return SCEVExprContains(S, [&L](const SCEV *S) {
62481ad6265SDimitry Andric     return isa<SCEVAddRecExpr>(S) && (cast<SCEVAddRecExpr>(S)->getLoop() == &L);
62581ad6265SDimitry Andric   });
62681ad6265SDimitry Andric }
62781ad6265SDimitry Andric 
6280b57cec5SDimitry Andric /// Check whether or not this formula satisfies the canonical
6290b57cec5SDimitry Andric /// representation.
6300b57cec5SDimitry Andric /// \see Formula::BaseRegs.
6310b57cec5SDimitry Andric bool Formula::isCanonical(const Loop &L) const {
6320b57cec5SDimitry Andric   if (!ScaledReg)
6330b57cec5SDimitry Andric     return BaseRegs.size() <= 1;
6340b57cec5SDimitry Andric 
6350b57cec5SDimitry Andric   if (Scale != 1)
6360b57cec5SDimitry Andric     return true;
6370b57cec5SDimitry Andric 
6380b57cec5SDimitry Andric   if (Scale == 1 && BaseRegs.empty())
6390b57cec5SDimitry Andric     return false;
6400b57cec5SDimitry Andric 
64181ad6265SDimitry Andric   if (containsAddRecDependentOnLoop(ScaledReg, L))
6420b57cec5SDimitry Andric     return true;
6430b57cec5SDimitry Andric 
6440b57cec5SDimitry Andric   // If ScaledReg is not a recurrent expr, or it is but its loop is not current
6450b57cec5SDimitry Andric   // loop, meanwhile BaseRegs contains a recurrent expr reg related with current
6460b57cec5SDimitry Andric   // loop, we want to swap the reg in BaseRegs with ScaledReg.
64781ad6265SDimitry Andric   return none_of(BaseRegs, [&L](const SCEV *S) {
64881ad6265SDimitry Andric     return containsAddRecDependentOnLoop(S, L);
6490b57cec5SDimitry Andric   });
6500b57cec5SDimitry Andric }
6510b57cec5SDimitry Andric 
6520b57cec5SDimitry Andric /// Helper method to morph a formula into its canonical representation.
6530b57cec5SDimitry Andric /// \see Formula::BaseRegs.
6540b57cec5SDimitry Andric /// Every formula having more than one base register, must use the ScaledReg
6550b57cec5SDimitry Andric /// field. Otherwise, we would have to do special cases everywhere in LSR
6560b57cec5SDimitry Andric /// to treat reg1 + reg2 + ... the same way as reg1 + 1*reg2 + ...
6570b57cec5SDimitry Andric /// On the other hand, 1*reg should be canonicalized into reg.
6580b57cec5SDimitry Andric void Formula::canonicalize(const Loop &L) {
6590b57cec5SDimitry Andric   if (isCanonical(L))
6600b57cec5SDimitry Andric     return;
661fe6060f1SDimitry Andric 
662fe6060f1SDimitry Andric   if (BaseRegs.empty()) {
663fe6060f1SDimitry Andric     // No base reg? Use scale reg with scale = 1 as such.
664fe6060f1SDimitry Andric     assert(ScaledReg && "Expected 1*reg => reg");
665fe6060f1SDimitry Andric     assert(Scale == 1 && "Expected 1*reg => reg");
666fe6060f1SDimitry Andric     BaseRegs.push_back(ScaledReg);
667fe6060f1SDimitry Andric     Scale = 0;
668fe6060f1SDimitry Andric     ScaledReg = nullptr;
669fe6060f1SDimitry Andric     return;
670fe6060f1SDimitry Andric   }
6710b57cec5SDimitry Andric 
6720b57cec5SDimitry Andric   // Keep the invariant sum in BaseRegs and one of the variant sum in ScaledReg.
6730b57cec5SDimitry Andric   if (!ScaledReg) {
674e8d8bef9SDimitry Andric     ScaledReg = BaseRegs.pop_back_val();
6750b57cec5SDimitry Andric     Scale = 1;
6760b57cec5SDimitry Andric   }
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric   // If ScaledReg is an invariant with respect to L, find the reg from
6790b57cec5SDimitry Andric   // BaseRegs containing the recurrent expr related with Loop L. Swap the
6800b57cec5SDimitry Andric   // reg with ScaledReg.
68181ad6265SDimitry Andric   if (!containsAddRecDependentOnLoop(ScaledReg, L)) {
68281ad6265SDimitry Andric     auto I = find_if(BaseRegs, [&L](const SCEV *S) {
68381ad6265SDimitry Andric       return containsAddRecDependentOnLoop(S, L);
6840b57cec5SDimitry Andric     });
6850b57cec5SDimitry Andric     if (I != BaseRegs.end())
6860b57cec5SDimitry Andric       std::swap(ScaledReg, *I);
6870b57cec5SDimitry Andric   }
688fe6060f1SDimitry Andric   assert(isCanonical(L) && "Failed to canonicalize?");
6890b57cec5SDimitry Andric }
6900b57cec5SDimitry Andric 
6910b57cec5SDimitry Andric /// Get rid of the scale in the formula.
6920b57cec5SDimitry Andric /// In other words, this method morphes reg1 + 1*reg2 into reg1 + reg2.
6930b57cec5SDimitry Andric /// \return true if it was possible to get rid of the scale, false otherwise.
6940b57cec5SDimitry Andric /// \note After this operation the formula may not be in the canonical form.
6950b57cec5SDimitry Andric bool Formula::unscale() {
6960b57cec5SDimitry Andric   if (Scale != 1)
6970b57cec5SDimitry Andric     return false;
6980b57cec5SDimitry Andric   Scale = 0;
6990b57cec5SDimitry Andric   BaseRegs.push_back(ScaledReg);
7000b57cec5SDimitry Andric   ScaledReg = nullptr;
7010b57cec5SDimitry Andric   return true;
7020b57cec5SDimitry Andric }
7030b57cec5SDimitry Andric 
7040b57cec5SDimitry Andric bool Formula::hasZeroEnd() const {
7050b57cec5SDimitry Andric   if (UnfoldedOffset || BaseOffset)
7060b57cec5SDimitry Andric     return false;
7070b57cec5SDimitry Andric   if (BaseRegs.size() != 1 || ScaledReg)
7080b57cec5SDimitry Andric     return false;
7090b57cec5SDimitry Andric   return true;
7100b57cec5SDimitry Andric }
7110b57cec5SDimitry Andric 
7120b57cec5SDimitry Andric /// Return the total number of register operands used by this formula. This does
7130b57cec5SDimitry Andric /// not include register uses implied by non-constant addrec strides.
7140b57cec5SDimitry Andric size_t Formula::getNumRegs() const {
7150b57cec5SDimitry Andric   return !!ScaledReg + BaseRegs.size();
7160b57cec5SDimitry Andric }
7170b57cec5SDimitry Andric 
7180b57cec5SDimitry Andric /// Return the type of this formula, if it has one, or null otherwise. This type
7190b57cec5SDimitry Andric /// is meaningless except for the bit size.
7200b57cec5SDimitry Andric Type *Formula::getType() const {
7210b57cec5SDimitry Andric   return !BaseRegs.empty() ? BaseRegs.front()->getType() :
7220b57cec5SDimitry Andric          ScaledReg ? ScaledReg->getType() :
7230b57cec5SDimitry Andric          BaseGV ? BaseGV->getType() :
7240b57cec5SDimitry Andric          nullptr;
7250b57cec5SDimitry Andric }
7260b57cec5SDimitry Andric 
7270b57cec5SDimitry Andric /// Delete the given base reg from the BaseRegs list.
7280b57cec5SDimitry Andric void Formula::deleteBaseReg(const SCEV *&S) {
7290b57cec5SDimitry Andric   if (&S != &BaseRegs.back())
7300b57cec5SDimitry Andric     std::swap(S, BaseRegs.back());
7310b57cec5SDimitry Andric   BaseRegs.pop_back();
7320b57cec5SDimitry Andric }
7330b57cec5SDimitry Andric 
7340b57cec5SDimitry Andric /// Test if this formula references the given register.
7350b57cec5SDimitry Andric bool Formula::referencesReg(const SCEV *S) const {
7360b57cec5SDimitry Andric   return S == ScaledReg || is_contained(BaseRegs, S);
7370b57cec5SDimitry Andric }
7380b57cec5SDimitry Andric 
7390b57cec5SDimitry Andric /// Test whether this formula uses registers which are used by uses other than
7400b57cec5SDimitry Andric /// the use with the given index.
7410b57cec5SDimitry Andric bool Formula::hasRegsUsedByUsesOtherThan(size_t LUIdx,
7420b57cec5SDimitry Andric                                          const RegUseTracker &RegUses) const {
7430b57cec5SDimitry Andric   if (ScaledReg)
7440b57cec5SDimitry Andric     if (RegUses.isRegUsedByUsesOtherThan(ScaledReg, LUIdx))
7450b57cec5SDimitry Andric       return true;
7460b57cec5SDimitry Andric   for (const SCEV *BaseReg : BaseRegs)
7470b57cec5SDimitry Andric     if (RegUses.isRegUsedByUsesOtherThan(BaseReg, LUIdx))
7480b57cec5SDimitry Andric       return true;
7490b57cec5SDimitry Andric   return false;
7500b57cec5SDimitry Andric }
7510b57cec5SDimitry Andric 
7520b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
7530b57cec5SDimitry Andric void Formula::print(raw_ostream &OS) const {
7540b57cec5SDimitry Andric   bool First = true;
7550b57cec5SDimitry Andric   if (BaseGV) {
7560b57cec5SDimitry Andric     if (!First) OS << " + "; else First = false;
7570b57cec5SDimitry Andric     BaseGV->printAsOperand(OS, /*PrintType=*/false);
7580b57cec5SDimitry Andric   }
7590fca6ea1SDimitry Andric   if (BaseOffset.isNonZero()) {
7600b57cec5SDimitry Andric     if (!First) OS << " + "; else First = false;
7610b57cec5SDimitry Andric     OS << BaseOffset;
7620b57cec5SDimitry Andric   }
7630b57cec5SDimitry Andric   for (const SCEV *BaseReg : BaseRegs) {
7640b57cec5SDimitry Andric     if (!First) OS << " + "; else First = false;
7650b57cec5SDimitry Andric     OS << "reg(" << *BaseReg << ')';
7660b57cec5SDimitry Andric   }
7670b57cec5SDimitry Andric   if (HasBaseReg && BaseRegs.empty()) {
7680b57cec5SDimitry Andric     if (!First) OS << " + "; else First = false;
7690b57cec5SDimitry Andric     OS << "**error: HasBaseReg**";
7700b57cec5SDimitry Andric   } else if (!HasBaseReg && !BaseRegs.empty()) {
7710b57cec5SDimitry Andric     if (!First) OS << " + "; else First = false;
7720b57cec5SDimitry Andric     OS << "**error: !HasBaseReg**";
7730b57cec5SDimitry Andric   }
7740b57cec5SDimitry Andric   if (Scale != 0) {
7750b57cec5SDimitry Andric     if (!First) OS << " + "; else First = false;
7760b57cec5SDimitry Andric     OS << Scale << "*reg(";
7770b57cec5SDimitry Andric     if (ScaledReg)
7780b57cec5SDimitry Andric       OS << *ScaledReg;
7790b57cec5SDimitry Andric     else
7800b57cec5SDimitry Andric       OS << "<unknown>";
7810b57cec5SDimitry Andric     OS << ')';
7820b57cec5SDimitry Andric   }
7830fca6ea1SDimitry Andric   if (UnfoldedOffset.isNonZero()) {
7840b57cec5SDimitry Andric     if (!First) OS << " + ";
7850b57cec5SDimitry Andric     OS << "imm(" << UnfoldedOffset << ')';
7860b57cec5SDimitry Andric   }
7870b57cec5SDimitry Andric }
7880b57cec5SDimitry Andric 
7890b57cec5SDimitry Andric LLVM_DUMP_METHOD void Formula::dump() const {
7900b57cec5SDimitry Andric   print(errs()); errs() << '\n';
7910b57cec5SDimitry Andric }
7920b57cec5SDimitry Andric #endif
7930b57cec5SDimitry Andric 
7940b57cec5SDimitry Andric /// Return true if the given addrec can be sign-extended without changing its
7950b57cec5SDimitry Andric /// value.
7960b57cec5SDimitry Andric static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
7970b57cec5SDimitry Andric   Type *WideTy =
7980b57cec5SDimitry Andric     IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(AR->getType()) + 1);
7990b57cec5SDimitry Andric   return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
8000b57cec5SDimitry Andric }
8010b57cec5SDimitry Andric 
8020b57cec5SDimitry Andric /// Return true if the given add can be sign-extended without changing its
8030b57cec5SDimitry Andric /// value.
8040b57cec5SDimitry Andric static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
8050b57cec5SDimitry Andric   Type *WideTy =
8060b57cec5SDimitry Andric     IntegerType::get(SE.getContext(), SE.getTypeSizeInBits(A->getType()) + 1);
8070b57cec5SDimitry Andric   return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
8080b57cec5SDimitry Andric }
8090b57cec5SDimitry Andric 
8100b57cec5SDimitry Andric /// Return true if the given mul can be sign-extended without changing its
8110b57cec5SDimitry Andric /// value.
8120b57cec5SDimitry Andric static bool isMulSExtable(const SCEVMulExpr *M, ScalarEvolution &SE) {
8130b57cec5SDimitry Andric   Type *WideTy =
8140b57cec5SDimitry Andric     IntegerType::get(SE.getContext(),
8150b57cec5SDimitry Andric                      SE.getTypeSizeInBits(M->getType()) * M->getNumOperands());
8160b57cec5SDimitry Andric   return isa<SCEVMulExpr>(SE.getSignExtendExpr(M, WideTy));
8170b57cec5SDimitry Andric }
8180b57cec5SDimitry Andric 
/// Return an expression for LHS /s RHS, if it can be determined and if the
/// remainder is known to be zero, or null otherwise. If IgnoreSignificantBits
/// is true, expressions like (X * Y) /s Y are simplified to X, ignoring that
/// the multiplication may overflow, which is useful when the result will be
/// used in a context where the most significant bits are ignored.
static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
                                ScalarEvolution &SE,
                                bool IgnoreSignificantBits = false) {
  // Handle the trivial case, which works for any SCEV type.
  if (LHS == RHS)
    return SE.getConstant(LHS->getType(), 1);

  // Handle a few RHS special cases.
  const SCEVConstant *RC = dyn_cast<SCEVConstant>(RHS);
  if (RC) {
    const APInt &RA = RC->getAPInt();
    // Handle x /s -1 as x * -1, to give ScalarEvolution a chance to do
    // some folding.
    if (RA.isAllOnes()) {
      // Multiplying a pointer by -1 has no meaning; give up.
      if (LHS->getType()->isPointerTy())
        return nullptr;
      return SE.getMulExpr(LHS, RC);
    }
    // Handle x /s 1 as x.
    if (RA == 1)
      return LHS;
  }

  // Check for a division of a constant by a constant.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(LHS)) {
    if (!RC)
      return nullptr;
    const APInt &LA = C->getAPInt();
    const APInt &RA = RC->getAPInt();
    // Only an exact (remainder-free) quotient is acceptable.
    if (LA.srem(RA) != 0)
      return nullptr;
    return SE.getConstant(LA.sdiv(RA));
  }

  // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
    if ((IgnoreSignificantBits || isAddRecSExtable(AR, SE)) && AR->isAffine()) {
      // Both the step and the start must divide exactly, or the whole
      // recurrence does not.
      const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
                                      IgnoreSignificantBits);
      if (!Step) return nullptr;
      const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
                                       IgnoreSignificantBits);
      if (!Start) return nullptr;
      // FlagNW is independent of the start value, step direction, and is
      // preserved with smaller magnitude steps.
      // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
      return SE.getAddRecExpr(Start, Step, AR->getLoop(), SCEV::FlagAnyWrap);
    }
    return nullptr;
  }

  // Distribute the sdiv over add operands, if the add doesn't overflow.
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
    if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
      SmallVector<const SCEV *, 8> Ops;
      // Every operand must divide exactly for the sum to divide exactly.
      for (const SCEV *S : Add->operands()) {
        const SCEV *Op = getExactSDiv(S, RHS, SE, IgnoreSignificantBits);
        if (!Op) return nullptr;
        Ops.push_back(Op);
      }
      return SE.getAddExpr(Ops);
    }
    return nullptr;
  }

  // Check for a multiply operand that we can pull RHS out of.
  if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS)) {
    if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
      // Handle special case C1*X*Y /s C2*X*Y.
      if (const SCEVMulExpr *MulRHS = dyn_cast<SCEVMulExpr>(RHS)) {
        if (IgnoreSignificantBits || isMulSExtable(MulRHS, SE)) {
          const SCEVConstant *LC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
          const SCEVConstant *RC =
              dyn_cast<SCEVConstant>(MulRHS->getOperand(0));
          if (LC && RC) {
            SmallVector<const SCEV *, 4> LOps(drop_begin(Mul->operands()));
            SmallVector<const SCEV *, 4> ROps(drop_begin(MulRHS->operands()));
            // With identical non-constant factors, the quotient is just the
            // quotient of the leading constants.
            if (LOps == ROps)
              return getExactSDiv(LC, RC, SE, IgnoreSignificantBits);
          }
        }
      }

      SmallVector<const SCEV *, 4> Ops;
      bool Found = false;
      // Divide RHS into exactly one operand of the multiply; all other
      // operands pass through unchanged.
      for (const SCEV *S : Mul->operands()) {
        if (!Found)
          if (const SCEV *Q = getExactSDiv(S, RHS, SE,
                                           IgnoreSignificantBits)) {
            S = Q;
            Found = true;
          }
        Ops.push_back(S);
      }
      return Found ? SE.getMulExpr(Ops) : nullptr;
    }
    return nullptr;
  }

  // Otherwise we don't know.
  return nullptr;
}
9260b57cec5SDimitry Andric 
9270b57cec5SDimitry Andric /// If S involves the addition of a constant integer value, return that integer
9280b57cec5SDimitry Andric /// value, and mutate S to point to a new SCEV with that value excluded.
9290fca6ea1SDimitry Andric static Immediate ExtractImmediate(const SCEV *&S, ScalarEvolution &SE) {
9300b57cec5SDimitry Andric   if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
93106c3fb27SDimitry Andric     if (C->getAPInt().getSignificantBits() <= 64) {
9320b57cec5SDimitry Andric       S = SE.getConstant(C->getType(), 0);
9330fca6ea1SDimitry Andric       return Immediate::getFixed(C->getValue()->getSExtValue());
9340b57cec5SDimitry Andric     }
9350b57cec5SDimitry Andric   } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
936e8d8bef9SDimitry Andric     SmallVector<const SCEV *, 8> NewOps(Add->operands());
9370fca6ea1SDimitry Andric     Immediate Result = ExtractImmediate(NewOps.front(), SE);
9380fca6ea1SDimitry Andric     if (Result.isNonZero())
9390b57cec5SDimitry Andric       S = SE.getAddExpr(NewOps);
9400b57cec5SDimitry Andric     return Result;
9410b57cec5SDimitry Andric   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
942e8d8bef9SDimitry Andric     SmallVector<const SCEV *, 8> NewOps(AR->operands());
9430fca6ea1SDimitry Andric     Immediate Result = ExtractImmediate(NewOps.front(), SE);
9440fca6ea1SDimitry Andric     if (Result.isNonZero())
9450b57cec5SDimitry Andric       S = SE.getAddRecExpr(NewOps, AR->getLoop(),
9460b57cec5SDimitry Andric                            // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
9470b57cec5SDimitry Andric                            SCEV::FlagAnyWrap);
9480b57cec5SDimitry Andric     return Result;
949*36b606aeSDimitry Andric   } else if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
950*36b606aeSDimitry Andric     if (EnableVScaleImmediates && M->getNumOperands() == 2) {
9510fca6ea1SDimitry Andric       if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
9520fca6ea1SDimitry Andric         if (isa<SCEVVScale>(M->getOperand(1))) {
9530fca6ea1SDimitry Andric           S = SE.getConstant(M->getType(), 0);
9540fca6ea1SDimitry Andric           return Immediate::getScalable(C->getValue()->getSExtValue());
9550b57cec5SDimitry Andric         }
956*36b606aeSDimitry Andric     }
957*36b606aeSDimitry Andric   }
9580fca6ea1SDimitry Andric   return Immediate::getZero();
9590b57cec5SDimitry Andric }
9600b57cec5SDimitry Andric 
9610b57cec5SDimitry Andric /// If S involves the addition of a GlobalValue address, return that symbol, and
9620b57cec5SDimitry Andric /// mutate S to point to a new SCEV with that value excluded.
9630b57cec5SDimitry Andric static GlobalValue *ExtractSymbol(const SCEV *&S, ScalarEvolution &SE) {
9640b57cec5SDimitry Andric   if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
9650b57cec5SDimitry Andric     if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue())) {
9660b57cec5SDimitry Andric       S = SE.getConstant(GV->getType(), 0);
9670b57cec5SDimitry Andric       return GV;
9680b57cec5SDimitry Andric     }
9690b57cec5SDimitry Andric   } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
970e8d8bef9SDimitry Andric     SmallVector<const SCEV *, 8> NewOps(Add->operands());
9710b57cec5SDimitry Andric     GlobalValue *Result = ExtractSymbol(NewOps.back(), SE);
9720b57cec5SDimitry Andric     if (Result)
9730b57cec5SDimitry Andric       S = SE.getAddExpr(NewOps);
9740b57cec5SDimitry Andric     return Result;
9750b57cec5SDimitry Andric   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
976e8d8bef9SDimitry Andric     SmallVector<const SCEV *, 8> NewOps(AR->operands());
9770b57cec5SDimitry Andric     GlobalValue *Result = ExtractSymbol(NewOps.front(), SE);
9780b57cec5SDimitry Andric     if (Result)
9790b57cec5SDimitry Andric       S = SE.getAddRecExpr(NewOps, AR->getLoop(),
9800b57cec5SDimitry Andric                            // FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
9810b57cec5SDimitry Andric                            SCEV::FlagAnyWrap);
9820b57cec5SDimitry Andric     return Result;
9830b57cec5SDimitry Andric   }
9840b57cec5SDimitry Andric   return nullptr;
9850b57cec5SDimitry Andric }
9860b57cec5SDimitry Andric 
9870b57cec5SDimitry Andric /// Returns true if the specified instruction is using the specified value as an
9880b57cec5SDimitry Andric /// address.
9890b57cec5SDimitry Andric static bool isAddressUse(const TargetTransformInfo &TTI,
9900b57cec5SDimitry Andric                          Instruction *Inst, Value *OperandVal) {
9910b57cec5SDimitry Andric   bool isAddress = isa<LoadInst>(Inst);
9920b57cec5SDimitry Andric   if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
9930b57cec5SDimitry Andric     if (SI->getPointerOperand() == OperandVal)
9940b57cec5SDimitry Andric       isAddress = true;
9950b57cec5SDimitry Andric   } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
9960b57cec5SDimitry Andric     // Addressing modes can also be folded into prefetches and a variety
9970b57cec5SDimitry Andric     // of intrinsics.
9980b57cec5SDimitry Andric     switch (II->getIntrinsicID()) {
9990b57cec5SDimitry Andric     case Intrinsic::memset:
10000b57cec5SDimitry Andric     case Intrinsic::prefetch:
10015ffd83dbSDimitry Andric     case Intrinsic::masked_load:
10020b57cec5SDimitry Andric       if (II->getArgOperand(0) == OperandVal)
10030b57cec5SDimitry Andric         isAddress = true;
10040b57cec5SDimitry Andric       break;
10055ffd83dbSDimitry Andric     case Intrinsic::masked_store:
10065ffd83dbSDimitry Andric       if (II->getArgOperand(1) == OperandVal)
10075ffd83dbSDimitry Andric         isAddress = true;
10085ffd83dbSDimitry Andric       break;
10090b57cec5SDimitry Andric     case Intrinsic::memmove:
10100b57cec5SDimitry Andric     case Intrinsic::memcpy:
10110b57cec5SDimitry Andric       if (II->getArgOperand(0) == OperandVal ||
10120b57cec5SDimitry Andric           II->getArgOperand(1) == OperandVal)
10130b57cec5SDimitry Andric         isAddress = true;
10140b57cec5SDimitry Andric       break;
10150b57cec5SDimitry Andric     default: {
10160b57cec5SDimitry Andric       MemIntrinsicInfo IntrInfo;
10170b57cec5SDimitry Andric       if (TTI.getTgtMemIntrinsic(II, IntrInfo)) {
10180b57cec5SDimitry Andric         if (IntrInfo.PtrVal == OperandVal)
10190b57cec5SDimitry Andric           isAddress = true;
10200b57cec5SDimitry Andric       }
10210b57cec5SDimitry Andric     }
10220b57cec5SDimitry Andric     }
10230b57cec5SDimitry Andric   } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
10240b57cec5SDimitry Andric     if (RMW->getPointerOperand() == OperandVal)
10250b57cec5SDimitry Andric       isAddress = true;
10260b57cec5SDimitry Andric   } else if (AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
10270b57cec5SDimitry Andric     if (CmpX->getPointerOperand() == OperandVal)
10280b57cec5SDimitry Andric       isAddress = true;
10290b57cec5SDimitry Andric   }
10300b57cec5SDimitry Andric   return isAddress;
10310b57cec5SDimitry Andric }
10320b57cec5SDimitry Andric 
10330b57cec5SDimitry Andric /// Return the type of the memory being accessed.
10340b57cec5SDimitry Andric static MemAccessTy getAccessType(const TargetTransformInfo &TTI,
10350b57cec5SDimitry Andric                                  Instruction *Inst, Value *OperandVal) {
103606c3fb27SDimitry Andric   MemAccessTy AccessTy = MemAccessTy::getUnknown(Inst->getContext());
103706c3fb27SDimitry Andric 
103806c3fb27SDimitry Andric   // First get the type of memory being accessed.
103906c3fb27SDimitry Andric   if (Type *Ty = Inst->getAccessType())
104006c3fb27SDimitry Andric     AccessTy.MemTy = Ty;
104106c3fb27SDimitry Andric 
104206c3fb27SDimitry Andric   // Then get the pointer address space.
10430b57cec5SDimitry Andric   if (const StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
10440b57cec5SDimitry Andric     AccessTy.AddrSpace = SI->getPointerAddressSpace();
10450b57cec5SDimitry Andric   } else if (const LoadInst *LI = dyn_cast<LoadInst>(Inst)) {
10460b57cec5SDimitry Andric     AccessTy.AddrSpace = LI->getPointerAddressSpace();
10470b57cec5SDimitry Andric   } else if (const AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(Inst)) {
10480b57cec5SDimitry Andric     AccessTy.AddrSpace = RMW->getPointerAddressSpace();
10490b57cec5SDimitry Andric   } else if (const AtomicCmpXchgInst *CmpX = dyn_cast<AtomicCmpXchgInst>(Inst)) {
10500b57cec5SDimitry Andric     AccessTy.AddrSpace = CmpX->getPointerAddressSpace();
10510b57cec5SDimitry Andric   } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
10520b57cec5SDimitry Andric     switch (II->getIntrinsicID()) {
10530b57cec5SDimitry Andric     case Intrinsic::prefetch:
10540b57cec5SDimitry Andric     case Intrinsic::memset:
10550b57cec5SDimitry Andric       AccessTy.AddrSpace = II->getArgOperand(0)->getType()->getPointerAddressSpace();
10560b57cec5SDimitry Andric       AccessTy.MemTy = OperandVal->getType();
10570b57cec5SDimitry Andric       break;
10580b57cec5SDimitry Andric     case Intrinsic::memmove:
10590b57cec5SDimitry Andric     case Intrinsic::memcpy:
10600b57cec5SDimitry Andric       AccessTy.AddrSpace = OperandVal->getType()->getPointerAddressSpace();
10610b57cec5SDimitry Andric       AccessTy.MemTy = OperandVal->getType();
10620b57cec5SDimitry Andric       break;
10635ffd83dbSDimitry Andric     case Intrinsic::masked_load:
10645ffd83dbSDimitry Andric       AccessTy.AddrSpace =
10655ffd83dbSDimitry Andric           II->getArgOperand(0)->getType()->getPointerAddressSpace();
10665ffd83dbSDimitry Andric       break;
10675ffd83dbSDimitry Andric     case Intrinsic::masked_store:
10685ffd83dbSDimitry Andric       AccessTy.AddrSpace =
10695ffd83dbSDimitry Andric           II->getArgOperand(1)->getType()->getPointerAddressSpace();
10705ffd83dbSDimitry Andric       break;
10710b57cec5SDimitry Andric     default: {
10720b57cec5SDimitry Andric       MemIntrinsicInfo IntrInfo;
10730b57cec5SDimitry Andric       if (TTI.getTgtMemIntrinsic(II, IntrInfo) && IntrInfo.PtrVal) {
10740b57cec5SDimitry Andric         AccessTy.AddrSpace
10750b57cec5SDimitry Andric           = IntrInfo.PtrVal->getType()->getPointerAddressSpace();
10760b57cec5SDimitry Andric       }
10770b57cec5SDimitry Andric 
10780b57cec5SDimitry Andric       break;
10790b57cec5SDimitry Andric     }
10800b57cec5SDimitry Andric     }
10810b57cec5SDimitry Andric   }
10820b57cec5SDimitry Andric 
10830b57cec5SDimitry Andric   return AccessTy;
10840b57cec5SDimitry Andric }
10850b57cec5SDimitry Andric 
10860b57cec5SDimitry Andric /// Return true if this AddRec is already a phi in its loop.
10870b57cec5SDimitry Andric static bool isExistingPhi(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
10880b57cec5SDimitry Andric   for (PHINode &PN : AR->getLoop()->getHeader()->phis()) {
10890b57cec5SDimitry Andric     if (SE.isSCEVable(PN.getType()) &&
10900b57cec5SDimitry Andric         (SE.getEffectiveSCEVType(PN.getType()) ==
10910b57cec5SDimitry Andric          SE.getEffectiveSCEVType(AR->getType())) &&
10920b57cec5SDimitry Andric         SE.getSCEV(&PN) == AR)
10930b57cec5SDimitry Andric       return true;
10940b57cec5SDimitry Andric   }
10950b57cec5SDimitry Andric   return false;
10960b57cec5SDimitry Andric }
10970b57cec5SDimitry Andric 
10980b57cec5SDimitry Andric /// Check if expanding this expression is likely to incur significant cost. This
10990b57cec5SDimitry Andric /// is tricky because SCEV doesn't track which expressions are actually computed
11000b57cec5SDimitry Andric /// by the current IR.
11010b57cec5SDimitry Andric ///
11020b57cec5SDimitry Andric /// We currently allow expansion of IV increments that involve adds,
11030b57cec5SDimitry Andric /// multiplication by constants, and AddRecs from existing phis.
11040b57cec5SDimitry Andric ///
11050b57cec5SDimitry Andric /// TODO: Allow UDivExpr if we can find an existing IV increment that is an
11060b57cec5SDimitry Andric /// obvious multiple of the UDivExpr.
11070b57cec5SDimitry Andric static bool isHighCostExpansion(const SCEV *S,
11080b57cec5SDimitry Andric                                 SmallPtrSetImpl<const SCEV*> &Processed,
11090b57cec5SDimitry Andric                                 ScalarEvolution &SE) {
11100b57cec5SDimitry Andric   // Zero/One operand expressions
11110b57cec5SDimitry Andric   switch (S->getSCEVType()) {
11120b57cec5SDimitry Andric   case scUnknown:
11130b57cec5SDimitry Andric   case scConstant:
111406c3fb27SDimitry Andric   case scVScale:
11150b57cec5SDimitry Andric     return false;
11160b57cec5SDimitry Andric   case scTruncate:
11170b57cec5SDimitry Andric     return isHighCostExpansion(cast<SCEVTruncateExpr>(S)->getOperand(),
11180b57cec5SDimitry Andric                                Processed, SE);
11190b57cec5SDimitry Andric   case scZeroExtend:
11200b57cec5SDimitry Andric     return isHighCostExpansion(cast<SCEVZeroExtendExpr>(S)->getOperand(),
11210b57cec5SDimitry Andric                                Processed, SE);
11220b57cec5SDimitry Andric   case scSignExtend:
11230b57cec5SDimitry Andric     return isHighCostExpansion(cast<SCEVSignExtendExpr>(S)->getOperand(),
11240b57cec5SDimitry Andric                                Processed, SE);
1125e8d8bef9SDimitry Andric   default:
1126e8d8bef9SDimitry Andric     break;
11270b57cec5SDimitry Andric   }
11280b57cec5SDimitry Andric 
11290b57cec5SDimitry Andric   if (!Processed.insert(S).second)
11300b57cec5SDimitry Andric     return false;
11310b57cec5SDimitry Andric 
11320b57cec5SDimitry Andric   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
11330b57cec5SDimitry Andric     for (const SCEV *S : Add->operands()) {
11340b57cec5SDimitry Andric       if (isHighCostExpansion(S, Processed, SE))
11350b57cec5SDimitry Andric         return true;
11360b57cec5SDimitry Andric     }
11370b57cec5SDimitry Andric     return false;
11380b57cec5SDimitry Andric   }
11390b57cec5SDimitry Andric 
11400b57cec5SDimitry Andric   if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
11410b57cec5SDimitry Andric     if (Mul->getNumOperands() == 2) {
11420b57cec5SDimitry Andric       // Multiplication by a constant is ok
11430b57cec5SDimitry Andric       if (isa<SCEVConstant>(Mul->getOperand(0)))
11440b57cec5SDimitry Andric         return isHighCostExpansion(Mul->getOperand(1), Processed, SE);
11450b57cec5SDimitry Andric 
11460b57cec5SDimitry Andric       // If we have the value of one operand, check if an existing
11470b57cec5SDimitry Andric       // multiplication already generates this expression.
11480b57cec5SDimitry Andric       if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(Mul->getOperand(1))) {
11490b57cec5SDimitry Andric         Value *UVal = U->getValue();
11500b57cec5SDimitry Andric         for (User *UR : UVal->users()) {
11510b57cec5SDimitry Andric           // If U is a constant, it may be used by a ConstantExpr.
11520b57cec5SDimitry Andric           Instruction *UI = dyn_cast<Instruction>(UR);
11530b57cec5SDimitry Andric           if (UI && UI->getOpcode() == Instruction::Mul &&
11540b57cec5SDimitry Andric               SE.isSCEVable(UI->getType())) {
11550b57cec5SDimitry Andric             return SE.getSCEV(UI) == Mul;
11560b57cec5SDimitry Andric           }
11570b57cec5SDimitry Andric         }
11580b57cec5SDimitry Andric       }
11590b57cec5SDimitry Andric     }
11600b57cec5SDimitry Andric   }
11610b57cec5SDimitry Andric 
11620b57cec5SDimitry Andric   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
11630b57cec5SDimitry Andric     if (isExistingPhi(AR, SE))
11640b57cec5SDimitry Andric       return false;
11650b57cec5SDimitry Andric   }
11660b57cec5SDimitry Andric 
11670b57cec5SDimitry Andric   // Fow now, consider any other type of expression (div/mul/min/max) high cost.
11680b57cec5SDimitry Andric   return true;
11690b57cec5SDimitry Andric }
11700b57cec5SDimitry Andric 
11710b57cec5SDimitry Andric namespace {
11720b57cec5SDimitry Andric 
11730b57cec5SDimitry Andric class LSRUse;
11740b57cec5SDimitry Andric 
11750b57cec5SDimitry Andric } // end anonymous namespace
11760b57cec5SDimitry Andric 
11770b57cec5SDimitry Andric /// Check if the addressing mode defined by \p F is completely
11780b57cec5SDimitry Andric /// folded in \p LU at isel time.
11790b57cec5SDimitry Andric /// This includes address-mode folding and special icmp tricks.
11800b57cec5SDimitry Andric /// This function returns true if \p LU can accommodate what \p F
11810b57cec5SDimitry Andric /// defines and up to 1 base + 1 scaled + offset.
11820b57cec5SDimitry Andric /// In other words, if \p F has several base registers, this function may
11830b57cec5SDimitry Andric /// still return true. Therefore, users still need to account for
11840b57cec5SDimitry Andric /// additional base registers and/or unfolded offsets to derive an
11850b57cec5SDimitry Andric /// accurate cost model.
11860b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
11870b57cec5SDimitry Andric                                  const LSRUse &LU, const Formula &F);
11880b57cec5SDimitry Andric 
11890b57cec5SDimitry Andric // Get the cost of the scaling factor used in F for LU.
1190fe6060f1SDimitry Andric static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
11910b57cec5SDimitry Andric                                             const LSRUse &LU, const Formula &F,
11920b57cec5SDimitry Andric                                             const Loop &L);
11930b57cec5SDimitry Andric 
11940b57cec5SDimitry Andric namespace {
11950b57cec5SDimitry Andric 
11960b57cec5SDimitry Andric /// This class is used to measure and compare candidate formulae.
11970b57cec5SDimitry Andric class Cost {
11980b57cec5SDimitry Andric   const Loop *L = nullptr;
11990b57cec5SDimitry Andric   ScalarEvolution *SE = nullptr;
12000b57cec5SDimitry Andric   const TargetTransformInfo *TTI = nullptr;
12010b57cec5SDimitry Andric   TargetTransformInfo::LSRCost C;
1202fe6060f1SDimitry Andric   TTI::AddressingModeKind AMK = TTI::AMK_None;
12030b57cec5SDimitry Andric 
12040b57cec5SDimitry Andric public:
12050b57cec5SDimitry Andric   Cost() = delete;
1206fe6060f1SDimitry Andric   Cost(const Loop *L, ScalarEvolution &SE, const TargetTransformInfo &TTI,
1207fe6060f1SDimitry Andric        TTI::AddressingModeKind AMK) :
1208fe6060f1SDimitry Andric     L(L), SE(&SE), TTI(&TTI), AMK(AMK) {
12090b57cec5SDimitry Andric     C.Insns = 0;
12100b57cec5SDimitry Andric     C.NumRegs = 0;
12110b57cec5SDimitry Andric     C.AddRecCost = 0;
12120b57cec5SDimitry Andric     C.NumIVMuls = 0;
12130b57cec5SDimitry Andric     C.NumBaseAdds = 0;
12140b57cec5SDimitry Andric     C.ImmCost = 0;
12150b57cec5SDimitry Andric     C.SetupCost = 0;
12160b57cec5SDimitry Andric     C.ScaleCost = 0;
12170b57cec5SDimitry Andric   }
12180b57cec5SDimitry Andric 
1219bdd1243dSDimitry Andric   bool isLess(const Cost &Other) const;
12200b57cec5SDimitry Andric 
12210b57cec5SDimitry Andric   void Lose();
12220b57cec5SDimitry Andric 
12230b57cec5SDimitry Andric #ifndef NDEBUG
12240b57cec5SDimitry Andric   // Once any of the metrics loses, they must all remain losers.
12250b57cec5SDimitry Andric   bool isValid() {
12260b57cec5SDimitry Andric     return ((C.Insns | C.NumRegs | C.AddRecCost | C.NumIVMuls | C.NumBaseAdds
12270b57cec5SDimitry Andric              | C.ImmCost | C.SetupCost | C.ScaleCost) != ~0u)
12280b57cec5SDimitry Andric       || ((C.Insns & C.NumRegs & C.AddRecCost & C.NumIVMuls & C.NumBaseAdds
12290b57cec5SDimitry Andric            & C.ImmCost & C.SetupCost & C.ScaleCost) == ~0u);
12300b57cec5SDimitry Andric   }
12310b57cec5SDimitry Andric #endif
12320b57cec5SDimitry Andric 
12330b57cec5SDimitry Andric   bool isLoser() {
12340b57cec5SDimitry Andric     assert(isValid() && "invalid cost");
12350b57cec5SDimitry Andric     return C.NumRegs == ~0u;
12360b57cec5SDimitry Andric   }
12370b57cec5SDimitry Andric 
12380b57cec5SDimitry Andric   void RateFormula(const Formula &F,
12390b57cec5SDimitry Andric                    SmallPtrSetImpl<const SCEV *> &Regs,
12400b57cec5SDimitry Andric                    const DenseSet<const SCEV *> &VisitedRegs,
12410b57cec5SDimitry Andric                    const LSRUse &LU,
12420b57cec5SDimitry Andric                    SmallPtrSetImpl<const SCEV *> *LoserRegs = nullptr);
12430b57cec5SDimitry Andric 
12440b57cec5SDimitry Andric   void print(raw_ostream &OS) const;
12450b57cec5SDimitry Andric   void dump() const;
12460b57cec5SDimitry Andric 
12470b57cec5SDimitry Andric private:
12480b57cec5SDimitry Andric   void RateRegister(const Formula &F, const SCEV *Reg,
12490b57cec5SDimitry Andric                     SmallPtrSetImpl<const SCEV *> &Regs);
12500b57cec5SDimitry Andric   void RatePrimaryRegister(const Formula &F, const SCEV *Reg,
12510b57cec5SDimitry Andric                            SmallPtrSetImpl<const SCEV *> &Regs,
12520b57cec5SDimitry Andric                            SmallPtrSetImpl<const SCEV *> *LoserRegs);
12530b57cec5SDimitry Andric };
12540b57cec5SDimitry Andric 
12550b57cec5SDimitry Andric /// An operand value in an instruction which is to be replaced with some
12560b57cec5SDimitry Andric /// equivalent, possibly strength-reduced, replacement.
12570b57cec5SDimitry Andric struct LSRFixup {
12580b57cec5SDimitry Andric   /// The instruction which will be updated.
12590b57cec5SDimitry Andric   Instruction *UserInst = nullptr;
12600b57cec5SDimitry Andric 
12610b57cec5SDimitry Andric   /// The operand of the instruction which will be replaced. The operand may be
12620b57cec5SDimitry Andric   /// used more than once; every instance will be replaced.
12630b57cec5SDimitry Andric   Value *OperandValToReplace = nullptr;
12640b57cec5SDimitry Andric 
12650b57cec5SDimitry Andric   /// If this user is to use the post-incremented value of an induction
12660b57cec5SDimitry Andric   /// variable, this set is non-empty and holds the loops associated with the
12670b57cec5SDimitry Andric   /// induction variable.
12680b57cec5SDimitry Andric   PostIncLoopSet PostIncLoops;
12690b57cec5SDimitry Andric 
12700b57cec5SDimitry Andric   /// A constant offset to be added to the LSRUse expression.  This allows
12710b57cec5SDimitry Andric   /// multiple fixups to share the same LSRUse with different offsets, for
12720b57cec5SDimitry Andric   /// example in an unrolled loop.
12730fca6ea1SDimitry Andric   Immediate Offset = Immediate::getZero();
12740b57cec5SDimitry Andric 
12750b57cec5SDimitry Andric   LSRFixup() = default;
12760b57cec5SDimitry Andric 
12770b57cec5SDimitry Andric   bool isUseFullyOutsideLoop(const Loop *L) const;
12780b57cec5SDimitry Andric 
12790b57cec5SDimitry Andric   void print(raw_ostream &OS) const;
12800b57cec5SDimitry Andric   void dump() const;
12810b57cec5SDimitry Andric };
12820b57cec5SDimitry Andric 
12830b57cec5SDimitry Andric /// A DenseMapInfo implementation for holding DenseMaps and DenseSets of sorted
12840b57cec5SDimitry Andric /// SmallVectors of const SCEV*.
12850b57cec5SDimitry Andric struct UniquifierDenseMapInfo {
12860b57cec5SDimitry Andric   static SmallVector<const SCEV *, 4> getEmptyKey() {
12870b57cec5SDimitry Andric     SmallVector<const SCEV *, 4>  V;
12880b57cec5SDimitry Andric     V.push_back(reinterpret_cast<const SCEV *>(-1));
12890b57cec5SDimitry Andric     return V;
12900b57cec5SDimitry Andric   }
12910b57cec5SDimitry Andric 
12920b57cec5SDimitry Andric   static SmallVector<const SCEV *, 4> getTombstoneKey() {
12930b57cec5SDimitry Andric     SmallVector<const SCEV *, 4> V;
12940b57cec5SDimitry Andric     V.push_back(reinterpret_cast<const SCEV *>(-2));
12950b57cec5SDimitry Andric     return V;
12960b57cec5SDimitry Andric   }
12970b57cec5SDimitry Andric 
12980b57cec5SDimitry Andric   static unsigned getHashValue(const SmallVector<const SCEV *, 4> &V) {
12990b57cec5SDimitry Andric     return static_cast<unsigned>(hash_combine_range(V.begin(), V.end()));
13000b57cec5SDimitry Andric   }
13010b57cec5SDimitry Andric 
13020b57cec5SDimitry Andric   static bool isEqual(const SmallVector<const SCEV *, 4> &LHS,
13030b57cec5SDimitry Andric                       const SmallVector<const SCEV *, 4> &RHS) {
13040b57cec5SDimitry Andric     return LHS == RHS;
13050b57cec5SDimitry Andric   }
13060b57cec5SDimitry Andric };
13070b57cec5SDimitry Andric 
13080b57cec5SDimitry Andric /// This class holds the state that LSR keeps for each use in IVUsers, as well
13090b57cec5SDimitry Andric /// as uses invented by LSR itself. It includes information about what kinds of
13100b57cec5SDimitry Andric /// things can be folded into the user, information about the user itself, and
13110b57cec5SDimitry Andric /// information about how the use may be satisfied.  TODO: Represent multiple
13120b57cec5SDimitry Andric /// users of the same expression in common?
13130b57cec5SDimitry Andric class LSRUse {
13140b57cec5SDimitry Andric   DenseSet<SmallVector<const SCEV *, 4>, UniquifierDenseMapInfo> Uniquifier;
13150b57cec5SDimitry Andric 
13160b57cec5SDimitry Andric public:
13170b57cec5SDimitry Andric   /// An enum for a kind of use, indicating what types of scaled and immediate
13180b57cec5SDimitry Andric   /// operands it might support.
13190b57cec5SDimitry Andric   enum KindType {
13200b57cec5SDimitry Andric     Basic,   ///< A normal use, with no folding.
13210b57cec5SDimitry Andric     Special, ///< A special case of basic, allowing -1 scales.
13220b57cec5SDimitry Andric     Address, ///< An address use; folding according to TargetLowering
13230b57cec5SDimitry Andric     ICmpZero ///< An equality icmp with both operands folded into one.
13240b57cec5SDimitry Andric     // TODO: Add a generic icmp too?
13250b57cec5SDimitry Andric   };
13260b57cec5SDimitry Andric 
13270b57cec5SDimitry Andric   using SCEVUseKindPair = PointerIntPair<const SCEV *, 2, KindType>;
13280b57cec5SDimitry Andric 
13290b57cec5SDimitry Andric   KindType Kind;
13300b57cec5SDimitry Andric   MemAccessTy AccessTy;
13310b57cec5SDimitry Andric 
13320b57cec5SDimitry Andric   /// The list of operands which are to be replaced.
13330b57cec5SDimitry Andric   SmallVector<LSRFixup, 8> Fixups;
13340b57cec5SDimitry Andric 
13350b57cec5SDimitry Andric   /// Keep track of the min and max offsets of the fixups.
13360fca6ea1SDimitry Andric   Immediate MinOffset = Immediate::getFixedMax();
13370fca6ea1SDimitry Andric   Immediate MaxOffset = Immediate::getFixedMin();
13380b57cec5SDimitry Andric 
13390b57cec5SDimitry Andric   /// This records whether all of the fixups using this LSRUse are outside of
13400b57cec5SDimitry Andric   /// the loop, in which case some special-case heuristics may be used.
13410b57cec5SDimitry Andric   bool AllFixupsOutsideLoop = true;
13420b57cec5SDimitry Andric 
13430b57cec5SDimitry Andric   /// RigidFormula is set to true to guarantee that this use will be associated
13440b57cec5SDimitry Andric   /// with a single formula--the one that initially matched. Some SCEV
13450b57cec5SDimitry Andric   /// expressions cannot be expanded. This allows LSR to consider the registers
13460b57cec5SDimitry Andric   /// used by those expressions without the need to expand them later after
13470b57cec5SDimitry Andric   /// changing the formula.
13480b57cec5SDimitry Andric   bool RigidFormula = false;
13490b57cec5SDimitry Andric 
13500b57cec5SDimitry Andric   /// This records the widest use type for any fixup using this
13510b57cec5SDimitry Andric   /// LSRUse. FindUseWithSimilarFormula can't consider uses with different max
13520b57cec5SDimitry Andric   /// fixup widths to be equivalent, because the narrower one may be relying on
13530b57cec5SDimitry Andric   /// the implicit truncation to truncate away bogus bits.
13540b57cec5SDimitry Andric   Type *WidestFixupType = nullptr;
13550b57cec5SDimitry Andric 
13560b57cec5SDimitry Andric   /// A list of ways to build a value that can satisfy this user.  After the
13570b57cec5SDimitry Andric   /// list is populated, one of these is selected heuristically and used to
13580b57cec5SDimitry Andric   /// formulate a replacement for OperandValToReplace in UserInst.
13590b57cec5SDimitry Andric   SmallVector<Formula, 12> Formulae;
13600b57cec5SDimitry Andric 
13610b57cec5SDimitry Andric   /// The set of register candidates used by all formulae in this LSRUse.
13620b57cec5SDimitry Andric   SmallPtrSet<const SCEV *, 4> Regs;
13630b57cec5SDimitry Andric 
13640b57cec5SDimitry Andric   LSRUse(KindType K, MemAccessTy AT) : Kind(K), AccessTy(AT) {}
13650b57cec5SDimitry Andric 
13660b57cec5SDimitry Andric   LSRFixup &getNewFixup() {
13670b57cec5SDimitry Andric     Fixups.push_back(LSRFixup());
13680b57cec5SDimitry Andric     return Fixups.back();
13690b57cec5SDimitry Andric   }
13700b57cec5SDimitry Andric 
13710b57cec5SDimitry Andric   void pushFixup(LSRFixup &f) {
13720b57cec5SDimitry Andric     Fixups.push_back(f);
13730fca6ea1SDimitry Andric     if (Immediate::isKnownGT(f.Offset, MaxOffset))
13740b57cec5SDimitry Andric       MaxOffset = f.Offset;
13750fca6ea1SDimitry Andric     if (Immediate::isKnownLT(f.Offset, MinOffset))
13760b57cec5SDimitry Andric       MinOffset = f.Offset;
13770b57cec5SDimitry Andric   }
13780b57cec5SDimitry Andric 
13790b57cec5SDimitry Andric   bool HasFormulaWithSameRegs(const Formula &F) const;
13800b57cec5SDimitry Andric   float getNotSelectedProbability(const SCEV *Reg) const;
13810b57cec5SDimitry Andric   bool InsertFormula(const Formula &F, const Loop &L);
13820b57cec5SDimitry Andric   void DeleteFormula(Formula &F);
13830b57cec5SDimitry Andric   void RecomputeRegs(size_t LUIdx, RegUseTracker &Reguses);
13840b57cec5SDimitry Andric 
13850b57cec5SDimitry Andric   void print(raw_ostream &OS) const;
13860b57cec5SDimitry Andric   void dump() const;
13870b57cec5SDimitry Andric };
13880b57cec5SDimitry Andric 
13890b57cec5SDimitry Andric } // end anonymous namespace
13900b57cec5SDimitry Andric 
13910b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
13920b57cec5SDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
13930fca6ea1SDimitry Andric                                  GlobalValue *BaseGV, Immediate BaseOffset,
13940b57cec5SDimitry Andric                                  bool HasBaseReg, int64_t Scale,
13950b57cec5SDimitry Andric                                  Instruction *Fixup = nullptr);
13960b57cec5SDimitry Andric 
13970b57cec5SDimitry Andric static unsigned getSetupCost(const SCEV *Reg, unsigned Depth) {
13980b57cec5SDimitry Andric   if (isa<SCEVUnknown>(Reg) || isa<SCEVConstant>(Reg))
13990b57cec5SDimitry Andric     return 1;
14000b57cec5SDimitry Andric   if (Depth == 0)
14010b57cec5SDimitry Andric     return 0;
14020b57cec5SDimitry Andric   if (const auto *S = dyn_cast<SCEVAddRecExpr>(Reg))
14030b57cec5SDimitry Andric     return getSetupCost(S->getStart(), Depth - 1);
1404e8d8bef9SDimitry Andric   if (auto S = dyn_cast<SCEVIntegralCastExpr>(Reg))
14050b57cec5SDimitry Andric     return getSetupCost(S->getOperand(), Depth - 1);
14060b57cec5SDimitry Andric   if (auto S = dyn_cast<SCEVNAryExpr>(Reg))
1407bdd1243dSDimitry Andric     return std::accumulate(S->operands().begin(), S->operands().end(), 0,
14080b57cec5SDimitry Andric                            [&](unsigned i, const SCEV *Reg) {
14090b57cec5SDimitry Andric                              return i + getSetupCost(Reg, Depth - 1);
14100b57cec5SDimitry Andric                            });
14110b57cec5SDimitry Andric   if (auto S = dyn_cast<SCEVUDivExpr>(Reg))
14120b57cec5SDimitry Andric     return getSetupCost(S->getLHS(), Depth - 1) +
14130b57cec5SDimitry Andric            getSetupCost(S->getRHS(), Depth - 1);
14140b57cec5SDimitry Andric   return 0;
14150b57cec5SDimitry Andric }
14160b57cec5SDimitry Andric 
14170b57cec5SDimitry Andric /// Tally up interesting quantities from the given register.
14180b57cec5SDimitry Andric void Cost::RateRegister(const Formula &F, const SCEV *Reg,
14190b57cec5SDimitry Andric                         SmallPtrSetImpl<const SCEV *> &Regs) {
14200b57cec5SDimitry Andric   if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Reg)) {
14210b57cec5SDimitry Andric     // If this is an addrec for another loop, it should be an invariant
14220b57cec5SDimitry Andric     // with respect to L since L is the innermost loop (at least
14230b57cec5SDimitry Andric     // for now LSR only handles innermost loops).
14240b57cec5SDimitry Andric     if (AR->getLoop() != L) {
14250b57cec5SDimitry Andric       // If the AddRec exists, consider it's register free and leave it alone.
1426fe6060f1SDimitry Andric       if (isExistingPhi(AR, *SE) && AMK != TTI::AMK_PostIndexed)
14270b57cec5SDimitry Andric         return;
14280b57cec5SDimitry Andric 
14290b57cec5SDimitry Andric       // It is bad to allow LSR for current loop to add induction variables
14300b57cec5SDimitry Andric       // for its sibling loops.
14310b57cec5SDimitry Andric       if (!AR->getLoop()->contains(L)) {
14320b57cec5SDimitry Andric         Lose();
14330b57cec5SDimitry Andric         return;
14340b57cec5SDimitry Andric       }
14350b57cec5SDimitry Andric 
14360b57cec5SDimitry Andric       // Otherwise, it will be an invariant with respect to Loop L.
14370b57cec5SDimitry Andric       ++C.NumRegs;
14380b57cec5SDimitry Andric       return;
14390b57cec5SDimitry Andric     }
14400b57cec5SDimitry Andric 
14410b57cec5SDimitry Andric     unsigned LoopCost = 1;
14420b57cec5SDimitry Andric     if (TTI->isIndexedLoadLegal(TTI->MIM_PostInc, AR->getType()) ||
14430b57cec5SDimitry Andric         TTI->isIndexedStoreLegal(TTI->MIM_PostInc, AR->getType())) {
14440b57cec5SDimitry Andric 
14450b57cec5SDimitry Andric       // If the step size matches the base offset, we could use pre-indexed
14460b57cec5SDimitry Andric       // addressing.
14470fca6ea1SDimitry Andric       if (AMK == TTI::AMK_PreIndexed && F.BaseOffset.isFixed()) {
14480b57cec5SDimitry Andric         if (auto *Step = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE)))
14490fca6ea1SDimitry Andric           if (Step->getAPInt() == F.BaseOffset.getFixedValue())
14500b57cec5SDimitry Andric             LoopCost = 0;
1451fe6060f1SDimitry Andric       } else if (AMK == TTI::AMK_PostIndexed) {
14520b57cec5SDimitry Andric         const SCEV *LoopStep = AR->getStepRecurrence(*SE);
14530b57cec5SDimitry Andric         if (isa<SCEVConstant>(LoopStep)) {
14540b57cec5SDimitry Andric           const SCEV *LoopStart = AR->getStart();
14550b57cec5SDimitry Andric           if (!isa<SCEVConstant>(LoopStart) &&
14560b57cec5SDimitry Andric               SE->isLoopInvariant(LoopStart, L))
14570b57cec5SDimitry Andric             LoopCost = 0;
14580b57cec5SDimitry Andric         }
14590b57cec5SDimitry Andric       }
14600b57cec5SDimitry Andric     }
14610b57cec5SDimitry Andric     C.AddRecCost += LoopCost;
14620b57cec5SDimitry Andric 
14630b57cec5SDimitry Andric     // Add the step value register, if it needs one.
14640b57cec5SDimitry Andric     // TODO: The non-affine case isn't precisely modeled here.
14650b57cec5SDimitry Andric     if (!AR->isAffine() || !isa<SCEVConstant>(AR->getOperand(1))) {
14660b57cec5SDimitry Andric       if (!Regs.count(AR->getOperand(1))) {
14670b57cec5SDimitry Andric         RateRegister(F, AR->getOperand(1), Regs);
14680b57cec5SDimitry Andric         if (isLoser())
14690b57cec5SDimitry Andric           return;
14700b57cec5SDimitry Andric       }
14710b57cec5SDimitry Andric     }
14720b57cec5SDimitry Andric   }
14730b57cec5SDimitry Andric   ++C.NumRegs;
14740b57cec5SDimitry Andric 
14750b57cec5SDimitry Andric   // Rough heuristic; favor registers which don't require extra setup
14760b57cec5SDimitry Andric   // instructions in the preheader.
14770b57cec5SDimitry Andric   C.SetupCost += getSetupCost(Reg, SetupCostDepthLimit);
14780b57cec5SDimitry Andric   // Ensure we don't, even with the recusion limit, produce invalid costs.
14790b57cec5SDimitry Andric   C.SetupCost = std::min<unsigned>(C.SetupCost, 1 << 16);
14800b57cec5SDimitry Andric 
14810b57cec5SDimitry Andric   C.NumIVMuls += isa<SCEVMulExpr>(Reg) &&
14820b57cec5SDimitry Andric                SE->hasComputableLoopEvolution(Reg, L);
14830b57cec5SDimitry Andric }
14840b57cec5SDimitry Andric 
14850b57cec5SDimitry Andric /// Record this register in the set. If we haven't seen it before, rate
14860b57cec5SDimitry Andric /// it. Optional LoserRegs provides a way to declare any formula that refers to
14870b57cec5SDimitry Andric /// one of those regs an instant loser.
14880b57cec5SDimitry Andric void Cost::RatePrimaryRegister(const Formula &F, const SCEV *Reg,
14890b57cec5SDimitry Andric                                SmallPtrSetImpl<const SCEV *> &Regs,
14900b57cec5SDimitry Andric                                SmallPtrSetImpl<const SCEV *> *LoserRegs) {
14910b57cec5SDimitry Andric   if (LoserRegs && LoserRegs->count(Reg)) {
14920b57cec5SDimitry Andric     Lose();
14930b57cec5SDimitry Andric     return;
14940b57cec5SDimitry Andric   }
14950b57cec5SDimitry Andric   if (Regs.insert(Reg).second) {
14960b57cec5SDimitry Andric     RateRegister(F, Reg, Regs);
14970b57cec5SDimitry Andric     if (LoserRegs && isLoser())
14980b57cec5SDimitry Andric       LoserRegs->insert(Reg);
14990b57cec5SDimitry Andric   }
15000b57cec5SDimitry Andric }
15010b57cec5SDimitry Andric 
15020b57cec5SDimitry Andric void Cost::RateFormula(const Formula &F,
15030b57cec5SDimitry Andric                        SmallPtrSetImpl<const SCEV *> &Regs,
15040b57cec5SDimitry Andric                        const DenseSet<const SCEV *> &VisitedRegs,
15050b57cec5SDimitry Andric                        const LSRUse &LU,
15060b57cec5SDimitry Andric                        SmallPtrSetImpl<const SCEV *> *LoserRegs) {
150781ad6265SDimitry Andric   if (isLoser())
150881ad6265SDimitry Andric     return;
15090b57cec5SDimitry Andric   assert(F.isCanonical(*L) && "Cost is accurate only for canonical formula");
15100b57cec5SDimitry Andric   // Tally up the registers.
15110b57cec5SDimitry Andric   unsigned PrevAddRecCost = C.AddRecCost;
15120b57cec5SDimitry Andric   unsigned PrevNumRegs = C.NumRegs;
15130b57cec5SDimitry Andric   unsigned PrevNumBaseAdds = C.NumBaseAdds;
15140b57cec5SDimitry Andric   if (const SCEV *ScaledReg = F.ScaledReg) {
15150b57cec5SDimitry Andric     if (VisitedRegs.count(ScaledReg)) {
15160b57cec5SDimitry Andric       Lose();
15170b57cec5SDimitry Andric       return;
15180b57cec5SDimitry Andric     }
15190b57cec5SDimitry Andric     RatePrimaryRegister(F, ScaledReg, Regs, LoserRegs);
15200b57cec5SDimitry Andric     if (isLoser())
15210b57cec5SDimitry Andric       return;
15220b57cec5SDimitry Andric   }
15230b57cec5SDimitry Andric   for (const SCEV *BaseReg : F.BaseRegs) {
15240b57cec5SDimitry Andric     if (VisitedRegs.count(BaseReg)) {
15250b57cec5SDimitry Andric       Lose();
15260b57cec5SDimitry Andric       return;
15270b57cec5SDimitry Andric     }
15280b57cec5SDimitry Andric     RatePrimaryRegister(F, BaseReg, Regs, LoserRegs);
15290b57cec5SDimitry Andric     if (isLoser())
15300b57cec5SDimitry Andric       return;
15310b57cec5SDimitry Andric   }
15320b57cec5SDimitry Andric 
15330b57cec5SDimitry Andric   // Determine how many (unfolded) adds we'll need inside the loop.
15340b57cec5SDimitry Andric   size_t NumBaseParts = F.getNumRegs();
15350b57cec5SDimitry Andric   if (NumBaseParts > 1)
15360b57cec5SDimitry Andric     // Do not count the base and a possible second register if the target
15370b57cec5SDimitry Andric     // allows to fold 2 registers.
15380b57cec5SDimitry Andric     C.NumBaseAdds +=
15390b57cec5SDimitry Andric         NumBaseParts - (1 + (F.Scale && isAMCompletelyFolded(*TTI, LU, F)));
15400fca6ea1SDimitry Andric   C.NumBaseAdds += (F.UnfoldedOffset.isNonZero());
15410b57cec5SDimitry Andric 
15420b57cec5SDimitry Andric   // Accumulate non-free scaling amounts.
1543fe6060f1SDimitry Andric   C.ScaleCost += *getScalingFactorCost(*TTI, LU, F, *L).getValue();
15440b57cec5SDimitry Andric 
15450b57cec5SDimitry Andric   // Tally up the non-zero immediates.
15460b57cec5SDimitry Andric   for (const LSRFixup &Fixup : LU.Fixups) {
15470fca6ea1SDimitry Andric     if (Fixup.Offset.isCompatibleImmediate(F.BaseOffset)) {
15480fca6ea1SDimitry Andric       Immediate Offset = Fixup.Offset.addUnsigned(F.BaseOffset);
15490b57cec5SDimitry Andric       if (F.BaseGV)
15500b57cec5SDimitry Andric         C.ImmCost += 64; // Handle symbolic values conservatively.
15510b57cec5SDimitry Andric                          // TODO: This should probably be the pointer size.
15520fca6ea1SDimitry Andric       else if (Offset.isNonZero())
15530fca6ea1SDimitry Andric         C.ImmCost +=
15540fca6ea1SDimitry Andric             APInt(64, Offset.getKnownMinValue(), true).getSignificantBits();
15550b57cec5SDimitry Andric 
15560b57cec5SDimitry Andric       // Check with target if this offset with this instruction is
15570b57cec5SDimitry Andric       // specifically not supported.
15580fca6ea1SDimitry Andric       if (LU.Kind == LSRUse::Address && Offset.isNonZero() &&
15590b57cec5SDimitry Andric           !isAMCompletelyFolded(*TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
15600b57cec5SDimitry Andric                                 Offset, F.HasBaseReg, F.Scale, Fixup.UserInst))
15610b57cec5SDimitry Andric         C.NumBaseAdds++;
15620fca6ea1SDimitry Andric     } else {
15630fca6ea1SDimitry Andric       // Incompatible immediate type, increase cost to avoid using
15640fca6ea1SDimitry Andric       C.ImmCost += 2048;
15650fca6ea1SDimitry Andric     }
15660b57cec5SDimitry Andric   }
15670b57cec5SDimitry Andric 
15680b57cec5SDimitry Andric   // If we don't count instruction cost exit here.
15690b57cec5SDimitry Andric   if (!InsnsCost) {
15700b57cec5SDimitry Andric     assert(isValid() && "invalid cost");
15710b57cec5SDimitry Andric     return;
15720b57cec5SDimitry Andric   }
15730b57cec5SDimitry Andric 
15740b57cec5SDimitry Andric   // Treat every new register that exceeds TTI.getNumberOfRegisters() - 1 as
15750b57cec5SDimitry Andric   // additional instruction (at least fill).
15768bcb0991SDimitry Andric   // TODO: Need distinguish register class?
15778bcb0991SDimitry Andric   unsigned TTIRegNum = TTI->getNumberOfRegisters(
15788bcb0991SDimitry Andric                        TTI->getRegisterClassForType(false, F.getType())) - 1;
15790b57cec5SDimitry Andric   if (C.NumRegs > TTIRegNum) {
15800b57cec5SDimitry Andric     // Cost already exceeded TTIRegNum, then only newly added register can add
15810b57cec5SDimitry Andric     // new instructions.
15820b57cec5SDimitry Andric     if (PrevNumRegs > TTIRegNum)
15830b57cec5SDimitry Andric       C.Insns += (C.NumRegs - PrevNumRegs);
15840b57cec5SDimitry Andric     else
15850b57cec5SDimitry Andric       C.Insns += (C.NumRegs - TTIRegNum);
15860b57cec5SDimitry Andric   }
15870b57cec5SDimitry Andric 
15880b57cec5SDimitry Andric   // If ICmpZero formula ends with not 0, it could not be replaced by
15890b57cec5SDimitry Andric   // just add or sub. We'll need to compare final result of AddRec.
15900b57cec5SDimitry Andric   // That means we'll need an additional instruction. But if the target can
15910b57cec5SDimitry Andric   // macro-fuse a compare with a branch, don't count this extra instruction.
15920b57cec5SDimitry Andric   // For -10 + {0, +, 1}:
15930b57cec5SDimitry Andric   // i = i + 1;
15940b57cec5SDimitry Andric   // cmp i, 10
15950b57cec5SDimitry Andric   //
15960b57cec5SDimitry Andric   // For {-10, +, 1}:
15970b57cec5SDimitry Andric   // i = i + 1;
15980b57cec5SDimitry Andric   if (LU.Kind == LSRUse::ICmpZero && !F.hasZeroEnd() &&
15990b57cec5SDimitry Andric       !TTI->canMacroFuseCmp())
16000b57cec5SDimitry Andric     C.Insns++;
16010b57cec5SDimitry Andric   // Each new AddRec adds 1 instruction to calculation.
16020b57cec5SDimitry Andric   C.Insns += (C.AddRecCost - PrevAddRecCost);
16030b57cec5SDimitry Andric 
16040b57cec5SDimitry Andric   // BaseAdds adds instructions for unfolded registers.
16050b57cec5SDimitry Andric   if (LU.Kind != LSRUse::ICmpZero)
16060b57cec5SDimitry Andric     C.Insns += C.NumBaseAdds - PrevNumBaseAdds;
16070b57cec5SDimitry Andric   assert(isValid() && "invalid cost");
16080b57cec5SDimitry Andric }
16090b57cec5SDimitry Andric 
16100b57cec5SDimitry Andric /// Set this cost to a losing value.
16110b57cec5SDimitry Andric void Cost::Lose() {
16120b57cec5SDimitry Andric   C.Insns = std::numeric_limits<unsigned>::max();
16130b57cec5SDimitry Andric   C.NumRegs = std::numeric_limits<unsigned>::max();
16140b57cec5SDimitry Andric   C.AddRecCost = std::numeric_limits<unsigned>::max();
16150b57cec5SDimitry Andric   C.NumIVMuls = std::numeric_limits<unsigned>::max();
16160b57cec5SDimitry Andric   C.NumBaseAdds = std::numeric_limits<unsigned>::max();
16170b57cec5SDimitry Andric   C.ImmCost = std::numeric_limits<unsigned>::max();
16180b57cec5SDimitry Andric   C.SetupCost = std::numeric_limits<unsigned>::max();
16190b57cec5SDimitry Andric   C.ScaleCost = std::numeric_limits<unsigned>::max();
16200b57cec5SDimitry Andric }
16210b57cec5SDimitry Andric 
16220b57cec5SDimitry Andric /// Choose the lower cost.
1623bdd1243dSDimitry Andric bool Cost::isLess(const Cost &Other) const {
16240b57cec5SDimitry Andric   if (InsnsCost.getNumOccurrences() > 0 && InsnsCost &&
16250b57cec5SDimitry Andric       C.Insns != Other.C.Insns)
16260b57cec5SDimitry Andric     return C.Insns < Other.C.Insns;
16270b57cec5SDimitry Andric   return TTI->isLSRCostLess(C, Other.C);
16280b57cec5SDimitry Andric }
16290b57cec5SDimitry Andric 
16300b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16310b57cec5SDimitry Andric void Cost::print(raw_ostream &OS) const {
16320b57cec5SDimitry Andric   if (InsnsCost)
16330b57cec5SDimitry Andric     OS << C.Insns << " instruction" << (C.Insns == 1 ? " " : "s ");
16340b57cec5SDimitry Andric   OS << C.NumRegs << " reg" << (C.NumRegs == 1 ? "" : "s");
16350b57cec5SDimitry Andric   if (C.AddRecCost != 0)
16360b57cec5SDimitry Andric     OS << ", with addrec cost " << C.AddRecCost;
16370b57cec5SDimitry Andric   if (C.NumIVMuls != 0)
16380b57cec5SDimitry Andric     OS << ", plus " << C.NumIVMuls << " IV mul"
16390b57cec5SDimitry Andric        << (C.NumIVMuls == 1 ? "" : "s");
16400b57cec5SDimitry Andric   if (C.NumBaseAdds != 0)
16410b57cec5SDimitry Andric     OS << ", plus " << C.NumBaseAdds << " base add"
16420b57cec5SDimitry Andric        << (C.NumBaseAdds == 1 ? "" : "s");
16430b57cec5SDimitry Andric   if (C.ScaleCost != 0)
16440b57cec5SDimitry Andric     OS << ", plus " << C.ScaleCost << " scale cost";
16450b57cec5SDimitry Andric   if (C.ImmCost != 0)
16460b57cec5SDimitry Andric     OS << ", plus " << C.ImmCost << " imm cost";
16470b57cec5SDimitry Andric   if (C.SetupCost != 0)
16480b57cec5SDimitry Andric     OS << ", plus " << C.SetupCost << " setup cost";
16490b57cec5SDimitry Andric }
16500b57cec5SDimitry Andric 
16510b57cec5SDimitry Andric LLVM_DUMP_METHOD void Cost::dump() const {
16520b57cec5SDimitry Andric   print(errs()); errs() << '\n';
16530b57cec5SDimitry Andric }
16540b57cec5SDimitry Andric #endif
16550b57cec5SDimitry Andric 
16560b57cec5SDimitry Andric /// Test whether this fixup always uses its value outside of the given loop.
16570b57cec5SDimitry Andric bool LSRFixup::isUseFullyOutsideLoop(const Loop *L) const {
16580b57cec5SDimitry Andric   // PHI nodes use their value in their incoming blocks.
16590b57cec5SDimitry Andric   if (const PHINode *PN = dyn_cast<PHINode>(UserInst)) {
16600b57cec5SDimitry Andric     for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
16610b57cec5SDimitry Andric       if (PN->getIncomingValue(i) == OperandValToReplace &&
16620b57cec5SDimitry Andric           L->contains(PN->getIncomingBlock(i)))
16630b57cec5SDimitry Andric         return false;
16640b57cec5SDimitry Andric     return true;
16650b57cec5SDimitry Andric   }
16660b57cec5SDimitry Andric 
16670b57cec5SDimitry Andric   return !L->contains(UserInst);
16680b57cec5SDimitry Andric }
16690b57cec5SDimitry Andric 
16700b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
16710b57cec5SDimitry Andric void LSRFixup::print(raw_ostream &OS) const {
16720b57cec5SDimitry Andric   OS << "UserInst=";
16730b57cec5SDimitry Andric   // Store is common and interesting enough to be worth special-casing.
16740b57cec5SDimitry Andric   if (StoreInst *Store = dyn_cast<StoreInst>(UserInst)) {
16750b57cec5SDimitry Andric     OS << "store ";
16760b57cec5SDimitry Andric     Store->getOperand(0)->printAsOperand(OS, /*PrintType=*/false);
16770b57cec5SDimitry Andric   } else if (UserInst->getType()->isVoidTy())
16780b57cec5SDimitry Andric     OS << UserInst->getOpcodeName();
16790b57cec5SDimitry Andric   else
16800b57cec5SDimitry Andric     UserInst->printAsOperand(OS, /*PrintType=*/false);
16810b57cec5SDimitry Andric 
16820b57cec5SDimitry Andric   OS << ", OperandValToReplace=";
16830b57cec5SDimitry Andric   OperandValToReplace->printAsOperand(OS, /*PrintType=*/false);
16840b57cec5SDimitry Andric 
16850b57cec5SDimitry Andric   for (const Loop *PIL : PostIncLoops) {
16860b57cec5SDimitry Andric     OS << ", PostIncLoop=";
16870b57cec5SDimitry Andric     PIL->getHeader()->printAsOperand(OS, /*PrintType=*/false);
16880b57cec5SDimitry Andric   }
16890b57cec5SDimitry Andric 
16900fca6ea1SDimitry Andric   if (Offset.isNonZero())
16910b57cec5SDimitry Andric     OS << ", Offset=" << Offset;
16920b57cec5SDimitry Andric }
16930b57cec5SDimitry Andric 
16940b57cec5SDimitry Andric LLVM_DUMP_METHOD void LSRFixup::dump() const {
16950b57cec5SDimitry Andric   print(errs()); errs() << '\n';
16960b57cec5SDimitry Andric }
16970b57cec5SDimitry Andric #endif
16980b57cec5SDimitry Andric 
16990b57cec5SDimitry Andric /// Test whether this use as a formula which has the same registers as the given
17000b57cec5SDimitry Andric /// formula.
17010b57cec5SDimitry Andric bool LSRUse::HasFormulaWithSameRegs(const Formula &F) const {
17020b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
17030b57cec5SDimitry Andric   if (F.ScaledReg) Key.push_back(F.ScaledReg);
17040b57cec5SDimitry Andric   // Unstable sort by host order ok, because this is only used for uniquifying.
17050b57cec5SDimitry Andric   llvm::sort(Key);
17060b57cec5SDimitry Andric   return Uniquifier.count(Key);
17070b57cec5SDimitry Andric }
17080b57cec5SDimitry Andric 
17090b57cec5SDimitry Andric /// The function returns a probability of selecting formula without Reg.
17100b57cec5SDimitry Andric float LSRUse::getNotSelectedProbability(const SCEV *Reg) const {
17110b57cec5SDimitry Andric   unsigned FNum = 0;
17120b57cec5SDimitry Andric   for (const Formula &F : Formulae)
17130b57cec5SDimitry Andric     if (F.referencesReg(Reg))
17140b57cec5SDimitry Andric       FNum++;
17150b57cec5SDimitry Andric   return ((float)(Formulae.size() - FNum)) / Formulae.size();
17160b57cec5SDimitry Andric }
17170b57cec5SDimitry Andric 
17180b57cec5SDimitry Andric /// If the given formula has not yet been inserted, add it to the list, and
17190b57cec5SDimitry Andric /// return true. Return false otherwise.  The formula must be in canonical form.
17200b57cec5SDimitry Andric bool LSRUse::InsertFormula(const Formula &F, const Loop &L) {
17210b57cec5SDimitry Andric   assert(F.isCanonical(L) && "Invalid canonical representation");
17220b57cec5SDimitry Andric 
17230b57cec5SDimitry Andric   if (!Formulae.empty() && RigidFormula)
17240b57cec5SDimitry Andric     return false;
17250b57cec5SDimitry Andric 
17260b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> Key = F.BaseRegs;
17270b57cec5SDimitry Andric   if (F.ScaledReg) Key.push_back(F.ScaledReg);
17280b57cec5SDimitry Andric   // Unstable sort by host order ok, because this is only used for uniquifying.
17290b57cec5SDimitry Andric   llvm::sort(Key);
17300b57cec5SDimitry Andric 
17310b57cec5SDimitry Andric   if (!Uniquifier.insert(Key).second)
17320b57cec5SDimitry Andric     return false;
17330b57cec5SDimitry Andric 
17340b57cec5SDimitry Andric   // Using a register to hold the value of 0 is not profitable.
17350b57cec5SDimitry Andric   assert((!F.ScaledReg || !F.ScaledReg->isZero()) &&
17360b57cec5SDimitry Andric          "Zero allocated in a scaled register!");
17370b57cec5SDimitry Andric #ifndef NDEBUG
17380b57cec5SDimitry Andric   for (const SCEV *BaseReg : F.BaseRegs)
17390b57cec5SDimitry Andric     assert(!BaseReg->isZero() && "Zero allocated in a base register!");
17400b57cec5SDimitry Andric #endif
17410b57cec5SDimitry Andric 
17420b57cec5SDimitry Andric   // Add the formula to the list.
17430b57cec5SDimitry Andric   Formulae.push_back(F);
17440b57cec5SDimitry Andric 
17450b57cec5SDimitry Andric   // Record registers now being used by this use.
17460b57cec5SDimitry Andric   Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
17470b57cec5SDimitry Andric   if (F.ScaledReg)
17480b57cec5SDimitry Andric     Regs.insert(F.ScaledReg);
17490b57cec5SDimitry Andric 
17500b57cec5SDimitry Andric   return true;
17510b57cec5SDimitry Andric }
17520b57cec5SDimitry Andric 
17530b57cec5SDimitry Andric /// Remove the given formula from this use's list.
17540b57cec5SDimitry Andric void LSRUse::DeleteFormula(Formula &F) {
17550b57cec5SDimitry Andric   if (&F != &Formulae.back())
17560b57cec5SDimitry Andric     std::swap(F, Formulae.back());
17570b57cec5SDimitry Andric   Formulae.pop_back();
17580b57cec5SDimitry Andric }
17590b57cec5SDimitry Andric 
17600b57cec5SDimitry Andric /// Recompute the Regs field, and update RegUses.
17610b57cec5SDimitry Andric void LSRUse::RecomputeRegs(size_t LUIdx, RegUseTracker &RegUses) {
17620b57cec5SDimitry Andric   // Now that we've filtered out some formulae, recompute the Regs set.
17630b57cec5SDimitry Andric   SmallPtrSet<const SCEV *, 4> OldRegs = std::move(Regs);
17640b57cec5SDimitry Andric   Regs.clear();
17650b57cec5SDimitry Andric   for (const Formula &F : Formulae) {
17660b57cec5SDimitry Andric     if (F.ScaledReg) Regs.insert(F.ScaledReg);
17670b57cec5SDimitry Andric     Regs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
17680b57cec5SDimitry Andric   }
17690b57cec5SDimitry Andric 
17700b57cec5SDimitry Andric   // Update the RegTracker.
17710b57cec5SDimitry Andric   for (const SCEV *S : OldRegs)
17720b57cec5SDimitry Andric     if (!Regs.count(S))
17730b57cec5SDimitry Andric       RegUses.dropRegister(S, LUIdx);
17740b57cec5SDimitry Andric }
17750b57cec5SDimitry Andric 
17760b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
17770b57cec5SDimitry Andric void LSRUse::print(raw_ostream &OS) const {
17780b57cec5SDimitry Andric   OS << "LSR Use: Kind=";
17790b57cec5SDimitry Andric   switch (Kind) {
17800b57cec5SDimitry Andric   case Basic:    OS << "Basic"; break;
17810b57cec5SDimitry Andric   case Special:  OS << "Special"; break;
17820b57cec5SDimitry Andric   case ICmpZero: OS << "ICmpZero"; break;
17830b57cec5SDimitry Andric   case Address:
17840b57cec5SDimitry Andric     OS << "Address of ";
17850b57cec5SDimitry Andric     if (AccessTy.MemTy->isPointerTy())
17860b57cec5SDimitry Andric       OS << "pointer"; // the full pointer type could be really verbose
17870b57cec5SDimitry Andric     else {
17880b57cec5SDimitry Andric       OS << *AccessTy.MemTy;
17890b57cec5SDimitry Andric     }
17900b57cec5SDimitry Andric 
17910b57cec5SDimitry Andric     OS << " in addrspace(" << AccessTy.AddrSpace << ')';
17920b57cec5SDimitry Andric   }
17930b57cec5SDimitry Andric 
17940b57cec5SDimitry Andric   OS << ", Offsets={";
17950b57cec5SDimitry Andric   bool NeedComma = false;
17960b57cec5SDimitry Andric   for (const LSRFixup &Fixup : Fixups) {
17970b57cec5SDimitry Andric     if (NeedComma) OS << ',';
17980b57cec5SDimitry Andric     OS << Fixup.Offset;
17990b57cec5SDimitry Andric     NeedComma = true;
18000b57cec5SDimitry Andric   }
18010b57cec5SDimitry Andric   OS << '}';
18020b57cec5SDimitry Andric 
18030b57cec5SDimitry Andric   if (AllFixupsOutsideLoop)
18040b57cec5SDimitry Andric     OS << ", all-fixups-outside-loop";
18050b57cec5SDimitry Andric 
18060b57cec5SDimitry Andric   if (WidestFixupType)
18070b57cec5SDimitry Andric     OS << ", widest fixup type: " << *WidestFixupType;
18080b57cec5SDimitry Andric }
18090b57cec5SDimitry Andric 
18100b57cec5SDimitry Andric LLVM_DUMP_METHOD void LSRUse::dump() const {
18110b57cec5SDimitry Andric   print(errs()); errs() << '\n';
18120b57cec5SDimitry Andric }
18130b57cec5SDimitry Andric #endif
18140b57cec5SDimitry Andric 
18150b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
18160b57cec5SDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
18170fca6ea1SDimitry Andric                                  GlobalValue *BaseGV, Immediate BaseOffset,
18180b57cec5SDimitry Andric                                  bool HasBaseReg, int64_t Scale,
18190b57cec5SDimitry Andric                                  Instruction *Fixup /* = nullptr */) {
18200b57cec5SDimitry Andric   switch (Kind) {
18210fca6ea1SDimitry Andric   case LSRUse::Address: {
18220fca6ea1SDimitry Andric     int64_t FixedOffset =
18230fca6ea1SDimitry Andric         BaseOffset.isScalable() ? 0 : BaseOffset.getFixedValue();
18240fca6ea1SDimitry Andric     int64_t ScalableOffset =
18250fca6ea1SDimitry Andric         BaseOffset.isScalable() ? BaseOffset.getKnownMinValue() : 0;
18260fca6ea1SDimitry Andric     return TTI.isLegalAddressingMode(AccessTy.MemTy, BaseGV, FixedOffset,
18270fca6ea1SDimitry Andric                                      HasBaseReg, Scale, AccessTy.AddrSpace,
18280fca6ea1SDimitry Andric                                      Fixup, ScalableOffset);
18290fca6ea1SDimitry Andric   }
18300b57cec5SDimitry Andric   case LSRUse::ICmpZero:
18310b57cec5SDimitry Andric     // There's not even a target hook for querying whether it would be legal to
18320b57cec5SDimitry Andric     // fold a GV into an ICmp.
18330b57cec5SDimitry Andric     if (BaseGV)
18340b57cec5SDimitry Andric       return false;
18350b57cec5SDimitry Andric 
18360b57cec5SDimitry Andric     // ICmp only has two operands; don't allow more than two non-trivial parts.
18370fca6ea1SDimitry Andric     if (Scale != 0 && HasBaseReg && BaseOffset.isNonZero())
18380b57cec5SDimitry Andric       return false;
18390b57cec5SDimitry Andric 
18400b57cec5SDimitry Andric     // ICmp only supports no scale or a -1 scale, as we can "fold" a -1 scale by
18410b57cec5SDimitry Andric     // putting the scaled register in the other operand of the icmp.
18420b57cec5SDimitry Andric     if (Scale != 0 && Scale != -1)
18430b57cec5SDimitry Andric       return false;
18440b57cec5SDimitry Andric 
18450b57cec5SDimitry Andric     // If we have low-level target information, ask the target if it can fold an
18460b57cec5SDimitry Andric     // integer immediate on an icmp.
18470fca6ea1SDimitry Andric     if (BaseOffset.isNonZero()) {
18480fca6ea1SDimitry Andric       // We don't have an interface to query whether the target supports
18490fca6ea1SDimitry Andric       // icmpzero against scalable quantities yet.
18500fca6ea1SDimitry Andric       if (BaseOffset.isScalable())
18510fca6ea1SDimitry Andric         return false;
18520fca6ea1SDimitry Andric 
18530b57cec5SDimitry Andric       // We have one of:
18540b57cec5SDimitry Andric       // ICmpZero     BaseReg + BaseOffset => ICmp BaseReg, -BaseOffset
18550b57cec5SDimitry Andric       // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset
18560b57cec5SDimitry Andric       // Offs is the ICmp immediate.
18570b57cec5SDimitry Andric       if (Scale == 0)
18580b57cec5SDimitry Andric         // The cast does the right thing with
18590b57cec5SDimitry Andric         // std::numeric_limits<int64_t>::min().
18600fca6ea1SDimitry Andric         BaseOffset = BaseOffset.getFixed(-(uint64_t)BaseOffset.getFixedValue());
18610fca6ea1SDimitry Andric       return TTI.isLegalICmpImmediate(BaseOffset.getFixedValue());
18620b57cec5SDimitry Andric     }
18630b57cec5SDimitry Andric 
18640b57cec5SDimitry Andric     // ICmpZero BaseReg + -1*ScaleReg => ICmp BaseReg, ScaleReg
18650b57cec5SDimitry Andric     return true;
18660b57cec5SDimitry Andric 
18670b57cec5SDimitry Andric   case LSRUse::Basic:
18680b57cec5SDimitry Andric     // Only handle single-register values.
18690fca6ea1SDimitry Andric     return !BaseGV && Scale == 0 && BaseOffset.isZero();
18700b57cec5SDimitry Andric 
18710b57cec5SDimitry Andric   case LSRUse::Special:
18720b57cec5SDimitry Andric     // Special case Basic to handle -1 scales.
18730fca6ea1SDimitry Andric     return !BaseGV && (Scale == 0 || Scale == -1) && BaseOffset.isZero();
18740b57cec5SDimitry Andric   }
18750b57cec5SDimitry Andric 
18760b57cec5SDimitry Andric   llvm_unreachable("Invalid LSRUse Kind!");
18770b57cec5SDimitry Andric }
18780b57cec5SDimitry Andric 
18790b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
18800fca6ea1SDimitry Andric                                  Immediate MinOffset, Immediate MaxOffset,
18810b57cec5SDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
18820fca6ea1SDimitry Andric                                  GlobalValue *BaseGV, Immediate BaseOffset,
18830b57cec5SDimitry Andric                                  bool HasBaseReg, int64_t Scale) {
18840fca6ea1SDimitry Andric   if (BaseOffset.isNonZero() &&
18850fca6ea1SDimitry Andric       (BaseOffset.isScalable() != MinOffset.isScalable() ||
18860fca6ea1SDimitry Andric        BaseOffset.isScalable() != MaxOffset.isScalable()))
18870fca6ea1SDimitry Andric     return false;
18880b57cec5SDimitry Andric   // Check for overflow.
18890fca6ea1SDimitry Andric   int64_t Base = BaseOffset.getKnownMinValue();
18900fca6ea1SDimitry Andric   int64_t Min = MinOffset.getKnownMinValue();
18910fca6ea1SDimitry Andric   int64_t Max = MaxOffset.getKnownMinValue();
18920fca6ea1SDimitry Andric   if (((int64_t)((uint64_t)Base + Min) > Base) != (Min > 0))
18930b57cec5SDimitry Andric     return false;
18940fca6ea1SDimitry Andric   MinOffset = Immediate::get((uint64_t)Base + Min, MinOffset.isScalable());
18950fca6ea1SDimitry Andric   if (((int64_t)((uint64_t)Base + Max) > Base) != (Max > 0))
18960b57cec5SDimitry Andric     return false;
18970fca6ea1SDimitry Andric   MaxOffset = Immediate::get((uint64_t)Base + Max, MaxOffset.isScalable());
18980b57cec5SDimitry Andric 
18990b57cec5SDimitry Andric   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MinOffset,
19000b57cec5SDimitry Andric                               HasBaseReg, Scale) &&
19010b57cec5SDimitry Andric          isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, MaxOffset,
19020b57cec5SDimitry Andric                               HasBaseReg, Scale);
19030b57cec5SDimitry Andric }
19040b57cec5SDimitry Andric 
19050b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
19060fca6ea1SDimitry Andric                                  Immediate MinOffset, Immediate MaxOffset,
19070b57cec5SDimitry Andric                                  LSRUse::KindType Kind, MemAccessTy AccessTy,
19080b57cec5SDimitry Andric                                  const Formula &F, const Loop &L) {
19090b57cec5SDimitry Andric   // For the purpose of isAMCompletelyFolded either having a canonical formula
19100b57cec5SDimitry Andric   // or a scale not equal to zero is correct.
19110b57cec5SDimitry Andric   // Problems may arise from non canonical formulae having a scale == 0.
19120b57cec5SDimitry Andric   // Strictly speaking it would best to just rely on canonical formulae.
19130b57cec5SDimitry Andric   // However, when we generate the scaled formulae, we first check that the
19140b57cec5SDimitry Andric   // scaling factor is profitable before computing the actual ScaledReg for
19150b57cec5SDimitry Andric   // compile time sake.
19160b57cec5SDimitry Andric   assert((F.isCanonical(L) || F.Scale != 0));
19170b57cec5SDimitry Andric   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
19180b57cec5SDimitry Andric                               F.BaseGV, F.BaseOffset, F.HasBaseReg, F.Scale);
19190b57cec5SDimitry Andric }
19200b57cec5SDimitry Andric 
19210b57cec5SDimitry Andric /// Test whether we know how to expand the current formula.
19220fca6ea1SDimitry Andric static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
19230fca6ea1SDimitry Andric                        Immediate MaxOffset, LSRUse::KindType Kind,
19240b57cec5SDimitry Andric                        MemAccessTy AccessTy, GlobalValue *BaseGV,
19250fca6ea1SDimitry Andric                        Immediate BaseOffset, bool HasBaseReg, int64_t Scale) {
19260b57cec5SDimitry Andric   // We know how to expand completely foldable formulae.
19270b57cec5SDimitry Andric   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
19280b57cec5SDimitry Andric                               BaseOffset, HasBaseReg, Scale) ||
19290b57cec5SDimitry Andric          // Or formulae that use a base register produced by a sum of base
19300b57cec5SDimitry Andric          // registers.
19310b57cec5SDimitry Andric          (Scale == 1 &&
19320b57cec5SDimitry Andric           isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy,
19330b57cec5SDimitry Andric                                BaseGV, BaseOffset, true, 0));
19340b57cec5SDimitry Andric }
19350b57cec5SDimitry Andric 
19360fca6ea1SDimitry Andric static bool isLegalUse(const TargetTransformInfo &TTI, Immediate MinOffset,
19370fca6ea1SDimitry Andric                        Immediate MaxOffset, LSRUse::KindType Kind,
19380b57cec5SDimitry Andric                        MemAccessTy AccessTy, const Formula &F) {
19390b57cec5SDimitry Andric   return isLegalUse(TTI, MinOffset, MaxOffset, Kind, AccessTy, F.BaseGV,
19400b57cec5SDimitry Andric                     F.BaseOffset, F.HasBaseReg, F.Scale);
19410b57cec5SDimitry Andric }
19420b57cec5SDimitry Andric 
19430fca6ea1SDimitry Andric static bool isLegalAddImmediate(const TargetTransformInfo &TTI,
19440fca6ea1SDimitry Andric                                 Immediate Offset) {
19450fca6ea1SDimitry Andric   if (Offset.isScalable())
19460fca6ea1SDimitry Andric     return TTI.isLegalAddScalableImmediate(Offset.getKnownMinValue());
19470fca6ea1SDimitry Andric 
19480fca6ea1SDimitry Andric   return TTI.isLegalAddImmediate(Offset.getFixedValue());
19490fca6ea1SDimitry Andric }
19500fca6ea1SDimitry Andric 
19510b57cec5SDimitry Andric static bool isAMCompletelyFolded(const TargetTransformInfo &TTI,
19520b57cec5SDimitry Andric                                  const LSRUse &LU, const Formula &F) {
19530b57cec5SDimitry Andric   // Target may want to look at the user instructions.
19540b57cec5SDimitry Andric   if (LU.Kind == LSRUse::Address && TTI.LSRWithInstrQueries()) {
19550b57cec5SDimitry Andric     for (const LSRFixup &Fixup : LU.Fixups)
19560b57cec5SDimitry Andric       if (!isAMCompletelyFolded(TTI, LSRUse::Address, LU.AccessTy, F.BaseGV,
19570b57cec5SDimitry Andric                                 (F.BaseOffset + Fixup.Offset), F.HasBaseReg,
19580b57cec5SDimitry Andric                                 F.Scale, Fixup.UserInst))
19590b57cec5SDimitry Andric         return false;
19600b57cec5SDimitry Andric     return true;
19610b57cec5SDimitry Andric   }
19620b57cec5SDimitry Andric 
19630b57cec5SDimitry Andric   return isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
19640b57cec5SDimitry Andric                               LU.AccessTy, F.BaseGV, F.BaseOffset, F.HasBaseReg,
19650b57cec5SDimitry Andric                               F.Scale);
19660b57cec5SDimitry Andric }
19670b57cec5SDimitry Andric 
1968fe6060f1SDimitry Andric static InstructionCost getScalingFactorCost(const TargetTransformInfo &TTI,
19690b57cec5SDimitry Andric                                             const LSRUse &LU, const Formula &F,
19700b57cec5SDimitry Andric                                             const Loop &L) {
19710b57cec5SDimitry Andric   if (!F.Scale)
19720b57cec5SDimitry Andric     return 0;
19730b57cec5SDimitry Andric 
19740b57cec5SDimitry Andric   // If the use is not completely folded in that instruction, we will have to
19750b57cec5SDimitry Andric   // pay an extra cost only for scale != 1.
19760b57cec5SDimitry Andric   if (!isAMCompletelyFolded(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind,
19770b57cec5SDimitry Andric                             LU.AccessTy, F, L))
19780b57cec5SDimitry Andric     return F.Scale != 1;
19790b57cec5SDimitry Andric 
19800b57cec5SDimitry Andric   switch (LU.Kind) {
19810b57cec5SDimitry Andric   case LSRUse::Address: {
19820b57cec5SDimitry Andric     // Check the scaling factor cost with both the min and max offsets.
19830fca6ea1SDimitry Andric     int64_t ScalableMin = 0, ScalableMax = 0, FixedMin = 0, FixedMax = 0;
19840fca6ea1SDimitry Andric     if (F.BaseOffset.isScalable()) {
19850fca6ea1SDimitry Andric       ScalableMin = (F.BaseOffset + LU.MinOffset).getKnownMinValue();
19860fca6ea1SDimitry Andric       ScalableMax = (F.BaseOffset + LU.MaxOffset).getKnownMinValue();
19870fca6ea1SDimitry Andric     } else {
19880fca6ea1SDimitry Andric       FixedMin = (F.BaseOffset + LU.MinOffset).getFixedValue();
19890fca6ea1SDimitry Andric       FixedMax = (F.BaseOffset + LU.MaxOffset).getFixedValue();
19900fca6ea1SDimitry Andric     }
1991fe6060f1SDimitry Andric     InstructionCost ScaleCostMinOffset = TTI.getScalingFactorCost(
19920fca6ea1SDimitry Andric         LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMin, ScalableMin),
19930fca6ea1SDimitry Andric         F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
1994fe6060f1SDimitry Andric     InstructionCost ScaleCostMaxOffset = TTI.getScalingFactorCost(
19950fca6ea1SDimitry Andric         LU.AccessTy.MemTy, F.BaseGV, StackOffset::get(FixedMax, ScalableMax),
19960fca6ea1SDimitry Andric         F.HasBaseReg, F.Scale, LU.AccessTy.AddrSpace);
19970b57cec5SDimitry Andric 
1998fe6060f1SDimitry Andric     assert(ScaleCostMinOffset.isValid() && ScaleCostMaxOffset.isValid() &&
19990b57cec5SDimitry Andric            "Legal addressing mode has an illegal cost!");
20000b57cec5SDimitry Andric     return std::max(ScaleCostMinOffset, ScaleCostMaxOffset);
20010b57cec5SDimitry Andric   }
20020b57cec5SDimitry Andric   case LSRUse::ICmpZero:
20030b57cec5SDimitry Andric   case LSRUse::Basic:
20040b57cec5SDimitry Andric   case LSRUse::Special:
20050b57cec5SDimitry Andric     // The use is completely folded, i.e., everything is folded into the
20060b57cec5SDimitry Andric     // instruction.
20070b57cec5SDimitry Andric     return 0;
20080b57cec5SDimitry Andric   }
20090b57cec5SDimitry Andric 
20100b57cec5SDimitry Andric   llvm_unreachable("Invalid LSRUse Kind!");
20110b57cec5SDimitry Andric }
20120b57cec5SDimitry Andric 
20130b57cec5SDimitry Andric static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
20140b57cec5SDimitry Andric                              LSRUse::KindType Kind, MemAccessTy AccessTy,
20150fca6ea1SDimitry Andric                              GlobalValue *BaseGV, Immediate BaseOffset,
20160b57cec5SDimitry Andric                              bool HasBaseReg) {
20170b57cec5SDimitry Andric   // Fast-path: zero is always foldable.
20180fca6ea1SDimitry Andric   if (BaseOffset.isZero() && !BaseGV)
20190fca6ea1SDimitry Andric     return true;
20200b57cec5SDimitry Andric 
20210b57cec5SDimitry Andric   // Conservatively, create an address with an immediate and a
20220b57cec5SDimitry Andric   // base and a scale.
20230b57cec5SDimitry Andric   int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
20240b57cec5SDimitry Andric 
20250b57cec5SDimitry Andric   // Canonicalize a scale of 1 to a base register if the formula doesn't
20260b57cec5SDimitry Andric   // already have a base register.
20270b57cec5SDimitry Andric   if (!HasBaseReg && Scale == 1) {
20280b57cec5SDimitry Andric     Scale = 0;
20290b57cec5SDimitry Andric     HasBaseReg = true;
20300b57cec5SDimitry Andric   }
20310b57cec5SDimitry Andric 
20320fca6ea1SDimitry Andric   // FIXME: Try with + without a scale? Maybe based on TTI?
20330fca6ea1SDimitry Andric   // I think basereg + scaledreg + immediateoffset isn't a good 'conservative'
20340fca6ea1SDimitry Andric   // default for many architectures, not just AArch64 SVE. More investigation
20350fca6ea1SDimitry Andric   // needed later to determine if this should be used more widely than just
20360fca6ea1SDimitry Andric   // on scalable types.
20370fca6ea1SDimitry Andric   if (HasBaseReg && BaseOffset.isNonZero() && Kind != LSRUse::ICmpZero &&
20380fca6ea1SDimitry Andric       AccessTy.MemTy && AccessTy.MemTy->isScalableTy() && DropScaledForVScale)
20390fca6ea1SDimitry Andric     Scale = 0;
20400fca6ea1SDimitry Andric 
20410b57cec5SDimitry Andric   return isAMCompletelyFolded(TTI, Kind, AccessTy, BaseGV, BaseOffset,
20420b57cec5SDimitry Andric                               HasBaseReg, Scale);
20430b57cec5SDimitry Andric }
20440b57cec5SDimitry Andric 
20450b57cec5SDimitry Andric static bool isAlwaysFoldable(const TargetTransformInfo &TTI,
20460fca6ea1SDimitry Andric                              ScalarEvolution &SE, Immediate MinOffset,
20470fca6ea1SDimitry Andric                              Immediate MaxOffset, LSRUse::KindType Kind,
20480b57cec5SDimitry Andric                              MemAccessTy AccessTy, const SCEV *S,
20490b57cec5SDimitry Andric                              bool HasBaseReg) {
20500b57cec5SDimitry Andric   // Fast-path: zero is always foldable.
20510b57cec5SDimitry Andric   if (S->isZero()) return true;
20520b57cec5SDimitry Andric 
20530b57cec5SDimitry Andric   // Conservatively, create an address with an immediate and a
20540b57cec5SDimitry Andric   // base and a scale.
20550fca6ea1SDimitry Andric   Immediate BaseOffset = ExtractImmediate(S, SE);
20560b57cec5SDimitry Andric   GlobalValue *BaseGV = ExtractSymbol(S, SE);
20570b57cec5SDimitry Andric 
20580b57cec5SDimitry Andric   // If there's anything else involved, it's not foldable.
20590b57cec5SDimitry Andric   if (!S->isZero()) return false;
20600b57cec5SDimitry Andric 
20610b57cec5SDimitry Andric   // Fast-path: zero is always foldable.
20620fca6ea1SDimitry Andric   if (BaseOffset.isZero() && !BaseGV)
20630fca6ea1SDimitry Andric     return true;
20640fca6ea1SDimitry Andric 
20650fca6ea1SDimitry Andric   if (BaseOffset.isScalable())
20660fca6ea1SDimitry Andric     return false;
20670b57cec5SDimitry Andric 
20680b57cec5SDimitry Andric   // Conservatively, create an address with an immediate and a
20690b57cec5SDimitry Andric   // base and a scale.
20700b57cec5SDimitry Andric   int64_t Scale = Kind == LSRUse::ICmpZero ? -1 : 1;
20710b57cec5SDimitry Andric 
20720b57cec5SDimitry Andric   return isAMCompletelyFolded(TTI, MinOffset, MaxOffset, Kind, AccessTy, BaseGV,
20730b57cec5SDimitry Andric                               BaseOffset, HasBaseReg, Scale);
20740b57cec5SDimitry Andric }
20750b57cec5SDimitry Andric 
20760b57cec5SDimitry Andric namespace {
20770b57cec5SDimitry Andric 
20780b57cec5SDimitry Andric /// An individual increment in a Chain of IV increments.  Relate an IV user to
20790b57cec5SDimitry Andric /// an expression that computes the IV it uses from the IV used by the previous
20800b57cec5SDimitry Andric /// link in the Chain.
20810b57cec5SDimitry Andric ///
20820b57cec5SDimitry Andric /// For the head of a chain, IncExpr holds the absolute SCEV expression for the
20830b57cec5SDimitry Andric /// original IVOperand. The head of the chain's IVOperand is only valid during
20840b57cec5SDimitry Andric /// chain collection, before LSR replaces IV users. During chain generation,
20850b57cec5SDimitry Andric /// IncExpr can be used to find the new IVOperand that computes the same
20860b57cec5SDimitry Andric /// expression.
struct IVInc {
  // The instruction that consumes the incremented IV value.
  Instruction *UserInst;
  // UserInst's operand carrying the IV. Per the comment above, for the chain
  // head this is only valid during chain collection.
  Value* IVOperand;
  // SCEV step from the previous link's IV to this one; for the chain head it
  // is the absolute expression of the original IVOperand.
  const SCEV *IncExpr;

  IVInc(Instruction *U, Value *O, const SCEV *E)
      : UserInst(U), IVOperand(O), IncExpr(E) {}
};
20950b57cec5SDimitry Andric 
20960b57cec5SDimitry Andric // The list of IV increments in program order.  We typically add the head of a
20970b57cec5SDimitry Andric // chain without finding subsequent links.
20980b57cec5SDimitry Andric struct IVChain {
20990b57cec5SDimitry Andric   SmallVector<IVInc, 1> Incs;
21000b57cec5SDimitry Andric   const SCEV *ExprBase = nullptr;
21010b57cec5SDimitry Andric 
21020b57cec5SDimitry Andric   IVChain() = default;
21030b57cec5SDimitry Andric   IVChain(const IVInc &Head, const SCEV *Base)
21040b57cec5SDimitry Andric       : Incs(1, Head), ExprBase(Base) {}
21050b57cec5SDimitry Andric 
21060b57cec5SDimitry Andric   using const_iterator = SmallVectorImpl<IVInc>::const_iterator;
21070b57cec5SDimitry Andric 
21080b57cec5SDimitry Andric   // Return the first increment in the chain.
21090b57cec5SDimitry Andric   const_iterator begin() const {
21100b57cec5SDimitry Andric     assert(!Incs.empty());
21110b57cec5SDimitry Andric     return std::next(Incs.begin());
21120b57cec5SDimitry Andric   }
21130b57cec5SDimitry Andric   const_iterator end() const {
21140b57cec5SDimitry Andric     return Incs.end();
21150b57cec5SDimitry Andric   }
21160b57cec5SDimitry Andric 
21170b57cec5SDimitry Andric   // Returns true if this chain contains any increments.
21180b57cec5SDimitry Andric   bool hasIncs() const { return Incs.size() >= 2; }
21190b57cec5SDimitry Andric 
21200b57cec5SDimitry Andric   // Add an IVInc to the end of this chain.
21210b57cec5SDimitry Andric   void add(const IVInc &X) { Incs.push_back(X); }
21220b57cec5SDimitry Andric 
21230b57cec5SDimitry Andric   // Returns the last UserInst in the chain.
21240b57cec5SDimitry Andric   Instruction *tailUserInst() const { return Incs.back().UserInst; }
21250b57cec5SDimitry Andric 
21260b57cec5SDimitry Andric   // Returns true if IncExpr can be profitably added to this chain.
21270b57cec5SDimitry Andric   bool isProfitableIncrement(const SCEV *OperExpr,
21280b57cec5SDimitry Andric                              const SCEV *IncExpr,
21290b57cec5SDimitry Andric                              ScalarEvolution&);
21300b57cec5SDimitry Andric };
21310b57cec5SDimitry Andric 
21320b57cec5SDimitry Andric /// Helper for CollectChains to track multiple IV increment uses.  Distinguish
21330b57cec5SDimitry Andric /// between FarUsers that definitely cross IV increments and NearUsers that may
21340b57cec5SDimitry Andric /// be used between IV increments.
struct ChainUsers {
  // Users that definitely execute across an IV increment (see the comment
  // above this struct).
  SmallPtrSet<Instruction*, 4> FarUsers;
  // Users that may execute between IV increments.
  SmallPtrSet<Instruction*, 4> NearUsers;
};
21390b57cec5SDimitry Andric 
21400b57cec5SDimitry Andric /// This class holds state for the main loop strength reduction logic.
class LSRInstance {
  // Analyses for the function containing the loop being transformed.
  IVUsers &IU;
  ScalarEvolution &SE;
  DominatorTree &DT;
  LoopInfo &LI;
  AssumptionCache &AC;
  TargetLibraryInfo &TLI;
  const TargetTransformInfo &TTI;
  // The loop being strength-reduced.
  Loop *const L;
  // MemorySSA updater; presumably null when MemorySSA is unavailable —
  // TODO confirm against the pass entry point.
  MemorySSAUpdater *MSSAU;
  // The target's addressing-mode kind preference.
  TTI::AddressingModeKind AMK;
  // SCEV expression expander; mutable so the const rewrite helpers below
  // (Expand, RewriteForPHI, ...) can materialize IR.
  mutable SCEVExpander Rewriter;
  // Set once this instance modifies the IR; reported via getChanged().
  bool Changed = false;

  /// This is the insert position that the current loop's induction variable
  /// increment should be placed. In simple loops, this is the latch block's
  /// terminator. But in more complicated cases, this is a position which will
  /// dominate all the in-loop post-increment users.
  Instruction *IVIncInsertPos = nullptr;

  /// Interesting factors between use strides.
  ///
  /// We explicitly use a SetVector which contains a SmallSet, instead of the
  /// default, a SmallDenseSet, because we need to use the full range of
  /// int64_ts, and there's currently no good way of doing that with
  /// SmallDenseSet.
  SetVector<int64_t, SmallVector<int64_t, 8>, SmallSet<int64_t, 8>> Factors;

  /// The cost of the current SCEV, the best solution by LSR will be dropped if
  /// the solution is not profitable.
  Cost BaselineCost;

  /// Interesting use types, to facilitate truncation reuse.
  SmallSetVector<Type *, 4> Types;

  /// The list of interesting uses.
  mutable SmallVector<LSRUse, 16> Uses;

  /// Track which uses use which register candidates.
  RegUseTracker RegUses;

  // Limit the number of chains to avoid quadratic behavior. We don't expect to
  // have more than a few IV increment chains in a loop. Missing a Chain falls
  // back to normal LSR behavior for those uses.
  static const unsigned MaxChains = 8;

  /// IV users can form a chain of IV increments.
  SmallVector<IVChain, MaxChains> IVChainVec;

  /// IV users that belong to profitable IVChains.
  SmallPtrSet<Use*, MaxChains> IVIncSet;

  /// Induction variables that were generated and inserted by the SCEV Expander.
  SmallVector<llvm::WeakVH, 2> ScalarEvolutionIVs;

  // Early IV optimizations applied before the main LSR algorithm.
  void OptimizeShadowIV();
  bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse);
  ICmpInst *OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse);
  void OptimizeLoopTermCond();

  // IV increment chain collection and code generation.
  void ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                        SmallVectorImpl<ChainUsers> &ChainUsersVec);
  void FinalizeChain(IVChain &Chain);
  void CollectChains();
  void GenerateIVChain(const IVChain &Chain,
                       SmallVectorImpl<WeakTrackingVH> &DeadInsts);

  void CollectInterestingTypesAndFactors();
  void CollectFixupsAndInitialFormulae();

  // Support for sharing of LSRUses between LSRFixups.
  using UseMapTy = DenseMap<LSRUse::SCEVUseKindPair, size_t>;
  UseMapTy UseMap;

  bool reconcileNewOffset(LSRUse &LU, Immediate NewOffset, bool HasBaseReg,
                          LSRUse::KindType Kind, MemAccessTy AccessTy);

  std::pair<size_t, Immediate> getUse(const SCEV *&Expr, LSRUse::KindType Kind,
                                      MemAccessTy AccessTy);

  void DeleteUse(LSRUse &LU, size_t LUIdx);

  LSRUse *FindUseWithSimilarFormula(const Formula &F, const LSRUse &OrigLU);

  void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
  void CountRegisters(const Formula &F, size_t LUIdx);
  bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);

  void CollectLoopInvariantFixupsAndFormulae();

  // Formula generation: each Generate* helper derives alternative formulae
  // for an LSRUse from a base formula.
  void GenerateReassociations(LSRUse &LU, unsigned LUIdx, Formula Base,
                              unsigned Depth = 0);

  void GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                  const Formula &Base, unsigned Depth,
                                  size_t Idx, bool IsScaledReg = false);
  void GenerateCombinations(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base, size_t Idx,
                                   bool IsScaledReg = false);
  void GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateConstantOffsetsImpl(LSRUse &LU, unsigned LUIdx,
                                   const Formula &Base,
                                   const SmallVectorImpl<Immediate> &Worklist,
                                   size_t Idx, bool IsScaledReg = false);
  void GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base);
  void GenerateCrossUseConstantOffsets();
  void GenerateAllReuseFormulae();

  void FilterOutUndesirableDedicatedRegisters();

  // Search-space pruning heuristics applied before solving.
  size_t EstimateSearchSpaceComplexity() const;
  void NarrowSearchSpaceByDetectingSupersets();
  void NarrowSearchSpaceByCollapsingUnrolledCode();
  void NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
  void NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
  void NarrowSearchSpaceByFilterPostInc();
  void NarrowSearchSpaceByDeletingCostlyFormulas();
  void NarrowSearchSpaceByPickingWinnerRegs();
  void NarrowSearchSpaceUsingHeuristics();

  // Solver: pick one formula per use, minimizing the overall Cost.
  void SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
                    Cost &SolutionCost,
                    SmallVectorImpl<const Formula *> &Workspace,
                    const Cost &CurCost,
                    const SmallPtrSet<const SCEV *, 16> &CurRegs,
                    DenseSet<const SCEV *> &VisitedRegs) const;
  void Solve(SmallVectorImpl<const Formula *> &Solution) const;

  // Rewriting: expand the chosen formulae into IR.
  BasicBlock::iterator
  HoistInsertPosition(BasicBlock::iterator IP,
                      const SmallVectorImpl<Instruction *> &Inputs) const;
  BasicBlock::iterator AdjustInsertPositionForExpand(BasicBlock::iterator IP,
                                                     const LSRFixup &LF,
                                                     const LSRUse &LU) const;

  Value *Expand(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
                BasicBlock::iterator IP,
                SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void RewriteForPHI(PHINode *PN, const LSRUse &LU, const LSRFixup &LF,
                     const Formula &F,
                     SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void Rewrite(const LSRUse &LU, const LSRFixup &LF, const Formula &F,
               SmallVectorImpl<WeakTrackingVH> &DeadInsts) const;
  void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution);

public:
  LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE, DominatorTree &DT,
              LoopInfo &LI, const TargetTransformInfo &TTI, AssumptionCache &AC,
              TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU);

  bool getChanged() const { return Changed; }
  const SmallVectorImpl<WeakVH> &getScalarEvolutionIVs() const {
    return ScalarEvolutionIVs;
  }

  // Debugging helpers.
  void print_factors_and_types(raw_ostream &OS) const;
  void print_fixups(raw_ostream &OS) const;
  void print_uses(raw_ostream &OS) const;
  void print(raw_ostream &OS) const;
  void dump() const;
};
23070b57cec5SDimitry Andric 
23080b57cec5SDimitry Andric } // end anonymous namespace
23090b57cec5SDimitry Andric 
23100b57cec5SDimitry Andric /// If IV is used in a int-to-float cast inside the loop then try to eliminate
23110b57cec5SDimitry Andric /// the cast operation.
void LSRInstance::OptimizeShadowIV() {
  // Only loops with a computable backedge-taken count are considered.
  const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
  if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
    return;

  for (IVUsers::const_iterator UI = IU.begin(), E = IU.end();
       UI != E; /* empty */) {
    // Advance UI before inspecting the candidate, since the transformation
    // below may invalidate the candidate's entry.
    IVUsers::const_iterator CandidateUI = UI;
    ++UI;
    Instruction *ShadowUse = CandidateUI->getUser();
    Type *DestTy = nullptr;
    bool IsSigned = false;

    /* If shadow use is a int->float cast then insert a second IV
       to eliminate this cast.

         for (unsigned i = 0; i < n; ++i)
           foo((double)i);

       is transformed into

         double d = 0.0;
         for (unsigned i = 0; i < n; ++i, ++d)
           foo(d);
    */
    if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser())) {
      IsSigned = false;
      DestTy = UCast->getDestTy();
    }
    else if (SIToFPInst *SCast = dyn_cast<SIToFPInst>(CandidateUI->getUser())) {
      IsSigned = true;
      DestTy = SCast->getDestTy();
    }
    // Not an int->float cast; skip this user.
    if (!DestTy) continue;

    // If target does not support DestTy natively then do not apply
    // this transformation.
    if (!TTI.isTypeLegal(DestTy)) continue;

    // The cast's operand must be a simple two-way PHI (entry + latch).
    PHINode *PH = dyn_cast<PHINode>(ShadowUse->getOperand(0));
    if (!PH) continue;
    if (PH->getNumIncomingValues() != 2) continue;

    // If the calculation in integers overflows, the result in FP type will
    // differ. So we only can do this transformation if we are guaranteed to not
    // deal with overflowing values
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(PH));
    if (!AR) continue;
    if (IsSigned && !AR->hasNoSignedWrap()) continue;
    if (!IsSigned && !AR->hasNoUnsignedWrap()) continue;

    // Every value of the source integer type must be exactly representable
    // in DestTy's mantissa, or the FP shadow IV would diverge.
    Type *SrcTy = PH->getType();
    int Mantissa = DestTy->getFPMantissaWidth();
    if (Mantissa == -1) continue;
    if ((int)SE.getTypeSizeInBits(SrcTy) > Mantissa)
      continue;

    // Identify which incoming edge is the preheader (Entry) and which is
    // the backedge (Latch).
    unsigned Entry, Latch;
    if (PH->getIncomingBlock(0) == L->getLoopPreheader()) {
      Entry = 0;
      Latch = 1;
    } else {
      Entry = 1;
      Latch = 0;
    }

    // The start value must be a constant so it can be converted to FP,
    // honoring the signedness of the original cast.
    ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
    if (!Init) continue;
    Constant *NewInit = ConstantFP::get(DestTy, IsSigned ?
                                        (double)Init->getSExtValue() :
                                        (double)Init->getZExtValue());

    // The latch value must be a simple add/sub of the PHI by a constant.
    BinaryOperator *Incr =
      dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
    if (!Incr) continue;
    if (Incr->getOpcode() != Instruction::Add
        && Incr->getOpcode() != Instruction::Sub)
      continue;

    /* Initialize new IV, double d = 0.0 in above example. */
    ConstantInt *C = nullptr;
    if (Incr->getOperand(0) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(1));
    else if (Incr->getOperand(1) == PH)
      C = dyn_cast<ConstantInt>(Incr->getOperand(0));
    else
      continue;

    if (!C) continue;

    // Ignore negative constants, as the code below doesn't handle them
    // correctly. TODO: Remove this restriction.
    if (!C->getValue().isStrictlyPositive())
      continue;

    /* Add new PHINode. */
    PHINode *NewPH = PHINode::Create(DestTy, 2, "IV.S.", PH->getIterator());
    NewPH->setDebugLoc(PH->getDebugLoc());

    /* create new increment. '++d' in above example. */
    // getZExtValue is safe here: C was checked strictly positive above.
    Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
    BinaryOperator *NewIncr = BinaryOperator::Create(
        Incr->getOpcode() == Instruction::Add ? Instruction::FAdd
                                              : Instruction::FSub,
        NewPH, CFP, "IV.S.next.", Incr->getIterator());
    NewIncr->setDebugLoc(Incr->getDebugLoc());

    NewPH->addIncoming(NewInit, PH->getIncomingBlock(Entry));
    NewPH->addIncoming(NewIncr, PH->getIncomingBlock(Latch));

    /* Remove cast operation */
    ShadowUse->replaceAllUsesWith(NewPH);
    ShadowUse->eraseFromParent();
    Changed = true;
    // At most one shadow IV is created per invocation.
    break;
  }
}
24290b57cec5SDimitry Andric 
24300b57cec5SDimitry Andric /// If Cond has an operand that is an expression of an IV, set the IV user and
24310b57cec5SDimitry Andric /// stride information and return true, otherwise return false.
24320b57cec5SDimitry Andric bool LSRInstance::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse) {
24330b57cec5SDimitry Andric   for (IVStrideUse &U : IU)
24340b57cec5SDimitry Andric     if (U.getUser() == Cond) {
24350b57cec5SDimitry Andric       // NOTE: we could handle setcc instructions with multiple uses here, but
24360b57cec5SDimitry Andric       // InstCombine does it as well for simple uses, it's not clear that it
24370b57cec5SDimitry Andric       // occurs enough in real life to handle.
24380b57cec5SDimitry Andric       CondUse = &U;
24390b57cec5SDimitry Andric       return true;
24400b57cec5SDimitry Andric     }
24410b57cec5SDimitry Andric   return false;
24420b57cec5SDimitry Andric }
24430b57cec5SDimitry Andric 
24440b57cec5SDimitry Andric /// Rewrite the loop's terminating condition if it uses a max computation.
24450b57cec5SDimitry Andric ///
24460b57cec5SDimitry Andric /// This is a narrow solution to a specific, but acute, problem. For loops
24470b57cec5SDimitry Andric /// like this:
24480b57cec5SDimitry Andric ///
24490b57cec5SDimitry Andric ///   i = 0;
24500b57cec5SDimitry Andric ///   do {
24510b57cec5SDimitry Andric ///     p[i] = 0.0;
24520b57cec5SDimitry Andric ///   } while (++i < n);
24530b57cec5SDimitry Andric ///
24540b57cec5SDimitry Andric /// the trip count isn't just 'n', because 'n' might not be positive. And
24550b57cec5SDimitry Andric /// unfortunately this can come up even for loops where the user didn't use
24560b57cec5SDimitry Andric /// a C do-while loop. For example, seemingly well-behaved top-test loops
24570b57cec5SDimitry Andric /// will commonly be lowered like this:
24580b57cec5SDimitry Andric ///
24590b57cec5SDimitry Andric ///   if (n > 0) {
24600b57cec5SDimitry Andric ///     i = 0;
24610b57cec5SDimitry Andric ///     do {
24620b57cec5SDimitry Andric ///       p[i] = 0.0;
24630b57cec5SDimitry Andric ///     } while (++i < n);
24640b57cec5SDimitry Andric ///   }
24650b57cec5SDimitry Andric ///
24660b57cec5SDimitry Andric /// and then it's possible for subsequent optimization to obscure the if
24670b57cec5SDimitry Andric /// test in such a way that indvars can't find it.
24680b57cec5SDimitry Andric ///
24690b57cec5SDimitry Andric /// When indvars can't find the if test in loops like this, it creates a
24700b57cec5SDimitry Andric /// max expression, which allows it to give the loop a canonical
24710b57cec5SDimitry Andric /// induction variable:
24720b57cec5SDimitry Andric ///
24730b57cec5SDimitry Andric ///   i = 0;
24740b57cec5SDimitry Andric ///   max = n < 1 ? 1 : n;
24750b57cec5SDimitry Andric ///   do {
24760b57cec5SDimitry Andric ///     p[i] = 0.0;
24770b57cec5SDimitry Andric ///   } while (++i != max);
24780b57cec5SDimitry Andric ///
24790b57cec5SDimitry Andric /// Canonical induction variables are necessary because the loop passes
24800b57cec5SDimitry Andric /// are designed around them. The most obvious example of this is the
24810b57cec5SDimitry Andric /// LoopInfo analysis, which doesn't remember trip count values. It
24820b57cec5SDimitry Andric /// expects to be able to rediscover the trip count each time it is
24830b57cec5SDimitry Andric /// needed, and it does this using a simple analysis that only succeeds if
24840b57cec5SDimitry Andric /// the loop has a canonical induction variable.
24850b57cec5SDimitry Andric ///
24860b57cec5SDimitry Andric /// However, when it comes time to generate code, the maximum operation
24870b57cec5SDimitry Andric /// can be quite costly, especially if it's inside of an outer loop.
24880b57cec5SDimitry Andric ///
24890b57cec5SDimitry Andric /// This function solves this problem by detecting this type of loop and
24900b57cec5SDimitry Andric /// rewriting their conditions from ICMP_NE back to ICMP_SLT, and deleting
24910b57cec5SDimitry Andric /// the instructions for the maximum computation.
24920b57cec5SDimitry Andric ICmpInst *LSRInstance::OptimizeMax(ICmpInst *Cond, IVStrideUse* &CondUse) {
24930b57cec5SDimitry Andric   // Check that the loop matches the pattern we're looking for.
24940b57cec5SDimitry Andric   if (Cond->getPredicate() != CmpInst::ICMP_EQ &&
24950b57cec5SDimitry Andric       Cond->getPredicate() != CmpInst::ICMP_NE)
24960b57cec5SDimitry Andric     return Cond;
24970b57cec5SDimitry Andric 
24980b57cec5SDimitry Andric   SelectInst *Sel = dyn_cast<SelectInst>(Cond->getOperand(1));
24990b57cec5SDimitry Andric   if (!Sel || !Sel->hasOneUse()) return Cond;
25000b57cec5SDimitry Andric 
25010b57cec5SDimitry Andric   const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
25020b57cec5SDimitry Andric   if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
25030b57cec5SDimitry Andric     return Cond;
25040b57cec5SDimitry Andric   const SCEV *One = SE.getConstant(BackedgeTakenCount->getType(), 1);
25050b57cec5SDimitry Andric 
25060b57cec5SDimitry Andric   // Add one to the backedge-taken count to get the trip count.
25070b57cec5SDimitry Andric   const SCEV *IterationCount = SE.getAddExpr(One, BackedgeTakenCount);
25080b57cec5SDimitry Andric   if (IterationCount != SE.getSCEV(Sel)) return Cond;
25090b57cec5SDimitry Andric 
25100b57cec5SDimitry Andric   // Check for a max calculation that matches the pattern. There's no check
25110b57cec5SDimitry Andric   // for ICMP_ULE here because the comparison would be with zero, which
25120b57cec5SDimitry Andric   // isn't interesting.
25130b57cec5SDimitry Andric   CmpInst::Predicate Pred = ICmpInst::BAD_ICMP_PREDICATE;
25140b57cec5SDimitry Andric   const SCEVNAryExpr *Max = nullptr;
25150b57cec5SDimitry Andric   if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(BackedgeTakenCount)) {
25160b57cec5SDimitry Andric     Pred = ICmpInst::ICMP_SLE;
25170b57cec5SDimitry Andric     Max = S;
25180b57cec5SDimitry Andric   } else if (const SCEVSMaxExpr *S = dyn_cast<SCEVSMaxExpr>(IterationCount)) {
25190b57cec5SDimitry Andric     Pred = ICmpInst::ICMP_SLT;
25200b57cec5SDimitry Andric     Max = S;
25210b57cec5SDimitry Andric   } else if (const SCEVUMaxExpr *U = dyn_cast<SCEVUMaxExpr>(IterationCount)) {
25220b57cec5SDimitry Andric     Pred = ICmpInst::ICMP_ULT;
25230b57cec5SDimitry Andric     Max = U;
25240b57cec5SDimitry Andric   } else {
25250b57cec5SDimitry Andric     // No match; bail.
25260b57cec5SDimitry Andric     return Cond;
25270b57cec5SDimitry Andric   }
25280b57cec5SDimitry Andric 
25290b57cec5SDimitry Andric   // To handle a max with more than two operands, this optimization would
25300b57cec5SDimitry Andric   // require additional checking and setup.
25310b57cec5SDimitry Andric   if (Max->getNumOperands() != 2)
25320b57cec5SDimitry Andric     return Cond;
25330b57cec5SDimitry Andric 
25340b57cec5SDimitry Andric   const SCEV *MaxLHS = Max->getOperand(0);
25350b57cec5SDimitry Andric   const SCEV *MaxRHS = Max->getOperand(1);
25360b57cec5SDimitry Andric 
25370b57cec5SDimitry Andric   // ScalarEvolution canonicalizes constants to the left. For < and >, look
25380b57cec5SDimitry Andric   // for a comparison with 1. For <= and >=, a comparison with zero.
25390b57cec5SDimitry Andric   if (!MaxLHS ||
25400b57cec5SDimitry Andric       (ICmpInst::isTrueWhenEqual(Pred) ? !MaxLHS->isZero() : (MaxLHS != One)))
25410b57cec5SDimitry Andric     return Cond;
25420b57cec5SDimitry Andric 
25430b57cec5SDimitry Andric   // Check the relevant induction variable for conformance to
25440b57cec5SDimitry Andric   // the pattern.
25450b57cec5SDimitry Andric   const SCEV *IV = SE.getSCEV(Cond->getOperand(0));
25460b57cec5SDimitry Andric   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
25470b57cec5SDimitry Andric   if (!AR || !AR->isAffine() ||
25480b57cec5SDimitry Andric       AR->getStart() != One ||
25490b57cec5SDimitry Andric       AR->getStepRecurrence(SE) != One)
25500b57cec5SDimitry Andric     return Cond;
25510b57cec5SDimitry Andric 
25520b57cec5SDimitry Andric   assert(AR->getLoop() == L &&
25530b57cec5SDimitry Andric          "Loop condition operand is an addrec in a different loop!");
25540b57cec5SDimitry Andric 
25550b57cec5SDimitry Andric   // Check the right operand of the select, and remember it, as it will
25560b57cec5SDimitry Andric   // be used in the new comparison instruction.
25570b57cec5SDimitry Andric   Value *NewRHS = nullptr;
25580b57cec5SDimitry Andric   if (ICmpInst::isTrueWhenEqual(Pred)) {
25590b57cec5SDimitry Andric     // Look for n+1, and grab n.
25600b57cec5SDimitry Andric     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(1)))
25610b57cec5SDimitry Andric       if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
25620b57cec5SDimitry Andric          if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
25630b57cec5SDimitry Andric            NewRHS = BO->getOperand(0);
25640b57cec5SDimitry Andric     if (AddOperator *BO = dyn_cast<AddOperator>(Sel->getOperand(2)))
25650b57cec5SDimitry Andric       if (ConstantInt *BO1 = dyn_cast<ConstantInt>(BO->getOperand(1)))
25660b57cec5SDimitry Andric         if (BO1->isOne() && SE.getSCEV(BO->getOperand(0)) == MaxRHS)
25670b57cec5SDimitry Andric           NewRHS = BO->getOperand(0);
25680b57cec5SDimitry Andric     if (!NewRHS)
25690b57cec5SDimitry Andric       return Cond;
25700b57cec5SDimitry Andric   } else if (SE.getSCEV(Sel->getOperand(1)) == MaxRHS)
25710b57cec5SDimitry Andric     NewRHS = Sel->getOperand(1);
25720b57cec5SDimitry Andric   else if (SE.getSCEV(Sel->getOperand(2)) == MaxRHS)
25730b57cec5SDimitry Andric     NewRHS = Sel->getOperand(2);
25740b57cec5SDimitry Andric   else if (const SCEVUnknown *SU = dyn_cast<SCEVUnknown>(MaxRHS))
25750b57cec5SDimitry Andric     NewRHS = SU->getValue();
25760b57cec5SDimitry Andric   else
25770b57cec5SDimitry Andric     // Max doesn't match expected pattern.
25780b57cec5SDimitry Andric     return Cond;
25790b57cec5SDimitry Andric 
25800b57cec5SDimitry Andric   // Determine the new comparison opcode. It may be signed or unsigned,
25810b57cec5SDimitry Andric   // and the original comparison may be either equality or inequality.
25820b57cec5SDimitry Andric   if (Cond->getPredicate() == CmpInst::ICMP_EQ)
25830b57cec5SDimitry Andric     Pred = CmpInst::getInversePredicate(Pred);
25840b57cec5SDimitry Andric 
25850b57cec5SDimitry Andric   // Ok, everything looks ok to change the condition into an SLT or SGE and
25860b57cec5SDimitry Andric   // delete the max calculation.
25870fca6ea1SDimitry Andric   ICmpInst *NewCond = new ICmpInst(Cond->getIterator(), Pred,
25880fca6ea1SDimitry Andric                                    Cond->getOperand(0), NewRHS, "scmp");
25890b57cec5SDimitry Andric 
25900b57cec5SDimitry Andric   // Delete the max calculation instructions.
2591fe6060f1SDimitry Andric   NewCond->setDebugLoc(Cond->getDebugLoc());
25920b57cec5SDimitry Andric   Cond->replaceAllUsesWith(NewCond);
25930b57cec5SDimitry Andric   CondUse->setUser(NewCond);
25940b57cec5SDimitry Andric   Instruction *Cmp = cast<Instruction>(Sel->getOperand(0));
25950b57cec5SDimitry Andric   Cond->eraseFromParent();
25960b57cec5SDimitry Andric   Sel->eraseFromParent();
25970b57cec5SDimitry Andric   if (Cmp->use_empty())
25980b57cec5SDimitry Andric     Cmp->eraseFromParent();
25990b57cec5SDimitry Andric   return NewCond;
26000b57cec5SDimitry Andric }
26010b57cec5SDimitry Andric 
26020b57cec5SDimitry Andric /// Change loop terminating condition to use the postinc iv when possible.
26030b57cec5SDimitry Andric void
26040b57cec5SDimitry Andric LSRInstance::OptimizeLoopTermCond() {
26050b57cec5SDimitry Andric   SmallPtrSet<Instruction *, 4> PostIncs;
26060b57cec5SDimitry Andric 
26070b57cec5SDimitry Andric   // We need a different set of heuristics for rotated and non-rotated loops.
26080b57cec5SDimitry Andric   // If a loop is rotated then the latch is also the backedge, so inserting
26090b57cec5SDimitry Andric   // post-inc expressions just before the latch is ideal. To reduce live ranges
26100b57cec5SDimitry Andric   // it also makes sense to rewrite terminating conditions to use post-inc
26110b57cec5SDimitry Andric   // expressions.
26120b57cec5SDimitry Andric   //
26130b57cec5SDimitry Andric   // If the loop is not rotated then the latch is not a backedge; the latch
26140b57cec5SDimitry Andric   // check is done in the loop head. Adding post-inc expressions before the
26150b57cec5SDimitry Andric   // latch will cause overlapping live-ranges of pre-inc and post-inc expressions
26160b57cec5SDimitry Andric   // in the loop body. In this case we do *not* want to use post-inc expressions
26170b57cec5SDimitry Andric   // in the latch check, and we want to insert post-inc expressions before
26180b57cec5SDimitry Andric   // the backedge.
26190b57cec5SDimitry Andric   BasicBlock *LatchBlock = L->getLoopLatch();
26200b57cec5SDimitry Andric   SmallVector<BasicBlock*, 8> ExitingBlocks;
26210b57cec5SDimitry Andric   L->getExitingBlocks(ExitingBlocks);
2622bdd1243dSDimitry Andric   if (!llvm::is_contained(ExitingBlocks, LatchBlock)) {
26230b57cec5SDimitry Andric     // The backedge doesn't exit the loop; treat this as a head-tested loop.
26240b57cec5SDimitry Andric     IVIncInsertPos = LatchBlock->getTerminator();
26250b57cec5SDimitry Andric     return;
26260b57cec5SDimitry Andric   }
26270b57cec5SDimitry Andric 
26280b57cec5SDimitry Andric   // Otherwise treat this as a rotated loop.
26290b57cec5SDimitry Andric   for (BasicBlock *ExitingBlock : ExitingBlocks) {
26300b57cec5SDimitry Andric     // Get the terminating condition for the loop if possible.  If we
26310b57cec5SDimitry Andric     // can, we want to change it to use a post-incremented version of its
26320b57cec5SDimitry Andric     // induction variable, to allow coalescing the live ranges for the IV into
26330b57cec5SDimitry Andric     // one register value.
26340b57cec5SDimitry Andric 
26350b57cec5SDimitry Andric     BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
26360b57cec5SDimitry Andric     if (!TermBr)
26370b57cec5SDimitry Andric       continue;
26380b57cec5SDimitry Andric     // FIXME: Overly conservative, termination condition could be an 'or' etc..
26390b57cec5SDimitry Andric     if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
26400b57cec5SDimitry Andric       continue;
26410b57cec5SDimitry Andric 
26420b57cec5SDimitry Andric     // Search IVUsesByStride to find Cond's IVUse if there is one.
26430b57cec5SDimitry Andric     IVStrideUse *CondUse = nullptr;
26440b57cec5SDimitry Andric     ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
26450b57cec5SDimitry Andric     if (!FindIVUserForCond(Cond, CondUse))
26460b57cec5SDimitry Andric       continue;
26470b57cec5SDimitry Andric 
26480b57cec5SDimitry Andric     // If the trip count is computed in terms of a max (due to ScalarEvolution
26490b57cec5SDimitry Andric     // being unable to find a sufficient guard, for example), change the loop
26500b57cec5SDimitry Andric     // comparison to use SLT or ULT instead of NE.
26510b57cec5SDimitry Andric     // One consequence of doing this now is that it disrupts the count-down
26520b57cec5SDimitry Andric     // optimization. That's not always a bad thing though, because in such
26530b57cec5SDimitry Andric     // cases it may still be worthwhile to avoid a max.
26540b57cec5SDimitry Andric     Cond = OptimizeMax(Cond, CondUse);
26550b57cec5SDimitry Andric 
26560b57cec5SDimitry Andric     // If this exiting block dominates the latch block, it may also use
26570b57cec5SDimitry Andric     // the post-inc value if it won't be shared with other uses.
26580b57cec5SDimitry Andric     // Check for dominance.
26590b57cec5SDimitry Andric     if (!DT.dominates(ExitingBlock, LatchBlock))
26600b57cec5SDimitry Andric       continue;
26610b57cec5SDimitry Andric 
26620b57cec5SDimitry Andric     // Conservatively avoid trying to use the post-inc value in non-latch
26630b57cec5SDimitry Andric     // exits if there may be pre-inc users in intervening blocks.
26640b57cec5SDimitry Andric     if (LatchBlock != ExitingBlock)
26650b57cec5SDimitry Andric       for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI)
26660b57cec5SDimitry Andric         // Test if the use is reachable from the exiting block. This dominator
26670b57cec5SDimitry Andric         // query is a conservative approximation of reachability.
26680b57cec5SDimitry Andric         if (&*UI != CondUse &&
26690b57cec5SDimitry Andric             !DT.properlyDominates(UI->getUser()->getParent(), ExitingBlock)) {
26700b57cec5SDimitry Andric           // Conservatively assume there may be reuse if the quotient of their
26710b57cec5SDimitry Andric           // strides could be a legal scale.
26720b57cec5SDimitry Andric           const SCEV *A = IU.getStride(*CondUse, L);
26730b57cec5SDimitry Andric           const SCEV *B = IU.getStride(*UI, L);
26740b57cec5SDimitry Andric           if (!A || !B) continue;
26750b57cec5SDimitry Andric           if (SE.getTypeSizeInBits(A->getType()) !=
26760b57cec5SDimitry Andric               SE.getTypeSizeInBits(B->getType())) {
26770b57cec5SDimitry Andric             if (SE.getTypeSizeInBits(A->getType()) >
26780b57cec5SDimitry Andric                 SE.getTypeSizeInBits(B->getType()))
26790b57cec5SDimitry Andric               B = SE.getSignExtendExpr(B, A->getType());
26800b57cec5SDimitry Andric             else
26810b57cec5SDimitry Andric               A = SE.getSignExtendExpr(A, B->getType());
26820b57cec5SDimitry Andric           }
26830b57cec5SDimitry Andric           if (const SCEVConstant *D =
26840b57cec5SDimitry Andric                 dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
26850b57cec5SDimitry Andric             const ConstantInt *C = D->getValue();
26860b57cec5SDimitry Andric             // Stride of one or negative one can have reuse with non-addresses.
26870b57cec5SDimitry Andric             if (C->isOne() || C->isMinusOne())
26880b57cec5SDimitry Andric               goto decline_post_inc;
26890b57cec5SDimitry Andric             // Avoid weird situations.
269006c3fb27SDimitry Andric             if (C->getValue().getSignificantBits() >= 64 ||
26910b57cec5SDimitry Andric                 C->getValue().isMinSignedValue())
26920b57cec5SDimitry Andric               goto decline_post_inc;
26930b57cec5SDimitry Andric             // Check for possible scaled-address reuse.
26940b57cec5SDimitry Andric             if (isAddressUse(TTI, UI->getUser(), UI->getOperandValToReplace())) {
26950b57cec5SDimitry Andric               MemAccessTy AccessTy = getAccessType(
26960b57cec5SDimitry Andric                   TTI, UI->getUser(), UI->getOperandValToReplace());
26970b57cec5SDimitry Andric               int64_t Scale = C->getSExtValue();
26980b57cec5SDimitry Andric               if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
26990b57cec5SDimitry Andric                                             /*BaseOffset=*/0,
270006c3fb27SDimitry Andric                                             /*HasBaseReg=*/true, Scale,
27010b57cec5SDimitry Andric                                             AccessTy.AddrSpace))
27020b57cec5SDimitry Andric                 goto decline_post_inc;
27030b57cec5SDimitry Andric               Scale = -Scale;
27040b57cec5SDimitry Andric               if (TTI.isLegalAddressingMode(AccessTy.MemTy, /*BaseGV=*/nullptr,
27050b57cec5SDimitry Andric                                             /*BaseOffset=*/0,
270606c3fb27SDimitry Andric                                             /*HasBaseReg=*/true, Scale,
27070b57cec5SDimitry Andric                                             AccessTy.AddrSpace))
27080b57cec5SDimitry Andric                 goto decline_post_inc;
27090b57cec5SDimitry Andric             }
27100b57cec5SDimitry Andric           }
27110b57cec5SDimitry Andric         }
27120b57cec5SDimitry Andric 
27130b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "  Change loop exiting icmp to use postinc iv: "
27140b57cec5SDimitry Andric                       << *Cond << '\n');
27150b57cec5SDimitry Andric 
27160b57cec5SDimitry Andric     // It's possible for the setcc instruction to be anywhere in the loop, and
27170b57cec5SDimitry Andric     // possible for it to have multiple users.  If it is not immediately before
27180b57cec5SDimitry Andric     // the exiting block branch, move it.
2719fe6060f1SDimitry Andric     if (Cond->getNextNonDebugInstruction() != TermBr) {
27200b57cec5SDimitry Andric       if (Cond->hasOneUse()) {
27210b57cec5SDimitry Andric         Cond->moveBefore(TermBr);
27220b57cec5SDimitry Andric       } else {
27230b57cec5SDimitry Andric         // Clone the terminating condition and insert into the loopend.
27240b57cec5SDimitry Andric         ICmpInst *OldCond = Cond;
27250b57cec5SDimitry Andric         Cond = cast<ICmpInst>(Cond->clone());
27260b57cec5SDimitry Andric         Cond->setName(L->getHeader()->getName() + ".termcond");
2727bdd1243dSDimitry Andric         Cond->insertInto(ExitingBlock, TermBr->getIterator());
27280b57cec5SDimitry Andric 
27290b57cec5SDimitry Andric         // Clone the IVUse, as the old use still exists!
27300b57cec5SDimitry Andric         CondUse = &IU.AddUser(Cond, CondUse->getOperandValToReplace());
27310b57cec5SDimitry Andric         TermBr->replaceUsesOfWith(OldCond, Cond);
27320b57cec5SDimitry Andric       }
27330b57cec5SDimitry Andric     }
27340b57cec5SDimitry Andric 
27350b57cec5SDimitry Andric     // If we get to here, we know that we can transform the setcc instruction to
27360b57cec5SDimitry Andric     // use the post-incremented version of the IV, allowing us to coalesce the
27370b57cec5SDimitry Andric     // live ranges for the IV correctly.
27380b57cec5SDimitry Andric     CondUse->transformToPostInc(L);
27390b57cec5SDimitry Andric     Changed = true;
27400b57cec5SDimitry Andric 
27410b57cec5SDimitry Andric     PostIncs.insert(Cond);
27420b57cec5SDimitry Andric   decline_post_inc:;
27430b57cec5SDimitry Andric   }
27440b57cec5SDimitry Andric 
27450b57cec5SDimitry Andric   // Determine an insertion point for the loop induction variable increment. It
27460b57cec5SDimitry Andric   // must dominate all the post-inc comparisons we just set up, and it must
27470b57cec5SDimitry Andric   // dominate the loop latch edge.
27480b57cec5SDimitry Andric   IVIncInsertPos = L->getLoopLatch()->getTerminator();
2749bdd1243dSDimitry Andric   for (Instruction *Inst : PostIncs)
2750bdd1243dSDimitry Andric     IVIncInsertPos = DT.findNearestCommonDominator(IVIncInsertPos, Inst);
27510b57cec5SDimitry Andric }
27520b57cec5SDimitry Andric 
27530b57cec5SDimitry Andric /// Determine if the given use can accommodate a fixup at the given offset and
27540b57cec5SDimitry Andric /// other details. If so, update the use and return true.
27550fca6ea1SDimitry Andric bool LSRInstance::reconcileNewOffset(LSRUse &LU, Immediate NewOffset,
27560b57cec5SDimitry Andric                                      bool HasBaseReg, LSRUse::KindType Kind,
27570b57cec5SDimitry Andric                                      MemAccessTy AccessTy) {
27580fca6ea1SDimitry Andric   Immediate NewMinOffset = LU.MinOffset;
27590fca6ea1SDimitry Andric   Immediate NewMaxOffset = LU.MaxOffset;
27600b57cec5SDimitry Andric   MemAccessTy NewAccessTy = AccessTy;
27610b57cec5SDimitry Andric 
27620b57cec5SDimitry Andric   // Check for a mismatched kind. It's tempting to collapse mismatched kinds to
27630b57cec5SDimitry Andric   // something conservative, however this can pessimize in the case that one of
27640b57cec5SDimitry Andric   // the uses will have all its uses outside the loop, for example.
27650b57cec5SDimitry Andric   if (LU.Kind != Kind)
27660b57cec5SDimitry Andric     return false;
27670b57cec5SDimitry Andric 
27680b57cec5SDimitry Andric   // Check for a mismatched access type, and fall back conservatively as needed.
27690b57cec5SDimitry Andric   // TODO: Be less conservative when the type is similar and can use the same
27700b57cec5SDimitry Andric   // addressing modes.
27710b57cec5SDimitry Andric   if (Kind == LSRUse::Address) {
27720b57cec5SDimitry Andric     if (AccessTy.MemTy != LU.AccessTy.MemTy) {
27730b57cec5SDimitry Andric       NewAccessTy = MemAccessTy::getUnknown(AccessTy.MemTy->getContext(),
27740b57cec5SDimitry Andric                                             AccessTy.AddrSpace);
27750b57cec5SDimitry Andric     }
27760b57cec5SDimitry Andric   }
27770b57cec5SDimitry Andric 
27780b57cec5SDimitry Andric   // Conservatively assume HasBaseReg is true for now.
27790fca6ea1SDimitry Andric   if (Immediate::isKnownLT(NewOffset, LU.MinOffset)) {
27800b57cec5SDimitry Andric     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
27810b57cec5SDimitry Andric                           LU.MaxOffset - NewOffset, HasBaseReg))
27820b57cec5SDimitry Andric       return false;
27830b57cec5SDimitry Andric     NewMinOffset = NewOffset;
27840fca6ea1SDimitry Andric   } else if (Immediate::isKnownGT(NewOffset, LU.MaxOffset)) {
27850b57cec5SDimitry Andric     if (!isAlwaysFoldable(TTI, Kind, NewAccessTy, /*BaseGV=*/nullptr,
27860b57cec5SDimitry Andric                           NewOffset - LU.MinOffset, HasBaseReg))
27870b57cec5SDimitry Andric       return false;
27880b57cec5SDimitry Andric     NewMaxOffset = NewOffset;
27890b57cec5SDimitry Andric   }
27900b57cec5SDimitry Andric 
27910fca6ea1SDimitry Andric   // FIXME: We should be able to handle some level of scalable offset support
27920fca6ea1SDimitry Andric   // for 'void', but in order to get basic support up and running this is
27930fca6ea1SDimitry Andric   // being left out.
27940fca6ea1SDimitry Andric   if (NewAccessTy.MemTy && NewAccessTy.MemTy->isVoidTy() &&
27950fca6ea1SDimitry Andric       (NewMinOffset.isScalable() || NewMaxOffset.isScalable()))
27960fca6ea1SDimitry Andric     return false;
27970fca6ea1SDimitry Andric 
27980b57cec5SDimitry Andric   // Update the use.
27990b57cec5SDimitry Andric   LU.MinOffset = NewMinOffset;
28000b57cec5SDimitry Andric   LU.MaxOffset = NewMaxOffset;
28010b57cec5SDimitry Andric   LU.AccessTy = NewAccessTy;
28020b57cec5SDimitry Andric   return true;
28030b57cec5SDimitry Andric }
28040b57cec5SDimitry Andric 
28050b57cec5SDimitry Andric /// Return an LSRUse index and an offset value for a fixup which needs the given
28060b57cec5SDimitry Andric /// expression, with the given kind and optional access type.  Either reuse an
28070b57cec5SDimitry Andric /// existing use or create a new one, as needed.
28080fca6ea1SDimitry Andric std::pair<size_t, Immediate> LSRInstance::getUse(const SCEV *&Expr,
28090b57cec5SDimitry Andric                                                  LSRUse::KindType Kind,
28100b57cec5SDimitry Andric                                                  MemAccessTy AccessTy) {
28110b57cec5SDimitry Andric   const SCEV *Copy = Expr;
28120fca6ea1SDimitry Andric   Immediate Offset = ExtractImmediate(Expr, SE);
28130b57cec5SDimitry Andric 
28140b57cec5SDimitry Andric   // Basic uses can't accept any offset, for example.
28150b57cec5SDimitry Andric   if (!isAlwaysFoldable(TTI, Kind, AccessTy, /*BaseGV=*/ nullptr,
28160b57cec5SDimitry Andric                         Offset, /*HasBaseReg=*/ true)) {
28170b57cec5SDimitry Andric     Expr = Copy;
28180fca6ea1SDimitry Andric     Offset = Immediate::getFixed(0);
28190b57cec5SDimitry Andric   }
28200b57cec5SDimitry Andric 
28210b57cec5SDimitry Andric   std::pair<UseMapTy::iterator, bool> P =
28220b57cec5SDimitry Andric     UseMap.insert(std::make_pair(LSRUse::SCEVUseKindPair(Expr, Kind), 0));
28230b57cec5SDimitry Andric   if (!P.second) {
28240b57cec5SDimitry Andric     // A use already existed with this base.
28250b57cec5SDimitry Andric     size_t LUIdx = P.first->second;
28260b57cec5SDimitry Andric     LSRUse &LU = Uses[LUIdx];
28270b57cec5SDimitry Andric     if (reconcileNewOffset(LU, Offset, /*HasBaseReg=*/true, Kind, AccessTy))
28280b57cec5SDimitry Andric       // Reuse this use.
28290b57cec5SDimitry Andric       return std::make_pair(LUIdx, Offset);
28300b57cec5SDimitry Andric   }
28310b57cec5SDimitry Andric 
28320b57cec5SDimitry Andric   // Create a new use.
28330b57cec5SDimitry Andric   size_t LUIdx = Uses.size();
28340b57cec5SDimitry Andric   P.first->second = LUIdx;
28350b57cec5SDimitry Andric   Uses.push_back(LSRUse(Kind, AccessTy));
28360b57cec5SDimitry Andric   LSRUse &LU = Uses[LUIdx];
28370b57cec5SDimitry Andric 
28380b57cec5SDimitry Andric   LU.MinOffset = Offset;
28390b57cec5SDimitry Andric   LU.MaxOffset = Offset;
28400b57cec5SDimitry Andric   return std::make_pair(LUIdx, Offset);
28410b57cec5SDimitry Andric }
28420b57cec5SDimitry Andric 
28430b57cec5SDimitry Andric /// Delete the given use from the Uses list.
28440b57cec5SDimitry Andric void LSRInstance::DeleteUse(LSRUse &LU, size_t LUIdx) {
28450b57cec5SDimitry Andric   if (&LU != &Uses.back())
28460b57cec5SDimitry Andric     std::swap(LU, Uses.back());
28470b57cec5SDimitry Andric   Uses.pop_back();
28480b57cec5SDimitry Andric 
28490b57cec5SDimitry Andric   // Update RegUses.
28500b57cec5SDimitry Andric   RegUses.swapAndDropUse(LUIdx, Uses.size());
28510b57cec5SDimitry Andric }
28520b57cec5SDimitry Andric 
28530b57cec5SDimitry Andric /// Look for a use distinct from OrigLU which is has a formula that has the same
28540b57cec5SDimitry Andric /// registers as the given formula.
28550b57cec5SDimitry Andric LSRUse *
28560b57cec5SDimitry Andric LSRInstance::FindUseWithSimilarFormula(const Formula &OrigF,
28570b57cec5SDimitry Andric                                        const LSRUse &OrigLU) {
28580b57cec5SDimitry Andric   // Search all uses for the formula. This could be more clever.
285906c3fb27SDimitry Andric   for (LSRUse &LU : Uses) {
28600b57cec5SDimitry Andric     // Check whether this use is close enough to OrigLU, to see whether it's
28610b57cec5SDimitry Andric     // worthwhile looking through its formulae.
28620b57cec5SDimitry Andric     // Ignore ICmpZero uses because they may contain formulae generated by
28630b57cec5SDimitry Andric     // GenerateICmpZeroScales, in which case adding fixup offsets may
28640b57cec5SDimitry Andric     // be invalid.
28650b57cec5SDimitry Andric     if (&LU != &OrigLU &&
28660b57cec5SDimitry Andric         LU.Kind != LSRUse::ICmpZero &&
28670b57cec5SDimitry Andric         LU.Kind == OrigLU.Kind && OrigLU.AccessTy == LU.AccessTy &&
28680b57cec5SDimitry Andric         LU.WidestFixupType == OrigLU.WidestFixupType &&
28690b57cec5SDimitry Andric         LU.HasFormulaWithSameRegs(OrigF)) {
28700b57cec5SDimitry Andric       // Scan through this use's formulae.
28710b57cec5SDimitry Andric       for (const Formula &F : LU.Formulae) {
28720b57cec5SDimitry Andric         // Check to see if this formula has the same registers and symbols
28730b57cec5SDimitry Andric         // as OrigF.
28740b57cec5SDimitry Andric         if (F.BaseRegs == OrigF.BaseRegs &&
28750b57cec5SDimitry Andric             F.ScaledReg == OrigF.ScaledReg &&
28760b57cec5SDimitry Andric             F.BaseGV == OrigF.BaseGV &&
28770b57cec5SDimitry Andric             F.Scale == OrigF.Scale &&
28780b57cec5SDimitry Andric             F.UnfoldedOffset == OrigF.UnfoldedOffset) {
28790fca6ea1SDimitry Andric           if (F.BaseOffset.isZero())
28800b57cec5SDimitry Andric             return &LU;
28810b57cec5SDimitry Andric           // This is the formula where all the registers and symbols matched;
28820b57cec5SDimitry Andric           // there aren't going to be any others. Since we declined it, we
28830b57cec5SDimitry Andric           // can skip the rest of the formulae and proceed to the next LSRUse.
28840b57cec5SDimitry Andric           break;
28850b57cec5SDimitry Andric         }
28860b57cec5SDimitry Andric       }
28870b57cec5SDimitry Andric     }
28880b57cec5SDimitry Andric   }
28890b57cec5SDimitry Andric 
28900b57cec5SDimitry Andric   // Nothing looked good.
28910b57cec5SDimitry Andric   return nullptr;
28920b57cec5SDimitry Andric }
28930b57cec5SDimitry Andric 
28940b57cec5SDimitry Andric void LSRInstance::CollectInterestingTypesAndFactors() {
28950b57cec5SDimitry Andric   SmallSetVector<const SCEV *, 4> Strides;
28960b57cec5SDimitry Andric 
28970b57cec5SDimitry Andric   // Collect interesting types and strides.
28980b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> Worklist;
28990b57cec5SDimitry Andric   for (const IVStrideUse &U : IU) {
29000b57cec5SDimitry Andric     const SCEV *Expr = IU.getExpr(U);
290106c3fb27SDimitry Andric     if (!Expr)
290206c3fb27SDimitry Andric       continue;
29030b57cec5SDimitry Andric 
29040b57cec5SDimitry Andric     // Collect interesting types.
29050b57cec5SDimitry Andric     Types.insert(SE.getEffectiveSCEVType(Expr->getType()));
29060b57cec5SDimitry Andric 
29070b57cec5SDimitry Andric     // Add strides for mentioned loops.
29080b57cec5SDimitry Andric     Worklist.push_back(Expr);
29090b57cec5SDimitry Andric     do {
29100b57cec5SDimitry Andric       const SCEV *S = Worklist.pop_back_val();
29110b57cec5SDimitry Andric       if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
29120b57cec5SDimitry Andric         if (AR->getLoop() == L)
29130b57cec5SDimitry Andric           Strides.insert(AR->getStepRecurrence(SE));
29140b57cec5SDimitry Andric         Worklist.push_back(AR->getStart());
29150b57cec5SDimitry Andric       } else if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
2916bdd1243dSDimitry Andric         append_range(Worklist, Add->operands());
29170b57cec5SDimitry Andric       }
29180b57cec5SDimitry Andric     } while (!Worklist.empty());
29190b57cec5SDimitry Andric   }
29200b57cec5SDimitry Andric 
29210b57cec5SDimitry Andric   // Compute interesting factors from the set of interesting strides.
29220b57cec5SDimitry Andric   for (SmallSetVector<const SCEV *, 4>::const_iterator
29230b57cec5SDimitry Andric        I = Strides.begin(), E = Strides.end(); I != E; ++I)
29240b57cec5SDimitry Andric     for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
29250b57cec5SDimitry Andric          std::next(I); NewStrideIter != E; ++NewStrideIter) {
29260b57cec5SDimitry Andric       const SCEV *OldStride = *I;
29270b57cec5SDimitry Andric       const SCEV *NewStride = *NewStrideIter;
29280b57cec5SDimitry Andric 
29290b57cec5SDimitry Andric       if (SE.getTypeSizeInBits(OldStride->getType()) !=
29300b57cec5SDimitry Andric           SE.getTypeSizeInBits(NewStride->getType())) {
29310b57cec5SDimitry Andric         if (SE.getTypeSizeInBits(OldStride->getType()) >
29320b57cec5SDimitry Andric             SE.getTypeSizeInBits(NewStride->getType()))
29330b57cec5SDimitry Andric           NewStride = SE.getSignExtendExpr(NewStride, OldStride->getType());
29340b57cec5SDimitry Andric         else
29350b57cec5SDimitry Andric           OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
29360b57cec5SDimitry Andric       }
29370b57cec5SDimitry Andric       if (const SCEVConstant *Factor =
29380b57cec5SDimitry Andric             dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
29390b57cec5SDimitry Andric                                                         SE, true))) {
294006c3fb27SDimitry Andric         if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
29410b57cec5SDimitry Andric           Factors.insert(Factor->getAPInt().getSExtValue());
29420b57cec5SDimitry Andric       } else if (const SCEVConstant *Factor =
29430b57cec5SDimitry Andric                    dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
29440b57cec5SDimitry Andric                                                                NewStride,
29450b57cec5SDimitry Andric                                                                SE, true))) {
294606c3fb27SDimitry Andric         if (Factor->getAPInt().getSignificantBits() <= 64 && !Factor->isZero())
29470b57cec5SDimitry Andric           Factors.insert(Factor->getAPInt().getSExtValue());
29480b57cec5SDimitry Andric       }
29490b57cec5SDimitry Andric     }
29500b57cec5SDimitry Andric 
29510b57cec5SDimitry Andric   // If all uses use the same type, don't bother looking for truncation-based
29520b57cec5SDimitry Andric   // reuse.
29530b57cec5SDimitry Andric   if (Types.size() == 1)
29540b57cec5SDimitry Andric     Types.clear();
29550b57cec5SDimitry Andric 
29560b57cec5SDimitry Andric   LLVM_DEBUG(print_factors_and_types(dbgs()));
29570b57cec5SDimitry Andric }
29580b57cec5SDimitry Andric 
29590b57cec5SDimitry Andric /// Helper for CollectChains that finds an IV operand (computed by an AddRec in
29600b57cec5SDimitry Andric /// this loop) within [OI,OE) or returns OE. If IVUsers mapped Instructions to
29610b57cec5SDimitry Andric /// IVStrideUses, we could partially skip this.
29620b57cec5SDimitry Andric static User::op_iterator
29630b57cec5SDimitry Andric findIVOperand(User::op_iterator OI, User::op_iterator OE,
29640b57cec5SDimitry Andric               Loop *L, ScalarEvolution &SE) {
29650b57cec5SDimitry Andric   for(; OI != OE; ++OI) {
29660b57cec5SDimitry Andric     if (Instruction *Oper = dyn_cast<Instruction>(*OI)) {
29670b57cec5SDimitry Andric       if (!SE.isSCEVable(Oper->getType()))
29680b57cec5SDimitry Andric         continue;
29690b57cec5SDimitry Andric 
29700b57cec5SDimitry Andric       if (const SCEVAddRecExpr *AR =
29710b57cec5SDimitry Andric           dyn_cast<SCEVAddRecExpr>(SE.getSCEV(Oper))) {
29720b57cec5SDimitry Andric         if (AR->getLoop() == L)
29730b57cec5SDimitry Andric           break;
29740b57cec5SDimitry Andric       }
29750b57cec5SDimitry Andric     }
29760b57cec5SDimitry Andric   }
29770b57cec5SDimitry Andric   return OI;
29780b57cec5SDimitry Andric }
29790b57cec5SDimitry Andric 
29800b57cec5SDimitry Andric /// IVChain logic must consistently peek base TruncInst operands, so wrap it in
29810b57cec5SDimitry Andric /// a convenient helper.
29820b57cec5SDimitry Andric static Value *getWideOperand(Value *Oper) {
29830b57cec5SDimitry Andric   if (TruncInst *Trunc = dyn_cast<TruncInst>(Oper))
29840b57cec5SDimitry Andric     return Trunc->getOperand(0);
29850b57cec5SDimitry Andric   return Oper;
29860b57cec5SDimitry Andric }
29870b57cec5SDimitry Andric 
29880b57cec5SDimitry Andric /// Return an approximation of this SCEV expression's "base", or NULL for any
29890b57cec5SDimitry Andric /// constant. Returning the expression itself is conservative. Returning a
29900b57cec5SDimitry Andric /// deeper subexpression is more precise and valid as long as it isn't less
29910b57cec5SDimitry Andric /// complex than another subexpression. For expressions involving multiple
29920b57cec5SDimitry Andric /// unscaled values, we need to return the pointer-type SCEVUnknown. This avoids
29930b57cec5SDimitry Andric /// forming chains across objects, such as: PrevOper==a[i], IVOper==b[i],
29940b57cec5SDimitry Andric /// IVInc==b-a.
29950b57cec5SDimitry Andric ///
29960b57cec5SDimitry Andric /// Since SCEVUnknown is the rightmost type, and pointers are the rightmost
29970b57cec5SDimitry Andric /// SCEVUnknown, we simply return the rightmost SCEV operand.
29980b57cec5SDimitry Andric static const SCEV *getExprBase(const SCEV *S) {
29990b57cec5SDimitry Andric   switch (S->getSCEVType()) {
300006c3fb27SDimitry Andric   default: // including scUnknown.
30010b57cec5SDimitry Andric     return S;
30020b57cec5SDimitry Andric   case scConstant:
300306c3fb27SDimitry Andric   case scVScale:
30040b57cec5SDimitry Andric     return nullptr;
30050b57cec5SDimitry Andric   case scTruncate:
30060b57cec5SDimitry Andric     return getExprBase(cast<SCEVTruncateExpr>(S)->getOperand());
30070b57cec5SDimitry Andric   case scZeroExtend:
30080b57cec5SDimitry Andric     return getExprBase(cast<SCEVZeroExtendExpr>(S)->getOperand());
30090b57cec5SDimitry Andric   case scSignExtend:
30100b57cec5SDimitry Andric     return getExprBase(cast<SCEVSignExtendExpr>(S)->getOperand());
30110b57cec5SDimitry Andric   case scAddExpr: {
30120b57cec5SDimitry Andric     // Skip over scaled operands (scMulExpr) to follow add operands as long as
30130b57cec5SDimitry Andric     // there's nothing more complex.
30140b57cec5SDimitry Andric     // FIXME: not sure if we want to recognize negation.
30150b57cec5SDimitry Andric     const SCEVAddExpr *Add = cast<SCEVAddExpr>(S);
3016349cc55cSDimitry Andric     for (const SCEV *SubExpr : reverse(Add->operands())) {
30170b57cec5SDimitry Andric       if (SubExpr->getSCEVType() == scAddExpr)
30180b57cec5SDimitry Andric         return getExprBase(SubExpr);
30190b57cec5SDimitry Andric 
30200b57cec5SDimitry Andric       if (SubExpr->getSCEVType() != scMulExpr)
30210b57cec5SDimitry Andric         return SubExpr;
30220b57cec5SDimitry Andric     }
30230b57cec5SDimitry Andric     return S; // all operands are scaled, be conservative.
30240b57cec5SDimitry Andric   }
30250b57cec5SDimitry Andric   case scAddRecExpr:
30260b57cec5SDimitry Andric     return getExprBase(cast<SCEVAddRecExpr>(S)->getStart());
30270b57cec5SDimitry Andric   }
3028e8d8bef9SDimitry Andric   llvm_unreachable("Unknown SCEV kind!");
30290b57cec5SDimitry Andric }
30300b57cec5SDimitry Andric 
30310b57cec5SDimitry Andric /// Return true if the chain increment is profitable to expand into a loop
30320b57cec5SDimitry Andric /// invariant value, which may require its own register. A profitable chain
30330b57cec5SDimitry Andric /// increment will be an offset relative to the same base. We allow such offsets
30340b57cec5SDimitry Andric /// to potentially be used as chain increment as long as it's not obviously
30350b57cec5SDimitry Andric /// expensive to expand using real instructions.
30360b57cec5SDimitry Andric bool IVChain::isProfitableIncrement(const SCEV *OperExpr,
30370b57cec5SDimitry Andric                                     const SCEV *IncExpr,
30380b57cec5SDimitry Andric                                     ScalarEvolution &SE) {
30390b57cec5SDimitry Andric   // Aggressively form chains when -stress-ivchain.
30400b57cec5SDimitry Andric   if (StressIVChain)
30410b57cec5SDimitry Andric     return true;
30420b57cec5SDimitry Andric 
30430b57cec5SDimitry Andric   // Do not replace a constant offset from IV head with a nonconstant IV
30440b57cec5SDimitry Andric   // increment.
30450b57cec5SDimitry Andric   if (!isa<SCEVConstant>(IncExpr)) {
30460b57cec5SDimitry Andric     const SCEV *HeadExpr = SE.getSCEV(getWideOperand(Incs[0].IVOperand));
30470b57cec5SDimitry Andric     if (isa<SCEVConstant>(SE.getMinusSCEV(OperExpr, HeadExpr)))
30480b57cec5SDimitry Andric       return false;
30490b57cec5SDimitry Andric   }
30500b57cec5SDimitry Andric 
30510b57cec5SDimitry Andric   SmallPtrSet<const SCEV*, 8> Processed;
30520b57cec5SDimitry Andric   return !isHighCostExpansion(IncExpr, Processed, SE);
30530b57cec5SDimitry Andric }
30540b57cec5SDimitry Andric 
/// Return true if the number of registers needed for the chain is estimated to
/// be less than the number required for the individual IV users. First prohibit
/// any IV users that keep the IV live across increments (the Users set should
/// be empty). Next count the number and type of increments in the chain.
///
/// Chaining IVs can lead to considerable code bloat if ISEL doesn't
/// effectively use postinc addressing modes. Only consider it profitable if the
/// increments can be computed in fewer registers when chained.
///
/// \param Chain  The candidate IV chain.
/// \param Users  IV users that are NOT part of the chain; must be empty for
///               the chain to be profitable, since any such user keeps the IV
///               live across the chain's increments.
///
/// TODO: Consider IVInc free if it's already used in other chains.
static bool isProfitableChain(IVChain &Chain,
                              SmallPtrSetImpl<Instruction *> &Users,
                              ScalarEvolution &SE,
                              const TargetTransformInfo &TTI) {
  // Aggressively form chains when -stress-ivchain.
  if (StressIVChain)
    return true;

  // A chain with no increments saves nothing.
  if (!Chain.hasIncs())
    return false;

  // Any remaining user outside the chain forces the IV value to stay live,
  // defeating the purpose of chaining.
  if (!Users.empty()) {
    LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " users:\n";
               for (Instruction *Inst
                    : Users) { dbgs() << "  " << *Inst << "\n"; });
    return false;
  }
  assert(!Chain.Incs.empty() && "empty IV chains are not allowed");

  // The chain itself may require a register, so initialize cost to 1.
  int cost = 1;

  // A complete chain likely eliminates the need for keeping the original IV in
  // a register. LSR does not currently know how to form a complete chain unless
  // the header phi already exists.
  if (isa<PHINode>(Chain.tailUserInst())
      && SE.getSCEV(Chain.tailUserInst()) == Chain.Incs[0].IncExpr) {
    --cost;
  }
  const SCEV *LastIncExpr = nullptr;
  unsigned NumConstIncrements = 0;
  unsigned NumVarIncrements = 0;
  unsigned NumReusedIncrements = 0;

  // Let the target declare the chain profitable based solely on its head.
  if (TTI.isProfitableLSRChainElement(Chain.Incs[0].UserInst))
    return true;

  // Classify each increment: zero, constant, reused-variable, or new-variable.
  for (const IVInc &Inc : Chain) {
    // Likewise, any single element the target deems profitable decides it.
    if (TTI.isProfitableLSRChainElement(Inc.UserInst))
      return true;
    if (Inc.IncExpr->isZero())
      continue;

    // Incrementing by zero or some constant is neutral. We assume constants can
    // be folded into an addressing mode or an add's immediate operand.
    if (isa<SCEVConstant>(Inc.IncExpr)) {
      ++NumConstIncrements;
      continue;
    }

    if (Inc.IncExpr == LastIncExpr)
      ++NumReusedIncrements;
    else
      ++NumVarIncrements;

    LastIncExpr = Inc.IncExpr;
  }
  // An IV chain with a single increment is handled by LSR's postinc
  // uses. However, a chain with multiple increments requires keeping the IV's
  // value live longer than it needs to be if chained.
  if (NumConstIncrements > 1)
    --cost;

  // Materializing increment expressions in the preheader that didn't exist in
  // the original code may cost a register. For example, sign-extended array
  // indices can produce ridiculous increments like this:
  // IV + ((sext i32 (2 * %s) to i64) + (-1 * (sext i32 %s to i64)))
  cost += NumVarIncrements;

  // Reusing variable increments likely saves a register to hold the multiple of
  // the stride.
  cost -= NumReusedIncrements;

  LLVM_DEBUG(dbgs() << "Chain: " << *Chain.Incs[0].UserInst << " Cost: " << cost
                    << "\n");

  // Profitable only when the chain is estimated to save registers overall.
  return cost < 0;
}
31420b57cec5SDimitry Andric 
/// Add this IV user to an existing chain or make it the head of a new chain.
///
/// \param UserInst      The instruction using the IV.
/// \param IVOper        The IV-derived operand of UserInst being chained.
/// \param ChainUsersVec Per-chain near/far user sets, kept parallel to
///                      IVChainVec; updated as chains grow.
void LSRInstance::ChainInstruction(Instruction *UserInst, Instruction *IVOper,
                                   SmallVectorImpl<ChainUsers> &ChainUsersVec) {
  // When IVs are used as types of varying widths, they are generally converted
  // to a wider type with some uses remaining narrow under a (free) trunc.
  Value *const NextIV = getWideOperand(IVOper);
  const SCEV *const OperExpr = SE.getSCEV(NextIV);
  const SCEV *const OperExprBase = getExprBase(OperExpr);

  // Visit all existing chains. Check if its IVOper can be computed as a
  // profitable loop invariant increment from the last link in the Chain.
  unsigned ChainIdx = 0, NChains = IVChainVec.size();
  const SCEV *LastIncExpr = nullptr;
  for (; ChainIdx < NChains; ++ChainIdx) {
    IVChain &Chain = IVChainVec[ChainIdx];

    // Prune the solution space aggressively by checking that both IV operands
    // are expressions that operate on the same unscaled SCEVUnknown. This
    // "base" will be canceled by the subsequent getMinusSCEV call. Checking
    // first avoids creating extra SCEV expressions.
    if (!StressIVChain && Chain.ExprBase != OperExprBase)
      continue;

    // Only chain operands of matching width.
    Value *PrevIV = getWideOperand(Chain.Incs.back().IVOperand);
    if (PrevIV->getType() != NextIV->getType())
      continue;

    // A phi node terminates a chain.
    if (isa<PHINode>(UserInst) && isa<PHINode>(Chain.tailUserInst()))
      continue;

    // The increment must be loop-invariant so it can be kept in a register.
    const SCEV *PrevExpr = SE.getSCEV(PrevIV);
    const SCEV *IncExpr = SE.getMinusSCEV(OperExpr, PrevExpr);
    if (isa<SCEVCouldNotCompute>(IncExpr) || !SE.isLoopInvariant(IncExpr, L))
      continue;

    // Found a chain this user can profitably extend; stop searching.
    if (Chain.isProfitableIncrement(OperExpr, IncExpr, SE)) {
      LastIncExpr = IncExpr;
      break;
    }
  }
  // If we haven't found a chain, create a new one, unless we hit the max. Don't
  // bother for phi nodes, because they must be last in the chain.
  if (ChainIdx == NChains) {
    if (isa<PHINode>(UserInst))
      return;
    if (NChains >= MaxChains && !StressIVChain) {
      LLVM_DEBUG(dbgs() << "IV Chain Limit\n");
      return;
    }
    LastIncExpr = OperExpr;
    // IVUsers may have skipped over sign/zero extensions. We don't currently
    // attempt to form chains involving extensions unless they can be hoisted
    // into this loop's AddRec.
    if (!isa<SCEVAddRecExpr>(LastIncExpr))
      return;
    ++NChains;
    IVChainVec.push_back(IVChain(IVInc(UserInst, IVOper, LastIncExpr),
                                 OperExprBase));
    // Keep the user-set vector parallel with IVChainVec.
    ChainUsersVec.resize(NChains);
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << " Head: (" << *UserInst
                      << ") IV=" << *LastIncExpr << "\n");
  } else {
    LLVM_DEBUG(dbgs() << "IV Chain#" << ChainIdx << "  Inc: (" << *UserInst
                      << ") IV+" << *LastIncExpr << "\n");
    // Add this IV user to the end of the chain.
    IVChainVec[ChainIdx].add(IVInc(UserInst, IVOper, LastIncExpr));
  }
  IVChain &Chain = IVChainVec[ChainIdx];

  SmallPtrSet<Instruction*,4> &NearUsers = ChainUsersVec[ChainIdx].NearUsers;
  // This chain's NearUsers become FarUsers.
  if (!LastIncExpr->isZero()) {
    ChainUsersVec[ChainIdx].FarUsers.insert(NearUsers.begin(),
                                            NearUsers.end());
    NearUsers.clear();
  }

  // All other uses of IVOperand become near uses of the chain.
  // We currently ignore intermediate values within SCEV expressions, assuming
  // they will eventually be used be the current chain, or can be computed
  // from one of the chain increments. To be more precise we could
  // transitively follow its user and only add leaf IV users to the set.
  for (User *U : IVOper->users()) {
    Instruction *OtherUse = dyn_cast<Instruction>(U);
    if (!OtherUse)
      continue;
    // Uses in the chain will no longer be uses if the chain is formed.
    // Include the head of the chain in this iteration (not Chain.begin()).
    IVChain::const_iterator IncIter = Chain.Incs.begin();
    IVChain::const_iterator IncEnd = Chain.Incs.end();
    for( ; IncIter != IncEnd; ++IncIter) {
      if (IncIter->UserInst == OtherUse)
        break;
    }
    if (IncIter != IncEnd)
      continue;

    // Skip uses that IVUsers already accounts for as non-leaf IV expressions.
    if (SE.isSCEVable(OtherUse->getType())
        && !isa<SCEVUnknown>(SE.getSCEV(OtherUse))
        && IU.isIVUserOrOperand(OtherUse)) {
      continue;
    }
    NearUsers.insert(OtherUse);
  }

  // Since this user is part of the chain, it's no longer considered a use
  // of the chain.
  ChainUsersVec[ChainIdx].FarUsers.erase(UserInst);
}
32540b57cec5SDimitry Andric 
/// Populate the vector of Chains.
///
/// This decreases ILP at the architecture level. Targets with ample registers,
/// multiple memory ports, and no register renaming probably don't want
/// this. However, such targets should probably disable LSR altogether.
///
/// The job of LSR is to make a reasonable choice of induction variables across
/// the loop. Subsequent passes can easily "unchain" computation exposing more
/// ILP *within the loop* if the target wants it.
///
/// Finding the best IV chain is potentially a scheduling problem. Since LSR
/// will not reorder memory operations, it will recognize this as a chain, but
/// will generate redundant IV increments. Ideally this would be corrected later
/// by a smart scheduler:
///        = A[i]
///        = A[i+x]
/// A[i]   =
/// A[i+x] =
///
/// TODO: Walk the entire domtree within this loop, not just the path to the
/// loop latch. This will discover chains on side paths, but requires
/// maintaining multiple copies of the Chains state.
void LSRInstance::CollectChains() {
  LLVM_DEBUG(dbgs() << "Collecting IV Chains.\n");
  SmallVector<ChainUsers, 8> ChainUsersVec;

  // Build the dominator path from the latch up to (and including) the header.
  SmallVector<BasicBlock *,8> LatchPath;
  BasicBlock *LoopHeader = L->getHeader();
  for (DomTreeNode *Rung = DT.getNode(L->getLoopLatch());
       Rung->getBlock() != LoopHeader; Rung = Rung->getIDom()) {
    LatchPath.push_back(Rung->getBlock());
  }
  LatchPath.push_back(LoopHeader);

  // Walk the instruction stream from the loop header to the loop latch.
  for (BasicBlock *BB : reverse(LatchPath)) {
    for (Instruction &I : *BB) {
      // Skip instructions that weren't seen by IVUsers analysis.
      if (isa<PHINode>(I) || !IU.isIVUserOrOperand(&I))
        continue;

      // Ignore users that are part of a SCEV expression. This way we only
      // consider leaf IV Users. This effectively rediscovers a portion of
      // IVUsers analysis but in program order this time.
      if (SE.isSCEVable(I.getType()) && !isa<SCEVUnknown>(SE.getSCEV(&I)))
          continue;

      // Remove this instruction from any NearUsers set it may be in.
      for (unsigned ChainIdx = 0, NChains = IVChainVec.size();
           ChainIdx < NChains; ++ChainIdx) {
        ChainUsersVec[ChainIdx].NearUsers.erase(&I);
      }
      // Search for operands that can be chained. Each distinct IV operand of
      // this instruction is offered to ChainInstruction exactly once.
      SmallPtrSet<Instruction*, 4> UniqueOperands;
      User::op_iterator IVOpEnd = I.op_end();
      User::op_iterator IVOpIter = findIVOperand(I.op_begin(), IVOpEnd, L, SE);
      while (IVOpIter != IVOpEnd) {
        Instruction *IVOpInst = cast<Instruction>(*IVOpIter);
        if (UniqueOperands.insert(IVOpInst).second)
          ChainInstruction(&I, IVOpInst, ChainUsersVec);
        IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
      }
    } // Continue walking down the instructions.
  } // Continue walking down the domtree.
  // Visit phi backedges to determine if the chain can generate the IV postinc.
  for (PHINode &PN : L->getHeader()->phis()) {
    if (!SE.isSCEVable(PN.getType()))
      continue;

    Instruction *IncV =
        dyn_cast<Instruction>(PN.getIncomingValueForBlock(L->getLoopLatch()));
    if (IncV)
      ChainInstruction(&PN, IncV, ChainUsersVec);
  }
  // Remove any unprofitable chains, compacting the profitable ones to the
  // front of IVChainVec.
  unsigned ChainIdx = 0;
  for (unsigned UsersIdx = 0, NChains = IVChainVec.size();
       UsersIdx < NChains; ++UsersIdx) {
    if (!isProfitableChain(IVChainVec[UsersIdx],
                           ChainUsersVec[UsersIdx].FarUsers, SE, TTI))
      continue;
    // Preserve the chain at UsesIdx.
    if (ChainIdx != UsersIdx)
      IVChainVec[ChainIdx] = IVChainVec[UsersIdx];
    FinalizeChain(IVChainVec[ChainIdx]);
    ++ChainIdx;
  }
  IVChainVec.resize(ChainIdx);
}
33440b57cec5SDimitry Andric 
33450b57cec5SDimitry Andric void LSRInstance::FinalizeChain(IVChain &Chain) {
33460b57cec5SDimitry Andric   assert(!Chain.Incs.empty() && "empty IV chains are not allowed");
33470b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Final Chain: " << *Chain.Incs[0].UserInst << "\n");
33480b57cec5SDimitry Andric 
33490b57cec5SDimitry Andric   for (const IVInc &Inc : Chain) {
33500b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "        Inc: " << *Inc.UserInst << "\n");
33510b57cec5SDimitry Andric     auto UseI = find(Inc.UserInst->operands(), Inc.IVOperand);
33520b57cec5SDimitry Andric     assert(UseI != Inc.UserInst->op_end() && "cannot find IV operand");
33530b57cec5SDimitry Andric     IVIncSet.insert(UseI);
33540b57cec5SDimitry Andric   }
33550b57cec5SDimitry Andric }
33560b57cec5SDimitry Andric 
33570b57cec5SDimitry Andric /// Return true if the IVInc can be folded into an addressing mode.
33580b57cec5SDimitry Andric static bool canFoldIVIncExpr(const SCEV *IncExpr, Instruction *UserInst,
33590b57cec5SDimitry Andric                              Value *Operand, const TargetTransformInfo &TTI) {
33600b57cec5SDimitry Andric   const SCEVConstant *IncConst = dyn_cast<SCEVConstant>(IncExpr);
33610fca6ea1SDimitry Andric   Immediate IncOffset = Immediate::getZero();
33620fca6ea1SDimitry Andric   if (IncConst) {
33630fca6ea1SDimitry Andric     if (IncConst && IncConst->getAPInt().getSignificantBits() > 64)
33640b57cec5SDimitry Andric       return false;
33650fca6ea1SDimitry Andric     IncOffset = Immediate::getFixed(IncConst->getValue()->getSExtValue());
33660fca6ea1SDimitry Andric   } else {
33670fca6ea1SDimitry Andric     // Look for mul(vscale, constant), to detect a scalable offset.
33680fca6ea1SDimitry Andric     auto *IncVScale = dyn_cast<SCEVMulExpr>(IncExpr);
33690fca6ea1SDimitry Andric     if (!IncVScale || IncVScale->getNumOperands() != 2 ||
33700fca6ea1SDimitry Andric         !isa<SCEVVScale>(IncVScale->getOperand(1)))
33710fca6ea1SDimitry Andric       return false;
33720fca6ea1SDimitry Andric     auto *Scale = dyn_cast<SCEVConstant>(IncVScale->getOperand(0));
33730fca6ea1SDimitry Andric     if (!Scale || Scale->getType()->getScalarSizeInBits() > 64)
33740fca6ea1SDimitry Andric       return false;
33750fca6ea1SDimitry Andric     IncOffset = Immediate::getScalable(Scale->getValue()->getSExtValue());
33760fca6ea1SDimitry Andric   }
33770b57cec5SDimitry Andric 
33780fca6ea1SDimitry Andric   if (!isAddressUse(TTI, UserInst, Operand))
33790b57cec5SDimitry Andric     return false;
33800b57cec5SDimitry Andric 
33810b57cec5SDimitry Andric   MemAccessTy AccessTy = getAccessType(TTI, UserInst, Operand);
33820b57cec5SDimitry Andric   if (!isAlwaysFoldable(TTI, LSRUse::Address, AccessTy, /*BaseGV=*/nullptr,
33830b57cec5SDimitry Andric                         IncOffset, /*HasBaseReg=*/false))
33840b57cec5SDimitry Andric     return false;
33850b57cec5SDimitry Andric 
33860b57cec5SDimitry Andric   return true;
33870b57cec5SDimitry Andric }
33880b57cec5SDimitry Andric 
33890b57cec5SDimitry Andric /// Generate an add or subtract for each IVInc in a chain to materialize the IV
33900b57cec5SDimitry Andric /// user's operand from the previous IV user's operand.
3391fcaf7f86SDimitry Andric void LSRInstance::GenerateIVChain(const IVChain &Chain,
33920b57cec5SDimitry Andric                                   SmallVectorImpl<WeakTrackingVH> &DeadInsts) {
33930b57cec5SDimitry Andric   // Find the new IVOperand for the head of the chain. It may have been replaced
33940b57cec5SDimitry Andric   // by LSR.
33950b57cec5SDimitry Andric   const IVInc &Head = Chain.Incs[0];
33960b57cec5SDimitry Andric   User::op_iterator IVOpEnd = Head.UserInst->op_end();
33970b57cec5SDimitry Andric   // findIVOperand returns IVOpEnd if it can no longer find a valid IV user.
33980b57cec5SDimitry Andric   User::op_iterator IVOpIter = findIVOperand(Head.UserInst->op_begin(),
33990b57cec5SDimitry Andric                                              IVOpEnd, L, SE);
34000b57cec5SDimitry Andric   Value *IVSrc = nullptr;
34010b57cec5SDimitry Andric   while (IVOpIter != IVOpEnd) {
34020b57cec5SDimitry Andric     IVSrc = getWideOperand(*IVOpIter);
34030b57cec5SDimitry Andric 
34040b57cec5SDimitry Andric     // If this operand computes the expression that the chain needs, we may use
34050b57cec5SDimitry Andric     // it. (Check this after setting IVSrc which is used below.)
34060b57cec5SDimitry Andric     //
34070b57cec5SDimitry Andric     // Note that if Head.IncExpr is wider than IVSrc, then this phi is too
34080b57cec5SDimitry Andric     // narrow for the chain, so we can no longer use it. We do allow using a
34090b57cec5SDimitry Andric     // wider phi, assuming the LSR checked for free truncation. In that case we
34100b57cec5SDimitry Andric     // should already have a truncate on this operand such that
34110b57cec5SDimitry Andric     // getSCEV(IVSrc) == IncExpr.
34120b57cec5SDimitry Andric     if (SE.getSCEV(*IVOpIter) == Head.IncExpr
34130b57cec5SDimitry Andric         || SE.getSCEV(IVSrc) == Head.IncExpr) {
34140b57cec5SDimitry Andric       break;
34150b57cec5SDimitry Andric     }
34160b57cec5SDimitry Andric     IVOpIter = findIVOperand(std::next(IVOpIter), IVOpEnd, L, SE);
34170b57cec5SDimitry Andric   }
34180b57cec5SDimitry Andric   if (IVOpIter == IVOpEnd) {
34190b57cec5SDimitry Andric     // Gracefully give up on this chain.
34200b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Concealed chain head: " << *Head.UserInst << "\n");
34210b57cec5SDimitry Andric     return;
34220b57cec5SDimitry Andric   }
34238bcb0991SDimitry Andric   assert(IVSrc && "Failed to find IV chain source");
34240b57cec5SDimitry Andric 
34250b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "Generate chain at: " << *IVSrc << "\n");
34260b57cec5SDimitry Andric   Type *IVTy = IVSrc->getType();
34270b57cec5SDimitry Andric   Type *IntTy = SE.getEffectiveSCEVType(IVTy);
34280b57cec5SDimitry Andric   const SCEV *LeftOverExpr = nullptr;
34290fca6ea1SDimitry Andric   const SCEV *Accum = SE.getZero(IntTy);
34300fca6ea1SDimitry Andric   SmallVector<std::pair<const SCEV *, Value *>> Bases;
34310fca6ea1SDimitry Andric   Bases.emplace_back(Accum, IVSrc);
34320fca6ea1SDimitry Andric 
34330b57cec5SDimitry Andric   for (const IVInc &Inc : Chain) {
34340b57cec5SDimitry Andric     Instruction *InsertPt = Inc.UserInst;
34350b57cec5SDimitry Andric     if (isa<PHINode>(InsertPt))
34360b57cec5SDimitry Andric       InsertPt = L->getLoopLatch()->getTerminator();
34370b57cec5SDimitry Andric 
34380b57cec5SDimitry Andric     // IVOper will replace the current IV User's operand. IVSrc is the IV
34390b57cec5SDimitry Andric     // value currently held in a register.
34400b57cec5SDimitry Andric     Value *IVOper = IVSrc;
34410b57cec5SDimitry Andric     if (!Inc.IncExpr->isZero()) {
34420b57cec5SDimitry Andric       // IncExpr was the result of subtraction of two narrow values, so must
34430b57cec5SDimitry Andric       // be signed.
34440b57cec5SDimitry Andric       const SCEV *IncExpr = SE.getNoopOrSignExtend(Inc.IncExpr, IntTy);
34450fca6ea1SDimitry Andric       Accum = SE.getAddExpr(Accum, IncExpr);
34460b57cec5SDimitry Andric       LeftOverExpr = LeftOverExpr ?
34470b57cec5SDimitry Andric         SE.getAddExpr(LeftOverExpr, IncExpr) : IncExpr;
34480b57cec5SDimitry Andric     }
34490fca6ea1SDimitry Andric 
34500fca6ea1SDimitry Andric     // Look through each base to see if any can produce a nice addressing mode.
34510fca6ea1SDimitry Andric     bool FoundBase = false;
34520fca6ea1SDimitry Andric     for (auto [MapScev, MapIVOper] : reverse(Bases)) {
34530fca6ea1SDimitry Andric       const SCEV *Remainder = SE.getMinusSCEV(Accum, MapScev);
34540fca6ea1SDimitry Andric       if (canFoldIVIncExpr(Remainder, Inc.UserInst, Inc.IVOperand, TTI)) {
34550fca6ea1SDimitry Andric         if (!Remainder->isZero()) {
34560fca6ea1SDimitry Andric           Rewriter.clearPostInc();
34570fca6ea1SDimitry Andric           Value *IncV = Rewriter.expandCodeFor(Remainder, IntTy, InsertPt);
34580fca6ea1SDimitry Andric           const SCEV *IVOperExpr =
34590fca6ea1SDimitry Andric               SE.getAddExpr(SE.getUnknown(MapIVOper), SE.getUnknown(IncV));
34600fca6ea1SDimitry Andric           IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
34610fca6ea1SDimitry Andric         } else {
34620fca6ea1SDimitry Andric           IVOper = MapIVOper;
34630fca6ea1SDimitry Andric         }
34640fca6ea1SDimitry Andric 
34650fca6ea1SDimitry Andric         FoundBase = true;
34660fca6ea1SDimitry Andric         break;
34670fca6ea1SDimitry Andric       }
34680fca6ea1SDimitry Andric     }
34690fca6ea1SDimitry Andric     if (!FoundBase && LeftOverExpr && !LeftOverExpr->isZero()) {
34700b57cec5SDimitry Andric       // Expand the IV increment.
34710b57cec5SDimitry Andric       Rewriter.clearPostInc();
34720b57cec5SDimitry Andric       Value *IncV = Rewriter.expandCodeFor(LeftOverExpr, IntTy, InsertPt);
34730b57cec5SDimitry Andric       const SCEV *IVOperExpr = SE.getAddExpr(SE.getUnknown(IVSrc),
34740b57cec5SDimitry Andric                                              SE.getUnknown(IncV));
34750b57cec5SDimitry Andric       IVOper = Rewriter.expandCodeFor(IVOperExpr, IVTy, InsertPt);
34760b57cec5SDimitry Andric 
34770b57cec5SDimitry Andric       // If an IV increment can't be folded, use it as the next IV value.
34780b57cec5SDimitry Andric       if (!canFoldIVIncExpr(LeftOverExpr, Inc.UserInst, Inc.IVOperand, TTI)) {
34790b57cec5SDimitry Andric         assert(IVTy == IVOper->getType() && "inconsistent IV increment type");
34800fca6ea1SDimitry Andric         Bases.emplace_back(Accum, IVOper);
34810b57cec5SDimitry Andric         IVSrc = IVOper;
34820b57cec5SDimitry Andric         LeftOverExpr = nullptr;
34830b57cec5SDimitry Andric       }
34840b57cec5SDimitry Andric     }
34850b57cec5SDimitry Andric     Type *OperTy = Inc.IVOperand->getType();
34860b57cec5SDimitry Andric     if (IVTy != OperTy) {
34870b57cec5SDimitry Andric       assert(SE.getTypeSizeInBits(IVTy) >= SE.getTypeSizeInBits(OperTy) &&
34880b57cec5SDimitry Andric              "cannot extend a chained IV");
34890b57cec5SDimitry Andric       IRBuilder<> Builder(InsertPt);
34900b57cec5SDimitry Andric       IVOper = Builder.CreateTruncOrBitCast(IVOper, OperTy, "lsr.chain");
34910b57cec5SDimitry Andric     }
34920b57cec5SDimitry Andric     Inc.UserInst->replaceUsesOfWith(Inc.IVOperand, IVOper);
34935ffd83dbSDimitry Andric     if (auto *OperandIsInstr = dyn_cast<Instruction>(Inc.IVOperand))
34945ffd83dbSDimitry Andric       DeadInsts.emplace_back(OperandIsInstr);
34950b57cec5SDimitry Andric   }
34960b57cec5SDimitry Andric   // If LSR created a new, wider phi, we may also replace its postinc. We only
34970b57cec5SDimitry Andric   // do this if we also found a wide value for the head of the chain.
34980b57cec5SDimitry Andric   if (isa<PHINode>(Chain.tailUserInst())) {
34990b57cec5SDimitry Andric     for (PHINode &Phi : L->getHeader()->phis()) {
35005f757f3fSDimitry Andric       if (Phi.getType() != IVSrc->getType())
35010b57cec5SDimitry Andric         continue;
35020b57cec5SDimitry Andric       Instruction *PostIncV = dyn_cast<Instruction>(
35030b57cec5SDimitry Andric           Phi.getIncomingValueForBlock(L->getLoopLatch()));
35040b57cec5SDimitry Andric       if (!PostIncV || (SE.getSCEV(PostIncV) != SE.getSCEV(IVSrc)))
35050b57cec5SDimitry Andric         continue;
35060b57cec5SDimitry Andric       Value *IVOper = IVSrc;
35070b57cec5SDimitry Andric       Type *PostIncTy = PostIncV->getType();
35080b57cec5SDimitry Andric       if (IVTy != PostIncTy) {
35090b57cec5SDimitry Andric         assert(PostIncTy->isPointerTy() && "mixing int/ptr IV types");
35100b57cec5SDimitry Andric         IRBuilder<> Builder(L->getLoopLatch()->getTerminator());
35110b57cec5SDimitry Andric         Builder.SetCurrentDebugLocation(PostIncV->getDebugLoc());
35120b57cec5SDimitry Andric         IVOper = Builder.CreatePointerCast(IVSrc, PostIncTy, "lsr.chain");
35130b57cec5SDimitry Andric       }
35140b57cec5SDimitry Andric       Phi.replaceUsesOfWith(PostIncV, IVOper);
35150b57cec5SDimitry Andric       DeadInsts.emplace_back(PostIncV);
35160b57cec5SDimitry Andric     }
35170b57cec5SDimitry Andric   }
35180b57cec5SDimitry Andric }
35190b57cec5SDimitry Andric 
/// Walk the IV users collected by IVUsers and, for each one not already
/// covered by a profitable IV chain, record an LSRFixup on the matching
/// LSRUse and seed that use with an initial formula.
void LSRInstance::CollectFixupsAndInitialFormulae() {
  BranchInst *ExitBranch = nullptr;
  // Some targets (e.g. PowerPC hardware loops) can eliminate the loop-exit
  // compare entirely; if so, we skip formula generation for it below.
  bool SaveCmp = TTI.canSaveCmp(L, &ExitBranch, &SE, &LI, &DT, &AC, &TLI);

  // For calculating baseline cost
  SmallPtrSet<const SCEV *, 16> Regs;
  DenseSet<const SCEV *> VisitedRegs;
  DenseSet<size_t> VisitedLSRUse;

  for (const IVStrideUse &U : IU) {
    Instruction *UserInst = U.getUser();
    // Skip IV users that are part of profitable IV Chains.
    User::op_iterator UseI =
        find(UserInst->operands(), U.getOperandValToReplace());
    assert(UseI != UserInst->op_end() && "cannot find IV operand");
    if (IVIncSet.count(UseI)) {
      LLVM_DEBUG(dbgs() << "Use is in profitable chain: " << **UseI << '\n');
      continue;
    }

    // Classify the use: address uses are checked against target addressing
    // modes keyed on the memory access type; everything else is Basic.
    LSRUse::KindType Kind = LSRUse::Basic;
    MemAccessTy AccessTy;
    if (isAddressUse(TTI, UserInst, U.getOperandValToReplace())) {
      Kind = LSRUse::Address;
      AccessTy = getAccessType(TTI, UserInst, U.getOperandValToReplace());
    }

    // getExpr can fail to produce a usable expression; give up on such users.
    const SCEV *S = IU.getExpr(U);
    if (!S)
      continue;
    PostIncLoopSet TmpPostIncLoops = U.getPostIncLoops();

    // Equality (== and !=) ICmps are special. We can rewrite (i == N) as
    // (N - i == 0), and this allows (N - i) to be the expression that we work
    // with rather than just N or i, so we can consider the register
    // requirements for both N and i at the same time. Limiting this code to
    // equality icmps is not a problem because all interesting loops use
    // equality icmps, thanks to IndVarSimplify.
    if (ICmpInst *CI = dyn_cast<ICmpInst>(UserInst)) {
      // If CI can be saved in some target, like replaced inside hardware loop
      // in PowerPC, no need to generate initial formulae for it.
      if (SaveCmp && CI == dyn_cast<ICmpInst>(ExitBranch->getCondition()))
        continue;
      if (CI->isEquality()) {
        // Swap the operands if needed to put the OperandValToReplace on the
        // left, for consistency.
        Value *NV = CI->getOperand(1);
        if (NV == U.getOperandValToReplace()) {
          CI->setOperand(1, CI->getOperand(0));
          CI->setOperand(0, NV);
          NV = CI->getOperand(1);
          Changed = true;
        }

        // x == y  -->  x - y == 0
        const SCEV *N = SE.getSCEV(NV);
        if (SE.isLoopInvariant(N, L) && Rewriter.isSafeToExpand(N) &&
            (!NV->getType()->isPointerTy() ||
             SE.getPointerBase(N) == SE.getPointerBase(S))) {
          // S is normalized, so normalize N before folding it into S
          // to keep the result normalized.
          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
          if (!N)
            continue;
          Kind = LSRUse::ICmpZero;
          S = SE.getMinusSCEV(N, S);
        } else if (L->isLoopInvariant(NV) &&
                   (!isa<Instruction>(NV) ||
                    DT.dominates(cast<Instruction>(NV), L->getHeader())) &&
                   !NV->getType()->isPointerTy()) {
          // If we can't generally expand the expression (e.g. it contains
          // a divide), but it is already at a loop invariant point before the
          // loop, wrap it in an unknown (to prevent the expander from trying
          // to re-expand in a potentially unsafe way.)  The restriction to
          // integer types is required because the unknown hides the base, and
          // SCEV can't compute the difference of two unknown pointers.
          N = SE.getUnknown(NV);
          N = normalizeForPostIncUse(N, TmpPostIncLoops, SE);
          if (!N)
            continue;
          Kind = LSRUse::ICmpZero;
          S = SE.getMinusSCEV(N, S);
          assert(!isa<SCEVCouldNotCompute>(S));
        }

        // -1 and the negations of all interesting strides (except the negation
        // of -1) are now also interesting.
        for (size_t i = 0, e = Factors.size(); i != e; ++i)
          if (Factors[i] != -1)
            Factors.insert(-(uint64_t)Factors[i]);
        Factors.insert(-1);
      }
    }

    // Get or create an LSRUse.
    std::pair<size_t, Immediate> P = getUse(S, Kind, AccessTy);
    size_t LUIdx = P.first;
    Immediate Offset = P.second;
    LSRUse &LU = Uses[LUIdx];

    // Record the fixup.
    LSRFixup &LF = LU.getNewFixup();
    LF.UserInst = UserInst;
    LF.OperandValToReplace = U.getOperandValToReplace();
    LF.PostIncLoops = TmpPostIncLoops;
    LF.Offset = Offset;
    LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);

    // Create SCEV as Formula for calculating baseline cost
    if (!VisitedLSRUse.count(LUIdx) && !LF.isUseFullyOutsideLoop(L)) {
      Formula F;
      F.initialMatch(S, L, SE);
      BaselineCost.RateFormula(F, Regs, VisitedRegs, LU);
      VisitedLSRUse.insert(LUIdx);
    }

    // Track the widest fixup type seen for this use.
    if (!LU.WidestFixupType ||
        SE.getTypeSizeInBits(LU.WidestFixupType) <
        SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
      LU.WidestFixupType = LF.OperandValToReplace->getType();

    // If this is the first use of this LSRUse, give it a formula.
    if (LU.Formulae.empty()) {
      InsertInitialFormula(S, LU, LUIdx);
      CountRegisters(LU.Formulae.back(), LUIdx);
    }
  }

  LLVM_DEBUG(print_fixups(dbgs()));
}
36500b57cec5SDimitry Andric 
36510b57cec5SDimitry Andric /// Insert a formula for the given expression into the given use, separating out
36520b57cec5SDimitry Andric /// loop-variant portions from loop-invariant and loop-computable portions.
3653fcaf7f86SDimitry Andric void LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU,
3654fcaf7f86SDimitry Andric                                        size_t LUIdx) {
36550b57cec5SDimitry Andric   // Mark uses whose expressions cannot be expanded.
3656fcaf7f86SDimitry Andric   if (!Rewriter.isSafeToExpand(S))
36570b57cec5SDimitry Andric     LU.RigidFormula = true;
36580b57cec5SDimitry Andric 
36590b57cec5SDimitry Andric   Formula F;
36600b57cec5SDimitry Andric   F.initialMatch(S, L, SE);
36610b57cec5SDimitry Andric   bool Inserted = InsertFormula(LU, LUIdx, F);
36620b57cec5SDimitry Andric   assert(Inserted && "Initial formula already exists!"); (void)Inserted;
36630b57cec5SDimitry Andric }
36640b57cec5SDimitry Andric 
36650b57cec5SDimitry Andric /// Insert a simple single-register formula for the given expression into the
36660b57cec5SDimitry Andric /// given use.
36670b57cec5SDimitry Andric void
36680b57cec5SDimitry Andric LSRInstance::InsertSupplementalFormula(const SCEV *S,
36690b57cec5SDimitry Andric                                        LSRUse &LU, size_t LUIdx) {
36700b57cec5SDimitry Andric   Formula F;
36710b57cec5SDimitry Andric   F.BaseRegs.push_back(S);
36720b57cec5SDimitry Andric   F.HasBaseReg = true;
36730b57cec5SDimitry Andric   bool Inserted = InsertFormula(LU, LUIdx, F);
36740b57cec5SDimitry Andric   assert(Inserted && "Supplemental formula already exists!"); (void)Inserted;
36750b57cec5SDimitry Andric }
36760b57cec5SDimitry Andric 
36770b57cec5SDimitry Andric /// Note which registers are used by the given formula, updating RegUses.
36780b57cec5SDimitry Andric void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
36790b57cec5SDimitry Andric   if (F.ScaledReg)
36800b57cec5SDimitry Andric     RegUses.countRegister(F.ScaledReg, LUIdx);
36810b57cec5SDimitry Andric   for (const SCEV *BaseReg : F.BaseRegs)
36820b57cec5SDimitry Andric     RegUses.countRegister(BaseReg, LUIdx);
36830b57cec5SDimitry Andric }
36840b57cec5SDimitry Andric 
36850b57cec5SDimitry Andric /// If the given formula has not yet been inserted, add it to the list, and
36860b57cec5SDimitry Andric /// return true. Return false otherwise.
36870b57cec5SDimitry Andric bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
36880b57cec5SDimitry Andric   // Do not insert formula that we will not be able to expand.
36890b57cec5SDimitry Andric   assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F) &&
36900b57cec5SDimitry Andric          "Formula is illegal");
36910b57cec5SDimitry Andric 
36920b57cec5SDimitry Andric   if (!LU.InsertFormula(F, *L))
36930b57cec5SDimitry Andric     return false;
36940b57cec5SDimitry Andric 
36950b57cec5SDimitry Andric   CountRegisters(F, LUIdx);
36960b57cec5SDimitry Andric   return true;
36970b57cec5SDimitry Andric }
36980b57cec5SDimitry Andric 
36990b57cec5SDimitry Andric /// Check for other uses of loop-invariant values which we're tracking. These
37000b57cec5SDimitry Andric /// other uses will pin these values in registers, making them less profitable
37010b57cec5SDimitry Andric /// for elimination.
37020b57cec5SDimitry Andric /// TODO: This currently misses non-constant addrec step registers.
37030b57cec5SDimitry Andric /// TODO: Should this give more weight to users inside the loop?
void
LSRInstance::CollectLoopInvariantFixupsAndFormulae() {
  // Seed the worklist with every register LSR is already tracking; we then
  // walk down through their SCEV structure looking for loop-invariant values
  // with other (outside) uses.
  SmallVector<const SCEV *, 8> Worklist(RegUses.begin(), RegUses.end());
  SmallPtrSet<const SCEV *, 32> Visited;

  // Don't collect outside uses if we are favoring postinc - the instructions in
  // the loop are more important than the ones outside of it.
  if (AMK == TTI::AMK_PostIndexed)
    return;

  while (!Worklist.empty()) {
    const SCEV *S = Worklist.pop_back_val();

    // Don't process the same SCEV twice
    if (!Visited.insert(S).second)
      continue;

    // Decompose composite SCEVs into their operands; only SCEVUnknowns (the
    // leaves wrapping IR values) are inspected for outside uses below.
    if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S))
      append_range(Worklist, N->operands());
    else if (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(S))
      Worklist.push_back(C->getOperand());
    else if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      Worklist.push_back(D->getLHS());
      Worklist.push_back(D->getRHS());
    } else if (const SCEVUnknown *US = dyn_cast<SCEVUnknown>(S)) {
      const Value *V = US->getValue();
      if (const Instruction *Inst = dyn_cast<Instruction>(V)) {
        // Look for instructions defined outside the loop.
        if (L->contains(Inst)) continue;
      } else if (isa<Constant>(V))
        // Constants can be re-materialized.
        continue;
      for (const Use &U : V->uses()) {
        const Instruction *UserInst = dyn_cast<Instruction>(U.getUser());
        // Ignore non-instructions.
        if (!UserInst)
          continue;
        // Don't bother if the instruction is an EHPad.
        if (UserInst->isEHPad())
          continue;
        // Ignore instructions in other functions (as can happen with
        // Constants).
        if (UserInst->getParent()->getParent() != L->getHeader()->getParent())
          continue;
        // Ignore instructions not dominated by the loop.
        const BasicBlock *UseBB = !isa<PHINode>(UserInst) ?
          UserInst->getParent() :
          cast<PHINode>(UserInst)->getIncomingBlock(
            PHINode::getIncomingValueNumForOperand(U.getOperandNo()));
        if (!DT.dominates(L->getHeader(), UseBB))
          continue;
        // Don't bother if the instruction is in a BB which ends in an EHPad.
        if (UseBB->getTerminator()->isEHPad())
          continue;

        // Ignore cases in which the currently-examined value could come from
        // a basic block terminated with an EHPad. This checks all incoming
        // blocks of the phi node since it is possible that the same incoming
        // value comes from multiple basic blocks, only some of which may end
        // in an EHPad. If any of them do, a subsequent rewrite attempt by this
        // pass would try to insert instructions into an EHPad, hitting an
        // assertion.
        if (isa<PHINode>(UserInst)) {
          const auto *PhiNode = cast<PHINode>(UserInst);
          bool HasIncompatibleEHPTerminatedBlock = false;
          llvm::Value *ExpectedValue = U;
          for (unsigned int I = 0; I < PhiNode->getNumIncomingValues(); I++) {
            if (PhiNode->getIncomingValue(I) == ExpectedValue) {
              if (PhiNode->getIncomingBlock(I)->getTerminator()->isEHPad()) {
                HasIncompatibleEHPTerminatedBlock = true;
                break;
              }
            }
          }
          if (HasIncompatibleEHPTerminatedBlock) {
            continue;
          }
        }

        // Don't bother rewriting PHIs in catchswitch blocks.
        if (isa<CatchSwitchInst>(UserInst->getParent()->getTerminator()))
          continue;
        // Ignore uses which are part of other SCEV expressions, to avoid
        // analyzing them multiple times.
        if (SE.isSCEVable(UserInst->getType())) {
          const SCEV *UserS = SE.getSCEV(const_cast<Instruction *>(UserInst));
          // If the user is a no-op, look through to its uses.
          if (!isa<SCEVUnknown>(UserS))
            continue;
          if (UserS == US) {
            Worklist.push_back(
              SE.getUnknown(const_cast<Instruction *>(UserInst)));
            continue;
          }
        }
        // Ignore icmp instructions which are already being analyzed.
        if (const ICmpInst *ICI = dyn_cast<ICmpInst>(UserInst)) {
          unsigned OtherIdx = !U.getOperandNo();
          Value *OtherOp = const_cast<Value *>(ICI->getOperand(OtherIdx));
          if (SE.hasComputableLoopEvolution(SE.getSCEV(OtherOp), L))
            continue;
        }

        // Record a fixup on a Basic use for this outside user and give it a
        // single-register supplemental formula.
        std::pair<size_t, Immediate> P =
            getUse(S, LSRUse::Basic, MemAccessTy());
        size_t LUIdx = P.first;
        Immediate Offset = P.second;
        LSRUse &LU = Uses[LUIdx];
        LSRFixup &LF = LU.getNewFixup();
        LF.UserInst = const_cast<Instruction *>(UserInst);
        LF.OperandValToReplace = U;
        LF.Offset = Offset;
        LU.AllFixupsOutsideLoop &= LF.isUseFullyOutsideLoop(L);
        if (!LU.WidestFixupType ||
            SE.getTypeSizeInBits(LU.WidestFixupType) <
            SE.getTypeSizeInBits(LF.OperandValToReplace->getType()))
          LU.WidestFixupType = LF.OperandValToReplace->getType();
        InsertSupplementalFormula(US, LU, LUIdx);
        // NOTE(review): getUse may return an existing use, in which case
        // LUIdx can differ from Uses.size() - 1; presumably intentional
        // here, but verify the intended index.
        CountRegisters(LU.Formulae.back(), Uses.size() - 1);
        // One fixup per tracked value is enough; stop at the first valid user.
        break;
      }
    }
  }
}
38280b57cec5SDimitry Andric 
38290b57cec5SDimitry Andric /// Split S into subexpressions which can be pulled out into separate
38300b57cec5SDimitry Andric /// registers. If C is non-null, multiply each subexpression by C.
38310b57cec5SDimitry Andric ///
38320b57cec5SDimitry Andric /// Return remainder expression after factoring the subexpressions captured by
38330b57cec5SDimitry Andric /// Ops. If Ops is complete, return NULL.
38340b57cec5SDimitry Andric static const SCEV *CollectSubexprs(const SCEV *S, const SCEVConstant *C,
38350b57cec5SDimitry Andric                                    SmallVectorImpl<const SCEV *> &Ops,
38360b57cec5SDimitry Andric                                    const Loop *L,
38370b57cec5SDimitry Andric                                    ScalarEvolution &SE,
38380b57cec5SDimitry Andric                                    unsigned Depth = 0) {
38390b57cec5SDimitry Andric   // Arbitrarily cap recursion to protect compile time.
38400b57cec5SDimitry Andric   if (Depth >= 3)
38410b57cec5SDimitry Andric     return S;
38420b57cec5SDimitry Andric 
38430b57cec5SDimitry Andric   if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(S)) {
38440b57cec5SDimitry Andric     // Break out add operands.
38450b57cec5SDimitry Andric     for (const SCEV *S : Add->operands()) {
38460b57cec5SDimitry Andric       const SCEV *Remainder = CollectSubexprs(S, C, Ops, L, SE, Depth+1);
38470b57cec5SDimitry Andric       if (Remainder)
38480b57cec5SDimitry Andric         Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
38490b57cec5SDimitry Andric     }
38500b57cec5SDimitry Andric     return nullptr;
38510b57cec5SDimitry Andric   } else if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
38520b57cec5SDimitry Andric     // Split a non-zero base out of an addrec.
38530b57cec5SDimitry Andric     if (AR->getStart()->isZero() || !AR->isAffine())
38540b57cec5SDimitry Andric       return S;
38550b57cec5SDimitry Andric 
38560b57cec5SDimitry Andric     const SCEV *Remainder = CollectSubexprs(AR->getStart(),
38570b57cec5SDimitry Andric                                             C, Ops, L, SE, Depth+1);
38580b57cec5SDimitry Andric     // Split the non-zero AddRec unless it is part of a nested recurrence that
38590b57cec5SDimitry Andric     // does not pertain to this loop.
38600b57cec5SDimitry Andric     if (Remainder && (AR->getLoop() == L || !isa<SCEVAddRecExpr>(Remainder))) {
38610b57cec5SDimitry Andric       Ops.push_back(C ? SE.getMulExpr(C, Remainder) : Remainder);
38620b57cec5SDimitry Andric       Remainder = nullptr;
38630b57cec5SDimitry Andric     }
38640b57cec5SDimitry Andric     if (Remainder != AR->getStart()) {
38650b57cec5SDimitry Andric       if (!Remainder)
38660b57cec5SDimitry Andric         Remainder = SE.getConstant(AR->getType(), 0);
38670b57cec5SDimitry Andric       return SE.getAddRecExpr(Remainder,
38680b57cec5SDimitry Andric                               AR->getStepRecurrence(SE),
38690b57cec5SDimitry Andric                               AR->getLoop(),
38700b57cec5SDimitry Andric                               //FIXME: AR->getNoWrapFlags(SCEV::FlagNW)
38710b57cec5SDimitry Andric                               SCEV::FlagAnyWrap);
38720b57cec5SDimitry Andric     }
38730b57cec5SDimitry Andric   } else if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(S)) {
38740b57cec5SDimitry Andric     // Break (C * (a + b + c)) into C*a + C*b + C*c.
38750b57cec5SDimitry Andric     if (Mul->getNumOperands() != 2)
38760b57cec5SDimitry Andric       return S;
38770b57cec5SDimitry Andric     if (const SCEVConstant *Op0 =
38780b57cec5SDimitry Andric         dyn_cast<SCEVConstant>(Mul->getOperand(0))) {
38790b57cec5SDimitry Andric       C = C ? cast<SCEVConstant>(SE.getMulExpr(C, Op0)) : Op0;
38800b57cec5SDimitry Andric       const SCEV *Remainder =
38810b57cec5SDimitry Andric         CollectSubexprs(Mul->getOperand(1), C, Ops, L, SE, Depth+1);
38820b57cec5SDimitry Andric       if (Remainder)
38830b57cec5SDimitry Andric         Ops.push_back(SE.getMulExpr(C, Remainder));
38840b57cec5SDimitry Andric       return nullptr;
38850b57cec5SDimitry Andric     }
38860b57cec5SDimitry Andric   }
38870b57cec5SDimitry Andric   return S;
38880b57cec5SDimitry Andric }
38890b57cec5SDimitry Andric 
38900b57cec5SDimitry Andric /// Return true if the SCEV represents a value that may end up as a
38910b57cec5SDimitry Andric /// post-increment operation.
38920b57cec5SDimitry Andric static bool mayUsePostIncMode(const TargetTransformInfo &TTI,
38930b57cec5SDimitry Andric                               LSRUse &LU, const SCEV *S, const Loop *L,
38940b57cec5SDimitry Andric                               ScalarEvolution &SE) {
38950b57cec5SDimitry Andric   if (LU.Kind != LSRUse::Address ||
38960b57cec5SDimitry Andric       !LU.AccessTy.getType()->isIntOrIntVectorTy())
38970b57cec5SDimitry Andric     return false;
38980b57cec5SDimitry Andric   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S);
38990b57cec5SDimitry Andric   if (!AR)
39000b57cec5SDimitry Andric     return false;
39010b57cec5SDimitry Andric   const SCEV *LoopStep = AR->getStepRecurrence(SE);
39020b57cec5SDimitry Andric   if (!isa<SCEVConstant>(LoopStep))
39030b57cec5SDimitry Andric     return false;
39040b57cec5SDimitry Andric   // Check if a post-indexed load/store can be used.
39050b57cec5SDimitry Andric   if (TTI.isIndexedLoadLegal(TTI.MIM_PostInc, AR->getType()) ||
39060b57cec5SDimitry Andric       TTI.isIndexedStoreLegal(TTI.MIM_PostInc, AR->getType())) {
39070b57cec5SDimitry Andric     const SCEV *LoopStart = AR->getStart();
39080b57cec5SDimitry Andric     if (!isa<SCEVConstant>(LoopStart) && SE.isLoopInvariant(LoopStart, L))
39090b57cec5SDimitry Andric       return true;
39100b57cec5SDimitry Andric   }
39110b57cec5SDimitry Andric   return false;
39120b57cec5SDimitry Andric }
39130b57cec5SDimitry Andric 
/// Helper function for LSRInstance::GenerateReassociations.
///
/// Attempts to split one register of \p Base (BaseRegs[Idx], or ScaledReg
/// when \p IsScaledReg) into a sum of subexpressions, emitting a new formula
/// for each subexpression that is pulled out into its own register (or
/// folded into the unfolded immediate offset).
void LSRInstance::GenerateReassociationsImpl(LSRUse &LU, unsigned LUIdx,
                                             const Formula &Base,
                                             unsigned Depth, size_t Idx,
                                             bool IsScaledReg) {
  const SCEV *BaseReg = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
  // Don't generate reassociations for the base register of a value that
  // may generate a post-increment operator. The reason is that the
  // reassociations cause extra base+register formula to be created,
  // and possibly chosen, but the post-increment is more efficient.
  if (AMK == TTI::AMK_PostIndexed && mayUsePostIncMode(TTI, LU, BaseReg, L, SE))
    return;
  // Split BaseReg into addends; any part CollectSubexprs cannot break up
  // further is returned as Remainder and kept as one operand.
  SmallVector<const SCEV *, 8> AddOps;
  const SCEV *Remainder = CollectSubexprs(BaseReg, nullptr, AddOps, L, SE);
  if (Remainder)
    AddOps.push_back(Remainder);

  // A single operand means the register did not split; nothing to do.
  if (AddOps.size() == 1)
    return;

  for (SmallVectorImpl<const SCEV *>::const_iterator J = AddOps.begin(),
                                                     JE = AddOps.end();
       J != JE; ++J) {
    // Loop-variant "unknown" values are uninteresting; we won't be able to
    // do anything meaningful with them.
    if (isa<SCEVUnknown>(*J) && !SE.isLoopInvariant(*J, L))
      continue;

    // Don't pull a constant into a register if the constant could be folded
    // into an immediate field.
    if (isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
                         LU.AccessTy, *J, Base.getNumRegs() > 1))
      continue;

    // Collect all operands except *J.
    // (The const casts select the const_iterator overloads so the ranges
    // match J's iterator type.)
    SmallVector<const SCEV *, 8> InnerAddOps(
        ((const SmallVector<const SCEV *, 8> &)AddOps).begin(), J);
    InnerAddOps.append(std::next(J),
                       ((const SmallVector<const SCEV *, 8> &)AddOps).end());

    // Don't leave just a constant behind in a register if the constant could
    // be folded into an immediate field.
    if (InnerAddOps.size() == 1 &&
        isAlwaysFoldable(TTI, SE, LU.MinOffset, LU.MaxOffset, LU.Kind,
                         LU.AccessTy, InnerAddOps[0], Base.getNumRegs() > 1))
      continue;

    const SCEV *InnerSum = SE.getAddExpr(InnerAddOps);
    if (InnerSum->isZero())
      continue;
    Formula F = Base;

    // The fixed-offset folding below cannot handle scalable offsets.
    if (F.UnfoldedOffset.isNonZero() && F.UnfoldedOffset.isScalable())
      continue;

    // Add the remaining pieces of the add back into the new formula.
    // If InnerSum is a small enough constant that is legal as an add
    // immediate, fold it into UnfoldedOffset and drop the register instead.
    const SCEVConstant *InnerSumSC = dyn_cast<SCEVConstant>(InnerSum);
    if (InnerSumSC && SE.getTypeSizeInBits(InnerSumSC->getType()) <= 64 &&
        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
                                InnerSumSC->getValue()->getZExtValue())) {
      F.UnfoldedOffset =
          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
                              InnerSumSC->getValue()->getZExtValue());
      if (IsScaledReg)
        F.ScaledReg = nullptr;
      else
        F.BaseRegs.erase(F.BaseRegs.begin() + Idx);
    } else if (IsScaledReg)
      F.ScaledReg = InnerSum;
    else
      F.BaseRegs[Idx] = InnerSum;

    // Add J as its own register, or an unfolded immediate.
    const SCEVConstant *SC = dyn_cast<SCEVConstant>(*J);
    if (SC && SE.getTypeSizeInBits(SC->getType()) <= 64 &&
        TTI.isLegalAddImmediate((uint64_t)F.UnfoldedOffset.getFixedValue() +
                                SC->getValue()->getZExtValue()))
      F.UnfoldedOffset =
          Immediate::getFixed((uint64_t)F.UnfoldedOffset.getFixedValue() +
                              SC->getValue()->getZExtValue());
    else
      F.BaseRegs.push_back(*J);
    // We may have changed the number of register in base regs, adjust the
    // formula accordingly.
    F.canonicalize(*L);

    if (InsertFormula(LU, LUIdx, F))
      // If that formula hadn't been seen before, recurse to find more like
      // it.
      // Add check on Log16(AddOps.size()) - same as Log2_32(AddOps.size()) >> 2)
      // Because just Depth is not enough to bound compile time.
      // This means that every time AddOps.size() is greater 16^x we will add
      // x to Depth.
      GenerateReassociations(LU, LUIdx, LU.Formulae.back(),
                             Depth + 1 + (Log2_32(AddOps.size()) >> 2));
  }
}
40110b57cec5SDimitry Andric 
40120b57cec5SDimitry Andric /// Split out subexpressions from adds and the bases of addrecs.
40130b57cec5SDimitry Andric void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
40140b57cec5SDimitry Andric                                          Formula Base, unsigned Depth) {
40150b57cec5SDimitry Andric   assert(Base.isCanonical(*L) && "Input must be in the canonical form");
40160b57cec5SDimitry Andric   // Arbitrarily cap recursion to protect compile time.
40170b57cec5SDimitry Andric   if (Depth >= 3)
40180b57cec5SDimitry Andric     return;
40190b57cec5SDimitry Andric 
40200b57cec5SDimitry Andric   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
40210b57cec5SDimitry Andric     GenerateReassociationsImpl(LU, LUIdx, Base, Depth, i);
40220b57cec5SDimitry Andric 
40230b57cec5SDimitry Andric   if (Base.Scale == 1)
40240b57cec5SDimitry Andric     GenerateReassociationsImpl(LU, LUIdx, Base, Depth,
40250b57cec5SDimitry Andric                                /* Idx */ -1, /* IsScaledReg */ true);
40260b57cec5SDimitry Andric }
40270b57cec5SDimitry Andric 
40280b57cec5SDimitry Andric ///  Generate a formula consisting of all of the loop-dominating registers added
40290b57cec5SDimitry Andric /// into a single register.
40300b57cec5SDimitry Andric void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
40310b57cec5SDimitry Andric                                        Formula Base) {
40320b57cec5SDimitry Andric   // This method is only interesting on a plurality of registers.
40330b57cec5SDimitry Andric   if (Base.BaseRegs.size() + (Base.Scale == 1) +
40340fca6ea1SDimitry Andric           (Base.UnfoldedOffset.isNonZero()) <=
40350fca6ea1SDimitry Andric       1)
40360b57cec5SDimitry Andric     return;
40370b57cec5SDimitry Andric 
40380b57cec5SDimitry Andric   // Flatten the representation, i.e., reg1 + 1*reg2 => reg1 + reg2, before
40390b57cec5SDimitry Andric   // processing the formula.
40400b57cec5SDimitry Andric   Base.unscale();
40410b57cec5SDimitry Andric   SmallVector<const SCEV *, 4> Ops;
40420b57cec5SDimitry Andric   Formula NewBase = Base;
40430b57cec5SDimitry Andric   NewBase.BaseRegs.clear();
40440b57cec5SDimitry Andric   Type *CombinedIntegerType = nullptr;
40450b57cec5SDimitry Andric   for (const SCEV *BaseReg : Base.BaseRegs) {
40460b57cec5SDimitry Andric     if (SE.properlyDominates(BaseReg, L->getHeader()) &&
40470b57cec5SDimitry Andric         !SE.hasComputableLoopEvolution(BaseReg, L)) {
40480b57cec5SDimitry Andric       if (!CombinedIntegerType)
40490b57cec5SDimitry Andric         CombinedIntegerType = SE.getEffectiveSCEVType(BaseReg->getType());
40500b57cec5SDimitry Andric       Ops.push_back(BaseReg);
40510b57cec5SDimitry Andric     }
40520b57cec5SDimitry Andric     else
40530b57cec5SDimitry Andric       NewBase.BaseRegs.push_back(BaseReg);
40540b57cec5SDimitry Andric   }
40550b57cec5SDimitry Andric 
40560b57cec5SDimitry Andric   // If no register is relevant, we're done.
40570b57cec5SDimitry Andric   if (Ops.size() == 0)
40580b57cec5SDimitry Andric     return;
40590b57cec5SDimitry Andric 
40600b57cec5SDimitry Andric   // Utility function for generating the required variants of the combined
40610b57cec5SDimitry Andric   // registers.
40620b57cec5SDimitry Andric   auto GenerateFormula = [&](const SCEV *Sum) {
40630b57cec5SDimitry Andric     Formula F = NewBase;
40640b57cec5SDimitry Andric 
40650b57cec5SDimitry Andric     // TODO: If Sum is zero, it probably means ScalarEvolution missed an
40660b57cec5SDimitry Andric     // opportunity to fold something. For now, just ignore such cases
40670b57cec5SDimitry Andric     // rather than proceed with zero in a register.
40680b57cec5SDimitry Andric     if (Sum->isZero())
40690b57cec5SDimitry Andric       return;
40700b57cec5SDimitry Andric 
40710b57cec5SDimitry Andric     F.BaseRegs.push_back(Sum);
40720b57cec5SDimitry Andric     F.canonicalize(*L);
40730b57cec5SDimitry Andric     (void)InsertFormula(LU, LUIdx, F);
40740b57cec5SDimitry Andric   };
40750b57cec5SDimitry Andric 
40760b57cec5SDimitry Andric   // If we collected at least two registers, generate a formula combining them.
40770b57cec5SDimitry Andric   if (Ops.size() > 1) {
40780b57cec5SDimitry Andric     SmallVector<const SCEV *, 4> OpsCopy(Ops); // Don't let SE modify Ops.
40790b57cec5SDimitry Andric     GenerateFormula(SE.getAddExpr(OpsCopy));
40800b57cec5SDimitry Andric   }
40810b57cec5SDimitry Andric 
40820b57cec5SDimitry Andric   // If we have an unfolded offset, generate a formula combining it with the
40830b57cec5SDimitry Andric   // registers collected.
40840fca6ea1SDimitry Andric   if (NewBase.UnfoldedOffset.isNonZero() && NewBase.UnfoldedOffset.isFixed()) {
40850b57cec5SDimitry Andric     assert(CombinedIntegerType && "Missing a type for the unfolded offset");
40860fca6ea1SDimitry Andric     Ops.push_back(SE.getConstant(CombinedIntegerType,
40870fca6ea1SDimitry Andric                                  NewBase.UnfoldedOffset.getFixedValue(), true));
40880fca6ea1SDimitry Andric     NewBase.UnfoldedOffset = Immediate::getFixed(0);
40890b57cec5SDimitry Andric     GenerateFormula(SE.getAddExpr(Ops));
40900b57cec5SDimitry Andric   }
40910b57cec5SDimitry Andric }
40920b57cec5SDimitry Andric 
40930b57cec5SDimitry Andric /// Helper function for LSRInstance::GenerateSymbolicOffsets.
40940b57cec5SDimitry Andric void LSRInstance::GenerateSymbolicOffsetsImpl(LSRUse &LU, unsigned LUIdx,
40950b57cec5SDimitry Andric                                               const Formula &Base, size_t Idx,
40960b57cec5SDimitry Andric                                               bool IsScaledReg) {
40970b57cec5SDimitry Andric   const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];
40980b57cec5SDimitry Andric   GlobalValue *GV = ExtractSymbol(G, SE);
40990b57cec5SDimitry Andric   if (G->isZero() || !GV)
41000b57cec5SDimitry Andric     return;
41010b57cec5SDimitry Andric   Formula F = Base;
41020b57cec5SDimitry Andric   F.BaseGV = GV;
41030b57cec5SDimitry Andric   if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
41040b57cec5SDimitry Andric     return;
41050b57cec5SDimitry Andric   if (IsScaledReg)
41060b57cec5SDimitry Andric     F.ScaledReg = G;
41070b57cec5SDimitry Andric   else
41080b57cec5SDimitry Andric     F.BaseRegs[Idx] = G;
41090b57cec5SDimitry Andric   (void)InsertFormula(LU, LUIdx, F);
41100b57cec5SDimitry Andric }
41110b57cec5SDimitry Andric 
41120b57cec5SDimitry Andric /// Generate reuse formulae using symbolic offsets.
41130b57cec5SDimitry Andric void LSRInstance::GenerateSymbolicOffsets(LSRUse &LU, unsigned LUIdx,
41140b57cec5SDimitry Andric                                           Formula Base) {
41150b57cec5SDimitry Andric   // We can't add a symbolic offset if the address already contains one.
41160b57cec5SDimitry Andric   if (Base.BaseGV) return;
41170b57cec5SDimitry Andric 
41180b57cec5SDimitry Andric   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
41190b57cec5SDimitry Andric     GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, i);
41200b57cec5SDimitry Andric   if (Base.Scale == 1)
41210b57cec5SDimitry Andric     GenerateSymbolicOffsetsImpl(LU, LUIdx, Base, /* Idx */ -1,
41220b57cec5SDimitry Andric                                 /* IsScaledReg */ true);
41230b57cec5SDimitry Andric }
41240b57cec5SDimitry Andric 
/// Helper function for LSRInstance::GenerateConstantOffsets.
///
/// For the register Base.BaseRegs[Idx] (or Base.ScaledReg when \p IsScaledReg),
/// generate formulae that shift each \p Worklist offset out of BaseOffset and
/// into the register, and one formula that folds the register's own extracted
/// immediate into BaseOffset.
void LSRInstance::GenerateConstantOffsetsImpl(
    LSRUse &LU, unsigned LUIdx, const Formula &Base,
    const SmallVectorImpl<Immediate> &Worklist, size_t Idx, bool IsScaledReg) {

  // Emit one formula with Offset subtracted from BaseOffset and added back
  // to the register G, if both operations are representable and legal.
  auto GenerateOffset = [&](const SCEV *G, Immediate Offset) {
    Formula F = Base;
    // Fixed and scalable immediates cannot be mixed in one offset.
    if (!Base.BaseOffset.isCompatibleImmediate(Offset))
      return;
    F.BaseOffset = Base.BaseOffset.subUnsigned(Offset);

    if (isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F)) {
      // Add the offset to the base register.
      const SCEV *NewOffset = Offset.getSCEV(SE, G->getType());
      const SCEV *NewG = SE.getAddExpr(NewOffset, G);
      // If it cancelled out, drop the base register, otherwise update it.
      if (NewG->isZero()) {
        if (IsScaledReg) {
          F.Scale = 0;
          F.ScaledReg = nullptr;
        } else
          F.deleteBaseReg(F.BaseRegs[Idx]);
        F.canonicalize(*L);
      } else if (IsScaledReg)
        F.ScaledReg = NewG;
      else
        F.BaseRegs[Idx] = NewG;

      (void)InsertFormula(LU, LUIdx, F);
    }
  };

  const SCEV *G = IsScaledReg ? Base.ScaledReg : Base.BaseRegs[Idx];

  // With constant offsets and constant steps, we can generate pre-inc
  // accesses by having the offset equal the step. So, for access #0 with a
  // step of 8, we generate a G - 8 base which would require the first access
  // to be ((G - 8) + 8),+,8. The pre-indexed access then updates the pointer
  // for itself and hopefully becomes the base for other accesses. This means
  // means that a single pre-indexed access can be generated to become the new
  // base pointer for each iteration of the loop, resulting in no extra add/sub
  // instructions for pointer updating.
  if (AMK == TTI::AMK_PreIndexed && LU.Kind == LSRUse::Address) {
    if (auto *GAR = dyn_cast<SCEVAddRecExpr>(G)) {
      if (auto *StepRec =
          dyn_cast<SCEVConstant>(GAR->getStepRecurrence(SE))) {
        const APInt &StepInt = StepRec->getAPInt();
        // Sign- or zero-extend the step to int64_t depending on its sign.
        int64_t Step = StepInt.isNegative() ?
          StepInt.getSExtValue() : StepInt.getZExtValue();

        // Try each fixed worklist offset shifted down by one step.
        for (Immediate Offset : Worklist) {
          if (Offset.isFixed()) {
            Offset = Immediate::getFixed(Offset.getFixedValue() - Step);
            GenerateOffset(G, Offset);
          }
        }
      }
    }
  }
  // Try each worklist offset unmodified.
  for (Immediate Offset : Worklist)
    GenerateOffset(G, Offset);

  // Finally, try folding the register's own immediate component into
  // BaseOffset. ExtractImmediate strips Imm out of G in place.
  Immediate Imm = ExtractImmediate(G, SE);
  if (G->isZero() || Imm.isZero() ||
      !Base.BaseOffset.isCompatibleImmediate(Imm))
    return;
  Formula F = Base;
  F.BaseOffset = F.BaseOffset.addUnsigned(Imm);
  if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy, F))
    return;
  if (IsScaledReg) {
    F.ScaledReg = G;
  } else {
    F.BaseRegs[Idx] = G;
    // We may generate non canonical Formula if G is a recurrent expr reg
    // related with current loop while F.ScaledReg is not.
    F.canonicalize(*L);
  }
  (void)InsertFormula(LU, LUIdx, F);
}
42050b57cec5SDimitry Andric 
42060b57cec5SDimitry Andric /// GenerateConstantOffsets - Generate reuse formulae using symbolic offsets.
42070b57cec5SDimitry Andric void LSRInstance::GenerateConstantOffsets(LSRUse &LU, unsigned LUIdx,
42080b57cec5SDimitry Andric                                           Formula Base) {
42090b57cec5SDimitry Andric   // TODO: For now, just add the min and max offset, because it usually isn't
42100b57cec5SDimitry Andric   // worthwhile looking at everything inbetween.
42110fca6ea1SDimitry Andric   SmallVector<Immediate, 2> Worklist;
42120b57cec5SDimitry Andric   Worklist.push_back(LU.MinOffset);
42130b57cec5SDimitry Andric   if (LU.MaxOffset != LU.MinOffset)
42140b57cec5SDimitry Andric     Worklist.push_back(LU.MaxOffset);
42150b57cec5SDimitry Andric 
42160b57cec5SDimitry Andric   for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i)
42170b57cec5SDimitry Andric     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, i);
42180b57cec5SDimitry Andric   if (Base.Scale == 1)
42190b57cec5SDimitry Andric     GenerateConstantOffsetsImpl(LU, LUIdx, Base, Worklist, /* Idx */ -1,
42200b57cec5SDimitry Andric                                 /* IsScaledReg */ true);
42210b57cec5SDimitry Andric }
42220b57cec5SDimitry Andric 
/// For ICmpZero, check to see if we can scale up the comparison. For example, x
/// == y -> x*c == y*c.
///
/// Every multiplication (of the offsets, base registers, scaled register and
/// unfolded offset) is verified against overflow before the scaled formula is
/// inserted; any overflow abandons the current factor.
void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
                                         Formula Base) {
  if (LU.Kind != LSRUse::ICmpZero) return;

  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;
  if (SE.getTypeSizeInBits(IntTy) > 64) return;

  // Don't do this if there is more than one offset.
  if (LU.MinOffset != LU.MaxOffset) return;

  // Check if transformation is valid. It is illegal to multiply pointer.
  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
    return;
  for (const SCEV *BaseReg : Base.BaseRegs)
    if (BaseReg->getType()->isPointerTy())
      return;
  assert(!Base.BaseGV && "ICmpZero use is not legal!");

  // Check each interesting stride.
  for (int64_t Factor : Factors) {
    // Check that Factor can be represented by IntTy
    if (!ConstantInt::isValueValidForType(IntTy, Factor))
      continue;
    // Check that the multiplication doesn't overflow.
    // (INT64_MIN * -1 would overflow, so reject that pairing up front.)
    if (Base.BaseOffset.isMin() && Factor == -1)
      continue;
    // Not supporting scalable immediates.
    if (Base.BaseOffset.isNonZero() && Base.BaseOffset.isScalable())
      continue;
    Immediate NewBaseOffset = Base.BaseOffset.mulUnsigned(Factor);
    assert(Factor != 0 && "Zero factor not expected!");
    // Detect overflow by checking that dividing back recovers the original.
    if (NewBaseOffset.getFixedValue() / Factor !=
        Base.BaseOffset.getFixedValue())
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, NewBaseOffset.getFixedValue()))
      continue;

    // Check that multiplying with the use offset doesn't overflow.
    Immediate Offset = LU.MinOffset;
    if (Offset.isMin() && Factor == -1)
      continue;
    Offset = Offset.mulUnsigned(Factor);
    if (Offset.getFixedValue() / Factor != LU.MinOffset.getFixedValue())
      continue;
    // If the offset will be truncated at this use, check that it is in bounds.
    if (!IntTy->isPointerTy() &&
        !ConstantInt::isValueValidForType(IntTy, Offset.getFixedValue()))
      continue;

    Formula F = Base;
    F.BaseOffset = NewBaseOffset;

    // Check that this scale is legal.
    if (!isLegalUse(TTI, Offset, Offset, LU.Kind, LU.AccessTy, F))
      continue;

    // Compensate for the use having MinOffset built into it.
    F.BaseOffset = F.BaseOffset.addUnsigned(Offset).subUnsigned(LU.MinOffset);

    const SCEV *FactorS = SE.getConstant(IntTy, Factor);

    // Check that multiplying with each base register doesn't overflow.
    // getExactSDiv returning something other than the original register
    // means the multiply wrapped; bail out to the next factor.
    for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
      F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
      if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
        goto next;
    }

    // Check that multiplying with the scaled register doesn't overflow.
    if (F.ScaledReg) {
      F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
      if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
        continue;
    }

    // Check that multiplying with the unfolded offset doesn't overflow.
    if (F.UnfoldedOffset.isNonZero()) {
      if (F.UnfoldedOffset.isMin() && Factor == -1)
        continue;
      F.UnfoldedOffset = F.UnfoldedOffset.mulUnsigned(Factor);
      if (F.UnfoldedOffset.getFixedValue() / Factor !=
          Base.UnfoldedOffset.getFixedValue())
        continue;
      // If the offset will be truncated, check that it is in bounds.
      if (!IntTy->isPointerTy() && !ConstantInt::isValueValidForType(
                                       IntTy, F.UnfoldedOffset.getFixedValue()))
        continue;
    }

    // If we make it here and it's legal, add it.
    (void)InsertFormula(LU, LUIdx, F);
  next:;
  }
}
43230b57cec5SDimitry Andric 
/// Generate stride factor reuse formulae by making use of scaled-offset address
/// modes, for example.
///
/// Note that Base is taken by value and mutated (Scale/HasBaseReg) as each
/// factor is tried; the caller's formula is unaffected.
void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx, Formula Base) {
  // Determine the integer type for the base formula.
  Type *IntTy = Base.getType();
  if (!IntTy) return;

  // If this Formula already has a scaled register, we can't add another one.
  // Try to unscale the formula to generate a better scale.
  if (Base.Scale != 0 && !Base.unscale())
    return;

  assert(Base.Scale == 0 && "unscale did not did its job!");

  // Check each interesting stride.
  for (int64_t Factor : Factors) {
    Base.Scale = Factor;
    Base.HasBaseReg = Base.BaseRegs.size() > 1;
    // Check whether this scale is going to be legal.
    if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                    Base)) {
      // As a special-case, handle special out-of-loop Basic users specially.
      // TODO: Reconsider this special case.
      if (LU.Kind == LSRUse::Basic &&
          isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LSRUse::Special,
                     LU.AccessTy, Base) &&
          LU.AllFixupsOutsideLoop)
        LU.Kind = LSRUse::Special;
      else
        continue;
    }
    // For an ICmpZero, negating a solitary base register won't lead to
    // new solutions.
    if (LU.Kind == LSRUse::ICmpZero && !Base.HasBaseReg &&
        Base.BaseOffset.isZero() && !Base.BaseGV)
      continue;
    // For each addrec base reg, if its loop is current loop, apply the scale.
    for (size_t i = 0, e = Base.BaseRegs.size(); i != e; ++i) {
      const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Base.BaseRegs[i]);
      if (AR && (AR->getLoop() == L || LU.AllFixupsOutsideLoop)) {
        const SCEV *FactorS = SE.getConstant(IntTy, Factor);
        if (FactorS->isZero())
          continue;
        // Divide out the factor, ignoring high bits, since we'll be
        // scaling the value back up in the end.
        if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true))
          if (!Quotient->isZero()) {
            // TODO: This could be optimized to avoid all the copying.
            // Replace the base register with its quotient and record the
            // factor as the formula's scale.
            Formula F = Base;
            F.ScaledReg = Quotient;
            F.deleteBaseReg(F.BaseRegs[i]);
            // The canonical representation of 1*reg is reg, which is already in
            // Base. In that case, do not try to insert the formula, it will be
            // rejected anyway.
            if (F.Scale == 1 && (F.BaseRegs.empty() ||
                                 (AR->getLoop() != L && LU.AllFixupsOutsideLoop)))
              continue;
            // If AllFixupsOutsideLoop is true and F.Scale is 1, we may generate
            // non canonical Formula with ScaledReg's loop not being L.
            if (F.Scale == 1 && LU.AllFixupsOutsideLoop)
              F.canonicalize(*L);
            (void)InsertFormula(LU, LUIdx, F);
          }
      }
    }
  }
}
43910b57cec5SDimitry Andric 
439206c3fb27SDimitry Andric /// Extend/Truncate \p Expr to \p ToTy considering post-inc uses in \p Loops.
439306c3fb27SDimitry Andric /// For all PostIncLoopSets in \p Loops, first de-normalize \p Expr, then
439406c3fb27SDimitry Andric /// perform the extension/truncate and normalize again, as the normalized form
439506c3fb27SDimitry Andric /// can result in folds that are not valid in the post-inc use contexts. The
439606c3fb27SDimitry Andric /// expressions for all PostIncLoopSets must match, otherwise return nullptr.
439706c3fb27SDimitry Andric static const SCEV *
439806c3fb27SDimitry Andric getAnyExtendConsideringPostIncUses(ArrayRef<PostIncLoopSet> Loops,
439906c3fb27SDimitry Andric                                    const SCEV *Expr, Type *ToTy,
440006c3fb27SDimitry Andric                                    ScalarEvolution &SE) {
440106c3fb27SDimitry Andric   const SCEV *Result = nullptr;
440206c3fb27SDimitry Andric   for (auto &L : Loops) {
440306c3fb27SDimitry Andric     auto *DenormExpr = denormalizeForPostIncUse(Expr, L, SE);
440406c3fb27SDimitry Andric     const SCEV *NewDenormExpr = SE.getAnyExtendExpr(DenormExpr, ToTy);
440506c3fb27SDimitry Andric     const SCEV *New = normalizeForPostIncUse(NewDenormExpr, L, SE);
440606c3fb27SDimitry Andric     if (!New || (Result && New != Result))
440706c3fb27SDimitry Andric       return nullptr;
440806c3fb27SDimitry Andric     Result = New;
440906c3fb27SDimitry Andric   }
441006c3fb27SDimitry Andric 
441106c3fb27SDimitry Andric   assert(Result && "failed to create expression");
441206c3fb27SDimitry Andric   return Result;
441306c3fb27SDimitry Andric }
441406c3fb27SDimitry Andric 
/// Generate reuse formulae from different IV types.
void LSRInstance::GenerateTruncates(LSRUse &LU, unsigned LUIdx, Formula Base) {
  // Don't bother truncating symbolic values.
  if (Base.BaseGV) return;

  // Determine the integer type for the base formula.
  Type *DstTy = Base.getType();
  if (!DstTy) return;
  // A pointer-typed formula cannot be meaningfully extended or truncated.
  if (DstTy->isPointerTy())
    return;

  // It is invalid to extend a pointer type so exit early if ScaledReg or
  // any of the BaseRegs are pointers.
  if (Base.ScaledReg && Base.ScaledReg->getType()->isPointerTy())
    return;
  if (any_of(Base.BaseRegs,
             [](const SCEV *S) { return S->getType()->isPointerTy(); }))
    return;

  // Collect the post-inc loop set of every fixup on this use, so the
  // extension/truncation below can be validated in each post-increment
  // use context (see getAnyExtendConsideringPostIncUses).
  SmallVector<PostIncLoopSet> Loops;
  for (auto &LF : LU.Fixups)
    Loops.push_back(LF.PostIncLoops);

  for (Type *SrcTy : Types) {
    // Only consider a different type whose truncation back to DstTy is free
    // on the target; otherwise reuse at SrcTy buys nothing.
    if (SrcTy != DstTy && TTI.isTruncateFree(SrcTy, DstTy)) {
      Formula F = Base;

      // Sometimes SCEV is able to prove zero during ext transform. It may
      // happen if SCEV did not do all possible transforms while creating the
      // initial node (maybe due to depth limitations), but it can do them while
      // taking ext.
      if (F.ScaledReg) {
        const SCEV *NewScaledReg =
            getAnyExtendConsideringPostIncUses(Loops, F.ScaledReg, SrcTy, SE);
        // Skip this type if the extension failed or folded to zero.
        if (!NewScaledReg || NewScaledReg->isZero())
          continue;
        F.ScaledReg = NewScaledReg;
      }
      bool HasZeroBaseReg = false;
      for (const SCEV *&BaseReg : F.BaseRegs) {
        const SCEV *NewBaseReg =
            getAnyExtendConsideringPostIncUses(Loops, BaseReg, SrcTy, SE);
        if (!NewBaseReg || NewBaseReg->isZero()) {
          HasZeroBaseReg = true;
          break;
        }
        BaseReg = NewBaseReg;
      }
      if (HasZeroBaseReg)
        continue;

      // TODO: This assumes we've done basic processing on all uses and
      // have an idea what the register usage is.
      if (!F.hasRegsUsedByUsesOtherThan(LUIdx, RegUses))
        continue;

      F.canonicalize(*L);
      (void)InsertFormula(LU, LUIdx, F);
    }
  }
}
44760b57cec5SDimitry Andric 
namespace {

/// Helper class for GenerateCrossUseConstantOffsets. It's used to defer
/// modifications so that the search phase doesn't have to worry about the data
/// structures moving underneath it.
struct WorkItem {
  // Index (into LSRInstance::Uses) of the LSRUse whose formulae will be
  // rewritten.
  size_t LUIdx;

  // Constant offset to apply when re-expressing formulae in terms of OrigReg.
  Immediate Imm;

  // The register whose formulae are to be adjusted by Imm.
  const SCEV *OrigReg;

  WorkItem(size_t LI, Immediate I, const SCEV *R)
      : LUIdx(LI), Imm(I), OrigReg(R) {}

  void print(raw_ostream &OS) const;
  void dump() const;
};

} // end anonymous namespace
44950b57cec5SDimitry Andric 
#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
// Print a human-readable description of this deferred cross-use offset edit.
void WorkItem::print(raw_ostream &OS) const {
  OS << "in formulae referencing " << *OrigReg << " in use " << LUIdx
     << " , add offset " << Imm;
}

// Dump to stderr; only compiled in debug / dump-enabled builds.
LLVM_DUMP_METHOD void WorkItem::dump() const {
  print(errs()); errs() << '\n';
}
#endif
45060b57cec5SDimitry Andric 
/// Look for registers which are a constant distance apart and try to form reuse
/// opportunities between them.
void LSRInstance::GenerateCrossUseConstantOffsets() {
  // Group the registers by their value without any added constant offset.
  using ImmMapTy = std::map<Immediate, const SCEV *, KeyOrderTargetImmediate>;

  // Map each stripped base expression to the (offset -> original register)
  // pairs that reduce to it. Sequence records the bases in insertion order so
  // the search below iterates deterministically.
  DenseMap<const SCEV *, ImmMapTy> Map;
  DenseMap<const SCEV *, SmallBitVector> UsedByIndicesMap;
  SmallVector<const SCEV *, 8> Sequence;
  for (const SCEV *Use : RegUses) {
    const SCEV *Reg = Use; // Make a copy for ExtractImmediate to modify.
    Immediate Imm = ExtractImmediate(Reg, SE);
    auto Pair = Map.insert(std::make_pair(Reg, ImmMapTy()));
    if (Pair.second)
      Sequence.push_back(Reg);
    Pair.first->second.insert(std::make_pair(Imm, Use));
    UsedByIndicesMap[Reg] |= RegUses.getUsedByIndices(Use);
  }

  // Now examine each set of registers with the same base value. Build up
  // a list of work to do and do the work in a separate step so that we're
  // not adding formulae and register counts while we're searching.
  SmallVector<WorkItem, 32> WorkItems;
  SmallSet<std::pair<size_t, Immediate>, 32, KeyOrderSizeTAndImmediate>
      UniqueItems;
  for (const SCEV *Reg : Sequence) {
    const ImmMapTy &Imms = Map.find(Reg)->second;

    // It's not worthwhile looking for reuse if there's only one offset.
    if (Imms.size() == 1)
      continue;

    LLVM_DEBUG(dbgs() << "Generating cross-use offsets for " << *Reg << ':';
               for (const auto &Entry
                    : Imms) dbgs()
               << ' ' << Entry.first;
               dbgs() << '\n');

    // Examine each offset.
    for (ImmMapTy::const_iterator J = Imms.begin(), JE = Imms.end();
         J != JE; ++J) {
      const SCEV *OrigReg = J->second;

      Immediate JImm = J->first;
      const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(OrigReg);

      // A non-constant register whose base is referenced by only a single use
      // can't profit from cross-use reuse.
      if (!isa<SCEVConstant>(OrigReg) &&
          UsedByIndicesMap[Reg].count() == 1) {
        LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
                          << '\n');
        continue;
      }

      // Conservatively examine offsets between this orig reg a few selected
      // other orig regs.
      Immediate First = Imms.begin()->first;
      Immediate Last = std::prev(Imms.end())->first;
      if (!First.isCompatibleImmediate(Last)) {
        LLVM_DEBUG(dbgs() << "Skipping cross-use reuse for " << *OrigReg
                          << "\n");
        continue;
      }
      // Only scalable if both terms are scalable, or if one is scalable and
      // the other is 0.
      bool Scalable = First.isScalable() || Last.isScalable();
      int64_t FI = First.getKnownMinValue();
      int64_t LI = Last.getKnownMinValue();
      // Compute (First + Last) / 2 without overflow using the fact that
      // First + Last = 2 * (First & Last) + (First ^ Last).
      int64_t Avg = (FI & LI) + ((FI ^ LI) >> 1);
      // If the result is negative and FI is odd and LI even (or vice versa),
      // we rounded towards -inf. Add 1 in that case, to round towards 0.
      Avg = Avg + ((FI ^ LI) & ((uint64_t)Avg >> 63));
      // To bound the amount of work, only pair this offset with the smallest,
      // largest, and (approximately) median offsets of the group.
      ImmMapTy::const_iterator OtherImms[] = {
          Imms.begin(), std::prev(Imms.end()),
          Imms.lower_bound(Immediate::get(Avg, Scalable))};
      for (const auto &M : OtherImms) {
        if (M == J || M == JE) continue;
        if (!JImm.isCompatibleImmediate(M->first))
          continue;

        // Compute the difference between the two.
        Immediate Imm = JImm.subUnsigned(M->first);
        for (unsigned LUIdx : UsedByIndices.set_bits())
          // Make a memo of this use, offset, and register tuple.
          if (UniqueItems.insert(std::make_pair(LUIdx, Imm)).second)
            WorkItems.push_back(WorkItem(LUIdx, Imm, OrigReg));
      }
    }
  }

  // Release the search-phase data structures before mutating any formulae.
  Map.clear();
  Sequence.clear();
  UsedByIndicesMap.clear();
  UniqueItems.clear();

  // Now iterate through the worklist and add new formulae.
  for (const WorkItem &WI : WorkItems) {
    size_t LUIdx = WI.LUIdx;
    LSRUse &LU = Uses[LUIdx];
    Immediate Imm = WI.Imm;
    const SCEV *OrigReg = WI.OrigReg;

    Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType());
    const SCEV *NegImmS = Imm.getNegativeSCEV(SE, IntTy);
    unsigned BitWidth = SE.getTypeSizeInBits(IntTy);

    // TODO: Use a more targeted data structure.
    for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
      Formula F = LU.Formulae[L];
      // FIXME: The code for the scaled and unscaled registers looks
      // very similar but slightly different. Investigate if they
      // could be merged. That way, we would not have to unscale the
      // Formula.
      F.unscale();
      // Use the immediate in the scaled register.
      if (F.ScaledReg == OrigReg) {
        if (!F.BaseOffset.isCompatibleImmediate(Imm))
          continue;
        Immediate Offset = F.BaseOffset.addUnsigned(Imm.mulUnsigned(F.Scale));
        // Don't create 50 + reg(-50).
        const SCEV *S = Offset.getNegativeSCEV(SE, IntTy);
        if (F.referencesReg(S))
          continue;
        Formula NewF = F;
        NewF.BaseOffset = Offset;
        if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        NewF))
          continue;
        NewF.ScaledReg = SE.getAddExpr(NegImmS, NewF.ScaledReg);

        // If the new scale is a constant in a register, and adding the constant
        // value to the immediate would produce a value closer to zero than the
        // immediate itself, then the formula isn't worthwhile.
        if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewF.ScaledReg)) {
          // FIXME: Do we need to do something for scalable immediates here?
          //        A scalable SCEV won't be constant, but we might still have
          //        something in the offset? Bail out for now to be safe.
          if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
            continue;
          if (C->getValue()->isNegative() !=
                  (NewF.BaseOffset.isLessThanZero()) &&
              (C->getAPInt().abs() * APInt(BitWidth, F.Scale))
                  .ule(std::abs(NewF.BaseOffset.getFixedValue())))
            continue;
        }

        // OK, looks good.
        NewF.canonicalize(*this->L);
        (void)InsertFormula(LU, LUIdx, NewF);
      } else {
        // Use the immediate in a base register.
        for (size_t N = 0, NE = F.BaseRegs.size(); N != NE; ++N) {
          const SCEV *BaseReg = F.BaseRegs[N];
          if (BaseReg != OrigReg)
            continue;
          Formula NewF = F;
          if (!NewF.BaseOffset.isCompatibleImmediate(Imm) ||
              !NewF.UnfoldedOffset.isCompatibleImmediate(Imm) ||
              !NewF.BaseOffset.isCompatibleImmediate(NewF.UnfoldedOffset))
            continue;
          NewF.BaseOffset = NewF.BaseOffset.addUnsigned(Imm);
          if (!isLegalUse(TTI, LU.MinOffset, LU.MaxOffset,
                          LU.Kind, LU.AccessTy, NewF)) {
            if (AMK == TTI::AMK_PostIndexed &&
                mayUsePostIncMode(TTI, LU, OrigReg, this->L, SE))
              continue;
            // The offset can't be folded into the address; try carrying it
            // in UnfoldedOffset instead, if the target can add it legally.
            Immediate NewUnfoldedOffset = NewF.UnfoldedOffset.addUnsigned(Imm);
            if (!isLegalAddImmediate(TTI, NewUnfoldedOffset))
              continue;
            NewF = F;
            NewF.UnfoldedOffset = NewUnfoldedOffset;
          }
          NewF.BaseRegs[N] = SE.getAddExpr(NegImmS, BaseReg);

          // If the new formula has a constant in a register, and adding the
          // constant value to the immediate would produce a value closer to
          // zero than the immediate itself, then the formula isn't worthwhile.
          for (const SCEV *NewReg : NewF.BaseRegs)
            if (const SCEVConstant *C = dyn_cast<SCEVConstant>(NewReg)) {
              if (NewF.BaseOffset.isNonZero() && NewF.BaseOffset.isScalable())
                goto skip_formula;
              if ((C->getAPInt() + NewF.BaseOffset.getFixedValue())
                      .abs()
                      .slt(std::abs(NewF.BaseOffset.getFixedValue())) &&
                  (C->getAPInt() + NewF.BaseOffset.getFixedValue())
                          .countr_zero() >=
                      (unsigned)llvm::countr_zero<uint64_t>(
                          NewF.BaseOffset.getFixedValue()))
                goto skip_formula;
            }

          // Ok, looks good.
          NewF.canonicalize(*this->L);
          (void)InsertFormula(LU, LUIdx, NewF);
          break;
        skip_formula:;
        }
      }
    }
  }
}
47090b57cec5SDimitry Andric 
47100b57cec5SDimitry Andric /// Generate formulae for each use.
47110b57cec5SDimitry Andric void
47120b57cec5SDimitry Andric LSRInstance::GenerateAllReuseFormulae() {
47130b57cec5SDimitry Andric   // This is split into multiple loops so that hasRegsUsedByUsesOtherThan
47140b57cec5SDimitry Andric   // queries are more precise.
47150b57cec5SDimitry Andric   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
47160b57cec5SDimitry Andric     LSRUse &LU = Uses[LUIdx];
47170b57cec5SDimitry Andric     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47180b57cec5SDimitry Andric       GenerateReassociations(LU, LUIdx, LU.Formulae[i]);
47190b57cec5SDimitry Andric     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47200b57cec5SDimitry Andric       GenerateCombinations(LU, LUIdx, LU.Formulae[i]);
47210b57cec5SDimitry Andric   }
47220b57cec5SDimitry Andric   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
47230b57cec5SDimitry Andric     LSRUse &LU = Uses[LUIdx];
47240b57cec5SDimitry Andric     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47250b57cec5SDimitry Andric       GenerateSymbolicOffsets(LU, LUIdx, LU.Formulae[i]);
47260b57cec5SDimitry Andric     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47270b57cec5SDimitry Andric       GenerateConstantOffsets(LU, LUIdx, LU.Formulae[i]);
47280b57cec5SDimitry Andric     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47290b57cec5SDimitry Andric       GenerateICmpZeroScales(LU, LUIdx, LU.Formulae[i]);
47300b57cec5SDimitry Andric     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47310b57cec5SDimitry Andric       GenerateScales(LU, LUIdx, LU.Formulae[i]);
47320b57cec5SDimitry Andric   }
47330b57cec5SDimitry Andric   for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
47340b57cec5SDimitry Andric     LSRUse &LU = Uses[LUIdx];
47350b57cec5SDimitry Andric     for (size_t i = 0, f = LU.Formulae.size(); i != f; ++i)
47360b57cec5SDimitry Andric       GenerateTruncates(LU, LUIdx, LU.Formulae[i]);
47370b57cec5SDimitry Andric   }
47380b57cec5SDimitry Andric 
47390b57cec5SDimitry Andric   GenerateCrossUseConstantOffsets();
47400b57cec5SDimitry Andric 
47410b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "\n"
47420b57cec5SDimitry Andric                        "After generating reuse formulae:\n";
47430b57cec5SDimitry Andric              print_uses(dbgs()));
47440b57cec5SDimitry Andric }
47450b57cec5SDimitry Andric 
/// If there are multiple formulae with the same set of registers used
/// by other uses, pick the best one and delete the others.
void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;
  SmallPtrSet<const SCEV *, 16> LoserRegs;
#ifndef NDEBUG
  bool ChangedFormulae = false;
#endif

  // Collect the best formula for each unique set of shared registers. This
  // is reset for each use.
  using BestFormulaeTy =
      DenseMap<SmallVector<const SCEV *, 4>, size_t, UniquifierDenseMapInfo>;

  BestFormulaeTy BestFormulae;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size();
         FIdx != NumForms; ++FIdx) {
      Formula &F = LU.Formulae[FIdx];

      // Some formulas are instant losers. For example, they may depend on
      // nonexistent AddRecs from other loops. These need to be filtered
      // immediately, otherwise heuristics could choose them over others leading
      // to an unsatisfactory solution. Passing LoserRegs into RateFormula here
      // avoids the need to recompute this information across formulae using the
      // same bad AddRec. Passing LoserRegs is also essential unless we remove
      // the corresponding bad register from the Regs set.
      Cost CostF(L, SE, TTI, AMK);
      Regs.clear();
      CostF.RateFormula(F, Regs, VisitedRegs, LU, &LoserRegs);
      if (CostF.isLoser()) {
        // During initial formula generation, undesirable formulae are generated
        // by uses within other loops that have some non-trivial address mode or
        // use the postinc form of the IV. LSR needs to provide these formulae
        // as the basis of rediscovering the desired formula that uses an AddRec
        // corresponding to the existing phi. Once all formulae have been
        // generated, these initial losers may be pruned.
        LLVM_DEBUG(dbgs() << "  Filtering loser "; F.print(dbgs());
                   dbgs() << "\n");
      }
      else {
        // Key this formula by the set of its registers that are shared with
        // other uses; formulae with the same key compete for one slot.
        SmallVector<const SCEV *, 4> Key;
        for (const SCEV *Reg : F.BaseRegs) {
          if (RegUses.isRegUsedByUsesOtherThan(Reg, LUIdx))
            Key.push_back(Reg);
        }
        if (F.ScaledReg &&
            RegUses.isRegUsedByUsesOtherThan(F.ScaledReg, LUIdx))
          Key.push_back(F.ScaledReg);
        // Unstable sort by host order ok, because this is only used for
        // uniquifying.
        llvm::sort(Key);

        std::pair<BestFormulaeTy::const_iterator, bool> P =
          BestFormulae.insert(std::make_pair(Key, FIdx));
        // First formula with this key: keep it and move on.
        if (P.second)
          continue;

        Formula &Best = LU.Formulae[P.first->second];

        Cost CostBest(L, SE, TTI, AMK);
        Regs.clear();
        CostBest.RateFormula(Best, Regs, VisitedRegs, LU);
        // If the new formula is cheaper, swap it into the "best" slot so the
        // previous best is the one that falls through to deletion below.
        if (CostF.isLess(CostBest))
          std::swap(F, Best);
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n"
                             "    in favor of formula ";
                   Best.print(dbgs()); dbgs() << '\n');
      }
#ifndef NDEBUG
      ChangedFormulae = true;
#endif
      // Both paths reach here with F being a formula to discard: either an
      // instant loser, or the worse of two formulae with the same key.
      // Deleting swaps in the last formula, so revisit this index.
      LU.DeleteFormula(F);
      --FIdx;
      --NumForms;
      Any = true;
    }

    // Now that we've filtered out some formulae, recompute the Regs set.
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}
48460b57cec5SDimitry Andric 
48470b57cec5SDimitry Andric /// Estimate the worst-case number of solutions the solver might have to
48480b57cec5SDimitry Andric /// consider. It almost never considers this many solutions because it prune the
48490b57cec5SDimitry Andric /// search space, but the pruning isn't always sufficient.
48500b57cec5SDimitry Andric size_t LSRInstance::EstimateSearchSpaceComplexity() const {
48510b57cec5SDimitry Andric   size_t Power = 1;
48520b57cec5SDimitry Andric   for (const LSRUse &LU : Uses) {
48530b57cec5SDimitry Andric     size_t FSize = LU.Formulae.size();
48540b57cec5SDimitry Andric     if (FSize >= ComplexityLimit) {
48550b57cec5SDimitry Andric       Power = ComplexityLimit;
48560b57cec5SDimitry Andric       break;
48570b57cec5SDimitry Andric     }
48580b57cec5SDimitry Andric     Power *= FSize;
48590b57cec5SDimitry Andric     if (Power >= ComplexityLimit)
48600b57cec5SDimitry Andric       break;
48610b57cec5SDimitry Andric   }
48620b57cec5SDimitry Andric   return Power;
48630b57cec5SDimitry Andric }
48640b57cec5SDimitry Andric 
/// When one formula uses a superset of the registers of another formula, it
/// won't help reduce register pressure (though it may not necessarily hurt
/// register pressure); remove it to simplify the system.
void LSRInstance::NarrowSearchSpaceByDetectingSupersets() {
  if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
    LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

    LLVM_DEBUG(dbgs() << "Narrowing the search space by eliminating formulae "
                         "which use a superset of registers used by other "
                         "formulae.\n");

    for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
      LSRUse &LU = Uses[LUIdx];
      bool Any = false;
      for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
        Formula &F = LU.Formulae[i];
        // Skip formulae with a scalable base offset; folding a register's
        // constant into a fixed immediate below would not be meaningful.
        if (F.BaseOffset.isNonZero() && F.BaseOffset.isScalable())
          continue;
        // Look for a formula with a constant or GV in a register. If the use
        // also has a formula with that same value in an immediate field,
        // delete the one that uses a register.
        for (SmallVectorImpl<const SCEV *>::const_iterator
             I = F.BaseRegs.begin(), E = F.BaseRegs.end(); I != E; ++I) {
          if (const SCEVConstant *C = dyn_cast<SCEVConstant>(*I)) {
            // Build a candidate formula with the constant folded into the
            // immediate and the register removed; keep F only if no
            // equivalent formula already exists.
            Formula NewF = F;
            //FIXME: Formulas should store bitwidth to do wrapping properly.
            //       See PR41034.
            NewF.BaseOffset =
                Immediate::getFixed(NewF.BaseOffset.getFixedValue() +
                                    (uint64_t)C->getValue()->getSExtValue());
            NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                (I - F.BaseRegs.begin()));
            if (LU.HasFormulaWithSameRegs(NewF)) {
              LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                         dbgs() << '\n');
              LU.DeleteFormula(F);
              // Re-examine index i and shrink the bound; the formula list
              // just got smaller.
              --i;
              --e;
              Any = true;
              break;
            }
          } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(*I)) {
            // Same idea for a global value: fold it into BaseGV (only if the
            // formula doesn't already carry one) and drop the register.
            if (GlobalValue *GV = dyn_cast<GlobalValue>(U->getValue()))
              if (!F.BaseGV) {
                Formula NewF = F;
                NewF.BaseGV = GV;
                NewF.BaseRegs.erase(NewF.BaseRegs.begin() +
                                    (I - F.BaseRegs.begin()));
                if (LU.HasFormulaWithSameRegs(NewF)) {
                  LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs());
                             dbgs() << '\n');
                  LU.DeleteFormula(F);
                  --i;
                  --e;
                  Any = true;
                  break;
                }
              }
          }
        }
      }
      if (Any)
        LU.RecomputeRegs(LUIdx, RegUses);
    }

    LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
  }
}
49330b57cec5SDimitry Andric 
/// When there are many registers for expressions like A, A+1, A+2, etc.,
/// allocate a single register for them.
void LSRInstance::NarrowSearchSpaceByCollapsingUnrolledCode() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by assuming that uses separated "
                "by a constant offset will use the same registers.\n");

  // This is especially useful for unrolled loops.

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    for (const Formula &F : LU.Formulae) {
      // Only consider plain "register + offset" formulae with no real
      // scaling.
      if (F.BaseOffset.isZero() || (F.Scale != 0 && F.Scale != 1))
        continue;

      // Find another use whose formula matches F apart from the offset.
      LSRUse *LUThatHas = FindUseWithSimilarFormula(F, LU);
      if (!LUThatHas)
        continue;

      // The surviving use must be able to legally absorb F's base offset.
      if (!reconcileNewOffset(*LUThatHas, F.BaseOffset, /*HasBaseReg=*/ false,
                              LU.Kind, LU.AccessTy))
        continue;

      LLVM_DEBUG(dbgs() << "  Deleting use "; LU.print(dbgs()); dbgs() << '\n');

      LUThatHas->AllFixupsOutsideLoop &= LU.AllFixupsOutsideLoop;

      // Transfer the fixups of LU to LUThatHas, folding F's base offset into
      // each fixup's own offset.
      for (LSRFixup &Fixup : LU.Fixups) {
        Fixup.Offset += F.BaseOffset;
        LUThatHas->pushFixup(Fixup);
        LLVM_DEBUG(dbgs() << "New fixup has offset " << Fixup.Offset << '\n');
      }

      // Delete formulae from the new use which are no longer legal.
      // (This inner F intentionally shadows the outer formula reference.)
      bool Any = false;
      for (size_t i = 0, e = LUThatHas->Formulae.size(); i != e; ++i) {
        Formula &F = LUThatHas->Formulae[i];
        if (!isLegalUse(TTI, LUThatHas->MinOffset, LUThatHas->MaxOffset,
                        LUThatHas->Kind, LUThatHas->AccessTy, F)) {
          LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
          LUThatHas->DeleteFormula(F);
          // Re-examine index i and shrink the bound; the formula list just
          // got smaller.
          --i;
          --e;
          Any = true;
        }
      }

      if (Any)
        LUThatHas->RecomputeRegs(LUThatHas - &Uses.front(), RegUses);

      // Delete the old use.
      DeleteUse(LU, LUIdx);
      // Compensate for the outer loop's increment: a different use now
      // occupies this index.
      --LUIdx;
      --NumUses;
      break;
    }
  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
49990b57cec5SDimitry Andric 
50000b57cec5SDimitry Andric /// Call FilterOutUndesirableDedicatedRegisters again, if necessary, now that
50010b57cec5SDimitry Andric /// we've done more filtering, as it may be able to find more formulae to
50020b57cec5SDimitry Andric /// eliminate.
50030b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters(){
50040b57cec5SDimitry Andric   if (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
50050b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
50060b57cec5SDimitry Andric 
50070b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Narrowing the search space by re-filtering out "
50080b57cec5SDimitry Andric                          "undesirable dedicated registers.\n");
50090b57cec5SDimitry Andric 
50100b57cec5SDimitry Andric     FilterOutUndesirableDedicatedRegisters();
50110b57cec5SDimitry Andric 
50120b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
50130b57cec5SDimitry Andric   }
50140b57cec5SDimitry Andric }
50150b57cec5SDimitry Andric 
/// If a LSRUse has multiple formulae with the same ScaledReg and Scale.
/// Pick the best one and delete the others.
/// This narrowing heuristic is to keep as many formulae with different
/// Scale and ScaledReg pair as possible while narrowing the search space.
/// The benefit is that it is more likely to find out a better solution
/// from a formulae set with more Scale and ScaledReg variations than
/// a formulae set with the same Scale and ScaledReg. The picking winner
/// reg heuristic will often keep the formulae with the same Scale and
/// ScaledReg and filter others, and we want to avoid that if possible.
void LSRInstance::NarrowSearchSpaceByFilterFormulaWithSameScaledReg() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(
      dbgs() << "The search space is too complex.\n"
                "Narrowing the search space by choosing the best Formula "
                "from the Formulae with the same Scale and ScaledReg.\n");

  // Map the "Scale * ScaledReg" pair to the best formula of current LSRUse.
  using BestFormulaeTy = DenseMap<std::pair<const SCEV *, int64_t>, size_t>;

  BestFormulaeTy BestFormulae;
#ifndef NDEBUG
  bool ChangedFormulae = false;
#endif
  DenseSet<const SCEV *> VisitedRegs;
  SmallPtrSet<const SCEV *, 16> Regs;

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    LLVM_DEBUG(dbgs() << "Filtering for use "; LU.print(dbgs());
               dbgs() << '\n');

    // Return true if Formula FA is better than Formula FB.
    auto IsBetterThan = [&](Formula &FA, Formula &FB) {
      // First we will try to choose the Formula with fewer new registers.
      // For a register used by current Formula, the more the register is
      // shared among LSRUses, the less we increase the register number
      // counter of the formula.
      size_t FARegNum = 0;
      for (const SCEV *Reg : FA.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FARegNum += (NumUses - UsedByIndices.count() + 1);
      }
      size_t FBRegNum = 0;
      for (const SCEV *Reg : FB.BaseRegs) {
        const SmallBitVector &UsedByIndices = RegUses.getUsedByIndices(Reg);
        FBRegNum += (NumUses - UsedByIndices.count() + 1);
      }
      if (FARegNum != FBRegNum)
        return FARegNum < FBRegNum;

      // If the new register numbers are the same, choose the Formula with
      // less Cost.
      Cost CostFA(L, SE, TTI, AMK);
      Cost CostFB(L, SE, TTI, AMK);
      Regs.clear();
      CostFA.RateFormula(FA, Regs, VisitedRegs, LU);
      Regs.clear();
      CostFB.RateFormula(FB, Regs, VisitedRegs, LU);
      return CostFA.isLess(CostFB);
    };

    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      // Formulae without a scaled register are left untouched.
      if (!F.ScaledReg)
        continue;
      auto P = BestFormulae.insert({{F.ScaledReg, F.Scale}, FIdx});
      if (P.second)
        continue;

      // Another formula with the same (ScaledReg, Scale) key already exists;
      // keep the better of the two and delete the other.
      Formula &Best = LU.Formulae[P.first->second];
      if (IsBetterThan(F, Best))
        std::swap(F, Best);
      LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                 dbgs() << "\n"
                           "    in favor of formula ";
                 Best.print(dbgs()); dbgs() << '\n');
#ifndef NDEBUG
      ChangedFormulae = true;
#endif
      LU.DeleteFormula(F);
      // Re-examine index FIdx and shrink the bound; the formula list just
      // got smaller.
      --FIdx;
      --NumForms;
      Any = true;
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Reset this to prepare for the next use.
    BestFormulae.clear();
  }

  LLVM_DEBUG(if (ChangedFormulae) {
    dbgs() << "\n"
              "After filtering out undesirable candidates:\n";
    print_uses(dbgs());
  });
}
51170b57cec5SDimitry Andric 
/// If we are over the complexity limit, filter out any post-inc preferring
/// variables to only post-inc values.
void LSRInstance::NarrowSearchSpaceByFilterPostInc() {
  // This heuristic only applies when the addressing-mode preference is
  // post-indexed.
  if (AMK != TTI::AMK_PostIndexed)
    return;
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;

  LLVM_DEBUG(dbgs() << "The search space is too complex.\n"
                       "Narrowing the search space by choosing the lowest "
                       "register Formula for PostInc Uses.\n");

  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];

    // Only filter address uses whose access type has a legal post-indexed
    // load or store on this target.
    if (LU.Kind != LSRUse::Address)
      continue;
    if (!TTI.isIndexedLoadLegal(TTI.MIM_PostInc, LU.AccessTy.getType()) &&
        !TTI.isIndexedStoreLegal(TTI.MIM_PostInc, LU.AccessTy.getType()))
      continue;

    // Find the minimal register count across this use's formulae.
    size_t MinRegs = std::numeric_limits<size_t>::max();
    for (const Formula &F : LU.Formulae)
      MinRegs = std::min(F.getNumRegs(), MinRegs);

    // Drop every formula that uses more than that minimum.
    bool Any = false;
    for (size_t FIdx = 0, NumForms = LU.Formulae.size(); FIdx != NumForms;
         ++FIdx) {
      Formula &F = LU.Formulae[FIdx];
      if (F.getNumRegs() > MinRegs) {
        LLVM_DEBUG(dbgs() << "  Filtering out formula "; F.print(dbgs());
                   dbgs() << "\n");
        LU.DeleteFormula(F);
        // Re-examine index FIdx and shrink the bound; the formula list just
        // got smaller.
        --FIdx;
        --NumForms;
        Any = true;
      }
    }
    if (Any)
      LU.RecomputeRegs(LUIdx, RegUses);

    // Stop as soon as the search space is small enough again.
    if (EstimateSearchSpaceComplexity() < ComplexityLimit)
      break;
  }

  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
51655ffd83dbSDimitry Andric 
/// This function deletes formulas with high register number expectation.
/// Assuming we don't know the value of each formula (having already deleted
/// all inefficient ones), generate the probability of not selecting for each
/// register.
/// For example,
/// Use1:
///  reg(a) + reg({0,+,1})
///  reg(a) + reg({-1,+,1}) + 1
///  reg({a,+,1})
/// Use2:
///  reg(b) + reg({0,+,1})
///  reg(b) + reg({-1,+,1}) + 1
///  reg({b,+,1})
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1})
///  reg(c) + reg({b,+,1})
///
/// Probability of not selecting
///                 Use1   Use2    Use3
/// reg(a)         (1/3) *   1   *   1
/// reg(b)           1   * (1/3) * (1/2)
/// reg({0,+,1})   (2/3) * (2/3) * (1/2)
/// reg({-1,+,1})  (2/3) * (2/3) *   1
/// reg({a,+,1})   (2/3) *   1   *   1
/// reg({b,+,1})     1   * (2/3) * (2/3)
/// reg(c)           1   *   1   *   0
///
/// Now count registers number mathematical expectation for each formula:
/// Note that for each use we exclude probability if not selecting for the use.
/// For example for Use1 probability for reg(a) would be just 1 * 1 (excluding
/// probability 1/3 of not selecting for Use1).
/// Use1:
///  reg(a) + reg({0,+,1})          1 + 1/3       -- to be deleted
///  reg(a) + reg({-1,+,1}) + 1     1 + 4/9       -- to be deleted
///  reg({a,+,1})                   1
/// Use2:
///  reg(b) + reg({0,+,1})          1/2 + 1/3     -- to be deleted
///  reg(b) + reg({-1,+,1}) + 1     1/2 + 2/3     -- to be deleted
///  reg({b,+,1})                   2/3
/// Use3:
///  reg(c) + reg(b) + reg({0,+,1}) 1 + 1/3 + 4/9 -- to be deleted
///  reg(c) + reg({b,+,1})          1 + 2/3
void LSRInstance::NarrowSearchSpaceByDeletingCostlyFormulas() {
  if (EstimateSearchSpaceComplexity() < ComplexityLimit)
    return;
  // Ok, we have too many of formulae on our hands to conveniently handle.
  // Use a rough heuristic to thin out the list.

  // Set of Regs which will be 100% used in final solution.
  // Used in each formula of a solution (in example above this is reg(c)).
  // We can skip them in calculations.
  SmallPtrSet<const SCEV *, 4> UniqRegs;
  LLVM_DEBUG(dbgs() << "The search space is too complex.\n");

  // Map each register to its probability of not being selected across all
  // uses that could pick it. A probability of exactly 0 means some use is
  // certain to select the register, so it is treated as unique.
  DenseMap <const SCEV *, float> RegNumMap;
  for (const SCEV *Reg : RegUses) {
    if (UniqRegs.count(Reg))
      continue;
    float PNotSel = 1;
    for (const LSRUse &LU : Uses) {
      if (!LU.Regs.count(Reg))
        continue;
      float P = LU.getNotSelectedProbability(Reg);
      if (P != 0.0)
        PNotSel *= P;
      else
        UniqRegs.insert(Reg);
    }
    RegNumMap.insert(std::make_pair(Reg, PNotSel));
  }

  LLVM_DEBUG(
      dbgs() << "Narrowing the search space by deleting costly formulas\n");

  // Delete formulas where registers number expectation is high.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
    LSRUse &LU = Uses[LUIdx];
    // If nothing to delete - continue.
    if (LU.Formulae.size() < 2)
      continue;
    // This is a temporary solution to test performance. Float should be
    // replaced with a round-independent type (based on integers) to avoid
    // different results for different target builds.
    float FMinRegNum = LU.Formulae[0].getNumRegs();
    float FMinARegNum = LU.Formulae[0].getNumRegs();
    size_t MinIdx = 0;
    for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
      Formula &F = LU.Formulae[i];
      // FRegNum: expected register count for this formula; FARegNum: the
      // same expectation restricted to addrec registers (used to break ties).
      float FRegNum = 0;
      float FARegNum = 0;
      for (const SCEV *BaseReg : F.BaseRegs) {
        if (UniqRegs.count(BaseReg))
          continue;
        FRegNum += RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
        if (isa<SCEVAddRecExpr>(BaseReg))
          FARegNum +=
              RegNumMap[BaseReg] / LU.getNotSelectedProbability(BaseReg);
      }
      if (const SCEV *ScaledReg = F.ScaledReg) {
        if (!UniqRegs.count(ScaledReg)) {
          FRegNum +=
              RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
          if (isa<SCEVAddRecExpr>(ScaledReg))
            FARegNum +=
                RegNumMap[ScaledReg] / LU.getNotSelectedProbability(ScaledReg);
        }
      }
      if (FMinRegNum > FRegNum ||
          (FMinRegNum == FRegNum && FMinARegNum > FARegNum)) {
        FMinRegNum = FRegNum;
        FMinARegNum = FARegNum;
        MinIdx = i;
      }
    }
    LLVM_DEBUG(dbgs() << "  The formula "; LU.Formulae[MinIdx].print(dbgs());
               dbgs() << " with min reg num " << FMinRegNum << '\n');
    // Move the winner to slot 0 and drop every other formula for this use.
    if (MinIdx != 0)
      std::swap(LU.Formulae[MinIdx], LU.Formulae[0]);
    while (LU.Formulae.size() != 1) {
      LLVM_DEBUG(dbgs() << "  Deleting "; LU.Formulae.back().print(dbgs());
                 dbgs() << '\n');
      LU.Formulae.pop_back();
    }
    LU.RecomputeRegs(LUIdx, RegUses);
    assert(LU.Formulae.size() == 1 && "Should be exactly 1 min regs formula");
    Formula &F = LU.Formulae[0];
    LLVM_DEBUG(dbgs() << "  Leaving only "; F.print(dbgs()); dbgs() << '\n');
    // When we choose the formula, the regs become unique.
    UniqRegs.insert(F.BaseRegs.begin(), F.BaseRegs.end());
    if (F.ScaledReg)
      UniqRegs.insert(F.ScaledReg);
  }
  LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
}
53010b57cec5SDimitry Andric 
530206c3fb27SDimitry Andric // Check if Best and Reg are SCEVs separated by a constant amount C, and if so
530306c3fb27SDimitry Andric // would the addressing offset +C would be legal where the negative offset -C is
530406c3fb27SDimitry Andric // not.
530506c3fb27SDimitry Andric static bool IsSimplerBaseSCEVForTarget(const TargetTransformInfo &TTI,
530606c3fb27SDimitry Andric                                        ScalarEvolution &SE, const SCEV *Best,
530706c3fb27SDimitry Andric                                        const SCEV *Reg,
530806c3fb27SDimitry Andric                                        MemAccessTy AccessType) {
530906c3fb27SDimitry Andric   if (Best->getType() != Reg->getType() ||
531006c3fb27SDimitry Andric       (isa<SCEVAddRecExpr>(Best) && isa<SCEVAddRecExpr>(Reg) &&
531106c3fb27SDimitry Andric        cast<SCEVAddRecExpr>(Best)->getLoop() !=
531206c3fb27SDimitry Andric            cast<SCEVAddRecExpr>(Reg)->getLoop()))
531306c3fb27SDimitry Andric     return false;
531406c3fb27SDimitry Andric   const auto *Diff = dyn_cast<SCEVConstant>(SE.getMinusSCEV(Best, Reg));
531506c3fb27SDimitry Andric   if (!Diff)
531606c3fb27SDimitry Andric     return false;
531706c3fb27SDimitry Andric 
531806c3fb27SDimitry Andric   return TTI.isLegalAddressingMode(
531906c3fb27SDimitry Andric              AccessType.MemTy, /*BaseGV=*/nullptr,
532006c3fb27SDimitry Andric              /*BaseOffset=*/Diff->getAPInt().getSExtValue(),
532106c3fb27SDimitry Andric              /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace) &&
532206c3fb27SDimitry Andric          !TTI.isLegalAddressingMode(
532306c3fb27SDimitry Andric              AccessType.MemTy, /*BaseGV=*/nullptr,
532406c3fb27SDimitry Andric              /*BaseOffset=*/-Diff->getAPInt().getSExtValue(),
532506c3fb27SDimitry Andric              /*HasBaseReg=*/true, /*Scale=*/0, AccessType.AddrSpace);
532606c3fb27SDimitry Andric }
532706c3fb27SDimitry Andric 
53280b57cec5SDimitry Andric /// Pick a register which seems likely to be profitable, and then in any use
53290b57cec5SDimitry Andric /// which has any reference to that register, delete all formulae which do not
53300b57cec5SDimitry Andric /// reference that register.
53310b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceByPickingWinnerRegs() {
53320b57cec5SDimitry Andric   // With all other options exhausted, loop until the system is simple
53330b57cec5SDimitry Andric   // enough to handle.
53340b57cec5SDimitry Andric   SmallPtrSet<const SCEV *, 4> Taken;
53350b57cec5SDimitry Andric   while (EstimateSearchSpaceComplexity() >= ComplexityLimit) {
53360b57cec5SDimitry Andric     // Ok, we have too many of formulae on our hands to conveniently handle.
53370b57cec5SDimitry Andric     // Use a rough heuristic to thin out the list.
53380b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "The search space is too complex.\n");
53390b57cec5SDimitry Andric 
53400b57cec5SDimitry Andric     // Pick the register which is used by the most LSRUses, which is likely
53410b57cec5SDimitry Andric     // to be a good reuse register candidate.
53420b57cec5SDimitry Andric     const SCEV *Best = nullptr;
53430b57cec5SDimitry Andric     unsigned BestNum = 0;
53440b57cec5SDimitry Andric     for (const SCEV *Reg : RegUses) {
53450b57cec5SDimitry Andric       if (Taken.count(Reg))
53460b57cec5SDimitry Andric         continue;
53470b57cec5SDimitry Andric       if (!Best) {
53480b57cec5SDimitry Andric         Best = Reg;
53490b57cec5SDimitry Andric         BestNum = RegUses.getUsedByIndices(Reg).count();
53500b57cec5SDimitry Andric       } else {
53510b57cec5SDimitry Andric         unsigned Count = RegUses.getUsedByIndices(Reg).count();
53520b57cec5SDimitry Andric         if (Count > BestNum) {
53530b57cec5SDimitry Andric           Best = Reg;
53540b57cec5SDimitry Andric           BestNum = Count;
53550b57cec5SDimitry Andric         }
535606c3fb27SDimitry Andric 
535706c3fb27SDimitry Andric         // If the scores are the same, but the Reg is simpler for the target
535806c3fb27SDimitry Andric         // (for example {x,+,1} as opposed to {x+C,+,1}, where the target can
535906c3fb27SDimitry Andric         // handle +C but not -C), opt for the simpler formula.
536006c3fb27SDimitry Andric         if (Count == BestNum) {
536106c3fb27SDimitry Andric           int LUIdx = RegUses.getUsedByIndices(Reg).find_first();
536206c3fb27SDimitry Andric           if (LUIdx >= 0 && Uses[LUIdx].Kind == LSRUse::Address &&
536306c3fb27SDimitry Andric               IsSimplerBaseSCEVForTarget(TTI, SE, Best, Reg,
536406c3fb27SDimitry Andric                                          Uses[LUIdx].AccessTy)) {
536506c3fb27SDimitry Andric             Best = Reg;
536606c3fb27SDimitry Andric             BestNum = Count;
536706c3fb27SDimitry Andric           }
536806c3fb27SDimitry Andric         }
53690b57cec5SDimitry Andric       }
53700b57cec5SDimitry Andric     }
53718bcb0991SDimitry Andric     assert(Best && "Failed to find best LSRUse candidate");
53720b57cec5SDimitry Andric 
53730b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
53740b57cec5SDimitry Andric                       << " will yield profitable reuse.\n");
53750b57cec5SDimitry Andric     Taken.insert(Best);
53760b57cec5SDimitry Andric 
53770b57cec5SDimitry Andric     // In any use with formulae which references this register, delete formulae
53780b57cec5SDimitry Andric     // which don't reference it.
53790b57cec5SDimitry Andric     for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx) {
53800b57cec5SDimitry Andric       LSRUse &LU = Uses[LUIdx];
53810b57cec5SDimitry Andric       if (!LU.Regs.count(Best)) continue;
53820b57cec5SDimitry Andric 
53830b57cec5SDimitry Andric       bool Any = false;
53840b57cec5SDimitry Andric       for (size_t i = 0, e = LU.Formulae.size(); i != e; ++i) {
53850b57cec5SDimitry Andric         Formula &F = LU.Formulae[i];
53860b57cec5SDimitry Andric         if (!F.referencesReg(Best)) {
53870b57cec5SDimitry Andric           LLVM_DEBUG(dbgs() << "  Deleting "; F.print(dbgs()); dbgs() << '\n');
53880b57cec5SDimitry Andric           LU.DeleteFormula(F);
53890b57cec5SDimitry Andric           --e;
53900b57cec5SDimitry Andric           --i;
53910b57cec5SDimitry Andric           Any = true;
53920b57cec5SDimitry Andric           assert(e != 0 && "Use has no formulae left! Is Regs inconsistent?");
53930b57cec5SDimitry Andric           continue;
53940b57cec5SDimitry Andric         }
53950b57cec5SDimitry Andric       }
53960b57cec5SDimitry Andric 
53970b57cec5SDimitry Andric       if (Any)
53980b57cec5SDimitry Andric         LU.RecomputeRegs(LUIdx, RegUses);
53990b57cec5SDimitry Andric     }
54000b57cec5SDimitry Andric 
54010b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "After pre-selection:\n"; print_uses(dbgs()));
54020b57cec5SDimitry Andric   }
54030b57cec5SDimitry Andric }
54040b57cec5SDimitry Andric 
54050b57cec5SDimitry Andric /// If there are an extraordinary number of formulae to choose from, use some
54060b57cec5SDimitry Andric /// rough heuristics to prune down the number of formulae. This keeps the main
54070b57cec5SDimitry Andric /// solver from taking an extraordinary amount of time in some worst-case
54080b57cec5SDimitry Andric /// scenarios.
54090b57cec5SDimitry Andric void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
54100b57cec5SDimitry Andric   NarrowSearchSpaceByDetectingSupersets();
54110b57cec5SDimitry Andric   NarrowSearchSpaceByCollapsingUnrolledCode();
54120b57cec5SDimitry Andric   NarrowSearchSpaceByRefilteringUndesirableDedicatedRegisters();
54130b57cec5SDimitry Andric   if (FilterSameScaledReg)
54140b57cec5SDimitry Andric     NarrowSearchSpaceByFilterFormulaWithSameScaledReg();
54155ffd83dbSDimitry Andric   NarrowSearchSpaceByFilterPostInc();
54160b57cec5SDimitry Andric   if (LSRExpNarrow)
54170b57cec5SDimitry Andric     NarrowSearchSpaceByDeletingCostlyFormulas();
54180b57cec5SDimitry Andric   else
54190b57cec5SDimitry Andric     NarrowSearchSpaceByPickingWinnerRegs();
54200b57cec5SDimitry Andric }
54210b57cec5SDimitry Andric 
54220b57cec5SDimitry Andric /// This is the recursive solver.
54230b57cec5SDimitry Andric void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
54240b57cec5SDimitry Andric                                Cost &SolutionCost,
54250b57cec5SDimitry Andric                                SmallVectorImpl<const Formula *> &Workspace,
54260b57cec5SDimitry Andric                                const Cost &CurCost,
54270b57cec5SDimitry Andric                                const SmallPtrSet<const SCEV *, 16> &CurRegs,
54280b57cec5SDimitry Andric                                DenseSet<const SCEV *> &VisitedRegs) const {
54290b57cec5SDimitry Andric   // Some ideas:
54300b57cec5SDimitry Andric   //  - prune more:
54310b57cec5SDimitry Andric   //    - use more aggressive filtering
54320b57cec5SDimitry Andric   //    - sort the formula so that the most profitable solutions are found first
54330b57cec5SDimitry Andric   //    - sort the uses too
54340b57cec5SDimitry Andric   //  - search faster:
54350b57cec5SDimitry Andric   //    - don't compute a cost, and then compare. compare while computing a cost
54360b57cec5SDimitry Andric   //      and bail early.
54370b57cec5SDimitry Andric   //    - track register sets with SmallBitVector
54380b57cec5SDimitry Andric 
54390b57cec5SDimitry Andric   const LSRUse &LU = Uses[Workspace.size()];
54400b57cec5SDimitry Andric 
54410b57cec5SDimitry Andric   // If this use references any register that's already a part of the
54420b57cec5SDimitry Andric   // in-progress solution, consider it a requirement that a formula must
54430b57cec5SDimitry Andric   // reference that register in order to be considered. This prunes out
54440b57cec5SDimitry Andric   // unprofitable searching.
54450b57cec5SDimitry Andric   SmallSetVector<const SCEV *, 4> ReqRegs;
54460b57cec5SDimitry Andric   for (const SCEV *S : CurRegs)
54470b57cec5SDimitry Andric     if (LU.Regs.count(S))
54480b57cec5SDimitry Andric       ReqRegs.insert(S);
54490b57cec5SDimitry Andric 
54500b57cec5SDimitry Andric   SmallPtrSet<const SCEV *, 16> NewRegs;
5451fe6060f1SDimitry Andric   Cost NewCost(L, SE, TTI, AMK);
54520b57cec5SDimitry Andric   for (const Formula &F : LU.Formulae) {
54530b57cec5SDimitry Andric     // Ignore formulae which may not be ideal in terms of register reuse of
54540b57cec5SDimitry Andric     // ReqRegs.  The formula should use all required registers before
54550b57cec5SDimitry Andric     // introducing new ones.
54565ffd83dbSDimitry Andric     // This can sometimes (notably when trying to favour postinc) lead to
54575ffd83dbSDimitry Andric     // sub-optimial decisions. There it is best left to the cost modelling to
54585ffd83dbSDimitry Andric     // get correct.
5459fe6060f1SDimitry Andric     if (AMK != TTI::AMK_PostIndexed || LU.Kind != LSRUse::Address) {
54600b57cec5SDimitry Andric       int NumReqRegsToFind = std::min(F.getNumRegs(), ReqRegs.size());
54610b57cec5SDimitry Andric       for (const SCEV *Reg : ReqRegs) {
54620b57cec5SDimitry Andric         if ((F.ScaledReg && F.ScaledReg == Reg) ||
54630b57cec5SDimitry Andric             is_contained(F.BaseRegs, Reg)) {
54640b57cec5SDimitry Andric           --NumReqRegsToFind;
54650b57cec5SDimitry Andric           if (NumReqRegsToFind == 0)
54660b57cec5SDimitry Andric             break;
54670b57cec5SDimitry Andric         }
54680b57cec5SDimitry Andric       }
54690b57cec5SDimitry Andric       if (NumReqRegsToFind != 0) {
54700b57cec5SDimitry Andric         // If none of the formulae satisfied the required registers, then we could
54710b57cec5SDimitry Andric         // clear ReqRegs and try again. Currently, we simply give up in this case.
54720b57cec5SDimitry Andric         continue;
54730b57cec5SDimitry Andric       }
54745ffd83dbSDimitry Andric     }
54750b57cec5SDimitry Andric 
54760b57cec5SDimitry Andric     // Evaluate the cost of the current formula. If it's already worse than
54770b57cec5SDimitry Andric     // the current best, prune the search at that point.
54780b57cec5SDimitry Andric     NewCost = CurCost;
54790b57cec5SDimitry Andric     NewRegs = CurRegs;
54800b57cec5SDimitry Andric     NewCost.RateFormula(F, NewRegs, VisitedRegs, LU);
54810b57cec5SDimitry Andric     if (NewCost.isLess(SolutionCost)) {
54820b57cec5SDimitry Andric       Workspace.push_back(&F);
54830b57cec5SDimitry Andric       if (Workspace.size() != Uses.size()) {
54840b57cec5SDimitry Andric         SolveRecurse(Solution, SolutionCost, Workspace, NewCost,
54850b57cec5SDimitry Andric                      NewRegs, VisitedRegs);
54860b57cec5SDimitry Andric         if (F.getNumRegs() == 1 && Workspace.size() == 1)
54870b57cec5SDimitry Andric           VisitedRegs.insert(F.ScaledReg ? F.ScaledReg : F.BaseRegs[0]);
54880b57cec5SDimitry Andric       } else {
54890b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "New best at "; NewCost.print(dbgs());
54900b57cec5SDimitry Andric                    dbgs() << ".\nRegs:\n";
54910b57cec5SDimitry Andric                    for (const SCEV *S : NewRegs) dbgs()
54920b57cec5SDimitry Andric                       << "- " << *S << "\n";
54930b57cec5SDimitry Andric                    dbgs() << '\n');
54940b57cec5SDimitry Andric 
54950b57cec5SDimitry Andric         SolutionCost = NewCost;
54960b57cec5SDimitry Andric         Solution = Workspace;
54970b57cec5SDimitry Andric       }
54980b57cec5SDimitry Andric       Workspace.pop_back();
54990b57cec5SDimitry Andric     }
55000b57cec5SDimitry Andric   }
55010b57cec5SDimitry Andric }
55020b57cec5SDimitry Andric 
55030b57cec5SDimitry Andric /// Choose one formula from each use. Return the results in the given Solution
55040b57cec5SDimitry Andric /// vector.
55050b57cec5SDimitry Andric void LSRInstance::Solve(SmallVectorImpl<const Formula *> &Solution) const {
55060b57cec5SDimitry Andric   SmallVector<const Formula *, 8> Workspace;
5507fe6060f1SDimitry Andric   Cost SolutionCost(L, SE, TTI, AMK);
55080b57cec5SDimitry Andric   SolutionCost.Lose();
5509fe6060f1SDimitry Andric   Cost CurCost(L, SE, TTI, AMK);
55100b57cec5SDimitry Andric   SmallPtrSet<const SCEV *, 16> CurRegs;
55110b57cec5SDimitry Andric   DenseSet<const SCEV *> VisitedRegs;
55120b57cec5SDimitry Andric   Workspace.reserve(Uses.size());
55130b57cec5SDimitry Andric 
55140b57cec5SDimitry Andric   // SolveRecurse does all the work.
55150b57cec5SDimitry Andric   SolveRecurse(Solution, SolutionCost, Workspace, CurCost,
55160b57cec5SDimitry Andric                CurRegs, VisitedRegs);
55170b57cec5SDimitry Andric   if (Solution.empty()) {
55180b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "\nNo Satisfactory Solution\n");
55190b57cec5SDimitry Andric     return;
55200b57cec5SDimitry Andric   }
55210b57cec5SDimitry Andric 
55220b57cec5SDimitry Andric   // Ok, we've now made all our decisions.
55230b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "\n"
55240b57cec5SDimitry Andric                        "The chosen solution requires ";
55250b57cec5SDimitry Andric              SolutionCost.print(dbgs()); dbgs() << ":\n";
55260b57cec5SDimitry Andric              for (size_t i = 0, e = Uses.size(); i != e; ++i) {
55270b57cec5SDimitry Andric                dbgs() << "  ";
55280b57cec5SDimitry Andric                Uses[i].print(dbgs());
55290b57cec5SDimitry Andric                dbgs() << "\n"
55300b57cec5SDimitry Andric                          "    ";
55310b57cec5SDimitry Andric                Solution[i]->print(dbgs());
55320b57cec5SDimitry Andric                dbgs() << '\n';
55330b57cec5SDimitry Andric              });
55340b57cec5SDimitry Andric 
55350b57cec5SDimitry Andric   assert(Solution.size() == Uses.size() && "Malformed solution!");
5536bdd1243dSDimitry Andric 
55370fca6ea1SDimitry Andric   const bool EnableDropUnprofitableSolution = [&] {
55380fca6ea1SDimitry Andric     switch (AllowDropSolutionIfLessProfitable) {
55390fca6ea1SDimitry Andric     case cl::BOU_TRUE:
55400fca6ea1SDimitry Andric       return true;
55410fca6ea1SDimitry Andric     case cl::BOU_FALSE:
55420fca6ea1SDimitry Andric       return false;
55430fca6ea1SDimitry Andric     case cl::BOU_UNSET:
55440fca6ea1SDimitry Andric       return TTI.shouldDropLSRSolutionIfLessProfitable();
55450fca6ea1SDimitry Andric     }
55460fca6ea1SDimitry Andric     llvm_unreachable("Unhandled cl::boolOrDefault enum");
55470fca6ea1SDimitry Andric   }();
55480fca6ea1SDimitry Andric 
5549bdd1243dSDimitry Andric   if (BaselineCost.isLess(SolutionCost)) {
55500fca6ea1SDimitry Andric     if (!EnableDropUnprofitableSolution)
5551bdd1243dSDimitry Andric       LLVM_DEBUG(
5552bdd1243dSDimitry Andric           dbgs() << "Baseline is more profitable than chosen solution, "
5553bdd1243dSDimitry Andric                     "add option 'lsr-drop-solution' to drop LSR solution.\n");
5554bdd1243dSDimitry Andric     else {
5555bdd1243dSDimitry Andric       LLVM_DEBUG(dbgs() << "Baseline is more profitable than chosen "
5556bdd1243dSDimitry Andric                            "solution, dropping LSR solution.\n";);
5557bdd1243dSDimitry Andric       Solution.clear();
5558bdd1243dSDimitry Andric     }
5559bdd1243dSDimitry Andric   }
55600b57cec5SDimitry Andric }
55610b57cec5SDimitry Andric 
55620b57cec5SDimitry Andric /// Helper for AdjustInsertPositionForExpand. Climb up the dominator tree far as
55630b57cec5SDimitry Andric /// we can go while still being dominated by the input positions. This helps
55640b57cec5SDimitry Andric /// canonicalize the insert position, which encourages sharing.
55650b57cec5SDimitry Andric BasicBlock::iterator
55660b57cec5SDimitry Andric LSRInstance::HoistInsertPosition(BasicBlock::iterator IP,
55670b57cec5SDimitry Andric                                  const SmallVectorImpl<Instruction *> &Inputs)
55680b57cec5SDimitry Andric                                                                          const {
55690b57cec5SDimitry Andric   Instruction *Tentative = &*IP;
55700b57cec5SDimitry Andric   while (true) {
55710b57cec5SDimitry Andric     bool AllDominate = true;
55720b57cec5SDimitry Andric     Instruction *BetterPos = nullptr;
55730b57cec5SDimitry Andric     // Don't bother attempting to insert before a catchswitch, their basic block
55740b57cec5SDimitry Andric     // cannot have other non-PHI instructions.
55750b57cec5SDimitry Andric     if (isa<CatchSwitchInst>(Tentative))
55760b57cec5SDimitry Andric       return IP;
55770b57cec5SDimitry Andric 
55780b57cec5SDimitry Andric     for (Instruction *Inst : Inputs) {
55790b57cec5SDimitry Andric       if (Inst == Tentative || !DT.dominates(Inst, Tentative)) {
55800b57cec5SDimitry Andric         AllDominate = false;
55810b57cec5SDimitry Andric         break;
55820b57cec5SDimitry Andric       }
55830b57cec5SDimitry Andric       // Attempt to find an insert position in the middle of the block,
55840b57cec5SDimitry Andric       // instead of at the end, so that it can be used for other expansions.
55850b57cec5SDimitry Andric       if (Tentative->getParent() == Inst->getParent() &&
55860b57cec5SDimitry Andric           (!BetterPos || !DT.dominates(Inst, BetterPos)))
55870b57cec5SDimitry Andric         BetterPos = &*std::next(BasicBlock::iterator(Inst));
55880b57cec5SDimitry Andric     }
55890b57cec5SDimitry Andric     if (!AllDominate)
55900b57cec5SDimitry Andric       break;
55910b57cec5SDimitry Andric     if (BetterPos)
55920b57cec5SDimitry Andric       IP = BetterPos->getIterator();
55930b57cec5SDimitry Andric     else
55940b57cec5SDimitry Andric       IP = Tentative->getIterator();
55950b57cec5SDimitry Andric 
55960b57cec5SDimitry Andric     const Loop *IPLoop = LI.getLoopFor(IP->getParent());
55970b57cec5SDimitry Andric     unsigned IPLoopDepth = IPLoop ? IPLoop->getLoopDepth() : 0;
55980b57cec5SDimitry Andric 
55990b57cec5SDimitry Andric     BasicBlock *IDom;
56000b57cec5SDimitry Andric     for (DomTreeNode *Rung = DT.getNode(IP->getParent()); ; ) {
56010b57cec5SDimitry Andric       if (!Rung) return IP;
56020b57cec5SDimitry Andric       Rung = Rung->getIDom();
56030b57cec5SDimitry Andric       if (!Rung) return IP;
56040b57cec5SDimitry Andric       IDom = Rung->getBlock();
56050b57cec5SDimitry Andric 
56060b57cec5SDimitry Andric       // Don't climb into a loop though.
56070b57cec5SDimitry Andric       const Loop *IDomLoop = LI.getLoopFor(IDom);
56080b57cec5SDimitry Andric       unsigned IDomDepth = IDomLoop ? IDomLoop->getLoopDepth() : 0;
56090b57cec5SDimitry Andric       if (IDomDepth <= IPLoopDepth &&
56100b57cec5SDimitry Andric           (IDomDepth != IPLoopDepth || IDomLoop == IPLoop))
56110b57cec5SDimitry Andric         break;
56120b57cec5SDimitry Andric     }
56130b57cec5SDimitry Andric 
56140b57cec5SDimitry Andric     Tentative = IDom->getTerminator();
56150b57cec5SDimitry Andric   }
56160b57cec5SDimitry Andric 
56170b57cec5SDimitry Andric   return IP;
56180b57cec5SDimitry Andric }
56190b57cec5SDimitry Andric 
56200b57cec5SDimitry Andric /// Determine an input position which will be dominated by the operands and
56210b57cec5SDimitry Andric /// which will dominate the result.
5622fcaf7f86SDimitry Andric BasicBlock::iterator LSRInstance::AdjustInsertPositionForExpand(
5623fcaf7f86SDimitry Andric     BasicBlock::iterator LowestIP, const LSRFixup &LF, const LSRUse &LU) const {
56240b57cec5SDimitry Andric   // Collect some instructions which must be dominated by the
56250b57cec5SDimitry Andric   // expanding replacement. These must be dominated by any operands that
56260b57cec5SDimitry Andric   // will be required in the expansion.
56270b57cec5SDimitry Andric   SmallVector<Instruction *, 4> Inputs;
56280b57cec5SDimitry Andric   if (Instruction *I = dyn_cast<Instruction>(LF.OperandValToReplace))
56290b57cec5SDimitry Andric     Inputs.push_back(I);
56300b57cec5SDimitry Andric   if (LU.Kind == LSRUse::ICmpZero)
56310b57cec5SDimitry Andric     if (Instruction *I =
56320b57cec5SDimitry Andric           dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
56330b57cec5SDimitry Andric       Inputs.push_back(I);
56340b57cec5SDimitry Andric   if (LF.PostIncLoops.count(L)) {
56350b57cec5SDimitry Andric     if (LF.isUseFullyOutsideLoop(L))
56360b57cec5SDimitry Andric       Inputs.push_back(L->getLoopLatch()->getTerminator());
56370b57cec5SDimitry Andric     else
56380b57cec5SDimitry Andric       Inputs.push_back(IVIncInsertPos);
56390b57cec5SDimitry Andric   }
56400b57cec5SDimitry Andric   // The expansion must also be dominated by the increment positions of any
56410b57cec5SDimitry Andric   // loops it for which it is using post-inc mode.
56420b57cec5SDimitry Andric   for (const Loop *PIL : LF.PostIncLoops) {
56430b57cec5SDimitry Andric     if (PIL == L) continue;
56440b57cec5SDimitry Andric 
56450b57cec5SDimitry Andric     // Be dominated by the loop exit.
56460b57cec5SDimitry Andric     SmallVector<BasicBlock *, 4> ExitingBlocks;
56470b57cec5SDimitry Andric     PIL->getExitingBlocks(ExitingBlocks);
56480b57cec5SDimitry Andric     if (!ExitingBlocks.empty()) {
56490b57cec5SDimitry Andric       BasicBlock *BB = ExitingBlocks[0];
56500b57cec5SDimitry Andric       for (unsigned i = 1, e = ExitingBlocks.size(); i != e; ++i)
56510b57cec5SDimitry Andric         BB = DT.findNearestCommonDominator(BB, ExitingBlocks[i]);
56520b57cec5SDimitry Andric       Inputs.push_back(BB->getTerminator());
56530b57cec5SDimitry Andric     }
56540b57cec5SDimitry Andric   }
56550b57cec5SDimitry Andric 
56560b57cec5SDimitry Andric   assert(!isa<PHINode>(LowestIP) && !LowestIP->isEHPad()
56570b57cec5SDimitry Andric          && !isa<DbgInfoIntrinsic>(LowestIP) &&
56580b57cec5SDimitry Andric          "Insertion point must be a normal instruction");
56590b57cec5SDimitry Andric 
56600b57cec5SDimitry Andric   // Then, climb up the immediate dominator tree as far as we can go while
56610b57cec5SDimitry Andric   // still being dominated by the input positions.
56620b57cec5SDimitry Andric   BasicBlock::iterator IP = HoistInsertPosition(LowestIP, Inputs);
56630b57cec5SDimitry Andric 
56640b57cec5SDimitry Andric   // Don't insert instructions before PHI nodes.
56650b57cec5SDimitry Andric   while (isa<PHINode>(IP)) ++IP;
56660b57cec5SDimitry Andric 
56670b57cec5SDimitry Andric   // Ignore landingpad instructions.
56680b57cec5SDimitry Andric   while (IP->isEHPad()) ++IP;
56690b57cec5SDimitry Andric 
56700b57cec5SDimitry Andric   // Ignore debug intrinsics.
56710b57cec5SDimitry Andric   while (isa<DbgInfoIntrinsic>(IP)) ++IP;
56720b57cec5SDimitry Andric 
56730b57cec5SDimitry Andric   // Set IP below instructions recently inserted by SCEVExpander. This keeps the
56740b57cec5SDimitry Andric   // IP consistent across expansions and allows the previously inserted
56750b57cec5SDimitry Andric   // instructions to be reused by subsequent expansion.
56760b57cec5SDimitry Andric   while (Rewriter.isInsertedInstruction(&*IP) && IP != LowestIP)
56770b57cec5SDimitry Andric     ++IP;
56780b57cec5SDimitry Andric 
56790b57cec5SDimitry Andric   return IP;
56800b57cec5SDimitry Andric }
56810b57cec5SDimitry Andric 
56820b57cec5SDimitry Andric /// Emit instructions for the leading candidate expression for this LSRUse (this
56830b57cec5SDimitry Andric /// is called "expanding").
56840b57cec5SDimitry Andric Value *LSRInstance::Expand(const LSRUse &LU, const LSRFixup &LF,
56850b57cec5SDimitry Andric                            const Formula &F, BasicBlock::iterator IP,
56860b57cec5SDimitry Andric                            SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
56870b57cec5SDimitry Andric   if (LU.RigidFormula)
56880b57cec5SDimitry Andric     return LF.OperandValToReplace;
56890b57cec5SDimitry Andric 
56900b57cec5SDimitry Andric   // Determine an input position which will be dominated by the operands and
56910b57cec5SDimitry Andric   // which will dominate the result.
5692fcaf7f86SDimitry Andric   IP = AdjustInsertPositionForExpand(IP, LF, LU);
56930b57cec5SDimitry Andric   Rewriter.setInsertPoint(&*IP);
56940b57cec5SDimitry Andric 
56950b57cec5SDimitry Andric   // Inform the Rewriter if we have a post-increment use, so that it can
56960b57cec5SDimitry Andric   // perform an advantageous expansion.
56970b57cec5SDimitry Andric   Rewriter.setPostInc(LF.PostIncLoops);
56980b57cec5SDimitry Andric 
56990b57cec5SDimitry Andric   // This is the type that the user actually needs.
57000b57cec5SDimitry Andric   Type *OpTy = LF.OperandValToReplace->getType();
57010b57cec5SDimitry Andric   // This will be the type that we'll initially expand to.
57020b57cec5SDimitry Andric   Type *Ty = F.getType();
57030b57cec5SDimitry Andric   if (!Ty)
57040b57cec5SDimitry Andric     // No type known; just expand directly to the ultimate type.
57050b57cec5SDimitry Andric     Ty = OpTy;
57060b57cec5SDimitry Andric   else if (SE.getEffectiveSCEVType(Ty) == SE.getEffectiveSCEVType(OpTy))
57070b57cec5SDimitry Andric     // Expand directly to the ultimate type if it's the right size.
57080b57cec5SDimitry Andric     Ty = OpTy;
57090b57cec5SDimitry Andric   // This is the type to do integer arithmetic in.
57100b57cec5SDimitry Andric   Type *IntTy = SE.getEffectiveSCEVType(Ty);
57110b57cec5SDimitry Andric 
57120b57cec5SDimitry Andric   // Build up a list of operands to add together to form the full base.
57130b57cec5SDimitry Andric   SmallVector<const SCEV *, 8> Ops;
57140b57cec5SDimitry Andric 
57150b57cec5SDimitry Andric   // Expand the BaseRegs portion.
57160b57cec5SDimitry Andric   for (const SCEV *Reg : F.BaseRegs) {
57170b57cec5SDimitry Andric     assert(!Reg->isZero() && "Zero allocated in a base register!");
57180b57cec5SDimitry Andric 
57190b57cec5SDimitry Andric     // If we're expanding for a post-inc user, make the post-inc adjustment.
57200b57cec5SDimitry Andric     Reg = denormalizeForPostIncUse(Reg, LF.PostIncLoops, SE);
57210b57cec5SDimitry Andric     Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, nullptr)));
57220b57cec5SDimitry Andric   }
57230b57cec5SDimitry Andric 
57240b57cec5SDimitry Andric   // Expand the ScaledReg portion.
57250b57cec5SDimitry Andric   Value *ICmpScaledV = nullptr;
57260b57cec5SDimitry Andric   if (F.Scale != 0) {
57270b57cec5SDimitry Andric     const SCEV *ScaledS = F.ScaledReg;
57280b57cec5SDimitry Andric 
57290b57cec5SDimitry Andric     // If we're expanding for a post-inc user, make the post-inc adjustment.
57300b57cec5SDimitry Andric     PostIncLoopSet &Loops = const_cast<PostIncLoopSet &>(LF.PostIncLoops);
57310b57cec5SDimitry Andric     ScaledS = denormalizeForPostIncUse(ScaledS, Loops, SE);
57320b57cec5SDimitry Andric 
57330b57cec5SDimitry Andric     if (LU.Kind == LSRUse::ICmpZero) {
57340b57cec5SDimitry Andric       // Expand ScaleReg as if it was part of the base regs.
57350b57cec5SDimitry Andric       if (F.Scale == 1)
57360b57cec5SDimitry Andric         Ops.push_back(
57370b57cec5SDimitry Andric             SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr)));
57380b57cec5SDimitry Andric       else {
57390b57cec5SDimitry Andric         // An interesting way of "folding" with an icmp is to use a negated
57400b57cec5SDimitry Andric         // scale, which we'll implement by inserting it into the other operand
57410b57cec5SDimitry Andric         // of the icmp.
57420b57cec5SDimitry Andric         assert(F.Scale == -1 &&
57430b57cec5SDimitry Andric                "The only scale supported by ICmpZero uses is -1!");
57440b57cec5SDimitry Andric         ICmpScaledV = Rewriter.expandCodeFor(ScaledS, nullptr);
57450b57cec5SDimitry Andric       }
57460b57cec5SDimitry Andric     } else {
57470b57cec5SDimitry Andric       // Otherwise just expand the scaled register and an explicit scale,
57480b57cec5SDimitry Andric       // which is expected to be matched as part of the address.
57490b57cec5SDimitry Andric 
57500b57cec5SDimitry Andric       // Flush the operand list to suppress SCEVExpander hoisting address modes.
57510b57cec5SDimitry Andric       // Unless the addressing mode will not be folded.
57520b57cec5SDimitry Andric       if (!Ops.empty() && LU.Kind == LSRUse::Address &&
57530b57cec5SDimitry Andric           isAMCompletelyFolded(TTI, LU, F)) {
57540b57cec5SDimitry Andric         Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), nullptr);
57550b57cec5SDimitry Andric         Ops.clear();
57560b57cec5SDimitry Andric         Ops.push_back(SE.getUnknown(FullV));
57570b57cec5SDimitry Andric       }
57580b57cec5SDimitry Andric       ScaledS = SE.getUnknown(Rewriter.expandCodeFor(ScaledS, nullptr));
57590b57cec5SDimitry Andric       if (F.Scale != 1)
57600b57cec5SDimitry Andric         ScaledS =
57610b57cec5SDimitry Andric             SE.getMulExpr(ScaledS, SE.getConstant(ScaledS->getType(), F.Scale));
57620b57cec5SDimitry Andric       Ops.push_back(ScaledS);
57630b57cec5SDimitry Andric     }
57640b57cec5SDimitry Andric   }
57650b57cec5SDimitry Andric 
57660b57cec5SDimitry Andric   // Expand the GV portion.
57670b57cec5SDimitry Andric   if (F.BaseGV) {
57680b57cec5SDimitry Andric     // Flush the operand list to suppress SCEVExpander hoisting.
57690b57cec5SDimitry Andric     if (!Ops.empty()) {
5770fe6060f1SDimitry Andric       Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), IntTy);
57710b57cec5SDimitry Andric       Ops.clear();
57720b57cec5SDimitry Andric       Ops.push_back(SE.getUnknown(FullV));
57730b57cec5SDimitry Andric     }
57740b57cec5SDimitry Andric     Ops.push_back(SE.getUnknown(F.BaseGV));
57750b57cec5SDimitry Andric   }
57760b57cec5SDimitry Andric 
57770b57cec5SDimitry Andric   // Flush the operand list to suppress SCEVExpander hoisting of both folded and
57780b57cec5SDimitry Andric   // unfolded offsets. LSR assumes they both live next to their uses.
57790b57cec5SDimitry Andric   if (!Ops.empty()) {
57800b57cec5SDimitry Andric     Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty);
57810b57cec5SDimitry Andric     Ops.clear();
57820b57cec5SDimitry Andric     Ops.push_back(SE.getUnknown(FullV));
57830b57cec5SDimitry Andric   }
57840b57cec5SDimitry Andric 
57850fca6ea1SDimitry Andric   // FIXME: Are we sure we won't get a mismatch here? Is there a way to bail
57860fca6ea1SDimitry Andric   // out at this point, or should we generate a SCEV adding together mixed
57870fca6ea1SDimitry Andric   // offsets?
57880fca6ea1SDimitry Andric   assert(F.BaseOffset.isCompatibleImmediate(LF.Offset) &&
57890fca6ea1SDimitry Andric          "Expanding mismatched offsets\n");
57900b57cec5SDimitry Andric   // Expand the immediate portion.
57910fca6ea1SDimitry Andric   Immediate Offset = F.BaseOffset.addUnsigned(LF.Offset);
57920fca6ea1SDimitry Andric   if (Offset.isNonZero()) {
57930b57cec5SDimitry Andric     if (LU.Kind == LSRUse::ICmpZero) {
57940b57cec5SDimitry Andric       // The other interesting way of "folding" with an ICmpZero is to use a
57950b57cec5SDimitry Andric       // negated immediate.
57960b57cec5SDimitry Andric       if (!ICmpScaledV)
57970fca6ea1SDimitry Andric         ICmpScaledV =
57980fca6ea1SDimitry Andric             ConstantInt::get(IntTy, -(uint64_t)Offset.getFixedValue());
57990b57cec5SDimitry Andric       else {
58000b57cec5SDimitry Andric         Ops.push_back(SE.getUnknown(ICmpScaledV));
58010fca6ea1SDimitry Andric         ICmpScaledV = ConstantInt::get(IntTy, Offset.getFixedValue());
58020b57cec5SDimitry Andric       }
58030b57cec5SDimitry Andric     } else {
58040b57cec5SDimitry Andric       // Just add the immediate values. These again are expected to be matched
58050b57cec5SDimitry Andric       // as part of the address.
58060fca6ea1SDimitry Andric       Ops.push_back(Offset.getUnknownSCEV(SE, IntTy));
58070b57cec5SDimitry Andric     }
58080b57cec5SDimitry Andric   }
58090b57cec5SDimitry Andric 
58100b57cec5SDimitry Andric   // Expand the unfolded offset portion.
58110fca6ea1SDimitry Andric   Immediate UnfoldedOffset = F.UnfoldedOffset;
58120fca6ea1SDimitry Andric   if (UnfoldedOffset.isNonZero()) {
58130b57cec5SDimitry Andric     // Just add the immediate values.
58140fca6ea1SDimitry Andric     Ops.push_back(UnfoldedOffset.getUnknownSCEV(SE, IntTy));
58150b57cec5SDimitry Andric   }
58160b57cec5SDimitry Andric 
58170b57cec5SDimitry Andric   // Emit instructions summing all the operands.
58180b57cec5SDimitry Andric   const SCEV *FullS = Ops.empty() ?
58190b57cec5SDimitry Andric                       SE.getConstant(IntTy, 0) :
58200b57cec5SDimitry Andric                       SE.getAddExpr(Ops);
58210b57cec5SDimitry Andric   Value *FullV = Rewriter.expandCodeFor(FullS, Ty);
58220b57cec5SDimitry Andric 
58230b57cec5SDimitry Andric   // We're done expanding now, so reset the rewriter.
58240b57cec5SDimitry Andric   Rewriter.clearPostInc();
58250b57cec5SDimitry Andric 
58260b57cec5SDimitry Andric   // An ICmpZero Formula represents an ICmp which we're handling as a
58270b57cec5SDimitry Andric   // comparison against zero. Now that we've expanded an expression for that
58280b57cec5SDimitry Andric   // form, update the ICmp's other operand.
58290b57cec5SDimitry Andric   if (LU.Kind == LSRUse::ICmpZero) {
58300b57cec5SDimitry Andric     ICmpInst *CI = cast<ICmpInst>(LF.UserInst);
58315ffd83dbSDimitry Andric     if (auto *OperandIsInstr = dyn_cast<Instruction>(CI->getOperand(1)))
58325ffd83dbSDimitry Andric       DeadInsts.emplace_back(OperandIsInstr);
58330b57cec5SDimitry Andric     assert(!F.BaseGV && "ICmp does not support folding a global value and "
58340b57cec5SDimitry Andric                            "a scale at the same time!");
58350b57cec5SDimitry Andric     if (F.Scale == -1) {
58360b57cec5SDimitry Andric       if (ICmpScaledV->getType() != OpTy) {
58370fca6ea1SDimitry Andric         Instruction *Cast = CastInst::Create(
58380fca6ea1SDimitry Andric             CastInst::getCastOpcode(ICmpScaledV, false, OpTy, false),
58390fca6ea1SDimitry Andric             ICmpScaledV, OpTy, "tmp", CI->getIterator());
58400b57cec5SDimitry Andric         ICmpScaledV = Cast;
58410b57cec5SDimitry Andric       }
58420b57cec5SDimitry Andric       CI->setOperand(1, ICmpScaledV);
58430b57cec5SDimitry Andric     } else {
58440b57cec5SDimitry Andric       // A scale of 1 means that the scale has been expanded as part of the
58450b57cec5SDimitry Andric       // base regs.
58460b57cec5SDimitry Andric       assert((F.Scale == 0 || F.Scale == 1) &&
58470b57cec5SDimitry Andric              "ICmp does not support folding a global value and "
58480b57cec5SDimitry Andric              "a scale at the same time!");
58490b57cec5SDimitry Andric       Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy),
58500fca6ea1SDimitry Andric                                            -(uint64_t)Offset.getFixedValue());
58515f757f3fSDimitry Andric       if (C->getType() != OpTy) {
58525f757f3fSDimitry Andric         C = ConstantFoldCastOperand(
58535f757f3fSDimitry Andric             CastInst::getCastOpcode(C, false, OpTy, false), C, OpTy,
58540fca6ea1SDimitry Andric             CI->getDataLayout());
58555f757f3fSDimitry Andric         assert(C && "Cast of ConstantInt should have folded");
58565f757f3fSDimitry Andric       }
58570b57cec5SDimitry Andric 
58580b57cec5SDimitry Andric       CI->setOperand(1, C);
58590b57cec5SDimitry Andric     }
58600b57cec5SDimitry Andric   }
58610b57cec5SDimitry Andric 
58620b57cec5SDimitry Andric   return FullV;
58630b57cec5SDimitry Andric }
58640b57cec5SDimitry Andric 
58650b57cec5SDimitry Andric /// Helper for Rewrite. PHI nodes are special because the use of their operands
58660b57cec5SDimitry Andric /// effectively happens in their predecessor blocks, so the expression may need
58670b57cec5SDimitry Andric /// to be expanded in multiple places.
58680b57cec5SDimitry Andric void LSRInstance::RewriteForPHI(
58690b57cec5SDimitry Andric     PHINode *PN, const LSRUse &LU, const LSRFixup &LF, const Formula &F,
5870fcaf7f86SDimitry Andric     SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
58710b57cec5SDimitry Andric   DenseMap<BasicBlock *, Value *> Inserted;
587206c3fb27SDimitry Andric 
587306c3fb27SDimitry Andric   // Inserting instructions in the loop and using them as PHI's input could
587406c3fb27SDimitry Andric   // break LCSSA in case if PHI's parent block is not a loop exit (i.e. the
587506c3fb27SDimitry Andric   // corresponding incoming block is not loop exiting). So collect all such
587606c3fb27SDimitry Andric   // instructions to form LCSSA for them later.
587706c3fb27SDimitry Andric   SmallVector<Instruction *, 4> InsertedNonLCSSAInsts;
587806c3fb27SDimitry Andric 
58790b57cec5SDimitry Andric   for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
58800b57cec5SDimitry Andric     if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
58810b57cec5SDimitry Andric       bool needUpdateFixups = false;
58820b57cec5SDimitry Andric       BasicBlock *BB = PN->getIncomingBlock(i);
58830b57cec5SDimitry Andric 
58840b57cec5SDimitry Andric       // If this is a critical edge, split the edge so that we do not insert
58850b57cec5SDimitry Andric       // the code on all predecessor/successor paths.  We do this unless this
58860b57cec5SDimitry Andric       // is the canonical backedge for this loop, which complicates post-inc
58870b57cec5SDimitry Andric       // users.
58880b57cec5SDimitry Andric       if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
58890b57cec5SDimitry Andric           !isa<IndirectBrInst>(BB->getTerminator()) &&
58900b57cec5SDimitry Andric           !isa<CatchSwitchInst>(BB->getTerminator())) {
58910b57cec5SDimitry Andric         BasicBlock *Parent = PN->getParent();
58920b57cec5SDimitry Andric         Loop *PNLoop = LI.getLoopFor(Parent);
58930b57cec5SDimitry Andric         if (!PNLoop || Parent != PNLoop->getHeader()) {
58940b57cec5SDimitry Andric           // Split the critical edge.
58950b57cec5SDimitry Andric           BasicBlock *NewBB = nullptr;
58960b57cec5SDimitry Andric           if (!Parent->isLandingPad()) {
5897e8d8bef9SDimitry Andric             NewBB =
5898e8d8bef9SDimitry Andric                 SplitCriticalEdge(BB, Parent,
5899e8d8bef9SDimitry Andric                                   CriticalEdgeSplittingOptions(&DT, &LI, MSSAU)
59000b57cec5SDimitry Andric                                       .setMergeIdenticalEdges()
59010b57cec5SDimitry Andric                                       .setKeepOneInputPHIs());
59020b57cec5SDimitry Andric           } else {
59030b57cec5SDimitry Andric             SmallVector<BasicBlock*, 2> NewBBs;
59045f757f3fSDimitry Andric             DomTreeUpdater DTU(DT, DomTreeUpdater::UpdateStrategy::Eager);
59055f757f3fSDimitry Andric             SplitLandingPadPredecessors(Parent, BB, "", "", NewBBs, &DTU, &LI);
59060b57cec5SDimitry Andric             NewBB = NewBBs[0];
59070b57cec5SDimitry Andric           }
59080b57cec5SDimitry Andric           // If NewBB==NULL, then SplitCriticalEdge refused to split because all
59090b57cec5SDimitry Andric           // phi predecessors are identical. The simple thing to do is skip
59100b57cec5SDimitry Andric           // splitting in this case rather than complicate the API.
59110b57cec5SDimitry Andric           if (NewBB) {
59120b57cec5SDimitry Andric             // If PN is outside of the loop and BB is in the loop, we want to
59130b57cec5SDimitry Andric             // move the block to be immediately before the PHI block, not
59140b57cec5SDimitry Andric             // immediately after BB.
59150b57cec5SDimitry Andric             if (L->contains(BB) && !L->contains(PN))
59160b57cec5SDimitry Andric               NewBB->moveBefore(PN->getParent());
59170b57cec5SDimitry Andric 
59180b57cec5SDimitry Andric             // Splitting the edge can reduce the number of PHI entries we have.
59190b57cec5SDimitry Andric             e = PN->getNumIncomingValues();
59200b57cec5SDimitry Andric             BB = NewBB;
59210b57cec5SDimitry Andric             i = PN->getBasicBlockIndex(BB);
59220b57cec5SDimitry Andric 
59230b57cec5SDimitry Andric             needUpdateFixups = true;
59240b57cec5SDimitry Andric           }
59250b57cec5SDimitry Andric         }
59260b57cec5SDimitry Andric       }
59270b57cec5SDimitry Andric 
59280b57cec5SDimitry Andric       std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
59290b57cec5SDimitry Andric         Inserted.insert(std::make_pair(BB, static_cast<Value *>(nullptr)));
59300b57cec5SDimitry Andric       if (!Pair.second)
59310b57cec5SDimitry Andric         PN->setIncomingValue(i, Pair.first->second);
59320b57cec5SDimitry Andric       else {
5933fcaf7f86SDimitry Andric         Value *FullV =
5934fcaf7f86SDimitry Andric             Expand(LU, LF, F, BB->getTerminator()->getIterator(), DeadInsts);
59350b57cec5SDimitry Andric 
59360b57cec5SDimitry Andric         // If this is reuse-by-noop-cast, insert the noop cast.
59370b57cec5SDimitry Andric         Type *OpTy = LF.OperandValToReplace->getType();
59380b57cec5SDimitry Andric         if (FullV->getType() != OpTy)
59390fca6ea1SDimitry Andric           FullV = CastInst::Create(
59400fca6ea1SDimitry Andric               CastInst::getCastOpcode(FullV, false, OpTy, false), FullV,
59410fca6ea1SDimitry Andric               LF.OperandValToReplace->getType(), "tmp",
59420fca6ea1SDimitry Andric               BB->getTerminator()->getIterator());
59430b57cec5SDimitry Andric 
594406c3fb27SDimitry Andric         // If the incoming block for this value is not in the loop, it means the
594506c3fb27SDimitry Andric         // current PHI is not in a loop exit, so we must create a LCSSA PHI for
594606c3fb27SDimitry Andric         // the inserted value.
594706c3fb27SDimitry Andric         if (auto *I = dyn_cast<Instruction>(FullV))
594806c3fb27SDimitry Andric           if (L->contains(I) && !L->contains(BB))
594906c3fb27SDimitry Andric             InsertedNonLCSSAInsts.push_back(I);
595006c3fb27SDimitry Andric 
59510b57cec5SDimitry Andric         PN->setIncomingValue(i, FullV);
59520b57cec5SDimitry Andric         Pair.first->second = FullV;
59530b57cec5SDimitry Andric       }
59540b57cec5SDimitry Andric 
59550b57cec5SDimitry Andric       // If LSR splits critical edge and phi node has other pending
59560b57cec5SDimitry Andric       // fixup operands, we need to update those pending fixups. Otherwise
59570b57cec5SDimitry Andric       // formulae will not be implemented completely and some instructions
59580b57cec5SDimitry Andric       // will not be eliminated.
59590b57cec5SDimitry Andric       if (needUpdateFixups) {
59600fca6ea1SDimitry Andric         for (LSRUse &LU : Uses)
59610fca6ea1SDimitry Andric           for (LSRFixup &Fixup : LU.Fixups)
59620b57cec5SDimitry Andric             // If fixup is supposed to rewrite some operand in the phi
59630b57cec5SDimitry Andric             // that was just updated, it may be already moved to
59640b57cec5SDimitry Andric             // another phi node. Such fixup requires update.
59650b57cec5SDimitry Andric             if (Fixup.UserInst == PN) {
59660b57cec5SDimitry Andric               // Check if the operand we try to replace still exists in the
59670b57cec5SDimitry Andric               // original phi.
59680b57cec5SDimitry Andric               bool foundInOriginalPHI = false;
59690b57cec5SDimitry Andric               for (const auto &val : PN->incoming_values())
59700b57cec5SDimitry Andric                 if (val == Fixup.OperandValToReplace) {
59710b57cec5SDimitry Andric                   foundInOriginalPHI = true;
59720b57cec5SDimitry Andric                   break;
59730b57cec5SDimitry Andric                 }
59740b57cec5SDimitry Andric 
59750b57cec5SDimitry Andric               // If fixup operand found in original PHI - nothing to do.
59760b57cec5SDimitry Andric               if (foundInOriginalPHI)
59770b57cec5SDimitry Andric                 continue;
59780b57cec5SDimitry Andric 
59790b57cec5SDimitry Andric               // Otherwise it might be moved to another PHI and requires update.
59800b57cec5SDimitry Andric               // If fixup operand not found in any of the incoming blocks that
59810b57cec5SDimitry Andric               // means we have already rewritten it - nothing to do.
59820b57cec5SDimitry Andric               for (const auto &Block : PN->blocks())
59830b57cec5SDimitry Andric                 for (BasicBlock::iterator I = Block->begin(); isa<PHINode>(I);
59840b57cec5SDimitry Andric                      ++I) {
59850b57cec5SDimitry Andric                   PHINode *NewPN = cast<PHINode>(I);
59860b57cec5SDimitry Andric                   for (const auto &val : NewPN->incoming_values())
59870b57cec5SDimitry Andric                     if (val == Fixup.OperandValToReplace)
59880b57cec5SDimitry Andric                       Fixup.UserInst = NewPN;
59890b57cec5SDimitry Andric                 }
59900b57cec5SDimitry Andric             }
59910b57cec5SDimitry Andric       }
59920b57cec5SDimitry Andric     }
599306c3fb27SDimitry Andric 
599406c3fb27SDimitry Andric   formLCSSAForInstructions(InsertedNonLCSSAInsts, DT, LI, &SE);
59950b57cec5SDimitry Andric }
59960b57cec5SDimitry Andric 
59970b57cec5SDimitry Andric /// Emit instructions for the leading candidate expression for this LSRUse (this
59980b57cec5SDimitry Andric /// is called "expanding"), and update the UserInst to reference the newly
59990b57cec5SDimitry Andric /// expanded value.
60000b57cec5SDimitry Andric void LSRInstance::Rewrite(const LSRUse &LU, const LSRFixup &LF,
6001fcaf7f86SDimitry Andric                           const Formula &F,
60020b57cec5SDimitry Andric                           SmallVectorImpl<WeakTrackingVH> &DeadInsts) const {
60030b57cec5SDimitry Andric   // First, find an insertion point that dominates UserInst. For PHI nodes,
60040b57cec5SDimitry Andric   // find the nearest block which dominates all the relevant uses.
60050b57cec5SDimitry Andric   if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
6006fcaf7f86SDimitry Andric     RewriteForPHI(PN, LU, LF, F, DeadInsts);
60070b57cec5SDimitry Andric   } else {
6008fcaf7f86SDimitry Andric     Value *FullV = Expand(LU, LF, F, LF.UserInst->getIterator(), DeadInsts);
60090b57cec5SDimitry Andric 
60100b57cec5SDimitry Andric     // If this is reuse-by-noop-cast, insert the noop cast.
60110b57cec5SDimitry Andric     Type *OpTy = LF.OperandValToReplace->getType();
60120b57cec5SDimitry Andric     if (FullV->getType() != OpTy) {
60130b57cec5SDimitry Andric       Instruction *Cast =
60140b57cec5SDimitry Andric           CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
60150fca6ea1SDimitry Andric                            FullV, OpTy, "tmp", LF.UserInst->getIterator());
60160b57cec5SDimitry Andric       FullV = Cast;
60170b57cec5SDimitry Andric     }
60180b57cec5SDimitry Andric 
60190b57cec5SDimitry Andric     // Update the user. ICmpZero is handled specially here (for now) because
60200b57cec5SDimitry Andric     // Expand may have updated one of the operands of the icmp already, and
60210b57cec5SDimitry Andric     // its new value may happen to be equal to LF.OperandValToReplace, in
60220b57cec5SDimitry Andric     // which case doing replaceUsesOfWith leads to replacing both operands
60230b57cec5SDimitry Andric     // with the same value. TODO: Reorganize this.
60240b57cec5SDimitry Andric     if (LU.Kind == LSRUse::ICmpZero)
60250b57cec5SDimitry Andric       LF.UserInst->setOperand(0, FullV);
60260b57cec5SDimitry Andric     else
60270b57cec5SDimitry Andric       LF.UserInst->replaceUsesOfWith(LF.OperandValToReplace, FullV);
60280b57cec5SDimitry Andric   }
60290b57cec5SDimitry Andric 
60305ffd83dbSDimitry Andric   if (auto *OperandIsInstr = dyn_cast<Instruction>(LF.OperandValToReplace))
60315ffd83dbSDimitry Andric     DeadInsts.emplace_back(OperandIsInstr);
60320b57cec5SDimitry Andric }
60330b57cec5SDimitry Andric 
603406c3fb27SDimitry Andric // Trying to hoist the IVInc to loop header if all IVInc users are in
603506c3fb27SDimitry Andric // the loop header. It will help backend to generate post index load/store
603606c3fb27SDimitry Andric // when the latch block is different from loop header block.
603706c3fb27SDimitry Andric static bool canHoistIVInc(const TargetTransformInfo &TTI, const LSRFixup &Fixup,
603806c3fb27SDimitry Andric                           const LSRUse &LU, Instruction *IVIncInsertPos,
603906c3fb27SDimitry Andric                           Loop *L) {
604006c3fb27SDimitry Andric   if (LU.Kind != LSRUse::Address)
604106c3fb27SDimitry Andric     return false;
604206c3fb27SDimitry Andric 
604306c3fb27SDimitry Andric   // For now this code do the conservative optimization, only work for
604406c3fb27SDimitry Andric   // the header block. Later we can hoist the IVInc to the block post
604506c3fb27SDimitry Andric   // dominate all users.
604606c3fb27SDimitry Andric   BasicBlock *LHeader = L->getHeader();
604706c3fb27SDimitry Andric   if (IVIncInsertPos->getParent() == LHeader)
604806c3fb27SDimitry Andric     return false;
604906c3fb27SDimitry Andric 
605006c3fb27SDimitry Andric   if (!Fixup.OperandValToReplace ||
605106c3fb27SDimitry Andric       any_of(Fixup.OperandValToReplace->users(), [&LHeader](User *U) {
605206c3fb27SDimitry Andric         Instruction *UI = cast<Instruction>(U);
605306c3fb27SDimitry Andric         return UI->getParent() != LHeader;
605406c3fb27SDimitry Andric       }))
605506c3fb27SDimitry Andric     return false;
605606c3fb27SDimitry Andric 
605706c3fb27SDimitry Andric   Instruction *I = Fixup.UserInst;
605806c3fb27SDimitry Andric   Type *Ty = I->getType();
605906c3fb27SDimitry Andric   return Ty->isIntegerTy() &&
606006c3fb27SDimitry Andric          ((isa<LoadInst>(I) && TTI.isIndexedLoadLegal(TTI.MIM_PostInc, Ty)) ||
606106c3fb27SDimitry Andric           (isa<StoreInst>(I) && TTI.isIndexedStoreLegal(TTI.MIM_PostInc, Ty)));
606206c3fb27SDimitry Andric }
606306c3fb27SDimitry Andric 
/// Rewrite all the fixup locations with new values, following the chosen
/// solution.
void LSRInstance::ImplementSolution(
    const SmallVectorImpl<const Formula *> &Solution) {
  // Keep track of instructions we may have made dead, so that
  // we can remove them after we are done working.
  SmallVector<WeakTrackingVH, 16> DeadInsts;

  // Mark phi nodes that terminate chains so the expander tries to reuse them.
  for (const IVChain &Chain : IVChainVec) {
    if (PHINode *PN = dyn_cast<PHINode>(Chain.tailUserInst()))
      Rewriter.setChainedPhi(PN);
  }

  // Expand the new value definitions and update the users.
  for (size_t LUIdx = 0, NumUses = Uses.size(); LUIdx != NumUses; ++LUIdx)
    for (const LSRFixup &Fixup : Uses[LUIdx].Fixups) {
      // Prefer placing the IV increment in the loop header when that enables
      // post-index load/store generation (see canHoistIVInc); otherwise use
      // the default insertion point.
      Instruction *InsertPos =
          canHoistIVInc(TTI, Fixup, Uses[LUIdx], IVIncInsertPos, L)
              ? L->getHeader()->getTerminator()
              : IVIncInsertPos;
      Rewriter.setIVIncInsertPos(L, InsertPos);
      Rewrite(Uses[LUIdx], Fixup, *Solution[LUIdx], DeadInsts);
      Changed = true;
    }

  // Materialize each IV chain after the individual fixups are rewritten.
  for (const IVChain &Chain : IVChainVec) {
    GenerateIVChain(Chain, DeadInsts);
    Changed = true;
  }

  // Record the IVs created by the expander (skipping any that no longer have
  // a parent block) so they are available via ScalarEvolutionIVs.
  for (const WeakVH &IV : Rewriter.getInsertedIVs())
    if (IV && dyn_cast<Instruction>(&*IV)->getParent())
      ScalarEvolutionIVs.push_back(IV);

  // Clean up after ourselves. This must be done before deleting any
  // instructions.
  Rewriter.clear();

  Changed |= RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts,
                                                                  &TLI, MSSAU);

  // In our cost analysis above, we assume that each addrec consumes exactly
  // one register, and arrange to have increments inserted just before the
  // latch to maximimize the chance this is true.  However, if we reused
  // existing IVs, we now need to move the increments to match our
  // expectations.  Otherwise, our cost modeling results in us having a
  // chosen a non-optimal result for the actual schedule.  (And yes, this
  // scheduling decision does impact later codegen.)
  for (PHINode &PN : L->getHeader()->phis()) {
    BinaryOperator *BO = nullptr;
    Value *Start = nullptr, *Step = nullptr;
    if (!matchSimpleRecurrence(&PN, BO, Start, Step))
      continue;

    switch (BO->getOpcode()) {
    case Instruction::Sub:
      if (BO->getOperand(0) != &PN)
        // sub is non-commutative - match handling elsewhere in LSR
        continue;
      break;
    case Instruction::Add:
      break;
    default:
      continue;
    };

    if (!isa<Constant>(Step))
      // If not a constant step, might increase register pressure
      // (We assume constants have been canonicalized to RHS)
      continue;

    if (BO->getParent() == IVIncInsertPos->getParent())
      // Only bother moving across blocks.  Isel can handle block local case.
      continue;

    // Can we legally schedule inc at the desired point?
    if (!llvm::all_of(BO->uses(),
                      [&](Use &U) {return DT.dominates(IVIncInsertPos, U);}))
      continue;
    BO->moveBefore(IVIncInsertPos);
    Changed = true;
  }


}
61500b57cec5SDimitry Andric 
// Constructor: runs the entire LSR analysis and transformation pipeline on
// loop L. Note the initializer order: AMK is computed before BaselineCost,
// which consumes it.
LSRInstance::LSRInstance(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                         DominatorTree &DT, LoopInfo &LI,
                         const TargetTransformInfo &TTI, AssumptionCache &AC,
                         TargetLibraryInfo &TLI, MemorySSAUpdater *MSSAU)
    : IU(IU), SE(SE), DT(DT), LI(LI), AC(AC), TLI(TLI), TTI(TTI), L(L),
      MSSAU(MSSAU), AMK(PreferredAddresingMode.getNumOccurrences() > 0
                            ? PreferredAddresingMode
                            : TTI.getPreferredAddressingMode(L, &SE)),
      Rewriter(SE, L->getHeader()->getDataLayout(), "lsr", false),
      BaselineCost(L, SE, TTI, AMK) {
  // If LoopSimplify form is not available, stay out of trouble.
  if (!L->isLoopSimplifyForm())
    return;

  // If there's no interesting work to be done, bail early.
  if (IU.empty()) return;

  // If there's too much analysis to be done, bail early. We won't be able to
  // model the problem anyway.
  unsigned NumUsers = 0;
  for (const IVStrideUse &U : IU) {
    if (++NumUsers > MaxIVUsers) {
      (void)U;
      LLVM_DEBUG(dbgs() << "LSR skipping loop, too many IV Users in " << U
                        << "\n");
      return;
    }
    // Bail out if we have a PHI on an EHPad that gets a value from a
    // CatchSwitchInst.  Because the CatchSwitchInst cannot be split, there is
    // no good place to stick any instructions.
    if (auto *PN = dyn_cast<PHINode>(U.getUser())) {
       auto *FirstNonPHI = PN->getParent()->getFirstNonPHI();
       if (isa<FuncletPadInst>(FirstNonPHI) ||
           isa<CatchSwitchInst>(FirstNonPHI))
         for (BasicBlock *PredBB : PN->blocks())
           if (isa<CatchSwitchInst>(PredBB->getFirstNonPHI()))
             return;
    }
  }

  LLVM_DEBUG(dbgs() << "\nLSR on loop ";
             L->getHeader()->printAsOperand(dbgs(), /*PrintType=*/false);
             dbgs() << ":\n");

  // Configure SCEVExpander already now, so the correct mode is used for
  // isSafeToExpand() checks.
#ifndef NDEBUG
  Rewriter.setDebugType(DEBUG_TYPE);
#endif
  Rewriter.disableCanonicalMode();
  Rewriter.enableLSRMode();

  // First, perform some low-level loop optimizations.
  OptimizeShadowIV();
  OptimizeLoopTermCond();

  // If loop preparation eliminates all interesting IV users, bail.
  if (IU.empty()) return;

  // Skip nested loops until we can model them better with formulae.
  if (!L->isInnermost()) {
    LLVM_DEBUG(dbgs() << "LSR skipping outer loop " << *L << "\n");
    return;
  }

  // Start collecting data and preparing for the solver.
  // If number of registers is not the major cost, we cannot benefit from the
  // current profitable chain optimization which is based on number of
  // registers.
  // FIXME: add profitable chain optimization for other kinds major cost, for
  // example number of instructions.
  if (TTI.isNumRegsMajorCostOfLSR() || StressIVChain)
    CollectChains();
  CollectInterestingTypesAndFactors();
  CollectFixupsAndInitialFormulae();
  CollectLoopInvariantFixupsAndFormulae();

  // Nothing to rewrite; the loop is left untouched.
  if (Uses.empty())
    return;

  LLVM_DEBUG(dbgs() << "LSR found " << Uses.size() << " uses:\n";
             print_uses(dbgs()));
  LLVM_DEBUG(dbgs() << "The baseline solution requires ";
             BaselineCost.print(dbgs()); dbgs() << "\n");

  // Now use the reuse data to generate a bunch of interesting ways
  // to formulate the values needed for the uses.
  GenerateAllReuseFormulae();

  // Prune the search space before solving, both for compile time and to keep
  // the solver tractable.
  FilterOutUndesirableDedicatedRegisters();
  NarrowSearchSpaceUsingHeuristics();

  SmallVector<const Formula *, 8> Solution;
  Solve(Solution);

  // Release memory that is no longer needed.
  Factors.clear();
  Types.clear();
  RegUses.clear();

  if (Solution.empty())
    return;

#ifndef NDEBUG
  // Formulae should be legal.
  for (const LSRUse &LU : Uses) {
    for (const Formula &F : LU.Formulae)
      assert(isLegalUse(TTI, LU.MinOffset, LU.MaxOffset, LU.Kind, LU.AccessTy,
                        F) && "Illegal formula generated!");
  };
#endif

  // Now that we've decided what we want, make it so.
  ImplementSolution(Solution);
}
62660b57cec5SDimitry Andric 
62670b57cec5SDimitry Andric #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
62680b57cec5SDimitry Andric void LSRInstance::print_factors_and_types(raw_ostream &OS) const {
62690b57cec5SDimitry Andric   if (Factors.empty() && Types.empty()) return;
62700b57cec5SDimitry Andric 
62710b57cec5SDimitry Andric   OS << "LSR has identified the following interesting factors and types: ";
62720b57cec5SDimitry Andric   bool First = true;
62730b57cec5SDimitry Andric 
62740b57cec5SDimitry Andric   for (int64_t Factor : Factors) {
62750b57cec5SDimitry Andric     if (!First) OS << ", ";
62760b57cec5SDimitry Andric     First = false;
62770b57cec5SDimitry Andric     OS << '*' << Factor;
62780b57cec5SDimitry Andric   }
62790b57cec5SDimitry Andric 
62800b57cec5SDimitry Andric   for (Type *Ty : Types) {
62810b57cec5SDimitry Andric     if (!First) OS << ", ";
62820b57cec5SDimitry Andric     First = false;
62830b57cec5SDimitry Andric     OS << '(' << *Ty << ')';
62840b57cec5SDimitry Andric   }
62850b57cec5SDimitry Andric   OS << '\n';
62860b57cec5SDimitry Andric }
62870b57cec5SDimitry Andric 
62880b57cec5SDimitry Andric void LSRInstance::print_fixups(raw_ostream &OS) const {
62890b57cec5SDimitry Andric   OS << "LSR is examining the following fixup sites:\n";
62900b57cec5SDimitry Andric   for (const LSRUse &LU : Uses)
62910b57cec5SDimitry Andric     for (const LSRFixup &LF : LU.Fixups) {
62920b57cec5SDimitry Andric       dbgs() << "  ";
62930b57cec5SDimitry Andric       LF.print(OS);
62940b57cec5SDimitry Andric       OS << '\n';
62950b57cec5SDimitry Andric     }
62960b57cec5SDimitry Andric }
62970b57cec5SDimitry Andric 
62980b57cec5SDimitry Andric void LSRInstance::print_uses(raw_ostream &OS) const {
62990b57cec5SDimitry Andric   OS << "LSR is examining the following uses:\n";
63000b57cec5SDimitry Andric   for (const LSRUse &LU : Uses) {
63010b57cec5SDimitry Andric     dbgs() << "  ";
63020b57cec5SDimitry Andric     LU.print(OS);
63030b57cec5SDimitry Andric     OS << '\n';
63040b57cec5SDimitry Andric     for (const Formula &F : LU.Formulae) {
63050b57cec5SDimitry Andric       OS << "    ";
63060b57cec5SDimitry Andric       F.print(OS);
63070b57cec5SDimitry Andric       OS << '\n';
63080b57cec5SDimitry Andric     }
63090b57cec5SDimitry Andric   }
63100b57cec5SDimitry Andric }
63110b57cec5SDimitry Andric 
63120b57cec5SDimitry Andric void LSRInstance::print(raw_ostream &OS) const {
63130b57cec5SDimitry Andric   print_factors_and_types(OS);
63140b57cec5SDimitry Andric   print_fixups(OS);
63150b57cec5SDimitry Andric   print_uses(OS);
63160b57cec5SDimitry Andric }
63170b57cec5SDimitry Andric 
63180b57cec5SDimitry Andric LLVM_DUMP_METHOD void LSRInstance::dump() const {
63190b57cec5SDimitry Andric   print(errs()); errs() << '\n';
63200b57cec5SDimitry Andric }
63210b57cec5SDimitry Andric #endif
63220b57cec5SDimitry Andric 
63230b57cec5SDimitry Andric namespace {
63240b57cec5SDimitry Andric 
63250b57cec5SDimitry Andric class LoopStrengthReduce : public LoopPass {
63260b57cec5SDimitry Andric public:
63270b57cec5SDimitry Andric   static char ID; // Pass ID, replacement for typeid
63280b57cec5SDimitry Andric 
63290b57cec5SDimitry Andric   LoopStrengthReduce();
63300b57cec5SDimitry Andric 
63310b57cec5SDimitry Andric private:
63320b57cec5SDimitry Andric   bool runOnLoop(Loop *L, LPPassManager &LPM) override;
63330b57cec5SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override;
63340b57cec5SDimitry Andric };
63350b57cec5SDimitry Andric 
63360b57cec5SDimitry Andric } // end anonymous namespace
63370b57cec5SDimitry Andric 
63380b57cec5SDimitry Andric LoopStrengthReduce::LoopStrengthReduce() : LoopPass(ID) {
63390b57cec5SDimitry Andric   initializeLoopStrengthReducePass(*PassRegistry::getPassRegistry());
63400b57cec5SDimitry Andric }
63410b57cec5SDimitry Andric 
63420b57cec5SDimitry Andric void LoopStrengthReduce::getAnalysisUsage(AnalysisUsage &AU) const {
63430b57cec5SDimitry Andric   // We split critical edges, so we change the CFG.  However, we do update
63440b57cec5SDimitry Andric   // many analyses if they are around.
63450b57cec5SDimitry Andric   AU.addPreservedID(LoopSimplifyID);
63460b57cec5SDimitry Andric 
63470b57cec5SDimitry Andric   AU.addRequired<LoopInfoWrapperPass>();
63480b57cec5SDimitry Andric   AU.addPreserved<LoopInfoWrapperPass>();
63490b57cec5SDimitry Andric   AU.addRequiredID(LoopSimplifyID);
63500b57cec5SDimitry Andric   AU.addRequired<DominatorTreeWrapperPass>();
63510b57cec5SDimitry Andric   AU.addPreserved<DominatorTreeWrapperPass>();
63520b57cec5SDimitry Andric   AU.addRequired<ScalarEvolutionWrapperPass>();
63530b57cec5SDimitry Andric   AU.addPreserved<ScalarEvolutionWrapperPass>();
63540b57cec5SDimitry Andric   AU.addRequired<AssumptionCacheTracker>();
63550b57cec5SDimitry Andric   AU.addRequired<TargetLibraryInfoWrapperPass>();
63560b57cec5SDimitry Andric   // Requiring LoopSimplify a second time here prevents IVUsers from running
63570b57cec5SDimitry Andric   // twice, since LoopSimplify was invalidated by running ScalarEvolution.
63580b57cec5SDimitry Andric   AU.addRequiredID(LoopSimplifyID);
63590b57cec5SDimitry Andric   AU.addRequired<IVUsersWrapperPass>();
63600b57cec5SDimitry Andric   AU.addPreserved<IVUsersWrapperPass>();
63610b57cec5SDimitry Andric   AU.addRequired<TargetTransformInfoWrapperPass>();
63625ffd83dbSDimitry Andric   AU.addPreserved<MemorySSAWrapperPass>();
63630b57cec5SDimitry Andric }
63640b57cec5SDimitry Andric 
6365349cc55cSDimitry Andric namespace {
636681ad6265SDimitry Andric 
636781ad6265SDimitry Andric /// Enables more convenient iteration over a DWARF expression vector.
636881ad6265SDimitry Andric static iterator_range<llvm::DIExpression::expr_op_iterator>
636981ad6265SDimitry Andric ToDwarfOpIter(SmallVectorImpl<uint64_t> &Expr) {
637081ad6265SDimitry Andric   llvm::DIExpression::expr_op_iterator Begin =
637181ad6265SDimitry Andric       llvm::DIExpression::expr_op_iterator(Expr.begin());
637281ad6265SDimitry Andric   llvm::DIExpression::expr_op_iterator End =
637381ad6265SDimitry Andric       llvm::DIExpression::expr_op_iterator(Expr.end());
637481ad6265SDimitry Andric   return {Begin, End};
637581ad6265SDimitry Andric }
637681ad6265SDimitry Andric 
6377fe6060f1SDimitry Andric struct SCEVDbgValueBuilder {
6378fe6060f1SDimitry Andric   SCEVDbgValueBuilder() = default;
637981ad6265SDimitry Andric   SCEVDbgValueBuilder(const SCEVDbgValueBuilder &Base) { clone(Base); }
638081ad6265SDimitry Andric 
638181ad6265SDimitry Andric   void clone(const SCEVDbgValueBuilder &Base) {
638281ad6265SDimitry Andric     LocationOps = Base.LocationOps;
6383fe6060f1SDimitry Andric     Expr = Base.Expr;
6384fe6060f1SDimitry Andric   }
6385e8d8bef9SDimitry Andric 
638681ad6265SDimitry Andric   void clear() {
638781ad6265SDimitry Andric     LocationOps.clear();
638881ad6265SDimitry Andric     Expr.clear();
638981ad6265SDimitry Andric   }
639081ad6265SDimitry Andric 
6391fe6060f1SDimitry Andric   /// The DIExpression as we translate the SCEV.
6392fe6060f1SDimitry Andric   SmallVector<uint64_t, 6> Expr;
6393fe6060f1SDimitry Andric   /// The location ops of the DIExpression.
639481ad6265SDimitry Andric   SmallVector<Value *, 2> LocationOps;
6395fe6060f1SDimitry Andric 
6396fe6060f1SDimitry Andric   void pushOperator(uint64_t Op) { Expr.push_back(Op); }
6397fe6060f1SDimitry Andric   void pushUInt(uint64_t Operand) { Expr.push_back(Operand); }
6398fe6060f1SDimitry Andric 
6399fe6060f1SDimitry Andric   /// Add a DW_OP_LLVM_arg to the expression, followed by the index of the value
6400fe6060f1SDimitry Andric   /// in the set of values referenced by the expression.
640181ad6265SDimitry Andric   void pushLocation(llvm::Value *V) {
6402fe6060f1SDimitry Andric     Expr.push_back(llvm::dwarf::DW_OP_LLVM_arg);
6403bdd1243dSDimitry Andric     auto *It = llvm::find(LocationOps, V);
6404fe6060f1SDimitry Andric     unsigned ArgIndex = 0;
640581ad6265SDimitry Andric     if (It != LocationOps.end()) {
640681ad6265SDimitry Andric       ArgIndex = std::distance(LocationOps.begin(), It);
6407fe6060f1SDimitry Andric     } else {
640881ad6265SDimitry Andric       ArgIndex = LocationOps.size();
640981ad6265SDimitry Andric       LocationOps.push_back(V);
6410fe6060f1SDimitry Andric     }
6411fe6060f1SDimitry Andric     Expr.push_back(ArgIndex);
6412fe6060f1SDimitry Andric   }
6413fe6060f1SDimitry Andric 
6414fe6060f1SDimitry Andric   void pushValue(const SCEVUnknown *U) {
6415fe6060f1SDimitry Andric     llvm::Value *V = cast<SCEVUnknown>(U)->getValue();
641681ad6265SDimitry Andric     pushLocation(V);
6417fe6060f1SDimitry Andric   }
6418fe6060f1SDimitry Andric 
64196e75b2fbSDimitry Andric   bool pushConst(const SCEVConstant *C) {
642006c3fb27SDimitry Andric     if (C->getAPInt().getSignificantBits() > 64)
64216e75b2fbSDimitry Andric       return false;
6422fe6060f1SDimitry Andric     Expr.push_back(llvm::dwarf::DW_OP_consts);
6423fe6060f1SDimitry Andric     Expr.push_back(C->getAPInt().getSExtValue());
64246e75b2fbSDimitry Andric     return true;
6425fe6060f1SDimitry Andric   }
6426fe6060f1SDimitry Andric 
642781ad6265SDimitry Andric   // Iterating the expression as DWARF ops is convenient when updating
642881ad6265SDimitry Andric   // DWARF_OP_LLVM_args.
642981ad6265SDimitry Andric   iterator_range<llvm::DIExpression::expr_op_iterator> expr_ops() {
643081ad6265SDimitry Andric     return ToDwarfOpIter(Expr);
643181ad6265SDimitry Andric   }
643281ad6265SDimitry Andric 
6433fe6060f1SDimitry Andric   /// Several SCEV types are sequences of the same arithmetic operator applied
6434fe6060f1SDimitry Andric   /// to constants and values that may be extended or truncated.
6435fe6060f1SDimitry Andric   bool pushArithmeticExpr(const llvm::SCEVCommutativeExpr *CommExpr,
6436fe6060f1SDimitry Andric                           uint64_t DwarfOp) {
6437fe6060f1SDimitry Andric     assert((isa<llvm::SCEVAddExpr>(CommExpr) || isa<SCEVMulExpr>(CommExpr)) &&
6438fe6060f1SDimitry Andric            "Expected arithmetic SCEV type");
6439fe6060f1SDimitry Andric     bool Success = true;
6440fe6060f1SDimitry Andric     unsigned EmitOperator = 0;
6441bdd1243dSDimitry Andric     for (const auto &Op : CommExpr->operands()) {
6442fe6060f1SDimitry Andric       Success &= pushSCEV(Op);
6443fe6060f1SDimitry Andric 
6444fe6060f1SDimitry Andric       if (EmitOperator >= 1)
6445fe6060f1SDimitry Andric         pushOperator(DwarfOp);
6446fe6060f1SDimitry Andric       ++EmitOperator;
6447fe6060f1SDimitry Andric     }
6448fe6060f1SDimitry Andric     return Success;
6449fe6060f1SDimitry Andric   }
6450fe6060f1SDimitry Andric 
6451fe6060f1SDimitry Andric   // TODO: Identify and omit noop casts.
6452fe6060f1SDimitry Andric   bool pushCast(const llvm::SCEVCastExpr *C, bool IsSigned) {
6453fe6060f1SDimitry Andric     const llvm::SCEV *Inner = C->getOperand(0);
6454fe6060f1SDimitry Andric     const llvm::Type *Type = C->getType();
6455fe6060f1SDimitry Andric     uint64_t ToWidth = Type->getIntegerBitWidth();
6456fe6060f1SDimitry Andric     bool Success = pushSCEV(Inner);
6457fe6060f1SDimitry Andric     uint64_t CastOps[] = {dwarf::DW_OP_LLVM_convert, ToWidth,
6458fe6060f1SDimitry Andric                           IsSigned ? llvm::dwarf::DW_ATE_signed
6459fe6060f1SDimitry Andric                                    : llvm::dwarf::DW_ATE_unsigned};
6460fe6060f1SDimitry Andric     for (const auto &Op : CastOps)
6461fe6060f1SDimitry Andric       pushOperator(Op);
6462fe6060f1SDimitry Andric     return Success;
6463fe6060f1SDimitry Andric   }
6464fe6060f1SDimitry Andric 
6465fe6060f1SDimitry Andric   // TODO: MinMax - although these haven't been encountered in the test suite.
6466fe6060f1SDimitry Andric   bool pushSCEV(const llvm::SCEV *S) {
6467fe6060f1SDimitry Andric     bool Success = true;
6468fe6060f1SDimitry Andric     if (const SCEVConstant *StartInt = dyn_cast<SCEVConstant>(S)) {
64696e75b2fbSDimitry Andric       Success &= pushConst(StartInt);
6470fe6060f1SDimitry Andric 
6471fe6060f1SDimitry Andric     } else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
6472fe6060f1SDimitry Andric       if (!U->getValue())
6473fe6060f1SDimitry Andric         return false;
647481ad6265SDimitry Andric       pushLocation(U->getValue());
6475fe6060f1SDimitry Andric 
6476fe6060f1SDimitry Andric     } else if (const SCEVMulExpr *MulRec = dyn_cast<SCEVMulExpr>(S)) {
6477fe6060f1SDimitry Andric       Success &= pushArithmeticExpr(MulRec, llvm::dwarf::DW_OP_mul);
6478fe6060f1SDimitry Andric 
6479fe6060f1SDimitry Andric     } else if (const SCEVUDivExpr *UDiv = dyn_cast<SCEVUDivExpr>(S)) {
6480fe6060f1SDimitry Andric       Success &= pushSCEV(UDiv->getLHS());
6481fe6060f1SDimitry Andric       Success &= pushSCEV(UDiv->getRHS());
6482fe6060f1SDimitry Andric       pushOperator(llvm::dwarf::DW_OP_div);
6483fe6060f1SDimitry Andric 
6484fe6060f1SDimitry Andric     } else if (const SCEVCastExpr *Cast = dyn_cast<SCEVCastExpr>(S)) {
6485fe6060f1SDimitry Andric       // Assert if a new and unknown SCEVCastEXpr type is encountered.
6486fe6060f1SDimitry Andric       assert((isa<SCEVZeroExtendExpr>(Cast) || isa<SCEVTruncateExpr>(Cast) ||
6487fe6060f1SDimitry Andric               isa<SCEVPtrToIntExpr>(Cast) || isa<SCEVSignExtendExpr>(Cast)) &&
6488fe6060f1SDimitry Andric              "Unexpected cast type in SCEV.");
6489fe6060f1SDimitry Andric       Success &= pushCast(Cast, (isa<SCEVSignExtendExpr>(Cast)));
6490fe6060f1SDimitry Andric 
6491fe6060f1SDimitry Andric     } else if (const SCEVAddExpr *AddExpr = dyn_cast<SCEVAddExpr>(S)) {
6492fe6060f1SDimitry Andric       Success &= pushArithmeticExpr(AddExpr, llvm::dwarf::DW_OP_plus);
6493fe6060f1SDimitry Andric 
6494fe6060f1SDimitry Andric     } else if (isa<SCEVAddRecExpr>(S)) {
6495fe6060f1SDimitry Andric       // Nested SCEVAddRecExpr are generated by nested loops and are currently
6496fe6060f1SDimitry Andric       // unsupported.
6497fe6060f1SDimitry Andric       return false;
6498fe6060f1SDimitry Andric 
6499fe6060f1SDimitry Andric     } else {
6500fe6060f1SDimitry Andric       return false;
6501fe6060f1SDimitry Andric     }
6502fe6060f1SDimitry Andric     return Success;
6503fe6060f1SDimitry Andric   }
6504fe6060f1SDimitry Andric 
6505fe6060f1SDimitry Andric   /// Return true if the combination of arithmetic operator and underlying
6506fe6060f1SDimitry Andric   /// SCEV constant value is an identity function.
6507fe6060f1SDimitry Andric   bool isIdentityFunction(uint64_t Op, const SCEV *S) {
6508fe6060f1SDimitry Andric     if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
650906c3fb27SDimitry Andric       if (C->getAPInt().getSignificantBits() > 64)
65106e75b2fbSDimitry Andric         return false;
6511fe6060f1SDimitry Andric       int64_t I = C->getAPInt().getSExtValue();
6512fe6060f1SDimitry Andric       switch (Op) {
6513fe6060f1SDimitry Andric       case llvm::dwarf::DW_OP_plus:
6514fe6060f1SDimitry Andric       case llvm::dwarf::DW_OP_minus:
6515fe6060f1SDimitry Andric         return I == 0;
6516fe6060f1SDimitry Andric       case llvm::dwarf::DW_OP_mul:
6517fe6060f1SDimitry Andric       case llvm::dwarf::DW_OP_div:
6518fe6060f1SDimitry Andric         return I == 1;
6519fe6060f1SDimitry Andric       }
6520fe6060f1SDimitry Andric     }
6521fe6060f1SDimitry Andric     return false;
6522fe6060f1SDimitry Andric   }
6523fe6060f1SDimitry Andric 
6524fe6060f1SDimitry Andric   /// Convert a SCEV of a value to a DIExpression that is pushed onto the
6525fe6060f1SDimitry Andric   /// builder's expression stack. The stack should already contain an
6526fe6060f1SDimitry Andric   /// expression for the iteration count, so that it can be multiplied by
6527fe6060f1SDimitry Andric   /// the stride and added to the start.
6528fe6060f1SDimitry Andric   /// Components of the expression are omitted if they are an identity function.
6529fe6060f1SDimitry Andric   /// Chain (non-affine) SCEVs are not supported.
6530fe6060f1SDimitry Andric   bool SCEVToValueExpr(const llvm::SCEVAddRecExpr &SAR, ScalarEvolution &SE) {
6531fe6060f1SDimitry Andric     assert(SAR.isAffine() && "Expected affine SCEV");
6532fe6060f1SDimitry Andric     // TODO: Is this check needed?
6533fe6060f1SDimitry Andric     if (isa<SCEVAddRecExpr>(SAR.getStart()))
6534fe6060f1SDimitry Andric       return false;
6535fe6060f1SDimitry Andric 
6536fe6060f1SDimitry Andric     const SCEV *Start = SAR.getStart();
6537fe6060f1SDimitry Andric     const SCEV *Stride = SAR.getStepRecurrence(SE);
6538fe6060f1SDimitry Andric 
6539fe6060f1SDimitry Andric     // Skip pushing arithmetic noops.
6540fe6060f1SDimitry Andric     if (!isIdentityFunction(llvm::dwarf::DW_OP_mul, Stride)) {
6541fe6060f1SDimitry Andric       if (!pushSCEV(Stride))
6542fe6060f1SDimitry Andric         return false;
6543fe6060f1SDimitry Andric       pushOperator(llvm::dwarf::DW_OP_mul);
6544fe6060f1SDimitry Andric     }
6545fe6060f1SDimitry Andric     if (!isIdentityFunction(llvm::dwarf::DW_OP_plus, Start)) {
6546fe6060f1SDimitry Andric       if (!pushSCEV(Start))
6547fe6060f1SDimitry Andric         return false;
6548fe6060f1SDimitry Andric       pushOperator(llvm::dwarf::DW_OP_plus);
6549fe6060f1SDimitry Andric     }
6550fe6060f1SDimitry Andric     return true;
6551fe6060f1SDimitry Andric   }
6552fe6060f1SDimitry Andric 
655381ad6265SDimitry Andric   /// Create an expression that is an offset from a value (usually the IV).
655481ad6265SDimitry Andric   void createOffsetExpr(int64_t Offset, Value *OffsetValue) {
655581ad6265SDimitry Andric     pushLocation(OffsetValue);
655681ad6265SDimitry Andric     DIExpression::appendOffset(Expr, Offset);
655781ad6265SDimitry Andric     LLVM_DEBUG(
655881ad6265SDimitry Andric         dbgs() << "scev-salvage: Generated IV offset expression. Offset: "
655981ad6265SDimitry Andric                << std::to_string(Offset) << "\n");
656081ad6265SDimitry Andric   }
656181ad6265SDimitry Andric 
656281ad6265SDimitry Andric   /// Combine a translation of the SCEV and the IV to create an expression that
656381ad6265SDimitry Andric   /// recovers a location's value.
656481ad6265SDimitry Andric   /// returns true if an expression was created.
656581ad6265SDimitry Andric   bool createIterCountExpr(const SCEV *S,
656681ad6265SDimitry Andric                            const SCEVDbgValueBuilder &IterationCount,
656781ad6265SDimitry Andric                            ScalarEvolution &SE) {
656881ad6265SDimitry Andric     // SCEVs for SSA values are most frquently of the form
656981ad6265SDimitry Andric     // {start,+,stride}, but sometimes they are ({start,+,stride} + %a + ..).
657081ad6265SDimitry Andric     // This is because %a is a PHI node that is not the IV. However, these
657181ad6265SDimitry Andric     // SCEVs have not been observed to result in debuginfo-lossy optimisations,
657281ad6265SDimitry Andric     // so its not expected this point will be reached.
657381ad6265SDimitry Andric     if (!isa<SCEVAddRecExpr>(S))
657481ad6265SDimitry Andric       return false;
657581ad6265SDimitry Andric 
657681ad6265SDimitry Andric     LLVM_DEBUG(dbgs() << "scev-salvage: Location to salvage SCEV: " << *S
657781ad6265SDimitry Andric                       << '\n');
657881ad6265SDimitry Andric 
657981ad6265SDimitry Andric     const auto *Rec = cast<SCEVAddRecExpr>(S);
658081ad6265SDimitry Andric     if (!Rec->isAffine())
658181ad6265SDimitry Andric       return false;
658281ad6265SDimitry Andric 
658381ad6265SDimitry Andric     if (S->getExpressionSize() > MaxSCEVSalvageExpressionSize)
658481ad6265SDimitry Andric       return false;
658581ad6265SDimitry Andric 
658681ad6265SDimitry Andric     // Initialise a new builder with the iteration count expression. In
658781ad6265SDimitry Andric     // combination with the value's SCEV this enables recovery.
658881ad6265SDimitry Andric     clone(IterationCount);
658981ad6265SDimitry Andric     if (!SCEVToValueExpr(*Rec, SE))
659081ad6265SDimitry Andric       return false;
659181ad6265SDimitry Andric 
659281ad6265SDimitry Andric     return true;
659381ad6265SDimitry Andric   }
659481ad6265SDimitry Andric 
6595fe6060f1SDimitry Andric   /// Convert a SCEV of a value to a DIExpression that is pushed onto the
6596fe6060f1SDimitry Andric   /// builder's expression stack. The stack should already contain an
6597fe6060f1SDimitry Andric   /// expression for the iteration count, so that it can be multiplied by
6598fe6060f1SDimitry Andric   /// the stride and added to the start.
6599fe6060f1SDimitry Andric   /// Components of the expression are omitted if they are an identity function.
6600fe6060f1SDimitry Andric   bool SCEVToIterCountExpr(const llvm::SCEVAddRecExpr &SAR,
6601fe6060f1SDimitry Andric                            ScalarEvolution &SE) {
6602fe6060f1SDimitry Andric     assert(SAR.isAffine() && "Expected affine SCEV");
6603fe6060f1SDimitry Andric     if (isa<SCEVAddRecExpr>(SAR.getStart())) {
6604fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV. Unsupported nested AddRec: "
6605fe6060f1SDimitry Andric                         << SAR << '\n');
6606fe6060f1SDimitry Andric       return false;
6607fe6060f1SDimitry Andric     }
6608fe6060f1SDimitry Andric     const SCEV *Start = SAR.getStart();
6609fe6060f1SDimitry Andric     const SCEV *Stride = SAR.getStepRecurrence(SE);
6610fe6060f1SDimitry Andric 
6611fe6060f1SDimitry Andric     // Skip pushing arithmetic noops.
6612fe6060f1SDimitry Andric     if (!isIdentityFunction(llvm::dwarf::DW_OP_minus, Start)) {
6613fe6060f1SDimitry Andric       if (!pushSCEV(Start))
6614fe6060f1SDimitry Andric         return false;
6615fe6060f1SDimitry Andric       pushOperator(llvm::dwarf::DW_OP_minus);
6616fe6060f1SDimitry Andric     }
6617fe6060f1SDimitry Andric     if (!isIdentityFunction(llvm::dwarf::DW_OP_div, Stride)) {
6618fe6060f1SDimitry Andric       if (!pushSCEV(Stride))
6619fe6060f1SDimitry Andric         return false;
6620fe6060f1SDimitry Andric       pushOperator(llvm::dwarf::DW_OP_div);
6621fe6060f1SDimitry Andric     }
6622fe6060f1SDimitry Andric     return true;
6623fe6060f1SDimitry Andric   }
662481ad6265SDimitry Andric 
662581ad6265SDimitry Andric   // Append the current expression and locations to a location list and an
662681ad6265SDimitry Andric   // expression list. Modify the DW_OP_LLVM_arg indexes to account for
662781ad6265SDimitry Andric   // the locations already present in the destination list.
662881ad6265SDimitry Andric   void appendToVectors(SmallVectorImpl<uint64_t> &DestExpr,
662981ad6265SDimitry Andric                        SmallVectorImpl<Value *> &DestLocations) {
663081ad6265SDimitry Andric     assert(!DestLocations.empty() &&
663181ad6265SDimitry Andric            "Expected the locations vector to contain the IV");
663281ad6265SDimitry Andric     // The DWARF_OP_LLVM_arg arguments of the expression being appended must be
663381ad6265SDimitry Andric     // modified to account for the locations already in the destination vector.
663481ad6265SDimitry Andric     // All builders contain the IV as the first location op.
663581ad6265SDimitry Andric     assert(!LocationOps.empty() &&
663681ad6265SDimitry Andric            "Expected the location ops to contain the IV.");
663781ad6265SDimitry Andric     // DestIndexMap[n] contains the index in DestLocations for the nth
663881ad6265SDimitry Andric     // location in this SCEVDbgValueBuilder.
663981ad6265SDimitry Andric     SmallVector<uint64_t, 2> DestIndexMap;
664081ad6265SDimitry Andric     for (const auto &Op : LocationOps) {
664181ad6265SDimitry Andric       auto It = find(DestLocations, Op);
664281ad6265SDimitry Andric       if (It != DestLocations.end()) {
664381ad6265SDimitry Andric         // Location already exists in DestLocations, reuse existing ArgIndex.
664481ad6265SDimitry Andric         DestIndexMap.push_back(std::distance(DestLocations.begin(), It));
664581ad6265SDimitry Andric         continue;
664681ad6265SDimitry Andric       }
664781ad6265SDimitry Andric       // Location is not in DestLocations, add it.
664881ad6265SDimitry Andric       DestIndexMap.push_back(DestLocations.size());
664981ad6265SDimitry Andric       DestLocations.push_back(Op);
665081ad6265SDimitry Andric     }
665181ad6265SDimitry Andric 
665281ad6265SDimitry Andric     for (const auto &Op : expr_ops()) {
665381ad6265SDimitry Andric       if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
665481ad6265SDimitry Andric         Op.appendToVector(DestExpr);
665581ad6265SDimitry Andric         continue;
665681ad6265SDimitry Andric       }
665781ad6265SDimitry Andric 
665881ad6265SDimitry Andric       DestExpr.push_back(dwarf::DW_OP_LLVM_arg);
665981ad6265SDimitry Andric       // `DW_OP_LLVM_arg n` represents the nth LocationOp in this SCEV,
666081ad6265SDimitry Andric       // DestIndexMap[n] contains its new index in DestLocations.
666181ad6265SDimitry Andric       uint64_t NewIndex = DestIndexMap[Op.getArg(0)];
666281ad6265SDimitry Andric       DestExpr.push_back(NewIndex);
666381ad6265SDimitry Andric     }
666481ad6265SDimitry Andric   }
6665fe6060f1SDimitry Andric };
6666fe6060f1SDimitry Andric 
666781ad6265SDimitry Andric /// Holds all the required data to salvage a dbg.value using the pre-LSR SCEVs
666881ad6265SDimitry Andric /// and DIExpression.
6669fe6060f1SDimitry Andric struct DVIRecoveryRec {
667081ad6265SDimitry Andric   DVIRecoveryRec(DbgValueInst *DbgValue)
66717a6dacacSDimitry Andric       : DbgRef(DbgValue), Expr(DbgValue->getExpression()),
667281ad6265SDimitry Andric         HadLocationArgList(false) {}
66730fca6ea1SDimitry Andric   DVIRecoveryRec(DbgVariableRecord *DVR)
66740fca6ea1SDimitry Andric       : DbgRef(DVR), Expr(DVR->getExpression()), HadLocationArgList(false) {}
667581ad6265SDimitry Andric 
66760fca6ea1SDimitry Andric   PointerUnion<DbgValueInst *, DbgVariableRecord *> DbgRef;
6677fe6060f1SDimitry Andric   DIExpression *Expr;
667881ad6265SDimitry Andric   bool HadLocationArgList;
667981ad6265SDimitry Andric   SmallVector<WeakVH, 2> LocationOps;
668081ad6265SDimitry Andric   SmallVector<const llvm::SCEV *, 2> SCEVs;
668181ad6265SDimitry Andric   SmallVector<std::unique_ptr<SCEVDbgValueBuilder>, 2> RecoveryExprs;
668281ad6265SDimitry Andric 
668381ad6265SDimitry Andric   void clear() {
668481ad6265SDimitry Andric     for (auto &RE : RecoveryExprs)
668581ad6265SDimitry Andric       RE.reset();
668681ad6265SDimitry Andric     RecoveryExprs.clear();
668781ad6265SDimitry Andric   }
668881ad6265SDimitry Andric 
668981ad6265SDimitry Andric   ~DVIRecoveryRec() { clear(); }
6690fe6060f1SDimitry Andric };
6691349cc55cSDimitry Andric } // namespace
6692fe6060f1SDimitry Andric 
669381ad6265SDimitry Andric /// Returns the total number of DW_OP_llvm_arg operands in the expression.
669481ad6265SDimitry Andric /// This helps in determining if a DIArglist is necessary or can be omitted from
669581ad6265SDimitry Andric /// the dbg.value.
669681ad6265SDimitry Andric static unsigned numLLVMArgOps(SmallVectorImpl<uint64_t> &Expr) {
669781ad6265SDimitry Andric   auto expr_ops = ToDwarfOpIter(Expr);
669881ad6265SDimitry Andric   unsigned Count = 0;
669981ad6265SDimitry Andric   for (auto Op : expr_ops)
670081ad6265SDimitry Andric     if (Op.getOp() == dwarf::DW_OP_LLVM_arg)
670181ad6265SDimitry Andric       Count++;
670281ad6265SDimitry Andric   return Count;
6703fe6060f1SDimitry Andric }
6704fe6060f1SDimitry Andric 
670581ad6265SDimitry Andric /// Overwrites DVI with the location and Ops as the DIExpression. This will
670681ad6265SDimitry Andric /// create an invalid expression if Ops has any dwarf::DW_OP_llvm_arg operands,
670781ad6265SDimitry Andric /// because a DIArglist is not created for the first argument of the dbg.value.
67087a6dacacSDimitry Andric template <typename T>
67097a6dacacSDimitry Andric static void updateDVIWithLocation(T &DbgVal, Value *Location,
671081ad6265SDimitry Andric                                   SmallVectorImpl<uint64_t> &Ops) {
67117a6dacacSDimitry Andric   assert(numLLVMArgOps(Ops) == 0 && "Expected expression that does not "
67127a6dacacSDimitry Andric                                     "contain any DW_OP_llvm_arg operands.");
67137a6dacacSDimitry Andric   DbgVal.setRawLocation(ValueAsMetadata::get(Location));
67147a6dacacSDimitry Andric   DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
67157a6dacacSDimitry Andric   DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
6716349cc55cSDimitry Andric }
6717349cc55cSDimitry Andric 
671881ad6265SDimitry Andric /// Overwrite DVI with locations placed into a DIArglist.
67197a6dacacSDimitry Andric template <typename T>
67207a6dacacSDimitry Andric static void updateDVIWithLocations(T &DbgVal,
672181ad6265SDimitry Andric                                    SmallVectorImpl<Value *> &Locations,
672281ad6265SDimitry Andric                                    SmallVectorImpl<uint64_t> &Ops) {
672381ad6265SDimitry Andric   assert(numLLVMArgOps(Ops) != 0 &&
672481ad6265SDimitry Andric          "Expected expression that references DIArglist locations using "
672581ad6265SDimitry Andric          "DW_OP_llvm_arg operands.");
672681ad6265SDimitry Andric   SmallVector<ValueAsMetadata *, 3> MetadataLocs;
672781ad6265SDimitry Andric   for (Value *V : Locations)
672881ad6265SDimitry Andric     MetadataLocs.push_back(ValueAsMetadata::get(V));
672981ad6265SDimitry Andric   auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
67307a6dacacSDimitry Andric   DbgVal.setRawLocation(llvm::DIArgList::get(DbgVal.getContext(), ValArrayRef));
67317a6dacacSDimitry Andric   DbgVal.setExpression(DIExpression::get(DbgVal.getContext(), Ops));
673281ad6265SDimitry Andric }
673381ad6265SDimitry Andric 
673481ad6265SDimitry Andric /// Write the new expression and new location ops for the dbg.value. If possible
673581ad6265SDimitry Andric /// reduce the szie of the dbg.value intrinsic by omitting DIArglist. This
673681ad6265SDimitry Andric /// can be omitted if:
673781ad6265SDimitry Andric /// 1. There is only a single location, refenced by a single DW_OP_llvm_arg.
673881ad6265SDimitry Andric /// 2. The DW_OP_LLVM_arg is the first operand in the expression.
673981ad6265SDimitry Andric static void UpdateDbgValueInst(DVIRecoveryRec &DVIRec,
674081ad6265SDimitry Andric                                SmallVectorImpl<Value *> &NewLocationOps,
674181ad6265SDimitry Andric                                SmallVectorImpl<uint64_t> &NewExpr) {
67427a6dacacSDimitry Andric   auto UpdateDbgValueInstImpl = [&](auto *DbgVal) {
674381ad6265SDimitry Andric     unsigned NumLLVMArgs = numLLVMArgOps(NewExpr);
674481ad6265SDimitry Andric     if (NumLLVMArgs == 0) {
674581ad6265SDimitry Andric       // Location assumed to be on the stack.
67467a6dacacSDimitry Andric       updateDVIWithLocation(*DbgVal, NewLocationOps[0], NewExpr);
674781ad6265SDimitry Andric     } else if (NumLLVMArgs == 1 && NewExpr[0] == dwarf::DW_OP_LLVM_arg) {
674881ad6265SDimitry Andric       // There is only a single DW_OP_llvm_arg at the start of the expression,
674981ad6265SDimitry Andric       // so it can be omitted along with DIArglist.
675081ad6265SDimitry Andric       assert(NewExpr[1] == 0 &&
675181ad6265SDimitry Andric              "Lone LLVM_arg in a DIExpression should refer to location-op 0.");
675281ad6265SDimitry Andric       llvm::SmallVector<uint64_t, 6> ShortenedOps(llvm::drop_begin(NewExpr, 2));
67537a6dacacSDimitry Andric       updateDVIWithLocation(*DbgVal, NewLocationOps[0], ShortenedOps);
675481ad6265SDimitry Andric     } else {
675581ad6265SDimitry Andric       // Multiple DW_OP_llvm_arg, so DIArgList is strictly necessary.
67567a6dacacSDimitry Andric       updateDVIWithLocations(*DbgVal, NewLocationOps, NewExpr);
675781ad6265SDimitry Andric     }
675881ad6265SDimitry Andric 
675981ad6265SDimitry Andric     // If the DIExpression was previously empty then add the stack terminator.
67607a6dacacSDimitry Andric     // Non-empty expressions have only had elements inserted into them and so
67617a6dacacSDimitry Andric     // the terminator should already be present e.g. stack_value or fragment.
67627a6dacacSDimitry Andric     DIExpression *SalvageExpr = DbgVal->getExpression();
676381ad6265SDimitry Andric     if (!DVIRec.Expr->isComplex() && SalvageExpr->isComplex()) {
67647a6dacacSDimitry Andric       SalvageExpr =
67657a6dacacSDimitry Andric           DIExpression::append(SalvageExpr, {dwarf::DW_OP_stack_value});
67667a6dacacSDimitry Andric       DbgVal->setExpression(SalvageExpr);
676781ad6265SDimitry Andric     }
67687a6dacacSDimitry Andric   };
67697a6dacacSDimitry Andric   if (isa<DbgValueInst *>(DVIRec.DbgRef))
67707a6dacacSDimitry Andric     UpdateDbgValueInstImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
67717a6dacacSDimitry Andric   else
67720fca6ea1SDimitry Andric     UpdateDbgValueInstImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
677381ad6265SDimitry Andric }
677481ad6265SDimitry Andric 
677506c3fb27SDimitry Andric /// Cached location ops may be erased during LSR, in which case a poison is
677681ad6265SDimitry Andric /// required when restoring from the cache. The type of that location is no
677706c3fb27SDimitry Andric /// longer available, so just use int8. The poison will be replaced by one or
677881ad6265SDimitry Andric /// more locations later when a SCEVDbgValueBuilder selects alternative
677981ad6265SDimitry Andric /// locations to use for the salvage.
678006c3fb27SDimitry Andric static Value *getValueOrPoison(WeakVH &VH, LLVMContext &C) {
678106c3fb27SDimitry Andric   return (VH) ? VH : PoisonValue::get(llvm::Type::getInt8Ty(C));
678281ad6265SDimitry Andric }
678381ad6265SDimitry Andric 
/// Restore the DVI's pre-LSR arguments. Substitute poison for any erased
/// values (see getValueOrPoison). Handles both debug-value representations:
/// dbg.value intrinsics and DbgVariableRecords, dispatched on DVIRec.DbgRef.
static void restorePreTransformState(DVIRecoveryRec &DVIRec) {
  // Generic lambda so the same restore logic serves DbgValueInst and
  // DbgVariableRecord, which share the accessor API used here.
  auto RestorePreTransformStateImpl = [&](auto *DbgVal) {
    LLVM_DEBUG(dbgs() << "scev-salvage: restore dbg.value to pre-LSR state\n"
                      << "scev-salvage: post-LSR: " << *DbgVal << '\n');
    assert(DVIRec.Expr && "Expected an expression");
    // Reinstate the DIExpression cached before LSR modified this debug value.
    DbgVal->setExpression(DVIRec.Expr);

    // Even a single location-op may be inside a DIArgList and referenced with
    // DW_OP_LLVM_arg, which is valid only with a DIArgList.
    if (!DVIRec.HadLocationArgList) {
      assert(DVIRec.LocationOps.size() == 1 &&
             "Unexpected number of location ops.");
      // LSR's unsuccessful salvage attempt may have added DIArgList, which in
      // this case was not present before, so force the location back to a
      // single uncontained Value.
      Value *CachedValue =
          getValueOrPoison(DVIRec.LocationOps[0], DbgVal->getContext());
      DbgVal->setRawLocation(ValueAsMetadata::get(CachedValue));
    } else {
      // Rebuild the DIArgList from the cached location ops, substituting
      // poison for any op that LSR has since erased.
      SmallVector<ValueAsMetadata *, 3> MetadataLocs;
      for (WeakVH VH : DVIRec.LocationOps) {
        Value *CachedValue = getValueOrPoison(VH, DbgVal->getContext());
        MetadataLocs.push_back(ValueAsMetadata::get(CachedValue));
      }
      auto ValArrayRef = llvm::ArrayRef<llvm::ValueAsMetadata *>(MetadataLocs);
      DbgVal->setRawLocation(
          llvm::DIArgList::get(DbgVal->getContext(), ValArrayRef));
    }
    LLVM_DEBUG(dbgs() << "scev-salvage: pre-LSR: " << *DbgVal << '\n');
  };
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    RestorePreTransformStateImpl(cast<DbgValueInst *>(DVIRec.DbgRef));
  else
    RestorePreTransformStateImpl(cast<DbgVariableRecord *>(DVIRec.DbgRef));
}
682081ad6265SDimitry Andric 
/// Attempt to rewrite a single cached debug value (dbg.value intrinsic or
/// DbgVariableRecord) so that its locations and DIExpression are expressed in
/// terms of the post-LSR induction variable \p LSRInductionVar. Returns true
/// if the debug value was updated, false if salvage was abandoned.
static bool SalvageDVI(llvm::Loop *L, ScalarEvolution &SE,
                       llvm::PHINode *LSRInductionVar, DVIRecoveryRec &DVIRec,
                       const SCEV *SCEVInductionVar,
                       SCEVDbgValueBuilder IterCountExpr) {

  // Only debug values left in a kill (undef) state need salvaging; anything
  // still live was not damaged by LSR.
  if (isa<DbgValueInst *>(DVIRec.DbgRef)
          ? !cast<DbgValueInst *>(DVIRec.DbgRef)->isKillLocation()
          : !cast<DbgVariableRecord *>(DVIRec.DbgRef)->isKillLocation())
    return false;

  // LSR may have caused several changes to the dbg.value in the failed salvage
  // attempt. So restore the DIExpression, the location ops and also the
  // location ops format, which is always DIArglist for multiple ops, but only
  // sometimes for a single op.
  restorePreTransformState(DVIRec);

  // LocationOpIndexMap[i] will store the post-LSR location index of
  // the non-optimised out location at pre-LSR index i.
  SmallVector<int64_t, 2> LocationOpIndexMap;
  LocationOpIndexMap.assign(DVIRec.LocationOps.size(), -1);
  // The induction variable always occupies location index 0 so that recovery
  // expressions emitted below can refer to it.
  SmallVector<Value *, 2> NewLocationOps;
  NewLocationOps.push_back(LSRInductionVar);

  for (unsigned i = 0; i < DVIRec.LocationOps.size(); i++) {
    WeakVH VH = DVIRec.LocationOps[i];
    // Place the locations not optimised out in the list first, avoiding
    // inserts later. The map is used to update the DIExpression's
    // DW_OP_LLVM_arg arguments as the expression is updated.
    if (VH && !isa<UndefValue>(VH)) {
      NewLocationOps.push_back(VH);
      LocationOpIndexMap[i] = NewLocationOps.size() - 1;
      LLVM_DEBUG(dbgs() << "scev-salvage: Location index " << i
                        << " now at index " << LocationOpIndexMap[i] << "\n");
      continue;
    }

    // It's possible that a value referred to in the SCEV may have been
    // optimised out by LSR.
    if (SE.containsErasedValue(DVIRec.SCEVs[i]) ||
        SE.containsUndefs(DVIRec.SCEVs[i])) {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV for location at index: " << i
                        << " refers to a location that is now undef or erased. "
                           "Salvage abandoned.\n");
      return false;
    }

    LLVM_DEBUG(dbgs() << "scev-salvage: salvaging location at index " << i
                      << " with SCEV: " << *DVIRec.SCEVs[i] << "\n");

    DVIRec.RecoveryExprs[i] = std::make_unique<SCEVDbgValueBuilder>();
    SCEVDbgValueBuilder *SalvageExpr = DVIRec.RecoveryExprs[i].get();

    // Create an offset-based salvage expression if possible, as it requires
    // less DWARF ops than an iteration count-based expression.
    if (std::optional<APInt> Offset =
            SE.computeConstantDifference(DVIRec.SCEVs[i], SCEVInductionVar)) {
      // Offsets wider than 64 bits cannot be encoded; leave the builder empty.
      if (Offset->getSignificantBits() <= 64)
        SalvageExpr->createOffsetExpr(Offset->getSExtValue(), LSRInductionVar);
    } else if (!SalvageExpr->createIterCountExpr(DVIRec.SCEVs[i], IterCountExpr,
                                                 SE))
      return false;
  }

  // Merge the DbgValueBuilder generated expressions and the original
  // DIExpression, place the result into a new vector.
  SmallVector<uint64_t, 3> NewExpr;
  if (DVIRec.Expr->getNumElements() == 0) {
    assert(DVIRec.RecoveryExprs.size() == 1 &&
           "Expected only a single recovery expression for an empty "
           "DIExpression.");
    assert(DVIRec.RecoveryExprs[0] &&
           "Expected a SCEVDbgSalvageBuilder for location 0");
    SCEVDbgValueBuilder *B = DVIRec.RecoveryExprs[0].get();
    B->appendToVectors(NewExpr, NewLocationOps);
  }
  for (const auto &Op : DVIRec.Expr->expr_ops()) {
    // Most Ops needn't be updated.
    if (Op.getOp() != dwarf::DW_OP_LLVM_arg) {
      Op.appendToVector(NewExpr);
      continue;
    }

    uint64_t LocationArgIndex = Op.getArg(0);
    SCEVDbgValueBuilder *DbgBuilder =
        DVIRec.RecoveryExprs[LocationArgIndex].get();
    // The location doesn't have a SCEVDbgValueBuilder, so LSR did not
    // optimise it away. So just translate the argument to the updated
    // location index.
    if (!DbgBuilder) {
      NewExpr.push_back(dwarf::DW_OP_LLVM_arg);
      assert(LocationOpIndexMap[Op.getArg(0)] != -1 &&
             "Expected a positive index for the location-op position.");
      NewExpr.push_back(LocationOpIndexMap[Op.getArg(0)]);
      continue;
    }
    // The location has a recovery expression.
    DbgBuilder->appendToVectors(NewExpr, NewLocationOps);
  }

  UpdateDbgValueInst(DVIRec, NewLocationOps, NewExpr);
  if (isa<DbgValueInst *>(DVIRec.DbgRef))
    LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
                      << *cast<DbgValueInst *>(DVIRec.DbgRef) << "\n");
  else
    LLVM_DEBUG(dbgs() << "scev-salvage: Updated DVI: "
                      << *cast<DbgVariableRecord *>(DVIRec.DbgRef) << "\n");
  return true;
}
692981ad6265SDimitry Andric 
693081ad6265SDimitry Andric /// Obtain an expression for the iteration count, then attempt to salvage the
693181ad6265SDimitry Andric /// dbg.value intrinsics.
69327a6dacacSDimitry Andric static void DbgRewriteSalvageableDVIs(
69337a6dacacSDimitry Andric     llvm::Loop *L, ScalarEvolution &SE, llvm::PHINode *LSRInductionVar,
693481ad6265SDimitry Andric     SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &DVIToUpdate) {
6935fe6060f1SDimitry Andric   if (DVIToUpdate.empty())
6936349cc55cSDimitry Andric     return;
6937fe6060f1SDimitry Andric 
6938fe6060f1SDimitry Andric   const llvm::SCEV *SCEVInductionVar = SE.getSCEV(LSRInductionVar);
6939fe6060f1SDimitry Andric   assert(SCEVInductionVar &&
6940fe6060f1SDimitry Andric          "Anticipated a SCEV for the post-LSR induction variable");
6941fe6060f1SDimitry Andric 
6942fe6060f1SDimitry Andric   if (const SCEVAddRecExpr *IVAddRec =
6943fe6060f1SDimitry Andric           dyn_cast<SCEVAddRecExpr>(SCEVInductionVar)) {
69446e75b2fbSDimitry Andric     if (!IVAddRec->isAffine())
6945349cc55cSDimitry Andric       return;
69466e75b2fbSDimitry Andric 
694781ad6265SDimitry Andric     // Prevent translation using excessive resources.
6948349cc55cSDimitry Andric     if (IVAddRec->getExpressionSize() > MaxSCEVSalvageExpressionSize)
6949349cc55cSDimitry Andric       return;
6950349cc55cSDimitry Andric 
6951349cc55cSDimitry Andric     // The iteration count is required to recover location values.
6952fe6060f1SDimitry Andric     SCEVDbgValueBuilder IterCountExpr;
695381ad6265SDimitry Andric     IterCountExpr.pushLocation(LSRInductionVar);
6954fe6060f1SDimitry Andric     if (!IterCountExpr.SCEVToIterCountExpr(*IVAddRec, SE))
6955349cc55cSDimitry Andric       return;
6956fe6060f1SDimitry Andric 
6957fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "scev-salvage: IV SCEV: " << *SCEVInductionVar
6958fe6060f1SDimitry Andric                       << '\n');
6959fe6060f1SDimitry Andric 
6960fe6060f1SDimitry Andric     for (auto &DVIRec : DVIToUpdate) {
696181ad6265SDimitry Andric       SalvageDVI(L, SE, LSRInductionVar, *DVIRec, SCEVInductionVar,
696281ad6265SDimitry Andric                  IterCountExpr);
6963fe6060f1SDimitry Andric     }
6964349cc55cSDimitry Andric   }
6965fe6060f1SDimitry Andric }
6966fe6060f1SDimitry Andric 
/// Identify and cache salvageable DVI locations and expressions along with the
/// corresponding SCEV(s). Also ensure that the DVI is not deleted between
/// caching and salvaging.
static void DbgGatherSalvagableDVI(
    Loop *L, ScalarEvolution &SE,
    SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> &SalvageableDVISCEVs,
    SmallSet<AssertingVH<DbgValueInst>, 2> &DVIHandles) {
  for (const auto &B : L->getBlocks()) {
    for (auto &I : *B) {
      // Generic lambda: caches one debug value (DbgValueInst or
      // DbgVariableRecord) if all of its location ops are translatable.
      // Returns true if a recovery record was created.
      auto ProcessDbgValue = [&](auto *DbgVal) -> bool {
        // Ensure that if any location op is undef that the dbg.value is not
        // cached.
        if (DbgVal->isKillLocation())
          return false;

        // Check that the location op SCEVs are suitable for translation to
        // DIExpression.
        const auto &HasTranslatableLocationOps =
            [&](const auto *DbgValToTranslate) -> bool {
          for (const auto LocOp : DbgValToTranslate->location_ops()) {
            if (!LocOp)
              return false;

            if (!SE.isSCEVable(LocOp->getType()))
              return false;

            const SCEV *S = SE.getSCEV(LocOp);
            if (SE.containsUndefs(S))
              return false;
          }
          return true;
        };

        if (!HasTranslatableLocationOps(DbgVal))
          return false;

        std::unique_ptr<DVIRecoveryRec> NewRec =
            std::make_unique<DVIRecoveryRec>(DbgVal);
        // Each location Op may need a SCEVDbgValueBuilder in order to recover
        // it. Pre-allocating a vector will enable quick lookups of the builder
        // later during the salvage.
        NewRec->RecoveryExprs.resize(DbgVal->getNumVariableLocationOps());
        for (const auto LocOp : DbgVal->location_ops()) {
          NewRec->SCEVs.push_back(SE.getSCEV(LocOp));
          NewRec->LocationOps.push_back(LocOp);
          NewRec->HadLocationArgList = DbgVal->hasArgList();
        }
        SalvageableDVISCEVs.push_back(std::move(NewRec));
        return true;
      };
      // Process debug records attached to this instruction first.
      for (DbgVariableRecord &DVR : filterDbgVars(I.getDbgRecordRange())) {
        if (DVR.isDbgValue() || DVR.isDbgAssign())
          ProcessDbgValue(&DVR);
      }
      // Then handle the instruction itself if it is a dbg.value intrinsic;
      // only intrinsics go into DVIHandles (AssertingVH guards deletion).
      auto DVI = dyn_cast<DbgValueInst>(&I);
      if (!DVI)
        continue;
      if (ProcessDbgValue(DVI))
        DVIHandles.insert(DVI);
    }
  }
}
7029e8d8bef9SDimitry Andric 
7030fe6060f1SDimitry Andric /// Ideally pick the PHI IV inserted by ScalarEvolutionExpander. As a fallback
7031fe6060f1SDimitry Andric /// any PHi from the loop header is usable, but may have less chance of
7032fe6060f1SDimitry Andric /// surviving subsequent transforms.
7033fe6060f1SDimitry Andric static llvm::PHINode *GetInductionVariable(const Loop &L, ScalarEvolution &SE,
7034fe6060f1SDimitry Andric                                            const LSRInstance &LSR) {
7035349cc55cSDimitry Andric 
7036349cc55cSDimitry Andric   auto IsSuitableIV = [&](PHINode *P) {
7037349cc55cSDimitry Andric     if (!SE.isSCEVable(P->getType()))
7038349cc55cSDimitry Andric       return false;
7039349cc55cSDimitry Andric     if (const SCEVAddRecExpr *Rec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(P)))
7040349cc55cSDimitry Andric       return Rec->isAffine() && !SE.containsUndefs(SE.getSCEV(P));
7041349cc55cSDimitry Andric     return false;
7042349cc55cSDimitry Andric   };
7043349cc55cSDimitry Andric 
7044349cc55cSDimitry Andric   // For now, just pick the first IV that was generated and inserted by
7045349cc55cSDimitry Andric   // ScalarEvolution. Ideally pick an IV that is unlikely to be optimised away
7046349cc55cSDimitry Andric   // by subsequent transforms.
7047fe6060f1SDimitry Andric   for (const WeakVH &IV : LSR.getScalarEvolutionIVs()) {
7048fe6060f1SDimitry Andric     if (!IV)
7049e8d8bef9SDimitry Andric       continue;
7050fe6060f1SDimitry Andric 
7051349cc55cSDimitry Andric     // There should only be PHI node IVs.
7052349cc55cSDimitry Andric     PHINode *P = cast<PHINode>(&*IV);
7053349cc55cSDimitry Andric 
7054349cc55cSDimitry Andric     if (IsSuitableIV(P))
7055349cc55cSDimitry Andric       return P;
7056fe6060f1SDimitry Andric   }
7057fe6060f1SDimitry Andric 
7058349cc55cSDimitry Andric   for (PHINode &P : L.getHeader()->phis()) {
7059349cc55cSDimitry Andric     if (IsSuitableIV(&P))
7060349cc55cSDimitry Andric       return &P;
7061e8d8bef9SDimitry Andric   }
7062fe6060f1SDimitry Andric   return nullptr;
7063e8d8bef9SDimitry Andric }
7064e8d8bef9SDimitry Andric 
/// Determine whether the loop's terminating condition can be rewritten in
/// terms of an alternate induction variable. On success, returns the tuple
/// (ToFold, ToHelpFold, TermValueS, MustDropPoison): the IV currently used by
/// the exit test, the replacement IV, the SCEV of the replacement IV's value
/// on the exiting iteration, and whether poison-generating flags must be
/// dropped from the replacement IV's increment. Returns std::nullopt when the
/// transform is not applicable or cannot be proven sound.
static std::optional<std::tuple<PHINode *, PHINode *, const SCEV *, bool>>
canFoldTermCondOfLoop(Loop *L, ScalarEvolution &SE, DominatorTree &DT,
                      const LoopInfo &LI, const TargetTransformInfo &TTI) {
  if (!L->isInnermost()) {
    LLVM_DEBUG(dbgs() << "Cannot fold on non-innermost loop\n");
    return std::nullopt;
  }
  // Only inspect on simple loop structure
  if (!L->isLoopSimplifyForm()) {
    LLVM_DEBUG(dbgs() << "Cannot fold on non-simple loop\n");
    return std::nullopt;
  }

  if (!SE.hasLoopInvariantBackedgeTakenCount(L)) {
    LLVM_DEBUG(dbgs() << "Cannot fold on backedge that is loop variant\n");
    return std::nullopt;
  }

  // The latch must end in a conditional branch on an icmp with one use.
  BasicBlock *LoopLatch = L->getLoopLatch();
  BranchInst *BI = dyn_cast<BranchInst>(LoopLatch->getTerminator());
  if (!BI || BI->isUnconditional())
    return std::nullopt;
  auto *TermCond = dyn_cast<ICmpInst>(BI->getCondition());
  if (!TermCond) {
    LLVM_DEBUG(
        dbgs() << "Cannot fold on branching condition that is not an ICmpInst");
    return std::nullopt;
  }
  if (!TermCond->hasOneUse()) {
    LLVM_DEBUG(
        dbgs()
        << "Cannot replace terminating condition with more than one use\n");
    return std::nullopt;
  }

  BinaryOperator *LHS = dyn_cast<BinaryOperator>(TermCond->getOperand(0));
  Value *RHS = TermCond->getOperand(1);
  if (!LHS || !L->isLoopInvariant(RHS))
    // We could pattern match the inverse form of the icmp, but that is
    // non-canonical, and this pass is running *very* late in the pipeline.
    return std::nullopt;

  // Find the IV used by the current exit condition.
  PHINode *ToFold;
  Value *ToFoldStart, *ToFoldStep;
  if (!matchSimpleRecurrence(LHS, ToFold, ToFoldStart, ToFoldStep))
    return std::nullopt;

  // Ensure the simple recurrence is a part of the current loop.
  if (ToFold->getParent() != L->getHeader())
    return std::nullopt;

  // If that IV isn't dead after we rewrite the exit condition in terms of
  // another IV, there's no point in doing the transform.
  if (!isAlmostDeadIV(ToFold, LoopLatch, TermCond))
    return std::nullopt;

  // Inserting instructions in the preheader has a runtime cost, scale
  // the allowed cost with the loop's trip count as best we can.
  const unsigned ExpansionBudget = [&]() {
    unsigned Budget = 2 * SCEVCheapExpansionBudget;
    if (unsigned SmallTC = SE.getSmallConstantMaxTripCount(L))
      return std::min(Budget, SmallTC);
    if (std::optional<unsigned> SmallTC = getLoopEstimatedTripCount(L))
      return std::min(Budget, *SmallTC);
    // Unknown trip count, assume long running by default.
    return Budget;
  }();

  const SCEV *BECount = SE.getBackedgeTakenCount(L);
  const DataLayout &DL = L->getHeader()->getDataLayout();
  SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");

  // Scan the header PHIs for a candidate replacement IV; the last legal
  // candidate wins (see the comment at the bottom of the loop).
  PHINode *ToHelpFold = nullptr;
  const SCEV *TermValueS = nullptr;
  bool MustDropPoison = false;
  auto InsertPt = L->getLoopPreheader()->getTerminator();
  for (PHINode &PN : L->getHeader()->phis()) {
    if (ToFold == &PN)
      continue;

    if (!SE.isSCEVable(PN.getType())) {
      LLVM_DEBUG(dbgs() << "IV of phi '" << PN
                        << "' is not SCEV-able, not qualified for the "
                           "terminating condition folding.\n");
      continue;
    }
    const SCEVAddRecExpr *AddRec = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
    // Only speculate on affine AddRec
    if (!AddRec || !AddRec->isAffine()) {
      LLVM_DEBUG(dbgs() << "SCEV of phi '" << PN
                        << "' is not an affine add recursion, not qualified "
                           "for the terminating condition folding.\n");
      continue;
    }

    // Check that we can compute the value of AddRec on the exiting iteration
    // without soundness problems.  evaluateAtIteration internally needs
    // to multiply the stride of the iteration number - which may wrap around.
    // The issue here is subtle because computing the result accounting for
    // wrap is insufficient. In order to use the result in an exit test, we
    // must also know that AddRec doesn't take the same value on any previous
    // iteration. The simplest case to consider is a candidate IV which is
    // narrower than the trip count (and thus original IV), but this can
    // also happen due to non-unit strides on the candidate IVs.
    if (!AddRec->hasNoSelfWrap() ||
        !SE.isKnownNonZero(AddRec->getStepRecurrence(SE)))
      continue;

    const SCEVAddRecExpr *PostInc = AddRec->getPostIncExpr(SE);
    const SCEV *TermValueSLocal = PostInc->evaluateAtIteration(BECount, SE);
    if (!Expander.isSafeToExpand(TermValueSLocal)) {
      LLVM_DEBUG(
          dbgs() << "Is not safe to expand terminating value for phi node" << PN
                 << "\n");
      continue;
    }

    if (Expander.isHighCostExpansion(TermValueSLocal, L, ExpansionBudget,
                                     &TTI, InsertPt)) {
      LLVM_DEBUG(
          dbgs() << "Is too expensive to expand terminating value for phi node"
                 << PN << "\n");
      continue;
    }

    // The candidate IV may have been otherwise dead and poison from the
    // very first iteration.  If we can't disprove that, we can't use the IV.
    if (!mustExecuteUBIfPoisonOnPathTo(&PN, LoopLatch->getTerminator(), &DT)) {
      LLVM_DEBUG(dbgs() << "Can not prove poison safety for IV "
                        << PN << "\n");
      continue;
    }

    // The candidate IV may become poison on the last iteration.  If this
    // value is not branched on, this is a well defined program.  We're
    // about to add a new use to this IV, and we have to ensure we don't
    // insert UB which didn't previously exist.
    bool MustDropPoisonLocal = false;
    Instruction *PostIncV =
      cast<Instruction>(PN.getIncomingValueForBlock(LoopLatch));
    if (!mustExecuteUBIfPoisonOnPathTo(PostIncV, LoopLatch->getTerminator(),
                                       &DT)) {
      LLVM_DEBUG(dbgs() << "Can not prove poison safety to insert use"
                        << PN << "\n");

      // If this is a complex recurrence with multiple instructions computing
      // the backedge value, we might need to strip poison flags from all of
      // them.
      if (PostIncV->getOperand(0) != &PN)
        continue;

      // In order to perform the transform, we need to drop the poison
      // generating flags on this instruction (if any).
      MustDropPoisonLocal = PostIncV->hasPoisonGeneratingFlags();
    }

    // We pick the last legal alternate IV.  We could explore choosing an
    // optimal alternate IV if we had a decent heuristic to do so.
    ToHelpFold = &PN;
    TermValueS = TermValueSLocal;
    MustDropPoison = MustDropPoisonLocal;
  }

  LLVM_DEBUG(if (ToFold && !ToHelpFold) dbgs()
                 << "Cannot find other AddRec IV to help folding\n";);

  LLVM_DEBUG(if (ToFold && ToHelpFold) dbgs()
             << "\nFound loop that can fold terminating condition\n"
             << "  BECount (SCEV): " << *SE.getBackedgeTakenCount(L) << "\n"
             << "  TermCond: " << *TermCond << "\n"
             << "  BrandInst: " << *BI << "\n"
             << "  ToFold: " << *ToFold << "\n"
             << "  ToHelpFold: " << *ToHelpFold << "\n");

  if (!ToFold || !ToHelpFold)
    return std::nullopt;
  return std::make_tuple(ToFold, ToHelpFold, TermValueS, MustDropPoison);
}
7244bdd1243dSDimitry Andric 
/// Run the complete Loop Strength Reduction pipeline on loop \p L and clean up
/// after it.  Beyond the core LSRInstance rewrite this also: folds congruent
/// IVs, rewrites loop-exit PHI values via SCEV, optionally folds the loop's
/// terminating condition onto an alternate IV, and salvages debug-value
/// intrinsics whose operands LSR deleted.
/// \param MSSA may be null; MemorySSA is updated only when provided.
/// \returns true if any IR was changed.
static bool ReduceLoopStrength(Loop *L, IVUsers &IU, ScalarEvolution &SE,
                               DominatorTree &DT, LoopInfo &LI,
                               const TargetTransformInfo &TTI,
                               AssumptionCache &AC, TargetLibraryInfo &TLI,
                               MemorySSA *MSSA) {

  // Debug preservation - before we start removing anything identify which DVI
  // meet the salvageable criteria and store their DIExpression and SCEVs.
  SmallVector<std::unique_ptr<DVIRecoveryRec>, 2> SalvageableDVIRecords;
  SmallSet<AssertingVH<DbgValueInst>, 2> DVIHandles;
  DbgGatherSalvagableDVI(L, SE, SalvageableDVIRecords, DVIHandles);

  bool Changed = false;
  // Wrap MemorySSA in an updater only when the analysis is available; all
  // cleanup helpers below tolerate a null MSSAU.
  std::unique_ptr<MemorySSAUpdater> MSSAU;
  if (MSSA)
    MSSAU = std::make_unique<MemorySSAUpdater>(MSSA);

  // Run the main LSR transformation.
  const LSRInstance &Reducer =
      LSRInstance(L, IU, SE, DT, LI, TTI, AC, TLI, MSSAU.get());
  Changed |= Reducer.getChanged();

  // Remove any extra phis created by processing inner loops.
  Changed |= DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
  if (EnablePhiElim && L->isLoopSimplifyForm()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getDataLayout();
    // Non-canonical expansion mode ("lsr"): fold IVs that LSR made congruent.
    SCEVExpander Rewriter(SE, DL, "lsr", false);
#ifndef NDEBUG
    Rewriter.setDebugType(DEBUG_TYPE);
#endif
    unsigned numFolded = Rewriter.replaceCongruentIVs(L, &DT, DeadInsts, &TTI);
    // Drop the expander's internal value map before deleting instructions it
    // may still reference.
    Rewriter.clear();
    if (numFolded) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }
  // LSR may at times remove all uses of an induction variable from a loop.
  // The only remaining use is the PHI in the exit block.
  // When this is the case, if the exit value of the IV can be calculated using
  // SCEV, we can replace the exit block PHI with the final value of the IV and
  // skip the updates in each loop iteration.
  if (L->isRecursivelyLCSSAForm(DT, LI) && L->getExitBlock()) {
    SmallVector<WeakTrackingVH, 16> DeadInsts;
    const DataLayout &DL = L->getHeader()->getDataLayout();
    SCEVExpander Rewriter(SE, DL, "lsr", true);
    // Only rewrite exit values for IVs that became otherwise unused in the
    // loop (UnusedIndVarInLoop predicate).
    int Rewrites = rewriteLoopExitValues(L, &LI, &TLI, &SE, &TTI, Rewriter, &DT,
                                         UnusedIndVarInLoop, DeadInsts);
    Rewriter.clear();
    if (Rewrites) {
      Changed = true;
      RecursivelyDeleteTriviallyDeadInstructionsPermissive(DeadInsts, &TLI,
                                                           MSSAU.get());
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }

  // Decide whether to attempt terminating-condition folding: the command-line
  // flag overrides in either direction; otherwise defer to the target.
  const bool EnableFormTerm = [&] {
    switch (AllowTerminatingConditionFoldingAfterLSR) {
    case cl::BOU_TRUE:
      return true;
    case cl::BOU_FALSE:
      return false;
    case cl::BOU_UNSET:
      return TTI.shouldFoldTerminatingConditionAfterLSR();
    }
    llvm_unreachable("Unhandled cl::boolOrDefault enum");
  }();

  if (EnableFormTerm) {
    // canFoldTermCondOfLoop found an IV (ToFold) whose only remaining purpose
    // is the exit test, plus an alternate IV (ToHelpFold) the test can be
    // rewritten against, with TermValueS the SCEV of its terminating value.
    if (auto Opt = canFoldTermCondOfLoop(L, SE, DT, LI, TTI)) {
      auto [ToFold, ToHelpFold, TermValueS, MustDrop] = *Opt;

      Changed = true;
      NumTermFold++;

      BasicBlock *LoopPreheader = L->getLoopPreheader();
      BasicBlock *LoopLatch = L->getLoopLatch();

      (void)ToFold;
      LLVM_DEBUG(dbgs() << "To fold phi-node:\n"
                        << *ToFold << "\n"
                        << "New term-cond phi-node:\n"
                        << *ToHelpFold << "\n");

      Value *StartValue = ToHelpFold->getIncomingValueForBlock(LoopPreheader);
      (void)StartValue;
      Value *LoopValue = ToHelpFold->getIncomingValueForBlock(LoopLatch);

      // See comment in canFoldTermCondOfLoop on why this is sufficient.
      if (MustDrop)
        cast<Instruction>(LoopValue)->dropPoisonGeneratingFlags();

      // SCEVExpander for both use in preheader and latch
      const DataLayout &DL = L->getHeader()->getDataLayout();
      SCEVExpander Expander(SE, DL, "lsr_fold_term_cond");

      assert(Expander.isSafeToExpand(TermValueS) &&
             "Terminating value was checked safe in canFoldTerminatingCondition");

      // Create new terminating value at loop preheader
      Value *TermValue = Expander.expandCodeFor(TermValueS, ToHelpFold->getType(),
                                                LoopPreheader->getTerminator());

      LLVM_DEBUG(dbgs() << "Start value of new term-cond phi-node:\n"
                        << *StartValue << "\n"
                        << "Terminating value of new term-cond phi-node:\n"
                        << *TermValue << "\n");

      // Create new terminating condition at loop latch
      BranchInst *BI = cast<BranchInst>(LoopLatch->getTerminator());
      ICmpInst *OldTermCond = cast<ICmpInst>(BI->getCondition());
      IRBuilder<> LatchBuilder(LoopLatch->getTerminator());
      Value *NewTermCond =
          LatchBuilder.CreateICmp(CmpInst::ICMP_EQ, LoopValue, TermValue,
                                  "lsr_fold_term_cond.replaced_term_cond");
      // Swap successors to exit loop body if IV equals to new TermValue
      if (BI->getSuccessor(0) == L->getHeader())
        BI->swapSuccessors();

      LLVM_DEBUG(dbgs() << "Old term-cond:\n"
                        << *OldTermCond << "\n"
                        << "New term-cond:\n" << *NewTermCond << "\n");

      BI->setCondition(NewTermCond);

      // Release expander state before erasing the old condition it may map.
      Expander.clear();
      OldTermCond->eraseFromParent();
      DeleteDeadPHIs(L->getHeader(), &TLI, MSSAU.get());
    }
  }

  if (SalvageableDVIRecords.empty())
    return Changed;

  // Obtain relevant IVs and attempt to rewrite the salvageable DVIs with
  // expressions composed using the derived iteration count.
  // TODO: Allow for multiple IV references for nested AddRecSCEVs
  for (const auto &L : LI) {
    if (llvm::PHINode *IV = GetInductionVariable(*L, SE, Reducer))
      DbgRewriteSalvageableDVIs(L, SE, IV, SalvageableDVIRecords);
    else {
      LLVM_DEBUG(dbgs() << "scev-salvage: SCEV salvaging not possible. An IV "
                           "could not be identified.\n");
    }
  }

  // Drop cached expression state so the asserting handles release cleanly.
  for (auto &Rec : SalvageableDVIRecords)
    Rec->clear();
  SalvageableDVIRecords.clear();
  DVIHandles.clear();
  return Changed;
}
74010b57cec5SDimitry Andric 
74020b57cec5SDimitry Andric bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager & /*LPM*/) {
74030b57cec5SDimitry Andric   if (skipLoop(L))
74040b57cec5SDimitry Andric     return false;
74050b57cec5SDimitry Andric 
74060b57cec5SDimitry Andric   auto &IU = getAnalysis<IVUsersWrapperPass>().getIU();
74070b57cec5SDimitry Andric   auto &SE = getAnalysis<ScalarEvolutionWrapperPass>().getSE();
74080b57cec5SDimitry Andric   auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
74090b57cec5SDimitry Andric   auto &LI = getAnalysis<LoopInfoWrapperPass>().getLoopInfo();
74100b57cec5SDimitry Andric   const auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(
74110b57cec5SDimitry Andric       *L->getHeader()->getParent());
74120b57cec5SDimitry Andric   auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(
74130b57cec5SDimitry Andric       *L->getHeader()->getParent());
74145ffd83dbSDimitry Andric   auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(
74158bcb0991SDimitry Andric       *L->getHeader()->getParent());
74165ffd83dbSDimitry Andric   auto *MSSAAnalysis = getAnalysisIfAvailable<MemorySSAWrapperPass>();
74175ffd83dbSDimitry Andric   MemorySSA *MSSA = nullptr;
74185ffd83dbSDimitry Andric   if (MSSAAnalysis)
74195ffd83dbSDimitry Andric     MSSA = &MSSAAnalysis->getMSSA();
74205ffd83dbSDimitry Andric   return ReduceLoopStrength(L, IU, SE, DT, LI, TTI, AC, TLI, MSSA);
74210b57cec5SDimitry Andric }
74220b57cec5SDimitry Andric 
74230b57cec5SDimitry Andric PreservedAnalyses LoopStrengthReducePass::run(Loop &L, LoopAnalysisManager &AM,
74240b57cec5SDimitry Andric                                               LoopStandardAnalysisResults &AR,
74250b57cec5SDimitry Andric                                               LPMUpdater &) {
74260b57cec5SDimitry Andric   if (!ReduceLoopStrength(&L, AM.getResult<IVUsersAnalysis>(L, AR), AR.SE,
74275ffd83dbSDimitry Andric                           AR.DT, AR.LI, AR.TTI, AR.AC, AR.TLI, AR.MSSA))
74280b57cec5SDimitry Andric     return PreservedAnalyses::all();
74290b57cec5SDimitry Andric 
74305ffd83dbSDimitry Andric   auto PA = getLoopPassPreservedAnalyses();
74315ffd83dbSDimitry Andric   if (AR.MSSA)
74325ffd83dbSDimitry Andric     PA.preserve<MemorySSAAnalysis>();
74335ffd83dbSDimitry Andric   return PA;
74340b57cec5SDimitry Andric }
74350b57cec5SDimitry Andric 
// Pass identification, replacement for typeid.
char LoopStrengthReduce::ID = 0;

// Register the legacy pass and its analysis dependencies under the
// "loop-reduce" command-line name.
INITIALIZE_PASS_BEGIN(LoopStrengthReduce, "loop-reduce",
                      "Loop Strength Reduction", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(IVUsersWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopSimplify)
INITIALIZE_PASS_END(LoopStrengthReduce, "loop-reduce",
                    "Loop Strength Reduction", false, false)

// Factory used by the legacy pass manager to create this pass.
Pass *llvm::createLoopStrengthReducePass() { return new LoopStrengthReduce(); }
7450