10b57cec5SDimitry Andric //===- StraightLineStrengthReduce.cpp - -----------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements straight-line strength reduction (SLSR). Unlike loop 100b57cec5SDimitry Andric // strength reduction, this algorithm is designed to reduce arithmetic 110b57cec5SDimitry Andric // redundancy in straight-line code instead of loops. It has proven to be 120b57cec5SDimitry Andric // effective in simplifying arithmetic statements derived from an unrolled loop. 130b57cec5SDimitry Andric // It can also simplify the logic of SeparateConstOffsetFromGEP. 140b57cec5SDimitry Andric // 150b57cec5SDimitry Andric // There are many optimizations we can perform in the domain of SLSR. This file 160b57cec5SDimitry Andric // for now contains only an initial step. Specifically, we look for strength 170b57cec5SDimitry Andric // reduction candidates in the following forms: 180b57cec5SDimitry Andric // 190b57cec5SDimitry Andric // Form 1: B + i * S 200b57cec5SDimitry Andric // Form 2: (B + i) * S 210b57cec5SDimitry Andric // Form 3: &B[i * S] 220b57cec5SDimitry Andric // 230b57cec5SDimitry Andric // where S is an integer variable, and i is a constant integer. If we found two 240b57cec5SDimitry Andric // candidates S1 and S2 in the same form and S1 dominates S2, we may rewrite S2 250b57cec5SDimitry Andric // in a simpler way with respect to S1. For example, 260b57cec5SDimitry Andric // 270b57cec5SDimitry Andric // S1: X = B + i * S 280b57cec5SDimitry Andric // S2: Y = B + i' * S => X + (i' - i) * S 290b57cec5SDimitry Andric // 300b57cec5SDimitry Andric // S1: X = (B + i) * S 310b57cec5SDimitry Andric // S2: Y = (B + i') * S => X + (i' - i) * S 320b57cec5SDimitry Andric // 330b57cec5SDimitry Andric // S1: X = &B[i * S] 340b57cec5SDimitry Andric // S2: Y = &B[i' * S] => &X[(i' - i) * S] 350b57cec5SDimitry Andric // 360b57cec5SDimitry Andric // Note: (i' - i) * S is folded to the extent possible. 370b57cec5SDimitry Andric // 380b57cec5SDimitry Andric // This rewriting is in general a good idea. The code patterns we focus on 390b57cec5SDimitry Andric // usually come from loop unrolling, so (i' - i) * S is likely the same 400b57cec5SDimitry Andric // across iterations and can be reused. When that happens, the optimized form 410b57cec5SDimitry Andric // takes only one add starting from the second iteration. 420b57cec5SDimitry Andric // 430b57cec5SDimitry Andric // When such rewriting is possible, we call S1 a "basis" of S2. When S2 has 440b57cec5SDimitry Andric // multiple bases, we choose to rewrite S2 with respect to its "immediate" 450b57cec5SDimitry Andric // basis, the basis that is the closest ancestor in the dominator tree. 460b57cec5SDimitry Andric // 470b57cec5SDimitry Andric // TODO: 480b57cec5SDimitry Andric // 490b57cec5SDimitry Andric // - Floating point arithmetics when fast math is enabled. 500b57cec5SDimitry Andric // 510b57cec5SDimitry Andric // - SLSR may decrease ILP at the architecture level. Targets that are very 520b57cec5SDimitry Andric // sensitive to ILP may want to disable it. Having SLSR to consider ILP is 530b57cec5SDimitry Andric // left as future work. 540b57cec5SDimitry Andric // 550b57cec5SDimitry Andric // - When (i' - i) is constant but i and i' are not, we could still perform 560b57cec5SDimitry Andric // SLSR. 570b57cec5SDimitry Andric 58e8d8bef9SDimitry Andric #include "llvm/Transforms/Scalar/StraightLineStrengthReduce.h" 590b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 600b57cec5SDimitry Andric #include "llvm/ADT/DepthFirstIterator.h" 610b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 620b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 630b57cec5SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 640b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 650b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 660b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 670b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 680b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 690b57cec5SDimitry Andric #include "llvm/IR/GetElementPtrTypeIterator.h" 700b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 710b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 720b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 730b57cec5SDimitry Andric #include "llvm/IR/Module.h" 740b57cec5SDimitry Andric #include "llvm/IR/Operator.h" 750b57cec5SDimitry Andric #include "llvm/IR/PatternMatch.h" 760b57cec5SDimitry Andric #include "llvm/IR/Type.h" 770b57cec5SDimitry Andric #include "llvm/IR/Value.h" 78480093f4SDimitry Andric #include "llvm/InitializePasses.h" 790b57cec5SDimitry Andric #include "llvm/Pass.h" 800b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 810b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 820b57cec5SDimitry Andric #include "llvm/Transforms/Scalar.h" 83480093f4SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 840b57cec5SDimitry Andric #include <cassert> 850b57cec5SDimitry Andric #include <cstdint> 860b57cec5SDimitry Andric #include <limits> 870b57cec5SDimitry Andric #include <list> 880b57cec5SDimitry Andric #include <vector> 890b57cec5SDimitry Andric 900b57cec5SDimitry Andric using namespace llvm; 910b57cec5SDimitry Andric using namespace PatternMatch; 920b57cec5SDimitry Andric 930b57cec5SDimitry Andric static const unsigned UnknownAddressSpace = 940b57cec5SDimitry Andric std::numeric_limits<unsigned>::max(); 950b57cec5SDimitry Andric 960b57cec5SDimitry Andric namespace { 970b57cec5SDimitry Andric 98e8d8bef9SDimitry Andric class StraightLineStrengthReduceLegacyPass : public FunctionPass { 99e8d8bef9SDimitry Andric const DataLayout *DL = nullptr; 100e8d8bef9SDimitry Andric 1010b57cec5SDimitry Andric public: 102e8d8bef9SDimitry Andric static char ID; 103e8d8bef9SDimitry Andric 104e8d8bef9SDimitry Andric StraightLineStrengthReduceLegacyPass() : FunctionPass(ID) { 105e8d8bef9SDimitry Andric initializeStraightLineStrengthReduceLegacyPassPass( 106e8d8bef9SDimitry Andric *PassRegistry::getPassRegistry()); 107e8d8bef9SDimitry Andric } 108e8d8bef9SDimitry Andric 109e8d8bef9SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 110e8d8bef9SDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 111e8d8bef9SDimitry Andric AU.addRequired<ScalarEvolutionWrapperPass>(); 112e8d8bef9SDimitry Andric AU.addRequired<TargetTransformInfoWrapperPass>(); 113e8d8bef9SDimitry Andric // We do not modify the shape of the CFG. 114e8d8bef9SDimitry Andric AU.setPreservesCFG(); 115e8d8bef9SDimitry Andric } 116e8d8bef9SDimitry Andric 117e8d8bef9SDimitry Andric bool doInitialization(Module &M) override { 118e8d8bef9SDimitry Andric DL = &M.getDataLayout(); 119e8d8bef9SDimitry Andric return false; 120e8d8bef9SDimitry Andric } 121e8d8bef9SDimitry Andric 122e8d8bef9SDimitry Andric bool runOnFunction(Function &F) override; 123e8d8bef9SDimitry Andric }; 124e8d8bef9SDimitry Andric 125e8d8bef9SDimitry Andric class StraightLineStrengthReduce { 126e8d8bef9SDimitry Andric public: 127e8d8bef9SDimitry Andric StraightLineStrengthReduce(const DataLayout *DL, DominatorTree *DT, 128e8d8bef9SDimitry Andric ScalarEvolution *SE, TargetTransformInfo *TTI) 129e8d8bef9SDimitry Andric : DL(DL), DT(DT), SE(SE), TTI(TTI) {} 130e8d8bef9SDimitry Andric 1310b57cec5SDimitry Andric // SLSR candidate. Such a candidate must be in one of the forms described in 1320b57cec5SDimitry Andric // the header comments. 1330b57cec5SDimitry Andric struct Candidate { 1340b57cec5SDimitry Andric enum Kind { 1350b57cec5SDimitry Andric Invalid, // reserved for the default constructor 1360b57cec5SDimitry Andric Add, // B + i * S 1370b57cec5SDimitry Andric Mul, // (B + i) * S 1380b57cec5SDimitry Andric GEP, // &B[..][i * S][..] 1390b57cec5SDimitry Andric }; 1400b57cec5SDimitry Andric 1410b57cec5SDimitry Andric Candidate() = default; 1420b57cec5SDimitry Andric Candidate(Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, 1430b57cec5SDimitry Andric Instruction *I) 1440b57cec5SDimitry Andric : CandidateKind(CT), Base(B), Index(Idx), Stride(S), Ins(I) {} 1450b57cec5SDimitry Andric 1460b57cec5SDimitry Andric Kind CandidateKind = Invalid; 1470b57cec5SDimitry Andric 1480b57cec5SDimitry Andric const SCEV *Base = nullptr; 1490b57cec5SDimitry Andric 1500b57cec5SDimitry Andric // Note that Index and Stride of a GEP candidate do not necessarily have the 1510b57cec5SDimitry Andric // same integer type. In that case, during rewriting, Stride will be 1520b57cec5SDimitry Andric // sign-extended or truncated to Index's type. 1530b57cec5SDimitry Andric ConstantInt *Index = nullptr; 1540b57cec5SDimitry Andric 1550b57cec5SDimitry Andric Value *Stride = nullptr; 1560b57cec5SDimitry Andric 1570b57cec5SDimitry Andric // The instruction this candidate corresponds to. It helps us to rewrite a 1580b57cec5SDimitry Andric // candidate with respect to its immediate basis. Note that one instruction 1590b57cec5SDimitry Andric // can correspond to multiple candidates depending on how you associate the 1600b57cec5SDimitry Andric // expression. For instance, 1610b57cec5SDimitry Andric // 1620b57cec5SDimitry Andric // (a + 1) * (b + 2) 1630b57cec5SDimitry Andric // 1640b57cec5SDimitry Andric // can be treated as 1650b57cec5SDimitry Andric // 1660b57cec5SDimitry Andric // <Base: a, Index: 1, Stride: b + 2> 1670b57cec5SDimitry Andric // 1680b57cec5SDimitry Andric // or 1690b57cec5SDimitry Andric // 1700b57cec5SDimitry Andric // <Base: b, Index: 2, Stride: a + 1> 1710b57cec5SDimitry Andric Instruction *Ins = nullptr; 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric // Points to the immediate basis of this candidate, or nullptr if we cannot 1740b57cec5SDimitry Andric // find any basis for this candidate. 1750b57cec5SDimitry Andric Candidate *Basis = nullptr; 1760b57cec5SDimitry Andric }; 1770b57cec5SDimitry Andric 178e8d8bef9SDimitry Andric bool runOnFunction(Function &F); 1790b57cec5SDimitry Andric 1800b57cec5SDimitry Andric private: 1810b57cec5SDimitry Andric // Returns true if Basis is a basis for C, i.e., Basis dominates C and they 1820b57cec5SDimitry Andric // share the same base and stride. 1830b57cec5SDimitry Andric bool isBasisFor(const Candidate &Basis, const Candidate &C); 1840b57cec5SDimitry Andric 1850b57cec5SDimitry Andric // Returns whether the candidate can be folded into an addressing mode. 1860b57cec5SDimitry Andric bool isFoldable(const Candidate &C, TargetTransformInfo *TTI, 1870b57cec5SDimitry Andric const DataLayout *DL); 1880b57cec5SDimitry Andric 1890b57cec5SDimitry Andric // Returns true if C is already in a simplest form and not worth being 1900b57cec5SDimitry Andric // rewritten. 1910b57cec5SDimitry Andric bool isSimplestForm(const Candidate &C); 1920b57cec5SDimitry Andric 1930b57cec5SDimitry Andric // Checks whether I is in a candidate form. If so, adds all the matching forms 1940b57cec5SDimitry Andric // to Candidates, and tries to find the immediate basis for each of them. 1950b57cec5SDimitry Andric void allocateCandidatesAndFindBasis(Instruction *I); 1960b57cec5SDimitry Andric 1970b57cec5SDimitry Andric // Allocate candidates and find bases for Add instructions. 1980b57cec5SDimitry Andric void allocateCandidatesAndFindBasisForAdd(Instruction *I); 1990b57cec5SDimitry Andric 2000b57cec5SDimitry Andric // Given I = LHS + RHS, factors RHS into i * S and makes (LHS + i * S) a 2010b57cec5SDimitry Andric // candidate. 2020b57cec5SDimitry Andric void allocateCandidatesAndFindBasisForAdd(Value *LHS, Value *RHS, 2030b57cec5SDimitry Andric Instruction *I); 2040b57cec5SDimitry Andric // Allocate candidates and find bases for Mul instructions. 2050b57cec5SDimitry Andric void allocateCandidatesAndFindBasisForMul(Instruction *I); 2060b57cec5SDimitry Andric 2070b57cec5SDimitry Andric // Splits LHS into Base + Index and, if succeeds, calls 2080b57cec5SDimitry Andric // allocateCandidatesAndFindBasis. 2090b57cec5SDimitry Andric void allocateCandidatesAndFindBasisForMul(Value *LHS, Value *RHS, 2100b57cec5SDimitry Andric Instruction *I); 2110b57cec5SDimitry Andric 2120b57cec5SDimitry Andric // Allocate candidates and find bases for GetElementPtr instructions. 2130b57cec5SDimitry Andric void allocateCandidatesAndFindBasisForGEP(GetElementPtrInst *GEP); 2140b57cec5SDimitry Andric 2150b57cec5SDimitry Andric // A helper function that scales Idx with ElementSize before invoking 2160b57cec5SDimitry Andric // allocateCandidatesAndFindBasis. 2170b57cec5SDimitry Andric void allocateCandidatesAndFindBasisForGEP(const SCEV *B, ConstantInt *Idx, 2180b57cec5SDimitry Andric Value *S, uint64_t ElementSize, 2190b57cec5SDimitry Andric Instruction *I); 2200b57cec5SDimitry Andric 2210b57cec5SDimitry Andric // Adds the given form <CT, B, Idx, S> to Candidates, and finds its immediate 2220b57cec5SDimitry Andric // basis. 2230b57cec5SDimitry Andric void allocateCandidatesAndFindBasis(Candidate::Kind CT, const SCEV *B, 2240b57cec5SDimitry Andric ConstantInt *Idx, Value *S, 2250b57cec5SDimitry Andric Instruction *I); 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric // Rewrites candidate C with respect to Basis. 2280b57cec5SDimitry Andric void rewriteCandidateWithBasis(const Candidate &C, const Candidate &Basis); 2290b57cec5SDimitry Andric 2300b57cec5SDimitry Andric // A helper function that factors ArrayIdx to a product of a stride and a 2310b57cec5SDimitry Andric // constant index, and invokes allocateCandidatesAndFindBasis with the 2320b57cec5SDimitry Andric // factorings. 2330b57cec5SDimitry Andric void factorArrayIndex(Value *ArrayIdx, const SCEV *Base, uint64_t ElementSize, 2340b57cec5SDimitry Andric GetElementPtrInst *GEP); 2350b57cec5SDimitry Andric 236297eecfbSDimitry Andric // Emit code that computes the "bump" from Basis to C. 2370b57cec5SDimitry Andric static Value *emitBump(const Candidate &Basis, const Candidate &C, 238297eecfbSDimitry Andric IRBuilder<> &Builder, const DataLayout *DL); 2390b57cec5SDimitry Andric 2400b57cec5SDimitry Andric const DataLayout *DL = nullptr; 2410b57cec5SDimitry Andric DominatorTree *DT = nullptr; 2420b57cec5SDimitry Andric ScalarEvolution *SE; 2430b57cec5SDimitry Andric TargetTransformInfo *TTI = nullptr; 2440b57cec5SDimitry Andric std::list<Candidate> Candidates; 2450b57cec5SDimitry Andric 2460b57cec5SDimitry Andric // Temporarily holds all instructions that are unlinked (but not deleted) by 2470b57cec5SDimitry Andric // rewriteCandidateWithBasis. These instructions will be actually removed 2480b57cec5SDimitry Andric // after all rewriting finishes. 2490b57cec5SDimitry Andric std::vector<Instruction *> UnlinkedInstructions; 2500b57cec5SDimitry Andric }; 2510b57cec5SDimitry Andric 2520b57cec5SDimitry Andric } // end anonymous namespace 2530b57cec5SDimitry Andric 254e8d8bef9SDimitry Andric char StraightLineStrengthReduceLegacyPass::ID = 0; 2550b57cec5SDimitry Andric 256e8d8bef9SDimitry Andric INITIALIZE_PASS_BEGIN(StraightLineStrengthReduceLegacyPass, "slsr", 2570b57cec5SDimitry Andric "Straight line strength reduction", false, false) 2580b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 2590b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass) 2600b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) 261e8d8bef9SDimitry Andric INITIALIZE_PASS_END(StraightLineStrengthReduceLegacyPass, "slsr", 2620b57cec5SDimitry Andric "Straight line strength reduction", false, false) 2630b57cec5SDimitry Andric 2640b57cec5SDimitry Andric FunctionPass *llvm::createStraightLineStrengthReducePass() { 265e8d8bef9SDimitry Andric return new StraightLineStrengthReduceLegacyPass(); 2660b57cec5SDimitry Andric } 2670b57cec5SDimitry Andric 2680b57cec5SDimitry Andric bool StraightLineStrengthReduce::isBasisFor(const Candidate &Basis, 2690b57cec5SDimitry Andric const Candidate &C) { 2700b57cec5SDimitry Andric return (Basis.Ins != C.Ins && // skip the same instruction 2710b57cec5SDimitry Andric // They must have the same type too. Basis.Base == C.Base doesn't 2720b57cec5SDimitry Andric // guarantee their types are the same (PR23975). 2730b57cec5SDimitry Andric Basis.Ins->getType() == C.Ins->getType() && 2740b57cec5SDimitry Andric // Basis must dominate C in order to rewrite C with respect to Basis. 2750b57cec5SDimitry Andric DT->dominates(Basis.Ins->getParent(), C.Ins->getParent()) && 2760b57cec5SDimitry Andric // They share the same base, stride, and candidate kind. 2770b57cec5SDimitry Andric Basis.Base == C.Base && Basis.Stride == C.Stride && 2780b57cec5SDimitry Andric Basis.CandidateKind == C.CandidateKind); 2790b57cec5SDimitry Andric } 2800b57cec5SDimitry Andric 2810b57cec5SDimitry Andric static bool isGEPFoldable(GetElementPtrInst *GEP, 2820b57cec5SDimitry Andric const TargetTransformInfo *TTI) { 283e8d8bef9SDimitry Andric SmallVector<const Value *, 4> Indices(GEP->indices()); 2840b57cec5SDimitry Andric return TTI->getGEPCost(GEP->getSourceElementType(), GEP->getPointerOperand(), 2850b57cec5SDimitry Andric Indices) == TargetTransformInfo::TCC_Free; 2860b57cec5SDimitry Andric } 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric // Returns whether (Base + Index * Stride) can be folded to an addressing mode. 2890b57cec5SDimitry Andric static bool isAddFoldable(const SCEV *Base, ConstantInt *Index, Value *Stride, 2900b57cec5SDimitry Andric TargetTransformInfo *TTI) { 2910b57cec5SDimitry Andric // Index->getSExtValue() may crash if Index is wider than 64-bit. 2920b57cec5SDimitry Andric return Index->getBitWidth() <= 64 && 2930b57cec5SDimitry Andric TTI->isLegalAddressingMode(Base->getType(), nullptr, 0, true, 2940b57cec5SDimitry Andric Index->getSExtValue(), UnknownAddressSpace); 2950b57cec5SDimitry Andric } 2960b57cec5SDimitry Andric 2970b57cec5SDimitry Andric bool StraightLineStrengthReduce::isFoldable(const Candidate &C, 2980b57cec5SDimitry Andric TargetTransformInfo *TTI, 2990b57cec5SDimitry Andric const DataLayout *DL) { 3000b57cec5SDimitry Andric if (C.CandidateKind == Candidate::Add) 3010b57cec5SDimitry Andric return isAddFoldable(C.Base, C.Index, C.Stride, TTI); 3020b57cec5SDimitry Andric if (C.CandidateKind == Candidate::GEP) 3030b57cec5SDimitry Andric return isGEPFoldable(cast<GetElementPtrInst>(C.Ins), TTI); 3040b57cec5SDimitry Andric return false; 3050b57cec5SDimitry Andric } 3060b57cec5SDimitry Andric 3070b57cec5SDimitry Andric // Returns true if GEP has zero or one non-zero index. 3080b57cec5SDimitry Andric static bool hasOnlyOneNonZeroIndex(GetElementPtrInst *GEP) { 3090b57cec5SDimitry Andric unsigned NumNonZeroIndices = 0; 310fe6060f1SDimitry Andric for (Use &Idx : GEP->indices()) { 311fe6060f1SDimitry Andric ConstantInt *ConstIdx = dyn_cast<ConstantInt>(Idx); 3120b57cec5SDimitry Andric if (ConstIdx == nullptr || !ConstIdx->isZero()) 3130b57cec5SDimitry Andric ++NumNonZeroIndices; 3140b57cec5SDimitry Andric } 3150b57cec5SDimitry Andric return NumNonZeroIndices <= 1; 3160b57cec5SDimitry Andric } 3170b57cec5SDimitry Andric 3180b57cec5SDimitry Andric bool StraightLineStrengthReduce::isSimplestForm(const Candidate &C) { 3190b57cec5SDimitry Andric if (C.CandidateKind == Candidate::Add) { 3200b57cec5SDimitry Andric // B + 1 * S or B + (-1) * S 3210b57cec5SDimitry Andric return C.Index->isOne() || C.Index->isMinusOne(); 3220b57cec5SDimitry Andric } 3230b57cec5SDimitry Andric if (C.CandidateKind == Candidate::Mul) { 3240b57cec5SDimitry Andric // (B + 0) * S 3250b57cec5SDimitry Andric return C.Index->isZero(); 3260b57cec5SDimitry Andric } 3270b57cec5SDimitry Andric if (C.CandidateKind == Candidate::GEP) { 3280b57cec5SDimitry Andric // (char*)B + S or (char*)B - S 3290b57cec5SDimitry Andric return ((C.Index->isOne() || C.Index->isMinusOne()) && 3300b57cec5SDimitry Andric hasOnlyOneNonZeroIndex(cast<GetElementPtrInst>(C.Ins))); 3310b57cec5SDimitry Andric } 3320b57cec5SDimitry Andric return false; 3330b57cec5SDimitry Andric } 3340b57cec5SDimitry Andric 3350b57cec5SDimitry Andric // TODO: We currently implement an algorithm whose time complexity is linear in 3360b57cec5SDimitry Andric // the number of existing candidates. However, we could do better by using 3370b57cec5SDimitry Andric // ScopedHashTable. Specifically, while traversing the dominator tree, we could 3380b57cec5SDimitry Andric // maintain all the candidates that dominate the basic block being traversed in 3390b57cec5SDimitry Andric // a ScopedHashTable. This hash table is indexed by the base and the stride of 3400b57cec5SDimitry Andric // a candidate. Therefore, finding the immediate basis of a candidate boils down 3410b57cec5SDimitry Andric // to one hash-table look up. 3420b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasis( 3430b57cec5SDimitry Andric Candidate::Kind CT, const SCEV *B, ConstantInt *Idx, Value *S, 3440b57cec5SDimitry Andric Instruction *I) { 3450b57cec5SDimitry Andric Candidate C(CT, B, Idx, S, I); 3460b57cec5SDimitry Andric // SLSR can complicate an instruction in two cases: 3470b57cec5SDimitry Andric // 3480b57cec5SDimitry Andric // 1. If we can fold I into an addressing mode, computing I is likely free or 3490b57cec5SDimitry Andric // takes only one instruction. 3500b57cec5SDimitry Andric // 3510b57cec5SDimitry Andric // 2. I is already in a simplest form. For example, when 3520b57cec5SDimitry Andric // X = B + 8 * S 3530b57cec5SDimitry Andric // Y = B + S, 3540b57cec5SDimitry Andric // rewriting Y to X - 7 * S is probably a bad idea. 3550b57cec5SDimitry Andric // 3560b57cec5SDimitry Andric // In the above cases, we still add I to the candidate list so that I can be 3570b57cec5SDimitry Andric // the basis of other candidates, but we leave I's basis blank so that I 3580b57cec5SDimitry Andric // won't be rewritten. 3590b57cec5SDimitry Andric if (!isFoldable(C, TTI, DL) && !isSimplestForm(C)) { 3600b57cec5SDimitry Andric // Try to compute the immediate basis of C. 3610b57cec5SDimitry Andric unsigned NumIterations = 0; 3620b57cec5SDimitry Andric // Limit the scan radius to avoid running in quadratice time. 3630b57cec5SDimitry Andric static const unsigned MaxNumIterations = 50; 3640b57cec5SDimitry Andric for (auto Basis = Candidates.rbegin(); 3650b57cec5SDimitry Andric Basis != Candidates.rend() && NumIterations < MaxNumIterations; 3660b57cec5SDimitry Andric ++Basis, ++NumIterations) { 3670b57cec5SDimitry Andric if (isBasisFor(*Basis, C)) { 3680b57cec5SDimitry Andric C.Basis = &(*Basis); 3690b57cec5SDimitry Andric break; 3700b57cec5SDimitry Andric } 3710b57cec5SDimitry Andric } 3720b57cec5SDimitry Andric } 3730b57cec5SDimitry Andric // Regardless of whether we find a basis for C, we need to push C to the 3740b57cec5SDimitry Andric // candidate list so that it can be the basis of other candidates. 3750b57cec5SDimitry Andric Candidates.push_back(C); 3760b57cec5SDimitry Andric } 3770b57cec5SDimitry Andric 3780b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasis( 3790b57cec5SDimitry Andric Instruction *I) { 3800b57cec5SDimitry Andric switch (I->getOpcode()) { 3810b57cec5SDimitry Andric case Instruction::Add: 3820b57cec5SDimitry Andric allocateCandidatesAndFindBasisForAdd(I); 3830b57cec5SDimitry Andric break; 3840b57cec5SDimitry Andric case Instruction::Mul: 3850b57cec5SDimitry Andric allocateCandidatesAndFindBasisForMul(I); 3860b57cec5SDimitry Andric break; 3870b57cec5SDimitry Andric case Instruction::GetElementPtr: 3880b57cec5SDimitry Andric allocateCandidatesAndFindBasisForGEP(cast<GetElementPtrInst>(I)); 3890b57cec5SDimitry Andric break; 3900b57cec5SDimitry Andric } 3910b57cec5SDimitry Andric } 3920b57cec5SDimitry Andric 3930b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd( 3940b57cec5SDimitry Andric Instruction *I) { 3950b57cec5SDimitry Andric // Try matching B + i * S. 3960b57cec5SDimitry Andric if (!isa<IntegerType>(I->getType())) 3970b57cec5SDimitry Andric return; 3980b57cec5SDimitry Andric 3990b57cec5SDimitry Andric assert(I->getNumOperands() == 2 && "isn't I an add?"); 4000b57cec5SDimitry Andric Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); 4010b57cec5SDimitry Andric allocateCandidatesAndFindBasisForAdd(LHS, RHS, I); 4020b57cec5SDimitry Andric if (LHS != RHS) 4030b57cec5SDimitry Andric allocateCandidatesAndFindBasisForAdd(RHS, LHS, I); 4040b57cec5SDimitry Andric } 4050b57cec5SDimitry Andric 4060b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForAdd( 4070b57cec5SDimitry Andric Value *LHS, Value *RHS, Instruction *I) { 4080b57cec5SDimitry Andric Value *S = nullptr; 4090b57cec5SDimitry Andric ConstantInt *Idx = nullptr; 4100b57cec5SDimitry Andric if (match(RHS, m_Mul(m_Value(S), m_ConstantInt(Idx)))) { 4110b57cec5SDimitry Andric // I = LHS + RHS = LHS + Idx * S 4120b57cec5SDimitry Andric allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); 4130b57cec5SDimitry Andric } else if (match(RHS, m_Shl(m_Value(S), m_ConstantInt(Idx)))) { 4140b57cec5SDimitry Andric // I = LHS + RHS = LHS + (S << Idx) = LHS + S * (1 << Idx) 4150b57cec5SDimitry Andric APInt One(Idx->getBitWidth(), 1); 4160b57cec5SDimitry Andric Idx = ConstantInt::get(Idx->getContext(), One << Idx->getValue()); 4170b57cec5SDimitry Andric allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), Idx, S, I); 4180b57cec5SDimitry Andric } else { 4190b57cec5SDimitry Andric // At least, I = LHS + 1 * RHS 4200b57cec5SDimitry Andric ConstantInt *One = ConstantInt::get(cast<IntegerType>(I->getType()), 1); 4210b57cec5SDimitry Andric allocateCandidatesAndFindBasis(Candidate::Add, SE->getSCEV(LHS), One, RHS, 4220b57cec5SDimitry Andric I); 4230b57cec5SDimitry Andric } 4240b57cec5SDimitry Andric } 4250b57cec5SDimitry Andric 4260b57cec5SDimitry Andric // Returns true if A matches B + C where C is constant. 4270b57cec5SDimitry Andric static bool matchesAdd(Value *A, Value *&B, ConstantInt *&C) { 428*0fca6ea1SDimitry Andric return match(A, m_c_Add(m_Value(B), m_ConstantInt(C))); 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric // Returns true if A matches B | C where C is constant. 4320b57cec5SDimitry Andric static bool matchesOr(Value *A, Value *&B, ConstantInt *&C) { 433*0fca6ea1SDimitry Andric return match(A, m_c_Or(m_Value(B), m_ConstantInt(C))); 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul( 4370b57cec5SDimitry Andric Value *LHS, Value *RHS, Instruction *I) { 4380b57cec5SDimitry Andric Value *B = nullptr; 4390b57cec5SDimitry Andric ConstantInt *Idx = nullptr; 4400b57cec5SDimitry Andric if (matchesAdd(LHS, B, Idx)) { 4410b57cec5SDimitry Andric // If LHS is in the form of "Base + Index", then I is in the form of 4420b57cec5SDimitry Andric // "(Base + Index) * RHS". 4430b57cec5SDimitry Andric allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); 4440b57cec5SDimitry Andric } else if (matchesOr(LHS, B, Idx) && haveNoCommonBitsSet(B, Idx, *DL)) { 4450b57cec5SDimitry Andric // If LHS is in the form of "Base | Index" and Base and Index have no common 4460b57cec5SDimitry Andric // bits set, then 4470b57cec5SDimitry Andric // Base | Index = Base + Index 4480b57cec5SDimitry Andric // and I is thus in the form of "(Base + Index) * RHS". 4490b57cec5SDimitry Andric allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(B), Idx, RHS, I); 4500b57cec5SDimitry Andric } else { 4510b57cec5SDimitry Andric // Otherwise, at least try the form (LHS + 0) * RHS. 4520b57cec5SDimitry Andric ConstantInt *Zero = ConstantInt::get(cast<IntegerType>(I->getType()), 0); 4530b57cec5SDimitry Andric allocateCandidatesAndFindBasis(Candidate::Mul, SE->getSCEV(LHS), Zero, RHS, 4540b57cec5SDimitry Andric I); 4550b57cec5SDimitry Andric } 4560b57cec5SDimitry Andric } 4570b57cec5SDimitry Andric 4580b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForMul( 4590b57cec5SDimitry Andric Instruction *I) { 4600b57cec5SDimitry Andric // Try matching (B + i) * S. 4610b57cec5SDimitry Andric // TODO: we could extend SLSR to float and vector types. 4620b57cec5SDimitry Andric if (!isa<IntegerType>(I->getType())) 4630b57cec5SDimitry Andric return; 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric assert(I->getNumOperands() == 2 && "isn't I a mul?"); 4660b57cec5SDimitry Andric Value *LHS = I->getOperand(0), *RHS = I->getOperand(1); 4670b57cec5SDimitry Andric allocateCandidatesAndFindBasisForMul(LHS, RHS, I); 4680b57cec5SDimitry Andric if (LHS != RHS) { 4690b57cec5SDimitry Andric // Symmetrically, try to split RHS to Base + Index. 4700b57cec5SDimitry Andric allocateCandidatesAndFindBasisForMul(RHS, LHS, I); 4710b57cec5SDimitry Andric } 4720b57cec5SDimitry Andric } 4730b57cec5SDimitry Andric 4740b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP( 4750b57cec5SDimitry Andric const SCEV *B, ConstantInt *Idx, Value *S, uint64_t ElementSize, 4760b57cec5SDimitry Andric Instruction *I) { 4770b57cec5SDimitry Andric // I = B + sext(Idx *nsw S) * ElementSize 4780b57cec5SDimitry Andric // = B + (sext(Idx) * sext(S)) * ElementSize 4790b57cec5SDimitry Andric // = B + (sext(Idx) * ElementSize) * sext(S) 4800b57cec5SDimitry Andric // Casting to IntegerType is safe because we skipped vector GEPs. 48106c3fb27SDimitry Andric IntegerType *PtrIdxTy = cast<IntegerType>(DL->getIndexType(I->getType())); 4820b57cec5SDimitry Andric ConstantInt *ScaledIdx = ConstantInt::get( 48306c3fb27SDimitry Andric PtrIdxTy, Idx->getSExtValue() * (int64_t)ElementSize, true); 4840b57cec5SDimitry Andric allocateCandidatesAndFindBasis(Candidate::GEP, B, ScaledIdx, S, I); 4850b57cec5SDimitry Andric } 4860b57cec5SDimitry Andric 4870b57cec5SDimitry Andric void StraightLineStrengthReduce::factorArrayIndex(Value *ArrayIdx, 4880b57cec5SDimitry Andric const SCEV *Base, 4890b57cec5SDimitry Andric uint64_t ElementSize, 4900b57cec5SDimitry Andric GetElementPtrInst *GEP) { 4910b57cec5SDimitry Andric // At least, ArrayIdx = ArrayIdx *nsw 1. 4920b57cec5SDimitry Andric allocateCandidatesAndFindBasisForGEP( 4930b57cec5SDimitry Andric Base, ConstantInt::get(cast<IntegerType>(ArrayIdx->getType()), 1), 4940b57cec5SDimitry Andric ArrayIdx, ElementSize, GEP); 4950b57cec5SDimitry Andric Value *LHS = nullptr; 4960b57cec5SDimitry Andric ConstantInt *RHS = nullptr; 4970b57cec5SDimitry Andric // One alternative is matching the SCEV of ArrayIdx instead of ArrayIdx 4980b57cec5SDimitry Andric // itself. This would allow us to handle the shl case for free. However, 4990b57cec5SDimitry Andric // matching SCEVs has two issues: 5000b57cec5SDimitry Andric // 5010b57cec5SDimitry Andric // 1. this would complicate rewriting because the rewriting procedure 5020b57cec5SDimitry Andric // would have to translate SCEVs back to IR instructions. This translation 5030b57cec5SDimitry Andric // is difficult when LHS is further evaluated to a composite SCEV. 5040b57cec5SDimitry Andric // 5050b57cec5SDimitry Andric // 2. ScalarEvolution is designed to be control-flow oblivious. It tends 5060b57cec5SDimitry Andric // to strip nsw/nuw flags which are critical for SLSR to trace into 5070b57cec5SDimitry Andric // sext'ed multiplication. 5080b57cec5SDimitry Andric if (match(ArrayIdx, m_NSWMul(m_Value(LHS), m_ConstantInt(RHS)))) { 5090b57cec5SDimitry Andric // SLSR is currently unsafe if i * S may overflow. 5100b57cec5SDimitry Andric // GEP = Base + sext(LHS *nsw RHS) * ElementSize 5110b57cec5SDimitry Andric allocateCandidatesAndFindBasisForGEP(Base, RHS, LHS, ElementSize, GEP); 5120b57cec5SDimitry Andric } else if (match(ArrayIdx, m_NSWShl(m_Value(LHS), m_ConstantInt(RHS)))) { 5130b57cec5SDimitry Andric // GEP = Base + sext(LHS <<nsw RHS) * ElementSize 5140b57cec5SDimitry Andric // = Base + sext(LHS *nsw (1 << RHS)) * ElementSize 5150b57cec5SDimitry Andric APInt One(RHS->getBitWidth(), 1); 5160b57cec5SDimitry Andric ConstantInt *PowerOf2 = 5170b57cec5SDimitry Andric ConstantInt::get(RHS->getContext(), One << RHS->getValue()); 5180b57cec5SDimitry Andric allocateCandidatesAndFindBasisForGEP(Base, PowerOf2, LHS, ElementSize, GEP); 5190b57cec5SDimitry Andric } 5200b57cec5SDimitry Andric } 5210b57cec5SDimitry Andric 5220b57cec5SDimitry Andric void StraightLineStrengthReduce::allocateCandidatesAndFindBasisForGEP( 5230b57cec5SDimitry Andric GetElementPtrInst *GEP) { 5240b57cec5SDimitry Andric // TODO: handle vector GEPs 5250b57cec5SDimitry Andric if (GEP->getType()->isVectorTy()) 5260b57cec5SDimitry Andric return; 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric SmallVector<const SCEV *, 4> IndexExprs; 529fe6060f1SDimitry Andric for (Use &Idx : GEP->indices()) 530fe6060f1SDimitry Andric IndexExprs.push_back(SE->getSCEV(Idx)); 5310b57cec5SDimitry Andric 5320b57cec5SDimitry Andric gep_type_iterator GTI = gep_type_begin(GEP); 5330b57cec5SDimitry Andric for (unsigned I = 1, E = GEP->getNumOperands(); I != E; ++I, ++GTI) { 5340b57cec5SDimitry Andric if (GTI.isStruct()) 5350b57cec5SDimitry Andric continue; 5360b57cec5SDimitry Andric 5370b57cec5SDimitry Andric const SCEV *OrigIndexExpr = IndexExprs[I - 1]; 5380b57cec5SDimitry Andric IndexExprs[I - 1] = SE->getZero(OrigIndexExpr->getType()); 5390b57cec5SDimitry Andric 5400b57cec5SDimitry Andric // The base of this candidate is GEP's base plus the offsets of all 5410b57cec5SDimitry Andric // indices except this current one. 5420b57cec5SDimitry Andric const SCEV *BaseExpr = SE->getGEPExpr(cast<GEPOperator>(GEP), IndexExprs); 5430b57cec5SDimitry Andric Value *ArrayIdx = GEP->getOperand(I); 5441db9f3b2SDimitry Andric uint64_t ElementSize = GTI.getSequentialElementStride(*DL); 5450b57cec5SDimitry Andric if (ArrayIdx->getType()->getIntegerBitWidth() <= 54606c3fb27SDimitry Andric DL->getIndexSizeInBits(GEP->getAddressSpace())) { 54706c3fb27SDimitry Andric // Skip factoring if ArrayIdx is wider than the index size, because 54806c3fb27SDimitry Andric // ArrayIdx is implicitly truncated to the index size. 5490b57cec5SDimitry Andric factorArrayIndex(ArrayIdx, BaseExpr, ElementSize, GEP); 5500b57cec5SDimitry Andric } 5510b57cec5SDimitry Andric // When ArrayIdx is the sext of a value, we try to factor that value as 5520b57cec5SDimitry Andric // well. Handling this case is important because array indices are 55306c3fb27SDimitry Andric // typically sign-extended to the pointer index size. 5540b57cec5SDimitry Andric Value *TruncatedArrayIdx = nullptr; 5550b57cec5SDimitry Andric if (match(ArrayIdx, m_SExt(m_Value(TruncatedArrayIdx))) && 5560b57cec5SDimitry Andric TruncatedArrayIdx->getType()->getIntegerBitWidth() <= 55706c3fb27SDimitry Andric DL->getIndexSizeInBits(GEP->getAddressSpace())) { 5580b57cec5SDimitry Andric // Skip factoring if TruncatedArrayIdx is wider than the pointer size, 5590b57cec5SDimitry Andric // because TruncatedArrayIdx is implicitly truncated to the pointer size. 5600b57cec5SDimitry Andric factorArrayIndex(TruncatedArrayIdx, BaseExpr, ElementSize, GEP); 5610b57cec5SDimitry Andric } 5620b57cec5SDimitry Andric 5630b57cec5SDimitry Andric IndexExprs[I - 1] = OrigIndexExpr; 5640b57cec5SDimitry Andric } 5650b57cec5SDimitry Andric } 5660b57cec5SDimitry Andric 5670b57cec5SDimitry Andric // A helper function that unifies the bitwidth of A and B. 5680b57cec5SDimitry Andric static void unifyBitWidth(APInt &A, APInt &B) { 5690b57cec5SDimitry Andric if (A.getBitWidth() < B.getBitWidth()) 5700b57cec5SDimitry Andric A = A.sext(B.getBitWidth()); 5710b57cec5SDimitry Andric else if (A.getBitWidth() > B.getBitWidth()) 5720b57cec5SDimitry Andric B = B.sext(A.getBitWidth()); 5730b57cec5SDimitry Andric } 5740b57cec5SDimitry Andric 5750b57cec5SDimitry Andric Value *StraightLineStrengthReduce::emitBump(const Candidate &Basis, 5760b57cec5SDimitry Andric const Candidate &C, 5770b57cec5SDimitry Andric IRBuilder<> &Builder, 578297eecfbSDimitry Andric const DataLayout *DL) { 5790b57cec5SDimitry Andric APInt Idx = C.Index->getValue(), BasisIdx = Basis.Index->getValue(); 5800b57cec5SDimitry Andric unifyBitWidth(Idx, BasisIdx); 5810b57cec5SDimitry Andric APInt IndexOffset = Idx - BasisIdx; 5820b57cec5SDimitry Andric 5830b57cec5SDimitry Andric // Compute Bump = C - Basis = (i' - i) * S. 5840b57cec5SDimitry Andric // Common case 1: if (i' - i) is 1, Bump = S. 5850b57cec5SDimitry Andric if (IndexOffset == 1) 5860b57cec5SDimitry Andric return C.Stride; 5870b57cec5SDimitry Andric // Common case 2: if (i' - i) is -1, Bump = -S. 588349cc55cSDimitry Andric if (IndexOffset.isAllOnes()) 5890b57cec5SDimitry Andric return Builder.CreateNeg(C.Stride); 5900b57cec5SDimitry Andric 5910b57cec5SDimitry Andric // Otherwise, Bump = (i' - i) * sext/trunc(S). Note that (i' - i) and S may 5920b57cec5SDimitry Andric // have different bit widths. 5930b57cec5SDimitry Andric IntegerType *DeltaType = 5940b57cec5SDimitry Andric IntegerType::get(Basis.Ins->getContext(), IndexOffset.getBitWidth()); 5950b57cec5SDimitry Andric Value *ExtendedStride = Builder.CreateSExtOrTrunc(C.Stride, DeltaType); 5960b57cec5SDimitry Andric if (IndexOffset.isPowerOf2()) { 5970b57cec5SDimitry Andric // If (i' - i) is a power of 2, Bump = sext/trunc(S) << log(i' - i). 5980b57cec5SDimitry Andric ConstantInt *Exponent = ConstantInt::get(DeltaType, IndexOffset.logBase2()); 5990b57cec5SDimitry Andric return Builder.CreateShl(ExtendedStride, Exponent); 6000b57cec5SDimitry Andric } 601349cc55cSDimitry Andric if (IndexOffset.isNegatedPowerOf2()) { 6020b57cec5SDimitry Andric // If (i - i') is a power of 2, Bump = -sext/trunc(S) << log(i' - i). 6030b57cec5SDimitry Andric ConstantInt *Exponent = 6040b57cec5SDimitry Andric ConstantInt::get(DeltaType, (-IndexOffset).logBase2()); 6050b57cec5SDimitry Andric return Builder.CreateNeg(Builder.CreateShl(ExtendedStride, Exponent)); 6060b57cec5SDimitry Andric } 6070b57cec5SDimitry Andric Constant *Delta = ConstantInt::get(DeltaType, IndexOffset); 6080b57cec5SDimitry Andric return Builder.CreateMul(ExtendedStride, Delta); 6090b57cec5SDimitry Andric } 6100b57cec5SDimitry Andric 6110b57cec5SDimitry Andric void StraightLineStrengthReduce::rewriteCandidateWithBasis( 6120b57cec5SDimitry Andric const Candidate &C, const Candidate &Basis) { 6130b57cec5SDimitry Andric assert(C.CandidateKind == Basis.CandidateKind && C.Base == Basis.Base && 6140b57cec5SDimitry Andric C.Stride == Basis.Stride); 6150b57cec5SDimitry Andric // We run rewriteCandidateWithBasis on all candidates in a post-order, so the 6160b57cec5SDimitry Andric // basis of a candidate cannot be unlinked before the candidate. 6170b57cec5SDimitry Andric assert(Basis.Ins->getParent() != nullptr && "the basis is unlinked"); 6180b57cec5SDimitry Andric 6190b57cec5SDimitry Andric // An instruction can correspond to multiple candidates. Therefore, instead of 6200b57cec5SDimitry Andric // simply deleting an instruction when we rewrite it, we mark its parent as 6210b57cec5SDimitry Andric // nullptr (i.e. unlink it) so that we can skip the candidates whose 6220b57cec5SDimitry Andric // instruction is already rewritten. 6230b57cec5SDimitry Andric if (!C.Ins->getParent()) 6240b57cec5SDimitry Andric return; 6250b57cec5SDimitry Andric 6260b57cec5SDimitry Andric IRBuilder<> Builder(C.Ins); 627297eecfbSDimitry Andric Value *Bump = emitBump(Basis, C, Builder, DL); 6280b57cec5SDimitry Andric Value *Reduced = nullptr; // equivalent to but weaker than C.Ins 6290b57cec5SDimitry Andric switch (C.CandidateKind) { 6300b57cec5SDimitry Andric case Candidate::Add: 6310b57cec5SDimitry Andric case Candidate::Mul: { 6320b57cec5SDimitry Andric // C = Basis + Bump 6330b57cec5SDimitry Andric Value *NegBump; 6340b57cec5SDimitry Andric if (match(Bump, m_Neg(m_Value(NegBump)))) { 6350b57cec5SDimitry Andric // If Bump is a neg instruction, emit C = Basis - (-Bump). 6360b57cec5SDimitry Andric Reduced = Builder.CreateSub(Basis.Ins, NegBump); 6370b57cec5SDimitry Andric // We only use the negative argument of Bump, and Bump itself may be 6380b57cec5SDimitry Andric // trivially dead. 6390b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(Bump); 6400b57cec5SDimitry Andric } else { 6410b57cec5SDimitry Andric // It's tempting to preserve nsw on Bump and/or Reduced. However, it's 6420b57cec5SDimitry Andric // usually unsound, e.g., 6430b57cec5SDimitry Andric // 6440b57cec5SDimitry Andric // X = (-2 +nsw 1) *nsw INT_MAX 6450b57cec5SDimitry Andric // Y = (-2 +nsw 3) *nsw INT_MAX 6460b57cec5SDimitry Andric // => 6470b57cec5SDimitry Andric // Y = X + 2 * INT_MAX 6480b57cec5SDimitry Andric // 6490b57cec5SDimitry Andric // Neither + and * in the resultant expression are nsw. 6500b57cec5SDimitry Andric Reduced = Builder.CreateAdd(Basis.Ins, Bump); 6510b57cec5SDimitry Andric } 6520b57cec5SDimitry Andric break; 6530b57cec5SDimitry Andric } 654297eecfbSDimitry Andric case Candidate::GEP: { 6550b57cec5SDimitry Andric bool InBounds = cast<GetElementPtrInst>(C.Ins)->isInBounds(); 6560b57cec5SDimitry Andric // C = (char *)Basis + Bump 6577a6dacacSDimitry Andric Reduced = Builder.CreatePtrAdd(Basis.Ins, Bump, "", InBounds); 6580b57cec5SDimitry Andric break; 6590b57cec5SDimitry Andric } 6600b57cec5SDimitry Andric default: 6610b57cec5SDimitry Andric llvm_unreachable("C.CandidateKind is invalid"); 6620b57cec5SDimitry Andric }; 6630b57cec5SDimitry Andric Reduced->takeName(C.Ins); 6640b57cec5SDimitry Andric C.Ins->replaceAllUsesWith(Reduced); 6650b57cec5SDimitry Andric // Unlink C.Ins so that we can skip other candidates also corresponding to 6660b57cec5SDimitry Andric // C.Ins. The actual deletion is postponed to the end of runOnFunction. 6670b57cec5SDimitry Andric C.Ins->removeFromParent(); 6680b57cec5SDimitry Andric UnlinkedInstructions.push_back(C.Ins); 6690b57cec5SDimitry Andric } 6700b57cec5SDimitry Andric 671e8d8bef9SDimitry Andric bool StraightLineStrengthReduceLegacyPass::runOnFunction(Function &F) { 6720b57cec5SDimitry Andric if (skipFunction(F)) 6730b57cec5SDimitry Andric return false; 6740b57cec5SDimitry Andric 675e8d8bef9SDimitry Andric auto *TTI = &getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F); 676e8d8bef9SDimitry Andric auto *DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 677e8d8bef9SDimitry Andric auto *SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE(); 678e8d8bef9SDimitry Andric return StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F); 679e8d8bef9SDimitry Andric } 680e8d8bef9SDimitry Andric 681e8d8bef9SDimitry Andric bool StraightLineStrengthReduce::runOnFunction(Function &F) { 6820b57cec5SDimitry Andric // Traverse the dominator tree in the depth-first order. This order makes sure 6830b57cec5SDimitry Andric // all bases of a candidate are in Candidates when we process it. 6840b57cec5SDimitry Andric for (const auto Node : depth_first(DT)) 6850b57cec5SDimitry Andric for (auto &I : *(Node->getBlock())) 6860b57cec5SDimitry Andric allocateCandidatesAndFindBasis(&I); 6870b57cec5SDimitry Andric 6880b57cec5SDimitry Andric // Rewrite candidates in the reverse depth-first order. This order makes sure 6890b57cec5SDimitry Andric // a candidate being rewritten is not a basis for any other candidate. 6900b57cec5SDimitry Andric while (!Candidates.empty()) { 6910b57cec5SDimitry Andric const Candidate &C = Candidates.back(); 6920b57cec5SDimitry Andric if (C.Basis != nullptr) { 6930b57cec5SDimitry Andric rewriteCandidateWithBasis(C, *C.Basis); 6940b57cec5SDimitry Andric } 6950b57cec5SDimitry Andric Candidates.pop_back(); 6960b57cec5SDimitry Andric } 6970b57cec5SDimitry Andric 6980b57cec5SDimitry Andric // Delete all unlink instructions. 6990b57cec5SDimitry Andric for (auto *UnlinkedInst : UnlinkedInstructions) { 7000b57cec5SDimitry Andric for (unsigned I = 0, E = UnlinkedInst->getNumOperands(); I != E; ++I) { 7010b57cec5SDimitry Andric Value *Op = UnlinkedInst->getOperand(I); 7020b57cec5SDimitry Andric UnlinkedInst->setOperand(I, nullptr); 7030b57cec5SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(Op); 7040b57cec5SDimitry Andric } 7050b57cec5SDimitry Andric UnlinkedInst->deleteValue(); 7060b57cec5SDimitry Andric } 7070b57cec5SDimitry Andric bool Ret = !UnlinkedInstructions.empty(); 7080b57cec5SDimitry Andric UnlinkedInstructions.clear(); 7090b57cec5SDimitry Andric return Ret; 7100b57cec5SDimitry Andric } 711e8d8bef9SDimitry Andric 712e8d8bef9SDimitry Andric namespace llvm { 713e8d8bef9SDimitry Andric 714e8d8bef9SDimitry Andric PreservedAnalyses 715e8d8bef9SDimitry Andric StraightLineStrengthReducePass::run(Function &F, FunctionAnalysisManager &AM) { 716*0fca6ea1SDimitry Andric const DataLayout *DL = &F.getDataLayout(); 717e8d8bef9SDimitry Andric auto *DT = &AM.getResult<DominatorTreeAnalysis>(F); 718e8d8bef9SDimitry Andric auto *SE = &AM.getResult<ScalarEvolutionAnalysis>(F); 719e8d8bef9SDimitry Andric auto *TTI = &AM.getResult<TargetIRAnalysis>(F); 720e8d8bef9SDimitry Andric 721e8d8bef9SDimitry Andric if (!StraightLineStrengthReduce(DL, DT, SE, TTI).runOnFunction(F)) 722e8d8bef9SDimitry Andric return PreservedAnalyses::all(); 723e8d8bef9SDimitry Andric 724e8d8bef9SDimitry Andric PreservedAnalyses PA; 725e8d8bef9SDimitry Andric PA.preserveSet<CFGAnalyses>(); 726e8d8bef9SDimitry Andric PA.preserve<DominatorTreeAnalysis>(); 727e8d8bef9SDimitry Andric PA.preserve<ScalarEvolutionAnalysis>(); 728e8d8bef9SDimitry Andric PA.preserve<TargetIRAnalysis>(); 729e8d8bef9SDimitry Andric return PA; 730e8d8bef9SDimitry Andric } 731e8d8bef9SDimitry Andric 732e8d8bef9SDimitry Andric } // namespace llvm 733