//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
    "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
    cl::desc("When performing SCEV expansion only if it is cheap to do, this "
             "controls the budget that is considered cheap (default = 4)"));

using namespace PatternMatch;

/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one exists, moving an existing
/// cast if a suitable one exists but isn't in the right place, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // This function must be called with the builder having a valid insertion
  // point. It doesn't need to be the actual IP where the uses of the returned
  // cast will be added, but it must dominate such IP.
  // We use this precondition to produce a cast that will dominate all its
  // uses.
  // In particular, this is crucial for the case where the builder's
  // insertion point *is* the point where we were asked to put the cast.
  // Since we don't know the builder's insertion point is actually
  // where the uses will be added (only that it dominates it), we are
  // not allowed to move it.
  BasicBlock::iterator BIP = Builder.GetInsertPoint();

  Instruction *Ret = nullptr;

  // Check to see if there is already a cast!
  for (User *U : V->users())
    if (U->getType() == Ty)
      if (CastInst *CI = dyn_cast<CastInst>(U))
        if (CI->getOpcode() == Op) {
          // If the cast isn't where we want it, create a new cast at IP.
          // Likewise, do not reuse a cast at BIP because it must dominate
          // instructions that might be inserted before BIP.
          if (BasicBlock::iterator(CI) != IP || BIP == IP) {
            // Create a new cast, and leave the old cast in place in case
            // it is being used as an insert point.
            Ret = CastInst::Create(Op, V, Ty, "", &*IP);
            Ret->takeName(CI);
            CI->replaceAllUsesWith(Ret);
            break;
          }
          Ret = CI;
          break;
        }

  // Create a new cast.
  if (!Ret)
    Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);

  // We assert at the end of the function since IP might point to an
  // instruction with different dominance properties than a cast
  // (an invoke for example) and not dominate BIP (but the cast does).
  assert(SE.DT.dominates(Ret, &*BIP));

  rememberInstruction(Ret);
  return Ret;
}

static BasicBlock::iterator findInsertPointAfter(Instruction *I,
                                                 BasicBlock *MustDominate) {
  BasicBlock::iterator IP = ++I->getIterator();
  if (auto *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();

  while (isa<PHINode>(IP))
    ++IP;

  if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
    ++IP;
  } else if (isa<CatchSwitchInst>(IP)) {
    IP = MustDominate->getFirstInsertionPt();
  } else {
    assert(!IP->isEHPad() && "unexpected eh pad!");
  }

  return IP;
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    if (V->getType() == Ty)
      return V;
    if (CastInst *CI = dyn_cast<CastInst>(V)) {
      if (CI->getOperand(0)->getType() == Ty)
        return CI->getOperand(0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
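  // For example (illustrative, hypothetical value names): if V is
  //   %p = inttoptr i64 %x to i32*
  // and a ptrtoint of %p to i64 is requested, the checks below simply
  // return %x instead of stacking a second cast on top of the first.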
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
          SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
          SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP))
      ++IP;
    return ReuseOrCreateCast(A, Ty, Op, IP);
  }

  // Cast the instruction immediately after the instruction.
  Instruction *I = cast<Instruction>(V);
  BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock());
  return ReuseOrCreateCast(I, Ty, Op, IP);
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation, and hoisting
/// to an outer loop when the opportunity is there and it is safe.
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS,
                                 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      return ConstantExpr::get(Opcode, CLHS, CRHS);

  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
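  // For example (illustrative, hypothetical value names): if the block
  // already contains
  //   %sum = add nuw i64 %a, %b
  // just before the insertion point and an add of (%a, %b) with matching
  // no-wrap flags is requested, %sum is returned instead of a duplicate add.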
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;

      auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
        // Ensure that no-wrap flags match.
        if (isa<OverflowingBinaryOperator>(I)) {
          if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
            return true;
          if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
            return true;
        }
        // Conservatively, do not use any instruction which has any of exact
        // flags installed.
        if (isa<PossiblyExactOperator>(I) && I->isExact())
          return true;
        return false;
      };
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  SCEVInsertPointGuard Guard(Builder, this);

  if (IsSafeToHoist) {
    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }
  }

  // If we haven't found this binop, insert it.
  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
  BO->setDebugLoc(Loc);
  if (Flags & SCEV::FlagNUW)
    BO->setHasNoUnsignedWrap();
  if (Flags & SCEV::FlagNSW)
    BO->setHasNoSignedWrap();
  rememberInstruction(BO);

  return BO;
}

/// FactorOutConstant - Test if S is divisible by Factor, using signed
/// division. If so, update S with Factor divided out and return true.
/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
                              const SCEV *Factor, ScalarEvolution &SE,
                              const DataLayout &DL) {
  // Everything is divisible by one.
  if (Factor->isOne())
    return true;

  // x/x == 1.
  if (S == Factor) {
    S = SE.getConstant(S->getType(), 1);
    return true;
  }

  // For a Constant, check for a multiple of the given factor.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    // 0/x == 0.
    if (C->isZero())
      return true;
    // Check for divisibility.
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
      ConstantInt *CI =
          ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
      // If the quotient is zero and the remainder is non-zero, reject
      // the value at this scale. It will be considered for subsequent
      // smaller scales.
      if (!CI->isZero()) {
        const SCEV *Div = SE.getConstant(CI);
        S = Div;
        Remainder = SE.getAddExpr(
            Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
        return true;
      }
    }
  }

  // In a Mul, check if there is a constant operand which is a multiple
  // of the given factor.
  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    // Size is known, check if there is a constant operand which is a multiple
    // of the given factor. If so, we can factor it.
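    // For example (illustrative), with Factor == 4 the product (8 * %x)
    // becomes (2 * %x), since the constant operand 8 is evenly divisible
    // by 4.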
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor))
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
        if (!C->getAPInt().srem(FC->getAPInt())) {
          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
          NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
          S = SE.getMulExpr(NewMulOps);
          return true;
        }
  }

  // In an AddRec, check if both start and step are divisible.
  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    const SCEV *Step = A->getStepRecurrence(SE);
    const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
    if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
      return false;
    if (!StepRem->isZero())
      return false;
    const SCEV *Start = A->getStart();
    if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
      return false;
    S = SE.getAddRecExpr(Start, Step, A->getLoop(),
                         A->getNoWrapFlags(SCEV::FlagNW));
    return true;
  }

  return false;
}

/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
/// is the number of SCEVAddRecExprs present, which are kept at the end of
/// the list.
///
static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
                                Type *Ty,
                                ScalarEvolution &SE) {
  unsigned NumAddRecs = 0;
  for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
    ++NumAddRecs;
  // Group Ops into non-addrecs and addrecs.
  SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
  SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
  // Let ScalarEvolution sort and simplify the non-addrecs list.
  const SCEV *Sum = NoAddRecs.empty() ?
                    SE.getConstant(Ty, 0) :
                    SE.getAddExpr(NoAddRecs);
  // If it returned an add, use the operands. Otherwise it simplified
  // the sum into a single value, so just use that.
  Ops.clear();
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
    Ops.append(Add->op_begin(), Add->op_end());
  else if (!Sum->isZero())
    Ops.push_back(Sum);
  // Then append the addrecs.
  Ops.append(AddRecs.begin(), AddRecs.end());
}

/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
/// This helps expose more opportunities for folding parts of the expressions
/// into GEP indices.
///
static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
                         Type *Ty,
                         ScalarEvolution &SE) {
  // Find the addrecs.
  SmallVector<const SCEV *, 8> AddRecs;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
      const SCEV *Start = A->getStart();
      if (Start->isZero()) break;
      const SCEV *Zero = SE.getConstant(Ty, 0);
      AddRecs.push_back(SE.getAddRecExpr(Zero,
                                         A->getStepRecurrence(SE),
                                         A->getLoop(),
                                         A->getNoWrapFlags(SCEV::FlagNW)));
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
        Ops[i] = Zero;
        Ops.append(Add->op_begin(), Add->op_end());
        e += Add->getNumOperands();
      } else {
        Ops[i] = Start;
      }
    }
  if (!AddRecs.empty()) {
    // Add the addrecs onto the end of the list.
    Ops.append(AddRecs.begin(), AddRecs.end());
    // Resort the operand list, moving any constants to the front.
    SimplifyAddOperands(Ops, Ty, SE);
  }
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware.
/// If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone. The
/// LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                                    const SCEV *const *op_end,
                                    PointerType *PTy,
                                    Type *Ty,
                                    Value *V) {
  Type *OriginalElTy = PTy->getElementType();
  Type *ElTy = OriginalElTy;
  SmallVector<Value *, 4> GepIndices;
  SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
  bool AnyNonZeroIndices = false;

  // Split AddRecs up into parts as either of the parts may be usable
  // without the other.
  SplitAddRecs(Ops, Ty, SE);

  Type *IntIdxTy = DL.getIndexType(PTy);

  // Descend down the pointer's type and attempt to convert the other
  // operands into GEP indices, at each level. The first index in a GEP
  // indexes into the array implied by the pointer operand; the rest of
  // the indices index into the element or field type selected by the
  // preceding index.
  for (;;) {
    // If the scale size is not 0, attempt to factor out a scale for
    // array indexing.
    SmallVector<const SCEV *, 8> ScaledOps;
    if (ElTy->isSized()) {
      const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
      if (!ElSize->isZero()) {
        SmallVector<const SCEV *, 8> NewOps;
        for (const SCEV *Op : Ops) {
          const SCEV *Remainder = SE.getConstant(Ty, 0);
          if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
            // Op now has ElSize factored out.
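            // For example (illustrative), with ElSize == 8 the addrec
            // {0,+,8} factors to the array index {0,+,1} with a zero
            // remainder.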
            ScaledOps.push_back(Op);
            if (!Remainder->isZero())
              NewOps.push_back(Remainder);
            AnyNonZeroIndices = true;
          } else {
            // The operand was not divisible, so add it to the list of operands
            // we'll scan next iteration.
            NewOps.push_back(Op);
          }
        }
        // If we made any changes, update Ops.
        if (!ScaledOps.empty()) {
          Ops = NewOps;
          SimplifyAddOperands(Ops, Ty, SE);
        }
      }
    }

    // Record the scaled array index for this level of the type. If
    // we didn't find any operands that could be factored, tentatively
    // assume that element zero was selected (since the zero offset
    // would obviously be folded away).
    Value *Scaled = ScaledOps.empty() ?
                    Constant::getNullValue(Ty) :
                    expandCodeFor(SE.getAddExpr(ScaledOps), Ty);
    GepIndices.push_back(Scaled);

    // Collect struct field index operands.
    while (StructType *STy = dyn_cast<StructType>(ElTy)) {
      bool FoundFieldNo = false;
      // An empty struct has no fields.
      if (STy->getNumElements() == 0) break;
      // Field offsets are known. See if a constant offset falls within any of
      // the struct fields.
      if (Ops.empty())
        break;
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
        if (SE.getTypeSizeInBits(C->getType()) <= 64) {
          const StructLayout &SL = *DL.getStructLayout(STy);
          uint64_t FullOffset = C->getValue()->getZExtValue();
          if (FullOffset < SL.getSizeInBytes()) {
            unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
            GepIndices.push_back(
                ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
            ElTy = STy->getTypeAtIndex(ElIdx);
            Ops[0] =
                SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
            AnyNonZeroIndices = true;
            FoundFieldNo = true;
          }
        }
      // If no struct field offsets were found, tentatively assume that
      // field zero was selected (since the zero offset would obviously
      // be folded away).
      if (!FoundFieldNo) {
        ElTy = STy->getTypeAtIndex(0u);
        GepIndices.push_back(
            Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
      }
    }

    if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
      ElTy = ATy->getElementType();
    else
      // FIXME: Handle VectorType.
      // E.g., if ElTy is a scalable vector, then ElSize is not a compile-time
      // constant and therefore cannot be factored out. The generated IR is
      // less ideal: the base 'V' is cast to i8* and an ugly getelementptr is
      // emitted over that.
      break;
  }

  // If none of the operands were convertible to proper GEP indices, cast
  // the base to i8* and do an ugly getelementptr with that. It's still
  // better than ptrtoint+arithmetic+inttoptr at least.
  if (!AnyNonZeroIndices) {
    // Cast the base to i8*.
    V = InsertNoopCastOfTo(V,
       Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));

    assert(!isa<Instruction>(V) ||
           SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));

    // Expand the operands for a plain byte offset.
    Value *Idx = expandCodeFor(SE.getAddExpr(Ops), Ty);

    // Fold a GEP with constant operands.
    if (Constant *CLHS = dyn_cast<Constant>(V))
      if (Constant *CRHS = dyn_cast<Constant>(Idx))
        return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
                                              CLHS, CRHS);

    // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
    unsigned ScanLimit = 6;
    BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
    // Scanning starts from the last instruction before the insertion point.
    BasicBlock::iterator IP = Builder.GetInsertPoint();
    if (IP != BlockBegin) {
      --IP;
      for (; ScanLimit; --IP, --ScanLimit) {
        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
        // generated code.
        if (isa<DbgInfoIntrinsic>(IP))
          ScanLimit++;
        if (IP->getOpcode() == Instruction::GetElementPtr &&
            IP->getOperand(0) == V && IP->getOperand(1) == Idx)
          return &*IP;
        if (IP == BlockBegin) break;
      }
    }

    // Save the original insertion point so we can restore it when we're done.
    SCEVInsertPointGuard Guard(Builder, this);

    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }

    // Emit a GEP.
    Value *GEP = Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
    rememberInstruction(GEP);

    return GEP;
  }

  {
    SCEVInsertPointGuard Guard(Builder, this);

    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V)) break;

      bool AnyIndexNotLoopInvariant = any_of(
          GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });

      if (AnyIndexNotLoopInvariant)
        break;

      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }

    // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
    // because ScalarEvolution may have changed the address arithmetic to
    // compute a value which is beyond the end of the allocated object.
    Value *Casted = V;
    if (V->getType() != PTy)
      Casted = InsertNoopCastOfTo(Casted, PTy);
    Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
    Ops.push_back(SE.getUnknown(GEP));
    rememberInstruction(GEP);
  }

  return expand(SE.getAddExpr(Ops));
}

Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty,
                                    Value *V) {
  const SCEV *const Ops[1] = {Op};
  return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V);
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested.
/// If they are neighboring, pick the later.
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  if (!A) return B;
  if (!B) return A;
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
  if (!Pair.second)
    return Pair.first->second;

  if (isa<SCEVConstant>(S))
    // A constant has no relevant loops.
    return nullptr;
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return nullptr;
  }
  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
    const Loop *L = nullptr;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    for (const SCEV *Op : N->operands())
      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
    return RelevantLoops[N] = L;
  }
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
    const Loop *Result = getRelevantLoop(C->getOperand());
    return RelevantLoops[C] = Result;
  }
  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    const Loop *Result = PickMostRelevantLoop(
        getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
    return RelevantLoops[D] = Result;
  }
  llvm_unreachable("Unexpected SCEV type!");
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Sort pointer operands to the front, so that they can serve as GEP
    // bases; visitAddExpr below relies on this ordering.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies on
  // to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
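  // For example (illustrative, hypothetical value names): expanding
  // (%p + 8*%i + 4), the pointer %p sorts first and is expanded as the
  // base, and the integer operands are then folded into a getelementptr
  // off %p where possible.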
  Value *Sum = nullptr;
  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
    } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is a SCEVUnknown that is not an instruction, peek
        // through it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
    } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
      // The running sum is an integer, and there's a pointer at this level.
      // Try to form a getelementptr. If the running sum is instructions,
      // use a SCEVUnknown to avoid re-analyzing them.
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
                                               SE.getSCEV(Sum));
      for (++I; I != E && I->first == CurLoop; ++I)
        NewOps.push_back(I->second);
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
                        /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple add.
      Value *W = expandCodeFor(Op, Ty);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum)) std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
                        /*IsSafeToHoist*/ true);
      ++I;
    }
  }

  return Sum;
}

Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = nullptr;
  auto I = OpsAndLoops.begin();

  // Expand the calculation of X pow N in the following manner:
  // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
  const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
    auto E = I;
    // Calculate how many times the same operand from the same loop is included
    // into this power.
    uint64_t Exponent = 0;
    const uint64_t MaxExponent = UINT64_MAX >> 1;
    // No one sane will ever try to calculate such huge exponents, but if we
    // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
    // below when the power of 2 exceeds our Exponent, and we want it to be
    // 1u << 31 at most to not deal with unsigned overflow.
    while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
      ++Exponent;
      ++E;
    }
    assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");

    // Calculate powers with exponents 1, 2, 4, 8, etc. and include those
    // that are needed in the result.
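    // Worked example (illustrative): for Exponent == 5 (binary 101),
    // Result starts as X, P is squared through X^2 and X^4, and the final
    // Result is X * X^4 == X^5, i.e. two squarings plus one extra multiply.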
    Value *P = expandCodeFor(I->second, Ty);
    Value *Result = nullptr;
    if (Exponent & 1)
      Result = P;
    for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
      P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
                      /*IsSafeToHoist*/ true);
      if (Exponent & BinExp)
        Result = Result ? InsertBinop(Instruction::Mul, Result, P,
                                      SCEV::FlagAnyWrap,
                                      /*IsSafeToHoist*/ true)
                        : P;
    }

    I = E;
    assert(Result && "Nothing was expanded?");
    return Result;
  };

  while (I != OpsAndLoops.end()) {
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = ExpandOpBinPowN();
    } else if (I->second->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertNoopCastOfTo(Prod, Ty);
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple mul.
      Value *W = ExpandOpBinPowN();
      Prod = InsertNoopCastOfTo(Prod, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      const APInt *RHS;
      if (match(W, m_Power2(RHS))) {
        // Canonicalize Prod*(1<<C) to Prod<<C.
        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
        auto NWFlags = S->getNoWrapFlags();
        // Clear the nsw flag if the shl would produce a poison value.
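        // For example (illustrative), in i8 the multiply 1 * -128 does not
        // overflow and so may keep nsw, but the equivalent (1 << 7) shifts
        // out bits that disagree with the resulting sign bit and would be
        // poison under nsw.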
        if (RHS->logBase2() == RHS->getBitWidth() - 1)
          NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
        Prod = InsertBinop(Instruction::Shl, Prod,
                           ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
                           /*IsSafeToHoist*/ true);
      } else {
        Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
                           /*IsSafeToHoist*/ true);
      }
    }
  }

  return Prod;
}

Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  Value *LHS = expandCodeFor(S->getLHS(), Ty);
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getAPInt();
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(Ty, RHS.logBase2()),
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
  }

  Value *RHS = expandCodeFor(S->getRHS(), Ty);
  return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
                     /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}

/// Move parts of Base into Rest to leave Base with the minimal
/// expression that provides a pointer operand suitable for a
/// GEP expansion.
static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
                              ScalarEvolution &SE) {
  while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
    Base = A->getStart();
    Rest = SE.getAddExpr(Rest,
                         SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
                                          A->getStepRecurrence(SE),
                                          A->getLoop(),
                                          A->getNoWrapFlags(SCEV::FlagNW)));
  }
  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
    Base = A->getOperand(A->getNumOperands()-1);
    SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end());
    NewAddOps.back() = Rest;
    Rest = SE.getAddExpr(NewAddOps);
    ExposePointerBase(Base, Rest, SE);
  }
}

/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
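/// For example (illustrative), the chain %iv.next = add i64 %iv, 1 walks
/// back from the increment to the PHI %iv and is accepted; a chain that
/// passes through a non-bitcast cast or a side-effecting instruction is
/// rejected.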
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                         const Loop *L) {
  if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
      (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
    return false;
  // If any of the operands don't dominate the insert position, bail.
  // Addrec operands are always loop-invariant, so this can only happen
  // if there are instructions which haven't been hoisted.
  if (L == IVIncInsertLoop) {
    for (User::op_iterator OI = IncV->op_begin()+1,
           OE = IncV->op_end(); OI != OE; ++OI)
      if (Instruction *OInst = dyn_cast<Instruction>(OI))
        if (!SE.DT.dominates(OInst, IVIncInsertPos))
          return false;
  }
  // Advance to the next instruction.
  IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  if (!IncV)
    return false;

  if (IncV->mayHaveSideEffects())
    return false;

  if (IncV == PN)
    return true;

  return isNormalAddRecExprPHI(PN, IncV, L);
}

/// getIVIncOperand returns an induction variable increment's induction
/// variable operand.
///
/// If allowScale is set, any type of GEP is allowed as long as the nonIV
/// operands dominate InsertPos.
///
/// If allowScale is not set, ensure that a GEP increment conforms to one of
/// the simple patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP. If the pattern isn't recognized, return nullptr.
Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                           Instruction *InsertPos,
                                           bool allowScale) {
  if (IncV == InsertPos)
    return nullptr;

  switch (IncV->getOpcode()) {
  default:
    return nullptr;
  // Check for a simple Add/Sub or GEP of a loop invariant step.
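  // (For illustration: given "%iv.next = add i64 %iv, %step" with %step
  // dominating InsertPos, the IV operand returned is %iv.)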
  case Instruction::Add:
  case Instruction::Sub: {
    Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
    if (!OInst || SE.DT.dominates(OInst, InsertPos))
      return dyn_cast<Instruction>(IncV->getOperand(0));
    return nullptr;
  }
  case Instruction::BitCast:
    return dyn_cast<Instruction>(IncV->getOperand(0));
  case Instruction::GetElementPtr:
    for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) {
      if (isa<Constant>(*I))
        continue;
      if (Instruction *OInst = dyn_cast<Instruction>(*I)) {
        if (!SE.DT.dominates(OInst, InsertPos))
          return nullptr;
      }
      if (allowScale) {
        // Allow any kind of GEP as long as it can be hoisted.
        continue;
      }
      // This must be a pointer addition of constants (pretty), which is
      // already handled, or some number of address-size elements (ugly).
      // Ugly GEPs have 2 operands. i1* is used by the expander to represent
      // an address-size element.
      if (IncV->getNumOperands() != 2)
        return nullptr;
      unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
      if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
          && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
        return nullptr;
      break;
    }
    return dyn_cast<Instruction>(IncV->getOperand(0));
  }
}

/// If the insert point of the current builder or any of the builders on the
/// stack of saved builders has 'I' as its insert point, update it to point to
/// the instruction after 'I'. This is intended to be used when the instruction
/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
/// different block, the inconsistent insert point (with a mismatched
/// Instruction and Block) can lead to an instruction being inserted in a block
/// other than its parent.
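/// (Sketch of the hazard, with illustrative names: if the builder's insert
/// point is %inc in block %A and %inc is moved into block %B without this
/// fixup, the builder still holds an iterator into %A paired with %inc, and a
/// subsequent insertion may be spliced into the wrong block.)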
void SCEVExpander::fixupInsertPoints(Instruction *I) {
  BasicBlock::iterator It(*I);
  BasicBlock::iterator NewInsertPt = std::next(It);
  if (Builder.GetInsertPoint() == It)
    Builder.SetInsertPoint(&*NewInsertPt);
  for (auto *InsertPtGuard : InsertPointGuards)
    if (InsertPtGuard->GetInsertPoint() == It)
      InsertPtGuard->SetInsertPoint(NewInsertPt);
}

/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
  if (SE.DT.dominates(IncV, InsertPos))
    return true;

  // InsertPos must itself dominate IncV so that IncV's new position satisfies
  // its existing users.
  if (isa<PHINode>(InsertPos) ||
      !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
    return false;

  if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
    return false;

  // Check that the chain of IV operands leading back to Phi can be hoisted.
  SmallVector<Instruction*, 4> IVIncs;
  for (;;) {
    Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
    if (!Oper)
      return false;
    // IncV is safe to hoist.
    IVIncs.push_back(IncV);
    IncV = Oper;
    if (SE.DT.dominates(IncV, InsertPos))
      break;
  }
  for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {
    fixupInsertPoints(*I);
    (*I)->moveBefore(InsertPos);
  }
  return true;
}

/// Determine if this cyclic phi is in a form that would have been generated by
/// LSR. We don't care if the phi was actually expanded in this pass, as long
/// as it is in a low-cost form, for example, no implied multiplication. This
/// should match any patterns generated by getAddRecExprPHILiterally and
/// expandAddToGEP.
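/// For example (hypothetical IR), a phi whose increment chain is
///   %p = phi i8* [ %p.start, %preheader ], [ %p.next, %latch ]
///   %p.next = getelementptr i8, i8* %p, i64 %step
/// is accepted, while an increment that implies a multiply, such as a GEP
/// with a wide element type and a non-constant index, is rejected because
/// allowScale is false here.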
bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                           const Loop *L) {
  for (Instruction *IVOper = IncV;
       (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
                                 /*allowScale=*/false));) {
    if (IVOper == PN)
      return true;
  }
  return false;
}

/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
/// need to materialize IV increments elsewhere to handle difficult situations.
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                                 Type *ExpandTy, Type *IntTy,
                                 bool useSubtract) {
  Value *IncV;
  // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
  if (ExpandTy->isPointerTy()) {
    PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
    // If the step isn't constant, don't use an implicitly scaled GEP, because
    // that would require a multiply inside the loop.
    if (!isa<ConstantInt>(StepV))
      GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
                                  GEPPtrTy->getAddressSpace());
    IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
    if (IncV->getType() != PN->getType()) {
      IncV = Builder.CreateBitCast(IncV, PN->getType());
      rememberInstruction(IncV);
    }
  } else {
    IncV = useSubtract ?
      Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
      Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
    rememberInstruction(IncV);
  }
  return IncV;
}

/// Hoist the addrec instruction chain rooted in the loop phi above the
/// position. This routine assumes that this is possible (has been checked).
void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
                                  Instruction *Pos, PHINode *LoopPhi) {
  do {
    if (DT->dominates(InstToHoist, Pos))
      break;
    // Make sure the increment is where we want it. But don't move it
    // down past a potential existing post-inc user.
    fixupInsertPoints(InstToHoist);
    InstToHoist->moveBefore(Pos);
    Pos = InstToHoist;
    InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
  } while (InstToHoist != LoopPhi);
}

/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
                                    const SCEVAddRecExpr *Phi,
                                    const SCEVAddRecExpr *Requested,
                                    bool &InvertStep) {
  Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
  Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());

  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
    return false;

  // Try to truncate it if necessary.
  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
  if (!Phi)
    return false;

  // Check whether truncation will help.
  if (Phi == Requested) {
    InvertStep = false;
    return true;
  }

  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
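  // (Worked example: with Phi == {0,+,-1} and Requested == {n,+,1}, we have
  // n + (-{n,+,1}) == {0,+,-1} == Phi, so the requested value can later be
  // recovered as n - Phi, and InvertStep is set.)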
  if (SE.getAddExpr(Requested->getStart(),
                    SE.getNegativeSCEV(Requested)) == Phi) {
    InvertStep = true;
    return true;
  }

  return false;
}

static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
                                            SE.getSignExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
    SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
                                            SE.getZeroExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
    SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L,
                                        Type *ExpandTy,
                                        Type *IntTy,
                                        Type *&TruncTy,
                                        bool &InvertStep) {
  assert((!IVIncInsertLoop || IVIncInsertPos) &&
         "Uninitialized insert position");

  // Reuse a previously-inserted PHI, if present.
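  // (For instance, if a phi computing {0,+,1}<%L> already exists in %L's
  // header with a well-formed increment, prefer reusing it over creating a
  // second, redundant induction variable.)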
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    PHINode *AddRecPhiMatch = nullptr;
    Instruction *IncV = nullptr;
    TruncTy = nullptr;
    InvertStep = false;

    // Only try partially matching SCEVs that need truncation and/or
    // step-inversion if we know this loop is outside the current loop.
    bool TryNonMatchingSCEV =
        IVIncInsertLoop &&
        SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());

    for (PHINode &PN : L->getHeader()->phis()) {
      if (!SE.isSCEVable(PN.getType()))
        continue;

      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
      if (!PhiSCEV)
        continue;

      bool IsMatchingSCEV = PhiSCEV == Normalized;
      // We only handle truncation and inversion of phi recurrences for the
      // expanded expression if the expanded expression's loop dominates the
      // loop we insert to. Check now, so we can bail out early.
      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
        continue;

      // TODO: this possibly can be reworked to avoid this cast at all.
      Instruction *TempIncV =
          dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
      if (!TempIncV)
        continue;

      // Check whether we can reuse this PHI node.
      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
          continue;
        if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
          continue;
      } else {
        if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
          continue;
      }

      // Stop if we have found an exact match SCEV.
      if (IsMatchingSCEV) {
        IncV = TempIncV;
        TruncTy = nullptr;
        InvertStep = false;
        AddRecPhiMatch = &PN;
        break;
      }

      // Try whether the phi can be translated into the requested form
      // (truncated and/or offset by a constant).
      if ((!TruncTy || InvertStep) &&
          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
        // Record the phi node. But don't stop, we might find an exact match
        // later.
        AddRecPhiMatch = &PN;
        IncV = TempIncV;
        TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
      }
    }

    if (AddRecPhiMatch) {
      // Potentially, move the increment. We have made sure in
      // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
      if (L == IVIncInsertLoop)
        hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);

      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(AddRecPhiMatch);
      // Remember the increment.
      rememberInstruction(IncV);
      return AddRecPhiMatch;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Another AddRec may need to be recursively expanded below. For example, if
  // this AddRec is quadratic, the StepV may itself be an AddRec in this
  // loop. Remove this loop from the PostIncLoops set before expanding such
  // AddRecs. Otherwise, we cannot find a valid position for the step
  // (i.e. StepV can never dominate its loop header). Ideally, we could do
  // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  // so it's not worth implementing SmallPtrSet::swap.
  PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  PostIncLoops.clear();

  // Expand code for the start value into the loop preheader.
  assert(L->getLoopPreheader() &&
         "Can't expand add recurrences without a loop preheader!");
  Value *StartV = expandCodeFor(Normalized->getStart(), ExpandTy,
                                L->getLoopPreheader()->getTerminator());

  // StartV must have been inserted into L's preheader to dominate the new
  // phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
                                 L->getHeader()));

  // Expand code for the step value. Do this before creating the PHI so that
  // PHI reuse code doesn't see an incomplete PHI.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  // If the stride is negative, insert a sub instead of an add for the
  // increment (unless it's a constant, because subtracts of constants are
  // canonicalized to adds).
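  // (For example, for a step of (-1 * %n) we negate the step SCEV to %n and
  // emit "%iv.next = sub %iv, %n" rather than materializing the negation and
  // adding it.)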
  bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  if (useSubtract)
    Step = SE.getNegativeSCEV(Step);
  // Expand the step somewhere that dominates the loop header.
  Value *StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());

  // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
  // we actually do emit an addition. It does not apply if we emit a
  // subtraction.
  bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
  bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);

  // Create the PHI.
  BasicBlock *Header = L->getHeader();
  Builder.SetInsertPoint(Header, Header->begin());
  pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
                                  Twine(IVName) + ".iv");
  rememberInstruction(PN);

  // Create the step instructions and populate the PHI.
  for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
    BasicBlock *Pred = *HPI;

    // Add a start value.
    if (!L->contains(Pred)) {
      PN->addIncoming(StartV, Pred);
      continue;
    }

    // Create a step value and add it to the PHI.
    // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert
    // the instructions at IVIncInsertPos.
    Instruction *InsertPos = L == IVIncInsertLoop ?
      IVIncInsertPos : Pred->getTerminator();
    Builder.SetInsertPoint(InsertPos);
    Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);

    if (isa<OverflowingBinaryOperator>(IncV)) {
      if (IncrementIsNUW)
        cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
      if (IncrementIsNSW)
        cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
    }
    PN->addIncoming(IncV, Pred);
  }

  // After expanding subexpressions, restore the PostIncLoops set so the caller
  // can ensure that IVIncrement dominates the current uses.
  PostIncLoops = SavedPostIncLoops;

  // Remember this PHI, even in post-inc mode.
  InsertedValues.insert(PN);

  return PN;
}

Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  Type *STy = S->getType();
  Type *IntTy = SE.getEffectiveSCEVType(STy);
  const Loop *L = S->getLoop();

  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
  if (PostIncLoops.count(L)) {
    PostIncLoopSet Loops;
    Loops.insert(L);
    Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
  }

  // Strip off any non-loop-dominating component from the addrec start.
  const SCEV *Start = Normalized->getStart();
  const SCEV *PostLoopOffset = nullptr;
  if (!SE.properlyDominates(Start, L->getHeader())) {
    PostLoopOffset = Start;
    Start = SE.getConstant(Normalized->getType(), 0);
    Normalized = cast<SCEVAddRecExpr>(
      SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
                       Normalized->getLoop(),
                       Normalized->getNoWrapFlags(SCEV::FlagNW)));
  }

  // Strip off any non-loop-dominating component from the addrec step.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  const SCEV *PostLoopScale = nullptr;
  if (!SE.dominates(Step, L->getHeader())) {
    PostLoopScale = Step;
    Step = SE.getConstant(Normalized->getType(), 1);
    if (!Start->isZero()) {
      // The normalization below assumes that Start is constant zero, so if
      // it isn't, re-associate Start to PostLoopOffset.
      assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?");
      PostLoopOffset = Start;
      Start = SE.getConstant(Normalized->getType(), 0);
    }
    Normalized =
      cast<SCEVAddRecExpr>(SE.getAddRecExpr(
        Start, Step, Normalized->getLoop(),
        Normalized->getNoWrapFlags(SCEV::FlagNW)));
  }

  // Expand the core addrec. If we need post-loop scaling, force it to
  // expand to an integer type to avoid the need for additional casting.
  Type *ExpandTy = PostLoopScale ? IntTy : STy;
  // We can't use a pointer type for the addrec if the pointer type is
  // non-integral.
  Type *AddRecPHIExpandTy =
      DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;

  // In some cases, we decide to reuse an existing phi node but need to
  // truncate it and/or invert the step.
  Type *TruncTy = nullptr;
  bool InvertStep = false;
  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
                                          IntTy, TruncTy, InvertStep);

  // Accommodate post-inc mode, if necessary.
  Value *Result;
  if (!PostIncLoops.count(L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    Result = PN->getIncomingValueForBlock(LatchBlock);

    // For an expansion to use the postinc form, the client must call
    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    // or dominated by IVIncInsertPos.
    if (isa<Instruction>(Result) &&
        !SE.DT.dominates(cast<Instruction>(Result),
                         &*Builder.GetInsertPoint())) {
      // The induction variable's postinc expansion does not dominate this use.
      // IVUsers tries to prevent this case, so it is rare. However, it can
      // happen when an IVUser outside the loop is not dominated by the latch
      // block. Adjusting IVIncInsertPos before expansion begins cannot handle
      // all cases. Consider a phi outside whose operand is replaced during
      // expansion with the value of the postinc user. Without fundamentally
      // changing the way postinc users are tracked, the only remedy is
      // inserting an extra IV increment. StepV might fold into PostLoopOffset,
      // but hopefully expandCodeFor handles that.
      bool useSubtract =
          !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
      if (useSubtract)
        Step = SE.getNegativeSCEV(Step);
      Value *StepV;
      {
        // Expand the step somewhere that dominates the loop header.
        SCEVInsertPointGuard Guard(Builder, this);
        StepV = expandCodeFor(Step, IntTy, &L->getHeader()->front());
      }
      Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
    }
  }

  // We have decided to reuse an induction variable of a dominating loop. Apply
  // truncation and/or inversion of the step.
  if (TruncTy) {
    Type *ResTy = Result->getType();
    // Normalize the result type.
    if (ResTy != SE.getEffectiveSCEVType(ResTy))
      Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
    // Truncate the result.
    if (TruncTy != Result->getType()) {
      Result = Builder.CreateTrunc(Result, TruncTy);
      rememberInstruction(Result);
    }
    // Invert the result.
    if (InvertStep) {
      Result = Builder.CreateSub(expandCodeFor(Normalized->getStart(), TruncTy),
                                 Result);
      rememberInstruction(Result);
    }
  }

  // Re-apply any non-loop-dominating scale.
  if (PostLoopScale) {
    assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
    Result = InsertNoopCastOfTo(Result, IntTy);
    Result = Builder.CreateMul(Result,
                               expandCodeFor(PostLoopScale, IntTy));
    rememberInstruction(Result);
  }

  // Re-apply any non-loop-dominating offset.
  if (PostLoopOffset) {
    if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
      if (Result->getType()->isIntegerTy()) {
        Value *Base = expandCodeFor(PostLoopOffset, ExpandTy);
        Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
      } else {
        Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
      }
    } else {
      Result = InsertNoopCastOfTo(Result, IntTy);
      Result = Builder.CreateAdd(Result,
                                 expandCodeFor(PostLoopOffset, IntTy));
      rememberInstruction(Result);
    }
  }

  return Result;
}

Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
  // In canonical mode we compute the addrec as an expression of a canonical IV
  // using evaluateAtIteration and expand the resulting SCEV expression. This
  // way we avoid introducing new IVs to carry on the computation of the addrec
  // throughout the loop.
  //
  // For nested addrecs evaluateAtIteration might need a canonical IV of a
  // type wider than the addrec itself. Emitting a canonical IV of the
  // proper type might produce non-legal types, for example expanding an i64
  // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this, just
  // fall back to non-canonical mode for nested addrecs.
  if (!CanonicalMode || (S->getNumOperands() > 2))
    return expandAddRecExprLiterally(S);

  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  const Loop *L = S->getLoop();

  // First check for an existing canonical IV in a suitable type.
  PHINode *CanonicalIV = nullptr;
  if (PHINode *PN = L->getCanonicalInductionVariable())
    if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
      CanonicalIV = PN;

  // Rewrite an AddRec in terms of the canonical induction variable, if
  // its type is narrower.
  if (CanonicalIV &&
      SE.getTypeSizeInBits(CanonicalIV->getType()) >
      SE.getTypeSizeInBits(Ty)) {
    SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
    for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
      NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
    Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
                                       S->getNoWrapFlags(SCEV::FlagNW)));
    BasicBlock::iterator NewInsertPt =
        findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
    V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
                      &*NewInsertPt);
    return V;
  }

  // {X,+,F} --> X + {0,+,F}
  if (!S->getStart()->isZero()) {
    SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
    NewOps[0] = SE.getConstant(Ty, 0);
    const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
                                        S->getNoWrapFlags(SCEV::FlagNW));

    // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
    // comments on expandAddToGEP for details.
    const SCEV *Base = S->getStart();
    // Dig into the expression to find the pointer base for a GEP.
    const SCEV *ExposedRest = Rest;
    ExposePointerBase(Base, ExposedRest, SE);
    // If we found a pointer, expand the AddRec with a GEP.
    if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) {
      // Make sure the Base isn't something exotic, such as a multiplied
      // or divided pointer value. In those cases, the result type isn't
      // actually a pointer type.
      if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) {
        Value *StartV = expand(Base);
        assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!");
        return expandAddToGEP(ExposedRest, PTy, Ty, StartV);
      }
    }

    // Just do a normal add. Pre-expand the operands to suppress folding.
    //
    // The LHS and RHS values are factored out of the expand call to make the
    // output independent of the argument evaluation order.
    const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart()));
    const SCEV *AddExprRHS = SE.getUnknown(expand(Rest));
    return expand(SE.getAddExpr(AddExprLHS, AddExprRHS));
  }

  // If we don't yet have a canonical IV, create one.
  if (!CanonicalIV) {
    // Create and insert the PHI node for the induction variable in the
    // specified loop.
    BasicBlock *Header = L->getHeader();
    pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
    CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar",
                                  &Header->front());
    rememberInstruction(CanonicalIV);

    SmallSet<BasicBlock *, 4> PredSeen;
    Constant *One = ConstantInt::get(Ty, 1);
    for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
      BasicBlock *HP = *HPI;
      if (!PredSeen.insert(HP).second) {
        // There must be an incoming value for each predecessor, even the
        // duplicates!
        CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP);
        continue;
      }

      if (L->contains(HP)) {
        // Insert a unit add instruction right before the terminator
        // corresponding to the back-edge.
        Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One,
                                                     "indvar.next",
                                                     HP->getTerminator());
        Add->setDebugLoc(HP->getTerminator()->getDebugLoc());
        rememberInstruction(Add);
        CanonicalIV->addIncoming(Add, HP);
      } else {
        CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP);
      }
    }
  }

  // {0,+,1} --> Insert a canonical induction variable into the loop!
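  // (The canonical IV built above looks like this in IR, with illustrative
  // block names:
  //   %indvar = phi iN [ 0, %preheader ], [ %indvar.next, %latch ]
  //   %indvar.next = add iN %indvar, 1
  // so a {0,+,1} addrec is simply the phi itself.)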
  if (S->isAffine() && S->getOperand(1)->isOne()) {
    assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) &&
           "IVs with types different from the canonical IV should "
           "already have been handled!");
    return CanonicalIV;
  }

  // {0,+,F} --> {0,+,1} * F

  // If this is a simple linear addrec, emit it now as a special case.
  if (S->isAffine())    // {0,+,F} --> i*F
    return
      expand(SE.getTruncateOrNoop(
        SE.getMulExpr(SE.getUnknown(CanonicalIV),
                      SE.getNoopOrAnyExtend(S->getOperand(1),
                                            CanonicalIV->getType())),
        Ty));

  // If this is a chain of recurrences, turn it into a closed form, using the
  // folders, then expandCodeFor the closed form. This allows the folders to
  // simplify the expression without having to build a bunch of special code
  // into this folder.
  const SCEV *IH = SE.getUnknown(CanonicalIV);   // Get I as a "symbolic" SCEV.

  // Promote S up to the canonical IV type, if the cast is foldable.
  const SCEV *NewS = S;
  const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType());
  if (isa<SCEVAddRecExpr>(Ext))
    NewS = Ext;

  const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE);

  // Truncate the result down to the original type, if needed.
  const SCEV *T = SE.getTruncateOrNoop(V, Ty);
  return expand(T);
}

Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  Value *V = expandCodeFor(S->getOperand(),
                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
  Value *I = Builder.CreateTrunc(V, Ty);
  rememberInstruction(I);
  return I;
}

Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  Value *V = expandCodeFor(S->getOperand(),
                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
  Value *I = Builder.CreateZExt(V, Ty);
  rememberInstruction(I);
  return I;
}

Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());
  Value *V = expandCodeFor(S->getOperand(),
                           SE.getEffectiveSCEVType(S->getOperand()->getType()));
  Value *I = Builder.CreateSExt(V, Ty);
  rememberInstruction(I);
  return I;
}

Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) {
  Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
  Type *Ty = LHS->getType();
  for (int i = S->getNumOperands()-2; i >= 0; --i) {
    // In the case of mixed integer and pointer types, do the
    // rest of the comparisons as integer.
    Type *OpTy = S->getOperand(i)->getType();
    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
      Ty = SE.getEffectiveSCEVType(Ty);
      LHS = InsertNoopCastOfTo(LHS, Ty);
    }
    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
    Value *ICmp = Builder.CreateICmpSGT(LHS, RHS);
    rememberInstruction(ICmp);
    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax");
    rememberInstruction(Sel);
    LHS = Sel;
  }
  // In the case of mixed integer and pointer types, cast the
  // final result back to the pointer type.
  if (LHS->getType() != S->getType())
    LHS = InsertNoopCastOfTo(LHS, S->getType());
  return LHS;
}

Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) {
  Value *LHS = expand(S->getOperand(S->getNumOperands()-1));
  Type *Ty = LHS->getType();
  for (int i = S->getNumOperands()-2; i >= 0; --i) {
    // In the case of mixed integer and pointer types, do the
    // rest of the comparisons as integer.
    Type *OpTy = S->getOperand(i)->getType();
    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
      Ty = SE.getEffectiveSCEVType(Ty);
      LHS = InsertNoopCastOfTo(LHS, Ty);
    }
    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
    Value *ICmp = Builder.CreateICmpUGT(LHS, RHS);
    rememberInstruction(ICmp);
    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax");
    rememberInstruction(Sel);
    LHS = Sel;
  }
  // In the case of mixed integer and pointer types, cast the
  // final result back to the pointer type.
  if (LHS->getType() != S->getType())
    LHS = InsertNoopCastOfTo(LHS, S->getType());
  return LHS;
}

Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
  Type *Ty = LHS->getType();
  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
    // In the case of mixed integer and pointer types, do the
    // rest of the comparisons as integer.
    Type *OpTy = S->getOperand(i)->getType();
    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
      Ty = SE.getEffectiveSCEVType(Ty);
      LHS = InsertNoopCastOfTo(LHS, Ty);
    }
    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
    Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
    rememberInstruction(ICmp);
    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
    rememberInstruction(Sel);
    LHS = Sel;
  }
  // In the case of mixed integer and pointer types, cast the
  // final result back to the pointer type.
  if (LHS->getType() != S->getType())
    LHS = InsertNoopCastOfTo(LHS, S->getType());
  return LHS;
}

Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
  Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
  Type *Ty = LHS->getType();
  for (int i = S->getNumOperands() - 2; i >= 0; --i) {
    // In the case of mixed integer and pointer types, do the
    // rest of the comparisons as integer.
    Type *OpTy = S->getOperand(i)->getType();
    if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
      Ty = SE.getEffectiveSCEVType(Ty);
      LHS = InsertNoopCastOfTo(LHS, Ty);
    }
    Value *RHS = expandCodeFor(S->getOperand(i), Ty);
    Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
    rememberInstruction(ICmp);
    Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
    rememberInstruction(Sel);
    LHS = Sel;
  }
  // In the case of mixed integer and pointer types, cast the
  // final result back to the pointer type.
  if (LHS->getType() != S->getType())
    LHS = InsertNoopCastOfTo(LHS, S->getType());
  return LHS;
}

Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty,
                                   Instruction *IP) {
  setInsertPoint(IP);
  return expandCodeFor(SH, Ty);
}

Value *SCEVExpander::expandCodeFor(const SCEV *SH, Type *Ty) {
  // Expand the code for this SCEV.
  Value *V = expand(SH);
  if (Ty) {
    assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
           "non-trivial casts should be done with the SCEVs directly!");
    V = InsertNoopCastOfTo(V, Ty);
  }
  return V;
}

ScalarEvolution::ValueOffsetPair
SCEVExpander::FindValueInExprValueMap(const SCEV *S,
                                      const Instruction *InsertPt) {
  SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
  // If the expansion is not in CanonicalMode, and the SCEV contains any
  // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
  if (CanonicalMode || !SE.containsAddRecurrence(S)) {
    // If S is scConstant, it may be worse to reuse an existing Value.
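    // (Rationale, as a sketch: a ConstantInt can be rematerialized for free
    // and folded into its users, whereas reusing an instruction that happens
    // to produce the same constant pins the expansion to that instruction's
    // position.)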
    if (S->getSCEVType() != scConstant && Set) {
      // Choose a Value from the set which dominates the insertPt.
      // insertPt should be inside the Value's parent loop so as not to break
      // the LCSSA form.
      for (auto const &VOPair : *Set) {
        Value *V = VOPair.first;
        ConstantInt *Offset = VOPair.second;
        Instruction *EntInst = nullptr;
        if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
            S->getType() == V->getType() &&
            EntInst->getFunction() == InsertPt->getFunction() &&
            SE.DT.dominates(EntInst, InsertPt) &&
            (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
             SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
          return {V, Offset};
      }
    }
  }
  return {nullptr, nullptr};
}

// The expansion of SCEV will either reuse a previous Value in ExprValueMap,
// or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
// and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
// literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
// the expansion will try to reuse a Value from ExprValueMap, and only when
// that fails, expand the SCEV literally.
Value *SCEVExpander::expand(const SCEV *S) {
  // Compute an insertion point for this SCEV object. Hoist the instructions
  // as far out in the loop nest as possible.
  Instruction *InsertPt = &*Builder.GetInsertPoint();

  // We can move the insertion point only if there are no div or rem
  // operations; otherwise we risk moving it past a check for a zero
  // denominator.
  auto SafeToHoist = [](const SCEV *S) {
    return !SCEVExprContains(S, [](const SCEV *S) {
      if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
        if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
          // Division by non-zero constants can be hoisted.
          return SC->getValue()->isZero();
        // All other divisions should not be moved as they may be
        // divisions by zero and should be kept within the
        // conditions of the surrounding loops that guard their
        // execution (see PR35406).
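        // (E.g. hoisting the expansion of (%n /u %m) out of a loop whose
        //  body is guarded by "if (%m != 0)" could introduce a division
        //  by zero.)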
        return true;
      }
      return false;
    });
  };
  if (SafeToHoist(S)) {
    for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
         L = L->getParentLoop()) {
      if (SE.isLoopInvariant(S, L)) {
        if (!L) break;
        if (BasicBlock *Preheader = L->getLoopPreheader())
          InsertPt = Preheader->getTerminator();
        else
          // LSR sets the insertion point for AddRec start/step values to the
          // block start to simplify value reuse, even though it's an invalid
          // position. SCEVExpander must correct for this in all cases.
          InsertPt = &*L->getHeader()->getFirstInsertionPt();
      } else {
        // If the SCEV is computable at this level, insert it into the header
        // after the PHIs (and after any other instructions that we've inserted
        // there) so that it is guaranteed to dominate any user inside the loop.
        if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
          InsertPt = &*L->getHeader()->getFirstInsertionPt();
        while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
               (isInsertedInstruction(InsertPt) ||
                isa<DbgInfoIntrinsic>(InsertPt)))
          InsertPt = &*std::next(InsertPt->getIterator());
        break;
      }
    }
  }

  // IndVarSimplify sometimes sets the insertion point at the block start, even
  // when there are PHIs at that point. We must correct for this.
  if (isa<PHINode>(*InsertPt))
    InsertPt = &*InsertPt->getParent()->getFirstInsertionPt();

  // Check to see if we already expanded this here.
  auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
  if (I != InsertedExpressions.end())
    return I->second;

  SCEVInsertPointGuard Guard(Builder, this);
  Builder.SetInsertPoint(InsertPt);

  // Expand the expression into instructions.
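  // First see whether an equivalent Value is already available at InsertPt
  // via the ExprValueMap; only emit fresh instructions when that fails.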
  ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
  Value *V = VO.first;

  if (!V)
    V = visit(S);
  else if (VO.second) {
    if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
      Type *Ety = Vty->getPointerElementType();
      int64_t Offset = VO.second->getSExtValue();
      int64_t ESize = SE.getTypeSizeInBits(Ety);
      if ((Offset * 8) % ESize == 0) {
        ConstantInt *Idx =
            ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
        V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
      } else {
        ConstantInt *Idx =
            ConstantInt::getSigned(VO.second->getType(), -Offset);
        unsigned AS = Vty->getAddressSpace();
        V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
        V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
                              "uglygep");
        V = Builder.CreateBitCast(V, Vty);
      }
    } else {
      V = Builder.CreateSub(V, VO.second);
    }
  }
  // Remember the expanded value for this SCEV at this location.
  //
  // This is independent of PostIncLoops. The mapped value simply materializes
  // the expression at this insertion point. If the mapped value happened to be
  // a postinc expansion, it could be reused by a non-postinc user, but only if
  // its insertion point was already at the head of the loop.
  InsertedExpressions[std::make_pair(S, InsertPt)] = V;
  return V;
}

void SCEVExpander::rememberInstruction(Value *I) {
  if (!PostIncLoops.empty())
    InsertedPostIncValues.insert(I);
  else
    InsertedValues.insert(I);
}

/// getOrInsertCanonicalInductionVariable - This method returns the
/// canonical induction variable of the specified type for the specified
/// loop (inserting one if there is none). A canonical induction variable
/// starts at zero and steps by one on each iteration.
PHINode *
SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L,
                                                    Type *Ty) {
  assert(Ty->isIntegerTy() && "Can only insert integer induction variables!");

  // Build a SCEV for {0,+,1}<L>.
  // Conservatively use FlagAnyWrap for now.
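  // (Expanding {0,+,1}<L> yields, schematically:
  //    header:
  //      %iv = phi iN [ 0, %preheader ], [ %iv.next, %latch ]
  //      ...
  //      %iv.next = add iN %iv, 1
  //  where the value names are illustrative only.)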
  const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0),
                                   SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap);

  // Emit code for it.
  SCEVInsertPointGuard Guard(Builder, this);
  PHINode *V =
      cast<PHINode>(expandCodeFor(H, nullptr, &L->getHeader()->front()));

  return V;
}

/// replaceCongruentIVs - Check for congruent phis in this loop header and
/// replace them with their most canonical representative. Return the number of
/// phis eliminated.
///
/// This does not depend on any SCEVExpander state but should be used in
/// the same context that SCEVExpander is used.
unsigned
SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
                                  SmallVectorImpl<WeakTrackingVH> &DeadInsts,
                                  const TargetTransformInfo *TTI) {
  // Find the integer phis; they are sorted below so the widest are
  // processed first.
  SmallVector<PHINode*, 8> Phis;
  for (PHINode &PN : L->getHeader()->phis())
    Phis.push_back(&PN);

  if (TTI)
    llvm::sort(Phis, [](Value *LHS, Value *RHS) {
      // Put pointers at the back and make sure pointer < pointer = false.
      if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
        return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
      return RHS->getType()->getPrimitiveSizeInBits() <
             LHS->getType()->getPrimitiveSizeInBits();
    });

  unsigned NumElim = 0;
  DenseMap<const SCEV *, PHINode *> ExprToIVMap;
  // Process phis from wide to narrow. Map wide phis to their truncation
  // so narrow phis can reuse them.
  for (PHINode *Phi : Phis) {
    auto SimplifyPHINode = [&](PHINode *PN) -> Value * {
      if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC}))
        return V;
      if (!SE.isSCEVable(PN->getType()))
        return nullptr;
      auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN));
      if (!Const)
        return nullptr;
      return Const->getValue();
    };

    // Fold constant phis. They may be congruent to other constant phis and
    // would confuse the logic below that expects proper IVs.
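    // (E.g. "%c = phi i32 [ 2, %preheader ], [ 2, %latch ]" folds to the
    //  constant 2 and is eliminated here instead of being treated as an IV.)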
    if (Value *V = SimplifyPHINode(Phi)) {
      if (V->getType() != Phi->getType())
        continue;
      Phi->replaceAllUsesWith(V);
      DeadInsts.emplace_back(Phi);
      ++NumElim;
      DEBUG_WITH_TYPE(DebugType, dbgs()
                      << "INDVARS: Eliminated constant iv: " << *Phi << '\n');
      continue;
    }

    if (!SE.isSCEVable(Phi->getType()))
      continue;

    PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)];
    if (!OrigPhiRef) {
      OrigPhiRef = Phi;
      if (Phi->getType()->isIntegerTy() && TTI &&
          TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) {
        // This phi can be freely truncated to the narrowest phi type. Map the
        // truncated expression to it so it will be reused for narrow types.
        const SCEV *TruncExpr =
            SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType());
        ExprToIVMap[TruncExpr] = Phi;
      }
      continue;
    }

    // Replacing a pointer phi with an integer phi or vice-versa doesn't make
    // sense.
    if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy())
      continue;

    if (BasicBlock *LatchBlock = L->getLoopLatch()) {
      Instruction *OrigInc = dyn_cast<Instruction>(
          OrigPhiRef->getIncomingValueForBlock(LatchBlock));
      Instruction *IsomorphicInc =
          dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock));

      if (OrigInc && IsomorphicInc) {
        // If this phi has the same width but is more canonical, replace the
        // original with it. As part of the "more canonical" determination,
        // respect a prior decision to use an IV chain.
        if (OrigPhiRef->getType() == Phi->getType() &&
            !(ChainedPhis.count(Phi) ||
              isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) &&
            (ChainedPhis.count(Phi) ||
             isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) {
          std::swap(OrigPhiRef, Phi);
          std::swap(OrigInc, IsomorphicInc);
        }
        // Replacing the congruent phi is sufficient because acyclic
        // redundancy elimination, CSE/GVN, should handle the
        // rest. However, once SCEV proves that a phi is congruent,
        // it's often the head of an IV user cycle that is isomorphic
        // with the original phi.
        // It's worth eagerly cleaning up the
        // common case of a single IV increment so that DeleteDeadPHIs
        // can remove cycles that had postinc uses.
        const SCEV *TruncExpr =
            SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType());
        if (OrigInc != IsomorphicInc &&
            TruncExpr == SE.getSCEV(IsomorphicInc) &&
            SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) &&
            hoistIVInc(OrigInc, IsomorphicInc)) {
          DEBUG_WITH_TYPE(DebugType,
                          dbgs() << "INDVARS: Eliminated congruent iv.inc: "
                                 << *IsomorphicInc << '\n');
          Value *NewInc = OrigInc;
          if (OrigInc->getType() != IsomorphicInc->getType()) {
            Instruction *IP = nullptr;
            if (PHINode *PN = dyn_cast<PHINode>(OrigInc))
              IP = &*PN->getParent()->getFirstInsertionPt();
            else
              IP = OrigInc->getNextNode();

            IRBuilder<> Builder(IP);
            Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc());
            NewInc = Builder.CreateTruncOrBitCast(
                OrigInc, IsomorphicInc->getType(), IVName);
          }
          IsomorphicInc->replaceAllUsesWith(NewInc);
          DeadInsts.emplace_back(IsomorphicInc);
        }
      }
    }
    DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: "
                                      << *Phi << '\n');
    ++NumElim;
    Value *NewIV = OrigPhiRef;
    if (OrigPhiRef->getType() != Phi->getType()) {
      IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt());
      Builder.SetCurrentDebugLocation(Phi->getDebugLoc());
      NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName);
    }
    Phi->replaceAllUsesWith(NewIV);
    DeadInsts.emplace_back(Phi);
  }
  return NumElim;
}

Value *SCEVExpander::getExactExistingExpansion(const SCEV *S,
                                               const Instruction *At, Loop *L) {
  Optional<ScalarEvolution::ValueOffsetPair> VO =
      getRelatedExistingExpansion(S, At, L);
  if (VO && VO.getValue().second == nullptr)
    return VO.getValue().first;
  return nullptr;
}

Optional<ScalarEvolution::ValueOffsetPair>
SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At,
                                          Loop *L) {
  using namespace llvm::PatternMatch;
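
  // (The search below recognizes exit branches of the form
  //    br i1 (icmp pred %lhs, %rhs), label %a, label %b
  //  and returns %lhs or %rhs when its SCEV matches S and it dominates At.)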

  SmallVector<BasicBlock *, 4> ExitingBlocks;
  L->getExitingBlocks(ExitingBlocks);

  // Look for a suitable value in simple conditions at the loop exits.
  for (BasicBlock *BB : ExitingBlocks) {
    ICmpInst::Predicate Pred;
    Instruction *LHS, *RHS;

    if (!match(BB->getTerminator(),
               m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)),
                    m_BasicBlock(), m_BasicBlock())))
      continue;

    if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At))
      return ScalarEvolution::ValueOffsetPair(LHS, nullptr);

    if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At))
      return ScalarEvolution::ValueOffsetPair(RHS, nullptr);
  }

  // Use expand's logic, which is used for reusing a previous Value in
  // ExprValueMap.
  ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At);
  if (VO.first)
    return VO;

  // There is potential to make this significantly smarter, but this simple
  // heuristic already gets some interesting cases.

  // Cannot find a suitable value.
  return None;
}

bool SCEVExpander::isHighCostExpansionHelper(
    const SCEV *S, Loop *L, const Instruction &At, int &BudgetRemaining,
    const TargetTransformInfo &TTI, SmallPtrSetImpl<const SCEV *> &Processed,
    SmallVectorImpl<const SCEV *> &Worklist) {
  if (BudgetRemaining < 0)
    return true; // Already run out of budget, give up.

  // Was the cost of expansion of this expression already accounted for?
  if (!Processed.insert(S).second)
    return false; // We have already accounted for this expression.

  // If we can find an existing value for this SCEV available at the point "At"
  // then consider the expression cheap.
  if (getRelatedExistingExpansion(S, &At, L))
    return false; // Consider the expression to be free.

  switch (S->getSCEVType()) {
  case scUnknown:
  case scConstant:
    return false; // Assume to be zero-cost.
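    // (A SCEVUnknown is an already-available IR value and a constant folds
    //  to an immediate, so neither consumes any of the budget.)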
  }

  TargetTransformInfo::TargetCostKind CostKind =
      TargetTransformInfo::TCK_RecipThroughput;

  if (auto *CastExpr = dyn_cast<SCEVCastExpr>(S)) {
    unsigned Opcode;
    switch (S->getSCEVType()) {
    case scTruncate:
      Opcode = Instruction::Trunc;
      break;
    case scZeroExtend:
      Opcode = Instruction::ZExt;
      break;
    case scSignExtend:
      Opcode = Instruction::SExt;
      break;
    default:
      llvm_unreachable("There are no other cast types.");
    }
    const SCEV *Op = CastExpr->getOperand();
    BudgetRemaining -= TTI.getCastInstrCost(Opcode, /*Dst=*/S->getType(),
                                            /*Src=*/Op->getType(), CostKind);
    Worklist.emplace_back(Op);
    return false; // Will answer upon next entry into this function.
  }

  if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
    // If the divisor is a power of two, count this as a logical right-shift.
    if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) {
      if (SC->getAPInt().isPowerOf2()) {
        BudgetRemaining -=
            TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(),
                                       CostKind);
        // Note that we don't count the cost of RHS, because it is a constant,
        // and we consider those to be free. But if that changes, we would need
        // to log2() it first before calling isHighCostExpansionHelper().
        Worklist.emplace_back(UDivExpr->getLHS());
        return false; // Will answer upon next entry into this function.
      }
    }

    // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
    // HowManyLessThans produced to compute a precise expression, rather than a
    // UDiv from the user's code. If we can't find a UDiv in the code with some
    // simple searching, we need to account for its cost.

    // At the beginning of this function we already tried to find an existing
    // value for plain 'S'. Now try to look up 'S + 1' since it is a common
    // pattern involving division. This is just a simple search heuristic.
    if (getRelatedExistingExpansion(
            SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
      return false; // Consider it to be free.

    // Need to count the cost of this UDiv.
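    // (Unlike the power-of-two case above, which was modeled as a shift,
    //  this one is charged as a real udiv instruction.)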
    BudgetRemaining -=
        TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(),
                                   CostKind);
    Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()});
    return false; // Will answer upon next entry into this function.
  }

  if (const auto *NAry = dyn_cast<SCEVAddRecExpr>(S)) {
    Type *OpType = NAry->getType();

    assert(NAry->getNumOperands() >= 2 &&
           "Polynomial should be at least linear");

    int AddCost =
        TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
    int MulCost =
        TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);

    // In this polynomial, we may have some zero operands, and we shouldn't
    // really charge for those. So how many non-zero coefficients are there?
    int NumTerms = llvm::count_if(NAry->operands(),
                                  [](const SCEV *S) { return !S->isZero(); });
    assert(NumTerms >= 1 && "Polynomial should have at least one term.");
    assert(!(*std::prev(NAry->operands().end()))->isZero() &&
           "Last operand should not be zero");

    // Much like with a normal add expr, the polynomial will require
    // one less addition than the number of its terms.
    BudgetRemaining -= AddCost * (NumTerms - 1);
    if (BudgetRemaining < 0)
      return true;

    // Ignoring the constant term (operand 0), how many of the coefficients
    // are u> 1?
    int NumNonZeroDegreeNonOneTerms =
        llvm::count_if(make_range(std::next(NAry->op_begin()), NAry->op_end()),
                       [](const SCEV *S) {
                         auto *SConst = dyn_cast<SCEVConstant>(S);
                         return !SConst || SConst->getAPInt().ugt(1);
                       });
    // Here, *each* one of those will require a multiplication.
    BudgetRemaining -= MulCost * NumNonZeroDegreeNonOneTerms;
    if (BudgetRemaining < 0)
      return true;

    // What is the degree of this polynomial?
    int PolyDegree = NAry->getNumOperands() - 1;
    assert(PolyDegree >= 1 && "Should be at least affine.");

    // The final term will be:
    //   Op_{PolyDegree} * x ^ {PolyDegree}
    // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
    // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
    // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
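    // (Worked example: for the degree-2 recurrence {A,+,B,+,C} with B and C
    //  both u> 1, we charge 2 adds, 2 muls for the coefficients, and 1 mul
    //  for x^2, i.e. 2*AddCost + 3*MulCost in total.)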
    // FIXME: this is conservatively correct, but might be overly pessimistic.
    BudgetRemaining -= MulCost * (PolyDegree - 1);
    if (BudgetRemaining < 0)
      return true;

    // And finally, the operands themselves should fit within the budget.
    Worklist.insert(Worklist.end(), NAry->operands().begin(),
                    NAry->operands().end());
    return false; // So far so good, though ops may be too costly?
  }

  if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
    Type *OpType = NAry->getType();

    int PairCost;
    switch (S->getSCEVType()) {
    case scAddExpr:
      PairCost =
          TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
      break;
    case scMulExpr:
      // TODO: this cost model for Mul is very pessimistic, because the
      // expander actually uses a binary-power (BinPow) expansion; see
      // SCEVExpander::visitMulExpr() and ExpandOpBinPowN().
      PairCost =
          TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
      break;
    case scSMaxExpr:
    case scUMaxExpr:
    case scSMinExpr:
    case scUMinExpr:
      PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
                                        CmpInst::makeCmpResultType(OpType),
                                        CostKind) +
                 TTI.getCmpSelInstrCost(Instruction::Select, OpType,
                                        CmpInst::makeCmpResultType(OpType),
                                        CostKind);
      break;
    default:
      llvm_unreachable("There are no other variants here.");
    }

    assert(NAry->getNumOperands() > 1 &&
           "Nary expr should have more than 1 operand.");
    // The simple nary expr will require one less op (or pair of ops)
    // than the number of its terms.
    BudgetRemaining -= PairCost * (NAry->getNumOperands() - 1);
    if (BudgetRemaining < 0)
      return true;

    // And finally, the operands themselves should fit within the budget.
    Worklist.insert(Worklist.end(), NAry->operands().begin(),
                    NAry->operands().end());
    return false; // So far so good, though ops may be too costly?
  }

  llvm_unreachable("No other scev expressions possible.");
}

Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
                                            Instruction *IP) {
  assert(IP);
  switch (Pred->getKind()) {
  case SCEVPredicate::P_Union:
    return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
  case SCEVPredicate::P_Equal:
    return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
  case SCEVPredicate::P_Wrap: {
    auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
    return expandWrapPredicate(AddRecPred, IP);
  }
  }
  llvm_unreachable("Unknown SCEV predicate type");
}

Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
                                          Instruction *IP) {
  Value *Expr0 = expandCodeFor(Pred->getLHS(), Pred->getLHS()->getType(), IP);
  Value *Expr1 = expandCodeFor(Pred->getRHS(), Pred->getRHS()->getType(), IP);

  Builder.SetInsertPoint(IP);
  auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
  return I;
}

Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
                                           Instruction *Loc, bool Signed) {
  assert(AR->isAffine() && "Cannot generate RT check for "
                           "non-affine expression");

  SCEVUnionPredicate Pred;
  const SCEV *ExitCount =
      SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);

  assert(ExitCount != SE.getCouldNotCompute() && "Invalid loop count");

  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *Start = AR->getStart();

  Type *ARTy = AR->getType();
  unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
  unsigned DstBits = SE.getTypeSizeInBits(ARTy);

  // The expression {Start,+,Step} has nusw/nssw if
  //   Step < 0, Start - |Step| * Backedge <= Start
  //   Step >= 0, Start + |Step| * Backedge > Start
  // and |Step| * Backedge doesn't unsigned overflow.
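  //
  // (The check emitted below is, schematically:
  //    %mul = call { iN, i1 } @llvm.umul.with.overflow.iN(|Step|, Backedge)
  //    %bad = Step < 0 ? (Start - %mul.result > Start)
  //                    : (Start + %mul.result < Start)
  //    %bad |= %mul.overflow   [plus a truncation check when needed]
  //  and it evaluates to true when the no-wrap assumption fails.)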

  IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
  Builder.SetInsertPoint(Loc);
  Value *TripCountVal = expandCodeFor(ExitCount, CountTy, Loc);

  IntegerType *Ty =
      IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
  Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;

  Value *StepValue = expandCodeFor(Step, Ty, Loc);
  Value *NegStepValue = expandCodeFor(SE.getNegativeSCEV(Step), Ty, Loc);
  Value *StartValue = expandCodeFor(Start, ARExpandTy, Loc);

  ConstantInt *Zero =
      ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));

  Builder.SetInsertPoint(Loc);
  // Compute |Step|.
  Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
  Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);

  // Get the backedge-taken count and truncate or extend it to the AR type.
  Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
  auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
                                         Intrinsic::umul_with_overflow, Ty);

  // Compute |Step| * Backedge.
  CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
  Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
  Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");

  // Compute:
  //   Start + |Step| * Backedge < Start
  //   Start - |Step| * Backedge > Start
  Value *Add = nullptr, *Sub = nullptr;
  if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) {
    const SCEV *MulS = SE.getSCEV(MulV);
    const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
    Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
                                ARPtrTy);
    Sub = Builder.CreateBitCast(
        expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy);
  } else {
    Add = Builder.CreateAdd(StartValue, MulV);
    Sub = Builder.CreateSub(StartValue, MulV);
  }

  Value *EndCompareGT = Builder.CreateICmp(
      Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);

  Value *EndCompareLT = Builder.CreateICmp(
      Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);

  // Select the answer based on the sign of Step.
  Value *EndCheck =
      Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);

  // If the backedge-taken count type is larger than the AR type,
  // check that we don't drop any bits by truncating it. If we are
  // dropping bits, then we have overflow (unless the step is zero).
  if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) {
    auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits);
    auto *BackedgeCheck =
        Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal,
                           ConstantInt::get(Loc->getContext(), MaxVal));
    BackedgeCheck = Builder.CreateAnd(
        BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero));

    EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck);
  }

  EndCheck = Builder.CreateOr(EndCheck, OfMul);
  return EndCheck;
}

Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred,
                                         Instruction *IP) {
  const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr());
  Value *NSSWCheck = nullptr, *NUSWCheck = nullptr;

  // Add a check for NUSW.
  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW)
    NUSWCheck = generateOverflowCheck(A, IP, false);

  // Add a check for NSSW.
  if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW)
    NSSWCheck = generateOverflowCheck(A, IP, true);

  if (NUSWCheck && NSSWCheck)
    return Builder.CreateOr(NUSWCheck, NSSWCheck);

  if (NUSWCheck)
    return NUSWCheck;

  if (NSSWCheck)
    return NSSWCheck;

  return ConstantInt::getFalse(IP->getContext());
}

Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union,
                                          Instruction *IP) {
  auto *BoolType = IntegerType::get(IP->getContext(), 1);
  Value *Check = ConstantInt::getNullValue(BoolType);

  // Loop over all checks in this set.
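  // (Semantically the union predicate is just the OR of its members: the
  //  combined check evaluates to true if any individual check does.)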
  for (auto Pred : Union->getPredicates()) {
    auto *NextCheck = expandCodeForPredicate(Pred, IP);
    Builder.SetInsertPoint(IP);
    Check = Builder.CreateOr(Check, NextCheck);
  }

  return Check;
}

namespace {
// Search for a SCEV subexpression that is not safe to expand. Any expression
// that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely
// UDiv expressions. We don't know if the UDiv is derived from an IR divide
// instruction, but the important thing is that we prove the denominator is
// nonzero before expansion.
//
// IVUsers already checks that IV-derived expressions are safe. So this check
// is only needed when the expression includes some subexpression that is not
// IV derived.
//
// Currently, we only allow division by a nonzero constant here. If this is
// inadequate, we could easily allow division by SCEVUnknown by using
// ValueTracking to check isKnownNonZero().
//
// We cannot generally expand recurrences unless the step dominates the loop
// header. The expander handles the special case of affine recurrences by
// scaling the recurrence outside the loop, but this technique isn't generally
// applicable. Expanding a nested recurrence outside a loop requires computing
// binomial coefficients. This could be done, but the recurrence has to be in a
// perfectly reduced form, which can't be guaranteed.
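//
// (For example, (%a /u %b) is flagged unsafe unless %b is a nonzero
// constant, and a non-affine recurrence such as {0,+,%s,+,%t}<%loop> is
// flagged unsafe when its step does not dominate the loop header.)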
struct SCEVFindUnsafe {
  ScalarEvolution &SE;
  bool IsUnsafe;

  SCEVFindUnsafe(ScalarEvolution &se) : SE(se), IsUnsafe(false) {}

  bool follow(const SCEV *S) {
    if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
      const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS());
      if (!SC || SC->getValue()->isZero()) {
        IsUnsafe = true;
        return false;
      }
    }
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) {
      const SCEV *Step = AR->getStepRecurrence(SE);
      if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) {
        IsUnsafe = true;
        return false;
      }
    }
    return true;
  }
  bool isDone() const { return IsUnsafe; }
};
} // namespace

namespace llvm {
bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) {
  SCEVFindUnsafe Search(SE);
  visitAll(S, Search);
  return !Search.IsUnsafe;
}

bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint,
                      ScalarEvolution &SE) {
  if (!isSafeToExpand(S, SE))
    return false;
  // We have to prove that the expanded site of S dominates InsertionPoint.
  // This is easy when not in the same block, but hard when S is an instruction
  // to be expanded somewhere inside the same block as our insertion point.
  // What we really need here is something analogous to an OrderedBasicBlock,
  // but for the moment, we paper over the problem by handling two common and
  // cheap-to-check cases.
  if (SE.properlyDominates(S, InsertionPoint->getParent()))
    return true;
  if (SE.dominates(S, InsertionPoint->getParent())) {
    if (InsertionPoint->getParent()->getTerminator() == InsertionPoint)
      return true;
    if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S))
      for (const Value *V : InsertionPoint->operand_values())
        if (V == U->getValue())
          return true;
  }
  return false;
}
} // namespace llvm