1 //===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the implementation of the scalar evolution expander, 10 // which is used to generate the code corresponding to a given scalar evolution 11 // expression. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" 16 #include "llvm/ADT/STLExtras.h" 17 #include "llvm/ADT/SmallSet.h" 18 #include "llvm/Analysis/InstructionSimplify.h" 19 #include "llvm/Analysis/LoopInfo.h" 20 #include "llvm/Analysis/TargetTransformInfo.h" 21 #include "llvm/IR/DataLayout.h" 22 #include "llvm/IR/Dominators.h" 23 #include "llvm/IR/IntrinsicInst.h" 24 #include "llvm/IR/LLVMContext.h" 25 #include "llvm/IR/Module.h" 26 #include "llvm/IR/PatternMatch.h" 27 #include "llvm/Support/CommandLine.h" 28 #include "llvm/Support/Debug.h" 29 #include "llvm/Support/raw_ostream.h" 30 #include "llvm/Transforms/Utils/LoopUtils.h" 31 32 using namespace llvm; 33 34 cl::opt<unsigned> llvm::SCEVCheapExpansionBudget( 35 "scev-cheap-expansion-budget", cl::Hidden, cl::init(4), 36 cl::desc("When performing SCEV expansion only if it is cheap to do, this " 37 "controls the budget that is considered cheap (default = 4)")); 38 39 using namespace PatternMatch; 40 41 /// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP, 42 /// reusing an existing cast if a suitable one exists, moving an existing 43 /// cast if a suitable one exists but isn't in the right place, or 44 /// creating a new one. 45 Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty, 46 Instruction::CastOps Op, 47 BasicBlock::iterator IP) { 48 // This function must be called with the builder having a valid insertion 49 // point. It doesn't need to be the actual IP where the uses of the returned 50 // cast will be added, but it must dominate such IP. 51 // We use this precondition to produce a cast that will dominate all its 52 // uses. In particular, this is crucial for the case where the builder's 53 // insertion point *is* the point where we were asked to put the cast. 54 // Since we don't know the builder's insertion point is actually 55 // where the uses will be added (only that it dominates it), we are 56 // not allowed to move it. 57 BasicBlock::iterator BIP = Builder.GetInsertPoint(); 58 59 Instruction *Ret = nullptr; 60 61 // Check to see if there is already a cast! 62 for (User *U : V->users()) 63 if (U->getType() == Ty) 64 if (CastInst *CI = dyn_cast<CastInst>(U)) 65 if (CI->getOpcode() == Op) { 66 // If the cast isn't where we want it, create a new cast at IP. 67 // Likewise, do not reuse a cast at BIP because it must dominate 68 // instructions that might be inserted before BIP. 69 if (BasicBlock::iterator(CI) != IP || BIP == IP) { 70 // Create a new cast, and leave the old cast in place in case 71 // it is being used as an insert point. 72 Ret = CastInst::Create(Op, V, Ty, "", &*IP); 73 Ret->takeName(CI); 74 CI->replaceAllUsesWith(Ret); 75 break; 76 } 77 Ret = CI; 78 break; 79 } 80 81 // Create a new cast. 
82 if (!Ret) 83 Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP); 84 85 // We assert at the end of the function since IP might point to an 86 // instruction with different dominance properties than a cast 87 // (an invoke for example) and not dominate BIP (but the cast does). 88 assert(SE.DT.dominates(Ret, &*BIP)); 89 90 rememberInstruction(Ret); 91 return Ret; 92 } 93 94 static BasicBlock::iterator findInsertPointAfter(Instruction *I, 95 BasicBlock *MustDominate) { 96 BasicBlock::iterator IP = ++I->getIterator(); 97 if (auto *II = dyn_cast<InvokeInst>(I)) 98 IP = II->getNormalDest()->begin(); 99 100 while (isa<PHINode>(IP)) 101 ++IP; 102 103 if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) { 104 ++IP; 105 } else if (isa<CatchSwitchInst>(IP)) { 106 IP = MustDominate->getFirstInsertionPt(); 107 } else { 108 assert(!IP->isEHPad() && "unexpected eh pad!"); 109 } 110 111 return IP; 112 } 113 114 /// InsertNoopCastOfTo - Insert a cast of V to the specified type, 115 /// which must be possible with a noop cast, doing what we can to share 116 /// the casts. 117 Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) { 118 Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false); 119 assert((Op == Instruction::BitCast || 120 Op == Instruction::PtrToInt || 121 Op == Instruction::IntToPtr) && 122 "InsertNoopCastOfTo cannot perform non-noop casts!"); 123 assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) && 124 "InsertNoopCastOfTo cannot change sizes!"); 125 126 // Short-circuit unnecessary bitcasts. 127 if (Op == Instruction::BitCast) { 128 if (V->getType() == Ty) 129 return V; 130 if (CastInst *CI = dyn_cast<CastInst>(V)) { 131 if (CI->getOperand(0)->getType() == Ty) 132 return CI->getOperand(0); 133 } 134 } 135 // Short-circuit unnecessary inttoptr<->ptrtoint casts. 136 if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) && 137 SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) { 138 if (CastInst *CI = dyn_cast<CastInst>(V)) 139 if ((CI->getOpcode() == Instruction::PtrToInt || 140 CI->getOpcode() == Instruction::IntToPtr) && 141 SE.getTypeSizeInBits(CI->getType()) == 142 SE.getTypeSizeInBits(CI->getOperand(0)->getType())) 143 return CI->getOperand(0); 144 if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V)) 145 if ((CE->getOpcode() == Instruction::PtrToInt || 146 CE->getOpcode() == Instruction::IntToPtr) && 147 SE.getTypeSizeInBits(CE->getType()) == 148 SE.getTypeSizeInBits(CE->getOperand(0)->getType())) 149 return CE->getOperand(0); 150 } 151 152 // Fold a cast of a constant. 153 if (Constant *C = dyn_cast<Constant>(V)) 154 return ConstantExpr::getCast(Op, C, Ty); 155 156 // Cast the argument at the beginning of the entry block, after 157 // any bitcasts of other arguments. 158 if (Argument *A = dyn_cast<Argument>(V)) { 159 BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin(); 160 while ((isa<BitCastInst>(IP) && 161 isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) && 162 cast<BitCastInst>(IP)->getOperand(0) != A) || 163 isa<DbgInfoIntrinsic>(IP)) 164 ++IP; 165 return ReuseOrCreateCast(A, Ty, Op, IP); 166 } 167 168 // Cast the instruction immediately after the instruction. 
169 Instruction *I = cast<Instruction>(V); 170 BasicBlock::iterator IP = findInsertPointAfter(I, Builder.GetInsertBlock()); 171 return ReuseOrCreateCast(I, Ty, Op, IP); 172 } 173 174 /// InsertBinop - Insert the specified binary operator, doing a small amount 175 /// of work to avoid inserting an obviously redundant operation, and hoisting 176 /// to an outer loop when the opportunity is there and it is safe. 177 Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, 178 Value *LHS, Value *RHS, 179 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) { 180 // Fold a binop with constant operands. 181 if (Constant *CLHS = dyn_cast<Constant>(LHS)) 182 if (Constant *CRHS = dyn_cast<Constant>(RHS)) 183 return ConstantExpr::get(Opcode, CLHS, CRHS); 184 185 // Do a quick scan to see if we have this binop nearby. If so, reuse it. 186 unsigned ScanLimit = 6; 187 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); 188 // Scanning starts from the last instruction before the insertion point. 189 BasicBlock::iterator IP = Builder.GetInsertPoint(); 190 if (IP != BlockBegin) { 191 --IP; 192 for (; ScanLimit; --IP, --ScanLimit) { 193 // Don't count dbg.value against the ScanLimit, to avoid perturbing the 194 // generated code. 195 if (isa<DbgInfoIntrinsic>(IP)) 196 ScanLimit++; 197 198 auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) { 199 // Ensure that no-wrap flags match. 200 if (isa<OverflowingBinaryOperator>(I)) { 201 if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW)) 202 return true; 203 if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW)) 204 return true; 205 } 206 // Conservatively, do not use any instruction which has any of exact 207 // flags installed. 208 if (isa<PossiblyExactOperator>(I) && I->isExact()) 209 return true; 210 return false; 211 }; 212 if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS && 213 IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP)) 214 return &*IP; 215 if (IP == BlockBegin) break; 216 } 217 } 218 219 // Save the original insertion point so we can restore it when we're done. 220 DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); 221 SCEVInsertPointGuard Guard(Builder, this); 222 223 if (IsSafeToHoist) { 224 // Move the insertion point out of as many loops as we can. 225 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { 226 if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break; 227 BasicBlock *Preheader = L->getLoopPreheader(); 228 if (!Preheader) break; 229 230 // Ok, move up a level. 231 Builder.SetInsertPoint(Preheader->getTerminator()); 232 } 233 } 234 235 // If we haven't found this binop, insert it. 236 Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS)); 237 BO->setDebugLoc(Loc); 238 if (Flags & SCEV::FlagNUW) 239 BO->setHasNoUnsignedWrap(); 240 if (Flags & SCEV::FlagNSW) 241 BO->setHasNoSignedWrap(); 242 243 return BO; 244 } 245 246 /// FactorOutConstant - Test if S is divisible by Factor, using signed 247 /// division. If so, update S with Factor divided out and return true. 248 /// S need not be evenly divisible if a reasonable remainder can be 249 /// computed. 250 static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder, 251 const SCEV *Factor, ScalarEvolution &SE, 252 const DataLayout &DL) { 253 // Everything is divisible by one. 254 if (Factor->isOne()) 255 return true; 256 257 // x/x == 1. 
  if (S == Factor) {
    S = SE.getConstant(S->getType(), 1);
    return true;
  }

  // For a Constant, check for a multiple of the given factor.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    // 0/x == 0.
    if (C->isZero())
      return true;
    // Check for divisibility.
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
      ConstantInt *CI =
          ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
      // If the quotient is zero and the remainder is non-zero, reject
      // the value at this scale. It will be considered for subsequent
      // smaller scales.
      if (!CI->isZero()) {
        const SCEV *Div = SE.getConstant(CI);
        S = Div;
        Remainder = SE.getAddExpr(
            Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
        return true;
      }
    }
  }

  // In a Mul, check if there is a constant operand which is a multiple
  // of the given factor.
  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    // Size is known, check if there is a constant operand which is a multiple
    // of the given factor. If so, we can factor it.
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor))
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
        if (!C->getAPInt().srem(FC->getAPInt())) {
          SmallVector<const SCEV *, 4> NewMulOps(M->op_begin(), M->op_end());
          NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
          S = SE.getMulExpr(NewMulOps);
          return true;
        }
  }

  // In an AddRec, check if both start and step are divisible.
  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    const SCEV *Step = A->getStepRecurrence(SE);
    const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
    if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
      return false;
    if (!StepRem->isZero())
      return false;
    const SCEV *Start = A->getStart();
    if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
      return false;
    S = SE.getAddRecExpr(Start, Step, A->getLoop(),
                         A->getNoWrapFlags(SCEV::FlagNW));
    return true;
  }

  return false;
}

/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
/// is the number of SCEVAddRecExprs present, which are kept at the end of
/// the list.
///
static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
                                Type *Ty,
                                ScalarEvolution &SE) {
  unsigned NumAddRecs = 0;
  for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
    ++NumAddRecs;
  // Group Ops into non-addrecs and addrecs.
  SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
  SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
  // Let ScalarEvolution sort and simplify the non-addrecs list.
  const SCEV *Sum = NoAddRecs.empty() ?
                    SE.getConstant(Ty, 0) :
                    SE.getAddExpr(NoAddRecs);
  // If it returned an add, use the operands. Otherwise it simplified
  // the sum into a single value, so just use that.
  Ops.clear();
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
    Ops.append(Add->op_begin(), Add->op_end());
  else if (!Sum->isZero())
    Ops.push_back(Sum);
  // Then append the addrecs.
  Ops.append(AddRecs.begin(), AddRecs.end());
}

/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
349 /// This helps expose more opportunities for folding parts of the expressions 350 /// into GEP indices. 351 /// 352 static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops, 353 Type *Ty, 354 ScalarEvolution &SE) { 355 // Find the addrecs. 356 SmallVector<const SCEV *, 8> AddRecs; 357 for (unsigned i = 0, e = Ops.size(); i != e; ++i) 358 while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) { 359 const SCEV *Start = A->getStart(); 360 if (Start->isZero()) break; 361 const SCEV *Zero = SE.getConstant(Ty, 0); 362 AddRecs.push_back(SE.getAddRecExpr(Zero, 363 A->getStepRecurrence(SE), 364 A->getLoop(), 365 A->getNoWrapFlags(SCEV::FlagNW))); 366 if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) { 367 Ops[i] = Zero; 368 Ops.append(Add->op_begin(), Add->op_end()); 369 e += Add->getNumOperands(); 370 } else { 371 Ops[i] = Start; 372 } 373 } 374 if (!AddRecs.empty()) { 375 // Add the addrecs onto the end of the list. 376 Ops.append(AddRecs.begin(), AddRecs.end()); 377 // Resort the operand list, moving any constants to the front. 378 SimplifyAddOperands(Ops, Ty, SE); 379 } 380 } 381 382 /// expandAddToGEP - Expand an addition expression with a pointer type into 383 /// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps 384 /// BasicAliasAnalysis and other passes analyze the result. See the rules 385 /// for getelementptr vs. inttoptr in 386 /// http://llvm.org/docs/LangRef.html#pointeraliasing 387 /// for details. 388 /// 389 /// Design note: The correctness of using getelementptr here depends on 390 /// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as 391 /// they may introduce pointer arithmetic which may not be safely converted 392 /// into getelementptr. 393 /// 394 /// Design note: It might seem desirable for this function to be more 395 /// loop-aware. If some of the indices are loop-invariant while others 396 /// aren't, it might seem desirable to emit multiple GEPs, keeping the 397 /// loop-invariant portions of the overall computation outside the loop. 398 /// However, there are a few reasons this is not done here. Hoisting simple 399 /// arithmetic is a low-level optimization that often isn't very 400 /// important until late in the optimization process. In fact, passes 401 /// like InstructionCombining will combine GEPs, even if it means 402 /// pushing loop-invariant computation down into loops, so even if the 403 /// GEPs were split here, the work would quickly be undone. The 404 /// LoopStrengthReduction pass, which is usually run quite late (and 405 /// after the last InstructionCombining pass), takes care of hoisting 406 /// loop-invariant portions of expressions, after considering what 407 /// can be folded using target addressing modes. 408 /// 409 Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, 410 const SCEV *const *op_end, 411 PointerType *PTy, 412 Type *Ty, 413 Value *V) { 414 Type *OriginalElTy = PTy->getElementType(); 415 Type *ElTy = OriginalElTy; 416 SmallVector<Value *, 4> GepIndices; 417 SmallVector<const SCEV *, 8> Ops(op_begin, op_end); 418 bool AnyNonZeroIndices = false; 419 420 // Split AddRecs up into parts as either of the parts may be usable 421 // without the other. 422 SplitAddRecs(Ops, Ty, SE); 423 424 Type *IntIdxTy = DL.getIndexType(PTy); 425 426 // Descend down the pointer's type and attempt to convert the other 427 // operands into GEP indices, at each level. 
The first index in a GEP 428 // indexes into the array implied by the pointer operand; the rest of 429 // the indices index into the element or field type selected by the 430 // preceding index. 431 for (;;) { 432 // If the scale size is not 0, attempt to factor out a scale for 433 // array indexing. 434 SmallVector<const SCEV *, 8> ScaledOps; 435 if (ElTy->isSized()) { 436 const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy); 437 if (!ElSize->isZero()) { 438 SmallVector<const SCEV *, 8> NewOps; 439 for (const SCEV *Op : Ops) { 440 const SCEV *Remainder = SE.getConstant(Ty, 0); 441 if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) { 442 // Op now has ElSize factored out. 443 ScaledOps.push_back(Op); 444 if (!Remainder->isZero()) 445 NewOps.push_back(Remainder); 446 AnyNonZeroIndices = true; 447 } else { 448 // The operand was not divisible, so add it to the list of operands 449 // we'll scan next iteration. 450 NewOps.push_back(Op); 451 } 452 } 453 // If we made any changes, update Ops. 454 if (!ScaledOps.empty()) { 455 Ops = NewOps; 456 SimplifyAddOperands(Ops, Ty, SE); 457 } 458 } 459 } 460 461 // Record the scaled array index for this level of the type. If 462 // we didn't find any operands that could be factored, tentatively 463 // assume that element zero was selected (since the zero offset 464 // would obviously be folded away). 465 Value *Scaled = 466 ScaledOps.empty() 467 ? Constant::getNullValue(Ty) 468 : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false); 469 GepIndices.push_back(Scaled); 470 471 // Collect struct field index operands. 472 while (StructType *STy = dyn_cast<StructType>(ElTy)) { 473 bool FoundFieldNo = false; 474 // An empty struct has no fields. 475 if (STy->getNumElements() == 0) break; 476 // Field offsets are known. See if a constant offset falls within any of 477 // the struct fields. 478 if (Ops.empty()) 479 break; 480 if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0])) 481 if (SE.getTypeSizeInBits(C->getType()) <= 64) { 482 const StructLayout &SL = *DL.getStructLayout(STy); 483 uint64_t FullOffset = C->getValue()->getZExtValue(); 484 if (FullOffset < SL.getSizeInBytes()) { 485 unsigned ElIdx = SL.getElementContainingOffset(FullOffset); 486 GepIndices.push_back( 487 ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx)); 488 ElTy = STy->getTypeAtIndex(ElIdx); 489 Ops[0] = 490 SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx)); 491 AnyNonZeroIndices = true; 492 FoundFieldNo = true; 493 } 494 } 495 // If no struct field offsets were found, tentatively assume that 496 // field zero was selected (since the zero offset would obviously 497 // be folded away). 498 if (!FoundFieldNo) { 499 ElTy = STy->getTypeAtIndex(0u); 500 GepIndices.push_back( 501 Constant::getNullValue(Type::getInt32Ty(Ty->getContext()))); 502 } 503 } 504 505 if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy)) 506 ElTy = ATy->getElementType(); 507 else 508 // FIXME: Handle VectorType. 509 // E.g., If ElTy is scalable vector, then ElSize is not a compile-time 510 // constant, therefore can not be factored out. The generated IR is less 511 // ideal with base 'V' cast to i8* and do ugly getelementptr over that. 512 break; 513 } 514 515 // If none of the operands were convertible to proper GEP indices, cast 516 // the base to i8* and do an ugly getelementptr with that. It's still 517 // better than ptrtoint+arithmetic+inttoptr at least. 518 if (!AnyNonZeroIndices) { 519 // Cast the base to i8*. 
520 V = InsertNoopCastOfTo(V, 521 Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace())); 522 523 assert(!isa<Instruction>(V) || 524 SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint())); 525 526 // Expand the operands for a plain byte offset. 527 Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty, false); 528 529 // Fold a GEP with constant operands. 530 if (Constant *CLHS = dyn_cast<Constant>(V)) 531 if (Constant *CRHS = dyn_cast<Constant>(Idx)) 532 return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()), 533 CLHS, CRHS); 534 535 // Do a quick scan to see if we have this GEP nearby. If so, reuse it. 536 unsigned ScanLimit = 6; 537 BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin(); 538 // Scanning starts from the last instruction before the insertion point. 539 BasicBlock::iterator IP = Builder.GetInsertPoint(); 540 if (IP != BlockBegin) { 541 --IP; 542 for (; ScanLimit; --IP, --ScanLimit) { 543 // Don't count dbg.value against the ScanLimit, to avoid perturbing the 544 // generated code. 545 if (isa<DbgInfoIntrinsic>(IP)) 546 ScanLimit++; 547 if (IP->getOpcode() == Instruction::GetElementPtr && 548 IP->getOperand(0) == V && IP->getOperand(1) == Idx) 549 return &*IP; 550 if (IP == BlockBegin) break; 551 } 552 } 553 554 // Save the original insertion point so we can restore it when we're done. 555 SCEVInsertPointGuard Guard(Builder, this); 556 557 // Move the insertion point out of as many loops as we can. 558 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { 559 if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break; 560 BasicBlock *Preheader = L->getLoopPreheader(); 561 if (!Preheader) break; 562 563 // Ok, move up a level. 564 Builder.SetInsertPoint(Preheader->getTerminator()); 565 } 566 567 // Emit a GEP. 568 return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep"); 569 } 570 571 { 572 SCEVInsertPointGuard Guard(Builder, this); 573 574 // Move the insertion point out of as many loops as we can. 575 while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) { 576 if (!L->isLoopInvariant(V)) break; 577 578 bool AnyIndexNotLoopInvariant = any_of( 579 GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); }); 580 581 if (AnyIndexNotLoopInvariant) 582 break; 583 584 BasicBlock *Preheader = L->getLoopPreheader(); 585 if (!Preheader) break; 586 587 // Ok, move up a level. 588 Builder.SetInsertPoint(Preheader->getTerminator()); 589 } 590 591 // Insert a pretty getelementptr. Note that this GEP is not marked inbounds, 592 // because ScalarEvolution may have changed the address arithmetic to 593 // compute a value which is beyond the end of the allocated object. 594 Value *Casted = V; 595 if (V->getType() != PTy) 596 Casted = InsertNoopCastOfTo(Casted, PTy); 597 Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep"); 598 Ops.push_back(SE.getUnknown(GEP)); 599 } 600 601 return expand(SE.getAddExpr(Ops)); 602 } 603 604 Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty, 605 Value *V) { 606 const SCEV *const Ops[1] = {Op}; 607 return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V); 608 } 609 610 /// PickMostRelevantLoop - Given two loops pick the one that's most relevant for 611 /// SCEV expansion. If they are nested, this is the most nested. If they are 612 /// neighboring, pick the later. 
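/// Illustrative example (not part of the original comment): for two sibling
/// loops A and B where A's header dominates B's header, B runs "later" along
/// the dominance order, so PickMostRelevantLoop(A, B, DT) returns B; when one
/// loop is nested inside the other, the more deeply nested loop always wins.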
613 static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B, 614 DominatorTree &DT) { 615 if (!A) return B; 616 if (!B) return A; 617 if (A->contains(B)) return B; 618 if (B->contains(A)) return A; 619 if (DT.dominates(A->getHeader(), B->getHeader())) return B; 620 if (DT.dominates(B->getHeader(), A->getHeader())) return A; 621 return A; // Arbitrarily break the tie. 622 } 623 624 /// getRelevantLoop - Get the most relevant loop associated with the given 625 /// expression, according to PickMostRelevantLoop. 626 const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) { 627 // Test whether we've already computed the most relevant loop for this SCEV. 628 auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr)); 629 if (!Pair.second) 630 return Pair.first->second; 631 632 if (isa<SCEVConstant>(S)) 633 // A constant has no relevant loops. 634 return nullptr; 635 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) { 636 if (const Instruction *I = dyn_cast<Instruction>(U->getValue())) 637 return Pair.first->second = SE.LI.getLoopFor(I->getParent()); 638 // A non-instruction has no relevant loops. 639 return nullptr; 640 } 641 if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) { 642 const Loop *L = nullptr; 643 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) 644 L = AR->getLoop(); 645 for (const SCEV *Op : N->operands()) 646 L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT); 647 return RelevantLoops[N] = L; 648 } 649 if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) { 650 const Loop *Result = getRelevantLoop(C->getOperand()); 651 return RelevantLoops[C] = Result; 652 } 653 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { 654 const Loop *Result = PickMostRelevantLoop( 655 getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT); 656 return RelevantLoops[D] = Result; 657 } 658 llvm_unreachable("Unexpected SCEV type!"); 659 } 660 661 namespace { 662 663 /// LoopCompare - Compare loops by PickMostRelevantLoop. 664 class LoopCompare { 665 DominatorTree &DT; 666 public: 667 explicit LoopCompare(DominatorTree &dt) : DT(dt) {} 668 669 bool operator()(std::pair<const Loop *, const SCEV *> LHS, 670 std::pair<const Loop *, const SCEV *> RHS) const { 671 // Keep pointer operands sorted at the end. 672 if (LHS.second->getType()->isPointerTy() != 673 RHS.second->getType()->isPointerTy()) 674 return LHS.second->getType()->isPointerTy(); 675 676 // Compare loops with PickMostRelevantLoop. 677 if (LHS.first != RHS.first) 678 return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first; 679 680 // If one operand is a non-constant negative and the other is not, 681 // put the non-constant negative on the right so that a sub can 682 // be used instead of a negate and add. 683 if (LHS.second->isNonConstantNegative()) { 684 if (!RHS.second->isNonConstantNegative()) 685 return false; 686 } else if (RHS.second->isNonConstantNegative()) 687 return true; 688 689 // Otherwise they are equivalent according to this comparison. 690 return false; 691 } 692 }; 693 694 } 695 696 Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) { 697 Type *Ty = SE.getEffectiveSCEVType(S->getType()); 698 699 // Collect all the add operands in a loop, along with their associated loops. 700 // Iterate in reverse so that constants are emitted last, all else equal, and 701 // so that pointer operands are inserted first, which the code below relies on 702 // to form more involved GEPs. 
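  // Sketch of the intent (the names below are made up for illustration): for
  // an add such as %p + 4*%i where %p is a pointer, the reverse iteration and
  // the sort place %p first, so the loop below sees a pointer-typed running
  // sum and hands the remaining integer operands to expandAddToGEP, producing
  // a getelementptr instead of ptrtoint/add/inttoptr arithmetic.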
703 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; 704 for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()), 705 E(S->op_begin()); I != E; ++I) 706 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); 707 708 // Sort by loop. Use a stable sort so that constants follow non-constants and 709 // pointer operands precede non-pointer operands. 710 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); 711 712 // Emit instructions to add all the operands. Hoist as much as possible 713 // out of loops, and form meaningful getelementptrs where possible. 714 Value *Sum = nullptr; 715 for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) { 716 const Loop *CurLoop = I->first; 717 const SCEV *Op = I->second; 718 if (!Sum) { 719 // This is the first operand. Just expand it. 720 Sum = expand(Op); 721 ++I; 722 } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) { 723 // The running sum expression is a pointer. Try to form a getelementptr 724 // at this level with that as the base. 725 SmallVector<const SCEV *, 4> NewOps; 726 for (; I != E && I->first == CurLoop; ++I) { 727 // If the operand is SCEVUnknown and not instructions, peek through 728 // it, to enable more of it to be folded into the GEP. 729 const SCEV *X = I->second; 730 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X)) 731 if (!isa<Instruction>(U->getValue())) 732 X = SE.getSCEV(U->getValue()); 733 NewOps.push_back(X); 734 } 735 Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum); 736 } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) { 737 // The running sum is an integer, and there's a pointer at this level. 738 // Try to form a getelementptr. If the running sum is instructions, 739 // use a SCEVUnknown to avoid re-analyzing them. 740 SmallVector<const SCEV *, 4> NewOps; 741 NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) : 742 SE.getSCEV(Sum)); 743 for (++I; I != E && I->first == CurLoop; ++I) 744 NewOps.push_back(I->second); 745 Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op)); 746 } else if (Op->isNonConstantNegative()) { 747 // Instead of doing a negate and add, just do a subtract. 748 Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty, false); 749 Sum = InsertNoopCastOfTo(Sum, Ty); 750 Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap, 751 /*IsSafeToHoist*/ true); 752 ++I; 753 } else { 754 // A simple add. 755 Value *W = expandCodeForImpl(Op, Ty, false); 756 Sum = InsertNoopCastOfTo(Sum, Ty); 757 // Canonicalize a constant to the RHS. 758 if (isa<Constant>(Sum)) std::swap(Sum, W); 759 Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(), 760 /*IsSafeToHoist*/ true); 761 ++I; 762 } 763 } 764 765 return Sum; 766 } 767 768 Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) { 769 Type *Ty = SE.getEffectiveSCEVType(S->getType()); 770 771 // Collect all the mul operands in a loop, along with their associated loops. 772 // Iterate in reverse so that constants are emitted last, all else equal. 773 SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops; 774 for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()), 775 E(S->op_begin()); I != E; ++I) 776 OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I)); 777 778 // Sort by loop. Use a stable sort so that constants follow non-constants. 779 llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT)); 780 781 // Emit instructions to mul all the operands. Hoist as much as possible 782 // out of loops. 
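  // Worked example for the binary-powering lambda below (illustrative only):
  // if the sorted operand list contains the same operand X five times, then
  // Exponent == 5 == 4 + 1, so ExpandOpBinPowN emits X*X and (X*X)*(X*X) to
  // form X^4 and multiplies that by X -- three multiplies instead of the four
  // a serial expansion would need.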
783 Value *Prod = nullptr; 784 auto I = OpsAndLoops.begin(); 785 786 // Expand the calculation of X pow N in the following manner: 787 // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then: 788 // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK). 789 const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() { 790 auto E = I; 791 // Calculate how many times the same operand from the same loop is included 792 // into this power. 793 uint64_t Exponent = 0; 794 const uint64_t MaxExponent = UINT64_MAX >> 1; 795 // No one sane will ever try to calculate such huge exponents, but if we 796 // need this, we stop on UINT64_MAX / 2 because we need to exit the loop 797 // below when the power of 2 exceeds our Exponent, and we want it to be 798 // 1u << 31 at most to not deal with unsigned overflow. 799 while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) { 800 ++Exponent; 801 ++E; 802 } 803 assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?"); 804 805 // Calculate powers with exponents 1, 2, 4, 8 etc. and include those of them 806 // that are needed into the result. 807 Value *P = expandCodeForImpl(I->second, Ty, false); 808 Value *Result = nullptr; 809 if (Exponent & 1) 810 Result = P; 811 for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) { 812 P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap, 813 /*IsSafeToHoist*/ true); 814 if (Exponent & BinExp) 815 Result = Result ? InsertBinop(Instruction::Mul, Result, P, 816 SCEV::FlagAnyWrap, 817 /*IsSafeToHoist*/ true) 818 : P; 819 } 820 821 I = E; 822 assert(Result && "Nothing was expanded?"); 823 return Result; 824 }; 825 826 while (I != OpsAndLoops.end()) { 827 if (!Prod) { 828 // This is the first operand. Just expand it. 829 Prod = ExpandOpBinPowN(); 830 } else if (I->second->isAllOnesValue()) { 831 // Instead of doing a multiply by negative one, just do a negate. 832 Prod = InsertNoopCastOfTo(Prod, Ty); 833 Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod, 834 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); 835 ++I; 836 } else { 837 // A simple mul. 838 Value *W = ExpandOpBinPowN(); 839 Prod = InsertNoopCastOfTo(Prod, Ty); 840 // Canonicalize a constant to the RHS. 841 if (isa<Constant>(Prod)) std::swap(Prod, W); 842 const APInt *RHS; 843 if (match(W, m_Power2(RHS))) { 844 // Canonicalize Prod*(1<<C) to Prod<<C. 845 assert(!Ty->isVectorTy() && "vector types are not SCEVable"); 846 auto NWFlags = S->getNoWrapFlags(); 847 // clear nsw flag if shl will produce poison value. 
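        // Illustrative case (i8, assumed): 1 * -128 is a valid nsw multiply,
        // but the equivalent `shl nsw i8 1, 7` shifts a set bit into the sign
        // position and is poison, so nsw cannot be carried over to the shl.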
848 if (RHS->logBase2() == RHS->getBitWidth() - 1) 849 NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW); 850 Prod = InsertBinop(Instruction::Shl, Prod, 851 ConstantInt::get(Ty, RHS->logBase2()), NWFlags, 852 /*IsSafeToHoist*/ true); 853 } else { 854 Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(), 855 /*IsSafeToHoist*/ true); 856 } 857 } 858 } 859 860 return Prod; 861 } 862 863 Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) { 864 Type *Ty = SE.getEffectiveSCEVType(S->getType()); 865 866 Value *LHS = expandCodeForImpl(S->getLHS(), Ty, false); 867 if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) { 868 const APInt &RHS = SC->getAPInt(); 869 if (RHS.isPowerOf2()) 870 return InsertBinop(Instruction::LShr, LHS, 871 ConstantInt::get(Ty, RHS.logBase2()), 872 SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true); 873 } 874 875 Value *RHS = expandCodeForImpl(S->getRHS(), Ty, false); 876 return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap, 877 /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS())); 878 } 879 880 /// Move parts of Base into Rest to leave Base with the minimal 881 /// expression that provides a pointer operand suitable for a 882 /// GEP expansion. 883 static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, 884 ScalarEvolution &SE) { 885 while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) { 886 Base = A->getStart(); 887 Rest = SE.getAddExpr(Rest, 888 SE.getAddRecExpr(SE.getConstant(A->getType(), 0), 889 A->getStepRecurrence(SE), 890 A->getLoop(), 891 A->getNoWrapFlags(SCEV::FlagNW))); 892 } 893 if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) { 894 Base = A->getOperand(A->getNumOperands()-1); 895 SmallVector<const SCEV *, 8> NewAddOps(A->op_begin(), A->op_end()); 896 NewAddOps.back() = Rest; 897 Rest = SE.getAddExpr(NewAddOps); 898 ExposePointerBase(Base, Rest, SE); 899 } 900 } 901 902 /// Determine if this is a well-behaved chain of instructions leading back to 903 /// the PHI. If so, it may be reused by expanded expressions. 904 bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV, 905 const Loop *L) { 906 if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) || 907 (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV))) 908 return false; 909 // If any of the operands don't dominate the insert position, bail. 910 // Addrec operands are always loop-invariant, so this can only happen 911 // if there are instructions which haven't been hoisted. 912 if (L == IVIncInsertLoop) { 913 for (User::op_iterator OI = IncV->op_begin()+1, 914 OE = IncV->op_end(); OI != OE; ++OI) 915 if (Instruction *OInst = dyn_cast<Instruction>(OI)) 916 if (!SE.DT.dominates(OInst, IVIncInsertPos)) 917 return false; 918 } 919 // Advance to the next instruction. 920 IncV = dyn_cast<Instruction>(IncV->getOperand(0)); 921 if (!IncV) 922 return false; 923 924 if (IncV->mayHaveSideEffects()) 925 return false; 926 927 if (IncV == PN) 928 return true; 929 930 return isNormalAddRecExprPHI(PN, IncV, L); 931 } 932 933 /// getIVIncOperand returns an induction variable increment's induction 934 /// variable operand. 935 /// 936 /// If allowScale is set, any type of GEP is allowed as long as the nonIV 937 /// operands dominate InsertPos. 938 /// 939 /// If allowScale is not set, ensure that a GEP increment conforms to one of the 940 /// simple patterns generated by getAddRecExprPHILiterally and 941 /// expandAddtoGEP. If the pattern isn't recognized, return NULL. 
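/// Illustrative example (assumed IR, not from the source): for
///   %iv.next = add nuw i64 %iv, %step
/// where the definition of %step dominates InsertPos, this returns the
/// instruction defining %iv (typically the loop phi); for a GEP increment it
/// accepts, it likewise returns the GEP's pointer operand.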
942 Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV, 943 Instruction *InsertPos, 944 bool allowScale) { 945 if (IncV == InsertPos) 946 return nullptr; 947 948 switch (IncV->getOpcode()) { 949 default: 950 return nullptr; 951 // Check for a simple Add/Sub or GEP of a loop invariant step. 952 case Instruction::Add: 953 case Instruction::Sub: { 954 Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1)); 955 if (!OInst || SE.DT.dominates(OInst, InsertPos)) 956 return dyn_cast<Instruction>(IncV->getOperand(0)); 957 return nullptr; 958 } 959 case Instruction::BitCast: 960 return dyn_cast<Instruction>(IncV->getOperand(0)); 961 case Instruction::GetElementPtr: 962 for (auto I = IncV->op_begin() + 1, E = IncV->op_end(); I != E; ++I) { 963 if (isa<Constant>(*I)) 964 continue; 965 if (Instruction *OInst = dyn_cast<Instruction>(*I)) { 966 if (!SE.DT.dominates(OInst, InsertPos)) 967 return nullptr; 968 } 969 if (allowScale) { 970 // allow any kind of GEP as long as it can be hoisted. 971 continue; 972 } 973 // This must be a pointer addition of constants (pretty), which is already 974 // handled, or some number of address-size elements (ugly). Ugly geps 975 // have 2 operands. i1* is used by the expander to represent an 976 // address-size element. 977 if (IncV->getNumOperands() != 2) 978 return nullptr; 979 unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace(); 980 if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS) 981 && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS)) 982 return nullptr; 983 break; 984 } 985 return dyn_cast<Instruction>(IncV->getOperand(0)); 986 } 987 } 988 989 /// If the insert point of the current builder or any of the builders on the 990 /// stack of saved builders has 'I' as its insert point, update it to point to 991 /// the instruction after 'I'. This is intended to be used when the instruction 992 /// 'I' is being moved. If this fixup is not done and 'I' is moved to a 993 /// different block, the inconsistent insert point (with a mismatched 994 /// Instruction and Block) can lead to an instruction being inserted in a block 995 /// other than its parent. 996 void SCEVExpander::fixupInsertPoints(Instruction *I) { 997 BasicBlock::iterator It(*I); 998 BasicBlock::iterator NewInsertPt = std::next(It); 999 if (Builder.GetInsertPoint() == It) 1000 Builder.SetInsertPoint(&*NewInsertPt); 1001 for (auto *InsertPtGuard : InsertPointGuards) 1002 if (InsertPtGuard->GetInsertPoint() == It) 1003 InsertPtGuard->SetInsertPoint(NewInsertPt); 1004 } 1005 1006 /// hoistStep - Attempt to hoist a simple IV increment above InsertPos to make 1007 /// it available to other uses in this loop. Recursively hoist any operands, 1008 /// until we reach a value that dominates InsertPos. 1009 bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { 1010 if (SE.DT.dominates(IncV, InsertPos)) 1011 return true; 1012 1013 // InsertPos must itself dominate IncV so that IncV's new position satisfies 1014 // its existing users. 1015 if (isa<PHINode>(InsertPos) || 1016 !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) 1017 return false; 1018 1019 if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) 1020 return false; 1021 1022 // Check that the chain of IV operands leading back to Phi can be hoisted. 1023 SmallVector<Instruction*, 4> IVIncs; 1024 for(;;) { 1025 Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true); 1026 if (!Oper) 1027 return false; 1028 // IncV is safe to hoist. 
1029 IVIncs.push_back(IncV); 1030 IncV = Oper; 1031 if (SE.DT.dominates(IncV, InsertPos)) 1032 break; 1033 } 1034 for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) { 1035 fixupInsertPoints(*I); 1036 (*I)->moveBefore(InsertPos); 1037 } 1038 return true; 1039 } 1040 1041 /// Determine if this cyclic phi is in a form that would have been generated by 1042 /// LSR. We don't care if the phi was actually expanded in this pass, as long 1043 /// as it is in a low-cost form, for example, no implied multiplication. This 1044 /// should match any patterns generated by getAddRecExprPHILiterally and 1045 /// expandAddtoGEP. 1046 bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV, 1047 const Loop *L) { 1048 for(Instruction *IVOper = IncV; 1049 (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(), 1050 /*allowScale=*/false));) { 1051 if (IVOper == PN) 1052 return true; 1053 } 1054 return false; 1055 } 1056 1057 /// expandIVInc - Expand an IV increment at Builder's current InsertPos. 1058 /// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may 1059 /// need to materialize IV increments elsewhere to handle difficult situations. 1060 Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L, 1061 Type *ExpandTy, Type *IntTy, 1062 bool useSubtract) { 1063 Value *IncV; 1064 // If the PHI is a pointer, use a GEP, otherwise use an add or sub. 1065 if (ExpandTy->isPointerTy()) { 1066 PointerType *GEPPtrTy = cast<PointerType>(ExpandTy); 1067 // If the step isn't constant, don't use an implicitly scaled GEP, because 1068 // that would require a multiply inside the loop. 1069 if (!isa<ConstantInt>(StepV)) 1070 GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()), 1071 GEPPtrTy->getAddressSpace()); 1072 IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN); 1073 if (IncV->getType() != PN->getType()) 1074 IncV = Builder.CreateBitCast(IncV, PN->getType()); 1075 } else { 1076 IncV = useSubtract ? 1077 Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") : 1078 Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next"); 1079 } 1080 return IncV; 1081 } 1082 1083 /// Hoist the addrec instruction chain rooted in the loop phi above the 1084 /// position. This routine assumes that this is possible (has been checked). 1085 void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist, 1086 Instruction *Pos, PHINode *LoopPhi) { 1087 do { 1088 if (DT->dominates(InstToHoist, Pos)) 1089 break; 1090 // Make sure the increment is where we want it. But don't move it 1091 // down past a potential existing post-inc user. 1092 fixupInsertPoints(InstToHoist); 1093 InstToHoist->moveBefore(Pos); 1094 Pos = InstToHoist; 1095 InstToHoist = cast<Instruction>(InstToHoist->getOperand(0)); 1096 } while (InstToHoist != LoopPhi); 1097 } 1098 1099 /// Check whether we can cheaply express the requested SCEV in terms of 1100 /// the available PHI SCEV by truncation and/or inversion of the step. 1101 static bool canBeCheaplyTransformed(ScalarEvolution &SE, 1102 const SCEVAddRecExpr *Phi, 1103 const SCEVAddRecExpr *Requested, 1104 bool &InvertStep) { 1105 Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType()); 1106 Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType()); 1107 1108 if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth()) 1109 return false; 1110 1111 // Try truncate it if necessary. 
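  // For example (sketch): an available i64 phi {X,+,1}<L> can stand in for a
  // requested i32 {trunc X,+,1}<L> after truncation, and a {0,+,1}<L> phi can
  // serve a requested {R,+,-1}<L> via the inversion check below, since
  // {R,+,-1} == R - {0,+,1}.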
1112 Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy)); 1113 if (!Phi) 1114 return false; 1115 1116 // Check whether truncation will help. 1117 if (Phi == Requested) { 1118 InvertStep = false; 1119 return true; 1120 } 1121 1122 // Check whether inverting will help: {R,+,-1} == R - {0,+,1}. 1123 if (SE.getAddExpr(Requested->getStart(), 1124 SE.getNegativeSCEV(Requested)) == Phi) { 1125 InvertStep = true; 1126 return true; 1127 } 1128 1129 return false; 1130 } 1131 1132 static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { 1133 if (!isa<IntegerType>(AR->getType())) 1134 return false; 1135 1136 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); 1137 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); 1138 const SCEV *Step = AR->getStepRecurrence(SE); 1139 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy), 1140 SE.getSignExtendExpr(AR, WideTy)); 1141 const SCEV *ExtendAfterOp = 1142 SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy); 1143 return ExtendAfterOp == OpAfterExtend; 1144 } 1145 1146 static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) { 1147 if (!isa<IntegerType>(AR->getType())) 1148 return false; 1149 1150 unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth(); 1151 Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2); 1152 const SCEV *Step = AR->getStepRecurrence(SE); 1153 const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy), 1154 SE.getZeroExtendExpr(AR, WideTy)); 1155 const SCEV *ExtendAfterOp = 1156 SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy); 1157 return ExtendAfterOp == OpAfterExtend; 1158 } 1159 1160 /// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand 1161 /// the base addrec, which is the addrec without any non-loop-dominating 1162 /// values, and return the PHI. 1163 PHINode * 1164 SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, 1165 const Loop *L, 1166 Type *ExpandTy, 1167 Type *IntTy, 1168 Type *&TruncTy, 1169 bool &InvertStep) { 1170 assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position"); 1171 1172 // Reuse a previously-inserted PHI, if present. 1173 BasicBlock *LatchBlock = L->getLoopLatch(); 1174 if (LatchBlock) { 1175 PHINode *AddRecPhiMatch = nullptr; 1176 Instruction *IncV = nullptr; 1177 TruncTy = nullptr; 1178 InvertStep = false; 1179 1180 // Only try partially matching scevs that need truncation and/or 1181 // step-inversion if we know this loop is outside the current loop. 1182 bool TryNonMatchingSCEV = 1183 IVIncInsertLoop && 1184 SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader()); 1185 1186 for (PHINode &PN : L->getHeader()->phis()) { 1187 if (!SE.isSCEVable(PN.getType())) 1188 continue; 1189 1190 const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN)); 1191 if (!PhiSCEV) 1192 continue; 1193 1194 bool IsMatchingSCEV = PhiSCEV == Normalized; 1195 // We only handle truncation and inversion of phi recurrences for the 1196 // expanded expression if the expanded expression's loop dominates the 1197 // loop we insert to. Check now, so we can bail out early. 1198 if (!IsMatchingSCEV && !TryNonMatchingSCEV) 1199 continue; 1200 1201 // TODO: this possibly can be reworked to avoid this cast at all. 
      Instruction *TempIncV =
          dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
      if (!TempIncV)
        continue;

      // Check whether we can reuse this PHI node.
      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
          continue;
        if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
          continue;
      } else {
        if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
          continue;
      }

      // Stop if we have found an exact match SCEV.
      if (IsMatchingSCEV) {
        IncV = TempIncV;
        TruncTy = nullptr;
        InvertStep = false;
        AddRecPhiMatch = &PN;
        break;
      }

      // Try whether the phi can be translated into the requested form
      // (truncated and/or offset by a constant).
      if ((!TruncTy || InvertStep) &&
          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
        // Record the phi node. But don't stop, we might find an exact match
        // later.
        AddRecPhiMatch = &PN;
        IncV = TempIncV;
        TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
      }
    }

    if (AddRecPhiMatch) {
      // Potentially, move the increment. We have made sure in
      // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
      if (L == IVIncInsertLoop)
        hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);

      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(AddRecPhiMatch);
      // Remember the increment.
      rememberInstruction(IncV);
      return AddRecPhiMatch;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Another AddRec may need to be recursively expanded below. For example, if
  // this AddRec is quadratic, the StepV may itself be an AddRec in this
  // loop. Remove this loop from the PostIncLoops set before expanding such
  // AddRecs. Otherwise, we cannot find a valid position for the step
  // (i.e. StepV can never dominate its loop header). Ideally, we could do
  // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  // so it's not worth implementing SmallPtrSet::swap.
  PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  PostIncLoops.clear();

  // Expand code for the start value into the loop preheader.
  assert(L->getLoopPreheader() &&
         "Can't expand add recurrences without a loop preheader!");
  Value *StartV =
      expandCodeForImpl(Normalized->getStart(), ExpandTy,
                        L->getLoopPreheader()->getTerminator(), false);

  // StartV must have been inserted into L's preheader to dominate the new
  // phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
                                 L->getHeader()));

  // Expand code for the step value. Do this before creating the PHI so that
  // PHI reuse code doesn't see an incomplete PHI.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  // If the stride is negative, insert a sub instead of an add for the increment
  // (unless it's a constant, because subtracts of constants are canonicalized
  // to adds).
  bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  if (useSubtract)
    Step = SE.getNegativeSCEV(Step);
  // Expand the step somewhere that dominates the loop header.
1290 Value *StepV = expandCodeForImpl( 1291 Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false); 1292 1293 // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if 1294 // we actually do emit an addition. It does not apply if we emit a 1295 // subtraction. 1296 bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized); 1297 bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized); 1298 1299 // Create the PHI. 1300 BasicBlock *Header = L->getHeader(); 1301 Builder.SetInsertPoint(Header, Header->begin()); 1302 pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); 1303 PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE), 1304 Twine(IVName) + ".iv"); 1305 1306 // Create the step instructions and populate the PHI. 1307 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { 1308 BasicBlock *Pred = *HPI; 1309 1310 // Add a start value. 1311 if (!L->contains(Pred)) { 1312 PN->addIncoming(StartV, Pred); 1313 continue; 1314 } 1315 1316 // Create a step value and add it to the PHI. 1317 // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the 1318 // instructions at IVIncInsertPos. 1319 Instruction *InsertPos = L == IVIncInsertLoop ? 1320 IVIncInsertPos : Pred->getTerminator(); 1321 Builder.SetInsertPoint(InsertPos); 1322 Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); 1323 1324 if (isa<OverflowingBinaryOperator>(IncV)) { 1325 if (IncrementIsNUW) 1326 cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap(); 1327 if (IncrementIsNSW) 1328 cast<BinaryOperator>(IncV)->setHasNoSignedWrap(); 1329 } 1330 PN->addIncoming(IncV, Pred); 1331 } 1332 1333 // After expanding subexpressions, restore the PostIncLoops set so the caller 1334 // can ensure that IVIncrement dominates the current uses. 1335 PostIncLoops = SavedPostIncLoops; 1336 1337 // Remember this PHI, even in post-inc mode. 1338 InsertedValues.insert(PN); 1339 1340 return PN; 1341 } 1342 1343 Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { 1344 Type *STy = S->getType(); 1345 Type *IntTy = SE.getEffectiveSCEVType(STy); 1346 const Loop *L = S->getLoop(); 1347 1348 // Determine a normalized form of this expression, which is the expression 1349 // before any post-inc adjustment is made. 1350 const SCEVAddRecExpr *Normalized = S; 1351 if (PostIncLoops.count(L)) { 1352 PostIncLoopSet Loops; 1353 Loops.insert(L); 1354 Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE)); 1355 } 1356 1357 // Strip off any non-loop-dominating component from the addrec start. 1358 const SCEV *Start = Normalized->getStart(); 1359 const SCEV *PostLoopOffset = nullptr; 1360 if (!SE.properlyDominates(Start, L->getHeader())) { 1361 PostLoopOffset = Start; 1362 Start = SE.getConstant(Normalized->getType(), 0); 1363 Normalized = cast<SCEVAddRecExpr>( 1364 SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), 1365 Normalized->getLoop(), 1366 Normalized->getNoWrapFlags(SCEV::FlagNW))); 1367 } 1368 1369 // Strip off any non-loop-dominating component from the addrec step. 1370 const SCEV *Step = Normalized->getStepRecurrence(SE); 1371 const SCEV *PostLoopScale = nullptr; 1372 if (!SE.dominates(Step, L->getHeader())) { 1373 PostLoopScale = Step; 1374 Step = SE.getConstant(Normalized->getType(), 1); 1375 if (!Start->isZero()) { 1376 // The normalization below assumes that Start is constant zero, so if 1377 // it isn't re-associate Start to PostLoopOffset. 
1378 assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?"); 1379 PostLoopOffset = Start; 1380 Start = SE.getConstant(Normalized->getType(), 0); 1381 } 1382 Normalized = 1383 cast<SCEVAddRecExpr>(SE.getAddRecExpr( 1384 Start, Step, Normalized->getLoop(), 1385 Normalized->getNoWrapFlags(SCEV::FlagNW))); 1386 } 1387 1388 // Expand the core addrec. If we need post-loop scaling, force it to 1389 // expand to an integer type to avoid the need for additional casting. 1390 Type *ExpandTy = PostLoopScale ? IntTy : STy; 1391 // We can't use a pointer type for the addrec if the pointer type is 1392 // non-integral. 1393 Type *AddRecPHIExpandTy = 1394 DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy; 1395 1396 // In some cases, we decide to reuse an existing phi node but need to truncate 1397 // it and/or invert the step. 1398 Type *TruncTy = nullptr; 1399 bool InvertStep = false; 1400 PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy, 1401 IntTy, TruncTy, InvertStep); 1402 1403 // Accommodate post-inc mode, if necessary. 1404 Value *Result; 1405 if (!PostIncLoops.count(L)) 1406 Result = PN; 1407 else { 1408 // In PostInc mode, use the post-incremented value. 1409 BasicBlock *LatchBlock = L->getLoopLatch(); 1410 assert(LatchBlock && "PostInc mode requires a unique loop latch!"); 1411 Result = PN->getIncomingValueForBlock(LatchBlock); 1412 1413 // For an expansion to use the postinc form, the client must call 1414 // expandCodeFor with an InsertPoint that is either outside the PostIncLoop 1415 // or dominated by IVIncInsertPos. 1416 if (isa<Instruction>(Result) && 1417 !SE.DT.dominates(cast<Instruction>(Result), 1418 &*Builder.GetInsertPoint())) { 1419 // The induction variable's postinc expansion does not dominate this use. 1420 // IVUsers tries to prevent this case, so it is rare. However, it can 1421 // happen when an IVUser outside the loop is not dominated by the latch 1422 // block. Adjusting IVIncInsertPos before expansion begins cannot handle 1423 // all cases. Consider a phi outside whose operand is replaced during 1424 // expansion with the value of the postinc user. Without fundamentally 1425 // changing the way postinc users are tracked, the only remedy is 1426 // inserting an extra IV increment. StepV might fold into PostLoopOffset, 1427 // but hopefully expandCodeFor handles that. 1428 bool useSubtract = 1429 !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); 1430 if (useSubtract) 1431 Step = SE.getNegativeSCEV(Step); 1432 Value *StepV; 1433 { 1434 // Expand the step somewhere that dominates the loop header. 1435 SCEVInsertPointGuard Guard(Builder, this); 1436 StepV = expandCodeForImpl( 1437 Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false); 1438 } 1439 Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); 1440 } 1441 } 1442 1443 // We have decided to reuse an induction variable of a dominating loop. Apply 1444 // truncation and/or inversion of the step. 1445 if (TruncTy) { 1446 Type *ResTy = Result->getType(); 1447 // Normalize the result type. 1448 if (ResTy != SE.getEffectiveSCEVType(ResTy)) 1449 Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy)); 1450 // Truncate the result. 1451 if (TruncTy != Result->getType()) 1452 Result = Builder.CreateTrunc(Result, TruncTy); 1453 1454 // Invert the result. 1455 if (InvertStep) 1456 Result = Builder.CreateSub( 1457 expandCodeForImpl(Normalized->getStart(), TruncTy, false), Result); 1458 } 1459 1460 // Re-apply any non-loop-dominating scale. 
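  // Sketch: if the addrec's original step did not dominate the loop header
  // (e.g. it is computed inside the loop), the recurrence above was expanded
  // with a unit step and PostLoopScale holds the real step, so the result is
  // multiplied by it here.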
1461   if (PostLoopScale) {
1462     assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
1463     Result = InsertNoopCastOfTo(Result, IntTy);
1464     Result = Builder.CreateMul(Result,
1465                                expandCodeForImpl(PostLoopScale, IntTy, false));
1466   }
1467
1468   // Re-apply any non-loop-dominating offset.
1469   if (PostLoopOffset) {
1470     if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
1471       if (Result->getType()->isIntegerTy()) {
1472         Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy, false);
1473         Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
1474       } else {
1475         Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
1476       }
1477     } else {
1478       Result = InsertNoopCastOfTo(Result, IntTy);
1479       Result = Builder.CreateAdd(
1480           Result, expandCodeForImpl(PostLoopOffset, IntTy, false));
1481     }
1482   }
1483
1484   return Result;
1485 }
1486
1487 Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1488   // In canonical mode we compute the addrec as an expression of a canonical IV
1489   // using evaluateAtIteration and expand the resulting SCEV expression. This
1490   // way we avoid introducing new IVs to carry on the computation of the addrec
1491   // throughout the loop.
1492   //
1493   // For nested addrecs evaluateAtIteration might need a canonical IV of a
1494   // type wider than the addrec itself. Emitting a canonical IV of the
1495   // proper type might produce non-legal types, for example expanding an i64
1496   // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this just fall
1497   // back to non-canonical mode for nested addrecs.
1498   if (!CanonicalMode || (S->getNumOperands() > 2))
1499     return expandAddRecExprLiterally(S);
1500
1501   Type *Ty = SE.getEffectiveSCEVType(S->getType());
1502   const Loop *L = S->getLoop();
1503
1504   // First check for an existing canonical IV in a suitable type.
1505   PHINode *CanonicalIV = nullptr;
1506   if (PHINode *PN = L->getCanonicalInductionVariable())
1507     if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1508       CanonicalIV = PN;
1509
1510   // Rewrite an AddRec in terms of the canonical induction variable, if
1511   // its type is narrower.
1512   if (CanonicalIV &&
1513       SE.getTypeSizeInBits(CanonicalIV->getType()) >
1514       SE.getTypeSizeInBits(Ty)) {
1515     SmallVector<const SCEV *, 4> NewOps(S->getNumOperands());
1516     for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i)
1517       NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType());
1518     Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(),
1519                                        S->getNoWrapFlags(SCEV::FlagNW)));
1520     BasicBlock::iterator NewInsertPt =
1521         findInsertPointAfter(cast<Instruction>(V), Builder.GetInsertBlock());
1522     V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr,
1523                           &*NewInsertPt, false);
1524     return V;
1525   }
1526
1527   // {X,+,F} --> X + {0,+,F}
1528   if (!S->getStart()->isZero()) {
1529     SmallVector<const SCEV *, 4> NewOps(S->op_begin(), S->op_end());
1530     NewOps[0] = SE.getConstant(Ty, 0);
1531     const SCEV *Rest = SE.getAddRecExpr(NewOps, L,
1532                                         S->getNoWrapFlags(SCEV::FlagNW));
1533
1534     // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
1535     // comments on expandAddToGEP for details.
1536     const SCEV *Base = S->getStart();
1537     // Dig into the expression to find the pointer base for a GEP.
1538     const SCEV *ExposedRest = Rest;
1539     ExposePointerBase(Base, ExposedRest, SE);
1540     // If we found a pointer, expand the AddRec with a GEP.
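    // In that case the start pointer is expanded once and the zero-based
    // recurrence becomes the GEP offset, so no ptrtoint/inttoptr pair is
    // needed around the induction arithmetic.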
1541 if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { 1542 // Make sure the Base isn't something exotic, such as a multiplied 1543 // or divided pointer value. In those cases, the result type isn't 1544 // actually a pointer type. 1545 if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) { 1546 Value *StartV = expand(Base); 1547 assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); 1548 return expandAddToGEP(ExposedRest, PTy, Ty, StartV); 1549 } 1550 } 1551 1552 // Just do a normal add. Pre-expand the operands to suppress folding. 1553 // 1554 // The LHS and RHS values are factored out of the expand call to make the 1555 // output independent of the argument evaluation order. 1556 const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart())); 1557 const SCEV *AddExprRHS = SE.getUnknown(expand(Rest)); 1558 return expand(SE.getAddExpr(AddExprLHS, AddExprRHS)); 1559 } 1560 1561 // If we don't yet have a canonical IV, create one. 1562 if (!CanonicalIV) { 1563 // Create and insert the PHI node for the induction variable in the 1564 // specified loop. 1565 BasicBlock *Header = L->getHeader(); 1566 pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); 1567 CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", 1568 &Header->front()); 1569 rememberInstruction(CanonicalIV); 1570 1571 SmallSet<BasicBlock *, 4> PredSeen; 1572 Constant *One = ConstantInt::get(Ty, 1); 1573 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { 1574 BasicBlock *HP = *HPI; 1575 if (!PredSeen.insert(HP).second) { 1576 // There must be an incoming value for each predecessor, even the 1577 // duplicates! 1578 CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP); 1579 continue; 1580 } 1581 1582 if (L->contains(HP)) { 1583 // Insert a unit add instruction right before the terminator 1584 // corresponding to the back-edge. 1585 Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One, 1586 "indvar.next", 1587 HP->getTerminator()); 1588 Add->setDebugLoc(HP->getTerminator()->getDebugLoc()); 1589 rememberInstruction(Add); 1590 CanonicalIV->addIncoming(Add, HP); 1591 } else { 1592 CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP); 1593 } 1594 } 1595 } 1596 1597 // {0,+,1} --> Insert a canonical induction variable into the loop! 1598 if (S->isAffine() && S->getOperand(1)->isOne()) { 1599 assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && 1600 "IVs with types different from the canonical IV should " 1601 "already have been handled!"); 1602 return CanonicalIV; 1603 } 1604 1605 // {0,+,F} --> {0,+,1} * F 1606 1607 // If this is a simple linear addrec, emit it now as a special case. 1608 if (S->isAffine()) // {0,+,F} --> i*F 1609 return 1610 expand(SE.getTruncateOrNoop( 1611 SE.getMulExpr(SE.getUnknown(CanonicalIV), 1612 SE.getNoopOrAnyExtend(S->getOperand(1), 1613 CanonicalIV->getType())), 1614 Ty)); 1615 1616 // If this is a chain of recurrences, turn it into a closed form, using the 1617 // folders, then expandCodeFor the closed form. This allows the folders to 1618 // simplify the expression without having to build a bunch of special code 1619 // into this folder. 1620 const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. 1621 1622 // Promote S up to the canonical IV type, if the cast is foldable. 
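  // (The extension is only kept if it folds into an addrec; a plain
  // sext/zext/anyext wrapper around S could not be evaluated at an iteration
  // by the code below.)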
1623 const SCEV *NewS = S; 1624 const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); 1625 if (isa<SCEVAddRecExpr>(Ext)) 1626 NewS = Ext; 1627 1628 const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); 1629 //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; 1630 1631 // Truncate the result down to the original type, if needed. 1632 const SCEV *T = SE.getTruncateOrNoop(V, Ty); 1633 return expand(T); 1634 } 1635 1636 Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { 1637 Type *Ty = SE.getEffectiveSCEVType(S->getType()); 1638 Value *V = expandCodeForImpl( 1639 S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), 1640 false); 1641 return Builder.CreateTrunc(V, Ty); 1642 } 1643 1644 Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { 1645 Type *Ty = SE.getEffectiveSCEVType(S->getType()); 1646 Value *V = expandCodeForImpl( 1647 S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), 1648 false); 1649 return Builder.CreateZExt(V, Ty); 1650 } 1651 1652 Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { 1653 Type *Ty = SE.getEffectiveSCEVType(S->getType()); 1654 Value *V = expandCodeForImpl( 1655 S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), 1656 false); 1657 return Builder.CreateSExt(V, Ty); 1658 } 1659 1660 Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { 1661 Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); 1662 Type *Ty = LHS->getType(); 1663 for (int i = S->getNumOperands()-2; i >= 0; --i) { 1664 // In the case of mixed integer and pointer types, do the 1665 // rest of the comparisons as integer. 1666 Type *OpTy = S->getOperand(i)->getType(); 1667 if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { 1668 Ty = SE.getEffectiveSCEVType(Ty); 1669 LHS = InsertNoopCastOfTo(LHS, Ty); 1670 } 1671 Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); 1672 Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); 1673 Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); 1674 LHS = Sel; 1675 } 1676 // In the case of mixed integer and pointer types, cast the 1677 // final result back to the pointer type. 1678 if (LHS->getType() != S->getType()) 1679 LHS = InsertNoopCastOfTo(LHS, S->getType()); 1680 return LHS; 1681 } 1682 1683 Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { 1684 Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); 1685 Type *Ty = LHS->getType(); 1686 for (int i = S->getNumOperands()-2; i >= 0; --i) { 1687 // In the case of mixed integer and pointer types, do the 1688 // rest of the comparisons as integer. 1689 Type *OpTy = S->getOperand(i)->getType(); 1690 if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { 1691 Ty = SE.getEffectiveSCEVType(Ty); 1692 LHS = InsertNoopCastOfTo(LHS, Ty); 1693 } 1694 Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); 1695 Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); 1696 Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); 1697 LHS = Sel; 1698 } 1699 // In the case of mixed integer and pointer types, cast the 1700 // final result back to the pointer type. 
1701   if (LHS->getType() != S->getType())
1702     LHS = InsertNoopCastOfTo(LHS, S->getType());
1703   return LHS;
1704 }
1705
1706 Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) {
1707   Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
1708   Type *Ty = LHS->getType();
1709   for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1710     // In the case of mixed integer and pointer types, do the
1711     // rest of the comparisons as integer.
1712     Type *OpTy = S->getOperand(i)->getType();
1713     if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
1714       Ty = SE.getEffectiveSCEVType(Ty);
1715       LHS = InsertNoopCastOfTo(LHS, Ty);
1716     }
1717     Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
1718     Value *ICmp = Builder.CreateICmpSLT(LHS, RHS);
1719     Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin");
1720     LHS = Sel;
1721   }
1722   // In the case of mixed integer and pointer types, cast the
1723   // final result back to the pointer type.
1724   if (LHS->getType() != S->getType())
1725     LHS = InsertNoopCastOfTo(LHS, S->getType());
1726   return LHS;
1727 }
1728
1729 Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) {
1730   Value *LHS = expand(S->getOperand(S->getNumOperands() - 1));
1731   Type *Ty = LHS->getType();
1732   for (int i = S->getNumOperands() - 2; i >= 0; --i) {
1733     // In the case of mixed integer and pointer types, do the
1734     // rest of the comparisons as integer.
1735     Type *OpTy = S->getOperand(i)->getType();
1736     if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
1737       Ty = SE.getEffectiveSCEVType(Ty);
1738       LHS = InsertNoopCastOfTo(LHS, Ty);
1739     }
1740     Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
1741     Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
1742     Value *Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
1743     LHS = Sel;
1744   }
1745   // In the case of mixed integer and pointer types, cast the
1746   // final result back to the pointer type.
1747   if (LHS->getType() != S->getType())
1748     LHS = InsertNoopCastOfTo(LHS, S->getType());
1749   return LHS;
1750 }
1751
1752 Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
1753                                        Instruction *IP, bool Root) {
1754   setInsertPoint(IP);
1755   Value *V = expandCodeForImpl(SH, Ty, Root);
1756   return V;
1757 }
1758
1759 Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
1760   // Expand the code for this SCEV.
1761   Value *V = expand(SH);
1762
1763   if (PreserveLCSSA) {
1764     if (auto *Inst = dyn_cast<Instruction>(V)) {
1765       // Create a temporary instruction at the current insertion point, so we
1766       // can hand it off to the helper to create LCSSA PHIs if required for the
1767       // new use.
1768       // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
1769       // would accept an insertion point and return an LCSSA phi for that
1770       // insertion point, so there is no need to insert & remove the temporary
1771       // instruction.
1772       Instruction *Tmp;
1773       if (Inst->getType()->isIntegerTy())
1774         Tmp =
1775             cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user"));
1776       else {
1777         assert(Inst->getType()->isPointerTy());
1778         Tmp = cast<Instruction>(
1779             Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user"));
1780       }
1781       V = fixupLCSSAFormFor(Tmp, 0);
1782
1783       // Clean up temporary instruction.
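      // The temporary user only existed so that fixupLCSSAFormFor had a use at
      // the current insertion point; it must not survive in the IR or in the
      // inserted-value bookkeeping.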
1784       InsertedValues.erase(Tmp);
1785       InsertedPostIncValues.erase(Tmp);
1786       Tmp->eraseFromParent();
1787     }
1788   }
1789
1790   InsertedExpressions[std::make_pair(SH, &*Builder.GetInsertPoint())] = V;
1791   if (Ty) {
1792     assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1793            "non-trivial casts should be done with the SCEVs directly!");
1794     V = InsertNoopCastOfTo(V, Ty);
1795   }
1796   return V;
1797 }
1798
1799 ScalarEvolution::ValueOffsetPair
1800 SCEVExpander::FindValueInExprValueMap(const SCEV *S,
1801                                       const Instruction *InsertPt) {
1802   SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
1803   // If the expansion is not in CanonicalMode, and the SCEV contains any
1804   // sub scAddRecExpr type SCEV, it is required to expand the SCEV literally.
1805   if (CanonicalMode || !SE.containsAddRecurrence(S)) {
1806     // If S is scConstant, it may be worse to reuse an existing Value.
1807     if (S->getSCEVType() != scConstant && Set) {
1808       // Choose a Value from the set which dominates the insertPt.
1809       // insertPt should be inside the Value's parent loop so as not to break
1810       // the LCSSA form.
1811       for (auto const &VOPair : *Set) {
1812         Value *V = VOPair.first;
1813         ConstantInt *Offset = VOPair.second;
1814         Instruction *EntInst = nullptr;
1815         if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
1816             S->getType() == V->getType() &&
1817             EntInst->getFunction() == InsertPt->getFunction() &&
1818             SE.DT.dominates(EntInst, InsertPt) &&
1819             (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
1820              SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
1821           return {V, Offset};
1822       }
1823     }
1824   }
1825   return {nullptr, nullptr};
1826 }
1827
1828 // The expansion of SCEV will either reuse a previous Value in ExprValueMap,
1829 // or expand the SCEV literally. Specifically, if the expansion is in LSRMode,
1830 // and the SCEV contains any sub scAddRecExpr type SCEV, it will be expanded
1831 // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise,
1832 // the expansion will try to reuse a Value from ExprValueMap, and only when it
1833 // fails, expand the SCEV literally.
1834 Value *SCEVExpander::expand(const SCEV *S) {
1835   // Compute an insertion point for this SCEV object. Hoist the instructions
1836   // as far out in the loop nest as possible.
1837   Instruction *InsertPt = &*Builder.GetInsertPoint();
1838
1839   // We can move the insertion point only if there are no div or rem operations;
1840   // otherwise we risk moving it past the check for a zero denominator.
1841   auto SafeToHoist = [](const SCEV *S) {
1842     return !SCEVExprContains(S, [](const SCEV *S) {
1843       if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1844         if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1845           // Division by non-zero constants can be hoisted.
1846           return SC->getValue()->isZero();
1847         // All other divisions should not be moved as they may be
1848         // divisions by zero and should be kept within the
1849         // conditions of the surrounding loops that guard their
1850         // execution (see PR35406).
1851         return true;
1852       }
1853       return false;
1854     });
1855   };
1856   if (SafeToHoist(S)) {
1857     for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1858          L = L->getParentLoop()) {
1859       if (SE.isLoopInvariant(S, L)) {
1860         if (!L) break;
1861         if (BasicBlock *Preheader = L->getLoopPreheader())
1862           InsertPt = Preheader->getTerminator();
1863         else
1864           // LSR sets the insertion point for AddRec start/step values to the
1865           // block start to simplify value reuse, even though it's an invalid
1866           // position. SCEVExpander must correct for this in all cases.
1867           InsertPt = &*L->getHeader()->getFirstInsertionPt();
1868       } else {
1869         // If the SCEV is computable at this level, insert it into the header
1870         // after the PHIs (and after any other instructions that we've inserted
1871         // there) so that it is guaranteed to dominate any user inside the loop.
1872         if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1873           InsertPt = &*L->getHeader()->getFirstInsertionPt();
1874         while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
1875                (isInsertedInstruction(InsertPt) ||
1876                 isa<DbgInfoIntrinsic>(InsertPt)))
1877           InsertPt = &*std::next(InsertPt->getIterator());
1878         break;
1879       }
1880     }
1881   }
1882
1883   // Check to see if we already expanded this here.
1884   auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
1885   if (I != InsertedExpressions.end())
1886     return I->second;
1887
1888   SCEVInsertPointGuard Guard(Builder, this);
1889   Builder.SetInsertPoint(InsertPt);
1890
1891   // Expand the expression into instructions.
1892   ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
1893   Value *V = VO.first;
1894
1895   if (!V)
1896     V = visit(S);
1897   else if (VO.second) {
1898     if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
1899       Type *Ety = Vty->getPointerElementType();
1900       int64_t Offset = VO.second->getSExtValue();
1901       int64_t ESize = SE.getTypeSizeInBits(Ety);
1902       if ((Offset * 8) % ESize == 0) {
1903         ConstantInt *Idx =
1904             ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
1905         V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
1906       } else {
1907         ConstantInt *Idx =
1908             ConstantInt::getSigned(VO.second->getType(), -Offset);
1909         unsigned AS = Vty->getAddressSpace();
1910         V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
1911         V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
1912                               "uglygep");
1913         V = Builder.CreateBitCast(V, Vty);
1914       }
1915     } else {
1916       V = Builder.CreateSub(V, VO.second);
1917     }
1918   }
1919   // Remember the expanded value for this SCEV at this location.
1920   //
1921   // This is independent of PostIncLoops. The mapped value simply materializes
1922   // the expression at this insertion point. If the mapped value happened to be
1923   // a postinc expansion, it could be reused by a non-postinc user, but only if
1924   // its insertion point was already at the head of the loop.
1925   InsertedExpressions[std::make_pair(S, InsertPt)] = V;
1926   return V;
1927 }
1928
1929 void SCEVExpander::rememberInstruction(Value *I) {
1930   auto DoInsert = [this](Value *V) {
1931     if (!PostIncLoops.empty())
1932       InsertedPostIncValues.insert(V);
1933     else
1934       InsertedValues.insert(V);
1935   };
1936   DoInsert(I);
1937
1938   if (!PreserveLCSSA)
1939     return;
1940
1941   if (auto *Inst = dyn_cast<Instruction>(I)) {
1942     // A new instruction has been added, which might introduce new uses outside
1943     // a defining loop. Fix LCSSA form for each operand of the new instruction,
1944     // if required.
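    // fixupLCSSAFormFor returns either the original operand or the LCSSA phi
    // that now wraps it; the returned value is tracked so that any phi created
    // here is treated like other inserted values.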
1945 for (unsigned OpIdx = 0, OpEnd = Inst->getNumOperands(); OpIdx != OpEnd; 1946 OpIdx++) { 1947 auto *V = fixupLCSSAFormFor(Inst, OpIdx); 1948 if (V != I) 1949 DoInsert(V); 1950 } 1951 } 1952 } 1953 1954 /// getOrInsertCanonicalInductionVariable - This method returns the 1955 /// canonical induction variable of the specified type for the specified 1956 /// loop (inserting one if there is none). A canonical induction variable 1957 /// starts at zero and steps by one on each iteration. 1958 PHINode * 1959 SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, 1960 Type *Ty) { 1961 assert(Ty->isIntegerTy() && "Can only insert integer induction variables!"); 1962 1963 // Build a SCEV for {0,+,1}<L>. 1964 // Conservatively use FlagAnyWrap for now. 1965 const SCEV *H = SE.getAddRecExpr(SE.getConstant(Ty, 0), 1966 SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); 1967 1968 // Emit code for it. 1969 SCEVInsertPointGuard Guard(Builder, this); 1970 PHINode *V = cast<PHINode>(expandCodeForImpl( 1971 H, nullptr, &*L->getHeader()->getFirstInsertionPt(), false)); 1972 1973 return V; 1974 } 1975 1976 /// replaceCongruentIVs - Check for congruent phis in this loop header and 1977 /// replace them with their most canonical representative. Return the number of 1978 /// phis eliminated. 1979 /// 1980 /// This does not depend on any SCEVExpander state but should be used in 1981 /// the same context that SCEVExpander is used. 1982 unsigned 1983 SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, 1984 SmallVectorImpl<WeakTrackingVH> &DeadInsts, 1985 const TargetTransformInfo *TTI) { 1986 // Find integer phis in order of increasing width. 1987 SmallVector<PHINode*, 8> Phis; 1988 for (PHINode &PN : L->getHeader()->phis()) 1989 Phis.push_back(&PN); 1990 1991 if (TTI) 1992 llvm::sort(Phis, [](Value *LHS, Value *RHS) { 1993 // Put pointers at the back and make sure pointer < pointer = false. 1994 if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy()) 1995 return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy(); 1996 return RHS->getType()->getPrimitiveSizeInBits() < 1997 LHS->getType()->getPrimitiveSizeInBits(); 1998 }); 1999 2000 unsigned NumElim = 0; 2001 DenseMap<const SCEV *, PHINode *> ExprToIVMap; 2002 // Process phis from wide to narrow. Map wide phis to their truncation 2003 // so narrow phis can reuse them. 2004 for (PHINode *Phi : Phis) { 2005 auto SimplifyPHINode = [&](PHINode *PN) -> Value * { 2006 if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC})) 2007 return V; 2008 if (!SE.isSCEVable(PN->getType())) 2009 return nullptr; 2010 auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN)); 2011 if (!Const) 2012 return nullptr; 2013 return Const->getValue(); 2014 }; 2015 2016 // Fold constant phis. They may be congruent to other constant phis and 2017 // would confuse the logic below that expects proper IVs. 
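    // (SimplifyPHINode also catches phis that only ScalarEvolution can prove
    // constant, not just the ones InstSimplify folds.)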
2018 if (Value *V = SimplifyPHINode(Phi)) { 2019 if (V->getType() != Phi->getType()) 2020 continue; 2021 Phi->replaceAllUsesWith(V); 2022 DeadInsts.emplace_back(Phi); 2023 ++NumElim; 2024 DEBUG_WITH_TYPE(DebugType, dbgs() 2025 << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); 2026 continue; 2027 } 2028 2029 if (!SE.isSCEVable(Phi->getType())) 2030 continue; 2031 2032 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; 2033 if (!OrigPhiRef) { 2034 OrigPhiRef = Phi; 2035 if (Phi->getType()->isIntegerTy() && TTI && 2036 TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { 2037 // This phi can be freely truncated to the narrowest phi type. Map the 2038 // truncated expression to it so it will be reused for narrow types. 2039 const SCEV *TruncExpr = 2040 SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType()); 2041 ExprToIVMap[TruncExpr] = Phi; 2042 } 2043 continue; 2044 } 2045 2046 // Replacing a pointer phi with an integer phi or vice-versa doesn't make 2047 // sense. 2048 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) 2049 continue; 2050 2051 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 2052 Instruction *OrigInc = dyn_cast<Instruction>( 2053 OrigPhiRef->getIncomingValueForBlock(LatchBlock)); 2054 Instruction *IsomorphicInc = 2055 dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); 2056 2057 if (OrigInc && IsomorphicInc) { 2058 // If this phi has the same width but is more canonical, replace the 2059 // original with it. As part of the "more canonical" determination, 2060 // respect a prior decision to use an IV chain. 2061 if (OrigPhiRef->getType() == Phi->getType() && 2062 !(ChainedPhis.count(Phi) || 2063 isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) && 2064 (ChainedPhis.count(Phi) || 2065 isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { 2066 std::swap(OrigPhiRef, Phi); 2067 std::swap(OrigInc, IsomorphicInc); 2068 } 2069 // Replacing the congruent phi is sufficient because acyclic 2070 // redundancy elimination, CSE/GVN, should handle the 2071 // rest. However, once SCEV proves that a phi is congruent, 2072 // it's often the head of an IV user cycle that is isomorphic 2073 // with the original phi. It's worth eagerly cleaning up the 2074 // common case of a single IV increment so that DeleteDeadPHIs 2075 // can remove cycles that had postinc uses. 
2076 const SCEV *TruncExpr = 2077 SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType()); 2078 if (OrigInc != IsomorphicInc && 2079 TruncExpr == SE.getSCEV(IsomorphicInc) && 2080 SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) && 2081 hoistIVInc(OrigInc, IsomorphicInc)) { 2082 DEBUG_WITH_TYPE(DebugType, 2083 dbgs() << "INDVARS: Eliminated congruent iv.inc: " 2084 << *IsomorphicInc << '\n'); 2085 Value *NewInc = OrigInc; 2086 if (OrigInc->getType() != IsomorphicInc->getType()) { 2087 Instruction *IP = nullptr; 2088 if (PHINode *PN = dyn_cast<PHINode>(OrigInc)) 2089 IP = &*PN->getParent()->getFirstInsertionPt(); 2090 else 2091 IP = OrigInc->getNextNode(); 2092 2093 IRBuilder<> Builder(IP); 2094 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); 2095 NewInc = Builder.CreateTruncOrBitCast( 2096 OrigInc, IsomorphicInc->getType(), IVName); 2097 } 2098 IsomorphicInc->replaceAllUsesWith(NewInc); 2099 DeadInsts.emplace_back(IsomorphicInc); 2100 } 2101 } 2102 } 2103 DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " 2104 << *Phi << '\n'); 2105 ++NumElim; 2106 Value *NewIV = OrigPhiRef; 2107 if (OrigPhiRef->getType() != Phi->getType()) { 2108 IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt()); 2109 Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); 2110 NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); 2111 } 2112 Phi->replaceAllUsesWith(NewIV); 2113 DeadInsts.emplace_back(Phi); 2114 } 2115 return NumElim; 2116 } 2117 2118 Value *SCEVExpander::getExactExistingExpansion(const SCEV *S, 2119 const Instruction *At, Loop *L) { 2120 Optional<ScalarEvolution::ValueOffsetPair> VO = 2121 getRelatedExistingExpansion(S, At, L); 2122 if (VO && VO.getValue().second == nullptr) 2123 return VO.getValue().first; 2124 return nullptr; 2125 } 2126 2127 Optional<ScalarEvolution::ValueOffsetPair> 2128 SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, 2129 Loop *L) { 2130 using namespace llvm::PatternMatch; 2131 2132 SmallVector<BasicBlock *, 4> ExitingBlocks; 2133 L->getExitingBlocks(ExitingBlocks); 2134 2135 // Look for suitable value in simple conditions at the loop exits. 2136 for (BasicBlock *BB : ExitingBlocks) { 2137 ICmpInst::Predicate Pred; 2138 Instruction *LHS, *RHS; 2139 2140 if (!match(BB->getTerminator(), 2141 m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), 2142 m_BasicBlock(), m_BasicBlock()))) 2143 continue; 2144 2145 if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) 2146 return ScalarEvolution::ValueOffsetPair(LHS, nullptr); 2147 2148 if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) 2149 return ScalarEvolution::ValueOffsetPair(RHS, nullptr); 2150 } 2151 2152 // Use expand's logic which is used for reusing a previous Value in 2153 // ExprValueMap. 2154 ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At); 2155 if (VO.first) 2156 return VO; 2157 2158 // There is potential to make this significantly smarter, but this simple 2159 // heuristic already gets some interesting cases. 2160 2161 // Can not find suitable value. 2162 return None; 2163 } 2164 2165 bool SCEVExpander::isHighCostExpansionHelper( 2166 const SCEV *S, Loop *L, const Instruction &At, int &BudgetRemaining, 2167 const TargetTransformInfo &TTI, SmallPtrSetImpl<const SCEV *> &Processed, 2168 SmallVectorImpl<const SCEV *> &Worklist) { 2169 if (BudgetRemaining < 0) 2170 return true; // Already run out of budget, give up. 
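  // The cost model is worklist-driven: this call only charges the cost of the
  // top-level operation of S against BudgetRemaining and pushes S's operands
  // onto Worklist; the caller is expected to keep re-invoking this helper on
  // worklist entries until the list drains or the budget goes negative, e.g.
  // (illustrative sketch, not the literal caller):
  //   while (!Worklist.empty())
  //     if (isHighCostExpansionHelper(Worklist.pop_back_val(), L, At,
  //                                   BudgetRemaining, TTI, Processed,
  //                                   Worklist))
  //       return true; // Expansion is too costly.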
2171
2172   // Was the cost of expansion of this expression already accounted for?
2173   if (!Processed.insert(S).second)
2174     return false; // We have already accounted for this expression.
2175
2176   // If we can find an existing value for this scev available at the point "At"
2177   // then consider the expression cheap.
2178   if (getRelatedExistingExpansion(S, &At, L))
2179     return false; // Consider the expression to be free.
2180
2181   switch (S->getSCEVType()) {
2182   case scUnknown:
2183   case scConstant:
2184     return false; // Assume to be zero-cost.
2185   }
2186
2187   TargetTransformInfo::TargetCostKind CostKind =
2188       TargetTransformInfo::TCK_RecipThroughput;
2189
2190   if (auto *CastExpr = dyn_cast<SCEVCastExpr>(S)) {
2191     unsigned Opcode;
2192     switch (S->getSCEVType()) {
2193     case scTruncate:
2194       Opcode = Instruction::Trunc;
2195       break;
2196     case scZeroExtend:
2197       Opcode = Instruction::ZExt;
2198       break;
2199     case scSignExtend:
2200       Opcode = Instruction::SExt;
2201       break;
2202     default:
2203       llvm_unreachable("There are no other cast types.");
2204     }
2205     const SCEV *Op = CastExpr->getOperand();
2206     BudgetRemaining -= TTI.getCastInstrCost(
2207         Opcode, /*Dst=*/S->getType(),
2208         /*Src=*/Op->getType(), TTI::CastContextHint::None, CostKind);
2209     Worklist.emplace_back(Op);
2210     return false; // Will answer upon next entry into this function.
2211   }
2212
2213   if (auto *UDivExpr = dyn_cast<SCEVUDivExpr>(S)) {
2214     // If the divisor is a power of two, count this as a logical right-shift.
2215     if (auto *SC = dyn_cast<SCEVConstant>(UDivExpr->getRHS())) {
2216       if (SC->getAPInt().isPowerOf2()) {
2217         BudgetRemaining -=
2218             TTI.getArithmeticInstrCost(Instruction::LShr, S->getType(),
2219                                        CostKind);
2220         // Note that we don't count the cost of RHS, because it is a constant,
2221         // and we consider those to be free. But if that changes, we would need
2222         // to log2() it first before calling isHighCostExpansionHelper().
2223         Worklist.emplace_back(UDivExpr->getLHS());
2224         return false; // Will answer upon next entry into this function.
2225       }
2226     }
2227
2228     // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2229     // HowManyLessThans produced to compute a precise expression, rather than a
2230     // UDiv from the user's code. If we can't find a UDiv in the code with some
2231     // simple searching, we need to account for its cost.
2232
2233     // At the beginning of this function we already tried to find an existing
2234     // value for plain 'S'. Now try to look up 'S + 1' since it is a common
2235     // pattern involving division. This is just a simple search heuristic.
2236     if (getRelatedExistingExpansion(
2237             SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
2238       return false; // Consider it to be free.
2239
2240     // Need to count the cost of this UDiv.
2241     BudgetRemaining -=
2242         TTI.getArithmeticInstrCost(Instruction::UDiv, S->getType(),
2243                                    CostKind);
2244     Worklist.insert(Worklist.end(), {UDivExpr->getLHS(), UDivExpr->getRHS()});
2245     return false; // Will answer upon next entry into this function.
2246   }
2247
2248   if (const auto *NAry = dyn_cast<SCEVAddRecExpr>(S)) {
2249     Type *OpType = NAry->getType();
2250
2251     assert(NAry->getNumOperands() >= 2 &&
2252            "Polynomial should be at least linear");
2253
2254     int AddCost =
2255         TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
2256     int MulCost =
2257         TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
2258
2259     // In this polynomial, we may have some zero operands, and we shouldn't
2260     // really charge for those. So how many non-zero coefficients are there?
2261     int NumTerms = llvm::count_if(NAry->operands(),
2262                                   [](const SCEV *S) { return !S->isZero(); });
2263     assert(NumTerms >= 1 && "Polynomial should have at least one term.");
2264     assert(!(*std::prev(NAry->operands().end()))->isZero() &&
2265            "Last operand should not be zero");
2266
2267     // Much like with a normal add expr, the polynomial will require
2268     // one less addition than the number of its terms.
2269     BudgetRemaining -= AddCost * (NumTerms - 1);
2270     if (BudgetRemaining < 0)
2271       return true;
2272
2273     // Ignoring the constant term (operand 0), how many of the coefficients are u> 1?
2274     int NumNonZeroDegreeNonOneTerms =
2275         llvm::count_if(make_range(std::next(NAry->op_begin()), NAry->op_end()),
2276                        [](const SCEV *S) {
2277                          auto *SConst = dyn_cast<SCEVConstant>(S);
2278                          return !SConst || SConst->getAPInt().ugt(1);
2279                        });
2280     // Here, *each* one of those will require a multiplication.
2281     BudgetRemaining -= MulCost * NumNonZeroDegreeNonOneTerms;
2282     if (BudgetRemaining < 0)
2283       return true;
2284
2285     // What is the degree of this polynomial?
2286     int PolyDegree = NAry->getNumOperands() - 1;
2287     assert(PolyDegree >= 1 && "Should be at least affine.");
2288
2289     // The final term will be:
2290     //   Op_{PolyDegree} * x ^ {PolyDegree}
2291     // Where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
2292     // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1} so charging for
2293     // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
2294     // FIXME: this is conservatively correct, but might be overly pessimistic.
2295     BudgetRemaining -= MulCost * (PolyDegree - 1);
2296     if (BudgetRemaining < 0)
2297       return true;
2298
2299     // And finally, the operands themselves should fit within the budget.
2300     Worklist.insert(Worklist.end(), NAry->operands().begin(),
2301                     NAry->operands().end());
2302     return false; // So far so good, though ops may be too costly?
2303   }
2304
2305   if (const SCEVNAryExpr *NAry = dyn_cast<SCEVNAryExpr>(S)) {
2306     Type *OpType = NAry->getType();
2307
2308     int PairCost;
2309     switch (S->getSCEVType()) {
2310     case scAddExpr:
2311       PairCost =
2312           TTI.getArithmeticInstrCost(Instruction::Add, OpType, CostKind);
2313       break;
2314     case scMulExpr:
2315       // TODO: this is a very pessimistic cost modelling for Mul,
2316       // because of the Bin Pow algorithm actually used by the expander,
2317       // see SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
2318       PairCost =
2319           TTI.getArithmeticInstrCost(Instruction::Mul, OpType, CostKind);
2320       break;
2321     case scSMaxExpr:
2322     case scUMaxExpr:
2323     case scSMinExpr:
2324     case scUMinExpr:
2325       PairCost = TTI.getCmpSelInstrCost(Instruction::ICmp, OpType,
2326                                         CmpInst::makeCmpResultType(OpType),
2327                                         CostKind) +
2328                  TTI.getCmpSelInstrCost(Instruction::Select, OpType,
2329                                         CmpInst::makeCmpResultType(OpType),
2330                                         CostKind);
2331       break;
2332     default:
2333       llvm_unreachable("There are no other variants here.");
2334     }
2335
2336     assert(NAry->getNumOperands() > 1 &&
2337            "Nary expr should have more than 1 operand.");
2338     // The simple nary expr will require one less op (or pair of ops)
2339     // than the number of its terms.
2340     BudgetRemaining -= PairCost * (NAry->getNumOperands() - 1);
2341     if (BudgetRemaining < 0)
2342       return true;
2343
2344     // And finally, the operands themselves should fit within the budget.
2345     Worklist.insert(Worklist.end(), NAry->operands().begin(),
2346                     NAry->operands().end());
2347     return false; // So far so good, though ops may be too costly?
2348   }
2349
2350   llvm_unreachable("No other scev expressions possible.");
2351 }
2352
2353 Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred,
2354                                             Instruction *IP) {
2355   assert(IP);
2356   switch (Pred->getKind()) {
2357   case SCEVPredicate::P_Union:
2358     return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP);
2359   case SCEVPredicate::P_Equal:
2360     return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP);
2361   case SCEVPredicate::P_Wrap: {
2362     auto *AddRecPred = cast<SCEVWrapPredicate>(Pred);
2363     return expandWrapPredicate(AddRecPred, IP);
2364   }
2365   }
2366   llvm_unreachable("Unknown SCEV predicate type");
2367 }
2368
2369 Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred,
2370                                           Instruction *IP) {
2371   Value *Expr0 =
2372       expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP, false);
2373   Value *Expr1 =
2374       expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP, false);
2375
2376   Builder.SetInsertPoint(IP);
2377   auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check");
2378   return I;
2379 }
2380
2381 Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR,
2382                                            Instruction *Loc, bool Signed) {
2383   assert(AR->isAffine() && "Cannot generate RT check for "
2384                            "non-affine expression");
2385
2386   SCEVUnionPredicate Pred;
2387   const SCEV *ExitCount =
2388       SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred);
2389
2390   assert(ExitCount != SE.getCouldNotCompute() && "Invalid loop count");
2391
2392   const SCEV *Step = AR->getStepRecurrence(SE);
2393   const SCEV *Start = AR->getStart();
2394
2395   Type *ARTy = AR->getType();
2396   unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType());
2397   unsigned DstBits = SE.getTypeSizeInBits(ARTy);
2398
2399   // The expression {Start,+,Step} has nusw/nssw if
2400   //   Step < 0, Start - |Step| * Backedge <= Start
2401   //   Step >= 0, Start + |Step| * Backedge > Start
2402   // and |Step| * Backedge doesn't unsigned overflow.
2403
2404   IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits);
2405   Builder.SetInsertPoint(Loc);
2406   Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc, false);
2407
2408   IntegerType *Ty =
2409       IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy));
2410   Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty;
2411
2412   Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false);
2413   Value *NegStepValue =
2414       expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false);
2415   Value *StartValue = expandCodeForImpl(Start, ARExpandTy, Loc, false);
2416
2417   ConstantInt *Zero =
2418       ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits));
2419
2420   Builder.SetInsertPoint(Loc);
2421   // Compute |Step|
2422   Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero);
2423   Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue);
2424
2425   // Get the backedge taken count and truncate or extend it to the AR type.
2426   Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty);
2427   auto *MulF = Intrinsic::getDeclaration(Loc->getModule(),
2428                                          Intrinsic::umul_with_overflow, Ty);
2429
2430   // Compute |Step| * Backedge
2431   CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul");
2432   Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result");
2433   Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow");
2434
2435   // Compute:
2436   //   Start + |Step| * Backedge < Start
2437   //   Start - |Step| * Backedge > Start
2438   Value *Add = nullptr, *Sub = nullptr;
2439   if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) {
2440     const SCEV *MulS = SE.getSCEV(MulV);
2441     const SCEV *NegMulS = SE.getNegativeSCEV(MulS);
2442     Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue),
2443                                 ARPtrTy);
2444     Sub = Builder.CreateBitCast(
2445         expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy);
2446   } else {
2447     Add = Builder.CreateAdd(StartValue, MulV);
2448     Sub = Builder.CreateSub(StartValue, MulV);
2449   }
2450
2451   Value *EndCompareGT = Builder.CreateICmp(
2452       Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue);
2453
2454   Value *EndCompareLT = Builder.CreateICmp(
2455       Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue);
2456
2457   // Select the answer based on the sign of Step.
2458   Value *EndCheck =
2459       Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT);
2460
2461   // If the backedge taken count type is larger than the AR type,
2462   // check that we don't drop any bits by truncating it. If we are
2463   // dropping bits, then we have overflow (unless the step is zero).
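  // For example, with an i32 addrec and an i64 backedge-taken count of 2^32,
  // the count does not fit into i32, so for any nonzero |Step| the product
  // |Step| * Backedge cannot be represented in the narrow type and the check
  // must report overflow.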
2464 if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) { 2465 auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits); 2466 auto *BackedgeCheck = 2467 Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal, 2468 ConstantInt::get(Loc->getContext(), MaxVal)); 2469 BackedgeCheck = Builder.CreateAnd( 2470 BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero)); 2471 2472 EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck); 2473 } 2474 2475 return Builder.CreateOr(EndCheck, OfMul); 2476 } 2477 2478 Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred, 2479 Instruction *IP) { 2480 const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr()); 2481 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr; 2482 2483 // Add a check for NUSW 2484 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW) 2485 NUSWCheck = generateOverflowCheck(A, IP, false); 2486 2487 // Add a check for NSSW 2488 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW) 2489 NSSWCheck = generateOverflowCheck(A, IP, true); 2490 2491 if (NUSWCheck && NSSWCheck) 2492 return Builder.CreateOr(NUSWCheck, NSSWCheck); 2493 2494 if (NUSWCheck) 2495 return NUSWCheck; 2496 2497 if (NSSWCheck) 2498 return NSSWCheck; 2499 2500 return ConstantInt::getFalse(IP->getContext()); 2501 } 2502 2503 Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, 2504 Instruction *IP) { 2505 auto *BoolType = IntegerType::get(IP->getContext(), 1); 2506 Value *Check = ConstantInt::getNullValue(BoolType); 2507 2508 // Loop over all checks in this set. 2509 for (auto Pred : Union->getPredicates()) { 2510 auto *NextCheck = expandCodeForPredicate(Pred, IP); 2511 Builder.SetInsertPoint(IP); 2512 Check = Builder.CreateOr(Check, NextCheck); 2513 } 2514 2515 return Check; 2516 } 2517 2518 Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) { 2519 assert(PreserveLCSSA); 2520 SmallVector<Instruction *, 1> ToUpdate; 2521 2522 auto *OpV = User->getOperand(OpIdx); 2523 auto *OpI = dyn_cast<Instruction>(OpV); 2524 if (!OpI) 2525 return OpV; 2526 2527 Loop *DefLoop = SE.LI.getLoopFor(OpI->getParent()); 2528 Loop *UseLoop = SE.LI.getLoopFor(User->getParent()); 2529 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop)) 2530 return OpV; 2531 2532 ToUpdate.push_back(OpI); 2533 formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE); 2534 return User->getOperand(OpIdx); 2535 } 2536 2537 namespace { 2538 // Search for a SCEV subexpression that is not safe to expand. Any expression 2539 // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely 2540 // UDiv expressions. We don't know if the UDiv is derived from an IR divide 2541 // instruction, but the important thing is that we prove the denominator is 2542 // nonzero before expansion. 2543 // 2544 // IVUsers already checks that IV-derived expressions are safe. So this check is 2545 // only needed when the expression includes some subexpression that is not IV 2546 // derived. 2547 // 2548 // Currently, we only allow division by a nonzero constant here. If this is 2549 // inadequate, we could easily allow division by SCEVUnknown by using 2550 // ValueTracking to check isKnownNonZero(). 2551 // 2552 // We cannot generally expand recurrences unless the step dominates the loop 2553 // header. The expander handles the special case of affine recurrences by 2554 // scaling the recurrence outside the loop, but this technique isn't generally 2555 // applicable. 
Expanding a nested recurrence outside a loop requires computing 2556 // binomial coefficients. This could be done, but the recurrence has to be in a 2557 // perfectly reduced form, which can't be guaranteed. 2558 struct SCEVFindUnsafe { 2559 ScalarEvolution &SE; 2560 bool IsUnsafe; 2561 2562 SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {} 2563 2564 bool follow(const SCEV *S) { 2565 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { 2566 const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); 2567 if (!SC || SC->getValue()->isZero()) { 2568 IsUnsafe = true; 2569 return false; 2570 } 2571 } 2572 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { 2573 const SCEV *Step = AR->getStepRecurrence(SE); 2574 if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { 2575 IsUnsafe = true; 2576 return false; 2577 } 2578 } 2579 return true; 2580 } 2581 bool isDone() const { return IsUnsafe; } 2582 }; 2583 } 2584 2585 namespace llvm { 2586 bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { 2587 SCEVFindUnsafe Search(SE); 2588 visitAll(S, Search); 2589 return !Search.IsUnsafe; 2590 } 2591 2592 bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, 2593 ScalarEvolution &SE) { 2594 if (!isSafeToExpand(S, SE)) 2595 return false; 2596 // We have to prove that the expanded site of S dominates InsertionPoint. 2597 // This is easy when not in the same block, but hard when S is an instruction 2598 // to be expanded somewhere inside the same block as our insertion point. 2599 // What we really need here is something analogous to an OrderedBasicBlock, 2600 // but for the moment, we paper over the problem by handling two common and 2601 // cheap to check cases. 2602 if (SE.properlyDominates(S, InsertionPoint->getParent())) 2603 return true; 2604 if (SE.dominates(S, InsertionPoint->getParent())) { 2605 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint) 2606 return true; 2607 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) 2608 for (const Value *V : InsertionPoint->operand_values()) 2609 if (V == U->getValue()) 2610 return true; 2611 } 2612 return false; 2613 } 2614 } 2615