//===- ScalarEvolutionExpander.cpp - Scalar Evolution Analysis ------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the implementation of the scalar evolution expander,
// which is used to generate the code corresponding to a given scalar evolution
// expression.
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/LoopUtils.h"

using namespace llvm;

cl::opt<unsigned> llvm::SCEVCheapExpansionBudget(
    "scev-cheap-expansion-budget", cl::Hidden, cl::init(4),
    cl::desc("When performing SCEV expansion only if it is cheap to do, this "
             "controls the budget that is considered cheap (default = 4)"));

using namespace PatternMatch;

/// ReuseOrCreateCast - Arrange for there to be a cast of V to Ty at IP,
/// reusing an existing cast if a suitable one (= dominating IP) exists, or
/// creating a new one.
Value *SCEVExpander::ReuseOrCreateCast(Value *V, Type *Ty,
                                       Instruction::CastOps Op,
                                       BasicBlock::iterator IP) {
  // This function must be called with the builder having a valid insertion
  // point. It doesn't need to be the actual IP where the uses of the returned
  // cast will be added, but it must dominate such IP.
  // We use this precondition to produce a cast that will dominate all its
  // uses. In particular, this is crucial for the case where the builder's
  // insertion point *is* the point where we were asked to put the cast.
  // Since we don't know the builder's insertion point is actually
  // where the uses will be added (only that it dominates it), we are
  // not allowed to move it.
  BasicBlock::iterator BIP = Builder.GetInsertPoint();

  Instruction *Ret = nullptr;

  // Check to see if there is already a cast!
  for (User *U : V->users()) {
    if (U->getType() != Ty)
      continue;
    CastInst *CI = dyn_cast<CastInst>(U);
    if (!CI || CI->getOpcode() != Op)
      continue;

    // Found a suitable cast that is at IP or comes before IP. Use it. Note that
    // the cast must also properly dominate the Builder's insertion point.
    if (IP->getParent() == CI->getParent() && &*BIP != CI &&
        (&*IP == CI || CI->comesBefore(&*IP))) {
      Ret = CI;
      break;
    }
  }

  // Create a new cast.
  if (!Ret) {
    Ret = CastInst::Create(Op, V, Ty, V->getName(), &*IP);
    rememberInstruction(Ret);
  }

  // We assert at the end of the function since IP might point to an
  // instruction with different dominance properties than a cast
  // (an invoke for example) and not dominate BIP (but the cast does).
  assert(SE.DT.dominates(Ret, &*BIP));

  return Ret;
}

BasicBlock::iterator
SCEVExpander::findInsertPointAfter(Instruction *I,
                                   Instruction *MustDominate) const {
  BasicBlock::iterator IP = ++I->getIterator();
  if (auto *II = dyn_cast<InvokeInst>(I))
    IP = II->getNormalDest()->begin();

  while (isa<PHINode>(IP))
    ++IP;

  if (isa<FuncletPadInst>(IP) || isa<LandingPadInst>(IP)) {
    ++IP;
  } else if (isa<CatchSwitchInst>(IP)) {
    IP = MustDominate->getParent()->getFirstInsertionPt();
  } else {
    assert(!IP->isEHPad() && "unexpected eh pad!");
  }

  // Adjust insert point to be after instructions inserted by the expander, so
  // we can re-use already inserted instructions. Avoid skipping past the
  // original \p MustDominate, in case it is an inserted instruction.
  while (isInsertedInstruction(&*IP) && &*IP != MustDominate)
    ++IP;

  return IP;
}

BasicBlock::iterator
SCEVExpander::GetOptimalInsertionPointForCastOf(Value *V) const {
  // Cast the argument at the beginning of the entry block, after
  // any bitcasts of other arguments.
  if (Argument *A = dyn_cast<Argument>(V)) {
    BasicBlock::iterator IP = A->getParent()->getEntryBlock().begin();
    while ((isa<BitCastInst>(IP) &&
            isa<Argument>(cast<BitCastInst>(IP)->getOperand(0)) &&
            cast<BitCastInst>(IP)->getOperand(0) != A) ||
           isa<DbgInfoIntrinsic>(IP))
      ++IP;
    return IP;
  }

  // Otherwise, insert the cast immediately after the instruction that
  // defines V.
  Instruction *I = cast<Instruction>(V);
  return findInsertPointAfter(I, &*Builder.GetInsertPoint());
}

/// InsertNoopCastOfTo - Insert a cast of V to the specified type,
/// which must be possible with a noop cast, doing what we can to share
/// the casts.
Value *SCEVExpander::InsertNoopCastOfTo(Value *V, Type *Ty) {
  Instruction::CastOps Op = CastInst::getCastOpcode(V, false, Ty, false);
  assert((Op == Instruction::BitCast ||
          Op == Instruction::PtrToInt ||
          Op == Instruction::IntToPtr) &&
         "InsertNoopCastOfTo cannot perform non-noop casts!");
  assert(SE.getTypeSizeInBits(V->getType()) == SE.getTypeSizeInBits(Ty) &&
         "InsertNoopCastOfTo cannot change sizes!");

  // inttoptr only works for integral pointers. For non-integral pointers, we
  // can create a GEP on i8* null with the integral value as index. Note that
  // it is safe to use GEP of null instead of inttoptr here, because only
  // expressions already based on a GEP of null should be converted to pointers
  // during expansion.
  if (Op == Instruction::IntToPtr) {
    auto *PtrTy = cast<PointerType>(Ty);
    if (DL.isNonIntegralPointerType(PtrTy)) {
      auto *Int8PtrTy = Builder.getInt8PtrTy(PtrTy->getAddressSpace());
      assert(DL.getTypeAllocSize(Int8PtrTy->getElementType()) == 1 &&
             "alloc size of i8 must be 1 byte for the GEP to be correct");
      auto *GEP = Builder.CreateGEP(
          Builder.getInt8Ty(), Constant::getNullValue(Int8PtrTy), V, "uglygep");
      return Builder.CreateBitCast(GEP, Ty);
    }
  }
  // Short-circuit unnecessary bitcasts.
  if (Op == Instruction::BitCast) {
    if (V->getType() == Ty)
      return V;
    if (CastInst *CI = dyn_cast<CastInst>(V)) {
      if (CI->getOperand(0)->getType() == Ty)
        return CI->getOperand(0);
    }
  }
  // Short-circuit unnecessary inttoptr<->ptrtoint casts.
  if ((Op == Instruction::PtrToInt || Op == Instruction::IntToPtr) &&
      SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(V->getType())) {
    if (CastInst *CI = dyn_cast<CastInst>(V))
      if ((CI->getOpcode() == Instruction::PtrToInt ||
           CI->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CI->getType()) ==
              SE.getTypeSizeInBits(CI->getOperand(0)->getType()))
        return CI->getOperand(0);
    if (ConstantExpr *CE = dyn_cast<ConstantExpr>(V))
      if ((CE->getOpcode() == Instruction::PtrToInt ||
           CE->getOpcode() == Instruction::IntToPtr) &&
          SE.getTypeSizeInBits(CE->getType()) ==
              SE.getTypeSizeInBits(CE->getOperand(0)->getType()))
        return CE->getOperand(0);
  }

  // Fold a cast of a constant.
  if (Constant *C = dyn_cast<Constant>(V))
    return ConstantExpr::getCast(Op, C, Ty);

  // Try to reuse existing cast, or insert one.
  return ReuseOrCreateCast(V, Ty, Op, GetOptimalInsertionPointForCastOf(V));
}

/// InsertBinop - Insert the specified binary operator, doing a small amount
/// of work to avoid inserting an obviously redundant operation, and hoisting
/// to an outer loop when the opportunity is there and it is safe.
Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
                                 Value *LHS, Value *RHS,
                                 SCEV::NoWrapFlags Flags, bool IsSafeToHoist) {
  // Fold a binop with constant operands.
  if (Constant *CLHS = dyn_cast<Constant>(LHS))
    if (Constant *CRHS = dyn_cast<Constant>(RHS))
      return ConstantExpr::get(Opcode, CLHS, CRHS);

  // Do a quick scan to see if we have this binop nearby. If so, reuse it.
  unsigned ScanLimit = 6;
  BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
  // Scanning starts from the last instruction before the insertion point.
  BasicBlock::iterator IP = Builder.GetInsertPoint();
  if (IP != BlockBegin) {
    --IP;
    for (; ScanLimit; --IP, --ScanLimit) {
      // Don't count dbg.value against the ScanLimit, to avoid perturbing the
      // generated code.
      if (isa<DbgInfoIntrinsic>(IP))
        ScanLimit++;

      auto canGenerateIncompatiblePoison = [&Flags](Instruction *I) {
        // Ensure that no-wrap flags match.
        if (isa<OverflowingBinaryOperator>(I)) {
          if (I->hasNoSignedWrap() != (Flags & SCEV::FlagNSW))
            return true;
          if (I->hasNoUnsignedWrap() != (Flags & SCEV::FlagNUW))
            return true;
        }
        // Conservatively, do not reuse any instruction which has the exact
        // flag set.
        if (isa<PossiblyExactOperator>(I) && I->isExact())
          return true;
        return false;
      };
      if (IP->getOpcode() == (unsigned)Opcode && IP->getOperand(0) == LHS &&
          IP->getOperand(1) == RHS && !canGenerateIncompatiblePoison(&*IP))
        return &*IP;
      if (IP == BlockBegin) break;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc();
  SCEVInsertPointGuard Guard(Builder, this);

  if (IsSafeToHoist) {
    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }
  }

  // If we haven't found this binop, insert it.
  Instruction *BO = cast<Instruction>(Builder.CreateBinOp(Opcode, LHS, RHS));
  BO->setDebugLoc(Loc);
  if (Flags & SCEV::FlagNUW)
    BO->setHasNoUnsignedWrap();
  if (Flags & SCEV::FlagNSW)
    BO->setHasNoSignedWrap();

  return BO;
}

/// FactorOutConstant - Test if S is divisible by Factor, using signed
/// division. If so, update S with Factor divided out and return true.
/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
static bool FactorOutConstant(const SCEV *&S, const SCEV *&Remainder,
                              const SCEV *Factor, ScalarEvolution &SE,
                              const DataLayout &DL) {
  // Everything is divisible by one.
  if (Factor->isOne())
    return true;

  // x/x == 1.
  if (S == Factor) {
    S = SE.getConstant(S->getType(), 1);
    return true;
  }

  // For a Constant, check for a multiple of the given factor.
  if (const SCEVConstant *C = dyn_cast<SCEVConstant>(S)) {
    // 0/x == 0.
    if (C->isZero())
      return true;
    // Check for divisibility.
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor)) {
      ConstantInt *CI =
          ConstantInt::get(SE.getContext(), C->getAPInt().sdiv(FC->getAPInt()));
      // If the quotient is zero and the remainder is non-zero, reject
      // the value at this scale. It will be considered for subsequent
      // smaller scales.
      if (!CI->isZero()) {
        const SCEV *Div = SE.getConstant(CI);
        S = Div;
        Remainder = SE.getAddExpr(
            Remainder, SE.getConstant(C->getAPInt().srem(FC->getAPInt())));
        return true;
      }
    }
  }

  // In a Mul, check if there is a constant operand which is a multiple
  // of the given factor.
  if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(S)) {
    // Size is known, check if there is a constant operand which is a multiple
    // of the given factor. If so, we can factor it.
    if (const SCEVConstant *FC = dyn_cast<SCEVConstant>(Factor))
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(M->getOperand(0)))
        if (!C->getAPInt().srem(FC->getAPInt())) {
          SmallVector<const SCEV *, 4> NewMulOps(M->operands());
          NewMulOps[0] = SE.getConstant(C->getAPInt().sdiv(FC->getAPInt()));
          S = SE.getMulExpr(NewMulOps);
          return true;
        }
  }

  // In an AddRec, check if both start and step are divisible.
  if (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(S)) {
    const SCEV *Step = A->getStepRecurrence(SE);
    const SCEV *StepRem = SE.getConstant(Step->getType(), 0);
    if (!FactorOutConstant(Step, StepRem, Factor, SE, DL))
      return false;
    if (!StepRem->isZero())
      return false;
    const SCEV *Start = A->getStart();
    if (!FactorOutConstant(Start, Remainder, Factor, SE, DL))
      return false;
    S = SE.getAddRecExpr(Start, Step, A->getLoop(),
                         A->getNoWrapFlags(SCEV::FlagNW));
    return true;
  }

  return false;
}

/// SimplifyAddOperands - Sort and simplify a list of add operands. NumAddRecs
/// is the number of SCEVAddRecExprs present, which are kept at the end of
/// the list.
///
static void SimplifyAddOperands(SmallVectorImpl<const SCEV *> &Ops,
                                Type *Ty,
                                ScalarEvolution &SE) {
  unsigned NumAddRecs = 0;
  for (unsigned i = Ops.size(); i > 0 && isa<SCEVAddRecExpr>(Ops[i-1]); --i)
    ++NumAddRecs;
  // Group Ops into non-addrecs and addrecs.
  SmallVector<const SCEV *, 8> NoAddRecs(Ops.begin(), Ops.end() - NumAddRecs);
  SmallVector<const SCEV *, 8> AddRecs(Ops.end() - NumAddRecs, Ops.end());
  // Let ScalarEvolution sort and simplify the non-addrecs list.
  const SCEV *Sum = NoAddRecs.empty() ?
                    SE.getConstant(Ty, 0) :
                    SE.getAddExpr(NoAddRecs);
  // If it returned an add, use the operands. Otherwise it simplified
  // the sum into a single value, so just use that.
  Ops.clear();
  if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Sum))
    Ops.append(Add->op_begin(), Add->op_end());
  else if (!Sum->isZero())
    Ops.push_back(Sum);
  // Then append the addrecs.
  Ops.append(AddRecs.begin(), AddRecs.end());
}

/// SplitAddRecs - Flatten a list of add operands, moving addrec start values
/// out to the top level. For example, convert {a + b,+,c} to a, b, {0,+,c}.
/// This helps expose more opportunities for folding parts of the expressions
/// into GEP indices.
///
static void SplitAddRecs(SmallVectorImpl<const SCEV *> &Ops,
                         Type *Ty,
                         ScalarEvolution &SE) {
  // Find the addrecs.
  SmallVector<const SCEV *, 8> AddRecs;
  for (unsigned i = 0, e = Ops.size(); i != e; ++i)
    while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Ops[i])) {
      const SCEV *Start = A->getStart();
      if (Start->isZero()) break;
      const SCEV *Zero = SE.getConstant(Ty, 0);
      AddRecs.push_back(SE.getAddRecExpr(Zero,
                                         A->getStepRecurrence(SE),
                                         A->getLoop(),
                                         A->getNoWrapFlags(SCEV::FlagNW)));
      if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(Start)) {
        Ops[i] = Zero;
        Ops.append(Add->op_begin(), Add->op_end());
        e += Add->getNumOperands();
      } else {
        Ops[i] = Start;
      }
    }
  if (!AddRecs.empty()) {
    // Add the addrecs onto the end of the list.
    Ops.append(AddRecs.begin(), AddRecs.end());
    // Resort the operand list, moving any constants to the front.
    SimplifyAddOperands(Ops, Ty, SE);
  }
}

/// expandAddToGEP - Expand an addition expression with a pointer type into
/// a GEP instead of using ptrtoint+arithmetic+inttoptr. This helps
/// BasicAliasAnalysis and other passes analyze the result. See the rules
/// for getelementptr vs. inttoptr in
/// http://llvm.org/docs/LangRef.html#pointeraliasing
/// for details.
///
/// Design note: The correctness of using getelementptr here depends on
/// ScalarEvolution not recognizing inttoptr and ptrtoint operators, as
/// they may introduce pointer arithmetic which may not be safely converted
/// into getelementptr.
///
/// Design note: It might seem desirable for this function to be more
/// loop-aware. If some of the indices are loop-invariant while others
/// aren't, it might seem desirable to emit multiple GEPs, keeping the
/// loop-invariant portions of the overall computation outside the loop.
/// However, there are a few reasons this is not done here. Hoisting simple
/// arithmetic is a low-level optimization that often isn't very
/// important until late in the optimization process. In fact, passes
/// like InstructionCombining will combine GEPs, even if it means
/// pushing loop-invariant computation down into loops, so even if the
/// GEPs were split here, the work would quickly be undone.
/// The LoopStrengthReduction pass, which is usually run quite late (and
/// after the last InstructionCombining pass), takes care of hoisting
/// loop-invariant portions of expressions, after considering what
/// can be folded using target addressing modes.
///
Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
                                    const SCEV *const *op_end,
                                    PointerType *PTy,
                                    Type *Ty,
                                    Value *V) {
  Type *OriginalElTy = PTy->getElementType();
  Type *ElTy = OriginalElTy;
  SmallVector<Value *, 4> GepIndices;
  SmallVector<const SCEV *, 8> Ops(op_begin, op_end);
  bool AnyNonZeroIndices = false;

  // Split AddRecs up into parts as either of the parts may be usable
  // without the other.
  SplitAddRecs(Ops, Ty, SE);

  Type *IntIdxTy = DL.getIndexType(PTy);

  // Descend down the pointer's type and attempt to convert the other
  // operands into GEP indices, at each level. The first index in a GEP
  // indexes into the array implied by the pointer operand; the rest of
  // the indices index into the element or field type selected by the
  // preceding index.
  for (;;) {
    // If the scale size is not 0, attempt to factor out a scale for
    // array indexing.
    SmallVector<const SCEV *, 8> ScaledOps;
    if (ElTy->isSized()) {
      const SCEV *ElSize = SE.getSizeOfExpr(IntIdxTy, ElTy);
      if (!ElSize->isZero()) {
        SmallVector<const SCEV *, 8> NewOps;
        for (const SCEV *Op : Ops) {
          const SCEV *Remainder = SE.getConstant(Ty, 0);
          if (FactorOutConstant(Op, Remainder, ElSize, SE, DL)) {
            // Op now has ElSize factored out.
            ScaledOps.push_back(Op);
            if (!Remainder->isZero())
              NewOps.push_back(Remainder);
            AnyNonZeroIndices = true;
          } else {
            // The operand was not divisible, so add it to the list of operands
            // we'll scan next iteration.
            NewOps.push_back(Op);
          }
        }
        // If we made any changes, update Ops.
        if (!ScaledOps.empty()) {
          Ops = NewOps;
          SimplifyAddOperands(Ops, Ty, SE);
        }
      }
    }

    // Record the scaled array index for this level of the type. If
    // we didn't find any operands that could be factored, tentatively
    // assume that element zero was selected (since the zero offset
    // would obviously be folded away).
    Value *Scaled =
        ScaledOps.empty()
            ? Constant::getNullValue(Ty)
            : expandCodeForImpl(SE.getAddExpr(ScaledOps), Ty, false);
    GepIndices.push_back(Scaled);

    // Collect struct field index operands.
    while (StructType *STy = dyn_cast<StructType>(ElTy)) {
      bool FoundFieldNo = false;
      // An empty struct has no fields.
      if (STy->getNumElements() == 0) break;
      // Field offsets are known. See if a constant offset falls within any of
      // the struct fields.
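      // For example, with a struct { i32, i32, i64 } laid out at offsets
      // 0, 4 and 8 and a constant offset of 12, field index 2 is selected
      // and the remaining offset becomes 12 - 8 = 4.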
      if (Ops.empty())
        break;
      if (const SCEVConstant *C = dyn_cast<SCEVConstant>(Ops[0]))
        if (SE.getTypeSizeInBits(C->getType()) <= 64) {
          const StructLayout &SL = *DL.getStructLayout(STy);
          uint64_t FullOffset = C->getValue()->getZExtValue();
          if (FullOffset < SL.getSizeInBytes()) {
            unsigned ElIdx = SL.getElementContainingOffset(FullOffset);
            GepIndices.push_back(
                ConstantInt::get(Type::getInt32Ty(Ty->getContext()), ElIdx));
            ElTy = STy->getTypeAtIndex(ElIdx);
            Ops[0] =
                SE.getConstant(Ty, FullOffset - SL.getElementOffset(ElIdx));
            AnyNonZeroIndices = true;
            FoundFieldNo = true;
          }
        }
      // If no struct field offsets were found, tentatively assume that
      // field zero was selected (since the zero offset would obviously
      // be folded away).
      if (!FoundFieldNo) {
        ElTy = STy->getTypeAtIndex(0u);
        GepIndices.push_back(
            Constant::getNullValue(Type::getInt32Ty(Ty->getContext())));
      }
    }

    if (ArrayType *ATy = dyn_cast<ArrayType>(ElTy))
      ElTy = ATy->getElementType();
    else
      // FIXME: Handle VectorType.
      // E.g., if ElTy is a scalable vector, then ElSize is not a compile-time
      // constant and therefore cannot be factored out. The generated IR is
      // less ideal: the base 'V' is cast to i8* and an ugly getelementptr is
      // emitted over that.
      break;
  }

  // If none of the operands were convertible to proper GEP indices, cast
  // the base to i8* and do an ugly getelementptr with that. It's still
  // better than ptrtoint+arithmetic+inttoptr at least.
  if (!AnyNonZeroIndices) {
    // Cast the base to i8*.
    V = InsertNoopCastOfTo(V,
         Type::getInt8PtrTy(Ty->getContext(), PTy->getAddressSpace()));

    assert(!isa<Instruction>(V) ||
           SE.DT.dominates(cast<Instruction>(V), &*Builder.GetInsertPoint()));

    // Expand the operands for a plain byte offset.
    Value *Idx = expandCodeForImpl(SE.getAddExpr(Ops), Ty, false);

    // Fold a GEP with constant operands.
    if (Constant *CLHS = dyn_cast<Constant>(V))
      if (Constant *CRHS = dyn_cast<Constant>(Idx))
        return ConstantExpr::getGetElementPtr(Type::getInt8Ty(Ty->getContext()),
                                              CLHS, CRHS);

    // Do a quick scan to see if we have this GEP nearby. If so, reuse it.
    unsigned ScanLimit = 6;
    BasicBlock::iterator BlockBegin = Builder.GetInsertBlock()->begin();
    // Scanning starts from the last instruction before the insertion point.
    BasicBlock::iterator IP = Builder.GetInsertPoint();
    if (IP != BlockBegin) {
      --IP;
      for (; ScanLimit; --IP, --ScanLimit) {
        // Don't count dbg.value against the ScanLimit, to avoid perturbing the
        // generated code.
        if (isa<DbgInfoIntrinsic>(IP))
          ScanLimit++;
        if (IP->getOpcode() == Instruction::GetElementPtr &&
            IP->getOperand(0) == V && IP->getOperand(1) == Idx)
          return &*IP;
        if (IP == BlockBegin) break;
      }
    }

    // Save the original insertion point so we can restore it when we're done.
    SCEVInsertPointGuard Guard(Builder, this);

    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }

    // Emit a GEP.
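    // The result is plain byte arithmetic on an i8* base; for a byte offset
    // Idx, this is equivalent to computing V + Idx with char-sized elements.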
    return Builder.CreateGEP(Builder.getInt8Ty(), V, Idx, "uglygep");
  }

  {
    SCEVInsertPointGuard Guard(Builder, this);

    // Move the insertion point out of as many loops as we can.
    while (const Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock())) {
      if (!L->isLoopInvariant(V)) break;

      bool AnyIndexNotLoopInvariant = any_of(
          GepIndices, [L](Value *Op) { return !L->isLoopInvariant(Op); });

      if (AnyIndexNotLoopInvariant)
        break;

      BasicBlock *Preheader = L->getLoopPreheader();
      if (!Preheader) break;

      // Ok, move up a level.
      Builder.SetInsertPoint(Preheader->getTerminator());
    }

    // Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
    // because ScalarEvolution may have changed the address arithmetic to
    // compute a value which is beyond the end of the allocated object.
    Value *Casted = V;
    if (V->getType() != PTy)
      Casted = InsertNoopCastOfTo(Casted, PTy);
    Value *GEP = Builder.CreateGEP(OriginalElTy, Casted, GepIndices, "scevgep");
    Ops.push_back(SE.getUnknown(GEP));
  }

  return expand(SE.getAddExpr(Ops));
}

Value *SCEVExpander::expandAddToGEP(const SCEV *Op, PointerType *PTy, Type *Ty,
                                    Value *V) {
  const SCEV *const Ops[1] = {Op};
  return expandAddToGEP(Ops, Ops + 1, PTy, Ty, V);
}

/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
/// SCEV expansion. If they are nested, this is the most nested. If they are
/// neighboring, pick the later.
static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
                                        DominatorTree &DT) {
  if (!A) return B;
  if (!B) return A;
  if (A->contains(B)) return B;
  if (B->contains(A)) return A;
  if (DT.dominates(A->getHeader(), B->getHeader())) return B;
  if (DT.dominates(B->getHeader(), A->getHeader())) return A;
  return A; // Arbitrarily break the tie.
}

/// getRelevantLoop - Get the most relevant loop associated with the given
/// expression, according to PickMostRelevantLoop.
const Loop *SCEVExpander::getRelevantLoop(const SCEV *S) {
  // Test whether we've already computed the most relevant loop for this SCEV.
  auto Pair = RelevantLoops.insert(std::make_pair(S, nullptr));
  if (!Pair.second)
    return Pair.first->second;

  if (isa<SCEVConstant>(S))
    // A constant has no relevant loops.
    return nullptr;
  if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
    if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
      return Pair.first->second = SE.LI.getLoopFor(I->getParent());
    // A non-instruction has no relevant loops.
    return nullptr;
  }
  if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
    const Loop *L = nullptr;
    if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
      L = AR->getLoop();
    for (const SCEV *Op : N->operands())
      L = PickMostRelevantLoop(L, getRelevantLoop(Op), SE.DT);
    return RelevantLoops[N] = L;
  }
  if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S)) {
    const Loop *Result = getRelevantLoop(C->getOperand());
    return RelevantLoops[C] = Result;
  }
  if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) {
    const Loop *Result = PickMostRelevantLoop(
        getRelevantLoop(D->getLHS()), getRelevantLoop(D->getRHS()), SE.DT);
    return RelevantLoops[D] = Result;
  }
  llvm_unreachable("Unexpected SCEV type!");
}

namespace {

/// LoopCompare - Compare loops by PickMostRelevantLoop.
class LoopCompare {
  DominatorTree &DT;
public:
  explicit LoopCompare(DominatorTree &dt) : DT(dt) {}

  bool operator()(std::pair<const Loop *, const SCEV *> LHS,
                  std::pair<const Loop *, const SCEV *> RHS) const {
    // Keep pointer operands sorted at the end.
    if (LHS.second->getType()->isPointerTy() !=
        RHS.second->getType()->isPointerTy())
      return LHS.second->getType()->isPointerTy();

    // Compare loops with PickMostRelevantLoop.
    if (LHS.first != RHS.first)
      return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;

    // If one operand is a non-constant negative and the other is not,
    // put the non-constant negative on the right so that a sub can
    // be used instead of a negate and add.
    if (LHS.second->isNonConstantNegative()) {
      if (!RHS.second->isNonConstantNegative())
        return false;
    } else if (RHS.second->isNonConstantNegative())
      return true;

    // Otherwise they are equivalent according to this comparison.
    return false;
  }
};

}

Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the add operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal, and
  // so that pointer operands are inserted first, which the code below relies on
  // to form more involved GEPs.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants and
  // pointer operands precede non-pointer operands.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to add all the operands. Hoist as much as possible
  // out of loops, and form meaningful getelementptrs where possible.
  Value *Sum = nullptr;
  for (auto I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E;) {
    const Loop *CurLoop = I->first;
    const SCEV *Op = I->second;
    if (!Sum) {
      // This is the first operand. Just expand it.
      Sum = expand(Op);
      ++I;
    } else if (PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
      // The running sum expression is a pointer. Try to form a getelementptr
      // at this level with that as the base.
      SmallVector<const SCEV *, 4> NewOps;
      for (; I != E && I->first == CurLoop; ++I) {
        // If the operand is a SCEVUnknown that is not an instruction, peek
        // through it, to enable more of it to be folded into the GEP.
        const SCEV *X = I->second;
        if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(X))
          if (!isa<Instruction>(U->getValue()))
            X = SE.getSCEV(U->getValue());
        NewOps.push_back(X);
      }
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
    } else if (PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
      // The running sum is an integer, and there's a pointer at this level.
      // Try to form a getelementptr. If the running sum is instructions,
      // use a SCEVUnknown to avoid re-analyzing them.
      SmallVector<const SCEV *, 4> NewOps;
      NewOps.push_back(isa<Instruction>(Sum) ? SE.getUnknown(Sum) :
                                               SE.getSCEV(Sum));
      for (++I; I != E && I->first == CurLoop; ++I)
        NewOps.push_back(I->second);
      Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
    } else if (Op->isNonConstantNegative()) {
      // Instead of doing a negate and add, just do a subtract.
      Value *W = expandCodeForImpl(SE.getNegativeSCEV(Op), Ty, false);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      Sum = InsertBinop(Instruction::Sub, Sum, W, SCEV::FlagAnyWrap,
                        /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple add.
      Value *W = expandCodeForImpl(Op, Ty, false);
      Sum = InsertNoopCastOfTo(Sum, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Sum)) std::swap(Sum, W);
      Sum = InsertBinop(Instruction::Add, Sum, W, S->getNoWrapFlags(),
                        /*IsSafeToHoist*/ true);
      ++I;
    }
  }

  return Sum;
}

Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  // Collect all the mul operands in a loop, along with their associated loops.
  // Iterate in reverse so that constants are emitted last, all else equal.
  SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
  for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
       E(S->op_begin()); I != E; ++I)
    OpsAndLoops.push_back(std::make_pair(getRelevantLoop(*I), *I));

  // Sort by loop. Use a stable sort so that constants follow non-constants.
  llvm::stable_sort(OpsAndLoops, LoopCompare(SE.DT));

  // Emit instructions to mul all the operands. Hoist as much as possible
  // out of loops.
  Value *Prod = nullptr;
  auto I = OpsAndLoops.begin();

  // Expand the calculation of X pow N in the following manner:
  // Let N = P1 + P2 + ... + PK, where all P are powers of 2. Then:
  // X pow N = (X pow P1) * (X pow P2) * ... * (X pow PK).
  const auto ExpandOpBinPowN = [this, &I, &OpsAndLoops, &Ty]() {
    auto E = I;
    // Calculate how many times the same operand from the same loop is included
    // into this power.
    uint64_t Exponent = 0;
    const uint64_t MaxExponent = UINT64_MAX >> 1;
    // No one sane will ever try to calculate such huge exponents, but if we
    // need this, we stop on UINT64_MAX / 2 because we need to exit the loop
    // below when the power of 2 exceeds our Exponent, and we want it to be
    // 1u << 31 at most to not deal with unsigned overflow.
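    // For example, for Exponent == 11 (= 8 + 2 + 1), the code below computes
    // P^2, P^4 and P^8 by repeated squaring and multiplies P, P^2 and P^8
    // into Result.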
    while (E != OpsAndLoops.end() && *I == *E && Exponent != MaxExponent) {
      ++Exponent;
      ++E;
    }
    assert(Exponent > 0 && "Trying to calculate a zeroth exponent of operand?");

    // Calculate powers with exponents 1, 2, 4, 8, etc. and include those of
    // them that are needed in the result.
    Value *P = expandCodeForImpl(I->second, Ty, false);
    Value *Result = nullptr;
    if (Exponent & 1)
      Result = P;
    for (uint64_t BinExp = 2; BinExp <= Exponent; BinExp <<= 1) {
      P = InsertBinop(Instruction::Mul, P, P, SCEV::FlagAnyWrap,
                      /*IsSafeToHoist*/ true);
      if (Exponent & BinExp)
        Result = Result ? InsertBinop(Instruction::Mul, Result, P,
                                      SCEV::FlagAnyWrap,
                                      /*IsSafeToHoist*/ true)
                        : P;
    }

    I = E;
    assert(Result && "Nothing was expanded?");
    return Result;
  };

  while (I != OpsAndLoops.end()) {
    if (!Prod) {
      // This is the first operand. Just expand it.
      Prod = ExpandOpBinPowN();
    } else if (I->second->isAllOnesValue()) {
      // Instead of doing a multiply by negative one, just do a negate.
      Prod = InsertNoopCastOfTo(Prod, Ty);
      Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod,
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
      ++I;
    } else {
      // A simple mul.
      Value *W = ExpandOpBinPowN();
      Prod = InsertNoopCastOfTo(Prod, Ty);
      // Canonicalize a constant to the RHS.
      if (isa<Constant>(Prod)) std::swap(Prod, W);
      const APInt *RHS;
      if (match(W, m_Power2(RHS))) {
        // Canonicalize Prod*(1<<C) to Prod<<C.
        assert(!Ty->isVectorTy() && "vector types are not SCEVable");
        auto NWFlags = S->getNoWrapFlags();
        // Clear the nsw flag if the shl will produce a poison value.
        if (RHS->logBase2() == RHS->getBitWidth() - 1)
          NWFlags = ScalarEvolution::clearFlags(NWFlags, SCEV::FlagNSW);
        Prod = InsertBinop(Instruction::Shl, Prod,
                           ConstantInt::get(Ty, RHS->logBase2()), NWFlags,
                           /*IsSafeToHoist*/ true);
      } else {
        Prod = InsertBinop(Instruction::Mul, Prod, W, S->getNoWrapFlags(),
                           /*IsSafeToHoist*/ true);
      }
    }
  }

  return Prod;
}

Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
  Type *Ty = SE.getEffectiveSCEVType(S->getType());

  Value *LHS = expandCodeForImpl(S->getLHS(), Ty, false);
  if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getRHS())) {
    const APInt &RHS = SC->getAPInt();
    if (RHS.isPowerOf2())
      return InsertBinop(Instruction::LShr, LHS,
                         ConstantInt::get(Ty, RHS.logBase2()),
                         SCEV::FlagAnyWrap, /*IsSafeToHoist*/ true);
  }

  Value *RHS = expandCodeForImpl(S->getRHS(), Ty, false);
  return InsertBinop(Instruction::UDiv, LHS, RHS, SCEV::FlagAnyWrap,
                     /*IsSafeToHoist*/ SE.isKnownNonZero(S->getRHS()));
}

/// Move parts of Base into Rest to leave Base with the minimal
/// expression that provides a pointer operand suitable for a
/// GEP expansion.
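/// For example, starting with Base == {x + p,+,s} (where the pointer p is the
/// last add operand) and Rest == r, this produces Base == p and
/// Rest == r + x + {0,+,s}.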
static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest,
                              ScalarEvolution &SE) {
  while (const SCEVAddRecExpr *A = dyn_cast<SCEVAddRecExpr>(Base)) {
    Base = A->getStart();
    Rest = SE.getAddExpr(Rest,
                         SE.getAddRecExpr(SE.getConstant(A->getType(), 0),
                                          A->getStepRecurrence(SE),
                                          A->getLoop(),
                                          A->getNoWrapFlags(SCEV::FlagNW)));
  }
  if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(Base)) {
    Base = A->getOperand(A->getNumOperands()-1);
    SmallVector<const SCEV *, 8> NewAddOps(A->operands());
    NewAddOps.back() = Rest;
    Rest = SE.getAddExpr(NewAddOps);
    ExposePointerBase(Base, Rest, SE);
  }
}

/// Determine if this is a well-behaved chain of instructions leading back to
/// the PHI. If so, it may be reused by expanded expressions.
bool SCEVExpander::isNormalAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                         const Loop *L) {
  if (IncV->getNumOperands() == 0 || isa<PHINode>(IncV) ||
      (isa<CastInst>(IncV) && !isa<BitCastInst>(IncV)))
    return false;
  // If any of the operands don't dominate the insert position, bail.
  // Addrec operands are always loop-invariant, so this can only happen
  // if there are instructions which haven't been hoisted.
  if (L == IVIncInsertLoop) {
    for (Use &Op : llvm::drop_begin(IncV->operands()))
      if (Instruction *OInst = dyn_cast<Instruction>(Op))
        if (!SE.DT.dominates(OInst, IVIncInsertPos))
          return false;
  }
  // Advance to the next instruction.
  IncV = dyn_cast<Instruction>(IncV->getOperand(0));
  if (!IncV)
    return false;

  if (IncV->mayHaveSideEffects())
    return false;

  if (IncV == PN)
    return true;

  return isNormalAddRecExprPHI(PN, IncV, L);
}

/// getIVIncOperand returns an induction variable increment's induction
/// variable operand.
///
/// If allowScale is set, any type of GEP is allowed as long as the nonIV
/// operands dominate InsertPos.
///
/// If allowScale is not set, ensure that a GEP increment conforms to one of the
/// simple patterns generated by getAddRecExprPHILiterally and
/// expandAddtoGEP. If the pattern isn't recognized, return NULL.
Instruction *SCEVExpander::getIVIncOperand(Instruction *IncV,
                                           Instruction *InsertPos,
                                           bool allowScale) {
  if (IncV == InsertPos)
    return nullptr;

  switch (IncV->getOpcode()) {
  default:
    return nullptr;
  // Check for a simple Add/Sub or GEP of a loop invariant step.
  case Instruction::Add:
  case Instruction::Sub: {
    Instruction *OInst = dyn_cast<Instruction>(IncV->getOperand(1));
    if (!OInst || SE.DT.dominates(OInst, InsertPos))
      return dyn_cast<Instruction>(IncV->getOperand(0));
    return nullptr;
  }
  case Instruction::BitCast:
    return dyn_cast<Instruction>(IncV->getOperand(0));
  case Instruction::GetElementPtr:
    for (Use &U : llvm::drop_begin(IncV->operands())) {
      if (isa<Constant>(U))
        continue;
      if (Instruction *OInst = dyn_cast<Instruction>(U)) {
        if (!SE.DT.dominates(OInst, InsertPos))
          return nullptr;
      }
      if (allowScale) {
        // allow any kind of GEP as long as it can be hoisted.
        continue;
      }
      // This must be a pointer addition of constants (pretty), which is already
      // handled, or some number of address-size elements (ugly). Ugly geps
      // have 2 operands. i1* is used by the expander to represent an
      // address-size element.
      if (IncV->getNumOperands() != 2)
        return nullptr;
      unsigned AS = cast<PointerType>(IncV->getType())->getAddressSpace();
      if (IncV->getType() != Type::getInt1PtrTy(SE.getContext(), AS)
          && IncV->getType() != Type::getInt8PtrTy(SE.getContext(), AS))
        return nullptr;
      break;
    }
    return dyn_cast<Instruction>(IncV->getOperand(0));
  }
}

/// If the insert point of the current builder or any of the builders on the
/// stack of saved builders has 'I' as its insert point, update it to point to
/// the instruction after 'I'. This is intended to be used when the instruction
/// 'I' is being moved. If this fixup is not done and 'I' is moved to a
/// different block, the inconsistent insert point (with a mismatched
/// Instruction and Block) can lead to an instruction being inserted in a block
/// other than its parent.
void SCEVExpander::fixupInsertPoints(Instruction *I) {
  BasicBlock::iterator It(*I);
  BasicBlock::iterator NewInsertPt = std::next(It);
  if (Builder.GetInsertPoint() == It)
    Builder.SetInsertPoint(&*NewInsertPt);
  for (auto *InsertPtGuard : InsertPointGuards)
    if (InsertPtGuard->GetInsertPoint() == It)
      InsertPtGuard->SetInsertPoint(NewInsertPt);
}

/// hoistIVInc - Attempt to hoist a simple IV increment above InsertPos to make
/// it available to other uses in this loop. Recursively hoist any operands,
/// until we reach a value that dominates InsertPos.
bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) {
  if (SE.DT.dominates(IncV, InsertPos))
    return true;

  // InsertPos must itself dominate IncV so that IncV's new position satisfies
  // its existing users.
  if (isa<PHINode>(InsertPos) ||
      !SE.DT.dominates(InsertPos->getParent(), IncV->getParent()))
    return false;

  if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos))
    return false;

  // Check that the chain of IV operands leading back to Phi can be hoisted.
  SmallVector<Instruction*, 4> IVIncs;
  for(;;) {
    Instruction *Oper = getIVIncOperand(IncV, InsertPos, /*allowScale*/true);
    if (!Oper)
      return false;
    // IncV is safe to hoist.
    IVIncs.push_back(IncV);
    IncV = Oper;
    if (SE.DT.dominates(IncV, InsertPos))
      break;
  }
  for (auto I = IVIncs.rbegin(), E = IVIncs.rend(); I != E; ++I) {
    fixupInsertPoints(*I);
    (*I)->moveBefore(InsertPos);
  }
  return true;
}

/// Determine if this cyclic phi is in a form that would have been generated by
/// LSR. We don't care if the phi was actually expanded in this pass, as long
/// as it is in a low-cost form, for example, no implied multiplication. This
/// should match any patterns generated by getAddRecExprPHILiterally and
/// expandAddtoGEP.
bool SCEVExpander::isExpandedAddRecExprPHI(PHINode *PN, Instruction *IncV,
                                           const Loop *L) {
  for(Instruction *IVOper = IncV;
      (IVOper = getIVIncOperand(IVOper, L->getLoopPreheader()->getTerminator(),
                                /*allowScale=*/false));) {
    if (IVOper == PN)
      return true;
  }
  return false;
}

/// expandIVInc - Expand an IV increment at Builder's current InsertPos.
/// Typically this is the LatchBlock terminator or IVIncInsertPos, but we may
/// need to materialize IV increments elsewhere to handle difficult situations.
Value *SCEVExpander::expandIVInc(PHINode *PN, Value *StepV, const Loop *L,
                                 Type *ExpandTy, Type *IntTy,
                                 bool useSubtract) {
  Value *IncV;
  // If the PHI is a pointer, use a GEP, otherwise use an add or sub.
  if (ExpandTy->isPointerTy()) {
    PointerType *GEPPtrTy = cast<PointerType>(ExpandTy);
    // If the step isn't constant, don't use an implicitly scaled GEP, because
    // that would require a multiply inside the loop.
    if (!isa<ConstantInt>(StepV))
      GEPPtrTy = PointerType::get(Type::getInt1Ty(SE.getContext()),
                                  GEPPtrTy->getAddressSpace());
    IncV = expandAddToGEP(SE.getSCEV(StepV), GEPPtrTy, IntTy, PN);
    if (IncV->getType() != PN->getType())
      IncV = Builder.CreateBitCast(IncV, PN->getType());
  } else {
    IncV = useSubtract ?
      Builder.CreateSub(PN, StepV, Twine(IVName) + ".iv.next") :
      Builder.CreateAdd(PN, StepV, Twine(IVName) + ".iv.next");
  }
  return IncV;
}

/// Hoist the addrec instruction chain rooted in the loop phi above the
/// position. This routine assumes that this is possible (has been checked).
void SCEVExpander::hoistBeforePos(DominatorTree *DT, Instruction *InstToHoist,
                                  Instruction *Pos, PHINode *LoopPhi) {
  do {
    if (DT->dominates(InstToHoist, Pos))
      break;
    // Make sure the increment is where we want it. But don't move it
    // down past a potential existing post-inc user.
    fixupInsertPoints(InstToHoist);
    InstToHoist->moveBefore(Pos);
    Pos = InstToHoist;
    InstToHoist = cast<Instruction>(InstToHoist->getOperand(0));
  } while (InstToHoist != LoopPhi);
}

/// Check whether we can cheaply express the requested SCEV in terms of
/// the available PHI SCEV by truncation and/or inversion of the step.
static bool canBeCheaplyTransformed(ScalarEvolution &SE,
                                    const SCEVAddRecExpr *Phi,
                                    const SCEVAddRecExpr *Requested,
                                    bool &InvertStep) {
  Type *PhiTy = SE.getEffectiveSCEVType(Phi->getType());
  Type *RequestedTy = SE.getEffectiveSCEVType(Requested->getType());

  if (RequestedTy->getIntegerBitWidth() > PhiTy->getIntegerBitWidth())
    return false;

  // Try truncate it if necessary.
  Phi = dyn_cast<SCEVAddRecExpr>(SE.getTruncateOrNoop(Phi, RequestedTy));
  if (!Phi)
    return false;

  // Check whether truncation will help.
  if (Phi == Requested) {
    InvertStep = false;
    return true;
  }

  // Check whether inverting will help: {R,+,-1} == R - {0,+,1}.
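  // For example, if Requested == {R,+,-1}, then
  // Start(Requested) + (-Requested) == R + {-R,+,1} == {0,+,1}; if that
  // matches Phi, the requested value can be recovered as Start - Phi.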
  if (SE.getAddExpr(Requested->getStart(),
                    SE.getNegativeSCEV(Requested)) == Phi) {
    InvertStep = true;
    return true;
  }

  return false;
}

static bool IsIncrementNSW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getSignExtendExpr(Step, WideTy),
                                            SE.getSignExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getSignExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

static bool IsIncrementNUW(ScalarEvolution &SE, const SCEVAddRecExpr *AR) {
  if (!isa<IntegerType>(AR->getType()))
    return false;

  unsigned BitWidth = cast<IntegerType>(AR->getType())->getBitWidth();
  Type *WideTy = IntegerType::get(AR->getType()->getContext(), BitWidth * 2);
  const SCEV *Step = AR->getStepRecurrence(SE);
  const SCEV *OpAfterExtend = SE.getAddExpr(SE.getZeroExtendExpr(Step, WideTy),
                                            SE.getZeroExtendExpr(AR, WideTy));
  const SCEV *ExtendAfterOp =
      SE.getZeroExtendExpr(SE.getAddExpr(AR, Step), WideTy);
  return ExtendAfterOp == OpAfterExtend;
}

/// getAddRecExprPHILiterally - Helper for expandAddRecExprLiterally. Expand
/// the base addrec, which is the addrec without any non-loop-dominating
/// values, and return the PHI.
PHINode *
SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
                                        const Loop *L,
                                        Type *ExpandTy,
                                        Type *IntTy,
                                        Type *&TruncTy,
                                        bool &InvertStep) {
  assert((!IVIncInsertLoop||IVIncInsertPos) && "Uninitialized insert position");

  // Reuse a previously-inserted PHI, if present.
  BasicBlock *LatchBlock = L->getLoopLatch();
  if (LatchBlock) {
    PHINode *AddRecPhiMatch = nullptr;
    Instruction *IncV = nullptr;
    TruncTy = nullptr;
    InvertStep = false;

    // Only try partially matching scevs that need truncation and/or
    // step-inversion if we know this loop is outside the current loop.
    bool TryNonMatchingSCEV =
        IVIncInsertLoop &&
        SE.DT.properlyDominates(LatchBlock, IVIncInsertLoop->getHeader());

    for (PHINode &PN : L->getHeader()->phis()) {
      if (!SE.isSCEVable(PN.getType()))
        continue;

      // We should not look for an incomplete PHI. Getting SCEV for an
      // incomplete PHI has no meaning at all.
      if (!PN.isComplete()) {
        DEBUG_WITH_TYPE(
            DebugType, dbgs() << "One incomplete PHI is found: " << PN << "\n");
        continue;
      }

      const SCEVAddRecExpr *PhiSCEV = dyn_cast<SCEVAddRecExpr>(SE.getSCEV(&PN));
      if (!PhiSCEV)
        continue;

      bool IsMatchingSCEV = PhiSCEV == Normalized;
      // We only handle truncation and inversion of phi recurrences for the
      // expanded expression if the expanded expression's loop dominates the
      // loop we insert to. Check now, so we can bail out early.
      if (!IsMatchingSCEV && !TryNonMatchingSCEV)
        continue;

      // TODO: this possibly can be reworked to avoid this cast at all.
      Instruction *TempIncV =
          dyn_cast<Instruction>(PN.getIncomingValueForBlock(LatchBlock));
      if (!TempIncV)
        continue;

      // Check whether we can reuse this PHI node.
      if (LSRMode) {
        if (!isExpandedAddRecExprPHI(&PN, TempIncV, L))
          continue;
        if (L == IVIncInsertLoop && !hoistIVInc(TempIncV, IVIncInsertPos))
          continue;
      } else {
        if (!isNormalAddRecExprPHI(&PN, TempIncV, L))
          continue;
      }

      // Stop if we have found an exact match SCEV.
      if (IsMatchingSCEV) {
        IncV = TempIncV;
        TruncTy = nullptr;
        InvertStep = false;
        AddRecPhiMatch = &PN;
        break;
      }

      // Try whether the phi can be translated into the requested form
      // (truncated and/or offset by a constant).
      if ((!TruncTy || InvertStep) &&
          canBeCheaplyTransformed(SE, PhiSCEV, Normalized, InvertStep)) {
        // Record the phi node. But don't stop; we might find an exact match
        // later.
        AddRecPhiMatch = &PN;
        IncV = TempIncV;
        TruncTy = SE.getEffectiveSCEVType(Normalized->getType());
      }
    }

    if (AddRecPhiMatch) {
      // Potentially, move the increment. We have made sure in
      // isExpandedAddRecExprPHI or hoistIVInc that this is possible.
      if (L == IVIncInsertLoop)
        hoistBeforePos(&SE.DT, IncV, IVIncInsertPos, AddRecPhiMatch);

      // Ok, the add recurrence looks usable.
      // Remember this PHI, even in post-inc mode.
      InsertedValues.insert(AddRecPhiMatch);
      // Remember the increment.
      rememberInstruction(IncV);
      // Those values were not actually inserted but re-used.
      ReusedValues.insert(AddRecPhiMatch);
      ReusedValues.insert(IncV);
      return AddRecPhiMatch;
    }
  }

  // Save the original insertion point so we can restore it when we're done.
  SCEVInsertPointGuard Guard(Builder, this);

  // Another AddRec may need to be recursively expanded below. For example, if
  // this AddRec is quadratic, the StepV may itself be an AddRec in this
  // loop. Remove this loop from the PostIncLoops set before expanding such
  // AddRecs. Otherwise, we cannot find a valid position for the step
  // (i.e. StepV can never dominate its loop header). Ideally, we could do
  // SavedIncLoops.swap(PostIncLoops), but we generally have a single element,
  // so it's not worth implementing SmallPtrSet::swap.
  PostIncLoopSet SavedPostIncLoops = PostIncLoops;
  PostIncLoops.clear();

  // Expand code for the start value into the loop preheader.
  assert(L->getLoopPreheader() &&
         "Can't expand add recurrences without a loop preheader!");
  Value *StartV =
      expandCodeForImpl(Normalized->getStart(), ExpandTy,
                        L->getLoopPreheader()->getTerminator(), false);

  // StartV must have been inserted into L's preheader to dominate the new
  // phi.
  assert(!isa<Instruction>(StartV) ||
         SE.DT.properlyDominates(cast<Instruction>(StartV)->getParent(),
                                 L->getHeader()));

  // Expand code for the step value. Do this before creating the PHI so that PHI
  // reuse code doesn't see an incomplete PHI.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  // If the stride is negative, insert a sub instead of an add for the increment
  // (unless it's a constant, because subtracts of constants are canonicalized
  // to adds).
  bool useSubtract = !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
  if (useSubtract)
    Step = SE.getNegativeSCEV(Step);
  // Expand the step somewhere that dominates the loop header.
  Value *StepV = expandCodeForImpl(
      Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false);

  // The no-wrap behavior proved by IsIncrement(NUW|NSW) is only applicable if
  // we actually do emit an addition. It does not apply if we emit a
  // subtraction.
  bool IncrementIsNUW = !useSubtract && IsIncrementNUW(SE, Normalized);
  bool IncrementIsNSW = !useSubtract && IsIncrementNSW(SE, Normalized);

  // Create the PHI.
  BasicBlock *Header = L->getHeader();
  Builder.SetInsertPoint(Header, Header->begin());
  pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header);
  PHINode *PN = Builder.CreatePHI(ExpandTy, std::distance(HPB, HPE),
                                  Twine(IVName) + ".iv");

  // Create the step instructions and populate the PHI.
  for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) {
    BasicBlock *Pred = *HPI;

    // Add a start value.
    if (!L->contains(Pred)) {
      PN->addIncoming(StartV, Pred);
      continue;
    }

    // Create a step value and add it to the PHI.
    // If IVIncInsertLoop is non-null and equal to the addrec's loop, insert the
    // instructions at IVIncInsertPos.
    Instruction *InsertPos = L == IVIncInsertLoop ?
      IVIncInsertPos : Pred->getTerminator();
    Builder.SetInsertPoint(InsertPos);
    Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);

    if (isa<OverflowingBinaryOperator>(IncV)) {
      if (IncrementIsNUW)
        cast<BinaryOperator>(IncV)->setHasNoUnsignedWrap();
      if (IncrementIsNSW)
        cast<BinaryOperator>(IncV)->setHasNoSignedWrap();
    }
    PN->addIncoming(IncV, Pred);
  }

  // After expanding subexpressions, restore the PostIncLoops set so the caller
  // can ensure that IVIncrement dominates the current uses.
  PostIncLoops = SavedPostIncLoops;

  // Remember this PHI, even in post-inc mode.
  InsertedValues.insert(PN);

  return PN;
}

Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
  Type *STy = S->getType();
  Type *IntTy = SE.getEffectiveSCEVType(STy);
  const Loop *L = S->getLoop();

  // Determine a normalized form of this expression, which is the expression
  // before any post-inc adjustment is made.
  const SCEVAddRecExpr *Normalized = S;
  if (PostIncLoops.count(L)) {
    PostIncLoopSet Loops;
    Loops.insert(L);
    Normalized = cast<SCEVAddRecExpr>(normalizeForPostIncUse(S, Loops, SE));
  }

  // Strip off any non-loop-dominating component from the addrec start.
  const SCEV *Start = Normalized->getStart();
  const SCEV *PostLoopOffset = nullptr;
  if (!SE.properlyDominates(Start, L->getHeader())) {
    PostLoopOffset = Start;
    Start = SE.getConstant(Normalized->getType(), 0);
    Normalized = cast<SCEVAddRecExpr>(
        SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE),
                         Normalized->getLoop(),
                         Normalized->getNoWrapFlags(SCEV::FlagNW)));
  }

  // Strip off any non-loop-dominating component from the addrec step.
  const SCEV *Step = Normalized->getStepRecurrence(SE);
  const SCEV *PostLoopScale = nullptr;
  if (!SE.dominates(Step, L->getHeader())) {
    PostLoopScale = Step;
    Step = SE.getConstant(Normalized->getType(), 1);
    if (!Start->isZero()) {
      // The normalization below assumes that Start is constant zero, so if
      // it isn't, re-associate Start to PostLoopOffset.
      assert(!PostLoopOffset && "Start not-null but PostLoopOffset set?");
      PostLoopOffset = Start;
      Start = SE.getConstant(Normalized->getType(), 0);
    }
    Normalized =
        cast<SCEVAddRecExpr>(SE.getAddRecExpr(
            Start, Step, Normalized->getLoop(),
            Normalized->getNoWrapFlags(SCEV::FlagNW)));
  }

  // Expand the core addrec. If we need post-loop scaling, force it to
  // expand to an integer type to avoid the need for additional casting.
  Type *ExpandTy = PostLoopScale ? IntTy : STy;
  // We can't use a pointer type for the addrec if the pointer type is
  // non-integral.
  Type *AddRecPHIExpandTy =
      DL.isNonIntegralPointerType(STy) ? Normalized->getType() : ExpandTy;

  // In some cases, we decide to reuse an existing phi node but need to truncate
  // it and/or invert the step.
  Type *TruncTy = nullptr;
  bool InvertStep = false;
  PHINode *PN = getAddRecExprPHILiterally(Normalized, L, AddRecPHIExpandTy,
                                          IntTy, TruncTy, InvertStep);

  // Accommodate post-inc mode, if necessary.
  Value *Result;
  if (!PostIncLoops.count(L))
    Result = PN;
  else {
    // In PostInc mode, use the post-incremented value.
    BasicBlock *LatchBlock = L->getLoopLatch();
    assert(LatchBlock && "PostInc mode requires a unique loop latch!");
    Result = PN->getIncomingValueForBlock(LatchBlock);

    // We might be introducing a new use of the post-inc IV that is not poison
    // safe, in which case we should drop poison generating flags. Only keep
    // those flags for which SCEV has proven that they always hold.
    if (isa<OverflowingBinaryOperator>(Result)) {
      auto *I = cast<Instruction>(Result);
      if (!S->hasNoUnsignedWrap())
        I->setHasNoUnsignedWrap(false);
      if (!S->hasNoSignedWrap())
        I->setHasNoSignedWrap(false);
    }

    // For an expansion to use the postinc form, the client must call
    // expandCodeFor with an InsertPoint that is either outside the PostIncLoop
    // or dominated by IVIncInsertPos.
    if (isa<Instruction>(Result) &&
        !SE.DT.dominates(cast<Instruction>(Result),
                         &*Builder.GetInsertPoint())) {
      // The induction variable's postinc expansion does not dominate this use.
      // IVUsers tries to prevent this case, so it is rare. However, it can
      // happen when an IVUser outside the loop is not dominated by the latch
      // block. Adjusting IVIncInsertPos before expansion begins cannot handle
      // all cases. Consider a phi outside whose operand is replaced during
      // expansion with the value of the postinc user. Without fundamentally
      // changing the way postinc users are tracked, the only remedy is
      // inserting an extra IV increment. StepV might fold into PostLoopOffset,
      // but hopefully expandCodeFor handles that.
      bool useSubtract =
          !ExpandTy->isPointerTy() && Step->isNonConstantNegative();
      if (useSubtract)
        Step = SE.getNegativeSCEV(Step);
      Value *StepV;
      {
        // Expand the step somewhere that dominates the loop header.
        SCEVInsertPointGuard Guard(Builder, this);
        StepV = expandCodeForImpl(
            Step, IntTy, &*L->getHeader()->getFirstInsertionPt(), false);
      }
      Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract);
    }
  }

  // We have decided to reuse an induction variable of a dominating loop. Apply
  // truncation and/or inversion of the step.
1491   if (TruncTy) {
1492     Type *ResTy = Result->getType();
1493     // Normalize the result type.
1494     if (ResTy != SE.getEffectiveSCEVType(ResTy))
1495       Result = InsertNoopCastOfTo(Result, SE.getEffectiveSCEVType(ResTy));
1496     // Truncate the result.
1497     if (TruncTy != Result->getType())
1498       Result = Builder.CreateTrunc(Result, TruncTy);
1499
1500     // Invert the result.
1501     if (InvertStep)
1502       Result = Builder.CreateSub(
1503           expandCodeForImpl(Normalized->getStart(), TruncTy, false), Result);
1504   }
1505
1506   // Re-apply any non-loop-dominating scale.
1507   if (PostLoopScale) {
1508     assert(S->isAffine() && "Can't linearly scale non-affine recurrences.");
1509     Result = InsertNoopCastOfTo(Result, IntTy);
1510     Result = Builder.CreateMul(Result,
1511                                expandCodeForImpl(PostLoopScale, IntTy, false));
1512   }
1513
1514   // Re-apply any non-loop-dominating offset.
1515   if (PostLoopOffset) {
1516     if (PointerType *PTy = dyn_cast<PointerType>(ExpandTy)) {
1517       if (Result->getType()->isIntegerTy()) {
1518         Value *Base = expandCodeForImpl(PostLoopOffset, ExpandTy, false);
1519         Result = expandAddToGEP(SE.getUnknown(Result), PTy, IntTy, Base);
1520       } else {
1521         Result = expandAddToGEP(PostLoopOffset, PTy, IntTy, Result);
1522       }
1523     } else {
1524       Result = InsertNoopCastOfTo(Result, IntTy);
1525       Result = Builder.CreateAdd(
1526           Result, expandCodeForImpl(PostLoopOffset, IntTy, false));
1527     }
1528   }
1529
1530   return Result;
1531 }
1532
1533 Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
1534   // In canonical mode we compute the addrec as an expression of a canonical IV
1535   // using evaluateAtIteration and expand the resulting SCEV expression. This
1536   // way we avoid introducing new IVs to carry on the computation of the addrec
1537   // throughout the loop.
1538   //
1539   // For nested addrecs evaluateAtIteration might need a canonical IV of a
1540   // type wider than the addrec itself. Emitting a canonical IV of the
1541   // proper type might produce illegal types, for example expanding an i64
1542   // {0,+,2,+,1} addrec would need an i65 canonical IV. To avoid this, just fall
1543   // back to non-canonical mode for nested addrecs.
1544   if (!CanonicalMode || (S->getNumOperands() > 2))
1545     return expandAddRecExprLiterally(S);
1546
1547   Type *Ty = SE.getEffectiveSCEVType(S->getType());
1548   const Loop *L = S->getLoop();
1549
1550   // First check for an existing canonical IV in a suitable type.
1551   PHINode *CanonicalIV = nullptr;
1552   if (PHINode *PN = L->getCanonicalInductionVariable())
1553     if (SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
1554       CanonicalIV = PN;
1555
1556   // Rewrite an AddRec in terms of the canonical induction variable, if
1557   // its type is narrower.
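  // For example, an i32 addrec in a loop with an i64 canonical IV is
  // any-extended to an i64 addrec, expanded in terms of that IV, and the
  // result is truncated back to i32 right after the expanded value.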
1558 if (CanonicalIV && 1559 SE.getTypeSizeInBits(CanonicalIV->getType()) > 1560 SE.getTypeSizeInBits(Ty)) { 1561 SmallVector<const SCEV *, 4> NewOps(S->getNumOperands()); 1562 for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) 1563 NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); 1564 Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), 1565 S->getNoWrapFlags(SCEV::FlagNW))); 1566 BasicBlock::iterator NewInsertPt = 1567 findInsertPointAfter(cast<Instruction>(V), &*Builder.GetInsertPoint()); 1568 V = expandCodeForImpl(SE.getTruncateExpr(SE.getUnknown(V), Ty), nullptr, 1569 &*NewInsertPt, false); 1570 return V; 1571 } 1572 1573 // {X,+,F} --> X + {0,+,F} 1574 if (!S->getStart()->isZero()) { 1575 SmallVector<const SCEV *, 4> NewOps(S->operands()); 1576 NewOps[0] = SE.getConstant(Ty, 0); 1577 const SCEV *Rest = SE.getAddRecExpr(NewOps, L, 1578 S->getNoWrapFlags(SCEV::FlagNW)); 1579 1580 // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the 1581 // comments on expandAddToGEP for details. 1582 const SCEV *Base = S->getStart(); 1583 // Dig into the expression to find the pointer base for a GEP. 1584 const SCEV *ExposedRest = Rest; 1585 ExposePointerBase(Base, ExposedRest, SE); 1586 // If we found a pointer, expand the AddRec with a GEP. 1587 if (PointerType *PTy = dyn_cast<PointerType>(Base->getType())) { 1588 // Make sure the Base isn't something exotic, such as a multiplied 1589 // or divided pointer value. In those cases, the result type isn't 1590 // actually a pointer type. 1591 if (!isa<SCEVMulExpr>(Base) && !isa<SCEVUDivExpr>(Base)) { 1592 Value *StartV = expand(Base); 1593 assert(StartV->getType() == PTy && "Pointer type mismatch for GEP!"); 1594 return expandAddToGEP(ExposedRest, PTy, Ty, StartV); 1595 } 1596 } 1597 1598 // Just do a normal add. Pre-expand the operands to suppress folding. 1599 // 1600 // The LHS and RHS values are factored out of the expand call to make the 1601 // output independent of the argument evaluation order. 1602 const SCEV *AddExprLHS = SE.getUnknown(expand(S->getStart())); 1603 const SCEV *AddExprRHS = SE.getUnknown(expand(Rest)); 1604 return expand(SE.getAddExpr(AddExprLHS, AddExprRHS)); 1605 } 1606 1607 // If we don't yet have a canonical IV, create one. 1608 if (!CanonicalIV) { 1609 // Create and insert the PHI node for the induction variable in the 1610 // specified loop. 1611 BasicBlock *Header = L->getHeader(); 1612 pred_iterator HPB = pred_begin(Header), HPE = pred_end(Header); 1613 CanonicalIV = PHINode::Create(Ty, std::distance(HPB, HPE), "indvar", 1614 &Header->front()); 1615 rememberInstruction(CanonicalIV); 1616 1617 SmallSet<BasicBlock *, 4> PredSeen; 1618 Constant *One = ConstantInt::get(Ty, 1); 1619 for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { 1620 BasicBlock *HP = *HPI; 1621 if (!PredSeen.insert(HP).second) { 1622 // There must be an incoming value for each predecessor, even the 1623 // duplicates! 1624 CanonicalIV->addIncoming(CanonicalIV->getIncomingValueForBlock(HP), HP); 1625 continue; 1626 } 1627 1628 if (L->contains(HP)) { 1629 // Insert a unit add instruction right before the terminator 1630 // corresponding to the back-edge. 
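        // The resulting phi is the {0,+,1}<L> recurrence: zero on every entry
        // edge and incremented by one along each backedge.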
1631 Instruction *Add = BinaryOperator::CreateAdd(CanonicalIV, One, 1632 "indvar.next", 1633 HP->getTerminator()); 1634 Add->setDebugLoc(HP->getTerminator()->getDebugLoc()); 1635 rememberInstruction(Add); 1636 CanonicalIV->addIncoming(Add, HP); 1637 } else { 1638 CanonicalIV->addIncoming(Constant::getNullValue(Ty), HP); 1639 } 1640 } 1641 } 1642 1643 // {0,+,1} --> Insert a canonical induction variable into the loop! 1644 if (S->isAffine() && S->getOperand(1)->isOne()) { 1645 assert(Ty == SE.getEffectiveSCEVType(CanonicalIV->getType()) && 1646 "IVs with types different from the canonical IV should " 1647 "already have been handled!"); 1648 return CanonicalIV; 1649 } 1650 1651 // {0,+,F} --> {0,+,1} * F 1652 1653 // If this is a simple linear addrec, emit it now as a special case. 1654 if (S->isAffine()) // {0,+,F} --> i*F 1655 return 1656 expand(SE.getTruncateOrNoop( 1657 SE.getMulExpr(SE.getUnknown(CanonicalIV), 1658 SE.getNoopOrAnyExtend(S->getOperand(1), 1659 CanonicalIV->getType())), 1660 Ty)); 1661 1662 // If this is a chain of recurrences, turn it into a closed form, using the 1663 // folders, then expandCodeFor the closed form. This allows the folders to 1664 // simplify the expression without having to build a bunch of special code 1665 // into this folder. 1666 const SCEV *IH = SE.getUnknown(CanonicalIV); // Get I as a "symbolic" SCEV. 1667 1668 // Promote S up to the canonical IV type, if the cast is foldable. 1669 const SCEV *NewS = S; 1670 const SCEV *Ext = SE.getNoopOrAnyExtend(S, CanonicalIV->getType()); 1671 if (isa<SCEVAddRecExpr>(Ext)) 1672 NewS = Ext; 1673 1674 const SCEV *V = cast<SCEVAddRecExpr>(NewS)->evaluateAtIteration(IH, SE); 1675 //cerr << "Evaluated: " << *this << "\n to: " << *V << "\n"; 1676 1677 // Truncate the result down to the original type, if needed. 1678 const SCEV *T = SE.getTruncateOrNoop(V, Ty); 1679 return expand(T); 1680 } 1681 1682 Value *SCEVExpander::visitPtrToIntExpr(const SCEVPtrToIntExpr *S) { 1683 Value *V = 1684 expandCodeForImpl(S->getOperand(), S->getOperand()->getType(), false); 1685 return Builder.CreatePtrToInt(V, S->getType()); 1686 } 1687 1688 Value *SCEVExpander::visitTruncateExpr(const SCEVTruncateExpr *S) { 1689 Type *Ty = SE.getEffectiveSCEVType(S->getType()); 1690 Value *V = expandCodeForImpl( 1691 S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), 1692 false); 1693 return Builder.CreateTrunc(V, Ty); 1694 } 1695 1696 Value *SCEVExpander::visitZeroExtendExpr(const SCEVZeroExtendExpr *S) { 1697 Type *Ty = SE.getEffectiveSCEVType(S->getType()); 1698 Value *V = expandCodeForImpl( 1699 S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), 1700 false); 1701 return Builder.CreateZExt(V, Ty); 1702 } 1703 1704 Value *SCEVExpander::visitSignExtendExpr(const SCEVSignExtendExpr *S) { 1705 Type *Ty = SE.getEffectiveSCEVType(S->getType()); 1706 Value *V = expandCodeForImpl( 1707 S->getOperand(), SE.getEffectiveSCEVType(S->getOperand()->getType()), 1708 false); 1709 return Builder.CreateSExt(V, Ty); 1710 } 1711 1712 Value *SCEVExpander::visitSMaxExpr(const SCEVSMaxExpr *S) { 1713 Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); 1714 Type *Ty = LHS->getType(); 1715 for (int i = S->getNumOperands()-2; i >= 0; --i) { 1716 // In the case of mixed integer and pointer types, do the 1717 // rest of the comparisons as integer. 
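    // When the working type is an integer, the max lowers directly to the
    // llvm.smax intrinsic; otherwise an icmp/select sequence is emitted below.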
1718 Type *OpTy = S->getOperand(i)->getType(); 1719 if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { 1720 Ty = SE.getEffectiveSCEVType(Ty); 1721 LHS = InsertNoopCastOfTo(LHS, Ty); 1722 } 1723 Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); 1724 Value *Sel; 1725 if (Ty->isIntegerTy()) 1726 Sel = Builder.CreateIntrinsic(Intrinsic::smax, {Ty}, {LHS, RHS}, 1727 /*FMFSource=*/nullptr, "smax"); 1728 else { 1729 Value *ICmp = Builder.CreateICmpSGT(LHS, RHS); 1730 Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smax"); 1731 } 1732 LHS = Sel; 1733 } 1734 // In the case of mixed integer and pointer types, cast the 1735 // final result back to the pointer type. 1736 if (LHS->getType() != S->getType()) 1737 LHS = InsertNoopCastOfTo(LHS, S->getType()); 1738 return LHS; 1739 } 1740 1741 Value *SCEVExpander::visitUMaxExpr(const SCEVUMaxExpr *S) { 1742 Value *LHS = expand(S->getOperand(S->getNumOperands()-1)); 1743 Type *Ty = LHS->getType(); 1744 for (int i = S->getNumOperands()-2; i >= 0; --i) { 1745 // In the case of mixed integer and pointer types, do the 1746 // rest of the comparisons as integer. 1747 Type *OpTy = S->getOperand(i)->getType(); 1748 if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { 1749 Ty = SE.getEffectiveSCEVType(Ty); 1750 LHS = InsertNoopCastOfTo(LHS, Ty); 1751 } 1752 Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); 1753 Value *Sel; 1754 if (Ty->isIntegerTy()) 1755 Sel = Builder.CreateIntrinsic(Intrinsic::umax, {Ty}, {LHS, RHS}, 1756 /*FMFSource=*/nullptr, "umax"); 1757 else { 1758 Value *ICmp = Builder.CreateICmpUGT(LHS, RHS); 1759 Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umax"); 1760 } 1761 LHS = Sel; 1762 } 1763 // In the case of mixed integer and pointer types, cast the 1764 // final result back to the pointer type. 1765 if (LHS->getType() != S->getType()) 1766 LHS = InsertNoopCastOfTo(LHS, S->getType()); 1767 return LHS; 1768 } 1769 1770 Value *SCEVExpander::visitSMinExpr(const SCEVSMinExpr *S) { 1771 Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); 1772 Type *Ty = LHS->getType(); 1773 for (int i = S->getNumOperands() - 2; i >= 0; --i) { 1774 // In the case of mixed integer and pointer types, do the 1775 // rest of the comparisons as integer. 1776 Type *OpTy = S->getOperand(i)->getType(); 1777 if (OpTy->isIntegerTy() != Ty->isIntegerTy()) { 1778 Ty = SE.getEffectiveSCEVType(Ty); 1779 LHS = InsertNoopCastOfTo(LHS, Ty); 1780 } 1781 Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false); 1782 Value *Sel; 1783 if (Ty->isIntegerTy()) 1784 Sel = Builder.CreateIntrinsic(Intrinsic::smin, {Ty}, {LHS, RHS}, 1785 /*FMFSource=*/nullptr, "smin"); 1786 else { 1787 Value *ICmp = Builder.CreateICmpSLT(LHS, RHS); 1788 Sel = Builder.CreateSelect(ICmp, LHS, RHS, "smin"); 1789 } 1790 LHS = Sel; 1791 } 1792 // In the case of mixed integer and pointer types, cast the 1793 // final result back to the pointer type. 1794 if (LHS->getType() != S->getType()) 1795 LHS = InsertNoopCastOfTo(LHS, S->getType()); 1796 return LHS; 1797 } 1798 1799 Value *SCEVExpander::visitUMinExpr(const SCEVUMinExpr *S) { 1800 Value *LHS = expand(S->getOperand(S->getNumOperands() - 1)); 1801 Type *Ty = LHS->getType(); 1802 for (int i = S->getNumOperands() - 2; i >= 0; --i) { 1803 // In the case of mixed integer and pointer types, do the 1804 // rest of the comparisons as integer. 
1805     Type *OpTy = S->getOperand(i)->getType();
1806     if (OpTy->isIntegerTy() != Ty->isIntegerTy()) {
1807       Ty = SE.getEffectiveSCEVType(Ty);
1808       LHS = InsertNoopCastOfTo(LHS, Ty);
1809     }
1810     Value *RHS = expandCodeForImpl(S->getOperand(i), Ty, false);
1811     Value *Sel;
1812     if (Ty->isIntegerTy())
1813       Sel = Builder.CreateIntrinsic(Intrinsic::umin, {Ty}, {LHS, RHS},
1814                                     /*FMFSource=*/nullptr, "umin");
1815     else {
1816       Value *ICmp = Builder.CreateICmpULT(LHS, RHS);
1817       Sel = Builder.CreateSelect(ICmp, LHS, RHS, "umin");
1818     }
1819     LHS = Sel;
1820   }
1821   // In the case of mixed integer and pointer types, cast the
1822   // final result back to the pointer type.
1823   if (LHS->getType() != S->getType())
1824     LHS = InsertNoopCastOfTo(LHS, S->getType());
1825   return LHS;
1826 }
1827
1828 Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty,
1829                                        Instruction *IP, bool Root) {
1830   setInsertPoint(IP);
1831   Value *V = expandCodeForImpl(SH, Ty, Root);
1832   return V;
1833 }
1834
1835 Value *SCEVExpander::expandCodeForImpl(const SCEV *SH, Type *Ty, bool Root) {
1836   // Expand the code for this SCEV.
1837   Value *V = expand(SH);
1838
1839   if (PreserveLCSSA) {
1840     if (auto *Inst = dyn_cast<Instruction>(V)) {
1841       // Create a temporary instruction at the current insertion point, so we
1842       // can hand it off to the helper to create LCSSA PHIs if required for the
1843       // new use.
1844       // FIXME: Ideally formLCSSAForInstructions (used in fixupLCSSAFormFor)
1845       // would accept an insertion point and return an LCSSA phi for that
1846       // insertion point, so there is no need to insert & remove the temporary
1847       // instruction.
1848       Instruction *Tmp;
1849       if (Inst->getType()->isIntegerTy())
1850         Tmp =
1851             cast<Instruction>(Builder.CreateAdd(Inst, Inst, "tmp.lcssa.user"));
1852       else {
1853         assert(Inst->getType()->isPointerTy());
1854         Tmp = cast<Instruction>(
1855             Builder.CreateGEP(Inst, Builder.getInt32(1), "tmp.lcssa.user"));
1856       }
1857       V = fixupLCSSAFormFor(Tmp, 0);
1858
1859       // Clean up temporary instruction.
1860       InsertedValues.erase(Tmp);
1861       InsertedPostIncValues.erase(Tmp);
1862       Tmp->eraseFromParent();
1863     }
1864   }
1865
1866   InsertedExpressions[std::make_pair(SH, &*Builder.GetInsertPoint())] = V;
1867   if (Ty) {
1868     assert(SE.getTypeSizeInBits(Ty) == SE.getTypeSizeInBits(SH->getType()) &&
1869            "non-trivial casts should be done with the SCEVs directly!");
1870     V = InsertNoopCastOfTo(V, Ty);
1871   }
1872   return V;
1873 }
1874
1875 ScalarEvolution::ValueOffsetPair
1876 SCEVExpander::FindValueInExprValueMap(const SCEV *S,
1877                                       const Instruction *InsertPt) {
1878   SetVector<ScalarEvolution::ValueOffsetPair> *Set = SE.getSCEVValues(S);
1879   // If the expansion is not in CanonicalMode, and the SCEV contains any
1880   // scAddRecExpr subexpression, the SCEV must be expanded literally.
1881   if (CanonicalMode || !SE.containsAddRecurrence(S)) {
1882     // If S is scConstant, it may be worse to reuse an existing Value.
1883     if (S->getSCEVType() != scConstant && Set) {
1884       // Choose a Value from the set which dominates the insertPt.
1885       // insertPt should be inside the Value's parent loop so as not to break
1886       // the LCSSA form.
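      // Each entry is a (Value, Offset) pair for which SCEV(Value) == S plus
      // Offset; a non-null Offset is compensated for by the caller, expand().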
1887       for (auto const &VOPair : *Set) {
1888         Value *V = VOPair.first;
1889         ConstantInt *Offset = VOPair.second;
1890         Instruction *EntInst = nullptr;
1891         if (V && isa<Instruction>(V) && (EntInst = cast<Instruction>(V)) &&
1892             S->getType() == V->getType() &&
1893             EntInst->getFunction() == InsertPt->getFunction() &&
1894             SE.DT.dominates(EntInst, InsertPt) &&
1895             (SE.LI.getLoopFor(EntInst->getParent()) == nullptr ||
1896              SE.LI.getLoopFor(EntInst->getParent())->contains(InsertPt)))
1897           return {V, Offset};
1898       }
1899     }
1900   }
1901   return {nullptr, nullptr};
1902 }
1903
1904 // The expansion of a SCEV will either reuse a previous Value in ExprValueMap
1905 // or expand the SCEV literally. Specifically, if the expansion is in LSRMode
1906 // and the SCEV contains any scAddRecExpr subexpression, it will be expanded
1907 // literally, to prevent LSR's transformed SCEV from being reverted. Otherwise
1908 // the expansion will try to reuse a Value from ExprValueMap, and only when
1909 // that fails, expand the SCEV literally.
1910 Value *SCEVExpander::expand(const SCEV *S) {
1911   // Compute an insertion point for this SCEV object. Hoist the instructions
1912   // as far out in the loop nest as possible.
1913   Instruction *InsertPt = &*Builder.GetInsertPoint();
1914
1915   // We can only move the insertion point if there are no div or rem
1916   // operations; otherwise we risk moving it past a check for a zero denominator.
1917   auto SafeToHoist = [](const SCEV *S) {
1918     return !SCEVExprContains(S, [](const SCEV *S) {
1919       if (const auto *D = dyn_cast<SCEVUDivExpr>(S)) {
1920         if (const auto *SC = dyn_cast<SCEVConstant>(D->getRHS()))
1921           // Division by non-zero constants can be hoisted.
1922           return SC->getValue()->isZero();
1923         // All other divisions should not be moved as they may be
1924         // divisions by zero and should be kept within the
1925         // conditions of the surrounding loops that guard their
1926         // execution (see PR35406).
1927         return true;
1928       }
1929       return false;
1930     });
1931   };
1932   if (SafeToHoist(S)) {
1933     for (Loop *L = SE.LI.getLoopFor(Builder.GetInsertBlock());;
1934          L = L->getParentLoop()) {
1935       if (SE.isLoopInvariant(S, L)) {
1936         if (!L) break;
1937         if (BasicBlock *Preheader = L->getLoopPreheader())
1938           InsertPt = Preheader->getTerminator();
1939         else
1940           // LSR sets the insertion point for AddRec start/step values to the
1941           // block start to simplify value reuse, even though it's an invalid
1942           // position. SCEVExpander must correct for this in all cases.
1943           InsertPt = &*L->getHeader()->getFirstInsertionPt();
1944       } else {
1945         // If the SCEV is computable at this level, insert it into the header
1946         // after the PHIs (and after any other instructions that we've inserted
1947         // there) so that it is guaranteed to dominate any user inside the loop.
1948         if (L && SE.hasComputableLoopEvolution(S, L) && !PostIncLoops.count(L))
1949           InsertPt = &*L->getHeader()->getFirstInsertionPt();
1950
1951         while (InsertPt->getIterator() != Builder.GetInsertPoint() &&
1952                (isInsertedInstruction(InsertPt) ||
1953                 isa<DbgInfoIntrinsic>(InsertPt))) {
1954           InsertPt = &*std::next(InsertPt->getIterator());
1955         }
1956         break;
1957       }
1958     }
1959   }
1960
1961   // Check to see if we already expanded this here.
1962   auto I = InsertedExpressions.find(std::make_pair(S, InsertPt));
1963   if (I != InsertedExpressions.end())
1964     return I->second;
1965
1966   SCEVInsertPointGuard Guard(Builder, this);
1967   Builder.SetInsertPoint(InsertPt);
1968
1969   // Expand the expression into instructions.
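  // Try to reuse an existing Value recorded for S. If the reused Value differs
  // from S by a constant Offset, compensate below: for pointer-typed values a
  // GEP (possibly through i8*) subtracts the Offset; for integers, a sub does.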
1970   ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, InsertPt);
1971   Value *V = VO.first;
1972
1973   if (!V)
1974     V = visit(S);
1975   else if (VO.second) {
1976     if (PointerType *Vty = dyn_cast<PointerType>(V->getType())) {
1977       Type *Ety = Vty->getPointerElementType();
1978       int64_t Offset = VO.second->getSExtValue();
1979       int64_t ESize = SE.getTypeSizeInBits(Ety);
1980       if ((Offset * 8) % ESize == 0) {
1981         ConstantInt *Idx =
1982             ConstantInt::getSigned(VO.second->getType(), -(Offset * 8) / ESize);
1983         V = Builder.CreateGEP(Ety, V, Idx, "scevgep");
1984       } else {
1985         ConstantInt *Idx =
1986             ConstantInt::getSigned(VO.second->getType(), -Offset);
1987         unsigned AS = Vty->getAddressSpace();
1988         V = Builder.CreateBitCast(V, Type::getInt8PtrTy(SE.getContext(), AS));
1989         V = Builder.CreateGEP(Type::getInt8Ty(SE.getContext()), V, Idx,
1990                               "uglygep");
1991         V = Builder.CreateBitCast(V, Vty);
1992       }
1993     } else {
1994       V = Builder.CreateSub(V, VO.second);
1995     }
1996   }
1997   // Remember the expanded value for this SCEV at this location.
1998   //
1999   // This is independent of PostIncLoops. The mapped value simply materializes
2000   // the expression at this insertion point. If the mapped value happened to be
2001   // a postinc expansion, it could be reused by a non-postinc user, but only if
2002   // its insertion point was already at the head of the loop.
2003   InsertedExpressions[std::make_pair(S, InsertPt)] = V;
2004   return V;
2005 }
2006
2007 void SCEVExpander::rememberInstruction(Value *I) {
2008   auto DoInsert = [this](Value *V) {
2009     if (!PostIncLoops.empty())
2010       InsertedPostIncValues.insert(V);
2011     else
2012       InsertedValues.insert(V);
2013   };
2014   DoInsert(I);
2015
2016   if (!PreserveLCSSA)
2017     return;
2018
2019   if (auto *Inst = dyn_cast<Instruction>(I)) {
2020     // A new instruction has been added, which might introduce new uses outside
2021     // a defining loop. Fix LCSSA form for each operand of the new instruction,
2022     // if required.
2023     for (unsigned OpIdx = 0, OpEnd = Inst->getNumOperands(); OpIdx != OpEnd;
2024          OpIdx++)
2025       fixupLCSSAFormFor(Inst, OpIdx);
2026   }
2027 }
2028
2029 /// replaceCongruentIVs - Check for congruent phis in this loop header and
2030 /// replace them with their most canonical representative. Return the number of
2031 /// phis eliminated.
2032 ///
2033 /// This does not depend on any SCEVExpander state but should be used in
2034 /// the same context that SCEVExpander is used.
2035 unsigned
2036 SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT,
2037                                   SmallVectorImpl<WeakTrackingVH> &DeadInsts,
2038                                   const TargetTransformInfo *TTI) {
2039   // Collect the phis in this loop header (sorted widest-first below when TTI is available).
2040   SmallVector<PHINode*, 8> Phis;
2041   for (PHINode &PN : L->getHeader()->phis())
2042     Phis.push_back(&PN);
2043
2044   if (TTI)
2045     llvm::sort(Phis, [](Value *LHS, Value *RHS) {
2046       // Put pointers at the back and make sure pointer < pointer = false.
2047       if (!LHS->getType()->isIntegerTy() || !RHS->getType()->isIntegerTy())
2048         return RHS->getType()->isIntegerTy() && !LHS->getType()->isIntegerTy();
2049       return RHS->getType()->getPrimitiveSizeInBits().getFixedSize() <
2050              LHS->getType()->getPrimitiveSizeInBits().getFixedSize();
2051     });
2052
2053   unsigned NumElim = 0;
2054   DenseMap<const SCEV *, PHINode *> ExprToIVMap;
2055   // Process phis from wide to narrow. Map wide phis to their truncation
2056   // so narrow phis can reuse them.
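  // For example, if congruent i64 and i32 IVs coexist in this header and the
  // truncation is free, the i64 phi is kept and the i32 phi (and, where
  // possible, its increment) is rewritten in terms of a trunc of the wider IV.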
2057 for (PHINode *Phi : Phis) { 2058 auto SimplifyPHINode = [&](PHINode *PN) -> Value * { 2059 if (Value *V = SimplifyInstruction(PN, {DL, &SE.TLI, &SE.DT, &SE.AC})) 2060 return V; 2061 if (!SE.isSCEVable(PN->getType())) 2062 return nullptr; 2063 auto *Const = dyn_cast<SCEVConstant>(SE.getSCEV(PN)); 2064 if (!Const) 2065 return nullptr; 2066 return Const->getValue(); 2067 }; 2068 2069 // Fold constant phis. They may be congruent to other constant phis and 2070 // would confuse the logic below that expects proper IVs. 2071 if (Value *V = SimplifyPHINode(Phi)) { 2072 if (V->getType() != Phi->getType()) 2073 continue; 2074 Phi->replaceAllUsesWith(V); 2075 DeadInsts.emplace_back(Phi); 2076 ++NumElim; 2077 DEBUG_WITH_TYPE(DebugType, dbgs() 2078 << "INDVARS: Eliminated constant iv: " << *Phi << '\n'); 2079 continue; 2080 } 2081 2082 if (!SE.isSCEVable(Phi->getType())) 2083 continue; 2084 2085 PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; 2086 if (!OrigPhiRef) { 2087 OrigPhiRef = Phi; 2088 if (Phi->getType()->isIntegerTy() && TTI && 2089 TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { 2090 // This phi can be freely truncated to the narrowest phi type. Map the 2091 // truncated expression to it so it will be reused for narrow types. 2092 const SCEV *TruncExpr = 2093 SE.getTruncateExpr(SE.getSCEV(Phi), Phis.back()->getType()); 2094 ExprToIVMap[TruncExpr] = Phi; 2095 } 2096 continue; 2097 } 2098 2099 // Replacing a pointer phi with an integer phi or vice-versa doesn't make 2100 // sense. 2101 if (OrigPhiRef->getType()->isPointerTy() != Phi->getType()->isPointerTy()) 2102 continue; 2103 2104 if (BasicBlock *LatchBlock = L->getLoopLatch()) { 2105 Instruction *OrigInc = dyn_cast<Instruction>( 2106 OrigPhiRef->getIncomingValueForBlock(LatchBlock)); 2107 Instruction *IsomorphicInc = 2108 dyn_cast<Instruction>(Phi->getIncomingValueForBlock(LatchBlock)); 2109 2110 if (OrigInc && IsomorphicInc) { 2111 // If this phi has the same width but is more canonical, replace the 2112 // original with it. As part of the "more canonical" determination, 2113 // respect a prior decision to use an IV chain. 2114 if (OrigPhiRef->getType() == Phi->getType() && 2115 !(ChainedPhis.count(Phi) || 2116 isExpandedAddRecExprPHI(OrigPhiRef, OrigInc, L)) && 2117 (ChainedPhis.count(Phi) || 2118 isExpandedAddRecExprPHI(Phi, IsomorphicInc, L))) { 2119 std::swap(OrigPhiRef, Phi); 2120 std::swap(OrigInc, IsomorphicInc); 2121 } 2122 // Replacing the congruent phi is sufficient because acyclic 2123 // redundancy elimination, CSE/GVN, should handle the 2124 // rest. However, once SCEV proves that a phi is congruent, 2125 // it's often the head of an IV user cycle that is isomorphic 2126 // with the original phi. It's worth eagerly cleaning up the 2127 // common case of a single IV increment so that DeleteDeadPHIs 2128 // can remove cycles that had postinc uses. 
2129 const SCEV *TruncExpr = 2130 SE.getTruncateOrNoop(SE.getSCEV(OrigInc), IsomorphicInc->getType()); 2131 if (OrigInc != IsomorphicInc && 2132 TruncExpr == SE.getSCEV(IsomorphicInc) && 2133 SE.LI.replacementPreservesLCSSAForm(IsomorphicInc, OrigInc) && 2134 hoistIVInc(OrigInc, IsomorphicInc)) { 2135 DEBUG_WITH_TYPE(DebugType, 2136 dbgs() << "INDVARS: Eliminated congruent iv.inc: " 2137 << *IsomorphicInc << '\n'); 2138 Value *NewInc = OrigInc; 2139 if (OrigInc->getType() != IsomorphicInc->getType()) { 2140 Instruction *IP = nullptr; 2141 if (PHINode *PN = dyn_cast<PHINode>(OrigInc)) 2142 IP = &*PN->getParent()->getFirstInsertionPt(); 2143 else 2144 IP = OrigInc->getNextNode(); 2145 2146 IRBuilder<> Builder(IP); 2147 Builder.SetCurrentDebugLocation(IsomorphicInc->getDebugLoc()); 2148 NewInc = Builder.CreateTruncOrBitCast( 2149 OrigInc, IsomorphicInc->getType(), IVName); 2150 } 2151 IsomorphicInc->replaceAllUsesWith(NewInc); 2152 DeadInsts.emplace_back(IsomorphicInc); 2153 } 2154 } 2155 } 2156 DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Eliminated congruent iv: " 2157 << *Phi << '\n'); 2158 DEBUG_WITH_TYPE(DebugType, dbgs() << "INDVARS: Original iv: " 2159 << *OrigPhiRef << '\n'); 2160 ++NumElim; 2161 Value *NewIV = OrigPhiRef; 2162 if (OrigPhiRef->getType() != Phi->getType()) { 2163 IRBuilder<> Builder(&*L->getHeader()->getFirstInsertionPt()); 2164 Builder.SetCurrentDebugLocation(Phi->getDebugLoc()); 2165 NewIV = Builder.CreateTruncOrBitCast(OrigPhiRef, Phi->getType(), IVName); 2166 } 2167 Phi->replaceAllUsesWith(NewIV); 2168 DeadInsts.emplace_back(Phi); 2169 } 2170 return NumElim; 2171 } 2172 2173 Optional<ScalarEvolution::ValueOffsetPair> 2174 SCEVExpander::getRelatedExistingExpansion(const SCEV *S, const Instruction *At, 2175 Loop *L) { 2176 using namespace llvm::PatternMatch; 2177 2178 SmallVector<BasicBlock *, 4> ExitingBlocks; 2179 L->getExitingBlocks(ExitingBlocks); 2180 2181 // Look for suitable value in simple conditions at the loop exits. 2182 for (BasicBlock *BB : ExitingBlocks) { 2183 ICmpInst::Predicate Pred; 2184 Instruction *LHS, *RHS; 2185 2186 if (!match(BB->getTerminator(), 2187 m_Br(m_ICmp(Pred, m_Instruction(LHS), m_Instruction(RHS)), 2188 m_BasicBlock(), m_BasicBlock()))) 2189 continue; 2190 2191 if (SE.getSCEV(LHS) == S && SE.DT.dominates(LHS, At)) 2192 return ScalarEvolution::ValueOffsetPair(LHS, nullptr); 2193 2194 if (SE.getSCEV(RHS) == S && SE.DT.dominates(RHS, At)) 2195 return ScalarEvolution::ValueOffsetPair(RHS, nullptr); 2196 } 2197 2198 // Use expand's logic which is used for reusing a previous Value in 2199 // ExprValueMap. 2200 ScalarEvolution::ValueOffsetPair VO = FindValueInExprValueMap(S, At); 2201 if (VO.first) 2202 return VO; 2203 2204 // There is potential to make this significantly smarter, but this simple 2205 // heuristic already gets some interesting cases. 2206 2207 // Can not find suitable value. 2208 return None; 2209 } 2210 2211 template<typename T> static InstructionCost costAndCollectOperands( 2212 const SCEVOperand &WorkItem, const TargetTransformInfo &TTI, 2213 TargetTransformInfo::TargetCostKind CostKind, 2214 SmallVectorImpl<SCEVOperand> &Worklist) { 2215 2216 const T *S = cast<T>(WorkItem.S); 2217 InstructionCost Cost = 0; 2218 // Object to help map SCEV operands to expanded IR instructions. 
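  // For each IR opcode recorded below, [MinIdx, MaxIdx] bounds the SCEV
  // operand indices that feed that operation; the loop at the end of this
  // function uses the bounds to clamp the operand index pushed onto Worklist.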
2219   struct OperationIndices {
2220     OperationIndices(unsigned Opc, size_t min, size_t max) :
2221       Opcode(Opc), MinIdx(min), MaxIdx(max) { }
2222     unsigned Opcode;
2223     size_t MinIdx;
2224     size_t MaxIdx;
2225   };
2226
2227   // Collect the operations of all the instructions that will be needed to
2228   // expand the SCEVExpr. This is so that when we come to cost the operands,
2229   // we know what the generated user(s) will be.
2230   SmallVector<OperationIndices, 2> Operations;
2231
2232   auto CastCost = [&](unsigned Opcode) -> InstructionCost {
2233     Operations.emplace_back(Opcode, 0, 0);
2234     return TTI.getCastInstrCost(Opcode, S->getType(),
2235                                 S->getOperand(0)->getType(),
2236                                 TTI::CastContextHint::None, CostKind);
2237   };
2238
2239   auto ArithCost = [&](unsigned Opcode, unsigned NumRequired,
2240                        unsigned MinIdx = 0,
2241                        unsigned MaxIdx = 1) -> InstructionCost {
2242     Operations.emplace_back(Opcode, MinIdx, MaxIdx);
2243     return NumRequired *
2244            TTI.getArithmeticInstrCost(Opcode, S->getType(), CostKind);
2245   };
2246
2247   auto CmpSelCost = [&](unsigned Opcode, unsigned NumRequired, unsigned MinIdx,
2248                         unsigned MaxIdx) -> InstructionCost {
2249     Operations.emplace_back(Opcode, MinIdx, MaxIdx);
2250     Type *OpType = S->getOperand(0)->getType();
2251     return NumRequired * TTI.getCmpSelInstrCost(
2252                              Opcode, OpType, CmpInst::makeCmpResultType(OpType),
2253                              CmpInst::BAD_ICMP_PREDICATE, CostKind);
2254   };
2255
2256   switch (S->getSCEVType()) {
2257   case scCouldNotCompute:
2258     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
2259   case scUnknown:
2260   case scConstant:
2261     return 0;
2262   case scPtrToInt:
2263     Cost = CastCost(Instruction::PtrToInt);
2264     break;
2265   case scTruncate:
2266     Cost = CastCost(Instruction::Trunc);
2267     break;
2268   case scZeroExtend:
2269     Cost = CastCost(Instruction::ZExt);
2270     break;
2271   case scSignExtend:
2272     Cost = CastCost(Instruction::SExt);
2273     break;
2274   case scUDivExpr: {
2275     unsigned Opcode = Instruction::UDiv;
2276     if (auto *SC = dyn_cast<SCEVConstant>(S->getOperand(1)))
2277       if (SC->getAPInt().isPowerOf2())
2278         Opcode = Instruction::LShr;
2279     Cost = ArithCost(Opcode, 1);
2280     break;
2281   }
2282   case scAddExpr:
2283     Cost = ArithCost(Instruction::Add, S->getNumOperands() - 1);
2284     break;
2285   case scMulExpr:
2286     // TODO: this is a very pessimistic cost model for Mul, because the
2287     // expander actually uses a binary powering (Bin Pow) algorithm; see
2288     // SCEVExpander::visitMulExpr(), ExpandOpBinPowN().
2289     Cost = ArithCost(Instruction::Mul, S->getNumOperands() - 1);
2290     break;
2291   case scSMaxExpr:
2292   case scUMaxExpr:
2293   case scSMinExpr:
2294   case scUMinExpr: {
2295     // FIXME: should this ask for the cost of the intrinsics instead?
2296     Cost += CmpSelCost(Instruction::ICmp, S->getNumOperands() - 1, 0, 1);
2297     Cost += CmpSelCost(Instruction::Select, S->getNumOperands() - 1, 0, 2);
2298     break;
2299   }
2300   case scAddRecExpr: {
2301     // In this polynomial, we may have some zero operands, and we shouldn't
2302     // really charge for those. So how many non-zero coefficients are there?
2303     int NumTerms = llvm::count_if(S->operands(), [](const SCEV *Op) {
2304       return !Op->isZero();
2305     });
2306
2307     assert(NumTerms >= 1 && "Polynomial should have at least one term.");
2308     assert(!(*std::prev(S->operands().end()))->isZero() &&
2309            "Last operand should not be zero");
2310
2311     // Ignoring the constant term (operand 0), how many of the coefficients are u> 1?
2312     int NumNonZeroDegreeNonOneTerms =
2313         llvm::count_if(S->operands(), [](const SCEV *Op) {
2314           auto *SConst = dyn_cast<SCEVConstant>(Op);
2315           return !SConst || SConst->getAPInt().ugt(1);
2316         });
2317
2318     // Much like with a normal add expr, the polynomial will require
2319     // one less addition than the number of its terms.
2320     InstructionCost AddCost = ArithCost(Instruction::Add, NumTerms - 1,
2321                                         /*MinIdx*/ 1, /*MaxIdx*/ 1);
2322     // Here, *each* one of those will require a multiplication.
2323     InstructionCost MulCost =
2324         ArithCost(Instruction::Mul, NumNonZeroDegreeNonOneTerms);
2325     Cost = AddCost + MulCost;
2326
2327     // What is the degree of this polynomial?
2328     int PolyDegree = S->getNumOperands() - 1;
2329     assert(PolyDegree >= 1 && "Should be at least affine.");
2330
2331     // The final term will be:
2332     //   Op_{PolyDegree} * x ^ {PolyDegree}
2333     // where x ^ {PolyDegree} will again require PolyDegree-1 mul operations.
2334     // Note that x ^ {PolyDegree} = x * x ^ {PolyDegree-1}, so charging for
2335     // x ^ {PolyDegree} will give us x ^ {2} .. x ^ {PolyDegree-1} for free.
2336     // FIXME: this is conservatively correct, but might be overly pessimistic.
2337     Cost += MulCost * (PolyDegree - 1);
2338     break;
2339   }
2340   }
2341
2342   for (auto &CostOp : Operations) {
2343     for (auto SCEVOp : enumerate(S->operands())) {
2344       // Clamp the index to account for multiple IR operations being chained.
2345       size_t MinIdx = std::max(SCEVOp.index(), CostOp.MinIdx);
2346       size_t OpIdx = std::min(MinIdx, CostOp.MaxIdx);
2347       Worklist.emplace_back(CostOp.Opcode, OpIdx, SCEVOp.value());
2348     }
2349   }
2350   return Cost;
2351 }
2352
2353 bool SCEVExpander::isHighCostExpansionHelper(
2354     const SCEVOperand &WorkItem, Loop *L, const Instruction &At,
2355     InstructionCost &Cost, unsigned Budget, const TargetTransformInfo &TTI,
2356     SmallPtrSetImpl<const SCEV *> &Processed,
2357     SmallVectorImpl<SCEVOperand> &Worklist) {
2358   if (Cost > Budget)
2359     return true; // Already run out of budget, give up.
2360
2361   const SCEV *S = WorkItem.S;
2362   // Was the cost of expansion of this expression already accounted for?
2363   if (!isa<SCEVConstant>(S) && !Processed.insert(S).second)
2364     return false; // We have already accounted for this expression.
2365
2366   // If we can find an existing value for this scev available at the point "At"
2367   // then consider the expression cheap.
2368   if (getRelatedExistingExpansion(S, &At, L))
2369     return false; // Consider the expression to be free.
2370
2371   TargetTransformInfo::TargetCostKind CostKind =
2372       L->getHeader()->getParent()->hasMinSize()
2373           ? TargetTransformInfo::TCK_CodeSize
2374           : TargetTransformInfo::TCK_RecipThroughput;
2375
2376   switch (S->getSCEVType()) {
2377   case scCouldNotCompute:
2378     llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!");
2379   case scUnknown:
2380     // Assume to be zero-cost.
2381     return false;
2382   case scConstant: {
2383     // Only evaluate the costs of constants when optimizing for size.
2384     if (CostKind != TargetTransformInfo::TCK_CodeSize)
2385       return false;
2386     const APInt &Imm = cast<SCEVConstant>(S)->getAPInt();
2387     Type *Ty = S->getType();
2388     Cost += TTI.getIntImmCostInst(
2389         WorkItem.ParentOpcode, WorkItem.OperandIdx, Imm, Ty, CostKind);
2390     return Cost > Budget;
2391   }
2392   case scTruncate:
2393   case scPtrToInt:
2394   case scZeroExtend:
2395   case scSignExtend: {
2396     Cost +=
2397         costAndCollectOperands<SCEVCastExpr>(WorkItem, TTI, CostKind, Worklist);
2398     return false; // Will answer upon next entry into this function.
2399   }
2400   case scUDivExpr: {
2401     // UDivExpr is very likely a UDiv that ScalarEvolution's HowFarToZero or
2402     // HowManyLessThans produced to compute a precise expression, rather than a
2403     // UDiv from the user's code. If we can't find a UDiv in the code with some
2404     // simple searching, we need to account for its cost.
2405
2406     // At the beginning of this function we already tried to find an existing
2407     // value for plain 'S'. Now try to look up 'S + 1', since it is a common
2408     // pattern involving division. This is just a simple search heuristic.
2409     if (getRelatedExistingExpansion(
2410             SE.getAddExpr(S, SE.getConstant(S->getType(), 1)), &At, L))
2411       return false; // Consider it to be free.
2412
2413     Cost +=
2414         costAndCollectOperands<SCEVUDivExpr>(WorkItem, TTI, CostKind, Worklist);
2415     return false; // Will answer upon next entry into this function.
2416   }
2417   case scAddExpr:
2418   case scMulExpr:
2419   case scUMaxExpr:
2420   case scSMaxExpr:
2421   case scUMinExpr:
2422   case scSMinExpr: {
2423     assert(cast<SCEVNAryExpr>(S)->getNumOperands() > 1 &&
2424            "Nary expr should have more than 1 operand.");
2425     // The simple nary expr will require one less op (or pair of ops)
2426     // than the number of its terms.
2427 Cost += 2428 costAndCollectOperands<SCEVNAryExpr>(WorkItem, TTI, CostKind, Worklist); 2429 return Cost > Budget; 2430 } 2431 case scAddRecExpr: { 2432 assert(cast<SCEVAddRecExpr>(S)->getNumOperands() >= 2 && 2433 "Polynomial should be at least linear"); 2434 Cost += costAndCollectOperands<SCEVAddRecExpr>( 2435 WorkItem, TTI, CostKind, Worklist); 2436 return Cost > Budget; 2437 } 2438 } 2439 llvm_unreachable("Unknown SCEV kind!"); 2440 } 2441 2442 Value *SCEVExpander::expandCodeForPredicate(const SCEVPredicate *Pred, 2443 Instruction *IP) { 2444 assert(IP); 2445 switch (Pred->getKind()) { 2446 case SCEVPredicate::P_Union: 2447 return expandUnionPredicate(cast<SCEVUnionPredicate>(Pred), IP); 2448 case SCEVPredicate::P_Equal: 2449 return expandEqualPredicate(cast<SCEVEqualPredicate>(Pred), IP); 2450 case SCEVPredicate::P_Wrap: { 2451 auto *AddRecPred = cast<SCEVWrapPredicate>(Pred); 2452 return expandWrapPredicate(AddRecPred, IP); 2453 } 2454 } 2455 llvm_unreachable("Unknown SCEV predicate type"); 2456 } 2457 2458 Value *SCEVExpander::expandEqualPredicate(const SCEVEqualPredicate *Pred, 2459 Instruction *IP) { 2460 Value *Expr0 = 2461 expandCodeForImpl(Pred->getLHS(), Pred->getLHS()->getType(), IP, false); 2462 Value *Expr1 = 2463 expandCodeForImpl(Pred->getRHS(), Pred->getRHS()->getType(), IP, false); 2464 2465 Builder.SetInsertPoint(IP); 2466 auto *I = Builder.CreateICmpNE(Expr0, Expr1, "ident.check"); 2467 return I; 2468 } 2469 2470 Value *SCEVExpander::generateOverflowCheck(const SCEVAddRecExpr *AR, 2471 Instruction *Loc, bool Signed) { 2472 assert(AR->isAffine() && "Cannot generate RT check for " 2473 "non-affine expression"); 2474 2475 SCEVUnionPredicate Pred; 2476 const SCEV *ExitCount = 2477 SE.getPredicatedBackedgeTakenCount(AR->getLoop(), Pred); 2478 2479 assert(!isa<SCEVCouldNotCompute>(ExitCount) && "Invalid loop count"); 2480 2481 const SCEV *Step = AR->getStepRecurrence(SE); 2482 const SCEV *Start = AR->getStart(); 2483 2484 Type *ARTy = AR->getType(); 2485 unsigned SrcBits = SE.getTypeSizeInBits(ExitCount->getType()); 2486 unsigned DstBits = SE.getTypeSizeInBits(ARTy); 2487 2488 // The expression {Start,+,Step} has nusw/nssw if 2489 // Step < 0, Start - |Step| * Backedge <= Start 2490 // Step >= 0, Start + |Step| * Backedge > Start 2491 // and |Step| * Backedge doesn't unsigned overflow. 2492 2493 IntegerType *CountTy = IntegerType::get(Loc->getContext(), SrcBits); 2494 Builder.SetInsertPoint(Loc); 2495 Value *TripCountVal = expandCodeForImpl(ExitCount, CountTy, Loc, false); 2496 2497 IntegerType *Ty = 2498 IntegerType::get(Loc->getContext(), SE.getTypeSizeInBits(ARTy)); 2499 Type *ARExpandTy = DL.isNonIntegralPointerType(ARTy) ? ARTy : Ty; 2500 2501 Value *StepValue = expandCodeForImpl(Step, Ty, Loc, false); 2502 Value *NegStepValue = 2503 expandCodeForImpl(SE.getNegativeSCEV(Step), Ty, Loc, false); 2504 Value *StartValue = expandCodeForImpl( 2505 isa<PointerType>(ARExpandTy) ? Start 2506 : SE.getPtrToIntExpr(Start, ARExpandTy), 2507 ARExpandTy, Loc, false); 2508 2509 ConstantInt *Zero = 2510 ConstantInt::get(Loc->getContext(), APInt::getNullValue(DstBits)); 2511 2512 Builder.SetInsertPoint(Loc); 2513 // Compute |Step| 2514 Value *StepCompare = Builder.CreateICmp(ICmpInst::ICMP_SLT, StepValue, Zero); 2515 Value *AbsStep = Builder.CreateSelect(StepCompare, NegStepValue, StepValue); 2516 2517 // Get the backedge taken count and truncate or extended to the AR type. 
2518 Value *TruncTripCount = Builder.CreateZExtOrTrunc(TripCountVal, Ty); 2519 auto *MulF = Intrinsic::getDeclaration(Loc->getModule(), 2520 Intrinsic::umul_with_overflow, Ty); 2521 2522 // Compute |Step| * Backedge 2523 CallInst *Mul = Builder.CreateCall(MulF, {AbsStep, TruncTripCount}, "mul"); 2524 Value *MulV = Builder.CreateExtractValue(Mul, 0, "mul.result"); 2525 Value *OfMul = Builder.CreateExtractValue(Mul, 1, "mul.overflow"); 2526 2527 // Compute: 2528 // Start + |Step| * Backedge < Start 2529 // Start - |Step| * Backedge > Start 2530 Value *Add = nullptr, *Sub = nullptr; 2531 if (PointerType *ARPtrTy = dyn_cast<PointerType>(ARExpandTy)) { 2532 const SCEV *MulS = SE.getSCEV(MulV); 2533 const SCEV *NegMulS = SE.getNegativeSCEV(MulS); 2534 Add = Builder.CreateBitCast(expandAddToGEP(MulS, ARPtrTy, Ty, StartValue), 2535 ARPtrTy); 2536 Sub = Builder.CreateBitCast( 2537 expandAddToGEP(NegMulS, ARPtrTy, Ty, StartValue), ARPtrTy); 2538 } else { 2539 Add = Builder.CreateAdd(StartValue, MulV); 2540 Sub = Builder.CreateSub(StartValue, MulV); 2541 } 2542 2543 Value *EndCompareGT = Builder.CreateICmp( 2544 Signed ? ICmpInst::ICMP_SGT : ICmpInst::ICMP_UGT, Sub, StartValue); 2545 2546 Value *EndCompareLT = Builder.CreateICmp( 2547 Signed ? ICmpInst::ICMP_SLT : ICmpInst::ICMP_ULT, Add, StartValue); 2548 2549 // Select the answer based on the sign of Step. 2550 Value *EndCheck = 2551 Builder.CreateSelect(StepCompare, EndCompareGT, EndCompareLT); 2552 2553 // If the backedge taken count type is larger than the AR type, 2554 // check that we don't drop any bits by truncating it. If we are 2555 // dropping bits, then we have overflow (unless the step is zero). 2556 if (SE.getTypeSizeInBits(CountTy) > SE.getTypeSizeInBits(Ty)) { 2557 auto MaxVal = APInt::getMaxValue(DstBits).zext(SrcBits); 2558 auto *BackedgeCheck = 2559 Builder.CreateICmp(ICmpInst::ICMP_UGT, TripCountVal, 2560 ConstantInt::get(Loc->getContext(), MaxVal)); 2561 BackedgeCheck = Builder.CreateAnd( 2562 BackedgeCheck, Builder.CreateICmp(ICmpInst::ICMP_NE, StepValue, Zero)); 2563 2564 EndCheck = Builder.CreateOr(EndCheck, BackedgeCheck); 2565 } 2566 2567 return Builder.CreateOr(EndCheck, OfMul); 2568 } 2569 2570 Value *SCEVExpander::expandWrapPredicate(const SCEVWrapPredicate *Pred, 2571 Instruction *IP) { 2572 const auto *A = cast<SCEVAddRecExpr>(Pred->getExpr()); 2573 Value *NSSWCheck = nullptr, *NUSWCheck = nullptr; 2574 2575 // Add a check for NUSW 2576 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNUSW) 2577 NUSWCheck = generateOverflowCheck(A, IP, false); 2578 2579 // Add a check for NSSW 2580 if (Pred->getFlags() & SCEVWrapPredicate::IncrementNSSW) 2581 NSSWCheck = generateOverflowCheck(A, IP, true); 2582 2583 if (NUSWCheck && NSSWCheck) 2584 return Builder.CreateOr(NUSWCheck, NSSWCheck); 2585 2586 if (NUSWCheck) 2587 return NUSWCheck; 2588 2589 if (NSSWCheck) 2590 return NSSWCheck; 2591 2592 return ConstantInt::getFalse(IP->getContext()); 2593 } 2594 2595 Value *SCEVExpander::expandUnionPredicate(const SCEVUnionPredicate *Union, 2596 Instruction *IP) { 2597 auto *BoolType = IntegerType::get(IP->getContext(), 1); 2598 Value *Check = ConstantInt::getNullValue(BoolType); 2599 2600 // Loop over all checks in this set. 
2601 for (auto Pred : Union->getPredicates()) { 2602 auto *NextCheck = expandCodeForPredicate(Pred, IP); 2603 Builder.SetInsertPoint(IP); 2604 Check = Builder.CreateOr(Check, NextCheck); 2605 } 2606 2607 return Check; 2608 } 2609 2610 Value *SCEVExpander::fixupLCSSAFormFor(Instruction *User, unsigned OpIdx) { 2611 assert(PreserveLCSSA); 2612 SmallVector<Instruction *, 1> ToUpdate; 2613 2614 auto *OpV = User->getOperand(OpIdx); 2615 auto *OpI = dyn_cast<Instruction>(OpV); 2616 if (!OpI) 2617 return OpV; 2618 2619 Loop *DefLoop = SE.LI.getLoopFor(OpI->getParent()); 2620 Loop *UseLoop = SE.LI.getLoopFor(User->getParent()); 2621 if (!DefLoop || UseLoop == DefLoop || DefLoop->contains(UseLoop)) 2622 return OpV; 2623 2624 ToUpdate.push_back(OpI); 2625 SmallVector<PHINode *, 16> PHIsToRemove; 2626 formLCSSAForInstructions(ToUpdate, SE.DT, SE.LI, &SE, Builder, &PHIsToRemove); 2627 for (PHINode *PN : PHIsToRemove) { 2628 if (!PN->use_empty()) 2629 continue; 2630 InsertedValues.erase(PN); 2631 InsertedPostIncValues.erase(PN); 2632 PN->eraseFromParent(); 2633 } 2634 2635 return User->getOperand(OpIdx); 2636 } 2637 2638 namespace { 2639 // Search for a SCEV subexpression that is not safe to expand. Any expression 2640 // that may expand to a !isSafeToSpeculativelyExecute value is unsafe, namely 2641 // UDiv expressions. We don't know if the UDiv is derived from an IR divide 2642 // instruction, but the important thing is that we prove the denominator is 2643 // nonzero before expansion. 2644 // 2645 // IVUsers already checks that IV-derived expressions are safe. So this check is 2646 // only needed when the expression includes some subexpression that is not IV 2647 // derived. 2648 // 2649 // Currently, we only allow division by a nonzero constant here. If this is 2650 // inadequate, we could easily allow division by SCEVUnknown by using 2651 // ValueTracking to check isKnownNonZero(). 2652 // 2653 // We cannot generally expand recurrences unless the step dominates the loop 2654 // header. The expander handles the special case of affine recurrences by 2655 // scaling the recurrence outside the loop, but this technique isn't generally 2656 // applicable. Expanding a nested recurrence outside a loop requires computing 2657 // binomial coefficients. This could be done, but the recurrence has to be in a 2658 // perfectly reduced form, which can't be guaranteed. 
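// For example, (%a /u %b) is only treated as safe when %b is a constant known
// to be non-zero, and a non-affine recurrence is rejected when its step is not
// available above the loop it runs in.
//
// A typical (illustrative) way for a client pass to use these helpers before
// asking SCEVExpander to materialize an expression:
//
//   if (isSafeToExpandAt(S, InsertPt, SE)) {
//     SCEVExpander Expander(SE, DL, "scev.expander");
//     Value *V = Expander.expandCodeFor(S, S->getType(), InsertPt);
//     ...
//   }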
2659 struct SCEVFindUnsafe { 2660 ScalarEvolution &SE; 2661 bool IsUnsafe; 2662 2663 SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {} 2664 2665 bool follow(const SCEV *S) { 2666 if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S)) { 2667 const SCEVConstant *SC = dyn_cast<SCEVConstant>(D->getRHS()); 2668 if (!SC || SC->getValue()->isZero()) { 2669 IsUnsafe = true; 2670 return false; 2671 } 2672 } 2673 if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S)) { 2674 const SCEV *Step = AR->getStepRecurrence(SE); 2675 if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { 2676 IsUnsafe = true; 2677 return false; 2678 } 2679 } 2680 return true; 2681 } 2682 bool isDone() const { return IsUnsafe; } 2683 }; 2684 } 2685 2686 namespace llvm { 2687 bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { 2688 SCEVFindUnsafe Search(SE); 2689 visitAll(S, Search); 2690 return !Search.IsUnsafe; 2691 } 2692 2693 bool isSafeToExpandAt(const SCEV *S, const Instruction *InsertionPoint, 2694 ScalarEvolution &SE) { 2695 if (!isSafeToExpand(S, SE)) 2696 return false; 2697 // We have to prove that the expanded site of S dominates InsertionPoint. 2698 // This is easy when not in the same block, but hard when S is an instruction 2699 // to be expanded somewhere inside the same block as our insertion point. 2700 // What we really need here is something analogous to an OrderedBasicBlock, 2701 // but for the moment, we paper over the problem by handling two common and 2702 // cheap to check cases. 2703 if (SE.properlyDominates(S, InsertionPoint->getParent())) 2704 return true; 2705 if (SE.dominates(S, InsertionPoint->getParent())) { 2706 if (InsertionPoint->getParent()->getTerminator() == InsertionPoint) 2707 return true; 2708 if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) 2709 if (llvm::is_contained(InsertionPoint->operand_values(), U->getValue())) 2710 return true; 2711 } 2712 return false; 2713 } 2714 2715 void SCEVExpanderCleaner::cleanup() { 2716 // Result is used, nothing to remove. 2717 if (ResultUsed) 2718 return; 2719 2720 auto InsertedInstructions = Expander.getAllInsertedInstructions(); 2721 #ifndef NDEBUG 2722 SmallPtrSet<Instruction *, 8> InsertedSet(InsertedInstructions.begin(), 2723 InsertedInstructions.end()); 2724 (void)InsertedSet; 2725 #endif 2726 // Remove sets with value handles. 2727 Expander.clear(); 2728 2729 // Sort so that earlier instructions do not dominate later instructions. 2730 stable_sort(InsertedInstructions, [this](Instruction *A, Instruction *B) { 2731 return DT.dominates(B, A); 2732 }); 2733 // Remove all inserted instructions. 2734 for (Instruction *I : InsertedInstructions) { 2735 2736 #ifndef NDEBUG 2737 assert(all_of(I->users(), 2738 [&InsertedSet](Value *U) { 2739 return InsertedSet.contains(cast<Instruction>(U)); 2740 }) && 2741 "removed instruction should only be used by instructions inserted " 2742 "during expansion"); 2743 #endif 2744 assert(!I->getType()->isVoidTy() && 2745 "inserted instruction should have non-void types"); 2746 I->replaceAllUsesWith(UndefValue::get(I->getType())); 2747 I->eraseFromParent(); 2748 } 2749 } 2750 } 2751