1 //===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file "describes" induction and recurrence variables.
10 //
11 //===----------------------------------------------------------------------===//
12
13 #include "llvm/Analysis/IVDescriptors.h"
14 #include "llvm/ADT/ScopeExit.h"
15 #include "llvm/Analysis/BasicAliasAnalysis.h"
16 #include "llvm/Analysis/DemandedBits.h"
17 #include "llvm/Analysis/DomTreeUpdater.h"
18 #include "llvm/Analysis/GlobalsModRef.h"
19 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/LoopPass.h"
22 #include "llvm/Analysis/MustExecute.h"
23 #include "llvm/Analysis/ScalarEvolution.h"
24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
25 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/ValueHandle.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/KnownBits.h"
36
37 using namespace llvm;
38 using namespace llvm::PatternMatch;
39
40 #define DEBUG_TYPE "iv-descriptors"
41
areAllUsesIn(Instruction * I,SmallPtrSetImpl<Instruction * > & Set)42 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
43 SmallPtrSetImpl<Instruction *> &Set) {
44 for (User::op_iterator Use = I->op_begin(), E = I->op_end(); Use != E; ++Use)
45 if (!Set.count(dyn_cast<Instruction>(*Use)))
46 return false;
47 return true;
48 }
49
isIntegerRecurrenceKind(RecurKind Kind)50 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
51 switch (Kind) {
52 default:
53 break;
54 case RecurKind::Add:
55 case RecurKind::Mul:
56 case RecurKind::Or:
57 case RecurKind::And:
58 case RecurKind::Xor:
59 case RecurKind::SMax:
60 case RecurKind::SMin:
61 case RecurKind::UMax:
62 case RecurKind::UMin:
63 return true;
64 }
65 return false;
66 }
67
isFloatingPointRecurrenceKind(RecurKind Kind)68 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) {
69 return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
70 }
71
isArithmeticRecurrenceKind(RecurKind Kind)72 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) {
73 switch (Kind) {
74 default:
75 break;
76 case RecurKind::Add:
77 case RecurKind::Mul:
78 case RecurKind::FAdd:
79 case RecurKind::FMul:
80 return true;
81 }
82 return false;
83 }
84
85 /// Determines if Phi may have been type-promoted. If Phi has a single user
86 /// that ANDs the Phi with a type mask, return the user. RT is updated to
87 /// account for the narrower bit width represented by the mask, and the AND
88 /// instruction is added to CI.
lookThroughAnd(PHINode * Phi,Type * & RT,SmallPtrSetImpl<Instruction * > & Visited,SmallPtrSetImpl<Instruction * > & CI)89 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
90 SmallPtrSetImpl<Instruction *> &Visited,
91 SmallPtrSetImpl<Instruction *> &CI) {
92 if (!Phi->hasOneUse())
93 return Phi;
94
95 const APInt *M = nullptr;
96 Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
97
98 // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
99 // with a new integer type of the corresponding bit width.
100 if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
101 int32_t Bits = (*M + 1).exactLogBase2();
102 if (Bits > 0) {
103 RT = IntegerType::get(Phi->getContext(), Bits);
104 Visited.insert(Phi);
105 CI.insert(J);
106 return J;
107 }
108 }
109 return Phi;
110 }
111
112 /// Compute the minimal bit width needed to represent a reduction whose exit
113 /// instruction is given by Exit.
computeRecurrenceType(Instruction * Exit,DemandedBits * DB,AssumptionCache * AC,DominatorTree * DT)114 static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
115 DemandedBits *DB,
116 AssumptionCache *AC,
117 DominatorTree *DT) {
118 bool IsSigned = false;
119 const DataLayout &DL = Exit->getModule()->getDataLayout();
120 uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
121
122 if (DB) {
123 // Use the demanded bits analysis to determine the bits that are live out
124 // of the exit instruction, rounding up to the nearest power of two. If the
125 // use of demanded bits results in a smaller bit width, we know the value
126 // must be positive (i.e., IsSigned = false), because if this were not the
127 // case, the sign bit would have been demanded.
128 auto Mask = DB->getDemandedBits(Exit);
129 MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
130 }
131
132 if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
133 // If demanded bits wasn't able to limit the bit width, we can try to use
134 // value tracking instead. This can be the case, for example, if the value
135 // may be negative.
136 auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
137 auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
138 MaxBitWidth = NumTypeBits - NumSignBits;
139 KnownBits Bits = computeKnownBits(Exit, DL);
140 if (!Bits.isNonNegative()) {
141 // If the value is not known to be non-negative, we set IsSigned to true,
142 // meaning that we will use sext instructions instead of zext
143 // instructions to restore the original type.
144 IsSigned = true;
145 if (!Bits.isNegative())
146 // If the value is not known to be negative, we don't known what the
147 // upper bit is, and therefore, we don't know what kind of extend we
148 // will need. In this case, just increase the bit width by one bit and
149 // use sext.
150 ++MaxBitWidth;
151 }
152 }
153 if (!isPowerOf2_64(MaxBitWidth))
154 MaxBitWidth = NextPowerOf2(MaxBitWidth);
155
156 return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
157 IsSigned);
158 }
159
160 /// Collect cast instructions that can be ignored in the vectorizer's cost
161 /// model, given a reduction exit value and the minimal type in which the
162 /// reduction can be represented.
collectCastsToIgnore(Loop * TheLoop,Instruction * Exit,Type * RecurrenceType,SmallPtrSetImpl<Instruction * > & Casts)163 static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
164 Type *RecurrenceType,
165 SmallPtrSetImpl<Instruction *> &Casts) {
166
167 SmallVector<Instruction *, 8> Worklist;
168 SmallPtrSet<Instruction *, 8> Visited;
169 Worklist.push_back(Exit);
170
171 while (!Worklist.empty()) {
172 Instruction *Val = Worklist.pop_back_val();
173 Visited.insert(Val);
174 if (auto *Cast = dyn_cast<CastInst>(Val))
175 if (Cast->getSrcTy() == RecurrenceType) {
176 // If the source type of a cast instruction is equal to the recurrence
177 // type, it will be eliminated, and should be ignored in the vectorizer
178 // cost model.
179 Casts.insert(Cast);
180 continue;
181 }
182
183 // Add all operands to the work list if they are loop-varying values that
184 // we haven't yet visited.
185 for (Value *O : cast<User>(Val)->operands())
186 if (auto *I = dyn_cast<Instruction>(O))
187 if (TheLoop->contains(I) && !Visited.count(I))
188 Worklist.push_back(I);
189 }
190 }
191
192 // Check if a given Phi node can be recognized as an ordered reduction for
193 // vectorizing floating point operations without unsafe math.
checkOrderedReduction(RecurKind Kind,Instruction * ExactFPMathInst,Instruction * Exit,PHINode * Phi)194 static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
195 Instruction *Exit, PHINode *Phi) {
196 // Currently only FAdd is supported
197 if (Kind != RecurKind::FAdd)
198 return false;
199
200 bool IsOrdered =
201 Exit->getOpcode() == Instruction::FAdd && Exit == ExactFPMathInst;
202
203 // The only pattern accepted is the one in which the reduction PHI
204 // is used as one of the operands of the exit instruction
205 auto *LHS = Exit->getOperand(0);
206 auto *RHS = Exit->getOperand(1);
207 IsOrdered &= ((LHS == Phi) || (RHS == Phi));
208
209 if (!IsOrdered)
210 return false;
211
212 LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
213 << ", ExitInst: " << *Exit << "\n");
214
215 return true;
216 }
217
AddReductionVar(PHINode * Phi,RecurKind Kind,Loop * TheLoop,FastMathFlags FuncFMF,RecurrenceDescriptor & RedDes,DemandedBits * DB,AssumptionCache * AC,DominatorTree * DT)218 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
219 Loop *TheLoop, FastMathFlags FuncFMF,
220 RecurrenceDescriptor &RedDes,
221 DemandedBits *DB,
222 AssumptionCache *AC,
223 DominatorTree *DT) {
224 if (Phi->getNumIncomingValues() != 2)
225 return false;
226
227 // Reduction variables are only found in the loop header block.
228 if (Phi->getParent() != TheLoop->getHeader())
229 return false;
230
231 // Obtain the reduction start value from the value that comes from the loop
232 // preheader.
233 Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
234
235 // ExitInstruction is the single value which is used outside the loop.
236 // We only allow for a single reduction value to be used outside the loop.
237 // This includes users of the reduction, variables (which form a cycle
238 // which ends in the phi node).
239 Instruction *ExitInstruction = nullptr;
240 // Indicates that we found a reduction operation in our scan.
241 bool FoundReduxOp = false;
242
243 // We start with the PHI node and scan for all of the users of this
244 // instruction. All users must be instructions that can be used as reduction
245 // variables (such as ADD). We must have a single out-of-block user. The cycle
246 // must include the original PHI.
247 bool FoundStartPHI = false;
248
249 // To recognize min/max patterns formed by a icmp select sequence, we store
250 // the number of instruction we saw from the recognized min/max pattern,
251 // to make sure we only see exactly the two instructions.
252 unsigned NumCmpSelectPatternInst = 0;
253 InstDesc ReduxDesc(false, nullptr);
254
255 // Data used for determining if the recurrence has been type-promoted.
256 Type *RecurrenceType = Phi->getType();
257 SmallPtrSet<Instruction *, 4> CastInsts;
258 Instruction *Start = Phi;
259 bool IsSigned = false;
260
261 SmallPtrSet<Instruction *, 8> VisitedInsts;
262 SmallVector<Instruction *, 8> Worklist;
263
264 // Return early if the recurrence kind does not match the type of Phi. If the
265 // recurrence kind is arithmetic, we attempt to look through AND operations
266 // resulting from the type promotion performed by InstCombine. Vector
267 // operations are not limited to the legal integer widths, so we may be able
268 // to evaluate the reduction in the narrower width.
269 if (RecurrenceType->isFloatingPointTy()) {
270 if (!isFloatingPointRecurrenceKind(Kind))
271 return false;
272 } else if (RecurrenceType->isIntegerTy()) {
273 if (!isIntegerRecurrenceKind(Kind))
274 return false;
275 if (isArithmeticRecurrenceKind(Kind))
276 Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
277 } else {
278 // Pointer min/max may exist, but it is not supported as a reduction op.
279 return false;
280 }
281
282 Worklist.push_back(Start);
283 VisitedInsts.insert(Start);
284
285 // Start with all flags set because we will intersect this with the reduction
286 // flags from all the reduction operations.
287 FastMathFlags FMF = FastMathFlags::getFast();
288
289 // A value in the reduction can be used:
290 // - By the reduction:
291 // - Reduction operation:
292 // - One use of reduction value (safe).
293 // - Multiple use of reduction value (not safe).
294 // - PHI:
295 // - All uses of the PHI must be the reduction (safe).
296 // - Otherwise, not safe.
297 // - By instructions outside of the loop (safe).
298 // * One value may have several outside users, but all outside
299 // uses must be of the same value.
300 // - By an instruction that is not part of the reduction (not safe).
301 // This is either:
302 // * An instruction type other than PHI or the reduction operation.
303 // * A PHI in the header other than the initial PHI.
304 while (!Worklist.empty()) {
305 Instruction *Cur = Worklist.pop_back_val();
306
307 // No Users.
308 // If the instruction has no users then this is a broken chain and can't be
309 // a reduction variable.
310 if (Cur->use_empty())
311 return false;
312
313 bool IsAPhi = isa<PHINode>(Cur);
314
315 // A header PHI use other than the original PHI.
316 if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
317 return false;
318
319 // Reductions of instructions such as Div, and Sub is only possible if the
320 // LHS is the reduction variable.
321 if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
322 !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
323 !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
324 return false;
325
326 // Any reduction instruction must be of one of the allowed kinds. We ignore
327 // the starting value (the Phi or an AND instruction if the Phi has been
328 // type-promoted).
329 if (Cur != Start) {
330 ReduxDesc = isRecurrenceInstr(Cur, Kind, ReduxDesc, FuncFMF);
331 if (!ReduxDesc.isRecurrence())
332 return false;
333 // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
334 if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
335 FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
336 if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
337 // Accept FMF on either fcmp or select of a min/max idiom.
338 // TODO: This is a hack to work-around the fact that FMF may not be
339 // assigned/propagated correctly. If that problem is fixed or we
340 // standardize on fmin/fmax via intrinsics, this can be removed.
341 if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
342 CurFMF |= FCmp->getFastMathFlags();
343 }
344 FMF &= CurFMF;
345 }
346 // Update this reduction kind if we matched a new instruction.
347 // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
348 // state accurate while processing the worklist?
349 if (ReduxDesc.getRecKind() != RecurKind::None)
350 Kind = ReduxDesc.getRecKind();
351 }
352
353 bool IsASelect = isa<SelectInst>(Cur);
354
355 // A conditional reduction operation must only have 2 or less uses in
356 // VisitedInsts.
357 if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
358 hasMultipleUsesOf(Cur, VisitedInsts, 2))
359 return false;
360
361 // A reduction operation must only have one use of the reduction value.
362 if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
363 hasMultipleUsesOf(Cur, VisitedInsts, 1))
364 return false;
365
366 // All inputs to a PHI node must be a reduction value.
367 if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
368 return false;
369
370 if (isIntMinMaxRecurrenceKind(Kind) &&
371 (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
372 ++NumCmpSelectPatternInst;
373 if (isFPMinMaxRecurrenceKind(Kind) &&
374 (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
375 ++NumCmpSelectPatternInst;
376
377 // Check whether we found a reduction operator.
378 FoundReduxOp |= !IsAPhi && Cur != Start;
379
380 // Process users of current instruction. Push non-PHI nodes after PHI nodes
381 // onto the stack. This way we are going to have seen all inputs to PHI
382 // nodes once we get to them.
383 SmallVector<Instruction *, 8> NonPHIs;
384 SmallVector<Instruction *, 8> PHIs;
385 for (User *U : Cur->users()) {
386 Instruction *UI = cast<Instruction>(U);
387
388 // Check if we found the exit user.
389 BasicBlock *Parent = UI->getParent();
390 if (!TheLoop->contains(Parent)) {
391 // If we already know this instruction is used externally, move on to
392 // the next user.
393 if (ExitInstruction == Cur)
394 continue;
395
396 // Exit if you find multiple values used outside or if the header phi
397 // node is being used. In this case the user uses the value of the
398 // previous iteration, in which case we would loose "VF-1" iterations of
399 // the reduction operation if we vectorize.
400 if (ExitInstruction != nullptr || Cur == Phi)
401 return false;
402
403 // The instruction used by an outside user must be the last instruction
404 // before we feed back to the reduction phi. Otherwise, we loose VF-1
405 // operations on the value.
406 if (!is_contained(Phi->operands(), Cur))
407 return false;
408
409 ExitInstruction = Cur;
410 continue;
411 }
412
413 // Process instructions only once (termination). Each reduction cycle
414 // value must only be used once, except by phi nodes and min/max
415 // reductions which are represented as a cmp followed by a select.
416 InstDesc IgnoredVal(false, nullptr);
417 if (VisitedInsts.insert(UI).second) {
418 if (isa<PHINode>(UI))
419 PHIs.push_back(UI);
420 else
421 NonPHIs.push_back(UI);
422 } else if (!isa<PHINode>(UI) &&
423 ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
424 !isa<SelectInst>(UI)) ||
425 (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
426 !isMinMaxSelectCmpPattern(UI, IgnoredVal).isRecurrence())))
427 return false;
428
429 // Remember that we completed the cycle.
430 if (UI == Phi)
431 FoundStartPHI = true;
432 }
433 Worklist.append(PHIs.begin(), PHIs.end());
434 Worklist.append(NonPHIs.begin(), NonPHIs.end());
435 }
436
437 // This means we have seen one but not the other instruction of the
438 // pattern or more than just a select and cmp.
439 if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2)
440 return false;
441
442 if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
443 return false;
444
445 const bool IsOrdered = checkOrderedReduction(
446 Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi);
447
448 if (Start != Phi) {
449 // If the starting value is not the same as the phi node, we speculatively
450 // looked through an 'and' instruction when evaluating a potential
451 // arithmetic reduction to determine if it may have been type-promoted.
452 //
453 // We now compute the minimal bit width that is required to represent the
454 // reduction. If this is the same width that was indicated by the 'and', we
455 // can represent the reduction in the smaller type. The 'and' instruction
456 // will be eliminated since it will essentially be a cast instruction that
457 // can be ignore in the cost model. If we compute a different type than we
458 // did when evaluating the 'and', the 'and' will not be eliminated, and we
459 // will end up with different kinds of operations in the recurrence
460 // expression (e.g., IntegerAND, IntegerADD). We give up if this is
461 // the case.
462 //
463 // The vectorizer relies on InstCombine to perform the actual
464 // type-shrinking. It does this by inserting instructions to truncate the
465 // exit value of the reduction to the width indicated by RecurrenceType and
466 // then extend this value back to the original width. If IsSigned is false,
467 // a 'zext' instruction will be generated; otherwise, a 'sext' will be
468 // used.
469 //
470 // TODO: We should not rely on InstCombine to rewrite the reduction in the
471 // smaller type. We should just generate a correctly typed expression
472 // to begin with.
473 Type *ComputedType;
474 std::tie(ComputedType, IsSigned) =
475 computeRecurrenceType(ExitInstruction, DB, AC, DT);
476 if (ComputedType != RecurrenceType)
477 return false;
478
479 // The recurrence expression will be represented in a narrower type. If
480 // there are any cast instructions that will be unnecessary, collect them
481 // in CastInsts. Note that the 'and' instruction was already included in
482 // this list.
483 //
484 // TODO: A better way to represent this may be to tag in some way all the
485 // instructions that are a part of the reduction. The vectorizer cost
486 // model could then apply the recurrence type to these instructions,
487 // without needing a white list of instructions to ignore.
488 // This may also be useful for the inloop reductions, if it can be
489 // kept simple enough.
490 collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
491 }
492
493 // We found a reduction var if we have reached the original phi node and we
494 // only have a single instruction with out-of-loop users.
495
496 // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
497 // is saved as part of the RecurrenceDescriptor.
498
499 // Save the description of this reduction variable.
500 RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF,
501 ReduxDesc.getExactFPMathInst(), RecurrenceType,
502 IsSigned, IsOrdered, CastInsts);
503 RedDes = RD;
504
505 return true;
506 }
507
508 RecurrenceDescriptor::InstDesc
isMinMaxSelectCmpPattern(Instruction * I,const InstDesc & Prev)509 RecurrenceDescriptor::isMinMaxSelectCmpPattern(Instruction *I,
510 const InstDesc &Prev) {
511 assert((isa<CmpInst>(I) || isa<SelectInst>(I)) &&
512 "Expected a cmp or select instruction");
513
514 // We must handle the select(cmp()) as a single instruction. Advance to the
515 // select.
516 CmpInst::Predicate Pred;
517 if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
518 if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
519 return InstDesc(Select, Prev.getRecKind());
520 }
521
522 // Only match select with single use cmp condition.
523 if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
524 m_Value())))
525 return InstDesc(false, I);
526
527 // Look for a min/max pattern.
528 if (match(I, m_UMin(m_Value(), m_Value())))
529 return InstDesc(I, RecurKind::UMin);
530 if (match(I, m_UMax(m_Value(), m_Value())))
531 return InstDesc(I, RecurKind::UMax);
532 if (match(I, m_SMax(m_Value(), m_Value())))
533 return InstDesc(I, RecurKind::SMax);
534 if (match(I, m_SMin(m_Value(), m_Value())))
535 return InstDesc(I, RecurKind::SMin);
536 if (match(I, m_OrdFMin(m_Value(), m_Value())))
537 return InstDesc(I, RecurKind::FMin);
538 if (match(I, m_OrdFMax(m_Value(), m_Value())))
539 return InstDesc(I, RecurKind::FMax);
540 if (match(I, m_UnordFMin(m_Value(), m_Value())))
541 return InstDesc(I, RecurKind::FMin);
542 if (match(I, m_UnordFMax(m_Value(), m_Value())))
543 return InstDesc(I, RecurKind::FMax);
544
545 return InstDesc(false, I);
546 }
547
548 /// Returns true if the select instruction has users in the compare-and-add
549 /// reduction pattern below. The select instruction argument is the last one
550 /// in the sequence.
551 ///
552 /// %sum.1 = phi ...
553 /// ...
554 /// %cmp = fcmp pred %0, %CFP
555 /// %add = fadd %0, %sum.1
556 /// %sum.2 = select %cmp, %add, %sum.1
557 RecurrenceDescriptor::InstDesc
isConditionalRdxPattern(RecurKind Kind,Instruction * I)558 RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
559 SelectInst *SI = dyn_cast<SelectInst>(I);
560 if (!SI)
561 return InstDesc(false, I);
562
563 CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
564 // Only handle single use cases for now.
565 if (!CI || !CI->hasOneUse())
566 return InstDesc(false, I);
567
568 Value *TrueVal = SI->getTrueValue();
569 Value *FalseVal = SI->getFalseValue();
570 // Handle only when either of operands of select instruction is a PHI
571 // node for now.
572 if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
573 (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
574 return InstDesc(false, I);
575
576 Instruction *I1 =
577 isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
578 : dyn_cast<Instruction>(TrueVal);
579 if (!I1 || !I1->isBinaryOp())
580 return InstDesc(false, I);
581
582 Value *Op1, *Op2;
583 if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1) ||
584 m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
585 I1->isFast())
586 return InstDesc(Kind == RecurKind::FAdd, SI);
587
588 if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
589 return InstDesc(Kind == RecurKind::FMul, SI);
590
591 return InstDesc(false, I);
592 }
593
594 RecurrenceDescriptor::InstDesc
isRecurrenceInstr(Instruction * I,RecurKind Kind,InstDesc & Prev,FastMathFlags FMF)595 RecurrenceDescriptor::isRecurrenceInstr(Instruction *I, RecurKind Kind,
596 InstDesc &Prev, FastMathFlags FMF) {
597 switch (I->getOpcode()) {
598 default:
599 return InstDesc(false, I);
600 case Instruction::PHI:
601 return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
602 case Instruction::Sub:
603 case Instruction::Add:
604 return InstDesc(Kind == RecurKind::Add, I);
605 case Instruction::Mul:
606 return InstDesc(Kind == RecurKind::Mul, I);
607 case Instruction::And:
608 return InstDesc(Kind == RecurKind::And, I);
609 case Instruction::Or:
610 return InstDesc(Kind == RecurKind::Or, I);
611 case Instruction::Xor:
612 return InstDesc(Kind == RecurKind::Xor, I);
613 case Instruction::FDiv:
614 case Instruction::FMul:
615 return InstDesc(Kind == RecurKind::FMul, I,
616 I->hasAllowReassoc() ? nullptr : I);
617 case Instruction::FSub:
618 case Instruction::FAdd:
619 return InstDesc(Kind == RecurKind::FAdd, I,
620 I->hasAllowReassoc() ? nullptr : I);
621 case Instruction::Select:
622 if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul)
623 return isConditionalRdxPattern(Kind, I);
624 LLVM_FALLTHROUGH;
625 case Instruction::FCmp:
626 case Instruction::ICmp:
627 if (isIntMinMaxRecurrenceKind(Kind) ||
628 (FMF.noNaNs() && FMF.noSignedZeros() && isFPMinMaxRecurrenceKind(Kind)))
629 return isMinMaxSelectCmpPattern(I, Prev);
630 return InstDesc(false, I);
631 }
632 }
633
hasMultipleUsesOf(Instruction * I,SmallPtrSetImpl<Instruction * > & Insts,unsigned MaxNumUses)634 bool RecurrenceDescriptor::hasMultipleUsesOf(
635 Instruction *I, SmallPtrSetImpl<Instruction *> &Insts,
636 unsigned MaxNumUses) {
637 unsigned NumUses = 0;
638 for (const Use &U : I->operands()) {
639 if (Insts.count(dyn_cast<Instruction>(U)))
640 ++NumUses;
641 if (NumUses > MaxNumUses)
642 return true;
643 }
644
645 return false;
646 }
isReductionPHI(PHINode * Phi,Loop * TheLoop,RecurrenceDescriptor & RedDes,DemandedBits * DB,AssumptionCache * AC,DominatorTree * DT)647 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
648 RecurrenceDescriptor &RedDes,
649 DemandedBits *DB, AssumptionCache *AC,
650 DominatorTree *DT) {
651
652 BasicBlock *Header = TheLoop->getHeader();
653 Function &F = *Header->getParent();
654 FastMathFlags FMF;
655 FMF.setNoNaNs(
656 F.getFnAttribute("no-nans-fp-math").getValueAsBool());
657 FMF.setNoSignedZeros(
658 F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
659
660 if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) {
661 LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
662 return true;
663 }
664 if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT)) {
665 LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
666 return true;
667 }
668 if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT)) {
669 LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
670 return true;
671 }
672 if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT)) {
673 LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
674 return true;
675 }
676 if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT)) {
677 LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
678 return true;
679 }
680 if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
681 LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
682 return true;
683 }
684 if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
685 LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
686 return true;
687 }
688 if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
689 LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
690 return true;
691 }
692 if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
693 LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
694 return true;
695 }
696 if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
697 LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
698 return true;
699 }
700 if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT)) {
701 LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
702 return true;
703 }
704 if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
705 LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
706 return true;
707 }
708 if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
709 LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
710 return true;
711 }
712 // Not a reduction of known type.
713 return false;
714 }
715
isFirstOrderRecurrence(PHINode * Phi,Loop * TheLoop,DenseMap<Instruction *,Instruction * > & SinkAfter,DominatorTree * DT)716 bool RecurrenceDescriptor::isFirstOrderRecurrence(
717 PHINode *Phi, Loop *TheLoop,
718 DenseMap<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
719
720 // Ensure the phi node is in the loop header and has two incoming values.
721 if (Phi->getParent() != TheLoop->getHeader() ||
722 Phi->getNumIncomingValues() != 2)
723 return false;
724
725 // Ensure the loop has a preheader and a single latch block. The loop
726 // vectorizer will need the latch to set up the next iteration of the loop.
727 auto *Preheader = TheLoop->getLoopPreheader();
728 auto *Latch = TheLoop->getLoopLatch();
729 if (!Preheader || !Latch)
730 return false;
731
732 // Ensure the phi node's incoming blocks are the loop preheader and latch.
733 if (Phi->getBasicBlockIndex(Preheader) < 0 ||
734 Phi->getBasicBlockIndex(Latch) < 0)
735 return false;
736
737 // Get the previous value. The previous value comes from the latch edge while
738 // the initial value comes form the preheader edge.
739 auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
740 if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
741 SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
742 return false;
743
744 // Ensure every user of the phi node is dominated by the previous value.
745 // The dominance requirement ensures the loop vectorizer will not need to
746 // vectorize the initial value prior to the first iteration of the loop.
747 // TODO: Consider extending this sinking to handle memory instructions and
748 // phis with multiple users.
749
750 // Returns true, if all users of I are dominated by DominatedBy.
751 auto allUsesDominatedBy = [DT](Instruction *I, Instruction *DominatedBy) {
752 return all_of(I->uses(), [DT, DominatedBy](Use &U) {
753 return DT->dominates(DominatedBy, U);
754 });
755 };
756
757 if (Phi->hasOneUse()) {
758 Instruction *I = Phi->user_back();
759
760 // If the user of the PHI is also the incoming value, we potentially have a
761 // reduction and which cannot be handled by sinking.
762 if (Previous == I)
763 return false;
764
765 // We cannot sink terminator instructions.
766 if (I->getParent()->getTerminator() == I)
767 return false;
768
769 // Do not try to sink an instruction multiple times (if multiple operands
770 // are first order recurrences).
771 // TODO: We can support this case, by sinking the instruction after the
772 // 'deepest' previous instruction.
773 if (SinkAfter.find(I) != SinkAfter.end())
774 return false;
775
776 if (DT->dominates(Previous, I)) // We already are good w/o sinking.
777 return true;
778
779 // We can sink any instruction without side effects, as long as all users
780 // are dominated by the instruction we are sinking after.
781 if (I->getParent() == Phi->getParent() && !I->mayHaveSideEffects() &&
782 allUsesDominatedBy(I, Previous)) {
783 SinkAfter[I] = Previous;
784 return true;
785 }
786 }
787
788 return allUsesDominatedBy(Phi, Previous);
789 }
790
791 /// This function returns the identity element (or neutral element) for
792 /// the operation K.
getRecurrenceIdentity(RecurKind K,Type * Tp,FastMathFlags FMF)793 Constant *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
794 FastMathFlags FMF) {
795 switch (K) {
796 case RecurKind::Xor:
797 case RecurKind::Add:
798 case RecurKind::Or:
799 // Adding, Xoring, Oring zero to a number does not change it.
800 return ConstantInt::get(Tp, 0);
801 case RecurKind::Mul:
802 // Multiplying a number by 1 does not change it.
803 return ConstantInt::get(Tp, 1);
804 case RecurKind::And:
805 // AND-ing a number with an all-1 value does not change it.
806 return ConstantInt::get(Tp, -1, true);
807 case RecurKind::FMul:
808 // Multiplying a number by 1 does not change it.
809 return ConstantFP::get(Tp, 1.0L);
810 case RecurKind::FAdd:
811 // Adding zero to a number does not change it.
812 // FIXME: Ideally we should not need to check FMF for FAdd and should always
813 // use -0.0. However, this will currently result in mixed vectors of 0.0/-0.0.
814 // Instead, we should ensure that 1) the FMF from FAdd are propagated to the PHI
815 // nodes where possible, and 2) PHIs with the nsz flag + -0.0 use 0.0. This would
816 // mean we can then remove the check for noSignedZeros() below (see D98963).
817 if (FMF.noSignedZeros())
818 return ConstantFP::get(Tp, 0.0L);
819 return ConstantFP::get(Tp, -0.0L);
820 case RecurKind::UMin:
821 return ConstantInt::get(Tp, -1);
822 case RecurKind::UMax:
823 return ConstantInt::get(Tp, 0);
824 case RecurKind::SMin:
825 return ConstantInt::get(Tp,
826 APInt::getSignedMaxValue(Tp->getIntegerBitWidth()));
827 case RecurKind::SMax:
828 return ConstantInt::get(Tp,
829 APInt::getSignedMinValue(Tp->getIntegerBitWidth()));
830 case RecurKind::FMin:
831 return ConstantFP::getInfinity(Tp, true);
832 case RecurKind::FMax:
833 return ConstantFP::getInfinity(Tp, false);
834 default:
835 llvm_unreachable("Unknown recurrence kind");
836 }
837 }
838
getOpcode(RecurKind Kind)839 unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
840 switch (Kind) {
841 case RecurKind::Add:
842 return Instruction::Add;
843 case RecurKind::Mul:
844 return Instruction::Mul;
845 case RecurKind::Or:
846 return Instruction::Or;
847 case RecurKind::And:
848 return Instruction::And;
849 case RecurKind::Xor:
850 return Instruction::Xor;
851 case RecurKind::FMul:
852 return Instruction::FMul;
853 case RecurKind::FAdd:
854 return Instruction::FAdd;
855 case RecurKind::SMax:
856 case RecurKind::SMin:
857 case RecurKind::UMax:
858 case RecurKind::UMin:
859 return Instruction::ICmp;
860 case RecurKind::FMax:
861 case RecurKind::FMin:
862 return Instruction::FCmp;
863 default:
864 llvm_unreachable("Unknown recurrence operation");
865 }
866 }
867
868 SmallVector<Instruction *, 4>
getReductionOpChain(PHINode * Phi,Loop * L) const869 RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
870 SmallVector<Instruction *, 4> ReductionOperations;
871 unsigned RedOp = getOpcode(Kind);
872
873 // Search down from the Phi to the LoopExitInstr, looking for instructions
874 // with a single user of the correct type for the reduction.
875
876 // Note that we check that the type of the operand is correct for each item in
877 // the chain, including the last (the loop exit value). This can come up from
878 // sub, which would otherwise be treated as an add reduction. MinMax also need
879 // to check for a pair of icmp/select, for which we use getNextInstruction and
880 // isCorrectOpcode functions to step the right number of instruction, and
881 // check the icmp/select pair.
882 // FIXME: We also do not attempt to look through Phi/Select's yet, which might
883 // be part of the reduction chain, or attempt to looks through And's to find a
884 // smaller bitwidth. Subs are also currently not allowed (which are usually
885 // treated as part of a add reduction) as they are expected to generally be
886 // more expensive than out-of-loop reductions, and need to be costed more
887 // carefully.
888 unsigned ExpectedUses = 1;
889 if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
890 ExpectedUses = 2;
891
892 auto getNextInstruction = [&](Instruction *Cur) {
893 if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
894 // We are expecting a icmp/select pair, which we go to the next select
895 // instruction if we can. We already know that Cur has 2 uses.
896 if (isa<SelectInst>(*Cur->user_begin()))
897 return cast<Instruction>(*Cur->user_begin());
898 else
899 return cast<Instruction>(*std::next(Cur->user_begin()));
900 }
901 return cast<Instruction>(*Cur->user_begin());
902 };
903 auto isCorrectOpcode = [&](Instruction *Cur) {
904 if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
905 Value *LHS, *RHS;
906 return SelectPatternResult::isMinOrMax(
907 matchSelectPattern(Cur, LHS, RHS).Flavor);
908 }
909 return Cur->getOpcode() == RedOp;
910 };
911
912 // The loop exit instruction we check first (as a quick test) but add last. We
913 // check the opcode is correct (and dont allow them to be Subs) and that they
914 // have expected to have the expected number of uses. They will have one use
915 // from the phi and one from a LCSSA value, no matter the type.
916 if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2))
917 return {};
918
919 // Check that the Phi has one (or two for min/max) uses.
920 if (!Phi->hasNUses(ExpectedUses))
921 return {};
922 Instruction *Cur = getNextInstruction(Phi);
923
924 // Each other instruction in the chain should have the expected number of uses
925 // and be the correct opcode.
926 while (Cur != LoopExitInstr) {
927 if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
928 return {};
929
930 ReductionOperations.push_back(Cur);
931 Cur = getNextInstruction(Cur);
932 }
933
934 ReductionOperations.push_back(Cur);
935 return ReductionOperations;
936 }
937
InductionDescriptor(Value * Start,InductionKind K,const SCEV * Step,BinaryOperator * BOp,SmallVectorImpl<Instruction * > * Casts)938 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
939 const SCEV *Step, BinaryOperator *BOp,
940 SmallVectorImpl<Instruction *> *Casts)
941 : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp) {
942 assert(IK != IK_NoInduction && "Not an induction");
943
944 // Start value type should match the induction kind and the value
945 // itself should not be null.
946 assert(StartValue && "StartValue is null");
947 assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
948 "StartValue is not a pointer for pointer induction");
949 assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
950 "StartValue is not an integer for integer induction");
951
952 // Check the Step Value. It should be non-zero integer value.
953 assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
954 "Step value is zero");
955
956 assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
957 "Step value should be constant for pointer induction");
958 assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
959 "StepValue is not an integer");
960
961 assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
962 "StepValue is not FP for FpInduction");
963 assert((IK != IK_FpInduction ||
964 (InductionBinOp &&
965 (InductionBinOp->getOpcode() == Instruction::FAdd ||
966 InductionBinOp->getOpcode() == Instruction::FSub))) &&
967 "Binary opcode should be specified for FP induction");
968
969 if (Casts) {
970 for (auto &Inst : *Casts) {
971 RedundantCasts.push_back(Inst);
972 }
973 }
974 }
975
getConstIntStepValue() const976 ConstantInt *InductionDescriptor::getConstIntStepValue() const {
977 if (isa<SCEVConstant>(Step))
978 return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
979 return nullptr;
980 }
981
isFPInductionPHI(PHINode * Phi,const Loop * TheLoop,ScalarEvolution * SE,InductionDescriptor & D)982 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
983 ScalarEvolution *SE,
984 InductionDescriptor &D) {
985
986 // Here we only handle FP induction variables.
987 assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
988
989 if (TheLoop->getHeader() != Phi->getParent())
990 return false;
991
992 // The loop may have multiple entrances or multiple exits; we can analyze
993 // this phi if it has a unique entry value and a unique backedge value.
994 if (Phi->getNumIncomingValues() != 2)
995 return false;
996 Value *BEValue = nullptr, *StartValue = nullptr;
997 if (TheLoop->contains(Phi->getIncomingBlock(0))) {
998 BEValue = Phi->getIncomingValue(0);
999 StartValue = Phi->getIncomingValue(1);
1000 } else {
1001 assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1002 "Unexpected Phi node in the loop");
1003 BEValue = Phi->getIncomingValue(1);
1004 StartValue = Phi->getIncomingValue(0);
1005 }
1006
1007 BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
1008 if (!BOp)
1009 return false;
1010
1011 Value *Addend = nullptr;
1012 if (BOp->getOpcode() == Instruction::FAdd) {
1013 if (BOp->getOperand(0) == Phi)
1014 Addend = BOp->getOperand(1);
1015 else if (BOp->getOperand(1) == Phi)
1016 Addend = BOp->getOperand(0);
1017 } else if (BOp->getOpcode() == Instruction::FSub)
1018 if (BOp->getOperand(0) == Phi)
1019 Addend = BOp->getOperand(1);
1020
1021 if (!Addend)
1022 return false;
1023
1024 // The addend should be loop invariant
1025 if (auto *I = dyn_cast<Instruction>(Addend))
1026 if (TheLoop->contains(I))
1027 return false;
1028
1029 // FP Step has unknown SCEV
1030 const SCEV *Step = SE->getUnknown(Addend);
1031 D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1032 return true;
1033 }
1034
1035 /// This function is called when we suspect that the update-chain of a phi node
1036 /// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1037 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1038 /// predicate P under which the SCEV expression for the phi can be the
1039 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1040 /// cast instructions that are involved in the update-chain of this induction.
1041 /// A caller that adds the required runtime predicate can be free to drop these
1042 /// cast instructions, and compute the phi using \p AR (instead of some scev
1043 /// expression with casts).
1044 ///
1045 /// For example, without a predicate the scev expression can take the following
1046 /// form:
1047 /// (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1048 ///
1049 /// It corresponds to the following IR sequence:
1050 /// %for.body:
1051 /// %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1052 /// %casted_phi = "ExtTrunc i64 %x"
1053 /// %add = add i64 %casted_phi, %step
1054 ///
1055 /// where %x is given in \p PN,
1056 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1057 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1058 /// several forms, for example, such as:
1059 /// ExtTrunc1: %casted_phi = and %x, 2^n-1
1060 /// or:
1061 /// ExtTrunc2: %t = shl %x, m
1062 /// %casted_phi = ashr %t, m
1063 ///
1064 /// If we are able to find such sequence, we return the instructions
1065 /// we found, namely %casted_phi and the instructions on its use-def chain up
1066 /// to the phi (not including the phi).
getCastsForInductionPHI(PredicatedScalarEvolution & PSE,const SCEVUnknown * PhiScev,const SCEVAddRecExpr * AR,SmallVectorImpl<Instruction * > & CastInsts)1067 static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
1068 const SCEVUnknown *PhiScev,
1069 const SCEVAddRecExpr *AR,
1070 SmallVectorImpl<Instruction *> &CastInsts) {
1071
1072 assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1073 auto *PN = cast<PHINode>(PhiScev->getValue());
1074 assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1075 const Loop *L = AR->getLoop();
1076
1077 // Find any cast instructions that participate in the def-use chain of
1078 // PhiScev in the loop.
1079 // FORNOW/TODO: We currently expect the def-use chain to include only
1080 // two-operand instructions, where one of the operands is an invariant.
1081 // createAddRecFromPHIWithCasts() currently does not support anything more
1082 // involved than that, so we keep the search simple. This can be
1083 // extended/generalized as needed.
1084
1085 auto getDef = [&](const Value *Val) -> Value * {
1086 const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1087 if (!BinOp)
1088 return nullptr;
1089 Value *Op0 = BinOp->getOperand(0);
1090 Value *Op1 = BinOp->getOperand(1);
1091 Value *Def = nullptr;
1092 if (L->isLoopInvariant(Op0))
1093 Def = Op1;
1094 else if (L->isLoopInvariant(Op1))
1095 Def = Op0;
1096 return Def;
1097 };
1098
1099 // Look for the instruction that defines the induction via the
1100 // loop backedge.
1101 BasicBlock *Latch = L->getLoopLatch();
1102 if (!Latch)
1103 return false;
1104 Value *Val = PN->getIncomingValueForBlock(Latch);
1105 if (!Val)
1106 return false;
1107
1108 // Follow the def-use chain until the induction phi is reached.
1109 // If on the way we encounter a Value that has the same SCEV Expr as the
1110 // phi node, we can consider the instructions we visit from that point
1111 // as part of the cast-sequence that can be ignored.
1112 bool InCastSequence = false;
1113 auto *Inst = dyn_cast<Instruction>(Val);
1114 while (Val != PN) {
1115 // If we encountered a phi node other than PN, or if we left the loop,
1116 // we bail out.
1117 if (!Inst || !L->contains(Inst)) {
1118 return false;
1119 }
1120 auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1121 if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1122 InCastSequence = true;
1123 if (InCastSequence) {
1124 // Only the last instruction in the cast sequence is expected to have
1125 // uses outside the induction def-use chain.
1126 if (!CastInsts.empty())
1127 if (!Inst->hasOneUse())
1128 return false;
1129 CastInsts.push_back(Inst);
1130 }
1131 Val = getDef(Val);
1132 if (!Val)
1133 return false;
1134 Inst = dyn_cast<Instruction>(Val);
1135 }
1136
1137 return InCastSequence;
1138 }
1139
isInductionPHI(PHINode * Phi,const Loop * TheLoop,PredicatedScalarEvolution & PSE,InductionDescriptor & D,bool Assume)1140 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
1141 PredicatedScalarEvolution &PSE,
1142 InductionDescriptor &D, bool Assume) {
1143 Type *PhiTy = Phi->getType();
1144
1145 // Handle integer and pointer inductions variables.
1146 // Now we handle also FP induction but not trying to make a
1147 // recurrent expression from the PHI node in-place.
1148
1149 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1150 !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1151 return false;
1152
1153 if (PhiTy->isFloatingPointTy())
1154 return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1155
1156 const SCEV *PhiScev = PSE.getSCEV(Phi);
1157 const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1158
1159 // We need this expression to be an AddRecExpr.
1160 if (Assume && !AR)
1161 AR = PSE.getAsAddRec(Phi);
1162
1163 if (!AR) {
1164 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1165 return false;
1166 }
1167
1168 // Record any Cast instructions that participate in the induction update
1169 const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1170 // If we started from an UnknownSCEV, and managed to build an addRecurrence
1171 // only after enabling Assume with PSCEV, this means we may have encountered
1172 // cast instructions that required adding a runtime check in order to
1173 // guarantee the correctness of the AddRecurrence respresentation of the
1174 // induction.
1175 if (PhiScev != AR && SymbolicPhi) {
1176 SmallVector<Instruction *, 2> Casts;
1177 if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1178 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1179 }
1180
1181 return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1182 }
1183
isInductionPHI(PHINode * Phi,const Loop * TheLoop,ScalarEvolution * SE,InductionDescriptor & D,const SCEV * Expr,SmallVectorImpl<Instruction * > * CastsToIgnore)1184 bool InductionDescriptor::isInductionPHI(
1185 PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1186 InductionDescriptor &D, const SCEV *Expr,
1187 SmallVectorImpl<Instruction *> *CastsToIgnore) {
1188 Type *PhiTy = Phi->getType();
1189 // We only handle integer and pointer inductions variables.
1190 if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
1191 return false;
1192
1193 // Check that the PHI is consecutive.
1194 const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1195 const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1196
1197 if (!AR) {
1198 LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1199 return false;
1200 }
1201
1202 if (AR->getLoop() != TheLoop) {
1203 // FIXME: We should treat this as a uniform. Unfortunately, we
1204 // don't currently know how to handled uniform PHIs.
1205 LLVM_DEBUG(
1206 dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
1207 return false;
1208 }
1209
1210 Value *StartValue =
1211 Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
1212
1213 BasicBlock *Latch = AR->getLoop()->getLoopLatch();
1214 if (!Latch)
1215 return false;
1216 BinaryOperator *BOp =
1217 dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1218
1219 const SCEV *Step = AR->getStepRecurrence(*SE);
1220 // Calculate the pointer stride and check if it is consecutive.
1221 // The stride may be a constant or a loop invariant integer value.
1222 const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
1223 if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
1224 return false;
1225
1226 if (PhiTy->isIntegerTy()) {
1227 D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1228 CastsToIgnore);
1229 return true;
1230 }
1231
1232 assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1233 // Pointer induction should be a constant.
1234 if (!ConstStep)
1235 return false;
1236
1237 ConstantInt *CV = ConstStep->getValue();
1238 Type *PointerElementType = PhiTy->getPointerElementType();
1239 // The pointer stride cannot be determined if the pointer element type is not
1240 // sized.
1241 if (!PointerElementType->isSized())
1242 return false;
1243
1244 const DataLayout &DL = Phi->getModule()->getDataLayout();
1245 int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(PointerElementType));
1246 if (!Size)
1247 return false;
1248
1249 int64_t CVSize = CV->getSExtValue();
1250 if (CVSize % Size)
1251 return false;
1252 auto *StepValue =
1253 SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
1254 D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue, BOp);
1255 return true;
1256 }
1257