xref: /freebsd-src/contrib/llvm-project/llvm/lib/Analysis/IVDescriptors.cpp (revision 0eae32dcef82f6f06de6419a0d623d7def0cc8f6)
1 //===- llvm/Analysis/IVDescriptors.cpp - IndVar Descriptors -----*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file "describes" induction and recurrence variables.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "llvm/Analysis/IVDescriptors.h"
14 #include "llvm/ADT/ScopeExit.h"
15 #include "llvm/Analysis/BasicAliasAnalysis.h"
16 #include "llvm/Analysis/DemandedBits.h"
17 #include "llvm/Analysis/DomTreeUpdater.h"
18 #include "llvm/Analysis/GlobalsModRef.h"
19 #include "llvm/Analysis/InstructionSimplify.h"
20 #include "llvm/Analysis/LoopInfo.h"
21 #include "llvm/Analysis/LoopPass.h"
22 #include "llvm/Analysis/MustExecute.h"
23 #include "llvm/Analysis/ScalarEvolution.h"
24 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
25 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
26 #include "llvm/Analysis/TargetTransformInfo.h"
27 #include "llvm/Analysis/ValueTracking.h"
28 #include "llvm/IR/Dominators.h"
29 #include "llvm/IR/Instructions.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/IR/PatternMatch.h"
32 #include "llvm/IR/ValueHandle.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/Debug.h"
35 #include "llvm/Support/KnownBits.h"
36 
37 #include <set>
38 
39 using namespace llvm;
40 using namespace llvm::PatternMatch;
41 
42 #define DEBUG_TYPE "iv-descriptors"
43 
44 bool RecurrenceDescriptor::areAllUsesIn(Instruction *I,
45                                         SmallPtrSetImpl<Instruction *> &Set) {
46   for (const Use &Use : I->operands())
47     if (!Set.count(dyn_cast<Instruction>(Use)))
48       return false;
49   return true;
50 }
51 
52 bool RecurrenceDescriptor::isIntegerRecurrenceKind(RecurKind Kind) {
53   switch (Kind) {
54   default:
55     break;
56   case RecurKind::Add:
57   case RecurKind::Mul:
58   case RecurKind::Or:
59   case RecurKind::And:
60   case RecurKind::Xor:
61   case RecurKind::SMax:
62   case RecurKind::SMin:
63   case RecurKind::UMax:
64   case RecurKind::UMin:
65   case RecurKind::SelectICmp:
66   case RecurKind::SelectFCmp:
67     return true;
68   }
69   return false;
70 }
71 
72 bool RecurrenceDescriptor::isFloatingPointRecurrenceKind(RecurKind Kind) {
73   return (Kind != RecurKind::None) && !isIntegerRecurrenceKind(Kind);
74 }
75 
76 bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurKind Kind) {
77   switch (Kind) {
78   default:
79     break;
80   case RecurKind::Add:
81   case RecurKind::Mul:
82   case RecurKind::FAdd:
83   case RecurKind::FMul:
84   case RecurKind::FMulAdd:
85     return true;
86   }
87   return false;
88 }
89 
90 /// Determines if Phi may have been type-promoted. If Phi has a single user
91 /// that ANDs the Phi with a type mask, return the user. RT is updated to
92 /// account for the narrower bit width represented by the mask, and the AND
93 /// instruction is added to CI.
94 static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
95                                    SmallPtrSetImpl<Instruction *> &Visited,
96                                    SmallPtrSetImpl<Instruction *> &CI) {
97   if (!Phi->hasOneUse())
98     return Phi;
99 
100   const APInt *M = nullptr;
101   Instruction *I, *J = cast<Instruction>(Phi->use_begin()->getUser());
102 
103   // Matches either I & 2^x-1 or 2^x-1 & I. If we find a match, we update RT
104   // with a new integer type of the corresponding bit width.
105   if (match(J, m_c_And(m_Instruction(I), m_APInt(M)))) {
106     int32_t Bits = (*M + 1).exactLogBase2();
107     if (Bits > 0) {
108       RT = IntegerType::get(Phi->getContext(), Bits);
109       Visited.insert(Phi);
110       CI.insert(J);
111       return J;
112     }
113   }
114   return Phi;
115 }
116 
117 /// Compute the minimal bit width needed to represent a reduction whose exit
118 /// instruction is given by Exit.
119 static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
120                                                      DemandedBits *DB,
121                                                      AssumptionCache *AC,
122                                                      DominatorTree *DT) {
123   bool IsSigned = false;
124   const DataLayout &DL = Exit->getModule()->getDataLayout();
125   uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());
126 
127   if (DB) {
128     // Use the demanded bits analysis to determine the bits that are live out
129     // of the exit instruction, rounding up to the nearest power of two. If the
130     // use of demanded bits results in a smaller bit width, we know the value
131     // must be positive (i.e., IsSigned = false), because if this were not the
132     // case, the sign bit would have been demanded.
133     auto Mask = DB->getDemandedBits(Exit);
134     MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
135   }
136 
137   if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
138     // If demanded bits wasn't able to limit the bit width, we can try to use
139     // value tracking instead. This can be the case, for example, if the value
140     // may be negative.
141     auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
142     auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
143     MaxBitWidth = NumTypeBits - NumSignBits;
144     KnownBits Bits = computeKnownBits(Exit, DL);
145     if (!Bits.isNonNegative()) {
146       // If the value is not known to be non-negative, we set IsSigned to true,
147       // meaning that we will use sext instructions instead of zext
148       // instructions to restore the original type.
149       IsSigned = true;
150       // Make sure at at least one sign bit is included in the result, so it
151       // will get properly sign-extended.
152       ++MaxBitWidth;
153     }
154   }
155   if (!isPowerOf2_64(MaxBitWidth))
156     MaxBitWidth = NextPowerOf2(MaxBitWidth);
157 
158   return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
159                         IsSigned);
160 }
161 
162 /// Collect cast instructions that can be ignored in the vectorizer's cost
163 /// model, given a reduction exit value and the minimal type in which the
164 /// reduction can be represented.
165 static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
166                                  Type *RecurrenceType,
167                                  SmallPtrSetImpl<Instruction *> &Casts) {
168 
169   SmallVector<Instruction *, 8> Worklist;
170   SmallPtrSet<Instruction *, 8> Visited;
171   Worklist.push_back(Exit);
172 
173   while (!Worklist.empty()) {
174     Instruction *Val = Worklist.pop_back_val();
175     Visited.insert(Val);
176     if (auto *Cast = dyn_cast<CastInst>(Val))
177       if (Cast->getSrcTy() == RecurrenceType) {
178         // If the source type of a cast instruction is equal to the recurrence
179         // type, it will be eliminated, and should be ignored in the vectorizer
180         // cost model.
181         Casts.insert(Cast);
182         continue;
183       }
184 
185     // Add all operands to the work list if they are loop-varying values that
186     // we haven't yet visited.
187     for (Value *O : cast<User>(Val)->operands())
188       if (auto *I = dyn_cast<Instruction>(O))
189         if (TheLoop->contains(I) && !Visited.count(I))
190           Worklist.push_back(I);
191   }
192 }
193 
194 // Check if a given Phi node can be recognized as an ordered reduction for
195 // vectorizing floating point operations without unsafe math.
196 static bool checkOrderedReduction(RecurKind Kind, Instruction *ExactFPMathInst,
197                                   Instruction *Exit, PHINode *Phi) {
198   // Currently only FAdd and FMulAdd are supported.
199   if (Kind != RecurKind::FAdd && Kind != RecurKind::FMulAdd)
200     return false;
201 
202   if (Kind == RecurKind::FAdd && Exit->getOpcode() != Instruction::FAdd)
203     return false;
204 
205   if (Kind == RecurKind::FMulAdd &&
206       !RecurrenceDescriptor::isFMulAddIntrinsic(Exit))
207     return false;
208 
209   // Ensure the exit instruction has only one user other than the reduction PHI
210   if (Exit != ExactFPMathInst || Exit->hasNUsesOrMore(3))
211     return false;
212 
213   // The only pattern accepted is the one in which the reduction PHI
214   // is used as one of the operands of the exit instruction
215   auto *Op0 = Exit->getOperand(0);
216   auto *Op1 = Exit->getOperand(1);
217   if (Kind == RecurKind::FAdd && Op0 != Phi && Op1 != Phi)
218     return false;
219   if (Kind == RecurKind::FMulAdd && Exit->getOperand(2) != Phi)
220     return false;
221 
222   LLVM_DEBUG(dbgs() << "LV: Found an ordered reduction: Phi: " << *Phi
223                     << ", ExitInst: " << *Exit << "\n");
224 
225   return true;
226 }
227 
228 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurKind Kind,
229                                            Loop *TheLoop, FastMathFlags FuncFMF,
230                                            RecurrenceDescriptor &RedDes,
231                                            DemandedBits *DB,
232                                            AssumptionCache *AC,
233                                            DominatorTree *DT) {
234   if (Phi->getNumIncomingValues() != 2)
235     return false;
236 
237   // Reduction variables are only found in the loop header block.
238   if (Phi->getParent() != TheLoop->getHeader())
239     return false;
240 
241   // Obtain the reduction start value from the value that comes from the loop
242   // preheader.
243   Value *RdxStart = Phi->getIncomingValueForBlock(TheLoop->getLoopPreheader());
244 
245   // ExitInstruction is the single value which is used outside the loop.
246   // We only allow for a single reduction value to be used outside the loop.
247   // This includes users of the reduction, variables (which form a cycle
248   // which ends in the phi node).
249   Instruction *ExitInstruction = nullptr;
250   // Indicates that we found a reduction operation in our scan.
251   bool FoundReduxOp = false;
252 
253   // We start with the PHI node and scan for all of the users of this
254   // instruction. All users must be instructions that can be used as reduction
255   // variables (such as ADD). We must have a single out-of-block user. The cycle
256   // must include the original PHI.
257   bool FoundStartPHI = false;
258 
259   // To recognize min/max patterns formed by a icmp select sequence, we store
260   // the number of instruction we saw from the recognized min/max pattern,
261   //  to make sure we only see exactly the two instructions.
262   unsigned NumCmpSelectPatternInst = 0;
263   InstDesc ReduxDesc(false, nullptr);
264 
265   // Data used for determining if the recurrence has been type-promoted.
266   Type *RecurrenceType = Phi->getType();
267   SmallPtrSet<Instruction *, 4> CastInsts;
268   Instruction *Start = Phi;
269   bool IsSigned = false;
270 
271   SmallPtrSet<Instruction *, 8> VisitedInsts;
272   SmallVector<Instruction *, 8> Worklist;
273 
274   // Return early if the recurrence kind does not match the type of Phi. If the
275   // recurrence kind is arithmetic, we attempt to look through AND operations
276   // resulting from the type promotion performed by InstCombine.  Vector
277   // operations are not limited to the legal integer widths, so we may be able
278   // to evaluate the reduction in the narrower width.
279   if (RecurrenceType->isFloatingPointTy()) {
280     if (!isFloatingPointRecurrenceKind(Kind))
281       return false;
282   } else if (RecurrenceType->isIntegerTy()) {
283     if (!isIntegerRecurrenceKind(Kind))
284       return false;
285     if (!isMinMaxRecurrenceKind(Kind))
286       Start = lookThroughAnd(Phi, RecurrenceType, VisitedInsts, CastInsts);
287   } else {
288     // Pointer min/max may exist, but it is not supported as a reduction op.
289     return false;
290   }
291 
292   Worklist.push_back(Start);
293   VisitedInsts.insert(Start);
294 
295   // Start with all flags set because we will intersect this with the reduction
296   // flags from all the reduction operations.
297   FastMathFlags FMF = FastMathFlags::getFast();
298 
299   // A value in the reduction can be used:
300   //  - By the reduction:
301   //      - Reduction operation:
302   //        - One use of reduction value (safe).
303   //        - Multiple use of reduction value (not safe).
304   //      - PHI:
305   //        - All uses of the PHI must be the reduction (safe).
306   //        - Otherwise, not safe.
307   //  - By instructions outside of the loop (safe).
308   //      * One value may have several outside users, but all outside
309   //        uses must be of the same value.
310   //  - By an instruction that is not part of the reduction (not safe).
311   //    This is either:
312   //      * An instruction type other than PHI or the reduction operation.
313   //      * A PHI in the header other than the initial PHI.
314   while (!Worklist.empty()) {
315     Instruction *Cur = Worklist.pop_back_val();
316 
317     // No Users.
318     // If the instruction has no users then this is a broken chain and can't be
319     // a reduction variable.
320     if (Cur->use_empty())
321       return false;
322 
323     bool IsAPhi = isa<PHINode>(Cur);
324 
325     // A header PHI use other than the original PHI.
326     if (Cur != Phi && IsAPhi && Cur->getParent() == Phi->getParent())
327       return false;
328 
329     // Reductions of instructions such as Div, and Sub is only possible if the
330     // LHS is the reduction variable.
331     if (!Cur->isCommutative() && !IsAPhi && !isa<SelectInst>(Cur) &&
332         !isa<ICmpInst>(Cur) && !isa<FCmpInst>(Cur) &&
333         !VisitedInsts.count(dyn_cast<Instruction>(Cur->getOperand(0))))
334       return false;
335 
336     // Any reduction instruction must be of one of the allowed kinds. We ignore
337     // the starting value (the Phi or an AND instruction if the Phi has been
338     // type-promoted).
339     if (Cur != Start) {
340       ReduxDesc =
341           isRecurrenceInstr(TheLoop, Phi, Cur, Kind, ReduxDesc, FuncFMF);
342       if (!ReduxDesc.isRecurrence())
343         return false;
344       // FIXME: FMF is allowed on phi, but propagation is not handled correctly.
345       if (isa<FPMathOperator>(ReduxDesc.getPatternInst()) && !IsAPhi) {
346         FastMathFlags CurFMF = ReduxDesc.getPatternInst()->getFastMathFlags();
347         if (auto *Sel = dyn_cast<SelectInst>(ReduxDesc.getPatternInst())) {
348           // Accept FMF on either fcmp or select of a min/max idiom.
349           // TODO: This is a hack to work-around the fact that FMF may not be
350           //       assigned/propagated correctly. If that problem is fixed or we
351           //       standardize on fmin/fmax via intrinsics, this can be removed.
352           if (auto *FCmp = dyn_cast<FCmpInst>(Sel->getCondition()))
353             CurFMF |= FCmp->getFastMathFlags();
354         }
355         FMF &= CurFMF;
356       }
357       // Update this reduction kind if we matched a new instruction.
358       // TODO: Can we eliminate the need for a 2nd InstDesc by keeping 'Kind'
359       //       state accurate while processing the worklist?
360       if (ReduxDesc.getRecKind() != RecurKind::None)
361         Kind = ReduxDesc.getRecKind();
362     }
363 
364     bool IsASelect = isa<SelectInst>(Cur);
365 
366     // A conditional reduction operation must only have 2 or less uses in
367     // VisitedInsts.
368     if (IsASelect && (Kind == RecurKind::FAdd || Kind == RecurKind::FMul) &&
369         hasMultipleUsesOf(Cur, VisitedInsts, 2))
370       return false;
371 
372     // A reduction operation must only have one use of the reduction value.
373     if (!IsAPhi && !IsASelect && !isMinMaxRecurrenceKind(Kind) &&
374         !isSelectCmpRecurrenceKind(Kind) &&
375         hasMultipleUsesOf(Cur, VisitedInsts, 1))
376       return false;
377 
378     // All inputs to a PHI node must be a reduction value.
379     if (IsAPhi && Cur != Phi && !areAllUsesIn(Cur, VisitedInsts))
380       return false;
381 
382     if ((isIntMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectICmp) &&
383         (isa<ICmpInst>(Cur) || isa<SelectInst>(Cur)))
384       ++NumCmpSelectPatternInst;
385     if ((isFPMinMaxRecurrenceKind(Kind) || Kind == RecurKind::SelectFCmp) &&
386         (isa<FCmpInst>(Cur) || isa<SelectInst>(Cur)))
387       ++NumCmpSelectPatternInst;
388 
389     // Check  whether we found a reduction operator.
390     FoundReduxOp |= !IsAPhi && Cur != Start;
391 
392     // Process users of current instruction. Push non-PHI nodes after PHI nodes
393     // onto the stack. This way we are going to have seen all inputs to PHI
394     // nodes once we get to them.
395     SmallVector<Instruction *, 8> NonPHIs;
396     SmallVector<Instruction *, 8> PHIs;
397     for (User *U : Cur->users()) {
398       Instruction *UI = cast<Instruction>(U);
399 
400       // If the user is a call to llvm.fmuladd then the instruction can only be
401       // the final operand.
402       if (isFMulAddIntrinsic(UI))
403         if (Cur == UI->getOperand(0) || Cur == UI->getOperand(1))
404           return false;
405 
406       // Check if we found the exit user.
407       BasicBlock *Parent = UI->getParent();
408       if (!TheLoop->contains(Parent)) {
409         // If we already know this instruction is used externally, move on to
410         // the next user.
411         if (ExitInstruction == Cur)
412           continue;
413 
414         // Exit if you find multiple values used outside or if the header phi
415         // node is being used. In this case the user uses the value of the
416         // previous iteration, in which case we would loose "VF-1" iterations of
417         // the reduction operation if we vectorize.
418         if (ExitInstruction != nullptr || Cur == Phi)
419           return false;
420 
421         // The instruction used by an outside user must be the last instruction
422         // before we feed back to the reduction phi. Otherwise, we loose VF-1
423         // operations on the value.
424         if (!is_contained(Phi->operands(), Cur))
425           return false;
426 
427         ExitInstruction = Cur;
428         continue;
429       }
430 
431       // Process instructions only once (termination). Each reduction cycle
432       // value must only be used once, except by phi nodes and min/max
433       // reductions which are represented as a cmp followed by a select.
434       InstDesc IgnoredVal(false, nullptr);
435       if (VisitedInsts.insert(UI).second) {
436         if (isa<PHINode>(UI))
437           PHIs.push_back(UI);
438         else
439           NonPHIs.push_back(UI);
440       } else if (!isa<PHINode>(UI) &&
441                  ((!isa<FCmpInst>(UI) && !isa<ICmpInst>(UI) &&
442                    !isa<SelectInst>(UI)) ||
443                   (!isConditionalRdxPattern(Kind, UI).isRecurrence() &&
444                    !isSelectCmpPattern(TheLoop, Phi, UI, IgnoredVal)
445                         .isRecurrence() &&
446                    !isMinMaxPattern(UI, Kind, IgnoredVal).isRecurrence())))
447         return false;
448 
449       // Remember that we completed the cycle.
450       if (UI == Phi)
451         FoundStartPHI = true;
452     }
453     Worklist.append(PHIs.begin(), PHIs.end());
454     Worklist.append(NonPHIs.begin(), NonPHIs.end());
455   }
456 
457   // This means we have seen one but not the other instruction of the
458   // pattern or more than just a select and cmp. Zero implies that we saw a
459   // llvm.min/max instrinsic, which is always OK.
460   if (isMinMaxRecurrenceKind(Kind) && NumCmpSelectPatternInst != 2 &&
461       NumCmpSelectPatternInst != 0)
462     return false;
463 
464   if (isSelectCmpRecurrenceKind(Kind) && NumCmpSelectPatternInst != 1)
465     return false;
466 
467   if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
468     return false;
469 
470   const bool IsOrdered = checkOrderedReduction(
471       Kind, ReduxDesc.getExactFPMathInst(), ExitInstruction, Phi);
472 
473   if (Start != Phi) {
474     // If the starting value is not the same as the phi node, we speculatively
475     // looked through an 'and' instruction when evaluating a potential
476     // arithmetic reduction to determine if it may have been type-promoted.
477     //
478     // We now compute the minimal bit width that is required to represent the
479     // reduction. If this is the same width that was indicated by the 'and', we
480     // can represent the reduction in the smaller type. The 'and' instruction
481     // will be eliminated since it will essentially be a cast instruction that
482     // can be ignore in the cost model. If we compute a different type than we
483     // did when evaluating the 'and', the 'and' will not be eliminated, and we
484     // will end up with different kinds of operations in the recurrence
485     // expression (e.g., IntegerAND, IntegerADD). We give up if this is
486     // the case.
487     //
488     // The vectorizer relies on InstCombine to perform the actual
489     // type-shrinking. It does this by inserting instructions to truncate the
490     // exit value of the reduction to the width indicated by RecurrenceType and
491     // then extend this value back to the original width. If IsSigned is false,
492     // a 'zext' instruction will be generated; otherwise, a 'sext' will be
493     // used.
494     //
495     // TODO: We should not rely on InstCombine to rewrite the reduction in the
496     //       smaller type. We should just generate a correctly typed expression
497     //       to begin with.
498     Type *ComputedType;
499     std::tie(ComputedType, IsSigned) =
500         computeRecurrenceType(ExitInstruction, DB, AC, DT);
501     if (ComputedType != RecurrenceType)
502       return false;
503 
504     // The recurrence expression will be represented in a narrower type. If
505     // there are any cast instructions that will be unnecessary, collect them
506     // in CastInsts. Note that the 'and' instruction was already included in
507     // this list.
508     //
509     // TODO: A better way to represent this may be to tag in some way all the
510     //       instructions that are a part of the reduction. The vectorizer cost
511     //       model could then apply the recurrence type to these instructions,
512     //       without needing a white list of instructions to ignore.
513     //       This may also be useful for the inloop reductions, if it can be
514     //       kept simple enough.
515     collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
516   }
517 
518   // We found a reduction var if we have reached the original phi node and we
519   // only have a single instruction with out-of-loop users.
520 
521   // The ExitInstruction(Instruction which is allowed to have out-of-loop users)
522   // is saved as part of the RecurrenceDescriptor.
523 
524   // Save the description of this reduction variable.
525   RecurrenceDescriptor RD(RdxStart, ExitInstruction, Kind, FMF,
526                           ReduxDesc.getExactFPMathInst(), RecurrenceType,
527                           IsSigned, IsOrdered, CastInsts);
528   RedDes = RD;
529 
530   return true;
531 }
532 
533 // We are looking for loops that do something like this:
534 //   int r = 0;
535 //   for (int i = 0; i < n; i++) {
536 //     if (src[i] > 3)
537 //       r = 3;
538 //   }
539 // where the reduction value (r) only has two states, in this example 0 or 3.
540 // The generated LLVM IR for this type of loop will be like this:
541 //   for.body:
542 //     %r = phi i32 [ %spec.select, %for.body ], [ 0, %entry ]
543 //     ...
544 //     %cmp = icmp sgt i32 %5, 3
545 //     %spec.select = select i1 %cmp, i32 3, i32 %r
546 //     ...
547 // In general we can support vectorization of loops where 'r' flips between
548 // any two non-constants, provided they are loop invariant. The only thing
549 // we actually care about at the end of the loop is whether or not any lane
550 // in the selected vector is different from the start value. The final
551 // across-vector reduction after the loop simply involves choosing the start
552 // value if nothing changed (0 in the example above) or the other selected
553 // value (3 in the example above).
554 RecurrenceDescriptor::InstDesc
555 RecurrenceDescriptor::isSelectCmpPattern(Loop *Loop, PHINode *OrigPhi,
556                                          Instruction *I, InstDesc &Prev) {
557   // We must handle the select(cmp(),x,y) as a single instruction. Advance to
558   // the select.
559   CmpInst::Predicate Pred;
560   if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
561     if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
562       return InstDesc(Select, Prev.getRecKind());
563   }
564 
565   // Only match select with single use cmp condition.
566   if (!match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
567                          m_Value())))
568     return InstDesc(false, I);
569 
570   SelectInst *SI = cast<SelectInst>(I);
571   Value *NonPhi = nullptr;
572 
573   if (OrigPhi == dyn_cast<PHINode>(SI->getTrueValue()))
574     NonPhi = SI->getFalseValue();
575   else if (OrigPhi == dyn_cast<PHINode>(SI->getFalseValue()))
576     NonPhi = SI->getTrueValue();
577   else
578     return InstDesc(false, I);
579 
580   // We are looking for selects of the form:
581   //   select(cmp(), phi, loop_invariant) or
582   //   select(cmp(), loop_invariant, phi)
583   if (!Loop->isLoopInvariant(NonPhi))
584     return InstDesc(false, I);
585 
586   return InstDesc(I, isa<ICmpInst>(I->getOperand(0)) ? RecurKind::SelectICmp
587                                                      : RecurKind::SelectFCmp);
588 }
589 
590 RecurrenceDescriptor::InstDesc
591 RecurrenceDescriptor::isMinMaxPattern(Instruction *I, RecurKind Kind,
592                                       const InstDesc &Prev) {
593   assert((isa<CmpInst>(I) || isa<SelectInst>(I) || isa<CallInst>(I)) &&
594          "Expected a cmp or select or call instruction");
595   if (!isMinMaxRecurrenceKind(Kind))
596     return InstDesc(false, I);
597 
598   // We must handle the select(cmp()) as a single instruction. Advance to the
599   // select.
600   CmpInst::Predicate Pred;
601   if (match(I, m_OneUse(m_Cmp(Pred, m_Value(), m_Value())))) {
602     if (auto *Select = dyn_cast<SelectInst>(*I->user_begin()))
603       return InstDesc(Select, Prev.getRecKind());
604   }
605 
606   // Only match select with single use cmp condition, or a min/max intrinsic.
607   if (!isa<IntrinsicInst>(I) &&
608       !match(I, m_Select(m_OneUse(m_Cmp(Pred, m_Value(), m_Value())), m_Value(),
609                          m_Value())))
610     return InstDesc(false, I);
611 
612   // Look for a min/max pattern.
613   if (match(I, m_UMin(m_Value(), m_Value())))
614     return InstDesc(Kind == RecurKind::UMin, I);
615   if (match(I, m_UMax(m_Value(), m_Value())))
616     return InstDesc(Kind == RecurKind::UMax, I);
617   if (match(I, m_SMax(m_Value(), m_Value())))
618     return InstDesc(Kind == RecurKind::SMax, I);
619   if (match(I, m_SMin(m_Value(), m_Value())))
620     return InstDesc(Kind == RecurKind::SMin, I);
621   if (match(I, m_OrdFMin(m_Value(), m_Value())))
622     return InstDesc(Kind == RecurKind::FMin, I);
623   if (match(I, m_OrdFMax(m_Value(), m_Value())))
624     return InstDesc(Kind == RecurKind::FMax, I);
625   if (match(I, m_UnordFMin(m_Value(), m_Value())))
626     return InstDesc(Kind == RecurKind::FMin, I);
627   if (match(I, m_UnordFMax(m_Value(), m_Value())))
628     return InstDesc(Kind == RecurKind::FMax, I);
629   if (match(I, m_Intrinsic<Intrinsic::minnum>(m_Value(), m_Value())))
630     return InstDesc(Kind == RecurKind::FMin, I);
631   if (match(I, m_Intrinsic<Intrinsic::maxnum>(m_Value(), m_Value())))
632     return InstDesc(Kind == RecurKind::FMax, I);
633 
634   return InstDesc(false, I);
635 }
636 
637 /// Returns true if the select instruction has users in the compare-and-add
638 /// reduction pattern below. The select instruction argument is the last one
639 /// in the sequence.
640 ///
641 /// %sum.1 = phi ...
642 /// ...
643 /// %cmp = fcmp pred %0, %CFP
644 /// %add = fadd %0, %sum.1
645 /// %sum.2 = select %cmp, %add, %sum.1
646 RecurrenceDescriptor::InstDesc
647 RecurrenceDescriptor::isConditionalRdxPattern(RecurKind Kind, Instruction *I) {
648   SelectInst *SI = dyn_cast<SelectInst>(I);
649   if (!SI)
650     return InstDesc(false, I);
651 
652   CmpInst *CI = dyn_cast<CmpInst>(SI->getCondition());
653   // Only handle single use cases for now.
654   if (!CI || !CI->hasOneUse())
655     return InstDesc(false, I);
656 
657   Value *TrueVal = SI->getTrueValue();
658   Value *FalseVal = SI->getFalseValue();
659   // Handle only when either of operands of select instruction is a PHI
660   // node for now.
661   if ((isa<PHINode>(*TrueVal) && isa<PHINode>(*FalseVal)) ||
662       (!isa<PHINode>(*TrueVal) && !isa<PHINode>(*FalseVal)))
663     return InstDesc(false, I);
664 
665   Instruction *I1 =
666       isa<PHINode>(*TrueVal) ? dyn_cast<Instruction>(FalseVal)
667                              : dyn_cast<Instruction>(TrueVal);
668   if (!I1 || !I1->isBinaryOp())
669     return InstDesc(false, I);
670 
671   Value *Op1, *Op2;
672   if ((m_FAdd(m_Value(Op1), m_Value(Op2)).match(I1)  ||
673        m_FSub(m_Value(Op1), m_Value(Op2)).match(I1)) &&
674       I1->isFast())
675     return InstDesc(Kind == RecurKind::FAdd, SI);
676 
677   if (m_FMul(m_Value(Op1), m_Value(Op2)).match(I1) && (I1->isFast()))
678     return InstDesc(Kind == RecurKind::FMul, SI);
679 
680   return InstDesc(false, I);
681 }
682 
683 RecurrenceDescriptor::InstDesc
684 RecurrenceDescriptor::isRecurrenceInstr(Loop *L, PHINode *OrigPhi,
685                                         Instruction *I, RecurKind Kind,
686                                         InstDesc &Prev, FastMathFlags FuncFMF) {
687   assert(Prev.getRecKind() == RecurKind::None || Prev.getRecKind() == Kind);
688   switch (I->getOpcode()) {
689   default:
690     return InstDesc(false, I);
691   case Instruction::PHI:
692     return InstDesc(I, Prev.getRecKind(), Prev.getExactFPMathInst());
693   case Instruction::Sub:
694   case Instruction::Add:
695     return InstDesc(Kind == RecurKind::Add, I);
696   case Instruction::Mul:
697     return InstDesc(Kind == RecurKind::Mul, I);
698   case Instruction::And:
699     return InstDesc(Kind == RecurKind::And, I);
700   case Instruction::Or:
701     return InstDesc(Kind == RecurKind::Or, I);
702   case Instruction::Xor:
703     return InstDesc(Kind == RecurKind::Xor, I);
704   case Instruction::FDiv:
705   case Instruction::FMul:
706     return InstDesc(Kind == RecurKind::FMul, I,
707                     I->hasAllowReassoc() ? nullptr : I);
708   case Instruction::FSub:
709   case Instruction::FAdd:
710     return InstDesc(Kind == RecurKind::FAdd, I,
711                     I->hasAllowReassoc() ? nullptr : I);
712   case Instruction::Select:
713     if (Kind == RecurKind::FAdd || Kind == RecurKind::FMul)
714       return isConditionalRdxPattern(Kind, I);
715     LLVM_FALLTHROUGH;
716   case Instruction::FCmp:
717   case Instruction::ICmp:
718   case Instruction::Call:
719     if (isSelectCmpRecurrenceKind(Kind))
720       return isSelectCmpPattern(L, OrigPhi, I, Prev);
721     if (isIntMinMaxRecurrenceKind(Kind) ||
722         (((FuncFMF.noNaNs() && FuncFMF.noSignedZeros()) ||
723           (isa<FPMathOperator>(I) && I->hasNoNaNs() &&
724            I->hasNoSignedZeros())) &&
725          isFPMinMaxRecurrenceKind(Kind)))
726       return isMinMaxPattern(I, Kind, Prev);
727     else if (isFMulAddIntrinsic(I))
728       return InstDesc(Kind == RecurKind::FMulAdd, I,
729                       I->hasAllowReassoc() ? nullptr : I);
730     return InstDesc(false, I);
731   }
732 }
733 
734 bool RecurrenceDescriptor::hasMultipleUsesOf(
735     Instruction *I, SmallPtrSetImpl<Instruction *> &Insts,
736     unsigned MaxNumUses) {
737   unsigned NumUses = 0;
738   for (const Use &U : I->operands()) {
739     if (Insts.count(dyn_cast<Instruction>(U)))
740       ++NumUses;
741     if (NumUses > MaxNumUses)
742       return true;
743   }
744 
745   return false;
746 }
747 
748 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
749                                           RecurrenceDescriptor &RedDes,
750                                           DemandedBits *DB, AssumptionCache *AC,
751                                           DominatorTree *DT) {
752   BasicBlock *Header = TheLoop->getHeader();
753   Function &F = *Header->getParent();
754   FastMathFlags FMF;
755   FMF.setNoNaNs(
756       F.getFnAttribute("no-nans-fp-math").getValueAsBool());
757   FMF.setNoSignedZeros(
758       F.getFnAttribute("no-signed-zeros-fp-math").getValueAsBool());
759 
760   if (AddReductionVar(Phi, RecurKind::Add, TheLoop, FMF, RedDes, DB, AC, DT)) {
761     LLVM_DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
762     return true;
763   }
764   if (AddReductionVar(Phi, RecurKind::Mul, TheLoop, FMF, RedDes, DB, AC, DT)) {
765     LLVM_DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
766     return true;
767   }
768   if (AddReductionVar(Phi, RecurKind::Or, TheLoop, FMF, RedDes, DB, AC, DT)) {
769     LLVM_DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
770     return true;
771   }
772   if (AddReductionVar(Phi, RecurKind::And, TheLoop, FMF, RedDes, DB, AC, DT)) {
773     LLVM_DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
774     return true;
775   }
776   if (AddReductionVar(Phi, RecurKind::Xor, TheLoop, FMF, RedDes, DB, AC, DT)) {
777     LLVM_DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
778     return true;
779   }
780   if (AddReductionVar(Phi, RecurKind::SMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
781     LLVM_DEBUG(dbgs() << "Found a SMAX reduction PHI." << *Phi << "\n");
782     return true;
783   }
784   if (AddReductionVar(Phi, RecurKind::SMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
785     LLVM_DEBUG(dbgs() << "Found a SMIN reduction PHI." << *Phi << "\n");
786     return true;
787   }
788   if (AddReductionVar(Phi, RecurKind::UMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
789     LLVM_DEBUG(dbgs() << "Found a UMAX reduction PHI." << *Phi << "\n");
790     return true;
791   }
792   if (AddReductionVar(Phi, RecurKind::UMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
793     LLVM_DEBUG(dbgs() << "Found a UMIN reduction PHI." << *Phi << "\n");
794     return true;
795   }
796   if (AddReductionVar(Phi, RecurKind::SelectICmp, TheLoop, FMF, RedDes, DB, AC,
797                       DT)) {
798     LLVM_DEBUG(dbgs() << "Found an integer conditional select reduction PHI."
799                       << *Phi << "\n");
800     return true;
801   }
802   if (AddReductionVar(Phi, RecurKind::FMul, TheLoop, FMF, RedDes, DB, AC, DT)) {
803     LLVM_DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
804     return true;
805   }
806   if (AddReductionVar(Phi, RecurKind::FAdd, TheLoop, FMF, RedDes, DB, AC, DT)) {
807     LLVM_DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
808     return true;
809   }
810   if (AddReductionVar(Phi, RecurKind::FMax, TheLoop, FMF, RedDes, DB, AC, DT)) {
811     LLVM_DEBUG(dbgs() << "Found a float MAX reduction PHI." << *Phi << "\n");
812     return true;
813   }
814   if (AddReductionVar(Phi, RecurKind::FMin, TheLoop, FMF, RedDes, DB, AC, DT)) {
815     LLVM_DEBUG(dbgs() << "Found a float MIN reduction PHI." << *Phi << "\n");
816     return true;
817   }
818   if (AddReductionVar(Phi, RecurKind::SelectFCmp, TheLoop, FMF, RedDes, DB, AC,
819                       DT)) {
820     LLVM_DEBUG(dbgs() << "Found a float conditional select reduction PHI."
821                       << " PHI." << *Phi << "\n");
822     return true;
823   }
824   if (AddReductionVar(Phi, RecurKind::FMulAdd, TheLoop, FMF, RedDes, DB, AC,
825                       DT)) {
826     LLVM_DEBUG(dbgs() << "Found an FMulAdd reduction PHI." << *Phi << "\n");
827     return true;
828   }
829   // Not a reduction of known type.
830   return false;
831 }
832 
833 bool RecurrenceDescriptor::isFirstOrderRecurrence(
834     PHINode *Phi, Loop *TheLoop,
835     MapVector<Instruction *, Instruction *> &SinkAfter, DominatorTree *DT) {
836 
837   // Ensure the phi node is in the loop header and has two incoming values.
838   if (Phi->getParent() != TheLoop->getHeader() ||
839       Phi->getNumIncomingValues() != 2)
840     return false;
841 
842   // Ensure the loop has a preheader and a single latch block. The loop
843   // vectorizer will need the latch to set up the next iteration of the loop.
844   auto *Preheader = TheLoop->getLoopPreheader();
845   auto *Latch = TheLoop->getLoopLatch();
846   if (!Preheader || !Latch)
847     return false;
848 
849   // Ensure the phi node's incoming blocks are the loop preheader and latch.
850   if (Phi->getBasicBlockIndex(Preheader) < 0 ||
851       Phi->getBasicBlockIndex(Latch) < 0)
852     return false;
853 
854   // Get the previous value. The previous value comes from the latch edge while
855   // the initial value comes form the preheader edge.
856   auto *Previous = dyn_cast<Instruction>(Phi->getIncomingValueForBlock(Latch));
857   if (!Previous || !TheLoop->contains(Previous) || isa<PHINode>(Previous) ||
858       SinkAfter.count(Previous)) // Cannot rely on dominance due to motion.
859     return false;
860 
861   // Ensure every user of the phi node (recursively) is dominated by the
862   // previous value. The dominance requirement ensures the loop vectorizer will
863   // not need to vectorize the initial value prior to the first iteration of the
864   // loop.
865   // TODO: Consider extending this sinking to handle memory instructions.
866 
867   // We optimistically assume we can sink all users after Previous. Keep a set
868   // of instructions to sink after Previous ordered by dominance in the common
869   // basic block. It will be applied to SinkAfter if all users can be sunk.
870   auto CompareByComesBefore = [](const Instruction *A, const Instruction *B) {
871     return A->comesBefore(B);
872   };
873   std::set<Instruction *, decltype(CompareByComesBefore)> InstrsToSink(
874       CompareByComesBefore);
875 
876   BasicBlock *PhiBB = Phi->getParent();
877   SmallVector<Instruction *, 8> WorkList;
878   auto TryToPushSinkCandidate = [&](Instruction *SinkCandidate) {
879     // Already sunk SinkCandidate.
880     if (SinkCandidate->getParent() == PhiBB &&
881         InstrsToSink.find(SinkCandidate) != InstrsToSink.end())
882       return true;
883 
884     // Cyclic dependence.
885     if (Previous == SinkCandidate)
886       return false;
887 
888     if (DT->dominates(Previous,
889                       SinkCandidate)) // We already are good w/o sinking.
890       return true;
891 
892     if (SinkCandidate->getParent() != PhiBB ||
893         SinkCandidate->mayHaveSideEffects() ||
894         SinkCandidate->mayReadFromMemory() || SinkCandidate->isTerminator())
895       return false;
896 
897     // Do not try to sink an instruction multiple times (if multiple operands
898     // are first order recurrences).
899     // TODO: We can support this case, by sinking the instruction after the
900     // 'deepest' previous instruction.
901     if (SinkAfter.find(SinkCandidate) != SinkAfter.end())
902       return false;
903 
904     // If we reach a PHI node that is not dominated by Previous, we reached a
905     // header PHI. No need for sinking.
906     if (isa<PHINode>(SinkCandidate))
907       return true;
908 
909     // Sink User tentatively and check its users
910     InstrsToSink.insert(SinkCandidate);
911     WorkList.push_back(SinkCandidate);
912     return true;
913   };
914 
915   WorkList.push_back(Phi);
916   // Try to recursively sink instructions and their users after Previous.
917   while (!WorkList.empty()) {
918     Instruction *Current = WorkList.pop_back_val();
919     for (User *User : Current->users()) {
920       if (!TryToPushSinkCandidate(cast<Instruction>(User)))
921         return false;
922     }
923   }
924 
925   // We can sink all users of Phi. Update the mapping.
926   for (Instruction *I : InstrsToSink) {
927     SinkAfter[I] = Previous;
928     Previous = I;
929   }
930   return true;
931 }
932 
933 /// This function returns the identity element (or neutral element) for
934 /// the operation K.
935 Value *RecurrenceDescriptor::getRecurrenceIdentity(RecurKind K, Type *Tp,
936                                                    FastMathFlags FMF) const {
937   switch (K) {
938   case RecurKind::Xor:
939   case RecurKind::Add:
940   case RecurKind::Or:
941     // Adding, Xoring, Oring zero to a number does not change it.
942     return ConstantInt::get(Tp, 0);
943   case RecurKind::Mul:
944     // Multiplying a number by 1 does not change it.
945     return ConstantInt::get(Tp, 1);
946   case RecurKind::And:
947     // AND-ing a number with an all-1 value does not change it.
948     return ConstantInt::get(Tp, -1, true);
949   case RecurKind::FMul:
950     // Multiplying a number by 1 does not change it.
951     return ConstantFP::get(Tp, 1.0L);
952   case RecurKind::FMulAdd:
953   case RecurKind::FAdd:
954     // Adding zero to a number does not change it.
955     // FIXME: Ideally we should not need to check FMF for FAdd and should always
956     // use -0.0. However, this will currently result in mixed vectors of 0.0/-0.0.
957     // Instead, we should ensure that 1) the FMF from FAdd are propagated to the PHI
958     // nodes where possible, and 2) PHIs with the nsz flag + -0.0 use 0.0. This would
959     // mean we can then remove the check for noSignedZeros() below (see D98963).
960     if (FMF.noSignedZeros())
961       return ConstantFP::get(Tp, 0.0L);
962     return ConstantFP::get(Tp, -0.0L);
963   case RecurKind::UMin:
964     return ConstantInt::get(Tp, -1);
965   case RecurKind::UMax:
966     return ConstantInt::get(Tp, 0);
967   case RecurKind::SMin:
968     return ConstantInt::get(Tp,
969                             APInt::getSignedMaxValue(Tp->getIntegerBitWidth()));
970   case RecurKind::SMax:
971     return ConstantInt::get(Tp,
972                             APInt::getSignedMinValue(Tp->getIntegerBitWidth()));
973   case RecurKind::FMin:
974     return ConstantFP::getInfinity(Tp, true);
975   case RecurKind::FMax:
976     return ConstantFP::getInfinity(Tp, false);
977   case RecurKind::SelectICmp:
978   case RecurKind::SelectFCmp:
979     return getRecurrenceStartValue();
980     break;
981   default:
982     llvm_unreachable("Unknown recurrence kind");
983   }
984 }
985 
986 unsigned RecurrenceDescriptor::getOpcode(RecurKind Kind) {
987   switch (Kind) {
988   case RecurKind::Add:
989     return Instruction::Add;
990   case RecurKind::Mul:
991     return Instruction::Mul;
992   case RecurKind::Or:
993     return Instruction::Or;
994   case RecurKind::And:
995     return Instruction::And;
996   case RecurKind::Xor:
997     return Instruction::Xor;
998   case RecurKind::FMul:
999     return Instruction::FMul;
1000   case RecurKind::FMulAdd:
1001   case RecurKind::FAdd:
1002     return Instruction::FAdd;
1003   case RecurKind::SMax:
1004   case RecurKind::SMin:
1005   case RecurKind::UMax:
1006   case RecurKind::UMin:
1007   case RecurKind::SelectICmp:
1008     return Instruction::ICmp;
1009   case RecurKind::FMax:
1010   case RecurKind::FMin:
1011   case RecurKind::SelectFCmp:
1012     return Instruction::FCmp;
1013   default:
1014     llvm_unreachable("Unknown recurrence operation");
1015   }
1016 }
1017 
1018 SmallVector<Instruction *, 4>
1019 RecurrenceDescriptor::getReductionOpChain(PHINode *Phi, Loop *L) const {
1020   SmallVector<Instruction *, 4> ReductionOperations;
1021   unsigned RedOp = getOpcode(Kind);
1022 
1023   // Search down from the Phi to the LoopExitInstr, looking for instructions
1024   // with a single user of the correct type for the reduction.
1025 
1026   // Note that we check that the type of the operand is correct for each item in
1027   // the chain, including the last (the loop exit value). This can come up from
1028   // sub, which would otherwise be treated as an add reduction. MinMax also need
1029   // to check for a pair of icmp/select, for which we use getNextInstruction and
1030   // isCorrectOpcode functions to step the right number of instruction, and
1031   // check the icmp/select pair.
1032   // FIXME: We also do not attempt to look through Phi/Select's yet, which might
1033   // be part of the reduction chain, or attempt to looks through And's to find a
1034   // smaller bitwidth. Subs are also currently not allowed (which are usually
1035   // treated as part of a add reduction) as they are expected to generally be
1036   // more expensive than out-of-loop reductions, and need to be costed more
1037   // carefully.
1038   unsigned ExpectedUses = 1;
1039   if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp)
1040     ExpectedUses = 2;
1041 
1042   auto getNextInstruction = [&](Instruction *Cur) {
1043     if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
1044       // We are expecting a icmp/select pair, which we go to the next select
1045       // instruction if we can. We already know that Cur has 2 uses.
1046       if (isa<SelectInst>(*Cur->user_begin()))
1047         return cast<Instruction>(*Cur->user_begin());
1048       else
1049         return cast<Instruction>(*std::next(Cur->user_begin()));
1050     }
1051     return cast<Instruction>(*Cur->user_begin());
1052   };
1053   auto isCorrectOpcode = [&](Instruction *Cur) {
1054     if (RedOp == Instruction::ICmp || RedOp == Instruction::FCmp) {
1055       Value *LHS, *RHS;
1056       return SelectPatternResult::isMinOrMax(
1057           matchSelectPattern(Cur, LHS, RHS).Flavor);
1058     }
1059     // Recognize a call to the llvm.fmuladd intrinsic.
1060     if (isFMulAddIntrinsic(Cur))
1061       return true;
1062 
1063     return Cur->getOpcode() == RedOp;
1064   };
1065 
1066   // The loop exit instruction we check first (as a quick test) but add last. We
1067   // check the opcode is correct (and dont allow them to be Subs) and that they
1068   // have expected to have the expected number of uses. They will have one use
1069   // from the phi and one from a LCSSA value, no matter the type.
1070   if (!isCorrectOpcode(LoopExitInstr) || !LoopExitInstr->hasNUses(2))
1071     return {};
1072 
1073   // Check that the Phi has one (or two for min/max) uses.
1074   if (!Phi->hasNUses(ExpectedUses))
1075     return {};
1076   Instruction *Cur = getNextInstruction(Phi);
1077 
1078   // Each other instruction in the chain should have the expected number of uses
1079   // and be the correct opcode.
1080   while (Cur != LoopExitInstr) {
1081     if (!isCorrectOpcode(Cur) || !Cur->hasNUses(ExpectedUses))
1082       return {};
1083 
1084     ReductionOperations.push_back(Cur);
1085     Cur = getNextInstruction(Cur);
1086   }
1087 
1088   ReductionOperations.push_back(Cur);
1089   return ReductionOperations;
1090 }
1091 
1092 InductionDescriptor::InductionDescriptor(Value *Start, InductionKind K,
1093                                          const SCEV *Step, BinaryOperator *BOp,
1094                                          Type *ElementType,
1095                                          SmallVectorImpl<Instruction *> *Casts)
1096     : StartValue(Start), IK(K), Step(Step), InductionBinOp(BOp),
1097       ElementType(ElementType) {
1098   assert(IK != IK_NoInduction && "Not an induction");
1099 
1100   // Start value type should match the induction kind and the value
1101   // itself should not be null.
1102   assert(StartValue && "StartValue is null");
1103   assert((IK != IK_PtrInduction || StartValue->getType()->isPointerTy()) &&
1104          "StartValue is not a pointer for pointer induction");
1105   assert((IK != IK_IntInduction || StartValue->getType()->isIntegerTy()) &&
1106          "StartValue is not an integer for integer induction");
1107 
1108   // Check the Step Value. It should be non-zero integer value.
1109   assert((!getConstIntStepValue() || !getConstIntStepValue()->isZero()) &&
1110          "Step value is zero");
1111 
1112   assert((IK != IK_PtrInduction || getConstIntStepValue()) &&
1113          "Step value should be constant for pointer induction");
1114   assert((IK == IK_FpInduction || Step->getType()->isIntegerTy()) &&
1115          "StepValue is not an integer");
1116 
1117   assert((IK != IK_FpInduction || Step->getType()->isFloatingPointTy()) &&
1118          "StepValue is not FP for FpInduction");
1119   assert((IK != IK_FpInduction ||
1120           (InductionBinOp &&
1121            (InductionBinOp->getOpcode() == Instruction::FAdd ||
1122             InductionBinOp->getOpcode() == Instruction::FSub))) &&
1123          "Binary opcode should be specified for FP induction");
1124 
1125   if (IK == IK_PtrInduction)
1126     assert(ElementType && "Pointer induction must have element type");
1127   else
1128     assert(!ElementType && "Non-pointer induction cannot have element type");
1129 
1130   if (Casts) {
1131     for (auto &Inst : *Casts) {
1132       RedundantCasts.push_back(Inst);
1133     }
1134   }
1135 }
1136 
1137 ConstantInt *InductionDescriptor::getConstIntStepValue() const {
1138   if (isa<SCEVConstant>(Step))
1139     return dyn_cast<ConstantInt>(cast<SCEVConstant>(Step)->getValue());
1140   return nullptr;
1141 }
1142 
1143 bool InductionDescriptor::isFPInductionPHI(PHINode *Phi, const Loop *TheLoop,
1144                                            ScalarEvolution *SE,
1145                                            InductionDescriptor &D) {
1146 
1147   // Here we only handle FP induction variables.
1148   assert(Phi->getType()->isFloatingPointTy() && "Unexpected Phi type");
1149 
1150   if (TheLoop->getHeader() != Phi->getParent())
1151     return false;
1152 
1153   // The loop may have multiple entrances or multiple exits; we can analyze
1154   // this phi if it has a unique entry value and a unique backedge value.
1155   if (Phi->getNumIncomingValues() != 2)
1156     return false;
1157   Value *BEValue = nullptr, *StartValue = nullptr;
1158   if (TheLoop->contains(Phi->getIncomingBlock(0))) {
1159     BEValue = Phi->getIncomingValue(0);
1160     StartValue = Phi->getIncomingValue(1);
1161   } else {
1162     assert(TheLoop->contains(Phi->getIncomingBlock(1)) &&
1163            "Unexpected Phi node in the loop");
1164     BEValue = Phi->getIncomingValue(1);
1165     StartValue = Phi->getIncomingValue(0);
1166   }
1167 
1168   BinaryOperator *BOp = dyn_cast<BinaryOperator>(BEValue);
1169   if (!BOp)
1170     return false;
1171 
1172   Value *Addend = nullptr;
1173   if (BOp->getOpcode() == Instruction::FAdd) {
1174     if (BOp->getOperand(0) == Phi)
1175       Addend = BOp->getOperand(1);
1176     else if (BOp->getOperand(1) == Phi)
1177       Addend = BOp->getOperand(0);
1178   } else if (BOp->getOpcode() == Instruction::FSub)
1179     if (BOp->getOperand(0) == Phi)
1180       Addend = BOp->getOperand(1);
1181 
1182   if (!Addend)
1183     return false;
1184 
1185   // The addend should be loop invariant
1186   if (auto *I = dyn_cast<Instruction>(Addend))
1187     if (TheLoop->contains(I))
1188       return false;
1189 
1190   // FP Step has unknown SCEV
1191   const SCEV *Step = SE->getUnknown(Addend);
1192   D = InductionDescriptor(StartValue, IK_FpInduction, Step, BOp);
1193   return true;
1194 }
1195 
1196 /// This function is called when we suspect that the update-chain of a phi node
1197 /// (whose symbolic SCEV expression sin \p PhiScev) contains redundant casts,
1198 /// that can be ignored. (This can happen when the PSCEV rewriter adds a runtime
1199 /// predicate P under which the SCEV expression for the phi can be the
1200 /// AddRecurrence \p AR; See createAddRecFromPHIWithCast). We want to find the
1201 /// cast instructions that are involved in the update-chain of this induction.
1202 /// A caller that adds the required runtime predicate can be free to drop these
1203 /// cast instructions, and compute the phi using \p AR (instead of some scev
1204 /// expression with casts).
1205 ///
1206 /// For example, without a predicate the scev expression can take the following
1207 /// form:
1208 ///      (Ext ix (Trunc iy ( Start + i*Step ) to ix) to iy)
1209 ///
1210 /// It corresponds to the following IR sequence:
1211 /// %for.body:
1212 ///   %x = phi i64 [ 0, %ph ], [ %add, %for.body ]
1213 ///   %casted_phi = "ExtTrunc i64 %x"
1214 ///   %add = add i64 %casted_phi, %step
1215 ///
1216 /// where %x is given in \p PN,
1217 /// PSE.getSCEV(%x) is equal to PSE.getSCEV(%casted_phi) under a predicate,
1218 /// and the IR sequence that "ExtTrunc i64 %x" represents can take one of
1219 /// several forms, for example, such as:
1220 ///   ExtTrunc1:    %casted_phi = and  %x, 2^n-1
1221 /// or:
1222 ///   ExtTrunc2:    %t = shl %x, m
1223 ///                 %casted_phi = ashr %t, m
1224 ///
1225 /// If we are able to find such sequence, we return the instructions
1226 /// we found, namely %casted_phi and the instructions on its use-def chain up
1227 /// to the phi (not including the phi).
1228 static bool getCastsForInductionPHI(PredicatedScalarEvolution &PSE,
1229                                     const SCEVUnknown *PhiScev,
1230                                     const SCEVAddRecExpr *AR,
1231                                     SmallVectorImpl<Instruction *> &CastInsts) {
1232 
1233   assert(CastInsts.empty() && "CastInsts is expected to be empty.");
1234   auto *PN = cast<PHINode>(PhiScev->getValue());
1235   assert(PSE.getSCEV(PN) == AR && "Unexpected phi node SCEV expression");
1236   const Loop *L = AR->getLoop();
1237 
1238   // Find any cast instructions that participate in the def-use chain of
1239   // PhiScev in the loop.
1240   // FORNOW/TODO: We currently expect the def-use chain to include only
1241   // two-operand instructions, where one of the operands is an invariant.
1242   // createAddRecFromPHIWithCasts() currently does not support anything more
1243   // involved than that, so we keep the search simple. This can be
1244   // extended/generalized as needed.
1245 
1246   auto getDef = [&](const Value *Val) -> Value * {
1247     const BinaryOperator *BinOp = dyn_cast<BinaryOperator>(Val);
1248     if (!BinOp)
1249       return nullptr;
1250     Value *Op0 = BinOp->getOperand(0);
1251     Value *Op1 = BinOp->getOperand(1);
1252     Value *Def = nullptr;
1253     if (L->isLoopInvariant(Op0))
1254       Def = Op1;
1255     else if (L->isLoopInvariant(Op1))
1256       Def = Op0;
1257     return Def;
1258   };
1259 
1260   // Look for the instruction that defines the induction via the
1261   // loop backedge.
1262   BasicBlock *Latch = L->getLoopLatch();
1263   if (!Latch)
1264     return false;
1265   Value *Val = PN->getIncomingValueForBlock(Latch);
1266   if (!Val)
1267     return false;
1268 
1269   // Follow the def-use chain until the induction phi is reached.
1270   // If on the way we encounter a Value that has the same SCEV Expr as the
1271   // phi node, we can consider the instructions we visit from that point
1272   // as part of the cast-sequence that can be ignored.
1273   bool InCastSequence = false;
1274   auto *Inst = dyn_cast<Instruction>(Val);
1275   while (Val != PN) {
1276     // If we encountered a phi node other than PN, or if we left the loop,
1277     // we bail out.
1278     if (!Inst || !L->contains(Inst)) {
1279       return false;
1280     }
1281     auto *AddRec = dyn_cast<SCEVAddRecExpr>(PSE.getSCEV(Val));
1282     if (AddRec && PSE.areAddRecsEqualWithPreds(AddRec, AR))
1283       InCastSequence = true;
1284     if (InCastSequence) {
1285       // Only the last instruction in the cast sequence is expected to have
1286       // uses outside the induction def-use chain.
1287       if (!CastInsts.empty())
1288         if (!Inst->hasOneUse())
1289           return false;
1290       CastInsts.push_back(Inst);
1291     }
1292     Val = getDef(Val);
1293     if (!Val)
1294       return false;
1295     Inst = dyn_cast<Instruction>(Val);
1296   }
1297 
1298   return InCastSequence;
1299 }
1300 
1301 bool InductionDescriptor::isInductionPHI(PHINode *Phi, const Loop *TheLoop,
1302                                          PredicatedScalarEvolution &PSE,
1303                                          InductionDescriptor &D, bool Assume) {
1304   Type *PhiTy = Phi->getType();
1305 
1306   // Handle integer and pointer inductions variables.
1307   // Now we handle also FP induction but not trying to make a
1308   // recurrent expression from the PHI node in-place.
1309 
1310   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy() && !PhiTy->isFloatTy() &&
1311       !PhiTy->isDoubleTy() && !PhiTy->isHalfTy())
1312     return false;
1313 
1314   if (PhiTy->isFloatingPointTy())
1315     return isFPInductionPHI(Phi, TheLoop, PSE.getSE(), D);
1316 
1317   const SCEV *PhiScev = PSE.getSCEV(Phi);
1318   const auto *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1319 
1320   // We need this expression to be an AddRecExpr.
1321   if (Assume && !AR)
1322     AR = PSE.getAsAddRec(Phi);
1323 
1324   if (!AR) {
1325     LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1326     return false;
1327   }
1328 
1329   // Record any Cast instructions that participate in the induction update
1330   const auto *SymbolicPhi = dyn_cast<SCEVUnknown>(PhiScev);
1331   // If we started from an UnknownSCEV, and managed to build an addRecurrence
1332   // only after enabling Assume with PSCEV, this means we may have encountered
1333   // cast instructions that required adding a runtime check in order to
1334   // guarantee the correctness of the AddRecurrence respresentation of the
1335   // induction.
1336   if (PhiScev != AR && SymbolicPhi) {
1337     SmallVector<Instruction *, 2> Casts;
1338     if (getCastsForInductionPHI(PSE, SymbolicPhi, AR, Casts))
1339       return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR, &Casts);
1340   }
1341 
1342   return isInductionPHI(Phi, TheLoop, PSE.getSE(), D, AR);
1343 }
1344 
1345 bool InductionDescriptor::isInductionPHI(
1346     PHINode *Phi, const Loop *TheLoop, ScalarEvolution *SE,
1347     InductionDescriptor &D, const SCEV *Expr,
1348     SmallVectorImpl<Instruction *> *CastsToIgnore) {
1349   Type *PhiTy = Phi->getType();
1350   // We only handle integer and pointer inductions variables.
1351   if (!PhiTy->isIntegerTy() && !PhiTy->isPointerTy())
1352     return false;
1353 
1354   // Check that the PHI is consecutive.
1355   const SCEV *PhiScev = Expr ? Expr : SE->getSCEV(Phi);
1356   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PhiScev);
1357 
1358   if (!AR) {
1359     LLVM_DEBUG(dbgs() << "LV: PHI is not a poly recurrence.\n");
1360     return false;
1361   }
1362 
1363   if (AR->getLoop() != TheLoop) {
1364     // FIXME: We should treat this as a uniform. Unfortunately, we
1365     // don't currently know how to handled uniform PHIs.
1366     LLVM_DEBUG(
1367         dbgs() << "LV: PHI is a recurrence with respect to an outer loop.\n");
1368     return false;
1369   }
1370 
1371   Value *StartValue =
1372       Phi->getIncomingValueForBlock(AR->getLoop()->getLoopPreheader());
1373 
1374   BasicBlock *Latch = AR->getLoop()->getLoopLatch();
1375   if (!Latch)
1376     return false;
1377 
1378   const SCEV *Step = AR->getStepRecurrence(*SE);
1379   // Calculate the pointer stride and check if it is consecutive.
1380   // The stride may be a constant or a loop invariant integer value.
1381   const SCEVConstant *ConstStep = dyn_cast<SCEVConstant>(Step);
1382   if (!ConstStep && !SE->isLoopInvariant(Step, TheLoop))
1383     return false;
1384 
1385   if (PhiTy->isIntegerTy()) {
1386     BinaryOperator *BOp =
1387         dyn_cast<BinaryOperator>(Phi->getIncomingValueForBlock(Latch));
1388     D = InductionDescriptor(StartValue, IK_IntInduction, Step, BOp,
1389                             /* ElementType */ nullptr, CastsToIgnore);
1390     return true;
1391   }
1392 
1393   assert(PhiTy->isPointerTy() && "The PHI must be a pointer");
1394   // Pointer induction should be a constant.
1395   if (!ConstStep)
1396     return false;
1397 
1398   // Always use i8 element type for opaque pointer inductions.
1399   PointerType *PtrTy = cast<PointerType>(PhiTy);
1400   Type *ElementType = PtrTy->isOpaque() ? Type::getInt8Ty(PtrTy->getContext())
1401                                         : PtrTy->getElementType();
1402   if (!ElementType->isSized())
1403     return false;
1404 
1405   ConstantInt *CV = ConstStep->getValue();
1406   const DataLayout &DL = Phi->getModule()->getDataLayout();
1407   int64_t Size = static_cast<int64_t>(DL.getTypeAllocSize(ElementType));
1408   if (!Size)
1409     return false;
1410 
1411   int64_t CVSize = CV->getSExtValue();
1412   if (CVSize % Size)
1413     return false;
1414   auto *StepValue =
1415       SE->getConstant(CV->getType(), CVSize / Size, true /* signed */);
1416   D = InductionDescriptor(StartValue, IK_PtrInduction, StepValue,
1417                           /* BinOp */ nullptr, ElementType);
1418   return true;
1419 }
1420