xref: /llvm-project/llvm/lib/Analysis/LoopAccessAnalysis.cpp (revision 3a4376b8f90686f754ee51b296a064ab03c12895)
1 //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // The implementation for the loop memory dependence that was originally
10 // developed for the loop vectorizer.
11 //
12 //===----------------------------------------------------------------------===//
13 
14 #include "llvm/Analysis/LoopAccessAnalysis.h"
15 #include "llvm/ADT/APInt.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/EquivalenceClasses.h"
18 #include "llvm/ADT/PointerIntPair.h"
19 #include "llvm/ADT/STLExtras.h"
20 #include "llvm/ADT/SetVector.h"
21 #include "llvm/ADT/SmallPtrSet.h"
22 #include "llvm/ADT/SmallSet.h"
23 #include "llvm/ADT/SmallVector.h"
24 #include "llvm/Analysis/AliasAnalysis.h"
25 #include "llvm/Analysis/AliasSetTracker.h"
26 #include "llvm/Analysis/LoopAnalysisManager.h"
27 #include "llvm/Analysis/LoopInfo.h"
28 #include "llvm/Analysis/LoopIterator.h"
29 #include "llvm/Analysis/MemoryLocation.h"
30 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
31 #include "llvm/Analysis/ScalarEvolution.h"
32 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
33 #include "llvm/Analysis/TargetLibraryInfo.h"
34 #include "llvm/Analysis/TargetTransformInfo.h"
35 #include "llvm/Analysis/ValueTracking.h"
36 #include "llvm/Analysis/VectorUtils.h"
37 #include "llvm/IR/BasicBlock.h"
38 #include "llvm/IR/Constants.h"
39 #include "llvm/IR/DataLayout.h"
40 #include "llvm/IR/DebugLoc.h"
41 #include "llvm/IR/DerivedTypes.h"
42 #include "llvm/IR/DiagnosticInfo.h"
43 #include "llvm/IR/Dominators.h"
44 #include "llvm/IR/Function.h"
45 #include "llvm/IR/GetElementPtrTypeIterator.h"
46 #include "llvm/IR/InstrTypes.h"
47 #include "llvm/IR/Instruction.h"
48 #include "llvm/IR/Instructions.h"
49 #include "llvm/IR/Operator.h"
50 #include "llvm/IR/PassManager.h"
51 #include "llvm/IR/PatternMatch.h"
52 #include "llvm/IR/Type.h"
53 #include "llvm/IR/Value.h"
54 #include "llvm/IR/ValueHandle.h"
55 #include "llvm/Support/Casting.h"
56 #include "llvm/Support/CommandLine.h"
57 #include "llvm/Support/Debug.h"
58 #include "llvm/Support/ErrorHandling.h"
59 #include "llvm/Support/raw_ostream.h"
60 #include <algorithm>
61 #include <cassert>
62 #include <cstdint>
63 #include <iterator>
64 #include <utility>
65 #include <variant>
66 #include <vector>
67 
68 using namespace llvm;
69 using namespace llvm::PatternMatch;
70 
71 #define DEBUG_TYPE "loop-accesses"
72 
73 static cl::opt<unsigned, true>
74 VectorizationFactor("force-vector-width", cl::Hidden,
75                     cl::desc("Sets the SIMD width. Zero is autoselect."),
76                     cl::location(VectorizerParams::VectorizationFactor));
77 unsigned VectorizerParams::VectorizationFactor;
78 
79 static cl::opt<unsigned, true>
80 VectorizationInterleave("force-vector-interleave", cl::Hidden,
81                         cl::desc("Sets the vectorization interleave count. "
82                                  "Zero is autoselect."),
83                         cl::location(
84                             VectorizerParams::VectorizationInterleave));
85 unsigned VectorizerParams::VectorizationInterleave;
86 
87 static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
88     "runtime-memory-check-threshold", cl::Hidden,
89     cl::desc("When performing memory disambiguation checks at runtime do not "
90              "generate more than this number of comparisons (default = 8)."),
91     cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
92 unsigned VectorizerParams::RuntimeMemoryCheckThreshold;
93 
94 /// The maximum iterations used to merge memory checks
95 static cl::opt<unsigned> MemoryCheckMergeThreshold(
96     "memory-check-merge-threshold", cl::Hidden,
97     cl::desc("Maximum number of comparisons done when trying to merge "
98              "runtime memory checks. (default = 100)"),
99     cl::init(100));
100 
101 /// Maximum SIMD width.
102 const unsigned VectorizerParams::MaxVectorWidth = 64;
103 
104 /// We collect dependences up to this threshold.
105 static cl::opt<unsigned>
106     MaxDependences("max-dependences", cl::Hidden,
107                    cl::desc("Maximum number of dependences collected by "
108                             "loop-access analysis (default = 100)"),
109                    cl::init(100));
110 
111 /// This enables versioning on the strides of symbolically striding memory
112 /// accesses in code like the following.
113 ///   for (i = 0; i < N; ++i)
114 ///     A[i * Stride1] += B[i * Stride2] ...
115 ///
116 /// Will be roughly translated to
117 ///    if (Stride1 == 1 && Stride2 == 1) {
118 ///      for (i = 0; i < N; i+=4)
119 ///       A[i:i+3] += ...
120 ///    } else
121 ///      ...
122 static cl::opt<bool> EnableMemAccessVersioning(
123     "enable-mem-access-versioning", cl::init(true), cl::Hidden,
124     cl::desc("Enable symbolic stride memory access versioning"));
125 
126 /// Enable store-to-load forwarding conflict detection. This option can
127 /// be disabled for correctness testing.
128 static cl::opt<bool> EnableForwardingConflictDetection(
129     "store-to-load-forwarding-conflict-detection", cl::Hidden,
130     cl::desc("Enable conflict detection in loop-access analysis"),
131     cl::init(true));
132 
133 static cl::opt<unsigned> MaxForkedSCEVDepth(
134     "max-forked-scev-depth", cl::Hidden,
135     cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"),
136     cl::init(5));
137 
138 static cl::opt<bool> SpeculateUnitStride(
139     "laa-speculate-unit-stride", cl::Hidden,
140     cl::desc("Speculate that non-constant strides are unit in LAA"),
141     cl::init(true));
142 
143 static cl::opt<bool, true> HoistRuntimeChecks(
144     "hoist-runtime-checks", cl::Hidden,
145     cl::desc(
146         "Hoist inner loop runtime memory checks to outer loop if possible"),
147     cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true));
148 bool VectorizerParams::HoistRuntimeChecks;
149 
150 bool VectorizerParams::isInterleaveForced() {
151   return ::VectorizationInterleave.getNumOccurrences() > 0;
152 }
153 
154 const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
155                                             const DenseMap<Value *, const SCEV *> &PtrToStride,
156                                             Value *Ptr) {
157   const SCEV *OrigSCEV = PSE.getSCEV(Ptr);
158 
159   // If there is an entry in the map return the SCEV of the pointer with the
160   // symbolic stride replaced by one.
161   DenseMap<Value *, const SCEV *>::const_iterator SI = PtrToStride.find(Ptr);
162   if (SI == PtrToStride.end())
163     // For a non-symbolic stride, just return the original expression.
164     return OrigSCEV;
165 
166   const SCEV *StrideSCEV = SI->second;
167   // Note: This assert is both overly strong and overly weak.  The actual
168   // invariant here is that StrideSCEV should be loop invariant.  The only
169   // such invariant strides we happen to speculate right now are unknowns
170   // and thus this is a reasonable proxy of the actual invariant.
171   assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map");
172 
173   ScalarEvolution *SE = PSE.getSE();
174   const SCEV *CT = SE->getOne(StrideSCEV->getType());
175   PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT));
176   const SCEV *Expr = PSE.getSCEV(Ptr);
177 
178   LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
179 	     << " by: " << *Expr << "\n");
180   return Expr;
181 }
182 
183 RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
184     unsigned Index, const RuntimePointerChecking &RtCheck)
185     : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start),
186       AddressSpace(RtCheck.Pointers[Index]
187                        .PointerValue->getType()
188                        ->getPointerAddressSpace()),
189       NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) {
190   Members.push_back(Index);
191 }
192 
193 std::pair<const SCEV *, const SCEV *> llvm::getStartAndEndForAccess(
194     const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, const SCEV *MaxBECount,
195     ScalarEvolution *SE,
196     DenseMap<std::pair<const SCEV *, Type *>,
197              std::pair<const SCEV *, const SCEV *>> *PointerBounds) {
198   std::pair<const SCEV *, const SCEV *> *PtrBoundsPair;
199   if (PointerBounds) {
200     auto [Iter, Ins] = PointerBounds->insert(
201         {{PtrExpr, AccessTy},
202          {SE->getCouldNotCompute(), SE->getCouldNotCompute()}});
203     if (!Ins)
204       return Iter->second;
205     PtrBoundsPair = &Iter->second;
206   }
207 
208   const SCEV *ScStart;
209   const SCEV *ScEnd;
210 
211   if (SE->isLoopInvariant(PtrExpr, Lp)) {
212     ScStart = ScEnd = PtrExpr;
213   } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) {
214     ScStart = AR->getStart();
215     ScEnd = AR->evaluateAtIteration(MaxBECount, *SE);
216     const SCEV *Step = AR->getStepRecurrence(*SE);
217 
218     // For expressions with negative step, the upper bound is ScStart and the
219     // lower bound is ScEnd.
220     if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
221       if (CStep->getValue()->isNegative())
222         std::swap(ScStart, ScEnd);
223     } else {
224       // Fallback case: the step is not constant, but we can still
225       // get the upper and lower bounds of the interval by using min/max
226       // expressions.
227       ScStart = SE->getUMinExpr(ScStart, ScEnd);
228       ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
229     }
230   } else
231     return {SE->getCouldNotCompute(), SE->getCouldNotCompute()};
232 
233   assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant");
234   assert(SE->isLoopInvariant(ScEnd, Lp) && "ScEnd needs to be invariant");
235 
236   // Add the size of the pointed element to ScEnd.
237   auto &DL = Lp->getHeader()->getDataLayout();
238   Type *IdxTy = DL.getIndexType(PtrExpr->getType());
239   const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy);
240   ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
241 
242   std::pair<const SCEV *, const SCEV *> Res = {ScStart, ScEnd};
243   if (PointerBounds)
244     *PtrBoundsPair = Res;
245   return Res;
246 }
247 
248 /// Calculate Start and End points of memory access using
249 /// getStartAndEndForAccess.
250 void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr,
251                                     Type *AccessTy, bool WritePtr,
252                                     unsigned DepSetId, unsigned ASId,
253                                     PredicatedScalarEvolution &PSE,
254                                     bool NeedsFreeze) {
255   const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
256   const auto &[ScStart, ScEnd] = getStartAndEndForAccess(
257       Lp, PtrExpr, AccessTy, MaxBECount, PSE.getSE(), &DC.getPointerBounds());
258   assert(!isa<SCEVCouldNotCompute>(ScStart) &&
259          !isa<SCEVCouldNotCompute>(ScEnd) &&
260          "must be able to compute both start and end expressions");
261   Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr,
262                         NeedsFreeze);
263 }
264 
265 bool RuntimePointerChecking::tryToCreateDiffCheck(
266     const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) {
267   // If either group contains multiple different pointers, bail out.
268   // TODO: Support multiple pointers by using the minimum or maximum pointer,
269   // depending on src & sink.
270   if (CGI.Members.size() != 1 || CGJ.Members.size() != 1)
271     return false;
272 
273   const PointerInfo *Src = &Pointers[CGI.Members[0]];
274   const PointerInfo *Sink = &Pointers[CGJ.Members[0]];
275 
276   // If either pointer is read and written, multiple checks may be needed. Bail
277   // out.
278   if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() ||
279       !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty())
280     return false;
281 
282   ArrayRef<unsigned> AccSrc =
283       DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr);
284   ArrayRef<unsigned> AccSink =
285       DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr);
286   // If either pointer is accessed multiple times, there may not be a clear
287   // src/sink relation. Bail out for now.
288   if (AccSrc.size() != 1 || AccSink.size() != 1)
289     return false;
290 
291   // If the sink is accessed before src, swap src/sink.
292   if (AccSink[0] < AccSrc[0])
293     std::swap(Src, Sink);
294 
295   auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr);
296   auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr);
297   if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() ||
298       SinkAR->getLoop() != DC.getInnermostLoop())
299     return false;
300 
301   SmallVector<Instruction *, 4> SrcInsts =
302       DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr);
303   SmallVector<Instruction *, 4> SinkInsts =
304       DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr);
305   Type *SrcTy = getLoadStoreType(SrcInsts[0]);
306   Type *DstTy = getLoadStoreType(SinkInsts[0]);
307   if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy))
308     return false;
309 
310   const DataLayout &DL =
311       SinkAR->getLoop()->getHeader()->getDataLayout();
312   unsigned AllocSize =
313       std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy));
314 
315   // Only matching constant steps matching the AllocSize are supported at the
316   // moment. This simplifies the difference computation. Can be extended in the
317   // future.
318   auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE));
319   if (!Step || Step != SrcAR->getStepRecurrence(*SE) ||
320       Step->getAPInt().abs() != AllocSize)
321     return false;
322 
323   IntegerType *IntTy =
324       IntegerType::get(Src->PointerValue->getContext(),
325                        DL.getPointerSizeInBits(CGI.AddressSpace));
326 
327   // When counting down, the dependence distance needs to be swapped.
328   if (Step->getValue()->isNegative())
329     std::swap(SinkAR, SrcAR);
330 
331   const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy);
332   const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy);
333   if (isa<SCEVCouldNotCompute>(SinkStartInt) ||
334       isa<SCEVCouldNotCompute>(SrcStartInt))
335     return false;
336 
337   const Loop *InnerLoop = SrcAR->getLoop();
338   // If the start values for both Src and Sink also vary according to an outer
339   // loop, then it's probably better to avoid creating diff checks because
340   // they may not be hoisted. We should instead let llvm::addRuntimeChecks
341   // do the expanded full range overlap checks, which can be hoisted.
342   if (HoistRuntimeChecks && InnerLoop->getParentLoop() &&
343       isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) {
344     auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt);
345     auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt);
346     const Loop *StartARLoop = SrcStartAR->getLoop();
347     if (StartARLoop == SinkStartAR->getLoop() &&
348         StartARLoop == InnerLoop->getParentLoop() &&
349         // If the diff check would already be loop invariant (due to the
350         // recurrences being the same), then we prefer to keep the diff checks
351         // because they are cheaper.
352         SrcStartAR->getStepRecurrence(*SE) !=
353             SinkStartAR->getStepRecurrence(*SE)) {
354       LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these "
355                            "cannot be hoisted out of the outer loop\n");
356       return false;
357     }
358   }
359 
360   LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n"
361                     << "SrcStart: " << *SrcStartInt << '\n'
362                     << "SinkStartInt: " << *SinkStartInt << '\n');
363   DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize,
364                           Src->NeedsFreeze || Sink->NeedsFreeze);
365   return true;
366 }
367 
368 SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() {
369   SmallVector<RuntimePointerCheck, 4> Checks;
370 
371   for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
372     for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
373       const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
374       const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];
375 
376       if (needsChecking(CGI, CGJ)) {
377         CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ);
378         Checks.emplace_back(&CGI, &CGJ);
379       }
380     }
381   }
382   return Checks;
383 }
384 
385 void RuntimePointerChecking::generateChecks(
386     MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
387   assert(Checks.empty() && "Checks is not empty");
388   groupChecks(DepCands, UseDependencies);
389   Checks = generateChecks();
390 }
391 
392 bool RuntimePointerChecking::needsChecking(
393     const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
394   for (const auto &I : M.Members)
395     for (const auto &J : N.Members)
396       if (needsChecking(I, J))
397         return true;
398   return false;
399 }
400 
401 /// Compare \p I and \p J and return the minimum.
402 /// Return nullptr in case we couldn't find an answer.
403 static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
404                                    ScalarEvolution *SE) {
405   std::optional<APInt> Diff = SE->computeConstantDifference(J, I);
406   if (!Diff)
407     return nullptr;
408   return Diff->isNegative() ? J : I;
409 }
410 
411 bool RuntimeCheckingPtrGroup::addPointer(
412     unsigned Index, const RuntimePointerChecking &RtCheck) {
413   return addPointer(
414       Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
415       RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
416       RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
417 }
418 
419 bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
420                                          const SCEV *End, unsigned AS,
421                                          bool NeedsFreeze,
422                                          ScalarEvolution &SE) {
423   assert(AddressSpace == AS &&
424          "all pointers in a checking group must be in the same address space");
425 
426   // Compare the starts and ends with the known minimum and maximum
427   // of this set. We need to know how we compare against the min/max
428   // of the set in order to be able to emit memchecks.
429   const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
430   if (!Min0)
431     return false;
432 
433   const SCEV *Min1 = getMinFromExprs(End, High, &SE);
434   if (!Min1)
435     return false;
436 
437   // Update the low bound  expression if we've found a new min value.
438   if (Min0 == Start)
439     Low = Start;
440 
441   // Update the high bound expression if we've found a new max value.
442   if (Min1 != End)
443     High = End;
444 
445   Members.push_back(Index);
446   this->NeedsFreeze |= NeedsFreeze;
447   return true;
448 }
449 
450 void RuntimePointerChecking::groupChecks(
451     MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
452   // We build the groups from dependency candidates equivalence classes
453   // because:
454   //    - We know that pointers in the same equivalence class share
455   //      the same underlying object and therefore there is a chance
456   //      that we can compare pointers
457   //    - We wouldn't be able to merge two pointers for which we need
458   //      to emit a memcheck. The classes in DepCands are already
459   //      conveniently built such that no two pointers in the same
460   //      class need checking against each other.
461 
462   // We use the following (greedy) algorithm to construct the groups
463   // For every pointer in the equivalence class:
464   //   For each existing group:
465   //   - if the difference between this pointer and the min/max bounds
466   //     of the group is a constant, then make the pointer part of the
467   //     group and update the min/max bounds of that group as required.
468 
469   CheckingGroups.clear();
470 
471   // If we need to check two pointers to the same underlying object
472   // with a non-constant difference, we shouldn't perform any pointer
473   // grouping with those pointers. This is because we can easily get
474   // into cases where the resulting check would return false, even when
475   // the accesses are safe.
476   //
477   // The following example shows this:
478   // for (i = 0; i < 1000; ++i)
479   //   a[5000 + i * m] = a[i] + a[i + 9000]
480   //
481   // Here grouping gives a check of (5000, 5000 + 1000 * m) against
482   // (0, 10000) which is always false. However, if m is 1, there is no
483   // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
484   // us to perform an accurate check in this case.
485   //
486   // The above case requires that we have an UnknownDependence between
487   // accesses to the same underlying object. This cannot happen unless
488   // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
489   // is also false. In this case we will use the fallback path and create
490   // separate checking groups for all pointers.
491 
492   // If we don't have the dependency partitions, construct a new
493   // checking pointer group for each pointer. This is also required
494   // for correctness, because in this case we can have checking between
495   // pointers to the same underlying object.
496   if (!UseDependencies) {
497     for (unsigned I = 0; I < Pointers.size(); ++I)
498       CheckingGroups.emplace_back(I, *this);
499     return;
500   }
501 
502   unsigned TotalComparisons = 0;
503 
504   DenseMap<Value *, SmallVector<unsigned>> PositionMap;
505   for (unsigned Index = 0; Index < Pointers.size(); ++Index)
506     PositionMap[Pointers[Index].PointerValue].push_back(Index);
507 
508   // We need to keep track of what pointers we've already seen so we
509   // don't process them twice.
510   SmallSet<unsigned, 2> Seen;
511 
512   // Go through all equivalence classes, get the "pointer check groups"
513   // and add them to the overall solution. We use the order in which accesses
514   // appear in 'Pointers' to enforce determinism.
515   for (unsigned I = 0; I < Pointers.size(); ++I) {
516     // We've seen this pointer before, and therefore already processed
517     // its equivalence class.
518     if (Seen.count(I))
519       continue;
520 
521     MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
522                                            Pointers[I].IsWritePtr);
523 
524     SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
525     auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
526 
527     // Because DepCands is constructed by visiting accesses in the order in
528     // which they appear in alias sets (which is deterministic) and the
529     // iteration order within an equivalence class member is only dependent on
530     // the order in which unions and insertions are performed on the
531     // equivalence class, the iteration order is deterministic.
532     for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
533          MI != ME; ++MI) {
534       auto PointerI = PositionMap.find(MI->getPointer());
535       assert(PointerI != PositionMap.end() &&
536              "pointer in equivalence class not found in PositionMap");
537       for (unsigned Pointer : PointerI->second) {
538         bool Merged = false;
539         // Mark this pointer as seen.
540         Seen.insert(Pointer);
541 
542         // Go through all the existing sets and see if we can find one
543         // which can include this pointer.
544         for (RuntimeCheckingPtrGroup &Group : Groups) {
545           // Don't perform more than a certain amount of comparisons.
546           // This should limit the cost of grouping the pointers to something
547           // reasonable.  If we do end up hitting this threshold, the algorithm
548           // will create separate groups for all remaining pointers.
549           if (TotalComparisons > MemoryCheckMergeThreshold)
550             break;
551 
552           TotalComparisons++;
553 
554           if (Group.addPointer(Pointer, *this)) {
555             Merged = true;
556             break;
557           }
558         }
559 
560         if (!Merged)
561           // We couldn't add this pointer to any existing set or the threshold
562           // for the number of comparisons has been reached. Create a new group
563           // to hold the current pointer.
564           Groups.emplace_back(Pointer, *this);
565       }
566     }
567 
568     // We've computed the grouped checks for this partition.
569     // Save the results and continue with the next one.
570     llvm::copy(Groups, std::back_inserter(CheckingGroups));
571   }
572 }
573 
574 bool RuntimePointerChecking::arePointersInSamePartition(
575     const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
576     unsigned PtrIdx2) {
577   return (PtrToPartition[PtrIdx1] != -1 &&
578           PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
579 }
580 
581 bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
582   const PointerInfo &PointerI = Pointers[I];
583   const PointerInfo &PointerJ = Pointers[J];
584 
585   // No need to check if two readonly pointers intersect.
586   if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
587     return false;
588 
589   // Only need to check pointers between two different dependency sets.
590   if (PointerI.DependencySetId == PointerJ.DependencySetId)
591     return false;
592 
593   // Only need to check pointers in the same alias set.
594   return PointerI.AliasSetId == PointerJ.AliasSetId;
595 }
596 
597 void RuntimePointerChecking::printChecks(
598     raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
599     unsigned Depth) const {
600   unsigned N = 0;
601   for (const auto &[Check1, Check2] : Checks) {
602     const auto &First = Check1->Members, &Second = Check2->Members;
603 
604     OS.indent(Depth) << "Check " << N++ << ":\n";
605 
606     OS.indent(Depth + 2) << "Comparing group (" << Check1 << "):\n";
607     for (unsigned K : First)
608       OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
609 
610     OS.indent(Depth + 2) << "Against group (" << Check2 << "):\n";
611     for (unsigned K : Second)
612       OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n";
613   }
614 }
615 
616 void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {
617 
618   OS.indent(Depth) << "Run-time memory checks:\n";
619   printChecks(OS, Checks, Depth);
620 
621   OS.indent(Depth) << "Grouped accesses:\n";
622   for (const auto &CG : CheckingGroups) {
623     OS.indent(Depth + 2) << "Group " << &CG << ":\n";
624     OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
625                          << ")\n";
626     for (unsigned Member : CG.Members) {
627       OS.indent(Depth + 6) << "Member: " << *Pointers[Member].Expr << "\n";
628     }
629   }
630 }
631 
632 namespace {
633 
634 /// Analyses memory accesses in a loop.
635 ///
636 /// Checks whether run time pointer checks are needed and builds sets for data
637 /// dependence checking.
638 class AccessAnalysis {
639 public:
640   /// Read or write access location.
641   typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
642   typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;
643 
644   AccessAnalysis(const Loop *TheLoop, AAResults *AA, const LoopInfo *LI,
645                  MemoryDepChecker::DepCandidates &DA,
646                  PredicatedScalarEvolution &PSE,
647                  SmallPtrSetImpl<MDNode *> &LoopAliasScopes)
648       : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE),
649         LoopAliasScopes(LoopAliasScopes) {
650     // We're analyzing dependences across loop iterations.
651     BAA.enableCrossIterationMode();
652   }
653 
654   /// Register a load  and whether it is only read from.
655   void addLoad(const MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) {
656     Value *Ptr = const_cast<Value *>(Loc.Ptr);
657     AST.add(adjustLoc(Loc));
658     Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy);
659     if (IsReadOnly)
660       ReadOnlyPtr.insert(Ptr);
661   }
662 
663   /// Register a store.
664   void addStore(const MemoryLocation &Loc, Type *AccessTy) {
665     Value *Ptr = const_cast<Value *>(Loc.Ptr);
666     AST.add(adjustLoc(Loc));
667     Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy);
668   }
669 
670   /// Check if we can emit a run-time no-alias check for \p Access.
671   ///
672   /// Returns true if we can emit a run-time no alias check for \p Access.
673   /// If we can check this access, this also adds it to a dependence set and
674   /// adds a run-time to check for it to \p RtCheck. If \p Assume is true,
675   /// we will attempt to use additional run-time checks in order to get
676   /// the bounds of the pointer.
677   bool createCheckForAccess(RuntimePointerChecking &RtCheck,
678                             MemAccessInfo Access, Type *AccessTy,
679                             const DenseMap<Value *, const SCEV *> &Strides,
680                             DenseMap<Value *, unsigned> &DepSetId,
681                             Loop *TheLoop, unsigned &RunningDepId,
682                             unsigned ASId, bool ShouldCheckStride, bool Assume);
683 
684   /// Check whether we can check the pointers at runtime for
685   /// non-intersection.
686   ///
687   /// Returns true if we need no check or if we do and we can generate them
688   /// (i.e. the pointers have computable bounds).
689   bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
690                        Loop *TheLoop, const DenseMap<Value *, const SCEV *> &Strides,
691                        Value *&UncomputablePtr, bool ShouldCheckWrap = false);
692 
693   /// Goes over all memory accesses, checks whether a RT check is needed
694   /// and builds sets of dependent accesses.
695   void buildDependenceSets() {
696     processMemAccesses();
697   }
698 
699   /// Initial processing of memory accesses determined that we need to
700   /// perform dependency checking.
701   ///
702   /// Note that this can later be cleared if we retry memcheck analysis without
703   /// dependency checking (i.e. FoundNonConstantDistanceDependence).
704   bool isDependencyCheckNeeded() const { return !CheckDeps.empty(); }
705 
706   /// We decided that no dependence analysis would be used.  Reset the state.
707   void resetDepChecks(MemoryDepChecker &DepChecker) {
708     CheckDeps.clear();
709     DepChecker.clearDependences();
710   }
711 
712   const MemAccessInfoList &getDependenciesToCheck() const { return CheckDeps; }
713 
714 private:
715   typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap;
716 
717   /// Adjust the MemoryLocation so that it represents accesses to this
718   /// location across all iterations, rather than a single one.
719   MemoryLocation adjustLoc(MemoryLocation Loc) const {
720     // The accessed location varies within the loop, but remains within the
721     // underlying object.
722     Loc.Size = LocationSize::beforeOrAfterPointer();
723     Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope);
724     Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias);
725     return Loc;
726   }
727 
728   /// Drop alias scopes that are only valid within a single loop iteration.
729   MDNode *adjustAliasScopeList(MDNode *ScopeList) const {
730     if (!ScopeList)
731       return nullptr;
732 
733     // For the sake of simplicity, drop the whole scope list if any scope is
734     // iteration-local.
735     if (any_of(ScopeList->operands(), [&](Metadata *Scope) {
736           return LoopAliasScopes.contains(cast<MDNode>(Scope));
737         }))
738       return nullptr;
739 
740     return ScopeList;
741   }
742 
743   /// Go over all memory access and check whether runtime pointer checks
744   /// are needed and build sets of dependency check candidates.
745   void processMemAccesses();
746 
747   /// Map of all accesses. Values are the types used to access memory pointed to
748   /// by the pointer.
749   PtrAccessMap Accesses;
750 
751   /// The loop being checked.
752   const Loop *TheLoop;
753 
754   /// List of accesses that need a further dependence check.
755   MemAccessInfoList CheckDeps;
756 
757   /// Set of pointers that are read only.
758   SmallPtrSet<Value*, 16> ReadOnlyPtr;
759 
760   /// Batched alias analysis results.
761   BatchAAResults BAA;
762 
763   /// An alias set tracker to partition the access set by underlying object and
764   //intrinsic property (such as TBAA metadata).
765   AliasSetTracker AST;
766 
767   /// The LoopInfo of the loop being checked.
768   const LoopInfo *LI;
769 
770   /// Sets of potentially dependent accesses - members of one set share an
771   /// underlying pointer. The set "CheckDeps" identfies which sets really need a
772   /// dependence check.
773   MemoryDepChecker::DepCandidates &DepCands;
774 
775   /// Initial processing of memory accesses determined that we may need
776   /// to add memchecks.  Perform the analysis to determine the necessary checks.
777   ///
778   /// Note that, this is different from isDependencyCheckNeeded.  When we retry
779   /// memcheck analysis without dependency checking
780   /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
781   /// cleared while this remains set if we have potentially dependent accesses.
782   bool IsRTCheckAnalysisNeeded = false;
783 
784   /// The SCEV predicate containing all the SCEV-related assumptions.
785   PredicatedScalarEvolution &PSE;
786 
787   DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects;
788 
789   /// Alias scopes that are declared inside the loop, and as such not valid
790   /// across iterations.
791   SmallPtrSetImpl<MDNode *> &LoopAliasScopes;
792 };
793 
794 } // end anonymous namespace
795 
796 /// Check whether a pointer can participate in a runtime bounds check.
797 /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
798 /// by adding run-time checks (overflow checks) if necessary.
799 static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr,
800                                 const SCEV *PtrScev, Loop *L, bool Assume) {
801   // The bounds for loop-invariant pointer is trivial.
802   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
803     return true;
804 
805   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
806 
807   if (!AR && Assume)
808     AR = PSE.getAsAddRec(Ptr);
809 
810   if (!AR)
811     return false;
812 
813   return AR->isAffine();
814 }
815 
816 /// Check whether a pointer address cannot wrap.
817 static bool isNoWrap(PredicatedScalarEvolution &PSE,
818                      const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr,
819                      Type *AccessTy, Loop *L, bool Assume) {
820   const SCEV *PtrScev = PSE.getSCEV(Ptr);
821   if (PSE.getSE()->isLoopInvariant(PtrScev, L))
822     return true;
823 
824   return getPtrStride(PSE, AccessTy, Ptr, L, Strides, Assume).has_value() ||
825          PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
826 }
827 
828 static void visitPointers(Value *StartPtr, const Loop &InnermostLoop,
829                           function_ref<void(Value *)> AddPointer) {
830   SmallPtrSet<Value *, 8> Visited;
831   SmallVector<Value *> WorkList;
832   WorkList.push_back(StartPtr);
833 
834   while (!WorkList.empty()) {
835     Value *Ptr = WorkList.pop_back_val();
836     if (!Visited.insert(Ptr).second)
837       continue;
838     auto *PN = dyn_cast<PHINode>(Ptr);
839     // SCEV does not look through non-header PHIs inside the loop. Such phis
840     // can be analyzed by adding separate accesses for each incoming pointer
841     // value.
842     if (PN && InnermostLoop.contains(PN->getParent()) &&
843         PN->getParent() != InnermostLoop.getHeader()) {
844       for (const Use &Inc : PN->incoming_values())
845         WorkList.push_back(Inc);
846     } else
847       AddPointer(Ptr);
848   }
849 }
850 
851 // Walk back through the IR for a pointer, looking for a select like the
852 // following:
853 //
854 //  %offset = select i1 %cmp, i64 %a, i64 %b
855 //  %addr = getelementptr double, double* %base, i64 %offset
856 //  %ld = load double, double* %addr, align 8
857 //
858 // We won't be able to form a single SCEVAddRecExpr from this since the
859 // address for each loop iteration depends on %cmp. We could potentially
860 // produce multiple valid SCEVAddRecExprs, though, and check all of them for
861 // memory safety/aliasing if needed.
862 //
863 // If we encounter some IR we don't yet handle, or something obviously fine
864 // like a constant, then we just add the SCEV for that term to the list passed
865 // in by the caller. If we have a node that may potentially yield a valid
866 // SCEVAddRecExpr then we decompose it into parts and build the SCEV terms
867 // ourselves before adding to the list.
868 static void findForkedSCEVs(
869     ScalarEvolution *SE, const Loop *L, Value *Ptr,
870     SmallVectorImpl<PointerIntPair<const SCEV *, 1, bool>> &ScevList,
871     unsigned Depth) {
872   // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or
873   // we've exceeded our limit on recursion, just return whatever we have
874   // regardless of whether it can be used for a forked pointer or not, along
875   // with an indication of whether it might be a poison or undef value.
876   const SCEV *Scev = SE->getSCEV(Ptr);
877   if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) ||
878       !isa<Instruction>(Ptr) || Depth == 0) {
879     ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
880     return;
881   }
882 
883   Depth--;
884 
885   auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) {
886     return get<1>(S);
887   };
888 
889   auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) {
890     switch (Opcode) {
891     case Instruction::Add:
892       return SE->getAddExpr(L, R);
893     case Instruction::Sub:
894       return SE->getMinusSCEV(L, R);
895     default:
896       llvm_unreachable("Unexpected binary operator when walking ForkedPtrs");
897     }
898   };
899 
900   Instruction *I = cast<Instruction>(Ptr);
901   unsigned Opcode = I->getOpcode();
902   switch (Opcode) {
903   case Instruction::GetElementPtr: {
904     auto *GEP = cast<GetElementPtrInst>(I);
905     Type *SourceTy = GEP->getSourceElementType();
906     // We only handle base + single offset GEPs here for now.
907     // Not dealing with preexisting gathers yet, so no vectors.
908     if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) {
909       ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP));
910       break;
911     }
912     SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> BaseScevs;
913     SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> OffsetScevs;
914     findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth);
915     findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth);
916 
917     // See if we need to freeze our fork...
918     bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) ||
919                        any_of(OffsetScevs, UndefPoisonCheck);
920 
921     // Check that we only have a single fork, on either the base or the offset.
922     // Copy the SCEV across for the one without a fork in order to generate
923     // the full SCEV for both sides of the GEP.
924     if (OffsetScevs.size() == 2 && BaseScevs.size() == 1)
925       BaseScevs.push_back(BaseScevs[0]);
926     else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1)
927       OffsetScevs.push_back(OffsetScevs[0]);
928     else {
929       ScevList.emplace_back(Scev, NeedsFreeze);
930       break;
931     }
932 
933     // Find the pointer type we need to extend to.
934     Type *IntPtrTy = SE->getEffectiveSCEVType(
935         SE->getSCEV(GEP->getPointerOperand())->getType());
936 
937     // Find the size of the type being pointed to. We only have a single
938     // index term (guarded above) so we don't need to index into arrays or
939     // structures, just get the size of the scalar value.
940     const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy);
941 
942     // Scale up the offsets by the size of the type, then add to the bases.
943     const SCEV *Scaled1 = SE->getMulExpr(
944         Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[0]), IntPtrTy));
945     const SCEV *Scaled2 = SE->getMulExpr(
946         Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[1]), IntPtrTy));
947     ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[0]), Scaled1),
948                           NeedsFreeze);
949     ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[1]), Scaled2),
950                           NeedsFreeze);
951     break;
952   }
953   case Instruction::Select: {
954     SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
955     // A select means we've found a forked pointer, but we currently only
956     // support a single select per pointer so if there's another behind this
957     // then we just bail out and return the generic SCEV.
958     findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
959     findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth);
960     if (ChildScevs.size() == 2) {
961       ScevList.push_back(ChildScevs[0]);
962       ScevList.push_back(ChildScevs[1]);
963     } else
964       ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
965     break;
966   }
967   case Instruction::PHI: {
968     SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs;
969     // A phi means we've found a forked pointer, but we currently only
970     // support a single phi per pointer so if there's another behind this
971     // then we just bail out and return the generic SCEV.
972     if (I->getNumOperands() == 2) {
973       findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth);
974       findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth);
975     }
976     if (ChildScevs.size() == 2) {
977       ScevList.push_back(ChildScevs[0]);
978       ScevList.push_back(ChildScevs[1]);
979     } else
980       ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
981     break;
982   }
983   case Instruction::Add:
984   case Instruction::Sub: {
985     SmallVector<PointerIntPair<const SCEV *, 1, bool>> LScevs;
986     SmallVector<PointerIntPair<const SCEV *, 1, bool>> RScevs;
987     findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth);
988     findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth);
989 
990     // See if we need to freeze our fork...
991     bool NeedsFreeze =
992         any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck);
993 
994     // Check that we only have a single fork, on either the left or right side.
995     // Copy the SCEV across for the one without a fork in order to generate
996     // the full SCEV for both sides of the BinOp.
997     if (LScevs.size() == 2 && RScevs.size() == 1)
998       RScevs.push_back(RScevs[0]);
999     else if (RScevs.size() == 2 && LScevs.size() == 1)
1000       LScevs.push_back(LScevs[0]);
1001     else {
1002       ScevList.emplace_back(Scev, NeedsFreeze);
1003       break;
1004     }
1005 
1006     ScevList.emplace_back(
1007         GetBinOpExpr(Opcode, get<0>(LScevs[0]), get<0>(RScevs[0])),
1008         NeedsFreeze);
1009     ScevList.emplace_back(
1010         GetBinOpExpr(Opcode, get<0>(LScevs[1]), get<0>(RScevs[1])),
1011         NeedsFreeze);
1012     break;
1013   }
1014   default:
1015     // Just return the current SCEV if we haven't handled the instruction yet.
1016     LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n");
1017     ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr));
1018     break;
1019   }
1020 }
1021 
1022 static SmallVector<PointerIntPair<const SCEV *, 1, bool>>
1023 findForkedPointer(PredicatedScalarEvolution &PSE,
1024                   const DenseMap<Value *, const SCEV *> &StridesMap, Value *Ptr,
1025                   const Loop *L) {
1026   ScalarEvolution *SE = PSE.getSE();
1027   assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!");
1028   SmallVector<PointerIntPair<const SCEV *, 1, bool>> Scevs;
1029   findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth);
1030 
1031   // For now, we will only accept a forked pointer with two possible SCEVs
1032   // that are either SCEVAddRecExprs or loop invariant.
1033   if (Scevs.size() == 2 &&
1034       (isa<SCEVAddRecExpr>(get<0>(Scevs[0])) ||
1035        SE->isLoopInvariant(get<0>(Scevs[0]), L)) &&
1036       (isa<SCEVAddRecExpr>(get<0>(Scevs[1])) ||
1037        SE->isLoopInvariant(get<0>(Scevs[1]), L))) {
1038     LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n");
1039     LLVM_DEBUG(dbgs() << "\t(1) " << *get<0>(Scevs[0]) << "\n");
1040     LLVM_DEBUG(dbgs() << "\t(2) " << *get<0>(Scevs[1]) << "\n");
1041     return Scevs;
1042   }
1043 
1044   return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}};
1045 }
1046 
1047 bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
1048                                           MemAccessInfo Access, Type *AccessTy,
1049                                           const DenseMap<Value *, const SCEV *> &StridesMap,
1050                                           DenseMap<Value *, unsigned> &DepSetId,
1051                                           Loop *TheLoop, unsigned &RunningDepId,
1052                                           unsigned ASId, bool ShouldCheckWrap,
1053                                           bool Assume) {
1054   Value *Ptr = Access.getPointer();
1055 
1056   SmallVector<PointerIntPair<const SCEV *, 1, bool>> TranslatedPtrs =
1057       findForkedPointer(PSE, StridesMap, Ptr, TheLoop);
1058 
1059   for (const auto &P : TranslatedPtrs) {
1060     const SCEV *PtrExpr = get<0>(P);
1061     if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume))
1062       return false;
1063 
1064     // When we run after a failing dependency check we have to make sure
1065     // we don't have wrapping pointers.
1066     if (ShouldCheckWrap) {
1067       // Skip wrap checking when translating pointers.
1068       if (TranslatedPtrs.size() > 1)
1069         return false;
1070 
1071       if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop, Assume))
1072         return false;
1073     }
1074     // If there's only one option for Ptr, look it up after bounds and wrap
1075     // checking, because assumptions might have been added to PSE.
1076     if (TranslatedPtrs.size() == 1)
1077       TranslatedPtrs[0] = {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr),
1078                            false};
1079   }
1080 
1081   for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) {
1082     // The id of the dependence set.
1083     unsigned DepId;
1084 
1085     if (isDependencyCheckNeeded()) {
1086       Value *Leader = DepCands.getLeaderValue(Access).getPointer();
1087       unsigned &LeaderId = DepSetId[Leader];
1088       if (!LeaderId)
1089         LeaderId = RunningDepId++;
1090       DepId = LeaderId;
1091     } else
1092       // Each access has its own dependence set.
1093       DepId = RunningDepId++;
1094 
1095     bool IsWrite = Access.getInt();
1096     RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE,
1097                    NeedsFreeze);
1098     LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');
1099   }
1100 
1101   return true;
1102 }
1103 
1104 bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
1105                                      ScalarEvolution *SE, Loop *TheLoop,
1106                                      const DenseMap<Value *, const SCEV *> &StridesMap,
1107                                      Value *&UncomputablePtr, bool ShouldCheckWrap) {
1108   // Find pointers with computable bounds. We are going to use this information
1109   // to place a runtime bound check.
1110   bool CanDoRT = true;
1111 
1112   bool MayNeedRTCheck = false;
1113   if (!IsRTCheckAnalysisNeeded) return true;
1114 
1115   bool IsDepCheckNeeded = isDependencyCheckNeeded();
1116 
1117   // We assign a consecutive id to access from different alias sets.
1118   // Accesses between different groups doesn't need to be checked.
1119   unsigned ASId = 0;
1120   for (const auto &AS : AST) {
1121     int NumReadPtrChecks = 0;
1122     int NumWritePtrChecks = 0;
1123     bool CanDoAliasSetRT = true;
1124     ++ASId;
1125     auto ASPointers = AS.getPointers();
1126 
1127     // We assign consecutive id to access from different dependence sets.
1128     // Accesses within the same set don't need a runtime check.
1129     unsigned RunningDepId = 1;
1130     DenseMap<Value *, unsigned> DepSetId;
1131 
1132     SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries;
1133 
1134     // First, count how many write and read accesses are in the alias set. Also
1135     // collect MemAccessInfos for later.
1136     SmallVector<MemAccessInfo, 4> AccessInfos;
1137     for (const Value *ConstPtr : ASPointers) {
1138       Value *Ptr = const_cast<Value *>(ConstPtr);
1139       bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));
1140       if (IsWrite)
1141         ++NumWritePtrChecks;
1142       else
1143         ++NumReadPtrChecks;
1144       AccessInfos.emplace_back(Ptr, IsWrite);
1145     }
1146 
1147     // We do not need runtime checks for this alias set, if there are no writes
1148     // or a single write and no reads.
1149     if (NumWritePtrChecks == 0 ||
1150         (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
1151       assert((ASPointers.size() <= 1 ||
1152               all_of(ASPointers,
1153                      [this](const Value *Ptr) {
1154                        MemAccessInfo AccessWrite(const_cast<Value *>(Ptr),
1155                                                  true);
1156                        return DepCands.findValue(AccessWrite) == DepCands.end();
1157                      })) &&
1158              "Can only skip updating CanDoRT below, if all entries in AS "
1159              "are reads or there is at most 1 entry");
1160       continue;
1161     }
1162 
1163     for (auto &Access : AccessInfos) {
1164       for (const auto &AccessTy : Accesses[Access]) {
1165         if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1166                                   DepSetId, TheLoop, RunningDepId, ASId,
1167                                   ShouldCheckWrap, false)) {
1168           LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
1169                             << *Access.getPointer() << '\n');
1170           Retries.emplace_back(Access, AccessTy);
1171           CanDoAliasSetRT = false;
1172         }
1173       }
1174     }
1175 
1176     // Note that this function computes CanDoRT and MayNeedRTCheck
1177     // independently. For example CanDoRT=false, MayNeedRTCheck=false means that
1178     // we have a pointer for which we couldn't find the bounds but we don't
1179     // actually need to emit any checks so it does not matter.
1180     //
1181     // We need runtime checks for this alias set, if there are at least 2
1182     // dependence sets (in which case RunningDepId > 2) or if we need to re-try
1183     // any bound checks (because in that case the number of dependence sets is
1184     // incomplete).
1185     bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();
1186 
1187     // We need to perform run-time alias checks, but some pointers had bounds
1188     // that couldn't be checked.
1189     if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
1190       // Reset the CanDoSetRt flag and retry all accesses that have failed.
1191       // We know that we need these checks, so we can now be more aggressive
1192       // and add further checks if required (overflow checks).
1193       CanDoAliasSetRT = true;
1194       for (const auto &[Access, AccessTy] : Retries) {
1195         if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap,
1196                                   DepSetId, TheLoop, RunningDepId, ASId,
1197                                   ShouldCheckWrap, /*Assume=*/true)) {
1198           CanDoAliasSetRT = false;
1199           UncomputablePtr = Access.getPointer();
1200           break;
1201         }
1202       }
1203     }
1204 
1205     CanDoRT &= CanDoAliasSetRT;
1206     MayNeedRTCheck |= NeedsAliasSetRTCheck;
1207     ++ASId;
1208   }
1209 
1210   // If the pointers that we would use for the bounds comparison have different
1211   // address spaces, assume the values aren't directly comparable, so we can't
1212   // use them for the runtime check. We also have to assume they could
1213   // overlap. In the future there should be metadata for whether address spaces
1214   // are disjoint.
1215   unsigned NumPointers = RtCheck.Pointers.size();
1216   for (unsigned i = 0; i < NumPointers; ++i) {
1217     for (unsigned j = i + 1; j < NumPointers; ++j) {
1218       // Only need to check pointers between two different dependency sets.
1219       if (RtCheck.Pointers[i].DependencySetId ==
1220           RtCheck.Pointers[j].DependencySetId)
1221        continue;
1222       // Only need to check pointers in the same alias set.
1223       if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
1224         continue;
1225 
1226       Value *PtrI = RtCheck.Pointers[i].PointerValue;
1227       Value *PtrJ = RtCheck.Pointers[j].PointerValue;
1228 
1229       unsigned ASi = PtrI->getType()->getPointerAddressSpace();
1230       unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
1231       if (ASi != ASj) {
1232         LLVM_DEBUG(
1233             dbgs() << "LAA: Runtime check would require comparison between"
1234                       " different address spaces\n");
1235         return false;
1236       }
1237     }
1238   }
1239 
1240   if (MayNeedRTCheck && CanDoRT)
1241     RtCheck.generateChecks(DepCands, IsDepCheckNeeded);
1242 
1243   LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
1244                     << " pointer comparisons.\n");
1245 
1246   // If we can do run-time checks, but there are no checks, no runtime checks
1247   // are needed. This can happen when all pointers point to the same underlying
1248   // object for example.
1249   RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;
1250 
1251   bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
1252   if (!CanDoRTIfNeeded)
1253     RtCheck.reset();
1254   return CanDoRTIfNeeded;
1255 }
1256 
1257 void AccessAnalysis::processMemAccesses() {
1258   // We process the set twice: first we process read-write pointers, last we
1259   // process read-only pointers. This allows us to skip dependence tests for
1260   // read-only pointers.
1261 
1262   LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
1263   LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
1264   LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
1265   LLVM_DEBUG({
1266     for (const auto &[A, _] : Accesses)
1267       dbgs() << "\t" << *A.getPointer() << " ("
1268              << (A.getInt() ? "write"
1269                             : (ReadOnlyPtr.count(A.getPointer()) ? "read-only"
1270                                                                  : "read"))
1271              << ")\n";
1272   });
1273 
1274   // The AliasSetTracker has nicely partitioned our pointers by metadata
1275   // compatibility and potential for underlying-object overlap. As a result, we
1276   // only need to check for potential pointer dependencies within each alias
1277   // set.
1278   for (const auto &AS : AST) {
1279     // Note that both the alias-set tracker and the alias sets themselves used
1280     // ordered collections internally and so the iteration order here is
1281     // deterministic.
1282     auto ASPointers = AS.getPointers();
1283 
1284     bool SetHasWrite = false;
1285 
1286     // Map of pointers to last access encountered.
1287     typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
1288     UnderlyingObjToAccessMap ObjToLastAccess;
1289 
1290     // Set of access to check after all writes have been processed.
1291     PtrAccessMap DeferredAccesses;
1292 
1293     // Iterate over each alias set twice, once to process read/write pointers,
1294     // and then to process read-only pointers.
1295     for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
1296       bool UseDeferred = SetIteration > 0;
1297       PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses;
1298 
1299       for (const Value *ConstPtr : ASPointers) {
1300         Value *Ptr = const_cast<Value *>(ConstPtr);
1301 
1302         // For a single memory access in AliasSetTracker, Accesses may contain
1303         // both read and write, and they both need to be handled for CheckDeps.
1304         for (const auto &[AC, _] : S) {
1305           if (AC.getPointer() != Ptr)
1306             continue;
1307 
1308           bool IsWrite = AC.getInt();
1309 
1310           // If we're using the deferred access set, then it contains only
1311           // reads.
1312           bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
1313           if (UseDeferred && !IsReadOnlyPtr)
1314             continue;
1315           // Otherwise, the pointer must be in the PtrAccessSet, either as a
1316           // read or a write.
1317           assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
1318                   S.count(MemAccessInfo(Ptr, false))) &&
1319                  "Alias-set pointer not in the access set?");
1320 
1321           MemAccessInfo Access(Ptr, IsWrite);
1322           DepCands.insert(Access);
1323 
1324           // Memorize read-only pointers for later processing and skip them in
1325           // the first round (they need to be checked after we have seen all
1326           // write pointers). Note: we also mark pointer that are not
1327           // consecutive as "read-only" pointers (so that we check
1328           // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
1329           if (!UseDeferred && IsReadOnlyPtr) {
1330             // We only use the pointer keys, the types vector values don't
1331             // matter.
1332             DeferredAccesses.insert({Access, {}});
1333             continue;
1334           }
1335 
1336           // If this is a write - check other reads and writes for conflicts. If
1337           // this is a read only check other writes for conflicts (but only if
1338           // there is no other write to the ptr - this is an optimization to
1339           // catch "a[i] = a[i] + " without having to do a dependence check).
1340           if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
1341             CheckDeps.push_back(Access);
1342             IsRTCheckAnalysisNeeded = true;
1343           }
1344 
1345           if (IsWrite)
1346             SetHasWrite = true;
1347 
1348           // Create sets of pointers connected by a shared alias set and
1349           // underlying object.
1350           typedef SmallVector<const Value *, 16> ValueVector;
1351           ValueVector TempObjects;
1352 
1353           UnderlyingObjects[Ptr] = {};
1354           SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr];
1355           ::getUnderlyingObjects(Ptr, UOs, LI);
1356           LLVM_DEBUG(dbgs()
1357                      << "Underlying objects for pointer " << *Ptr << "\n");
1358           for (const Value *UnderlyingObj : UOs) {
1359             // nullptr never alias, don't join sets for pointer that have "null"
1360             // in their UnderlyingObjects list.
1361             if (isa<ConstantPointerNull>(UnderlyingObj) &&
1362                 !NullPointerIsDefined(
1363                     TheLoop->getHeader()->getParent(),
1364                     UnderlyingObj->getType()->getPointerAddressSpace()))
1365               continue;
1366 
1367             UnderlyingObjToAccessMap::iterator Prev =
1368                 ObjToLastAccess.find(UnderlyingObj);
1369             if (Prev != ObjToLastAccess.end())
1370               DepCands.unionSets(Access, Prev->second);
1371 
1372             ObjToLastAccess[UnderlyingObj] = Access;
1373             LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
1374           }
1375         }
1376       }
1377     }
1378   }
1379 }
1380 
1381 /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
1382 /// i.e. monotonically increasing/decreasing.
1383 static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
1384                            PredicatedScalarEvolution &PSE, const Loop *L) {
1385 
1386   // FIXME: This should probably only return true for NUW.
1387   if (AR->getNoWrapFlags(SCEV::NoWrapMask))
1388     return true;
1389 
1390   if (PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
1391     return true;
1392 
1393   // Scalar evolution does not propagate the non-wrapping flags to values that
1394   // are derived from a non-wrapping induction variable because non-wrapping
1395   // could be flow-sensitive.
1396   //
1397   // Look through the potentially overflowing instruction to try to prove
1398   // non-wrapping for the *specific* value of Ptr.
1399 
1400   // The arithmetic implied by an nusw GEP can't overflow.
1401   const auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1402   if (!GEP || !GEP->hasNoUnsignedSignedWrap())
1403     return false;
1404 
1405   // Make sure there is only one non-const index and analyze that.
1406   Value *NonConstIndex = nullptr;
1407   for (Value *Index : GEP->indices())
1408     if (!isa<ConstantInt>(Index)) {
1409       if (NonConstIndex)
1410         return false;
1411       NonConstIndex = Index;
1412     }
1413   if (!NonConstIndex)
1414     // The recurrence is on the pointer, ignore for now.
1415     return false;
1416 
1417   // The index in GEP is signed.  It is non-wrapping if it's derived from a NSW
1418   // AddRec using a NSW operation.
1419   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
1420     if (OBO->hasNoSignedWrap() &&
1421         // Assume constant for other the operand so that the AddRec can be
1422         // easily found.
1423         isa<ConstantInt>(OBO->getOperand(1))) {
1424       const SCEV *OpScev = PSE.getSCEV(OBO->getOperand(0));
1425 
1426       if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
1427         return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
1428     }
1429 
1430   return false;
1431 }
1432 
1433 /// Check whether the access through \p Ptr has a constant stride.
1434 std::optional<int64_t>
1435 llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr,
1436                    const Loop *Lp,
1437                    const DenseMap<Value *, const SCEV *> &StridesMap,
1438                    bool Assume, bool ShouldCheckWrap) {
1439   const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
1440   if (PSE.getSE()->isLoopInvariant(PtrScev, Lp))
1441     return 0;
1442 
1443   Type *Ty = Ptr->getType();
1444   assert(Ty->isPointerTy() && "Unexpected non-ptr");
1445   if (isa<ScalableVectorType>(AccessTy)) {
1446     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy
1447                       << "\n");
1448     return std::nullopt;
1449   }
1450 
1451   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
1452   if (Assume && !AR)
1453     AR = PSE.getAsAddRec(Ptr);
1454 
1455   if (!AR) {
1456     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
1457                       << " SCEV: " << *PtrScev << "\n");
1458     return std::nullopt;
1459   }
1460 
1461   // The access function must stride over the innermost loop.
1462   if (Lp != AR->getLoop()) {
1463     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
1464                       << *Ptr << " SCEV: " << *AR << "\n");
1465     return std::nullopt;
1466   }
1467 
1468   // Check the step is constant.
1469   const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
1470 
1471   // Calculate the pointer stride and check if it is constant.
1472   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
1473   if (!C) {
1474     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
1475                       << " SCEV: " << *AR << "\n");
1476     return std::nullopt;
1477   }
1478 
1479   const auto &DL = Lp->getHeader()->getDataLayout();
1480   TypeSize AllocSize = DL.getTypeAllocSize(AccessTy);
1481   int64_t Size = AllocSize.getFixedValue();
1482   const APInt &APStepVal = C->getAPInt();
1483 
1484   // Huge step value - give up.
1485   if (APStepVal.getBitWidth() > 64)
1486     return std::nullopt;
1487 
1488   int64_t StepVal = APStepVal.getSExtValue();
1489 
1490   // Strided access.
1491   int64_t Stride = StepVal / Size;
1492   int64_t Rem = StepVal % Size;
1493   if (Rem)
1494     return std::nullopt;
1495 
1496   if (!ShouldCheckWrap)
1497     return Stride;
1498 
1499   // The address calculation must not wrap. Otherwise, a dependence could be
1500   // inverted.
1501   if (isNoWrapAddRec(Ptr, AR, PSE, Lp))
1502     return Stride;
1503 
1504   // An nusw getelementptr that is an AddRec cannot wrap. If it would wrap,
1505   // the distance between the previously accessed location and the wrapped
1506   // location will be larger than half the pointer index type space. In that
1507   // case, the GEP would be  poison and any memory access dependent on it would
1508   // be immediate UB when executed.
1509   if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
1510       GEP && GEP->hasNoUnsignedSignedWrap())
1511     return Stride;
1512 
1513   // If the null pointer is undefined, then a access sequence which would
1514   // otherwise access it can be assumed not to unsigned wrap.  Note that this
1515   // assumes the object in memory is aligned to the natural alignment.
1516   unsigned AddrSpace = Ty->getPointerAddressSpace();
1517   if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
1518       (Stride == 1 || Stride == -1))
1519     return Stride;
1520 
1521   if (Assume) {
1522     PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
1523     LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
1524                       << "LAA:   Pointer: " << *Ptr << "\n"
1525                       << "LAA:   SCEV: " << *AR << "\n"
1526                       << "LAA:   Added an overflow assumption\n");
1527     return Stride;
1528   }
1529   LLVM_DEBUG(
1530       dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
1531              << *Ptr << " SCEV: " << *AR << "\n");
1532   return std::nullopt;
1533 }
1534 
1535 std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
1536                                          Type *ElemTyB, Value *PtrB,
1537                                          const DataLayout &DL,
1538                                          ScalarEvolution &SE, bool StrictCheck,
1539                                          bool CheckType) {
1540   assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1541 
1542   // Make sure that A and B are different pointers.
1543   if (PtrA == PtrB)
1544     return 0;
1545 
1546   // Make sure that the element types are the same if required.
1547   if (CheckType && ElemTyA != ElemTyB)
1548     return std::nullopt;
1549 
1550   unsigned ASA = PtrA->getType()->getPointerAddressSpace();
1551   unsigned ASB = PtrB->getType()->getPointerAddressSpace();
1552 
1553   // Check that the address spaces match.
1554   if (ASA != ASB)
1555     return std::nullopt;
1556   unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1557 
1558   APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1559   const Value *PtrA1 = PtrA->stripAndAccumulateConstantOffsets(
1560       DL, OffsetA, /*AllowNonInbounds=*/true);
1561   const Value *PtrB1 = PtrB->stripAndAccumulateConstantOffsets(
1562       DL, OffsetB, /*AllowNonInbounds=*/true);
1563 
1564   int Val;
1565   if (PtrA1 == PtrB1) {
1566     // Retrieve the address space again as pointer stripping now tracks through
1567     // `addrspacecast`.
1568     ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
1569     ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
1570     // Check that the address spaces match and that the pointers are valid.
1571     if (ASA != ASB)
1572       return std::nullopt;
1573 
1574     IdxWidth = DL.getIndexSizeInBits(ASA);
1575     OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1576     OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1577 
1578     OffsetB -= OffsetA;
1579     Val = OffsetB.getSExtValue();
1580   } else {
1581     // Otherwise compute the distance with SCEV between the base pointers.
1582     const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1583     const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1584     std::optional<APInt> Diff =
1585         SE.computeConstantDifference(PtrSCEVB, PtrSCEVA);
1586     if (!Diff)
1587       return std::nullopt;
1588     Val = Diff->getSExtValue();
1589   }
1590   int Size = DL.getTypeStoreSize(ElemTyA);
1591   int Dist = Val / Size;
1592 
1593   // Ensure that the calculated distance matches the type-based one after all
1594   // the bitcasts removal in the provided pointers.
1595   if (!StrictCheck || Dist * Size == Val)
1596     return Dist;
1597   return std::nullopt;
1598 }
1599 
1600 bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
1601                            const DataLayout &DL, ScalarEvolution &SE,
1602                            SmallVectorImpl<unsigned> &SortedIndices) {
1603   assert(llvm::all_of(
1604              VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
1605          "Expected list of pointer operands.");
1606   // Walk over the pointers, and map each of them to an offset relative to
1607   // first pointer in the array.
1608   Value *Ptr0 = VL[0];
1609 
1610   using DistOrdPair = std::pair<int64_t, int>;
1611   auto Compare = llvm::less_first();
1612   std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1613   Offsets.emplace(0, 0);
1614   bool IsConsecutive = true;
1615   for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
1616     std::optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
1617                                               /*StrictCheck=*/true);
1618     if (!Diff)
1619       return false;
1620 
1621     // Check if the pointer with the same offset is found.
1622     int64_t Offset = *Diff;
1623     auto [It, IsInserted] = Offsets.emplace(Offset, Idx);
1624     if (!IsInserted)
1625       return false;
1626     // Consecutive order if the inserted element is the last one.
1627     IsConsecutive &= std::next(It) == Offsets.end();
1628   }
1629   SortedIndices.clear();
1630   if (!IsConsecutive) {
1631     // Fill SortedIndices array only if it is non-consecutive.
1632     SortedIndices.resize(VL.size());
1633     for (auto [Idx, Off] : enumerate(Offsets))
1634       SortedIndices[Idx] = Off.second;
1635   }
1636   return true;
1637 }
1638 
1639 /// Returns true if the memory operations \p A and \p B are consecutive.
1640 bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
1641                                ScalarEvolution &SE, bool CheckType) {
1642   Value *PtrA = getLoadStorePointerOperand(A);
1643   Value *PtrB = getLoadStorePointerOperand(B);
1644   if (!PtrA || !PtrB)
1645     return false;
1646   Type *ElemTyA = getLoadStoreType(A);
1647   Type *ElemTyB = getLoadStoreType(B);
1648   std::optional<int> Diff =
1649       getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
1650                       /*StrictCheck=*/true, CheckType);
1651   return Diff && *Diff == 1;
1652 }
1653 
1654 void MemoryDepChecker::addAccess(StoreInst *SI) {
1655   visitPointers(SI->getPointerOperand(), *InnermostLoop,
1656                 [this, SI](Value *Ptr) {
1657                   Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
1658                   InstMap.push_back(SI);
1659                   ++AccessIdx;
1660                 });
1661 }
1662 
1663 void MemoryDepChecker::addAccess(LoadInst *LI) {
1664   visitPointers(LI->getPointerOperand(), *InnermostLoop,
1665                 [this, LI](Value *Ptr) {
1666                   Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
1667                   InstMap.push_back(LI);
1668                   ++AccessIdx;
1669                 });
1670 }
1671 
1672 MemoryDepChecker::VectorizationSafetyStatus
1673 MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
1674   switch (Type) {
1675   case NoDep:
1676   case Forward:
1677   case BackwardVectorizable:
1678     return VectorizationSafetyStatus::Safe;
1679 
1680   case Unknown:
1681     return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
1682   case ForwardButPreventsForwarding:
1683   case Backward:
1684   case BackwardVectorizableButPreventsForwarding:
1685   case IndirectUnsafe:
1686     return VectorizationSafetyStatus::Unsafe;
1687   }
1688   llvm_unreachable("unexpected DepType!");
1689 }
1690 
1691 bool MemoryDepChecker::Dependence::isBackward() const {
1692   switch (Type) {
1693   case NoDep:
1694   case Forward:
1695   case ForwardButPreventsForwarding:
1696   case Unknown:
1697   case IndirectUnsafe:
1698     return false;
1699 
1700   case BackwardVectorizable:
1701   case Backward:
1702   case BackwardVectorizableButPreventsForwarding:
1703     return true;
1704   }
1705   llvm_unreachable("unexpected DepType!");
1706 }
1707 
1708 bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
1709   return isBackward() || Type == Unknown || Type == IndirectUnsafe;
1710 }
1711 
1712 bool MemoryDepChecker::Dependence::isForward() const {
1713   switch (Type) {
1714   case Forward:
1715   case ForwardButPreventsForwarding:
1716     return true;
1717 
1718   case NoDep:
1719   case Unknown:
1720   case BackwardVectorizable:
1721   case Backward:
1722   case BackwardVectorizableButPreventsForwarding:
1723   case IndirectUnsafe:
1724     return false;
1725   }
1726   llvm_unreachable("unexpected DepType!");
1727 }
1728 
1729 bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
1730                                                     uint64_t TypeByteSize) {
1731   // If loads occur at a distance that is not a multiple of a feasible vector
1732   // factor store-load forwarding does not take place.
1733   // Positive dependences might cause troubles because vectorizing them might
1734   // prevent store-load forwarding making vectorized code run a lot slower.
1735   //   a[i] = a[i-3] ^ a[i-8];
1736   //   The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
1737   //   hence on your typical architecture store-load forwarding does not take
1738   //   place. Vectorizing in such cases does not make sense.
1739   // Store-load forwarding distance.
1740 
1741   // After this many iterations store-to-load forwarding conflicts should not
1742   // cause any slowdowns.
1743   const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
1744   // Maximum vector factor.
1745   uint64_t MaxVFWithoutSLForwardIssues = std::min(
1746       VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);
1747 
1748   // Compute the smallest VF at which the store and load would be misaligned.
1749   for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
1750        VF *= 2) {
1751     // If the number of vector iteration between the store and the load are
1752     // small we could incur conflicts.
1753     if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1754       MaxVFWithoutSLForwardIssues = (VF >> 1);
1755       break;
1756     }
1757   }
1758 
1759   if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
1760     LLVM_DEBUG(
1761         dbgs() << "LAA: Distance " << Distance
1762                << " that could cause a store-load forwarding conflict\n");
1763     return true;
1764   }
1765 
1766   if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
1767       MaxVFWithoutSLForwardIssues !=
1768           VectorizerParams::MaxVectorWidth * TypeByteSize)
1769     MinDepDistBytes = MaxVFWithoutSLForwardIssues;
1770   return false;
1771 }
1772 
1773 void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
1774   if (Status < S)
1775     Status = S;
1776 }
1777 
1778 /// Given a dependence-distance \p Dist between two
1779 /// memory accesses, that have strides in the same direction whose absolute
1780 /// value of the maximum stride is given in \p MaxStride, and that have the same
1781 /// type size \p TypeByteSize, in a loop whose maximum backedge taken count is
1782 /// \p MaxBTC, check if it is possible to prove statically that the dependence
1783 /// distance is larger than the range that the accesses will travel through the
1784 /// execution of the loop. If so, return true; false otherwise. This is useful
1785 /// for example in loops such as the following (PR31098):
1786 ///     for (i = 0; i < D; ++i) {
1787 ///                = out[i];
1788 ///       out[i+D] =
1789 ///     }
1790 static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
1791                                      const SCEV &MaxBTC, const SCEV &Dist,
1792                                      uint64_t MaxStride,
1793                                      uint64_t TypeByteSize) {
1794 
1795   // If we can prove that
1796   //      (**) |Dist| > MaxBTC * Step
1797   // where Step is the absolute stride of the memory accesses in bytes,
1798   // then there is no dependence.
1799   //
1800   // Rationale:
1801   // We basically want to check if the absolute distance (|Dist/Step|)
1802   // is >= the loop iteration count (or > MaxBTC).
1803   // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
1804   // Section 4.2.1); Note, that for vectorization it is sufficient to prove
1805   // that the dependence distance is >= VF; This is checked elsewhere.
1806   // But in some cases we can prune dependence distances early, and
1807   // even before selecting the VF, and without a runtime test, by comparing
1808   // the distance against the loop iteration count. Since the vectorized code
1809   // will be executed only if LoopCount >= VF, proving distance >= LoopCount
1810   // also guarantees that distance >= VF.
1811   //
1812   const uint64_t ByteStride = MaxStride * TypeByteSize;
1813   const SCEV *Step = SE.getConstant(MaxBTC.getType(), ByteStride);
1814   const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);
1815 
1816   const SCEV *CastedDist = &Dist;
1817   const SCEV *CastedProduct = Product;
1818   uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
1819   uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());
1820 
1821   // The dependence distance can be positive/negative, so we sign extend Dist;
1822   // The multiplication of the absolute stride in bytes and the
1823   // backedgeTakenCount is non-negative, so we zero extend Product.
1824   if (DistTypeSizeBits > ProductTypeSizeBits)
1825     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
1826   else
1827     CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
1828 
1829   // Is  Dist - (MaxBTC * Step) > 0 ?
1830   // (If so, then we have proven (**) because |Dist| >= Dist)
1831   const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
1832   if (SE.isKnownPositive(Minus))
1833     return true;
1834 
1835   // Second try: Is  -Dist - (MaxBTC * Step) > 0 ?
1836   // (If so, then we have proven (**) because |Dist| >= -1*Dist)
1837   const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
1838   Minus = SE.getMinusSCEV(NegDist, CastedProduct);
1839   return SE.isKnownPositive(Minus);
1840 }
1841 
1842 /// Check the dependence for two accesses with the same stride \p Stride.
1843 /// \p Distance is the positive distance and \p TypeByteSize is type size in
1844 /// bytes.
1845 ///
1846 /// \returns true if they are independent.
1847 static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
1848                                           uint64_t TypeByteSize) {
1849   assert(Stride > 1 && "The stride must be greater than 1");
1850   assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
1851   assert(Distance > 0 && "The distance must be non-zero");
1852 
1853   // Skip if the distance is not multiple of type byte size.
1854   if (Distance % TypeByteSize)
1855     return false;
1856 
1857   uint64_t ScaledDist = Distance / TypeByteSize;
1858 
1859   // No dependence if the scaled distance is not multiple of the stride.
1860   // E.g.
1861   //      for (i = 0; i < 1024 ; i += 4)
1862   //        A[i+2] = A[i] + 1;
1863   //
1864   // Two accesses in memory (scaled distance is 2, stride is 4):
1865   //     | A[0] |      |      |      | A[4] |      |      |      |
1866   //     |      |      | A[2] |      |      |      | A[6] |      |
1867   //
1868   // E.g.
1869   //      for (i = 0; i < 1024 ; i += 3)
1870   //        A[i+4] = A[i] + 1;
1871   //
1872   // Two accesses in memory (scaled distance is 4, stride is 3):
1873   //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
1874   //     |      |      |      |      | A[4] |      |      | A[7] |      |
1875   return ScaledDist % Stride;
1876 }
1877 
1878 std::variant<MemoryDepChecker::Dependence::DepType,
1879              MemoryDepChecker::DepDistanceStrideAndSizeInfo>
1880 MemoryDepChecker::getDependenceDistanceStrideAndSize(
1881     const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
1882     const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
1883   const auto &DL = InnermostLoop->getHeader()->getDataLayout();
1884   auto &SE = *PSE.getSE();
1885   const auto &[APtr, AIsWrite] = A;
1886   const auto &[BPtr, BIsWrite] = B;
1887 
1888   // Two reads are independent.
1889   if (!AIsWrite && !BIsWrite)
1890     return MemoryDepChecker::Dependence::NoDep;
1891 
1892   Type *ATy = getLoadStoreType(AInst);
1893   Type *BTy = getLoadStoreType(BInst);
1894 
1895   // We cannot check pointers in different address spaces.
1896   if (APtr->getType()->getPointerAddressSpace() !=
1897       BPtr->getType()->getPointerAddressSpace())
1898     return MemoryDepChecker::Dependence::Unknown;
1899 
1900   std::optional<int64_t> StrideAPtr =
1901       getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
1902   std::optional<int64_t> StrideBPtr =
1903       getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);
1904 
1905   const SCEV *Src = PSE.getSCEV(APtr);
1906   const SCEV *Sink = PSE.getSCEV(BPtr);
1907 
1908   // If the induction step is negative we have to invert source and sink of the
1909   // dependence when measuring the distance between them. We should not swap
1910   // AIsWrite with BIsWrite, as their uses expect them in program order.
1911   if (StrideAPtr && *StrideAPtr < 0) {
1912     std::swap(Src, Sink);
1913     std::swap(AInst, BInst);
1914     std::swap(ATy, BTy);
1915     std::swap(StrideAPtr, StrideBPtr);
1916   }
1917 
1918   const SCEV *Dist = SE.getMinusSCEV(Sink, Src);
1919 
1920   LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
1921                     << "\n");
1922   LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
1923                     << ": " << *Dist << "\n");
1924 
1925   // Check if we can prove that Sink only accesses memory after Src's end or
1926   // vice versa. At the moment this is limited to cases where either source or
1927   // sink are loop invariant to avoid compile-time increases. This is not
1928   // required for correctness.
1929   if (SE.isLoopInvariant(Src, InnermostLoop) ||
1930       SE.isLoopInvariant(Sink, InnermostLoop)) {
1931     const SCEV *MaxBECount = PSE.getSymbolicMaxBackedgeTakenCount();
1932     const auto &[SrcStart_, SrcEnd_] = getStartAndEndForAccess(
1933         InnermostLoop, Src, ATy, MaxBECount, PSE.getSE(), &PointerBounds);
1934     const auto &[SinkStart_, SinkEnd_] = getStartAndEndForAccess(
1935         InnermostLoop, Sink, BTy, MaxBECount, PSE.getSE(), &PointerBounds);
1936     if (!isa<SCEVCouldNotCompute>(SrcStart_) &&
1937         !isa<SCEVCouldNotCompute>(SrcEnd_) &&
1938         !isa<SCEVCouldNotCompute>(SinkStart_) &&
1939         !isa<SCEVCouldNotCompute>(SinkEnd_)) {
1940       if (!LoopGuards)
1941         LoopGuards.emplace(
1942             ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
1943       auto SrcEnd = SE.applyLoopGuards(SrcEnd_, *LoopGuards);
1944       auto SinkStart = SE.applyLoopGuards(SinkStart_, *LoopGuards);
1945       if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
1946         return MemoryDepChecker::Dependence::NoDep;
1947 
1948       auto SinkEnd = SE.applyLoopGuards(SinkEnd_, *LoopGuards);
1949       auto SrcStart = SE.applyLoopGuards(SrcStart_, *LoopGuards);
1950       if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart))
1951         return MemoryDepChecker::Dependence::NoDep;
1952     }
1953   }
1954 
1955   // Need accesses with constant strides and the same direction for further
1956   // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
1957   // similar code or pointer arithmetic that could wrap in the address space.
1958 
1959   // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
1960   // not loop-invariant (stride will be 0 in that case), we cannot analyze the
1961   // dependence further and also cannot generate runtime checks.
1962   if (!StrideAPtr || !StrideBPtr) {
1963     LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
1964     return MemoryDepChecker::Dependence::IndirectUnsafe;
1965   }
1966 
1967   int64_t StrideAPtrInt = *StrideAPtr;
1968   int64_t StrideBPtrInt = *StrideBPtr;
1969   LLVM_DEBUG(dbgs() << "LAA:  Src induction step: " << StrideAPtrInt
1970                     << " Sink induction step: " << StrideBPtrInt << "\n");
1971   // At least Src or Sink are loop invariant and the other is strided or
1972   // invariant. We can generate a runtime check to disambiguate the accesses.
1973   if (!StrideAPtrInt || !StrideBPtrInt)
1974     return MemoryDepChecker::Dependence::Unknown;
1975 
1976   // Both Src and Sink have a constant stride, check if they are in the same
1977   // direction.
1978   if ((StrideAPtrInt > 0) != (StrideBPtrInt > 0)) {
1979     LLVM_DEBUG(
1980         dbgs() << "Pointer access with strides in different directions\n");
1981     return MemoryDepChecker::Dependence::Unknown;
1982   }
1983 
1984   uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
1985   bool HasSameSize =
1986       DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
1987   if (!HasSameSize)
1988     TypeByteSize = 0;
1989 
1990   StrideAPtrInt = std::abs(StrideAPtrInt);
1991   StrideBPtrInt = std::abs(StrideBPtrInt);
1992 
1993   uint64_t MaxStride = std::max(StrideAPtrInt, StrideBPtrInt);
1994 
1995   std::optional<uint64_t> CommonStride;
1996   if (StrideAPtrInt == StrideBPtrInt)
1997     CommonStride = StrideAPtrInt;
1998 
1999   // TODO: Historically, we don't retry with runtime checks unless the
2000   // (unscaled) strides are the same. Fix this once the condition for runtime
2001   // checks in isDependent is fixed.
2002   bool ShouldRetryWithRuntimeCheck = CommonStride.has_value();
2003 
2004   return DepDistanceStrideAndSizeInfo(Dist, MaxStride, CommonStride,
2005                                       ShouldRetryWithRuntimeCheck, TypeByteSize,
2006                                       AIsWrite, BIsWrite);
2007 }
2008 
2009 MemoryDepChecker::Dependence::DepType
2010 MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
2011                               const MemAccessInfo &B, unsigned BIdx) {
2012   assert(AIdx < BIdx && "Must pass arguments in program order");
2013 
2014   // Get the dependence distance, stride, type size and what access writes for
2015   // the dependence between A and B.
2016   auto Res =
2017       getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
2018   if (std::holds_alternative<Dependence::DepType>(Res))
2019     return std::get<Dependence::DepType>(Res);
2020 
2021   auto &[Dist, MaxStride, CommonStride, ShouldRetryWithRuntimeCheck,
2022          TypeByteSize, AIsWrite, BIsWrite] =
2023       std::get<DepDistanceStrideAndSizeInfo>(Res);
2024   bool HasSameSize = TypeByteSize > 0;
2025 
2026   if (isa<SCEVCouldNotCompute>(Dist)) {
2027     // TODO: Relax requirement that there is a common unscaled stride to retry
2028     // with non-constant distance dependencies.
2029     FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2030     LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
2031     return Dependence::Unknown;
2032   }
2033 
2034   ScalarEvolution &SE = *PSE.getSE();
2035   auto &DL = InnermostLoop->getHeader()->getDataLayout();
2036 
2037   // If the distance between the acecsses is larger than their maximum absolute
2038   // stride multiplied by the symbolic maximum backedge taken count (which is an
2039   // upper bound of the number of iterations), the accesses are independet, i.e.
2040   // they are far enough appart that accesses won't access the same location
2041   // across all loop ierations.
2042   if (HasSameSize && isSafeDependenceDistance(
2043                          DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()),
2044                          *Dist, MaxStride, TypeByteSize))
2045     return Dependence::NoDep;
2046 
2047   const SCEVConstant *ConstDist = dyn_cast<SCEVConstant>(Dist);
2048 
2049   // Attempt to prove strided accesses independent.
2050   if (ConstDist) {
2051     uint64_t Distance = ConstDist->getAPInt().abs().getZExtValue();
2052 
2053     // If the distance between accesses and their strides are known constants,
2054     // check whether the accesses interlace each other.
2055     if (Distance > 0 && CommonStride && CommonStride > 1 && HasSameSize &&
2056         areStridedAccessesIndependent(Distance, *CommonStride, TypeByteSize)) {
2057       LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
2058       return Dependence::NoDep;
2059     }
2060   } else {
2061     if (!LoopGuards)
2062       LoopGuards.emplace(
2063           ScalarEvolution::LoopGuards::collect(InnermostLoop, SE));
2064     Dist = SE.applyLoopGuards(Dist, *LoopGuards);
2065   }
2066 
2067   // Negative distances are not plausible dependencies.
2068   if (SE.isKnownNonPositive(Dist)) {
2069     if (SE.isKnownNonNegative(Dist)) {
2070       if (HasSameSize) {
2071         // Write to the same location with the same size.
2072         return Dependence::Forward;
2073       }
2074       LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
2075                            "different type sizes\n");
2076       return Dependence::Unknown;
2077     }
2078 
2079     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
2080     // Check if the first access writes to a location that is read in a later
2081     // iteration, where the distance between them is not a multiple of a vector
2082     // factor and relatively small.
2083     //
2084     // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
2085     // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
2086     // forward dependency will allow vectorization using any width.
2087 
2088     if (IsTrueDataDependence && EnableForwardingConflictDetection) {
2089       if (!ConstDist) {
2090         // TODO: FoundNonConstantDistanceDependence is used as a necessary
2091         // condition to consider retrying with runtime checks. Historically, we
2092         // did not set it when strides were different but there is no inherent
2093         // reason to.
2094         FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2095         return Dependence::Unknown;
2096       }
2097       if (!HasSameSize ||
2098           couldPreventStoreLoadForward(
2099               ConstDist->getAPInt().abs().getZExtValue(), TypeByteSize)) {
2100         LLVM_DEBUG(
2101             dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
2102         return Dependence::ForwardButPreventsForwarding;
2103       }
2104     }
2105 
2106     LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
2107     return Dependence::Forward;
2108   }
2109 
2110   int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
2111   // Below we only handle strictly positive distances.
2112   if (MinDistance <= 0) {
2113     FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2114     return Dependence::Unknown;
2115   }
2116 
2117   if (!ConstDist) {
2118     // Previously this case would be treated as Unknown, possibly setting
2119     // FoundNonConstantDistanceDependence to force re-trying with runtime
2120     // checks. Until the TODO below is addressed, set it here to preserve
2121     // original behavior w.r.t. re-trying with runtime checks.
2122     // TODO: FoundNonConstantDistanceDependence is used as a necessary
2123     // condition to consider retrying with runtime checks. Historically, we
2124     // did not set it when strides were different but there is no inherent
2125     // reason to.
2126     FoundNonConstantDistanceDependence |= ShouldRetryWithRuntimeCheck;
2127   }
2128 
2129   if (!HasSameSize) {
2130     LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
2131                          "different type sizes\n");
2132     return Dependence::Unknown;
2133   }
2134 
2135   if (!CommonStride)
2136     return Dependence::Unknown;
2137 
2138   // Bail out early if passed-in parameters make vectorization not feasible.
2139   unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
2140                            VectorizerParams::VectorizationFactor : 1);
2141   unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
2142                            VectorizerParams::VectorizationInterleave : 1);
2143   // The minimum number of iterations for a vectorized/unrolled version.
2144   unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
2145 
2146   // It's not vectorizable if the distance is smaller than the minimum distance
2147   // needed for a vectroized/unrolled version. Vectorizing one iteration in
2148   // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
2149   // TypeByteSize (No need to plus the last gap distance).
2150   //
2151   // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2152   //      foo(int *A) {
2153   //        int *B = (int *)((char *)A + 14);
2154   //        for (i = 0 ; i < 1024 ; i += 2)
2155   //          B[i] = A[i] + 1;
2156   //      }
2157   //
2158   // Two accesses in memory (stride is 2):
2159   //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
2160   //                              | B[0] |      | B[2] |      | B[4] |
2161   //
2162   // MinDistance needs for vectorizing iterations except the last iteration:
2163   // 4 * 2 * (MinNumIter - 1). MinDistance needs for the last iteration: 4.
2164   // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
2165   //
2166   // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
2167   // 12, which is less than distance.
2168   //
2169   // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
2170   // the minimum distance needed is 28, which is greater than distance. It is
2171   // not safe to do vectorization.
2172 
2173   // We know that Dist is positive, but it may not be constant. Use the signed
2174   // minimum for computations below, as this ensures we compute the closest
2175   // possible dependence distance.
2176   uint64_t MinDistanceNeeded =
2177       TypeByteSize * *CommonStride * (MinNumIter - 1) + TypeByteSize;
2178   if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) {
2179     if (!ConstDist) {
2180       // For non-constant distances, we checked the lower bound of the
2181       // dependence distance and the distance may be larger at runtime (and safe
2182       // for vectorization). Classify it as Unknown, so we re-try with runtime
2183       // checks.
2184       return Dependence::Unknown;
2185     }
2186     LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance "
2187                       << MinDistance << '\n');
2188     return Dependence::Backward;
2189   }
2190 
2191   // Unsafe if the minimum distance needed is greater than smallest dependence
2192   // distance distance.
2193   if (MinDistanceNeeded > MinDepDistBytes) {
2194     LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
2195                       << MinDistanceNeeded << " size in bytes\n");
2196     return Dependence::Backward;
2197   }
2198 
2199   // Positive distance bigger than max vectorization factor.
2200   // FIXME: Should use max factor instead of max distance in bytes, which could
2201   // not handle different types.
2202   // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
2203   //      void foo (int *A, char *B) {
2204   //        for (unsigned i = 0; i < 1024; i++) {
2205   //          A[i+2] = A[i] + 1;
2206   //          B[i+2] = B[i] + 1;
2207   //        }
2208   //      }
2209   //
2210   // This case is currently unsafe according to the max safe distance. If we
2211   // analyze the two accesses on array B, the max safe dependence distance
2212   // is 2. Then we analyze the accesses on array A, the minimum distance needed
2213   // is 8, which is less than 2 and forbidden vectorization, But actually
2214   // both A and B could be vectorized by 2 iterations.
2215   MinDepDistBytes =
2216       std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes);
2217 
2218   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
2219   uint64_t MinDepDistBytesOld = MinDepDistBytes;
2220   if (IsTrueDataDependence && EnableForwardingConflictDetection && ConstDist &&
2221       couldPreventStoreLoadForward(MinDistance, TypeByteSize)) {
2222     // Sanity check that we didn't update MinDepDistBytes when calling
2223     // couldPreventStoreLoadForward
2224     assert(MinDepDistBytes == MinDepDistBytesOld &&
2225            "An update to MinDepDistBytes requires an update to "
2226            "MaxSafeVectorWidthInBits");
2227     (void)MinDepDistBytesOld;
2228     return Dependence::BackwardVectorizableButPreventsForwarding;
2229   }
2230 
2231   // An update to MinDepDistBytes requires an update to MaxSafeVectorWidthInBits
2232   // since there is a backwards dependency.
2233   uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * *CommonStride);
2234   LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance
2235                     << " with max VF = " << MaxVF << '\n');
2236 
2237   uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
2238   if (!ConstDist && MaxVFInBits < MaxTargetVectorWidthInBits) {
2239     // For non-constant distances, we checked the lower bound of the dependence
2240     // distance and the distance may be larger at runtime (and safe for
2241     // vectorization). Classify it as Unknown, so we re-try with runtime checks.
2242     return Dependence::Unknown;
2243   }
2244 
2245   MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
2246   return Dependence::BackwardVectorizable;
2247 }
2248 
2249 bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets,
2250                                    const MemAccessInfoList &CheckDeps) {
2251 
2252   MinDepDistBytes = -1;
2253   SmallPtrSet<MemAccessInfo, 8> Visited;
2254   for (MemAccessInfo CurAccess : CheckDeps) {
2255     if (Visited.count(CurAccess))
2256       continue;
2257 
2258     // Get the relevant memory access set.
2259     EquivalenceClasses<MemAccessInfo>::iterator I =
2260       AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
2261 
2262     // Check accesses within this set.
2263     EquivalenceClasses<MemAccessInfo>::member_iterator AI =
2264         AccessSets.member_begin(I);
2265     EquivalenceClasses<MemAccessInfo>::member_iterator AE =
2266         AccessSets.member_end();
2267 
2268     // Check every access pair.
2269     while (AI != AE) {
2270       Visited.insert(*AI);
2271       bool AIIsWrite = AI->getInt();
2272       // Check loads only against next equivalent class, but stores also against
2273       // other stores in the same equivalence class - to the same address.
2274       EquivalenceClasses<MemAccessInfo>::member_iterator OI =
2275           (AIIsWrite ? AI : std::next(AI));
2276       while (OI != AE) {
2277         // Check every accessing instruction pair in program order.
2278         for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
2279              I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
2280           // Scan all accesses of another equivalence class, but only the next
2281           // accesses of the same equivalent class.
2282           for (std::vector<unsigned>::iterator
2283                    I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
2284                    I2E = (OI == AI ? I1E : Accesses[*OI].end());
2285                I2 != I2E; ++I2) {
2286             auto A = std::make_pair(&*AI, *I1);
2287             auto B = std::make_pair(&*OI, *I2);
2288 
2289             assert(*I1 != *I2);
2290             if (*I1 > *I2)
2291               std::swap(A, B);
2292 
2293             Dependence::DepType Type =
2294                 isDependent(*A.first, A.second, *B.first, B.second);
2295             mergeInStatus(Dependence::isSafeForVectorization(Type));
2296 
2297             // Gather dependences unless we accumulated MaxDependences
2298             // dependences.  In that case return as soon as we find the first
2299             // unsafe dependence.  This puts a limit on this quadratic
2300             // algorithm.
2301             if (RecordDependences) {
2302               if (Type != Dependence::NoDep)
2303                 Dependences.emplace_back(A.second, B.second, Type);
2304 
2305               if (Dependences.size() >= MaxDependences) {
2306                 RecordDependences = false;
2307                 Dependences.clear();
2308                 LLVM_DEBUG(dbgs()
2309                            << "Too many dependences, stopped recording\n");
2310               }
2311             }
2312             if (!RecordDependences && !isSafeForVectorization())
2313               return false;
2314           }
2315         ++OI;
2316       }
2317       ++AI;
2318     }
2319   }
2320 
2321   LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
2322   return isSafeForVectorization();
2323 }
2324 
2325 SmallVector<Instruction *, 4>
2326 MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool IsWrite) const {
2327   MemAccessInfo Access(Ptr, IsWrite);
2328   auto &IndexVector = Accesses.find(Access)->second;
2329 
2330   SmallVector<Instruction *, 4> Insts;
2331   transform(IndexVector,
2332                  std::back_inserter(Insts),
2333                  [&](unsigned Idx) { return this->InstMap[Idx]; });
2334   return Insts;
2335 }
2336 
2337 const char *MemoryDepChecker::Dependence::DepName[] = {
2338     "NoDep",
2339     "Unknown",
2340     "IndirectUnsafe",
2341     "Forward",
2342     "ForwardButPreventsForwarding",
2343     "Backward",
2344     "BackwardVectorizable",
2345     "BackwardVectorizableButPreventsForwarding"};
2346 
2347 void MemoryDepChecker::Dependence::print(
2348     raw_ostream &OS, unsigned Depth,
2349     const SmallVectorImpl<Instruction *> &Instrs) const {
2350   OS.indent(Depth) << DepName[Type] << ":\n";
2351   OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
2352   OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
2353 }
2354 
2355 bool LoopAccessInfo::canAnalyzeLoop() {
2356   // We need to have a loop header.
2357   LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '"
2358                     << TheLoop->getHeader()->getParent()->getName() << "' from "
2359                     << TheLoop->getLocStr() << "\n");
2360 
2361   // We can only analyze innermost loops.
2362   if (!TheLoop->isInnermost()) {
2363     LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
2364     recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
2365     return false;
2366   }
2367 
2368   // We must have a single backedge.
2369   if (TheLoop->getNumBackEdges() != 1) {
2370     LLVM_DEBUG(
2371         dbgs() << "LAA: loop control flow is not understood by analyzer\n");
2372     recordAnalysis("CFGNotUnderstood")
2373         << "loop control flow is not understood by analyzer";
2374     return false;
2375   }
2376 
2377   // ScalarEvolution needs to be able to find the symbolic max backedge taken
2378   // count, which is an upper bound on the number of loop iterations. The loop
2379   // may execute fewer iterations, if it exits via an uncountable exit.
2380   const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount();
2381   if (isa<SCEVCouldNotCompute>(ExitCount)) {
2382     recordAnalysis("CantComputeNumberOfIterations")
2383         << "could not determine number of loop iterations";
2384     LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
2385     return false;
2386   }
2387 
2388   LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: "
2389                     << TheLoop->getHeader()->getName() << "\n");
2390   return true;
2391 }
2392 
2393 bool LoopAccessInfo::analyzeLoop(AAResults *AA, const LoopInfo *LI,
2394                                  const TargetLibraryInfo *TLI,
2395                                  DominatorTree *DT) {
2396   // Holds the Load and Store instructions.
2397   SmallVector<LoadInst *, 16> Loads;
2398   SmallVector<StoreInst *, 16> Stores;
2399   SmallPtrSet<MDNode *, 8> LoopAliasScopes;
2400 
2401   // Holds all the different accesses in the loop.
2402   unsigned NumReads = 0;
2403   unsigned NumReadWrites = 0;
2404 
2405   bool HasComplexMemInst = false;
2406 
2407   // A runtime check is only legal to insert if there are no convergent calls.
2408   HasConvergentOp = false;
2409 
2410   PtrRtChecking->Pointers.clear();
2411   PtrRtChecking->Need = false;
2412 
2413   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
2414 
2415   const bool EnableMemAccessVersioningOfLoop =
2416       EnableMemAccessVersioning &&
2417       !TheLoop->getHeader()->getParent()->hasOptSize();
2418 
2419   // Traverse blocks in fixed RPOT order, regardless of their storage in the
2420   // loop info, as it may be arbitrary.
2421   LoopBlocksRPO RPOT(TheLoop);
2422   RPOT.perform(LI);
2423   for (BasicBlock *BB : RPOT) {
2424     // Scan the BB and collect legal loads and stores. Also detect any
2425     // convergent instructions.
2426     for (Instruction &I : *BB) {
2427       if (auto *Call = dyn_cast<CallBase>(&I)) {
2428         if (Call->isConvergent())
2429           HasConvergentOp = true;
2430       }
2431 
2432       // With both a non-vectorizable memory instruction and a convergent
2433       // operation, found in this loop, no reason to continue the search.
2434       if (HasComplexMemInst && HasConvergentOp)
2435         return false;
2436 
2437       // Avoid hitting recordAnalysis multiple times.
2438       if (HasComplexMemInst)
2439         continue;
2440 
2441       // Record alias scopes defined inside the loop.
2442       if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I))
2443         for (Metadata *Op : Decl->getScopeList()->operands())
2444           LoopAliasScopes.insert(cast<MDNode>(Op));
2445 
2446       // Many math library functions read the rounding mode. We will only
2447       // vectorize a loop if it contains known function calls that don't set
2448       // the flag. Therefore, it is safe to ignore this read from memory.
2449       auto *Call = dyn_cast<CallInst>(&I);
2450       if (Call && getVectorIntrinsicIDForCall(Call, TLI))
2451         continue;
2452 
2453       // If this is a load, save it. If this instruction can read from memory
2454       // but is not a load, we only allow it if it's a call to a function with a
2455       // vector mapping and no pointer arguments.
2456       if (I.mayReadFromMemory()) {
2457         auto hasPointerArgs = [](CallBase *CB) {
2458           return any_of(CB->args(), [](Value const *Arg) {
2459             return Arg->getType()->isPointerTy();
2460           });
2461         };
2462 
2463         // If the function has an explicit vectorized counterpart, and does not
2464         // take output/input pointers, we can safely assume that it can be
2465         // vectorized.
2466         if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
2467             !hasPointerArgs(Call) && !VFDatabase::getMappings(*Call).empty())
2468           continue;
2469 
2470         auto *Ld = dyn_cast<LoadInst>(&I);
2471         if (!Ld) {
2472           recordAnalysis("CantVectorizeInstruction", Ld)
2473             << "instruction cannot be vectorized";
2474           HasComplexMemInst = true;
2475           continue;
2476         }
2477         if (!Ld->isSimple() && !IsAnnotatedParallel) {
2478           recordAnalysis("NonSimpleLoad", Ld)
2479               << "read with atomic ordering or volatile read";
2480           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
2481           HasComplexMemInst = true;
2482           continue;
2483         }
2484         NumLoads++;
2485         Loads.push_back(Ld);
2486         DepChecker->addAccess(Ld);
2487         if (EnableMemAccessVersioningOfLoop)
2488           collectStridedAccess(Ld);
2489         continue;
2490       }
2491 
2492       // Save 'store' instructions. Abort if other instructions write to memory.
2493       if (I.mayWriteToMemory()) {
2494         auto *St = dyn_cast<StoreInst>(&I);
2495         if (!St) {
2496           recordAnalysis("CantVectorizeInstruction", St)
2497               << "instruction cannot be vectorized";
2498           HasComplexMemInst = true;
2499           continue;
2500         }
2501         if (!St->isSimple() && !IsAnnotatedParallel) {
2502           recordAnalysis("NonSimpleStore", St)
2503               << "write with atomic ordering or volatile write";
2504           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
2505           HasComplexMemInst = true;
2506           continue;
2507         }
2508         NumStores++;
2509         Stores.push_back(St);
2510         DepChecker->addAccess(St);
2511         if (EnableMemAccessVersioningOfLoop)
2512           collectStridedAccess(St);
2513       }
2514     } // Next instr.
2515   } // Next block.
2516 
2517   if (HasComplexMemInst)
2518     return false;
2519 
2520   // Now we have two lists that hold the loads and the stores.
2521   // Next, we find the pointers that they use.
2522 
2523   // Check if we see any stores. If there are no stores, then we don't
2524   // care if the pointers are *restrict*.
2525   if (!Stores.size()) {
2526     LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
2527     return true;
2528   }
2529 
2530   MemoryDepChecker::DepCandidates DependentAccesses;
2531   AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE,
2532                           LoopAliasScopes);
2533 
2534   // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
2535   // multiple times on the same object. If the ptr is accessed twice, once
2536   // for read and once for write, it will only appear once (on the write
2537   // list). This is okay, since we are going to check for conflicts between
2538   // writes and between reads and writes, but not between reads and reads.
2539   SmallSet<std::pair<Value *, Type *>, 16> Seen;
2540 
2541   // Record uniform store addresses to identify if we have multiple stores
2542   // to the same address.
2543   SmallPtrSet<Value *, 16> UniformStores;
2544 
2545   for (StoreInst *ST : Stores) {
2546     Value *Ptr = ST->getPointerOperand();
2547 
2548     if (isInvariant(Ptr)) {
2549       // Record store instructions to loop invariant addresses
2550       StoresToInvariantAddresses.push_back(ST);
2551       HasStoreStoreDependenceInvolvingLoopInvariantAddress |=
2552           !UniformStores.insert(Ptr).second;
2553     }
2554 
2555     // If we did *not* see this pointer before, insert it to  the read-write
2556     // list. At this phase it is only a 'write' list.
2557     Type *AccessTy = getLoadStoreType(ST);
2558     if (Seen.insert({Ptr, AccessTy}).second) {
2559       ++NumReadWrites;
2560 
2561       MemoryLocation Loc = MemoryLocation::get(ST);
2562       // The TBAA metadata could have a control dependency on the predication
2563       // condition, so we cannot rely on it when determining whether or not we
2564       // need runtime pointer checks.
2565       if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
2566         Loc.AATags.TBAA = nullptr;
2567 
2568       visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2569                     [&Accesses, AccessTy, Loc](Value *Ptr) {
2570                       MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2571                       Accesses.addStore(NewLoc, AccessTy);
2572                     });
2573     }
2574   }
2575 
2576   if (IsAnnotatedParallel) {
2577     LLVM_DEBUG(
2578         dbgs() << "LAA: A loop annotated parallel, ignore memory dependency "
2579                << "checks.\n");
2580     return true;
2581   }
2582 
2583   for (LoadInst *LD : Loads) {
2584     Value *Ptr = LD->getPointerOperand();
2585     // If we did *not* see this pointer before, insert it to the
2586     // read list. If we *did* see it before, then it is already in
2587     // the read-write list. This allows us to vectorize expressions
2588     // such as A[i] += x;  Because the address of A[i] is a read-write
2589     // pointer. This only works if the index of A[i] is consecutive.
2590     // If the address of i is unknown (for example A[B[i]]) then we may
2591     // read a few words, modify, and write a few words, and some of the
2592     // words may be written to the same address.
2593     bool IsReadOnlyPtr = false;
2594     Type *AccessTy = getLoadStoreType(LD);
2595     if (Seen.insert({Ptr, AccessTy}).second ||
2596         !getPtrStride(*PSE, AccessTy, Ptr, TheLoop, SymbolicStrides)) {
2597       ++NumReads;
2598       IsReadOnlyPtr = true;
2599     }
2600 
2601     // See if there is an unsafe dependency between a load to a uniform address and
2602     // store to the same uniform address.
2603     if (UniformStores.count(Ptr)) {
2604       LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
2605                            "load and uniform store to the same address!\n");
2606       HasLoadStoreDependenceInvolvingLoopInvariantAddress = true;
2607     }
2608 
2609     MemoryLocation Loc = MemoryLocation::get(LD);
2610     // The TBAA metadata could have a control dependency on the predication
2611     // condition, so we cannot rely on it when determining whether or not we
2612     // need runtime pointer checks.
2613     if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
2614       Loc.AATags.TBAA = nullptr;
2615 
2616     visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop,
2617                   [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) {
2618                     MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr);
2619                     Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr);
2620                   });
2621   }
2622 
2623   // If we write (or read-write) to a single destination and there are no
2624   // other reads in this loop then is it safe to vectorize.
2625   if (NumReadWrites == 1 && NumReads == 0) {
2626     LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
2627     return true;
2628   }
2629 
2630   // Build dependence sets and check whether we need a runtime pointer bounds
2631   // check.
2632   Accesses.buildDependenceSets();
2633 
2634   // Find pointers with computable bounds. We are going to use this information
2635   // to place a runtime bound check.
2636   Value *UncomputablePtr = nullptr;
2637   bool CanDoRTIfNeeded =
2638       Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop,
2639                                SymbolicStrides, UncomputablePtr, false);
2640   if (!CanDoRTIfNeeded) {
2641     const auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2642     recordAnalysis("CantIdentifyArrayBounds", I)
2643         << "cannot identify array bounds";
2644     LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
2645                       << "the array bounds.\n");
2646     return false;
2647   }
2648 
2649   LLVM_DEBUG(
2650     dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");
2651 
2652   bool DepsAreSafe = true;
2653   if (Accesses.isDependencyCheckNeeded()) {
2654     LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
2655     DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses,
2656                                           Accesses.getDependenciesToCheck());
2657 
2658     if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) {
2659       LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");
2660 
2661       // Clear the dependency checks. We assume they are not needed.
2662       Accesses.resetDepChecks(*DepChecker);
2663 
2664       PtrRtChecking->reset();
2665       PtrRtChecking->Need = true;
2666 
2667       auto *SE = PSE->getSE();
2668       UncomputablePtr = nullptr;
2669       CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(
2670           *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true);
2671 
2672       // Check that we found the bounds for the pointer.
2673       if (!CanDoRTIfNeeded) {
2674         auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr);
2675         recordAnalysis("CantCheckMemDepsAtRunTime", I)
2676             << "cannot check memory dependencies at runtime";
2677         LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
2678         return false;
2679       }
2680       DepsAreSafe = true;
2681     }
2682   }
2683 
2684   if (HasConvergentOp) {
2685     recordAnalysis("CantInsertRuntimeCheckWithConvergent")
2686         << "cannot add control dependency to convergent operation";
2687     LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
2688                          "would be needed with a convergent operation\n");
2689     return false;
2690   }
2691 
2692   if (DepsAreSafe) {
2693     LLVM_DEBUG(
2694         dbgs() << "LAA: No unsafe dependent memory operations in loop.  We"
2695                << (PtrRtChecking->Need ? "" : " don't")
2696                << " need runtime memory checks.\n");
2697     return true;
2698   }
2699 
2700   emitUnsafeDependenceRemark();
2701   return false;
2702 }
2703 
2704 void LoopAccessInfo::emitUnsafeDependenceRemark() {
2705   const auto *Deps = getDepChecker().getDependences();
2706   if (!Deps)
2707     return;
2708   const auto *Found =
2709       llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) {
2710         return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) !=
2711                MemoryDepChecker::VectorizationSafetyStatus::Safe;
2712       });
2713   if (Found == Deps->end())
2714     return;
2715   MemoryDepChecker::Dependence Dep = *Found;
2716 
2717   LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
2718 
2719   // Emit remark for first unsafe dependence
2720   bool HasForcedDistribution = false;
2721   std::optional<const MDOperand *> Value =
2722       findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable");
2723   if (Value) {
2724     const MDOperand *Op = *Value;
2725     assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata");
2726     HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue();
2727   }
2728 
2729   const std::string Info =
2730       HasForcedDistribution
2731           ? "unsafe dependent memory operations in loop."
2732           : "unsafe dependent memory operations in loop. Use "
2733             "#pragma clang loop distribute(enable) to allow loop distribution "
2734             "to attempt to isolate the offending operations into a separate "
2735             "loop";
2736   OptimizationRemarkAnalysis &R =
2737       recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info;
2738 
2739   switch (Dep.Type) {
2740   case MemoryDepChecker::Dependence::NoDep:
2741   case MemoryDepChecker::Dependence::Forward:
2742   case MemoryDepChecker::Dependence::BackwardVectorizable:
2743     llvm_unreachable("Unexpected dependence");
2744   case MemoryDepChecker::Dependence::Backward:
2745     R << "\nBackward loop carried data dependence.";
2746     break;
2747   case MemoryDepChecker::Dependence::ForwardButPreventsForwarding:
2748     R << "\nForward loop carried data dependence that prevents "
2749          "store-to-load forwarding.";
2750     break;
2751   case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding:
2752     R << "\nBackward loop carried data dependence that prevents "
2753          "store-to-load forwarding.";
2754     break;
2755   case MemoryDepChecker::Dependence::IndirectUnsafe:
2756     R << "\nUnsafe indirect dependence.";
2757     break;
2758   case MemoryDepChecker::Dependence::Unknown:
2759     R << "\nUnknown data dependence.";
2760     break;
2761   }
2762 
2763   if (Instruction *I = Dep.getSource(getDepChecker())) {
2764     DebugLoc SourceLoc = I->getDebugLoc();
2765     if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I)))
2766       SourceLoc = DD->getDebugLoc();
2767     if (SourceLoc)
2768       R << " Memory location is the same as accessed at "
2769         << ore::NV("Location", SourceLoc);
2770   }
2771 }
2772 
2773 bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
2774                                            DominatorTree *DT)  {
2775   assert(TheLoop->contains(BB) && "Unknown block used");
2776 
2777   // Blocks that do not dominate the latch need predication.
2778   const BasicBlock *Latch = TheLoop->getLoopLatch();
2779   return !DT->dominates(BB, Latch);
2780 }
2781 
2782 OptimizationRemarkAnalysis &
2783 LoopAccessInfo::recordAnalysis(StringRef RemarkName, const Instruction *I) {
2784   assert(!Report && "Multiple reports generated");
2785 
2786   const Value *CodeRegion = TheLoop->getHeader();
2787   DebugLoc DL = TheLoop->getStartLoc();
2788 
2789   if (I) {
2790     CodeRegion = I->getParent();
2791     // If there is no debug location attached to the instruction, revert back to
2792     // using the loop's.
2793     if (I->getDebugLoc())
2794       DL = I->getDebugLoc();
2795   }
2796 
2797   Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
2798                                                    CodeRegion);
2799   return *Report;
2800 }
2801 
2802 bool LoopAccessInfo::isInvariant(Value *V) const {
2803   auto *SE = PSE->getSE();
2804   // TODO: Is this really what we want? Even without FP SCEV, we may want some
2805   // trivially loop-invariant FP values to be considered invariant.
2806   if (!SE->isSCEVable(V->getType()))
2807     return false;
2808   const SCEV *S = SE->getSCEV(V);
2809   return SE->isLoopInvariant(S, TheLoop);
2810 }
2811 
2812 /// Find the operand of the GEP that should be checked for consecutive
2813 /// stores. This ignores trailing indices that have no effect on the final
2814 /// pointer.
2815 static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
2816   const DataLayout &DL = Gep->getDataLayout();
2817   unsigned LastOperand = Gep->getNumOperands() - 1;
2818   TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());
2819 
2820   // Walk backwards and try to peel off zeros.
2821   while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
2822     // Find the type we're currently indexing into.
2823     gep_type_iterator GEPTI = gep_type_begin(Gep);
2824     std::advance(GEPTI, LastOperand - 2);
2825 
2826     // If it's a type with the same allocation size as the result of the GEP we
2827     // can peel off the zero index.
2828     TypeSize ElemSize = GEPTI.isStruct()
2829                             ? DL.getTypeAllocSize(GEPTI.getIndexedType())
2830                             : GEPTI.getSequentialElementStride(DL);
2831     if (ElemSize != GEPAllocSize)
2832       break;
2833     --LastOperand;
2834   }
2835 
2836   return LastOperand;
2837 }
2838 
2839 /// If the argument is a GEP, then returns the operand identified by
2840 /// getGEPInductionOperand. However, if there is some other non-loop-invariant
2841 /// operand, it returns that instead.
2842 static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
2843   auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
2844   if (!GEP)
2845     return Ptr;
2846 
2847   unsigned InductionOperand = getGEPInductionOperand(GEP);
2848 
2849   // Check that all of the gep indices are uniform except for our induction
2850   // operand.
2851   for (unsigned I = 0, E = GEP->getNumOperands(); I != E; ++I)
2852     if (I != InductionOperand &&
2853         !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(I)), Lp))
2854       return Ptr;
2855   return GEP->getOperand(InductionOperand);
2856 }
2857 
2858 /// Get the stride of a pointer access in a loop. Looks for symbolic
2859 /// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
2860 static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
2861   auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
2862   if (!PtrTy || PtrTy->isAggregateType())
2863     return nullptr;
2864 
2865   // Try to remove a gep instruction to make the pointer (actually index at this
2866   // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
2867   // pointer, otherwise, we are analyzing the index.
2868   Value *OrigPtr = Ptr;
2869 
2870   // The size of the pointer access.
2871   int64_t PtrAccessSize = 1;
2872 
2873   Ptr = stripGetElementPtr(Ptr, SE, Lp);
2874   const SCEV *V = SE->getSCEV(Ptr);
2875 
2876   if (Ptr != OrigPtr)
2877     // Strip off casts.
2878     while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
2879       V = C->getOperand();
2880 
2881   const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
2882   if (!S)
2883     return nullptr;
2884 
2885   // If the pointer is invariant then there is no stride and it makes no
2886   // sense to add it here.
2887   if (Lp != S->getLoop())
2888     return nullptr;
2889 
2890   V = S->getStepRecurrence(*SE);
2891   if (!V)
2892     return nullptr;
2893 
2894   // Strip off the size of access multiplication if we are still analyzing the
2895   // pointer.
2896   if (OrigPtr == Ptr) {
2897     if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
2898       if (M->getOperand(0)->getSCEVType() != scConstant)
2899         return nullptr;
2900 
2901       const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();
2902 
2903       // Huge step value - give up.
2904       if (APStepVal.getBitWidth() > 64)
2905         return nullptr;
2906 
2907       int64_t StepVal = APStepVal.getSExtValue();
2908       if (PtrAccessSize != StepVal)
2909         return nullptr;
2910       V = M->getOperand(1);
2911     }
2912   }
2913 
2914   // Note that the restriction after this loop invariant check are only
2915   // profitability restrictions.
2916   if (!SE->isLoopInvariant(V, Lp))
2917     return nullptr;
2918 
2919   // Look for the loop invariant symbolic value.
2920   if (isa<SCEVUnknown>(V))
2921     return V;
2922 
2923   if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
2924     if (isa<SCEVUnknown>(C->getOperand()))
2925       return V;
2926 
2927   return nullptr;
2928 }
2929 
2930 void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
2931   Value *Ptr = getLoadStorePointerOperand(MemAccess);
2932   if (!Ptr)
2933     return;
2934 
2935   // Note: getStrideFromPointer is a *profitability* heuristic.  We
2936   // could broaden the scope of values returned here - to anything
2937   // which happens to be loop invariant and contributes to the
2938   // computation of an interesting IV - but we chose not to as we
2939   // don't have a cost model here, and broadening the scope exposes
2940   // far too many unprofitable cases.
2941   const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
2942   if (!StrideExpr)
2943     return;
2944 
2945   LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
2946                        "versioning:");
2947   LLVM_DEBUG(dbgs() << "  Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");
2948 
2949   if (!SpeculateUnitStride) {
2950     LLVM_DEBUG(dbgs() << "  Chose not to due to -laa-speculate-unit-stride\n");
2951     return;
2952   }
2953 
2954   // Avoid adding the "Stride == 1" predicate when we know that
2955   // Stride >= Trip-Count. Such a predicate will effectively optimize a single
2956   // or zero iteration loop, as Trip-Count <= Stride == 1.
2957   //
2958   // TODO: We are currently not making a very informed decision on when it is
2959   // beneficial to apply stride versioning. It might make more sense that the
2960   // users of this analysis (such as the vectorizer) will trigger it, based on
2961   // their specific cost considerations; For example, in cases where stride
2962   // versioning does  not help resolving memory accesses/dependences, the
2963   // vectorizer should evaluate the cost of the runtime test, and the benefit
2964   // of various possible stride specializations, considering the alternatives
2965   // of using gather/scatters (if available).
2966 
2967   const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();
2968 
2969   // Match the types so we can compare the stride and the MaxBTC.
2970   // The Stride can be positive/negative, so we sign extend Stride;
2971   // The backedgeTakenCount is non-negative, so we zero extend MaxBTC.
2972   const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
2973   uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
2974   uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
2975   const SCEV *CastedStride = StrideExpr;
2976   const SCEV *CastedBECount = MaxBTC;
2977   ScalarEvolution *SE = PSE->getSE();
2978   if (BETypeSizeBits >= StrideTypeSizeBits)
2979     CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
2980   else
2981     CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
2982   const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
2983   // Since TripCount == BackEdgeTakenCount + 1, checking:
2984   // "Stride >= TripCount" is equivalent to checking:
2985   // Stride - MaxBTC> 0
2986   if (SE->isKnownPositive(StrideMinusBETaken)) {
2987     LLVM_DEBUG(
2988         dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
2989                   "Stride==1 predicate will imply that the loop executes "
2990                   "at most once.\n");
2991     return;
2992   }
2993   LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");
2994 
2995   // Strip back off the integer cast, and check that our result is a
2996   // SCEVUnknown as we expect.
2997   const SCEV *StrideBase = StrideExpr;
2998   if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(StrideBase))
2999     StrideBase = C->getOperand();
3000   SymbolicStrides[Ptr] = cast<SCEVUnknown>(StrideBase);
3001 }
3002 
3003 LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
3004                                const TargetTransformInfo *TTI,
3005                                const TargetLibraryInfo *TLI, AAResults *AA,
3006                                DominatorTree *DT, LoopInfo *LI)
3007     : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
3008       PtrRtChecking(nullptr), TheLoop(L) {
3009   unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3010   if (TTI) {
3011     TypeSize FixedWidth =
3012         TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
3013     if (FixedWidth.isNonZero()) {
3014       // Scale the vector width by 2 as rough estimate to also consider
3015       // interleaving.
3016       MaxTargetVectorWidthInBits = FixedWidth.getFixedValue() * 2;
3017     }
3018 
3019     TypeSize ScalableWidth =
3020         TTI->getRegisterBitWidth(TargetTransformInfo::RGK_ScalableVector);
3021     if (ScalableWidth.isNonZero())
3022       MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
3023   }
3024   DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
3025                                                   MaxTargetVectorWidthInBits);
3026   PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
3027   if (canAnalyzeLoop())
3028     CanVecMem = analyzeLoop(AA, LI, TLI, DT);
3029 }
3030 
3031 void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
3032   if (CanVecMem) {
3033     OS.indent(Depth) << "Memory dependences are safe";
3034     const MemoryDepChecker &DC = getDepChecker();
3035     if (!DC.isSafeForAnyVectorWidth())
3036       OS << " with a maximum safe vector width of "
3037          << DC.getMaxSafeVectorWidthInBits() << " bits";
3038     if (PtrRtChecking->Need)
3039       OS << " with run-time checks";
3040     OS << "\n";
3041   }
3042 
3043   if (HasConvergentOp)
3044     OS.indent(Depth) << "Has convergent operation in loop\n";
3045 
3046   if (Report)
3047     OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";
3048 
3049   if (auto *Dependences = DepChecker->getDependences()) {
3050     OS.indent(Depth) << "Dependences:\n";
3051     for (const auto &Dep : *Dependences) {
3052       Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
3053       OS << "\n";
3054     }
3055   } else
3056     OS.indent(Depth) << "Too many dependences, not recorded\n";
3057 
3058   // List the pair of accesses need run-time checks to prove independence.
3059   PtrRtChecking->print(OS, Depth);
3060   OS << "\n";
3061 
3062   OS.indent(Depth)
3063       << "Non vectorizable stores to invariant address were "
3064       << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
3065                   HasLoadStoreDependenceInvolvingLoopInvariantAddress
3066               ? ""
3067               : "not ")
3068       << "found in loop.\n";
3069 
3070   OS.indent(Depth) << "SCEV assumptions:\n";
3071   PSE->getPredicate().print(OS, Depth);
3072 
3073   OS << "\n";
3074 
3075   OS.indent(Depth) << "Expressions re-written:\n";
3076   PSE->print(OS, Depth);
3077 }
3078 
3079 const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
3080   const auto &[It, Inserted] = LoopAccessInfoMap.insert({&L, nullptr});
3081 
3082   if (Inserted)
3083     It->second =
3084         std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, &LI);
3085 
3086   return *It->second;
3087 }
3088 void LoopAccessInfoManager::clear() {
3089   SmallVector<Loop *> ToRemove;
3090   // Collect LoopAccessInfo entries that may keep references to IR outside the
3091   // analyzed loop or SCEVs that may have been modified or invalidated. At the
3092   // moment, that is loops requiring memory or SCEV runtime checks, as those cache
3093   // SCEVs, e.g. for pointer expressions.
3094   for (const auto &[L, LAI] : LoopAccessInfoMap) {
3095     if (LAI->getRuntimePointerChecking()->getChecks().empty() &&
3096         LAI->getPSE().getPredicate().isAlwaysTrue())
3097       continue;
3098     ToRemove.push_back(L);
3099   }
3100 
3101   for (Loop *L : ToRemove)
3102     LoopAccessInfoMap.erase(L);
3103 }
3104 
3105 bool LoopAccessInfoManager::invalidate(
3106     Function &F, const PreservedAnalyses &PA,
3107     FunctionAnalysisManager::Invalidator &Inv) {
3108   // Check whether our analysis is preserved.
3109   auto PAC = PA.getChecker<LoopAccessAnalysis>();
3110   if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
3111     // If not, give up now.
3112     return true;
3113 
3114   // Check whether the analyses we depend on became invalid for any reason.
3115   // Skip checking TargetLibraryAnalysis as it is immutable and can't become
3116   // invalid.
3117   return Inv.invalidate<AAManager>(F, PA) ||
3118          Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
3119          Inv.invalidate<LoopAnalysis>(F, PA) ||
3120          Inv.invalidate<DominatorTreeAnalysis>(F, PA);
3121 }
3122 
3123 LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
3124                                               FunctionAnalysisManager &FAM) {
3125   auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
3126   auto &AA = FAM.getResult<AAManager>(F);
3127   auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
3128   auto &LI = FAM.getResult<LoopAnalysis>(F);
3129   auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
3130   auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
3131   return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
3132 }
3133 
3134 AnalysisKey LoopAccessAnalysis::Key;
3135