//===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// The implementation of the loop memory dependence analysis that was
// originally developed for the loop vectorizer.
//
//===----------------------------------------------------------------------===//

#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include "llvm/ADT/PointerIntPair.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator_range.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AliasSetTracker.h"
#include "llvm/Analysis/LoopAnalysisManager.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdlib>
#include <iterator>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "loop-accesses"

static cl::opt<unsigned, true>
VectorizationFactor("force-vector-width", cl::Hidden,
                    cl::desc("Sets the SIMD width. Zero is autoselect."),
                    cl::location(VectorizerParams::VectorizationFactor));
unsigned VectorizerParams::VectorizationFactor;

static cl::opt<unsigned, true>
VectorizationInterleave("force-vector-interleave", cl::Hidden,
                        cl::desc("Sets the vectorization interleave count. "
                                 "Zero is autoselect."),
                        cl::location(
                            VectorizerParams::VectorizationInterleave));
unsigned VectorizerParams::VectorizationInterleave;

static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold(
    "runtime-memory-check-threshold", cl::Hidden,
    cl::desc("When performing memory disambiguation checks at runtime do not "
             "generate more than this number of comparisons (default = 8)."),
    cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8));
unsigned VectorizerParams::RuntimeMemoryCheckThreshold;

/// The maximum iterations used to merge memory checks.
static cl::opt<unsigned> MemoryCheckMergeThreshold(
    "memory-check-merge-threshold", cl::Hidden,
    cl::desc("Maximum number of comparisons done when trying to merge "
             "runtime memory checks. (default = 100)"),
    cl::init(100));

/// Maximum SIMD width.
const unsigned VectorizerParams::MaxVectorWidth = 64;

/// We collect dependences up to this threshold.
static cl::opt<unsigned>
    MaxDependences("max-dependences", cl::Hidden,
                   cl::desc("Maximum number of dependences collected by "
                            "loop-access analysis (default = 100)"),
                   cl::init(100));

/// This enables versioning on the strides of symbolically striding memory
/// accesses in code like the following.
///   for (i = 0; i < N; ++i)
///     A[i * Stride1] += B[i * Stride2] ...
///
/// Will be roughly translated to
///    if (Stride1 == 1 && Stride2 == 1) {
///      for (i = 0; i < N; i+=4)
///       A[i:i+3] += ...
///    } else
///      ...
static cl::opt<bool> EnableMemAccessVersioning(
    "enable-mem-access-versioning", cl::init(true), cl::Hidden,
    cl::desc("Enable symbolic stride memory access versioning"));

/// Enable store-to-load forwarding conflict detection. This option can
/// be disabled for correctness testing.
static cl::opt<bool> EnableForwardingConflictDetection(
    "store-to-load-forwarding-conflict-detection", cl::Hidden,
    cl::desc("Enable conflict detection in loop-access analysis"),
    cl::init(true));

bool VectorizerParams::isInterleaveForced() {
  return ::VectorizationInterleave.getNumOccurrences() > 0;
}

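/// A minimal illustrative sketch of stripIntegerCast below (the IR names are
/// hypothetical): given
///   %c = sext i32 %stride to i64
/// stripIntegerCast(%c) returns %stride, because the cast's operand is
/// integer-typed; any other value is returned unchanged.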
Value *llvm::stripIntegerCast(Value *V) {
  if (auto *CI = dyn_cast<CastInst>(V))
    if (CI->getOperand(0)->getType()->isIntegerTy())
      return CI->getOperand(0);
  return V;
}

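/// Illustrative sketch of the rewrite performed below (IR names
/// hypothetical): for a pointer %p with SCEV {%A,+,(4 * %Stride)} whose
/// stride %Stride has an entry in PtrToStride, the predicate "%Stride == 1"
/// is added to the PSE and the pointer's SCEV is re-derived under it,
/// simplifying to {%A,+,4}.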
const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE,
                                            const ValueToValueMap &PtrToStride,
                                            Value *Ptr, Value *OrigPtr) {
  const SCEV *OrigSCEV = PSE.getSCEV(Ptr);

  // If there is an entry in the map return the SCEV of the pointer with the
  // symbolic stride replaced by one.
  ValueToValueMap::const_iterator SI =
      PtrToStride.find(OrigPtr ? OrigPtr : Ptr);
  if (SI == PtrToStride.end())
    // For a non-symbolic stride, just return the original expression.
    return OrigSCEV;

  Value *StrideVal = stripIntegerCast(SI->second);

  ScalarEvolution *SE = PSE.getSE();
  const auto *U = cast<SCEVUnknown>(SE->getSCEV(StrideVal));
  const auto *CT =
    static_cast<const SCEVConstant *>(SE->getOne(StrideVal->getType()));

  PSE.addPredicate(*SE->getEqualPredicate(U, CT));
  auto *Expr = PSE.getSCEV(Ptr);

  LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV
                    << " by: " << *Expr << "\n");
  return Expr;
}

RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup(
    unsigned Index, RuntimePointerChecking &RtCheck)
    : RtCheck(RtCheck), High(RtCheck.Pointers[Index].End),
      Low(RtCheck.Pointers[Index].Start) {
  Members.push_back(Index);
}

/// Calculate Start and End points of memory access.
/// Let's assume A is the first access and B is a memory access on the N-th
/// loop iteration. Then B is calculated as:
///   B = A + Step*N .
/// Step value may be positive or negative.
/// N is a calculated back-edge taken count:
///     N = (TripCount > 0) ? RoundDown(TripCount - 1, VF) : 0
/// Start and End points are calculated in the following way:
/// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt,
/// where SizeOfElt is the size of a single memory access in bytes.
///
/// There is no conflict when the intervals are disjoint:
/// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End)
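///
/// Illustrative example (assuming 4-byte elements): with A = %base,
/// Step = -4 and N = 9, B = %base - 36, so the step is negative and
/// Start = UMIN(A, B) = %base - 36 while
/// End = UMAX(A, B) + SizeOfElt = %base + 4.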
void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr,
                                    unsigned DepSetId, unsigned ASId,
                                    const ValueToValueMap &Strides,
                                    PredicatedScalarEvolution &PSE) {
  // Get the stride replaced scev.
  const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
  ScalarEvolution *SE = PSE.getSE();

  const SCEV *ScStart;
  const SCEV *ScEnd;

  if (SE->isLoopInvariant(Sc, Lp))
    ScStart = ScEnd = Sc;
  else {
    const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Sc);
    assert(AR && "Invalid addrec expression");
    const SCEV *Ex = PSE.getBackedgeTakenCount();

    ScStart = AR->getStart();
    ScEnd = AR->evaluateAtIteration(Ex, *SE);
    const SCEV *Step = AR->getStepRecurrence(*SE);

    // For expressions with negative step, the upper bound is ScStart and the
    // lower bound is ScEnd.
    if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) {
      if (CStep->getValue()->isNegative())
        std::swap(ScStart, ScEnd);
    } else {
      // Fallback case: the step is not constant, but we can still
      // get the upper and lower bounds of the interval by using min/max
      // expressions.
      ScStart = SE->getUMinExpr(ScStart, ScEnd);
      ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd);
    }
    // Add the size of the pointed element to ScEnd.
    auto &DL = Lp->getHeader()->getModule()->getDataLayout();
    Type *IdxTy = DL.getIndexType(Ptr->getType());
    const SCEV *EltSizeSCEV =
        SE->getStoreSizeOfExpr(IdxTy, Ptr->getType()->getPointerElementType());
    ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV);
  }

  Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, Sc);
}

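/// Illustrative example: with three checking groups G0, G1 and G2, the loops
/// below consider the pairs (G0,G1), (G0,G2) and (G1,G2) and keep only those
/// for which needsChecking() holds, e.g. dropping a pair whose members are
/// all reads or all belong to the same dependence set.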
SmallVector<RuntimePointerCheck, 4>
RuntimePointerChecking::generateChecks() const {
  SmallVector<RuntimePointerCheck, 4> Checks;

  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) {
      const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I];
      const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J];

      if (needsChecking(CGI, CGJ))
        Checks.push_back(std::make_pair(&CGI, &CGJ));
    }
  }
  return Checks;
}

void RuntimePointerChecking::generateChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  assert(Checks.empty() && "Checks is not empty");
  groupChecks(DepCands, UseDependencies);
  Checks = generateChecks();
}

bool RuntimePointerChecking::needsChecking(
    const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const {
  for (unsigned I = 0, EI = M.Members.size(); EI != I; ++I)
    for (unsigned J = 0, EJ = N.Members.size(); EJ != J; ++J)
      if (needsChecking(M.Members[I], N.Members[J]))
        return true;
  return false;
}

/// Compare \p I and \p J and return the minimum.
/// Return nullptr in case we couldn't find an answer.
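///
/// Illustrative example: for I = (%base + 4) and J = (%base + 12) the
/// difference J - I folds to the constant 8, which is non-negative, so I is
/// returned; when the difference does not fold to a constant, nullptr is
/// returned.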
static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
                                   ScalarEvolution *SE) {
  const SCEV *Diff = SE->getMinusSCEV(J, I);
  const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);

  if (!C)
    return nullptr;
  if (C->getValue()->isNegative())
    return J;
  return I;
}

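/// Illustrative example: a group with bounds [Low = %a, High = %a + 40] can
/// absorb a pointer with [Start = %a + 16, End = %a + 56], since both
/// distances are compile-time constants; High then widens to %a + 56. If
/// either distance is not constant, the pointer is rejected and the caller
/// starts a new group.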
bool RuntimeCheckingPtrGroup::addPointer(unsigned Index) {
  const SCEV *Start = RtCheck.Pointers[Index].Start;
  const SCEV *End = RtCheck.Pointers[Index].End;

  // Compare the starts and ends with the known minimum and maximum
  // of this set. We need to know how we compare against the min/max
  // of the set in order to be able to emit memchecks.
  const SCEV *Min0 = getMinFromExprs(Start, Low, RtCheck.SE);
  if (!Min0)
    return false;

  const SCEV *Min1 = getMinFromExprs(End, High, RtCheck.SE);
  if (!Min1)
    return false;

  // Update the low bound expression if we've found a new min value.
  if (Min0 == Start)
    Low = Start;

  // Update the high bound expression if we've found a new max value.
  if (Min1 != End)
    High = End;

  Members.push_back(Index);
  return true;
}

void RuntimePointerChecking::groupChecks(
    MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
  // We build the groups from dependency candidates equivalence classes
  // because:
  //    - We know that pointers in the same equivalence class share
  //      the same underlying object and therefore there is a chance
  //      that we can compare pointers
  //    - We wouldn't be able to merge two pointers for which we need
  //      to emit a memcheck. The classes in DepCands are already
  //      conveniently built such that no two pointers in the same
  //      class need checking against each other.

  // We use the following (greedy) algorithm to construct the groups
  // For every pointer in the equivalence class:
  //   For each existing group:
  //   - if the difference between this pointer and the min/max bounds
  //     of the group is a constant, then make the pointer part of the
  //     group and update the min/max bounds of that group as required.

  CheckingGroups.clear();

  // If we need to check two pointers to the same underlying object
  // with a non-constant difference, we shouldn't perform any pointer
  // grouping with those pointers. This is because we can easily get
  // into cases where the resulting check would return false, even when
  // the accesses are safe.
  //
  // The following example shows this:
  // for (i = 0; i < 1000; ++i)
  //   a[5000 + i * m] = a[i] + a[i + 9000]
  //
  // Here grouping gives a check of (5000, 5000 + 1000 * m) against
  // (0, 10000) which is always false. However, if m is 1, there is no
  // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
  // us to perform an accurate check in this case.
  //
  // The above case requires that we have an UnknownDependence between
  // accesses to the same underlying object. This cannot happen unless
  // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
  // is also false. In this case we will use the fallback path and create
  // separate checking groups for all pointers.

  // If we don't have the dependency partitions, construct a new
  // checking pointer group for each pointer. This is also required
  // for correctness, because in this case we can have checking between
  // pointers to the same underlying object.
  if (!UseDependencies) {
    for (unsigned I = 0; I < Pointers.size(); ++I)
      CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
    return;
  }

  unsigned TotalComparisons = 0;

  DenseMap<Value *, unsigned> PositionMap;
  for (unsigned Index = 0; Index < Pointers.size(); ++Index)
    PositionMap[Pointers[Index].PointerValue] = Index;

  // We need to keep track of what pointers we've already seen so we
  // don't process them twice.
  SmallSet<unsigned, 2> Seen;

  // Go through all equivalence classes, get the "pointer check groups"
  // and add them to the overall solution. We use the order in which accesses
  // appear in 'Pointers' to enforce determinism.
  for (unsigned I = 0; I < Pointers.size(); ++I) {
    // We've seen this pointer before, and therefore already processed
    // its equivalence class.
    if (Seen.count(I))
      continue;

    MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
                                           Pointers[I].IsWritePtr);

    SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
    auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));

    // Because DepCands is constructed by visiting accesses in the order in
    // which they appear in alias sets (which is deterministic) and the
    // iteration order within an equivalence class member is only dependent on
    // the order in which unions and insertions are performed on the
    // equivalence class, the iteration order is deterministic.
    for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
         MI != ME; ++MI) {
      auto PointerI = PositionMap.find(MI->getPointer());
      assert(PointerI != PositionMap.end() &&
             "pointer in equivalence class not found in PositionMap");
      unsigned Pointer = PointerI->second;
      bool Merged = false;
      // Mark this pointer as seen.
      Seen.insert(Pointer);

      // Go through all the existing sets and see if we can find one
      // which can include this pointer.
      for (RuntimeCheckingPtrGroup &Group : Groups) {
        // Don't perform more than a certain amount of comparisons.
        // This should limit the cost of grouping the pointers to something
        // reasonable.  If we do end up hitting this threshold, the algorithm
        // will create separate groups for all remaining pointers.
        if (TotalComparisons > MemoryCheckMergeThreshold)
          break;

        TotalComparisons++;

        if (Group.addPointer(Pointer)) {
          Merged = true;
          break;
        }
      }

      if (!Merged)
        // We couldn't add this pointer to any existing set or the threshold
        // for the number of comparisons has been reached. Create a new group
        // to hold the current pointer.
        Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
    }

    // We've computed the grouped checks for this partition.
    // Save the results and continue with the next one.
    llvm::copy(Groups, std::back_inserter(CheckingGroups));
  }
}

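/// Illustrative example: with PtrToPartition = {0, 0, 1, -1}, indices 0 and
/// 1 are in the same partition, while an entry of -1 (a pointer not
/// attributed to a single partition) never matches, not even against itself.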
bool RuntimePointerChecking::arePointersInSamePartition(
    const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1,
    unsigned PtrIdx2) {
  return (PtrToPartition[PtrIdx1] != -1 &&
          PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]);
}

bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const {
  const PointerInfo &PointerI = Pointers[I];
  const PointerInfo &PointerJ = Pointers[J];

  // No need to check if two readonly pointers intersect.
  if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr)
    return false;

  // Only need to check pointers between two different dependency sets.
  if (PointerI.DependencySetId == PointerJ.DependencySetId)
    return false;

  // Only need to check pointers in the same alias set.
  if (PointerI.AliasSetId != PointerJ.AliasSetId)
    return false;

  return true;
}

void RuntimePointerChecking::printChecks(
    raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks,
    unsigned Depth) const {
  unsigned N = 0;
  for (const auto &Check : Checks) {
    const auto &First = Check.first->Members, &Second = Check.second->Members;

    OS.indent(Depth) << "Check " << N++ << ":\n";

    OS.indent(Depth + 2) << "Comparing group (" << Check.first << "):\n";
    for (unsigned K = 0; K < First.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[First[K]].PointerValue << "\n";

    OS.indent(Depth + 2) << "Against group (" << Check.second << "):\n";
    for (unsigned K = 0; K < Second.size(); ++K)
      OS.indent(Depth + 2) << *Pointers[Second[K]].PointerValue << "\n";
  }
}

void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const {

  OS.indent(Depth) << "Run-time memory checks:\n";
  printChecks(OS, Checks, Depth);

  OS.indent(Depth) << "Grouped accesses:\n";
  for (unsigned I = 0; I < CheckingGroups.size(); ++I) {
    const auto &CG = CheckingGroups[I];

    OS.indent(Depth + 2) << "Group " << &CG << ":\n";
    OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High
                         << ")\n";
    for (unsigned J = 0; J < CG.Members.size(); ++J) {
      OS.indent(Depth + 6) << "Member: " << *Pointers[CG.Members[J]].Expr
                           << "\n";
    }
  }
}

namespace {

/// Analyses memory accesses in a loop.
///
/// Checks whether run time pointer checks are needed and builds sets for data
/// dependence checking.
class AccessAnalysis {
public:
  /// Read or write access location.
  typedef PointerIntPair<Value *, 1, bool> MemAccessInfo;
  typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList;

  AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI,
                 MemoryDepChecker::DepCandidates &DA,
                 PredicatedScalarEvolution &PSE)
      : TheLoop(TheLoop), AST(*AA), LI(LI), DepCands(DA),
        IsRTCheckAnalysisNeeded(false), PSE(PSE) {}

  /// Register a load and whether it is only read from.
  void addLoad(MemoryLocation &Loc, bool IsReadOnly) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, false));
    if (IsReadOnly)
      ReadOnlyPtr.insert(Ptr);
  }

  /// Register a store.
  void addStore(MemoryLocation &Loc) {
    Value *Ptr = const_cast<Value*>(Loc.Ptr);
    AST.add(Ptr, LocationSize::beforeOrAfterPointer(), Loc.AATags);
    Accesses.insert(MemAccessInfo(Ptr, true));
  }

  /// Check if we can emit a run-time no-alias check for \p Access.
  ///
  /// Returns true if we can emit a run-time no alias check for \p Access.
  /// If we can check this access, this also adds it to a dependence set and
  /// adds a run-time check for it to \p RtCheck. If \p Assume is true,
  /// we will attempt to use additional run-time checks in order to get
  /// the bounds of the pointer.
  bool createCheckForAccess(RuntimePointerChecking &RtCheck,
                            MemAccessInfo Access,
                            const ValueToValueMap &Strides,
                            DenseMap<Value *, unsigned> &DepSetId,
                            Loop *TheLoop, unsigned &RunningDepId,
                            unsigned ASId, bool ShouldCheckStride,
                            bool Assume);

  /// Check whether we can check the pointers at runtime for
  /// non-intersection.
  ///
  /// Returns true if we need no check or if we do and we can generate them
  /// (i.e. the pointers have computable bounds).
  bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE,
                       Loop *TheLoop, const ValueToValueMap &Strides,
                       bool ShouldCheckWrap = false);

  /// Goes over all memory accesses, checks whether a RT check is needed
  /// and builds sets of dependent accesses.
  void buildDependenceSets() {
    processMemAccesses();
  }

  /// Initial processing of memory accesses determined that we need to
  /// perform dependency checking.
  ///
  /// Note that this can later be cleared if we retry memcheck analysis without
  /// dependency checking (i.e. FoundNonConstantDistanceDependence).
  bool isDependencyCheckNeeded() { return !CheckDeps.empty(); }

  /// We decided that no dependence analysis would be used.  Reset the state.
  void resetDepChecks(MemoryDepChecker &DepChecker) {
    CheckDeps.clear();
    DepChecker.clearDependences();
  }

  MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; }

private:
  typedef SetVector<MemAccessInfo> PtrAccessSet;

  /// Go over all memory accesses and check whether runtime pointer checks
  /// are needed and build sets of dependency check candidates.
  void processMemAccesses();

  /// Set of all accesses.
  PtrAccessSet Accesses;

  /// The loop being checked.
  const Loop *TheLoop;

  /// List of accesses that need a further dependence check.
  MemAccessInfoList CheckDeps;

  /// Set of pointers that are read only.
  SmallPtrSet<Value*, 16> ReadOnlyPtr;

  /// An alias set tracker to partition the access set by underlying object and
  /// intrinsic property (such as TBAA metadata).
  AliasSetTracker AST;

  LoopInfo *LI;

  /// Sets of potentially dependent accesses - members of one set share an
  /// underlying pointer. The set "CheckDeps" identifies which sets really need
  /// a dependence check.
  MemoryDepChecker::DepCandidates &DepCands;

  /// Initial processing of memory accesses determined that we may need
  /// to add memchecks.  Perform the analysis to determine the necessary checks.
  ///
  /// Note that this is different from isDependencyCheckNeeded.  When we retry
  /// memcheck analysis without dependency checking
  /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is
  /// cleared while this remains set if we have potentially dependent accesses.
  bool IsRTCheckAnalysisNeeded;

  /// The SCEV predicate containing all the SCEV-related assumptions.
  PredicatedScalarEvolution &PSE;
};

} // end anonymous namespace

/// Check whether a pointer can participate in a runtime bounds check.
/// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr
/// by adding run-time checks (overflow checks) if necessary.
static bool hasComputableBounds(PredicatedScalarEvolution &PSE,
                                const ValueToValueMap &Strides, Value *Ptr,
                                Loop *L, bool Assume) {
  const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);

  // The bounds for a loop-invariant pointer are trivial.
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);

  if (!AR && Assume)
    AR = PSE.getAsAddRec(Ptr);

  if (!AR)
    return false;

  return AR->isAffine();
}

/// Check whether a pointer address cannot wrap.
static bool isNoWrap(PredicatedScalarEvolution &PSE,
                     const ValueToValueMap &Strides, Value *Ptr, Loop *L) {
  const SCEV *PtrScev = PSE.getSCEV(Ptr);
  if (PSE.getSE()->isLoopInvariant(PtrScev, L))
    return true;

  int64_t Stride = getPtrStride(PSE, Ptr, L, Strides);
  if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW))
    return true;

  return false;
}

bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck,
                                          MemAccessInfo Access,
                                          const ValueToValueMap &StridesMap,
                                          DenseMap<Value *, unsigned> &DepSetId,
                                          Loop *TheLoop, unsigned &RunningDepId,
                                          unsigned ASId, bool ShouldCheckWrap,
                                          bool Assume) {
  Value *Ptr = Access.getPointer();

  if (!hasComputableBounds(PSE, StridesMap, Ptr, TheLoop, Assume))
    return false;

  // When we run after a failing dependency check we have to make sure
  // we don't have wrapping pointers.
  if (ShouldCheckWrap && !isNoWrap(PSE, StridesMap, Ptr, TheLoop)) {
    auto *Expr = PSE.getSCEV(Ptr);
    if (!Assume || !isa<SCEVAddRecExpr>(Expr))
      return false;
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
  }

  // The id of the dependence set.
  unsigned DepId;

  if (isDependencyCheckNeeded()) {
    Value *Leader = DepCands.getLeaderValue(Access).getPointer();
    unsigned &LeaderId = DepSetId[Leader];
    if (!LeaderId)
      LeaderId = RunningDepId++;
    DepId = LeaderId;
  } else
    // Each access has its own dependence set.
    DepId = RunningDepId++;

  bool IsWrite = Access.getInt();
  RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE);
  LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n');

  return true;
}

bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck,
                                     ScalarEvolution *SE, Loop *TheLoop,
                                     const ValueToValueMap &StridesMap,
                                     bool ShouldCheckWrap) {
  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRT = true;

  bool MayNeedRTCheck = false;
  if (!IsRTCheckAnalysisNeeded) return true;

  bool IsDepCheckNeeded = isDependencyCheckNeeded();

  // We assign a consecutive id to accesses from different alias sets.
  // Accesses between different groups don't need to be checked.
  unsigned ASId = 0;
  for (auto &AS : AST) {
    int NumReadPtrChecks = 0;
    int NumWritePtrChecks = 0;
    bool CanDoAliasSetRT = true;
    ++ASId;

    // We assign consecutive ids to accesses from different dependence sets.
    // Accesses within the same set don't need a runtime check.
    unsigned RunningDepId = 1;
    DenseMap<Value *, unsigned> DepSetId;

    SmallVector<MemAccessInfo, 4> Retries;

    // First, count how many write and read accesses are in the alias set. Also
    // collect MemAccessInfos for later.
    SmallVector<MemAccessInfo, 4> AccessInfos;
    for (const auto &A : AS) {
      Value *Ptr = A.getValue();
      bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true));

      if (IsWrite)
        ++NumWritePtrChecks;
      else
        ++NumReadPtrChecks;
      AccessInfos.emplace_back(Ptr, IsWrite);
    }

    // We do not need runtime checks for this alias set, if there are no writes
    // or a single write and no reads.
    if (NumWritePtrChecks == 0 ||
        (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) {
      assert((AS.size() <= 1 ||
              all_of(AS,
                     [this](auto AC) {
                       MemAccessInfo AccessWrite(AC.getValue(), true);
                       return DepCands.findValue(AccessWrite) == DepCands.end();
                     })) &&
             "Can only skip updating CanDoRT below, if all entries in AS "
             "are reads or there is at most 1 entry");
      continue;
    }

    for (auto &Access : AccessInfos) {
      if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId, TheLoop,
                                RunningDepId, ASId, ShouldCheckWrap, false)) {
        LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:"
                          << *Access.getPointer() << '\n');
        Retries.push_back(Access);
        CanDoAliasSetRT = false;
      }
    }

    // Note that this function computes CanDoRT and MayNeedRTCheck
    // independently. For example, CanDoRT=false and MayNeedRTCheck=false means
    // that we have a pointer for which we couldn't find the bounds but we
    // don't actually need to emit any checks, so it does not matter.
    //
    // We need runtime checks for this alias set, if there are at least 2
    // dependence sets (in which case RunningDepId > 2) or if we need to re-try
    // any bound checks (because in that case the number of dependence sets is
    // incomplete).
    bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty();

    // We need to perform run-time alias checks, but some pointers had bounds
    // that couldn't be checked.
    if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) {
      // Reset the CanDoSetRt flag and retry all accesses that have failed.
      // We know that we need these checks, so we can now be more aggressive
      // and add further checks if required (overflow checks).
      CanDoAliasSetRT = true;
      for (auto Access : Retries)
        if (!createCheckForAccess(RtCheck, Access, StridesMap, DepSetId,
                                  TheLoop, RunningDepId, ASId,
                                  ShouldCheckWrap, /*Assume=*/true)) {
          CanDoAliasSetRT = false;
          break;
        }
    }

    CanDoRT &= CanDoAliasSetRT;
    MayNeedRTCheck |= NeedsAliasSetRTCheck;
    ++ASId;
  }

  // If the pointers that we would use for the bounds comparison have different
  // address spaces, assume the values aren't directly comparable, so we can't
  // use them for the runtime check. We also have to assume they could
  // overlap. In the future there should be metadata for whether address spaces
  // are disjoint.
  unsigned NumPointers = RtCheck.Pointers.size();
  for (unsigned i = 0; i < NumPointers; ++i) {
    for (unsigned j = i + 1; j < NumPointers; ++j) {
      // Only need to check pointers between two different dependency sets.
      if (RtCheck.Pointers[i].DependencySetId ==
          RtCheck.Pointers[j].DependencySetId)
        continue;
      // Only need to check pointers in the same alias set.
      if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId)
        continue;

      Value *PtrI = RtCheck.Pointers[i].PointerValue;
      Value *PtrJ = RtCheck.Pointers[j].PointerValue;

      unsigned ASi = PtrI->getType()->getPointerAddressSpace();
      unsigned ASj = PtrJ->getType()->getPointerAddressSpace();
      if (ASi != ASj) {
        LLVM_DEBUG(
            dbgs() << "LAA: Runtime check would require comparison between"
                      " different address spaces\n");
        return false;
      }
    }
  }

  if (MayNeedRTCheck && CanDoRT)
    RtCheck.generateChecks(DepCands, IsDepCheckNeeded);

  LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks()
                    << " pointer comparisons.\n");

  // If we can do run-time checks, but there are no checks, no runtime checks
  // are needed. This can happen when all pointers point to the same underlying
  // object for example.
  RtCheck.Need = CanDoRT ? RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck;

  bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT;
  if (!CanDoRTIfNeeded)
    RtCheck.reset();
  return CanDoRTIfNeeded;
}

void AccessAnalysis::processMemAccesses() {
  // We process the set twice: first we process read-write pointers, last we
  // process read-only pointers. This allows us to skip dependence tests for
  // read-only pointers.

  LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n");
  LLVM_DEBUG(dbgs() << "  AST: "; AST.dump());
  LLVM_DEBUG(dbgs() << "LAA:   Accesses(" << Accesses.size() << "):\n");
  LLVM_DEBUG({
    for (auto A : Accesses)
      dbgs() << "\t" << *A.getPointer() << " (" <<
                (A.getInt() ? "write" : (ReadOnlyPtr.count(A.getPointer()) ?
                                         "read-only" : "read")) << ")\n";
  });

  // The AliasSetTracker has nicely partitioned our pointers by metadata
  // compatibility and potential for underlying-object overlap. As a result, we
  // only need to check for potential pointer dependencies within each alias
  // set.
  for (const auto &AS : AST) {
    // Note that both the alias-set tracker and the alias sets themselves use
    // linked lists internally and so the iteration order here is deterministic
    // (matching the original instruction order within each set).

    bool SetHasWrite = false;

    // Map of pointers to last access encountered.
    typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap;
    UnderlyingObjToAccessMap ObjToLastAccess;

    // Set of accesses to check after all writes have been processed.
    PtrAccessSet DeferredAccesses;

    // Iterate over each alias set twice, once to process read/write pointers,
    // and then to process read-only pointers.
    for (int SetIteration = 0; SetIteration < 2; ++SetIteration) {
      bool UseDeferred = SetIteration > 0;
      PtrAccessSet &S = UseDeferred ? DeferredAccesses : Accesses;

      for (const auto &AV : AS) {
        Value *Ptr = AV.getValue();

        // For a single memory access in AliasSetTracker, Accesses may contain
        // both read and write, and they both need to be handled for CheckDeps.
        for (const auto &AC : S) {
          if (AC.getPointer() != Ptr)
            continue;

          bool IsWrite = AC.getInt();

          // If we're using the deferred access set, then it contains only
          // reads.
          bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite;
          if (UseDeferred && !IsReadOnlyPtr)
            continue;
          // Otherwise, the pointer must be in the PtrAccessSet, either as a
          // read or a write.
          assert(((IsReadOnlyPtr && UseDeferred) || IsWrite ||
                  S.count(MemAccessInfo(Ptr, false))) &&
                 "Alias-set pointer not in the access set?");

          MemAccessInfo Access(Ptr, IsWrite);
          DepCands.insert(Access);

          // Memorize read-only pointers for later processing and skip them in
          // the first round (they need to be checked after we have seen all
          // write pointers). Note: we also mark pointers that are not
          // consecutive as "read-only" pointers (so that we check
          // "a[b[i]] +="). Hence, we need the second check for "!IsWrite".
          if (!UseDeferred && IsReadOnlyPtr) {
            DeferredAccesses.insert(Access);
            continue;
          }

          // If this is a write, check other reads and writes for conflicts. If
          // this is a read, only check other writes for conflicts (but only if
          // there is no other write to the ptr - this is an optimization to
          // catch "a[i] = a[i] + " without having to do a dependence check).
          if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) {
            CheckDeps.push_back(Access);
            IsRTCheckAnalysisNeeded = true;
          }

          if (IsWrite)
            SetHasWrite = true;

          // Create sets of pointers connected by a shared alias set and
          // underlying object.
          typedef SmallVector<const Value *, 16> ValueVector;
          ValueVector TempObjects;

          getUnderlyingObjects(Ptr, TempObjects, LI);
          LLVM_DEBUG(dbgs()
                     << "Underlying objects for pointer " << *Ptr << "\n");
          for (const Value *UnderlyingObj : TempObjects) {
            // Null pointers never alias; don't join sets for pointers that
            // have "null" in their UnderlyingObjects list.
            if (isa<ConstantPointerNull>(UnderlyingObj) &&
                !NullPointerIsDefined(
                    TheLoop->getHeader()->getParent(),
                    UnderlyingObj->getType()->getPointerAddressSpace()))
              continue;

            UnderlyingObjToAccessMap::iterator Prev =
                ObjToLastAccess.find(UnderlyingObj);
            if (Prev != ObjToLastAccess.end())
              DepCands.unionSets(Access, Prev->second);

            ObjToLastAccess[UnderlyingObj] = Access;
            LLVM_DEBUG(dbgs() << "  " << *UnderlyingObj << "\n");
          }
        }
      }
    }
  }
}

static bool isInBoundsGep(Value *Ptr) {
  if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr))
    return GEP->isInBounds();
  return false;
}

/// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping,
/// i.e. monotonically increasing/decreasing.
9707330f729Sjoerg static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR,
9717330f729Sjoerg                            PredicatedScalarEvolution &PSE, const Loop *L) {
9727330f729Sjoerg   // FIXME: This should probably only return true for NUW.
9737330f729Sjoerg   if (AR->getNoWrapFlags(SCEV::NoWrapMask))
9747330f729Sjoerg     return true;
9757330f729Sjoerg 
9767330f729Sjoerg   // Scalar evolution does not propagate the non-wrapping flags to values that
9777330f729Sjoerg   // are derived from a non-wrapping induction variable because non-wrapping
9787330f729Sjoerg   // could be flow-sensitive.
9797330f729Sjoerg   //
9807330f729Sjoerg   // Look through the potentially overflowing instruction to try to prove
9817330f729Sjoerg   // non-wrapping for the *specific* value of Ptr.
9827330f729Sjoerg 
9837330f729Sjoerg   // The arithmetic implied by an inbounds GEP can't overflow.
9847330f729Sjoerg   auto *GEP = dyn_cast<GetElementPtrInst>(Ptr);
9857330f729Sjoerg   if (!GEP || !GEP->isInBounds())
9867330f729Sjoerg     return false;
9877330f729Sjoerg 
9887330f729Sjoerg   // Make sure there is only one non-const index and analyze that.
9897330f729Sjoerg   Value *NonConstIndex = nullptr;
990*82d56013Sjoerg   for (Value *Index : GEP->indices())
9917330f729Sjoerg     if (!isa<ConstantInt>(Index)) {
9927330f729Sjoerg       if (NonConstIndex)
9937330f729Sjoerg         return false;
9947330f729Sjoerg       NonConstIndex = Index;
9957330f729Sjoerg     }
9967330f729Sjoerg   if (!NonConstIndex)
9977330f729Sjoerg     // The recurrence is on the pointer, ignore for now.
9987330f729Sjoerg     return false;
9997330f729Sjoerg 
10007330f729Sjoerg   // The index in GEP is signed.  It is non-wrapping if it's derived from a NSW
10017330f729Sjoerg   // AddRec using a NSW operation.
10027330f729Sjoerg   if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex))
10037330f729Sjoerg     if (OBO->hasNoSignedWrap() &&
10047330f729Sjoerg         // Assume the other operand is constant so that the AddRec can be
10057330f729Sjoerg         // easily found.
10067330f729Sjoerg         isa<ConstantInt>(OBO->getOperand(1))) {
10077330f729Sjoerg       auto *OpScev = PSE.getSCEV(OBO->getOperand(0));
10087330f729Sjoerg 
10097330f729Sjoerg       if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev))
10107330f729Sjoerg         return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW);
10117330f729Sjoerg     }
10127330f729Sjoerg 
10137330f729Sjoerg   return false;
10147330f729Sjoerg }
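
// A minimal sketch of the pattern the inbounds-GEP path above can prove
// non-wrapping (hypothetical IR; %iv, %mul and %base are illustrative only):
//   %mul = mul nsw i64 %iv, 4                          ; NSW op, constant RHS
//   %gep = getelementptr inbounds i32, i32* %base, i64 %mul
// %mul is the single non-constant index, and if {%iv} is an NSW AddRec on
// this loop, the function returns true for %gep.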
10157330f729Sjoerg 
10167330f729Sjoerg /// Check whether the access through \p Ptr has a constant stride.
10177330f729Sjoerg int64_t llvm::getPtrStride(PredicatedScalarEvolution &PSE, Value *Ptr,
10187330f729Sjoerg                            const Loop *Lp, const ValueToValueMap &StridesMap,
10197330f729Sjoerg                            bool Assume, bool ShouldCheckWrap) {
10207330f729Sjoerg   Type *Ty = Ptr->getType();
10217330f729Sjoerg   assert(Ty->isPointerTy() && "Unexpected non-ptr");
10227330f729Sjoerg 
10237330f729Sjoerg   // Make sure that the pointer does not point to aggregate types.
10247330f729Sjoerg   auto *PtrTy = cast<PointerType>(Ty);
10257330f729Sjoerg   if (PtrTy->getElementType()->isAggregateType()) {
10267330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a pointer to a scalar type"
10277330f729Sjoerg                       << *Ptr << "\n");
10287330f729Sjoerg     return 0;
10297330f729Sjoerg   }
10307330f729Sjoerg 
10317330f729Sjoerg   const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr);
10327330f729Sjoerg 
10337330f729Sjoerg   const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev);
10347330f729Sjoerg   if (Assume && !AR)
10357330f729Sjoerg     AR = PSE.getAsAddRec(Ptr);
10367330f729Sjoerg 
10377330f729Sjoerg   if (!AR) {
10387330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr
10397330f729Sjoerg                       << " SCEV: " << *PtrScev << "\n");
10407330f729Sjoerg     return 0;
10417330f729Sjoerg   }
10427330f729Sjoerg 
10437330f729Sjoerg   // The access function must stride over the innermost loop.
10447330f729Sjoerg   if (Lp != AR->getLoop()) {
10457330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop "
10467330f729Sjoerg                       << *Ptr << " SCEV: " << *AR << "\n");
10477330f729Sjoerg     return 0;
10487330f729Sjoerg   }
10497330f729Sjoerg 
10507330f729Sjoerg   // The address calculation must not wrap. Otherwise, a dependence could be
10517330f729Sjoerg   // inverted.
10527330f729Sjoerg   // An inbounds getelementptr that is an AddRec with a unit stride
10537330f729Sjoerg   // cannot wrap per definition. The unit stride requirement is checked later.
10547330f729Sjoerg   // A getelementptr without an inbounds attribute and a unit stride would
10557330f729Sjoerg   // have to access the pointer value "0", which is undefined behavior in
10567330f729Sjoerg   // address space 0; therefore we can also vectorize this case.
10577330f729Sjoerg   bool IsInBoundsGEP = isInBoundsGep(Ptr);
10587330f729Sjoerg   bool IsNoWrapAddRec = !ShouldCheckWrap ||
10597330f729Sjoerg     PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW) ||
10607330f729Sjoerg     isNoWrapAddRec(Ptr, AR, PSE, Lp);
10617330f729Sjoerg   if (!IsNoWrapAddRec && !IsInBoundsGEP &&
10627330f729Sjoerg       NullPointerIsDefined(Lp->getHeader()->getParent(),
10637330f729Sjoerg                            PtrTy->getAddressSpace())) {
10647330f729Sjoerg     if (Assume) {
10657330f729Sjoerg       PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
10667330f729Sjoerg       IsNoWrapAddRec = true;
10677330f729Sjoerg       LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap in the address space:\n"
10687330f729Sjoerg                         << "LAA:   Pointer: " << *Ptr << "\n"
10697330f729Sjoerg                         << "LAA:   SCEV: " << *AR << "\n"
10707330f729Sjoerg                         << "LAA:   Added an overflow assumption\n");
10717330f729Sjoerg     } else {
10727330f729Sjoerg       LLVM_DEBUG(
10737330f729Sjoerg           dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
10747330f729Sjoerg                  << *Ptr << " SCEV: " << *AR << "\n");
10757330f729Sjoerg       return 0;
10767330f729Sjoerg     }
10777330f729Sjoerg   }
10787330f729Sjoerg 
10797330f729Sjoerg   // Check that the step is constant.
10807330f729Sjoerg   const SCEV *Step = AR->getStepRecurrence(*PSE.getSE());
10817330f729Sjoerg 
10827330f729Sjoerg   // Calculate the pointer stride and check if it is constant.
10837330f729Sjoerg   const SCEVConstant *C = dyn_cast<SCEVConstant>(Step);
10847330f729Sjoerg   if (!C) {
10857330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr
10867330f729Sjoerg                       << " SCEV: " << *AR << "\n");
10877330f729Sjoerg     return 0;
10887330f729Sjoerg   }
10897330f729Sjoerg 
10907330f729Sjoerg   auto &DL = Lp->getHeader()->getModule()->getDataLayout();
10917330f729Sjoerg   int64_t Size = DL.getTypeAllocSize(PtrTy->getElementType());
10927330f729Sjoerg   const APInt &APStepVal = C->getAPInt();
10937330f729Sjoerg 
10947330f729Sjoerg   // Huge step value - give up.
10957330f729Sjoerg   if (APStepVal.getBitWidth() > 64)
10967330f729Sjoerg     return 0;
10977330f729Sjoerg 
10987330f729Sjoerg   int64_t StepVal = APStepVal.getSExtValue();
10997330f729Sjoerg 
11007330f729Sjoerg   // Strided access.
11017330f729Sjoerg   int64_t Stride = StepVal / Size;
11027330f729Sjoerg   int64_t Rem = StepVal % Size;
11037330f729Sjoerg   if (Rem)
11047330f729Sjoerg     return 0;
11057330f729Sjoerg 
11067330f729Sjoerg   // If the SCEV could wrap but we have an inbounds gep with a unit stride, we
11077330f729Sjoerg   // know we can't "wrap around the address space". In the case of address
11087330f729Sjoerg   // space zero, this won't happen without triggering undefined behavior.
11097330f729Sjoerg   if (!IsNoWrapAddRec && Stride != 1 && Stride != -1 &&
11107330f729Sjoerg       (IsInBoundsGEP || !NullPointerIsDefined(Lp->getHeader()->getParent(),
11117330f729Sjoerg                                               PtrTy->getAddressSpace()))) {
11127330f729Sjoerg     if (Assume) {
11137330f729Sjoerg       // We can avoid this case by adding a run-time check.
11147330f729Sjoerg       LLVM_DEBUG(dbgs() << "LAA: Non unit strided pointer which is not either "
11157330f729Sjoerg                         << "inbounds or in address space 0 may wrap:\n"
11167330f729Sjoerg                         << "LAA:   Pointer: " << *Ptr << "\n"
11177330f729Sjoerg                         << "LAA:   SCEV: " << *AR << "\n"
11187330f729Sjoerg                         << "LAA:   Added an overflow assumption\n");
11197330f729Sjoerg       PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
11207330f729Sjoerg     } else
11217330f729Sjoerg       return 0;
11227330f729Sjoerg   }
11237330f729Sjoerg 
11247330f729Sjoerg   return Stride;
11257330f729Sjoerg }
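
// A worked example of the stride computation above, with hypothetical
// values: for i32 elements (Size == 4) and a constant AddRec step of 8
// bytes, StepVal / Size gives Stride == 2 with Rem == 0, so a stride of
// two elements per iteration is returned. A step of 6 bytes would leave
// Rem == 2, and the function would return 0 (no constant element stride).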
11267330f729Sjoerg 
1127*82d56013Sjoerg Optional<int> llvm::getPointersDiff(Value *PtrA, Value *PtrB,
1128*82d56013Sjoerg                                     const DataLayout &DL, ScalarEvolution &SE,
1129*82d56013Sjoerg                                     bool StrictCheck, bool CheckType) {
1130*82d56013Sjoerg   assert(PtrA && PtrB && "Expected non-nullptr pointers.");
1131*82d56013Sjoerg   // If A and B are the same pointer, the distance between them is zero.
1132*82d56013Sjoerg   if (PtrA == PtrB)
1133*82d56013Sjoerg     return 0;
1134*82d56013Sjoerg 
1135*82d56013Sjoerg   // Make sure that PtrA and PtrB have the same type if required.
1136*82d56013Sjoerg   if (CheckType && PtrA->getType() != PtrB->getType())
1137*82d56013Sjoerg     return None;
1138*82d56013Sjoerg 
1139*82d56013Sjoerg   unsigned ASA = PtrA->getType()->getPointerAddressSpace();
1140*82d56013Sjoerg   unsigned ASB = PtrB->getType()->getPointerAddressSpace();
1141*82d56013Sjoerg 
1142*82d56013Sjoerg   // Check that the address spaces match.
1143*82d56013Sjoerg   if (ASA != ASB)
1144*82d56013Sjoerg     return None;
1145*82d56013Sjoerg   unsigned IdxWidth = DL.getIndexSizeInBits(ASA);
1146*82d56013Sjoerg 
1147*82d56013Sjoerg   APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
1148*82d56013Sjoerg   Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
1149*82d56013Sjoerg   Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);
1150*82d56013Sjoerg 
1151*82d56013Sjoerg   int Val;
1152*82d56013Sjoerg   if (PtrA1 == PtrB1) {
1153*82d56013Sjoerg     // Retrieve the address space again as pointer stripping now tracks through
1154*82d56013Sjoerg     // `addrspacecast`.
1155*82d56013Sjoerg     ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
1156*82d56013Sjoerg     ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
1157*82d56013Sjoerg     // Check that the address spaces match and that the pointers are valid.
1158*82d56013Sjoerg     if (ASA != ASB)
1159*82d56013Sjoerg       return None;
1160*82d56013Sjoerg 
1161*82d56013Sjoerg     IdxWidth = DL.getIndexSizeInBits(ASA);
1162*82d56013Sjoerg     OffsetA = OffsetA.sextOrTrunc(IdxWidth);
1163*82d56013Sjoerg     OffsetB = OffsetB.sextOrTrunc(IdxWidth);
1164*82d56013Sjoerg 
1165*82d56013Sjoerg     OffsetB -= OffsetA;
1166*82d56013Sjoerg     Val = OffsetB.getSExtValue();
1167*82d56013Sjoerg   } else {
1168*82d56013Sjoerg     // Otherwise, compute the distance between the base pointers with SCEV.
1169*82d56013Sjoerg     const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
1170*82d56013Sjoerg     const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
1171*82d56013Sjoerg     const auto *Diff =
1172*82d56013Sjoerg         dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
1173*82d56013Sjoerg     if (!Diff)
1174*82d56013Sjoerg       return None;
1175*82d56013Sjoerg     Val = Diff->getAPInt().getSExtValue();
1176*82d56013Sjoerg   }
1177*82d56013Sjoerg   Type *Ty = cast<PointerType>(PtrA->getType())->getElementType();
1178*82d56013Sjoerg   int Size = DL.getTypeStoreSize(Ty);
1179*82d56013Sjoerg   int Dist = Val / Size;
1180*82d56013Sjoerg 
1181*82d56013Sjoerg   // Ensure that the calculated distance matches the type-based one after all
1182*82d56013Sjoerg   // bitcasts have been stripped from the provided pointers.
1183*82d56013Sjoerg   if (!StrictCheck || Dist * Size == Val)
1184*82d56013Sjoerg     return Dist;
1185*82d56013Sjoerg   return None;
1186*82d56013Sjoerg }
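
// A worked example, assuming i32 elements and hypothetical values: for
// PtrA == &A[1] and PtrB == &A[4], both pointers strip to the common base A
// with OffsetA == 4 and OffsetB == 16, so Val == 12 and the returned element
// distance is 12 / 4 == 3. Under StrictCheck, a byte distance that is not a
// multiple of the store size (e.g. Val == 10) yields None instead.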
1187*82d56013Sjoerg 
11887330f729Sjoerg bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, const DataLayout &DL,
11897330f729Sjoerg                            ScalarEvolution &SE,
11907330f729Sjoerg                            SmallVectorImpl<unsigned> &SortedIndices) {
11917330f729Sjoerg   assert(llvm::all_of(
11927330f729Sjoerg              VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
11937330f729Sjoerg          "Expected list of pointer operands.");
11947330f729Sjoerg   // Walk over the pointers, and map each of them to an offset relative to
11957330f729Sjoerg   // the first pointer in the array.
11967330f729Sjoerg   Value *Ptr0 = VL[0];
11977330f729Sjoerg 
1198*82d56013Sjoerg   using DistOrdPair = std::pair<int64_t, int>;
1199*82d56013Sjoerg   auto Compare = [](const DistOrdPair &L, const DistOrdPair &R) {
1200*82d56013Sjoerg     return L.first < R.first;
1201*82d56013Sjoerg   };
1202*82d56013Sjoerg   std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
1203*82d56013Sjoerg   Offsets.emplace(0, 0);
1204*82d56013Sjoerg   int Cnt = 1;
1205*82d56013Sjoerg   bool IsConsecutive = true;
1206*82d56013Sjoerg   for (auto *Ptr : VL.drop_front()) {
1207*82d56013Sjoerg     Optional<int> Diff =
1208*82d56013Sjoerg         getPointersDiff(Ptr0, Ptr, DL, SE, /*StrictCheck=*/true);
12097330f729Sjoerg     if (!Diff)
12107330f729Sjoerg       return false;
12117330f729Sjoerg 
12127330f729Sjoerg     // Bail out if a pointer with the same offset has already been seen.
1213*82d56013Sjoerg     int64_t Offset = *Diff;
1214*82d56013Sjoerg     auto Res = Offsets.emplace(Offset, Cnt);
1215*82d56013Sjoerg     if (!Res.second)
12167330f729Sjoerg       return false;
1217*82d56013Sjoerg     // The order stays consecutive only if each element is inserted at the end.
1218*82d56013Sjoerg     IsConsecutive = IsConsecutive && std::next(Res.first) == Offsets.end();
1219*82d56013Sjoerg     ++Cnt;
12207330f729Sjoerg   }
12217330f729Sjoerg   SortedIndices.clear();
1222*82d56013Sjoerg   if (!IsConsecutive) {
1223*82d56013Sjoerg     // Fill the SortedIndices array only if the order is non-consecutive.
12247330f729Sjoerg     SortedIndices.resize(VL.size());
1225*82d56013Sjoerg     Cnt = 0;
1226*82d56013Sjoerg     for (const std::pair<int64_t, int> &Pair : Offsets) {
1227*82d56013Sjoerg       SortedIndices[Cnt] = Pair.second;
1228*82d56013Sjoerg       ++Cnt;
12297330f729Sjoerg     }
1230*82d56013Sjoerg   }
1231*82d56013Sjoerg   return true;
12327330f729Sjoerg }
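
// A worked example with hypothetical pointers: for VL == {A+4, A, A+8} over
// i32 elements, the offsets relative to VL[0] are {0, -1, +1}, so the set
// orders the pairs as (-1,1), (0,0), (+1,2). The order is therefore not
// consecutive, and SortedIndices becomes {1, 0, 2}, i.e. A, A+4, A+8.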
12337330f729Sjoerg 
12347330f729Sjoerg /// Returns true if the memory operations \p A and \p B are consecutive.
12357330f729Sjoerg bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
12367330f729Sjoerg                                ScalarEvolution &SE, bool CheckType) {
12377330f729Sjoerg   Value *PtrA = getLoadStorePointerOperand(A);
12387330f729Sjoerg   Value *PtrB = getLoadStorePointerOperand(B);
1239*82d56013Sjoerg   if (!PtrA || !PtrB)
12407330f729Sjoerg     return false;
1241*82d56013Sjoerg   Optional<int> Diff =
1242*82d56013Sjoerg       getPointersDiff(PtrA, PtrB, DL, SE, /*StrictCheck=*/true, CheckType);
1243*82d56013Sjoerg   return Diff && *Diff == 1;
12447330f729Sjoerg }
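
// Usage sketch with hypothetical instructions: for two adjacent i32 stores
//   store i32 %x, i32* %p      ; %p == &A[7]
//   store i32 %y, i32* %q      ; %q == &A[8]
// getPointersDiff returns an element distance of 1, so isConsecutiveAccess
// is true; with the operands swapped the distance is -1 and it is false.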
12457330f729Sjoerg 
12467330f729Sjoerg MemoryDepChecker::VectorizationSafetyStatus
12477330f729Sjoerg MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
12487330f729Sjoerg   switch (Type) {
12497330f729Sjoerg   case NoDep:
12507330f729Sjoerg   case Forward:
12517330f729Sjoerg   case BackwardVectorizable:
12527330f729Sjoerg     return VectorizationSafetyStatus::Safe;
12537330f729Sjoerg 
12547330f729Sjoerg   case Unknown:
12557330f729Sjoerg     return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
12567330f729Sjoerg   case ForwardButPreventsForwarding:
12577330f729Sjoerg   case Backward:
12587330f729Sjoerg   case BackwardVectorizableButPreventsForwarding:
12597330f729Sjoerg     return VectorizationSafetyStatus::Unsafe;
12607330f729Sjoerg   }
12617330f729Sjoerg   llvm_unreachable("unexpected DepType!");
12627330f729Sjoerg }
12637330f729Sjoerg 
12647330f729Sjoerg bool MemoryDepChecker::Dependence::isBackward() const {
12657330f729Sjoerg   switch (Type) {
12667330f729Sjoerg   case NoDep:
12677330f729Sjoerg   case Forward:
12687330f729Sjoerg   case ForwardButPreventsForwarding:
12697330f729Sjoerg   case Unknown:
12707330f729Sjoerg     return false;
12717330f729Sjoerg 
12727330f729Sjoerg   case BackwardVectorizable:
12737330f729Sjoerg   case Backward:
12747330f729Sjoerg   case BackwardVectorizableButPreventsForwarding:
12757330f729Sjoerg     return true;
12767330f729Sjoerg   }
12777330f729Sjoerg   llvm_unreachable("unexpected DepType!");
12787330f729Sjoerg }
12797330f729Sjoerg 
12807330f729Sjoerg bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
12817330f729Sjoerg   return isBackward() || Type == Unknown;
12827330f729Sjoerg }
12837330f729Sjoerg 
12847330f729Sjoerg bool MemoryDepChecker::Dependence::isForward() const {
12857330f729Sjoerg   switch (Type) {
12867330f729Sjoerg   case Forward:
12877330f729Sjoerg   case ForwardButPreventsForwarding:
12887330f729Sjoerg     return true;
12897330f729Sjoerg 
12907330f729Sjoerg   case NoDep:
12917330f729Sjoerg   case Unknown:
12927330f729Sjoerg   case BackwardVectorizable:
12937330f729Sjoerg   case Backward:
12947330f729Sjoerg   case BackwardVectorizableButPreventsForwarding:
12957330f729Sjoerg     return false;
12967330f729Sjoerg   }
12977330f729Sjoerg   llvm_unreachable("unexpected DepType!");
12987330f729Sjoerg }
12997330f729Sjoerg 
13007330f729Sjoerg bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
13017330f729Sjoerg                                                     uint64_t TypeByteSize) {
13027330f729Sjoerg   // If loads occur at a distance that is not a multiple of a feasible vector
13037330f729Sjoerg   // factor, store-load forwarding does not take place.
13047330f729Sjoerg   // Positive dependences might cause trouble because vectorizing them might
13057330f729Sjoerg   // prevent store-load forwarding, making vectorized code run a lot slower.
13067330f729Sjoerg   //   a[i] = a[i-3] ^ a[i-8];
13077330f729Sjoerg   //   The stores to a[i:i+1] don't align with the loads from a[i-3:i-2], and
13087330f729Sjoerg   //   hence on your typical architecture store-load forwarding does not take
13097330f729Sjoerg   //   place. Vectorizing in such cases does not make sense.
13117330f729Sjoerg 
13127330f729Sjoerg   // After this many iterations store-to-load forwarding conflicts should not
13137330f729Sjoerg   // cause any slowdowns.
13147330f729Sjoerg   const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
13157330f729Sjoerg   // Maximum vector factor.
13167330f729Sjoerg   uint64_t MaxVFWithoutSLForwardIssues = std::min(
13177330f729Sjoerg       VectorizerParams::MaxVectorWidth * TypeByteSize, MaxSafeDepDistBytes);
13187330f729Sjoerg 
13197330f729Sjoerg   // Compute the smallest VF at which the store and load would be misaligned.
13207330f729Sjoerg   for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
13217330f729Sjoerg        VF *= 2) {
13227330f729Sjoerg     // If the number of vector iterations between the store and the load is
13237330f729Sjoerg     // small, we could incur conflicts.
13247330f729Sjoerg     if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
1325*82d56013Sjoerg       MaxVFWithoutSLForwardIssues = (VF >> 1);
13267330f729Sjoerg       break;
13277330f729Sjoerg     }
13287330f729Sjoerg   }
13297330f729Sjoerg 
13307330f729Sjoerg   if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
13317330f729Sjoerg     LLVM_DEBUG(
13327330f729Sjoerg         dbgs() << "LAA: Distance " << Distance
13337330f729Sjoerg                << " that could cause a store-load forwarding conflict\n");
13347330f729Sjoerg     return true;
13357330f729Sjoerg   }
13367330f729Sjoerg 
13377330f729Sjoerg   if (MaxVFWithoutSLForwardIssues < MaxSafeDepDistBytes &&
13387330f729Sjoerg       MaxVFWithoutSLForwardIssues !=
13397330f729Sjoerg           VectorizerParams::MaxVectorWidth * TypeByteSize)
13407330f729Sjoerg     MaxSafeDepDistBytes = MaxVFWithoutSLForwardIssues;
13417330f729Sjoerg   return false;
13427330f729Sjoerg }
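
// A worked example with hypothetical values: Distance == 12 bytes and
// TypeByteSize == 4, so NumItersForStoreLoadThroughMemory == 32. At
// VF == 8, 12 % 8 != 0 and 12 / 8 == 1 < 32, so the maximum VF without
// forwarding issues is halved to 4. Since 4 < 2 * TypeByteSize == 8, the
// function reports a possible store-load forwarding conflict (assuming
// MaxSafeDepDistBytes has not already been lowered below 8).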
13437330f729Sjoerg 
13447330f729Sjoerg void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
13457330f729Sjoerg   if (Status < S)
13467330f729Sjoerg     Status = S;
13477330f729Sjoerg }
13487330f729Sjoerg 
13497330f729Sjoerg /// Given a non-constant (unknown) dependence-distance \p Dist between two
13507330f729Sjoerg /// memory accesses, that have the same stride whose absolute value is given
13517330f729Sjoerg /// in \p Stride, and that have the same type size \p TypeByteSize,
13527330f729Sjoerg /// in a loop whose backedge-taken count is \p BackedgeTakenCount, check if it
13537330f729Sjoerg /// possible to prove statically that the dependence distance is larger
13547330f729Sjoerg /// than the range that the accesses will travel through the execution of
13557330f729Sjoerg /// the loop. If so, return true; false otherwise. This is useful for
13567330f729Sjoerg /// example in loops such as the following (PR31098):
13577330f729Sjoerg ///     for (i = 0; i < D; ++i) {
13587330f729Sjoerg ///                = out[i];
13597330f729Sjoerg ///       out[i+D] =
13607330f729Sjoerg ///     }
13617330f729Sjoerg static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
13627330f729Sjoerg                                      const SCEV &BackedgeTakenCount,
13637330f729Sjoerg                                      const SCEV &Dist, uint64_t Stride,
13647330f729Sjoerg                                      uint64_t TypeByteSize) {
13657330f729Sjoerg 
13667330f729Sjoerg   // If we can prove that
13677330f729Sjoerg   //      (**) |Dist| > BackedgeTakenCount * Step
13687330f729Sjoerg   // where Step is the absolute stride of the memory accesses in bytes,
13697330f729Sjoerg   // then there is no dependence.
13707330f729Sjoerg   //
13717330f729Sjoerg   // Rationale:
13727330f729Sjoerg   // We basically want to check if the absolute distance (|Dist/Step|)
13737330f729Sjoerg   // is >= the loop iteration count (or > BackedgeTakenCount).
13747330f729Sjoerg   // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
13757330f729Sjoerg   // Section 4.2.1); Note, that for vectorization it is sufficient to prove
13767330f729Sjoerg   // that the dependence distance is >= VF; This is checked elsewhere.
13777330f729Sjoerg   // But in some cases we can prune unknown dependence distances early, and
13787330f729Sjoerg   // even before selecting the VF, and without a runtime test, by comparing
13797330f729Sjoerg   // the distance against the loop iteration count. Since the vectorized code
13807330f729Sjoerg   // will be executed only if LoopCount >= VF, proving distance >= LoopCount
13817330f729Sjoerg   // also guarantees that distance >= VF.
13827330f729Sjoerg   //
13837330f729Sjoerg   const uint64_t ByteStride = Stride * TypeByteSize;
13847330f729Sjoerg   const SCEV *Step = SE.getConstant(BackedgeTakenCount.getType(), ByteStride);
13857330f729Sjoerg   const SCEV *Product = SE.getMulExpr(&BackedgeTakenCount, Step);
13867330f729Sjoerg 
13877330f729Sjoerg   const SCEV *CastedDist = &Dist;
13887330f729Sjoerg   const SCEV *CastedProduct = Product;
13897330f729Sjoerg   uint64_t DistTypeSize = DL.getTypeAllocSize(Dist.getType());
13907330f729Sjoerg   uint64_t ProductTypeSize = DL.getTypeAllocSize(Product->getType());
13917330f729Sjoerg 
13927330f729Sjoerg   // The dependence distance can be positive/negative, so we sign extend Dist;
13937330f729Sjoerg   // The multiplication of the absolute stride in bytes and the
13947330f729Sjoerg   // backedgeTakenCount is non-negative, so we zero extend Product.
13957330f729Sjoerg   if (DistTypeSize > ProductTypeSize)
13967330f729Sjoerg     CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
13977330f729Sjoerg   else
13987330f729Sjoerg     CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());
13997330f729Sjoerg 
14007330f729Sjoerg   // Is  Dist - (BackedgeTakenCount * Step) > 0 ?
14017330f729Sjoerg   // (If so, then we have proven (**) because |Dist| >= Dist)
14027330f729Sjoerg   const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
14037330f729Sjoerg   if (SE.isKnownPositive(Minus))
14047330f729Sjoerg     return true;
14057330f729Sjoerg 
14067330f729Sjoerg   // Second try: Is  -Dist - (BackedgeTakenCount * Step) > 0 ?
14077330f729Sjoerg   // (If so, then we have proven (**) because |Dist| >= -1*Dist)
14087330f729Sjoerg   const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
14097330f729Sjoerg   Minus = SE.getMinusSCEV(NegDist, CastedProduct);
14107330f729Sjoerg   if (SE.isKnownPositive(Minus))
14117330f729Sjoerg     return true;
14127330f729Sjoerg 
14137330f729Sjoerg   return false;
14147330f729Sjoerg }
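
// A worked instance of (**), using hypothetical numbers for the loop in the
// documentation comment above: with D == 32 i32 elements, Dist == 128 bytes,
// Stride == 1 and TypeByteSize == 4, we get ByteStride == 4 and
// BackedgeTakenCount == 31, so Dist - 31 * 4 == 4 is known positive and the
// accesses are proven independent without any runtime check.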
14157330f729Sjoerg 
14167330f729Sjoerg /// Check the dependence for two accesses with the same stride \p Stride.
14177330f729Sjoerg /// \p Distance is the positive distance and \p TypeByteSize is type size in
14187330f729Sjoerg /// bytes.
14197330f729Sjoerg ///
14207330f729Sjoerg /// \returns true if they are independent.
14217330f729Sjoerg static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
14227330f729Sjoerg                                           uint64_t TypeByteSize) {
14237330f729Sjoerg   assert(Stride > 1 && "The stride must be greater than 1");
14247330f729Sjoerg   assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
14257330f729Sjoerg   assert(Distance > 0 && "The distance must be non-zero");
14267330f729Sjoerg 
14277330f729Sjoerg   // Skip if the distance is not a multiple of the type byte size.
14287330f729Sjoerg   if (Distance % TypeByteSize)
14297330f729Sjoerg     return false;
14307330f729Sjoerg 
14317330f729Sjoerg   uint64_t ScaledDist = Distance / TypeByteSize;
14327330f729Sjoerg 
14337330f729Sjoerg   // No dependence if the scaled distance is not a multiple of the stride.
14347330f729Sjoerg   // E.g.
14357330f729Sjoerg   //      for (i = 0; i < 1024 ; i += 4)
14367330f729Sjoerg   //        A[i+2] = A[i] + 1;
14377330f729Sjoerg   //
14387330f729Sjoerg   // Two accesses in memory (scaled distance is 2, stride is 4):
14397330f729Sjoerg   //     | A[0] |      |      |      | A[4] |      |      |      |
14407330f729Sjoerg   //     |      |      | A[2] |      |      |      | A[6] |      |
14417330f729Sjoerg   //
14427330f729Sjoerg   // E.g.
14437330f729Sjoerg   //      for (i = 0; i < 1024 ; i += 3)
14447330f729Sjoerg   //        A[i+4] = A[i] + 1;
14457330f729Sjoerg   //
14467330f729Sjoerg   // Two accesses in memory (scaled distance is 4, stride is 3):
14477330f729Sjoerg   //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
14487330f729Sjoerg   //     |      |      |      |      | A[4] |      |      | A[7] |      |
14497330f729Sjoerg   return ScaledDist % Stride;
14507330f729Sjoerg }
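
// Tracing the first example above with concrete numbers: Distance == 2
// elements * 4 bytes == 8 and Stride == 4, so ScaledDist == 2 and
// 2 % 4 != 0: independent. In the second example ScaledDist == 4 and
// 4 % 3 != 0, so those accesses are independent as well.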
14517330f729Sjoerg 
14527330f729Sjoerg MemoryDepChecker::Dependence::DepType
14537330f729Sjoerg MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
14547330f729Sjoerg                               const MemAccessInfo &B, unsigned BIdx,
14557330f729Sjoerg                               const ValueToValueMap &Strides) {
14567330f729Sjoerg   assert(AIdx < BIdx && "Must pass arguments in program order");
14577330f729Sjoerg 
14587330f729Sjoerg   Value *APtr = A.getPointer();
14597330f729Sjoerg   Value *BPtr = B.getPointer();
14607330f729Sjoerg   bool AIsWrite = A.getInt();
14617330f729Sjoerg   bool BIsWrite = B.getInt();
14627330f729Sjoerg 
14637330f729Sjoerg   // Two reads are independent.
14647330f729Sjoerg   if (!AIsWrite && !BIsWrite)
14657330f729Sjoerg     return Dependence::NoDep;
14667330f729Sjoerg 
14677330f729Sjoerg   // We cannot check pointers in different address spaces.
14687330f729Sjoerg   if (APtr->getType()->getPointerAddressSpace() !=
14697330f729Sjoerg       BPtr->getType()->getPointerAddressSpace())
14707330f729Sjoerg     return Dependence::Unknown;
14717330f729Sjoerg 
14727330f729Sjoerg   int64_t StrideAPtr = getPtrStride(PSE, APtr, InnermostLoop, Strides, true);
14737330f729Sjoerg   int64_t StrideBPtr = getPtrStride(PSE, BPtr, InnermostLoop, Strides, true);
14747330f729Sjoerg 
14757330f729Sjoerg   const SCEV *Src = PSE.getSCEV(APtr);
14767330f729Sjoerg   const SCEV *Sink = PSE.getSCEV(BPtr);
14777330f729Sjoerg 
14787330f729Sjoerg   // If the induction step is negative we have to invert source and sink of the
14797330f729Sjoerg   // dependence.
14807330f729Sjoerg   if (StrideAPtr < 0) {
14817330f729Sjoerg     std::swap(APtr, BPtr);
14827330f729Sjoerg     std::swap(Src, Sink);
14837330f729Sjoerg     std::swap(AIsWrite, BIsWrite);
14847330f729Sjoerg     std::swap(AIdx, BIdx);
14857330f729Sjoerg     std::swap(StrideAPtr, StrideBPtr);
14867330f729Sjoerg   }
14877330f729Sjoerg 
14887330f729Sjoerg   const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src);
14897330f729Sjoerg 
14907330f729Sjoerg   LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
14917330f729Sjoerg                     << "(Induction step: " << StrideAPtr << ")\n");
14927330f729Sjoerg   LLVM_DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to "
14937330f729Sjoerg                     << *InstMap[BIdx] << ": " << *Dist << "\n");
14947330f729Sjoerg 
14957330f729Sjoerg   // We need accesses with a constant stride. We don't want to vectorize
14967330f729Sjoerg   // "A[B[i]] += ..." and similar code, or pointer arithmetic that could wrap
14977330f729Sjoerg   // in the address space.
14987330f729Sjoerg   if (!StrideAPtr || !StrideBPtr || StrideAPtr != StrideBPtr) {
14997330f729Sjoerg     LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
15007330f729Sjoerg     return Dependence::Unknown;
15017330f729Sjoerg   }
15027330f729Sjoerg 
15037330f729Sjoerg   Type *ATy = APtr->getType()->getPointerElementType();
15047330f729Sjoerg   Type *BTy = BPtr->getType()->getPointerElementType();
15057330f729Sjoerg   auto &DL = InnermostLoop->getHeader()->getModule()->getDataLayout();
15067330f729Sjoerg   uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
15077330f729Sjoerg   uint64_t Stride = std::abs(StrideAPtr);
15087330f729Sjoerg   const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);
15097330f729Sjoerg   if (!C) {
15107330f729Sjoerg     if (TypeByteSize == DL.getTypeAllocSize(BTy) &&
15117330f729Sjoerg         isSafeDependenceDistance(DL, *(PSE.getSE()),
15127330f729Sjoerg                                  *(PSE.getBackedgeTakenCount()), *Dist, Stride,
15137330f729Sjoerg                                  TypeByteSize))
15147330f729Sjoerg       return Dependence::NoDep;
15157330f729Sjoerg 
15167330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Dependence because of non-constant distance\n");
15177330f729Sjoerg     FoundNonConstantDistanceDependence = true;
15187330f729Sjoerg     return Dependence::Unknown;
15197330f729Sjoerg   }
15207330f729Sjoerg 
15217330f729Sjoerg   const APInt &Val = C->getAPInt();
15227330f729Sjoerg   int64_t Distance = Val.getSExtValue();
15237330f729Sjoerg 
15247330f729Sjoerg   // Attempt to prove strided accesses independent.
15257330f729Sjoerg   if (std::abs(Distance) > 0 && Stride > 1 && ATy == BTy &&
15267330f729Sjoerg       areStridedAccessesIndependent(std::abs(Distance), Stride, TypeByteSize)) {
15277330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
15287330f729Sjoerg     return Dependence::NoDep;
15297330f729Sjoerg   }
15307330f729Sjoerg 
15317330f729Sjoerg   // Negative distances are not plausible dependencies.
15327330f729Sjoerg   if (Val.isNegative()) {
15337330f729Sjoerg     bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
15347330f729Sjoerg     if (IsTrueDataDependence && EnableForwardingConflictDetection &&
15357330f729Sjoerg         (couldPreventStoreLoadForward(Val.abs().getZExtValue(), TypeByteSize) ||
15367330f729Sjoerg          ATy != BTy)) {
15377330f729Sjoerg       LLVM_DEBUG(dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
15387330f729Sjoerg       return Dependence::ForwardButPreventsForwarding;
15397330f729Sjoerg     }
15407330f729Sjoerg 
15417330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
15427330f729Sjoerg     return Dependence::Forward;
15437330f729Sjoerg   }
15447330f729Sjoerg 
15457330f729Sjoerg   // Write to the same location with the same size.
15467330f729Sjoerg   // Could be improved to assert type sizes are the same (i32 == float, etc).
15477330f729Sjoerg   if (Val == 0) {
15487330f729Sjoerg     if (ATy == BTy)
15497330f729Sjoerg       return Dependence::Forward;
15507330f729Sjoerg     LLVM_DEBUG(
15517330f729Sjoerg         dbgs() << "LAA: Zero dependence difference but different types\n");
15527330f729Sjoerg     return Dependence::Unknown;
15537330f729Sjoerg   }
15547330f729Sjoerg 
15557330f729Sjoerg   assert(Val.isStrictlyPositive() && "Expect a positive value");
15567330f729Sjoerg 
15577330f729Sjoerg   if (ATy != BTy) {
15587330f729Sjoerg     LLVM_DEBUG(
15597330f729Sjoerg         dbgs()
15607330f729Sjoerg         << "LAA: ReadWrite-Write positive dependency with different types\n");
15617330f729Sjoerg     return Dependence::Unknown;
15627330f729Sjoerg   }
15637330f729Sjoerg 
15647330f729Sjoerg   // Bail out early if passed-in parameters make vectorization not feasible.
15657330f729Sjoerg   unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
15667330f729Sjoerg                            VectorizerParams::VectorizationFactor : 1);
15677330f729Sjoerg   unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
15687330f729Sjoerg                            VectorizerParams::VectorizationInterleave : 1);
15697330f729Sjoerg   // The minimum number of iterations for a vectorized/unrolled version.
15707330f729Sjoerg   unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);
15717330f729Sjoerg 
15727330f729Sjoerg   // It's not vectorizable if the distance is smaller than the minimum distance
15737330f729Sjoerg   // needed for a vectorized/unrolled version. Vectorizing one iteration in
15747330f729Sjoerg   // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
15757330f729Sjoerg   // TypeByteSize (no need to add the last gap distance).
15767330f729Sjoerg   //
15777330f729Sjoerg   // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
15787330f729Sjoerg   //      foo(int *A) {
15797330f729Sjoerg   //        int *B = (int *)((char *)A + 14);
15807330f729Sjoerg   //        for (i = 0 ; i < 1024 ; i += 2)
15817330f729Sjoerg   //          B[i] = A[i] + 1;
15827330f729Sjoerg   //      }
15837330f729Sjoerg   //
15847330f729Sjoerg   // Two accesses in memory (stride is 2):
15857330f729Sjoerg   //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
15867330f729Sjoerg   //                              | B[0] |      | B[2] |      | B[4] |
15877330f729Sjoerg   //
15887330f729Sjoerg   // Distance needed for vectorizing iterations except the last iteration:
15897330f729Sjoerg   // 4 * 2 * (MinNumIter - 1). Distance needed for the last iteration: 4.
15907330f729Sjoerg   // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
15917330f729Sjoerg   //
15927330f729Sjoerg   // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
15937330f729Sjoerg   // 12, which is less than the distance of 14.
15947330f729Sjoerg   //
15957330f729Sjoerg   // If MinNumIter is 4 (say, if a user forces the vectorization factor to be
15967330f729Sjoerg   // 4), the minimum distance needed is 28, which is greater than the
15977330f729Sjoerg   // distance. It is not safe to do vectorization.
15987330f729Sjoerg   uint64_t MinDistanceNeeded =
15997330f729Sjoerg       TypeByteSize * Stride * (MinNumIter - 1) + TypeByteSize;
16007330f729Sjoerg   if (MinDistanceNeeded > static_cast<uint64_t>(Distance)) {
16017330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Failure because of positive distance "
16027330f729Sjoerg                       << Distance << '\n');
16037330f729Sjoerg     return Dependence::Backward;
16047330f729Sjoerg   }
16057330f729Sjoerg 
16067330f729Sjoerg   // Unsafe if the minimum distance needed is greater than max safe distance.
16077330f729Sjoerg   if (MinDistanceNeeded > MaxSafeDepDistBytes) {
16087330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least "
16097330f729Sjoerg                       << MinDistanceNeeded << " size in bytes");
16107330f729Sjoerg     return Dependence::Backward;
16117330f729Sjoerg   }
16127330f729Sjoerg 
16137330f729Sjoerg   // Positive distance bigger than max vectorization factor.
16147330f729Sjoerg   // FIXME: Should use max factor instead of max distance in bytes, which could
16157330f729Sjoerg   // not handle different types.
16167330f729Sjoerg   // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
16177330f729Sjoerg   //      void foo (int *A, char *B) {
16187330f729Sjoerg   //        for (unsigned i = 0; i < 1024; i++) {
16197330f729Sjoerg   //          A[i+2] = A[i] + 1;
16207330f729Sjoerg   //          B[i+2] = B[i] + 1;
16217330f729Sjoerg   //        }
16227330f729Sjoerg   //      }
16237330f729Sjoerg   //
16247330f729Sjoerg   // This case is currently unsafe according to the max safe distance. If we
16257330f729Sjoerg   // analyze the two accesses on array B, the max safe dependence distance
16267330f729Sjoerg   // is 2. Then we analyze the accesses on array A, where the minimum distance
16277330f729Sjoerg   // needed is 8, which is greater than 2 and therefore forbids vectorization.
16287330f729Sjoerg   // But actually both A and B could be vectorized by 2 iterations.
16297330f729Sjoerg   MaxSafeDepDistBytes =
16307330f729Sjoerg       std::min(static_cast<uint64_t>(Distance), MaxSafeDepDistBytes);
16317330f729Sjoerg 
16327330f729Sjoerg   bool IsTrueDataDependence = (!AIsWrite && BIsWrite);
16337330f729Sjoerg   if (IsTrueDataDependence && EnableForwardingConflictDetection &&
16347330f729Sjoerg       couldPreventStoreLoadForward(Distance, TypeByteSize))
16357330f729Sjoerg     return Dependence::BackwardVectorizableButPreventsForwarding;
16367330f729Sjoerg 
16377330f729Sjoerg   uint64_t MaxVF = MaxSafeDepDistBytes / (TypeByteSize * Stride);
16387330f729Sjoerg   LLVM_DEBUG(dbgs() << "LAA: Positive distance " << Val.getSExtValue()
16397330f729Sjoerg                     << " with max VF = " << MaxVF << '\n');
16407330f729Sjoerg   uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8;
1641*82d56013Sjoerg   MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits);
16427330f729Sjoerg   return Dependence::BackwardVectorizable;
16437330f729Sjoerg }
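
// A worked end-to-end example with hypothetical values, assuming store-load
// forwarding is not a concern: for a positive constant Distance of 16 bytes,
// TypeByteSize == 4, Stride == 1 and no forced VF/interleave
// (MinNumIter == 2), MinDistanceNeeded is 4 * 1 * (2 - 1) + 4 == 8 <= 16,
// so the result is BackwardVectorizable with MaxSafeDepDistBytes clamped to
// 16 and a maximum VF of 16 / 4 == 4.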
16447330f729Sjoerg 
16457330f729Sjoerg bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets,
16467330f729Sjoerg                                    MemAccessInfoList &CheckDeps,
16477330f729Sjoerg                                    const ValueToValueMap &Strides) {
16487330f729Sjoerg 
16497330f729Sjoerg   MaxSafeDepDistBytes = -1;
16507330f729Sjoerg   SmallPtrSet<MemAccessInfo, 8> Visited;
16517330f729Sjoerg   for (MemAccessInfo CurAccess : CheckDeps) {
16527330f729Sjoerg     if (Visited.count(CurAccess))
16537330f729Sjoerg       continue;
16547330f729Sjoerg 
16557330f729Sjoerg     // Get the relevant memory access set.
16567330f729Sjoerg     EquivalenceClasses<MemAccessInfo>::iterator I =
16577330f729Sjoerg       AccessSets.findValue(AccessSets.getLeaderValue(CurAccess));
16587330f729Sjoerg 
16597330f729Sjoerg     // Check accesses within this set.
16607330f729Sjoerg     EquivalenceClasses<MemAccessInfo>::member_iterator AI =
16617330f729Sjoerg         AccessSets.member_begin(I);
16627330f729Sjoerg     EquivalenceClasses<MemAccessInfo>::member_iterator AE =
16637330f729Sjoerg         AccessSets.member_end();
16647330f729Sjoerg 
16657330f729Sjoerg     // Check every access pair.
16667330f729Sjoerg     while (AI != AE) {
16677330f729Sjoerg       Visited.insert(*AI);
16687330f729Sjoerg       bool AIIsWrite = AI->getInt();
16697330f729Sjoerg       // Check loads only against subsequent members, but check stores also
16707330f729Sjoerg       // against other stores to the same address in the equivalence class.
16717330f729Sjoerg       EquivalenceClasses<MemAccessInfo>::member_iterator OI =
16727330f729Sjoerg           (AIIsWrite ? AI : std::next(AI));
16737330f729Sjoerg       while (OI != AE) {
16747330f729Sjoerg         // Check every accessing instruction pair in program order.
16757330f729Sjoerg         for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(),
16767330f729Sjoerg              I1E = Accesses[*AI].end(); I1 != I1E; ++I1)
16777330f729Sjoerg           // Scan all accesses of the other member, but only the subsequent
16787330f729Sjoerg           // accesses of the same member.
16797330f729Sjoerg           for (std::vector<unsigned>::iterator
16807330f729Sjoerg                    I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()),
16817330f729Sjoerg                    I2E = (OI == AI ? I1E : Accesses[*OI].end());
16827330f729Sjoerg                I2 != I2E; ++I2) {
16837330f729Sjoerg             auto A = std::make_pair(&*AI, *I1);
16847330f729Sjoerg             auto B = std::make_pair(&*OI, *I2);
16857330f729Sjoerg 
16867330f729Sjoerg             assert(*I1 != *I2);
16877330f729Sjoerg             if (*I1 > *I2)
16887330f729Sjoerg               std::swap(A, B);
16897330f729Sjoerg 
16907330f729Sjoerg             Dependence::DepType Type =
16917330f729Sjoerg                 isDependent(*A.first, A.second, *B.first, B.second, Strides);
16927330f729Sjoerg             mergeInStatus(Dependence::isSafeForVectorization(Type));
16937330f729Sjoerg 
16947330f729Sjoerg             // Gather dependences unless we have accumulated MaxDependences
16957330f729Sjoerg             // dependences.  In that case return as soon as we find the first
16967330f729Sjoerg             // unsafe dependence.  This puts a limit on this quadratic
16977330f729Sjoerg             // algorithm.
16987330f729Sjoerg             if (RecordDependences) {
16997330f729Sjoerg               if (Type != Dependence::NoDep)
17007330f729Sjoerg                 Dependences.push_back(Dependence(A.second, B.second, Type));
17017330f729Sjoerg 
17027330f729Sjoerg               if (Dependences.size() >= MaxDependences) {
17037330f729Sjoerg                 RecordDependences = false;
17047330f729Sjoerg                 Dependences.clear();
17057330f729Sjoerg                 LLVM_DEBUG(dbgs()
17067330f729Sjoerg                            << "Too many dependences, stopped recording\n");
17077330f729Sjoerg               }
17087330f729Sjoerg             }
17097330f729Sjoerg             if (!RecordDependences && !isSafeForVectorization())
17107330f729Sjoerg               return false;
17117330f729Sjoerg           }
17127330f729Sjoerg         ++OI;
17137330f729Sjoerg       }
17147330f729Sjoerg       ++AI;
17157330f729Sjoerg     }
17167330f729Sjoerg   }
17177330f729Sjoerg 
17187330f729Sjoerg   LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n");
17197330f729Sjoerg   return isSafeForVectorization();
17207330f729Sjoerg }
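
// A sketch of the enumeration above (accesses hypothetical): if a dependence
// set holds a write W and a read R, every instruction of W is paired with
// the other instructions of W (stores to the same address) and with all
// instructions of R, while R, being a read, is only paired with members that
// follow it. Each pair reaches isDependent with the earlier instruction
// first in program order.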
17217330f729Sjoerg 
17227330f729Sjoerg SmallVector<Instruction *, 4>
17237330f729Sjoerg MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool isWrite) const {
17247330f729Sjoerg   MemAccessInfo Access(Ptr, isWrite);
17257330f729Sjoerg   auto &IndexVector = Accesses.find(Access)->second;
17267330f729Sjoerg 
17277330f729Sjoerg   SmallVector<Instruction *, 4> Insts;
17287330f729Sjoerg   transform(IndexVector,
17297330f729Sjoerg                  std::back_inserter(Insts),
17307330f729Sjoerg                  [&](unsigned Idx) { return this->InstMap[Idx]; });
17317330f729Sjoerg   return Insts;
17327330f729Sjoerg }
17337330f729Sjoerg 
17347330f729Sjoerg const char *MemoryDepChecker::Dependence::DepName[] = {
17357330f729Sjoerg     "NoDep", "Unknown", "Forward", "ForwardButPreventsForwarding", "Backward",
17367330f729Sjoerg     "BackwardVectorizable", "BackwardVectorizableButPreventsForwarding"};
17377330f729Sjoerg 
17387330f729Sjoerg void MemoryDepChecker::Dependence::print(
17397330f729Sjoerg     raw_ostream &OS, unsigned Depth,
17407330f729Sjoerg     const SmallVectorImpl<Instruction *> &Instrs) const {
17417330f729Sjoerg   OS.indent(Depth) << DepName[Type] << ":\n";
17427330f729Sjoerg   OS.indent(Depth + 2) << *Instrs[Source] << " -> \n";
17437330f729Sjoerg   OS.indent(Depth + 2) << *Instrs[Destination] << "\n";
17447330f729Sjoerg }
17457330f729Sjoerg 
17467330f729Sjoerg bool LoopAccessInfo::canAnalyzeLoop() {
17477330f729Sjoerg   // We need to have a loop header.
17487330f729Sjoerg   LLVM_DEBUG(dbgs() << "LAA: Found a loop in "
17497330f729Sjoerg                     << TheLoop->getHeader()->getParent()->getName() << ": "
17507330f729Sjoerg                     << TheLoop->getHeader()->getName() << '\n');
17517330f729Sjoerg 
17527330f729Sjoerg   // We can only analyze innermost loops.
1753*82d56013Sjoerg   if (!TheLoop->isInnermost()) {
17547330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n");
17557330f729Sjoerg     recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop";
17567330f729Sjoerg     return false;
17577330f729Sjoerg   }
17587330f729Sjoerg 
17597330f729Sjoerg   // We must have a single backedge.
17607330f729Sjoerg   if (TheLoop->getNumBackEdges() != 1) {
17617330f729Sjoerg     LLVM_DEBUG(
17627330f729Sjoerg         dbgs() << "LAA: loop control flow is not understood by analyzer\n");
17637330f729Sjoerg     recordAnalysis("CFGNotUnderstood")
17647330f729Sjoerg         << "loop control flow is not understood by analyzer";
17657330f729Sjoerg     return false;
17667330f729Sjoerg   }
17677330f729Sjoerg 
17687330f729Sjoerg   // ScalarEvolution needs to be able to find the exit count.
17697330f729Sjoerg   const SCEV *ExitCount = PSE->getBackedgeTakenCount();
1770*82d56013Sjoerg   if (isa<SCEVCouldNotCompute>(ExitCount)) {
17717330f729Sjoerg     recordAnalysis("CantComputeNumberOfIterations")
17727330f729Sjoerg         << "could not determine number of loop iterations";
17737330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n");
17747330f729Sjoerg     return false;
17757330f729Sjoerg   }
17767330f729Sjoerg 
17777330f729Sjoerg   return true;
17787330f729Sjoerg }
17797330f729Sjoerg 
1780*82d56013Sjoerg void LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI,
17817330f729Sjoerg                                  const TargetLibraryInfo *TLI,
17827330f729Sjoerg                                  DominatorTree *DT) {
17837330f729Sjoerg   typedef SmallPtrSet<Value*, 16> ValueSet;
17847330f729Sjoerg 
17857330f729Sjoerg   // Holds the Load and Store instructions.
17867330f729Sjoerg   SmallVector<LoadInst *, 16> Loads;
17877330f729Sjoerg   SmallVector<StoreInst *, 16> Stores;
17887330f729Sjoerg 
17897330f729Sjoerg   // Counters for the different kinds of accesses in the loop.
17907330f729Sjoerg   unsigned NumReads = 0;
17917330f729Sjoerg   unsigned NumReadWrites = 0;
17927330f729Sjoerg 
17937330f729Sjoerg   bool HasComplexMemInst = false;
17947330f729Sjoerg 
17957330f729Sjoerg   // A runtime check is only legal to insert if there are no convergent calls.
17967330f729Sjoerg   HasConvergentOp = false;
17977330f729Sjoerg 
17987330f729Sjoerg   PtrRtChecking->Pointers.clear();
17997330f729Sjoerg   PtrRtChecking->Need = false;
18007330f729Sjoerg 
18017330f729Sjoerg   const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel();
18027330f729Sjoerg 
1803*82d56013Sjoerg   const bool EnableMemAccessVersioningOfLoop =
1804*82d56013Sjoerg       EnableMemAccessVersioning &&
1805*82d56013Sjoerg       !TheLoop->getHeader()->getParent()->hasOptSize();
1806*82d56013Sjoerg 
18077330f729Sjoerg   // For each block.
18087330f729Sjoerg   for (BasicBlock *BB : TheLoop->blocks()) {
18097330f729Sjoerg     // Scan the BB and collect legal loads and stores. Also detect any
18107330f729Sjoerg     // convergent instructions.
18117330f729Sjoerg     for (Instruction &I : *BB) {
18127330f729Sjoerg       if (auto *Call = dyn_cast<CallBase>(&I)) {
18137330f729Sjoerg         if (Call->isConvergent())
18147330f729Sjoerg           HasConvergentOp = true;
18157330f729Sjoerg       }
18167330f729Sjoerg 
18177330f729Sjoerg       // If we have found both a non-vectorizable memory instruction and a
18187330f729Sjoerg       // convergent operation in this loop, there is no reason to continue.
18197330f729Sjoerg       if (HasComplexMemInst && HasConvergentOp) {
18207330f729Sjoerg         CanVecMem = false;
18217330f729Sjoerg         return;
18227330f729Sjoerg       }
18237330f729Sjoerg 
18247330f729Sjoerg       // Avoid hitting recordAnalysis multiple times.
18257330f729Sjoerg       if (HasComplexMemInst)
18267330f729Sjoerg         continue;
18277330f729Sjoerg 
18287330f729Sjoerg       // If this is a load, save it. If this instruction can read from memory
18297330f729Sjoerg       // but is not a load, then we quit. Notice that we don't handle function
18307330f729Sjoerg       // calls that read or write.
18317330f729Sjoerg       if (I.mayReadFromMemory()) {
18327330f729Sjoerg         // Many math library functions read the rounding mode. We will only
18337330f729Sjoerg         // vectorize a loop if it contains known function calls that don't set
18347330f729Sjoerg         // the flag. Therefore, it is safe to ignore this read from memory.
18357330f729Sjoerg         auto *Call = dyn_cast<CallInst>(&I);
18367330f729Sjoerg         if (Call && getVectorIntrinsicIDForCall(Call, TLI))
18377330f729Sjoerg           continue;
18387330f729Sjoerg 
18397330f729Sjoerg         // If the function has an explicit vectorized counterpart, we can safely
18407330f729Sjoerg         // assume that it can be vectorized.
18417330f729Sjoerg         if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() &&
1842*82d56013Sjoerg             !VFDatabase::getMappings(*Call).empty())
18437330f729Sjoerg           continue;
18447330f729Sjoerg 
18457330f729Sjoerg         auto *Ld = dyn_cast<LoadInst>(&I);
18467330f729Sjoerg         if (!Ld) {
18477330f729Sjoerg           recordAnalysis("CantVectorizeInstruction", Ld)
18487330f729Sjoerg             << "instruction cannot be vectorized";
18497330f729Sjoerg           HasComplexMemInst = true;
18507330f729Sjoerg           continue;
18517330f729Sjoerg         }
18527330f729Sjoerg         if (!Ld->isSimple() && !IsAnnotatedParallel) {
18537330f729Sjoerg           recordAnalysis("NonSimpleLoad", Ld)
18547330f729Sjoerg               << "read with atomic ordering or volatile read";
18557330f729Sjoerg           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n");
18567330f729Sjoerg           HasComplexMemInst = true;
18577330f729Sjoerg           continue;
18587330f729Sjoerg         }
18597330f729Sjoerg         NumLoads++;
18607330f729Sjoerg         Loads.push_back(Ld);
18617330f729Sjoerg         DepChecker->addAccess(Ld);
1862*82d56013Sjoerg         if (EnableMemAccessVersioningOfLoop)
18637330f729Sjoerg           collectStridedAccess(Ld);
18647330f729Sjoerg         continue;
18657330f729Sjoerg       }
18667330f729Sjoerg 
18677330f729Sjoerg       // Save 'store' instructions. Abort if other instructions write to memory.
18687330f729Sjoerg       if (I.mayWriteToMemory()) {
18697330f729Sjoerg         auto *St = dyn_cast<StoreInst>(&I);
18707330f729Sjoerg         if (!St) {
18717330f729Sjoerg           recordAnalysis("CantVectorizeInstruction", St)
18727330f729Sjoerg               << "instruction cannot be vectorized";
18737330f729Sjoerg           HasComplexMemInst = true;
18747330f729Sjoerg           continue;
18757330f729Sjoerg         }
18767330f729Sjoerg         if (!St->isSimple() && !IsAnnotatedParallel) {
18777330f729Sjoerg           recordAnalysis("NonSimpleStore", St)
18787330f729Sjoerg               << "write with atomic ordering or volatile write";
18797330f729Sjoerg           LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n");
18807330f729Sjoerg           HasComplexMemInst = true;
18817330f729Sjoerg           continue;
18827330f729Sjoerg         }
18837330f729Sjoerg         NumStores++;
18847330f729Sjoerg         Stores.push_back(St);
18857330f729Sjoerg         DepChecker->addAccess(St);
1886*82d56013Sjoerg         if (EnableMemAccessVersioningOfLoop)
18877330f729Sjoerg           collectStridedAccess(St);
18887330f729Sjoerg       }
18897330f729Sjoerg     } // Next instr.
18907330f729Sjoerg   } // Next block.
18917330f729Sjoerg 
18927330f729Sjoerg   if (HasComplexMemInst) {
18937330f729Sjoerg     CanVecMem = false;
18947330f729Sjoerg     return;
18957330f729Sjoerg   }
18967330f729Sjoerg 
18977330f729Sjoerg   // Now we have two lists that hold the loads and the stores.
18987330f729Sjoerg   // Next, we find the pointers that they use.
18997330f729Sjoerg 
19007330f729Sjoerg   // Check if we see any stores. If there are no stores, then we don't
19017330f729Sjoerg   // care if the pointers are *restrict*.
19027330f729Sjoerg   if (Stores.empty()) {
19037330f729Sjoerg     LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n");
19047330f729Sjoerg     CanVecMem = true;
19057330f729Sjoerg     return;
19067330f729Sjoerg   }
19077330f729Sjoerg 
19087330f729Sjoerg   MemoryDepChecker::DepCandidates DependentAccesses;
1909*82d56013Sjoerg   AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE);
19107330f729Sjoerg 
1911*82d56013Sjoerg   // Holds the analyzed pointers. We don't want to call getUnderlyingObjects
19127330f729Sjoerg   // multiple times on the same object. If the ptr is accessed twice, once
19137330f729Sjoerg   // for read and once for write, it will only appear once (on the write
19147330f729Sjoerg   // list). This is okay, since we are going to check for conflicts between
19157330f729Sjoerg   // writes and between reads and writes, but not between reads and reads.
19167330f729Sjoerg   ValueSet Seen;
19177330f729Sjoerg 
19187330f729Sjoerg   // Record uniform store addresses to identify if we have multiple stores
19197330f729Sjoerg   // to the same address.
19207330f729Sjoerg   ValueSet UniformStores;
19217330f729Sjoerg 
19227330f729Sjoerg   for (StoreInst *ST : Stores) {
19237330f729Sjoerg     Value *Ptr = ST->getPointerOperand();
19247330f729Sjoerg 
19257330f729Sjoerg     if (isUniform(Ptr))
19267330f729Sjoerg       HasDependenceInvolvingLoopInvariantAddress |=
19277330f729Sjoerg           !UniformStores.insert(Ptr).second;
19287330f729Sjoerg 
19297330f729Sjoerg     // If we did *not* see this pointer before, insert it into the read-write
19307330f729Sjoerg     // list. At this phase it is only a 'write' list.
19317330f729Sjoerg     if (Seen.insert(Ptr).second) {
19327330f729Sjoerg       ++NumReadWrites;
19337330f729Sjoerg 
19347330f729Sjoerg       MemoryLocation Loc = MemoryLocation::get(ST);
19357330f729Sjoerg       // The TBAA metadata could have a control dependency on the predication
19367330f729Sjoerg       // condition, so we cannot rely on it when determining whether or not we
19377330f729Sjoerg       // need runtime pointer checks.
19387330f729Sjoerg       if (blockNeedsPredication(ST->getParent(), TheLoop, DT))
19397330f729Sjoerg         Loc.AATags.TBAA = nullptr;
19407330f729Sjoerg 
1941*82d56013Sjoerg       // SCEV does not look through non-header PHIs inside the loop. Such phis
1942*82d56013Sjoerg       // can be analyzed by adding separate accesses for each incoming pointer
1943*82d56013Sjoerg       // value.
1944*82d56013Sjoerg       auto *PN = dyn_cast<PHINode>(Loc.Ptr);
1945*82d56013Sjoerg       if (PN && TheLoop->contains(PN->getParent()) &&
1946*82d56013Sjoerg           PN->getParent() != TheLoop->getHeader()) {
1947*82d56013Sjoerg         for (const Use &Inc : PN->incoming_values()) {
1948*82d56013Sjoerg           MemoryLocation NewLoc = Loc.getWithNewPtr(Inc);
1949*82d56013Sjoerg           Accesses.addStore(NewLoc);
1950*82d56013Sjoerg         }
1951*82d56013Sjoerg       } else
19527330f729Sjoerg         Accesses.addStore(Loc);
19537330f729Sjoerg     }
19547330f729Sjoerg   }
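  // As a hypothetical IR sketch of the non-header PHI case handled above
  // (and mirrored for loads below): a pointer merged in the loop body,
  //   body:  %p = phi i32* [ %a, %if.then ], [ %b, %if.else ]
  //          store i32 0, i32* %p
  // is opaque to SCEV, so we record one store access for %a and one for %b.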

  if (IsAnnotatedParallel) {
    LLVM_DEBUG(
        dbgs() << "LAA: Loop is annotated parallel; ignoring memory dependency "
               << "checks.\n");
    CanVecMem = true;
    return;
  }

  for (LoadInst *LD : Loads) {
    Value *Ptr = LD->getPointerOperand();
    // If we did *not* see this pointer before, insert it into the
    // read list. If we *did* see it before, then it is already in
    // the read-write list. This allows us to vectorize expressions
    // such as A[i] += x, because the address of A[i] is a read-write
    // pointer. This only works if the index of A[i] is consecutive.
    // If the index is unknown (for example A[B[i]]) then we may
    // read a few words, modify, and write a few words, and some of the
    // words may be written to the same address.
    bool IsReadOnlyPtr = false;
    if (Seen.insert(Ptr).second ||
        !getPtrStride(*PSE, Ptr, TheLoop, SymbolicStrides)) {
      ++NumReads;
      IsReadOnlyPtr = true;
    }

    // See if there is an unsafe dependency between a load from a uniform
    // address and a store to the same uniform address.
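    // For example (hypothetical):
    //   for (i = 0; i < n; i++) { s = *q; *q = s + a[i]; }
    // loads from and stores to the same invariant address q on every
    // iteration, a dependence vectorization cannot preserve as-is.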
    if (UniformStores.count(Ptr)) {
      LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform "
                           "load and uniform store to the same address!\n");
      HasDependenceInvolvingLoopInvariantAddress = true;
    }

    MemoryLocation Loc = MemoryLocation::get(LD);
    // The TBAA metadata could have a control dependency on the predication
    // condition, so we cannot rely on it when determining whether or not we
    // need runtime pointer checks.
    if (blockNeedsPredication(LD->getParent(), TheLoop, DT))
      Loc.AATags.TBAA = nullptr;

    // SCEV does not look through non-header PHIs inside the loop. Such phis
    // can be analyzed by adding separate accesses for each incoming pointer
    // value.
    auto *PN = dyn_cast<PHINode>(Loc.Ptr);
    if (PN && TheLoop->contains(PN->getParent()) &&
        PN->getParent() != TheLoop->getHeader()) {
      for (const Use &Inc : PN->incoming_values()) {
        MemoryLocation NewLoc = Loc.getWithNewPtr(Inc);
        Accesses.addLoad(NewLoc, IsReadOnlyPtr);
      }
    } else
      Accesses.addLoad(Loc, IsReadOnlyPtr);
  }

  // If we write (or read-write) to a single destination and there are no
  // other reads in this loop, then it is safe to vectorize.
  if (NumReadWrites == 1 && NumReads == 0) {
    LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n");
    CanVecMem = true;
    return;
  }

  // Build dependence sets and check whether we need a runtime pointer bounds
  // check.
  Accesses.buildDependenceSets();

  // Find pointers with computable bounds. We are going to use this information
  // to place a runtime bound check.
  bool CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(),
                                                  TheLoop, SymbolicStrides);
  if (!CanDoRTIfNeeded) {
    recordAnalysis("CantIdentifyArrayBounds") << "cannot identify array bounds";
    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find "
                      << "the array bounds.\n");
    CanVecMem = false;
    return;
  }

  LLVM_DEBUG(
    dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n");

  CanVecMem = true;
  if (Accesses.isDependencyCheckNeeded()) {
    LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n");
    CanVecMem = DepChecker->areDepsSafe(
        DependentAccesses, Accesses.getDependenciesToCheck(), SymbolicStrides);
    MaxSafeDepDistBytes = DepChecker->getMaxSafeDepDistBytes();

    if (!CanVecMem && DepChecker->shouldRetryWithRuntimeCheck()) {
      LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n");

      // Clear the dependency checks. We assume they are not needed.
      Accesses.resetDepChecks(*DepChecker);

      PtrRtChecking->reset();
      PtrRtChecking->Need = true;

      auto *SE = PSE->getSE();
      CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(*PtrRtChecking, SE, TheLoop,
                                                 SymbolicStrides, true);

      // Check that we found the bounds for the pointer.
      if (!CanDoRTIfNeeded) {
        recordAnalysis("CantCheckMemDepsAtRunTime")
            << "cannot check memory dependencies at runtime";
        LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n");
        CanVecMem = false;
        return;
      }

      CanVecMem = true;
    }
  }

  if (HasConvergentOp) {
    recordAnalysis("CantInsertRuntimeCheckWithConvergent")
      << "cannot add control dependency to convergent operation";
    LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check "
                         "would be needed with a convergent operation\n");
    CanVecMem = false;
    return;
  }

  if (CanVecMem)
    LLVM_DEBUG(
        dbgs() << "LAA: No unsafe dependent memory operations in loop.  We"
               << (PtrRtChecking->Need ? "" : " don't")
               << " need runtime memory checks.\n");
  else {
    recordAnalysis("UnsafeMemDep")
        << "unsafe dependent memory operations in loop. Use "
           "#pragma loop distribute(enable) to allow loop distribution "
           "to attempt to isolate the offending operations into a separate "
           "loop";
    LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n");
  }
}

bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop,
                                           DominatorTree *DT) {
  assert(TheLoop->contains(BB) && "Unknown block used");

  // Blocks that do not dominate the latch need predication.
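  // For example (hypothetical CFG), in a loop shaped like
  //   header -> { if.then, if.else } -> latch
  // neither if.then nor if.else dominates the latch, so both execute only
  // conditionally and any memory access in them must be predicated.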
  BasicBlock *Latch = TheLoop->getLoopLatch();
  return !DT->dominates(BB, Latch);
}

OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName,
                                                           Instruction *I) {
  assert(!Report && "Multiple reports generated");

  Value *CodeRegion = TheLoop->getHeader();
  DebugLoc DL = TheLoop->getStartLoc();

  if (I) {
    CodeRegion = I->getParent();
    // If there is no debug location attached to the instruction, fall back to
    // using the loop's.
    if (I->getDebugLoc())
      DL = I->getDebugLoc();
  }

  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName,
                                                        DL, CodeRegion);
  return *Report;
}

bool LoopAccessInfo::isUniform(Value *V) const {
  auto *SE = PSE->getSE();
  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
  // never considered uniform.
  // TODO: Is this really what we want? Even without FP SCEV, we may want some
  // trivially loop-invariant FP values to be considered uniform.
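  // For example (hypothetical), an i32 value defined before the loop is
  // SCEVable and loop-invariant, hence uniform; a float computed outside the
  // loop is invariant too, but its type is not SCEVable, so it is reported
  // non-uniform here.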
  if (!SE->isSCEVable(V->getType()))
    return false;
  return SE->isLoopInvariant(SE->getSCEV(V), TheLoop);
}

void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
  Value *Ptr = getLoadStorePointerOperand(MemAccess);
  if (!Ptr)
    return;

  Value *Stride = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
  if (!Stride)
    return;

  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
                       "versioning:");
  LLVM_DEBUG(dbgs() << "  Ptr: " << *Ptr << " Stride: " << *Stride << "\n");

  // Avoid adding the "Stride == 1" predicate when we know that
  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
  // or zero iteration loop, as Trip-Count <= Stride == 1.
  //
  // TODO: We are currently not making a very informed decision on when it is
  // beneficial to apply stride versioning. It might make more sense for the
  // users of this analysis (such as the vectorizer) to trigger it, based on
  // their specific cost considerations; for example, in cases where stride
  // versioning does not help resolve memory accesses/dependences, the
  // vectorizer should evaluate the cost of the runtime test, and the benefit
  // of various possible stride specializations, considering the alternatives
  // of using gather/scatters (if available).
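  //
  // As a hypothetical worked example: in
  //   for (i = 0; i < n; i++) A[i * s] = ...;
  // if we already know s >= n, then under the versioned assumption s == 1
  // the loop runs at most one iteration (n <= s == 1), so adding the
  // predicate buys nothing and we bail out below.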

  const SCEV *StrideExpr = PSE->getSCEV(Stride);
  const SCEV *BETakenCount = PSE->getBackedgeTakenCount();

  // Match the types so we can compare the stride and the BETakenCount.
  // The Stride can be positive/negative, so we sign extend Stride;
  // The backedgeTakenCount is non-negative, so we zero extend BETakenCount.
  const DataLayout &DL = TheLoop->getHeader()->getModule()->getDataLayout();
  uint64_t StrideTypeSize = DL.getTypeAllocSize(StrideExpr->getType());
  uint64_t BETypeSize = DL.getTypeAllocSize(BETakenCount->getType());
  const SCEV *CastedStride = StrideExpr;
  const SCEV *CastedBECount = BETakenCount;
  ScalarEvolution *SE = PSE->getSE();
  if (BETypeSize >= StrideTypeSize)
    CastedStride = SE->getNoopOrSignExtend(StrideExpr, BETakenCount->getType());
  else
    CastedBECount = SE->getZeroExtendExpr(BETakenCount, StrideExpr->getType());
  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
  // Since TripCount == BackedgeTakenCount + 1, checking
  // "Stride >= TripCount" is equivalent to checking
  // "Stride - BETakenCount > 0".
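  // For instance (hypothetical numbers): BETakenCount == 3 means the loop
  // runs 4 iterations; a stride of 4 yields 4 - 3 == 1 > 0, i.e. the stride
  // is known to be at least the trip count.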
  if (SE->isKnownPositive(StrideMinusBETaken)) {
    LLVM_DEBUG(
        dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
                  "Stride==1 predicate will imply that the loop executes "
                  "at most once.\n");
    return;
  }
  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");

  SymbolicStrides[Ptr] = Stride;
  StrideSet.insert(Stride);
}

LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const TargetLibraryInfo *TLI, AAResults *AA,
                               DominatorTree *DT, LoopInfo *LI)
    : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
      PtrRtChecking(std::make_unique<RuntimePointerChecking>(SE)),
      DepChecker(std::make_unique<MemoryDepChecker>(*PSE, L)), TheLoop(L),
      NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1), CanVecMem(false),
      HasConvergentOp(false),
      HasDependenceInvolvingLoopInvariantAddress(false) {
  if (canAnalyzeLoop())
    analyzeLoop(AA, LI, TLI, DT);
}

void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  if (CanVecMem) {
    OS.indent(Depth) << "Memory dependences are safe";
    if (MaxSafeDepDistBytes != -1ULL)
      OS << " with a maximum dependence distance of " << MaxSafeDepDistBytes
         << " bytes";
    if (PtrRtChecking->Need)
      OS << " with run-time checks";
    OS << "\n";
  }

  if (HasConvergentOp)
    OS.indent(Depth) << "Has convergent operation in loop\n";

  if (Report)
    OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";

  if (auto *Dependences = DepChecker->getDependences()) {
    OS.indent(Depth) << "Dependences:\n";
    for (auto &Dep : *Dependences) {
      Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
      OS << "\n";
    }
  } else
    OS.indent(Depth) << "Too many dependences, not recorded\n";

  // List the pairs of accesses that need run-time checks to prove
  // independence.
  PtrRtChecking->print(OS, Depth);
  OS << "\n";

  OS.indent(Depth) << "Non vectorizable stores to invariant address were "
                   << (HasDependenceInvolvingLoopInvariantAddress ? "" : "not ")
                   << "found in loop.\n";

  OS.indent(Depth) << "SCEV assumptions:\n";
  PSE->getUnionPredicate().print(OS, Depth);

  OS << "\n";

  OS.indent(Depth) << "Expressions re-written:\n";
  PSE->print(OS, Depth);
}

LoopAccessLegacyAnalysis::LoopAccessLegacyAnalysis() : FunctionPass(ID) {
  initializeLoopAccessLegacyAnalysisPass(*PassRegistry::getPassRegistry());
}

const LoopAccessInfo &LoopAccessLegacyAnalysis::getInfo(Loop *L) {
  auto &LAI = LoopAccessInfoMap[L];

  if (!LAI)
    LAI = std::make_unique<LoopAccessInfo>(L, SE, TLI, AA, DT, LI);

  return *LAI;
}
print(raw_ostream & OS,const Module * M) const22617330f729Sjoerg void LoopAccessLegacyAnalysis::print(raw_ostream &OS, const Module *M) const {
22627330f729Sjoerg   LoopAccessLegacyAnalysis &LAA = *const_cast<LoopAccessLegacyAnalysis *>(this);
22637330f729Sjoerg 
22647330f729Sjoerg   for (Loop *TopLevelLoop : *LI)
22657330f729Sjoerg     for (Loop *L : depth_first(TopLevelLoop)) {
22667330f729Sjoerg       OS.indent(2) << L->getHeader()->getName() << ":\n";
22677330f729Sjoerg       auto &LAI = LAA.getInfo(L);
22687330f729Sjoerg       LAI.print(OS, 4);
22697330f729Sjoerg     }
22707330f729Sjoerg }
22717330f729Sjoerg 
bool LoopAccessLegacyAnalysis::runOnFunction(Function &F) {
  SE = &getAnalysis<ScalarEvolutionWrapperPass>().getSE();
  auto *TLIP = getAnalysisIfAvailable<TargetLibraryInfoWrapperPass>();
  TLI = TLIP ? &TLIP->getTLI(F) : nullptr;
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
  DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  LI = &getAnalysis<LoopInfoWrapperPass>().getLoopInfo();

  return false;
}

void LoopAccessLegacyAnalysis::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequiredTransitive<ScalarEvolutionWrapperPass>();
  AU.addRequiredTransitive<AAResultsWrapperPass>();
  AU.addRequiredTransitive<DominatorTreeWrapperPass>();
  AU.addRequiredTransitive<LoopInfoWrapperPass>();

  AU.setPreservesAll();
}

char LoopAccessLegacyAnalysis::ID = 0;
static const char laa_name[] = "Loop Access Analysis";
#define LAA_NAME "loop-accesses"

INITIALIZE_PASS_BEGIN(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_DEPENDENCY(ScalarEvolutionWrapperPass)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(LoopInfoWrapperPass)
INITIALIZE_PASS_END(LoopAccessLegacyAnalysis, LAA_NAME, laa_name, false, true)

AnalysisKey LoopAccessAnalysis::Key;

LoopAccessInfo LoopAccessAnalysis::run(Loop &L, LoopAnalysisManager &AM,
                                       LoopStandardAnalysisResults &AR) {
  return LoopAccessInfo(&L, &AR.SE, &AR.TLI, &AR.AA, &AR.DT, &AR.LI);
}

namespace llvm {

  Pass *createLAAPass() {
    return new LoopAccessLegacyAnalysis();
  }

} // end namespace llvm