10b57cec5SDimitry Andric //===- LoopAccessAnalysis.cpp - Loop Access Analysis Implementation --------==// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // The implementation for the loop memory dependence that was originally 100b57cec5SDimitry Andric // developed for the loop vectorizer. 110b57cec5SDimitry Andric // 120b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 130b57cec5SDimitry Andric 140b57cec5SDimitry Andric #include "llvm/Analysis/LoopAccessAnalysis.h" 150b57cec5SDimitry Andric #include "llvm/ADT/APInt.h" 160b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 170b57cec5SDimitry Andric #include "llvm/ADT/EquivalenceClasses.h" 180b57cec5SDimitry Andric #include "llvm/ADT/PointerIntPair.h" 190b57cec5SDimitry Andric #include "llvm/ADT/STLExtras.h" 200b57cec5SDimitry Andric #include "llvm/ADT/SetVector.h" 210b57cec5SDimitry Andric #include "llvm/ADT/SmallPtrSet.h" 220b57cec5SDimitry Andric #include "llvm/ADT/SmallSet.h" 230b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 240b57cec5SDimitry Andric #include "llvm/Analysis/AliasAnalysis.h" 250b57cec5SDimitry Andric #include "llvm/Analysis/AliasSetTracker.h" 260b57cec5SDimitry Andric #include "llvm/Analysis/LoopAnalysisManager.h" 270b57cec5SDimitry Andric #include "llvm/Analysis/LoopInfo.h" 28bdd1243dSDimitry Andric #include "llvm/Analysis/LoopIterator.h" 290b57cec5SDimitry Andric #include "llvm/Analysis/MemoryLocation.h" 300b57cec5SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h" 
310b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolution.h" 320b57cec5SDimitry Andric #include "llvm/Analysis/ScalarEvolutionExpressions.h" 330b57cec5SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h" 340fca6ea1SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 350b57cec5SDimitry Andric #include "llvm/Analysis/ValueTracking.h" 360b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h" 370b57cec5SDimitry Andric #include "llvm/IR/BasicBlock.h" 380b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 390b57cec5SDimitry Andric #include "llvm/IR/DataLayout.h" 400b57cec5SDimitry Andric #include "llvm/IR/DebugLoc.h" 410b57cec5SDimitry Andric #include "llvm/IR/DerivedTypes.h" 420b57cec5SDimitry Andric #include "llvm/IR/DiagnosticInfo.h" 430b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 440b57cec5SDimitry Andric #include "llvm/IR/Function.h" 4506c3fb27SDimitry Andric #include "llvm/IR/GetElementPtrTypeIterator.h" 460b57cec5SDimitry Andric #include "llvm/IR/InstrTypes.h" 470b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 480b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 490b57cec5SDimitry Andric #include "llvm/IR/Operator.h" 500b57cec5SDimitry Andric #include "llvm/IR/PassManager.h" 5181ad6265SDimitry Andric #include "llvm/IR/PatternMatch.h" 520b57cec5SDimitry Andric #include "llvm/IR/Type.h" 530b57cec5SDimitry Andric #include "llvm/IR/Value.h" 540b57cec5SDimitry Andric #include "llvm/IR/ValueHandle.h" 550b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 560b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 570b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 580b57cec5SDimitry Andric #include "llvm/Support/ErrorHandling.h" 590b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 600b57cec5SDimitry Andric #include <algorithm> 610b57cec5SDimitry Andric #include <cassert> 620b57cec5SDimitry Andric #include <cstdint> 630b57cec5SDimitry Andric #include <iterator> 640b57cec5SDimitry Andric 
#include <utility> 655f757f3fSDimitry Andric #include <variant> 660b57cec5SDimitry Andric #include <vector> 670b57cec5SDimitry Andric 680b57cec5SDimitry Andric using namespace llvm; 6981ad6265SDimitry Andric using namespace llvm::PatternMatch; 700b57cec5SDimitry Andric 710b57cec5SDimitry Andric #define DEBUG_TYPE "loop-accesses" 720b57cec5SDimitry Andric 730b57cec5SDimitry Andric static cl::opt<unsigned, true> 740b57cec5SDimitry Andric VectorizationFactor("force-vector-width", cl::Hidden, 750b57cec5SDimitry Andric cl::desc("Sets the SIMD width. Zero is autoselect."), 760b57cec5SDimitry Andric cl::location(VectorizerParams::VectorizationFactor)); 770b57cec5SDimitry Andric unsigned VectorizerParams::VectorizationFactor; 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric static cl::opt<unsigned, true> 800b57cec5SDimitry Andric VectorizationInterleave("force-vector-interleave", cl::Hidden, 810b57cec5SDimitry Andric cl::desc("Sets the vectorization interleave count. " 820b57cec5SDimitry Andric "Zero is autoselect."), 830b57cec5SDimitry Andric cl::location( 840b57cec5SDimitry Andric VectorizerParams::VectorizationInterleave)); 850b57cec5SDimitry Andric unsigned VectorizerParams::VectorizationInterleave; 860b57cec5SDimitry Andric 870b57cec5SDimitry Andric static cl::opt<unsigned, true> RuntimeMemoryCheckThreshold( 880b57cec5SDimitry Andric "runtime-memory-check-threshold", cl::Hidden, 890b57cec5SDimitry Andric cl::desc("When performing memory disambiguation checks at runtime do not " 900b57cec5SDimitry Andric "generate more than this number of comparisons (default = 8)."), 910b57cec5SDimitry Andric cl::location(VectorizerParams::RuntimeMemoryCheckThreshold), cl::init(8)); 920b57cec5SDimitry Andric unsigned VectorizerParams::RuntimeMemoryCheckThreshold; 930b57cec5SDimitry Andric 940b57cec5SDimitry Andric /// The maximum iterations used to merge memory checks 950b57cec5SDimitry Andric static cl::opt<unsigned> MemoryCheckMergeThreshold( 960b57cec5SDimitry Andric 
"memory-check-merge-threshold", cl::Hidden, 970b57cec5SDimitry Andric cl::desc("Maximum number of comparisons done when trying to merge " 980b57cec5SDimitry Andric "runtime memory checks. (default = 100)"), 990b57cec5SDimitry Andric cl::init(100)); 1000b57cec5SDimitry Andric 1010b57cec5SDimitry Andric /// Maximum SIMD width. 1020b57cec5SDimitry Andric const unsigned VectorizerParams::MaxVectorWidth = 64; 1030b57cec5SDimitry Andric 1040b57cec5SDimitry Andric /// We collect dependences up to this threshold. 1050b57cec5SDimitry Andric static cl::opt<unsigned> 1060b57cec5SDimitry Andric MaxDependences("max-dependences", cl::Hidden, 1070b57cec5SDimitry Andric cl::desc("Maximum number of dependences collected by " 1080b57cec5SDimitry Andric "loop-access analysis (default = 100)"), 1090b57cec5SDimitry Andric cl::init(100)); 1100b57cec5SDimitry Andric 1110b57cec5SDimitry Andric /// This enables versioning on the strides of symbolically striding memory 1120b57cec5SDimitry Andric /// accesses in code like the following. 1130b57cec5SDimitry Andric /// for (i = 0; i < N; ++i) 1140b57cec5SDimitry Andric /// A[i * Stride1] += B[i * Stride2] ... 1150b57cec5SDimitry Andric /// 1160b57cec5SDimitry Andric /// Will be roughly translated to 1170b57cec5SDimitry Andric /// if (Stride1 == 1 && Stride2 == 1) { 1180b57cec5SDimitry Andric /// for (i = 0; i < N; i+=4) 1190b57cec5SDimitry Andric /// A[i:i+3] += ... 1200b57cec5SDimitry Andric /// } else 1210b57cec5SDimitry Andric /// ... 1220b57cec5SDimitry Andric static cl::opt<bool> EnableMemAccessVersioning( 1230b57cec5SDimitry Andric "enable-mem-access-versioning", cl::init(true), cl::Hidden, 1240b57cec5SDimitry Andric cl::desc("Enable symbolic stride memory access versioning")); 1250b57cec5SDimitry Andric 1260b57cec5SDimitry Andric /// Enable store-to-load forwarding conflict detection. This option can 1270b57cec5SDimitry Andric /// be disabled for correctness testing. 
1280b57cec5SDimitry Andric static cl::opt<bool> EnableForwardingConflictDetection( 1290b57cec5SDimitry Andric "store-to-load-forwarding-conflict-detection", cl::Hidden, 1300b57cec5SDimitry Andric cl::desc("Enable conflict detection in loop-access analysis"), 1310b57cec5SDimitry Andric cl::init(true)); 1320b57cec5SDimitry Andric 133fcaf7f86SDimitry Andric static cl::opt<unsigned> MaxForkedSCEVDepth( 134fcaf7f86SDimitry Andric "max-forked-scev-depth", cl::Hidden, 135fcaf7f86SDimitry Andric cl::desc("Maximum recursion depth when finding forked SCEVs (default = 5)"), 136fcaf7f86SDimitry Andric cl::init(5)); 137fcaf7f86SDimitry Andric 13806c3fb27SDimitry Andric static cl::opt<bool> SpeculateUnitStride( 13906c3fb27SDimitry Andric "laa-speculate-unit-stride", cl::Hidden, 14006c3fb27SDimitry Andric cl::desc("Speculate that non-constant strides are unit in LAA"), 14106c3fb27SDimitry Andric cl::init(true)); 14206c3fb27SDimitry Andric 1435f757f3fSDimitry Andric static cl::opt<bool, true> HoistRuntimeChecks( 1445f757f3fSDimitry Andric "hoist-runtime-checks", cl::Hidden, 1455f757f3fSDimitry Andric cl::desc( 1465f757f3fSDimitry Andric "Hoist inner loop runtime memory checks to outer loop if possible"), 147cb14a3feSDimitry Andric cl::location(VectorizerParams::HoistRuntimeChecks), cl::init(true)); 1485f757f3fSDimitry Andric bool VectorizerParams::HoistRuntimeChecks; 1495f757f3fSDimitry Andric 1500b57cec5SDimitry Andric bool VectorizerParams::isInterleaveForced() { 1510b57cec5SDimitry Andric return ::VectorizationInterleave.getNumOccurrences() > 0; 1520b57cec5SDimitry Andric } 1530b57cec5SDimitry Andric 1540b57cec5SDimitry Andric const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, 15506c3fb27SDimitry Andric const DenseMap<Value *, const SCEV *> &PtrToStride, 156349cc55cSDimitry Andric Value *Ptr) { 1570b57cec5SDimitry Andric const SCEV *OrigSCEV = PSE.getSCEV(Ptr); 1580b57cec5SDimitry Andric 1590b57cec5SDimitry Andric // If there is an entry in the map 
return the SCEV of the pointer with the 1600b57cec5SDimitry Andric // symbolic stride replaced by one. 16106c3fb27SDimitry Andric DenseMap<Value *, const SCEV *>::const_iterator SI = PtrToStride.find(Ptr); 162e8d8bef9SDimitry Andric if (SI == PtrToStride.end()) 163e8d8bef9SDimitry Andric // For a non-symbolic stride, just return the original expression. 164e8d8bef9SDimitry Andric return OrigSCEV; 1650b57cec5SDimitry Andric 16606c3fb27SDimitry Andric const SCEV *StrideSCEV = SI->second; 16706c3fb27SDimitry Andric // Note: This assert is both overly strong and overly weak. The actual 16806c3fb27SDimitry Andric // invariant here is that StrideSCEV should be loop invariant. The only 16906c3fb27SDimitry Andric // such invariant strides we happen to speculate right now are unknowns 17006c3fb27SDimitry Andric // and thus this is a reasonable proxy of the actual invariant. 17106c3fb27SDimitry Andric assert(isa<SCEVUnknown>(StrideSCEV) && "shouldn't be in map"); 1720b57cec5SDimitry Andric 1730b57cec5SDimitry Andric ScalarEvolution *SE = PSE.getSE(); 17406c3fb27SDimitry Andric const auto *CT = SE->getOne(StrideSCEV->getType()); 17506c3fb27SDimitry Andric PSE.addPredicate(*SE->getEqualPredicate(StrideSCEV, CT)); 1760b57cec5SDimitry Andric auto *Expr = PSE.getSCEV(Ptr); 1770b57cec5SDimitry Andric 1780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV 1790b57cec5SDimitry Andric << " by: " << *Expr << "\n"); 1800b57cec5SDimitry Andric return Expr; 1810b57cec5SDimitry Andric } 1820b57cec5SDimitry Andric 1835ffd83dbSDimitry Andric RuntimeCheckingPtrGroup::RuntimeCheckingPtrGroup( 1845ffd83dbSDimitry Andric unsigned Index, RuntimePointerChecking &RtCheck) 185fe6060f1SDimitry Andric : High(RtCheck.Pointers[Index].End), Low(RtCheck.Pointers[Index].Start), 186fe6060f1SDimitry Andric AddressSpace(RtCheck.Pointers[Index] 187fe6060f1SDimitry Andric .PointerValue->getType() 18881ad6265SDimitry Andric ->getPointerAddressSpace()), 18981ad6265SDimitry Andric 
NeedsFreeze(RtCheck.Pointers[Index].NeedsFreeze) { 1905ffd83dbSDimitry Andric Members.push_back(Index); 1915ffd83dbSDimitry Andric } 1925ffd83dbSDimitry Andric 1930b57cec5SDimitry Andric /// Calculate Start and End points of memory access. 1940b57cec5SDimitry Andric /// Let's assume A is the first access and B is a memory access on N-th loop 1950b57cec5SDimitry Andric /// iteration. Then B is calculated as: 1960b57cec5SDimitry Andric /// B = A + Step*N . 1970b57cec5SDimitry Andric /// Step value may be positive or negative. 1980b57cec5SDimitry Andric /// N is a calculated back-edge taken count: 1990b57cec5SDimitry Andric /// N = (TripCount > 0) ? RoundDown(TripCount -1 , VF) : 0 2000b57cec5SDimitry Andric /// Start and End points are calculated in the following way: 2010b57cec5SDimitry Andric /// Start = UMIN(A, B) ; End = UMAX(A, B) + SizeOfElt, 2020b57cec5SDimitry Andric /// where SizeOfElt is the size of single memory access in bytes. 2030b57cec5SDimitry Andric /// 2040b57cec5SDimitry Andric /// There is no conflict when the intervals are disjoint: 2050b57cec5SDimitry Andric /// NoConflict = (P2.Start >= P1.End) || (P1.Start >= P2.End) 2060fca6ea1SDimitry Andric static std::pair<const SCEV *, const SCEV *> getStartAndEndForAccess( 2070fca6ea1SDimitry Andric const Loop *Lp, const SCEV *PtrExpr, Type *AccessTy, 20881ad6265SDimitry Andric PredicatedScalarEvolution &PSE, 2090fca6ea1SDimitry Andric DenseMap<std::pair<const SCEV *, Type *>, 2100fca6ea1SDimitry Andric std::pair<const SCEV *, const SCEV *>> &PointerBounds) { 2110b57cec5SDimitry Andric ScalarEvolution *SE = PSE.getSE(); 2120b57cec5SDimitry Andric 2130fca6ea1SDimitry Andric auto [Iter, Ins] = PointerBounds.insert( 2140fca6ea1SDimitry Andric {{PtrExpr, AccessTy}, 2150fca6ea1SDimitry Andric {SE->getCouldNotCompute(), SE->getCouldNotCompute()}}); 2160fca6ea1SDimitry Andric if (!Ins) 2170fca6ea1SDimitry Andric return Iter->second; 2180fca6ea1SDimitry Andric 2190b57cec5SDimitry Andric const SCEV *ScStart; 
2200b57cec5SDimitry Andric const SCEV *ScEnd; 2210b57cec5SDimitry Andric 22281ad6265SDimitry Andric if (SE->isLoopInvariant(PtrExpr, Lp)) { 22381ad6265SDimitry Andric ScStart = ScEnd = PtrExpr; 2240fca6ea1SDimitry Andric } else if (auto *AR = dyn_cast<SCEVAddRecExpr>(PtrExpr)) { 2250fca6ea1SDimitry Andric const SCEV *Ex = PSE.getSymbolicMaxBackedgeTakenCount(); 2260b57cec5SDimitry Andric 2270b57cec5SDimitry Andric ScStart = AR->getStart(); 2280b57cec5SDimitry Andric ScEnd = AR->evaluateAtIteration(Ex, *SE); 2290b57cec5SDimitry Andric const SCEV *Step = AR->getStepRecurrence(*SE); 2300b57cec5SDimitry Andric 2310b57cec5SDimitry Andric // For expressions with negative step, the upper bound is ScStart and the 2320b57cec5SDimitry Andric // lower bound is ScEnd. 2330b57cec5SDimitry Andric if (const auto *CStep = dyn_cast<SCEVConstant>(Step)) { 2340b57cec5SDimitry Andric if (CStep->getValue()->isNegative()) 2350b57cec5SDimitry Andric std::swap(ScStart, ScEnd); 2360b57cec5SDimitry Andric } else { 2370b57cec5SDimitry Andric // Fallback case: the step is not constant, but we can still 2380b57cec5SDimitry Andric // get the upper and lower bounds of the interval by using min/max 2390b57cec5SDimitry Andric // expressions. 2400b57cec5SDimitry Andric ScStart = SE->getUMinExpr(ScStart, ScEnd); 2410b57cec5SDimitry Andric ScEnd = SE->getUMaxExpr(AR->getStart(), ScEnd); 2420b57cec5SDimitry Andric } 2430fca6ea1SDimitry Andric } else 2440fca6ea1SDimitry Andric return {SE->getCouldNotCompute(), SE->getCouldNotCompute()}; 2450fca6ea1SDimitry Andric 24606c3fb27SDimitry Andric assert(SE->isLoopInvariant(ScStart, Lp) && "ScStart needs to be invariant"); 24706c3fb27SDimitry Andric assert(SE->isLoopInvariant(ScEnd, Lp)&& "ScEnd needs to be invariant"); 24806c3fb27SDimitry Andric 2490b57cec5SDimitry Andric // Add the size of the pointed element to ScEnd. 
2500fca6ea1SDimitry Andric auto &DL = Lp->getHeader()->getDataLayout(); 2510fca6ea1SDimitry Andric Type *IdxTy = DL.getIndexType(PtrExpr->getType()); 25281ad6265SDimitry Andric const SCEV *EltSizeSCEV = SE->getStoreSizeOfExpr(IdxTy, AccessTy); 2530b57cec5SDimitry Andric ScEnd = SE->getAddExpr(ScEnd, EltSizeSCEV); 2540b57cec5SDimitry Andric 2550fca6ea1SDimitry Andric Iter->second = {ScStart, ScEnd}; 2560fca6ea1SDimitry Andric return Iter->second; 2570fca6ea1SDimitry Andric } 2580fca6ea1SDimitry Andric 2590fca6ea1SDimitry Andric /// Calculate Start and End points of memory access using 2600fca6ea1SDimitry Andric /// getStartAndEndForAccess. 2610fca6ea1SDimitry Andric void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, const SCEV *PtrExpr, 2620fca6ea1SDimitry Andric Type *AccessTy, bool WritePtr, 2630fca6ea1SDimitry Andric unsigned DepSetId, unsigned ASId, 2640fca6ea1SDimitry Andric PredicatedScalarEvolution &PSE, 2650fca6ea1SDimitry Andric bool NeedsFreeze) { 2660fca6ea1SDimitry Andric const auto &[ScStart, ScEnd] = getStartAndEndForAccess( 2670fca6ea1SDimitry Andric Lp, PtrExpr, AccessTy, PSE, DC.getPointerBounds()); 2680fca6ea1SDimitry Andric assert(!isa<SCEVCouldNotCompute>(ScStart) && 2690fca6ea1SDimitry Andric !isa<SCEVCouldNotCompute>(ScEnd) && 2700fca6ea1SDimitry Andric "must be able to compute both start and end expressions"); 27181ad6265SDimitry Andric Pointers.emplace_back(Ptr, ScStart, ScEnd, WritePtr, DepSetId, ASId, PtrExpr, 27281ad6265SDimitry Andric NeedsFreeze); 2730b57cec5SDimitry Andric } 2740b57cec5SDimitry Andric 2750fca6ea1SDimitry Andric bool RuntimePointerChecking::tryToCreateDiffCheck( 27681ad6265SDimitry Andric const RuntimeCheckingPtrGroup &CGI, const RuntimeCheckingPtrGroup &CGJ) { 27781ad6265SDimitry Andric // If either group contains multiple different pointers, bail out. 27881ad6265SDimitry Andric // TODO: Support multiple pointers by using the minimum or maximum pointer, 27981ad6265SDimitry Andric // depending on src & sink. 
2800fca6ea1SDimitry Andric if (CGI.Members.size() != 1 || CGJ.Members.size() != 1) 2810fca6ea1SDimitry Andric return false; 28281ad6265SDimitry Andric 28381ad6265SDimitry Andric PointerInfo *Src = &Pointers[CGI.Members[0]]; 28481ad6265SDimitry Andric PointerInfo *Sink = &Pointers[CGJ.Members[0]]; 28581ad6265SDimitry Andric 28681ad6265SDimitry Andric // If either pointer is read and written, multiple checks may be needed. Bail 28781ad6265SDimitry Andric // out. 28881ad6265SDimitry Andric if (!DC.getOrderForAccess(Src->PointerValue, !Src->IsWritePtr).empty() || 2890fca6ea1SDimitry Andric !DC.getOrderForAccess(Sink->PointerValue, !Sink->IsWritePtr).empty()) 2900fca6ea1SDimitry Andric return false; 29181ad6265SDimitry Andric 29281ad6265SDimitry Andric ArrayRef<unsigned> AccSrc = 29381ad6265SDimitry Andric DC.getOrderForAccess(Src->PointerValue, Src->IsWritePtr); 29481ad6265SDimitry Andric ArrayRef<unsigned> AccSink = 29581ad6265SDimitry Andric DC.getOrderForAccess(Sink->PointerValue, Sink->IsWritePtr); 29681ad6265SDimitry Andric // If either pointer is accessed multiple times, there may not be a clear 29781ad6265SDimitry Andric // src/sink relation. Bail out for now. 2980fca6ea1SDimitry Andric if (AccSrc.size() != 1 || AccSink.size() != 1) 2990fca6ea1SDimitry Andric return false; 3000fca6ea1SDimitry Andric 30181ad6265SDimitry Andric // If the sink is accessed before src, swap src/sink. 
30281ad6265SDimitry Andric if (AccSink[0] < AccSrc[0]) 30381ad6265SDimitry Andric std::swap(Src, Sink); 30481ad6265SDimitry Andric 30581ad6265SDimitry Andric auto *SrcAR = dyn_cast<SCEVAddRecExpr>(Src->Expr); 30681ad6265SDimitry Andric auto *SinkAR = dyn_cast<SCEVAddRecExpr>(Sink->Expr); 307a4a491e2SDimitry Andric if (!SrcAR || !SinkAR || SrcAR->getLoop() != DC.getInnermostLoop() || 3080fca6ea1SDimitry Andric SinkAR->getLoop() != DC.getInnermostLoop()) 3090fca6ea1SDimitry Andric return false; 31081ad6265SDimitry Andric 31181ad6265SDimitry Andric SmallVector<Instruction *, 4> SrcInsts = 31281ad6265SDimitry Andric DC.getInstructionsForAccess(Src->PointerValue, Src->IsWritePtr); 31381ad6265SDimitry Andric SmallVector<Instruction *, 4> SinkInsts = 31481ad6265SDimitry Andric DC.getInstructionsForAccess(Sink->PointerValue, Sink->IsWritePtr); 31581ad6265SDimitry Andric Type *SrcTy = getLoadStoreType(SrcInsts[0]); 31681ad6265SDimitry Andric Type *DstTy = getLoadStoreType(SinkInsts[0]); 3170fca6ea1SDimitry Andric if (isa<ScalableVectorType>(SrcTy) || isa<ScalableVectorType>(DstTy)) 3180fca6ea1SDimitry Andric return false; 3190fca6ea1SDimitry Andric 320bdd1243dSDimitry Andric const DataLayout &DL = 3210fca6ea1SDimitry Andric SinkAR->getLoop()->getHeader()->getDataLayout(); 32281ad6265SDimitry Andric unsigned AllocSize = 32381ad6265SDimitry Andric std::max(DL.getTypeAllocSize(SrcTy), DL.getTypeAllocSize(DstTy)); 32481ad6265SDimitry Andric 32581ad6265SDimitry Andric // Only matching constant steps matching the AllocSize are supported at the 32681ad6265SDimitry Andric // moment. This simplifies the difference computation. Can be extended in the 32781ad6265SDimitry Andric // future. 
32881ad6265SDimitry Andric auto *Step = dyn_cast<SCEVConstant>(SinkAR->getStepRecurrence(*SE)); 32981ad6265SDimitry Andric if (!Step || Step != SrcAR->getStepRecurrence(*SE) || 3300fca6ea1SDimitry Andric Step->getAPInt().abs() != AllocSize) 3310fca6ea1SDimitry Andric return false; 33281ad6265SDimitry Andric 333bdd1243dSDimitry Andric IntegerType *IntTy = 334bdd1243dSDimitry Andric IntegerType::get(Src->PointerValue->getContext(), 335bdd1243dSDimitry Andric DL.getPointerSizeInBits(CGI.AddressSpace)); 336bdd1243dSDimitry Andric 33781ad6265SDimitry Andric // When counting down, the dependence distance needs to be swapped. 33881ad6265SDimitry Andric if (Step->getValue()->isNegative()) 33981ad6265SDimitry Andric std::swap(SinkAR, SrcAR); 34081ad6265SDimitry Andric 34181ad6265SDimitry Andric const SCEV *SinkStartInt = SE->getPtrToIntExpr(SinkAR->getStart(), IntTy); 34281ad6265SDimitry Andric const SCEV *SrcStartInt = SE->getPtrToIntExpr(SrcAR->getStart(), IntTy); 34381ad6265SDimitry Andric if (isa<SCEVCouldNotCompute>(SinkStartInt) || 3440fca6ea1SDimitry Andric isa<SCEVCouldNotCompute>(SrcStartInt)) 3450fca6ea1SDimitry Andric return false; 3465f757f3fSDimitry Andric 3475f757f3fSDimitry Andric const Loop *InnerLoop = SrcAR->getLoop(); 3485f757f3fSDimitry Andric // If the start values for both Src and Sink also vary according to an outer 3495f757f3fSDimitry Andric // loop, then it's probably better to avoid creating diff checks because 3505f757f3fSDimitry Andric // they may not be hoisted. We should instead let llvm::addRuntimeChecks 3515f757f3fSDimitry Andric // do the expanded full range overlap checks, which can be hoisted. 
3525f757f3fSDimitry Andric if (HoistRuntimeChecks && InnerLoop->getParentLoop() && 3535f757f3fSDimitry Andric isa<SCEVAddRecExpr>(SinkStartInt) && isa<SCEVAddRecExpr>(SrcStartInt)) { 3545f757f3fSDimitry Andric auto *SrcStartAR = cast<SCEVAddRecExpr>(SrcStartInt); 3555f757f3fSDimitry Andric auto *SinkStartAR = cast<SCEVAddRecExpr>(SinkStartInt); 3565f757f3fSDimitry Andric const Loop *StartARLoop = SrcStartAR->getLoop(); 3575f757f3fSDimitry Andric if (StartARLoop == SinkStartAR->getLoop() && 3585f757f3fSDimitry Andric StartARLoop == InnerLoop->getParentLoop() && 3595f757f3fSDimitry Andric // If the diff check would already be loop invariant (due to the 3605f757f3fSDimitry Andric // recurrences being the same), then we prefer to keep the diff checks 3615f757f3fSDimitry Andric // because they are cheaper. 3625f757f3fSDimitry Andric SrcStartAR->getStepRecurrence(*SE) != 3635f757f3fSDimitry Andric SinkStartAR->getStepRecurrence(*SE)) { 3645f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Not creating diff runtime check, since these " 3655f757f3fSDimitry Andric "cannot be hoisted out of the outer loop\n"); 3660fca6ea1SDimitry Andric return false; 3675f757f3fSDimitry Andric } 3685f757f3fSDimitry Andric } 3695f757f3fSDimitry Andric 3705f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Creating diff runtime check for:\n" 3715f757f3fSDimitry Andric << "SrcStart: " << *SrcStartInt << '\n' 3725f757f3fSDimitry Andric << "SinkStartInt: " << *SinkStartInt << '\n'); 37381ad6265SDimitry Andric DiffChecks.emplace_back(SrcStartInt, SinkStartInt, AllocSize, 37481ad6265SDimitry Andric Src->NeedsFreeze || Sink->NeedsFreeze); 3750fca6ea1SDimitry Andric return true; 37681ad6265SDimitry Andric } 37781ad6265SDimitry Andric 37881ad6265SDimitry Andric SmallVector<RuntimePointerCheck, 4> RuntimePointerChecking::generateChecks() { 3795ffd83dbSDimitry Andric SmallVector<RuntimePointerCheck, 4> Checks; 3800b57cec5SDimitry Andric 3810b57cec5SDimitry Andric for (unsigned I = 0; I < 
CheckingGroups.size(); ++I) { 3820b57cec5SDimitry Andric for (unsigned J = I + 1; J < CheckingGroups.size(); ++J) { 3835ffd83dbSDimitry Andric const RuntimeCheckingPtrGroup &CGI = CheckingGroups[I]; 3845ffd83dbSDimitry Andric const RuntimeCheckingPtrGroup &CGJ = CheckingGroups[J]; 3850b57cec5SDimitry Andric 38681ad6265SDimitry Andric if (needsChecking(CGI, CGJ)) { 3870fca6ea1SDimitry Andric CanUseDiffCheck = CanUseDiffCheck && tryToCreateDiffCheck(CGI, CGJ); 3880b57cec5SDimitry Andric Checks.push_back(std::make_pair(&CGI, &CGJ)); 3890b57cec5SDimitry Andric } 3900b57cec5SDimitry Andric } 39181ad6265SDimitry Andric } 3920b57cec5SDimitry Andric return Checks; 3930b57cec5SDimitry Andric } 3940b57cec5SDimitry Andric 3950b57cec5SDimitry Andric void RuntimePointerChecking::generateChecks( 3960b57cec5SDimitry Andric MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) { 3970b57cec5SDimitry Andric assert(Checks.empty() && "Checks is not empty"); 3980b57cec5SDimitry Andric groupChecks(DepCands, UseDependencies); 3990b57cec5SDimitry Andric Checks = generateChecks(); 4000b57cec5SDimitry Andric } 4010b57cec5SDimitry Andric 4025ffd83dbSDimitry Andric bool RuntimePointerChecking::needsChecking( 4035ffd83dbSDimitry Andric const RuntimeCheckingPtrGroup &M, const RuntimeCheckingPtrGroup &N) const { 4040fca6ea1SDimitry Andric for (const auto &I : M.Members) 4050fca6ea1SDimitry Andric for (const auto &J : N.Members) 4060fca6ea1SDimitry Andric if (needsChecking(I, J)) 4070b57cec5SDimitry Andric return true; 4080b57cec5SDimitry Andric return false; 4090b57cec5SDimitry Andric } 4100b57cec5SDimitry Andric 4110b57cec5SDimitry Andric /// Compare \p I and \p J and return the minimum. 4120b57cec5SDimitry Andric /// Return nullptr in case we couldn't find an answer. 
4130b57cec5SDimitry Andric static const SCEV *getMinFromExprs(const SCEV *I, const SCEV *J,
4140b57cec5SDimitry Andric                                    ScalarEvolution *SE) {
4150b57cec5SDimitry Andric   const SCEV *Diff = SE->getMinusSCEV(J, I);
4160b57cec5SDimitry Andric   const SCEVConstant *C = dyn_cast<const SCEVConstant>(Diff);
4170b57cec5SDimitry Andric
// A non-constant difference means the two expressions are not comparable.
4180b57cec5SDimitry Andric   if (!C)
4190b57cec5SDimitry Andric     return nullptr;
// J - I < 0 means J is the smaller expression.
4200fca6ea1SDimitry Andric   return C->getValue()->isNegative() ? J : I;
4210b57cec5SDimitry Andric }
4220b57cec5SDimitry Andric
// Convenience overload: pulls Start/End, address space and NeedsFreeze for
// Pointers[Index] out of \p RtCheck and forwards to the main overload below.
423fe6060f1SDimitry Andric bool RuntimeCheckingPtrGroup::addPointer(unsigned Index,
424fe6060f1SDimitry Andric                                          RuntimePointerChecking &RtCheck) {
425fe6060f1SDimitry Andric   return addPointer(
426fe6060f1SDimitry Andric       Index, RtCheck.Pointers[Index].Start, RtCheck.Pointers[Index].End,
427fe6060f1SDimitry Andric       RtCheck.Pointers[Index].PointerValue->getType()->getPointerAddressSpace(),
42881ad6265SDimitry Andric       RtCheck.Pointers[Index].NeedsFreeze, *RtCheck.SE);
429fe6060f1SDimitry Andric }
430fe6060f1SDimitry Andric
// Try to add pointer \p Index, whose access range is [Start, End) in address
// space \p AS, to this group. Fails (returns false) when the distance from
// either bound to the group's current Low/High is not a constant SCEV, since
// then the merged bounds could not be compared.
431fe6060f1SDimitry Andric bool RuntimeCheckingPtrGroup::addPointer(unsigned Index, const SCEV *Start,
432fe6060f1SDimitry Andric                                          const SCEV *End, unsigned AS,
43381ad6265SDimitry Andric                                          bool NeedsFreeze,
434fe6060f1SDimitry Andric                                          ScalarEvolution &SE) {
435fe6060f1SDimitry Andric   assert(AddressSpace == AS &&
436fe6060f1SDimitry Andric          "all pointers in a checking group must be in the same address space");
4370b57cec5SDimitry Andric
4380b57cec5SDimitry Andric   // Compare the starts and ends with the known minimum and maximum
4390b57cec5SDimitry Andric   // of this set. We need to know how we compare against the min/max
4400b57cec5SDimitry Andric   // of the set in order to be able to emit memchecks.
441fe6060f1SDimitry Andric   const SCEV *Min0 = getMinFromExprs(Start, Low, &SE);
4420b57cec5SDimitry Andric   if (!Min0)
4430b57cec5SDimitry Andric     return false;
4440b57cec5SDimitry Andric
445fe6060f1SDimitry Andric   const SCEV *Min1 = getMinFromExprs(End, High, &SE);
4460b57cec5SDimitry Andric   if (!Min1)
4470b57cec5SDimitry Andric     return false;
4480b57cec5SDimitry Andric
4490b57cec5SDimitry Andric   // Update the low bound expression if we've found a new min value.
4500b57cec5SDimitry Andric   if (Min0 == Start)
4510b57cec5SDimitry Andric     Low = Start;
4520b57cec5SDimitry Andric
4530b57cec5SDimitry Andric   // Update the high bound expression if we've found a new max value.
4540b57cec5SDimitry Andric   if (Min1 != End)
4550b57cec5SDimitry Andric     High = End;
4560b57cec5SDimitry Andric
4570b57cec5SDimitry Andric   Members.push_back(Index);
// A single frozen member taints the whole group.
45881ad6265SDimitry Andric   this->NeedsFreeze |= NeedsFreeze;
4590b57cec5SDimitry Andric   return true;
4600b57cec5SDimitry Andric }
4610b57cec5SDimitry Andric
4620b57cec5SDimitry Andric void RuntimePointerChecking::groupChecks(
4630b57cec5SDimitry Andric     MemoryDepChecker::DepCandidates &DepCands, bool UseDependencies) {
4640b57cec5SDimitry Andric   // We build the groups from dependency candidates equivalence classes
4650b57cec5SDimitry Andric   // because:
4660b57cec5SDimitry Andric   //    - We know that pointers in the same equivalence class share
4670b57cec5SDimitry Andric   //      the same underlying object and therefore there is a chance
4680b57cec5SDimitry Andric   //      that we can compare pointers
4690b57cec5SDimitry Andric   //    - We wouldn't be able to merge two pointers for which we need
4700b57cec5SDimitry Andric   //      to emit a memcheck. The classes in DepCands are already
4710b57cec5SDimitry Andric   //      conveniently built such that no two pointers in the same
4720b57cec5SDimitry Andric   //      class need checking against each other.
4730b57cec5SDimitry Andric
4740b57cec5SDimitry Andric   // We use the following (greedy) algorithm to construct the groups
4750b57cec5SDimitry Andric   // For every pointer in the equivalence class:
4760b57cec5SDimitry Andric   //   For each existing group:
4770b57cec5SDimitry Andric   //   - if the difference between this pointer and the min/max bounds
4780b57cec5SDimitry Andric   //     of the group is a constant, then make the pointer part of the
4790b57cec5SDimitry Andric   //     group and update the min/max bounds of that group as required.
4800b57cec5SDimitry Andric
4810b57cec5SDimitry Andric   CheckingGroups.clear();
4820b57cec5SDimitry Andric
4830b57cec5SDimitry Andric   // If we need to check two pointers to the same underlying object
4840b57cec5SDimitry Andric   // with a non-constant difference, we shouldn't perform any pointer
4850b57cec5SDimitry Andric   // grouping with those pointers. This is because we can easily get
4860b57cec5SDimitry Andric   // into cases where the resulting check would return false, even when
4870b57cec5SDimitry Andric   // the accesses are safe.
4880b57cec5SDimitry Andric   //
4890b57cec5SDimitry Andric   // The following example shows this:
4900b57cec5SDimitry Andric   // for (i = 0; i < 1000; ++i)
4910b57cec5SDimitry Andric   //   a[5000 + i * m] = a[i] + a[i + 9000]
4920b57cec5SDimitry Andric   //
4930b57cec5SDimitry Andric   // Here grouping gives a check of (5000, 5000 + 1000 * m) against
4940b57cec5SDimitry Andric   // (0, 10000) which is always false. However, if m is 1, there is no
4950b57cec5SDimitry Andric   // dependence. Not grouping the checks for a[i] and a[i + 9000] allows
4960b57cec5SDimitry Andric   // us to perform an accurate check in this case.
4970b57cec5SDimitry Andric   //
4980b57cec5SDimitry Andric   // The above case requires that we have an UnknownDependence between
4990b57cec5SDimitry Andric   // accesses to the same underlying object. This cannot happen unless
5000b57cec5SDimitry Andric   // FoundNonConstantDistanceDependence is set, and therefore UseDependencies
5010b57cec5SDimitry Andric   // is also false. In this case we will use the fallback path and create
5020b57cec5SDimitry Andric   // separate checking groups for all pointers.
5030b57cec5SDimitry Andric
5040b57cec5SDimitry Andric   // If we don't have the dependency partitions, construct a new
5050b57cec5SDimitry Andric   // checking pointer group for each pointer. This is also required
5060b57cec5SDimitry Andric   // for correctness, because in this case we can have checking between
5070b57cec5SDimitry Andric   // pointers to the same underlying object.
5080b57cec5SDimitry Andric   if (!UseDependencies) {
5090b57cec5SDimitry Andric     for (unsigned I = 0; I < Pointers.size(); ++I)
5105ffd83dbSDimitry Andric       CheckingGroups.push_back(RuntimeCheckingPtrGroup(I, *this));
5110b57cec5SDimitry Andric     return;
5120b57cec5SDimitry Andric   }
5130b57cec5SDimitry Andric
5140b57cec5SDimitry Andric   unsigned TotalComparisons = 0;
5150b57cec5SDimitry Andric
// Map each pointer value to the indices of all Pointers entries using it
// (the same Value can appear with several expressions / access kinds).
51681ad6265SDimitry Andric   DenseMap<Value *, SmallVector<unsigned>> PositionMap;
51781ad6265SDimitry Andric   for (unsigned Index = 0; Index < Pointers.size(); ++Index) {
5180fca6ea1SDimitry Andric     auto [It, _] = PositionMap.insert({Pointers[Index].PointerValue, {}});
5190fca6ea1SDimitry Andric     It->second.push_back(Index);
52081ad6265SDimitry Andric   }
5210b57cec5SDimitry Andric
5220b57cec5SDimitry Andric   // We need to keep track of what pointers we've already seen so we
5230b57cec5SDimitry Andric   // don't process them twice.
5240b57cec5SDimitry Andric   SmallSet<unsigned, 2> Seen;
5250b57cec5SDimitry Andric
5260b57cec5SDimitry Andric   // Go through all equivalence classes, get the "pointer check groups"
5270b57cec5SDimitry Andric   // and add them to the overall solution. We use the order in which accesses
5280b57cec5SDimitry Andric   // appear in 'Pointers' to enforce determinism.
5290b57cec5SDimitry Andric   for (unsigned I = 0; I < Pointers.size(); ++I) {
5300b57cec5SDimitry Andric     // We've seen this pointer before, and therefore already processed
5310b57cec5SDimitry Andric     // its equivalence class.
5320b57cec5SDimitry Andric     if (Seen.count(I))
5330b57cec5SDimitry Andric       continue;
5340b57cec5SDimitry Andric
5350b57cec5SDimitry Andric     MemoryDepChecker::MemAccessInfo Access(Pointers[I].PointerValue,
5360b57cec5SDimitry Andric                                            Pointers[I].IsWritePtr);
5370b57cec5SDimitry Andric
5385ffd83dbSDimitry Andric     SmallVector<RuntimeCheckingPtrGroup, 2> Groups;
5390b57cec5SDimitry Andric     auto LeaderI = DepCands.findValue(DepCands.getLeaderValue(Access));
5400b57cec5SDimitry Andric
5410b57cec5SDimitry Andric     // Because DepCands is constructed by visiting accesses in the order in
5420b57cec5SDimitry Andric     // which they appear in alias sets (which is deterministic) and the
5430b57cec5SDimitry Andric     // iteration order within an equivalence class member is only dependent on
5440b57cec5SDimitry Andric     // the order in which unions and insertions are performed on the
5450b57cec5SDimitry Andric     // equivalence class, the iteration order is deterministic.
5460b57cec5SDimitry Andric     for (auto MI = DepCands.member_begin(LeaderI), ME = DepCands.member_end();
5470b57cec5SDimitry Andric          MI != ME; ++MI) {
5481106035dSDimitry Andric       auto PointerI = PositionMap.find(MI->getPointer());
5491106035dSDimitry Andric       assert(PointerI != PositionMap.end() &&
5501106035dSDimitry Andric              "pointer in equivalence class not found in PositionMap");
55181ad6265SDimitry Andric       for (unsigned Pointer : PointerI->second) {
5520b57cec5SDimitry Andric         bool Merged = false;
5530b57cec5SDimitry Andric         // Mark this pointer as seen.
5540b57cec5SDimitry Andric         Seen.insert(Pointer);
5550b57cec5SDimitry Andric
5560b57cec5SDimitry Andric         // Go through all the existing sets and see if we can find one
5570b57cec5SDimitry Andric         // which can include this pointer.
5585ffd83dbSDimitry Andric         for (RuntimeCheckingPtrGroup &Group : Groups) {
5590b57cec5SDimitry Andric           // Don't perform more than a certain amount of comparisons.
5600b57cec5SDimitry Andric           // This should limit the cost of grouping the pointers to something
5610b57cec5SDimitry Andric           // reasonable. If we do end up hitting this threshold, the algorithm
5620b57cec5SDimitry Andric           // will create separate groups for all remaining pointers.
5630b57cec5SDimitry Andric           if (TotalComparisons > MemoryCheckMergeThreshold)
5640b57cec5SDimitry Andric             break;
5650b57cec5SDimitry Andric
5660b57cec5SDimitry Andric           TotalComparisons++;
5670b57cec5SDimitry Andric
568fe6060f1SDimitry Andric           if (Group.addPointer(Pointer, *this)) {
5690b57cec5SDimitry Andric             Merged = true;
5700b57cec5SDimitry Andric             break;
5710b57cec5SDimitry Andric           }
5720b57cec5SDimitry Andric         }
5730b57cec5SDimitry Andric
5740b57cec5SDimitry Andric         if (!Merged)
5750b57cec5SDimitry Andric           // We couldn't add this pointer to any existing set or the threshold
5760b57cec5SDimitry Andric           // for the number of comparisons has been reached. Create a new group
5770b57cec5SDimitry Andric           // to hold the current pointer.
5785ffd83dbSDimitry Andric           Groups.push_back(RuntimeCheckingPtrGroup(Pointer, *this));
5790b57cec5SDimitry Andric       }
58081ad6265SDimitry Andric     }
5810b57cec5SDimitry Andric
5820b57cec5SDimitry Andric     // We've computed the grouped checks for this partition.
5830b57cec5SDimitry Andric     // Save the results and continue with the next one.
5840b57cec5SDimitry Andric llvm::copy(Groups, std::back_inserter(CheckingGroups)); 5850b57cec5SDimitry Andric } 5860b57cec5SDimitry Andric } 5870b57cec5SDimitry Andric 5880b57cec5SDimitry Andric bool RuntimePointerChecking::arePointersInSamePartition( 5890b57cec5SDimitry Andric const SmallVectorImpl<int> &PtrToPartition, unsigned PtrIdx1, 5900b57cec5SDimitry Andric unsigned PtrIdx2) { 5910b57cec5SDimitry Andric return (PtrToPartition[PtrIdx1] != -1 && 5920b57cec5SDimitry Andric PtrToPartition[PtrIdx1] == PtrToPartition[PtrIdx2]); 5930b57cec5SDimitry Andric } 5940b57cec5SDimitry Andric 5950b57cec5SDimitry Andric bool RuntimePointerChecking::needsChecking(unsigned I, unsigned J) const { 5960b57cec5SDimitry Andric const PointerInfo &PointerI = Pointers[I]; 5970b57cec5SDimitry Andric const PointerInfo &PointerJ = Pointers[J]; 5980b57cec5SDimitry Andric 5990b57cec5SDimitry Andric // No need to check if two readonly pointers intersect. 6000b57cec5SDimitry Andric if (!PointerI.IsWritePtr && !PointerJ.IsWritePtr) 6010b57cec5SDimitry Andric return false; 6020b57cec5SDimitry Andric 6030b57cec5SDimitry Andric // Only need to check pointers between two different dependency sets. 6040b57cec5SDimitry Andric if (PointerI.DependencySetId == PointerJ.DependencySetId) 6050b57cec5SDimitry Andric return false; 6060b57cec5SDimitry Andric 6070b57cec5SDimitry Andric // Only need to check pointers in the same alias set. 
6080b57cec5SDimitry Andric if (PointerI.AliasSetId != PointerJ.AliasSetId) 6090b57cec5SDimitry Andric return false; 6100b57cec5SDimitry Andric 6110b57cec5SDimitry Andric return true; 6120b57cec5SDimitry Andric } 6130b57cec5SDimitry Andric 6140b57cec5SDimitry Andric void RuntimePointerChecking::printChecks( 6155ffd83dbSDimitry Andric raw_ostream &OS, const SmallVectorImpl<RuntimePointerCheck> &Checks, 6160b57cec5SDimitry Andric unsigned Depth) const { 6170b57cec5SDimitry Andric unsigned N = 0; 6180fca6ea1SDimitry Andric for (const auto &[Check1, Check2] : Checks) { 6190fca6ea1SDimitry Andric const auto &First = Check1->Members, &Second = Check2->Members; 6200b57cec5SDimitry Andric 6210b57cec5SDimitry Andric OS.indent(Depth) << "Check " << N++ << ":\n"; 6220b57cec5SDimitry Andric 6230fca6ea1SDimitry Andric OS.indent(Depth + 2) << "Comparing group (" << Check1 << "):\n"; 6240fca6ea1SDimitry Andric for (unsigned K : First) 6250fca6ea1SDimitry Andric OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n"; 6260b57cec5SDimitry Andric 6270fca6ea1SDimitry Andric OS.indent(Depth + 2) << "Against group (" << Check2 << "):\n"; 6280fca6ea1SDimitry Andric for (unsigned K : Second) 6290fca6ea1SDimitry Andric OS.indent(Depth + 2) << *Pointers[K].PointerValue << "\n"; 6300b57cec5SDimitry Andric } 6310b57cec5SDimitry Andric } 6320b57cec5SDimitry Andric 6330b57cec5SDimitry Andric void RuntimePointerChecking::print(raw_ostream &OS, unsigned Depth) const { 6340b57cec5SDimitry Andric 6350b57cec5SDimitry Andric OS.indent(Depth) << "Run-time memory checks:\n"; 6360b57cec5SDimitry Andric printChecks(OS, Checks, Depth); 6370b57cec5SDimitry Andric 6380b57cec5SDimitry Andric OS.indent(Depth) << "Grouped accesses:\n"; 6390fca6ea1SDimitry Andric for (const auto &CG : CheckingGroups) { 6400b57cec5SDimitry Andric OS.indent(Depth + 2) << "Group " << &CG << ":\n"; 6410b57cec5SDimitry Andric OS.indent(Depth + 4) << "(Low: " << *CG.Low << " High: " << *CG.High 6420b57cec5SDimitry Andric << ")\n"; 
6430fca6ea1SDimitry Andric for (unsigned Member : CG.Members) { 6440fca6ea1SDimitry Andric OS.indent(Depth + 6) << "Member: " << *Pointers[Member].Expr << "\n"; 6450b57cec5SDimitry Andric } 6460b57cec5SDimitry Andric } 6470b57cec5SDimitry Andric } 6480b57cec5SDimitry Andric 6490b57cec5SDimitry Andric namespace { 6500b57cec5SDimitry Andric 6510b57cec5SDimitry Andric /// Analyses memory accesses in a loop. 6520b57cec5SDimitry Andric /// 6530b57cec5SDimitry Andric /// Checks whether run time pointer checks are needed and builds sets for data 6540b57cec5SDimitry Andric /// dependence checking. 6550b57cec5SDimitry Andric class AccessAnalysis { 6560b57cec5SDimitry Andric public: 6570b57cec5SDimitry Andric /// Read or write access location. 6580b57cec5SDimitry Andric typedef PointerIntPair<Value *, 1, bool> MemAccessInfo; 6590b57cec5SDimitry Andric typedef SmallVector<MemAccessInfo, 8> MemAccessInfoList; 6600b57cec5SDimitry Andric 661e8d8bef9SDimitry Andric AccessAnalysis(Loop *TheLoop, AAResults *AA, LoopInfo *LI, 662e8d8bef9SDimitry Andric MemoryDepChecker::DepCandidates &DA, 663b3edf446SDimitry Andric PredicatedScalarEvolution &PSE, 664b3edf446SDimitry Andric SmallPtrSetImpl<MDNode *> &LoopAliasScopes) 665b3edf446SDimitry Andric : TheLoop(TheLoop), BAA(*AA), AST(BAA), LI(LI), DepCands(DA), PSE(PSE), 666b3edf446SDimitry Andric LoopAliasScopes(LoopAliasScopes) { 667bdd1243dSDimitry Andric // We're analyzing dependences across loop iterations. 668bdd1243dSDimitry Andric BAA.enableCrossIterationMode(); 669bdd1243dSDimitry Andric } 6700b57cec5SDimitry Andric 6710b57cec5SDimitry Andric /// Register a load and whether it is only read from. 
67281ad6265SDimitry Andric void addLoad(MemoryLocation &Loc, Type *AccessTy, bool IsReadOnly) { 6730b57cec5SDimitry Andric Value *Ptr = const_cast<Value *>(Loc.Ptr); 674b3edf446SDimitry Andric AST.add(adjustLoc(Loc)); 67581ad6265SDimitry Andric Accesses[MemAccessInfo(Ptr, false)].insert(AccessTy); 6760b57cec5SDimitry Andric if (IsReadOnly) 6770b57cec5SDimitry Andric ReadOnlyPtr.insert(Ptr); 6780b57cec5SDimitry Andric } 6790b57cec5SDimitry Andric 6800b57cec5SDimitry Andric /// Register a store. 68181ad6265SDimitry Andric void addStore(MemoryLocation &Loc, Type *AccessTy) { 6820b57cec5SDimitry Andric Value *Ptr = const_cast<Value *>(Loc.Ptr); 683b3edf446SDimitry Andric AST.add(adjustLoc(Loc)); 68481ad6265SDimitry Andric Accesses[MemAccessInfo(Ptr, true)].insert(AccessTy); 6850b57cec5SDimitry Andric } 6860b57cec5SDimitry Andric 6870b57cec5SDimitry Andric /// Check if we can emit a run-time no-alias check for \p Access. 6880b57cec5SDimitry Andric /// 6890b57cec5SDimitry Andric /// Returns true if we can emit a run-time no alias check for \p Access. 6900b57cec5SDimitry Andric /// If we can check this access, this also adds it to a dependence set and 6910b57cec5SDimitry Andric /// adds a run-time to check for it to \p RtCheck. If \p Assume is true, 6920b57cec5SDimitry Andric /// we will attempt to use additional run-time checks in order to get 6930b57cec5SDimitry Andric /// the bounds of the pointer. 
6940b57cec5SDimitry Andric bool createCheckForAccess(RuntimePointerChecking &RtCheck, 69581ad6265SDimitry Andric MemAccessInfo Access, Type *AccessTy, 69606c3fb27SDimitry Andric const DenseMap<Value *, const SCEV *> &Strides, 6970b57cec5SDimitry Andric DenseMap<Value *, unsigned> &DepSetId, 6980b57cec5SDimitry Andric Loop *TheLoop, unsigned &RunningDepId, 69981ad6265SDimitry Andric unsigned ASId, bool ShouldCheckStride, bool Assume); 7000b57cec5SDimitry Andric 7010b57cec5SDimitry Andric /// Check whether we can check the pointers at runtime for 7020b57cec5SDimitry Andric /// non-intersection. 7030b57cec5SDimitry Andric /// 7040b57cec5SDimitry Andric /// Returns true if we need no check or if we do and we can generate them 7050b57cec5SDimitry Andric /// (i.e. the pointers have computable bounds). 7060b57cec5SDimitry Andric bool canCheckPtrAtRT(RuntimePointerChecking &RtCheck, ScalarEvolution *SE, 70706c3fb27SDimitry Andric Loop *TheLoop, const DenseMap<Value *, const SCEV *> &Strides, 70881ad6265SDimitry Andric Value *&UncomputablePtr, bool ShouldCheckWrap = false); 7090b57cec5SDimitry Andric 7100b57cec5SDimitry Andric /// Goes over all memory accesses, checks whether a RT check is needed 7110b57cec5SDimitry Andric /// and builds sets of dependent accesses. 7120b57cec5SDimitry Andric void buildDependenceSets() { 7130b57cec5SDimitry Andric processMemAccesses(); 7140b57cec5SDimitry Andric } 7150b57cec5SDimitry Andric 7160b57cec5SDimitry Andric /// Initial processing of memory accesses determined that we need to 7170b57cec5SDimitry Andric /// perform dependency checking. 7180b57cec5SDimitry Andric /// 7190b57cec5SDimitry Andric /// Note that this can later be cleared if we retry memcheck analysis without 7200b57cec5SDimitry Andric /// dependency checking (i.e. FoundNonConstantDistanceDependence). 
7210b57cec5SDimitry Andric bool isDependencyCheckNeeded() { return !CheckDeps.empty(); } 7220b57cec5SDimitry Andric 7230b57cec5SDimitry Andric /// We decided that no dependence analysis would be used. Reset the state. 7240b57cec5SDimitry Andric void resetDepChecks(MemoryDepChecker &DepChecker) { 7250b57cec5SDimitry Andric CheckDeps.clear(); 7260b57cec5SDimitry Andric DepChecker.clearDependences(); 7270b57cec5SDimitry Andric } 7280b57cec5SDimitry Andric 7290b57cec5SDimitry Andric MemAccessInfoList &getDependenciesToCheck() { return CheckDeps; } 7300b57cec5SDimitry Andric 7310b57cec5SDimitry Andric private: 73281ad6265SDimitry Andric typedef MapVector<MemAccessInfo, SmallSetVector<Type *, 1>> PtrAccessMap; 7330b57cec5SDimitry Andric 734b3edf446SDimitry Andric /// Adjust the MemoryLocation so that it represents accesses to this 735b3edf446SDimitry Andric /// location across all iterations, rather than a single one. 736b3edf446SDimitry Andric MemoryLocation adjustLoc(MemoryLocation Loc) const { 737b3edf446SDimitry Andric // The accessed location varies within the loop, but remains within the 738b3edf446SDimitry Andric // underlying object. 739b3edf446SDimitry Andric Loc.Size = LocationSize::beforeOrAfterPointer(); 740b3edf446SDimitry Andric Loc.AATags.Scope = adjustAliasScopeList(Loc.AATags.Scope); 741b3edf446SDimitry Andric Loc.AATags.NoAlias = adjustAliasScopeList(Loc.AATags.NoAlias); 742b3edf446SDimitry Andric return Loc; 743b3edf446SDimitry Andric } 744b3edf446SDimitry Andric 745b3edf446SDimitry Andric /// Drop alias scopes that are only valid within a single loop iteration. 746b3edf446SDimitry Andric MDNode *adjustAliasScopeList(MDNode *ScopeList) const { 747b3edf446SDimitry Andric if (!ScopeList) 748b3edf446SDimitry Andric return nullptr; 749b3edf446SDimitry Andric 750b3edf446SDimitry Andric // For the sake of simplicity, drop the whole scope list if any scope is 751b3edf446SDimitry Andric // iteration-local. 
752b3edf446SDimitry Andric if (any_of(ScopeList->operands(), [&](Metadata *Scope) { 753b3edf446SDimitry Andric return LoopAliasScopes.contains(cast<MDNode>(Scope)); 754b3edf446SDimitry Andric })) 755b3edf446SDimitry Andric return nullptr; 756b3edf446SDimitry Andric 757b3edf446SDimitry Andric return ScopeList; 758b3edf446SDimitry Andric } 759b3edf446SDimitry Andric 7600b57cec5SDimitry Andric /// Go over all memory access and check whether runtime pointer checks 7610b57cec5SDimitry Andric /// are needed and build sets of dependency check candidates. 7620b57cec5SDimitry Andric void processMemAccesses(); 7630b57cec5SDimitry Andric 76481ad6265SDimitry Andric /// Map of all accesses. Values are the types used to access memory pointed to 76581ad6265SDimitry Andric /// by the pointer. 76681ad6265SDimitry Andric PtrAccessMap Accesses; 7670b57cec5SDimitry Andric 7680b57cec5SDimitry Andric /// The loop being checked. 7690b57cec5SDimitry Andric const Loop *TheLoop; 7700b57cec5SDimitry Andric 7710b57cec5SDimitry Andric /// List of accesses that need a further dependence check. 7720b57cec5SDimitry Andric MemAccessInfoList CheckDeps; 7730b57cec5SDimitry Andric 7740b57cec5SDimitry Andric /// Set of pointers that are read only. 7750b57cec5SDimitry Andric SmallPtrSet<Value*, 16> ReadOnlyPtr; 7760b57cec5SDimitry Andric 777bdd1243dSDimitry Andric /// Batched alias analysis results. 778bdd1243dSDimitry Andric BatchAAResults BAA; 779bdd1243dSDimitry Andric 7800b57cec5SDimitry Andric /// An alias set tracker to partition the access set by underlying object and 7810b57cec5SDimitry Andric //intrinsic property (such as TBAA metadata). 7820b57cec5SDimitry Andric AliasSetTracker AST; 7830b57cec5SDimitry Andric 7840b57cec5SDimitry Andric LoopInfo *LI; 7850b57cec5SDimitry Andric 7860b57cec5SDimitry Andric /// Sets of potentially dependent accesses - members of one set share an 7870b57cec5SDimitry Andric /// underlying pointer. 
The set "CheckDeps" identfies which sets really need a 7880b57cec5SDimitry Andric /// dependence check. 7890b57cec5SDimitry Andric MemoryDepChecker::DepCandidates &DepCands; 7900b57cec5SDimitry Andric 7910b57cec5SDimitry Andric /// Initial processing of memory accesses determined that we may need 7920b57cec5SDimitry Andric /// to add memchecks. Perform the analysis to determine the necessary checks. 7930b57cec5SDimitry Andric /// 7940b57cec5SDimitry Andric /// Note that, this is different from isDependencyCheckNeeded. When we retry 7950b57cec5SDimitry Andric /// memcheck analysis without dependency checking 7960b57cec5SDimitry Andric /// (i.e. FoundNonConstantDistanceDependence), isDependencyCheckNeeded is 7970b57cec5SDimitry Andric /// cleared while this remains set if we have potentially dependent accesses. 79804eeddc0SDimitry Andric bool IsRTCheckAnalysisNeeded = false; 7990b57cec5SDimitry Andric 8000b57cec5SDimitry Andric /// The SCEV predicate containing all the SCEV-related assumptions. 8010b57cec5SDimitry Andric PredicatedScalarEvolution &PSE; 8025f757f3fSDimitry Andric 8035f757f3fSDimitry Andric DenseMap<Value *, SmallVector<const Value *, 16>> UnderlyingObjects; 804b3edf446SDimitry Andric 805b3edf446SDimitry Andric /// Alias scopes that are declared inside the loop, and as such not valid 806b3edf446SDimitry Andric /// across iterations. 807b3edf446SDimitry Andric SmallPtrSetImpl<MDNode *> &LoopAliasScopes; 8080b57cec5SDimitry Andric }; 8090b57cec5SDimitry Andric 8100b57cec5SDimitry Andric } // end anonymous namespace 8110b57cec5SDimitry Andric 8120b57cec5SDimitry Andric /// Check whether a pointer can participate in a runtime bounds check. 8130b57cec5SDimitry Andric /// If \p Assume, try harder to prove that we can compute the bounds of \p Ptr 8140b57cec5SDimitry Andric /// by adding run-time checks (overflow checks) if necessary. 
81581ad6265SDimitry Andric static bool hasComputableBounds(PredicatedScalarEvolution &PSE, Value *Ptr, 81681ad6265SDimitry Andric const SCEV *PtrScev, Loop *L, bool Assume) { 8170b57cec5SDimitry Andric // The bounds for loop-invariant pointer is trivial. 8180b57cec5SDimitry Andric if (PSE.getSE()->isLoopInvariant(PtrScev, L)) 8190b57cec5SDimitry Andric return true; 8200b57cec5SDimitry Andric 8210b57cec5SDimitry Andric const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(PtrScev); 8220b57cec5SDimitry Andric 8230b57cec5SDimitry Andric if (!AR && Assume) 8240b57cec5SDimitry Andric AR = PSE.getAsAddRec(Ptr); 8250b57cec5SDimitry Andric 8260b57cec5SDimitry Andric if (!AR) 8270b57cec5SDimitry Andric return false; 8280b57cec5SDimitry Andric 8290b57cec5SDimitry Andric return AR->isAffine(); 8300b57cec5SDimitry Andric } 8310b57cec5SDimitry Andric 8320b57cec5SDimitry Andric /// Check whether a pointer address cannot wrap. 8330b57cec5SDimitry Andric static bool isNoWrap(PredicatedScalarEvolution &PSE, 83406c3fb27SDimitry Andric const DenseMap<Value *, const SCEV *> &Strides, Value *Ptr, Type *AccessTy, 83581ad6265SDimitry Andric Loop *L) { 8360b57cec5SDimitry Andric const SCEV *PtrScev = PSE.getSCEV(Ptr); 8370b57cec5SDimitry Andric if (PSE.getSE()->isLoopInvariant(PtrScev, L)) 8380b57cec5SDimitry Andric return true; 8390b57cec5SDimitry Andric 840bdd1243dSDimitry Andric int64_t Stride = getPtrStride(PSE, AccessTy, Ptr, L, Strides).value_or(0); 8410b57cec5SDimitry Andric if (Stride == 1 || PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW)) 8420b57cec5SDimitry Andric return true; 8430b57cec5SDimitry Andric 8440b57cec5SDimitry Andric return false; 8450b57cec5SDimitry Andric } 8460b57cec5SDimitry Andric 8474824e7fdSDimitry Andric static void visitPointers(Value *StartPtr, const Loop &InnermostLoop, 8484824e7fdSDimitry Andric function_ref<void(Value *)> AddPointer) { 8494824e7fdSDimitry Andric SmallPtrSet<Value *, 8> Visited; 8504824e7fdSDimitry Andric SmallVector<Value *> 
WorkList; 8514824e7fdSDimitry Andric WorkList.push_back(StartPtr); 8524824e7fdSDimitry Andric 8534824e7fdSDimitry Andric while (!WorkList.empty()) { 8544824e7fdSDimitry Andric Value *Ptr = WorkList.pop_back_val(); 8554824e7fdSDimitry Andric if (!Visited.insert(Ptr).second) 8564824e7fdSDimitry Andric continue; 8574824e7fdSDimitry Andric auto *PN = dyn_cast<PHINode>(Ptr); 8584824e7fdSDimitry Andric // SCEV does not look through non-header PHIs inside the loop. Such phis 8594824e7fdSDimitry Andric // can be analyzed by adding separate accesses for each incoming pointer 8604824e7fdSDimitry Andric // value. 8614824e7fdSDimitry Andric if (PN && InnermostLoop.contains(PN->getParent()) && 8624824e7fdSDimitry Andric PN->getParent() != InnermostLoop.getHeader()) { 8634824e7fdSDimitry Andric for (const Use &Inc : PN->incoming_values()) 8644824e7fdSDimitry Andric WorkList.push_back(Inc); 8654824e7fdSDimitry Andric } else 8664824e7fdSDimitry Andric AddPointer(Ptr); 8674824e7fdSDimitry Andric } 8684824e7fdSDimitry Andric } 8694824e7fdSDimitry Andric 870fcaf7f86SDimitry Andric // Walk back through the IR for a pointer, looking for a select like the 871fcaf7f86SDimitry Andric // following: 872fcaf7f86SDimitry Andric // 873fcaf7f86SDimitry Andric // %offset = select i1 %cmp, i64 %a, i64 %b 874fcaf7f86SDimitry Andric // %addr = getelementptr double, double* %base, i64 %offset 875fcaf7f86SDimitry Andric // %ld = load double, double* %addr, align 8 876fcaf7f86SDimitry Andric // 877fcaf7f86SDimitry Andric // We won't be able to form a single SCEVAddRecExpr from this since the 878fcaf7f86SDimitry Andric // address for each loop iteration depends on %cmp. We could potentially 879fcaf7f86SDimitry Andric // produce multiple valid SCEVAddRecExprs, though, and check all of them for 880fcaf7f86SDimitry Andric // memory safety/aliasing if needed. 
881fcaf7f86SDimitry Andric // 882fcaf7f86SDimitry Andric // If we encounter some IR we don't yet handle, or something obviously fine 883fcaf7f86SDimitry Andric // like a constant, then we just add the SCEV for that term to the list passed 884fcaf7f86SDimitry Andric // in by the caller. If we have a node that may potentially yield a valid 885fcaf7f86SDimitry Andric // SCEVAddRecExpr then we decompose it into parts and build the SCEV terms 886fcaf7f86SDimitry Andric // ourselves before adding to the list. 887bdd1243dSDimitry Andric static void findForkedSCEVs( 888bdd1243dSDimitry Andric ScalarEvolution *SE, const Loop *L, Value *Ptr, 889bdd1243dSDimitry Andric SmallVectorImpl<PointerIntPair<const SCEV *, 1, bool>> &ScevList, 890fcaf7f86SDimitry Andric unsigned Depth) { 891fcaf7f86SDimitry Andric // If our Value is a SCEVAddRecExpr, loop invariant, not an instruction, or 892fcaf7f86SDimitry Andric // we've exceeded our limit on recursion, just return whatever we have 893fcaf7f86SDimitry Andric // regardless of whether it can be used for a forked pointer or not, along 894fcaf7f86SDimitry Andric // with an indication of whether it might be a poison or undef value. 
895fcaf7f86SDimitry Andric const SCEV *Scev = SE->getSCEV(Ptr); 896fcaf7f86SDimitry Andric if (isa<SCEVAddRecExpr>(Scev) || L->isLoopInvariant(Ptr) || 897fcaf7f86SDimitry Andric !isa<Instruction>(Ptr) || Depth == 0) { 898bdd1243dSDimitry Andric ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)); 899fcaf7f86SDimitry Andric return; 900fcaf7f86SDimitry Andric } 901fcaf7f86SDimitry Andric 902fcaf7f86SDimitry Andric Depth--; 903fcaf7f86SDimitry Andric 904bdd1243dSDimitry Andric auto UndefPoisonCheck = [](PointerIntPair<const SCEV *, 1, bool> S) { 905bdd1243dSDimitry Andric return get<1>(S); 906bdd1243dSDimitry Andric }; 907bdd1243dSDimitry Andric 908bdd1243dSDimitry Andric auto GetBinOpExpr = [&SE](unsigned Opcode, const SCEV *L, const SCEV *R) { 909bdd1243dSDimitry Andric switch (Opcode) { 910bdd1243dSDimitry Andric case Instruction::Add: 911bdd1243dSDimitry Andric return SE->getAddExpr(L, R); 912bdd1243dSDimitry Andric case Instruction::Sub: 913bdd1243dSDimitry Andric return SE->getMinusSCEV(L, R); 914bdd1243dSDimitry Andric default: 915bdd1243dSDimitry Andric llvm_unreachable("Unexpected binary operator when walking ForkedPtrs"); 916bdd1243dSDimitry Andric } 917fcaf7f86SDimitry Andric }; 918fcaf7f86SDimitry Andric 919fcaf7f86SDimitry Andric Instruction *I = cast<Instruction>(Ptr); 920fcaf7f86SDimitry Andric unsigned Opcode = I->getOpcode(); 921fcaf7f86SDimitry Andric switch (Opcode) { 922fcaf7f86SDimitry Andric case Instruction::GetElementPtr: { 923fcaf7f86SDimitry Andric GetElementPtrInst *GEP = cast<GetElementPtrInst>(I); 924fcaf7f86SDimitry Andric Type *SourceTy = GEP->getSourceElementType(); 925fcaf7f86SDimitry Andric // We only handle base + single offset GEPs here for now. 926fcaf7f86SDimitry Andric // Not dealing with preexisting gathers yet, so no vectors. 
927fcaf7f86SDimitry Andric if (I->getNumOperands() != 2 || SourceTy->isVectorTy()) { 928bdd1243dSDimitry Andric ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(GEP)); 929fcaf7f86SDimitry Andric break; 930fcaf7f86SDimitry Andric } 931bdd1243dSDimitry Andric SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> BaseScevs; 932bdd1243dSDimitry Andric SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> OffsetScevs; 933fcaf7f86SDimitry Andric findForkedSCEVs(SE, L, I->getOperand(0), BaseScevs, Depth); 934fcaf7f86SDimitry Andric findForkedSCEVs(SE, L, I->getOperand(1), OffsetScevs, Depth); 935fcaf7f86SDimitry Andric 936fcaf7f86SDimitry Andric // See if we need to freeze our fork... 937fcaf7f86SDimitry Andric bool NeedsFreeze = any_of(BaseScevs, UndefPoisonCheck) || 938fcaf7f86SDimitry Andric any_of(OffsetScevs, UndefPoisonCheck); 939fcaf7f86SDimitry Andric 940fcaf7f86SDimitry Andric // Check that we only have a single fork, on either the base or the offset. 941fcaf7f86SDimitry Andric // Copy the SCEV across for the one without a fork in order to generate 942fcaf7f86SDimitry Andric // the full SCEV for both sides of the GEP. 943fcaf7f86SDimitry Andric if (OffsetScevs.size() == 2 && BaseScevs.size() == 1) 944fcaf7f86SDimitry Andric BaseScevs.push_back(BaseScevs[0]); 945fcaf7f86SDimitry Andric else if (BaseScevs.size() == 2 && OffsetScevs.size() == 1) 946fcaf7f86SDimitry Andric OffsetScevs.push_back(OffsetScevs[0]); 947fcaf7f86SDimitry Andric else { 948bdd1243dSDimitry Andric ScevList.emplace_back(Scev, NeedsFreeze); 949fcaf7f86SDimitry Andric break; 950fcaf7f86SDimitry Andric } 951fcaf7f86SDimitry Andric 952fcaf7f86SDimitry Andric // Find the pointer type we need to extend to. 953fcaf7f86SDimitry Andric Type *IntPtrTy = SE->getEffectiveSCEVType( 954fcaf7f86SDimitry Andric SE->getSCEV(GEP->getPointerOperand())->getType()); 955fcaf7f86SDimitry Andric 956fcaf7f86SDimitry Andric // Find the size of the type being pointed to. 
We only have a single 957fcaf7f86SDimitry Andric // index term (guarded above) so we don't need to index into arrays or 958fcaf7f86SDimitry Andric // structures, just get the size of the scalar value. 959fcaf7f86SDimitry Andric const SCEV *Size = SE->getSizeOfExpr(IntPtrTy, SourceTy); 960fcaf7f86SDimitry Andric 961fcaf7f86SDimitry Andric // Scale up the offsets by the size of the type, then add to the bases. 962fcaf7f86SDimitry Andric const SCEV *Scaled1 = SE->getMulExpr( 963bdd1243dSDimitry Andric Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[0]), IntPtrTy)); 964fcaf7f86SDimitry Andric const SCEV *Scaled2 = SE->getMulExpr( 965bdd1243dSDimitry Andric Size, SE->getTruncateOrSignExtend(get<0>(OffsetScevs[1]), IntPtrTy)); 966bdd1243dSDimitry Andric ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[0]), Scaled1), 967bdd1243dSDimitry Andric NeedsFreeze); 968bdd1243dSDimitry Andric ScevList.emplace_back(SE->getAddExpr(get<0>(BaseScevs[1]), Scaled2), 969bdd1243dSDimitry Andric NeedsFreeze); 970fcaf7f86SDimitry Andric break; 971fcaf7f86SDimitry Andric } 972fcaf7f86SDimitry Andric case Instruction::Select: { 973bdd1243dSDimitry Andric SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs; 974fcaf7f86SDimitry Andric // A select means we've found a forked pointer, but we currently only 975fcaf7f86SDimitry Andric // support a single select per pointer so if there's another behind this 976fcaf7f86SDimitry Andric // then we just bail out and return the generic SCEV. 
977fcaf7f86SDimitry Andric findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth); 978fcaf7f86SDimitry Andric findForkedSCEVs(SE, L, I->getOperand(2), ChildScevs, Depth); 979fcaf7f86SDimitry Andric if (ChildScevs.size() == 2) { 980fcaf7f86SDimitry Andric ScevList.push_back(ChildScevs[0]); 981fcaf7f86SDimitry Andric ScevList.push_back(ChildScevs[1]); 982fcaf7f86SDimitry Andric } else 983bdd1243dSDimitry Andric ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)); 984bdd1243dSDimitry Andric break; 985bdd1243dSDimitry Andric } 9865f757f3fSDimitry Andric case Instruction::PHI: { 9875f757f3fSDimitry Andric SmallVector<PointerIntPair<const SCEV *, 1, bool>, 2> ChildScevs; 9885f757f3fSDimitry Andric // A phi means we've found a forked pointer, but we currently only 9895f757f3fSDimitry Andric // support a single phi per pointer so if there's another behind this 9905f757f3fSDimitry Andric // then we just bail out and return the generic SCEV. 9915f757f3fSDimitry Andric if (I->getNumOperands() == 2) { 9925f757f3fSDimitry Andric findForkedSCEVs(SE, L, I->getOperand(0), ChildScevs, Depth); 9935f757f3fSDimitry Andric findForkedSCEVs(SE, L, I->getOperand(1), ChildScevs, Depth); 9945f757f3fSDimitry Andric } 9955f757f3fSDimitry Andric if (ChildScevs.size() == 2) { 9965f757f3fSDimitry Andric ScevList.push_back(ChildScevs[0]); 9975f757f3fSDimitry Andric ScevList.push_back(ChildScevs[1]); 9985f757f3fSDimitry Andric } else 9995f757f3fSDimitry Andric ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)); 10005f757f3fSDimitry Andric break; 10015f757f3fSDimitry Andric } 1002bdd1243dSDimitry Andric case Instruction::Add: 1003bdd1243dSDimitry Andric case Instruction::Sub: { 1004bdd1243dSDimitry Andric SmallVector<PointerIntPair<const SCEV *, 1, bool>> LScevs; 1005bdd1243dSDimitry Andric SmallVector<PointerIntPair<const SCEV *, 1, bool>> RScevs; 1006bdd1243dSDimitry Andric findForkedSCEVs(SE, L, I->getOperand(0), LScevs, Depth); 1007bdd1243dSDimitry Andric 
findForkedSCEVs(SE, L, I->getOperand(1), RScevs, Depth); 1008bdd1243dSDimitry Andric 1009bdd1243dSDimitry Andric // See if we need to freeze our fork... 1010bdd1243dSDimitry Andric bool NeedsFreeze = 1011bdd1243dSDimitry Andric any_of(LScevs, UndefPoisonCheck) || any_of(RScevs, UndefPoisonCheck); 1012bdd1243dSDimitry Andric 1013bdd1243dSDimitry Andric // Check that we only have a single fork, on either the left or right side. 1014bdd1243dSDimitry Andric // Copy the SCEV across for the one without a fork in order to generate 1015bdd1243dSDimitry Andric // the full SCEV for both sides of the BinOp. 1016bdd1243dSDimitry Andric if (LScevs.size() == 2 && RScevs.size() == 1) 1017bdd1243dSDimitry Andric RScevs.push_back(RScevs[0]); 1018bdd1243dSDimitry Andric else if (RScevs.size() == 2 && LScevs.size() == 1) 1019bdd1243dSDimitry Andric LScevs.push_back(LScevs[0]); 1020bdd1243dSDimitry Andric else { 1021bdd1243dSDimitry Andric ScevList.emplace_back(Scev, NeedsFreeze); 1022bdd1243dSDimitry Andric break; 1023bdd1243dSDimitry Andric } 1024bdd1243dSDimitry Andric 1025bdd1243dSDimitry Andric ScevList.emplace_back( 1026bdd1243dSDimitry Andric GetBinOpExpr(Opcode, get<0>(LScevs[0]), get<0>(RScevs[0])), 1027bdd1243dSDimitry Andric NeedsFreeze); 1028bdd1243dSDimitry Andric ScevList.emplace_back( 1029bdd1243dSDimitry Andric GetBinOpExpr(Opcode, get<0>(LScevs[1]), get<0>(RScevs[1])), 1030bdd1243dSDimitry Andric NeedsFreeze); 1031fcaf7f86SDimitry Andric break; 1032fcaf7f86SDimitry Andric } 1033fcaf7f86SDimitry Andric default: 1034fcaf7f86SDimitry Andric // Just return the current SCEV if we haven't handled the instruction yet. 
1035fcaf7f86SDimitry Andric LLVM_DEBUG(dbgs() << "ForkedPtr unhandled instruction: " << *I << "\n"); 1036bdd1243dSDimitry Andric ScevList.emplace_back(Scev, !isGuaranteedNotToBeUndefOrPoison(Ptr)); 1037fcaf7f86SDimitry Andric break; 1038fcaf7f86SDimitry Andric } 1039fcaf7f86SDimitry Andric } 1040fcaf7f86SDimitry Andric 1041bdd1243dSDimitry Andric static SmallVector<PointerIntPair<const SCEV *, 1, bool>> 1042fcaf7f86SDimitry Andric findForkedPointer(PredicatedScalarEvolution &PSE, 104306c3fb27SDimitry Andric const DenseMap<Value *, const SCEV *> &StridesMap, Value *Ptr, 1044fcaf7f86SDimitry Andric const Loop *L) { 1045fcaf7f86SDimitry Andric ScalarEvolution *SE = PSE.getSE(); 1046fcaf7f86SDimitry Andric assert(SE->isSCEVable(Ptr->getType()) && "Value is not SCEVable!"); 1047bdd1243dSDimitry Andric SmallVector<PointerIntPair<const SCEV *, 1, bool>> Scevs; 1048fcaf7f86SDimitry Andric findForkedSCEVs(SE, L, Ptr, Scevs, MaxForkedSCEVDepth); 1049fcaf7f86SDimitry Andric 1050bdd1243dSDimitry Andric // For now, we will only accept a forked pointer with two possible SCEVs 1051bdd1243dSDimitry Andric // that are either SCEVAddRecExprs or loop invariant. 
1052bdd1243dSDimitry Andric if (Scevs.size() == 2 && 1053bdd1243dSDimitry Andric (isa<SCEVAddRecExpr>(get<0>(Scevs[0])) || 1054bdd1243dSDimitry Andric SE->isLoopInvariant(get<0>(Scevs[0]), L)) && 1055bdd1243dSDimitry Andric (isa<SCEVAddRecExpr>(get<0>(Scevs[1])) || 1056bdd1243dSDimitry Andric SE->isLoopInvariant(get<0>(Scevs[1]), L))) { 1057bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Found forked pointer: " << *Ptr << "\n"); 1058bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "\t(1) " << *get<0>(Scevs[0]) << "\n"); 1059bdd1243dSDimitry Andric LLVM_DEBUG(dbgs() << "\t(2) " << *get<0>(Scevs[1]) << "\n"); 1060fcaf7f86SDimitry Andric return Scevs; 1061bdd1243dSDimitry Andric } 1062fcaf7f86SDimitry Andric 1063bdd1243dSDimitry Andric return {{replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), false}}; 1064fcaf7f86SDimitry Andric } 1065fcaf7f86SDimitry Andric 10660b57cec5SDimitry Andric bool AccessAnalysis::createCheckForAccess(RuntimePointerChecking &RtCheck, 106781ad6265SDimitry Andric MemAccessInfo Access, Type *AccessTy, 106806c3fb27SDimitry Andric const DenseMap<Value *, const SCEV *> &StridesMap, 10690b57cec5SDimitry Andric DenseMap<Value *, unsigned> &DepSetId, 10700b57cec5SDimitry Andric Loop *TheLoop, unsigned &RunningDepId, 10710b57cec5SDimitry Andric unsigned ASId, bool ShouldCheckWrap, 10720b57cec5SDimitry Andric bool Assume) { 10730b57cec5SDimitry Andric Value *Ptr = Access.getPointer(); 10740b57cec5SDimitry Andric 1075bdd1243dSDimitry Andric SmallVector<PointerIntPair<const SCEV *, 1, bool>> TranslatedPtrs = 1076fcaf7f86SDimitry Andric findForkedPointer(PSE, StridesMap, Ptr, TheLoop); 107781ad6265SDimitry Andric 107881ad6265SDimitry Andric for (auto &P : TranslatedPtrs) { 1079bdd1243dSDimitry Andric const SCEV *PtrExpr = get<0>(P); 108081ad6265SDimitry Andric if (!hasComputableBounds(PSE, Ptr, PtrExpr, TheLoop, Assume)) 10810b57cec5SDimitry Andric return false; 10820b57cec5SDimitry Andric 10830b57cec5SDimitry Andric // When we run after a failing 
dependency check we have to make sure 10840b57cec5SDimitry Andric // we don't have wrapping pointers. 108581ad6265SDimitry Andric if (ShouldCheckWrap) { 108681ad6265SDimitry Andric // Skip wrap checking when translating pointers. 108781ad6265SDimitry Andric if (TranslatedPtrs.size() > 1) 108881ad6265SDimitry Andric return false; 108981ad6265SDimitry Andric 109081ad6265SDimitry Andric if (!isNoWrap(PSE, StridesMap, Ptr, AccessTy, TheLoop)) { 10910b57cec5SDimitry Andric auto *Expr = PSE.getSCEV(Ptr); 10920b57cec5SDimitry Andric if (!Assume || !isa<SCEVAddRecExpr>(Expr)) 10930b57cec5SDimitry Andric return false; 10940b57cec5SDimitry Andric PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW); 10950b57cec5SDimitry Andric } 109681ad6265SDimitry Andric } 109781ad6265SDimitry Andric // If there's only one option for Ptr, look it up after bounds and wrap 109881ad6265SDimitry Andric // checking, because assumptions might have been added to PSE. 109981ad6265SDimitry Andric if (TranslatedPtrs.size() == 1) 1100bdd1243dSDimitry Andric TranslatedPtrs[0] = {replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr), 1101bdd1243dSDimitry Andric false}; 110281ad6265SDimitry Andric } 110381ad6265SDimitry Andric 1104bdd1243dSDimitry Andric for (auto [PtrExpr, NeedsFreeze] : TranslatedPtrs) { 11050b57cec5SDimitry Andric // The id of the dependence set. 11060b57cec5SDimitry Andric unsigned DepId; 11070b57cec5SDimitry Andric 11080b57cec5SDimitry Andric if (isDependencyCheckNeeded()) { 11090b57cec5SDimitry Andric Value *Leader = DepCands.getLeaderValue(Access).getPointer(); 11100b57cec5SDimitry Andric unsigned &LeaderId = DepSetId[Leader]; 11110b57cec5SDimitry Andric if (!LeaderId) 11120b57cec5SDimitry Andric LeaderId = RunningDepId++; 11130b57cec5SDimitry Andric DepId = LeaderId; 11140b57cec5SDimitry Andric } else 11150b57cec5SDimitry Andric // Each access has its own dependence set. 
11160b57cec5SDimitry Andric DepId = RunningDepId++; 11170b57cec5SDimitry Andric 11180b57cec5SDimitry Andric bool IsWrite = Access.getInt(); 111981ad6265SDimitry Andric RtCheck.insert(TheLoop, Ptr, PtrExpr, AccessTy, IsWrite, DepId, ASId, PSE, 1120bdd1243dSDimitry Andric NeedsFreeze); 11210b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); 112281ad6265SDimitry Andric } 11230b57cec5SDimitry Andric 11240b57cec5SDimitry Andric return true; 11250b57cec5SDimitry Andric } 11260b57cec5SDimitry Andric 11270b57cec5SDimitry Andric bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, 11280b57cec5SDimitry Andric ScalarEvolution *SE, Loop *TheLoop, 112906c3fb27SDimitry Andric const DenseMap<Value *, const SCEV *> &StridesMap, 113081ad6265SDimitry Andric Value *&UncomputablePtr, bool ShouldCheckWrap) { 11310b57cec5SDimitry Andric // Find pointers with computable bounds. We are going to use this information 11320b57cec5SDimitry Andric // to place a runtime bound check. 11330b57cec5SDimitry Andric bool CanDoRT = true; 11340b57cec5SDimitry Andric 11355ffd83dbSDimitry Andric bool MayNeedRTCheck = false; 11360b57cec5SDimitry Andric if (!IsRTCheckAnalysisNeeded) return true; 11370b57cec5SDimitry Andric 11380b57cec5SDimitry Andric bool IsDepCheckNeeded = isDependencyCheckNeeded(); 11390b57cec5SDimitry Andric 11400b57cec5SDimitry Andric // We assign a consecutive id to access from different alias sets. 11410b57cec5SDimitry Andric // Accesses between different groups doesn't need to be checked. 
11425ffd83dbSDimitry Andric unsigned ASId = 0; 11430b57cec5SDimitry Andric for (auto &AS : AST) { 11440b57cec5SDimitry Andric int NumReadPtrChecks = 0; 11450b57cec5SDimitry Andric int NumWritePtrChecks = 0; 11460b57cec5SDimitry Andric bool CanDoAliasSetRT = true; 11475ffd83dbSDimitry Andric ++ASId; 11487a6dacacSDimitry Andric auto ASPointers = AS.getPointers(); 11490b57cec5SDimitry Andric 11500b57cec5SDimitry Andric // We assign consecutive id to access from different dependence sets. 11510b57cec5SDimitry Andric // Accesses within the same set don't need a runtime check. 11520b57cec5SDimitry Andric unsigned RunningDepId = 1; 11530b57cec5SDimitry Andric DenseMap<Value *, unsigned> DepSetId; 11540b57cec5SDimitry Andric 1155fcaf7f86SDimitry Andric SmallVector<std::pair<MemAccessInfo, Type *>, 4> Retries; 11560b57cec5SDimitry Andric 11571106035dSDimitry Andric // First, count how many write and read accesses are in the alias set. Also 11581106035dSDimitry Andric // collect MemAccessInfos for later. 11591106035dSDimitry Andric SmallVector<MemAccessInfo, 4> AccessInfos; 11600fca6ea1SDimitry Andric for (const Value *ConstPtr : ASPointers) { 11610fca6ea1SDimitry Andric Value *Ptr = const_cast<Value *>(ConstPtr); 11620b57cec5SDimitry Andric bool IsWrite = Accesses.count(MemAccessInfo(Ptr, true)); 11630b57cec5SDimitry Andric if (IsWrite) 11640b57cec5SDimitry Andric ++NumWritePtrChecks; 11650b57cec5SDimitry Andric else 11660b57cec5SDimitry Andric ++NumReadPtrChecks; 11671106035dSDimitry Andric AccessInfos.emplace_back(Ptr, IsWrite); 11680b57cec5SDimitry Andric } 11690b57cec5SDimitry Andric 11701106035dSDimitry Andric // We do not need runtime checks for this alias set, if there are no writes 11711106035dSDimitry Andric // or a single write and no reads. 
11721106035dSDimitry Andric if (NumWritePtrChecks == 0 || 11731106035dSDimitry Andric (NumWritePtrChecks == 1 && NumReadPtrChecks == 0)) { 11747a6dacacSDimitry Andric assert((ASPointers.size() <= 1 || 11757a6dacacSDimitry Andric all_of(ASPointers, 11767a6dacacSDimitry Andric [this](const Value *Ptr) { 11777a6dacacSDimitry Andric MemAccessInfo AccessWrite(const_cast<Value *>(Ptr), 11787a6dacacSDimitry Andric true); 11791106035dSDimitry Andric return DepCands.findValue(AccessWrite) == DepCands.end(); 11805ffd83dbSDimitry Andric })) && 11815ffd83dbSDimitry Andric "Can only skip updating CanDoRT below, if all entries in AS " 11825ffd83dbSDimitry Andric "are reads or there is at most 1 entry"); 11835ffd83dbSDimitry Andric continue; 11845ffd83dbSDimitry Andric } 11851106035dSDimitry Andric 11861106035dSDimitry Andric for (auto &Access : AccessInfos) { 1187fcaf7f86SDimitry Andric for (const auto &AccessTy : Accesses[Access]) { 118881ad6265SDimitry Andric if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, 118981ad6265SDimitry Andric DepSetId, TheLoop, RunningDepId, ASId, 119081ad6265SDimitry Andric ShouldCheckWrap, false)) { 11911106035dSDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Can't find bounds for ptr:" 11921106035dSDimitry Andric << *Access.getPointer() << '\n'); 1193fcaf7f86SDimitry Andric Retries.push_back({Access, AccessTy}); 11941106035dSDimitry Andric CanDoAliasSetRT = false; 11955ffd83dbSDimitry Andric } 11961106035dSDimitry Andric } 119781ad6265SDimitry Andric } 11981106035dSDimitry Andric 11991106035dSDimitry Andric // Note that this function computes CanDoRT and MayNeedRTCheck 12001106035dSDimitry Andric // independently. For example CanDoRT=false, MayNeedRTCheck=false means that 12011106035dSDimitry Andric // we have a pointer for which we couldn't find the bounds but we don't 12021106035dSDimitry Andric // actually need to emit any checks so it does not matter. 
12031106035dSDimitry Andric // 12041106035dSDimitry Andric // We need runtime checks for this alias set, if there are at least 2 12051106035dSDimitry Andric // dependence sets (in which case RunningDepId > 2) or if we need to re-try 12061106035dSDimitry Andric // any bound checks (because in that case the number of dependence sets is 12071106035dSDimitry Andric // incomplete). 12081106035dSDimitry Andric bool NeedsAliasSetRTCheck = RunningDepId > 2 || !Retries.empty(); 12090b57cec5SDimitry Andric 12100b57cec5SDimitry Andric // We need to perform run-time alias checks, but some pointers had bounds 12110b57cec5SDimitry Andric // that couldn't be checked. 12120b57cec5SDimitry Andric if (NeedsAliasSetRTCheck && !CanDoAliasSetRT) { 12130b57cec5SDimitry Andric // Reset the CanDoSetRt flag and retry all accesses that have failed. 12140b57cec5SDimitry Andric // We know that we need these checks, so we can now be more aggressive 12150b57cec5SDimitry Andric // and add further checks if required (overflow checks). 
12160b57cec5SDimitry Andric CanDoAliasSetRT = true; 12170fca6ea1SDimitry Andric for (const auto &[Access, AccessTy] : Retries) { 121881ad6265SDimitry Andric if (!createCheckForAccess(RtCheck, Access, AccessTy, StridesMap, 121981ad6265SDimitry Andric DepSetId, TheLoop, RunningDepId, ASId, 12200b57cec5SDimitry Andric ShouldCheckWrap, /*Assume=*/true)) { 12210b57cec5SDimitry Andric CanDoAliasSetRT = false; 122281ad6265SDimitry Andric UncomputablePtr = Access.getPointer(); 12230b57cec5SDimitry Andric break; 12240b57cec5SDimitry Andric } 12250b57cec5SDimitry Andric } 122681ad6265SDimitry Andric } 12270b57cec5SDimitry Andric 12280b57cec5SDimitry Andric CanDoRT &= CanDoAliasSetRT; 12295ffd83dbSDimitry Andric MayNeedRTCheck |= NeedsAliasSetRTCheck; 12300b57cec5SDimitry Andric ++ASId; 12310b57cec5SDimitry Andric } 12320b57cec5SDimitry Andric 12330b57cec5SDimitry Andric // If the pointers that we would use for the bounds comparison have different 12340b57cec5SDimitry Andric // address spaces, assume the values aren't directly comparable, so we can't 12350b57cec5SDimitry Andric // use them for the runtime check. We also have to assume they could 12360b57cec5SDimitry Andric // overlap. In the future there should be metadata for whether address spaces 12370b57cec5SDimitry Andric // are disjoint. 12380b57cec5SDimitry Andric unsigned NumPointers = RtCheck.Pointers.size(); 12390b57cec5SDimitry Andric for (unsigned i = 0; i < NumPointers; ++i) { 12400b57cec5SDimitry Andric for (unsigned j = i + 1; j < NumPointers; ++j) { 12410b57cec5SDimitry Andric // Only need to check pointers between two different dependency sets. 12420b57cec5SDimitry Andric if (RtCheck.Pointers[i].DependencySetId == 12430b57cec5SDimitry Andric RtCheck.Pointers[j].DependencySetId) 12440b57cec5SDimitry Andric continue; 12450b57cec5SDimitry Andric // Only need to check pointers in the same alias set. 
12460b57cec5SDimitry Andric if (RtCheck.Pointers[i].AliasSetId != RtCheck.Pointers[j].AliasSetId) 12470b57cec5SDimitry Andric continue; 12480b57cec5SDimitry Andric 12490b57cec5SDimitry Andric Value *PtrI = RtCheck.Pointers[i].PointerValue; 12500b57cec5SDimitry Andric Value *PtrJ = RtCheck.Pointers[j].PointerValue; 12510b57cec5SDimitry Andric 12520b57cec5SDimitry Andric unsigned ASi = PtrI->getType()->getPointerAddressSpace(); 12530b57cec5SDimitry Andric unsigned ASj = PtrJ->getType()->getPointerAddressSpace(); 12540b57cec5SDimitry Andric if (ASi != ASj) { 12550b57cec5SDimitry Andric LLVM_DEBUG( 12560b57cec5SDimitry Andric dbgs() << "LAA: Runtime check would require comparison between" 12570b57cec5SDimitry Andric " different address spaces\n"); 12580b57cec5SDimitry Andric return false; 12590b57cec5SDimitry Andric } 12600b57cec5SDimitry Andric } 12610b57cec5SDimitry Andric } 12620b57cec5SDimitry Andric 12635ffd83dbSDimitry Andric if (MayNeedRTCheck && CanDoRT) 12640b57cec5SDimitry Andric RtCheck.generateChecks(DepCands, IsDepCheckNeeded); 12650b57cec5SDimitry Andric 12660b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: We need to do " << RtCheck.getNumberOfChecks() 12670b57cec5SDimitry Andric << " pointer comparisons.\n"); 12680b57cec5SDimitry Andric 12695ffd83dbSDimitry Andric // If we can do run-time checks, but there are no checks, no runtime checks 12705ffd83dbSDimitry Andric // are needed. This can happen when all pointers point to the same underlying 12715ffd83dbSDimitry Andric // object for example. 12725ffd83dbSDimitry Andric RtCheck.Need = CanDoRT ? 
RtCheck.getNumberOfChecks() != 0 : MayNeedRTCheck; 12730b57cec5SDimitry Andric 12745ffd83dbSDimitry Andric bool CanDoRTIfNeeded = !RtCheck.Need || CanDoRT; 12750b57cec5SDimitry Andric if (!CanDoRTIfNeeded) 12760b57cec5SDimitry Andric RtCheck.reset(); 12770b57cec5SDimitry Andric return CanDoRTIfNeeded; 12780b57cec5SDimitry Andric } 12790b57cec5SDimitry Andric 12800b57cec5SDimitry Andric void AccessAnalysis::processMemAccesses() { 12810b57cec5SDimitry Andric // We process the set twice: first we process read-write pointers, last we 12820b57cec5SDimitry Andric // process read-only pointers. This allows us to skip dependence tests for 12830b57cec5SDimitry Andric // read-only pointers. 12840b57cec5SDimitry Andric 12850b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Processing memory accesses...\n"); 12860b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " AST: "; AST.dump()); 12870b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Accesses(" << Accesses.size() << "):\n"); 12880b57cec5SDimitry Andric LLVM_DEBUG({ 12890fca6ea1SDimitry Andric for (const auto &[A, _] : Accesses) 12900fca6ea1SDimitry Andric dbgs() << "\t" << *A.getPointer() << " (" 12910fca6ea1SDimitry Andric << (A.getInt() ? "write" 12920fca6ea1SDimitry Andric : (ReadOnlyPtr.count(A.getPointer()) ? "read-only" 129381ad6265SDimitry Andric : "read")) 129481ad6265SDimitry Andric << ")\n"; 12950b57cec5SDimitry Andric }); 12960b57cec5SDimitry Andric 12970b57cec5SDimitry Andric // The AliasSetTracker has nicely partitioned our pointers by metadata 12980b57cec5SDimitry Andric // compatibility and potential for underlying-object overlap. As a result, we 12990b57cec5SDimitry Andric // only need to check for potential pointer dependencies within each alias 13000b57cec5SDimitry Andric // set. 
1301e8d8bef9SDimitry Andric for (const auto &AS : AST) { 13020b57cec5SDimitry Andric // Note that both the alias-set tracker and the alias sets themselves used 13037a6dacacSDimitry Andric // ordered collections internally and so the iteration order here is 13047a6dacacSDimitry Andric // deterministic. 13057a6dacacSDimitry Andric auto ASPointers = AS.getPointers(); 13060b57cec5SDimitry Andric 13070b57cec5SDimitry Andric bool SetHasWrite = false; 13080b57cec5SDimitry Andric 13090b57cec5SDimitry Andric // Map of pointers to last access encountered. 13100b57cec5SDimitry Andric typedef DenseMap<const Value*, MemAccessInfo> UnderlyingObjToAccessMap; 13110b57cec5SDimitry Andric UnderlyingObjToAccessMap ObjToLastAccess; 13120b57cec5SDimitry Andric 13130b57cec5SDimitry Andric // Set of access to check after all writes have been processed. 131481ad6265SDimitry Andric PtrAccessMap DeferredAccesses; 13150b57cec5SDimitry Andric 13160b57cec5SDimitry Andric // Iterate over each alias set twice, once to process read/write pointers, 13170b57cec5SDimitry Andric // and then to process read-only pointers. 13180b57cec5SDimitry Andric for (int SetIteration = 0; SetIteration < 2; ++SetIteration) { 13190b57cec5SDimitry Andric bool UseDeferred = SetIteration > 0; 132081ad6265SDimitry Andric PtrAccessMap &S = UseDeferred ? DeferredAccesses : Accesses; 13210b57cec5SDimitry Andric 13220fca6ea1SDimitry Andric for (const Value *ConstPtr : ASPointers) { 13230fca6ea1SDimitry Andric Value *Ptr = const_cast<Value *>(ConstPtr); 13240b57cec5SDimitry Andric 13250b57cec5SDimitry Andric // For a single memory access in AliasSetTracker, Accesses may contain 13260b57cec5SDimitry Andric // both read and write, and they both need to be handled for CheckDeps. 
13270fca6ea1SDimitry Andric for (const auto &[AC, _] : S) { 13280fca6ea1SDimitry Andric if (AC.getPointer() != Ptr) 13290b57cec5SDimitry Andric continue; 13300b57cec5SDimitry Andric 13310fca6ea1SDimitry Andric bool IsWrite = AC.getInt(); 13320b57cec5SDimitry Andric 13330b57cec5SDimitry Andric // If we're using the deferred access set, then it contains only 13340b57cec5SDimitry Andric // reads. 13350b57cec5SDimitry Andric bool IsReadOnlyPtr = ReadOnlyPtr.count(Ptr) && !IsWrite; 13360b57cec5SDimitry Andric if (UseDeferred && !IsReadOnlyPtr) 13370b57cec5SDimitry Andric continue; 13380b57cec5SDimitry Andric // Otherwise, the pointer must be in the PtrAccessSet, either as a 13390b57cec5SDimitry Andric // read or a write. 13400b57cec5SDimitry Andric assert(((IsReadOnlyPtr && UseDeferred) || IsWrite || 13410b57cec5SDimitry Andric S.count(MemAccessInfo(Ptr, false))) && 13420b57cec5SDimitry Andric "Alias-set pointer not in the access set?"); 13430b57cec5SDimitry Andric 13440b57cec5SDimitry Andric MemAccessInfo Access(Ptr, IsWrite); 13450b57cec5SDimitry Andric DepCands.insert(Access); 13460b57cec5SDimitry Andric 13470b57cec5SDimitry Andric // Memorize read-only pointers for later processing and skip them in 13480b57cec5SDimitry Andric // the first round (they need to be checked after we have seen all 13490b57cec5SDimitry Andric // write pointers). Note: we also mark pointer that are not 13500b57cec5SDimitry Andric // consecutive as "read-only" pointers (so that we check 13510b57cec5SDimitry Andric // "a[b[i]] +="). Hence, we need the second check for "!IsWrite". 13520b57cec5SDimitry Andric if (!UseDeferred && IsReadOnlyPtr) { 135381ad6265SDimitry Andric // We only use the pointer keys, the types vector values don't 135481ad6265SDimitry Andric // matter. 
135581ad6265SDimitry Andric DeferredAccesses.insert({Access, {}}); 13560b57cec5SDimitry Andric continue; 13570b57cec5SDimitry Andric } 13580b57cec5SDimitry Andric 13590b57cec5SDimitry Andric // If this is a write - check other reads and writes for conflicts. If 13600b57cec5SDimitry Andric // this is a read only check other writes for conflicts (but only if 13610b57cec5SDimitry Andric // there is no other write to the ptr - this is an optimization to 13620b57cec5SDimitry Andric // catch "a[i] = a[i] + " without having to do a dependence check). 13630b57cec5SDimitry Andric if ((IsWrite || IsReadOnlyPtr) && SetHasWrite) { 13640b57cec5SDimitry Andric CheckDeps.push_back(Access); 13650b57cec5SDimitry Andric IsRTCheckAnalysisNeeded = true; 13660b57cec5SDimitry Andric } 13670b57cec5SDimitry Andric 13680b57cec5SDimitry Andric if (IsWrite) 13690b57cec5SDimitry Andric SetHasWrite = true; 13700b57cec5SDimitry Andric 13710b57cec5SDimitry Andric // Create sets of pointers connected by a shared alias set and 13720b57cec5SDimitry Andric // underlying object. 13730b57cec5SDimitry Andric typedef SmallVector<const Value *, 16> ValueVector; 13740b57cec5SDimitry Andric ValueVector TempObjects; 13750b57cec5SDimitry Andric 13765f757f3fSDimitry Andric UnderlyingObjects[Ptr] = {}; 13775f757f3fSDimitry Andric SmallVector<const Value *, 16> &UOs = UnderlyingObjects[Ptr]; 13785f757f3fSDimitry Andric ::getUnderlyingObjects(Ptr, UOs, LI); 13790b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 13800b57cec5SDimitry Andric << "Underlying objects for pointer " << *Ptr << "\n"); 13815f757f3fSDimitry Andric for (const Value *UnderlyingObj : UOs) { 13820b57cec5SDimitry Andric // nullptr never alias, don't join sets for pointer that have "null" 13830b57cec5SDimitry Andric // in their UnderlyingObjects list. 
13840b57cec5SDimitry Andric if (isa<ConstantPointerNull>(UnderlyingObj) && 13850b57cec5SDimitry Andric !NullPointerIsDefined( 13860b57cec5SDimitry Andric TheLoop->getHeader()->getParent(), 13870b57cec5SDimitry Andric UnderlyingObj->getType()->getPointerAddressSpace())) 13880b57cec5SDimitry Andric continue; 13890b57cec5SDimitry Andric 13900b57cec5SDimitry Andric UnderlyingObjToAccessMap::iterator Prev = 13910b57cec5SDimitry Andric ObjToLastAccess.find(UnderlyingObj); 13920b57cec5SDimitry Andric if (Prev != ObjToLastAccess.end()) 13930b57cec5SDimitry Andric DepCands.unionSets(Access, Prev->second); 13940b57cec5SDimitry Andric 13950b57cec5SDimitry Andric ObjToLastAccess[UnderlyingObj] = Access; 13960b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << " " << *UnderlyingObj << "\n"); 13970b57cec5SDimitry Andric } 13980b57cec5SDimitry Andric } 13990b57cec5SDimitry Andric } 14000b57cec5SDimitry Andric } 14010b57cec5SDimitry Andric } 14020b57cec5SDimitry Andric } 14030b57cec5SDimitry Andric 14040b57cec5SDimitry Andric /// Return true if an AddRec pointer \p Ptr is unsigned non-wrapping, 14050b57cec5SDimitry Andric /// i.e. monotonically increasing/decreasing. 14060b57cec5SDimitry Andric static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, 14070b57cec5SDimitry Andric PredicatedScalarEvolution &PSE, const Loop *L) { 140806c3fb27SDimitry Andric 14090b57cec5SDimitry Andric // FIXME: This should probably only return true for NUW. 
14100b57cec5SDimitry Andric if (AR->getNoWrapFlags(SCEV::NoWrapMask)) 14110b57cec5SDimitry Andric return true; 14120b57cec5SDimitry Andric 141306c3fb27SDimitry Andric if (PSE.hasNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW)) 141406c3fb27SDimitry Andric return true; 141506c3fb27SDimitry Andric 14160b57cec5SDimitry Andric // Scalar evolution does not propagate the non-wrapping flags to values that 14170b57cec5SDimitry Andric // are derived from a non-wrapping induction variable because non-wrapping 14180b57cec5SDimitry Andric // could be flow-sensitive. 14190b57cec5SDimitry Andric // 14200b57cec5SDimitry Andric // Look through the potentially overflowing instruction to try to prove 14210b57cec5SDimitry Andric // non-wrapping for the *specific* value of Ptr. 14220b57cec5SDimitry Andric 14230b57cec5SDimitry Andric // The arithmetic implied by an inbounds GEP can't overflow. 14240b57cec5SDimitry Andric auto *GEP = dyn_cast<GetElementPtrInst>(Ptr); 14250b57cec5SDimitry Andric if (!GEP || !GEP->isInBounds()) 14260b57cec5SDimitry Andric return false; 14270b57cec5SDimitry Andric 14280b57cec5SDimitry Andric // Make sure there is only one non-const index and analyze that. 14290b57cec5SDimitry Andric Value *NonConstIndex = nullptr; 1430e8d8bef9SDimitry Andric for (Value *Index : GEP->indices()) 14310b57cec5SDimitry Andric if (!isa<ConstantInt>(Index)) { 14320b57cec5SDimitry Andric if (NonConstIndex) 14330b57cec5SDimitry Andric return false; 14340b57cec5SDimitry Andric NonConstIndex = Index; 14350b57cec5SDimitry Andric } 14360b57cec5SDimitry Andric if (!NonConstIndex) 14370b57cec5SDimitry Andric // The recurrence is on the pointer, ignore for now. 14380b57cec5SDimitry Andric return false; 14390b57cec5SDimitry Andric 14400b57cec5SDimitry Andric // The index in GEP is signed. It is non-wrapping if it's derived from a NSW 14410b57cec5SDimitry Andric // AddRec using a NSW operation. 
14420b57cec5SDimitry Andric if (auto *OBO = dyn_cast<OverflowingBinaryOperator>(NonConstIndex)) 14430b57cec5SDimitry Andric if (OBO->hasNoSignedWrap() && 14440b57cec5SDimitry Andric // Assume constant for other the operand so that the AddRec can be 14450b57cec5SDimitry Andric // easily found. 14460b57cec5SDimitry Andric isa<ConstantInt>(OBO->getOperand(1))) { 14470b57cec5SDimitry Andric auto *OpScev = PSE.getSCEV(OBO->getOperand(0)); 14480b57cec5SDimitry Andric 14490b57cec5SDimitry Andric if (auto *OpAR = dyn_cast<SCEVAddRecExpr>(OpScev)) 14500b57cec5SDimitry Andric return OpAR->getLoop() == L && OpAR->getNoWrapFlags(SCEV::FlagNSW); 14510b57cec5SDimitry Andric } 14520b57cec5SDimitry Andric 14530b57cec5SDimitry Andric return false; 14540b57cec5SDimitry Andric } 14550b57cec5SDimitry Andric 14560b57cec5SDimitry Andric /// Check whether the access through \p Ptr has a constant stride. 1457*62987288SDimitry Andric std::optional<int64_t> 1458*62987288SDimitry Andric llvm::getPtrStride(PredicatedScalarEvolution &PSE, Type *AccessTy, Value *Ptr, 1459bdd1243dSDimitry Andric const Loop *Lp, 146006c3fb27SDimitry Andric const DenseMap<Value *, const SCEV *> &StridesMap, 1461bdd1243dSDimitry Andric bool Assume, bool ShouldCheckWrap) { 1462*62987288SDimitry Andric const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); 1463*62987288SDimitry Andric if (PSE.getSE()->isLoopInvariant(PtrScev, Lp)) 1464*62987288SDimitry Andric return {0}; 1465*62987288SDimitry Andric 14660b57cec5SDimitry Andric Type *Ty = Ptr->getType(); 14670b57cec5SDimitry Andric assert(Ty->isPointerTy() && "Unexpected non-ptr"); 14684824e7fdSDimitry Andric if (isa<ScalableVectorType>(AccessTy)) { 14694824e7fdSDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Bad stride - Scalable object: " << *AccessTy 14704824e7fdSDimitry Andric << "\n"); 1471bdd1243dSDimitry Andric return std::nullopt; 14720b57cec5SDimitry Andric } 14730b57cec5SDimitry Andric 14740b57cec5SDimitry Andric const SCEVAddRecExpr *AR = 
dyn_cast<SCEVAddRecExpr>(PtrScev); 14750b57cec5SDimitry Andric if (Assume && !AR) 14760b57cec5SDimitry Andric AR = PSE.getAsAddRec(Ptr); 14770b57cec5SDimitry Andric 14780b57cec5SDimitry Andric if (!AR) { 14790b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not an AddRecExpr pointer " << *Ptr 14800b57cec5SDimitry Andric << " SCEV: " << *PtrScev << "\n"); 1481bdd1243dSDimitry Andric return std::nullopt; 14820b57cec5SDimitry Andric } 14830b57cec5SDimitry Andric 14840b57cec5SDimitry Andric // The access function must stride over the innermost loop. 14850b57cec5SDimitry Andric if (Lp != AR->getLoop()) { 14860b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not striding over innermost loop " 14870b57cec5SDimitry Andric << *Ptr << " SCEV: " << *AR << "\n"); 1488bdd1243dSDimitry Andric return std::nullopt; 14890b57cec5SDimitry Andric } 14900b57cec5SDimitry Andric 14910b57cec5SDimitry Andric // Check the step is constant. 14920b57cec5SDimitry Andric const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); 14930b57cec5SDimitry Andric 14940b57cec5SDimitry Andric // Calculate the pointer stride and check if it is constant. 14950b57cec5SDimitry Andric const SCEVConstant *C = dyn_cast<SCEVConstant>(Step); 14960b57cec5SDimitry Andric if (!C) { 14970b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Bad stride - Not a constant strided " << *Ptr 14980b57cec5SDimitry Andric << " SCEV: " << *AR << "\n"); 1499bdd1243dSDimitry Andric return std::nullopt; 15000b57cec5SDimitry Andric } 15010b57cec5SDimitry Andric 15020fca6ea1SDimitry Andric auto &DL = Lp->getHeader()->getDataLayout(); 15034824e7fdSDimitry Andric TypeSize AllocSize = DL.getTypeAllocSize(AccessTy); 1504bdd1243dSDimitry Andric int64_t Size = AllocSize.getFixedValue(); 15050b57cec5SDimitry Andric const APInt &APStepVal = C->getAPInt(); 15060b57cec5SDimitry Andric 15070b57cec5SDimitry Andric // Huge step value - give up. 
15080b57cec5SDimitry Andric if (APStepVal.getBitWidth() > 64) 1509bdd1243dSDimitry Andric return std::nullopt; 15100b57cec5SDimitry Andric 15110b57cec5SDimitry Andric int64_t StepVal = APStepVal.getSExtValue(); 15120b57cec5SDimitry Andric 15130b57cec5SDimitry Andric // Strided access. 15140b57cec5SDimitry Andric int64_t Stride = StepVal / Size; 15150b57cec5SDimitry Andric int64_t Rem = StepVal % Size; 15160b57cec5SDimitry Andric if (Rem) 1517bdd1243dSDimitry Andric return std::nullopt; 15180b57cec5SDimitry Andric 151906c3fb27SDimitry Andric if (!ShouldCheckWrap) 152006c3fb27SDimitry Andric return Stride; 152106c3fb27SDimitry Andric 152206c3fb27SDimitry Andric // The address calculation must not wrap. Otherwise, a dependence could be 152306c3fb27SDimitry Andric // inverted. 152406c3fb27SDimitry Andric if (isNoWrapAddRec(Ptr, AR, PSE, Lp)) 152506c3fb27SDimitry Andric return Stride; 152606c3fb27SDimitry Andric 152706c3fb27SDimitry Andric // An inbounds getelementptr that is a AddRec with a unit stride 152806c3fb27SDimitry Andric // cannot wrap per definition. If it did, the result would be poison 152906c3fb27SDimitry Andric // and any memory access dependent on it would be immediate UB 153006c3fb27SDimitry Andric // when executed. 153106c3fb27SDimitry Andric if (auto *GEP = dyn_cast<GetElementPtrInst>(Ptr); 153206c3fb27SDimitry Andric GEP && GEP->isInBounds() && (Stride == 1 || Stride == -1)) 153306c3fb27SDimitry Andric return Stride; 153406c3fb27SDimitry Andric 153506c3fb27SDimitry Andric // If the null pointer is undefined, then a access sequence which would 153606c3fb27SDimitry Andric // otherwise access it can be assumed not to unsigned wrap. Note that this 153706c3fb27SDimitry Andric // assumes the object in memory is aligned to the natural alignment. 
  // (Continued from above: tail of getPtrStride.) A unit stride into a
  // nullptr-undefined address space cannot wrap meaningfully, so accept it
  // without a predicate.
  unsigned AddrSpace = Ty->getPointerAddressSpace();
  if (!NullPointerIsDefined(Lp->getHeader()->getParent(), AddrSpace) &&
      (Stride == 1 || Stride == -1))
    return Stride;

  // Otherwise the pointer may wrap; with \p Assume we can record a no-wrap
  // (NUSW) predicate and still report the stride.
  if (Assume) {
    PSE.setNoOverflow(Ptr, SCEVWrapPredicate::IncrementNUSW);
    LLVM_DEBUG(dbgs() << "LAA: Pointer may wrap:\n"
                      << "LAA: Pointer: " << *Ptr << "\n"
                      << "LAA: SCEV: " << *AR << "\n"
                      << "LAA: Added an overflow assumption\n");
    return Stride;
  }
  LLVM_DEBUG(
      dbgs() << "LAA: Bad stride - Pointer may wrap in the address space "
             << *Ptr << " SCEV: " << *AR << "\n");
  return std::nullopt;
}

/// Compute the constant distance (PtrB - PtrA), in units of \p ElemTyA's
/// store size, between two pointers. Returns std::nullopt if the distance
/// cannot be determined at compile time, if the address spaces differ, or
/// (with \p CheckType) if the element types differ. With \p StrictCheck the
/// byte distance must additionally be an exact multiple of the element size.
std::optional<int> llvm::getPointersDiff(Type *ElemTyA, Value *PtrA,
                                         Type *ElemTyB, Value *PtrB,
                                         const DataLayout &DL,
                                         ScalarEvolution &SE, bool StrictCheck,
                                         bool CheckType) {
  assert(PtrA && PtrB && "Expected non-nullptr pointers.");

  // Make sure that A and B are different pointers.
  if (PtrA == PtrB)
    return 0;

  // Make sure that the element types are the same if required.
  if (CheckType && ElemTyA != ElemTyB)
    return std::nullopt;

  unsigned ASA = PtrA->getType()->getPointerAddressSpace();
  unsigned ASB = PtrB->getType()->getPointerAddressSpace();

  // Check that the address spaces match.
  if (ASA != ASB)
    return std::nullopt;
  unsigned IdxWidth = DL.getIndexSizeInBits(ASA);

  // First try to strip both pointers down to a common base plus constant
  // byte offsets; if the bases coincide the distance is just the offset
  // difference.
  APInt OffsetA(IdxWidth, 0), OffsetB(IdxWidth, 0);
  Value *PtrA1 = PtrA->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetA);
  Value *PtrB1 = PtrB->stripAndAccumulateInBoundsConstantOffsets(DL, OffsetB);

  int Val;
  if (PtrA1 == PtrB1) {
    // Retrieve the address space again as pointer stripping now tracks through
    // `addrspacecast`.
    ASA = cast<PointerType>(PtrA1->getType())->getAddressSpace();
    ASB = cast<PointerType>(PtrB1->getType())->getAddressSpace();
    // Check that the address spaces match and that the pointers are valid.
    if (ASA != ASB)
      return std::nullopt;

    IdxWidth = DL.getIndexSizeInBits(ASA);
    OffsetA = OffsetA.sextOrTrunc(IdxWidth);
    OffsetB = OffsetB.sextOrTrunc(IdxWidth);

    OffsetB -= OffsetA;
    Val = OffsetB.getSExtValue();
  } else {
    // Otherwise compute the distance with SCEV between the base pointers.
    const SCEV *PtrSCEVA = SE.getSCEV(PtrA);
    const SCEV *PtrSCEVB = SE.getSCEV(PtrB);
    const auto *Diff =
        dyn_cast<SCEVConstant>(SE.getMinusSCEV(PtrSCEVB, PtrSCEVA));
    if (!Diff)
      return std::nullopt;
    Val = Diff->getAPInt().getSExtValue();
  }
  int Size = DL.getTypeStoreSize(ElemTyA);
  int Dist = Val / Size;

  // Ensure that the calculated distance matches the type-based one after all
  // the bitcasts removal in the provided pointers.
  if (!StrictCheck || Dist * Size == Val)
    return Dist;
  return std::nullopt;
}

/// Determine whether the pointers in \p VL can be reordered into a sequence
/// of consecutive accesses of \p ElemTy, using constant pointer differences
/// relative to VL[0]. On success, if the pointers were not already in
/// consecutive order, \p SortedIndices receives the permutation.
bool llvm::sortPtrAccesses(ArrayRef<Value *> VL, Type *ElemTy,
                           const DataLayout &DL, ScalarEvolution &SE,
                           SmallVectorImpl<unsigned> &SortedIndices) {
  assert(llvm::all_of(
             VL, [](const Value *V) { return V->getType()->isPointerTy(); }) &&
         "Expected list of pointer operands.");
  // Walk over the pointers, and map each of them to an offset relative to
  // first pointer in the array.
  Value *Ptr0 = VL[0];

  // Offsets orders the pointers by their element distance from Ptr0 and
  // remembers each pointer's original index in VL.
  using DistOrdPair = std::pair<int64_t, int>;
  auto Compare = llvm::less_first();
  std::set<DistOrdPair, decltype(Compare)> Offsets(Compare);
  Offsets.emplace(0, 0);
  bool IsConsecutive = true;
  for (auto [Idx, Ptr] : drop_begin(enumerate(VL))) {
    std::optional<int> Diff = getPointersDiff(ElemTy, Ptr0, ElemTy, Ptr, DL, SE,
                                              /*StrictCheck=*/true);
    if (!Diff)
      return false;

    // Check if the pointer with the same offset is found.
    int64_t Offset = *Diff;
    auto [It, IsInserted] = Offsets.emplace(Offset, Idx);
    // Duplicate offsets mean two pointers alias the same slot; bail out.
    if (!IsInserted)
      return false;
    // Consecutive order if the inserted element is the last one.
    IsConsecutive &= std::next(It) == Offsets.end();
  }
  SortedIndices.clear();
  if (!IsConsecutive) {
    // Fill SortedIndices array only if it is non-consecutive.
    SortedIndices.resize(VL.size());
    for (auto [Idx, Off] : enumerate(Offsets))
      SortedIndices[Idx] = Off.second;
  }
  return true;
}

/// Returns true if the memory operations \p A and \p B are consecutive.
bool llvm::isConsecutiveAccess(Value *A, Value *B, const DataLayout &DL,
                               ScalarEvolution &SE, bool CheckType) {
  Value *PtrA = getLoadStorePointerOperand(A);
  Value *PtrB = getLoadStorePointerOperand(B);
  if (!PtrA || !PtrB)
    return false;
  Type *ElemTyA = getLoadStoreType(A);
  Type *ElemTyB = getLoadStoreType(B);
  // Consecutive means B sits exactly one element after A (strict distance 1).
  std::optional<int> Diff =
      getPointersDiff(ElemTyA, PtrA, ElemTyB, PtrB, DL, SE,
                      /*StrictCheck=*/true, CheckType);
  return Diff && *Diff == 1;
}

/// Record a write access: each pointer reached from the store's pointer
/// operand is registered under the current AccessIdx, and InstMap keeps the
/// index -> instruction mapping in program order.
void MemoryDepChecker::addAccess(StoreInst *SI) {
  visitPointers(SI->getPointerOperand(), *InnermostLoop,
                [this, SI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, true)].push_back(AccessIdx);
                  InstMap.push_back(SI);
                  ++AccessIdx;
                });
}

/// Record a read access; mirrors the store overload with IsWrite == false.
void MemoryDepChecker::addAccess(LoadInst *LI) {
  visitPointers(LI->getPointerOperand(), *InnermostLoop,
                [this, LI](Value *Ptr) {
                  Accesses[MemAccessInfo(Ptr, false)].push_back(AccessIdx);
                  InstMap.push_back(LI);
                  ++AccessIdx;
                });
}

// Map a dependence type onto the three-valued safety verdict used by the
// checker: safe, safe-with-runtime-checks, or unsafe.
MemoryDepChecker::VectorizationSafetyStatus
MemoryDepChecker::Dependence::isSafeForVectorization(DepType Type) {
  switch (Type) {
  case NoDep:
  case Forward:
  case BackwardVectorizable:
    return VectorizationSafetyStatus::Safe;

  case Unknown:
    return VectorizationSafetyStatus::PossiblySafeWithRtChecks;
  case ForwardButPreventsForwarding:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
  case IndirectUnsafe:
    return VectorizationSafetyStatus::Unsafe;
  }
  llvm_unreachable("unexpected DepType!");
}

// True only for the dependence kinds that are definitely backward in program
// order (vectorizable or not).
bool MemoryDepChecker::Dependence::isBackward() const {
  switch (Type) {
  case NoDep:
  case Forward:
  case ForwardButPreventsForwarding:
  case Unknown:
  case IndirectUnsafe:
    return false;

  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
    return true;
  }
  llvm_unreachable("unexpected DepType!");
}

// Backward, or of a kind (Unknown/IndirectUnsafe) that cannot be ruled out
// as backward.
bool MemoryDepChecker::Dependence::isPossiblyBackward() const {
  return isBackward() || Type == Unknown || Type == IndirectUnsafe;
}

// True only for the two definitely-forward dependence kinds.
bool MemoryDepChecker::Dependence::isForward() const {
  switch (Type) {
  case Forward:
  case ForwardButPreventsForwarding:
    return true;

  case NoDep:
  case Unknown:
  case BackwardVectorizable:
  case Backward:
  case BackwardVectorizableButPreventsForwarding:
  case IndirectUnsafe:
    return false;
  }
  llvm_unreachable("unexpected DepType!");
}

/// Return true if a dependence of byte distance \p Distance between accesses
/// of \p TypeByteSize could defeat store-to-load forwarding for every viable
/// vector factor; otherwise possibly tighten MinDepDistBytes and return false.
bool MemoryDepChecker::couldPreventStoreLoadForward(uint64_t Distance,
                                                    uint64_t TypeByteSize) {
  // If loads occur at a distance that is not a multiple of a feasible vector
  // factor store-load forwarding does not take place.
  // Positive dependences might cause troubles because vectorizing them might
  // prevent store-load forwarding making vectorized code run a lot slower.
  //   a[i] = a[i-3] ^ a[i-8];
  // The stores to a[i:i+1] don't align with the stores to a[i-3:i-2] and
  // hence on your typical architecture store-load forwarding does not take
  // place. Vectorizing in such cases does not make sense.
  // Store-load forwarding distance.

  // After this many iterations store-to-load forwarding conflicts should not
  // cause any slowdowns.
  const uint64_t NumItersForStoreLoadThroughMemory = 8 * TypeByteSize;
  // Maximum vector factor.
  uint64_t MaxVFWithoutSLForwardIssues = std::min(
      VectorizerParams::MaxVectorWidth * TypeByteSize, MinDepDistBytes);

  // Compute the smallest VF at which the store and load would be misaligned.
  for (uint64_t VF = 2 * TypeByteSize; VF <= MaxVFWithoutSLForwardIssues;
       VF *= 2) {
    // If the number of vector iteration between the store and the load are
    // small we could incur conflicts.
    if (Distance % VF && Distance / VF < NumItersForStoreLoadThroughMemory) {
      // Largest safe VF is one power-of-two below the first conflicting one.
      MaxVFWithoutSLForwardIssues = (VF >> 1);
      break;
    }
  }

  // No VF of at least two elements survives: forwarding is at risk.
  if (MaxVFWithoutSLForwardIssues < 2 * TypeByteSize) {
    LLVM_DEBUG(
        dbgs() << "LAA: Distance " << Distance
               << " that could cause a store-load forwarding conflict\n");
    return true;
  }

  // Remember the tighter bound unless the loop above never constrained it
  // (i.e. it is still at the MaxVectorWidth cap).
  if (MaxVFWithoutSLForwardIssues < MinDepDistBytes &&
      MaxVFWithoutSLForwardIssues !=
          VectorizerParams::MaxVectorWidth * TypeByteSize)
    MinDepDistBytes = MaxVFWithoutSLForwardIssues;
  return false;
}

// Raise the recorded safety status; statuses only ever get worse (Safe <
// PossiblySafeWithRtChecks < Unsafe), never better.
void MemoryDepChecker::mergeInStatus(VectorizationSafetyStatus S) {
  if (Status < S)
    Status = S;
}

/// Given a dependence-distance \p Dist between two
/// memory accesses, that have strides in the same direction whose absolute
/// value of the maximum stride is given in \p MaxStride, and that have the same
/// type size \p TypeByteSize, in a loop whose maximum backedge taken count is
/// \p MaxBTC, check if it is possible to prove statically that the dependence
/// distance is larger than the range that the accesses will travel through the
/// execution of the loop. If so, return true; false otherwise. This is useful
/// for example in loops such as the following (PR31098):
///   for (i = 0; i < D; ++i) {
///     = out[i];
///     out[i+D] =
///   }
static bool isSafeDependenceDistance(const DataLayout &DL, ScalarEvolution &SE,
                                     const SCEV &MaxBTC, const SCEV &Dist,
                                     uint64_t MaxStride,
                                     uint64_t TypeByteSize) {

  // If we can prove that
  //     (**) |Dist| > MaxBTC * Step
  // where Step is the absolute stride of the memory accesses in bytes,
  // then there is no dependence.
  //
  // Rationale:
  // We basically want to check if the absolute distance (|Dist/Step|)
  // is >= the loop iteration count (or > MaxBTC).
  // This is equivalent to the Strong SIV Test (Practical Dependence Testing,
  // Section 4.2.1); Note, that for vectorization it is sufficient to prove
  // that the dependence distance is >= VF; This is checked elsewhere.
  // But in some cases we can prune dependence distances early, and
  // even before selecting the VF, and without a runtime test, by comparing
  // the distance against the loop iteration count. Since the vectorized code
  // will be executed only if LoopCount >= VF, proving distance >= LoopCount
  // also guarantees that distance >= VF.
  //
  const uint64_t ByteStride = MaxStride * TypeByteSize;
  const SCEV *Step = SE.getConstant(MaxBTC.getType(), ByteStride);
  const SCEV *Product = SE.getMulExpr(&MaxBTC, Step);

  const SCEV *CastedDist = &Dist;
  const SCEV *CastedProduct = Product;
  uint64_t DistTypeSizeBits = DL.getTypeSizeInBits(Dist.getType());
  uint64_t ProductTypeSizeBits = DL.getTypeSizeInBits(Product->getType());

  // The dependence distance can be positive/negative, so we sign extend Dist;
  // The multiplication of the absolute stride in bytes and the
  // backedgeTakenCount is non-negative, so we zero extend Product.
  if (DistTypeSizeBits > ProductTypeSizeBits)
    CastedProduct = SE.getZeroExtendExpr(Product, Dist.getType());
  else
    CastedDist = SE.getNoopOrSignExtend(&Dist, Product->getType());

  // Is  Dist - (MaxBTC * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= Dist)
  const SCEV *Minus = SE.getMinusSCEV(CastedDist, CastedProduct);
  if (SE.isKnownPositive(Minus))
    return true;

  // Second try: Is  -Dist - (MaxBTC * Step) > 0 ?
  // (If so, then we have proven (**) because |Dist| >= -1*Dist)
  const SCEV *NegDist = SE.getNegativeSCEV(CastedDist);
  Minus = SE.getMinusSCEV(NegDist, CastedProduct);
  return SE.isKnownPositive(Minus);
}

/// Check the dependence for two accesses with the same stride \p Stride.
/// \p Distance is the positive distance and \p TypeByteSize is type size in
/// bytes.
///
/// \returns true if they are independent.
static bool areStridedAccessesIndependent(uint64_t Distance, uint64_t Stride,
                                          uint64_t TypeByteSize) {
  assert(Stride > 1 && "The stride must be greater than 1");
  assert(TypeByteSize > 0 && "The type size in byte must be non-zero");
  assert(Distance > 0 && "The distance must be non-zero");

  // Skip if the distance is not multiple of type byte size.
  if (Distance % TypeByteSize)
    return false;

  uint64_t ScaledDist = Distance / TypeByteSize;

  // No dependence if the scaled distance is not multiple of the stride.
  // E.g.
  //      for (i = 0; i < 1024 ; i += 4)
  //        A[i+2] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 2, stride is 4):
  //     | A[0] |      |      |      | A[4] |      |      |      |
  //     |      |      | A[2] |      |      |      | A[6] |      |
  //
  // E.g.
  //      for (i = 0; i < 1024 ; i += 3)
  //        A[i+4] = A[i] + 1;
  //
  // Two accesses in memory (scaled distance is 4, stride is 3):
  //     | A[0] |      |      | A[3] |      |      | A[6] |      |      |
  //     |      |      |      |      | A[4] |      |      | A[7] |      |
  return ScaledDist % Stride;
}

/// Classify the dependence between accesses \p A (instruction \p AInst) and
/// \p B (instruction \p BInst), returning either an early DepType verdict
/// (NoDep/Unknown/IndirectUnsafe) or the distance/stride/size information
/// that isDependent() needs for the full analysis.
std::variant<MemoryDepChecker::Dependence::DepType,
             MemoryDepChecker::DepDistanceStrideAndSizeInfo>
MemoryDepChecker::getDependenceDistanceStrideAndSize(
    const AccessAnalysis::MemAccessInfo &A, Instruction *AInst,
    const AccessAnalysis::MemAccessInfo &B, Instruction *BInst) {
  const auto &DL = InnermostLoop->getHeader()->getDataLayout();
  auto &SE = *PSE.getSE();
  auto [APtr, AIsWrite] = A;
  auto [BPtr, BIsWrite] = B;

  // Two reads are independent.
  if (!AIsWrite && !BIsWrite)
    return MemoryDepChecker::Dependence::NoDep;

  Type *ATy = getLoadStoreType(AInst);
  Type *BTy = getLoadStoreType(BInst);

  // We cannot check pointers in different address spaces.
  if (APtr->getType()->getPointerAddressSpace() !=
      BPtr->getType()->getPointerAddressSpace())
    return MemoryDepChecker::Dependence::Unknown;

  std::optional<int64_t> StrideAPtr =
      getPtrStride(PSE, ATy, APtr, InnermostLoop, SymbolicStrides, true, true);
  std::optional<int64_t> StrideBPtr =
      getPtrStride(PSE, BTy, BPtr, InnermostLoop, SymbolicStrides, true, true);

  const SCEV *Src = PSE.getSCEV(APtr);
  const SCEV *Sink = PSE.getSCEV(BPtr);

  // If the induction step is negative we have to invert source and sink of the
  // dependence when measuring the distance between them. We should not swap
  // AIsWrite with BIsWrite, as their uses expect them in program order.
  if (StrideAPtr && *StrideAPtr < 0) {
    std::swap(Src, Sink);
    std::swap(AInst, BInst);
    std::swap(StrideAPtr, StrideBPtr);
  }

  const SCEV *Dist = SE.getMinusSCEV(Sink, Src);

  LLVM_DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink
                    << "\n");
  LLVM_DEBUG(dbgs() << "LAA: Distance for " << *AInst << " to " << *BInst
                    << ": " << *Dist << "\n");

  // Check if we can prove that Sink only accesses memory after Src's end or
  // vice versa. At the moment this is limited to cases where either source or
  // sink are loop invariant to avoid compile-time increases. This is not
  // required for correctness.
  if (SE.isLoopInvariant(Src, InnermostLoop) ||
      SE.isLoopInvariant(Sink, InnermostLoop)) {
    const auto &[SrcStart, SrcEnd] =
        getStartAndEndForAccess(InnermostLoop, Src, ATy, PSE, PointerBounds);
    const auto &[SinkStart, SinkEnd] =
        getStartAndEndForAccess(InnermostLoop, Sink, BTy, PSE, PointerBounds);
    if (!isa<SCEVCouldNotCompute>(SrcStart) &&
        !isa<SCEVCouldNotCompute>(SrcEnd) &&
        !isa<SCEVCouldNotCompute>(SinkStart) &&
        !isa<SCEVCouldNotCompute>(SinkEnd)) {
      // Disjoint access ranges in either order prove independence.
      if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SrcEnd, SinkStart))
        return MemoryDepChecker::Dependence::NoDep;
      if (SE.isKnownPredicate(CmpInst::ICMP_ULE, SinkEnd, SrcStart))
        return MemoryDepChecker::Dependence::NoDep;
    }
  }

  // Need accesses with constant strides and the same direction for further
  // dependence analysis. We don't want to vectorize "A[B[i]] += ..." and
  // similar code or pointer arithmetic that could wrap in the address space.

  // If either Src or Sink are not strided (i.e. not a non-wrapping AddRec) and
  // not loop-invariant (stride will be 0 in that case), we cannot analyze the
  // dependence further and also cannot generate runtime checks.
  if (!StrideAPtr || !StrideBPtr) {
    LLVM_DEBUG(dbgs() << "Pointer access with non-constant stride\n");
    return MemoryDepChecker::Dependence::IndirectUnsafe;
  }

  int64_t StrideAPtrInt = *StrideAPtr;
  int64_t StrideBPtrInt = *StrideBPtr;
  LLVM_DEBUG(dbgs() << "LAA: Src induction step: " << StrideAPtrInt
                    << " Sink induction step: " << StrideBPtrInt << "\n");
  // At least Src or Sink are loop invariant and the other is strided or
  // invariant. We can generate a runtime check to disambiguate the accesses.
  if (StrideAPtrInt == 0 || StrideBPtrInt == 0)
    return MemoryDepChecker::Dependence::Unknown;

  // Both Src and Sink have a constant stride, check if they are in the same
  // direction.
  if ((StrideAPtrInt > 0 && StrideBPtrInt < 0) ||
      (StrideAPtrInt < 0 && StrideBPtrInt > 0)) {
    LLVM_DEBUG(
        dbgs() << "Pointer access with strides in different directions\n");
    return MemoryDepChecker::Dependence::Unknown;
  }

  uint64_t TypeByteSize = DL.getTypeAllocSize(ATy);
  bool HasSameSize =
      DL.getTypeStoreSizeInBits(ATy) == DL.getTypeStoreSizeInBits(BTy);
  // TypeByteSize == 0 encodes "mismatched access sizes" for the caller.
  if (!HasSameSize)
    TypeByteSize = 0;
  return DepDistanceStrideAndSizeInfo(Dist, std::abs(StrideAPtrInt),
                                      std::abs(StrideBPtrInt), TypeByteSize,
                                      AIsWrite, BIsWrite);
}

MemoryDepChecker::Dependence::DepType
MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx,
                              const MemAccessInfo &B, unsigned BIdx) {
  assert(AIdx < BIdx && "Must pass arguments in program order");

  // Get the dependence distance, stride, type size and what access writes for
  // the dependence between A and B.
  auto Res =
      getDependenceDistanceStrideAndSize(A, InstMap[AIdx], B, InstMap[BIdx]);
  // An early DepType verdict (NoDep/Unknown/IndirectUnsafe) ends the analysis.
  if (std::holds_alternative<Dependence::DepType>(Res))
    return std::get<Dependence::DepType>(Res);

  auto &[Dist, StrideA, StrideB, TypeByteSize, AIsWrite, BIsWrite] =
      std::get<DepDistanceStrideAndSizeInfo>(Res);
  // TypeByteSize == 0 is the sentinel for mismatched access sizes.
  bool HasSameSize = TypeByteSize > 0;

  std::optional<uint64_t> CommonStride =
      StrideA == StrideB ? std::make_optional(StrideA) : std::nullopt;
  if (isa<SCEVCouldNotCompute>(Dist)) {
    // TODO: Relax requirement that there is a common stride to retry with
    // non-constant distance dependencies.
    FoundNonConstantDistanceDependence |= CommonStride.has_value();
    LLVM_DEBUG(dbgs() << "LAA: Dependence because of uncomputable distance.\n");
    return Dependence::Unknown;
  }

  ScalarEvolution &SE = *PSE.getSE();
  auto &DL = InnermostLoop->getHeader()->getDataLayout();
  uint64_t MaxStride = std::max(StrideA, StrideB);

  // If the distance between the accesses is larger than their maximum absolute
  // stride multiplied by the symbolic maximum backedge taken count (which is an
  // upper bound of the number of iterations), the accesses are independent,
  // i.e. they are far enough apart that accesses won't access the same
  // location across all loop iterations.
  if (HasSameSize && isSafeDependenceDistance(
                         DL, SE, *(PSE.getSymbolicMaxBackedgeTakenCount()),
                         *Dist, MaxStride, TypeByteSize))
    return Dependence::NoDep;

  const SCEVConstant *C = dyn_cast<SCEVConstant>(Dist);

  // Attempt to prove strided accesses independent.
  if (C) {
    const APInt &Val = C->getAPInt();
    int64_t Distance = Val.getSExtValue();

    // If the distance between accesses and their strides are known constants,
    // check whether the accesses interlace each other.
    if (std::abs(Distance) > 0 && CommonStride && *CommonStride > 1 &&
        HasSameSize &&
        areStridedAccessesIndependent(std::abs(Distance), *CommonStride,
                                      TypeByteSize)) {
      LLVM_DEBUG(dbgs() << "LAA: Strided accesses are independent\n");
      return Dependence::NoDep;
    }
  } else
    // For non-constant distances, loop guards may tighten the known range.
    Dist = SE.applyLoopGuards(Dist, InnermostLoop);

  // Negative distances are not plausible dependencies.
  if (SE.isKnownNonPositive(Dist)) {
    if (SE.isKnownNonNegative(Dist)) {
      if (HasSameSize) {
        // Write to the same location with the same size.
        return Dependence::Forward;
      }
      LLVM_DEBUG(dbgs() << "LAA: possibly zero dependence difference but "
                           "different type sizes\n");
      return Dependence::Unknown;
    }

    bool IsTrueDataDependence = (AIsWrite && !BIsWrite);
    // Check if the first access writes to a location that is read in a later
    // iteration, where the distance between them is not a multiple of a vector
    // factor and relatively small.
    //
    // NOTE: There is no need to update MaxSafeVectorWidthInBits after call to
    // couldPreventStoreLoadForward, even if it changed MinDepDistBytes, since a
    // forward dependency will allow vectorization using any width.

    if (IsTrueDataDependence && EnableForwardingConflictDetection) {
      if (!C) {
        // TODO: FoundNonConstantDistanceDependence is used as a necessary
        // condition to consider retrying with runtime checks. Historically, we
        // did not set it when strides were different but there is no inherent
        // reason to.
        FoundNonConstantDistanceDependence |= CommonStride.has_value();
        return Dependence::Unknown;
      }
      if (!HasSameSize ||
          couldPreventStoreLoadForward(C->getAPInt().abs().getZExtValue(),
                                       TypeByteSize)) {
        LLVM_DEBUG(
            dbgs() << "LAA: Forward but may prevent st->ld forwarding\n");
        return Dependence::ForwardButPreventsForwarding;
      }
    }

    LLVM_DEBUG(dbgs() << "LAA: Dependence is negative\n");
    return Dependence::Forward;
  }

  int64_t MinDistance = SE.getSignedRangeMin(Dist).getSExtValue();
  // Below we only handle strictly positive distances.
  if (MinDistance <= 0) {
    FoundNonConstantDistanceDependence |= CommonStride.has_value();
    return Dependence::Unknown;
  }

  if (!isa<SCEVConstant>(Dist)) {
    // Previously this case would be treated as Unknown, possibly setting
    // FoundNonConstantDistanceDependence to force re-trying with runtime
    // checks. Until the TODO below is addressed, set it here to preserve
    // original behavior w.r.t. re-trying with runtime checks.
    // TODO: FoundNonConstantDistanceDependence is used as a necessary
    // condition to consider retrying with runtime checks. Historically, we
    // did not set it when strides were different but there is no inherent
    // reason to.
    FoundNonConstantDistanceDependence |= CommonStride.has_value();
  }

  if (!HasSameSize) {
    LLVM_DEBUG(dbgs() << "LAA: ReadWrite-Write positive dependency with "
                         "different type sizes\n");
    return Dependence::Unknown;
  }

  if (!CommonStride)
    return Dependence::Unknown;

  // Bail out early if passed-in parameters make vectorization not feasible.
  unsigned ForcedFactor = (VectorizerParams::VectorizationFactor ?
                           VectorizerParams::VectorizationFactor : 1);
  unsigned ForcedUnroll = (VectorizerParams::VectorizationInterleave ?
                           VectorizerParams::VectorizationInterleave : 1);
  // The minimum number of iterations for a vectorized/unrolled version.
  unsigned MinNumIter = std::max(ForcedFactor * ForcedUnroll, 2U);

  // It's not vectorizable if the distance is smaller than the minimum distance
  // needed for a vectorized/unrolled version. Vectorizing one iteration in
  // front needs TypeByteSize * Stride. Vectorizing the last iteration needs
  // TypeByteSize (No need to plus the last gap distance).
  //
  // E.g. Assume one char is 1 byte in memory and one int is 4 bytes.
  //      foo(int *A) {
  //        int *B = (int *)((char *)A + 14);
  //        for (i = 0 ; i < 1024 ; i += 2)
  //          B[i] = A[i] + 1;
  //      }
  //
  // Two accesses in memory (stride is 2):
  //     | A[0] |      | A[2] |      | A[4] |      | A[6] |      |
  //                              | B[0] |      | B[2] |      | B[4] |
  //
  // MinDistance needs for vectorizing iterations except the last iteration:
  // 4 * 2 * (MinNumIter - 1). MinDistance needs for the last iteration: 4.
  // So the minimum distance needed is: 4 * 2 * (MinNumIter - 1) + 4.
  //
  // If MinNumIter is 2, it is vectorizable as the minimum distance needed is
  // 12, which is less than distance.
  //
  // If MinNumIter is 4 (Say if a user forces the vectorization factor to be 4),
  // the minimum distance needed is 28, which is greater than distance. It is
  // not safe to do vectorization.

  // We know that Dist is positive, but it may not be constant. Use the signed
  // minimum for computations below, as this ensures we compute the closest
  // possible dependence distance.
21730b57cec5SDimitry Andric uint64_t MinDistanceNeeded = 21740fca6ea1SDimitry Andric TypeByteSize * *CommonStride * (MinNumIter - 1) + TypeByteSize; 21750fca6ea1SDimitry Andric if (MinDistanceNeeded > static_cast<uint64_t>(MinDistance)) { 21760fca6ea1SDimitry Andric if (!isa<SCEVConstant>(Dist)) { 21770fca6ea1SDimitry Andric // For non-constant distances, we checked the lower bound of the 21780fca6ea1SDimitry Andric // dependence distance and the distance may be larger at runtime (and safe 21790fca6ea1SDimitry Andric // for vectorization). Classify it as Unknown, so we re-try with runtime 21800fca6ea1SDimitry Andric // checks. 21810fca6ea1SDimitry Andric return Dependence::Unknown; 21820fca6ea1SDimitry Andric } 21830fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Failure because of positive minimum distance " 21840fca6ea1SDimitry Andric << MinDistance << '\n'); 21850b57cec5SDimitry Andric return Dependence::Backward; 21860b57cec5SDimitry Andric } 21870b57cec5SDimitry Andric 21885f757f3fSDimitry Andric // Unsafe if the minimum distance needed is greater than smallest dependence 21895f757f3fSDimitry Andric // distance distance. 21905f757f3fSDimitry Andric if (MinDistanceNeeded > MinDepDistBytes) { 21910b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Failure because it needs at least " 2192bdd1243dSDimitry Andric << MinDistanceNeeded << " size in bytes\n"); 21930b57cec5SDimitry Andric return Dependence::Backward; 21940b57cec5SDimitry Andric } 21950b57cec5SDimitry Andric 21960b57cec5SDimitry Andric // Positive distance bigger than max vectorization factor. 21970b57cec5SDimitry Andric // FIXME: Should use max factor instead of max distance in bytes, which could 21980b57cec5SDimitry Andric // not handle different types. 21990b57cec5SDimitry Andric // E.g. Assume one char is 1 byte in memory and one int is 4 bytes. 
22000b57cec5SDimitry Andric // void foo (int *A, char *B) { 22010b57cec5SDimitry Andric // for (unsigned i = 0; i < 1024; i++) { 22020b57cec5SDimitry Andric // A[i+2] = A[i] + 1; 22030b57cec5SDimitry Andric // B[i+2] = B[i] + 1; 22040b57cec5SDimitry Andric // } 22050b57cec5SDimitry Andric // } 22060b57cec5SDimitry Andric // 22070b57cec5SDimitry Andric // This case is currently unsafe according to the max safe distance. If we 22080b57cec5SDimitry Andric // analyze the two accesses on array B, the max safe dependence distance 22090b57cec5SDimitry Andric // is 2. Then we analyze the accesses on array A, the minimum distance needed 22100b57cec5SDimitry Andric // is 8, which is less than 2 and forbidden vectorization, But actually 22110b57cec5SDimitry Andric // both A and B could be vectorized by 2 iterations. 22125f757f3fSDimitry Andric MinDepDistBytes = 22130fca6ea1SDimitry Andric std::min(static_cast<uint64_t>(MinDistance), MinDepDistBytes); 22140b57cec5SDimitry Andric 22150b57cec5SDimitry Andric bool IsTrueDataDependence = (!AIsWrite && BIsWrite); 22165f757f3fSDimitry Andric uint64_t MinDepDistBytesOld = MinDepDistBytes; 22170b57cec5SDimitry Andric if (IsTrueDataDependence && EnableForwardingConflictDetection && 22180fca6ea1SDimitry Andric isa<SCEVConstant>(Dist) && 22190fca6ea1SDimitry Andric couldPreventStoreLoadForward(MinDistance, TypeByteSize)) { 22205f757f3fSDimitry Andric // Sanity check that we didn't update MinDepDistBytes when calling 22215f757f3fSDimitry Andric // couldPreventStoreLoadForward 22225f757f3fSDimitry Andric assert(MinDepDistBytes == MinDepDistBytesOld && 22235f757f3fSDimitry Andric "An update to MinDepDistBytes requires an update to " 22245f757f3fSDimitry Andric "MaxSafeVectorWidthInBits"); 22255f757f3fSDimitry Andric (void)MinDepDistBytesOld; 22260b57cec5SDimitry Andric return Dependence::BackwardVectorizableButPreventsForwarding; 22275f757f3fSDimitry Andric } 22280b57cec5SDimitry Andric 22295f757f3fSDimitry Andric // An update to 
MinDepDistBytes requires an update to MaxSafeVectorWidthInBits 22305f757f3fSDimitry Andric // since there is a backwards dependency. 22310fca6ea1SDimitry Andric uint64_t MaxVF = MinDepDistBytes / (TypeByteSize * *CommonStride); 22320fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Positive min distance " << MinDistance 22330b57cec5SDimitry Andric << " with max VF = " << MaxVF << '\n'); 22340fca6ea1SDimitry Andric 22350b57cec5SDimitry Andric uint64_t MaxVFInBits = MaxVF * TypeByteSize * 8; 22360fca6ea1SDimitry Andric if (!isa<SCEVConstant>(Dist) && MaxVFInBits < MaxTargetVectorWidthInBits) { 22370fca6ea1SDimitry Andric // For non-constant distances, we checked the lower bound of the dependence 22380fca6ea1SDimitry Andric // distance and the distance may be larger at runtime (and safe for 22390fca6ea1SDimitry Andric // vectorization). Classify it as Unknown, so we re-try with runtime checks. 22400fca6ea1SDimitry Andric return Dependence::Unknown; 22410fca6ea1SDimitry Andric } 22420fca6ea1SDimitry Andric 2243e8d8bef9SDimitry Andric MaxSafeVectorWidthInBits = std::min(MaxSafeVectorWidthInBits, MaxVFInBits); 22440b57cec5SDimitry Andric return Dependence::BackwardVectorizable; 22450b57cec5SDimitry Andric } 22460b57cec5SDimitry Andric 2247*62987288SDimitry Andric bool MemoryDepChecker::areDepsSafe(const DepCandidates &AccessSets, 2248*62987288SDimitry Andric const MemAccessInfoList &CheckDeps) { 22490b57cec5SDimitry Andric 22505f757f3fSDimitry Andric MinDepDistBytes = -1; 22510b57cec5SDimitry Andric SmallPtrSet<MemAccessInfo, 8> Visited; 22520b57cec5SDimitry Andric for (MemAccessInfo CurAccess : CheckDeps) { 22530b57cec5SDimitry Andric if (Visited.count(CurAccess)) 22540b57cec5SDimitry Andric continue; 22550b57cec5SDimitry Andric 22560b57cec5SDimitry Andric // Get the relevant memory access set. 
22570b57cec5SDimitry Andric EquivalenceClasses<MemAccessInfo>::iterator I = 22580b57cec5SDimitry Andric AccessSets.findValue(AccessSets.getLeaderValue(CurAccess)); 22590b57cec5SDimitry Andric 22600b57cec5SDimitry Andric // Check accesses within this set. 22610b57cec5SDimitry Andric EquivalenceClasses<MemAccessInfo>::member_iterator AI = 22620b57cec5SDimitry Andric AccessSets.member_begin(I); 22630b57cec5SDimitry Andric EquivalenceClasses<MemAccessInfo>::member_iterator AE = 22640b57cec5SDimitry Andric AccessSets.member_end(); 22650b57cec5SDimitry Andric 22660b57cec5SDimitry Andric // Check every access pair. 22670b57cec5SDimitry Andric while (AI != AE) { 22680b57cec5SDimitry Andric Visited.insert(*AI); 22698bcb0991SDimitry Andric bool AIIsWrite = AI->getInt(); 22708bcb0991SDimitry Andric // Check loads only against next equivalent class, but stores also against 22718bcb0991SDimitry Andric // other stores in the same equivalence class - to the same address. 22728bcb0991SDimitry Andric EquivalenceClasses<MemAccessInfo>::member_iterator OI = 22738bcb0991SDimitry Andric (AIIsWrite ? AI : std::next(AI)); 22740b57cec5SDimitry Andric while (OI != AE) { 22750b57cec5SDimitry Andric // Check every accessing instruction pair in program order. 22760b57cec5SDimitry Andric for (std::vector<unsigned>::iterator I1 = Accesses[*AI].begin(), 22770b57cec5SDimitry Andric I1E = Accesses[*AI].end(); I1 != I1E; ++I1) 22788bcb0991SDimitry Andric // Scan all accesses of another equivalence class, but only the next 22798bcb0991SDimitry Andric // accesses of the same equivalent class. 22808bcb0991SDimitry Andric for (std::vector<unsigned>::iterator 22818bcb0991SDimitry Andric I2 = (OI == AI ? std::next(I1) : Accesses[*OI].begin()), 22828bcb0991SDimitry Andric I2E = (OI == AI ? 
I1E : Accesses[*OI].end()); 22838bcb0991SDimitry Andric I2 != I2E; ++I2) { 22840b57cec5SDimitry Andric auto A = std::make_pair(&*AI, *I1); 22850b57cec5SDimitry Andric auto B = std::make_pair(&*OI, *I2); 22860b57cec5SDimitry Andric 22870b57cec5SDimitry Andric assert(*I1 != *I2); 22880b57cec5SDimitry Andric if (*I1 > *I2) 22890b57cec5SDimitry Andric std::swap(A, B); 22900b57cec5SDimitry Andric 2291*62987288SDimitry Andric Dependence::DepType Type = 2292*62987288SDimitry Andric isDependent(*A.first, A.second, *B.first, B.second); 22930b57cec5SDimitry Andric mergeInStatus(Dependence::isSafeForVectorization(Type)); 22940b57cec5SDimitry Andric 22950b57cec5SDimitry Andric // Gather dependences unless we accumulated MaxDependences 22960b57cec5SDimitry Andric // dependences. In that case return as soon as we find the first 22970b57cec5SDimitry Andric // unsafe dependence. This puts a limit on this quadratic 22980b57cec5SDimitry Andric // algorithm. 22990b57cec5SDimitry Andric if (RecordDependences) { 23000b57cec5SDimitry Andric if (Type != Dependence::NoDep) 23010b57cec5SDimitry Andric Dependences.push_back(Dependence(A.second, B.second, Type)); 23020b57cec5SDimitry Andric 23030b57cec5SDimitry Andric if (Dependences.size() >= MaxDependences) { 23040b57cec5SDimitry Andric RecordDependences = false; 23050b57cec5SDimitry Andric Dependences.clear(); 23060b57cec5SDimitry Andric LLVM_DEBUG(dbgs() 23070b57cec5SDimitry Andric << "Too many dependences, stopped recording\n"); 23080b57cec5SDimitry Andric } 23090b57cec5SDimitry Andric } 23100b57cec5SDimitry Andric if (!RecordDependences && !isSafeForVectorization()) 23110b57cec5SDimitry Andric return false; 23120b57cec5SDimitry Andric } 23130b57cec5SDimitry Andric ++OI; 23140b57cec5SDimitry Andric } 23150fca6ea1SDimitry Andric ++AI; 23160b57cec5SDimitry Andric } 23170b57cec5SDimitry Andric } 23180b57cec5SDimitry Andric 23190b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "Total Dependences: " << Dependences.size() << "\n"); 
23200b57cec5SDimitry Andric return isSafeForVectorization(); 23210b57cec5SDimitry Andric } 23220b57cec5SDimitry Andric 23230b57cec5SDimitry Andric SmallVector<Instruction *, 4> 23240fca6ea1SDimitry Andric MemoryDepChecker::getInstructionsForAccess(Value *Ptr, bool IsWrite) const { 23250fca6ea1SDimitry Andric MemAccessInfo Access(Ptr, IsWrite); 23260b57cec5SDimitry Andric auto &IndexVector = Accesses.find(Access)->second; 23270b57cec5SDimitry Andric 23280b57cec5SDimitry Andric SmallVector<Instruction *, 4> Insts; 23290b57cec5SDimitry Andric transform(IndexVector, 23300b57cec5SDimitry Andric std::back_inserter(Insts), 23310b57cec5SDimitry Andric [&](unsigned Idx) { return this->InstMap[Idx]; }); 23320b57cec5SDimitry Andric return Insts; 23330b57cec5SDimitry Andric } 23340b57cec5SDimitry Andric 23350b57cec5SDimitry Andric const char *MemoryDepChecker::Dependence::DepName[] = { 23365f757f3fSDimitry Andric "NoDep", 23375f757f3fSDimitry Andric "Unknown", 23380fca6ea1SDimitry Andric "IndirectUnsafe", 23395f757f3fSDimitry Andric "Forward", 23405f757f3fSDimitry Andric "ForwardButPreventsForwarding", 23415f757f3fSDimitry Andric "Backward", 23425f757f3fSDimitry Andric "BackwardVectorizable", 23435f757f3fSDimitry Andric "BackwardVectorizableButPreventsForwarding"}; 23440b57cec5SDimitry Andric 23450b57cec5SDimitry Andric void MemoryDepChecker::Dependence::print( 23460b57cec5SDimitry Andric raw_ostream &OS, unsigned Depth, 23470b57cec5SDimitry Andric const SmallVectorImpl<Instruction *> &Instrs) const { 23480b57cec5SDimitry Andric OS.indent(Depth) << DepName[Type] << ":\n"; 23490b57cec5SDimitry Andric OS.indent(Depth + 2) << *Instrs[Source] << " -> \n"; 23500b57cec5SDimitry Andric OS.indent(Depth + 2) << *Instrs[Destination] << "\n"; 23510b57cec5SDimitry Andric } 23520b57cec5SDimitry Andric 23530b57cec5SDimitry Andric bool LoopAccessInfo::canAnalyzeLoop() { 23540b57cec5SDimitry Andric // We need to have a loop header. 
23550fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "\nLAA: Checking a loop in '" 23560fca6ea1SDimitry Andric << TheLoop->getHeader()->getParent()->getName() << "' from " 23570fca6ea1SDimitry Andric << TheLoop->getLocStr() << "\n"); 23580b57cec5SDimitry Andric 23590b57cec5SDimitry Andric // We can only analyze innermost loops. 2360e8d8bef9SDimitry Andric if (!TheLoop->isInnermost()) { 23610b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: loop is not the innermost loop\n"); 23620b57cec5SDimitry Andric recordAnalysis("NotInnerMostLoop") << "loop is not the innermost loop"; 23630b57cec5SDimitry Andric return false; 23640b57cec5SDimitry Andric } 23650b57cec5SDimitry Andric 23660b57cec5SDimitry Andric // We must have a single backedge. 23670b57cec5SDimitry Andric if (TheLoop->getNumBackEdges() != 1) { 23680b57cec5SDimitry Andric LLVM_DEBUG( 23690b57cec5SDimitry Andric dbgs() << "LAA: loop control flow is not understood by analyzer\n"); 23700b57cec5SDimitry Andric recordAnalysis("CFGNotUnderstood") 23710b57cec5SDimitry Andric << "loop control flow is not understood by analyzer"; 23720b57cec5SDimitry Andric return false; 23730b57cec5SDimitry Andric } 23740b57cec5SDimitry Andric 23750fca6ea1SDimitry Andric // ScalarEvolution needs to be able to find the symbolic max backedge taken 23760fca6ea1SDimitry Andric // count, which is an upper bound on the number of loop iterations. The loop 23770fca6ea1SDimitry Andric // may execute fewer iterations, if it exits via an uncountable exit. 
23780fca6ea1SDimitry Andric const SCEV *ExitCount = PSE->getSymbolicMaxBackedgeTakenCount(); 2379e8d8bef9SDimitry Andric if (isa<SCEVCouldNotCompute>(ExitCount)) { 23800b57cec5SDimitry Andric recordAnalysis("CantComputeNumberOfIterations") 23810b57cec5SDimitry Andric << "could not determine number of loop iterations"; 23820b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); 23830b57cec5SDimitry Andric return false; 23840b57cec5SDimitry Andric } 23850b57cec5SDimitry Andric 23860fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Found an analyzable loop: " 23870fca6ea1SDimitry Andric << TheLoop->getHeader()->getName() << "\n"); 23880b57cec5SDimitry Andric return true; 23890b57cec5SDimitry Andric } 23900b57cec5SDimitry Andric 23910fca6ea1SDimitry Andric bool LoopAccessInfo::analyzeLoop(AAResults *AA, LoopInfo *LI, 23920b57cec5SDimitry Andric const TargetLibraryInfo *TLI, 23930b57cec5SDimitry Andric DominatorTree *DT) { 23940b57cec5SDimitry Andric // Holds the Load and Store instructions. 23950b57cec5SDimitry Andric SmallVector<LoadInst *, 16> Loads; 23960b57cec5SDimitry Andric SmallVector<StoreInst *, 16> Stores; 2397b3edf446SDimitry Andric SmallPtrSet<MDNode *, 8> LoopAliasScopes; 23980b57cec5SDimitry Andric 23990b57cec5SDimitry Andric // Holds all the different accesses in the loop. 24000b57cec5SDimitry Andric unsigned NumReads = 0; 24010b57cec5SDimitry Andric unsigned NumReadWrites = 0; 24020b57cec5SDimitry Andric 24030b57cec5SDimitry Andric bool HasComplexMemInst = false; 24040b57cec5SDimitry Andric 24050b57cec5SDimitry Andric // A runtime check is only legal to insert if there are no convergent calls. 
24060b57cec5SDimitry Andric HasConvergentOp = false; 24070b57cec5SDimitry Andric 24080b57cec5SDimitry Andric PtrRtChecking->Pointers.clear(); 24090b57cec5SDimitry Andric PtrRtChecking->Need = false; 24100b57cec5SDimitry Andric 24110b57cec5SDimitry Andric const bool IsAnnotatedParallel = TheLoop->isAnnotatedParallel(); 24120b57cec5SDimitry Andric 24135ffd83dbSDimitry Andric const bool EnableMemAccessVersioningOfLoop = 24145ffd83dbSDimitry Andric EnableMemAccessVersioning && 24155ffd83dbSDimitry Andric !TheLoop->getHeader()->getParent()->hasOptSize(); 24165ffd83dbSDimitry Andric 2417bdd1243dSDimitry Andric // Traverse blocks in fixed RPOT order, regardless of their storage in the 2418bdd1243dSDimitry Andric // loop info, as it may be arbitrary. 2419bdd1243dSDimitry Andric LoopBlocksRPO RPOT(TheLoop); 2420bdd1243dSDimitry Andric RPOT.perform(LI); 2421bdd1243dSDimitry Andric for (BasicBlock *BB : RPOT) { 24220b57cec5SDimitry Andric // Scan the BB and collect legal loads and stores. Also detect any 24230b57cec5SDimitry Andric // convergent instructions. 24240b57cec5SDimitry Andric for (Instruction &I : *BB) { 24250b57cec5SDimitry Andric if (auto *Call = dyn_cast<CallBase>(&I)) { 24260b57cec5SDimitry Andric if (Call->isConvergent()) 24270b57cec5SDimitry Andric HasConvergentOp = true; 24280b57cec5SDimitry Andric } 24290b57cec5SDimitry Andric 24300b57cec5SDimitry Andric // With both a non-vectorizable memory instruction and a convergent 24310b57cec5SDimitry Andric // operation, found in this loop, no reason to continue the search. 24320fca6ea1SDimitry Andric if (HasComplexMemInst && HasConvergentOp) 24330fca6ea1SDimitry Andric return false; 24340b57cec5SDimitry Andric 24350b57cec5SDimitry Andric // Avoid hitting recordAnalysis multiple times. 24360b57cec5SDimitry Andric if (HasComplexMemInst) 24370b57cec5SDimitry Andric continue; 24380b57cec5SDimitry Andric 2439b3edf446SDimitry Andric // Record alias scopes defined inside the loop. 
2440b3edf446SDimitry Andric if (auto *Decl = dyn_cast<NoAliasScopeDeclInst>(&I)) 2441b3edf446SDimitry Andric for (Metadata *Op : Decl->getScopeList()->operands()) 2442b3edf446SDimitry Andric LoopAliasScopes.insert(cast<MDNode>(Op)); 2443b3edf446SDimitry Andric 24440b57cec5SDimitry Andric // Many math library functions read the rounding mode. We will only 24450b57cec5SDimitry Andric // vectorize a loop if it contains known function calls that don't set 24460b57cec5SDimitry Andric // the flag. Therefore, it is safe to ignore this read from memory. 24470b57cec5SDimitry Andric auto *Call = dyn_cast<CallInst>(&I); 24480b57cec5SDimitry Andric if (Call && getVectorIntrinsicIDForCall(Call, TLI)) 24490b57cec5SDimitry Andric continue; 24500b57cec5SDimitry Andric 24515f757f3fSDimitry Andric // If this is a load, save it. If this instruction can read from memory 24525f757f3fSDimitry Andric // but is not a load, then we quit. Notice that we don't handle function 24535f757f3fSDimitry Andric // calls that read or write. 24545f757f3fSDimitry Andric if (I.mayReadFromMemory()) { 24550b57cec5SDimitry Andric // If the function has an explicit vectorized counterpart, we can safely 24560b57cec5SDimitry Andric // assume that it can be vectorized. 
24570b57cec5SDimitry Andric if (Call && !Call->isNoBuiltin() && Call->getCalledFunction() && 24585ffd83dbSDimitry Andric !VFDatabase::getMappings(*Call).empty()) 24590b57cec5SDimitry Andric continue; 24600b57cec5SDimitry Andric 24610b57cec5SDimitry Andric auto *Ld = dyn_cast<LoadInst>(&I); 24620b57cec5SDimitry Andric if (!Ld) { 24630b57cec5SDimitry Andric recordAnalysis("CantVectorizeInstruction", Ld) 24640b57cec5SDimitry Andric << "instruction cannot be vectorized"; 24650b57cec5SDimitry Andric HasComplexMemInst = true; 24660b57cec5SDimitry Andric continue; 24670b57cec5SDimitry Andric } 24680b57cec5SDimitry Andric if (!Ld->isSimple() && !IsAnnotatedParallel) { 24690b57cec5SDimitry Andric recordAnalysis("NonSimpleLoad", Ld) 24700b57cec5SDimitry Andric << "read with atomic ordering or volatile read"; 24710b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Found a non-simple load.\n"); 24720b57cec5SDimitry Andric HasComplexMemInst = true; 24730b57cec5SDimitry Andric continue; 24740b57cec5SDimitry Andric } 24750b57cec5SDimitry Andric NumLoads++; 24760b57cec5SDimitry Andric Loads.push_back(Ld); 24770b57cec5SDimitry Andric DepChecker->addAccess(Ld); 24785ffd83dbSDimitry Andric if (EnableMemAccessVersioningOfLoop) 24790b57cec5SDimitry Andric collectStridedAccess(Ld); 24800b57cec5SDimitry Andric continue; 24810b57cec5SDimitry Andric } 24820b57cec5SDimitry Andric 24830b57cec5SDimitry Andric // Save 'store' instructions. Abort if other instructions write to memory. 
24840b57cec5SDimitry Andric if (I.mayWriteToMemory()) { 24850b57cec5SDimitry Andric auto *St = dyn_cast<StoreInst>(&I); 24860b57cec5SDimitry Andric if (!St) { 24870b57cec5SDimitry Andric recordAnalysis("CantVectorizeInstruction", St) 24880b57cec5SDimitry Andric << "instruction cannot be vectorized"; 24890b57cec5SDimitry Andric HasComplexMemInst = true; 24900b57cec5SDimitry Andric continue; 24910b57cec5SDimitry Andric } 24920b57cec5SDimitry Andric if (!St->isSimple() && !IsAnnotatedParallel) { 24930b57cec5SDimitry Andric recordAnalysis("NonSimpleStore", St) 24940b57cec5SDimitry Andric << "write with atomic ordering or volatile write"; 24950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Found a non-simple store.\n"); 24960b57cec5SDimitry Andric HasComplexMemInst = true; 24970b57cec5SDimitry Andric continue; 24980b57cec5SDimitry Andric } 24990b57cec5SDimitry Andric NumStores++; 25000b57cec5SDimitry Andric Stores.push_back(St); 25010b57cec5SDimitry Andric DepChecker->addAccess(St); 25025ffd83dbSDimitry Andric if (EnableMemAccessVersioningOfLoop) 25030b57cec5SDimitry Andric collectStridedAccess(St); 25040b57cec5SDimitry Andric } 25050b57cec5SDimitry Andric } // Next instr. 25060b57cec5SDimitry Andric } // Next block. 25070b57cec5SDimitry Andric 25080fca6ea1SDimitry Andric if (HasComplexMemInst) 25090fca6ea1SDimitry Andric return false; 25100b57cec5SDimitry Andric 25110b57cec5SDimitry Andric // Now we have two lists that hold the loads and the stores. 25120b57cec5SDimitry Andric // Next, we find the pointers that they use. 25130b57cec5SDimitry Andric 25140b57cec5SDimitry Andric // Check if we see any stores. If there are no stores, then we don't 25150b57cec5SDimitry Andric // care if the pointers are *restrict*. 
25160b57cec5SDimitry Andric if (!Stores.size()) { 25170b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Found a read-only loop!\n"); 25180fca6ea1SDimitry Andric return true; 25190b57cec5SDimitry Andric } 25200b57cec5SDimitry Andric 25210b57cec5SDimitry Andric MemoryDepChecker::DepCandidates DependentAccesses; 2522b3edf446SDimitry Andric AccessAnalysis Accesses(TheLoop, AA, LI, DependentAccesses, *PSE, 2523b3edf446SDimitry Andric LoopAliasScopes); 25240b57cec5SDimitry Andric 2525e8d8bef9SDimitry Andric // Holds the analyzed pointers. We don't want to call getUnderlyingObjects 25260b57cec5SDimitry Andric // multiple times on the same object. If the ptr is accessed twice, once 25270b57cec5SDimitry Andric // for read and once for write, it will only appear once (on the write 25280b57cec5SDimitry Andric // list). This is okay, since we are going to check for conflicts between 25290b57cec5SDimitry Andric // writes and between reads and writes, but not between reads and reads. 253081ad6265SDimitry Andric SmallSet<std::pair<Value *, Type *>, 16> Seen; 25310b57cec5SDimitry Andric 25320b57cec5SDimitry Andric // Record uniform store addresses to identify if we have multiple stores 25330b57cec5SDimitry Andric // to the same address. 
253481ad6265SDimitry Andric SmallPtrSet<Value *, 16> UniformStores; 25350b57cec5SDimitry Andric 25360b57cec5SDimitry Andric for (StoreInst *ST : Stores) { 25370b57cec5SDimitry Andric Value *Ptr = ST->getPointerOperand(); 25380b57cec5SDimitry Andric 253906c3fb27SDimitry Andric if (isInvariant(Ptr)) { 254081ad6265SDimitry Andric // Record store instructions to loop invariant addresses 254181ad6265SDimitry Andric StoresToInvariantAddresses.push_back(ST); 25420fca6ea1SDimitry Andric HasStoreStoreDependenceInvolvingLoopInvariantAddress |= 25430b57cec5SDimitry Andric !UniformStores.insert(Ptr).second; 254481ad6265SDimitry Andric } 25450b57cec5SDimitry Andric 25460b57cec5SDimitry Andric // If we did *not* see this pointer before, insert it to the read-write 25470b57cec5SDimitry Andric // list. At this phase it is only a 'write' list. 254881ad6265SDimitry Andric Type *AccessTy = getLoadStoreType(ST); 254981ad6265SDimitry Andric if (Seen.insert({Ptr, AccessTy}).second) { 25500b57cec5SDimitry Andric ++NumReadWrites; 25510b57cec5SDimitry Andric 25520b57cec5SDimitry Andric MemoryLocation Loc = MemoryLocation::get(ST); 25530b57cec5SDimitry Andric // The TBAA metadata could have a control dependency on the predication 25540b57cec5SDimitry Andric // condition, so we cannot rely on it when determining whether or not we 25550b57cec5SDimitry Andric // need runtime pointer checks. 
25560b57cec5SDimitry Andric if (blockNeedsPredication(ST->getParent(), TheLoop, DT)) 25570b57cec5SDimitry Andric Loc.AATags.TBAA = nullptr; 25580b57cec5SDimitry Andric 2559349cc55cSDimitry Andric visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, 256081ad6265SDimitry Andric [&Accesses, AccessTy, Loc](Value *Ptr) { 2561349cc55cSDimitry Andric MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr); 256281ad6265SDimitry Andric Accesses.addStore(NewLoc, AccessTy); 2563349cc55cSDimitry Andric }); 25640b57cec5SDimitry Andric } 25650b57cec5SDimitry Andric } 25660b57cec5SDimitry Andric 25670b57cec5SDimitry Andric if (IsAnnotatedParallel) { 25680b57cec5SDimitry Andric LLVM_DEBUG( 25690b57cec5SDimitry Andric dbgs() << "LAA: A loop annotated parallel, ignore memory dependency " 25700b57cec5SDimitry Andric << "checks.\n"); 25710fca6ea1SDimitry Andric return true; 25720b57cec5SDimitry Andric } 25730b57cec5SDimitry Andric 25740b57cec5SDimitry Andric for (LoadInst *LD : Loads) { 25750b57cec5SDimitry Andric Value *Ptr = LD->getPointerOperand(); 25760b57cec5SDimitry Andric // If we did *not* see this pointer before, insert it to the 25770b57cec5SDimitry Andric // read list. If we *did* see it before, then it is already in 25780b57cec5SDimitry Andric // the read-write list. This allows us to vectorize expressions 25790b57cec5SDimitry Andric // such as A[i] += x; Because the address of A[i] is a read-write 25800b57cec5SDimitry Andric // pointer. This only works if the index of A[i] is consecutive. 25810b57cec5SDimitry Andric // If the address of i is unknown (for example A[B[i]]) then we may 25820b57cec5SDimitry Andric // read a few words, modify, and write a few words, and some of the 25830b57cec5SDimitry Andric // words may be written to the same address. 
25840b57cec5SDimitry Andric bool IsReadOnlyPtr = false; 258581ad6265SDimitry Andric Type *AccessTy = getLoadStoreType(LD); 258681ad6265SDimitry Andric if (Seen.insert({Ptr, AccessTy}).second || 2587bdd1243dSDimitry Andric !getPtrStride(*PSE, LD->getType(), Ptr, TheLoop, SymbolicStrides).value_or(0)) { 25880b57cec5SDimitry Andric ++NumReads; 25890b57cec5SDimitry Andric IsReadOnlyPtr = true; 25900b57cec5SDimitry Andric } 25910b57cec5SDimitry Andric 25920b57cec5SDimitry Andric // See if there is an unsafe dependency between a load to a uniform address and 25930b57cec5SDimitry Andric // store to the same uniform address. 25940b57cec5SDimitry Andric if (UniformStores.count(Ptr)) { 25950b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Found an unsafe dependency between a uniform " 25960b57cec5SDimitry Andric "load and uniform store to the same address!\n"); 25970fca6ea1SDimitry Andric HasLoadStoreDependenceInvolvingLoopInvariantAddress = true; 25980b57cec5SDimitry Andric } 25990b57cec5SDimitry Andric 26000b57cec5SDimitry Andric MemoryLocation Loc = MemoryLocation::get(LD); 26010b57cec5SDimitry Andric // The TBAA metadata could have a control dependency on the predication 26020b57cec5SDimitry Andric // condition, so we cannot rely on it when determining whether or not we 26030b57cec5SDimitry Andric // need runtime pointer checks. 
26040b57cec5SDimitry Andric if (blockNeedsPredication(LD->getParent(), TheLoop, DT)) 26050b57cec5SDimitry Andric Loc.AATags.TBAA = nullptr; 26060b57cec5SDimitry Andric 2607349cc55cSDimitry Andric visitPointers(const_cast<Value *>(Loc.Ptr), *TheLoop, 260881ad6265SDimitry Andric [&Accesses, AccessTy, Loc, IsReadOnlyPtr](Value *Ptr) { 2609349cc55cSDimitry Andric MemoryLocation NewLoc = Loc.getWithNewPtr(Ptr); 261081ad6265SDimitry Andric Accesses.addLoad(NewLoc, AccessTy, IsReadOnlyPtr); 2611349cc55cSDimitry Andric }); 26120b57cec5SDimitry Andric } 26130b57cec5SDimitry Andric 26140b57cec5SDimitry Andric // If we write (or read-write) to a single destination and there are no 26150b57cec5SDimitry Andric // other reads in this loop then is it safe to vectorize. 26160b57cec5SDimitry Andric if (NumReadWrites == 1 && NumReads == 0) { 26170b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Found a write-only loop!\n"); 26180fca6ea1SDimitry Andric return true; 26190b57cec5SDimitry Andric } 26200b57cec5SDimitry Andric 26210b57cec5SDimitry Andric // Build dependence sets and check whether we need a runtime pointer bounds 26220b57cec5SDimitry Andric // check. 26230b57cec5SDimitry Andric Accesses.buildDependenceSets(); 26240b57cec5SDimitry Andric 26250b57cec5SDimitry Andric // Find pointers with computable bounds. We are going to use this information 26260b57cec5SDimitry Andric // to place a runtime bound check. 
262781ad6265SDimitry Andric Value *UncomputablePtr = nullptr; 262881ad6265SDimitry Andric bool CanDoRTIfNeeded = 262981ad6265SDimitry Andric Accesses.canCheckPtrAtRT(*PtrRtChecking, PSE->getSE(), TheLoop, 263081ad6265SDimitry Andric SymbolicStrides, UncomputablePtr, false); 26310b57cec5SDimitry Andric if (!CanDoRTIfNeeded) { 263281ad6265SDimitry Andric auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr); 263381ad6265SDimitry Andric recordAnalysis("CantIdentifyArrayBounds", I) 263481ad6265SDimitry Andric << "cannot identify array bounds"; 26350b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " 26360b57cec5SDimitry Andric << "the array bounds.\n"); 26370fca6ea1SDimitry Andric return false; 26380b57cec5SDimitry Andric } 26390b57cec5SDimitry Andric 26400b57cec5SDimitry Andric LLVM_DEBUG( 26410b57cec5SDimitry Andric dbgs() << "LAA: May be able to perform a memory runtime check if needed.\n"); 26420b57cec5SDimitry Andric 26430fca6ea1SDimitry Andric bool DepsAreSafe = true; 26440b57cec5SDimitry Andric if (Accesses.isDependencyCheckNeeded()) { 26450b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Checking memory dependencies\n"); 26460fca6ea1SDimitry Andric DepsAreSafe = DepChecker->areDepsSafe(DependentAccesses, 2647*62987288SDimitry Andric Accesses.getDependenciesToCheck()); 26480b57cec5SDimitry Andric 26490fca6ea1SDimitry Andric if (!DepsAreSafe && DepChecker->shouldRetryWithRuntimeCheck()) { 26500b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Retrying with memory checks\n"); 26510b57cec5SDimitry Andric 26520b57cec5SDimitry Andric // Clear the dependency checks. We assume they are not needed. 
26530b57cec5SDimitry Andric Accesses.resetDepChecks(*DepChecker); 26540b57cec5SDimitry Andric 26550b57cec5SDimitry Andric PtrRtChecking->reset(); 26560b57cec5SDimitry Andric PtrRtChecking->Need = true; 26570b57cec5SDimitry Andric 26580b57cec5SDimitry Andric auto *SE = PSE->getSE(); 265981ad6265SDimitry Andric UncomputablePtr = nullptr; 266081ad6265SDimitry Andric CanDoRTIfNeeded = Accesses.canCheckPtrAtRT( 266181ad6265SDimitry Andric *PtrRtChecking, SE, TheLoop, SymbolicStrides, UncomputablePtr, true); 26620b57cec5SDimitry Andric 26630b57cec5SDimitry Andric // Check that we found the bounds for the pointer. 26640b57cec5SDimitry Andric if (!CanDoRTIfNeeded) { 266581ad6265SDimitry Andric auto *I = dyn_cast_or_null<Instruction>(UncomputablePtr); 266681ad6265SDimitry Andric recordAnalysis("CantCheckMemDepsAtRunTime", I) 26670b57cec5SDimitry Andric << "cannot check memory dependencies at runtime"; 26680b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: Can't vectorize with memory checks\n"); 26690fca6ea1SDimitry Andric return false; 26700b57cec5SDimitry Andric } 26710fca6ea1SDimitry Andric DepsAreSafe = true; 26720b57cec5SDimitry Andric } 26730b57cec5SDimitry Andric } 26740b57cec5SDimitry Andric 26750b57cec5SDimitry Andric if (HasConvergentOp) { 26760b57cec5SDimitry Andric recordAnalysis("CantInsertRuntimeCheckWithConvergent") 26770b57cec5SDimitry Andric << "cannot add control dependency to convergent operation"; 26780b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: We can't vectorize because a runtime check " 26790b57cec5SDimitry Andric "would be needed with a convergent operation\n"); 26800fca6ea1SDimitry Andric return false; 26810b57cec5SDimitry Andric } 26820b57cec5SDimitry Andric 26830fca6ea1SDimitry Andric if (DepsAreSafe) { 26840b57cec5SDimitry Andric LLVM_DEBUG( 26850b57cec5SDimitry Andric dbgs() << "LAA: No unsafe dependent memory operations in loop. We" 26860b57cec5SDimitry Andric << (PtrRtChecking->Need ? 
"" : " don't") 26870b57cec5SDimitry Andric << " need runtime memory checks.\n"); 26880fca6ea1SDimitry Andric return true; 26890fca6ea1SDimitry Andric } 26900fca6ea1SDimitry Andric 269181ad6265SDimitry Andric emitUnsafeDependenceRemark(); 26920fca6ea1SDimitry Andric return false; 269381ad6265SDimitry Andric } 269481ad6265SDimitry Andric 269581ad6265SDimitry Andric void LoopAccessInfo::emitUnsafeDependenceRemark() { 26960fca6ea1SDimitry Andric const auto *Deps = getDepChecker().getDependences(); 269781ad6265SDimitry Andric if (!Deps) 269881ad6265SDimitry Andric return; 26990fca6ea1SDimitry Andric const auto *Found = 27000fca6ea1SDimitry Andric llvm::find_if(*Deps, [](const MemoryDepChecker::Dependence &D) { 270181ad6265SDimitry Andric return MemoryDepChecker::Dependence::isSafeForVectorization(D.Type) != 270281ad6265SDimitry Andric MemoryDepChecker::VectorizationSafetyStatus::Safe; 270381ad6265SDimitry Andric }); 270481ad6265SDimitry Andric if (Found == Deps->end()) 270581ad6265SDimitry Andric return; 270681ad6265SDimitry Andric MemoryDepChecker::Dependence Dep = *Found; 270781ad6265SDimitry Andric 270881ad6265SDimitry Andric LLVM_DEBUG(dbgs() << "LAA: unsafe dependent memory operations in loop\n"); 270981ad6265SDimitry Andric 271081ad6265SDimitry Andric // Emit remark for first unsafe dependence 27115f757f3fSDimitry Andric bool HasForcedDistribution = false; 27125f757f3fSDimitry Andric std::optional<const MDOperand *> Value = 27135f757f3fSDimitry Andric findStringMetadataForLoop(TheLoop, "llvm.loop.distribute.enable"); 27145f757f3fSDimitry Andric if (Value) { 27155f757f3fSDimitry Andric const MDOperand *Op = *Value; 27165f757f3fSDimitry Andric assert(Op && mdconst::hasa<ConstantInt>(*Op) && "invalid metadata"); 27175f757f3fSDimitry Andric HasForcedDistribution = mdconst::extract<ConstantInt>(*Op)->getZExtValue(); 27185f757f3fSDimitry Andric } 27195f757f3fSDimitry Andric 27205f757f3fSDimitry Andric const std::string Info = 27215f757f3fSDimitry Andric 
HasForcedDistribution 27225f757f3fSDimitry Andric ? "unsafe dependent memory operations in loop." 27235f757f3fSDimitry Andric : "unsafe dependent memory operations in loop. Use " 27245f757f3fSDimitry Andric "#pragma clang loop distribute(enable) to allow loop distribution " 27250b57cec5SDimitry Andric "to attempt to isolate the offending operations into a separate " 27260b57cec5SDimitry Andric "loop"; 27275f757f3fSDimitry Andric OptimizationRemarkAnalysis &R = 27280fca6ea1SDimitry Andric recordAnalysis("UnsafeDep", Dep.getDestination(getDepChecker())) << Info; 272981ad6265SDimitry Andric 273081ad6265SDimitry Andric switch (Dep.Type) { 273181ad6265SDimitry Andric case MemoryDepChecker::Dependence::NoDep: 273281ad6265SDimitry Andric case MemoryDepChecker::Dependence::Forward: 273381ad6265SDimitry Andric case MemoryDepChecker::Dependence::BackwardVectorizable: 273481ad6265SDimitry Andric llvm_unreachable("Unexpected dependence"); 273581ad6265SDimitry Andric case MemoryDepChecker::Dependence::Backward: 273681ad6265SDimitry Andric R << "\nBackward loop carried data dependence."; 273781ad6265SDimitry Andric break; 273881ad6265SDimitry Andric case MemoryDepChecker::Dependence::ForwardButPreventsForwarding: 273981ad6265SDimitry Andric R << "\nForward loop carried data dependence that prevents " 274081ad6265SDimitry Andric "store-to-load forwarding."; 274181ad6265SDimitry Andric break; 274281ad6265SDimitry Andric case MemoryDepChecker::Dependence::BackwardVectorizableButPreventsForwarding: 274381ad6265SDimitry Andric R << "\nBackward loop carried data dependence that prevents " 274481ad6265SDimitry Andric "store-to-load forwarding."; 274581ad6265SDimitry Andric break; 27465f757f3fSDimitry Andric case MemoryDepChecker::Dependence::IndirectUnsafe: 27475f757f3fSDimitry Andric R << "\nUnsafe indirect dependence."; 27485f757f3fSDimitry Andric break; 274981ad6265SDimitry Andric case MemoryDepChecker::Dependence::Unknown: 275081ad6265SDimitry Andric R << "\nUnknown data 
dependence."; 275181ad6265SDimitry Andric break; 275281ad6265SDimitry Andric } 275381ad6265SDimitry Andric 27540fca6ea1SDimitry Andric if (Instruction *I = Dep.getSource(getDepChecker())) { 275581ad6265SDimitry Andric DebugLoc SourceLoc = I->getDebugLoc(); 275681ad6265SDimitry Andric if (auto *DD = dyn_cast_or_null<Instruction>(getPointerOperand(I))) 275781ad6265SDimitry Andric SourceLoc = DD->getDebugLoc(); 275881ad6265SDimitry Andric if (SourceLoc) 275981ad6265SDimitry Andric R << " Memory location is the same as accessed at " 276081ad6265SDimitry Andric << ore::NV("Location", SourceLoc); 27610b57cec5SDimitry Andric } 27620b57cec5SDimitry Andric } 27630b57cec5SDimitry Andric 27640b57cec5SDimitry Andric bool LoopAccessInfo::blockNeedsPredication(BasicBlock *BB, Loop *TheLoop, 27650b57cec5SDimitry Andric DominatorTree *DT) { 27660b57cec5SDimitry Andric assert(TheLoop->contains(BB) && "Unknown block used"); 27670b57cec5SDimitry Andric 27680b57cec5SDimitry Andric // Blocks that do not dominate the latch need predication. 27690b57cec5SDimitry Andric BasicBlock* Latch = TheLoop->getLoopLatch(); 27700b57cec5SDimitry Andric return !DT->dominates(BB, Latch); 27710b57cec5SDimitry Andric } 27720b57cec5SDimitry Andric 27730b57cec5SDimitry Andric OptimizationRemarkAnalysis &LoopAccessInfo::recordAnalysis(StringRef RemarkName, 27740b57cec5SDimitry Andric Instruction *I) { 27750b57cec5SDimitry Andric assert(!Report && "Multiple reports generated"); 27760b57cec5SDimitry Andric 27770b57cec5SDimitry Andric Value *CodeRegion = TheLoop->getHeader(); 27780b57cec5SDimitry Andric DebugLoc DL = TheLoop->getStartLoc(); 27790b57cec5SDimitry Andric 27800b57cec5SDimitry Andric if (I) { 27810b57cec5SDimitry Andric CodeRegion = I->getParent(); 27820b57cec5SDimitry Andric // If there is no debug location attached to the instruction, revert back to 27830b57cec5SDimitry Andric // using the loop's. 
    if (I->getDebugLoc())
      DL = I->getDebugLoc();
  }

  Report = std::make_unique<OptimizationRemarkAnalysis>(DEBUG_TYPE, RemarkName, DL,
                                                        CodeRegion);
  return *Report;
}

/// Return true if \p V is invariant in TheLoop, as determined by SCEV.
bool LoopAccessInfo::isInvariant(Value *V) const {
  auto *SE = PSE->getSE();
  // TODO: Is this really what we want? Even without FP SCEV, we may want some
  // trivially loop-invariant FP values to be considered invariant.
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);
  return SE->isLoopInvariant(S, TheLoop);
}

/// Find the operand of the GEP that should be checked for consecutive
/// stores. This ignores trailing indices that have no effect on the final
/// pointer.
static unsigned getGEPInductionOperand(const GetElementPtrInst *Gep) {
  const DataLayout &DL = Gep->getDataLayout();
  unsigned LastOperand = Gep->getNumOperands() - 1;
  TypeSize GEPAllocSize = DL.getTypeAllocSize(Gep->getResultElementType());

  // Walk backwards and try to peel off zeros.
  while (LastOperand > 1 && match(Gep->getOperand(LastOperand), m_Zero())) {
    // Find the type we're currently indexing into.
    gep_type_iterator GEPTI = gep_type_begin(Gep);
    // Operand 0 is the base pointer, so index operand K corresponds to the
    // (K - 1)-th indexed type; advance to the type *containing* LastOperand.
    std::advance(GEPTI, LastOperand - 2);

    // If it's a type with the same allocation size as the result of the GEP we
    // can peel off the zero index.
    TypeSize ElemSize = GEPTI.isStruct()
                            ? DL.getTypeAllocSize(GEPTI.getIndexedType())
                            : GEPTI.getSequentialElementStride(DL);
    if (ElemSize != GEPAllocSize)
      break;
    --LastOperand;
  }

  return LastOperand;
}

/// If the argument is a GEP, then returns the operand identified by
/// getGEPInductionOperand. However, if there is some other non-loop-invariant
/// operand, it returns that instead.
static Value *stripGetElementPtr(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Ptr);
  if (!GEP)
    return Ptr;

  unsigned InductionOperand = getGEPInductionOperand(GEP);

  // Check that all of the gep indices are uniform except for our induction
  // operand.
  for (unsigned I = 0, E = GEP->getNumOperands(); I != E; ++I)
    if (I != InductionOperand &&
        !SE->isLoopInvariant(SE->getSCEV(GEP->getOperand(I)), Lp))
      return Ptr;
  return GEP->getOperand(InductionOperand);
}

/// Get the stride of a pointer access in a loop. Looks for symbolic
/// strides "a[i*stride]". Returns the symbolic stride, or null otherwise.
static const SCEV *getStrideFromPointer(Value *Ptr, ScalarEvolution *SE, Loop *Lp) {
  auto *PtrTy = dyn_cast<PointerType>(Ptr->getType());
  if (!PtrTy || PtrTy->isAggregateType())
    return nullptr;

  // Try to remove a gep instruction to make the pointer (actually index at this
  // point) easier analyzable. If OrigPtr is equal to Ptr we are analyzing the
  // pointer, otherwise, we are analyzing the index.
  Value *OrigPtr = Ptr;

  // The size of the pointer access. Fixed at 1; the step recurrence must match
  // it below, so only unit-sized symbolic strides are recognized.
  int64_t PtrAccessSize = 1;

  Ptr = stripGetElementPtr(Ptr, SE, Lp);
  const SCEV *V = SE->getSCEV(Ptr);

  if (Ptr != OrigPtr)
    // Strip off casts.
    while (const SCEVIntegralCastExpr *C = dyn_cast<SCEVIntegralCastExpr>(V))
      V = C->getOperand();

  // Only an add-recurrence describes a strided access.
  const SCEVAddRecExpr *S = dyn_cast<SCEVAddRecExpr>(V);
  if (!S)
    return nullptr;

  // If the pointer is invariant then there is no stride and it makes no
  // sense to add it here.
  if (Lp != S->getLoop())
    return nullptr;

  V = S->getStepRecurrence(*SE);
  if (!V)
    return nullptr;

  // Strip off the size of access multiplication if we are still analyzing the
  // pointer.
  if (OrigPtr == Ptr) {
    if (const SCEVMulExpr *M = dyn_cast<SCEVMulExpr>(V)) {
      if (M->getOperand(0)->getSCEVType() != scConstant)
        return nullptr;

      const APInt &APStepVal = cast<SCEVConstant>(M->getOperand(0))->getAPInt();

      // Huge step value - give up.
      if (APStepVal.getBitWidth() > 64)
        return nullptr;

      int64_t StepVal = APStepVal.getSExtValue();
      if (PtrAccessSize != StepVal)
        return nullptr;
      V = M->getOperand(1);
    }
  }

  // Note that the restrictions after this loop invariant check are only
  // profitability restrictions.
  if (!SE->isLoopInvariant(V, Lp))
    return nullptr;

  // Look for the loop invariant symbolic value, possibly behind an integral
  // cast.
  if (isa<SCEVUnknown>(V))
    return V;

  if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(V))
    if (isa<SCEVUnknown>(C->getOperand()))
      return V;

  return nullptr;
}

void LoopAccessInfo::collectStridedAccess(Value *MemAccess) {
  // Only loads and stores carry a pointer operand of interest here.
  Value *Ptr = getLoadStorePointerOperand(MemAccess);
  if (!Ptr)
    return;

  // Note: getStrideFromPointer is a *profitability* heuristic.
  // We could broaden the scope of values returned here - to anything
  // which happens to be loop invariant and contributes to the
  // computation of an interesting IV - but we chose not to as we
  // don't have a cost model here, and broadening the scope exposes
  // far too many unprofitable cases.
  const SCEV *StrideExpr = getStrideFromPointer(Ptr, PSE->getSE(), TheLoop);
  if (!StrideExpr)
    return;

  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that is a candidate for "
                       "versioning:");
  LLVM_DEBUG(dbgs() << " Ptr: " << *Ptr << " Stride: " << *StrideExpr << "\n");

  // Speculating unit strides can be disabled via command-line flag.
  if (!SpeculateUnitStride) {
    LLVM_DEBUG(dbgs() << " Chose not to due to -laa-speculate-unit-stride\n");
    return;
  }

  // Avoid adding the "Stride == 1" predicate when we know that
  // Stride >= Trip-Count. Such a predicate will effectively optimize a single
  // or zero iteration loop, as Trip-Count <= Stride == 1.
  //
  // TODO: We are currently not making a very informed decision on when it is
  // beneficial to apply stride versioning. It might make more sense that the
  // users of this analysis (such as the vectorizer) will trigger it, based on
  // their specific cost considerations; For example, in cases where stride
  // versioning does not help resolving memory accesses/dependences, the
  // vectorizer should evaluate the cost of the runtime test, and the benefit
  // of various possible stride specializations, considering the alternatives
  // of using gather/scatters (if available).

  const SCEV *MaxBTC = PSE->getSymbolicMaxBackedgeTakenCount();

  // Match the types so we can compare the stride and the MaxBTC.
  // The Stride can be positive/negative, so we sign extend Stride;
  // The backedgeTakenCount is non-negative, so we zero extend MaxBTC.
  const DataLayout &DL = TheLoop->getHeader()->getDataLayout();
  uint64_t StrideTypeSizeBits = DL.getTypeSizeInBits(StrideExpr->getType());
  uint64_t BETypeSizeBits = DL.getTypeSizeInBits(MaxBTC->getType());
  const SCEV *CastedStride = StrideExpr;
  const SCEV *CastedBECount = MaxBTC;
  ScalarEvolution *SE = PSE->getSE();
  // Widen whichever of the two expressions is narrower.
  if (BETypeSizeBits >= StrideTypeSizeBits)
    CastedStride = SE->getNoopOrSignExtend(StrideExpr, MaxBTC->getType());
  else
    CastedBECount = SE->getZeroExtendExpr(MaxBTC, StrideExpr->getType());
  const SCEV *StrideMinusBETaken = SE->getMinusSCEV(CastedStride, CastedBECount);
  // Since TripCount == BackEdgeTakenCount + 1, checking:
  // "Stride >= TripCount" is equivalent to checking:
  // Stride - MaxBTC > 0
  if (SE->isKnownPositive(StrideMinusBETaken)) {
    LLVM_DEBUG(
        dbgs() << "LAA: Stride>=TripCount; No point in versioning as the "
                  "Stride==1 predicate will imply that the loop executes "
                  "at most once.\n");
    return;
  }
  LLVM_DEBUG(dbgs() << "LAA: Found a strided access that we can version.\n");

  // Strip back off the integer cast, and check that our result is a
  // SCEVUnknown as we expect.
  const SCEV *StrideBase = StrideExpr;
  if (const auto *C = dyn_cast<SCEVIntegralCastExpr>(StrideBase))
    StrideBase = C->getOperand();
  // Record the symbolic stride so later analysis can version on Stride == 1.
  SymbolicStrides[Ptr] = cast<SCEVUnknown>(StrideBase);
}

LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE,
                               const TargetTransformInfo *TTI,
                               const TargetLibraryInfo *TLI, AAResults *AA,
                               DominatorTree *DT, LoopInfo *LI)
    : PSE(std::make_unique<PredicatedScalarEvolution>(*SE, *L)),
      PtrRtChecking(nullptr), TheLoop(L) {
  // Upper bound on the vector width, handed to the dependence checker;
  // defaults to "unlimited" when no TTI is available.
  unsigned MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
  if (TTI) {
    TypeSize FixedWidth =
        TTI->getRegisterBitWidth(TargetTransformInfo::RGK_FixedWidthVector);
    if (FixedWidth.isNonZero()) {
      // Scale the vector width by 2 as rough estimate to also consider
      // interleaving.
      MaxTargetVectorWidthInBits = FixedWidth.getFixedValue() * 2;
    }

    // With scalable vectors, fall back to "unlimited" — presumably because
    // the runtime vector length is not known here.
    TypeSize ScalableWidth =
        TTI->getRegisterBitWidth(TargetTransformInfo::RGK_ScalableVector);
    if (ScalableWidth.isNonZero())
      MaxTargetVectorWidthInBits = std::numeric_limits<unsigned>::max();
  }
  // PtrRtChecking holds a reference to DepChecker, so create DepChecker first.
  DepChecker = std::make_unique<MemoryDepChecker>(*PSE, L, SymbolicStrides,
                                                  MaxTargetVectorWidthInBits);
  PtrRtChecking = std::make_unique<RuntimePointerChecking>(*DepChecker, SE);
  if (canAnalyzeLoop())
    CanVecMem = analyzeLoop(AA, LI, TLI, DT);
}

void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const {
  if (CanVecMem) {
    OS.indent(Depth) << "Memory dependences are safe";
    const MemoryDepChecker &DC = getDepChecker();
    if (!DC.isSafeForAnyVectorWidth())
      OS << " with a maximum safe vector width of "
         << DC.getMaxSafeVectorWidthInBits() << " bits";
    if (PtrRtChecking->Need)
      OS << " with run-time checks";
    OS << "\n";
  }

  if (HasConvergentOp)
    OS.indent(Depth) << "Has convergent operation in loop\n";

  if (Report)
    OS.indent(Depth) << "Report: " << Report->getMsg() << "\n";

  // Dump recorded dependences, unless the list was dropped for being too long.
  if (auto
      *Dependences = DepChecker->getDependences()) {
    OS.indent(Depth) << "Dependences:\n";
    for (const auto &Dep : *Dependences) {
      Dep.print(OS, Depth + 2, DepChecker->getMemoryInstructions());
      OS << "\n";
    }
  } else
    OS.indent(Depth) << "Too many dependences, not recorded\n";

  // List the pairs of accesses that need run-time checks to prove
  // independence.
  PtrRtChecking->print(OS, Depth);
  OS << "\n";

  OS.indent(Depth)
      << "Non vectorizable stores to invariant address were "
      << (HasStoreStoreDependenceInvolvingLoopInvariantAddress ||
                  HasLoadStoreDependenceInvolvingLoopInvariantAddress
              ? ""
              : "not ")
      << "found in loop.\n";

  OS.indent(Depth) << "SCEV assumptions:\n";
  PSE->getPredicate().print(OS, Depth);

  OS << "\n";

  OS.indent(Depth) << "Expressions re-written:\n";
  PSE->print(OS, Depth);
}

/// Return the cached LoopAccessInfo for \p L, computing and caching it on the
/// first request.
const LoopAccessInfo &LoopAccessInfoManager::getInfo(Loop &L) {
  auto [It, Inserted] = LoopAccessInfoMap.insert({&L, nullptr});

  if (Inserted)
    It->second =
        std::make_unique<LoopAccessInfo>(&L, &SE, TTI, TLI, &AA, &DT, &LI);

  return *It->second;
}

void LoopAccessInfoManager::clear() {
  SmallVector<Loop *> ToRemove;
  // Collect LoopAccessInfo entries that may keep references to IR outside the
  // analyzed loop or SCEVs that may have been modified or invalidated. At the
  // moment, that is loops requiring memory or SCEV runtime checks, as those
  // cache SCEVs, e.g. for pointer expressions.
  for (const auto &[L, LAI] : LoopAccessInfoMap) {
    // Entries with no runtime checks and a trivially-true predicate cache
    // nothing that can go stale; keep them.
    if (LAI->getRuntimePointerChecking()->getChecks().empty() &&
        LAI->getPSE().getPredicate().isAlwaysTrue())
      continue;
    ToRemove.push_back(L);
  }

  // Erase in a second pass so the map is not mutated while iterating.
  for (Loop *L : ToRemove)
    LoopAccessInfoMap.erase(L);
}

bool LoopAccessInfoManager::invalidate(
    Function &F, const PreservedAnalyses &PA,
    FunctionAnalysisManager::Invalidator &Inv) {
  // Check whether our analysis is preserved.
  auto PAC = PA.getChecker<LoopAccessAnalysis>();
  if (!PAC.preserved() && !PAC.preservedSet<AllAnalysesOn<Function>>())
    // If not, give up now.
    return true;

  // Check whether the analyses we depend on became invalid for any reason.
  // Skip checking TargetLibraryAnalysis as it is immutable and can't become
  // invalid.
  return Inv.invalidate<AAManager>(F, PA) ||
         Inv.invalidate<ScalarEvolutionAnalysis>(F, PA) ||
         Inv.invalidate<LoopAnalysis>(F, PA) ||
         Inv.invalidate<DominatorTreeAnalysis>(F, PA);
}

LoopAccessInfoManager LoopAccessAnalysis::run(Function &F,
                                              FunctionAnalysisManager &FAM) {
  // Gather the function-level analyses that LoopAccessInfo construction needs.
  auto &SE = FAM.getResult<ScalarEvolutionAnalysis>(F);
  auto &AA = FAM.getResult<AAManager>(F);
  auto &DT = FAM.getResult<DominatorTreeAnalysis>(F);
  auto &LI = FAM.getResult<LoopAnalysis>(F);
  auto &TTI = FAM.getResult<TargetIRAnalysis>(F);
  auto &TLI = FAM.getResult<TargetLibraryAnalysis>(F);
  return LoopAccessInfoManager(SE, AA, DT, LI, &TTI, &TLI);
}

AnalysisKey LoopAccessAnalysis::Key;