//===- LoopVectorizationLegality.cpp --------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file provides loop vectorization legality analysis. Original code
// resided in LoopVectorize.cpp for a long time.
//
// At this point, it is implemented as a utility class, not as an analysis
// pass. It should be easy to create an analysis pass around it if there
// is a need (but D45420 needs to happen first).
//

#include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
#include "llvm/Analysis/Loads.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/OptimizationRemarkEmitter.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/Transforms/Utils/SizeOpts.h"
#include "llvm/Transforms/Vectorize/LoopVectorize.h"

using namespace llvm;
using namespace PatternMatch;

#define LV_NAME "loop-vectorize"
#define DEBUG_TYPE LV_NAME

static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

static cl::opt<bool>
    AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false),
                           cl::Hidden,
                           cl::desc("Enable recognition of non-constant strided "
                                    "pointer induction variables."));

namespace llvm {
cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));
}

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

static cl::opt<LoopVectorizeHints::ScalableForceKind>
    ForceScalableVectorization(
        "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
        cl::Hidden,
        cl::desc("Control whether the compiler can use scalable vectors to "
                 "vectorize a loop"),
        cl::values(
            clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
                       "Scalable vectorization is disabled."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "preferred",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "on",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive.")));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;

namespace llvm {

bool LoopVectorizeHints::Hint::validate(unsigned Val) {
  switch (Kind) {
  case HK_WIDTH:
    return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
  case HK_INTERLEAVE:
    return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
  case HK_FORCE:
    return (Val <= 1);
  case HK_ISVECTORIZED:
  case HK_PREDICATE:
  case HK_SCALABLE:
    return (Val == 0 || Val == 1);
  }
  return false;
}

LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //  - Target default
  //  - Metadata width
  //  - Force option (always overrides)
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
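  // (Illustrative example, not tied to a specific target: metadata that sets
  // only 'llvm.loop.vectorize.width' = 4 and says nothing about scalability
  // reaches this point as SK_FixedWidthOnly, and passing
  // -scalable-vectorization=on would still override it to SK_PreferScalable.)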
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}

void LoopVectorizeHints::setAlreadyVectorized() {
  LLVMContext &Context = TheLoop->getHeader()->getContext();

  MDNode *IsVectorizedMD = MDNode::get(
      Context,
      {MDString::get(Context, "llvm.loop.isvectorized"),
       ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
  MDNode *LoopID = TheLoop->getLoopID();
  MDNode *NewLoopID =
      makePostTransformationMetadata(Context, LoopID,
                                     {Twine(Prefix(), "vectorize.").str(),
                                      Twine(Prefix(), "interleave.").str()},
                                     {IsVectorizedMD});
  TheLoop->setLoopID(NewLoopID);

  // Update internal cache.
  IsVectorized.Value = 1;
}

bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
                                        "AllDisabled", L->getStartLoc(),
                                        L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}

void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";
    else {
      OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
                                 TheLoop->getStartLoc(), TheLoop->getHeader());
      R << "loop not vectorized";
      if (Force.Value == LoopVectorizeHints::FK_Enabled) {
        R << " (Force=" << NV("Force", true);
        if (Width.Value != 0)
          R << ", Vector Width=" << NV("VectorWidth", getWidth());
        if (getInterleave() != 0)
          R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
        R << ")";
      }
      return R;
    }
  });
}

const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
  if (getWidth() == ElementCount::getFixed(1))
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Disabled)
    return LV_NAME;
  if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
    return LV_NAME;
  return OptimizationRemarkAnalysis::AlwaysPrint;
}

bool LoopVectorizeHints::allowReordering() const {
  // Allow the vectorizer to change the order of operations if enabling
  // loop hints are provided
  ElementCount EC = getWidth();
  return HintsAllowReordering &&
         (getForce() == LoopVectorizeHints::FK_Enabled ||
          EC.getKnownMinValue() > 1);
}

void LoopVectorizeHints::getHintsFromMetadata() {
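  // For illustration (not from a specific test case), the hints parsed here
  // are stored as operands of the loop's '!llvm.loop' metadata, e.g.:
  //   !0 = distinct !{!0, !1, !2}
  //   !1 = !{!"llvm.loop.vectorize.width", i32 4}
  //   !2 = !{!"llvm.loop.vectorize.enable", i1 true}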
  MDNode *LoopID = TheLoop->getLoopID();
  if (!LoopID)
    return;

  // First operand should refer to the loop id itself.
  assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
  assert(LoopID->getOperand(0) == LoopID && "invalid loop id");

  for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
    const MDString *S = nullptr;
    SmallVector<Metadata *, 4> Args;

    // The expected hint is either a MDString or a MDNode with the first
    // operand a MDString.
    if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
      if (!MD || MD->getNumOperands() == 0)
        continue;
      S = dyn_cast<MDString>(MD->getOperand(0));
      for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
        Args.push_back(MD->getOperand(i));
    } else {
      S = dyn_cast<MDString>(MDO);
      assert(Args.size() == 0 && "too many arguments for MDString");
    }

    if (!S)
      continue;

    // Check if the hint starts with the loop metadata prefix.
    StringRef Name = S->getString();
    if (Args.size() == 1)
      setHint(Name, Args[0]);
  }
}

void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.starts_with(Prefix()))
    return;
  Name = Name.substr(Prefix().size(), StringRef::npos);

  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width,        &Interleave, &Force,
                   &IsVectorized, &Predicate,  &Scalable};
  for (auto *H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}

// Return true if the inner loop \p Lp is uniform with regard to the outer loop
// \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
// executing the inner loop will execute the same iterations). This check is
// very constrained for now but it will be relaxed in the future.
// \p Lp is considered uniform if it meets all the following conditions:
//   1) it has a canonical IV (starting from 0 and with stride 1),
//   2) its latch terminator is a conditional branch and,
//   3) its latch condition is a compare instruction whose operands are the
//      canonical IV and an OuterLp invariant.
// This check doesn't take into account the uniformity of other conditions not
// related to the loop latch because they don't affect the loop uniformity.
//
// NOTE: We decided to keep all these checks and their associated documentation
// together so that we can easily have a picture of the current supported loop
// nests. However, some of the current checks don't depend on \p OuterLp and
// would be redundantly executed for each \p Lp if we invoked this function for
// different candidate outer loops. This is not the case for now because we
// don't currently have the infrastructure to evaluate multiple candidate outer
// loops and \p OuterLp will be a fixed parameter while we only support
// explicit outer loop vectorization. It's also very likely that these checks
// go away before introducing the aforementioned infrastructure. However, if
// this is not the case, we should move the \p OuterLp independent checks to a
// separate function that is only executed once for each \p Lp.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1.
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3.
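  // (Illustrative: a latch condition such as `icmp slt i64 %iv.next, %n`,
  // where %n is defined outside OuterLp, satisfies this check.)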
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}

// Return true if \p Lp and all its nested loops are uniform with regard to \p
// OuterLp.
static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
  if (!isUniformLoop(Lp, OuterLp))
    return false;

  // Check if nested loops are uniform.
  for (Loop *SubLp : *Lp)
    if (!isUniformLoopNest(SubLp, OuterLp))
      return false;

  return true;
}

static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
  if (Ty->isPointerTy())
    return DL.getIntPtrType(Ty);

  // It is possible that chars or shorts overflow when we ask for the loop's
  // trip count; work around this by changing the type size.
  if (Ty->getScalarSizeInBits() < 32)
    return Type::getInt32Ty(Ty->getContext());

  return Ty;
}

static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
  Ty0 = convertPointerToIntegerType(DL, Ty0);
  Ty1 = convertPointerToIntegerType(DL, Ty1);
  if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
    return Ty0;
  return Ty1;
}

/// Check that the instruction has outside loop users and is not an
/// identified reduction variable.
static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
                               SmallPtrSetImpl<Value *> &AllowedExit) {
  // Reductions, Inductions and non-header phis are allowed to have exit users.
  // All other instructions must not have external users.
  if (!AllowedExit.count(Inst))
    // Check that all of the users of the loop are inside the BB.
    for (User *U : Inst->users()) {
      Instruction *UI = cast<Instruction>(U);
      // This user may be a reduction exit value.
      if (!TheLoop->contains(UI)) {
        LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
        return true;
      }
    }
  return false;
}

/// Returns true if A and B have the same pointer operands or the same SCEV
/// addresses.
static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
                               StoreInst *B) {
  // Compare store instructions.
  if (A == B)
    return true;

  // Otherwise compare pointers.
  Value *APtr = A->getPointerOperand();
  Value *BPtr = B->getPointerOperand();
  if (APtr == BPtr)
    return true;

  // Otherwise compare address SCEVs.
  if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
    return true;

  return false;
}

int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected. This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
      LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  Function *F = TheLoop->getHeader()->getParent();
  bool OptForSize = F->hasOptSize() ||
                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
                                                PGSOQueryType::IRPass);
  bool CanAddPredicate = !OptForSize;
  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                            CanAddPredicate, false).value_or(0);
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}

bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}

namespace {
/// A rewriter to build the SCEVs for each of the VF lanes in the expected
/// vectorized loop, which can then be compared to detect their uniformity.
/// This is done by replacing the AddRec SCEVs of the original scalar loop
/// (TheLoop) with new AddRecs where the step is multiplied by StepMultiplier
/// and Offset * Step is added. Also checks if all sub-expressions are
/// analyzable w.r.t. uniformity.
class SCEVAddRecForUniformityRewriter
    : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
  /// Multiplier to be applied to the step of AddRecs in TheLoop.
  unsigned StepMultiplier;

  /// Offset to be added to the AddRecs in TheLoop.
  unsigned Offset;

  /// Loop for which to rewrite AddRecs.
  Loop *TheLoop;

  /// Are any sub-expressions not analyzable w.r.t. uniformity?
  bool CannotAnalyze = false;

  bool canAnalyze() const { return !CannotAnalyze; }

public:
  SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
                                  unsigned Offset, Loop *TheLoop)
      : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
        TheLoop(TheLoop) {}

  const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
    assert(Expr->getLoop() == TheLoop &&
           "addrec outside of TheLoop must be invariant and should have been "
           "handled earlier");
    // Build a new AddRec by multiplying the step by StepMultiplier and
    // incrementing the start by Offset * step.
    Type *Ty = Expr->getType();
    auto *Step = Expr->getStepRecurrence(SE);
    if (!SE.isLoopInvariant(Step, TheLoop)) {
      CannotAnalyze = true;
      return Expr;
    }
    auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
    auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
    auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
    return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
  }

  const SCEV *visit(const SCEV *S) {
    if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
      return S;
    return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
  }

  const SCEV *visitUnknown(const SCEVUnknown *S) {
    if (SE.isLoopInvariant(S, TheLoop))
      return S;
    // The value could vary across iterations.
    CannotAnalyze = true;
    return S;
  }

  const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
    // Could not analyze the expression.
    CannotAnalyze = true;
    return S;
  }

  static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
                             unsigned StepMultiplier, unsigned Offset,
                             Loop *TheLoop) {
    /// Bail out if the expression does not contain an UDiv expression.
    /// Uniform values which are not loop invariant require operations to strip
    /// out the lowest bits. For now just look for UDivs and use it to avoid
    /// re-writing UDIV-free expressions for other lanes to limit compile time.
    if (!SCEVExprContains(S,
                          [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
      return SE.getCouldNotCompute();

    SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
                                             TheLoop);
    const SCEV *Result = Rewriter.visit(S);

    if (Rewriter.canAnalyze())
      return Result;
    return SE.getCouldNotCompute();
  }
};

} // namespace

bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
  if (isInvariant(V))
    return true;
  if (VF.isScalable())
    return false;
  if (VF.isScalar())
    return true;

  // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
  // never considered uniform.
  auto *SE = PSE.getSE();
  if (!SE->isSCEVable(V->getType()))
    return false;
  const SCEV *S = SE->getSCEV(V);

  // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
  // lane 0 matches the expressions for all other lanes.
  unsigned FixedVF = VF.getKnownMinValue();
  const SCEV *FirstLaneExpr =
      SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
  if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
    return false;

  // Make sure the expressions for lanes FixedVF-1..1 match the expression for
  // lane 0. We check lanes in reverse order for compile-time, as frequently
  // checking the last lane is sufficient to rule out uniformity.
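  // Illustration (not from a specific test): each in-loop AddRec
  // {Start,+,Step} is modeled for lane I as {Start + I * Step,+,VF * Step},
  // so V is treated as uniform only when every lane yields the same rewritten
  // expression as lane 0.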
  return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
    const SCEV *IthLaneExpr =
        SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
    return FirstLaneExpr == IthLaneExpr;
  });
}

bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
                                               ElementCount VF) const {
  Value *Ptr = getLoadStorePointerOperand(&I);
  if (!Ptr)
    return false;
  // Note: There's nothing inherent which prevents predicated loads and
  // stores from being uniform. The current lowering simply doesn't handle
  // it; in particular, the cost model distinguishes scatter/gather from
  // scalar w/predication, and we currently rely on the scalar path.
  return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
}

bool LoopVectorizationLegality::canVectorizeOuterLoop() {
  assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
  // Store the result and return it at the end instead of exiting early, in case
  // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
  bool Result = true;
  bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);

  for (BasicBlock *BB : TheLoop->blocks()) {
    // Check whether the BB terminator is a BranchInst. Any other terminator is
    // not supported yet.
    auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
    if (!Br) {
      reportVectorizationFailure("Unsupported basic block terminator",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }

    // Check whether the BranchInst is a supported one. Only unconditional
    // branches, conditional branches with an outer loop invariant condition or
    // backedges are supported.
    // FIXME: We skip these checks when VPlan predication is enabled as we
    // want to allow divergent branches. This whole check will be removed
    // once VPlan predication is on by default.
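    // (Illustrative: a latch branch such as `br i1 %cmp, label %header,
    // label %exit` is accepted because one successor is a loop header,
    // whereas a conditional branch on a value computed inside the loop body,
    // with neither successor a loop header, is rejected below.)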
    if (Br && Br->isConditional() &&
        !TheLoop->isLoopInvariant(Br->getCondition()) &&
        !LI->isLoopHeader(Br->getSuccessor(0)) &&
        !LI->isLoopHeader(Br->getSuccessor(1))) {
      reportVectorizationFailure("Unsupported conditional branch",
                                 "loop control flow is not understood by vectorizer",
                                 "CFGNotUnderstood", ORE, TheLoop);
      if (DoExtraAnalysis)
        Result = false;
      else
        return false;
    }
  }

  // Check whether inner loops are uniform. At this point, we only support
  // simple outer loop scenarios with uniform nested loops.
  if (!isUniformLoopNest(TheLoop /*loop nest*/,
                         TheLoop /*context outer loop*/)) {
    reportVectorizationFailure("Outer loop contains divergent loops",
                               "loop control flow is not understood by vectorizer",
                               "CFGNotUnderstood", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  // Check whether we are able to set up outer loop induction.
  if (!setupOuterLoopInductions()) {
    reportVectorizationFailure("Unsupported outer loop Phi(s)",
                               "Unsupported outer loop Phi(s)",
                               "UnsupportedPhi", ORE, TheLoop);
    if (DoExtraAnalysis)
      Result = false;
    else
      return false;
  }

  return Result;
}

void LoopVectorizationLegality::addInductionPhi(
    PHINode *Phi, const InductionDescriptor &ID,
    SmallPtrSetImpl<Value *> &AllowedExit) {
  Inductions[Phi] = ID;

  // In case this induction also comes with casts that we know we can ignore
  // in the vectorized loop body, record them here. All casts could be recorded
  // here for ignoring, but it suffices to record only the first (as it is the
  // only one that may be used outside the cast sequence).
  const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
  if (!Casts.empty())
    InductionCastsToIgnore.insert(*Casts.begin());

  Type *PhiTy = Phi->getType();
  const DataLayout &DL = Phi->getDataLayout();

  // Get the widest type.
  if (!PhiTy->isFloatingPointTy()) {
    if (!WidestIndTy)
      WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
    else
      WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
  }

  // Int inductions are special because we only allow one IV.
  if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
      ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
      isa<Constant>(ID.getStartValue()) &&
      cast<Constant>(ID.getStartValue())->isNullValue()) {

    // Use the phi node with the widest type as induction. Use the last
    // one if there are multiple (no good reason for doing this other
    // than it is expedient). We've checked that it begins at zero and
    // steps by one, so this is a canonical induction variable.
    if (!PrimaryInduction || PhiTy == WidestIndTy)
      PrimaryInduction = Phi;
  }

  // Both the PHI node itself, and the "post-increment" value feeding
  // back into the PHI node may have external users.
  // We can allow those uses, except if the SCEVs we have for them rely
  // on predicates that only hold within the loop, since allowing the exit
  // currently means re-using this SCEV outside the loop (see PR33706 for more
  // details).
  if (PSE.getPredicate().isAlwaysTrue()) {
    AllowedExit.insert(Phi);
    AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
  }

  LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
}

bool LoopVectorizationLegality::setupOuterLoopInductions() {
  BasicBlock *Header = TheLoop->getHeader();

  // Returns true if a given Phi is a supported induction.
  auto isSupportedPhi = [&](PHINode &Phi) -> bool {
    InductionDescriptor ID;
    if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
        ID.getKind() == InductionDescriptor::IK_IntInduction) {
      addInductionPhi(&Phi, ID, AllowedExit);
      return true;
    } else {
      // Bail out for any Phi in the outer loop header that is not a supported
      // induction.
      LLVM_DEBUG(
          dbgs()
          << "LV: Found unsupported PHI for outer loop vectorization.\n");
      return false;
    }
  };

  if (llvm::all_of(Header->phis(), isSupportedPhi))
    return true;
  else
    return false;
}

/// Checks if a function is scalarizable according to the TLI, in
/// the sense that it should be vectorized and then expanded in
/// multiple scalar calls. This is represented in the
/// TLI via mappings that do not specify a vector name, as in the
/// following example:
///
///    const VecDesc VecIntrinsics[] = {
///      {"llvm.phx.abs.i32", "", 4}
///    };
static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
  const StringRef ScalarName = CI.getCalledFunction()->getName();
  bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
  // Check that all known VFs are not associated to a vector
  // function, i.e. the vector name is empty.
  if (Scalarize) {
    ElementCount WidestFixedVF, WidestScalableVF;
    TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
    for (ElementCount VF = ElementCount::getFixed(2);
         ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    for (ElementCount VF = ElementCount::getScalable(1);
         ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
      Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
    assert((WidestScalableVF.isZero() || !Scalarize) &&
           "Caller may decide to scalarize a variant using a scalable VF");
  }
  return Scalarize;
}

bool LoopVectorizationLegality::canVectorizeInstrs() {
  BasicBlock *Header = TheLoop->getHeader();

  // For each block in the loop.
  for (BasicBlock *BB : TheLoop->blocks()) {
    // Scan the instructions in the block and look for hazards.
    for (Instruction &I : *BB) {
      if (auto *Phi = dyn_cast<PHINode>(&I)) {
        Type *PhiTy = Phi->getType();
        // Check that this PHI type is allowed.
        if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
            !PhiTy->isPointerTy()) {
          reportVectorizationFailure("Found a non-int non-pointer PHI",
                                     "loop control flow is not understood by vectorizer",
                                     "CFGNotUnderstood", ORE, TheLoop);
          return false;
        }

        // If this PHINode is not in the header block, then we know that we
        // can convert it to select during if-conversion. No need to check if
        // the PHIs in this block are induction or reduction variables.
        if (BB != Header) {
          // Non-header phi nodes that have outside uses can be vectorized. Add
          // them to the list of allowed exits.
          // Unsafe cyclic dependencies with header phis are identified during
          // legalization for reduction, induction and fixed order
          // recurrences.
          AllowedExit.insert(&I);
          continue;
        }

        // We only allow if-converted PHIs with exactly two incoming values.
        if (Phi->getNumIncomingValues() != 2) {
          reportVectorizationFailure("Found an invalid PHI",
                                     "loop control flow is not understood by vectorizer",
                                     "CFGNotUnderstood", ORE, TheLoop, Phi);
          return false;
        }

        RecurrenceDescriptor RedDes;
        if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
                                                 DT, PSE.getSE())) {
          Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
          AllowedExit.insert(RedDes.getLoopExitInstr());
          Reductions[Phi] = RedDes;
          continue;
        }

        // We prevent matching non-constant strided pointer IVs to preserve
        // historical vectorizer behavior after a generalization of the
        // IVDescriptor code. The intent is to remove this check, but we
        // have to fix issues around code quality for such loops first.
        auto isDisallowedStridedPointerInduction =
            [](const InductionDescriptor &ID) {
              if (AllowStridedPointerIVs)
                return false;
              return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
                     ID.getConstIntStepValue() == nullptr;
            };

        // TODO: Instead of recording the AllowedExit, it would be good to
        // record the complementary set: NotAllowedExit. These include (but may
        // not be limited to):
        //  1. Reduction phis as they represent the one-before-last value, which
        //     is not available when vectorized
        //  2. Induction phis and increment when SCEV predicates cannot be used
        //     outside the loop - see addInductionPhi
        //  3. Non-Phis with outside uses when SCEV predicates cannot be used
        //     outside the loop - see call to hasOutsideLoopUser in the non-phi
        //     handling below
        //  4. FixedOrderRecurrence phis that can possibly be handled by
        //     extraction.
        // By recording these, we can then reason about ways to vectorize each
        // of these NotAllowedExit.
        InductionDescriptor ID;
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
            !isDisallowedStridedPointerInduction(ID)) {
          addInductionPhi(Phi, ID, AllowedExit);
          Requirements->addExactFPMathInst(ID.getExactFPMathInst());
          continue;
        }

        if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
          AllowedExit.insert(Phi);
          FixedOrderRecurrences.insert(Phi);
          continue;
        }

        // As a last resort, coerce the PHI to an AddRec expression
        // and re-try classifying it as an induction PHI.
        if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
            !isDisallowedStridedPointerInduction(ID)) {
          addInductionPhi(Phi, ID, AllowedExit);
          continue;
        }

        reportVectorizationFailure("Found an unidentified PHI",
                                   "value that could not be identified as "
                                   "reduction is used outside the loop",
                                   "NonReductionValueUsedOutsideLoop", ORE,
                                   TheLoop, Phi);
        return false;
      } // end of PHI handling

      // We handle calls that:
      //   * Are debug info intrinsics.
      //   * Have a mapping to an IR intrinsic.
      //   * Have a vector version available.
      auto *CI = dyn_cast<CallInst>(&I);

      if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
          !isa<DbgInfoIntrinsic>(CI) &&
          !(CI->getCalledFunction() && TLI &&
            (!VFDatabase::getMappings(*CI).empty() ||
             isTLIScalarize(*TLI, *CI)))) {
        // If the call is a recognized math library call, it is likely that
        // we can vectorize it given loosened floating-point constraints.
        LibFunc Func;
        bool IsMathLibCall =
            TLI && CI->getCalledFunction() &&
            CI->getType()->isFloatingPointTy() &&
            TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
            TLI->hasOptimizedCodeGen(Func);

        if (IsMathLibCall) {
          // TODO: Ideally, we should not use clang-specific language here,
          // but it's hard to provide meaningful yet generic advice.
          // Also, should this be guarded by allowExtraAnalysis() and/or be part
          // of the returned info from isFunctionVectorizable()?
          reportVectorizationFailure(
              "Found a non-intrinsic callsite",
              "library call cannot be vectorized. "
              "Try compiling with -fno-math-errno, -ffast-math, "
              "or similar flags",
              "CantVectorizeLibcall", ORE, TheLoop, CI);
        } else {
          reportVectorizationFailure("Found a non-intrinsic callsite",
                                     "call instruction cannot be vectorized",
                                     "CantVectorizeLibcall", ORE, TheLoop, CI);
        }
        return false;
      }

      // Some intrinsics have scalar arguments which must be the same (i.e.
      // loop invariant) for the call to be vectorized.
      if (CI) {
        auto *SE = PSE.getSE();
        Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
        for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
          if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
            if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
              reportVectorizationFailure("Found unvectorizable intrinsic",
                                         "intrinsic instruction cannot be vectorized",
                                         "CantVectorizeIntrinsic", ORE, TheLoop, CI);
              return false;
            }
          }
      }

      // If we found a vectorized variant of a function, note that so LV can
      // make better decisions about maximum VF.
      if (CI && !VFDatabase::getMappings(*CI).empty())
        VecCallVariantsFound = true;

      // Check that the instruction return type is vectorizable.
      // Also, we can't vectorize extractelement instructions.
9530b57cec5SDimitry Andric if ((!VectorType::isValidElementType(I.getType()) && 9540b57cec5SDimitry Andric !I.getType()->isVoidTy()) || 9550b57cec5SDimitry Andric isa<ExtractElementInst>(I)) { 9560b57cec5SDimitry Andric reportVectorizationFailure("Found unvectorizable type", 9570b57cec5SDimitry Andric "instruction return type cannot be vectorized", 9588bcb0991SDimitry Andric "CantVectorizeInstructionReturnType", ORE, TheLoop, &I); 9590b57cec5SDimitry Andric return false; 9600b57cec5SDimitry Andric } 9610b57cec5SDimitry Andric 9620b57cec5SDimitry Andric // Check that the stored type is vectorizable. 9630b57cec5SDimitry Andric if (auto *ST = dyn_cast<StoreInst>(&I)) { 9640b57cec5SDimitry Andric Type *T = ST->getValueOperand()->getType(); 9650b57cec5SDimitry Andric if (!VectorType::isValidElementType(T)) { 9660b57cec5SDimitry Andric reportVectorizationFailure("Store instruction cannot be vectorized", 9670b57cec5SDimitry Andric "store instruction cannot be vectorized", 9688bcb0991SDimitry Andric "CantVectorizeStore", ORE, TheLoop, ST); 9690b57cec5SDimitry Andric return false; 9700b57cec5SDimitry Andric } 9710b57cec5SDimitry Andric 9720b57cec5SDimitry Andric // For nontemporal stores, check that a nontemporal vector version is 9730b57cec5SDimitry Andric // supported on the target. 9740b57cec5SDimitry Andric if (ST->getMetadata(LLVMContext::MD_nontemporal)) { 9750b57cec5SDimitry Andric // Arbitrarily try a vector of 2 elements. 976e8d8bef9SDimitry Andric auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2); 9770b57cec5SDimitry Andric assert(VecTy && "did not find vectorized version of stored type"); 9785ffd83dbSDimitry Andric if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) { 9790b57cec5SDimitry Andric reportVectorizationFailure( 9800b57cec5SDimitry Andric "nontemporal store instruction cannot be vectorized", 9810b57cec5SDimitry Andric "nontemporal store instruction cannot be vectorized", 9828bcb0991SDimitry Andric "CantVectorizeNontemporalStore", ORE, TheLoop, ST); 9830b57cec5SDimitry Andric return false; 9840b57cec5SDimitry Andric } 9850b57cec5SDimitry Andric } 9860b57cec5SDimitry Andric 9870b57cec5SDimitry Andric } else if (auto *LD = dyn_cast<LoadInst>(&I)) { 9880b57cec5SDimitry Andric if (LD->getMetadata(LLVMContext::MD_nontemporal)) { 9890b57cec5SDimitry Andric // For nontemporal loads, check that a nontemporal vector version is 9900b57cec5SDimitry Andric // supported on the target (arbitrarily try a vector of 2 elements). 991e8d8bef9SDimitry Andric auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2); 9920b57cec5SDimitry Andric assert(VecTy && "did not find vectorized version of load type"); 9935ffd83dbSDimitry Andric if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) { 9940b57cec5SDimitry Andric reportVectorizationFailure( 9950b57cec5SDimitry Andric "nontemporal load instruction cannot be vectorized", 9960b57cec5SDimitry Andric "nontemporal load instruction cannot be vectorized", 9978bcb0991SDimitry Andric "CantVectorizeNontemporalLoad", ORE, TheLoop, LD); 9980b57cec5SDimitry Andric return false; 9990b57cec5SDimitry Andric } 10000b57cec5SDimitry Andric } 10010b57cec5SDimitry Andric 10020b57cec5SDimitry Andric // FP instructions can allow unsafe algebra, thus vectorizable by 10030b57cec5SDimitry Andric // non-IEEE-754 compliant SIMD units. 10040b57cec5SDimitry Andric // This applies to floating-point math operations and calls, not memory 10050b57cec5SDimitry Andric // operations, shuffles, or casts, as they don't change precision or 10060b57cec5SDimitry Andric // semantics. 
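// For instance (illustrative only): a plain 'fadd double' with no
// fast-math flags still reaches the branch below; the loop remains
// vectorizable, but the hint records that the target's SIMD unit may not
// match scalar IEEE-754 behaviour exactly.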
10070b57cec5SDimitry Andric } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
10080b57cec5SDimitry Andric !I.isFast()) {
10090b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
10100b57cec5SDimitry Andric Hints->setPotentiallyUnsafe();
10110b57cec5SDimitry Andric }
10120b57cec5SDimitry Andric
10130b57cec5SDimitry Andric // Reduction instructions are allowed to have exit users.
10140b57cec5SDimitry Andric // All other instructions must not have external users.
10150b57cec5SDimitry Andric if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
10160b57cec5SDimitry Andric // We can safely vectorize loops where instructions within the loop are
10170b57cec5SDimitry Andric // used outside the loop only if the SCEV predicates within the loop are
10180b57cec5SDimitry Andric // the same as outside the loop. Allowing the exit means reusing the SCEV
10190b57cec5SDimitry Andric // outside the loop.
102081ad6265SDimitry Andric if (PSE.getPredicate().isAlwaysTrue()) {
10210b57cec5SDimitry Andric AllowedExit.insert(&I);
10220b57cec5SDimitry Andric continue;
10230b57cec5SDimitry Andric }
10240b57cec5SDimitry Andric reportVectorizationFailure("Value cannot be used outside the loop",
10250b57cec5SDimitry Andric "value cannot be used outside the loop",
10268bcb0991SDimitry Andric "ValueUsedOutsideLoop", ORE, TheLoop, &I);
10270b57cec5SDimitry Andric return false;
10280b57cec5SDimitry Andric }
10290b57cec5SDimitry Andric } // next instr.
10300b57cec5SDimitry Andric }
10310b57cec5SDimitry Andric
10320b57cec5SDimitry Andric if (!PrimaryInduction) {
10330b57cec5SDimitry Andric if (Inductions.empty()) {
10340b57cec5SDimitry Andric reportVectorizationFailure("Did not find one integer induction var",
10350b57cec5SDimitry Andric "loop induction variable could not be identified",
10368bcb0991SDimitry Andric "NoInductionVariable", ORE, TheLoop);
10370b57cec5SDimitry Andric return false;
10380b57cec5SDimitry Andric } else if (!WidestIndTy) {
10390b57cec5SDimitry Andric reportVectorizationFailure("Did not find one integer induction var",
10400b57cec5SDimitry Andric "integer loop induction variable could not be identified",
10418bcb0991SDimitry Andric "NoIntegerInductionVariable", ORE, TheLoop);
10420b57cec5SDimitry Andric return false;
10430b57cec5SDimitry Andric } else {
10440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
10450b57cec5SDimitry Andric }
10460b57cec5SDimitry Andric }
10470b57cec5SDimitry Andric
10480b57cec5SDimitry Andric // Now that we know the widest induction type, check if our found induction
10490b57cec5SDimitry Andric // is the same size. If it's not, unset it here and InnerLoopVectorizer
10500b57cec5SDimitry Andric // will create another.
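// Illustrative case (editorial addition): with an i32 counter phi chosen
// as PrimaryInduction and a second i64 induction elsewhere in the loop,
// WidestIndTy is i64; the i32 phi is dropped below and the vectorizer
// later creates a fresh canonical induction of the wider type.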
10510b57cec5SDimitry Andric if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
10520b57cec5SDimitry Andric PrimaryInduction = nullptr;
10530b57cec5SDimitry Andric
10540b57cec5SDimitry Andric return true;
10550b57cec5SDimitry Andric }
10560b57cec5SDimitry Andric
10570b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeMemory() {
1058bdd1243dSDimitry Andric LAI = &LAIs.getInfo(*TheLoop);
10590b57cec5SDimitry Andric const OptimizationRemarkAnalysis *LAR = LAI->getReport();
10600b57cec5SDimitry Andric if (LAR) {
10610b57cec5SDimitry Andric ORE->emit([&]() {
10620b57cec5SDimitry Andric return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
10630b57cec5SDimitry Andric "loop not vectorized: ", *LAR);
10640b57cec5SDimitry Andric });
10650b57cec5SDimitry Andric }
1066fe6060f1SDimitry Andric
10670b57cec5SDimitry Andric if (!LAI->canVectorizeMemory())
10680b57cec5SDimitry Andric return false;
10690b57cec5SDimitry Andric
1070*0fca6ea1SDimitry Andric if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
1071*0fca6ea1SDimitry Andric reportVectorizationFailure("We don't allow storing to uniform addresses",
1072*0fca6ea1SDimitry Andric "write to a loop invariant address could not "
1073*0fca6ea1SDimitry Andric "be vectorized",
1074*0fca6ea1SDimitry Andric "CantVectorizeStoreToLoopInvariantAddress", ORE,
1075*0fca6ea1SDimitry Andric TheLoop);
1076*0fca6ea1SDimitry Andric return false;
1077*0fca6ea1SDimitry Andric }
1078*0fca6ea1SDimitry Andric
107981ad6265SDimitry Andric // We can vectorize stores to an invariant address when the final reduction
108081ad6265SDimitry Andric // value is guaranteed to be stored at the end of the loop. Also, if the
108181ad6265SDimitry Andric // decision to vectorize the loop is made, runtime checks are added to make
108281ad6265SDimitry Andric // sure that the invariant address won't alias with any other objects.
108381ad6265SDimitry Andric if (!LAI->getStoresToInvariantAddresses().empty()) {
1084bdd1243dSDimitry Andric // For each invariant address, check that the last stored value is
1085bdd1243dSDimitry Andric // unconditional and that the address is not calculated inside the loop.
108681ad6265SDimitry Andric for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
1087bdd1243dSDimitry Andric if (!isInvariantStoreOfReduction(SI))
1088bdd1243dSDimitry Andric continue;
1089bdd1243dSDimitry Andric
1090bdd1243dSDimitry Andric if (blockNeedsPredication(SI->getParent())) {
109181ad6265SDimitry Andric reportVectorizationFailure(
109281ad6265SDimitry Andric "We don't allow storing to uniform addresses",
109381ad6265SDimitry Andric "write of conditional recurring variant value to a loop "
109481ad6265SDimitry Andric "invariant address could not be vectorized",
10958bcb0991SDimitry Andric "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
10960b57cec5SDimitry Andric return false;
10970b57cec5SDimitry Andric }
1098bdd1243dSDimitry Andric
1099bdd1243dSDimitry Andric // The invariant address should be defined outside of the loop. The LICM
1100bdd1243dSDimitry Andric // pass usually makes sure of that, but in rare cases it does not; we do
1101bdd1243dSDimitry Andric // not want to overcomplicate vectorization to support this case.
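// Sketch of the rejected pattern (hedged illustration): a reduction whose
// final value is stored through a pointer recomputed inside the loop, e.g.
//   loop:
//     %p = getelementptr inbounds i32, ptr %base, i64 4
//     store i32 %sum.next, ptr %p
// The GEP is invariant in value, but it is an Instruction contained in the
// loop, so the check below bails out instead of trying to sink it.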
1102bdd1243dSDimitry Andric if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
1103bdd1243dSDimitry Andric if (TheLoop->contains(Ptr)) {
1104bdd1243dSDimitry Andric reportVectorizationFailure(
1105bdd1243dSDimitry Andric "Invariant address is calculated inside the loop",
1106bdd1243dSDimitry Andric "write to a loop invariant address could not "
1107bdd1243dSDimitry Andric "be vectorized",
1108bdd1243dSDimitry Andric "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
1109bdd1243dSDimitry Andric return false;
1110bdd1243dSDimitry Andric }
1111bdd1243dSDimitry Andric }
111281ad6265SDimitry Andric }
111381ad6265SDimitry Andric
1114*0fca6ea1SDimitry Andric if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
111581ad6265SDimitry Andric // For each invariant address, check that its last stored value is the
111681ad6265SDimitry Andric // result of one of our reductions.
111781ad6265SDimitry Andric //
1118*0fca6ea1SDimitry Andric // We do not check if a dependence with loads exists because that is
1119*0fca6ea1SDimitry Andric // already checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
112081ad6265SDimitry Andric ScalarEvolution *SE = PSE.getSE();
112181ad6265SDimitry Andric SmallVector<StoreInst *, 4> UnhandledStores;
112281ad6265SDimitry Andric for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
112381ad6265SDimitry Andric if (isInvariantStoreOfReduction(SI)) {
112481ad6265SDimitry Andric // Earlier stores to this address are effectively dead code.
112581ad6265SDimitry Andric // With opaque pointers it is possible for one pointer to be used with
112681ad6265SDimitry Andric // different sizes of stored values:
112781ad6265SDimitry Andric //   store i32 0, ptr %x
112881ad6265SDimitry Andric //   store i8 0, ptr %x
112981ad6265SDimitry Andric // The later store doesn't completely overwrite the first one in the
113081ad6265SDimitry Andric // example. That is why we have to make sure that the types of the
113181ad6265SDimitry Andric // stored values are the same.
113281ad6265SDimitry Andric // TODO: Check that the bitwidth of the unhandled store is smaller than
113381ad6265SDimitry Andric // the one that overwrites it, and add a test.
113481ad6265SDimitry Andric erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
113581ad6265SDimitry Andric return storeToSameAddress(SE, SI, I) &&
113681ad6265SDimitry Andric I->getValueOperand()->getType() ==
113781ad6265SDimitry Andric SI->getValueOperand()->getType();
113881ad6265SDimitry Andric });
113981ad6265SDimitry Andric continue;
114081ad6265SDimitry Andric }
114181ad6265SDimitry Andric UnhandledStores.push_back(SI);
114281ad6265SDimitry Andric }
114381ad6265SDimitry Andric
114481ad6265SDimitry Andric bool IsOK = UnhandledStores.empty();
114581ad6265SDimitry Andric // TODO: we should also validate against InvariantMemSets.
114681ad6265SDimitry Andric if (!IsOK) { 114781ad6265SDimitry Andric reportVectorizationFailure( 114881ad6265SDimitry Andric "We don't allow storing to uniform addresses", 114981ad6265SDimitry Andric "write to a loop invariant address could not " 115081ad6265SDimitry Andric "be vectorized", 115181ad6265SDimitry Andric "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop); 115281ad6265SDimitry Andric return false; 115381ad6265SDimitry Andric } 115481ad6265SDimitry Andric } 115581ad6265SDimitry Andric } 1156fe6060f1SDimitry Andric 115781ad6265SDimitry Andric PSE.addPredicate(LAI->getPSE().getPredicate()); 11580b57cec5SDimitry Andric return true; 11590b57cec5SDimitry Andric } 11600b57cec5SDimitry Andric 1161fe6060f1SDimitry Andric bool LoopVectorizationLegality::canVectorizeFPMath( 1162fe6060f1SDimitry Andric bool EnableStrictReductions) { 1163fe6060f1SDimitry Andric 1164fe6060f1SDimitry Andric // First check if there is any ExactFP math or if we allow reassociations 1165fe6060f1SDimitry Andric if (!Requirements->getExactFPInst() || Hints->allowReordering()) 1166fe6060f1SDimitry Andric return true; 1167fe6060f1SDimitry Andric 1168fe6060f1SDimitry Andric // If the above is false, we have ExactFPMath & do not allow reordering. 1169fe6060f1SDimitry Andric // If the EnableStrictReductions flag is set, first check if we have any 1170fe6060f1SDimitry Andric // Exact FP induction vars, which we cannot vectorize. 1171fe6060f1SDimitry Andric if (!EnableStrictReductions || 1172fe6060f1SDimitry Andric any_of(getInductionVars(), [&](auto &Induction) -> bool { 1173fe6060f1SDimitry Andric InductionDescriptor IndDesc = Induction.second; 1174fe6060f1SDimitry Andric return IndDesc.getExactFPMathInst(); 1175fe6060f1SDimitry Andric })) 1176fe6060f1SDimitry Andric return false; 1177fe6060f1SDimitry Andric 1178fe6060f1SDimitry Andric // We can now only vectorize if all reductions with Exact FP math also 1179fe6060f1SDimitry Andric // have the isOrdered flag set, which indicates that we can move the 1180fe6060f1SDimitry Andric // reduction operations in-loop. 
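// For example (illustrative, not from the original source): a strict
// 'Sum += A[i]' float reduction without reassociation can still be
// vectorized as an ordered, in-loop reduction, conceptually
//   %sum.next = call float @llvm.vector.reduce.fadd.v4f32(float %sum,
//                                                         <4 x float> %chunk)
// per vector iteration, which preserves the sequential association that
// splitting into independent partial sums would break.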
1181fe6060f1SDimitry Andric return (all_of(getReductionVars(), [&](auto &Reduction) -> bool { 1182fe6060f1SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second; 1183fe6060f1SDimitry Andric return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered(); 1184fe6060f1SDimitry Andric })); 1185fe6060f1SDimitry Andric } 1186fe6060f1SDimitry Andric 118781ad6265SDimitry Andric bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) { 118881ad6265SDimitry Andric return any_of(getReductionVars(), [&](auto &Reduction) -> bool { 118981ad6265SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second; 119081ad6265SDimitry Andric return RdxDesc.IntermediateStore == SI; 119181ad6265SDimitry Andric }); 119281ad6265SDimitry Andric } 119381ad6265SDimitry Andric 119481ad6265SDimitry Andric bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) { 119581ad6265SDimitry Andric return any_of(getReductionVars(), [&](auto &Reduction) -> bool { 119681ad6265SDimitry Andric const RecurrenceDescriptor &RdxDesc = Reduction.second; 119781ad6265SDimitry Andric if (!RdxDesc.IntermediateStore) 119881ad6265SDimitry Andric return false; 119981ad6265SDimitry Andric 120081ad6265SDimitry Andric ScalarEvolution *SE = PSE.getSE(); 120181ad6265SDimitry Andric Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand(); 120281ad6265SDimitry Andric return V == InvariantAddress || 120381ad6265SDimitry Andric SE->getSCEV(V) == SE->getSCEV(InvariantAddress); 120481ad6265SDimitry Andric }); 120581ad6265SDimitry Andric } 120681ad6265SDimitry Andric 12070eae32dcSDimitry Andric bool LoopVectorizationLegality::isInductionPhi(const Value *V) const { 12080b57cec5SDimitry Andric Value *In0 = const_cast<Value *>(V); 12090b57cec5SDimitry Andric PHINode *PN = dyn_cast_or_null<PHINode>(In0); 12100b57cec5SDimitry Andric if (!PN) 12110b57cec5SDimitry Andric return false; 12120b57cec5SDimitry Andric 12130b57cec5SDimitry Andric return Inductions.count(PN); 12140b57cec5SDimitry Andric } 12150b57cec5SDimitry Andric 12160eae32dcSDimitry Andric const InductionDescriptor * 12170eae32dcSDimitry Andric LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const { 12180eae32dcSDimitry Andric if (!isInductionPhi(Phi)) 12190eae32dcSDimitry Andric return nullptr; 12200eae32dcSDimitry Andric auto &ID = getInductionVars().find(Phi)->second; 12210eae32dcSDimitry Andric if (ID.getKind() == InductionDescriptor::IK_IntInduction || 12220eae32dcSDimitry Andric ID.getKind() == InductionDescriptor::IK_FpInduction) 12230eae32dcSDimitry Andric return &ID; 12240eae32dcSDimitry Andric return nullptr; 12250eae32dcSDimitry Andric } 12260eae32dcSDimitry Andric 122781ad6265SDimitry Andric const InductionDescriptor * 122881ad6265SDimitry Andric LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const { 122981ad6265SDimitry Andric if (!isInductionPhi(Phi)) 123081ad6265SDimitry Andric return nullptr; 123181ad6265SDimitry Andric auto &ID = getInductionVars().find(Phi)->second; 123281ad6265SDimitry Andric if (ID.getKind() == InductionDescriptor::IK_PtrInduction) 123381ad6265SDimitry Andric return &ID; 123481ad6265SDimitry Andric return nullptr; 123581ad6265SDimitry Andric } 123681ad6265SDimitry Andric 12370eae32dcSDimitry Andric bool LoopVectorizationLegality::isCastedInductionVariable( 12380eae32dcSDimitry Andric const Value *V) const { 12390b57cec5SDimitry Andric auto *Inst = dyn_cast<Instruction>(V); 12400b57cec5SDimitry Andric return (Inst && 
InductionCastsToIgnore.count(Inst));
12410b57cec5SDimitry Andric }
12420b57cec5SDimitry Andric
12430eae32dcSDimitry Andric bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
12440b57cec5SDimitry Andric return isInductionPhi(V) || isCastedInductionVariable(V);
12450b57cec5SDimitry Andric }
12460b57cec5SDimitry Andric
1247bdd1243dSDimitry Andric bool LoopVectorizationLegality::isFixedOrderRecurrence(
12480eae32dcSDimitry Andric const PHINode *Phi) const {
1249bdd1243dSDimitry Andric return FixedOrderRecurrences.count(Phi);
12500b57cec5SDimitry Andric }
12510b57cec5SDimitry Andric
1252fe6060f1SDimitry Andric bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
12530b57cec5SDimitry Andric return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
12540b57cec5SDimitry Andric }
12550b57cec5SDimitry Andric
12560b57cec5SDimitry Andric bool LoopVectorizationLegality::blockCanBePredicated(
1257e8d8bef9SDimitry Andric BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
12585f757f3fSDimitry Andric SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
12590b57cec5SDimitry Andric for (Instruction &I : *BB) {
12605ffd83dbSDimitry Andric // We can predicate blocks with calls to assume, as long as we drop them in
12615ffd83dbSDimitry Andric // case we flatten the CFG via predication.
12625ffd83dbSDimitry Andric if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
12635f757f3fSDimitry Andric MaskedOp.insert(&I);
12645ffd83dbSDimitry Andric continue;
12655ffd83dbSDimitry Andric }
12665ffd83dbSDimitry Andric
1267e8d8bef9SDimitry Andric // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1268e8d8bef9SDimitry Andric // TODO: there might be cases where it should block the vectorization. Let's
1269e8d8bef9SDimitry Andric // ignore those for now.
1270e8d8bef9SDimitry Andric if (isa<NoAliasScopeDeclInst>(&I))
1271e8d8bef9SDimitry Andric continue;
1272e8d8bef9SDimitry Andric
127306c3fb27SDimitry Andric // We can allow masked calls if there's at least one vector variant, even
127406c3fb27SDimitry Andric // if we end up scalarizing due to the cost model calculations.
127506c3fb27SDimitry Andric // TODO: Allow other calls if they have appropriate attributes... readonly
127606c3fb27SDimitry Andric // and argmemonly?
127706c3fb27SDimitry Andric if (CallInst *CI = dyn_cast<CallInst>(&I))
127806c3fb27SDimitry Andric if (VFDatabase::hasMaskedVariant(*CI)) {
127906c3fb27SDimitry Andric MaskedOp.insert(CI);
128006c3fb27SDimitry Andric continue;
128106c3fb27SDimitry Andric }
128206c3fb27SDimitry Andric
1283bdd1243dSDimitry Andric // Loads are handled via masking (or speculated if it is safe to do so).
1284bdd1243dSDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) {
1285bdd1243dSDimitry Andric if (!SafePtrs.count(LI->getPointerOperand()))
12860b57cec5SDimitry Andric MaskedOp.insert(LI);
12870b57cec5SDimitry Andric continue;
12880b57cec5SDimitry Andric }
12890b57cec5SDimitry Andric
12900b57cec5SDimitry Andric // A predicated store requires some form of masking:
12910b57cec5SDimitry Andric // 1) masked store HW instruction,
12920b57cec5SDimitry Andric // 2) emulation via load-blend-store (only if safe and legal to do so,
12930b57cec5SDimitry Andric //    be aware of the race conditions), or
12940b57cec5SDimitry Andric // 3) element-by-element predicate check and scalar store.
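// Hedged illustration of option 1: a conditional store such as
//   if (Cond[i]) A[i] = X;
// is recorded in MaskedOp here and is typically emitted later as
//   call void @llvm.masked.store.v4i32.p0(<4 x i32> %xs, ptr %a,
//                                         i32 4, <4 x i1> %mask)
// on targets with legal masked stores; otherwise one of the fallbacks
// listed above is used.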
1295bdd1243dSDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) {
12960b57cec5SDimitry Andric MaskedOp.insert(SI);
12970b57cec5SDimitry Andric continue;
12980b57cec5SDimitry Andric }
1299bdd1243dSDimitry Andric
1300bdd1243dSDimitry Andric if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
13010b57cec5SDimitry Andric return false;
13020b57cec5SDimitry Andric }
13030b57cec5SDimitry Andric
13040b57cec5SDimitry Andric return true;
13050b57cec5SDimitry Andric }
13060b57cec5SDimitry Andric
13070b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
13080b57cec5SDimitry Andric if (!EnableIfConversion) {
13090b57cec5SDimitry Andric reportVectorizationFailure("If-conversion is disabled",
13100b57cec5SDimitry Andric "if-conversion is disabled",
13118bcb0991SDimitry Andric "IfConversionDisabled",
13128bcb0991SDimitry Andric ORE, TheLoop);
13130b57cec5SDimitry Andric return false;
13140b57cec5SDimitry Andric }
13150b57cec5SDimitry Andric
13160b57cec5SDimitry Andric assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
13170b57cec5SDimitry Andric
13188bcb0991SDimitry Andric // A list of pointers which are known to be dereferenceable within the scope
13198bcb0991SDimitry Andric // of the loop body for each iteration of the loop which executes. That is,
13208bcb0991SDimitry Andric // the memory pointed to can be dereferenced (with the access size implied by
13218bcb0991SDimitry Andric // the value's type) unconditionally within the loop header without
13228bcb0991SDimitry Andric // introducing a new fault.
13235ffd83dbSDimitry Andric SmallPtrSet<Value *, 8> SafePointers;
13240b57cec5SDimitry Andric
13250b57cec5SDimitry Andric // Collect safe addresses.
13260b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
13278bcb0991SDimitry Andric if (!blockNeedsPredication(BB)) {
13280b57cec5SDimitry Andric for (Instruction &I : *BB)
13290b57cec5SDimitry Andric if (auto *Ptr = getLoadStorePointerOperand(&I))
13305ffd83dbSDimitry Andric SafePointers.insert(Ptr);
13318bcb0991SDimitry Andric continue;
13328bcb0991SDimitry Andric }
13338bcb0991SDimitry Andric
13348bcb0991SDimitry Andric // For a block which requires predication, an address may be safe to access
13358bcb0991SDimitry Andric // in the loop w/o predication if we can prove dereferenceability facts
13368bcb0991SDimitry Andric // sufficient to ensure it'll never fault within the loop. For the moment,
13378bcb0991SDimitry Andric // we restrict this to loads; stores are more complicated due to
13388bcb0991SDimitry Andric // concurrency restrictions.
13398bcb0991SDimitry Andric ScalarEvolution &SE = *PSE.getSE();
13408bcb0991SDimitry Andric for (Instruction &I : *BB) {
13418bcb0991SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(&I);
1342e8d8bef9SDimitry Andric if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1343bdd1243dSDimitry Andric isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
13445ffd83dbSDimitry Andric SafePointers.insert(LI->getPointerOperand());
13458bcb0991SDimitry Andric }
13460b57cec5SDimitry Andric }
13470b57cec5SDimitry Andric
13480b57cec5SDimitry Andric // Collect the blocks that need predication.
13490b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) {
13500b57cec5SDimitry Andric // We don't support switch statements inside loops.
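// For example (illustrative): a 'switch (Key[i])' in the loop body leaves a
// SwitchInst terminator, which is not a BranchInst, so the check below
// rejects the loop unless an earlier pass has lowered the switch to
// conditional branches.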
13510b57cec5SDimitry Andric if (!isa<BranchInst>(BB->getTerminator())) { 13520b57cec5SDimitry Andric reportVectorizationFailure("Loop contains a switch statement", 13530b57cec5SDimitry Andric "loop contains a switch statement", 13548bcb0991SDimitry Andric "LoopContainsSwitch", ORE, TheLoop, 13558bcb0991SDimitry Andric BB->getTerminator()); 13560b57cec5SDimitry Andric return false; 13570b57cec5SDimitry Andric } 13580b57cec5SDimitry Andric 13590b57cec5SDimitry Andric // We must be able to predicate all blocks that need to be predicated. 13605f757f3fSDimitry Andric if (blockNeedsPredication(BB) && 13615f757f3fSDimitry Andric !blockCanBePredicated(BB, SafePointers, MaskedOp)) { 13620b57cec5SDimitry Andric reportVectorizationFailure( 13630b57cec5SDimitry Andric "Control flow cannot be substituted for a select", 13645f757f3fSDimitry Andric "control flow cannot be substituted for a select", "NoCFGForSelect", 13655f757f3fSDimitry Andric ORE, TheLoop, BB->getTerminator()); 13660b57cec5SDimitry Andric return false; 13670b57cec5SDimitry Andric } 13680b57cec5SDimitry Andric } 13690b57cec5SDimitry Andric 13700b57cec5SDimitry Andric // We can if-convert this loop. 13710b57cec5SDimitry Andric return true; 13720b57cec5SDimitry Andric } 13730b57cec5SDimitry Andric 13740b57cec5SDimitry Andric // Helper function to canVectorizeLoopNestCFG. 13750b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp, 13760b57cec5SDimitry Andric bool UseVPlanNativePath) { 1377e8d8bef9SDimitry Andric assert((UseVPlanNativePath || Lp->isInnermost()) && 13780b57cec5SDimitry Andric "VPlan-native path is not enabled."); 13790b57cec5SDimitry Andric 13800b57cec5SDimitry Andric // TODO: ORE should be improved to show more accurate information when an 13810b57cec5SDimitry Andric // outer loop can't be vectorized because a nested loop is not understood or 13820b57cec5SDimitry Andric // legal. Something like: "outer_loop_location: loop not vectorized: 13830b57cec5SDimitry Andric // (inner_loop_location) loop control flow is not understood by vectorizer". 13840b57cec5SDimitry Andric 13850b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case 13860b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 13870b57cec5SDimitry Andric bool Result = true; 13880b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 13890b57cec5SDimitry Andric 13900b57cec5SDimitry Andric // We must have a loop in canonical form. Loops with indirectbr in them cannot 13910b57cec5SDimitry Andric // be canonicalized. 13920b57cec5SDimitry Andric if (!Lp->getLoopPreheader()) { 13930b57cec5SDimitry Andric reportVectorizationFailure("Loop doesn't have a legal pre-header", 13940b57cec5SDimitry Andric "loop control flow is not understood by vectorizer", 13958bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop); 13960b57cec5SDimitry Andric if (DoExtraAnalysis) 13970b57cec5SDimitry Andric Result = false; 13980b57cec5SDimitry Andric else 13990b57cec5SDimitry Andric return false; 14000b57cec5SDimitry Andric } 14010b57cec5SDimitry Andric 14020b57cec5SDimitry Andric // We must have a single backedge. 
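// Editorial note (hedged): a loop whose body branches back to the header
// from more than one block has multiple backedges; loop-simplify normally
// merges these into a single latch, so hitting this failure usually means
// the loop is not in simplified form.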
14030b57cec5SDimitry Andric if (Lp->getNumBackEdges() != 1) { 14040b57cec5SDimitry Andric reportVectorizationFailure("The loop must have a single backedge", 14050b57cec5SDimitry Andric "loop control flow is not understood by vectorizer", 14068bcb0991SDimitry Andric "CFGNotUnderstood", ORE, TheLoop); 14070b57cec5SDimitry Andric if (DoExtraAnalysis) 14080b57cec5SDimitry Andric Result = false; 14090b57cec5SDimitry Andric else 14100b57cec5SDimitry Andric return false; 14110b57cec5SDimitry Andric } 14120b57cec5SDimitry Andric 14130b57cec5SDimitry Andric return Result; 14140b57cec5SDimitry Andric } 14150b57cec5SDimitry Andric 14160b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopNestCFG( 14170b57cec5SDimitry Andric Loop *Lp, bool UseVPlanNativePath) { 14180b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case 14190b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 14200b57cec5SDimitry Andric bool Result = true; 14210b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 14220b57cec5SDimitry Andric if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) { 14230b57cec5SDimitry Andric if (DoExtraAnalysis) 14240b57cec5SDimitry Andric Result = false; 14250b57cec5SDimitry Andric else 14260b57cec5SDimitry Andric return false; 14270b57cec5SDimitry Andric } 14280b57cec5SDimitry Andric 14290b57cec5SDimitry Andric // Recursively check whether the loop control flow of nested loops is 14300b57cec5SDimitry Andric // understood. 14310b57cec5SDimitry Andric for (Loop *SubLp : *Lp) 14320b57cec5SDimitry Andric if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) { 14330b57cec5SDimitry Andric if (DoExtraAnalysis) 14340b57cec5SDimitry Andric Result = false; 14350b57cec5SDimitry Andric else 14360b57cec5SDimitry Andric return false; 14370b57cec5SDimitry Andric } 14380b57cec5SDimitry Andric 14390b57cec5SDimitry Andric return Result; 14400b57cec5SDimitry Andric } 14410b57cec5SDimitry Andric 14420b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) { 14430b57cec5SDimitry Andric // Store the result and return it at the end instead of exiting early, in case 14440b57cec5SDimitry Andric // allowExtraAnalysis is used to report multiple reasons for not vectorizing. 14450b57cec5SDimitry Andric bool Result = true; 14460b57cec5SDimitry Andric 14470b57cec5SDimitry Andric bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE); 14480b57cec5SDimitry Andric // Check whether the loop-related control flow in the loop nest is expected by 14490b57cec5SDimitry Andric // vectorizer. 14500b57cec5SDimitry Andric if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) { 14510b57cec5SDimitry Andric if (DoExtraAnalysis) 14520b57cec5SDimitry Andric Result = false; 14530b57cec5SDimitry Andric else 14540b57cec5SDimitry Andric return false; 14550b57cec5SDimitry Andric } 14560b57cec5SDimitry Andric 14570b57cec5SDimitry Andric // We need to have a loop header. 14580b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName() 14590b57cec5SDimitry Andric << '\n'); 14600b57cec5SDimitry Andric 14610b57cec5SDimitry Andric // Specific checks for outer loops. We skip the remaining legal checks at this 14620b57cec5SDimitry Andric // point because they don't support outer loops. 
1463e8d8bef9SDimitry Andric if (!TheLoop->isInnermost()) { 14640b57cec5SDimitry Andric assert(UseVPlanNativePath && "VPlan-native path is not enabled."); 14650b57cec5SDimitry Andric 14660b57cec5SDimitry Andric if (!canVectorizeOuterLoop()) { 14670b57cec5SDimitry Andric reportVectorizationFailure("Unsupported outer loop", 14680b57cec5SDimitry Andric "unsupported outer loop", 14698bcb0991SDimitry Andric "UnsupportedOuterLoop", 14708bcb0991SDimitry Andric ORE, TheLoop); 14710b57cec5SDimitry Andric // TODO: Implement DoExtraAnalysis when subsequent legal checks support 14720b57cec5SDimitry Andric // outer loops. 14730b57cec5SDimitry Andric return false; 14740b57cec5SDimitry Andric } 14750b57cec5SDimitry Andric 14760b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n"); 14770b57cec5SDimitry Andric return Result; 14780b57cec5SDimitry Andric } 14790b57cec5SDimitry Andric 1480e8d8bef9SDimitry Andric assert(TheLoop->isInnermost() && "Inner loop expected."); 14810b57cec5SDimitry Andric // Check if we can if-convert non-single-bb loops. 14820b57cec5SDimitry Andric unsigned NumBlocks = TheLoop->getNumBlocks(); 14830b57cec5SDimitry Andric if (NumBlocks != 1 && !canVectorizeWithIfConvert()) { 14840b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n"); 14850b57cec5SDimitry Andric if (DoExtraAnalysis) 14860b57cec5SDimitry Andric Result = false; 14870b57cec5SDimitry Andric else 14880b57cec5SDimitry Andric return false; 14890b57cec5SDimitry Andric } 14900b57cec5SDimitry Andric 14910b57cec5SDimitry Andric // Check if we can vectorize the instructions and CFG in this loop. 14920b57cec5SDimitry Andric if (!canVectorizeInstrs()) { 14930b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n"); 14940b57cec5SDimitry Andric if (DoExtraAnalysis) 14950b57cec5SDimitry Andric Result = false; 14960b57cec5SDimitry Andric else 14970b57cec5SDimitry Andric return false; 14980b57cec5SDimitry Andric } 14990b57cec5SDimitry Andric 15000b57cec5SDimitry Andric // Go over each instruction and look at memory deps. 15010b57cec5SDimitry Andric if (!canVectorizeMemory()) { 15020b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n"); 15030b57cec5SDimitry Andric if (DoExtraAnalysis) 15040b57cec5SDimitry Andric Result = false; 15050b57cec5SDimitry Andric else 15060b57cec5SDimitry Andric return false; 15070b57cec5SDimitry Andric } 15080b57cec5SDimitry Andric 1509*0fca6ea1SDimitry Andric if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) { 1510*0fca6ea1SDimitry Andric reportVectorizationFailure("could not determine number of loop iterations", 1511*0fca6ea1SDimitry Andric "could not determine number of loop iterations", 1512*0fca6ea1SDimitry Andric "CantComputeNumberOfIterations", ORE, TheLoop); 1513*0fca6ea1SDimitry Andric if (DoExtraAnalysis) 1514*0fca6ea1SDimitry Andric Result = false; 1515*0fca6ea1SDimitry Andric else 1516*0fca6ea1SDimitry Andric return false; 1517*0fca6ea1SDimitry Andric } 1518*0fca6ea1SDimitry Andric 15190b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop" 15200b57cec5SDimitry Andric << (LAI->getRuntimePointerChecking()->Need 15210b57cec5SDimitry Andric ? 
" (with a runtime bound check)" 15220b57cec5SDimitry Andric : "") 15230b57cec5SDimitry Andric << "!\n"); 15240b57cec5SDimitry Andric 15250b57cec5SDimitry Andric unsigned SCEVThreshold = VectorizeSCEVCheckThreshold; 15260b57cec5SDimitry Andric if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) 15270b57cec5SDimitry Andric SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; 15280b57cec5SDimitry Andric 152981ad6265SDimitry Andric if (PSE.getPredicate().getComplexity() > SCEVThreshold) { 15300b57cec5SDimitry Andric reportVectorizationFailure("Too many SCEV checks needed", 15310b57cec5SDimitry Andric "Too many SCEV assumptions need to be made and checked at runtime", 15328bcb0991SDimitry Andric "TooManySCEVRunTimeChecks", ORE, TheLoop); 15330b57cec5SDimitry Andric if (DoExtraAnalysis) 15340b57cec5SDimitry Andric Result = false; 15350b57cec5SDimitry Andric else 15360b57cec5SDimitry Andric return false; 15370b57cec5SDimitry Andric } 15380b57cec5SDimitry Andric 15390b57cec5SDimitry Andric // Okay! We've done all the tests. If any have failed, return false. Otherwise 15400b57cec5SDimitry Andric // we can vectorize, and at this point we don't have any other mem analysis 15410b57cec5SDimitry Andric // which may limit our maximum vectorization factor, so just return true with 15420b57cec5SDimitry Andric // no restrictions. 15430b57cec5SDimitry Andric return Result; 15440b57cec5SDimitry Andric } 15450b57cec5SDimitry Andric 1546*0fca6ea1SDimitry Andric bool LoopVectorizationLegality::canFoldTailByMasking() const { 15470b57cec5SDimitry Andric 15480b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n"); 15490b57cec5SDimitry Andric 15508bcb0991SDimitry Andric SmallPtrSet<const Value *, 8> ReductionLiveOuts; 15510b57cec5SDimitry Andric 1552bdd1243dSDimitry Andric for (const auto &Reduction : getReductionVars()) 15538bcb0991SDimitry Andric ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr()); 15548bcb0991SDimitry Andric 15558bcb0991SDimitry Andric // TODO: handle non-reduction outside users when tail is folded by masking. 15560b57cec5SDimitry Andric for (auto *AE : AllowedExit) { 15578bcb0991SDimitry Andric // Check that all users of allowed exit values are inside the loop or 15588bcb0991SDimitry Andric // are the live-out of a reduction. 
15598bcb0991SDimitry Andric if (ReductionLiveOuts.count(AE)) 15608bcb0991SDimitry Andric continue; 15610b57cec5SDimitry Andric for (User *U : AE->users()) { 15620b57cec5SDimitry Andric Instruction *UI = cast<Instruction>(U); 15630b57cec5SDimitry Andric if (TheLoop->contains(UI)) 15640b57cec5SDimitry Andric continue; 1565e8d8bef9SDimitry Andric LLVM_DEBUG( 1566e8d8bef9SDimitry Andric dbgs() 1567e8d8bef9SDimitry Andric << "LV: Cannot fold tail by masking, loop has an outside user for " 1568e8d8bef9SDimitry Andric << *UI << "\n"); 15690b57cec5SDimitry Andric return false; 15700b57cec5SDimitry Andric } 15710b57cec5SDimitry Andric } 15720b57cec5SDimitry Andric 1573*0fca6ea1SDimitry Andric for (const auto &Entry : getInductionVars()) { 1574*0fca6ea1SDimitry Andric PHINode *OrigPhi = Entry.first; 1575*0fca6ea1SDimitry Andric for (User *U : OrigPhi->users()) { 1576*0fca6ea1SDimitry Andric auto *UI = cast<Instruction>(U); 1577*0fca6ea1SDimitry Andric if (!TheLoop->contains(UI)) { 1578*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an " 1579*0fca6ea1SDimitry Andric "outside user for " 1580*0fca6ea1SDimitry Andric << *UI << "\n"); 1581*0fca6ea1SDimitry Andric return false; 1582*0fca6ea1SDimitry Andric } 1583*0fca6ea1SDimitry Andric } 1584*0fca6ea1SDimitry Andric } 1585*0fca6ea1SDimitry Andric 15860b57cec5SDimitry Andric // The list of pointers that we can safely read and write to remains empty. 15870b57cec5SDimitry Andric SmallPtrSet<Value *, 8> SafePointers; 15880b57cec5SDimitry Andric 1589*0fca6ea1SDimitry Andric // Check all blocks for predication, including those that ordinarily do not 1590*0fca6ea1SDimitry Andric // need predication such as the header block. 1591e8d8bef9SDimitry Andric SmallPtrSet<const Instruction *, 8> TmpMaskedOp; 15920b57cec5SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) { 15935f757f3fSDimitry Andric if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) { 1594*0fca6ea1SDimitry Andric LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n"); 15950b57cec5SDimitry Andric return false; 15960b57cec5SDimitry Andric } 15970b57cec5SDimitry Andric } 15980b57cec5SDimitry Andric 15990b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n"); 1600e8d8bef9SDimitry Andric 16010b57cec5SDimitry Andric return true; 16020b57cec5SDimitry Andric } 16030b57cec5SDimitry Andric 1604*0fca6ea1SDimitry Andric void LoopVectorizationLegality::prepareToFoldTailByMasking() { 1605*0fca6ea1SDimitry Andric // The list of pointers that we can safely read and write to remains empty. 1606*0fca6ea1SDimitry Andric SmallPtrSet<Value *, 8> SafePointers; 1607*0fca6ea1SDimitry Andric 1608*0fca6ea1SDimitry Andric // Mark all blocks for predication, including those that ordinarily do not 1609*0fca6ea1SDimitry Andric // need predication such as the header block. 1610*0fca6ea1SDimitry Andric for (BasicBlock *BB : TheLoop->blocks()) { 1611*0fca6ea1SDimitry Andric [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp); 1612*0fca6ea1SDimitry Andric assert(R && "Must be able to predicate block when tail-folding."); 1613*0fca6ea1SDimitry Andric } 1614*0fca6ea1SDimitry Andric } 1615*0fca6ea1SDimitry Andric 16160b57cec5SDimitry Andric } // namespace llvm 1617