xref: /freebsd-src/contrib/llvm-project/llvm/lib/Transforms/Vectorize/LoopVectorizationLegality.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- LoopVectorizationLegality.cpp --------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file provides loop vectorization legality analysis. Original code
100b57cec5SDimitry Andric // resided in LoopVectorize.cpp for a long time.
110b57cec5SDimitry Andric //
120b57cec5SDimitry Andric // At this point, it is implemented as a utility class, not as an analysis
130b57cec5SDimitry Andric // pass. It should be easy to create an analysis pass around it if there
140b57cec5SDimitry Andric // is a need (but D45420 needs to happen first).
150b57cec5SDimitry Andric //
16e8d8bef9SDimitry Andric 
170b57cec5SDimitry Andric #include "llvm/Transforms/Vectorize/LoopVectorizationLegality.h"
188bcb0991SDimitry Andric #include "llvm/Analysis/Loads.h"
195ffd83dbSDimitry Andric #include "llvm/Analysis/LoopInfo.h"
2081ad6265SDimitry Andric #include "llvm/Analysis/OptimizationRemarkEmitter.h"
21e8d8bef9SDimitry Andric #include "llvm/Analysis/TargetLibraryInfo.h"
2281ad6265SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
238bcb0991SDimitry Andric #include "llvm/Analysis/ValueTracking.h"
240b57cec5SDimitry Andric #include "llvm/Analysis/VectorUtils.h"
250b57cec5SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
265ffd83dbSDimitry Andric #include "llvm/IR/PatternMatch.h"
27e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/SizeOpts.h"
285ffd83dbSDimitry Andric #include "llvm/Transforms/Vectorize/LoopVectorize.h"
290b57cec5SDimitry Andric 
300b57cec5SDimitry Andric using namespace llvm;
315ffd83dbSDimitry Andric using namespace PatternMatch;
320b57cec5SDimitry Andric 
330b57cec5SDimitry Andric #define LV_NAME "loop-vectorize"
340b57cec5SDimitry Andric #define DEBUG_TYPE LV_NAME
350b57cec5SDimitry Andric 
// Command-line knobs for the legality/hints layer of the loop vectorizer.

// Master switch for if-conversion (flattening control flow so conditional
// blocks can be vectorized with masked/select operations).
static cl::opt<bool>
    EnableIfConversion("enable-if-conversion", cl::init(true), cl::Hidden,
                       cl::desc("Enable if-conversion during vectorization."));

// Opt-in recognition of pointer IVs whose stride is not a compile-time
// constant; off by default.
static cl::opt<bool>
AllowStridedPointerIVs("lv-strided-pointer-ivs", cl::init(false), cl::Hidden,
                       cl::desc("Enable recognition of non-constant strided "
                                "pointer induction variables."));

// Deliberately non-static: declared extern elsewhere so other components can
// read this flag, hence the explicit `namespace llvm` wrapper.
namespace llvm {
cl::opt<bool>
    HintsAllowReordering("hints-allow-reordering", cl::init(true), cl::Hidden,
                         cl::desc("Allow enabling loop hints to reorder "
                                  "FP operations during vectorization."));
}

// TODO: Move size-based thresholds out of legality checking, make cost based
// decisions instead of hard thresholds.
static cl::opt<unsigned> VectorizeSCEVCheckThreshold(
    "vectorize-scev-check-threshold", cl::init(16), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed."));

// A loop with a vectorize(enable) pragma gets a much larger runtime-check
// budget than the default threshold above.
static cl::opt<unsigned> PragmaVectorizeSCEVCheckThreshold(
    "pragma-vectorize-scev-check-threshold", cl::init(128), cl::Hidden,
    cl::desc("The maximum number of SCEV checks allowed with a "
             "vectorize(enable) pragma"));

// Global override for scalable-vector usage; note that both "preferred" and
// "on" intentionally map to the same SK_PreferScalable behavior.
static cl::opt<LoopVectorizeHints::ScalableForceKind>
    ForceScalableVectorization(
        "scalable-vectorization", cl::init(LoopVectorizeHints::SK_Unspecified),
        cl::Hidden,
        cl::desc("Control whether the compiler can use scalable vectors to "
                 "vectorize a loop"),
        cl::values(
            clEnumValN(LoopVectorizeHints::SK_FixedWidthOnly, "off",
                       "Scalable vectorization is disabled."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "preferred",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive."),
            clEnumValN(
                LoopVectorizeHints::SK_PreferScalable, "on",
                "Scalable vectorization is available and favored when the "
                "cost is inconclusive.")));

/// Maximum vectorization interleave count.
static const unsigned MaxInterleaveFactor = 16;
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric namespace llvm {
850b57cec5SDimitry Andric 
860b57cec5SDimitry Andric bool LoopVectorizeHints::Hint::validate(unsigned Val) {
870b57cec5SDimitry Andric   switch (Kind) {
880b57cec5SDimitry Andric   case HK_WIDTH:
890b57cec5SDimitry Andric     return isPowerOf2_32(Val) && Val <= VectorizerParams::MaxVectorWidth;
90fe6060f1SDimitry Andric   case HK_INTERLEAVE:
910b57cec5SDimitry Andric     return isPowerOf2_32(Val) && Val <= MaxInterleaveFactor;
920b57cec5SDimitry Andric   case HK_FORCE:
930b57cec5SDimitry Andric     return (Val <= 1);
940b57cec5SDimitry Andric   case HK_ISVECTORIZED:
958bcb0991SDimitry Andric   case HK_PREDICATE:
96e8d8bef9SDimitry Andric   case HK_SCALABLE:
970b57cec5SDimitry Andric     return (Val == 0 || Val == 1);
980b57cec5SDimitry Andric   }
990b57cec5SDimitry Andric   return false;
1000b57cec5SDimitry Andric }
1010b57cec5SDimitry Andric 
/// Build the hint state for loop \p L: read any existing llvm.loop metadata
/// and reconcile it with command-line overrides. \p InterleaveOnlyWhenForced
/// suppresses interleaving unless explicitly requested; \p TTI (may be null)
/// supplies the target's default preference for scalable vectors.
LoopVectorizeHints::LoopVectorizeHints(const Loop *L,
                                       bool InterleaveOnlyWhenForced,
                                       OptimizationRemarkEmitter &ORE,
                                       const TargetTransformInfo *TTI)
    : Width("vectorize.width", VectorizerParams::VectorizationFactor, HK_WIDTH),
      Interleave("interleave.count", InterleaveOnlyWhenForced, HK_INTERLEAVE),
      Force("vectorize.enable", FK_Undefined, HK_FORCE),
      IsVectorized("isvectorized", 0, HK_ISVECTORIZED),
      Predicate("vectorize.predicate.enable", FK_Undefined, HK_PREDICATE),
      Scalable("vectorize.scalable.enable", SK_Unspecified, HK_SCALABLE),
      TheLoop(L), ORE(ORE) {
  // Populate values with existing loop metadata.
  getHintsFromMetadata();

  // force-vector-interleave overrides DisableInterleaving.
  if (VectorizerParams::isInterleaveForced())
    Interleave.Value = VectorizerParams::VectorizationInterleave;

  // If the metadata doesn't explicitly specify whether to enable scalable
  // vectorization, then decide based on the following criteria (increasing
  // level of priority):
  //  - Target default
  //  - Metadata width
  //  - Force option (always overrides)
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified) {
    if (TTI)
      Scalable.Value = TTI->enableScalableVectorization() ? SK_PreferScalable
                                                          : SK_FixedWidthOnly;

    if (Width.Value)
      // If the width is set, but the metadata says nothing about the scalable
      // property, then assume it concerns only a fixed-width UserVF.
      // If width is not set, the flag takes precedence.
      Scalable.Value = SK_FixedWidthOnly;
  }

  // If the flag is set to force any use of scalable vectors, override the loop
  // hints.
  if (ForceScalableVectorization.getValue() !=
      LoopVectorizeHints::SK_Unspecified)
    Scalable.Value = ForceScalableVectorization.getValue();

  // Scalable vectorization is disabled if no preference is specified.
  if ((LoopVectorizeHints::ScalableForceKind)Scalable.Value == SK_Unspecified)
    Scalable.Value = SK_FixedWidthOnly;

  if (IsVectorized.Value != 1)
    // If the vectorization width and interleaving count are both 1 then
    // consider the loop to have been already vectorized because there's
    // nothing more that we can do.
    IsVectorized.Value =
        getWidth() == ElementCount::getFixed(1) && getInterleave() == 1;
  LLVM_DEBUG(if (InterleaveOnlyWhenForced && getInterleave() == 1) dbgs()
             << "LV: Interleaving disabled by the pass manager\n");
}
1570b57cec5SDimitry Andric 
1580b57cec5SDimitry Andric void LoopVectorizeHints::setAlreadyVectorized() {
1590b57cec5SDimitry Andric   LLVMContext &Context = TheLoop->getHeader()->getContext();
1600b57cec5SDimitry Andric 
1610b57cec5SDimitry Andric   MDNode *IsVectorizedMD = MDNode::get(
1620b57cec5SDimitry Andric       Context,
1630b57cec5SDimitry Andric       {MDString::get(Context, "llvm.loop.isvectorized"),
1640b57cec5SDimitry Andric        ConstantAsMetadata::get(ConstantInt::get(Context, APInt(32, 1)))});
1650b57cec5SDimitry Andric   MDNode *LoopID = TheLoop->getLoopID();
1660b57cec5SDimitry Andric   MDNode *NewLoopID =
1670b57cec5SDimitry Andric       makePostTransformationMetadata(Context, LoopID,
1680b57cec5SDimitry Andric                                      {Twine(Prefix(), "vectorize.").str(),
1690b57cec5SDimitry Andric                                       Twine(Prefix(), "interleave.").str()},
1700b57cec5SDimitry Andric                                      {IsVectorizedMD});
1710b57cec5SDimitry Andric   TheLoop->setLoopID(NewLoopID);
1720b57cec5SDimitry Andric 
1730b57cec5SDimitry Andric   // Update internal cache.
1740b57cec5SDimitry Andric   IsVectorized.Value = 1;
1750b57cec5SDimitry Andric }
1760b57cec5SDimitry Andric 
/// Decide, purely from user-facing hints/pragmas, whether this loop may be
/// vectorized at all. Returns false (after emitting a remark) when
/// vectorization is explicitly disabled, when the pass only runs on forced
/// loops and no pragma forces this one, or when the loop is marked as
/// already vectorized.
bool LoopVectorizeHints::allowVectorization(
    Function *F, Loop *L, bool VectorizeOnlyWhenForced) const {
  if (getForce() == LoopVectorizeHints::FK_Disabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: #pragma vectorize disable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (VectorizeOnlyWhenForced && getForce() != LoopVectorizeHints::FK_Enabled) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: No #pragma vectorize enable.\n");
    emitRemarkWithHints();
    return false;
  }

  if (getIsVectorized() == 1) {
    LLVM_DEBUG(dbgs() << "LV: Not vectorizing: Disabled/already vectorized.\n");
    // FIXME: Add interleave.disable metadata. This will allow
    // vectorize.disable to be used without disabling the pass and errors
    // to differentiate between disabled vectorization and a width of 1.
    ORE.emit([&]() {
      return OptimizationRemarkAnalysis(vectorizeAnalysisPassName(),
                                        "AllDisabled", L->getStartLoc(),
                                        L->getHeader())
             << "loop not vectorized: vectorization and interleaving are "
                "explicitly disabled, or the loop has already been "
                "vectorized";
    });
    return false;
  }

  return true;
}
2090b57cec5SDimitry Andric 
/// Emit an optimization remark explaining why the loop was not vectorized,
/// including the Force/Width/Interleave hint values when the user explicitly
/// requested vectorization. The remark is built lazily inside the ORE
/// callback so no work is done when remarks are disabled.
void LoopVectorizeHints::emitRemarkWithHints() const {
  using namespace ore;

  ORE.emit([&]() {
    if (Force.Value == LoopVectorizeHints::FK_Disabled)
      return OptimizationRemarkMissed(LV_NAME, "MissedExplicitlyDisabled",
                                      TheLoop->getStartLoc(),
                                      TheLoop->getHeader())
             << "loop not vectorized: vectorization is explicitly disabled";
    else {
      OptimizationRemarkMissed R(LV_NAME, "MissedDetails",
                                 TheLoop->getStartLoc(), TheLoop->getHeader());
      R << "loop not vectorized";
      if (Force.Value == LoopVectorizeHints::FK_Enabled) {
        R << " (Force=" << NV("Force", true);
        // Only report width/interleave values the user actually set.
        if (Width.Value != 0)
          R << ", Vector Width=" << NV("VectorWidth", getWidth());
        if (getInterleave() != 0)
          R << ", Interleave Count=" << NV("InterleaveCount", getInterleave());
        R << ")";
      }
      return R;
    }
  });
}
2350b57cec5SDimitry Andric 
2360b57cec5SDimitry Andric const char *LoopVectorizeHints::vectorizeAnalysisPassName() const {
237e8d8bef9SDimitry Andric   if (getWidth() == ElementCount::getFixed(1))
2380b57cec5SDimitry Andric     return LV_NAME;
2390b57cec5SDimitry Andric   if (getForce() == LoopVectorizeHints::FK_Disabled)
2400b57cec5SDimitry Andric     return LV_NAME;
241e8d8bef9SDimitry Andric   if (getForce() == LoopVectorizeHints::FK_Undefined && getWidth().isZero())
2420b57cec5SDimitry Andric     return LV_NAME;
2430b57cec5SDimitry Andric   return OptimizationRemarkAnalysis::AlwaysPrint;
2440b57cec5SDimitry Andric }
2450b57cec5SDimitry Andric 
246fe6060f1SDimitry Andric bool LoopVectorizeHints::allowReordering() const {
247fe6060f1SDimitry Andric   // Allow the vectorizer to change the order of operations if enabling
248fe6060f1SDimitry Andric   // loop hints are provided
249fe6060f1SDimitry Andric   ElementCount EC = getWidth();
250fe6060f1SDimitry Andric   return HintsAllowReordering &&
251fe6060f1SDimitry Andric          (getForce() == LoopVectorizeHints::FK_Enabled ||
252fe6060f1SDimitry Andric           EC.getKnownMinValue() > 1);
253fe6060f1SDimitry Andric }
254fe6060f1SDimitry Andric 
2550b57cec5SDimitry Andric void LoopVectorizeHints::getHintsFromMetadata() {
2560b57cec5SDimitry Andric   MDNode *LoopID = TheLoop->getLoopID();
2570b57cec5SDimitry Andric   if (!LoopID)
2580b57cec5SDimitry Andric     return;
2590b57cec5SDimitry Andric 
2600b57cec5SDimitry Andric   // First operand should refer to the loop id itself.
2610b57cec5SDimitry Andric   assert(LoopID->getNumOperands() > 0 && "requires at least one operand");
2620b57cec5SDimitry Andric   assert(LoopID->getOperand(0) == LoopID && "invalid loop id");
2630b57cec5SDimitry Andric 
264*0fca6ea1SDimitry Andric   for (const MDOperand &MDO : llvm::drop_begin(LoopID->operands())) {
2650b57cec5SDimitry Andric     const MDString *S = nullptr;
2660b57cec5SDimitry Andric     SmallVector<Metadata *, 4> Args;
2670b57cec5SDimitry Andric 
2680b57cec5SDimitry Andric     // The expected hint is either a MDString or a MDNode with the first
2690b57cec5SDimitry Andric     // operand a MDString.
270*0fca6ea1SDimitry Andric     if (const MDNode *MD = dyn_cast<MDNode>(MDO)) {
2710b57cec5SDimitry Andric       if (!MD || MD->getNumOperands() == 0)
2720b57cec5SDimitry Andric         continue;
2730b57cec5SDimitry Andric       S = dyn_cast<MDString>(MD->getOperand(0));
2740b57cec5SDimitry Andric       for (unsigned i = 1, ie = MD->getNumOperands(); i < ie; ++i)
2750b57cec5SDimitry Andric         Args.push_back(MD->getOperand(i));
2760b57cec5SDimitry Andric     } else {
277*0fca6ea1SDimitry Andric       S = dyn_cast<MDString>(MDO);
2780b57cec5SDimitry Andric       assert(Args.size() == 0 && "too many arguments for MDString");
2790b57cec5SDimitry Andric     }
2800b57cec5SDimitry Andric 
2810b57cec5SDimitry Andric     if (!S)
2820b57cec5SDimitry Andric       continue;
2830b57cec5SDimitry Andric 
2840b57cec5SDimitry Andric     // Check if the hint starts with the loop metadata prefix.
2850b57cec5SDimitry Andric     StringRef Name = S->getString();
2860b57cec5SDimitry Andric     if (Args.size() == 1)
2870b57cec5SDimitry Andric       setHint(Name, Args[0]);
2880b57cec5SDimitry Andric   }
2890b57cec5SDimitry Andric }
2900b57cec5SDimitry Andric 
/// Record a single metadata hint: \p Name is the full metadata string and
/// \p Arg its integer argument. Updates the matching Hint member only if the
/// value passes validation; invalid values are dropped with a debug note.
void LoopVectorizeHints::setHint(StringRef Name, Metadata *Arg) {
  if (!Name.starts_with(Prefix()))
    return;
  // Drop the common metadata prefix; the remainder is matched against the
  // per-hint names below.
  Name = Name.substr(Prefix().size(), StringRef::npos);

  // Hints must carry a constant-integer argument.
  const ConstantInt *C = mdconst::dyn_extract<ConstantInt>(Arg);
  if (!C)
    return;
  unsigned Val = C->getZExtValue();

  Hint *Hints[] = {&Width,        &Interleave, &Force,
                   &IsVectorized, &Predicate,  &Scalable};
  for (auto *H : Hints) {
    if (Name == H->Name) {
      if (H->validate(Val))
        H->Value = Val;
      else
        LLVM_DEBUG(dbgs() << "LV: ignoring invalid hint '" << Name << "'\n");
      break;
    }
  }
}
3130b57cec5SDimitry Andric 
3140b57cec5SDimitry Andric // Return true if the inner loop \p Lp is uniform with regard to the outer loop
3150b57cec5SDimitry Andric // \p OuterLp (i.e., if the outer loop is vectorized, all the vector lanes
3160b57cec5SDimitry Andric // executing the inner loop will execute the same iterations). This check is
3170b57cec5SDimitry Andric // very constrained for now but it will be relaxed in the future. \p Lp is
3180b57cec5SDimitry Andric // considered uniform if it meets all the following conditions:
3190b57cec5SDimitry Andric //   1) it has a canonical IV (starting from 0 and with stride 1),
3200b57cec5SDimitry Andric //   2) its latch terminator is a conditional branch and,
3210b57cec5SDimitry Andric //   3) its latch condition is a compare instruction whose operands are the
3220b57cec5SDimitry Andric //      canonical IV and an OuterLp invariant.
3230b57cec5SDimitry Andric // This check doesn't take into account the uniformity of other conditions not
3240b57cec5SDimitry Andric // related to the loop latch because they don't affect the loop uniformity.
3250b57cec5SDimitry Andric //
3260b57cec5SDimitry Andric // NOTE: We decided to keep all these checks and its associated documentation
3270b57cec5SDimitry Andric // together so that we can easily have a picture of the current supported loop
3280b57cec5SDimitry Andric // nests. However, some of the current checks don't depend on \p OuterLp and
3290b57cec5SDimitry Andric // would be redundantly executed for each \p Lp if we invoked this function for
3300b57cec5SDimitry Andric // different candidate outer loops. This is not the case for now because we
3310b57cec5SDimitry Andric // don't currently have the infrastructure to evaluate multiple candidate outer
3320b57cec5SDimitry Andric // loops and \p OuterLp will be a fixed parameter while we only support explicit
3330b57cec5SDimitry Andric // outer loop vectorization. It's also very likely that these checks go away
3340b57cec5SDimitry Andric // before introducing the aforementioned infrastructure. However, if this is not
3350b57cec5SDimitry Andric // the case, we should move the \p OuterLp independent checks to a separate
3360b57cec5SDimitry Andric // function that is only executed once for each \p Lp.
static bool isUniformLoop(Loop *Lp, Loop *OuterLp) {
  assert(Lp->getLoopLatch() && "Expected loop with a single latch.");

  // If Lp is the outer loop, it's uniform by definition.
  if (Lp == OuterLp)
    return true;
  assert(OuterLp->contains(Lp) && "OuterLp must contain Lp.");

  // 1. The inner loop must have a canonical IV (starting at 0, stride 1).
  PHINode *IV = Lp->getCanonicalInductionVariable();
  if (!IV) {
    LLVM_DEBUG(dbgs() << "LV: Canonical IV not found.\n");
    return false;
  }

  // 2. The latch must terminate in a conditional branch.
  BasicBlock *Latch = Lp->getLoopLatch();
  auto *LatchBr = dyn_cast<BranchInst>(Latch->getTerminator());
  if (!LatchBr || LatchBr->isUnconditional()) {
    LLVM_DEBUG(dbgs() << "LV: Unsupported loop latch branch.\n");
    return false;
  }

  // 3. The latch condition must be a compare instruction...
  auto *LatchCmp = dyn_cast<CmpInst>(LatchBr->getCondition());
  if (!LatchCmp) {
    LLVM_DEBUG(
        dbgs() << "LV: Loop latch condition is not a compare instruction.\n");
    return false;
  }

  // ...whose operands are the IV update and an OuterLp-invariant value,
  // in either operand order.
  Value *CondOp0 = LatchCmp->getOperand(0);
  Value *CondOp1 = LatchCmp->getOperand(1);
  Value *IVUpdate = IV->getIncomingValueForBlock(Latch);
  if (!(CondOp0 == IVUpdate && OuterLp->isLoopInvariant(CondOp1)) &&
      !(CondOp1 == IVUpdate && OuterLp->isLoopInvariant(CondOp0))) {
    LLVM_DEBUG(dbgs() << "LV: Loop latch condition is not uniform.\n");
    return false;
  }

  return true;
}
3790b57cec5SDimitry Andric 
3800b57cec5SDimitry Andric // Return true if \p Lp and all its nested loops are uniform with regard to \p
3810b57cec5SDimitry Andric // OuterLp.
3820b57cec5SDimitry Andric static bool isUniformLoopNest(Loop *Lp, Loop *OuterLp) {
3830b57cec5SDimitry Andric   if (!isUniformLoop(Lp, OuterLp))
3840b57cec5SDimitry Andric     return false;
3850b57cec5SDimitry Andric 
3860b57cec5SDimitry Andric   // Check if nested loops are uniform.
3870b57cec5SDimitry Andric   for (Loop *SubLp : *Lp)
3880b57cec5SDimitry Andric     if (!isUniformLoopNest(SubLp, OuterLp))
3890b57cec5SDimitry Andric       return false;
3900b57cec5SDimitry Andric 
3910b57cec5SDimitry Andric   return true;
3920b57cec5SDimitry Andric }
3930b57cec5SDimitry Andric 
3940b57cec5SDimitry Andric static Type *convertPointerToIntegerType(const DataLayout &DL, Type *Ty) {
3950b57cec5SDimitry Andric   if (Ty->isPointerTy())
3960b57cec5SDimitry Andric     return DL.getIntPtrType(Ty);
3970b57cec5SDimitry Andric 
3980b57cec5SDimitry Andric   // It is possible that char's or short's overflow when we ask for the loop's
3990b57cec5SDimitry Andric   // trip count, work around this by changing the type size.
4000b57cec5SDimitry Andric   if (Ty->getScalarSizeInBits() < 32)
4010b57cec5SDimitry Andric     return Type::getInt32Ty(Ty->getContext());
4020b57cec5SDimitry Andric 
4030b57cec5SDimitry Andric   return Ty;
4040b57cec5SDimitry Andric }
4050b57cec5SDimitry Andric 
4060b57cec5SDimitry Andric static Type *getWiderType(const DataLayout &DL, Type *Ty0, Type *Ty1) {
4070b57cec5SDimitry Andric   Ty0 = convertPointerToIntegerType(DL, Ty0);
4080b57cec5SDimitry Andric   Ty1 = convertPointerToIntegerType(DL, Ty1);
4090b57cec5SDimitry Andric   if (Ty0->getScalarSizeInBits() > Ty1->getScalarSizeInBits())
4100b57cec5SDimitry Andric     return Ty0;
4110b57cec5SDimitry Andric   return Ty1;
4120b57cec5SDimitry Andric }
4130b57cec5SDimitry Andric 
4140b57cec5SDimitry Andric /// Check that the instruction has outside loop users and is not an
4150b57cec5SDimitry Andric /// identified reduction variable.
4160b57cec5SDimitry Andric static bool hasOutsideLoopUser(const Loop *TheLoop, Instruction *Inst,
4170b57cec5SDimitry Andric                                SmallPtrSetImpl<Value *> &AllowedExit) {
4180b57cec5SDimitry Andric   // Reductions, Inductions and non-header phis are allowed to have exit users. All
4190b57cec5SDimitry Andric   // other instructions must not have external users.
4200b57cec5SDimitry Andric   if (!AllowedExit.count(Inst))
4210b57cec5SDimitry Andric     // Check that all of the users of the loop are inside the BB.
4220b57cec5SDimitry Andric     for (User *U : Inst->users()) {
4230b57cec5SDimitry Andric       Instruction *UI = cast<Instruction>(U);
4240b57cec5SDimitry Andric       // This user may be a reduction exit value.
4250b57cec5SDimitry Andric       if (!TheLoop->contains(UI)) {
4260b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "LV: Found an outside user for : " << *UI << '\n');
4270b57cec5SDimitry Andric         return true;
4280b57cec5SDimitry Andric       }
4290b57cec5SDimitry Andric     }
4300b57cec5SDimitry Andric   return false;
4310b57cec5SDimitry Andric }
4320b57cec5SDimitry Andric 
43381ad6265SDimitry Andric /// Returns true if A and B have same pointer operands or same SCEVs addresses
43481ad6265SDimitry Andric static bool storeToSameAddress(ScalarEvolution *SE, StoreInst *A,
43581ad6265SDimitry Andric                                StoreInst *B) {
43681ad6265SDimitry Andric   // Compare store
43781ad6265SDimitry Andric   if (A == B)
43881ad6265SDimitry Andric     return true;
43981ad6265SDimitry Andric 
44081ad6265SDimitry Andric   // Otherwise Compare pointers
44181ad6265SDimitry Andric   Value *APtr = A->getPointerOperand();
44281ad6265SDimitry Andric   Value *BPtr = B->getPointerOperand();
44381ad6265SDimitry Andric   if (APtr == BPtr)
44481ad6265SDimitry Andric     return true;
44581ad6265SDimitry Andric 
44681ad6265SDimitry Andric   // Otherwise compare address SCEVs
44781ad6265SDimitry Andric   if (SE->getSCEV(APtr) == SE->getSCEV(BPtr))
44881ad6265SDimitry Andric     return true;
44981ad6265SDimitry Andric 
45081ad6265SDimitry Andric   return false;
45181ad6265SDimitry Andric }
45281ad6265SDimitry Andric 
/// Return 1 if \p Ptr advances by exactly one \p AccessTy element per
/// iteration, -1 if it advances backwards by one element, and 0 otherwise
/// (non-unit stride or unknown).
int LoopVectorizationLegality::isConsecutivePtr(Type *AccessTy,
                                                Value *Ptr) const {
  // FIXME: Currently, the set of symbolic strides is sometimes queried before
  // it's collected.  This happens from canVectorizeWithIfConvert, when the
  // pointer is checked to reference consecutive elements suitable for a
  // masked access.
  const auto &Strides =
    LAI ? LAI->getSymbolicStrides() : DenseMap<Value *, const SCEV *>();

  // Adding stride-versioning predicates is only allowed when not optimizing
  // for size (either via function attribute or profile-guided size opts).
  Function *F = TheLoop->getHeader()->getParent();
  bool OptForSize = F->hasOptSize() ||
                    llvm::shouldOptimizeForSize(TheLoop->getHeader(), PSI, BFI,
                                                PGSOQueryType::IRPass);
  bool CanAddPredicate = !OptForSize;
  // An unknown stride is folded to 0 via value_or.
  int Stride = getPtrStride(PSE, AccessTy, Ptr, TheLoop, Strides,
                            CanAddPredicate, false).value_or(0);
  // Only unit strides (forward or reverse) count as consecutive.
  if (Stride == 1 || Stride == -1)
    return Stride;
  return 0;
}
4730b57cec5SDimitry Andric 
/// Returns true if \p V is invariant according to the cached LoopAccessInfo.
/// NOTE(review): LAI is dereferenced unconditionally — callers must ensure
/// the loop-access analysis has already been computed.
bool LoopVectorizationLegality::isInvariant(Value *V) const {
  return LAI->isInvariant(V);
}
4770b57cec5SDimitry Andric 
47806c3fb27SDimitry Andric namespace {
47906c3fb27SDimitry Andric /// A rewriter to build the SCEVs for each of the VF lanes in the expected
48006c3fb27SDimitry Andric /// vectorized loop, which can then be compared to detect their uniformity. This
48106c3fb27SDimitry Andric /// is done by replacing the AddRec SCEVs of the original scalar loop (TheLoop)
48206c3fb27SDimitry Andric /// with new AddRecs where the step is multiplied by StepMultiplier and Offset *
48306c3fb27SDimitry Andric /// Step is added. Also checks if all sub-expressions are analyzable w.r.t.
48406c3fb27SDimitry Andric /// uniformity.
48506c3fb27SDimitry Andric class SCEVAddRecForUniformityRewriter
48606c3fb27SDimitry Andric     : public SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter> {
48706c3fb27SDimitry Andric   /// Multiplier to be applied to the step of AddRecs in TheLoop.
48806c3fb27SDimitry Andric   unsigned StepMultiplier;
48906c3fb27SDimitry Andric 
49006c3fb27SDimitry Andric   /// Offset to be added to the AddRecs in TheLoop.
49106c3fb27SDimitry Andric   unsigned Offset;
49206c3fb27SDimitry Andric 
49306c3fb27SDimitry Andric   /// Loop for which to rewrite AddRecsFor.
49406c3fb27SDimitry Andric   Loop *TheLoop;
49506c3fb27SDimitry Andric 
49606c3fb27SDimitry Andric   /// Is any sub-expressions not analyzable w.r.t. uniformity?
49706c3fb27SDimitry Andric   bool CannotAnalyze = false;
49806c3fb27SDimitry Andric 
49906c3fb27SDimitry Andric   bool canAnalyze() const { return !CannotAnalyze; }
50006c3fb27SDimitry Andric 
50106c3fb27SDimitry Andric public:
50206c3fb27SDimitry Andric   SCEVAddRecForUniformityRewriter(ScalarEvolution &SE, unsigned StepMultiplier,
50306c3fb27SDimitry Andric                                   unsigned Offset, Loop *TheLoop)
50406c3fb27SDimitry Andric       : SCEVRewriteVisitor(SE), StepMultiplier(StepMultiplier), Offset(Offset),
50506c3fb27SDimitry Andric         TheLoop(TheLoop) {}
50606c3fb27SDimitry Andric 
50706c3fb27SDimitry Andric   const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) {
50806c3fb27SDimitry Andric     assert(Expr->getLoop() == TheLoop &&
50906c3fb27SDimitry Andric            "addrec outside of TheLoop must be invariant and should have been "
51006c3fb27SDimitry Andric            "handled earlier");
51106c3fb27SDimitry Andric     // Build a new AddRec by multiplying the step by StepMultiplier and
51206c3fb27SDimitry Andric     // incrementing the start by Offset * step.
51306c3fb27SDimitry Andric     Type *Ty = Expr->getType();
51406c3fb27SDimitry Andric     auto *Step = Expr->getStepRecurrence(SE);
51506c3fb27SDimitry Andric     if (!SE.isLoopInvariant(Step, TheLoop)) {
51606c3fb27SDimitry Andric       CannotAnalyze = true;
51706c3fb27SDimitry Andric       return Expr;
51806c3fb27SDimitry Andric     }
51906c3fb27SDimitry Andric     auto *NewStep = SE.getMulExpr(Step, SE.getConstant(Ty, StepMultiplier));
52006c3fb27SDimitry Andric     auto *ScaledOffset = SE.getMulExpr(Step, SE.getConstant(Ty, Offset));
52106c3fb27SDimitry Andric     auto *NewStart = SE.getAddExpr(Expr->getStart(), ScaledOffset);
52206c3fb27SDimitry Andric     return SE.getAddRecExpr(NewStart, NewStep, TheLoop, SCEV::FlagAnyWrap);
52306c3fb27SDimitry Andric   }
52406c3fb27SDimitry Andric 
52506c3fb27SDimitry Andric   const SCEV *visit(const SCEV *S) {
52606c3fb27SDimitry Andric     if (CannotAnalyze || SE.isLoopInvariant(S, TheLoop))
52706c3fb27SDimitry Andric       return S;
52806c3fb27SDimitry Andric     return SCEVRewriteVisitor<SCEVAddRecForUniformityRewriter>::visit(S);
52906c3fb27SDimitry Andric   }
53006c3fb27SDimitry Andric 
53106c3fb27SDimitry Andric   const SCEV *visitUnknown(const SCEVUnknown *S) {
53206c3fb27SDimitry Andric     if (SE.isLoopInvariant(S, TheLoop))
53306c3fb27SDimitry Andric       return S;
53406c3fb27SDimitry Andric     // The value could vary across iterations.
53506c3fb27SDimitry Andric     CannotAnalyze = true;
53606c3fb27SDimitry Andric     return S;
53706c3fb27SDimitry Andric   }
53806c3fb27SDimitry Andric 
53906c3fb27SDimitry Andric   const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *S) {
54006c3fb27SDimitry Andric     // Could not analyze the expression.
54106c3fb27SDimitry Andric     CannotAnalyze = true;
54206c3fb27SDimitry Andric     return S;
54306c3fb27SDimitry Andric   }
54406c3fb27SDimitry Andric 
54506c3fb27SDimitry Andric   static const SCEV *rewrite(const SCEV *S, ScalarEvolution &SE,
54606c3fb27SDimitry Andric                              unsigned StepMultiplier, unsigned Offset,
54706c3fb27SDimitry Andric                              Loop *TheLoop) {
54806c3fb27SDimitry Andric     /// Bail out if the expression does not contain an UDiv expression.
54906c3fb27SDimitry Andric     /// Uniform values which are not loop invariant require operations to strip
55006c3fb27SDimitry Andric     /// out the lowest bits. For now just look for UDivs and use it to avoid
55106c3fb27SDimitry Andric     /// re-writing UDIV-free expressions for other lanes to limit compile time.
55206c3fb27SDimitry Andric     if (!SCEVExprContains(S,
55306c3fb27SDimitry Andric                           [](const SCEV *S) { return isa<SCEVUDivExpr>(S); }))
55406c3fb27SDimitry Andric       return SE.getCouldNotCompute();
55506c3fb27SDimitry Andric 
55606c3fb27SDimitry Andric     SCEVAddRecForUniformityRewriter Rewriter(SE, StepMultiplier, Offset,
55706c3fb27SDimitry Andric                                              TheLoop);
55806c3fb27SDimitry Andric     const SCEV *Result = Rewriter.visit(S);
55906c3fb27SDimitry Andric 
56006c3fb27SDimitry Andric     if (Rewriter.canAnalyze())
56106c3fb27SDimitry Andric       return Result;
56206c3fb27SDimitry Andric     return SE.getCouldNotCompute();
56306c3fb27SDimitry Andric   }
56406c3fb27SDimitry Andric };
56506c3fb27SDimitry Andric 
56606c3fb27SDimitry Andric } // namespace
56706c3fb27SDimitry Andric 
56806c3fb27SDimitry Andric bool LoopVectorizationLegality::isUniform(Value *V, ElementCount VF) const {
56906c3fb27SDimitry Andric   if (isInvariant(V))
57006c3fb27SDimitry Andric     return true;
57106c3fb27SDimitry Andric   if (VF.isScalable())
57206c3fb27SDimitry Andric     return false;
57306c3fb27SDimitry Andric   if (VF.isScalar())
57406c3fb27SDimitry Andric     return true;
57506c3fb27SDimitry Andric 
57606c3fb27SDimitry Andric   // Since we rely on SCEV for uniformity, if the type is not SCEVable, it is
57706c3fb27SDimitry Andric   // never considered uniform.
57806c3fb27SDimitry Andric   auto *SE = PSE.getSE();
57906c3fb27SDimitry Andric   if (!SE->isSCEVable(V->getType()))
58006c3fb27SDimitry Andric     return false;
58106c3fb27SDimitry Andric   const SCEV *S = SE->getSCEV(V);
58206c3fb27SDimitry Andric 
58306c3fb27SDimitry Andric   // Rewrite AddRecs in TheLoop to step by VF and check if the expression for
58406c3fb27SDimitry Andric   // lane 0 matches the expressions for all other lanes.
58506c3fb27SDimitry Andric   unsigned FixedVF = VF.getKnownMinValue();
58606c3fb27SDimitry Andric   const SCEV *FirstLaneExpr =
58706c3fb27SDimitry Andric       SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, 0, TheLoop);
58806c3fb27SDimitry Andric   if (isa<SCEVCouldNotCompute>(FirstLaneExpr))
58906c3fb27SDimitry Andric     return false;
59006c3fb27SDimitry Andric 
59106c3fb27SDimitry Andric   // Make sure the expressions for lanes FixedVF-1..1 match the expression for
59206c3fb27SDimitry Andric   // lane 0. We check lanes in reverse order for compile-time, as frequently
59306c3fb27SDimitry Andric   // checking the last lane is sufficient to rule out uniformity.
59406c3fb27SDimitry Andric   return all_of(reverse(seq<unsigned>(1, FixedVF)), [&](unsigned I) {
59506c3fb27SDimitry Andric     const SCEV *IthLaneExpr =
59606c3fb27SDimitry Andric         SCEVAddRecForUniformityRewriter::rewrite(S, *SE, FixedVF, I, TheLoop);
59706c3fb27SDimitry Andric     return FirstLaneExpr == IthLaneExpr;
59806c3fb27SDimitry Andric   });
59906c3fb27SDimitry Andric }
60006c3fb27SDimitry Andric 
60106c3fb27SDimitry Andric bool LoopVectorizationLegality::isUniformMemOp(Instruction &I,
60206c3fb27SDimitry Andric                                                ElementCount VF) const {
603bdd1243dSDimitry Andric   Value *Ptr = getLoadStorePointerOperand(&I);
604bdd1243dSDimitry Andric   if (!Ptr)
605bdd1243dSDimitry Andric     return false;
606bdd1243dSDimitry Andric   // Note: There's nothing inherent which prevents predicated loads and
607bdd1243dSDimitry Andric   // stores from being uniform.  The current lowering simply doesn't handle
608bdd1243dSDimitry Andric   // it; in particular, the cost model distinguishes scatter/gather from
609bdd1243dSDimitry Andric   // scalar w/predication, and we currently rely on the scalar path.
61006c3fb27SDimitry Andric   return isUniform(Ptr, VF) && !blockNeedsPredication(I.getParent());
611bdd1243dSDimitry Andric }
612bdd1243dSDimitry Andric 
6130b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeOuterLoop() {
614e8d8bef9SDimitry Andric   assert(!TheLoop->isInnermost() && "We are not vectorizing an outer loop.");
6150b57cec5SDimitry Andric   // Store the result and return it at the end instead of exiting early, in case
6160b57cec5SDimitry Andric   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
6170b57cec5SDimitry Andric   bool Result = true;
6180b57cec5SDimitry Andric   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
6190b57cec5SDimitry Andric 
6200b57cec5SDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
6210b57cec5SDimitry Andric     // Check whether the BB terminator is a BranchInst. Any other terminator is
6220b57cec5SDimitry Andric     // not supported yet.
6230b57cec5SDimitry Andric     auto *Br = dyn_cast<BranchInst>(BB->getTerminator());
6240b57cec5SDimitry Andric     if (!Br) {
6250b57cec5SDimitry Andric       reportVectorizationFailure("Unsupported basic block terminator",
6260b57cec5SDimitry Andric           "loop control flow is not understood by vectorizer",
6278bcb0991SDimitry Andric           "CFGNotUnderstood", ORE, TheLoop);
6280b57cec5SDimitry Andric       if (DoExtraAnalysis)
6290b57cec5SDimitry Andric         Result = false;
6300b57cec5SDimitry Andric       else
6310b57cec5SDimitry Andric         return false;
6320b57cec5SDimitry Andric     }
6330b57cec5SDimitry Andric 
6340b57cec5SDimitry Andric     // Check whether the BranchInst is a supported one. Only unconditional
6350b57cec5SDimitry Andric     // branches, conditional branches with an outer loop invariant condition or
6360b57cec5SDimitry Andric     // backedges are supported.
6370b57cec5SDimitry Andric     // FIXME: We skip these checks when VPlan predication is enabled as we
6380b57cec5SDimitry Andric     // want to allow divergent branches. This whole check will be removed
6390b57cec5SDimitry Andric     // once VPlan predication is on by default.
64081ad6265SDimitry Andric     if (Br && Br->isConditional() &&
6410b57cec5SDimitry Andric         !TheLoop->isLoopInvariant(Br->getCondition()) &&
6420b57cec5SDimitry Andric         !LI->isLoopHeader(Br->getSuccessor(0)) &&
6430b57cec5SDimitry Andric         !LI->isLoopHeader(Br->getSuccessor(1))) {
6440b57cec5SDimitry Andric       reportVectorizationFailure("Unsupported conditional branch",
6450b57cec5SDimitry Andric           "loop control flow is not understood by vectorizer",
6468bcb0991SDimitry Andric           "CFGNotUnderstood", ORE, TheLoop);
6470b57cec5SDimitry Andric       if (DoExtraAnalysis)
6480b57cec5SDimitry Andric         Result = false;
6490b57cec5SDimitry Andric       else
6500b57cec5SDimitry Andric         return false;
6510b57cec5SDimitry Andric     }
6520b57cec5SDimitry Andric   }
6530b57cec5SDimitry Andric 
6540b57cec5SDimitry Andric   // Check whether inner loops are uniform. At this point, we only support
6550b57cec5SDimitry Andric   // simple outer loops scenarios with uniform nested loops.
6560b57cec5SDimitry Andric   if (!isUniformLoopNest(TheLoop /*loop nest*/,
6570b57cec5SDimitry Andric                          TheLoop /*context outer loop*/)) {
6580b57cec5SDimitry Andric     reportVectorizationFailure("Outer loop contains divergent loops",
6590b57cec5SDimitry Andric         "loop control flow is not understood by vectorizer",
6608bcb0991SDimitry Andric         "CFGNotUnderstood", ORE, TheLoop);
6610b57cec5SDimitry Andric     if (DoExtraAnalysis)
6620b57cec5SDimitry Andric       Result = false;
6630b57cec5SDimitry Andric     else
6640b57cec5SDimitry Andric       return false;
6650b57cec5SDimitry Andric   }
6660b57cec5SDimitry Andric 
6670b57cec5SDimitry Andric   // Check whether we are able to set up outer loop induction.
6680b57cec5SDimitry Andric   if (!setupOuterLoopInductions()) {
6690b57cec5SDimitry Andric     reportVectorizationFailure("Unsupported outer loop Phi(s)",
6700b57cec5SDimitry Andric                                "Unsupported outer loop Phi(s)",
6718bcb0991SDimitry Andric                                "UnsupportedPhi", ORE, TheLoop);
6720b57cec5SDimitry Andric     if (DoExtraAnalysis)
6730b57cec5SDimitry Andric       Result = false;
6740b57cec5SDimitry Andric     else
6750b57cec5SDimitry Andric       return false;
6760b57cec5SDimitry Andric   }
6770b57cec5SDimitry Andric 
6780b57cec5SDimitry Andric   return Result;
6790b57cec5SDimitry Andric }
6800b57cec5SDimitry Andric 
6810b57cec5SDimitry Andric void LoopVectorizationLegality::addInductionPhi(
6820b57cec5SDimitry Andric     PHINode *Phi, const InductionDescriptor &ID,
6830b57cec5SDimitry Andric     SmallPtrSetImpl<Value *> &AllowedExit) {
6840b57cec5SDimitry Andric   Inductions[Phi] = ID;
6850b57cec5SDimitry Andric 
6860b57cec5SDimitry Andric   // In case this induction also comes with casts that we know we can ignore
6870b57cec5SDimitry Andric   // in the vectorized loop body, record them here. All casts could be recorded
6880b57cec5SDimitry Andric   // here for ignoring, but suffices to record only the first (as it is the
6890b57cec5SDimitry Andric   // only one that may bw used outside the cast sequence).
6900b57cec5SDimitry Andric   const SmallVectorImpl<Instruction *> &Casts = ID.getCastInsts();
6910b57cec5SDimitry Andric   if (!Casts.empty())
6920b57cec5SDimitry Andric     InductionCastsToIgnore.insert(*Casts.begin());
6930b57cec5SDimitry Andric 
6940b57cec5SDimitry Andric   Type *PhiTy = Phi->getType();
695*0fca6ea1SDimitry Andric   const DataLayout &DL = Phi->getDataLayout();
6960b57cec5SDimitry Andric 
6970b57cec5SDimitry Andric   // Get the widest type.
6980b57cec5SDimitry Andric   if (!PhiTy->isFloatingPointTy()) {
6990b57cec5SDimitry Andric     if (!WidestIndTy)
7000b57cec5SDimitry Andric       WidestIndTy = convertPointerToIntegerType(DL, PhiTy);
7010b57cec5SDimitry Andric     else
7020b57cec5SDimitry Andric       WidestIndTy = getWiderType(DL, PhiTy, WidestIndTy);
7030b57cec5SDimitry Andric   }
7040b57cec5SDimitry Andric 
7050b57cec5SDimitry Andric   // Int inductions are special because we only allow one IV.
7060b57cec5SDimitry Andric   if (ID.getKind() == InductionDescriptor::IK_IntInduction &&
7070b57cec5SDimitry Andric       ID.getConstIntStepValue() && ID.getConstIntStepValue()->isOne() &&
7080b57cec5SDimitry Andric       isa<Constant>(ID.getStartValue()) &&
7090b57cec5SDimitry Andric       cast<Constant>(ID.getStartValue())->isNullValue()) {
7100b57cec5SDimitry Andric 
7110b57cec5SDimitry Andric     // Use the phi node with the widest type as induction. Use the last
7120b57cec5SDimitry Andric     // one if there are multiple (no good reason for doing this other
7130b57cec5SDimitry Andric     // than it is expedient). We've checked that it begins at zero and
7140b57cec5SDimitry Andric     // steps by one, so this is a canonical induction variable.
7150b57cec5SDimitry Andric     if (!PrimaryInduction || PhiTy == WidestIndTy)
7160b57cec5SDimitry Andric       PrimaryInduction = Phi;
7170b57cec5SDimitry Andric   }
7180b57cec5SDimitry Andric 
7190b57cec5SDimitry Andric   // Both the PHI node itself, and the "post-increment" value feeding
7200b57cec5SDimitry Andric   // back into the PHI node may have external users.
7210b57cec5SDimitry Andric   // We can allow those uses, except if the SCEVs we have for them rely
7220b57cec5SDimitry Andric   // on predicates that only hold within the loop, since allowing the exit
7230b57cec5SDimitry Andric   // currently means re-using this SCEV outside the loop (see PR33706 for more
7240b57cec5SDimitry Andric   // details).
72581ad6265SDimitry Andric   if (PSE.getPredicate().isAlwaysTrue()) {
7260b57cec5SDimitry Andric     AllowedExit.insert(Phi);
7270b57cec5SDimitry Andric     AllowedExit.insert(Phi->getIncomingValueForBlock(TheLoop->getLoopLatch()));
7280b57cec5SDimitry Andric   }
7290b57cec5SDimitry Andric 
7300b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "LV: Found an induction variable.\n");
7310b57cec5SDimitry Andric }
7320b57cec5SDimitry Andric 
7330b57cec5SDimitry Andric bool LoopVectorizationLegality::setupOuterLoopInductions() {
7340b57cec5SDimitry Andric   BasicBlock *Header = TheLoop->getHeader();
7350b57cec5SDimitry Andric 
7360b57cec5SDimitry Andric   // Returns true if a given Phi is a supported induction.
7370b57cec5SDimitry Andric   auto isSupportedPhi = [&](PHINode &Phi) -> bool {
7380b57cec5SDimitry Andric     InductionDescriptor ID;
7390b57cec5SDimitry Andric     if (InductionDescriptor::isInductionPHI(&Phi, TheLoop, PSE, ID) &&
7400b57cec5SDimitry Andric         ID.getKind() == InductionDescriptor::IK_IntInduction) {
7410b57cec5SDimitry Andric       addInductionPhi(&Phi, ID, AllowedExit);
7420b57cec5SDimitry Andric       return true;
7430b57cec5SDimitry Andric     } else {
7440b57cec5SDimitry Andric       // Bail out for any Phi in the outer loop header that is not a supported
7450b57cec5SDimitry Andric       // induction.
7460b57cec5SDimitry Andric       LLVM_DEBUG(
7470b57cec5SDimitry Andric           dbgs()
7480b57cec5SDimitry Andric           << "LV: Found unsupported PHI for outer loop vectorization.\n");
7490b57cec5SDimitry Andric       return false;
7500b57cec5SDimitry Andric     }
7510b57cec5SDimitry Andric   };
7520b57cec5SDimitry Andric 
7530b57cec5SDimitry Andric   if (llvm::all_of(Header->phis(), isSupportedPhi))
7540b57cec5SDimitry Andric     return true;
7550b57cec5SDimitry Andric   else
7560b57cec5SDimitry Andric     return false;
7570b57cec5SDimitry Andric }
7580b57cec5SDimitry Andric 
7595ffd83dbSDimitry Andric /// Checks if a function is scalarizable according to the TLI, in
7605ffd83dbSDimitry Andric /// the sense that it should be vectorized and then expanded in
7615ffd83dbSDimitry Andric /// multiple scalar calls. This is represented in the
7625ffd83dbSDimitry Andric /// TLI via mappings that do not specify a vector name, as in the
7635ffd83dbSDimitry Andric /// following example:
7645ffd83dbSDimitry Andric ///
7655ffd83dbSDimitry Andric ///    const VecDesc VecIntrinsics[] = {
7665ffd83dbSDimitry Andric ///      {"llvm.phx.abs.i32", "", 4}
7675ffd83dbSDimitry Andric ///    };
7685ffd83dbSDimitry Andric static bool isTLIScalarize(const TargetLibraryInfo &TLI, const CallInst &CI) {
7695ffd83dbSDimitry Andric   const StringRef ScalarName = CI.getCalledFunction()->getName();
7705ffd83dbSDimitry Andric   bool Scalarize = TLI.isFunctionVectorizable(ScalarName);
7715ffd83dbSDimitry Andric   // Check that all known VFs are not associated to a vector
7725ffd83dbSDimitry Andric   // function, i.e. the vector name is emty.
773fe6060f1SDimitry Andric   if (Scalarize) {
774fe6060f1SDimitry Andric     ElementCount WidestFixedVF, WidestScalableVF;
775fe6060f1SDimitry Andric     TLI.getWidestVF(ScalarName, WidestFixedVF, WidestScalableVF);
776fe6060f1SDimitry Andric     for (ElementCount VF = ElementCount::getFixed(2);
777fe6060f1SDimitry Andric          ElementCount::isKnownLE(VF, WidestFixedVF); VF *= 2)
7785ffd83dbSDimitry Andric       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
779fe6060f1SDimitry Andric     for (ElementCount VF = ElementCount::getScalable(1);
780fe6060f1SDimitry Andric          ElementCount::isKnownLE(VF, WidestScalableVF); VF *= 2)
781fe6060f1SDimitry Andric       Scalarize &= !TLI.isFunctionVectorizable(ScalarName, VF);
782fe6060f1SDimitry Andric     assert((WidestScalableVF.isZero() || !Scalarize) &&
783fe6060f1SDimitry Andric            "Caller may decide to scalarize a variant using a scalable VF");
7845ffd83dbSDimitry Andric   }
7855ffd83dbSDimitry Andric   return Scalarize;
7865ffd83dbSDimitry Andric }
7875ffd83dbSDimitry Andric 
7880b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeInstrs() {
7890b57cec5SDimitry Andric   BasicBlock *Header = TheLoop->getHeader();
7900b57cec5SDimitry Andric 
7910b57cec5SDimitry Andric   // For each block in the loop.
7920b57cec5SDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
7930b57cec5SDimitry Andric     // Scan the instructions in the block and look for hazards.
7940b57cec5SDimitry Andric     for (Instruction &I : *BB) {
7950b57cec5SDimitry Andric       if (auto *Phi = dyn_cast<PHINode>(&I)) {
7960b57cec5SDimitry Andric         Type *PhiTy = Phi->getType();
7970b57cec5SDimitry Andric         // Check that this PHI type is allowed.
7980b57cec5SDimitry Andric         if (!PhiTy->isIntegerTy() && !PhiTy->isFloatingPointTy() &&
7990b57cec5SDimitry Andric             !PhiTy->isPointerTy()) {
8000b57cec5SDimitry Andric           reportVectorizationFailure("Found a non-int non-pointer PHI",
8010b57cec5SDimitry Andric                                      "loop control flow is not understood by vectorizer",
8028bcb0991SDimitry Andric                                      "CFGNotUnderstood", ORE, TheLoop);
8030b57cec5SDimitry Andric           return false;
8040b57cec5SDimitry Andric         }
8050b57cec5SDimitry Andric 
8060b57cec5SDimitry Andric         // If this PHINode is not in the header block, then we know that we
8070b57cec5SDimitry Andric         // can convert it to select during if-conversion. No need to check if
8080b57cec5SDimitry Andric         // the PHIs in this block are induction or reduction variables.
8090b57cec5SDimitry Andric         if (BB != Header) {
8100b57cec5SDimitry Andric           // Non-header phi nodes that have outside uses can be vectorized. Add
8110b57cec5SDimitry Andric           // them to the list of allowed exits.
8120b57cec5SDimitry Andric           // Unsafe cyclic dependencies with header phis are identified during
813bdd1243dSDimitry Andric           // legalization for reduction, induction and fixed order
8140b57cec5SDimitry Andric           // recurrences.
8150b57cec5SDimitry Andric           AllowedExit.insert(&I);
8160b57cec5SDimitry Andric           continue;
8170b57cec5SDimitry Andric         }
8180b57cec5SDimitry Andric 
8190b57cec5SDimitry Andric         // We only allow if-converted PHIs with exactly two incoming values.
8200b57cec5SDimitry Andric         if (Phi->getNumIncomingValues() != 2) {
8210b57cec5SDimitry Andric           reportVectorizationFailure("Found an invalid PHI",
8220b57cec5SDimitry Andric               "loop control flow is not understood by vectorizer",
8238bcb0991SDimitry Andric               "CFGNotUnderstood", ORE, TheLoop, Phi);
8240b57cec5SDimitry Andric           return false;
8250b57cec5SDimitry Andric         }
8260b57cec5SDimitry Andric 
8270b57cec5SDimitry Andric         RecurrenceDescriptor RedDes;
8280b57cec5SDimitry Andric         if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
82981ad6265SDimitry Andric                                                  DT, PSE.getSE())) {
830fe6060f1SDimitry Andric           Requirements->addExactFPMathInst(RedDes.getExactFPMathInst());
8310b57cec5SDimitry Andric           AllowedExit.insert(RedDes.getLoopExitInstr());
8320b57cec5SDimitry Andric           Reductions[Phi] = RedDes;
8330b57cec5SDimitry Andric           continue;
8340b57cec5SDimitry Andric         }
8350b57cec5SDimitry Andric 
83606c3fb27SDimitry Andric         // We prevent matching non-constant strided pointer IVS to preserve
83706c3fb27SDimitry Andric         // historical vectorizer behavior after a generalization of the
83806c3fb27SDimitry Andric         // IVDescriptor code.  The intent is to remove this check, but we
83906c3fb27SDimitry Andric         // have to fix issues around code quality for such loops first.
84006c3fb27SDimitry Andric         auto isDisallowedStridedPointerInduction =
84106c3fb27SDimitry Andric           [](const InductionDescriptor &ID) {
84206c3fb27SDimitry Andric           if (AllowStridedPointerIVs)
84306c3fb27SDimitry Andric             return false;
84406c3fb27SDimitry Andric           return ID.getKind() == InductionDescriptor::IK_PtrInduction &&
84506c3fb27SDimitry Andric             ID.getConstIntStepValue() == nullptr;
84606c3fb27SDimitry Andric         };
84706c3fb27SDimitry Andric 
848bdd1243dSDimitry Andric         // TODO: Instead of recording the AllowedExit, it would be good to
849bdd1243dSDimitry Andric         // record the complementary set: NotAllowedExit. These include (but may
850bdd1243dSDimitry Andric         // not be limited to):
8510b57cec5SDimitry Andric         // 1. Reduction phis as they represent the one-before-last value, which
8520b57cec5SDimitry Andric         // is not available when vectorized
8530b57cec5SDimitry Andric         // 2. Induction phis and increment when SCEV predicates cannot be used
8540b57cec5SDimitry Andric         // outside the loop - see addInductionPhi
8550b57cec5SDimitry Andric         // 3. Non-Phis with outside uses when SCEV predicates cannot be used
8560b57cec5SDimitry Andric         // outside the loop - see call to hasOutsideLoopUser in the non-phi
8570b57cec5SDimitry Andric         // handling below
858bdd1243dSDimitry Andric         // 4. FixedOrderRecurrence phis that can possibly be handled by
8590b57cec5SDimitry Andric         // extraction.
8600b57cec5SDimitry Andric         // By recording these, we can then reason about ways to vectorize each
8610b57cec5SDimitry Andric         // of these NotAllowedExit.
8620b57cec5SDimitry Andric         InductionDescriptor ID;
86306c3fb27SDimitry Andric         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID) &&
86406c3fb27SDimitry Andric             !isDisallowedStridedPointerInduction(ID)) {
8650b57cec5SDimitry Andric           addInductionPhi(Phi, ID, AllowedExit);
866fe6060f1SDimitry Andric           Requirements->addExactFPMathInst(ID.getExactFPMathInst());
8670b57cec5SDimitry Andric           continue;
8680b57cec5SDimitry Andric         }
8690b57cec5SDimitry Andric 
87006c3fb27SDimitry Andric         if (RecurrenceDescriptor::isFixedOrderRecurrence(Phi, TheLoop, DT)) {
8715ffd83dbSDimitry Andric           AllowedExit.insert(Phi);
872bdd1243dSDimitry Andric           FixedOrderRecurrences.insert(Phi);
8730b57cec5SDimitry Andric           continue;
8740b57cec5SDimitry Andric         }
8750b57cec5SDimitry Andric 
8760b57cec5SDimitry Andric         // As a last resort, coerce the PHI to a AddRec expression
8770b57cec5SDimitry Andric         // and re-try classifying it a an induction PHI.
87806c3fb27SDimitry Andric         if (InductionDescriptor::isInductionPHI(Phi, TheLoop, PSE, ID, true) &&
87906c3fb27SDimitry Andric             !isDisallowedStridedPointerInduction(ID)) {
8800b57cec5SDimitry Andric           addInductionPhi(Phi, ID, AllowedExit);
8810b57cec5SDimitry Andric           continue;
8820b57cec5SDimitry Andric         }
8830b57cec5SDimitry Andric 
8840b57cec5SDimitry Andric         reportVectorizationFailure("Found an unidentified PHI",
8850b57cec5SDimitry Andric             "value that could not be identified as "
8860b57cec5SDimitry Andric             "reduction is used outside the loop",
8878bcb0991SDimitry Andric             "NonReductionValueUsedOutsideLoop", ORE, TheLoop, Phi);
8880b57cec5SDimitry Andric         return false;
8890b57cec5SDimitry Andric       } // end of PHI handling
8900b57cec5SDimitry Andric 
8910b57cec5SDimitry Andric       // We handle calls that:
8920b57cec5SDimitry Andric       //   * Are debug info intrinsics.
8930b57cec5SDimitry Andric       //   * Have a mapping to an IR intrinsic.
8940b57cec5SDimitry Andric       //   * Have a vector version available.
8950b57cec5SDimitry Andric       auto *CI = dyn_cast<CallInst>(&I);
8965ffd83dbSDimitry Andric 
8970b57cec5SDimitry Andric       if (CI && !getVectorIntrinsicIDForCall(CI, TLI) &&
8980b57cec5SDimitry Andric           !isa<DbgInfoIntrinsic>(CI) &&
8990b57cec5SDimitry Andric           !(CI->getCalledFunction() && TLI &&
9005ffd83dbSDimitry Andric             (!VFDatabase::getMappings(*CI).empty() ||
9015ffd83dbSDimitry Andric              isTLIScalarize(*TLI, *CI)))) {
9020b57cec5SDimitry Andric         // If the call is a recognized math libary call, it is likely that
9030b57cec5SDimitry Andric         // we can vectorize it given loosened floating-point constraints.
9040b57cec5SDimitry Andric         LibFunc Func;
9050b57cec5SDimitry Andric         bool IsMathLibCall =
9060b57cec5SDimitry Andric             TLI && CI->getCalledFunction() &&
9070b57cec5SDimitry Andric             CI->getType()->isFloatingPointTy() &&
9080b57cec5SDimitry Andric             TLI->getLibFunc(CI->getCalledFunction()->getName(), Func) &&
9090b57cec5SDimitry Andric             TLI->hasOptimizedCodeGen(Func);
9100b57cec5SDimitry Andric 
9110b57cec5SDimitry Andric         if (IsMathLibCall) {
9120b57cec5SDimitry Andric           // TODO: Ideally, we should not use clang-specific language here,
9130b57cec5SDimitry Andric           // but it's hard to provide meaningful yet generic advice.
9140b57cec5SDimitry Andric           // Also, should this be guarded by allowExtraAnalysis() and/or be part
9150b57cec5SDimitry Andric           // of the returned info from isFunctionVectorizable()?
9165ffd83dbSDimitry Andric           reportVectorizationFailure(
9175ffd83dbSDimitry Andric               "Found a non-intrinsic callsite",
9180b57cec5SDimitry Andric               "library call cannot be vectorized. "
9190b57cec5SDimitry Andric               "Try compiling with -fno-math-errno, -ffast-math, "
9200b57cec5SDimitry Andric               "or similar flags",
9218bcb0991SDimitry Andric               "CantVectorizeLibcall", ORE, TheLoop, CI);
9220b57cec5SDimitry Andric         } else {
9230b57cec5SDimitry Andric           reportVectorizationFailure("Found a non-intrinsic callsite",
9240b57cec5SDimitry Andric                                      "call instruction cannot be vectorized",
9258bcb0991SDimitry Andric                                      "CantVectorizeLibcall", ORE, TheLoop, CI);
9260b57cec5SDimitry Andric         }
9270b57cec5SDimitry Andric         return false;
9280b57cec5SDimitry Andric       }
9290b57cec5SDimitry Andric 
9300b57cec5SDimitry Andric       // Some intrinsics have scalar arguments and should be same in order for
9310b57cec5SDimitry Andric       // them to be vectorized (i.e. loop invariant).
9320b57cec5SDimitry Andric       if (CI) {
9330b57cec5SDimitry Andric         auto *SE = PSE.getSE();
9340b57cec5SDimitry Andric         Intrinsic::ID IntrinID = getVectorIntrinsicIDForCall(CI, TLI);
935349cc55cSDimitry Andric         for (unsigned i = 0, e = CI->arg_size(); i != e; ++i)
93681ad6265SDimitry Andric           if (isVectorIntrinsicWithScalarOpAtArg(IntrinID, i)) {
9370b57cec5SDimitry Andric             if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(i)), TheLoop)) {
9380b57cec5SDimitry Andric               reportVectorizationFailure("Found unvectorizable intrinsic",
9390b57cec5SDimitry Andric                   "intrinsic instruction cannot be vectorized",
9408bcb0991SDimitry Andric                   "CantVectorizeIntrinsic", ORE, TheLoop, CI);
9410b57cec5SDimitry Andric               return false;
9420b57cec5SDimitry Andric             }
9430b57cec5SDimitry Andric           }
9440b57cec5SDimitry Andric       }
9450b57cec5SDimitry Andric 
9465f757f3fSDimitry Andric       // If we found a vectorized variant of a function, note that so LV can
9475f757f3fSDimitry Andric       // make better decisions about maximum VF.
9485f757f3fSDimitry Andric       if (CI && !VFDatabase::getMappings(*CI).empty())
9495f757f3fSDimitry Andric         VecCallVariantsFound = true;
9505f757f3fSDimitry Andric 
9510b57cec5SDimitry Andric       // Check that the instruction return type is vectorizable.
9520b57cec5SDimitry Andric       // Also, we can't vectorize extractelement instructions.
9530b57cec5SDimitry Andric       if ((!VectorType::isValidElementType(I.getType()) &&
9540b57cec5SDimitry Andric            !I.getType()->isVoidTy()) ||
9550b57cec5SDimitry Andric           isa<ExtractElementInst>(I)) {
9560b57cec5SDimitry Andric         reportVectorizationFailure("Found unvectorizable type",
9570b57cec5SDimitry Andric             "instruction return type cannot be vectorized",
9588bcb0991SDimitry Andric             "CantVectorizeInstructionReturnType", ORE, TheLoop, &I);
9590b57cec5SDimitry Andric         return false;
9600b57cec5SDimitry Andric       }
9610b57cec5SDimitry Andric 
9620b57cec5SDimitry Andric       // Check that the stored type is vectorizable.
9630b57cec5SDimitry Andric       if (auto *ST = dyn_cast<StoreInst>(&I)) {
9640b57cec5SDimitry Andric         Type *T = ST->getValueOperand()->getType();
9650b57cec5SDimitry Andric         if (!VectorType::isValidElementType(T)) {
9660b57cec5SDimitry Andric           reportVectorizationFailure("Store instruction cannot be vectorized",
9670b57cec5SDimitry Andric                                      "store instruction cannot be vectorized",
9688bcb0991SDimitry Andric                                      "CantVectorizeStore", ORE, TheLoop, ST);
9690b57cec5SDimitry Andric           return false;
9700b57cec5SDimitry Andric         }
9710b57cec5SDimitry Andric 
9720b57cec5SDimitry Andric         // For nontemporal stores, check that a nontemporal vector version is
9730b57cec5SDimitry Andric         // supported on the target.
9740b57cec5SDimitry Andric         if (ST->getMetadata(LLVMContext::MD_nontemporal)) {
9750b57cec5SDimitry Andric           // Arbitrarily try a vector of 2 elements.
976e8d8bef9SDimitry Andric           auto *VecTy = FixedVectorType::get(T, /*NumElts=*/2);
9770b57cec5SDimitry Andric           assert(VecTy && "did not find vectorized version of stored type");
9785ffd83dbSDimitry Andric           if (!TTI->isLegalNTStore(VecTy, ST->getAlign())) {
9790b57cec5SDimitry Andric             reportVectorizationFailure(
9800b57cec5SDimitry Andric                 "nontemporal store instruction cannot be vectorized",
9810b57cec5SDimitry Andric                 "nontemporal store instruction cannot be vectorized",
9828bcb0991SDimitry Andric                 "CantVectorizeNontemporalStore", ORE, TheLoop, ST);
9830b57cec5SDimitry Andric             return false;
9840b57cec5SDimitry Andric           }
9850b57cec5SDimitry Andric         }
9860b57cec5SDimitry Andric 
9870b57cec5SDimitry Andric       } else if (auto *LD = dyn_cast<LoadInst>(&I)) {
9880b57cec5SDimitry Andric         if (LD->getMetadata(LLVMContext::MD_nontemporal)) {
9890b57cec5SDimitry Andric           // For nontemporal loads, check that a nontemporal vector version is
9900b57cec5SDimitry Andric           // supported on the target (arbitrarily try a vector of 2 elements).
991e8d8bef9SDimitry Andric           auto *VecTy = FixedVectorType::get(I.getType(), /*NumElts=*/2);
9920b57cec5SDimitry Andric           assert(VecTy && "did not find vectorized version of load type");
9935ffd83dbSDimitry Andric           if (!TTI->isLegalNTLoad(VecTy, LD->getAlign())) {
9940b57cec5SDimitry Andric             reportVectorizationFailure(
9950b57cec5SDimitry Andric                 "nontemporal load instruction cannot be vectorized",
9960b57cec5SDimitry Andric                 "nontemporal load instruction cannot be vectorized",
9978bcb0991SDimitry Andric                 "CantVectorizeNontemporalLoad", ORE, TheLoop, LD);
9980b57cec5SDimitry Andric             return false;
9990b57cec5SDimitry Andric           }
10000b57cec5SDimitry Andric         }
10010b57cec5SDimitry Andric 
10020b57cec5SDimitry Andric         // FP instructions can allow unsafe algebra, thus vectorizable by
10030b57cec5SDimitry Andric         // non-IEEE-754 compliant SIMD units.
10040b57cec5SDimitry Andric         // This applies to floating-point math operations and calls, not memory
10050b57cec5SDimitry Andric         // operations, shuffles, or casts, as they don't change precision or
10060b57cec5SDimitry Andric         // semantics.
10070b57cec5SDimitry Andric       } else if (I.getType()->isFloatingPointTy() && (CI || I.isBinaryOp()) &&
10080b57cec5SDimitry Andric                  !I.isFast()) {
10090b57cec5SDimitry Andric         LLVM_DEBUG(dbgs() << "LV: Found FP op with unsafe algebra.\n");
10100b57cec5SDimitry Andric         Hints->setPotentiallyUnsafe();
10110b57cec5SDimitry Andric       }
10120b57cec5SDimitry Andric 
10130b57cec5SDimitry Andric       // Reduction instructions are allowed to have exit users.
10140b57cec5SDimitry Andric       // All other instructions must not have external users.
10150b57cec5SDimitry Andric       if (hasOutsideLoopUser(TheLoop, &I, AllowedExit)) {
10160b57cec5SDimitry Andric         // We can safely vectorize loops where instructions within the loop are
10170b57cec5SDimitry Andric         // used outside the loop only if the SCEV predicates within the loop is
10180b57cec5SDimitry Andric         // same as outside the loop. Allowing the exit means reusing the SCEV
10190b57cec5SDimitry Andric         // outside the loop.
102081ad6265SDimitry Andric         if (PSE.getPredicate().isAlwaysTrue()) {
10210b57cec5SDimitry Andric           AllowedExit.insert(&I);
10220b57cec5SDimitry Andric           continue;
10230b57cec5SDimitry Andric         }
10240b57cec5SDimitry Andric         reportVectorizationFailure("Value cannot be used outside the loop",
10250b57cec5SDimitry Andric                                    "value cannot be used outside the loop",
10268bcb0991SDimitry Andric                                    "ValueUsedOutsideLoop", ORE, TheLoop, &I);
10270b57cec5SDimitry Andric         return false;
10280b57cec5SDimitry Andric       }
10290b57cec5SDimitry Andric     } // next instr.
10300b57cec5SDimitry Andric   }
10310b57cec5SDimitry Andric 
10320b57cec5SDimitry Andric   if (!PrimaryInduction) {
10330b57cec5SDimitry Andric     if (Inductions.empty()) {
10340b57cec5SDimitry Andric       reportVectorizationFailure("Did not find one integer induction var",
10350b57cec5SDimitry Andric           "loop induction variable could not be identified",
10368bcb0991SDimitry Andric           "NoInductionVariable", ORE, TheLoop);
10370b57cec5SDimitry Andric       return false;
10380b57cec5SDimitry Andric     } else if (!WidestIndTy) {
10390b57cec5SDimitry Andric       reportVectorizationFailure("Did not find one integer induction var",
10400b57cec5SDimitry Andric           "integer loop induction variable could not be identified",
10418bcb0991SDimitry Andric           "NoIntegerInductionVariable", ORE, TheLoop);
10420b57cec5SDimitry Andric       return false;
10430b57cec5SDimitry Andric     } else {
10440b57cec5SDimitry Andric       LLVM_DEBUG(dbgs() << "LV: Did not find one integer induction var.\n");
10450b57cec5SDimitry Andric     }
10460b57cec5SDimitry Andric   }
10470b57cec5SDimitry Andric 
10480b57cec5SDimitry Andric   // Now we know the widest induction type, check if our found induction
10490b57cec5SDimitry Andric   // is the same size. If it's not, unset it here and InnerLoopVectorizer
10500b57cec5SDimitry Andric   // will create another.
10510b57cec5SDimitry Andric   if (PrimaryInduction && WidestIndTy != PrimaryInduction->getType())
10520b57cec5SDimitry Andric     PrimaryInduction = nullptr;
10530b57cec5SDimitry Andric 
10540b57cec5SDimitry Andric   return true;
10550b57cec5SDimitry Andric }
10560b57cec5SDimitry Andric 
bool LoopVectorizationLegality::canVectorizeMemory() {
  // Run (or fetch the cached) loop-access analysis for this loop.
  LAI = &LAIs.getInfo(*TheLoop);
  // If LAA produced a remark explaining why it failed, forward it through the
  // vectorizer's own remark pass name so the user sees a single report.
  const OptimizationRemarkAnalysis *LAR = LAI->getReport();
  if (LAR) {
    ORE->emit([&]() {
      return OptimizationRemarkAnalysis(Hints->vectorizeAnalysisPassName(),
                                        "loop not vectorized: ", *LAR);
    });
  }

  if (!LAI->canVectorizeMemory())
    return false;

  // A load/store dependence through a loop-invariant address cannot be
  // handled; reject the loop outright.
  if (LAI->hasLoadStoreDependenceInvolvingLoopInvariantAddress()) {
    reportVectorizationFailure("We don't allow storing to uniform addresses",
                               "write to a loop invariant address could not "
                               "be vectorized",
                               "CantVectorizeStoreToLoopInvariantAddress", ORE,
                               TheLoop);
    return false;
  }

  // We can vectorize stores to invariant address when final reduction value is
  // guaranteed to be stored at the end of the loop. Also, if decision to
  // vectorize loop is made, runtime checks are added so as to make sure that
  // invariant address won't alias with any other objects.
  if (!LAI->getStoresToInvariantAddresses().empty()) {
    // For each invariant address, check if last stored value is unconditional
    // and the address is not calculated inside the loop.
    for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
      // Only stores that are the intermediate store of a reduction need the
      // checks below; other invariant stores are handled elsewhere.
      if (!isInvariantStoreOfReduction(SI))
        continue;

      // A conditional store of the recurring value cannot be vectorized:
      // the final value would depend on whether the predicate held on the
      // last iteration.
      if (blockNeedsPredication(SI->getParent())) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write of conditional recurring variant value to a loop "
            "invariant address could not be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }

      // Invariant address should be defined outside of loop. LICM pass usually
      // makes sure it happens, but in rare cases it does not, we do not want
      // to overcomplicate vectorization to support this case.
      if (Instruction *Ptr = dyn_cast<Instruction>(SI->getPointerOperand())) {
        if (TheLoop->contains(Ptr)) {
          reportVectorizationFailure(
              "Invariant address is calculated inside the loop",
              "write to a loop invariant address could not "
              "be vectorized",
              "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
          return false;
        }
      }
    }

    if (LAI->hasStoreStoreDependenceInvolvingLoopInvariantAddress()) {
      // For each invariant address, check its last stored value is the result
      // of one of our reductions.
      //
      // We do not check if dependence with loads exists because that is already
      // checked via hasLoadStoreDependenceInvolvingLoopInvariantAddress.
      ScalarEvolution *SE = PSE.getSE();
      SmallVector<StoreInst *, 4> UnhandledStores;
      for (StoreInst *SI : LAI->getStoresToInvariantAddresses()) {
        if (isInvariantStoreOfReduction(SI)) {
          // Earlier stores to this address are effectively deadcode.
          // With opaque pointers it is possible for one pointer to be used with
          // different sizes of stored values:
          //    store i32 0, ptr %x
          //    store i8 0, ptr %x
          // The latest store doesn't completely overwrite the first one in the
          // example. That is why we have to make sure that types of stored
          // values are same.
          // TODO: Check that bitwidth of unhandled store is smaller than the
          // one that overwrites it and add a test.
          erase_if(UnhandledStores, [SE, SI](StoreInst *I) {
            return storeToSameAddress(SE, SI, I) &&
                   I->getValueOperand()->getType() ==
                       SI->getValueOperand()->getType();
          });
          continue;
        }
        UnhandledStores.push_back(SI);
      }

      // Any invariant store left over is not the final store of a reduction
      // (and was not overwritten by one), so we cannot vectorize.
      bool IsOK = UnhandledStores.empty();
      // TODO: we should also validate against InvariantMemSets.
      if (!IsOK) {
        reportVectorizationFailure(
            "We don't allow storing to uniform addresses",
            "write to a loop invariant address could not "
            "be vectorized",
            "CantVectorizeStoreToLoopInvariantAddress", ORE, TheLoop);
        return false;
      }
    }
  }

  // Adopt any SCEV predicates (e.g. runtime-check assumptions) LAA required.
  PSE.addPredicate(LAI->getPSE().getPredicate());
  return true;
}
11600b57cec5SDimitry Andric 
1161fe6060f1SDimitry Andric bool LoopVectorizationLegality::canVectorizeFPMath(
1162fe6060f1SDimitry Andric     bool EnableStrictReductions) {
1163fe6060f1SDimitry Andric 
1164fe6060f1SDimitry Andric   // First check if there is any ExactFP math or if we allow reassociations
1165fe6060f1SDimitry Andric   if (!Requirements->getExactFPInst() || Hints->allowReordering())
1166fe6060f1SDimitry Andric     return true;
1167fe6060f1SDimitry Andric 
1168fe6060f1SDimitry Andric   // If the above is false, we have ExactFPMath & do not allow reordering.
1169fe6060f1SDimitry Andric   // If the EnableStrictReductions flag is set, first check if we have any
1170fe6060f1SDimitry Andric   // Exact FP induction vars, which we cannot vectorize.
1171fe6060f1SDimitry Andric   if (!EnableStrictReductions ||
1172fe6060f1SDimitry Andric       any_of(getInductionVars(), [&](auto &Induction) -> bool {
1173fe6060f1SDimitry Andric         InductionDescriptor IndDesc = Induction.second;
1174fe6060f1SDimitry Andric         return IndDesc.getExactFPMathInst();
1175fe6060f1SDimitry Andric       }))
1176fe6060f1SDimitry Andric     return false;
1177fe6060f1SDimitry Andric 
1178fe6060f1SDimitry Andric   // We can now only vectorize if all reductions with Exact FP math also
1179fe6060f1SDimitry Andric   // have the isOrdered flag set, which indicates that we can move the
1180fe6060f1SDimitry Andric   // reduction operations in-loop.
1181fe6060f1SDimitry Andric   return (all_of(getReductionVars(), [&](auto &Reduction) -> bool {
1182fe6060f1SDimitry Andric     const RecurrenceDescriptor &RdxDesc = Reduction.second;
1183fe6060f1SDimitry Andric     return !RdxDesc.hasExactFPMath() || RdxDesc.isOrdered();
1184fe6060f1SDimitry Andric   }));
1185fe6060f1SDimitry Andric }
1186fe6060f1SDimitry Andric 
118781ad6265SDimitry Andric bool LoopVectorizationLegality::isInvariantStoreOfReduction(StoreInst *SI) {
118881ad6265SDimitry Andric   return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
118981ad6265SDimitry Andric     const RecurrenceDescriptor &RdxDesc = Reduction.second;
119081ad6265SDimitry Andric     return RdxDesc.IntermediateStore == SI;
119181ad6265SDimitry Andric   });
119281ad6265SDimitry Andric }
119381ad6265SDimitry Andric 
119481ad6265SDimitry Andric bool LoopVectorizationLegality::isInvariantAddressOfReduction(Value *V) {
119581ad6265SDimitry Andric   return any_of(getReductionVars(), [&](auto &Reduction) -> bool {
119681ad6265SDimitry Andric     const RecurrenceDescriptor &RdxDesc = Reduction.second;
119781ad6265SDimitry Andric     if (!RdxDesc.IntermediateStore)
119881ad6265SDimitry Andric       return false;
119981ad6265SDimitry Andric 
120081ad6265SDimitry Andric     ScalarEvolution *SE = PSE.getSE();
120181ad6265SDimitry Andric     Value *InvariantAddress = RdxDesc.IntermediateStore->getPointerOperand();
120281ad6265SDimitry Andric     return V == InvariantAddress ||
120381ad6265SDimitry Andric            SE->getSCEV(V) == SE->getSCEV(InvariantAddress);
120481ad6265SDimitry Andric   });
120581ad6265SDimitry Andric }
120681ad6265SDimitry Andric 
12070eae32dcSDimitry Andric bool LoopVectorizationLegality::isInductionPhi(const Value *V) const {
12080b57cec5SDimitry Andric   Value *In0 = const_cast<Value *>(V);
12090b57cec5SDimitry Andric   PHINode *PN = dyn_cast_or_null<PHINode>(In0);
12100b57cec5SDimitry Andric   if (!PN)
12110b57cec5SDimitry Andric     return false;
12120b57cec5SDimitry Andric 
12130b57cec5SDimitry Andric   return Inductions.count(PN);
12140b57cec5SDimitry Andric }
12150b57cec5SDimitry Andric 
12160eae32dcSDimitry Andric const InductionDescriptor *
12170eae32dcSDimitry Andric LoopVectorizationLegality::getIntOrFpInductionDescriptor(PHINode *Phi) const {
12180eae32dcSDimitry Andric   if (!isInductionPhi(Phi))
12190eae32dcSDimitry Andric     return nullptr;
12200eae32dcSDimitry Andric   auto &ID = getInductionVars().find(Phi)->second;
12210eae32dcSDimitry Andric   if (ID.getKind() == InductionDescriptor::IK_IntInduction ||
12220eae32dcSDimitry Andric       ID.getKind() == InductionDescriptor::IK_FpInduction)
12230eae32dcSDimitry Andric     return &ID;
12240eae32dcSDimitry Andric   return nullptr;
12250eae32dcSDimitry Andric }
12260eae32dcSDimitry Andric 
122781ad6265SDimitry Andric const InductionDescriptor *
122881ad6265SDimitry Andric LoopVectorizationLegality::getPointerInductionDescriptor(PHINode *Phi) const {
122981ad6265SDimitry Andric   if (!isInductionPhi(Phi))
123081ad6265SDimitry Andric     return nullptr;
123181ad6265SDimitry Andric   auto &ID = getInductionVars().find(Phi)->second;
123281ad6265SDimitry Andric   if (ID.getKind() == InductionDescriptor::IK_PtrInduction)
123381ad6265SDimitry Andric     return &ID;
123481ad6265SDimitry Andric   return nullptr;
123581ad6265SDimitry Andric }
123681ad6265SDimitry Andric 
12370eae32dcSDimitry Andric bool LoopVectorizationLegality::isCastedInductionVariable(
12380eae32dcSDimitry Andric     const Value *V) const {
12390b57cec5SDimitry Andric   auto *Inst = dyn_cast<Instruction>(V);
12400b57cec5SDimitry Andric   return (Inst && InductionCastsToIgnore.count(Inst));
12410b57cec5SDimitry Andric }
12420b57cec5SDimitry Andric 
12430eae32dcSDimitry Andric bool LoopVectorizationLegality::isInductionVariable(const Value *V) const {
12440b57cec5SDimitry Andric   return isInductionPhi(V) || isCastedInductionVariable(V);
12450b57cec5SDimitry Andric }
12460b57cec5SDimitry Andric 
1247bdd1243dSDimitry Andric bool LoopVectorizationLegality::isFixedOrderRecurrence(
12480eae32dcSDimitry Andric     const PHINode *Phi) const {
1249bdd1243dSDimitry Andric   return FixedOrderRecurrences.count(Phi);
12500b57cec5SDimitry Andric }
12510b57cec5SDimitry Andric 
bool LoopVectorizationLegality::blockNeedsPredication(BasicBlock *BB) const {
  // Delegate to the shared LoopAccessInfo helper, which consults the
  // dominator tree DT to decide whether \p BB requires predication within
  // TheLoop.
  return LoopAccessInfo::blockNeedsPredication(BB, TheLoop, DT);
}
12550b57cec5SDimitry Andric 
12560b57cec5SDimitry Andric bool LoopVectorizationLegality::blockCanBePredicated(
1257e8d8bef9SDimitry Andric     BasicBlock *BB, SmallPtrSetImpl<Value *> &SafePtrs,
12585f757f3fSDimitry Andric     SmallPtrSetImpl<const Instruction *> &MaskedOp) const {
12590b57cec5SDimitry Andric   for (Instruction &I : *BB) {
12605ffd83dbSDimitry Andric     // We can predicate blocks with calls to assume, as long as we drop them in
12615ffd83dbSDimitry Andric     // case we flatten the CFG via predication.
12625ffd83dbSDimitry Andric     if (match(&I, m_Intrinsic<Intrinsic::assume>())) {
12635f757f3fSDimitry Andric       MaskedOp.insert(&I);
12645ffd83dbSDimitry Andric       continue;
12655ffd83dbSDimitry Andric     }
12665ffd83dbSDimitry Andric 
1267e8d8bef9SDimitry Andric     // Do not let llvm.experimental.noalias.scope.decl block the vectorization.
1268e8d8bef9SDimitry Andric     // TODO: there might be cases that it should block the vectorization. Let's
1269e8d8bef9SDimitry Andric     // ignore those for now.
1270e8d8bef9SDimitry Andric     if (isa<NoAliasScopeDeclInst>(&I))
1271e8d8bef9SDimitry Andric       continue;
1272e8d8bef9SDimitry Andric 
127306c3fb27SDimitry Andric     // We can allow masked calls if there's at least one vector variant, even
127406c3fb27SDimitry Andric     // if we end up scalarizing due to the cost model calculations.
127506c3fb27SDimitry Andric     // TODO: Allow other calls if they have appropriate attributes... readonly
127606c3fb27SDimitry Andric     // and argmemonly?
127706c3fb27SDimitry Andric     if (CallInst *CI = dyn_cast<CallInst>(&I))
127806c3fb27SDimitry Andric       if (VFDatabase::hasMaskedVariant(*CI)) {
127906c3fb27SDimitry Andric         MaskedOp.insert(CI);
128006c3fb27SDimitry Andric         continue;
128106c3fb27SDimitry Andric       }
128206c3fb27SDimitry Andric 
1283bdd1243dSDimitry Andric     // Loads are handled via masking (or speculated if safe to do so.)
1284bdd1243dSDimitry Andric     if (auto *LI = dyn_cast<LoadInst>(&I)) {
1285bdd1243dSDimitry Andric       if (!SafePtrs.count(LI->getPointerOperand()))
12860b57cec5SDimitry Andric         MaskedOp.insert(LI);
12870b57cec5SDimitry Andric       continue;
12880b57cec5SDimitry Andric     }
12890b57cec5SDimitry Andric 
12900b57cec5SDimitry Andric     // Predicated store requires some form of masking:
12910b57cec5SDimitry Andric     // 1) masked store HW instruction,
12920b57cec5SDimitry Andric     // 2) emulation via load-blend-store (only if safe and legal to do so,
12930b57cec5SDimitry Andric     //    be aware on the race conditions), or
12940b57cec5SDimitry Andric     // 3) element-by-element predicate check and scalar store.
1295bdd1243dSDimitry Andric     if (auto *SI = dyn_cast<StoreInst>(&I)) {
12960b57cec5SDimitry Andric       MaskedOp.insert(SI);
12970b57cec5SDimitry Andric       continue;
12980b57cec5SDimitry Andric     }
1299bdd1243dSDimitry Andric 
1300bdd1243dSDimitry Andric     if (I.mayReadFromMemory() || I.mayWriteToMemory() || I.mayThrow())
13010b57cec5SDimitry Andric       return false;
13020b57cec5SDimitry Andric   }
13030b57cec5SDimitry Andric 
13040b57cec5SDimitry Andric   return true;
13050b57cec5SDimitry Andric }
13060b57cec5SDimitry Andric 
13070b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeWithIfConvert() {
13080b57cec5SDimitry Andric   if (!EnableIfConversion) {
13090b57cec5SDimitry Andric     reportVectorizationFailure("If-conversion is disabled",
13100b57cec5SDimitry Andric                                "if-conversion is disabled",
13118bcb0991SDimitry Andric                                "IfConversionDisabled",
13128bcb0991SDimitry Andric                                ORE, TheLoop);
13130b57cec5SDimitry Andric     return false;
13140b57cec5SDimitry Andric   }
13150b57cec5SDimitry Andric 
13160b57cec5SDimitry Andric   assert(TheLoop->getNumBlocks() > 1 && "Single block loops are vectorizable");
13170b57cec5SDimitry Andric 
13188bcb0991SDimitry Andric   // A list of pointers which are known to be dereferenceable within scope of
13198bcb0991SDimitry Andric   // the loop body for each iteration of the loop which executes.  That is,
13208bcb0991SDimitry Andric   // the memory pointed to can be dereferenced (with the access size implied by
13218bcb0991SDimitry Andric   // the value's type) unconditionally within the loop header without
13228bcb0991SDimitry Andric   // introducing a new fault.
13235ffd83dbSDimitry Andric   SmallPtrSet<Value *, 8> SafePointers;
13240b57cec5SDimitry Andric 
13250b57cec5SDimitry Andric   // Collect safe addresses.
13260b57cec5SDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
13278bcb0991SDimitry Andric     if (!blockNeedsPredication(BB)) {
13280b57cec5SDimitry Andric       for (Instruction &I : *BB)
13290b57cec5SDimitry Andric         if (auto *Ptr = getLoadStorePointerOperand(&I))
13305ffd83dbSDimitry Andric           SafePointers.insert(Ptr);
13318bcb0991SDimitry Andric       continue;
13328bcb0991SDimitry Andric     }
13338bcb0991SDimitry Andric 
13348bcb0991SDimitry Andric     // For a block which requires predication, a address may be safe to access
13358bcb0991SDimitry Andric     // in the loop w/o predication if we can prove dereferenceability facts
13368bcb0991SDimitry Andric     // sufficient to ensure it'll never fault within the loop. For the moment,
13378bcb0991SDimitry Andric     // we restrict this to loads; stores are more complicated due to
13388bcb0991SDimitry Andric     // concurrency restrictions.
13398bcb0991SDimitry Andric     ScalarEvolution &SE = *PSE.getSE();
13408bcb0991SDimitry Andric     for (Instruction &I : *BB) {
13418bcb0991SDimitry Andric       LoadInst *LI = dyn_cast<LoadInst>(&I);
1342e8d8bef9SDimitry Andric       if (LI && !LI->getType()->isVectorTy() && !mustSuppressSpeculation(*LI) &&
1343bdd1243dSDimitry Andric           isDereferenceableAndAlignedInLoop(LI, TheLoop, SE, *DT, AC))
13445ffd83dbSDimitry Andric         SafePointers.insert(LI->getPointerOperand());
13458bcb0991SDimitry Andric     }
13460b57cec5SDimitry Andric   }
13470b57cec5SDimitry Andric 
13480b57cec5SDimitry Andric   // Collect the blocks that need predication.
13490b57cec5SDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
13500b57cec5SDimitry Andric     // We don't support switch statements inside loops.
13510b57cec5SDimitry Andric     if (!isa<BranchInst>(BB->getTerminator())) {
13520b57cec5SDimitry Andric       reportVectorizationFailure("Loop contains a switch statement",
13530b57cec5SDimitry Andric                                  "loop contains a switch statement",
13548bcb0991SDimitry Andric                                  "LoopContainsSwitch", ORE, TheLoop,
13558bcb0991SDimitry Andric                                  BB->getTerminator());
13560b57cec5SDimitry Andric       return false;
13570b57cec5SDimitry Andric     }
13580b57cec5SDimitry Andric 
13590b57cec5SDimitry Andric     // We must be able to predicate all blocks that need to be predicated.
13605f757f3fSDimitry Andric     if (blockNeedsPredication(BB) &&
13615f757f3fSDimitry Andric         !blockCanBePredicated(BB, SafePointers, MaskedOp)) {
13620b57cec5SDimitry Andric       reportVectorizationFailure(
13630b57cec5SDimitry Andric           "Control flow cannot be substituted for a select",
13645f757f3fSDimitry Andric           "control flow cannot be substituted for a select", "NoCFGForSelect",
13655f757f3fSDimitry Andric           ORE, TheLoop, BB->getTerminator());
13660b57cec5SDimitry Andric       return false;
13670b57cec5SDimitry Andric     }
13680b57cec5SDimitry Andric   }
13690b57cec5SDimitry Andric 
13700b57cec5SDimitry Andric   // We can if-convert this loop.
13710b57cec5SDimitry Andric   return true;
13720b57cec5SDimitry Andric }
13730b57cec5SDimitry Andric 
13740b57cec5SDimitry Andric // Helper function to canVectorizeLoopNestCFG.
13750b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopCFG(Loop *Lp,
13760b57cec5SDimitry Andric                                                     bool UseVPlanNativePath) {
1377e8d8bef9SDimitry Andric   assert((UseVPlanNativePath || Lp->isInnermost()) &&
13780b57cec5SDimitry Andric          "VPlan-native path is not enabled.");
13790b57cec5SDimitry Andric 
13800b57cec5SDimitry Andric   // TODO: ORE should be improved to show more accurate information when an
13810b57cec5SDimitry Andric   // outer loop can't be vectorized because a nested loop is not understood or
13820b57cec5SDimitry Andric   // legal. Something like: "outer_loop_location: loop not vectorized:
13830b57cec5SDimitry Andric   // (inner_loop_location) loop control flow is not understood by vectorizer".
13840b57cec5SDimitry Andric 
13850b57cec5SDimitry Andric   // Store the result and return it at the end instead of exiting early, in case
13860b57cec5SDimitry Andric   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
13870b57cec5SDimitry Andric   bool Result = true;
13880b57cec5SDimitry Andric   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
13890b57cec5SDimitry Andric 
13900b57cec5SDimitry Andric   // We must have a loop in canonical form. Loops with indirectbr in them cannot
13910b57cec5SDimitry Andric   // be canonicalized.
13920b57cec5SDimitry Andric   if (!Lp->getLoopPreheader()) {
13930b57cec5SDimitry Andric     reportVectorizationFailure("Loop doesn't have a legal pre-header",
13940b57cec5SDimitry Andric         "loop control flow is not understood by vectorizer",
13958bcb0991SDimitry Andric         "CFGNotUnderstood", ORE, TheLoop);
13960b57cec5SDimitry Andric     if (DoExtraAnalysis)
13970b57cec5SDimitry Andric       Result = false;
13980b57cec5SDimitry Andric     else
13990b57cec5SDimitry Andric       return false;
14000b57cec5SDimitry Andric   }
14010b57cec5SDimitry Andric 
14020b57cec5SDimitry Andric   // We must have a single backedge.
14030b57cec5SDimitry Andric   if (Lp->getNumBackEdges() != 1) {
14040b57cec5SDimitry Andric     reportVectorizationFailure("The loop must have a single backedge",
14050b57cec5SDimitry Andric         "loop control flow is not understood by vectorizer",
14068bcb0991SDimitry Andric         "CFGNotUnderstood", ORE, TheLoop);
14070b57cec5SDimitry Andric     if (DoExtraAnalysis)
14080b57cec5SDimitry Andric       Result = false;
14090b57cec5SDimitry Andric     else
14100b57cec5SDimitry Andric       return false;
14110b57cec5SDimitry Andric   }
14120b57cec5SDimitry Andric 
14130b57cec5SDimitry Andric   return Result;
14140b57cec5SDimitry Andric }
14150b57cec5SDimitry Andric 
14160b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorizeLoopNestCFG(
14170b57cec5SDimitry Andric     Loop *Lp, bool UseVPlanNativePath) {
14180b57cec5SDimitry Andric   // Store the result and return it at the end instead of exiting early, in case
14190b57cec5SDimitry Andric   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
14200b57cec5SDimitry Andric   bool Result = true;
14210b57cec5SDimitry Andric   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
14220b57cec5SDimitry Andric   if (!canVectorizeLoopCFG(Lp, UseVPlanNativePath)) {
14230b57cec5SDimitry Andric     if (DoExtraAnalysis)
14240b57cec5SDimitry Andric       Result = false;
14250b57cec5SDimitry Andric     else
14260b57cec5SDimitry Andric       return false;
14270b57cec5SDimitry Andric   }
14280b57cec5SDimitry Andric 
14290b57cec5SDimitry Andric   // Recursively check whether the loop control flow of nested loops is
14300b57cec5SDimitry Andric   // understood.
14310b57cec5SDimitry Andric   for (Loop *SubLp : *Lp)
14320b57cec5SDimitry Andric     if (!canVectorizeLoopNestCFG(SubLp, UseVPlanNativePath)) {
14330b57cec5SDimitry Andric       if (DoExtraAnalysis)
14340b57cec5SDimitry Andric         Result = false;
14350b57cec5SDimitry Andric       else
14360b57cec5SDimitry Andric         return false;
14370b57cec5SDimitry Andric     }
14380b57cec5SDimitry Andric 
14390b57cec5SDimitry Andric   return Result;
14400b57cec5SDimitry Andric }
14410b57cec5SDimitry Andric 
14420b57cec5SDimitry Andric bool LoopVectorizationLegality::canVectorize(bool UseVPlanNativePath) {
14430b57cec5SDimitry Andric   // Store the result and return it at the end instead of exiting early, in case
14440b57cec5SDimitry Andric   // allowExtraAnalysis is used to report multiple reasons for not vectorizing.
14450b57cec5SDimitry Andric   bool Result = true;
14460b57cec5SDimitry Andric 
14470b57cec5SDimitry Andric   bool DoExtraAnalysis = ORE->allowExtraAnalysis(DEBUG_TYPE);
14480b57cec5SDimitry Andric   // Check whether the loop-related control flow in the loop nest is expected by
14490b57cec5SDimitry Andric   // vectorizer.
14500b57cec5SDimitry Andric   if (!canVectorizeLoopNestCFG(TheLoop, UseVPlanNativePath)) {
14510b57cec5SDimitry Andric     if (DoExtraAnalysis)
14520b57cec5SDimitry Andric       Result = false;
14530b57cec5SDimitry Andric     else
14540b57cec5SDimitry Andric       return false;
14550b57cec5SDimitry Andric   }
14560b57cec5SDimitry Andric 
14570b57cec5SDimitry Andric   // We need to have a loop header.
14580b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "LV: Found a loop: " << TheLoop->getHeader()->getName()
14590b57cec5SDimitry Andric                     << '\n');
14600b57cec5SDimitry Andric 
14610b57cec5SDimitry Andric   // Specific checks for outer loops. We skip the remaining legal checks at this
14620b57cec5SDimitry Andric   // point because they don't support outer loops.
1463e8d8bef9SDimitry Andric   if (!TheLoop->isInnermost()) {
14640b57cec5SDimitry Andric     assert(UseVPlanNativePath && "VPlan-native path is not enabled.");
14650b57cec5SDimitry Andric 
14660b57cec5SDimitry Andric     if (!canVectorizeOuterLoop()) {
14670b57cec5SDimitry Andric       reportVectorizationFailure("Unsupported outer loop",
14680b57cec5SDimitry Andric                                  "unsupported outer loop",
14698bcb0991SDimitry Andric                                  "UnsupportedOuterLoop",
14708bcb0991SDimitry Andric                                  ORE, TheLoop);
14710b57cec5SDimitry Andric       // TODO: Implement DoExtraAnalysis when subsequent legal checks support
14720b57cec5SDimitry Andric       // outer loops.
14730b57cec5SDimitry Andric       return false;
14740b57cec5SDimitry Andric     }
14750b57cec5SDimitry Andric 
14760b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: We can vectorize this outer loop!\n");
14770b57cec5SDimitry Andric     return Result;
14780b57cec5SDimitry Andric   }
14790b57cec5SDimitry Andric 
1480e8d8bef9SDimitry Andric   assert(TheLoop->isInnermost() && "Inner loop expected.");
14810b57cec5SDimitry Andric   // Check if we can if-convert non-single-bb loops.
14820b57cec5SDimitry Andric   unsigned NumBlocks = TheLoop->getNumBlocks();
14830b57cec5SDimitry Andric   if (NumBlocks != 1 && !canVectorizeWithIfConvert()) {
14840b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Can't if-convert the loop.\n");
14850b57cec5SDimitry Andric     if (DoExtraAnalysis)
14860b57cec5SDimitry Andric       Result = false;
14870b57cec5SDimitry Andric     else
14880b57cec5SDimitry Andric       return false;
14890b57cec5SDimitry Andric   }
14900b57cec5SDimitry Andric 
14910b57cec5SDimitry Andric   // Check if we can vectorize the instructions and CFG in this loop.
14920b57cec5SDimitry Andric   if (!canVectorizeInstrs()) {
14930b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Can't vectorize the instructions or CFG\n");
14940b57cec5SDimitry Andric     if (DoExtraAnalysis)
14950b57cec5SDimitry Andric       Result = false;
14960b57cec5SDimitry Andric     else
14970b57cec5SDimitry Andric       return false;
14980b57cec5SDimitry Andric   }
14990b57cec5SDimitry Andric 
15000b57cec5SDimitry Andric   // Go over each instruction and look at memory deps.
15010b57cec5SDimitry Andric   if (!canVectorizeMemory()) {
15020b57cec5SDimitry Andric     LLVM_DEBUG(dbgs() << "LV: Can't vectorize due to memory conflicts\n");
15030b57cec5SDimitry Andric     if (DoExtraAnalysis)
15040b57cec5SDimitry Andric       Result = false;
15050b57cec5SDimitry Andric     else
15060b57cec5SDimitry Andric       return false;
15070b57cec5SDimitry Andric   }
15080b57cec5SDimitry Andric 
1509*0fca6ea1SDimitry Andric   if (isa<SCEVCouldNotCompute>(PSE.getBackedgeTakenCount())) {
1510*0fca6ea1SDimitry Andric     reportVectorizationFailure("could not determine number of loop iterations",
1511*0fca6ea1SDimitry Andric                                "could not determine number of loop iterations",
1512*0fca6ea1SDimitry Andric                                "CantComputeNumberOfIterations", ORE, TheLoop);
1513*0fca6ea1SDimitry Andric     if (DoExtraAnalysis)
1514*0fca6ea1SDimitry Andric       Result = false;
1515*0fca6ea1SDimitry Andric     else
1516*0fca6ea1SDimitry Andric       return false;
1517*0fca6ea1SDimitry Andric   }
1518*0fca6ea1SDimitry Andric 
15190b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "LV: We can vectorize this loop"
15200b57cec5SDimitry Andric                     << (LAI->getRuntimePointerChecking()->Need
15210b57cec5SDimitry Andric                             ? " (with a runtime bound check)"
15220b57cec5SDimitry Andric                             : "")
15230b57cec5SDimitry Andric                     << "!\n");
15240b57cec5SDimitry Andric 
15250b57cec5SDimitry Andric   unsigned SCEVThreshold = VectorizeSCEVCheckThreshold;
15260b57cec5SDimitry Andric   if (Hints->getForce() == LoopVectorizeHints::FK_Enabled)
15270b57cec5SDimitry Andric     SCEVThreshold = PragmaVectorizeSCEVCheckThreshold;
15280b57cec5SDimitry Andric 
152981ad6265SDimitry Andric   if (PSE.getPredicate().getComplexity() > SCEVThreshold) {
15300b57cec5SDimitry Andric     reportVectorizationFailure("Too many SCEV checks needed",
15310b57cec5SDimitry Andric         "Too many SCEV assumptions need to be made and checked at runtime",
15328bcb0991SDimitry Andric         "TooManySCEVRunTimeChecks", ORE, TheLoop);
15330b57cec5SDimitry Andric     if (DoExtraAnalysis)
15340b57cec5SDimitry Andric       Result = false;
15350b57cec5SDimitry Andric     else
15360b57cec5SDimitry Andric       return false;
15370b57cec5SDimitry Andric   }
15380b57cec5SDimitry Andric 
15390b57cec5SDimitry Andric   // Okay! We've done all the tests. If any have failed, return false. Otherwise
15400b57cec5SDimitry Andric   // we can vectorize, and at this point we don't have any other mem analysis
15410b57cec5SDimitry Andric   // which may limit our maximum vectorization factor, so just return true with
15420b57cec5SDimitry Andric   // no restrictions.
15430b57cec5SDimitry Andric   return Result;
15440b57cec5SDimitry Andric }
15450b57cec5SDimitry Andric 
1546*0fca6ea1SDimitry Andric bool LoopVectorizationLegality::canFoldTailByMasking() const {
15470b57cec5SDimitry Andric 
15480b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "LV: checking if tail can be folded by masking.\n");
15490b57cec5SDimitry Andric 
15508bcb0991SDimitry Andric   SmallPtrSet<const Value *, 8> ReductionLiveOuts;
15510b57cec5SDimitry Andric 
1552bdd1243dSDimitry Andric   for (const auto &Reduction : getReductionVars())
15538bcb0991SDimitry Andric     ReductionLiveOuts.insert(Reduction.second.getLoopExitInstr());
15548bcb0991SDimitry Andric 
15558bcb0991SDimitry Andric   // TODO: handle non-reduction outside users when tail is folded by masking.
15560b57cec5SDimitry Andric   for (auto *AE : AllowedExit) {
15578bcb0991SDimitry Andric     // Check that all users of allowed exit values are inside the loop or
15588bcb0991SDimitry Andric     // are the live-out of a reduction.
15598bcb0991SDimitry Andric     if (ReductionLiveOuts.count(AE))
15608bcb0991SDimitry Andric       continue;
15610b57cec5SDimitry Andric     for (User *U : AE->users()) {
15620b57cec5SDimitry Andric       Instruction *UI = cast<Instruction>(U);
15630b57cec5SDimitry Andric       if (TheLoop->contains(UI))
15640b57cec5SDimitry Andric         continue;
1565e8d8bef9SDimitry Andric       LLVM_DEBUG(
1566e8d8bef9SDimitry Andric           dbgs()
1567e8d8bef9SDimitry Andric           << "LV: Cannot fold tail by masking, loop has an outside user for "
1568e8d8bef9SDimitry Andric           << *UI << "\n");
15690b57cec5SDimitry Andric       return false;
15700b57cec5SDimitry Andric     }
15710b57cec5SDimitry Andric   }
15720b57cec5SDimitry Andric 
1573*0fca6ea1SDimitry Andric   for (const auto &Entry : getInductionVars()) {
1574*0fca6ea1SDimitry Andric     PHINode *OrigPhi = Entry.first;
1575*0fca6ea1SDimitry Andric     for (User *U : OrigPhi->users()) {
1576*0fca6ea1SDimitry Andric       auto *UI = cast<Instruction>(U);
1577*0fca6ea1SDimitry Andric       if (!TheLoop->contains(UI)) {
1578*0fca6ea1SDimitry Andric         LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking, loop IV has an "
1579*0fca6ea1SDimitry Andric                              "outside user for "
1580*0fca6ea1SDimitry Andric                           << *UI << "\n");
1581*0fca6ea1SDimitry Andric         return false;
1582*0fca6ea1SDimitry Andric       }
1583*0fca6ea1SDimitry Andric     }
1584*0fca6ea1SDimitry Andric   }
1585*0fca6ea1SDimitry Andric 
15860b57cec5SDimitry Andric   // The list of pointers that we can safely read and write to remains empty.
15870b57cec5SDimitry Andric   SmallPtrSet<Value *, 8> SafePointers;
15880b57cec5SDimitry Andric 
1589*0fca6ea1SDimitry Andric   // Check all blocks for predication, including those that ordinarily do not
1590*0fca6ea1SDimitry Andric   // need predication such as the header block.
1591e8d8bef9SDimitry Andric   SmallPtrSet<const Instruction *, 8> TmpMaskedOp;
15920b57cec5SDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
15935f757f3fSDimitry Andric     if (!blockCanBePredicated(BB, SafePointers, TmpMaskedOp)) {
1594*0fca6ea1SDimitry Andric       LLVM_DEBUG(dbgs() << "LV: Cannot fold tail by masking.\n");
15950b57cec5SDimitry Andric       return false;
15960b57cec5SDimitry Andric     }
15970b57cec5SDimitry Andric   }
15980b57cec5SDimitry Andric 
15990b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "LV: can fold tail by masking.\n");
1600e8d8bef9SDimitry Andric 
16010b57cec5SDimitry Andric   return true;
16020b57cec5SDimitry Andric }
16030b57cec5SDimitry Andric 
1604*0fca6ea1SDimitry Andric void LoopVectorizationLegality::prepareToFoldTailByMasking() {
1605*0fca6ea1SDimitry Andric   // The list of pointers that we can safely read and write to remains empty.
1606*0fca6ea1SDimitry Andric   SmallPtrSet<Value *, 8> SafePointers;
1607*0fca6ea1SDimitry Andric 
1608*0fca6ea1SDimitry Andric   // Mark all blocks for predication, including those that ordinarily do not
1609*0fca6ea1SDimitry Andric   // need predication such as the header block.
1610*0fca6ea1SDimitry Andric   for (BasicBlock *BB : TheLoop->blocks()) {
1611*0fca6ea1SDimitry Andric     [[maybe_unused]] bool R = blockCanBePredicated(BB, SafePointers, MaskedOp);
1612*0fca6ea1SDimitry Andric     assert(R && "Must be able to predicate block when tail-folding.");
1613*0fca6ea1SDimitry Andric   }
1614*0fca6ea1SDimitry Andric }
1615*0fca6ea1SDimitry Andric 
16160b57cec5SDimitry Andric } // namespace llvm
1617