xref: /freebsd-src/contrib/llvm-project/llvm/lib/CodeGen/InterleavedAccessPass.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
10b57cec5SDimitry Andric //===- InterleavedAccessPass.cpp ------------------------------------------===//
20b57cec5SDimitry Andric //
30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
60b57cec5SDimitry Andric //
70b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
80b57cec5SDimitry Andric //
90b57cec5SDimitry Andric // This file implements the Interleaved Access pass, which identifies
100b57cec5SDimitry Andric // interleaved memory accesses and transforms them into target specific
110b57cec5SDimitry Andric // intrinsics.
120b57cec5SDimitry Andric //
130b57cec5SDimitry Andric // An interleaved load reads data from memory into several vectors, with
140b57cec5SDimitry Andric // DE-interleaving the data on a factor. An interleaved store writes several
150b57cec5SDimitry Andric // vectors to memory with RE-interleaving the data on a factor.
160b57cec5SDimitry Andric //
170b57cec5SDimitry Andric // As interleaved accesses are difficult to identified in CodeGen (mainly
180b57cec5SDimitry Andric // because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector
190b57cec5SDimitry Andric // IR), we identify and transform them to intrinsics in this pass so the
200b57cec5SDimitry Andric // intrinsics can be easily matched into target specific instructions later in
210b57cec5SDimitry Andric // CodeGen.
220b57cec5SDimitry Andric //
230b57cec5SDimitry Andric // E.g. An interleaved load (Factor = 2):
240b57cec5SDimitry Andric //        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
25e8d8bef9SDimitry Andric //        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6>
26e8d8bef9SDimitry Andric //        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7>
270b57cec5SDimitry Andric //
280b57cec5SDimitry Andric // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
290b57cec5SDimitry Andric // intrinsic in ARM backend.
300b57cec5SDimitry Andric //
310b57cec5SDimitry Andric // In X86, this can be further optimized into a set of target
320b57cec5SDimitry Andric // specific loads followed by an optimized sequence of shuffles.
330b57cec5SDimitry Andric //
340b57cec5SDimitry Andric // E.g. An interleaved store (Factor = 3):
350b57cec5SDimitry Andric //        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
360b57cec5SDimitry Andric //                                    <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
370b57cec5SDimitry Andric //        store <12 x i32> %i.vec, <12 x i32>* %ptr
380b57cec5SDimitry Andric //
390b57cec5SDimitry Andric // It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
400b57cec5SDimitry Andric // intrinsic in ARM backend.
410b57cec5SDimitry Andric //
420b57cec5SDimitry Andric // Similarly, a set of interleaved stores can be transformed into an optimized
430b57cec5SDimitry Andric // sequence of shuffles followed by a set of target specific stores for X86.
440b57cec5SDimitry Andric //
450b57cec5SDimitry Andric //===----------------------------------------------------------------------===//
460b57cec5SDimitry Andric 
470b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h"
480b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h"
4981ad6265SDimitry Andric #include "llvm/ADT/SetVector.h"
500b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h"
515f757f3fSDimitry Andric #include "llvm/CodeGen/InterleavedAccess.h"
520b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
530b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
540b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
550b57cec5SDimitry Andric #include "llvm/IR/Constants.h"
560b57cec5SDimitry Andric #include "llvm/IR/Dominators.h"
570b57cec5SDimitry Andric #include "llvm/IR/Function.h"
580b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h"
590b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h"
600b57cec5SDimitry Andric #include "llvm/IR/Instruction.h"
610b57cec5SDimitry Andric #include "llvm/IR/Instructions.h"
6206c3fb27SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
63480093f4SDimitry Andric #include "llvm/InitializePasses.h"
640b57cec5SDimitry Andric #include "llvm/Pass.h"
650b57cec5SDimitry Andric #include "llvm/Support/Casting.h"
660b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h"
670b57cec5SDimitry Andric #include "llvm/Support/Debug.h"
680b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h"
690b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h"
700b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h"
71e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/Local.h"
720b57cec5SDimitry Andric #include <cassert>
730b57cec5SDimitry Andric #include <utility>
740b57cec5SDimitry Andric 
750b57cec5SDimitry Andric using namespace llvm;
760b57cec5SDimitry Andric 
770b57cec5SDimitry Andric #define DEBUG_TYPE "interleaved-access"
780b57cec5SDimitry Andric 
790b57cec5SDimitry Andric static cl::opt<bool> LowerInterleavedAccesses(
800b57cec5SDimitry Andric     "lower-interleaved-accesses",
810b57cec5SDimitry Andric     cl::desc("Enable lowering interleaved accesses to intrinsics"),
820b57cec5SDimitry Andric     cl::init(true), cl::Hidden);
830b57cec5SDimitry Andric 
840b57cec5SDimitry Andric namespace {
850b57cec5SDimitry Andric 
865f757f3fSDimitry Andric class InterleavedAccessImpl {
875f757f3fSDimitry Andric   friend class InterleavedAccess;
885f757f3fSDimitry Andric 
890b57cec5SDimitry Andric public:
905f757f3fSDimitry Andric   InterleavedAccessImpl() = default;
915f757f3fSDimitry Andric   InterleavedAccessImpl(DominatorTree *DT, const TargetLowering *TLI)
925f757f3fSDimitry Andric       : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {}
935f757f3fSDimitry Andric   bool runOnFunction(Function &F);
940b57cec5SDimitry Andric 
950b57cec5SDimitry Andric private:
960b57cec5SDimitry Andric   DominatorTree *DT = nullptr;
970b57cec5SDimitry Andric   const TargetLowering *TLI = nullptr;
980b57cec5SDimitry Andric 
990b57cec5SDimitry Andric   /// The maximum supported interleave factor.
10006c3fb27SDimitry Andric   unsigned MaxFactor = 0u;
1010b57cec5SDimitry Andric 
1020b57cec5SDimitry Andric   /// Transform an interleaved load into target specific intrinsics.
1030b57cec5SDimitry Andric   bool lowerInterleavedLoad(LoadInst *LI,
1040b57cec5SDimitry Andric                             SmallVector<Instruction *, 32> &DeadInsts);
1050b57cec5SDimitry Andric 
1060b57cec5SDimitry Andric   /// Transform an interleaved store into target specific intrinsics.
1070b57cec5SDimitry Andric   bool lowerInterleavedStore(StoreInst *SI,
1080b57cec5SDimitry Andric                              SmallVector<Instruction *, 32> &DeadInsts);
1090b57cec5SDimitry Andric 
11006c3fb27SDimitry Andric   /// Transform a load and a deinterleave intrinsic into target specific
11106c3fb27SDimitry Andric   /// instructions.
11206c3fb27SDimitry Andric   bool lowerDeinterleaveIntrinsic(IntrinsicInst *II,
11306c3fb27SDimitry Andric                                   SmallVector<Instruction *, 32> &DeadInsts);
11406c3fb27SDimitry Andric 
11506c3fb27SDimitry Andric   /// Transform an interleave intrinsic and a store into target specific
11606c3fb27SDimitry Andric   /// instructions.
11706c3fb27SDimitry Andric   bool lowerInterleaveIntrinsic(IntrinsicInst *II,
11806c3fb27SDimitry Andric                                 SmallVector<Instruction *, 32> &DeadInsts);
11906c3fb27SDimitry Andric 
1200b57cec5SDimitry Andric   /// Returns true if the uses of an interleaved load by the
1210b57cec5SDimitry Andric   /// extractelement instructions in \p Extracts can be replaced by uses of the
1220b57cec5SDimitry Andric   /// shufflevector instructions in \p Shuffles instead. If so, the necessary
1230b57cec5SDimitry Andric   /// replacements are also performed.
1240b57cec5SDimitry Andric   bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts,
1250b57cec5SDimitry Andric                           ArrayRef<ShuffleVectorInst *> Shuffles);
126e8d8bef9SDimitry Andric 
127e8d8bef9SDimitry Andric   /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them
128e8d8bef9SDimitry Andric   /// to binop(shuffle(x), shuffle(y)) to allow the formation of an
129e8d8bef9SDimitry Andric   /// interleaving load. Any newly created shuffles that operate on \p LI will
130e8d8bef9SDimitry Andric   /// be added to \p Shuffles. Returns true, if any changes to the IR have been
131e8d8bef9SDimitry Andric   /// made.
132e8d8bef9SDimitry Andric   bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles,
133e8d8bef9SDimitry Andric                             SmallVectorImpl<ShuffleVectorInst *> &Shuffles,
134e8d8bef9SDimitry Andric                             LoadInst *LI);
1350b57cec5SDimitry Andric };
1360b57cec5SDimitry Andric 
1375f757f3fSDimitry Andric class InterleavedAccess : public FunctionPass {
1385f757f3fSDimitry Andric   InterleavedAccessImpl Impl;
1395f757f3fSDimitry Andric 
1405f757f3fSDimitry Andric public:
1415f757f3fSDimitry Andric   static char ID;
1425f757f3fSDimitry Andric 
1435f757f3fSDimitry Andric   InterleavedAccess() : FunctionPass(ID) {
1445f757f3fSDimitry Andric     initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
1455f757f3fSDimitry Andric   }
1465f757f3fSDimitry Andric 
1475f757f3fSDimitry Andric   StringRef getPassName() const override { return "Interleaved Access Pass"; }
1485f757f3fSDimitry Andric 
1495f757f3fSDimitry Andric   bool runOnFunction(Function &F) override;
1505f757f3fSDimitry Andric 
1515f757f3fSDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
1525f757f3fSDimitry Andric     AU.addRequired<DominatorTreeWrapperPass>();
1535f757f3fSDimitry Andric     AU.setPreservesCFG();
1545f757f3fSDimitry Andric   }
1555f757f3fSDimitry Andric };
1565f757f3fSDimitry Andric 
1570b57cec5SDimitry Andric } // end anonymous namespace.
1580b57cec5SDimitry Andric 
1595f757f3fSDimitry Andric PreservedAnalyses InterleavedAccessPass::run(Function &F,
1605f757f3fSDimitry Andric                                              FunctionAnalysisManager &FAM) {
1615f757f3fSDimitry Andric   auto *DT = &FAM.getResult<DominatorTreeAnalysis>(F);
1625f757f3fSDimitry Andric   auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering();
1635f757f3fSDimitry Andric   InterleavedAccessImpl Impl(DT, TLI);
1645f757f3fSDimitry Andric   bool Changed = Impl.runOnFunction(F);
1655f757f3fSDimitry Andric 
1665f757f3fSDimitry Andric   if (!Changed)
1675f757f3fSDimitry Andric     return PreservedAnalyses::all();
1685f757f3fSDimitry Andric 
1695f757f3fSDimitry Andric   PreservedAnalyses PA;
1705f757f3fSDimitry Andric   PA.preserveSet<CFGAnalyses>();
1715f757f3fSDimitry Andric   return PA;
1725f757f3fSDimitry Andric }
1735f757f3fSDimitry Andric 
1740b57cec5SDimitry Andric char InterleavedAccess::ID = 0;
1750b57cec5SDimitry Andric 
1765f757f3fSDimitry Andric bool InterleavedAccess::runOnFunction(Function &F) {
1775f757f3fSDimitry Andric   auto *TPC = getAnalysisIfAvailable<TargetPassConfig>();
1785f757f3fSDimitry Andric   if (!TPC || !LowerInterleavedAccesses)
1795f757f3fSDimitry Andric     return false;
1805f757f3fSDimitry Andric 
1815f757f3fSDimitry Andric   LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
1825f757f3fSDimitry Andric 
1835f757f3fSDimitry Andric   Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
1845f757f3fSDimitry Andric   auto &TM = TPC->getTM<TargetMachine>();
1855f757f3fSDimitry Andric   Impl.TLI = TM.getSubtargetImpl(F)->getTargetLowering();
1865f757f3fSDimitry Andric   Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor();
1875f757f3fSDimitry Andric 
1885f757f3fSDimitry Andric   return Impl.runOnFunction(F);
1895f757f3fSDimitry Andric }
1905f757f3fSDimitry Andric 
1910b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE,
1920b57cec5SDimitry Andric     "Lower interleaved memory accesses to target specific intrinsics", false,
1930b57cec5SDimitry Andric     false)
1940b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
1950b57cec5SDimitry Andric INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE,
1960b57cec5SDimitry Andric     "Lower interleaved memory accesses to target specific intrinsics", false,
1970b57cec5SDimitry Andric     false)
1980b57cec5SDimitry Andric 
1990b57cec5SDimitry Andric FunctionPass *llvm::createInterleavedAccessPass() {
2000b57cec5SDimitry Andric   return new InterleavedAccess();
2010b57cec5SDimitry Andric }
2020b57cec5SDimitry Andric 
2030b57cec5SDimitry Andric /// Check if the mask is a DE-interleave mask for an interleaved load.
2040b57cec5SDimitry Andric ///
2050b57cec5SDimitry Andric /// E.g. DE-interleave masks (Factor = 2) could be:
2060b57cec5SDimitry Andric ///     <0, 2, 4, 6>    (mask of index 0 to extract even elements)
2070b57cec5SDimitry Andric ///     <1, 3, 5, 7>    (mask of index 1 to extract odd elements)
2080b57cec5SDimitry Andric static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
2090b57cec5SDimitry Andric                                unsigned &Index, unsigned MaxFactor,
2100b57cec5SDimitry Andric                                unsigned NumLoadElements) {
2110b57cec5SDimitry Andric   if (Mask.size() < 2)
2120b57cec5SDimitry Andric     return false;
2130b57cec5SDimitry Andric 
2140b57cec5SDimitry Andric   // Check potential Factors.
2150b57cec5SDimitry Andric   for (Factor = 2; Factor <= MaxFactor; Factor++) {
2160b57cec5SDimitry Andric     // Make sure we don't produce a load wider than the input load.
2170b57cec5SDimitry Andric     if (Mask.size() * Factor > NumLoadElements)
2180b57cec5SDimitry Andric       return false;
219*0fca6ea1SDimitry Andric     if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index))
2200b57cec5SDimitry Andric       return true;
2210b57cec5SDimitry Andric   }
2220b57cec5SDimitry Andric 
2230b57cec5SDimitry Andric   return false;
2240b57cec5SDimitry Andric }
2250b57cec5SDimitry Andric 
2260b57cec5SDimitry Andric /// Check if the mask can be used in an interleaved store.
2270b57cec5SDimitry Andric //
2280b57cec5SDimitry Andric /// It checks for a more general pattern than the RE-interleave mask.
2290b57cec5SDimitry Andric /// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...>
2300b57cec5SDimitry Andric /// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35>
2310b57cec5SDimitry Andric /// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19>
2320b57cec5SDimitry Andric /// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5>
2330b57cec5SDimitry Andric ///
2340b57cec5SDimitry Andric /// The particular case of an RE-interleave mask is:
2350b57cec5SDimitry Andric /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...>
2360b57cec5SDimitry Andric /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7>
23706c3fb27SDimitry Andric static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor,
23806c3fb27SDimitry Andric                                unsigned MaxFactor) {
23906c3fb27SDimitry Andric   unsigned NumElts = SVI->getShuffleMask().size();
2400b57cec5SDimitry Andric   if (NumElts < 4)
2410b57cec5SDimitry Andric     return false;
2420b57cec5SDimitry Andric 
2430b57cec5SDimitry Andric   // Check potential Factors.
2440b57cec5SDimitry Andric   for (Factor = 2; Factor <= MaxFactor; Factor++) {
24506c3fb27SDimitry Andric     if (SVI->isInterleave(Factor))
2460b57cec5SDimitry Andric       return true;
2470b57cec5SDimitry Andric   }
2480b57cec5SDimitry Andric 
2490b57cec5SDimitry Andric   return false;
2500b57cec5SDimitry Andric }
2510b57cec5SDimitry Andric 
2525f757f3fSDimitry Andric bool InterleavedAccessImpl::lowerInterleavedLoad(
2530b57cec5SDimitry Andric     LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
2545ffd83dbSDimitry Andric   if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType()))
2550b57cec5SDimitry Andric     return false;
2560b57cec5SDimitry Andric 
257e8d8bef9SDimitry Andric   // Check if all users of this load are shufflevectors. If we encounter any
258e8d8bef9SDimitry Andric   // users that are extractelement instructions or binary operators, we save
259e8d8bef9SDimitry Andric   // them to later check if they can be modified to extract from one of the
260e8d8bef9SDimitry Andric   // shufflevectors instead of the load.
261e8d8bef9SDimitry Andric 
2620b57cec5SDimitry Andric   SmallVector<ShuffleVectorInst *, 4> Shuffles;
2630b57cec5SDimitry Andric   SmallVector<ExtractElementInst *, 4> Extracts;
264e8d8bef9SDimitry Andric   // BinOpShuffles need to be handled a single time in case both operands of the
265e8d8bef9SDimitry Andric   // binop are the same load.
266e8d8bef9SDimitry Andric   SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles;
2670b57cec5SDimitry Andric 
268e8d8bef9SDimitry Andric   for (auto *User : LI->users()) {
269e8d8bef9SDimitry Andric     auto *Extract = dyn_cast<ExtractElementInst>(User);
2700b57cec5SDimitry Andric     if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) {
2710b57cec5SDimitry Andric       Extracts.push_back(Extract);
2720b57cec5SDimitry Andric       continue;
2730b57cec5SDimitry Andric     }
274753f127fSDimitry Andric     if (auto *BI = dyn_cast<BinaryOperator>(User)) {
2755f757f3fSDimitry Andric       if (!BI->user_empty() && all_of(BI->users(), [](auto *U) {
27606c3fb27SDimitry Andric             auto *SVI = dyn_cast<ShuffleVectorInst>(U);
27706c3fb27SDimitry Andric             return SVI && isa<UndefValue>(SVI->getOperand(1));
27806c3fb27SDimitry Andric           })) {
279753f127fSDimitry Andric         for (auto *SVI : BI->users())
280753f127fSDimitry Andric           BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI));
281e8d8bef9SDimitry Andric         continue;
282e8d8bef9SDimitry Andric       }
283e8d8bef9SDimitry Andric     }
284e8d8bef9SDimitry Andric     auto *SVI = dyn_cast<ShuffleVectorInst>(User);
2850b57cec5SDimitry Andric     if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
2860b57cec5SDimitry Andric       return false;
2870b57cec5SDimitry Andric 
2880b57cec5SDimitry Andric     Shuffles.push_back(SVI);
2890b57cec5SDimitry Andric   }
2900b57cec5SDimitry Andric 
291e8d8bef9SDimitry Andric   if (Shuffles.empty() && BinOpShuffles.empty())
2920b57cec5SDimitry Andric     return false;
2930b57cec5SDimitry Andric 
2940b57cec5SDimitry Andric   unsigned Factor, Index;
2950b57cec5SDimitry Andric 
2965ffd83dbSDimitry Andric   unsigned NumLoadElements =
2975ffd83dbSDimitry Andric       cast<FixedVectorType>(LI->getType())->getNumElements();
298e8d8bef9SDimitry Andric   auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0];
2990b57cec5SDimitry Andric   // Check if the first shufflevector is DE-interleave shuffle.
300e8d8bef9SDimitry Andric   if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor,
301e8d8bef9SDimitry Andric                           NumLoadElements))
3020b57cec5SDimitry Andric     return false;
3030b57cec5SDimitry Andric 
3040b57cec5SDimitry Andric   // Holds the corresponding index for each DE-interleave shuffle.
3050b57cec5SDimitry Andric   SmallVector<unsigned, 4> Indices;
3060b57cec5SDimitry Andric 
307e8d8bef9SDimitry Andric   Type *VecTy = FirstSVI->getType();
3080b57cec5SDimitry Andric 
3090b57cec5SDimitry Andric   // Check if other shufflevectors are also DE-interleaved of the same type
3100b57cec5SDimitry Andric   // and factor as the first shufflevector.
311e8d8bef9SDimitry Andric   for (auto *Shuffle : Shuffles) {
312e8d8bef9SDimitry Andric     if (Shuffle->getType() != VecTy)
3130b57cec5SDimitry Andric       return false;
314*0fca6ea1SDimitry Andric     if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
315*0fca6ea1SDimitry Andric             Shuffle->getShuffleMask(), Factor, Index))
3160b57cec5SDimitry Andric       return false;
3170b57cec5SDimitry Andric 
318e8d8bef9SDimitry Andric     assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
319e8d8bef9SDimitry Andric     Indices.push_back(Index);
320e8d8bef9SDimitry Andric   }
321e8d8bef9SDimitry Andric   for (auto *Shuffle : BinOpShuffles) {
322e8d8bef9SDimitry Andric     if (Shuffle->getType() != VecTy)
323e8d8bef9SDimitry Andric       return false;
324*0fca6ea1SDimitry Andric     if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor(
325*0fca6ea1SDimitry Andric             Shuffle->getShuffleMask(), Factor, Index))
326e8d8bef9SDimitry Andric       return false;
327e8d8bef9SDimitry Andric 
328e8d8bef9SDimitry Andric     assert(Shuffle->getShuffleMask().size() <= NumLoadElements);
329e8d8bef9SDimitry Andric 
330e8d8bef9SDimitry Andric     if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI)
331e8d8bef9SDimitry Andric       Indices.push_back(Index);
332e8d8bef9SDimitry Andric     if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI)
3330b57cec5SDimitry Andric       Indices.push_back(Index);
3340b57cec5SDimitry Andric   }
3350b57cec5SDimitry Andric 
3360b57cec5SDimitry Andric   // Try and modify users of the load that are extractelement instructions to
3370b57cec5SDimitry Andric   // use the shufflevector instructions instead of the load.
3380b57cec5SDimitry Andric   if (!tryReplaceExtracts(Extracts, Shuffles))
3390b57cec5SDimitry Andric     return false;
3400b57cec5SDimitry Andric 
341e8d8bef9SDimitry Andric   bool BinOpShuffleChanged =
342e8d8bef9SDimitry Andric       replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI);
343e8d8bef9SDimitry Andric 
3440b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
3450b57cec5SDimitry Andric 
3460b57cec5SDimitry Andric   // Try to create target specific intrinsics to replace the load and shuffles.
347e8d8bef9SDimitry Andric   if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) {
348e8d8bef9SDimitry Andric     // If Extracts is not empty, tryReplaceExtracts made changes earlier.
349e8d8bef9SDimitry Andric     return !Extracts.empty() || BinOpShuffleChanged;
350e8d8bef9SDimitry Andric   }
3510b57cec5SDimitry Andric 
352fe6060f1SDimitry Andric   append_range(DeadInsts, Shuffles);
3530b57cec5SDimitry Andric 
3540b57cec5SDimitry Andric   DeadInsts.push_back(LI);
3550b57cec5SDimitry Andric   return true;
3560b57cec5SDimitry Andric }
3570b57cec5SDimitry Andric 
3585f757f3fSDimitry Andric bool InterleavedAccessImpl::replaceBinOpShuffles(
359e8d8bef9SDimitry Andric     ArrayRef<ShuffleVectorInst *> BinOpShuffles,
360e8d8bef9SDimitry Andric     SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) {
361e8d8bef9SDimitry Andric   for (auto *SVI : BinOpShuffles) {
362e8d8bef9SDimitry Andric     BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0));
363e8d8bef9SDimitry Andric     Type *BIOp0Ty = BI->getOperand(0)->getType();
364e8d8bef9SDimitry Andric     ArrayRef<int> Mask = SVI->getShuffleMask();
365e8d8bef9SDimitry Andric     assert(all_of(Mask, [&](int Idx) {
366e8d8bef9SDimitry Andric       return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements();
367e8d8bef9SDimitry Andric     }));
368e8d8bef9SDimitry Andric 
369*0fca6ea1SDimitry Andric     BasicBlock::iterator insertPos = SVI->getIterator();
370e8d8bef9SDimitry Andric     auto *NewSVI1 =
371e8d8bef9SDimitry Andric         new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty),
372*0fca6ea1SDimitry Andric                               Mask, SVI->getName(), insertPos);
373e8d8bef9SDimitry Andric     auto *NewSVI2 = new ShuffleVectorInst(
374e8d8bef9SDimitry Andric         BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask,
375*0fca6ea1SDimitry Andric         SVI->getName(), insertPos);
376fe6060f1SDimitry Andric     BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags(
377*0fca6ea1SDimitry Andric         BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), insertPos);
378e8d8bef9SDimitry Andric     SVI->replaceAllUsesWith(NewBI);
379e8d8bef9SDimitry Andric     LLVM_DEBUG(dbgs() << "  Replaced: " << *BI << "\n    And   : " << *SVI
380e8d8bef9SDimitry Andric                       << "\n  With    : " << *NewSVI1 << "\n    And   : "
381e8d8bef9SDimitry Andric                       << *NewSVI2 << "\n    And   : " << *NewBI << "\n");
382e8d8bef9SDimitry Andric     RecursivelyDeleteTriviallyDeadInstructions(SVI);
383e8d8bef9SDimitry Andric     if (NewSVI1->getOperand(0) == LI)
384e8d8bef9SDimitry Andric       Shuffles.push_back(NewSVI1);
385e8d8bef9SDimitry Andric     if (NewSVI2->getOperand(0) == LI)
386e8d8bef9SDimitry Andric       Shuffles.push_back(NewSVI2);
387e8d8bef9SDimitry Andric   }
388e8d8bef9SDimitry Andric 
389e8d8bef9SDimitry Andric   return !BinOpShuffles.empty();
390e8d8bef9SDimitry Andric }
391e8d8bef9SDimitry Andric 
3925f757f3fSDimitry Andric bool InterleavedAccessImpl::tryReplaceExtracts(
3930b57cec5SDimitry Andric     ArrayRef<ExtractElementInst *> Extracts,
3940b57cec5SDimitry Andric     ArrayRef<ShuffleVectorInst *> Shuffles) {
3950b57cec5SDimitry Andric   // If there aren't any extractelement instructions to modify, there's nothing
3960b57cec5SDimitry Andric   // to do.
3970b57cec5SDimitry Andric   if (Extracts.empty())
3980b57cec5SDimitry Andric     return true;
3990b57cec5SDimitry Andric 
4000b57cec5SDimitry Andric   // Maps extractelement instructions to vector-index pairs. The extractlement
4010b57cec5SDimitry Andric   // instructions will be modified to use the new vector and index operands.
4020b57cec5SDimitry Andric   DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap;
4030b57cec5SDimitry Andric 
4040b57cec5SDimitry Andric   for (auto *Extract : Extracts) {
4050b57cec5SDimitry Andric     // The vector index that is extracted.
4060b57cec5SDimitry Andric     auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand());
4070b57cec5SDimitry Andric     auto Index = IndexOperand->getSExtValue();
4080b57cec5SDimitry Andric 
4090b57cec5SDimitry Andric     // Look for a suitable shufflevector instruction. The goal is to modify the
4100b57cec5SDimitry Andric     // extractelement instruction (which uses an interleaved load) to use one
4110b57cec5SDimitry Andric     // of the shufflevector instructions instead of the load.
4120b57cec5SDimitry Andric     for (auto *Shuffle : Shuffles) {
4130b57cec5SDimitry Andric       // If the shufflevector instruction doesn't dominate the extract, we
4140b57cec5SDimitry Andric       // can't create a use of it.
4150b57cec5SDimitry Andric       if (!DT->dominates(Shuffle, Extract))
4160b57cec5SDimitry Andric         continue;
4170b57cec5SDimitry Andric 
4180b57cec5SDimitry Andric       // Inspect the indices of the shufflevector instruction. If the shuffle
4190b57cec5SDimitry Andric       // selects the same index that is extracted, we can modify the
4200b57cec5SDimitry Andric       // extractelement instruction.
4210b57cec5SDimitry Andric       SmallVector<int, 4> Indices;
4220b57cec5SDimitry Andric       Shuffle->getShuffleMask(Indices);
4230b57cec5SDimitry Andric       for (unsigned I = 0; I < Indices.size(); ++I)
4240b57cec5SDimitry Andric         if (Indices[I] == Index) {
4250b57cec5SDimitry Andric           assert(Extract->getOperand(0) == Shuffle->getOperand(0) &&
4260b57cec5SDimitry Andric                  "Vector operations do not match");
4270b57cec5SDimitry Andric           ReplacementMap[Extract] = std::make_pair(Shuffle, I);
4280b57cec5SDimitry Andric           break;
4290b57cec5SDimitry Andric         }
4300b57cec5SDimitry Andric 
4310b57cec5SDimitry Andric       // If we found a suitable shufflevector instruction, stop looking.
4320b57cec5SDimitry Andric       if (ReplacementMap.count(Extract))
4330b57cec5SDimitry Andric         break;
4340b57cec5SDimitry Andric     }
4350b57cec5SDimitry Andric 
4360b57cec5SDimitry Andric     // If we did not find a suitable shufflevector instruction, the
4370b57cec5SDimitry Andric     // extractelement instruction cannot be modified, so we must give up.
4380b57cec5SDimitry Andric     if (!ReplacementMap.count(Extract))
4390b57cec5SDimitry Andric       return false;
4400b57cec5SDimitry Andric   }
4410b57cec5SDimitry Andric 
4420b57cec5SDimitry Andric   // Finally, perform the replacements.
4430b57cec5SDimitry Andric   IRBuilder<> Builder(Extracts[0]->getContext());
4440b57cec5SDimitry Andric   for (auto &Replacement : ReplacementMap) {
4450b57cec5SDimitry Andric     auto *Extract = Replacement.first;
4460b57cec5SDimitry Andric     auto *Vector = Replacement.second.first;
4470b57cec5SDimitry Andric     auto Index = Replacement.second.second;
4480b57cec5SDimitry Andric     Builder.SetInsertPoint(Extract);
4490b57cec5SDimitry Andric     Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index));
4500b57cec5SDimitry Andric     Extract->eraseFromParent();
4510b57cec5SDimitry Andric   }
4520b57cec5SDimitry Andric 
4530b57cec5SDimitry Andric   return true;
4540b57cec5SDimitry Andric }
4550b57cec5SDimitry Andric 
4565f757f3fSDimitry Andric bool InterleavedAccessImpl::lowerInterleavedStore(
4570b57cec5SDimitry Andric     StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
4580b57cec5SDimitry Andric   if (!SI->isSimple())
4590b57cec5SDimitry Andric     return false;
4600b57cec5SDimitry Andric 
461e8d8bef9SDimitry Andric   auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
4625ffd83dbSDimitry Andric   if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType()))
4630b57cec5SDimitry Andric     return false;
4640b57cec5SDimitry Andric 
4650b57cec5SDimitry Andric   // Check if the shufflevector is RE-interleave shuffle.
4660b57cec5SDimitry Andric   unsigned Factor;
46706c3fb27SDimitry Andric   if (!isReInterleaveMask(SVI, Factor, MaxFactor))
4680b57cec5SDimitry Andric     return false;
4690b57cec5SDimitry Andric 
4700b57cec5SDimitry Andric   LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
4710b57cec5SDimitry Andric 
4720b57cec5SDimitry Andric   // Try to create target specific intrinsics to replace the store and shuffle.
4730b57cec5SDimitry Andric   if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
4740b57cec5SDimitry Andric     return false;
4750b57cec5SDimitry Andric 
4760b57cec5SDimitry Andric   // Already have a new target specific interleaved store. Erase the old store.
4770b57cec5SDimitry Andric   DeadInsts.push_back(SI);
4780b57cec5SDimitry Andric   DeadInsts.push_back(SVI);
4790b57cec5SDimitry Andric   return true;
4800b57cec5SDimitry Andric }
4810b57cec5SDimitry Andric 
4825f757f3fSDimitry Andric bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic(
48306c3fb27SDimitry Andric     IntrinsicInst *DI, SmallVector<Instruction *, 32> &DeadInsts) {
48406c3fb27SDimitry Andric   LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0));
48506c3fb27SDimitry Andric 
48606c3fb27SDimitry Andric   if (!LI || !LI->hasOneUse() || !LI->isSimple())
48706c3fb27SDimitry Andric     return false;
48806c3fb27SDimitry Andric 
48906c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n");
49006c3fb27SDimitry Andric 
49106c3fb27SDimitry Andric   // Try and match this with target specific intrinsics.
49206c3fb27SDimitry Andric   if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI))
49306c3fb27SDimitry Andric     return false;
49406c3fb27SDimitry Andric 
49506c3fb27SDimitry Andric   // We now have a target-specific load, so delete the old one.
49606c3fb27SDimitry Andric   DeadInsts.push_back(DI);
49706c3fb27SDimitry Andric   DeadInsts.push_back(LI);
49806c3fb27SDimitry Andric   return true;
49906c3fb27SDimitry Andric }
50006c3fb27SDimitry Andric 
5015f757f3fSDimitry Andric bool InterleavedAccessImpl::lowerInterleaveIntrinsic(
50206c3fb27SDimitry Andric     IntrinsicInst *II, SmallVector<Instruction *, 32> &DeadInsts) {
50306c3fb27SDimitry Andric   if (!II->hasOneUse())
50406c3fb27SDimitry Andric     return false;
50506c3fb27SDimitry Andric 
50606c3fb27SDimitry Andric   StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin()));
50706c3fb27SDimitry Andric 
50806c3fb27SDimitry Andric   if (!SI || !SI->isSimple())
50906c3fb27SDimitry Andric     return false;
51006c3fb27SDimitry Andric 
51106c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n");
51206c3fb27SDimitry Andric 
51306c3fb27SDimitry Andric   // Try and match this with target specific intrinsics.
51406c3fb27SDimitry Andric   if (!TLI->lowerInterleaveIntrinsicToStore(II, SI))
51506c3fb27SDimitry Andric     return false;
51606c3fb27SDimitry Andric 
51706c3fb27SDimitry Andric   // We now have a target-specific store, so delete the old one.
51806c3fb27SDimitry Andric   DeadInsts.push_back(SI);
51906c3fb27SDimitry Andric   DeadInsts.push_back(II);
52006c3fb27SDimitry Andric   return true;
52106c3fb27SDimitry Andric }
52206c3fb27SDimitry Andric 
5235f757f3fSDimitry Andric bool InterleavedAccessImpl::runOnFunction(Function &F) {
5240b57cec5SDimitry Andric   // Holds dead instructions that will be erased later.
5250b57cec5SDimitry Andric   SmallVector<Instruction *, 32> DeadInsts;
5260b57cec5SDimitry Andric   bool Changed = false;
5270b57cec5SDimitry Andric 
5280b57cec5SDimitry Andric   for (auto &I : instructions(F)) {
529e8d8bef9SDimitry Andric     if (auto *LI = dyn_cast<LoadInst>(&I))
5300b57cec5SDimitry Andric       Changed |= lowerInterleavedLoad(LI, DeadInsts);
5310b57cec5SDimitry Andric 
532e8d8bef9SDimitry Andric     if (auto *SI = dyn_cast<StoreInst>(&I))
5330b57cec5SDimitry Andric       Changed |= lowerInterleavedStore(SI, DeadInsts);
53406c3fb27SDimitry Andric 
53506c3fb27SDimitry Andric     if (auto *II = dyn_cast<IntrinsicInst>(&I)) {
53606c3fb27SDimitry Andric       // At present, we only have intrinsics to represent (de)interleaving
53706c3fb27SDimitry Andric       // with a factor of 2.
538*0fca6ea1SDimitry Andric       if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2)
53906c3fb27SDimitry Andric         Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts);
540*0fca6ea1SDimitry Andric       if (II->getIntrinsicID() == Intrinsic::vector_interleave2)
54106c3fb27SDimitry Andric         Changed |= lowerInterleaveIntrinsic(II, DeadInsts);
54206c3fb27SDimitry Andric     }
5430b57cec5SDimitry Andric   }
5440b57cec5SDimitry Andric 
545fcaf7f86SDimitry Andric   for (auto *I : DeadInsts)
5460b57cec5SDimitry Andric     I->eraseFromParent();
5470b57cec5SDimitry Andric 
5480b57cec5SDimitry Andric   return Changed;
5490b57cec5SDimitry Andric }
550