10b57cec5SDimitry Andric //===- InterleavedAccessPass.cpp ------------------------------------------===// 20b57cec5SDimitry Andric // 30b57cec5SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 40b57cec5SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 50b57cec5SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 60b57cec5SDimitry Andric // 70b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 80b57cec5SDimitry Andric // 90b57cec5SDimitry Andric // This file implements the Interleaved Access pass, which identifies 100b57cec5SDimitry Andric // interleaved memory accesses and transforms them into target specific 110b57cec5SDimitry Andric // intrinsics. 120b57cec5SDimitry Andric // 130b57cec5SDimitry Andric // An interleaved load reads data from memory into several vectors, with 140b57cec5SDimitry Andric // DE-interleaving the data on a factor. An interleaved store writes several 150b57cec5SDimitry Andric // vectors to memory with RE-interleaving the data on a factor. 160b57cec5SDimitry Andric // 170b57cec5SDimitry Andric // As interleaved accesses are difficult to identified in CodeGen (mainly 180b57cec5SDimitry Andric // because the VECTOR_SHUFFLE DAG node is quite different from the shufflevector 190b57cec5SDimitry Andric // IR), we identify and transform them to intrinsics in this pass so the 200b57cec5SDimitry Andric // intrinsics can be easily matched into target specific instructions later in 210b57cec5SDimitry Andric // CodeGen. 220b57cec5SDimitry Andric // 230b57cec5SDimitry Andric // E.g. An interleaved load (Factor = 2): 240b57cec5SDimitry Andric // %wide.vec = load <8 x i32>, <8 x i32>* %ptr 25e8d8bef9SDimitry Andric // %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <0, 2, 4, 6> 26e8d8bef9SDimitry Andric // %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> poison, <1, 3, 5, 7> 270b57cec5SDimitry Andric // 280b57cec5SDimitry Andric // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2 290b57cec5SDimitry Andric // intrinsic in ARM backend. 300b57cec5SDimitry Andric // 310b57cec5SDimitry Andric // In X86, this can be further optimized into a set of target 320b57cec5SDimitry Andric // specific loads followed by an optimized sequence of shuffles. 330b57cec5SDimitry Andric // 340b57cec5SDimitry Andric // E.g. An interleaved store (Factor = 3): 350b57cec5SDimitry Andric // %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1, 360b57cec5SDimitry Andric // <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11> 370b57cec5SDimitry Andric // store <12 x i32> %i.vec, <12 x i32>* %ptr 380b57cec5SDimitry Andric // 390b57cec5SDimitry Andric // It could be transformed into a st3 intrinsic in AArch64 backend or a vst3 400b57cec5SDimitry Andric // intrinsic in ARM backend. 410b57cec5SDimitry Andric // 420b57cec5SDimitry Andric // Similarly, a set of interleaved stores can be transformed into an optimized 430b57cec5SDimitry Andric // sequence of shuffles followed by a set of target specific stores for X86. 440b57cec5SDimitry Andric // 450b57cec5SDimitry Andric //===----------------------------------------------------------------------===// 460b57cec5SDimitry Andric 470b57cec5SDimitry Andric #include "llvm/ADT/ArrayRef.h" 480b57cec5SDimitry Andric #include "llvm/ADT/DenseMap.h" 4981ad6265SDimitry Andric #include "llvm/ADT/SetVector.h" 500b57cec5SDimitry Andric #include "llvm/ADT/SmallVector.h" 515f757f3fSDimitry Andric #include "llvm/CodeGen/InterleavedAccess.h" 520b57cec5SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 530b57cec5SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 540b57cec5SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 550b57cec5SDimitry Andric #include "llvm/IR/Constants.h" 560b57cec5SDimitry Andric #include "llvm/IR/Dominators.h" 570b57cec5SDimitry Andric #include "llvm/IR/Function.h" 580b57cec5SDimitry Andric #include "llvm/IR/IRBuilder.h" 590b57cec5SDimitry Andric #include "llvm/IR/InstIterator.h" 600b57cec5SDimitry Andric #include "llvm/IR/Instruction.h" 610b57cec5SDimitry Andric #include "llvm/IR/Instructions.h" 6206c3fb27SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 63480093f4SDimitry Andric #include "llvm/InitializePasses.h" 640b57cec5SDimitry Andric #include "llvm/Pass.h" 650b57cec5SDimitry Andric #include "llvm/Support/Casting.h" 660b57cec5SDimitry Andric #include "llvm/Support/CommandLine.h" 670b57cec5SDimitry Andric #include "llvm/Support/Debug.h" 680b57cec5SDimitry Andric #include "llvm/Support/MathExtras.h" 690b57cec5SDimitry Andric #include "llvm/Support/raw_ostream.h" 700b57cec5SDimitry Andric #include "llvm/Target/TargetMachine.h" 71e8d8bef9SDimitry Andric #include "llvm/Transforms/Utils/Local.h" 720b57cec5SDimitry Andric #include <cassert> 730b57cec5SDimitry Andric #include <utility> 740b57cec5SDimitry Andric 750b57cec5SDimitry Andric using namespace llvm; 760b57cec5SDimitry Andric 770b57cec5SDimitry Andric #define DEBUG_TYPE "interleaved-access" 780b57cec5SDimitry Andric 790b57cec5SDimitry Andric static cl::opt<bool> LowerInterleavedAccesses( 800b57cec5SDimitry Andric "lower-interleaved-accesses", 810b57cec5SDimitry Andric cl::desc("Enable lowering interleaved accesses to intrinsics"), 820b57cec5SDimitry Andric cl::init(true), cl::Hidden); 830b57cec5SDimitry Andric 840b57cec5SDimitry Andric namespace { 850b57cec5SDimitry Andric 865f757f3fSDimitry Andric class InterleavedAccessImpl { 875f757f3fSDimitry Andric friend class InterleavedAccess; 885f757f3fSDimitry Andric 890b57cec5SDimitry Andric public: 905f757f3fSDimitry Andric InterleavedAccessImpl() = default; 915f757f3fSDimitry Andric InterleavedAccessImpl(DominatorTree *DT, const TargetLowering *TLI) 925f757f3fSDimitry Andric : DT(DT), TLI(TLI), MaxFactor(TLI->getMaxSupportedInterleaveFactor()) {} 935f757f3fSDimitry Andric bool runOnFunction(Function &F); 940b57cec5SDimitry Andric 950b57cec5SDimitry Andric private: 960b57cec5SDimitry Andric DominatorTree *DT = nullptr; 970b57cec5SDimitry Andric const TargetLowering *TLI = nullptr; 980b57cec5SDimitry Andric 990b57cec5SDimitry Andric /// The maximum supported interleave factor. 10006c3fb27SDimitry Andric unsigned MaxFactor = 0u; 1010b57cec5SDimitry Andric 1020b57cec5SDimitry Andric /// Transform an interleaved load into target specific intrinsics. 1030b57cec5SDimitry Andric bool lowerInterleavedLoad(LoadInst *LI, 1040b57cec5SDimitry Andric SmallVector<Instruction *, 32> &DeadInsts); 1050b57cec5SDimitry Andric 1060b57cec5SDimitry Andric /// Transform an interleaved store into target specific intrinsics. 1070b57cec5SDimitry Andric bool lowerInterleavedStore(StoreInst *SI, 1080b57cec5SDimitry Andric SmallVector<Instruction *, 32> &DeadInsts); 1090b57cec5SDimitry Andric 11006c3fb27SDimitry Andric /// Transform a load and a deinterleave intrinsic into target specific 11106c3fb27SDimitry Andric /// instructions. 11206c3fb27SDimitry Andric bool lowerDeinterleaveIntrinsic(IntrinsicInst *II, 11306c3fb27SDimitry Andric SmallVector<Instruction *, 32> &DeadInsts); 11406c3fb27SDimitry Andric 11506c3fb27SDimitry Andric /// Transform an interleave intrinsic and a store into target specific 11606c3fb27SDimitry Andric /// instructions. 11706c3fb27SDimitry Andric bool lowerInterleaveIntrinsic(IntrinsicInst *II, 11806c3fb27SDimitry Andric SmallVector<Instruction *, 32> &DeadInsts); 11906c3fb27SDimitry Andric 1200b57cec5SDimitry Andric /// Returns true if the uses of an interleaved load by the 1210b57cec5SDimitry Andric /// extractelement instructions in \p Extracts can be replaced by uses of the 1220b57cec5SDimitry Andric /// shufflevector instructions in \p Shuffles instead. If so, the necessary 1230b57cec5SDimitry Andric /// replacements are also performed. 1240b57cec5SDimitry Andric bool tryReplaceExtracts(ArrayRef<ExtractElementInst *> Extracts, 1250b57cec5SDimitry Andric ArrayRef<ShuffleVectorInst *> Shuffles); 126e8d8bef9SDimitry Andric 127e8d8bef9SDimitry Andric /// Given a number of shuffles of the form shuffle(binop(x,y)), convert them 128e8d8bef9SDimitry Andric /// to binop(shuffle(x), shuffle(y)) to allow the formation of an 129e8d8bef9SDimitry Andric /// interleaving load. Any newly created shuffles that operate on \p LI will 130e8d8bef9SDimitry Andric /// be added to \p Shuffles. Returns true, if any changes to the IR have been 131e8d8bef9SDimitry Andric /// made. 132e8d8bef9SDimitry Andric bool replaceBinOpShuffles(ArrayRef<ShuffleVectorInst *> BinOpShuffles, 133e8d8bef9SDimitry Andric SmallVectorImpl<ShuffleVectorInst *> &Shuffles, 134e8d8bef9SDimitry Andric LoadInst *LI); 1350b57cec5SDimitry Andric }; 1360b57cec5SDimitry Andric 1375f757f3fSDimitry Andric class InterleavedAccess : public FunctionPass { 1385f757f3fSDimitry Andric InterleavedAccessImpl Impl; 1395f757f3fSDimitry Andric 1405f757f3fSDimitry Andric public: 1415f757f3fSDimitry Andric static char ID; 1425f757f3fSDimitry Andric 1435f757f3fSDimitry Andric InterleavedAccess() : FunctionPass(ID) { 1445f757f3fSDimitry Andric initializeInterleavedAccessPass(*PassRegistry::getPassRegistry()); 1455f757f3fSDimitry Andric } 1465f757f3fSDimitry Andric 1475f757f3fSDimitry Andric StringRef getPassName() const override { return "Interleaved Access Pass"; } 1485f757f3fSDimitry Andric 1495f757f3fSDimitry Andric bool runOnFunction(Function &F) override; 1505f757f3fSDimitry Andric 1515f757f3fSDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 1525f757f3fSDimitry Andric AU.addRequired<DominatorTreeWrapperPass>(); 1535f757f3fSDimitry Andric AU.setPreservesCFG(); 1545f757f3fSDimitry Andric } 1555f757f3fSDimitry Andric }; 1565f757f3fSDimitry Andric 1570b57cec5SDimitry Andric } // end anonymous namespace. 1580b57cec5SDimitry Andric 1595f757f3fSDimitry Andric PreservedAnalyses InterleavedAccessPass::run(Function &F, 1605f757f3fSDimitry Andric FunctionAnalysisManager &FAM) { 1615f757f3fSDimitry Andric auto *DT = &FAM.getResult<DominatorTreeAnalysis>(F); 1625f757f3fSDimitry Andric auto *TLI = TM->getSubtargetImpl(F)->getTargetLowering(); 1635f757f3fSDimitry Andric InterleavedAccessImpl Impl(DT, TLI); 1645f757f3fSDimitry Andric bool Changed = Impl.runOnFunction(F); 1655f757f3fSDimitry Andric 1665f757f3fSDimitry Andric if (!Changed) 1675f757f3fSDimitry Andric return PreservedAnalyses::all(); 1685f757f3fSDimitry Andric 1695f757f3fSDimitry Andric PreservedAnalyses PA; 1705f757f3fSDimitry Andric PA.preserveSet<CFGAnalyses>(); 1715f757f3fSDimitry Andric return PA; 1725f757f3fSDimitry Andric } 1735f757f3fSDimitry Andric 1740b57cec5SDimitry Andric char InterleavedAccess::ID = 0; 1750b57cec5SDimitry Andric 1765f757f3fSDimitry Andric bool InterleavedAccess::runOnFunction(Function &F) { 1775f757f3fSDimitry Andric auto *TPC = getAnalysisIfAvailable<TargetPassConfig>(); 1785f757f3fSDimitry Andric if (!TPC || !LowerInterleavedAccesses) 1795f757f3fSDimitry Andric return false; 1805f757f3fSDimitry Andric 1815f757f3fSDimitry Andric LLVM_DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n"); 1825f757f3fSDimitry Andric 1835f757f3fSDimitry Andric Impl.DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree(); 1845f757f3fSDimitry Andric auto &TM = TPC->getTM<TargetMachine>(); 1855f757f3fSDimitry Andric Impl.TLI = TM.getSubtargetImpl(F)->getTargetLowering(); 1865f757f3fSDimitry Andric Impl.MaxFactor = Impl.TLI->getMaxSupportedInterleaveFactor(); 1875f757f3fSDimitry Andric 1885f757f3fSDimitry Andric return Impl.runOnFunction(F); 1895f757f3fSDimitry Andric } 1905f757f3fSDimitry Andric 1910b57cec5SDimitry Andric INITIALIZE_PASS_BEGIN(InterleavedAccess, DEBUG_TYPE, 1920b57cec5SDimitry Andric "Lower interleaved memory accesses to target specific intrinsics", false, 1930b57cec5SDimitry Andric false) 1940b57cec5SDimitry Andric INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass) 1950b57cec5SDimitry Andric INITIALIZE_PASS_END(InterleavedAccess, DEBUG_TYPE, 1960b57cec5SDimitry Andric "Lower interleaved memory accesses to target specific intrinsics", false, 1970b57cec5SDimitry Andric false) 1980b57cec5SDimitry Andric 1990b57cec5SDimitry Andric FunctionPass *llvm::createInterleavedAccessPass() { 2000b57cec5SDimitry Andric return new InterleavedAccess(); 2010b57cec5SDimitry Andric } 2020b57cec5SDimitry Andric 2030b57cec5SDimitry Andric /// Check if the mask is a DE-interleave mask for an interleaved load. 2040b57cec5SDimitry Andric /// 2050b57cec5SDimitry Andric /// E.g. DE-interleave masks (Factor = 2) could be: 2060b57cec5SDimitry Andric /// <0, 2, 4, 6> (mask of index 0 to extract even elements) 2070b57cec5SDimitry Andric /// <1, 3, 5, 7> (mask of index 1 to extract odd elements) 2080b57cec5SDimitry Andric static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor, 2090b57cec5SDimitry Andric unsigned &Index, unsigned MaxFactor, 2100b57cec5SDimitry Andric unsigned NumLoadElements) { 2110b57cec5SDimitry Andric if (Mask.size() < 2) 2120b57cec5SDimitry Andric return false; 2130b57cec5SDimitry Andric 2140b57cec5SDimitry Andric // Check potential Factors. 2150b57cec5SDimitry Andric for (Factor = 2; Factor <= MaxFactor; Factor++) { 2160b57cec5SDimitry Andric // Make sure we don't produce a load wider than the input load. 2170b57cec5SDimitry Andric if (Mask.size() * Factor > NumLoadElements) 2180b57cec5SDimitry Andric return false; 219*0fca6ea1SDimitry Andric if (ShuffleVectorInst::isDeInterleaveMaskOfFactor(Mask, Factor, Index)) 2200b57cec5SDimitry Andric return true; 2210b57cec5SDimitry Andric } 2220b57cec5SDimitry Andric 2230b57cec5SDimitry Andric return false; 2240b57cec5SDimitry Andric } 2250b57cec5SDimitry Andric 2260b57cec5SDimitry Andric /// Check if the mask can be used in an interleaved store. 2270b57cec5SDimitry Andric // 2280b57cec5SDimitry Andric /// It checks for a more general pattern than the RE-interleave mask. 2290b57cec5SDimitry Andric /// I.e. <x, y, ... z, x+1, y+1, ...z+1, x+2, y+2, ...z+2, ...> 2300b57cec5SDimitry Andric /// E.g. For a Factor of 2 (LaneLen=4): <4, 32, 5, 33, 6, 34, 7, 35> 2310b57cec5SDimitry Andric /// E.g. For a Factor of 3 (LaneLen=4): <4, 32, 16, 5, 33, 17, 6, 34, 18, 7, 35, 19> 2320b57cec5SDimitry Andric /// E.g. For a Factor of 4 (LaneLen=2): <8, 2, 12, 4, 9, 3, 13, 5> 2330b57cec5SDimitry Andric /// 2340b57cec5SDimitry Andric /// The particular case of an RE-interleave mask is: 2350b57cec5SDimitry Andric /// I.e. <0, LaneLen, ... , LaneLen*(Factor - 1), 1, LaneLen + 1, ...> 2360b57cec5SDimitry Andric /// E.g. For a Factor of 2 (LaneLen=4): <0, 4, 1, 5, 2, 6, 3, 7> 23706c3fb27SDimitry Andric static bool isReInterleaveMask(ShuffleVectorInst *SVI, unsigned &Factor, 23806c3fb27SDimitry Andric unsigned MaxFactor) { 23906c3fb27SDimitry Andric unsigned NumElts = SVI->getShuffleMask().size(); 2400b57cec5SDimitry Andric if (NumElts < 4) 2410b57cec5SDimitry Andric return false; 2420b57cec5SDimitry Andric 2430b57cec5SDimitry Andric // Check potential Factors. 2440b57cec5SDimitry Andric for (Factor = 2; Factor <= MaxFactor; Factor++) { 24506c3fb27SDimitry Andric if (SVI->isInterleave(Factor)) 2460b57cec5SDimitry Andric return true; 2470b57cec5SDimitry Andric } 2480b57cec5SDimitry Andric 2490b57cec5SDimitry Andric return false; 2500b57cec5SDimitry Andric } 2510b57cec5SDimitry Andric 2525f757f3fSDimitry Andric bool InterleavedAccessImpl::lowerInterleavedLoad( 2530b57cec5SDimitry Andric LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) { 2545ffd83dbSDimitry Andric if (!LI->isSimple() || isa<ScalableVectorType>(LI->getType())) 2550b57cec5SDimitry Andric return false; 2560b57cec5SDimitry Andric 257e8d8bef9SDimitry Andric // Check if all users of this load are shufflevectors. If we encounter any 258e8d8bef9SDimitry Andric // users that are extractelement instructions or binary operators, we save 259e8d8bef9SDimitry Andric // them to later check if they can be modified to extract from one of the 260e8d8bef9SDimitry Andric // shufflevectors instead of the load. 261e8d8bef9SDimitry Andric 2620b57cec5SDimitry Andric SmallVector<ShuffleVectorInst *, 4> Shuffles; 2630b57cec5SDimitry Andric SmallVector<ExtractElementInst *, 4> Extracts; 264e8d8bef9SDimitry Andric // BinOpShuffles need to be handled a single time in case both operands of the 265e8d8bef9SDimitry Andric // binop are the same load. 266e8d8bef9SDimitry Andric SmallSetVector<ShuffleVectorInst *, 4> BinOpShuffles; 2670b57cec5SDimitry Andric 268e8d8bef9SDimitry Andric for (auto *User : LI->users()) { 269e8d8bef9SDimitry Andric auto *Extract = dyn_cast<ExtractElementInst>(User); 2700b57cec5SDimitry Andric if (Extract && isa<ConstantInt>(Extract->getIndexOperand())) { 2710b57cec5SDimitry Andric Extracts.push_back(Extract); 2720b57cec5SDimitry Andric continue; 2730b57cec5SDimitry Andric } 274753f127fSDimitry Andric if (auto *BI = dyn_cast<BinaryOperator>(User)) { 2755f757f3fSDimitry Andric if (!BI->user_empty() && all_of(BI->users(), [](auto *U) { 27606c3fb27SDimitry Andric auto *SVI = dyn_cast<ShuffleVectorInst>(U); 27706c3fb27SDimitry Andric return SVI && isa<UndefValue>(SVI->getOperand(1)); 27806c3fb27SDimitry Andric })) { 279753f127fSDimitry Andric for (auto *SVI : BI->users()) 280753f127fSDimitry Andric BinOpShuffles.insert(cast<ShuffleVectorInst>(SVI)); 281e8d8bef9SDimitry Andric continue; 282e8d8bef9SDimitry Andric } 283e8d8bef9SDimitry Andric } 284e8d8bef9SDimitry Andric auto *SVI = dyn_cast<ShuffleVectorInst>(User); 2850b57cec5SDimitry Andric if (!SVI || !isa<UndefValue>(SVI->getOperand(1))) 2860b57cec5SDimitry Andric return false; 2870b57cec5SDimitry Andric 2880b57cec5SDimitry Andric Shuffles.push_back(SVI); 2890b57cec5SDimitry Andric } 2900b57cec5SDimitry Andric 291e8d8bef9SDimitry Andric if (Shuffles.empty() && BinOpShuffles.empty()) 2920b57cec5SDimitry Andric return false; 2930b57cec5SDimitry Andric 2940b57cec5SDimitry Andric unsigned Factor, Index; 2950b57cec5SDimitry Andric 2965ffd83dbSDimitry Andric unsigned NumLoadElements = 2975ffd83dbSDimitry Andric cast<FixedVectorType>(LI->getType())->getNumElements(); 298e8d8bef9SDimitry Andric auto *FirstSVI = Shuffles.size() > 0 ? Shuffles[0] : BinOpShuffles[0]; 2990b57cec5SDimitry Andric // Check if the first shufflevector is DE-interleave shuffle. 300e8d8bef9SDimitry Andric if (!isDeInterleaveMask(FirstSVI->getShuffleMask(), Factor, Index, MaxFactor, 301e8d8bef9SDimitry Andric NumLoadElements)) 3020b57cec5SDimitry Andric return false; 3030b57cec5SDimitry Andric 3040b57cec5SDimitry Andric // Holds the corresponding index for each DE-interleave shuffle. 3050b57cec5SDimitry Andric SmallVector<unsigned, 4> Indices; 3060b57cec5SDimitry Andric 307e8d8bef9SDimitry Andric Type *VecTy = FirstSVI->getType(); 3080b57cec5SDimitry Andric 3090b57cec5SDimitry Andric // Check if other shufflevectors are also DE-interleaved of the same type 3100b57cec5SDimitry Andric // and factor as the first shufflevector. 311e8d8bef9SDimitry Andric for (auto *Shuffle : Shuffles) { 312e8d8bef9SDimitry Andric if (Shuffle->getType() != VecTy) 3130b57cec5SDimitry Andric return false; 314*0fca6ea1SDimitry Andric if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor( 315*0fca6ea1SDimitry Andric Shuffle->getShuffleMask(), Factor, Index)) 3160b57cec5SDimitry Andric return false; 3170b57cec5SDimitry Andric 318e8d8bef9SDimitry Andric assert(Shuffle->getShuffleMask().size() <= NumLoadElements); 319e8d8bef9SDimitry Andric Indices.push_back(Index); 320e8d8bef9SDimitry Andric } 321e8d8bef9SDimitry Andric for (auto *Shuffle : BinOpShuffles) { 322e8d8bef9SDimitry Andric if (Shuffle->getType() != VecTy) 323e8d8bef9SDimitry Andric return false; 324*0fca6ea1SDimitry Andric if (!ShuffleVectorInst::isDeInterleaveMaskOfFactor( 325*0fca6ea1SDimitry Andric Shuffle->getShuffleMask(), Factor, Index)) 326e8d8bef9SDimitry Andric return false; 327e8d8bef9SDimitry Andric 328e8d8bef9SDimitry Andric assert(Shuffle->getShuffleMask().size() <= NumLoadElements); 329e8d8bef9SDimitry Andric 330e8d8bef9SDimitry Andric if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(0) == LI) 331e8d8bef9SDimitry Andric Indices.push_back(Index); 332e8d8bef9SDimitry Andric if (cast<Instruction>(Shuffle->getOperand(0))->getOperand(1) == LI) 3330b57cec5SDimitry Andric Indices.push_back(Index); 3340b57cec5SDimitry Andric } 3350b57cec5SDimitry Andric 3360b57cec5SDimitry Andric // Try and modify users of the load that are extractelement instructions to 3370b57cec5SDimitry Andric // use the shufflevector instructions instead of the load. 3380b57cec5SDimitry Andric if (!tryReplaceExtracts(Extracts, Shuffles)) 3390b57cec5SDimitry Andric return false; 3400b57cec5SDimitry Andric 341e8d8bef9SDimitry Andric bool BinOpShuffleChanged = 342e8d8bef9SDimitry Andric replaceBinOpShuffles(BinOpShuffles.getArrayRef(), Shuffles, LI); 343e8d8bef9SDimitry Andric 3440b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n"); 3450b57cec5SDimitry Andric 3460b57cec5SDimitry Andric // Try to create target specific intrinsics to replace the load and shuffles. 347e8d8bef9SDimitry Andric if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor)) { 348e8d8bef9SDimitry Andric // If Extracts is not empty, tryReplaceExtracts made changes earlier. 349e8d8bef9SDimitry Andric return !Extracts.empty() || BinOpShuffleChanged; 350e8d8bef9SDimitry Andric } 3510b57cec5SDimitry Andric 352fe6060f1SDimitry Andric append_range(DeadInsts, Shuffles); 3530b57cec5SDimitry Andric 3540b57cec5SDimitry Andric DeadInsts.push_back(LI); 3550b57cec5SDimitry Andric return true; 3560b57cec5SDimitry Andric } 3570b57cec5SDimitry Andric 3585f757f3fSDimitry Andric bool InterleavedAccessImpl::replaceBinOpShuffles( 359e8d8bef9SDimitry Andric ArrayRef<ShuffleVectorInst *> BinOpShuffles, 360e8d8bef9SDimitry Andric SmallVectorImpl<ShuffleVectorInst *> &Shuffles, LoadInst *LI) { 361e8d8bef9SDimitry Andric for (auto *SVI : BinOpShuffles) { 362e8d8bef9SDimitry Andric BinaryOperator *BI = cast<BinaryOperator>(SVI->getOperand(0)); 363e8d8bef9SDimitry Andric Type *BIOp0Ty = BI->getOperand(0)->getType(); 364e8d8bef9SDimitry Andric ArrayRef<int> Mask = SVI->getShuffleMask(); 365e8d8bef9SDimitry Andric assert(all_of(Mask, [&](int Idx) { 366e8d8bef9SDimitry Andric return Idx < (int)cast<FixedVectorType>(BIOp0Ty)->getNumElements(); 367e8d8bef9SDimitry Andric })); 368e8d8bef9SDimitry Andric 369*0fca6ea1SDimitry Andric BasicBlock::iterator insertPos = SVI->getIterator(); 370e8d8bef9SDimitry Andric auto *NewSVI1 = 371e8d8bef9SDimitry Andric new ShuffleVectorInst(BI->getOperand(0), PoisonValue::get(BIOp0Ty), 372*0fca6ea1SDimitry Andric Mask, SVI->getName(), insertPos); 373e8d8bef9SDimitry Andric auto *NewSVI2 = new ShuffleVectorInst( 374e8d8bef9SDimitry Andric BI->getOperand(1), PoisonValue::get(BI->getOperand(1)->getType()), Mask, 375*0fca6ea1SDimitry Andric SVI->getName(), insertPos); 376fe6060f1SDimitry Andric BinaryOperator *NewBI = BinaryOperator::CreateWithCopiedFlags( 377*0fca6ea1SDimitry Andric BI->getOpcode(), NewSVI1, NewSVI2, BI, BI->getName(), insertPos); 378e8d8bef9SDimitry Andric SVI->replaceAllUsesWith(NewBI); 379e8d8bef9SDimitry Andric LLVM_DEBUG(dbgs() << " Replaced: " << *BI << "\n And : " << *SVI 380e8d8bef9SDimitry Andric << "\n With : " << *NewSVI1 << "\n And : " 381e8d8bef9SDimitry Andric << *NewSVI2 << "\n And : " << *NewBI << "\n"); 382e8d8bef9SDimitry Andric RecursivelyDeleteTriviallyDeadInstructions(SVI); 383e8d8bef9SDimitry Andric if (NewSVI1->getOperand(0) == LI) 384e8d8bef9SDimitry Andric Shuffles.push_back(NewSVI1); 385e8d8bef9SDimitry Andric if (NewSVI2->getOperand(0) == LI) 386e8d8bef9SDimitry Andric Shuffles.push_back(NewSVI2); 387e8d8bef9SDimitry Andric } 388e8d8bef9SDimitry Andric 389e8d8bef9SDimitry Andric return !BinOpShuffles.empty(); 390e8d8bef9SDimitry Andric } 391e8d8bef9SDimitry Andric 3925f757f3fSDimitry Andric bool InterleavedAccessImpl::tryReplaceExtracts( 3930b57cec5SDimitry Andric ArrayRef<ExtractElementInst *> Extracts, 3940b57cec5SDimitry Andric ArrayRef<ShuffleVectorInst *> Shuffles) { 3950b57cec5SDimitry Andric // If there aren't any extractelement instructions to modify, there's nothing 3960b57cec5SDimitry Andric // to do. 3970b57cec5SDimitry Andric if (Extracts.empty()) 3980b57cec5SDimitry Andric return true; 3990b57cec5SDimitry Andric 4000b57cec5SDimitry Andric // Maps extractelement instructions to vector-index pairs. The extractlement 4010b57cec5SDimitry Andric // instructions will be modified to use the new vector and index operands. 4020b57cec5SDimitry Andric DenseMap<ExtractElementInst *, std::pair<Value *, int>> ReplacementMap; 4030b57cec5SDimitry Andric 4040b57cec5SDimitry Andric for (auto *Extract : Extracts) { 4050b57cec5SDimitry Andric // The vector index that is extracted. 4060b57cec5SDimitry Andric auto *IndexOperand = cast<ConstantInt>(Extract->getIndexOperand()); 4070b57cec5SDimitry Andric auto Index = IndexOperand->getSExtValue(); 4080b57cec5SDimitry Andric 4090b57cec5SDimitry Andric // Look for a suitable shufflevector instruction. The goal is to modify the 4100b57cec5SDimitry Andric // extractelement instruction (which uses an interleaved load) to use one 4110b57cec5SDimitry Andric // of the shufflevector instructions instead of the load. 4120b57cec5SDimitry Andric for (auto *Shuffle : Shuffles) { 4130b57cec5SDimitry Andric // If the shufflevector instruction doesn't dominate the extract, we 4140b57cec5SDimitry Andric // can't create a use of it. 4150b57cec5SDimitry Andric if (!DT->dominates(Shuffle, Extract)) 4160b57cec5SDimitry Andric continue; 4170b57cec5SDimitry Andric 4180b57cec5SDimitry Andric // Inspect the indices of the shufflevector instruction. If the shuffle 4190b57cec5SDimitry Andric // selects the same index that is extracted, we can modify the 4200b57cec5SDimitry Andric // extractelement instruction. 4210b57cec5SDimitry Andric SmallVector<int, 4> Indices; 4220b57cec5SDimitry Andric Shuffle->getShuffleMask(Indices); 4230b57cec5SDimitry Andric for (unsigned I = 0; I < Indices.size(); ++I) 4240b57cec5SDimitry Andric if (Indices[I] == Index) { 4250b57cec5SDimitry Andric assert(Extract->getOperand(0) == Shuffle->getOperand(0) && 4260b57cec5SDimitry Andric "Vector operations do not match"); 4270b57cec5SDimitry Andric ReplacementMap[Extract] = std::make_pair(Shuffle, I); 4280b57cec5SDimitry Andric break; 4290b57cec5SDimitry Andric } 4300b57cec5SDimitry Andric 4310b57cec5SDimitry Andric // If we found a suitable shufflevector instruction, stop looking. 4320b57cec5SDimitry Andric if (ReplacementMap.count(Extract)) 4330b57cec5SDimitry Andric break; 4340b57cec5SDimitry Andric } 4350b57cec5SDimitry Andric 4360b57cec5SDimitry Andric // If we did not find a suitable shufflevector instruction, the 4370b57cec5SDimitry Andric // extractelement instruction cannot be modified, so we must give up. 4380b57cec5SDimitry Andric if (!ReplacementMap.count(Extract)) 4390b57cec5SDimitry Andric return false; 4400b57cec5SDimitry Andric } 4410b57cec5SDimitry Andric 4420b57cec5SDimitry Andric // Finally, perform the replacements. 4430b57cec5SDimitry Andric IRBuilder<> Builder(Extracts[0]->getContext()); 4440b57cec5SDimitry Andric for (auto &Replacement : ReplacementMap) { 4450b57cec5SDimitry Andric auto *Extract = Replacement.first; 4460b57cec5SDimitry Andric auto *Vector = Replacement.second.first; 4470b57cec5SDimitry Andric auto Index = Replacement.second.second; 4480b57cec5SDimitry Andric Builder.SetInsertPoint(Extract); 4490b57cec5SDimitry Andric Extract->replaceAllUsesWith(Builder.CreateExtractElement(Vector, Index)); 4500b57cec5SDimitry Andric Extract->eraseFromParent(); 4510b57cec5SDimitry Andric } 4520b57cec5SDimitry Andric 4530b57cec5SDimitry Andric return true; 4540b57cec5SDimitry Andric } 4550b57cec5SDimitry Andric 4565f757f3fSDimitry Andric bool InterleavedAccessImpl::lowerInterleavedStore( 4570b57cec5SDimitry Andric StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) { 4580b57cec5SDimitry Andric if (!SI->isSimple()) 4590b57cec5SDimitry Andric return false; 4600b57cec5SDimitry Andric 461e8d8bef9SDimitry Andric auto *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand()); 4625ffd83dbSDimitry Andric if (!SVI || !SVI->hasOneUse() || isa<ScalableVectorType>(SVI->getType())) 4630b57cec5SDimitry Andric return false; 4640b57cec5SDimitry Andric 4650b57cec5SDimitry Andric // Check if the shufflevector is RE-interleave shuffle. 4660b57cec5SDimitry Andric unsigned Factor; 46706c3fb27SDimitry Andric if (!isReInterleaveMask(SVI, Factor, MaxFactor)) 4680b57cec5SDimitry Andric return false; 4690b57cec5SDimitry Andric 4700b57cec5SDimitry Andric LLVM_DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n"); 4710b57cec5SDimitry Andric 4720b57cec5SDimitry Andric // Try to create target specific intrinsics to replace the store and shuffle. 4730b57cec5SDimitry Andric if (!TLI->lowerInterleavedStore(SI, SVI, Factor)) 4740b57cec5SDimitry Andric return false; 4750b57cec5SDimitry Andric 4760b57cec5SDimitry Andric // Already have a new target specific interleaved store. Erase the old store. 4770b57cec5SDimitry Andric DeadInsts.push_back(SI); 4780b57cec5SDimitry Andric DeadInsts.push_back(SVI); 4790b57cec5SDimitry Andric return true; 4800b57cec5SDimitry Andric } 4810b57cec5SDimitry Andric 4825f757f3fSDimitry Andric bool InterleavedAccessImpl::lowerDeinterleaveIntrinsic( 48306c3fb27SDimitry Andric IntrinsicInst *DI, SmallVector<Instruction *, 32> &DeadInsts) { 48406c3fb27SDimitry Andric LoadInst *LI = dyn_cast<LoadInst>(DI->getOperand(0)); 48506c3fb27SDimitry Andric 48606c3fb27SDimitry Andric if (!LI || !LI->hasOneUse() || !LI->isSimple()) 48706c3fb27SDimitry Andric return false; 48806c3fb27SDimitry Andric 48906c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "IA: Found a deinterleave intrinsic: " << *DI << "\n"); 49006c3fb27SDimitry Andric 49106c3fb27SDimitry Andric // Try and match this with target specific intrinsics. 49206c3fb27SDimitry Andric if (!TLI->lowerDeinterleaveIntrinsicToLoad(DI, LI)) 49306c3fb27SDimitry Andric return false; 49406c3fb27SDimitry Andric 49506c3fb27SDimitry Andric // We now have a target-specific load, so delete the old one. 49606c3fb27SDimitry Andric DeadInsts.push_back(DI); 49706c3fb27SDimitry Andric DeadInsts.push_back(LI); 49806c3fb27SDimitry Andric return true; 49906c3fb27SDimitry Andric } 50006c3fb27SDimitry Andric 5015f757f3fSDimitry Andric bool InterleavedAccessImpl::lowerInterleaveIntrinsic( 50206c3fb27SDimitry Andric IntrinsicInst *II, SmallVector<Instruction *, 32> &DeadInsts) { 50306c3fb27SDimitry Andric if (!II->hasOneUse()) 50406c3fb27SDimitry Andric return false; 50506c3fb27SDimitry Andric 50606c3fb27SDimitry Andric StoreInst *SI = dyn_cast<StoreInst>(*(II->users().begin())); 50706c3fb27SDimitry Andric 50806c3fb27SDimitry Andric if (!SI || !SI->isSimple()) 50906c3fb27SDimitry Andric return false; 51006c3fb27SDimitry Andric 51106c3fb27SDimitry Andric LLVM_DEBUG(dbgs() << "IA: Found an interleave intrinsic: " << *II << "\n"); 51206c3fb27SDimitry Andric 51306c3fb27SDimitry Andric // Try and match this with target specific intrinsics. 51406c3fb27SDimitry Andric if (!TLI->lowerInterleaveIntrinsicToStore(II, SI)) 51506c3fb27SDimitry Andric return false; 51606c3fb27SDimitry Andric 51706c3fb27SDimitry Andric // We now have a target-specific store, so delete the old one. 51806c3fb27SDimitry Andric DeadInsts.push_back(SI); 51906c3fb27SDimitry Andric DeadInsts.push_back(II); 52006c3fb27SDimitry Andric return true; 52106c3fb27SDimitry Andric } 52206c3fb27SDimitry Andric 5235f757f3fSDimitry Andric bool InterleavedAccessImpl::runOnFunction(Function &F) { 5240b57cec5SDimitry Andric // Holds dead instructions that will be erased later. 5250b57cec5SDimitry Andric SmallVector<Instruction *, 32> DeadInsts; 5260b57cec5SDimitry Andric bool Changed = false; 5270b57cec5SDimitry Andric 5280b57cec5SDimitry Andric for (auto &I : instructions(F)) { 529e8d8bef9SDimitry Andric if (auto *LI = dyn_cast<LoadInst>(&I)) 5300b57cec5SDimitry Andric Changed |= lowerInterleavedLoad(LI, DeadInsts); 5310b57cec5SDimitry Andric 532e8d8bef9SDimitry Andric if (auto *SI = dyn_cast<StoreInst>(&I)) 5330b57cec5SDimitry Andric Changed |= lowerInterleavedStore(SI, DeadInsts); 53406c3fb27SDimitry Andric 53506c3fb27SDimitry Andric if (auto *II = dyn_cast<IntrinsicInst>(&I)) { 53606c3fb27SDimitry Andric // At present, we only have intrinsics to represent (de)interleaving 53706c3fb27SDimitry Andric // with a factor of 2. 538*0fca6ea1SDimitry Andric if (II->getIntrinsicID() == Intrinsic::vector_deinterleave2) 53906c3fb27SDimitry Andric Changed |= lowerDeinterleaveIntrinsic(II, DeadInsts); 540*0fca6ea1SDimitry Andric if (II->getIntrinsicID() == Intrinsic::vector_interleave2) 54106c3fb27SDimitry Andric Changed |= lowerInterleaveIntrinsic(II, DeadInsts); 54206c3fb27SDimitry Andric } 5430b57cec5SDimitry Andric } 5440b57cec5SDimitry Andric 545fcaf7f86SDimitry Andric for (auto *I : DeadInsts) 5460b57cec5SDimitry Andric I->eraseFromParent(); 5470b57cec5SDimitry Andric 5480b57cec5SDimitry Andric return Changed; 5490b57cec5SDimitry Andric } 550