1fe6060f1SDimitry Andric //===- MVELaneInterleaving.cpp - Inverleave for MVE instructions ----------===// 2fe6060f1SDimitry Andric // 3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information. 5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6fe6060f1SDimitry Andric // 7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 8fe6060f1SDimitry Andric // 9fe6060f1SDimitry Andric // This pass interleaves around sext/zext/trunc instructions. MVE does not have 10fe6060f1SDimitry Andric // a single sext/zext or trunc instruction that takes the bottom half of a 11fe6060f1SDimitry Andric // vector and extends to a full width, like NEON has with MOVL. Instead it is 12fe6060f1SDimitry Andric // expected that this happens through top/bottom instructions. So the MVE 13fe6060f1SDimitry Andric // equivalent VMOVLT/B instructions take either the even or odd elements of the 14fe6060f1SDimitry Andric // input and extend them to the larger type, producing a vector with half the 15fe6060f1SDimitry Andric // number of elements each of double the bitwidth. As there is no simple 16fe6060f1SDimitry Andric // instruction, we often have to turn sext/zext/trunc into a series of lane 17fe6060f1SDimitry Andric // moves (or stack loads/stores, which we do not do yet). 18fe6060f1SDimitry Andric // 19fe6060f1SDimitry Andric // This pass takes vector code that starts at truncs, looks for interconnected 20fe6060f1SDimitry Andric // blobs of operations that end with sext/zext (or constants/splats) of the 21fe6060f1SDimitry Andric // form: 22fe6060f1SDimitry Andric // %sa = sext v8i16 %a to v8i32 23fe6060f1SDimitry Andric // %sb = sext v8i16 %b to v8i32 24fe6060f1SDimitry Andric // %add = add v8i32 %sa, %sb 25fe6060f1SDimitry Andric // %r = trunc %add to v8i16 26fe6060f1SDimitry Andric // And adds shuffles to allow the use of VMOVL/VMOVN instrctions: 27fe6060f1SDimitry Andric // %sha = shuffle v8i16 %a, undef, <0, 2, 4, 6, 1, 3, 5, 7> 28fe6060f1SDimitry Andric // %sa = sext v8i16 %sha to v8i32 29fe6060f1SDimitry Andric // %shb = shuffle v8i16 %b, undef, <0, 2, 4, 6, 1, 3, 5, 7> 30fe6060f1SDimitry Andric // %sb = sext v8i16 %shb to v8i32 31fe6060f1SDimitry Andric // %add = add v8i32 %sa, %sb 32fe6060f1SDimitry Andric // %r = trunc %add to v8i16 33fe6060f1SDimitry Andric // %shr = shuffle v8i16 %r, undef, <0, 4, 1, 5, 2, 6, 3, 7> 34fe6060f1SDimitry Andric // Which can then be split and lowered to MVE instructions efficiently: 35fe6060f1SDimitry Andric // %sa_b = VMOVLB.s16 %a 36fe6060f1SDimitry Andric // %sa_t = VMOVLT.s16 %a 37fe6060f1SDimitry Andric // %sb_b = VMOVLB.s16 %b 38fe6060f1SDimitry Andric // %sb_t = VMOVLT.s16 %b 39fe6060f1SDimitry Andric // %add_b = VADD.i32 %sa_b, %sb_b 40fe6060f1SDimitry Andric // %add_t = VADD.i32 %sa_t, %sb_t 41fe6060f1SDimitry Andric // %r = VMOVNT.i16 %add_b, %add_t 42fe6060f1SDimitry Andric // 43fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 44fe6060f1SDimitry Andric 45fe6060f1SDimitry Andric #include "ARM.h" 46fe6060f1SDimitry Andric #include "ARMBaseInstrInfo.h" 47fe6060f1SDimitry Andric #include "ARMSubtarget.h" 4881ad6265SDimitry Andric #include "llvm/ADT/SetVector.h" 49fe6060f1SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h" 50fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetLowering.h" 51fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h" 52fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h" 53fe6060f1SDimitry Andric #include "llvm/IR/BasicBlock.h" 54fe6060f1SDimitry Andric #include "llvm/IR/Constant.h" 55fe6060f1SDimitry Andric #include "llvm/IR/Constants.h" 56fe6060f1SDimitry Andric #include "llvm/IR/DerivedTypes.h" 57fe6060f1SDimitry Andric #include "llvm/IR/Function.h" 58fe6060f1SDimitry Andric #include "llvm/IR/IRBuilder.h" 59fe6060f1SDimitry Andric #include "llvm/IR/InstIterator.h" 60fe6060f1SDimitry Andric #include "llvm/IR/InstrTypes.h" 61fe6060f1SDimitry Andric #include "llvm/IR/Instruction.h" 62fe6060f1SDimitry Andric #include "llvm/IR/Instructions.h" 63fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicInst.h" 64fe6060f1SDimitry Andric #include "llvm/IR/Intrinsics.h" 65fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsARM.h" 66fe6060f1SDimitry Andric #include "llvm/IR/PatternMatch.h" 67fe6060f1SDimitry Andric #include "llvm/IR/Type.h" 68fe6060f1SDimitry Andric #include "llvm/IR/Value.h" 69fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 70fe6060f1SDimitry Andric #include "llvm/Pass.h" 71fe6060f1SDimitry Andric #include "llvm/Support/Casting.h" 72fe6060f1SDimitry Andric #include <algorithm> 73fe6060f1SDimitry Andric #include <cassert> 74fe6060f1SDimitry Andric 75fe6060f1SDimitry Andric using namespace llvm; 76fe6060f1SDimitry Andric 77fe6060f1SDimitry Andric #define DEBUG_TYPE "mve-laneinterleave" 78fe6060f1SDimitry Andric 79fe6060f1SDimitry Andric cl::opt<bool> EnableInterleave( 80fe6060f1SDimitry Andric "enable-mve-interleave", cl::Hidden, cl::init(true), 81fe6060f1SDimitry Andric cl::desc("Enable interleave MVE vector operation lowering")); 82fe6060f1SDimitry Andric 83fe6060f1SDimitry Andric namespace { 84fe6060f1SDimitry Andric 85fe6060f1SDimitry Andric class MVELaneInterleaving : public FunctionPass { 86fe6060f1SDimitry Andric public: 87fe6060f1SDimitry Andric static char ID; // Pass identification, replacement for typeid 88fe6060f1SDimitry Andric 89fe6060f1SDimitry Andric explicit MVELaneInterleaving() : FunctionPass(ID) { 90fe6060f1SDimitry Andric initializeMVELaneInterleavingPass(*PassRegistry::getPassRegistry()); 91fe6060f1SDimitry Andric } 92fe6060f1SDimitry Andric 93fe6060f1SDimitry Andric bool runOnFunction(Function &F) override; 94fe6060f1SDimitry Andric 95fe6060f1SDimitry Andric StringRef getPassName() const override { return "MVE lane interleaving"; } 96fe6060f1SDimitry Andric 97fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 98fe6060f1SDimitry Andric AU.setPreservesCFG(); 99fe6060f1SDimitry Andric AU.addRequired<TargetPassConfig>(); 100fe6060f1SDimitry Andric FunctionPass::getAnalysisUsage(AU); 101fe6060f1SDimitry Andric } 102fe6060f1SDimitry Andric }; 103fe6060f1SDimitry Andric 104fe6060f1SDimitry Andric } // end anonymous namespace 105fe6060f1SDimitry Andric 106fe6060f1SDimitry Andric char MVELaneInterleaving::ID = 0; 107fe6060f1SDimitry Andric 108fe6060f1SDimitry Andric INITIALIZE_PASS(MVELaneInterleaving, DEBUG_TYPE, "MVE lane interleaving", false, 109fe6060f1SDimitry Andric false) 110fe6060f1SDimitry Andric 111fe6060f1SDimitry Andric Pass *llvm::createMVELaneInterleavingPass() { 112fe6060f1SDimitry Andric return new MVELaneInterleaving(); 113fe6060f1SDimitry Andric } 114fe6060f1SDimitry Andric 115fe6060f1SDimitry Andric static bool isProfitableToInterleave(SmallSetVector<Instruction *, 4> &Exts, 116fe6060f1SDimitry Andric SmallSetVector<Instruction *, 4> &Truncs) { 117fe6060f1SDimitry Andric // This is not always beneficial to transform. Exts can be incorporated into 118fe6060f1SDimitry Andric // loads, Truncs can be folded into stores. 119fe6060f1SDimitry Andric // Truncs are usually the same number of instructions, 120fe6060f1SDimitry Andric // VSTRH.32(A);VSTRH.32(B) vs VSTRH.16(VMOVNT A, B) with interleaving 121fe6060f1SDimitry Andric // Exts are unfortunately more instructions in the general case: 122fe6060f1SDimitry Andric // A=VLDRH.32; B=VLDRH.32; 123fe6060f1SDimitry Andric // vs with interleaving: 124fe6060f1SDimitry Andric // T=VLDRH.16; A=VMOVNB T; B=VMOVNT T 125fe6060f1SDimitry Andric // But those VMOVL may be folded into a VMULL. 126fe6060f1SDimitry Andric 127fe6060f1SDimitry Andric // But expensive extends/truncs are always good to remove. FPExts always 128fe6060f1SDimitry Andric // involve extra VCVT's so are always considered to be beneficial to convert. 129fe6060f1SDimitry Andric for (auto *E : Exts) { 130fe6060f1SDimitry Andric if (isa<FPExtInst>(E) || !isa<LoadInst>(E->getOperand(0))) { 131fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Beneficial due to " << *E << "\n"); 132fe6060f1SDimitry Andric return true; 133fe6060f1SDimitry Andric } 134fe6060f1SDimitry Andric } 135fe6060f1SDimitry Andric for (auto *T : Truncs) { 136fe6060f1SDimitry Andric if (T->hasOneUse() && !isa<StoreInst>(*T->user_begin())) { 137fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Beneficial due to " << *T << "\n"); 138fe6060f1SDimitry Andric return true; 139fe6060f1SDimitry Andric } 140fe6060f1SDimitry Andric } 141fe6060f1SDimitry Andric 142fe6060f1SDimitry Andric // Otherwise, we know we have a load(ext), see if any of the Extends are a 143fe6060f1SDimitry Andric // vmull. This is a simple heuristic and certainly not perfect. 144fe6060f1SDimitry Andric for (auto *E : Exts) { 145fe6060f1SDimitry Andric if (!E->hasOneUse() || 146fe6060f1SDimitry Andric cast<Instruction>(*E->user_begin())->getOpcode() != Instruction::Mul) { 147fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Not beneficial due to " << *E << "\n"); 148fe6060f1SDimitry Andric return false; 149fe6060f1SDimitry Andric } 150fe6060f1SDimitry Andric } 151fe6060f1SDimitry Andric return true; 152fe6060f1SDimitry Andric } 153fe6060f1SDimitry Andric 154fe6060f1SDimitry Andric static bool tryInterleave(Instruction *Start, 155fe6060f1SDimitry Andric SmallPtrSetImpl<Instruction *> &Visited) { 156fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "tryInterleave from " << *Start << "\n"); 157fe6060f1SDimitry Andric auto *VT = cast<FixedVectorType>(Start->getType()); 158fe6060f1SDimitry Andric 159fe6060f1SDimitry Andric if (!isa<Instruction>(Start->getOperand(0))) 160fe6060f1SDimitry Andric return false; 161fe6060f1SDimitry Andric 162fe6060f1SDimitry Andric // Look for connected operations starting from Ext's, terminating at Truncs. 163fe6060f1SDimitry Andric std::vector<Instruction *> Worklist; 164fe6060f1SDimitry Andric Worklist.push_back(Start); 165fe6060f1SDimitry Andric Worklist.push_back(cast<Instruction>(Start->getOperand(0))); 166fe6060f1SDimitry Andric 167fe6060f1SDimitry Andric SmallSetVector<Instruction *, 4> Truncs; 168fe6060f1SDimitry Andric SmallSetVector<Instruction *, 4> Exts; 169fe6060f1SDimitry Andric SmallSetVector<Use *, 4> OtherLeafs; 170fe6060f1SDimitry Andric SmallSetVector<Instruction *, 4> Ops; 171fe6060f1SDimitry Andric 172fe6060f1SDimitry Andric while (!Worklist.empty()) { 173fe6060f1SDimitry Andric Instruction *I = Worklist.back(); 174fe6060f1SDimitry Andric Worklist.pop_back(); 175fe6060f1SDimitry Andric 176fe6060f1SDimitry Andric switch (I->getOpcode()) { 177fe6060f1SDimitry Andric // Truncs 178fe6060f1SDimitry Andric case Instruction::Trunc: 179fe6060f1SDimitry Andric case Instruction::FPTrunc: 18081ad6265SDimitry Andric if (!Truncs.insert(I)) 181fe6060f1SDimitry Andric continue; 182fe6060f1SDimitry Andric Visited.insert(I); 183fe6060f1SDimitry Andric break; 184fe6060f1SDimitry Andric 185fe6060f1SDimitry Andric // Extend leafs 186fe6060f1SDimitry Andric case Instruction::SExt: 187fe6060f1SDimitry Andric case Instruction::ZExt: 188fe6060f1SDimitry Andric case Instruction::FPExt: 189fe6060f1SDimitry Andric if (Exts.count(I)) 190fe6060f1SDimitry Andric continue; 191fe6060f1SDimitry Andric for (auto *Use : I->users()) 192fe6060f1SDimitry Andric Worklist.push_back(cast<Instruction>(Use)); 193fe6060f1SDimitry Andric Exts.insert(I); 194fe6060f1SDimitry Andric break; 195fe6060f1SDimitry Andric 196fe6060f1SDimitry Andric case Instruction::Call: { 197fe6060f1SDimitry Andric IntrinsicInst *II = dyn_cast<IntrinsicInst>(I); 198fe6060f1SDimitry Andric if (!II) 199fe6060f1SDimitry Andric return false; 200fe6060f1SDimitry Andric 201fe6060f1SDimitry Andric switch (II->getIntrinsicID()) { 202fe6060f1SDimitry Andric case Intrinsic::abs: 203fe6060f1SDimitry Andric case Intrinsic::smin: 204fe6060f1SDimitry Andric case Intrinsic::smax: 205fe6060f1SDimitry Andric case Intrinsic::umin: 206fe6060f1SDimitry Andric case Intrinsic::umax: 207fe6060f1SDimitry Andric case Intrinsic::sadd_sat: 208fe6060f1SDimitry Andric case Intrinsic::ssub_sat: 209fe6060f1SDimitry Andric case Intrinsic::uadd_sat: 210fe6060f1SDimitry Andric case Intrinsic::usub_sat: 211fe6060f1SDimitry Andric case Intrinsic::minnum: 212fe6060f1SDimitry Andric case Intrinsic::maxnum: 213fe6060f1SDimitry Andric case Intrinsic::fabs: 214fe6060f1SDimitry Andric case Intrinsic::fma: 215fe6060f1SDimitry Andric case Intrinsic::ceil: 216fe6060f1SDimitry Andric case Intrinsic::floor: 217fe6060f1SDimitry Andric case Intrinsic::rint: 218fe6060f1SDimitry Andric case Intrinsic::round: 219fe6060f1SDimitry Andric case Intrinsic::trunc: 220fe6060f1SDimitry Andric break; 221fe6060f1SDimitry Andric default: 222fe6060f1SDimitry Andric return false; 223fe6060f1SDimitry Andric } 224*bdd1243dSDimitry Andric [[fallthrough]]; // Fall through to treating these like an operator below. 225fe6060f1SDimitry Andric } 226fe6060f1SDimitry Andric // Binary/tertiary ops 227fe6060f1SDimitry Andric case Instruction::Add: 228fe6060f1SDimitry Andric case Instruction::Sub: 229fe6060f1SDimitry Andric case Instruction::Mul: 230fe6060f1SDimitry Andric case Instruction::AShr: 231fe6060f1SDimitry Andric case Instruction::LShr: 232fe6060f1SDimitry Andric case Instruction::Shl: 233fe6060f1SDimitry Andric case Instruction::ICmp: 234fe6060f1SDimitry Andric case Instruction::FCmp: 235fe6060f1SDimitry Andric case Instruction::FAdd: 236fe6060f1SDimitry Andric case Instruction::FMul: 237fe6060f1SDimitry Andric case Instruction::Select: 23881ad6265SDimitry Andric if (!Ops.insert(I)) 239fe6060f1SDimitry Andric continue; 240fe6060f1SDimitry Andric 241fe6060f1SDimitry Andric for (Use &Op : I->operands()) { 242fe6060f1SDimitry Andric if (!isa<FixedVectorType>(Op->getType())) 243fe6060f1SDimitry Andric continue; 244fe6060f1SDimitry Andric if (isa<Instruction>(Op)) 245fe6060f1SDimitry Andric Worklist.push_back(cast<Instruction>(&Op)); 246fe6060f1SDimitry Andric else 247fe6060f1SDimitry Andric OtherLeafs.insert(&Op); 248fe6060f1SDimitry Andric } 249fe6060f1SDimitry Andric 250fe6060f1SDimitry Andric for (auto *Use : I->users()) 251fe6060f1SDimitry Andric Worklist.push_back(cast<Instruction>(Use)); 252fe6060f1SDimitry Andric break; 253fe6060f1SDimitry Andric 254fe6060f1SDimitry Andric case Instruction::ShuffleVector: 255fe6060f1SDimitry Andric // A shuffle of a splat is a splat. 256fe6060f1SDimitry Andric if (cast<ShuffleVectorInst>(I)->isZeroEltSplat()) 257fe6060f1SDimitry Andric continue; 258*bdd1243dSDimitry Andric [[fallthrough]]; 259fe6060f1SDimitry Andric 260fe6060f1SDimitry Andric default: 261fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Unhandled instruction: " << *I << "\n"); 262fe6060f1SDimitry Andric return false; 263fe6060f1SDimitry Andric } 264fe6060f1SDimitry Andric } 265fe6060f1SDimitry Andric 266fe6060f1SDimitry Andric if (Exts.empty() && OtherLeafs.empty()) 267fe6060f1SDimitry Andric return false; 268fe6060f1SDimitry Andric 269fe6060f1SDimitry Andric LLVM_DEBUG({ 270fe6060f1SDimitry Andric dbgs() << "Found group:\n Exts:"; 271fe6060f1SDimitry Andric for (auto *I : Exts) 272fe6060f1SDimitry Andric dbgs() << " " << *I << "\n"; 273fe6060f1SDimitry Andric dbgs() << " Ops:"; 274fe6060f1SDimitry Andric for (auto *I : Ops) 275fe6060f1SDimitry Andric dbgs() << " " << *I << "\n"; 276fe6060f1SDimitry Andric dbgs() << " OtherLeafs:"; 277fe6060f1SDimitry Andric for (auto *I : OtherLeafs) 278fe6060f1SDimitry Andric dbgs() << " " << *I->get() << " of " << *I->getUser() << "\n"; 279fe6060f1SDimitry Andric dbgs() << "Truncs:"; 280fe6060f1SDimitry Andric for (auto *I : Truncs) 281fe6060f1SDimitry Andric dbgs() << " " << *I << "\n"; 282fe6060f1SDimitry Andric }); 283fe6060f1SDimitry Andric 284fe6060f1SDimitry Andric assert(!Truncs.empty() && "Expected some truncs"); 285fe6060f1SDimitry Andric 286fe6060f1SDimitry Andric // Check types 287fe6060f1SDimitry Andric unsigned NumElts = VT->getNumElements(); 288fe6060f1SDimitry Andric unsigned BaseElts = VT->getScalarSizeInBits() == 16 289fe6060f1SDimitry Andric ? 8 290fe6060f1SDimitry Andric : (VT->getScalarSizeInBits() == 8 ? 16 : 0); 291fe6060f1SDimitry Andric if (BaseElts == 0 || NumElts % BaseElts != 0) { 292fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Type is unsupported\n"); 293fe6060f1SDimitry Andric return false; 294fe6060f1SDimitry Andric } 295fe6060f1SDimitry Andric if (Start->getOperand(0)->getType()->getScalarSizeInBits() != 296fe6060f1SDimitry Andric VT->getScalarSizeInBits() * 2) { 297fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Type not double sized\n"); 298fe6060f1SDimitry Andric return false; 299fe6060f1SDimitry Andric } 300fe6060f1SDimitry Andric for (Instruction *I : Exts) 301fe6060f1SDimitry Andric if (I->getOperand(0)->getType() != VT) { 302fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Wrong type on " << *I << "\n"); 303fe6060f1SDimitry Andric return false; 304fe6060f1SDimitry Andric } 305fe6060f1SDimitry Andric for (Instruction *I : Truncs) 306fe6060f1SDimitry Andric if (I->getType() != VT) { 307fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Wrong type on " << *I << "\n"); 308fe6060f1SDimitry Andric return false; 309fe6060f1SDimitry Andric } 310fe6060f1SDimitry Andric 311fe6060f1SDimitry Andric // Check that it looks beneficial 312fe6060f1SDimitry Andric if (!isProfitableToInterleave(Exts, Truncs)) 313fe6060f1SDimitry Andric return false; 314fe6060f1SDimitry Andric 315fe6060f1SDimitry Andric // Create new shuffles around the extends / truncs / other leaves. 316fe6060f1SDimitry Andric IRBuilder<> Builder(Start); 317fe6060f1SDimitry Andric 318fe6060f1SDimitry Andric SmallVector<int, 16> LeafMask; 319fe6060f1SDimitry Andric SmallVector<int, 16> TruncMask; 320fe6060f1SDimitry Andric // LeafMask : 0, 2, 4, 6, 1, 3, 5, 7 8, 10, 12, 14, 9, 11, 13, 15 321fe6060f1SDimitry Andric // TruncMask: 0, 4, 1, 5, 2, 6, 3, 7 8, 12, 9, 13, 10, 14, 11, 15 322fe6060f1SDimitry Andric for (unsigned Base = 0; Base < NumElts; Base += BaseElts) { 323fe6060f1SDimitry Andric for (unsigned i = 0; i < BaseElts / 2; i++) 324fe6060f1SDimitry Andric LeafMask.push_back(Base + i * 2); 325fe6060f1SDimitry Andric for (unsigned i = 0; i < BaseElts / 2; i++) 326fe6060f1SDimitry Andric LeafMask.push_back(Base + i * 2 + 1); 327fe6060f1SDimitry Andric } 328fe6060f1SDimitry Andric for (unsigned Base = 0; Base < NumElts; Base += BaseElts) { 329fe6060f1SDimitry Andric for (unsigned i = 0; i < BaseElts / 2; i++) { 330fe6060f1SDimitry Andric TruncMask.push_back(Base + i); 331fe6060f1SDimitry Andric TruncMask.push_back(Base + i + BaseElts / 2); 332fe6060f1SDimitry Andric } 333fe6060f1SDimitry Andric } 334fe6060f1SDimitry Andric 335fe6060f1SDimitry Andric for (Instruction *I : Exts) { 336fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing ext " << *I << "\n"); 337fe6060f1SDimitry Andric Builder.SetInsertPoint(I); 338fe6060f1SDimitry Andric Value *Shuffle = Builder.CreateShuffleVector(I->getOperand(0), LeafMask); 339fe6060f1SDimitry Andric bool FPext = isa<FPExtInst>(I); 340fe6060f1SDimitry Andric bool Sext = isa<SExtInst>(I); 341fe6060f1SDimitry Andric Value *Ext = FPext ? Builder.CreateFPExt(Shuffle, I->getType()) 342fe6060f1SDimitry Andric : Sext ? Builder.CreateSExt(Shuffle, I->getType()) 343fe6060f1SDimitry Andric : Builder.CreateZExt(Shuffle, I->getType()); 344fe6060f1SDimitry Andric I->replaceAllUsesWith(Ext); 345fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " with " << *Shuffle << "\n"); 346fe6060f1SDimitry Andric } 347fe6060f1SDimitry Andric 348fe6060f1SDimitry Andric for (Use *I : OtherLeafs) { 349fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing leaf " << *I << "\n"); 350fe6060f1SDimitry Andric Builder.SetInsertPoint(cast<Instruction>(I->getUser())); 351fe6060f1SDimitry Andric Value *Shuffle = Builder.CreateShuffleVector(I->get(), LeafMask); 352fe6060f1SDimitry Andric I->getUser()->setOperand(I->getOperandNo(), Shuffle); 353fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " with " << *Shuffle << "\n"); 354fe6060f1SDimitry Andric } 355fe6060f1SDimitry Andric 356fe6060f1SDimitry Andric for (Instruction *I : Truncs) { 357fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing trunc " << *I << "\n"); 358fe6060f1SDimitry Andric 359fe6060f1SDimitry Andric Builder.SetInsertPoint(I->getParent(), ++I->getIterator()); 360fe6060f1SDimitry Andric Value *Shuf = Builder.CreateShuffleVector(I, TruncMask); 361fe6060f1SDimitry Andric I->replaceAllUsesWith(Shuf); 362fe6060f1SDimitry Andric cast<Instruction>(Shuf)->setOperand(0, I); 363fe6060f1SDimitry Andric 364fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " with " << *Shuf << "\n"); 365fe6060f1SDimitry Andric } 366fe6060f1SDimitry Andric 367fe6060f1SDimitry Andric return true; 368fe6060f1SDimitry Andric } 369fe6060f1SDimitry Andric 370fe6060f1SDimitry Andric bool MVELaneInterleaving::runOnFunction(Function &F) { 371fe6060f1SDimitry Andric if (!EnableInterleave) 372fe6060f1SDimitry Andric return false; 373fe6060f1SDimitry Andric auto &TPC = getAnalysis<TargetPassConfig>(); 374fe6060f1SDimitry Andric auto &TM = TPC.getTM<TargetMachine>(); 375fe6060f1SDimitry Andric auto *ST = &TM.getSubtarget<ARMSubtarget>(F); 376fe6060f1SDimitry Andric if (!ST->hasMVEIntegerOps()) 377fe6060f1SDimitry Andric return false; 378fe6060f1SDimitry Andric 379fe6060f1SDimitry Andric bool Changed = false; 380fe6060f1SDimitry Andric 381fe6060f1SDimitry Andric SmallPtrSet<Instruction *, 16> Visited; 382fe6060f1SDimitry Andric for (Instruction &I : reverse(instructions(F))) { 383fe6060f1SDimitry Andric if (I.getType()->isVectorTy() && 384fe6060f1SDimitry Andric (isa<TruncInst>(I) || isa<FPTruncInst>(I)) && !Visited.count(&I)) 385fe6060f1SDimitry Andric Changed |= tryInterleave(&I, Visited); 386fe6060f1SDimitry Andric } 387fe6060f1SDimitry Andric 388fe6060f1SDimitry Andric return Changed; 389fe6060f1SDimitry Andric } 390