xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/ARM/MVELaneInterleavingPass.cpp (revision 06c3fb2749bda94cb5201f81ffdb8fa6c3161b2e)
1fe6060f1SDimitry Andric //===- MVELaneInterleaving.cpp - Inverleave for MVE instructions ----------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric // This pass interleaves around sext/zext/trunc instructions. MVE does not have
10fe6060f1SDimitry Andric // a single sext/zext or trunc instruction that takes the bottom half of a
11fe6060f1SDimitry Andric // vector and extends to a full width, like NEON has with MOVL. Instead it is
12fe6060f1SDimitry Andric // expected that this happens through top/bottom instructions. So the MVE
13fe6060f1SDimitry Andric // equivalent VMOVLT/B instructions take either the even or odd elements of the
14fe6060f1SDimitry Andric // input and extend them to the larger type, producing a vector with half the
15fe6060f1SDimitry Andric // number of elements each of double the bitwidth. As there is no simple
16fe6060f1SDimitry Andric // instruction, we often have to turn sext/zext/trunc into a series of lane
17fe6060f1SDimitry Andric // moves (or stack loads/stores, which we do not do yet).
18fe6060f1SDimitry Andric //
19fe6060f1SDimitry Andric // This pass takes vector code that starts at truncs, looks for interconnected
20fe6060f1SDimitry Andric // blobs of operations that end with sext/zext (or constants/splats) of the
21fe6060f1SDimitry Andric // form:
22fe6060f1SDimitry Andric //   %sa = sext v8i16 %a to v8i32
23fe6060f1SDimitry Andric //   %sb = sext v8i16 %b to v8i32
24fe6060f1SDimitry Andric //   %add = add v8i32 %sa, %sb
25fe6060f1SDimitry Andric //   %r = trunc %add to v8i16
26fe6060f1SDimitry Andric // And adds shuffles to allow the use of VMOVL/VMOVN instrctions:
27fe6060f1SDimitry Andric //   %sha = shuffle v8i16 %a, undef, <0, 2, 4, 6, 1, 3, 5, 7>
28fe6060f1SDimitry Andric //   %sa = sext v8i16 %sha to v8i32
29fe6060f1SDimitry Andric //   %shb = shuffle v8i16 %b, undef, <0, 2, 4, 6, 1, 3, 5, 7>
30fe6060f1SDimitry Andric //   %sb = sext v8i16 %shb to v8i32
31fe6060f1SDimitry Andric //   %add = add v8i32 %sa, %sb
32fe6060f1SDimitry Andric //   %r = trunc %add to v8i16
33fe6060f1SDimitry Andric //   %shr = shuffle v8i16 %r, undef, <0, 4, 1, 5, 2, 6, 3, 7>
34fe6060f1SDimitry Andric // Which can then be split and lowered to MVE instructions efficiently:
35fe6060f1SDimitry Andric //   %sa_b = VMOVLB.s16 %a
36fe6060f1SDimitry Andric //   %sa_t = VMOVLT.s16 %a
37fe6060f1SDimitry Andric //   %sb_b = VMOVLB.s16 %b
38fe6060f1SDimitry Andric //   %sb_t = VMOVLT.s16 %b
39fe6060f1SDimitry Andric //   %add_b = VADD.i32 %sa_b, %sb_b
40fe6060f1SDimitry Andric //   %add_t = VADD.i32 %sa_t, %sb_t
41fe6060f1SDimitry Andric //   %r = VMOVNT.i16 %add_b, %add_t
42fe6060f1SDimitry Andric //
43fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
44fe6060f1SDimitry Andric 
45fe6060f1SDimitry Andric #include "ARM.h"
46fe6060f1SDimitry Andric #include "ARMBaseInstrInfo.h"
47fe6060f1SDimitry Andric #include "ARMSubtarget.h"
4881ad6265SDimitry Andric #include "llvm/ADT/SetVector.h"
49fe6060f1SDimitry Andric #include "llvm/Analysis/TargetTransformInfo.h"
50fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetLowering.h"
51fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetPassConfig.h"
52fe6060f1SDimitry Andric #include "llvm/CodeGen/TargetSubtargetInfo.h"
53fe6060f1SDimitry Andric #include "llvm/IR/BasicBlock.h"
54fe6060f1SDimitry Andric #include "llvm/IR/Constant.h"
55fe6060f1SDimitry Andric #include "llvm/IR/Constants.h"
56fe6060f1SDimitry Andric #include "llvm/IR/DerivedTypes.h"
57fe6060f1SDimitry Andric #include "llvm/IR/Function.h"
58fe6060f1SDimitry Andric #include "llvm/IR/IRBuilder.h"
59fe6060f1SDimitry Andric #include "llvm/IR/InstIterator.h"
60fe6060f1SDimitry Andric #include "llvm/IR/InstrTypes.h"
61fe6060f1SDimitry Andric #include "llvm/IR/Instruction.h"
62fe6060f1SDimitry Andric #include "llvm/IR/Instructions.h"
63fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicInst.h"
64fe6060f1SDimitry Andric #include "llvm/IR/Intrinsics.h"
65fe6060f1SDimitry Andric #include "llvm/IR/IntrinsicsARM.h"
66fe6060f1SDimitry Andric #include "llvm/IR/PatternMatch.h"
67fe6060f1SDimitry Andric #include "llvm/IR/Type.h"
68fe6060f1SDimitry Andric #include "llvm/IR/Value.h"
69fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
70fe6060f1SDimitry Andric #include "llvm/Pass.h"
71fe6060f1SDimitry Andric #include "llvm/Support/Casting.h"
72fe6060f1SDimitry Andric #include <algorithm>
73fe6060f1SDimitry Andric #include <cassert>
74fe6060f1SDimitry Andric 
75fe6060f1SDimitry Andric using namespace llvm;
76fe6060f1SDimitry Andric 
77fe6060f1SDimitry Andric #define DEBUG_TYPE "mve-laneinterleave"
78fe6060f1SDimitry Andric 
79fe6060f1SDimitry Andric cl::opt<bool> EnableInterleave(
80fe6060f1SDimitry Andric     "enable-mve-interleave", cl::Hidden, cl::init(true),
81fe6060f1SDimitry Andric     cl::desc("Enable interleave MVE vector operation lowering"));
82fe6060f1SDimitry Andric 
83fe6060f1SDimitry Andric namespace {
84fe6060f1SDimitry Andric 
85fe6060f1SDimitry Andric class MVELaneInterleaving : public FunctionPass {
86fe6060f1SDimitry Andric public:
87fe6060f1SDimitry Andric   static char ID; // Pass identification, replacement for typeid
88fe6060f1SDimitry Andric 
MVELaneInterleaving()89fe6060f1SDimitry Andric   explicit MVELaneInterleaving() : FunctionPass(ID) {
90fe6060f1SDimitry Andric     initializeMVELaneInterleavingPass(*PassRegistry::getPassRegistry());
91fe6060f1SDimitry Andric   }
92fe6060f1SDimitry Andric 
93fe6060f1SDimitry Andric   bool runOnFunction(Function &F) override;
94fe6060f1SDimitry Andric 
getPassName() const95fe6060f1SDimitry Andric   StringRef getPassName() const override { return "MVE lane interleaving"; }
96fe6060f1SDimitry Andric 
getAnalysisUsage(AnalysisUsage & AU) const97fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
98fe6060f1SDimitry Andric     AU.setPreservesCFG();
99fe6060f1SDimitry Andric     AU.addRequired<TargetPassConfig>();
100fe6060f1SDimitry Andric     FunctionPass::getAnalysisUsage(AU);
101fe6060f1SDimitry Andric   }
102fe6060f1SDimitry Andric };
103fe6060f1SDimitry Andric 
104fe6060f1SDimitry Andric } // end anonymous namespace
105fe6060f1SDimitry Andric 
106fe6060f1SDimitry Andric char MVELaneInterleaving::ID = 0;
107fe6060f1SDimitry Andric 
108fe6060f1SDimitry Andric INITIALIZE_PASS(MVELaneInterleaving, DEBUG_TYPE, "MVE lane interleaving", false,
109fe6060f1SDimitry Andric                 false)
110fe6060f1SDimitry Andric 
createMVELaneInterleavingPass()111fe6060f1SDimitry Andric Pass *llvm::createMVELaneInterleavingPass() {
112fe6060f1SDimitry Andric   return new MVELaneInterleaving();
113fe6060f1SDimitry Andric }
114fe6060f1SDimitry Andric 
isProfitableToInterleave(SmallSetVector<Instruction *,4> & Exts,SmallSetVector<Instruction *,4> & Truncs)115fe6060f1SDimitry Andric static bool isProfitableToInterleave(SmallSetVector<Instruction *, 4> &Exts,
116fe6060f1SDimitry Andric                                      SmallSetVector<Instruction *, 4> &Truncs) {
117fe6060f1SDimitry Andric   // This is not always beneficial to transform. Exts can be incorporated into
118fe6060f1SDimitry Andric   // loads, Truncs can be folded into stores.
119fe6060f1SDimitry Andric   // Truncs are usually the same number of instructions,
120fe6060f1SDimitry Andric   //  VSTRH.32(A);VSTRH.32(B) vs VSTRH.16(VMOVNT A, B) with interleaving
121fe6060f1SDimitry Andric   // Exts are unfortunately more instructions in the general case:
122fe6060f1SDimitry Andric   //  A=VLDRH.32; B=VLDRH.32;
123fe6060f1SDimitry Andric   // vs with interleaving:
124fe6060f1SDimitry Andric   //  T=VLDRH.16; A=VMOVNB T; B=VMOVNT T
125fe6060f1SDimitry Andric   // But those VMOVL may be folded into a VMULL.
126fe6060f1SDimitry Andric 
127fe6060f1SDimitry Andric   // But expensive extends/truncs are always good to remove. FPExts always
128fe6060f1SDimitry Andric   // involve extra VCVT's so are always considered to be beneficial to convert.
129fe6060f1SDimitry Andric   for (auto *E : Exts) {
130fe6060f1SDimitry Andric     if (isa<FPExtInst>(E) || !isa<LoadInst>(E->getOperand(0))) {
131fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Beneficial due to " << *E << "\n");
132fe6060f1SDimitry Andric       return true;
133fe6060f1SDimitry Andric     }
134fe6060f1SDimitry Andric   }
135fe6060f1SDimitry Andric   for (auto *T : Truncs) {
136fe6060f1SDimitry Andric     if (T->hasOneUse() && !isa<StoreInst>(*T->user_begin())) {
137fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Beneficial due to " << *T << "\n");
138fe6060f1SDimitry Andric       return true;
139fe6060f1SDimitry Andric     }
140fe6060f1SDimitry Andric   }
141fe6060f1SDimitry Andric 
142fe6060f1SDimitry Andric   // Otherwise, we know we have a load(ext), see if any of the Extends are a
143fe6060f1SDimitry Andric   // vmull. This is a simple heuristic and certainly not perfect.
144fe6060f1SDimitry Andric   for (auto *E : Exts) {
145fe6060f1SDimitry Andric     if (!E->hasOneUse() ||
146fe6060f1SDimitry Andric         cast<Instruction>(*E->user_begin())->getOpcode() != Instruction::Mul) {
147fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Not beneficial due to " << *E << "\n");
148fe6060f1SDimitry Andric       return false;
149fe6060f1SDimitry Andric     }
150fe6060f1SDimitry Andric   }
151fe6060f1SDimitry Andric   return true;
152fe6060f1SDimitry Andric }
153fe6060f1SDimitry Andric 
tryInterleave(Instruction * Start,SmallPtrSetImpl<Instruction * > & Visited)154fe6060f1SDimitry Andric static bool tryInterleave(Instruction *Start,
155fe6060f1SDimitry Andric                           SmallPtrSetImpl<Instruction *> &Visited) {
156fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "tryInterleave from " << *Start << "\n");
157fe6060f1SDimitry Andric 
158fe6060f1SDimitry Andric   if (!isa<Instruction>(Start->getOperand(0)))
159fe6060f1SDimitry Andric     return false;
160fe6060f1SDimitry Andric 
161fe6060f1SDimitry Andric   // Look for connected operations starting from Ext's, terminating at Truncs.
162fe6060f1SDimitry Andric   std::vector<Instruction *> Worklist;
163fe6060f1SDimitry Andric   Worklist.push_back(Start);
164fe6060f1SDimitry Andric   Worklist.push_back(cast<Instruction>(Start->getOperand(0)));
165fe6060f1SDimitry Andric 
166fe6060f1SDimitry Andric   SmallSetVector<Instruction *, 4> Truncs;
167*06c3fb27SDimitry Andric   SmallSetVector<Instruction *, 4> Reducts;
168fe6060f1SDimitry Andric   SmallSetVector<Instruction *, 4> Exts;
169fe6060f1SDimitry Andric   SmallSetVector<Use *, 4> OtherLeafs;
170fe6060f1SDimitry Andric   SmallSetVector<Instruction *, 4> Ops;
171fe6060f1SDimitry Andric 
172fe6060f1SDimitry Andric   while (!Worklist.empty()) {
173fe6060f1SDimitry Andric     Instruction *I = Worklist.back();
174fe6060f1SDimitry Andric     Worklist.pop_back();
175fe6060f1SDimitry Andric 
176fe6060f1SDimitry Andric     switch (I->getOpcode()) {
177fe6060f1SDimitry Andric     // Truncs
178fe6060f1SDimitry Andric     case Instruction::Trunc:
179fe6060f1SDimitry Andric     case Instruction::FPTrunc:
18081ad6265SDimitry Andric       if (!Truncs.insert(I))
181fe6060f1SDimitry Andric         continue;
182fe6060f1SDimitry Andric       Visited.insert(I);
183fe6060f1SDimitry Andric       break;
184fe6060f1SDimitry Andric 
185fe6060f1SDimitry Andric     // Extend leafs
186fe6060f1SDimitry Andric     case Instruction::SExt:
187fe6060f1SDimitry Andric     case Instruction::ZExt:
188fe6060f1SDimitry Andric     case Instruction::FPExt:
189fe6060f1SDimitry Andric       if (Exts.count(I))
190fe6060f1SDimitry Andric         continue;
191fe6060f1SDimitry Andric       for (auto *Use : I->users())
192fe6060f1SDimitry Andric         Worklist.push_back(cast<Instruction>(Use));
193fe6060f1SDimitry Andric       Exts.insert(I);
194fe6060f1SDimitry Andric       break;
195fe6060f1SDimitry Andric 
196fe6060f1SDimitry Andric     case Instruction::Call: {
197fe6060f1SDimitry Andric       IntrinsicInst *II = dyn_cast<IntrinsicInst>(I);
198fe6060f1SDimitry Andric       if (!II)
199fe6060f1SDimitry Andric         return false;
200fe6060f1SDimitry Andric 
201*06c3fb27SDimitry Andric       if (II->getIntrinsicID() == Intrinsic::vector_reduce_add) {
202*06c3fb27SDimitry Andric         if (!Reducts.insert(I))
203*06c3fb27SDimitry Andric           continue;
204*06c3fb27SDimitry Andric         Visited.insert(I);
205*06c3fb27SDimitry Andric         break;
206*06c3fb27SDimitry Andric       }
207*06c3fb27SDimitry Andric 
208fe6060f1SDimitry Andric       switch (II->getIntrinsicID()) {
209fe6060f1SDimitry Andric       case Intrinsic::abs:
210fe6060f1SDimitry Andric       case Intrinsic::smin:
211fe6060f1SDimitry Andric       case Intrinsic::smax:
212fe6060f1SDimitry Andric       case Intrinsic::umin:
213fe6060f1SDimitry Andric       case Intrinsic::umax:
214fe6060f1SDimitry Andric       case Intrinsic::sadd_sat:
215fe6060f1SDimitry Andric       case Intrinsic::ssub_sat:
216fe6060f1SDimitry Andric       case Intrinsic::uadd_sat:
217fe6060f1SDimitry Andric       case Intrinsic::usub_sat:
218fe6060f1SDimitry Andric       case Intrinsic::minnum:
219fe6060f1SDimitry Andric       case Intrinsic::maxnum:
220fe6060f1SDimitry Andric       case Intrinsic::fabs:
221fe6060f1SDimitry Andric       case Intrinsic::fma:
222fe6060f1SDimitry Andric       case Intrinsic::ceil:
223fe6060f1SDimitry Andric       case Intrinsic::floor:
224fe6060f1SDimitry Andric       case Intrinsic::rint:
225fe6060f1SDimitry Andric       case Intrinsic::round:
226fe6060f1SDimitry Andric       case Intrinsic::trunc:
227fe6060f1SDimitry Andric         break;
228fe6060f1SDimitry Andric       default:
229fe6060f1SDimitry Andric         return false;
230fe6060f1SDimitry Andric       }
231bdd1243dSDimitry Andric       [[fallthrough]]; // Fall through to treating these like an operator below.
232fe6060f1SDimitry Andric     }
233fe6060f1SDimitry Andric     // Binary/tertiary ops
234fe6060f1SDimitry Andric     case Instruction::Add:
235fe6060f1SDimitry Andric     case Instruction::Sub:
236fe6060f1SDimitry Andric     case Instruction::Mul:
237fe6060f1SDimitry Andric     case Instruction::AShr:
238fe6060f1SDimitry Andric     case Instruction::LShr:
239fe6060f1SDimitry Andric     case Instruction::Shl:
240fe6060f1SDimitry Andric     case Instruction::ICmp:
241fe6060f1SDimitry Andric     case Instruction::FCmp:
242fe6060f1SDimitry Andric     case Instruction::FAdd:
243fe6060f1SDimitry Andric     case Instruction::FMul:
244fe6060f1SDimitry Andric     case Instruction::Select:
24581ad6265SDimitry Andric       if (!Ops.insert(I))
246fe6060f1SDimitry Andric         continue;
247fe6060f1SDimitry Andric 
248fe6060f1SDimitry Andric       for (Use &Op : I->operands()) {
249fe6060f1SDimitry Andric         if (!isa<FixedVectorType>(Op->getType()))
250fe6060f1SDimitry Andric           continue;
251fe6060f1SDimitry Andric         if (isa<Instruction>(Op))
252fe6060f1SDimitry Andric           Worklist.push_back(cast<Instruction>(&Op));
253fe6060f1SDimitry Andric         else
254fe6060f1SDimitry Andric           OtherLeafs.insert(&Op);
255fe6060f1SDimitry Andric       }
256fe6060f1SDimitry Andric 
257fe6060f1SDimitry Andric       for (auto *Use : I->users())
258fe6060f1SDimitry Andric         Worklist.push_back(cast<Instruction>(Use));
259fe6060f1SDimitry Andric       break;
260fe6060f1SDimitry Andric 
261fe6060f1SDimitry Andric     case Instruction::ShuffleVector:
262fe6060f1SDimitry Andric       // A shuffle of a splat is a splat.
263fe6060f1SDimitry Andric       if (cast<ShuffleVectorInst>(I)->isZeroEltSplat())
264fe6060f1SDimitry Andric         continue;
265bdd1243dSDimitry Andric       [[fallthrough]];
266fe6060f1SDimitry Andric 
267fe6060f1SDimitry Andric     default:
268fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  Unhandled instruction: " << *I << "\n");
269fe6060f1SDimitry Andric       return false;
270fe6060f1SDimitry Andric     }
271fe6060f1SDimitry Andric   }
272fe6060f1SDimitry Andric 
273fe6060f1SDimitry Andric   if (Exts.empty() && OtherLeafs.empty())
274fe6060f1SDimitry Andric     return false;
275fe6060f1SDimitry Andric 
276fe6060f1SDimitry Andric   LLVM_DEBUG({
277*06c3fb27SDimitry Andric     dbgs() << "Found group:\n  Exts:\n";
278fe6060f1SDimitry Andric     for (auto *I : Exts)
279fe6060f1SDimitry Andric       dbgs() << "  " << *I << "\n";
280*06c3fb27SDimitry Andric     dbgs() << "  Ops:\n";
281fe6060f1SDimitry Andric     for (auto *I : Ops)
282fe6060f1SDimitry Andric       dbgs() << "  " << *I << "\n";
283*06c3fb27SDimitry Andric     dbgs() << "  OtherLeafs:\n";
284fe6060f1SDimitry Andric     for (auto *I : OtherLeafs)
285fe6060f1SDimitry Andric       dbgs() << "  " << *I->get() << " of " << *I->getUser() << "\n";
286*06c3fb27SDimitry Andric     dbgs() << "  Truncs:\n";
287fe6060f1SDimitry Andric     for (auto *I : Truncs)
288fe6060f1SDimitry Andric       dbgs() << "  " << *I << "\n";
289*06c3fb27SDimitry Andric     dbgs() << "  Reducts:\n";
290*06c3fb27SDimitry Andric     for (auto *I : Reducts)
291*06c3fb27SDimitry Andric       dbgs() << "  " << *I << "\n";
292fe6060f1SDimitry Andric   });
293fe6060f1SDimitry Andric 
294*06c3fb27SDimitry Andric   assert((!Truncs.empty() || !Reducts.empty()) &&
295*06c3fb27SDimitry Andric          "Expected some truncs or reductions");
296*06c3fb27SDimitry Andric   if (Truncs.empty() && Exts.empty())
297*06c3fb27SDimitry Andric     return false;
298*06c3fb27SDimitry Andric 
299*06c3fb27SDimitry Andric   auto *VT = !Truncs.empty()
300*06c3fb27SDimitry Andric                  ? cast<FixedVectorType>(Truncs[0]->getType())
301*06c3fb27SDimitry Andric                  : cast<FixedVectorType>(Exts[0]->getOperand(0)->getType());
302*06c3fb27SDimitry Andric   LLVM_DEBUG(dbgs() << "Using VT:" << *VT << "\n");
303fe6060f1SDimitry Andric 
304fe6060f1SDimitry Andric   // Check types
305fe6060f1SDimitry Andric   unsigned NumElts = VT->getNumElements();
306fe6060f1SDimitry Andric   unsigned BaseElts = VT->getScalarSizeInBits() == 16
307fe6060f1SDimitry Andric                           ? 8
308fe6060f1SDimitry Andric                           : (VT->getScalarSizeInBits() == 8 ? 16 : 0);
309fe6060f1SDimitry Andric   if (BaseElts == 0 || NumElts % BaseElts != 0) {
310fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Type is unsupported\n");
311fe6060f1SDimitry Andric     return false;
312fe6060f1SDimitry Andric   }
313fe6060f1SDimitry Andric   if (Start->getOperand(0)->getType()->getScalarSizeInBits() !=
314fe6060f1SDimitry Andric       VT->getScalarSizeInBits() * 2) {
315fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  Type not double sized\n");
316fe6060f1SDimitry Andric     return false;
317fe6060f1SDimitry Andric   }
318fe6060f1SDimitry Andric   for (Instruction *I : Exts)
319fe6060f1SDimitry Andric     if (I->getOperand(0)->getType() != VT) {
320fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  Wrong type on " << *I << "\n");
321fe6060f1SDimitry Andric       return false;
322fe6060f1SDimitry Andric     }
323fe6060f1SDimitry Andric   for (Instruction *I : Truncs)
324fe6060f1SDimitry Andric     if (I->getType() != VT) {
325fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  Wrong type on " << *I << "\n");
326fe6060f1SDimitry Andric       return false;
327fe6060f1SDimitry Andric     }
328fe6060f1SDimitry Andric 
329fe6060f1SDimitry Andric   // Check that it looks beneficial
330fe6060f1SDimitry Andric   if (!isProfitableToInterleave(Exts, Truncs))
331fe6060f1SDimitry Andric     return false;
332*06c3fb27SDimitry Andric   if (!Reducts.empty() && (Ops.empty() || all_of(Ops, [](Instruction *I) {
333*06c3fb27SDimitry Andric                              return I->getOpcode() == Instruction::Mul ||
334*06c3fb27SDimitry Andric                                     I->getOpcode() == Instruction::Select ||
335*06c3fb27SDimitry Andric                                     I->getOpcode() == Instruction::ICmp;
336*06c3fb27SDimitry Andric                            }))) {
337*06c3fb27SDimitry Andric     LLVM_DEBUG(dbgs() << "Reduction does not look profitable\n");
338*06c3fb27SDimitry Andric     return false;
339*06c3fb27SDimitry Andric   }
340fe6060f1SDimitry Andric 
341fe6060f1SDimitry Andric   // Create new shuffles around the extends / truncs / other leaves.
342fe6060f1SDimitry Andric   IRBuilder<> Builder(Start);
343fe6060f1SDimitry Andric 
344fe6060f1SDimitry Andric   SmallVector<int, 16> LeafMask;
345fe6060f1SDimitry Andric   SmallVector<int, 16> TruncMask;
346fe6060f1SDimitry Andric   // LeafMask : 0, 2, 4, 6, 1, 3, 5, 7   8, 10, 12, 14,  9, 11, 13, 15
347fe6060f1SDimitry Andric   // TruncMask: 0, 4, 1, 5, 2, 6, 3, 7   8, 12,  9, 13, 10, 14, 11, 15
348fe6060f1SDimitry Andric   for (unsigned Base = 0; Base < NumElts; Base += BaseElts) {
349fe6060f1SDimitry Andric     for (unsigned i = 0; i < BaseElts / 2; i++)
350fe6060f1SDimitry Andric       LeafMask.push_back(Base + i * 2);
351fe6060f1SDimitry Andric     for (unsigned i = 0; i < BaseElts / 2; i++)
352fe6060f1SDimitry Andric       LeafMask.push_back(Base + i * 2 + 1);
353fe6060f1SDimitry Andric   }
354fe6060f1SDimitry Andric   for (unsigned Base = 0; Base < NumElts; Base += BaseElts) {
355fe6060f1SDimitry Andric     for (unsigned i = 0; i < BaseElts / 2; i++) {
356fe6060f1SDimitry Andric       TruncMask.push_back(Base + i);
357fe6060f1SDimitry Andric       TruncMask.push_back(Base + i + BaseElts / 2);
358fe6060f1SDimitry Andric     }
359fe6060f1SDimitry Andric   }
360fe6060f1SDimitry Andric 
361fe6060f1SDimitry Andric   for (Instruction *I : Exts) {
362fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Replacing ext " << *I << "\n");
363fe6060f1SDimitry Andric     Builder.SetInsertPoint(I);
364fe6060f1SDimitry Andric     Value *Shuffle = Builder.CreateShuffleVector(I->getOperand(0), LeafMask);
365fe6060f1SDimitry Andric     bool FPext = isa<FPExtInst>(I);
366fe6060f1SDimitry Andric     bool Sext = isa<SExtInst>(I);
367fe6060f1SDimitry Andric     Value *Ext = FPext ? Builder.CreateFPExt(Shuffle, I->getType())
368fe6060f1SDimitry Andric                        : Sext ? Builder.CreateSExt(Shuffle, I->getType())
369fe6060f1SDimitry Andric                               : Builder.CreateZExt(Shuffle, I->getType());
370fe6060f1SDimitry Andric     I->replaceAllUsesWith(Ext);
371fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  with " << *Shuffle << "\n");
372fe6060f1SDimitry Andric   }
373fe6060f1SDimitry Andric 
374fe6060f1SDimitry Andric   for (Use *I : OtherLeafs) {
375fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Replacing leaf " << *I << "\n");
376fe6060f1SDimitry Andric     Builder.SetInsertPoint(cast<Instruction>(I->getUser()));
377fe6060f1SDimitry Andric     Value *Shuffle = Builder.CreateShuffleVector(I->get(), LeafMask);
378fe6060f1SDimitry Andric     I->getUser()->setOperand(I->getOperandNo(), Shuffle);
379fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  with " << *Shuffle << "\n");
380fe6060f1SDimitry Andric   }
381fe6060f1SDimitry Andric 
382fe6060f1SDimitry Andric   for (Instruction *I : Truncs) {
383fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Replacing trunc " << *I << "\n");
384fe6060f1SDimitry Andric 
385fe6060f1SDimitry Andric     Builder.SetInsertPoint(I->getParent(), ++I->getIterator());
386fe6060f1SDimitry Andric     Value *Shuf = Builder.CreateShuffleVector(I, TruncMask);
387fe6060f1SDimitry Andric     I->replaceAllUsesWith(Shuf);
388fe6060f1SDimitry Andric     cast<Instruction>(Shuf)->setOperand(0, I);
389fe6060f1SDimitry Andric 
390fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  with " << *Shuf << "\n");
391fe6060f1SDimitry Andric   }
392fe6060f1SDimitry Andric 
393fe6060f1SDimitry Andric   return true;
394fe6060f1SDimitry Andric }
395fe6060f1SDimitry Andric 
396*06c3fb27SDimitry Andric // Add reductions are fairly common and associative, meaning we can start the
397*06c3fb27SDimitry Andric // interleaving from them and don't need to emit a shuffle.
isAddReduction(Instruction & I)398*06c3fb27SDimitry Andric static bool isAddReduction(Instruction &I) {
399*06c3fb27SDimitry Andric   if (auto *II = dyn_cast<IntrinsicInst>(&I))
400*06c3fb27SDimitry Andric     return II->getIntrinsicID() == Intrinsic::vector_reduce_add;
401*06c3fb27SDimitry Andric   return false;
402*06c3fb27SDimitry Andric }
403*06c3fb27SDimitry Andric 
runOnFunction(Function & F)404fe6060f1SDimitry Andric bool MVELaneInterleaving::runOnFunction(Function &F) {
405fe6060f1SDimitry Andric   if (!EnableInterleave)
406fe6060f1SDimitry Andric     return false;
407fe6060f1SDimitry Andric   auto &TPC = getAnalysis<TargetPassConfig>();
408fe6060f1SDimitry Andric   auto &TM = TPC.getTM<TargetMachine>();
409fe6060f1SDimitry Andric   auto *ST = &TM.getSubtarget<ARMSubtarget>(F);
410fe6060f1SDimitry Andric   if (!ST->hasMVEIntegerOps())
411fe6060f1SDimitry Andric     return false;
412fe6060f1SDimitry Andric 
413fe6060f1SDimitry Andric   bool Changed = false;
414fe6060f1SDimitry Andric 
415fe6060f1SDimitry Andric   SmallPtrSet<Instruction *, 16> Visited;
416fe6060f1SDimitry Andric   for (Instruction &I : reverse(instructions(F))) {
417*06c3fb27SDimitry Andric     if (((I.getType()->isVectorTy() &&
418*06c3fb27SDimitry Andric           (isa<TruncInst>(I) || isa<FPTruncInst>(I))) ||
419*06c3fb27SDimitry Andric          isAddReduction(I)) &&
420*06c3fb27SDimitry Andric         !Visited.count(&I))
421fe6060f1SDimitry Andric       Changed |= tryInterleave(&I, Visited);
422fe6060f1SDimitry Andric   }
423fe6060f1SDimitry Andric 
424fe6060f1SDimitry Andric   return Changed;
425fe6060f1SDimitry Andric }
426