xref: /freebsd-src/contrib/llvm-project/llvm/lib/Target/ARM/MVETPAndVPTOptimisationsPass.cpp (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1fe6060f1SDimitry Andric //===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
2fe6060f1SDimitry Andric //
3fe6060f1SDimitry Andric // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4fe6060f1SDimitry Andric // See https://llvm.org/LICENSE.txt for license information.
5fe6060f1SDimitry Andric // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6fe6060f1SDimitry Andric //
7fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
8fe6060f1SDimitry Andric //
9fe6060f1SDimitry Andric /// \file This pass does a few optimisations related to Tail predicated loops
10fe6060f1SDimitry Andric /// and MVE VPT blocks before register allocation is performed. For VPT blocks
11fe6060f1SDimitry Andric /// the goal is to maximize the sizes of the blocks that will be created by the
12fe6060f1SDimitry Andric /// MVE VPT Block Insertion pass (which runs after register allocation). For
13fe6060f1SDimitry Andric /// tail predicated loops we transform the loop into something that will
14fe6060f1SDimitry Andric /// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15fe6060f1SDimitry Andric ///
16fe6060f1SDimitry Andric //===----------------------------------------------------------------------===//
17fe6060f1SDimitry Andric 
18fe6060f1SDimitry Andric #include "ARM.h"
19fe6060f1SDimitry Andric #include "ARMSubtarget.h"
20fe6060f1SDimitry Andric #include "MCTargetDesc/ARMBaseInfo.h"
21fe6060f1SDimitry Andric #include "MVETailPredUtils.h"
22fe6060f1SDimitry Andric #include "Thumb2InstrInfo.h"
23fe6060f1SDimitry Andric #include "llvm/ADT/SmallVector.h"
24fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h"
25fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h"
26fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h"
27fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h"
28fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h"
29fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h"
30fe6060f1SDimitry Andric #include "llvm/InitializePasses.h"
31fe6060f1SDimitry Andric #include "llvm/Support/Debug.h"
32fe6060f1SDimitry Andric #include <cassert>
33fe6060f1SDimitry Andric 
34fe6060f1SDimitry Andric using namespace llvm;
35fe6060f1SDimitry Andric 
36fe6060f1SDimitry Andric #define DEBUG_TYPE "arm-mve-vpt-opts"
37fe6060f1SDimitry Andric 
// Hidden command-line switch, enabled by default. When it is disabled,
// MergeLoopEnd does nothing and LowerWhileLoopStart reverts while-loop setups
// instead of forming a t2WhileLoopStartLR.
static cl::opt<bool>
MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden,
    cl::desc("Enable merging Loop End and Dec instructions."),
    cl::init(true));
42fe6060f1SDimitry Andric 
// Hidden command-line switch, enabled by default, that controls whether lr is
// set as a predicate in tail predication regions (see the option description).
static cl::opt<bool>
SetLRPredicate("arm-set-lr-predicate", cl::Hidden,
    cl::desc("Enable setting lr as a predicate in tail predication regions."),
    cl::init(true));
47349cc55cSDimitry Andric 
48fe6060f1SDimitry Andric namespace {
49fe6060f1SDimitry Andric class MVETPAndVPTOptimisations : public MachineFunctionPass {
50fe6060f1SDimitry Andric public:
51fe6060f1SDimitry Andric   static char ID;
52fe6060f1SDimitry Andric   const Thumb2InstrInfo *TII;
53fe6060f1SDimitry Andric   MachineRegisterInfo *MRI;
54fe6060f1SDimitry Andric 
55fe6060f1SDimitry Andric   MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {}
56fe6060f1SDimitry Andric 
57fe6060f1SDimitry Andric   bool runOnMachineFunction(MachineFunction &Fn) override;
58fe6060f1SDimitry Andric 
59fe6060f1SDimitry Andric   void getAnalysisUsage(AnalysisUsage &AU) const override {
60*0fca6ea1SDimitry Andric     AU.addRequired<MachineLoopInfoWrapperPass>();
61*0fca6ea1SDimitry Andric     AU.addPreserved<MachineLoopInfoWrapperPass>();
62*0fca6ea1SDimitry Andric     AU.addRequired<MachineDominatorTreeWrapperPass>();
63*0fca6ea1SDimitry Andric     AU.addPreserved<MachineDominatorTreeWrapperPass>();
64fe6060f1SDimitry Andric     MachineFunctionPass::getAnalysisUsage(AU);
65fe6060f1SDimitry Andric   }
66fe6060f1SDimitry Andric 
67fe6060f1SDimitry Andric   StringRef getPassName() const override {
68fe6060f1SDimitry Andric     return "ARM MVE TailPred and VPT Optimisation Pass";
69fe6060f1SDimitry Andric   }
70fe6060f1SDimitry Andric 
71fe6060f1SDimitry Andric private:
72fe6060f1SDimitry Andric   bool LowerWhileLoopStart(MachineLoop *ML);
73fe6060f1SDimitry Andric   bool MergeLoopEnd(MachineLoop *ML);
74fe6060f1SDimitry Andric   bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT);
75fe6060f1SDimitry Andric   MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB,
76fe6060f1SDimitry Andric                                             MachineInstr &Instr,
77fe6060f1SDimitry Andric                                             MachineOperand &User,
78fe6060f1SDimitry Andric                                             Register Target);
79fe6060f1SDimitry Andric   bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB);
80fe6060f1SDimitry Andric   bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB);
81fe6060f1SDimitry Andric   bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT);
82fe6060f1SDimitry Andric   bool ConvertVPSEL(MachineBasicBlock &MBB);
83fe6060f1SDimitry Andric   bool HintDoLoopStartReg(MachineBasicBlock &MBB);
84fe6060f1SDimitry Andric   MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader,
85fe6060f1SDimitry Andric                                             MachineInstr *LoopStart);
86fe6060f1SDimitry Andric };
87fe6060f1SDimitry Andric 
88fe6060f1SDimitry Andric char MVETPAndVPTOptimisations::ID = 0;
89fe6060f1SDimitry Andric 
90fe6060f1SDimitry Andric } // end anonymous namespace
91fe6060f1SDimitry Andric 
// Pass registration. The dependencies listed here mirror the analyses that
// getAnalysisUsage() requires: machine loop info and the machine dominator
// tree.
INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE,
                      "ARM MVE TailPred and VPT Optimisations pass", false,
                      false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass)
INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE,
                    "ARM MVE TailPred and VPT Optimisations pass", false, false)
99fe6060f1SDimitry Andric 
100fe6060f1SDimitry Andric static MachineInstr *LookThroughCOPY(MachineInstr *MI,
101fe6060f1SDimitry Andric                                      MachineRegisterInfo *MRI) {
102fe6060f1SDimitry Andric   while (MI && MI->getOpcode() == TargetOpcode::COPY &&
103fe6060f1SDimitry Andric          MI->getOperand(1).getReg().isVirtual())
104fe6060f1SDimitry Andric     MI = MRI->getVRegDef(MI->getOperand(1).getReg());
105fe6060f1SDimitry Andric   return MI;
106fe6060f1SDimitry Andric }
107fe6060f1SDimitry Andric 
108fe6060f1SDimitry Andric // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and
109fe6060f1SDimitry Andric // corresponding PHI that make up a low overhead loop. Only handles 'do' loops
110fe6060f1SDimitry Andric // at the moment, returning a t2DoLoopStart in LoopStart.
111fe6060f1SDimitry Andric static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI,
112fe6060f1SDimitry Andric                                MachineInstr *&LoopStart, MachineInstr *&LoopPhi,
113fe6060f1SDimitry Andric                                MachineInstr *&LoopDec, MachineInstr *&LoopEnd) {
114fe6060f1SDimitry Andric   MachineBasicBlock *Header = ML->getHeader();
115fe6060f1SDimitry Andric   MachineBasicBlock *Latch = ML->getLoopLatch();
116fe6060f1SDimitry Andric   if (!Header || !Latch) {
117fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  no Loop Latch or Header\n");
118fe6060f1SDimitry Andric     return false;
119fe6060f1SDimitry Andric   }
120fe6060f1SDimitry Andric 
121fe6060f1SDimitry Andric   // Find the loop end from the terminators.
122fe6060f1SDimitry Andric   LoopEnd = nullptr;
123fe6060f1SDimitry Andric   for (auto &T : Latch->terminators()) {
124fe6060f1SDimitry Andric     if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) {
125fe6060f1SDimitry Andric       LoopEnd = &T;
126fe6060f1SDimitry Andric       break;
127fe6060f1SDimitry Andric     }
128fe6060f1SDimitry Andric     if (T.getOpcode() == ARM::t2LoopEndDec &&
129fe6060f1SDimitry Andric         T.getOperand(2).getMBB() == Header) {
130fe6060f1SDimitry Andric       LoopEnd = &T;
131fe6060f1SDimitry Andric       break;
132fe6060f1SDimitry Andric     }
133fe6060f1SDimitry Andric   }
134fe6060f1SDimitry Andric   if (!LoopEnd) {
135fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  no LoopEnd\n");
136fe6060f1SDimitry Andric     return false;
137fe6060f1SDimitry Andric   }
138fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  found loop end: " << *LoopEnd);
139fe6060f1SDimitry Andric 
140fe6060f1SDimitry Andric   // Find the dec from the use of the end. There may be copies between
141fe6060f1SDimitry Andric   // instructions. We expect the loop to loop like:
142fe6060f1SDimitry Andric   //   $vs = t2DoLoopStart ...
143fe6060f1SDimitry Andric   // loop:
144fe6060f1SDimitry Andric   //   $vp = phi [ $vs ], [ $vd ]
145fe6060f1SDimitry Andric   //   ...
146fe6060f1SDimitry Andric   //   $vd = t2LoopDec $vp
147fe6060f1SDimitry Andric   //   ...
148fe6060f1SDimitry Andric   //   t2LoopEnd $vd, loop
149fe6060f1SDimitry Andric   if (LoopEnd->getOpcode() == ARM::t2LoopEndDec)
150fe6060f1SDimitry Andric     LoopDec = LoopEnd;
151fe6060f1SDimitry Andric   else {
152fe6060f1SDimitry Andric     LoopDec =
153fe6060f1SDimitry Andric         LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI);
154fe6060f1SDimitry Andric     if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) {
155fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  didn't find LoopDec where we expected!\n");
156fe6060f1SDimitry Andric       return false;
157fe6060f1SDimitry Andric     }
158fe6060f1SDimitry Andric   }
159fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  found loop dec: " << *LoopDec);
160fe6060f1SDimitry Andric 
161fe6060f1SDimitry Andric   LoopPhi =
162fe6060f1SDimitry Andric       LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI);
163fe6060f1SDimitry Andric   if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI ||
164fe6060f1SDimitry Andric       LoopPhi->getNumOperands() != 5 ||
165fe6060f1SDimitry Andric       (LoopPhi->getOperand(2).getMBB() != Latch &&
166fe6060f1SDimitry Andric        LoopPhi->getOperand(4).getMBB() != Latch)) {
167fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  didn't find PHI where we expected!\n");
168fe6060f1SDimitry Andric     return false;
169fe6060f1SDimitry Andric   }
170fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  found loop phi: " << *LoopPhi);
171fe6060f1SDimitry Andric 
172fe6060f1SDimitry Andric   Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch
173fe6060f1SDimitry Andric                           ? LoopPhi->getOperand(3).getReg()
174fe6060f1SDimitry Andric                           : LoopPhi->getOperand(1).getReg();
175fe6060f1SDimitry Andric   LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI);
176fe6060f1SDimitry Andric   if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
177fe6060f1SDimitry Andric                      LoopStart->getOpcode() != ARM::t2WhileLoopSetup &&
178fe6060f1SDimitry Andric                      LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) {
179fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  didn't find Start where we expected!\n");
180fe6060f1SDimitry Andric     return false;
181fe6060f1SDimitry Andric   }
182fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  found loop start: " << *LoopStart);
183fe6060f1SDimitry Andric 
184fe6060f1SDimitry Andric   return true;
185fe6060f1SDimitry Andric }
186fe6060f1SDimitry Andric 
187fe6060f1SDimitry Andric static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) {
188fe6060f1SDimitry Andric   MachineBasicBlock *MBB = MI->getParent();
189fe6060f1SDimitry Andric   assert(MI->getOpcode() == ARM::t2WhileLoopSetup &&
190fe6060f1SDimitry Andric          "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!");
191fe6060f1SDimitry Andric 
192fe6060f1SDimitry Andric   // Subs
193fe6060f1SDimitry Andric   MachineInstrBuilder MIB =
194fe6060f1SDimitry Andric       BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri));
195fe6060f1SDimitry Andric   MIB.add(MI->getOperand(0));
196fe6060f1SDimitry Andric   MIB.add(MI->getOperand(1));
197fe6060f1SDimitry Andric   MIB.addImm(0);
198fe6060f1SDimitry Andric   MIB.addImm(ARMCC::AL);
199fe6060f1SDimitry Andric   MIB.addReg(ARM::NoRegister);
200fe6060f1SDimitry Andric   MIB.addReg(ARM::CPSR, RegState::Define);
201fe6060f1SDimitry Andric 
202fe6060f1SDimitry Andric   // Attempt to find a t2WhileLoopStart and revert to a t2Bcc.
203fe6060f1SDimitry Andric   for (MachineInstr &I : MBB->terminators()) {
204fe6060f1SDimitry Andric     if (I.getOpcode() == ARM::t2WhileLoopStart) {
205fe6060f1SDimitry Andric       MachineInstrBuilder MIB =
206fe6060f1SDimitry Andric           BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc));
207fe6060f1SDimitry Andric       MIB.add(MI->getOperand(1)); // branch target
208fe6060f1SDimitry Andric       MIB.addImm(ARMCC::EQ);
209fe6060f1SDimitry Andric       MIB.addReg(ARM::CPSR);
210fe6060f1SDimitry Andric       I.eraseFromParent();
211fe6060f1SDimitry Andric       break;
212fe6060f1SDimitry Andric     }
213fe6060f1SDimitry Andric   }
214fe6060f1SDimitry Andric 
215fe6060f1SDimitry Andric   MI->eraseFromParent();
216fe6060f1SDimitry Andric }
217fe6060f1SDimitry Andric 
218fe6060f1SDimitry Andric // The Hardware Loop insertion and ISel Lowering produce the pseudos for the
219fe6060f1SDimitry Andric // start of a while loop:
220fe6060f1SDimitry Andric //   %a:gprlr = t2WhileLoopSetup %Cnt
221fe6060f1SDimitry Andric //   t2WhileLoopStart %a, %BB
222fe6060f1SDimitry Andric // We want to convert those to a single instruction which, like t2LoopEndDec and
223fe6060f1SDimitry Andric // t2DoLoopStartTP is both a terminator and produces a value:
224fe6060f1SDimitry Andric //   %a:grplr: t2WhileLoopStartLR %Cnt, %BB
225fe6060f1SDimitry Andric //
226fe6060f1SDimitry Andric // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and
227fe6060f1SDimitry Andric // t2WhileLoopStart are not valid past regalloc.
228fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) {
229fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop "
230fe6060f1SDimitry Andric                     << ML->getHeader()->getName() << "\n");
231fe6060f1SDimitry Andric 
232fe6060f1SDimitry Andric   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
233fe6060f1SDimitry Andric   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
234fe6060f1SDimitry Andric     return false;
235fe6060f1SDimitry Andric 
236fe6060f1SDimitry Andric   if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup)
237fe6060f1SDimitry Andric     return false;
238fe6060f1SDimitry Andric 
239fe6060f1SDimitry Andric   Register LR = LoopStart->getOperand(0).getReg();
240fe6060f1SDimitry Andric   auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) {
241fe6060f1SDimitry Andric     return MI.getOpcode() == ARM::t2WhileLoopStart;
242fe6060f1SDimitry Andric   });
243fe6060f1SDimitry Andric   if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) {
244fe6060f1SDimitry Andric     RevertWhileLoopSetup(LoopStart, TII);
245fe6060f1SDimitry Andric     RevertLoopDec(LoopStart, TII);
246fe6060f1SDimitry Andric     RevertLoopEnd(LoopStart, TII);
247fe6060f1SDimitry Andric     return true;
248fe6060f1SDimitry Andric   }
249fe6060f1SDimitry Andric 
250fe6060f1SDimitry Andric   MachineInstrBuilder MI =
251fe6060f1SDimitry Andric       BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(),
252fe6060f1SDimitry Andric               TII->get(ARM::t2WhileLoopStartLR), LR)
253fe6060f1SDimitry Andric           .add(LoopStart->getOperand(1))
254fe6060f1SDimitry Andric           .add(WLSIt->getOperand(1));
255fe6060f1SDimitry Andric   (void)MI;
256fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr());
257fe6060f1SDimitry Andric 
258fe6060f1SDimitry Andric   WLSIt->eraseFromParent();
259fe6060f1SDimitry Andric   LoopStart->eraseFromParent();
260fe6060f1SDimitry Andric   return true;
261fe6060f1SDimitry Andric }
262fe6060f1SDimitry Andric 
263fe6060f1SDimitry Andric // Return true if this instruction is invalid in a low overhead loop, usually
264fe6060f1SDimitry Andric // because it clobbers LR.
265fe6060f1SDimitry Andric static bool IsInvalidTPInstruction(MachineInstr &MI) {
266fe6060f1SDimitry Andric   return MI.isCall() || isLoopStart(MI);
267fe6060f1SDimitry Andric }
268fe6060f1SDimitry Andric 
269fe6060f1SDimitry Andric // Starting from PreHeader, search for invalid instructions back until the
270fe6060f1SDimitry Andric // LoopStart block is reached. If invalid instructions are found, the loop start
271fe6060f1SDimitry Andric // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will
272fe6060f1SDimitry Andric // return the new DLS LoopStart if updated.
273fe6060f1SDimitry Andric MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors(
274fe6060f1SDimitry Andric     MachineBasicBlock *PreHeader, MachineInstr *LoopStart) {
275fe6060f1SDimitry Andric   SmallVector<MachineBasicBlock *> Worklist;
276fe6060f1SDimitry Andric   SmallPtrSet<MachineBasicBlock *, 4> Visited;
277fe6060f1SDimitry Andric   Worklist.push_back(PreHeader);
278fe6060f1SDimitry Andric   Visited.insert(LoopStart->getParent());
279fe6060f1SDimitry Andric 
280fe6060f1SDimitry Andric   while (!Worklist.empty()) {
281fe6060f1SDimitry Andric     MachineBasicBlock *MBB = Worklist.pop_back_val();
282fe6060f1SDimitry Andric     if (Visited.count(MBB))
283fe6060f1SDimitry Andric       continue;
284fe6060f1SDimitry Andric 
285fe6060f1SDimitry Andric     for (MachineInstr &MI : *MBB) {
286fe6060f1SDimitry Andric       if (!IsInvalidTPInstruction(MI))
287fe6060f1SDimitry Andric         continue;
288fe6060f1SDimitry Andric 
289fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI);
290fe6060f1SDimitry Andric 
291fe6060f1SDimitry Andric       // Create a t2DoLoopStart at the end of the preheader.
292fe6060f1SDimitry Andric       MachineInstrBuilder MIB =
293fe6060f1SDimitry Andric           BuildMI(*PreHeader, PreHeader->getFirstTerminator(),
294fe6060f1SDimitry Andric                   LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart));
295fe6060f1SDimitry Andric       MIB.add(LoopStart->getOperand(0));
296fe6060f1SDimitry Andric       MIB.add(LoopStart->getOperand(1));
297fe6060f1SDimitry Andric 
298fe6060f1SDimitry Andric       // Make sure to remove the kill flags, to prevent them from being invalid.
299fe6060f1SDimitry Andric       LoopStart->getOperand(1).setIsKill(false);
300fe6060f1SDimitry Andric 
301fe6060f1SDimitry Andric       // Revert the t2WhileLoopStartLR to a CMP and Br.
302fe6060f1SDimitry Andric       RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true);
303fe6060f1SDimitry Andric       return MIB;
304fe6060f1SDimitry Andric     }
305fe6060f1SDimitry Andric 
306fe6060f1SDimitry Andric     Visited.insert(MBB);
307fe6060f1SDimitry Andric     for (auto *Pred : MBB->predecessors())
308fe6060f1SDimitry Andric       Worklist.push_back(Pred);
309fe6060f1SDimitry Andric   }
310fe6060f1SDimitry Andric   return LoopStart;
311fe6060f1SDimitry Andric }
312fe6060f1SDimitry Andric 
313fe6060f1SDimitry Andric // This function converts loops with t2LoopEnd and t2LoopEnd instructions into
314fe6060f1SDimitry Andric // a single t2LoopEndDec instruction. To do that it needs to make sure that LR
315fe6060f1SDimitry Andric // will be valid to be used for the low overhead loop, which means nothing else
316fe6060f1SDimitry Andric // is using LR (especially calls) and there are no superfluous copies in the
317fe6060f1SDimitry Andric // loop. The t2LoopEndDec is a branching terminator that produces a value (the
318fe6060f1SDimitry Andric // decrement) around the loop edge, which means we need to be careful that they
319fe6060f1SDimitry Andric // will be valid to allocate without any spilling.
320fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) {
321fe6060f1SDimitry Andric   if (!MergeEndDec)
322fe6060f1SDimitry Andric     return false;
323fe6060f1SDimitry Andric 
324fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName()
325fe6060f1SDimitry Andric                     << "\n");
326fe6060f1SDimitry Andric 
327fe6060f1SDimitry Andric   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
328fe6060f1SDimitry Andric   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
329fe6060f1SDimitry Andric     return false;
330fe6060f1SDimitry Andric 
331fe6060f1SDimitry Andric   // Check if there is an illegal instruction (a call) in the low overhead loop
332fe6060f1SDimitry Andric   // and if so revert it now before we get any further. While loops also need to
333fe6060f1SDimitry Andric   // check the preheaders, but can be reverted to a DLS loop if needed.
334fe6060f1SDimitry Andric   auto *PreHeader = ML->getLoopPreheader();
335fe6060f1SDimitry Andric   if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader)
336fe6060f1SDimitry Andric     LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart);
337fe6060f1SDimitry Andric 
338fe6060f1SDimitry Andric   for (MachineBasicBlock *MBB : ML->blocks()) {
339fe6060f1SDimitry Andric     for (MachineInstr &MI : *MBB) {
340fe6060f1SDimitry Andric       if (IsInvalidTPInstruction(MI)) {
341fe6060f1SDimitry Andric         LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI);
342fe6060f1SDimitry Andric         if (LoopStart->getOpcode() == ARM::t2DoLoopStart)
343fe6060f1SDimitry Andric           RevertDoLoopStart(LoopStart, TII);
344fe6060f1SDimitry Andric         else
345fe6060f1SDimitry Andric           RevertWhileLoopStartLR(LoopStart, TII);
346fe6060f1SDimitry Andric         RevertLoopDec(LoopDec, TII);
347fe6060f1SDimitry Andric         RevertLoopEnd(LoopEnd, TII);
348fe6060f1SDimitry Andric         return true;
349fe6060f1SDimitry Andric       }
350fe6060f1SDimitry Andric     }
351fe6060f1SDimitry Andric   }
352fe6060f1SDimitry Andric 
353fe6060f1SDimitry Andric   // Remove any copies from the loop, to ensure the phi that remains is both
354fe6060f1SDimitry Andric   // simpler and contains no extra uses. Because t2LoopEndDec is a terminator
355fe6060f1SDimitry Andric   // that cannot spill, we need to be careful what remains in the loop.
356fe6060f1SDimitry Andric   Register PhiReg = LoopPhi->getOperand(0).getReg();
357fe6060f1SDimitry Andric   Register DecReg = LoopDec->getOperand(0).getReg();
358fe6060f1SDimitry Andric   Register StartReg = LoopStart->getOperand(0).getReg();
359fe6060f1SDimitry Andric   // Ensure the uses are expected, and collect any copies we want to remove.
360fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> Copies;
361fe6060f1SDimitry Andric   auto CheckUsers = [&Copies](Register BaseReg,
362fe6060f1SDimitry Andric                               ArrayRef<MachineInstr *> ExpectedUsers,
363fe6060f1SDimitry Andric                               MachineRegisterInfo *MRI) {
364fe6060f1SDimitry Andric     SmallVector<Register, 4> Worklist;
365fe6060f1SDimitry Andric     Worklist.push_back(BaseReg);
366fe6060f1SDimitry Andric     while (!Worklist.empty()) {
367fe6060f1SDimitry Andric       Register Reg = Worklist.pop_back_val();
368fe6060f1SDimitry Andric       for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
3690eae32dcSDimitry Andric         if (llvm::is_contained(ExpectedUsers, &MI))
370fe6060f1SDimitry Andric           continue;
371fe6060f1SDimitry Andric         if (MI.getOpcode() != TargetOpcode::COPY ||
372fe6060f1SDimitry Andric             !MI.getOperand(0).getReg().isVirtual()) {
373fe6060f1SDimitry Andric           LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI);
374fe6060f1SDimitry Andric           return false;
375fe6060f1SDimitry Andric         }
376fe6060f1SDimitry Andric         Worklist.push_back(MI.getOperand(0).getReg());
377fe6060f1SDimitry Andric         Copies.push_back(&MI);
378fe6060f1SDimitry Andric       }
379fe6060f1SDimitry Andric     }
380fe6060f1SDimitry Andric     return true;
381fe6060f1SDimitry Andric   };
382fe6060f1SDimitry Andric   if (!CheckUsers(PhiReg, {LoopDec}, MRI) ||
383fe6060f1SDimitry Andric       !CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) ||
384fe6060f1SDimitry Andric       !CheckUsers(StartReg, {LoopPhi}, MRI)) {
385fe6060f1SDimitry Andric     // Don't leave a t2WhileLoopStartLR without the LoopDecEnd.
386fe6060f1SDimitry Andric     if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) {
387fe6060f1SDimitry Andric       RevertWhileLoopStartLR(LoopStart, TII);
388fe6060f1SDimitry Andric       RevertLoopDec(LoopDec, TII);
389fe6060f1SDimitry Andric       RevertLoopEnd(LoopEnd, TII);
390fe6060f1SDimitry Andric       return true;
391fe6060f1SDimitry Andric     }
392fe6060f1SDimitry Andric     return false;
393fe6060f1SDimitry Andric   }
394fe6060f1SDimitry Andric 
395fe6060f1SDimitry Andric   MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass);
396fe6060f1SDimitry Andric   MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass);
397fe6060f1SDimitry Andric   MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass);
398fe6060f1SDimitry Andric 
399fe6060f1SDimitry Andric   if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) {
400fe6060f1SDimitry Andric     LoopPhi->getOperand(3).setReg(StartReg);
401fe6060f1SDimitry Andric     LoopPhi->getOperand(1).setReg(DecReg);
402fe6060f1SDimitry Andric   } else {
403fe6060f1SDimitry Andric     LoopPhi->getOperand(1).setReg(StartReg);
404fe6060f1SDimitry Andric     LoopPhi->getOperand(3).setReg(DecReg);
405fe6060f1SDimitry Andric   }
406fe6060f1SDimitry Andric 
40781ad6265SDimitry Andric   SmallVector<MachineOperand, 4> Cond;              // For analyzeBranch.
40881ad6265SDimitry Andric   MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch.
40981ad6265SDimitry Andric   if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) {
41081ad6265SDimitry Andric     // If the LoopEnd falls through, need to insert a t2B to the fall-through
41181ad6265SDimitry Andric     // block so that the non-analyzable t2LoopEndDec doesn't fall through.
41281ad6265SDimitry Andric     MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator();
41381ad6265SDimitry Andric     BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B))
41481ad6265SDimitry Andric         .addMBB(&*MBBI)
41581ad6265SDimitry Andric         .add(predOps(ARMCC::AL));
41681ad6265SDimitry Andric   }
41781ad6265SDimitry Andric 
418fe6060f1SDimitry Andric   // Replace the loop dec and loop end as a single instruction.
419fe6060f1SDimitry Andric   MachineInstrBuilder MI =
420fe6060f1SDimitry Andric       BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(),
421fe6060f1SDimitry Andric               TII->get(ARM::t2LoopEndDec), DecReg)
422fe6060f1SDimitry Andric           .addReg(PhiReg)
423fe6060f1SDimitry Andric           .add(LoopEnd->getOperand(1));
424fe6060f1SDimitry Andric   (void)MI;
425fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr());
426fe6060f1SDimitry Andric 
427fe6060f1SDimitry Andric   LoopDec->eraseFromParent();
428fe6060f1SDimitry Andric   LoopEnd->eraseFromParent();
429fe6060f1SDimitry Andric   for (auto *MI : Copies)
430fe6060f1SDimitry Andric     MI->eraseFromParent();
431fe6060f1SDimitry Andric   return true;
432fe6060f1SDimitry Andric }
433fe6060f1SDimitry Andric 
434fe6060f1SDimitry Andric // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP
435fe6060f1SDimitry Andric // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP
436fe6060f1SDimitry Andric // instruction, making the backend ARMLowOverheadLoops passes job of finding the
437fe6060f1SDimitry Andric // VCTP operand much simpler.
438fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML,
439fe6060f1SDimitry Andric                                               MachineDominatorTree *DT) {
440fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop "
441fe6060f1SDimitry Andric                     << ML->getHeader()->getName() << "\n");
442fe6060f1SDimitry Andric 
443fe6060f1SDimitry Andric   // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's
444fe6060f1SDimitry Andric   // in the loop.
445fe6060f1SDimitry Andric   MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec;
446fe6060f1SDimitry Andric   if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd))
447fe6060f1SDimitry Andric     return false;
448fe6060f1SDimitry Andric   if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart &&
449fe6060f1SDimitry Andric                              LoopStart->getOpcode() != ARM::t2WhileLoopStartLR))
450fe6060f1SDimitry Andric     return false;
451fe6060f1SDimitry Andric 
452fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> VCTPs;
453349cc55cSDimitry Andric   SmallVector<MachineInstr *, 4> MVEInstrs;
454349cc55cSDimitry Andric   for (MachineBasicBlock *BB : ML->blocks()) {
455fe6060f1SDimitry Andric     for (MachineInstr &MI : *BB)
456fe6060f1SDimitry Andric       if (isVCTP(&MI))
457fe6060f1SDimitry Andric         VCTPs.push_back(&MI);
458349cc55cSDimitry Andric       else if (findFirstVPTPredOperandIdx(MI) != -1)
459349cc55cSDimitry Andric         MVEInstrs.push_back(&MI);
460349cc55cSDimitry Andric   }
461fe6060f1SDimitry Andric 
462fe6060f1SDimitry Andric   if (VCTPs.empty()) {
463fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  no VCTPs\n");
464fe6060f1SDimitry Andric     return false;
465fe6060f1SDimitry Andric   }
466fe6060f1SDimitry Andric 
467fe6060f1SDimitry Andric   // Check all VCTPs are the same.
468fe6060f1SDimitry Andric   MachineInstr *FirstVCTP = *VCTPs.begin();
469fe6060f1SDimitry Andric   for (MachineInstr *VCTP : VCTPs) {
470fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  with VCTP " << *VCTP);
471fe6060f1SDimitry Andric     if (VCTP->getOpcode() != FirstVCTP->getOpcode() ||
472fe6060f1SDimitry Andric         VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) {
473fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  VCTP's are not identical\n");
474fe6060f1SDimitry Andric       return false;
475fe6060f1SDimitry Andric     }
476fe6060f1SDimitry Andric   }
477fe6060f1SDimitry Andric 
478fe6060f1SDimitry Andric   // Check for the register being used can be setup before the loop. We expect
479fe6060f1SDimitry Andric   // this to be:
480fe6060f1SDimitry Andric   //   $vx = ...
481fe6060f1SDimitry Andric   // loop:
482fe6060f1SDimitry Andric   //   $vp = PHI [ $vx ], [ $vd ]
483fe6060f1SDimitry Andric   //   ..
484fe6060f1SDimitry Andric   //   $vpr = VCTP $vp
485fe6060f1SDimitry Andric   //   ..
486fe6060f1SDimitry Andric   //   $vd = t2SUBri $vp, #n
487fe6060f1SDimitry Andric   //   ..
488fe6060f1SDimitry Andric   Register CountReg = FirstVCTP->getOperand(1).getReg();
489fe6060f1SDimitry Andric   if (!CountReg.isVirtual()) {
490fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  cannot determine VCTP PHI\n");
491fe6060f1SDimitry Andric     return false;
492fe6060f1SDimitry Andric   }
493fe6060f1SDimitry Andric   MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI);
494fe6060f1SDimitry Andric   if (!Phi || Phi->getOpcode() != TargetOpcode::PHI ||
495fe6060f1SDimitry Andric       Phi->getNumOperands() != 5 ||
496fe6060f1SDimitry Andric       (Phi->getOperand(2).getMBB() != ML->getLoopLatch() &&
497fe6060f1SDimitry Andric        Phi->getOperand(4).getMBB() != ML->getLoopLatch())) {
498fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "  cannot determine VCTP Count\n");
499fe6060f1SDimitry Andric     return false;
500fe6060f1SDimitry Andric   }
501fe6060f1SDimitry Andric   CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch()
502fe6060f1SDimitry Andric                  ? Phi->getOperand(3).getReg()
503fe6060f1SDimitry Andric                  : Phi->getOperand(1).getReg();
504fe6060f1SDimitry Andric 
505fe6060f1SDimitry Andric   // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of
506fe6060f1SDimitry Andric   // the preheader and add the new CountReg to it. We attempt to place it late
507fe6060f1SDimitry Andric   // in the preheader, but may need to move that earlier based on uses.
508fe6060f1SDimitry Andric   MachineBasicBlock *MBB = LoopStart->getParent();
509fe6060f1SDimitry Andric   MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator();
510fe6060f1SDimitry Andric   for (MachineInstr &Use :
511fe6060f1SDimitry Andric        MRI->use_instructions(LoopStart->getOperand(0).getReg()))
512fe6060f1SDimitry Andric     if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) ||
513fe6060f1SDimitry Andric         !DT->dominates(ML->getHeader(), Use.getParent())) {
514fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "  InsertPt could not be a terminator!\n");
515fe6060f1SDimitry Andric       return false;
516fe6060f1SDimitry Andric     }
517fe6060f1SDimitry Andric 
518fe6060f1SDimitry Andric   unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart
519fe6060f1SDimitry Andric                         ? ARM::t2DoLoopStartTP
520fe6060f1SDimitry Andric                         : ARM::t2WhileLoopStartTP;
521fe6060f1SDimitry Andric   MachineInstrBuilder MI =
522fe6060f1SDimitry Andric       BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc))
523fe6060f1SDimitry Andric           .add(LoopStart->getOperand(0))
524fe6060f1SDimitry Andric           .add(LoopStart->getOperand(1))
525fe6060f1SDimitry Andric           .addReg(CountReg);
526fe6060f1SDimitry Andric   if (NewOpc == ARM::t2WhileLoopStartTP)
527fe6060f1SDimitry Andric     MI.add(LoopStart->getOperand(2));
528fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << "  with "
529fe6060f1SDimitry Andric                     << *MI.getInstr());
530fe6060f1SDimitry Andric   MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass);
531fe6060f1SDimitry Andric   LoopStart->eraseFromParent();
532fe6060f1SDimitry Andric 
533349cc55cSDimitry Andric   if (SetLRPredicate) {
534349cc55cSDimitry Andric     // Each instruction in the loop needs to be using LR as the predicate from
535349cc55cSDimitry Andric     // the Phi as the predicate.
536349cc55cSDimitry Andric     Register LR = LoopPhi->getOperand(0).getReg();
537349cc55cSDimitry Andric     for (MachineInstr *MI : MVEInstrs) {
538349cc55cSDimitry Andric       int Idx = findFirstVPTPredOperandIdx(*MI);
539349cc55cSDimitry Andric       MI->getOperand(Idx + 2).setReg(LR);
540349cc55cSDimitry Andric     }
541349cc55cSDimitry Andric   }
542349cc55cSDimitry Andric 
543fe6060f1SDimitry Andric   return true;
544fe6060f1SDimitry Andric }
545fe6060f1SDimitry Andric 
546fe6060f1SDimitry Andric // Returns true if Opcode is any VCMP Opcode.
547fe6060f1SDimitry Andric static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; }
548fe6060f1SDimitry Andric 
549fe6060f1SDimitry Andric // Returns true if a VCMP with this Opcode can have its operands swapped.
550fe6060f1SDimitry Andric // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs,
551fe6060f1SDimitry Andric // and VCMPr instructions (since the r is always on the right).
552fe6060f1SDimitry Andric static bool CanHaveSwappedOperands(unsigned Opcode) {
553fe6060f1SDimitry Andric   switch (Opcode) {
554fe6060f1SDimitry Andric   default:
555fe6060f1SDimitry Andric     return true;
556fe6060f1SDimitry Andric   case ARM::MVE_VCMPf32:
557fe6060f1SDimitry Andric   case ARM::MVE_VCMPf16:
558fe6060f1SDimitry Andric   case ARM::MVE_VCMPf32r:
559fe6060f1SDimitry Andric   case ARM::MVE_VCMPf16r:
560fe6060f1SDimitry Andric   case ARM::MVE_VCMPi8r:
561fe6060f1SDimitry Andric   case ARM::MVE_VCMPi16r:
562fe6060f1SDimitry Andric   case ARM::MVE_VCMPi32r:
563fe6060f1SDimitry Andric   case ARM::MVE_VCMPu8r:
564fe6060f1SDimitry Andric   case ARM::MVE_VCMPu16r:
565fe6060f1SDimitry Andric   case ARM::MVE_VCMPu32r:
566fe6060f1SDimitry Andric   case ARM::MVE_VCMPs8r:
567fe6060f1SDimitry Andric   case ARM::MVE_VCMPs16r:
568fe6060f1SDimitry Andric   case ARM::MVE_VCMPs32r:
569fe6060f1SDimitry Andric     return false;
570fe6060f1SDimitry Andric   }
571fe6060f1SDimitry Andric }
572fe6060f1SDimitry Andric 
573fe6060f1SDimitry Andric // Returns the CondCode of a VCMP Instruction.
574fe6060f1SDimitry Andric static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) {
575fe6060f1SDimitry Andric   assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP");
576fe6060f1SDimitry Andric   return ARMCC::CondCodes(Instr.getOperand(3).getImm());
577fe6060f1SDimitry Andric }
578fe6060f1SDimitry Andric 
579fe6060f1SDimitry Andric // Returns true if Cond is equivalent to a VPNOT instruction on the result of
580fe6060f1SDimitry Andric // Prev. Cond and Prev must be VCMPs.
581fe6060f1SDimitry Andric static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) {
582fe6060f1SDimitry Andric   assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode()));
583fe6060f1SDimitry Andric 
584fe6060f1SDimitry Andric   // Opcodes must match.
585fe6060f1SDimitry Andric   if (Cond.getOpcode() != Prev.getOpcode())
586fe6060f1SDimitry Andric     return false;
587fe6060f1SDimitry Andric 
588fe6060f1SDimitry Andric   MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2);
589fe6060f1SDimitry Andric   MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2);
590fe6060f1SDimitry Andric 
591fe6060f1SDimitry Andric   // If the VCMP has the opposite condition with the same operands, we can
592fe6060f1SDimitry Andric   // replace it with a VPNOT
593fe6060f1SDimitry Andric   ARMCC::CondCodes ExpectedCode = GetCondCode(Cond);
594fe6060f1SDimitry Andric   ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode);
595fe6060f1SDimitry Andric   if (ExpectedCode == GetCondCode(Prev))
596fe6060f1SDimitry Andric     if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2))
597fe6060f1SDimitry Andric       return true;
598fe6060f1SDimitry Andric   // Check again with operands swapped if possible
599fe6060f1SDimitry Andric   if (!CanHaveSwappedOperands(Cond.getOpcode()))
600fe6060f1SDimitry Andric     return false;
601fe6060f1SDimitry Andric   ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode);
602fe6060f1SDimitry Andric   return ExpectedCode == GetCondCode(Prev) && CondOP1.isIdenticalTo(PrevOP2) &&
603fe6060f1SDimitry Andric          CondOP2.isIdenticalTo(PrevOP1);
604fe6060f1SDimitry Andric }
605fe6060f1SDimitry Andric 
606fe6060f1SDimitry Andric // Returns true if Instr writes to VCCR.
607fe6060f1SDimitry Andric static bool IsWritingToVCCR(MachineInstr &Instr) {
608fe6060f1SDimitry Andric   if (Instr.getNumOperands() == 0)
609fe6060f1SDimitry Andric     return false;
610fe6060f1SDimitry Andric   MachineOperand &Dst = Instr.getOperand(0);
611fe6060f1SDimitry Andric   if (!Dst.isReg())
612fe6060f1SDimitry Andric     return false;
613fe6060f1SDimitry Andric   Register DstReg = Dst.getReg();
614fe6060f1SDimitry Andric   if (!DstReg.isVirtual())
615fe6060f1SDimitry Andric     return false;
616fe6060f1SDimitry Andric   MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo();
617fe6060f1SDimitry Andric   const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg);
618fe6060f1SDimitry Andric   return RegClass && (RegClass->getID() == ARM::VCCRRegClassID);
619fe6060f1SDimitry Andric }
620fe6060f1SDimitry Andric 
621fe6060f1SDimitry Andric // Transforms
622fe6060f1SDimitry Andric //    <Instr that uses %A ('User' Operand)>
623fe6060f1SDimitry Andric // Into
624fe6060f1SDimitry Andric //    %K = VPNOT %Target
625fe6060f1SDimitry Andric //    <Instr that uses %K ('User' Operand)>
626fe6060f1SDimitry Andric // And returns the newly inserted VPNOT.
627fe6060f1SDimitry Andric // This optimization is done in the hopes of preventing spills/reloads of VPR by
628fe6060f1SDimitry Andric // reducing the number of VCCR values with overlapping lifetimes.
629fe6060f1SDimitry Andric MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT(
630fe6060f1SDimitry Andric     MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User,
631fe6060f1SDimitry Andric     Register Target) {
632fe6060f1SDimitry Andric   Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target));
633fe6060f1SDimitry Andric 
634fe6060f1SDimitry Andric   MachineInstrBuilder MIBuilder =
635fe6060f1SDimitry Andric       BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
636fe6060f1SDimitry Andric           .addDef(NewResult)
637fe6060f1SDimitry Andric           .addReg(Target);
638fe6060f1SDimitry Andric   addUnpredicatedMveVpredNOp(MIBuilder);
639fe6060f1SDimitry Andric 
640fe6060f1SDimitry Andric   // Make the user use NewResult instead, and clear its kill flag.
641fe6060f1SDimitry Andric   User.setReg(NewResult);
642fe6060f1SDimitry Andric   User.setIsKill(false);
643fe6060f1SDimitry Andric 
644fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "  Inserting VPNOT (for spill prevention): ";
645fe6060f1SDimitry Andric              MIBuilder.getInstr()->dump());
646fe6060f1SDimitry Andric 
647fe6060f1SDimitry Andric   return *MIBuilder.getInstr();
648fe6060f1SDimitry Andric }
649fe6060f1SDimitry Andric 
650fe6060f1SDimitry Andric // Moves a VPNOT before its first user if an instruction that uses Reg is found
651fe6060f1SDimitry Andric // in-between the VPNOT and its user.
652fe6060f1SDimitry Andric // Returns true if there is at least one user of the VPNOT in the block.
653fe6060f1SDimitry Andric static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB,
654fe6060f1SDimitry Andric                                      MachineBasicBlock::iterator Iter,
655fe6060f1SDimitry Andric                                      Register Reg) {
656fe6060f1SDimitry Andric   assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!");
657fe6060f1SDimitry Andric   assert(getVPTInstrPredicate(*Iter) == ARMVCC::None &&
658fe6060f1SDimitry Andric          "The VPNOT cannot be predicated");
659fe6060f1SDimitry Andric 
660fe6060f1SDimitry Andric   MachineInstr &VPNOT = *Iter;
661fe6060f1SDimitry Andric   Register VPNOTResult = VPNOT.getOperand(0).getReg();
662fe6060f1SDimitry Andric   Register VPNOTOperand = VPNOT.getOperand(1).getReg();
663fe6060f1SDimitry Andric 
664fe6060f1SDimitry Andric   // Whether the VPNOT will need to be moved, and whether we found a user of the
665fe6060f1SDimitry Andric   // VPNOT.
666fe6060f1SDimitry Andric   bool MustMove = false, HasUser = false;
667fe6060f1SDimitry Andric   MachineOperand *VPNOTOperandKiller = nullptr;
668fe6060f1SDimitry Andric   for (; Iter != MBB.end(); ++Iter) {
669fe6060f1SDimitry Andric     if (MachineOperand *MO =
670*0fca6ea1SDimitry Andric             Iter->findRegisterUseOperand(VPNOTOperand, /*TRI=*/nullptr,
671*0fca6ea1SDimitry Andric                                          /*isKill*/ true)) {
672fe6060f1SDimitry Andric       // If we find the operand that kills the VPNOTOperand's result, save it.
673fe6060f1SDimitry Andric       VPNOTOperandKiller = MO;
674fe6060f1SDimitry Andric     }
675fe6060f1SDimitry Andric 
676*0fca6ea1SDimitry Andric     if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) {
677fe6060f1SDimitry Andric       MustMove = true;
678fe6060f1SDimitry Andric       continue;
679fe6060f1SDimitry Andric     }
680fe6060f1SDimitry Andric 
681*0fca6ea1SDimitry Andric     if (Iter->findRegisterUseOperandIdx(VPNOTResult, /*TRI=*/nullptr) == -1)
682fe6060f1SDimitry Andric       continue;
683fe6060f1SDimitry Andric 
684fe6060f1SDimitry Andric     HasUser = true;
685fe6060f1SDimitry Andric     if (!MustMove)
686fe6060f1SDimitry Andric       break;
687fe6060f1SDimitry Andric 
688fe6060f1SDimitry Andric     // Move the VPNOT right before Iter
689fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << "  Before: ";
690fe6060f1SDimitry Andric                Iter->dump());
691fe6060f1SDimitry Andric     MBB.splice(Iter, &MBB, VPNOT.getIterator());
692fe6060f1SDimitry Andric     // If we move the instr, and its operand was killed earlier, remove the kill
693fe6060f1SDimitry Andric     // flag.
694fe6060f1SDimitry Andric     if (VPNOTOperandKiller)
695fe6060f1SDimitry Andric       VPNOTOperandKiller->setIsKill(false);
696fe6060f1SDimitry Andric 
697fe6060f1SDimitry Andric     break;
698fe6060f1SDimitry Andric   }
699fe6060f1SDimitry Andric   return HasUser;
700fe6060f1SDimitry Andric }
701fe6060f1SDimitry Andric 
702fe6060f1SDimitry Andric // This optimisation attempts to reduce the number of overlapping lifetimes of
703fe6060f1SDimitry Andric // VCCR values by replacing uses of old VCCR values with VPNOTs. For example,
704fe6060f1SDimitry Andric // this replaces
705fe6060f1SDimitry Andric //    %A:vccr = (something)
706fe6060f1SDimitry Andric //    %B:vccr = VPNOT %A
707fe6060f1SDimitry Andric //    %Foo = (some op that uses %B)
708fe6060f1SDimitry Andric //    %Bar = (some op that uses %A)
709fe6060f1SDimitry Andric // With
710fe6060f1SDimitry Andric //    %A:vccr = (something)
711fe6060f1SDimitry Andric //    %B:vccr = VPNOT %A
712fe6060f1SDimitry Andric //    %Foo = (some op that uses %B)
713fe6060f1SDimitry Andric //    %TMP2:vccr = VPNOT %B
714fe6060f1SDimitry Andric //    %Bar = (some op that uses %A)
715fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) {
716fe6060f1SDimitry Andric   MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end();
717fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> DeadInstructions;
718fe6060f1SDimitry Andric   bool Modified = false;
719fe6060f1SDimitry Andric 
720fe6060f1SDimitry Andric   while (Iter != End) {
721fe6060f1SDimitry Andric     Register VCCRValue, OppositeVCCRValue;
722fe6060f1SDimitry Andric     // The first loop looks for 2 unpredicated instructions:
723fe6060f1SDimitry Andric     //    %A:vccr = (instr)     ; A is stored in VCCRValue
724fe6060f1SDimitry Andric     //    %B:vccr = VPNOT %A    ; B is stored in OppositeVCCRValue
725fe6060f1SDimitry Andric     for (; Iter != End; ++Iter) {
726fe6060f1SDimitry Andric       // We're only interested in unpredicated instructions that write to VCCR.
727fe6060f1SDimitry Andric       if (!IsWritingToVCCR(*Iter) ||
728fe6060f1SDimitry Andric           getVPTInstrPredicate(*Iter) != ARMVCC::None)
729fe6060f1SDimitry Andric         continue;
730fe6060f1SDimitry Andric       Register Dst = Iter->getOperand(0).getReg();
731fe6060f1SDimitry Andric 
732fe6060f1SDimitry Andric       // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've
733fe6060f1SDimitry Andric       // found what we were looking for.
734fe6060f1SDimitry Andric       if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT &&
735*0fca6ea1SDimitry Andric           Iter->findRegisterUseOperandIdx(VCCRValue, /*TRI=*/nullptr) != -1) {
736fe6060f1SDimitry Andric         // Move the VPNOT closer to its first user if needed, and ignore if it
737fe6060f1SDimitry Andric         // has no users.
738fe6060f1SDimitry Andric         if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue))
739fe6060f1SDimitry Andric           continue;
740fe6060f1SDimitry Andric 
741fe6060f1SDimitry Andric         OppositeVCCRValue = Dst;
742fe6060f1SDimitry Andric         ++Iter;
743fe6060f1SDimitry Andric         break;
744fe6060f1SDimitry Andric       }
745fe6060f1SDimitry Andric 
746fe6060f1SDimitry Andric       // Else, just set VCCRValue.
747fe6060f1SDimitry Andric       VCCRValue = Dst;
748fe6060f1SDimitry Andric     }
749fe6060f1SDimitry Andric 
750fe6060f1SDimitry Andric     // If the first inner loop didn't find anything, stop here.
751fe6060f1SDimitry Andric     if (Iter == End)
752fe6060f1SDimitry Andric       break;
753fe6060f1SDimitry Andric 
754fe6060f1SDimitry Andric     assert(VCCRValue && OppositeVCCRValue &&
755fe6060f1SDimitry Andric            "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop "
756fe6060f1SDimitry Andric            "stopped before the end of the block!");
757fe6060f1SDimitry Andric     assert(VCCRValue != OppositeVCCRValue &&
758fe6060f1SDimitry Andric            "VCCRValue should not be equal to OppositeVCCRValue!");
759fe6060f1SDimitry Andric 
760fe6060f1SDimitry Andric     // LastVPNOTResult always contains the same value as OppositeVCCRValue.
761fe6060f1SDimitry Andric     Register LastVPNOTResult = OppositeVCCRValue;
762fe6060f1SDimitry Andric 
763fe6060f1SDimitry Andric     // This second loop tries to optimize the remaining instructions.
764fe6060f1SDimitry Andric     for (; Iter != End; ++Iter) {
765fe6060f1SDimitry Andric       bool IsInteresting = false;
766fe6060f1SDimitry Andric 
767*0fca6ea1SDimitry Andric       if (MachineOperand *MO =
768*0fca6ea1SDimitry Andric               Iter->findRegisterUseOperand(VCCRValue, /*TRI=*/nullptr)) {
769fe6060f1SDimitry Andric         IsInteresting = true;
770fe6060f1SDimitry Andric 
771fe6060f1SDimitry Andric         // - If the instruction is a VPNOT, it can be removed, and we can just
772fe6060f1SDimitry Andric         //   replace its uses with LastVPNOTResult.
773fe6060f1SDimitry Andric         // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue.
774fe6060f1SDimitry Andric         if (Iter->getOpcode() == ARM::MVE_VPNOT) {
775fe6060f1SDimitry Andric           Register Result = Iter->getOperand(0).getReg();
776fe6060f1SDimitry Andric 
777fe6060f1SDimitry Andric           MRI->replaceRegWith(Result, LastVPNOTResult);
778fe6060f1SDimitry Andric           DeadInstructions.push_back(&*Iter);
779fe6060f1SDimitry Andric           Modified = true;
780fe6060f1SDimitry Andric 
781fe6060f1SDimitry Andric           LLVM_DEBUG(dbgs()
782fe6060f1SDimitry Andric                      << "Replacing all uses of '" << printReg(Result)
783fe6060f1SDimitry Andric                      << "' with '" << printReg(LastVPNOTResult) << "'\n");
784fe6060f1SDimitry Andric         } else {
785fe6060f1SDimitry Andric           MachineInstr &VPNOT =
786fe6060f1SDimitry Andric               ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult);
787fe6060f1SDimitry Andric           Modified = true;
788fe6060f1SDimitry Andric 
789fe6060f1SDimitry Andric           LastVPNOTResult = VPNOT.getOperand(0).getReg();
790fe6060f1SDimitry Andric           std::swap(VCCRValue, OppositeVCCRValue);
791fe6060f1SDimitry Andric 
792fe6060f1SDimitry Andric           LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue)
793fe6060f1SDimitry Andric                             << "' with '" << printReg(LastVPNOTResult)
794fe6060f1SDimitry Andric                             << "' in instr: " << *Iter);
795fe6060f1SDimitry Andric         }
796fe6060f1SDimitry Andric       } else {
797fe6060f1SDimitry Andric         // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult
798fe6060f1SDimitry Andric         // instead as they contain the same value.
799*0fca6ea1SDimitry Andric         if (MachineOperand *MO = Iter->findRegisterUseOperand(
800*0fca6ea1SDimitry Andric                 OppositeVCCRValue, /*TRI=*/nullptr)) {
801fe6060f1SDimitry Andric           IsInteresting = true;
802fe6060f1SDimitry Andric 
803fe6060f1SDimitry Andric           // This is pointless if LastVPNOTResult == OppositeVCCRValue.
804fe6060f1SDimitry Andric           if (LastVPNOTResult != OppositeVCCRValue) {
805fe6060f1SDimitry Andric             LLVM_DEBUG(dbgs() << "Replacing usage of '"
806fe6060f1SDimitry Andric                               << printReg(OppositeVCCRValue) << "' with '"
807fe6060f1SDimitry Andric                               << printReg(LastVPNOTResult) << " for instr: ";
808fe6060f1SDimitry Andric                        Iter->dump());
809fe6060f1SDimitry Andric             MO->setReg(LastVPNOTResult);
810fe6060f1SDimitry Andric             Modified = true;
811fe6060f1SDimitry Andric           }
812fe6060f1SDimitry Andric 
813fe6060f1SDimitry Andric           MO->setIsKill(false);
814fe6060f1SDimitry Andric         }
815fe6060f1SDimitry Andric 
816fe6060f1SDimitry Andric         // If this is an unpredicated VPNOT on
817fe6060f1SDimitry Andric         // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it.
818fe6060f1SDimitry Andric         if (Iter->getOpcode() == ARM::MVE_VPNOT &&
819fe6060f1SDimitry Andric             getVPTInstrPredicate(*Iter) == ARMVCC::None) {
820fe6060f1SDimitry Andric           Register VPNOTOperand = Iter->getOperand(1).getReg();
821fe6060f1SDimitry Andric           if (VPNOTOperand == LastVPNOTResult ||
822fe6060f1SDimitry Andric               VPNOTOperand == OppositeVCCRValue) {
823fe6060f1SDimitry Andric             IsInteresting = true;
824fe6060f1SDimitry Andric 
825fe6060f1SDimitry Andric             std::swap(VCCRValue, OppositeVCCRValue);
826fe6060f1SDimitry Andric             LastVPNOTResult = Iter->getOperand(0).getReg();
827fe6060f1SDimitry Andric           }
828fe6060f1SDimitry Andric         }
829fe6060f1SDimitry Andric       }
830fe6060f1SDimitry Andric 
831fe6060f1SDimitry Andric       // If this instruction was not interesting, and it writes to VCCR, stop.
832fe6060f1SDimitry Andric       if (!IsInteresting && IsWritingToVCCR(*Iter))
833fe6060f1SDimitry Andric         break;
834fe6060f1SDimitry Andric     }
835fe6060f1SDimitry Andric   }
836fe6060f1SDimitry Andric 
837fe6060f1SDimitry Andric   for (MachineInstr *DeadInstruction : DeadInstructions)
838fe6060f1SDimitry Andric     DeadInstruction->eraseFromParent();
839fe6060f1SDimitry Andric 
840fe6060f1SDimitry Andric   return Modified;
841fe6060f1SDimitry Andric }
842fe6060f1SDimitry Andric 
843fe6060f1SDimitry Andric // This optimisation replaces VCMPs with VPNOTs when they are equivalent.
844fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) {
845fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> DeadInstructions;
846fe6060f1SDimitry Andric 
847fe6060f1SDimitry Andric   // The last VCMP that we have seen and that couldn't be replaced.
848fe6060f1SDimitry Andric   // This is reset when an instruction that writes to VCCR/VPR is found, or when
849fe6060f1SDimitry Andric   // a VCMP is replaced with a VPNOT.
850fe6060f1SDimitry Andric   // We'll only replace VCMPs with VPNOTs when this is not null, and when the
851fe6060f1SDimitry Andric   // current VCMP is the opposite of PrevVCMP.
852fe6060f1SDimitry Andric   MachineInstr *PrevVCMP = nullptr;
853fe6060f1SDimitry Andric   // If we find an instruction that kills the result of PrevVCMP, we save the
854fe6060f1SDimitry Andric   // operand here to remove the kill flag in case we need to use PrevVCMP's
855fe6060f1SDimitry Andric   // result.
856fe6060f1SDimitry Andric   MachineOperand *PrevVCMPResultKiller = nullptr;
857fe6060f1SDimitry Andric 
858fe6060f1SDimitry Andric   for (MachineInstr &Instr : MBB.instrs()) {
859fe6060f1SDimitry Andric     if (PrevVCMP) {
860*0fca6ea1SDimitry Andric       if (MachineOperand *MO =
861*0fca6ea1SDimitry Andric               Instr.findRegisterUseOperand(PrevVCMP->getOperand(0).getReg(),
862*0fca6ea1SDimitry Andric                                            /*TRI=*/nullptr, /*isKill*/ true)) {
863fe6060f1SDimitry Andric         // If we come accross the instr that kills PrevVCMP's result, record it
864fe6060f1SDimitry Andric         // so we can remove the kill flag later if we need to.
865fe6060f1SDimitry Andric         PrevVCMPResultKiller = MO;
866fe6060f1SDimitry Andric       }
867fe6060f1SDimitry Andric     }
868fe6060f1SDimitry Andric 
869fe6060f1SDimitry Andric     // Ignore predicated instructions.
870fe6060f1SDimitry Andric     if (getVPTInstrPredicate(Instr) != ARMVCC::None)
871fe6060f1SDimitry Andric       continue;
872fe6060f1SDimitry Andric 
873fe6060f1SDimitry Andric     // Only look at VCMPs
874fe6060f1SDimitry Andric     if (!IsVCMP(Instr.getOpcode())) {
875fe6060f1SDimitry Andric       // If the instruction writes to VCCR, forget the previous VCMP.
876fe6060f1SDimitry Andric       if (IsWritingToVCCR(Instr))
877fe6060f1SDimitry Andric         PrevVCMP = nullptr;
878fe6060f1SDimitry Andric       continue;
879fe6060f1SDimitry Andric     }
880fe6060f1SDimitry Andric 
881fe6060f1SDimitry Andric     if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) {
882fe6060f1SDimitry Andric       PrevVCMP = &Instr;
883fe6060f1SDimitry Andric       continue;
884fe6060f1SDimitry Andric     }
885fe6060f1SDimitry Andric 
886fe6060f1SDimitry Andric     // The register containing the result of the VCMP that we're going to
887fe6060f1SDimitry Andric     // replace.
888fe6060f1SDimitry Andric     Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg();
889fe6060f1SDimitry Andric 
890fe6060f1SDimitry Andric     // Build a VPNOT to replace the VCMP, reusing its operands.
891fe6060f1SDimitry Andric     MachineInstrBuilder MIBuilder =
892fe6060f1SDimitry Andric         BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT))
893fe6060f1SDimitry Andric             .add(Instr.getOperand(0))
894fe6060f1SDimitry Andric             .addReg(PrevVCMPResultReg);
895fe6060f1SDimitry Andric     addUnpredicatedMveVpredNOp(MIBuilder);
896fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): ";
897fe6060f1SDimitry Andric                MIBuilder.getInstr()->dump(); dbgs() << "  Removed VCMP: ";
898fe6060f1SDimitry Andric                Instr.dump());
899fe6060f1SDimitry Andric 
900fe6060f1SDimitry Andric     // If we found an instruction that uses, and kills PrevVCMP's result,
901fe6060f1SDimitry Andric     // remove the kill flag.
902fe6060f1SDimitry Andric     if (PrevVCMPResultKiller)
903fe6060f1SDimitry Andric       PrevVCMPResultKiller->setIsKill(false);
904fe6060f1SDimitry Andric 
905fe6060f1SDimitry Andric     // Finally, mark the old VCMP for removal and reset
906fe6060f1SDimitry Andric     // PrevVCMP/PrevVCMPResultKiller.
907fe6060f1SDimitry Andric     DeadInstructions.push_back(&Instr);
908fe6060f1SDimitry Andric     PrevVCMP = nullptr;
909fe6060f1SDimitry Andric     PrevVCMPResultKiller = nullptr;
910fe6060f1SDimitry Andric   }
911fe6060f1SDimitry Andric 
912fe6060f1SDimitry Andric   for (MachineInstr *DeadInstruction : DeadInstructions)
913fe6060f1SDimitry Andric     DeadInstruction->eraseFromParent();
914fe6060f1SDimitry Andric 
915fe6060f1SDimitry Andric   return !DeadInstructions.empty();
916fe6060f1SDimitry Andric }
917fe6060f1SDimitry Andric 
918fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB,
919fe6060f1SDimitry Andric                                                     MachineDominatorTree *DT) {
920fe6060f1SDimitry Andric   // Scan through the block, looking for instructions that use constants moves
921fe6060f1SDimitry Andric   // into VPR that are the negative of one another. These are expected to be
922fe6060f1SDimitry Andric   // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. The last seen constant
923fe6060f1SDimitry Andric   // mask is kept it or and VPNOT's of it are added or reused as we scan through
924fe6060f1SDimitry Andric   // the function.
925fe6060f1SDimitry Andric   unsigned LastVPTImm = 0;
926fe6060f1SDimitry Andric   Register LastVPTReg = 0;
927fe6060f1SDimitry Andric   SmallSet<MachineInstr *, 4> DeadInstructions;
928fe6060f1SDimitry Andric 
929fe6060f1SDimitry Andric   for (MachineInstr &Instr : MBB.instrs()) {
930fe6060f1SDimitry Andric     // Look for predicated MVE instructions.
931fe6060f1SDimitry Andric     int PIdx = llvm::findFirstVPTPredOperandIdx(Instr);
932fe6060f1SDimitry Andric     if (PIdx == -1)
933fe6060f1SDimitry Andric       continue;
934fe6060f1SDimitry Andric     Register VPR = Instr.getOperand(PIdx + 1).getReg();
935fe6060f1SDimitry Andric     if (!VPR.isVirtual())
936fe6060f1SDimitry Andric       continue;
937fe6060f1SDimitry Andric 
938fe6060f1SDimitry Andric     // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr.
939fe6060f1SDimitry Andric     MachineInstr *Copy = MRI->getVRegDef(VPR);
940fe6060f1SDimitry Andric     if (!Copy || Copy->getOpcode() != TargetOpcode::COPY ||
941fe6060f1SDimitry Andric         !Copy->getOperand(1).getReg().isVirtual() ||
942fe6060f1SDimitry Andric         MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) {
943fe6060f1SDimitry Andric       LastVPTReg = 0;
944fe6060f1SDimitry Andric       continue;
945fe6060f1SDimitry Andric     }
946fe6060f1SDimitry Andric     Register GPR = Copy->getOperand(1).getReg();
947fe6060f1SDimitry Andric 
948fe6060f1SDimitry Andric     // Find the Immediate used by the copy.
949fe6060f1SDimitry Andric     auto getImm = [&](Register GPR) -> unsigned {
950fe6060f1SDimitry Andric       MachineInstr *Def = MRI->getVRegDef(GPR);
951fe6060f1SDimitry Andric       if (Def && (Def->getOpcode() == ARM::t2MOVi ||
952fe6060f1SDimitry Andric                   Def->getOpcode() == ARM::t2MOVi16))
953fe6060f1SDimitry Andric         return Def->getOperand(1).getImm();
954fe6060f1SDimitry Andric       return -1U;
955fe6060f1SDimitry Andric     };
956fe6060f1SDimitry Andric     unsigned Imm = getImm(GPR);
957fe6060f1SDimitry Andric     if (Imm == -1U) {
958fe6060f1SDimitry Andric       LastVPTReg = 0;
959fe6060f1SDimitry Andric       continue;
960fe6060f1SDimitry Andric     }
961fe6060f1SDimitry Andric 
962fe6060f1SDimitry Andric     unsigned NotImm = ~Imm & 0xffff;
963fe6060f1SDimitry Andric     if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) {
964*0fca6ea1SDimitry Andric       MRI->clearKillFlags(LastVPTReg);
965fe6060f1SDimitry Andric       Instr.getOperand(PIdx + 1).setReg(LastVPTReg);
966fe6060f1SDimitry Andric       if (MRI->use_empty(VPR)) {
967fe6060f1SDimitry Andric         DeadInstructions.insert(Copy);
968fe6060f1SDimitry Andric         if (MRI->hasOneUse(GPR))
969fe6060f1SDimitry Andric           DeadInstructions.insert(MRI->getVRegDef(GPR));
970fe6060f1SDimitry Andric       }
971fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Reusing predicate: in  " << Instr);
97206c3fb27SDimitry Andric       VPR = LastVPTReg;
973fe6060f1SDimitry Andric     } else if (LastVPTReg != 0 && LastVPTImm == NotImm) {
974fe6060f1SDimitry Andric       // We have found the not of a previous constant. Create a VPNot of the
975fe6060f1SDimitry Andric       // earlier predicate reg and use it instead of the copy.
976fe6060f1SDimitry Andric       Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass);
977fe6060f1SDimitry Andric       auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(),
978fe6060f1SDimitry Andric                            TII->get(ARM::MVE_VPNOT), NewVPR)
979fe6060f1SDimitry Andric                        .addReg(LastVPTReg);
980fe6060f1SDimitry Andric       addUnpredicatedMveVpredNOp(VPNot);
981fe6060f1SDimitry Andric 
982fe6060f1SDimitry Andric       // Use the new register and check if the def is now dead.
983fe6060f1SDimitry Andric       Instr.getOperand(PIdx + 1).setReg(NewVPR);
984fe6060f1SDimitry Andric       if (MRI->use_empty(VPR)) {
985fe6060f1SDimitry Andric         DeadInstructions.insert(Copy);
986fe6060f1SDimitry Andric         if (MRI->hasOneUse(GPR))
987fe6060f1SDimitry Andric           DeadInstructions.insert(MRI->getVRegDef(GPR));
988fe6060f1SDimitry Andric       }
989fe6060f1SDimitry Andric       LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << "  to replace use at "
990fe6060f1SDimitry Andric                         << Instr);
991fe6060f1SDimitry Andric       VPR = NewVPR;
992fe6060f1SDimitry Andric     }
993fe6060f1SDimitry Andric 
994fe6060f1SDimitry Andric     LastVPTImm = Imm;
995fe6060f1SDimitry Andric     LastVPTReg = VPR;
996fe6060f1SDimitry Andric   }
997fe6060f1SDimitry Andric 
998fe6060f1SDimitry Andric   for (MachineInstr *DI : DeadInstructions)
999fe6060f1SDimitry Andric     DI->eraseFromParent();
1000fe6060f1SDimitry Andric 
1001fe6060f1SDimitry Andric   return !DeadInstructions.empty();
1002fe6060f1SDimitry Andric }
1003fe6060f1SDimitry Andric 
1004fe6060f1SDimitry Andric // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a
1005fe6060f1SDimitry Andric // somewhat blunt approximation to allow tail predicated with vpsel
1006fe6060f1SDimitry Andric // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly
1007fe6060f1SDimitry Andric // different semantics under tail predication. Until that is modelled we just
1008fe6060f1SDimitry Andric // convert to a VMOVT (via a predicated VORR) instead.
1009fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) {
1010fe6060f1SDimitry Andric   bool HasVCTP = false;
1011fe6060f1SDimitry Andric   SmallVector<MachineInstr *, 4> DeadInstructions;
1012fe6060f1SDimitry Andric 
1013fe6060f1SDimitry Andric   for (MachineInstr &MI : MBB.instrs()) {
1014fe6060f1SDimitry Andric     if (isVCTP(&MI)) {
1015fe6060f1SDimitry Andric       HasVCTP = true;
1016fe6060f1SDimitry Andric       continue;
1017fe6060f1SDimitry Andric     }
1018fe6060f1SDimitry Andric 
1019fe6060f1SDimitry Andric     if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL)
1020fe6060f1SDimitry Andric       continue;
1021fe6060f1SDimitry Andric 
1022fe6060f1SDimitry Andric     MachineInstrBuilder MIBuilder =
1023fe6060f1SDimitry Andric         BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR))
1024fe6060f1SDimitry Andric             .add(MI.getOperand(0))
1025fe6060f1SDimitry Andric             .add(MI.getOperand(1))
1026fe6060f1SDimitry Andric             .add(MI.getOperand(1))
1027fe6060f1SDimitry Andric             .addImm(ARMVCC::Then)
1028fe6060f1SDimitry Andric             .add(MI.getOperand(4))
1029349cc55cSDimitry Andric             .add(MI.getOperand(5))
1030fe6060f1SDimitry Andric             .add(MI.getOperand(2));
1031fe6060f1SDimitry Andric     // Silence unused variable warning in release builds.
1032fe6060f1SDimitry Andric     (void)MIBuilder;
1033fe6060f1SDimitry Andric     LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump();
1034fe6060f1SDimitry Andric                dbgs() << "     with VMOVT: "; MIBuilder.getInstr()->dump());
1035fe6060f1SDimitry Andric     DeadInstructions.push_back(&MI);
1036fe6060f1SDimitry Andric   }
1037fe6060f1SDimitry Andric 
1038fe6060f1SDimitry Andric   for (MachineInstr *DeadInstruction : DeadInstructions)
1039fe6060f1SDimitry Andric     DeadInstruction->eraseFromParent();
1040fe6060f1SDimitry Andric 
1041fe6060f1SDimitry Andric   return !DeadInstructions.empty();
1042fe6060f1SDimitry Andric }
1043fe6060f1SDimitry Andric 
1044fe6060f1SDimitry Andric // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as
1045fe6060f1SDimitry Andric // the instruction may be removable as a noop.
1046fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) {
1047fe6060f1SDimitry Andric   bool Changed = false;
1048fe6060f1SDimitry Andric   for (MachineInstr &MI : MBB.instrs()) {
1049fe6060f1SDimitry Andric     if (MI.getOpcode() != ARM::t2DoLoopStart)
1050fe6060f1SDimitry Andric       continue;
1051fe6060f1SDimitry Andric     Register R = MI.getOperand(1).getReg();
1052fe6060f1SDimitry Andric     MachineFunction *MF = MI.getParent()->getParent();
1053fe6060f1SDimitry Andric     MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0);
1054fe6060f1SDimitry Andric     Changed = true;
1055fe6060f1SDimitry Andric   }
1056fe6060f1SDimitry Andric   return Changed;
1057fe6060f1SDimitry Andric }
1058fe6060f1SDimitry Andric 
1059fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) {
106081ad6265SDimitry Andric   const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>();
1061fe6060f1SDimitry Andric 
1062fe6060f1SDimitry Andric   if (!STI.isThumb2() || !STI.hasLOB())
1063fe6060f1SDimitry Andric     return false;
1064fe6060f1SDimitry Andric 
1065fe6060f1SDimitry Andric   TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo());
1066fe6060f1SDimitry Andric   MRI = &Fn.getRegInfo();
1067*0fca6ea1SDimitry Andric   MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
1068*0fca6ea1SDimitry Andric   MachineDominatorTree *DT =
1069*0fca6ea1SDimitry Andric       &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
1070fe6060f1SDimitry Andric 
1071fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n"
1072fe6060f1SDimitry Andric                     << "********** Function: " << Fn.getName() << '\n');
1073fe6060f1SDimitry Andric 
1074fe6060f1SDimitry Andric   bool Modified = false;
1075*0fca6ea1SDimitry Andric   for (MachineLoop *ML : MLI->getLoopsInPreorder()) {
1076fe6060f1SDimitry Andric     Modified |= LowerWhileLoopStart(ML);
1077fe6060f1SDimitry Andric     Modified |= MergeLoopEnd(ML);
1078fe6060f1SDimitry Andric     Modified |= ConvertTailPredLoop(ML, DT);
1079fe6060f1SDimitry Andric   }
1080fe6060f1SDimitry Andric 
1081fe6060f1SDimitry Andric   for (MachineBasicBlock &MBB : Fn) {
1082fe6060f1SDimitry Andric     Modified |= HintDoLoopStartReg(MBB);
1083fe6060f1SDimitry Andric     Modified |= ReplaceConstByVPNOTs(MBB, DT);
1084fe6060f1SDimitry Andric     Modified |= ReplaceVCMPsByVPNOTs(MBB);
1085fe6060f1SDimitry Andric     Modified |= ReduceOldVCCRValueUses(MBB);
1086fe6060f1SDimitry Andric     Modified |= ConvertVPSEL(MBB);
1087fe6060f1SDimitry Andric   }
1088fe6060f1SDimitry Andric 
1089fe6060f1SDimitry Andric   LLVM_DEBUG(dbgs() << "**************************************\n");
1090fe6060f1SDimitry Andric   return Modified;
1091fe6060f1SDimitry Andric }
1092fe6060f1SDimitry Andric 
1093fe6060f1SDimitry Andric /// createMVETPAndVPTOptimisationsPass
1094fe6060f1SDimitry Andric FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() {
1095fe6060f1SDimitry Andric   return new MVETPAndVPTOptimisations();
1096fe6060f1SDimitry Andric }
1097