//===-- MVETPAndVPTOptimisationsPass.cpp ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file This pass does a few optimisations related to Tail predicated loops
/// and MVE VPT blocks before register allocation is performed. For VPT blocks
/// the goal is to maximize the sizes of the blocks that will be created by the
/// MVE VPT Block Insertion pass (which runs after register allocation). For
/// tail predicated loops we transform the loop into something that will
/// hopefully make the backend ARMLowOverheadLoops pass's job easier.
15fe6060f1SDimitry Andric /// 16fe6060f1SDimitry Andric //===----------------------------------------------------------------------===// 17fe6060f1SDimitry Andric 18fe6060f1SDimitry Andric #include "ARM.h" 19fe6060f1SDimitry Andric #include "ARMSubtarget.h" 20fe6060f1SDimitry Andric #include "MCTargetDesc/ARMBaseInfo.h" 21fe6060f1SDimitry Andric #include "MVETailPredUtils.h" 22fe6060f1SDimitry Andric #include "Thumb2InstrInfo.h" 23fe6060f1SDimitry Andric #include "llvm/ADT/SmallVector.h" 24fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineBasicBlock.h" 25fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineDominators.h" 26fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunction.h" 27fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineFunctionPass.h" 28fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineInstr.h" 29fe6060f1SDimitry Andric #include "llvm/CodeGen/MachineLoopInfo.h" 30fe6060f1SDimitry Andric #include "llvm/InitializePasses.h" 31fe6060f1SDimitry Andric #include "llvm/Support/Debug.h" 32fe6060f1SDimitry Andric #include <cassert> 33fe6060f1SDimitry Andric 34fe6060f1SDimitry Andric using namespace llvm; 35fe6060f1SDimitry Andric 36fe6060f1SDimitry Andric #define DEBUG_TYPE "arm-mve-vpt-opts" 37fe6060f1SDimitry Andric 38fe6060f1SDimitry Andric static cl::opt<bool> 39fe6060f1SDimitry Andric MergeEndDec("arm-enable-merge-loopenddec", cl::Hidden, 40fe6060f1SDimitry Andric cl::desc("Enable merging Loop End and Dec instructions."), 41fe6060f1SDimitry Andric cl::init(true)); 42fe6060f1SDimitry Andric 43349cc55cSDimitry Andric static cl::opt<bool> 44349cc55cSDimitry Andric SetLRPredicate("arm-set-lr-predicate", cl::Hidden, 45349cc55cSDimitry Andric cl::desc("Enable setting lr as a predicate in tail predication regions."), 46349cc55cSDimitry Andric cl::init(true)); 47349cc55cSDimitry Andric 48fe6060f1SDimitry Andric namespace { 49fe6060f1SDimitry Andric class MVETPAndVPTOptimisations : public MachineFunctionPass { 50fe6060f1SDimitry Andric public: 
51fe6060f1SDimitry Andric static char ID; 52fe6060f1SDimitry Andric const Thumb2InstrInfo *TII; 53fe6060f1SDimitry Andric MachineRegisterInfo *MRI; 54fe6060f1SDimitry Andric 55fe6060f1SDimitry Andric MVETPAndVPTOptimisations() : MachineFunctionPass(ID) {} 56fe6060f1SDimitry Andric 57fe6060f1SDimitry Andric bool runOnMachineFunction(MachineFunction &Fn) override; 58fe6060f1SDimitry Andric 59fe6060f1SDimitry Andric void getAnalysisUsage(AnalysisUsage &AU) const override { 60*0fca6ea1SDimitry Andric AU.addRequired<MachineLoopInfoWrapperPass>(); 61*0fca6ea1SDimitry Andric AU.addPreserved<MachineLoopInfoWrapperPass>(); 62*0fca6ea1SDimitry Andric AU.addRequired<MachineDominatorTreeWrapperPass>(); 63*0fca6ea1SDimitry Andric AU.addPreserved<MachineDominatorTreeWrapperPass>(); 64fe6060f1SDimitry Andric MachineFunctionPass::getAnalysisUsage(AU); 65fe6060f1SDimitry Andric } 66fe6060f1SDimitry Andric 67fe6060f1SDimitry Andric StringRef getPassName() const override { 68fe6060f1SDimitry Andric return "ARM MVE TailPred and VPT Optimisation Pass"; 69fe6060f1SDimitry Andric } 70fe6060f1SDimitry Andric 71fe6060f1SDimitry Andric private: 72fe6060f1SDimitry Andric bool LowerWhileLoopStart(MachineLoop *ML); 73fe6060f1SDimitry Andric bool MergeLoopEnd(MachineLoop *ML); 74fe6060f1SDimitry Andric bool ConvertTailPredLoop(MachineLoop *ML, MachineDominatorTree *DT); 75fe6060f1SDimitry Andric MachineInstr &ReplaceRegisterUseWithVPNOT(MachineBasicBlock &MBB, 76fe6060f1SDimitry Andric MachineInstr &Instr, 77fe6060f1SDimitry Andric MachineOperand &User, 78fe6060f1SDimitry Andric Register Target); 79fe6060f1SDimitry Andric bool ReduceOldVCCRValueUses(MachineBasicBlock &MBB); 80fe6060f1SDimitry Andric bool ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB); 81fe6060f1SDimitry Andric bool ReplaceConstByVPNOTs(MachineBasicBlock &MBB, MachineDominatorTree *DT); 82fe6060f1SDimitry Andric bool ConvertVPSEL(MachineBasicBlock &MBB); 83fe6060f1SDimitry Andric bool HintDoLoopStartReg(MachineBasicBlock &MBB); 
84fe6060f1SDimitry Andric MachineInstr *CheckForLRUseInPredecessors(MachineBasicBlock *PreHeader, 85fe6060f1SDimitry Andric MachineInstr *LoopStart); 86fe6060f1SDimitry Andric }; 87fe6060f1SDimitry Andric 88fe6060f1SDimitry Andric char MVETPAndVPTOptimisations::ID = 0; 89fe6060f1SDimitry Andric 90fe6060f1SDimitry Andric } // end anonymous namespace 91fe6060f1SDimitry Andric 92fe6060f1SDimitry Andric INITIALIZE_PASS_BEGIN(MVETPAndVPTOptimisations, DEBUG_TYPE, 93fe6060f1SDimitry Andric "ARM MVE TailPred and VPT Optimisations pass", false, 94fe6060f1SDimitry Andric false) 95*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineLoopInfoWrapperPass) 96*0fca6ea1SDimitry Andric INITIALIZE_PASS_DEPENDENCY(MachineDominatorTreeWrapperPass) 97fe6060f1SDimitry Andric INITIALIZE_PASS_END(MVETPAndVPTOptimisations, DEBUG_TYPE, 98fe6060f1SDimitry Andric "ARM MVE TailPred and VPT Optimisations pass", false, false) 99fe6060f1SDimitry Andric 100fe6060f1SDimitry Andric static MachineInstr *LookThroughCOPY(MachineInstr *MI, 101fe6060f1SDimitry Andric MachineRegisterInfo *MRI) { 102fe6060f1SDimitry Andric while (MI && MI->getOpcode() == TargetOpcode::COPY && 103fe6060f1SDimitry Andric MI->getOperand(1).getReg().isVirtual()) 104fe6060f1SDimitry Andric MI = MRI->getVRegDef(MI->getOperand(1).getReg()); 105fe6060f1SDimitry Andric return MI; 106fe6060f1SDimitry Andric } 107fe6060f1SDimitry Andric 108fe6060f1SDimitry Andric // Given a loop ML, this attempts to find the t2LoopEnd, t2LoopDec and 109fe6060f1SDimitry Andric // corresponding PHI that make up a low overhead loop. Only handles 'do' loops 110fe6060f1SDimitry Andric // at the moment, returning a t2DoLoopStart in LoopStart. 
111fe6060f1SDimitry Andric static bool findLoopComponents(MachineLoop *ML, MachineRegisterInfo *MRI, 112fe6060f1SDimitry Andric MachineInstr *&LoopStart, MachineInstr *&LoopPhi, 113fe6060f1SDimitry Andric MachineInstr *&LoopDec, MachineInstr *&LoopEnd) { 114fe6060f1SDimitry Andric MachineBasicBlock *Header = ML->getHeader(); 115fe6060f1SDimitry Andric MachineBasicBlock *Latch = ML->getLoopLatch(); 116fe6060f1SDimitry Andric if (!Header || !Latch) { 117fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " no Loop Latch or Header\n"); 118fe6060f1SDimitry Andric return false; 119fe6060f1SDimitry Andric } 120fe6060f1SDimitry Andric 121fe6060f1SDimitry Andric // Find the loop end from the terminators. 122fe6060f1SDimitry Andric LoopEnd = nullptr; 123fe6060f1SDimitry Andric for (auto &T : Latch->terminators()) { 124fe6060f1SDimitry Andric if (T.getOpcode() == ARM::t2LoopEnd && T.getOperand(1).getMBB() == Header) { 125fe6060f1SDimitry Andric LoopEnd = &T; 126fe6060f1SDimitry Andric break; 127fe6060f1SDimitry Andric } 128fe6060f1SDimitry Andric if (T.getOpcode() == ARM::t2LoopEndDec && 129fe6060f1SDimitry Andric T.getOperand(2).getMBB() == Header) { 130fe6060f1SDimitry Andric LoopEnd = &T; 131fe6060f1SDimitry Andric break; 132fe6060f1SDimitry Andric } 133fe6060f1SDimitry Andric } 134fe6060f1SDimitry Andric if (!LoopEnd) { 135fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " no LoopEnd\n"); 136fe6060f1SDimitry Andric return false; 137fe6060f1SDimitry Andric } 138fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " found loop end: " << *LoopEnd); 139fe6060f1SDimitry Andric 140fe6060f1SDimitry Andric // Find the dec from the use of the end. There may be copies between 141fe6060f1SDimitry Andric // instructions. We expect the loop to loop like: 142fe6060f1SDimitry Andric // $vs = t2DoLoopStart ... 143fe6060f1SDimitry Andric // loop: 144fe6060f1SDimitry Andric // $vp = phi [ $vs ], [ $vd ] 145fe6060f1SDimitry Andric // ... 
146fe6060f1SDimitry Andric // $vd = t2LoopDec $vp 147fe6060f1SDimitry Andric // ... 148fe6060f1SDimitry Andric // t2LoopEnd $vd, loop 149fe6060f1SDimitry Andric if (LoopEnd->getOpcode() == ARM::t2LoopEndDec) 150fe6060f1SDimitry Andric LoopDec = LoopEnd; 151fe6060f1SDimitry Andric else { 152fe6060f1SDimitry Andric LoopDec = 153fe6060f1SDimitry Andric LookThroughCOPY(MRI->getVRegDef(LoopEnd->getOperand(0).getReg()), MRI); 154fe6060f1SDimitry Andric if (!LoopDec || LoopDec->getOpcode() != ARM::t2LoopDec) { 155fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " didn't find LoopDec where we expected!\n"); 156fe6060f1SDimitry Andric return false; 157fe6060f1SDimitry Andric } 158fe6060f1SDimitry Andric } 159fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " found loop dec: " << *LoopDec); 160fe6060f1SDimitry Andric 161fe6060f1SDimitry Andric LoopPhi = 162fe6060f1SDimitry Andric LookThroughCOPY(MRI->getVRegDef(LoopDec->getOperand(1).getReg()), MRI); 163fe6060f1SDimitry Andric if (!LoopPhi || LoopPhi->getOpcode() != TargetOpcode::PHI || 164fe6060f1SDimitry Andric LoopPhi->getNumOperands() != 5 || 165fe6060f1SDimitry Andric (LoopPhi->getOperand(2).getMBB() != Latch && 166fe6060f1SDimitry Andric LoopPhi->getOperand(4).getMBB() != Latch)) { 167fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " didn't find PHI where we expected!\n"); 168fe6060f1SDimitry Andric return false; 169fe6060f1SDimitry Andric } 170fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " found loop phi: " << *LoopPhi); 171fe6060f1SDimitry Andric 172fe6060f1SDimitry Andric Register StartReg = LoopPhi->getOperand(2).getMBB() == Latch 173fe6060f1SDimitry Andric ? 
LoopPhi->getOperand(3).getReg() 174fe6060f1SDimitry Andric : LoopPhi->getOperand(1).getReg(); 175fe6060f1SDimitry Andric LoopStart = LookThroughCOPY(MRI->getVRegDef(StartReg), MRI); 176fe6060f1SDimitry Andric if (!LoopStart || (LoopStart->getOpcode() != ARM::t2DoLoopStart && 177fe6060f1SDimitry Andric LoopStart->getOpcode() != ARM::t2WhileLoopSetup && 178fe6060f1SDimitry Andric LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) { 179fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " didn't find Start where we expected!\n"); 180fe6060f1SDimitry Andric return false; 181fe6060f1SDimitry Andric } 182fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " found loop start: " << *LoopStart); 183fe6060f1SDimitry Andric 184fe6060f1SDimitry Andric return true; 185fe6060f1SDimitry Andric } 186fe6060f1SDimitry Andric 187fe6060f1SDimitry Andric static void RevertWhileLoopSetup(MachineInstr *MI, const TargetInstrInfo *TII) { 188fe6060f1SDimitry Andric MachineBasicBlock *MBB = MI->getParent(); 189fe6060f1SDimitry Andric assert(MI->getOpcode() == ARM::t2WhileLoopSetup && 190fe6060f1SDimitry Andric "Only expected a t2WhileLoopSetup in RevertWhileLoopStart!"); 191fe6060f1SDimitry Andric 192fe6060f1SDimitry Andric // Subs 193fe6060f1SDimitry Andric MachineInstrBuilder MIB = 194fe6060f1SDimitry Andric BuildMI(*MBB, MI, MI->getDebugLoc(), TII->get(ARM::t2SUBri)); 195fe6060f1SDimitry Andric MIB.add(MI->getOperand(0)); 196fe6060f1SDimitry Andric MIB.add(MI->getOperand(1)); 197fe6060f1SDimitry Andric MIB.addImm(0); 198fe6060f1SDimitry Andric MIB.addImm(ARMCC::AL); 199fe6060f1SDimitry Andric MIB.addReg(ARM::NoRegister); 200fe6060f1SDimitry Andric MIB.addReg(ARM::CPSR, RegState::Define); 201fe6060f1SDimitry Andric 202fe6060f1SDimitry Andric // Attempt to find a t2WhileLoopStart and revert to a t2Bcc. 
203fe6060f1SDimitry Andric for (MachineInstr &I : MBB->terminators()) { 204fe6060f1SDimitry Andric if (I.getOpcode() == ARM::t2WhileLoopStart) { 205fe6060f1SDimitry Andric MachineInstrBuilder MIB = 206fe6060f1SDimitry Andric BuildMI(*MBB, &I, I.getDebugLoc(), TII->get(ARM::t2Bcc)); 207fe6060f1SDimitry Andric MIB.add(MI->getOperand(1)); // branch target 208fe6060f1SDimitry Andric MIB.addImm(ARMCC::EQ); 209fe6060f1SDimitry Andric MIB.addReg(ARM::CPSR); 210fe6060f1SDimitry Andric I.eraseFromParent(); 211fe6060f1SDimitry Andric break; 212fe6060f1SDimitry Andric } 213fe6060f1SDimitry Andric } 214fe6060f1SDimitry Andric 215fe6060f1SDimitry Andric MI->eraseFromParent(); 216fe6060f1SDimitry Andric } 217fe6060f1SDimitry Andric 218fe6060f1SDimitry Andric // The Hardware Loop insertion and ISel Lowering produce the pseudos for the 219fe6060f1SDimitry Andric // start of a while loop: 220fe6060f1SDimitry Andric // %a:gprlr = t2WhileLoopSetup %Cnt 221fe6060f1SDimitry Andric // t2WhileLoopStart %a, %BB 222fe6060f1SDimitry Andric // We want to convert those to a single instruction which, like t2LoopEndDec and 223fe6060f1SDimitry Andric // t2DoLoopStartTP is both a terminator and produces a value: 224fe6060f1SDimitry Andric // %a:grplr: t2WhileLoopStartLR %Cnt, %BB 225fe6060f1SDimitry Andric // 226fe6060f1SDimitry Andric // Otherwise if we can't, we revert the loop. t2WhileLoopSetup and 227fe6060f1SDimitry Andric // t2WhileLoopStart are not valid past regalloc. 
228fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::LowerWhileLoopStart(MachineLoop *ML) { 229fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "LowerWhileLoopStart on loop " 230fe6060f1SDimitry Andric << ML->getHeader()->getName() << "\n"); 231fe6060f1SDimitry Andric 232fe6060f1SDimitry Andric MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 233fe6060f1SDimitry Andric if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 234fe6060f1SDimitry Andric return false; 235fe6060f1SDimitry Andric 236fe6060f1SDimitry Andric if (LoopStart->getOpcode() != ARM::t2WhileLoopSetup) 237fe6060f1SDimitry Andric return false; 238fe6060f1SDimitry Andric 239fe6060f1SDimitry Andric Register LR = LoopStart->getOperand(0).getReg(); 240fe6060f1SDimitry Andric auto WLSIt = find_if(MRI->use_nodbg_instructions(LR), [](auto &MI) { 241fe6060f1SDimitry Andric return MI.getOpcode() == ARM::t2WhileLoopStart; 242fe6060f1SDimitry Andric }); 243fe6060f1SDimitry Andric if (!MergeEndDec || WLSIt == MRI->use_instr_nodbg_end()) { 244fe6060f1SDimitry Andric RevertWhileLoopSetup(LoopStart, TII); 245fe6060f1SDimitry Andric RevertLoopDec(LoopStart, TII); 246fe6060f1SDimitry Andric RevertLoopEnd(LoopStart, TII); 247fe6060f1SDimitry Andric return true; 248fe6060f1SDimitry Andric } 249fe6060f1SDimitry Andric 250fe6060f1SDimitry Andric MachineInstrBuilder MI = 251fe6060f1SDimitry Andric BuildMI(*WLSIt->getParent(), *WLSIt, WLSIt->getDebugLoc(), 252fe6060f1SDimitry Andric TII->get(ARM::t2WhileLoopStartLR), LR) 253fe6060f1SDimitry Andric .add(LoopStart->getOperand(1)) 254fe6060f1SDimitry Andric .add(WLSIt->getOperand(1)); 255fe6060f1SDimitry Andric (void)MI; 256fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Lowered WhileLoopStart into: " << *MI.getInstr()); 257fe6060f1SDimitry Andric 258fe6060f1SDimitry Andric WLSIt->eraseFromParent(); 259fe6060f1SDimitry Andric LoopStart->eraseFromParent(); 260fe6060f1SDimitry Andric return true; 261fe6060f1SDimitry Andric } 262fe6060f1SDimitry Andric 
263fe6060f1SDimitry Andric // Return true if this instruction is invalid in a low overhead loop, usually 264fe6060f1SDimitry Andric // because it clobbers LR. 265fe6060f1SDimitry Andric static bool IsInvalidTPInstruction(MachineInstr &MI) { 266fe6060f1SDimitry Andric return MI.isCall() || isLoopStart(MI); 267fe6060f1SDimitry Andric } 268fe6060f1SDimitry Andric 269fe6060f1SDimitry Andric // Starting from PreHeader, search for invalid instructions back until the 270fe6060f1SDimitry Andric // LoopStart block is reached. If invalid instructions are found, the loop start 271fe6060f1SDimitry Andric // is reverted from a WhileLoopStart to a DoLoopStart on the same loop. Will 272fe6060f1SDimitry Andric // return the new DLS LoopStart if updated. 273fe6060f1SDimitry Andric MachineInstr *MVETPAndVPTOptimisations::CheckForLRUseInPredecessors( 274fe6060f1SDimitry Andric MachineBasicBlock *PreHeader, MachineInstr *LoopStart) { 275fe6060f1SDimitry Andric SmallVector<MachineBasicBlock *> Worklist; 276fe6060f1SDimitry Andric SmallPtrSet<MachineBasicBlock *, 4> Visited; 277fe6060f1SDimitry Andric Worklist.push_back(PreHeader); 278fe6060f1SDimitry Andric Visited.insert(LoopStart->getParent()); 279fe6060f1SDimitry Andric 280fe6060f1SDimitry Andric while (!Worklist.empty()) { 281fe6060f1SDimitry Andric MachineBasicBlock *MBB = Worklist.pop_back_val(); 282fe6060f1SDimitry Andric if (Visited.count(MBB)) 283fe6060f1SDimitry Andric continue; 284fe6060f1SDimitry Andric 285fe6060f1SDimitry Andric for (MachineInstr &MI : *MBB) { 286fe6060f1SDimitry Andric if (!IsInvalidTPInstruction(MI)) 287fe6060f1SDimitry Andric continue; 288fe6060f1SDimitry Andric 289fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found LR use in predecessors, reverting: " << MI); 290fe6060f1SDimitry Andric 291fe6060f1SDimitry Andric // Create a t2DoLoopStart at the end of the preheader. 
292fe6060f1SDimitry Andric MachineInstrBuilder MIB = 293fe6060f1SDimitry Andric BuildMI(*PreHeader, PreHeader->getFirstTerminator(), 294fe6060f1SDimitry Andric LoopStart->getDebugLoc(), TII->get(ARM::t2DoLoopStart)); 295fe6060f1SDimitry Andric MIB.add(LoopStart->getOperand(0)); 296fe6060f1SDimitry Andric MIB.add(LoopStart->getOperand(1)); 297fe6060f1SDimitry Andric 298fe6060f1SDimitry Andric // Make sure to remove the kill flags, to prevent them from being invalid. 299fe6060f1SDimitry Andric LoopStart->getOperand(1).setIsKill(false); 300fe6060f1SDimitry Andric 301fe6060f1SDimitry Andric // Revert the t2WhileLoopStartLR to a CMP and Br. 302fe6060f1SDimitry Andric RevertWhileLoopStartLR(LoopStart, TII, ARM::t2Bcc, true); 303fe6060f1SDimitry Andric return MIB; 304fe6060f1SDimitry Andric } 305fe6060f1SDimitry Andric 306fe6060f1SDimitry Andric Visited.insert(MBB); 307fe6060f1SDimitry Andric for (auto *Pred : MBB->predecessors()) 308fe6060f1SDimitry Andric Worklist.push_back(Pred); 309fe6060f1SDimitry Andric } 310fe6060f1SDimitry Andric return LoopStart; 311fe6060f1SDimitry Andric } 312fe6060f1SDimitry Andric 313fe6060f1SDimitry Andric // This function converts loops with t2LoopEnd and t2LoopEnd instructions into 314fe6060f1SDimitry Andric // a single t2LoopEndDec instruction. To do that it needs to make sure that LR 315fe6060f1SDimitry Andric // will be valid to be used for the low overhead loop, which means nothing else 316fe6060f1SDimitry Andric // is using LR (especially calls) and there are no superfluous copies in the 317fe6060f1SDimitry Andric // loop. The t2LoopEndDec is a branching terminator that produces a value (the 318fe6060f1SDimitry Andric // decrement) around the loop edge, which means we need to be careful that they 319fe6060f1SDimitry Andric // will be valid to allocate without any spilling. 
320fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::MergeLoopEnd(MachineLoop *ML) { 321fe6060f1SDimitry Andric if (!MergeEndDec) 322fe6060f1SDimitry Andric return false; 323fe6060f1SDimitry Andric 324fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "MergeLoopEnd on loop " << ML->getHeader()->getName() 325fe6060f1SDimitry Andric << "\n"); 326fe6060f1SDimitry Andric 327fe6060f1SDimitry Andric MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 328fe6060f1SDimitry Andric if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 329fe6060f1SDimitry Andric return false; 330fe6060f1SDimitry Andric 331fe6060f1SDimitry Andric // Check if there is an illegal instruction (a call) in the low overhead loop 332fe6060f1SDimitry Andric // and if so revert it now before we get any further. While loops also need to 333fe6060f1SDimitry Andric // check the preheaders, but can be reverted to a DLS loop if needed. 334fe6060f1SDimitry Andric auto *PreHeader = ML->getLoopPreheader(); 335fe6060f1SDimitry Andric if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR && PreHeader) 336fe6060f1SDimitry Andric LoopStart = CheckForLRUseInPredecessors(PreHeader, LoopStart); 337fe6060f1SDimitry Andric 338fe6060f1SDimitry Andric for (MachineBasicBlock *MBB : ML->blocks()) { 339fe6060f1SDimitry Andric for (MachineInstr &MI : *MBB) { 340fe6060f1SDimitry Andric if (IsInvalidTPInstruction(MI)) { 341fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Found LR use in loop, reverting: " << MI); 342fe6060f1SDimitry Andric if (LoopStart->getOpcode() == ARM::t2DoLoopStart) 343fe6060f1SDimitry Andric RevertDoLoopStart(LoopStart, TII); 344fe6060f1SDimitry Andric else 345fe6060f1SDimitry Andric RevertWhileLoopStartLR(LoopStart, TII); 346fe6060f1SDimitry Andric RevertLoopDec(LoopDec, TII); 347fe6060f1SDimitry Andric RevertLoopEnd(LoopEnd, TII); 348fe6060f1SDimitry Andric return true; 349fe6060f1SDimitry Andric } 350fe6060f1SDimitry Andric } 351fe6060f1SDimitry Andric } 352fe6060f1SDimitry Andric 
353fe6060f1SDimitry Andric // Remove any copies from the loop, to ensure the phi that remains is both 354fe6060f1SDimitry Andric // simpler and contains no extra uses. Because t2LoopEndDec is a terminator 355fe6060f1SDimitry Andric // that cannot spill, we need to be careful what remains in the loop. 356fe6060f1SDimitry Andric Register PhiReg = LoopPhi->getOperand(0).getReg(); 357fe6060f1SDimitry Andric Register DecReg = LoopDec->getOperand(0).getReg(); 358fe6060f1SDimitry Andric Register StartReg = LoopStart->getOperand(0).getReg(); 359fe6060f1SDimitry Andric // Ensure the uses are expected, and collect any copies we want to remove. 360fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> Copies; 361fe6060f1SDimitry Andric auto CheckUsers = [&Copies](Register BaseReg, 362fe6060f1SDimitry Andric ArrayRef<MachineInstr *> ExpectedUsers, 363fe6060f1SDimitry Andric MachineRegisterInfo *MRI) { 364fe6060f1SDimitry Andric SmallVector<Register, 4> Worklist; 365fe6060f1SDimitry Andric Worklist.push_back(BaseReg); 366fe6060f1SDimitry Andric while (!Worklist.empty()) { 367fe6060f1SDimitry Andric Register Reg = Worklist.pop_back_val(); 368fe6060f1SDimitry Andric for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) { 3690eae32dcSDimitry Andric if (llvm::is_contained(ExpectedUsers, &MI)) 370fe6060f1SDimitry Andric continue; 371fe6060f1SDimitry Andric if (MI.getOpcode() != TargetOpcode::COPY || 372fe6060f1SDimitry Andric !MI.getOperand(0).getReg().isVirtual()) { 373fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Extra users of register found: " << MI); 374fe6060f1SDimitry Andric return false; 375fe6060f1SDimitry Andric } 376fe6060f1SDimitry Andric Worklist.push_back(MI.getOperand(0).getReg()); 377fe6060f1SDimitry Andric Copies.push_back(&MI); 378fe6060f1SDimitry Andric } 379fe6060f1SDimitry Andric } 380fe6060f1SDimitry Andric return true; 381fe6060f1SDimitry Andric }; 382fe6060f1SDimitry Andric if (!CheckUsers(PhiReg, {LoopDec}, MRI) || 383fe6060f1SDimitry Andric 
!CheckUsers(DecReg, {LoopPhi, LoopEnd}, MRI) || 384fe6060f1SDimitry Andric !CheckUsers(StartReg, {LoopPhi}, MRI)) { 385fe6060f1SDimitry Andric // Don't leave a t2WhileLoopStartLR without the LoopDecEnd. 386fe6060f1SDimitry Andric if (LoopStart->getOpcode() == ARM::t2WhileLoopStartLR) { 387fe6060f1SDimitry Andric RevertWhileLoopStartLR(LoopStart, TII); 388fe6060f1SDimitry Andric RevertLoopDec(LoopDec, TII); 389fe6060f1SDimitry Andric RevertLoopEnd(LoopEnd, TII); 390fe6060f1SDimitry Andric return true; 391fe6060f1SDimitry Andric } 392fe6060f1SDimitry Andric return false; 393fe6060f1SDimitry Andric } 394fe6060f1SDimitry Andric 395fe6060f1SDimitry Andric MRI->constrainRegClass(StartReg, &ARM::GPRlrRegClass); 396fe6060f1SDimitry Andric MRI->constrainRegClass(PhiReg, &ARM::GPRlrRegClass); 397fe6060f1SDimitry Andric MRI->constrainRegClass(DecReg, &ARM::GPRlrRegClass); 398fe6060f1SDimitry Andric 399fe6060f1SDimitry Andric if (LoopPhi->getOperand(2).getMBB() == ML->getLoopLatch()) { 400fe6060f1SDimitry Andric LoopPhi->getOperand(3).setReg(StartReg); 401fe6060f1SDimitry Andric LoopPhi->getOperand(1).setReg(DecReg); 402fe6060f1SDimitry Andric } else { 403fe6060f1SDimitry Andric LoopPhi->getOperand(1).setReg(StartReg); 404fe6060f1SDimitry Andric LoopPhi->getOperand(3).setReg(DecReg); 405fe6060f1SDimitry Andric } 406fe6060f1SDimitry Andric 40781ad6265SDimitry Andric SmallVector<MachineOperand, 4> Cond; // For analyzeBranch. 40881ad6265SDimitry Andric MachineBasicBlock *TBB = nullptr, *FBB = nullptr; // For analyzeBranch. 40981ad6265SDimitry Andric if (!TII->analyzeBranch(*LoopEnd->getParent(), TBB, FBB, Cond) && !FBB) { 41081ad6265SDimitry Andric // If the LoopEnd falls through, need to insert a t2B to the fall-through 41181ad6265SDimitry Andric // block so that the non-analyzable t2LoopEndDec doesn't fall through. 
41281ad6265SDimitry Andric MachineFunction::iterator MBBI = ++LoopEnd->getParent()->getIterator(); 41381ad6265SDimitry Andric BuildMI(LoopEnd->getParent(), DebugLoc(), TII->get(ARM::t2B)) 41481ad6265SDimitry Andric .addMBB(&*MBBI) 41581ad6265SDimitry Andric .add(predOps(ARMCC::AL)); 41681ad6265SDimitry Andric } 41781ad6265SDimitry Andric 418fe6060f1SDimitry Andric // Replace the loop dec and loop end as a single instruction. 419fe6060f1SDimitry Andric MachineInstrBuilder MI = 420fe6060f1SDimitry Andric BuildMI(*LoopEnd->getParent(), *LoopEnd, LoopEnd->getDebugLoc(), 421fe6060f1SDimitry Andric TII->get(ARM::t2LoopEndDec), DecReg) 422fe6060f1SDimitry Andric .addReg(PhiReg) 423fe6060f1SDimitry Andric .add(LoopEnd->getOperand(1)); 424fe6060f1SDimitry Andric (void)MI; 425fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Merged LoopDec and End into: " << *MI.getInstr()); 426fe6060f1SDimitry Andric 427fe6060f1SDimitry Andric LoopDec->eraseFromParent(); 428fe6060f1SDimitry Andric LoopEnd->eraseFromParent(); 429fe6060f1SDimitry Andric for (auto *MI : Copies) 430fe6060f1SDimitry Andric MI->eraseFromParent(); 431fe6060f1SDimitry Andric return true; 432fe6060f1SDimitry Andric } 433fe6060f1SDimitry Andric 434fe6060f1SDimitry Andric // Convert t2DoLoopStart to t2DoLoopStartTP if the loop contains VCTP 435fe6060f1SDimitry Andric // instructions. This keeps the VCTP count reg operand on the t2DoLoopStartTP 436fe6060f1SDimitry Andric // instruction, making the backend ARMLowOverheadLoops passes job of finding the 437fe6060f1SDimitry Andric // VCTP operand much simpler. 
438fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ConvertTailPredLoop(MachineLoop *ML, 439fe6060f1SDimitry Andric MachineDominatorTree *DT) { 440fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "ConvertTailPredLoop on loop " 441fe6060f1SDimitry Andric << ML->getHeader()->getName() << "\n"); 442fe6060f1SDimitry Andric 443fe6060f1SDimitry Andric // Find some loop components including the LoopEnd/Dec/Start, and any VCTP's 444fe6060f1SDimitry Andric // in the loop. 445fe6060f1SDimitry Andric MachineInstr *LoopEnd, *LoopPhi, *LoopStart, *LoopDec; 446fe6060f1SDimitry Andric if (!findLoopComponents(ML, MRI, LoopStart, LoopPhi, LoopDec, LoopEnd)) 447fe6060f1SDimitry Andric return false; 448fe6060f1SDimitry Andric if (LoopDec != LoopEnd || (LoopStart->getOpcode() != ARM::t2DoLoopStart && 449fe6060f1SDimitry Andric LoopStart->getOpcode() != ARM::t2WhileLoopStartLR)) 450fe6060f1SDimitry Andric return false; 451fe6060f1SDimitry Andric 452fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> VCTPs; 453349cc55cSDimitry Andric SmallVector<MachineInstr *, 4> MVEInstrs; 454349cc55cSDimitry Andric for (MachineBasicBlock *BB : ML->blocks()) { 455fe6060f1SDimitry Andric for (MachineInstr &MI : *BB) 456fe6060f1SDimitry Andric if (isVCTP(&MI)) 457fe6060f1SDimitry Andric VCTPs.push_back(&MI); 458349cc55cSDimitry Andric else if (findFirstVPTPredOperandIdx(MI) != -1) 459349cc55cSDimitry Andric MVEInstrs.push_back(&MI); 460349cc55cSDimitry Andric } 461fe6060f1SDimitry Andric 462fe6060f1SDimitry Andric if (VCTPs.empty()) { 463fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " no VCTPs\n"); 464fe6060f1SDimitry Andric return false; 465fe6060f1SDimitry Andric } 466fe6060f1SDimitry Andric 467fe6060f1SDimitry Andric // Check all VCTPs are the same. 
468fe6060f1SDimitry Andric MachineInstr *FirstVCTP = *VCTPs.begin(); 469fe6060f1SDimitry Andric for (MachineInstr *VCTP : VCTPs) { 470fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " with VCTP " << *VCTP); 471fe6060f1SDimitry Andric if (VCTP->getOpcode() != FirstVCTP->getOpcode() || 472fe6060f1SDimitry Andric VCTP->getOperand(0).getReg() != FirstVCTP->getOperand(0).getReg()) { 473fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " VCTP's are not identical\n"); 474fe6060f1SDimitry Andric return false; 475fe6060f1SDimitry Andric } 476fe6060f1SDimitry Andric } 477fe6060f1SDimitry Andric 478fe6060f1SDimitry Andric // Check for the register being used can be setup before the loop. We expect 479fe6060f1SDimitry Andric // this to be: 480fe6060f1SDimitry Andric // $vx = ... 481fe6060f1SDimitry Andric // loop: 482fe6060f1SDimitry Andric // $vp = PHI [ $vx ], [ $vd ] 483fe6060f1SDimitry Andric // .. 484fe6060f1SDimitry Andric // $vpr = VCTP $vp 485fe6060f1SDimitry Andric // .. 486fe6060f1SDimitry Andric // $vd = t2SUBri $vp, #n 487fe6060f1SDimitry Andric // .. 
488fe6060f1SDimitry Andric Register CountReg = FirstVCTP->getOperand(1).getReg(); 489fe6060f1SDimitry Andric if (!CountReg.isVirtual()) { 490fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " cannot determine VCTP PHI\n"); 491fe6060f1SDimitry Andric return false; 492fe6060f1SDimitry Andric } 493fe6060f1SDimitry Andric MachineInstr *Phi = LookThroughCOPY(MRI->getVRegDef(CountReg), MRI); 494fe6060f1SDimitry Andric if (!Phi || Phi->getOpcode() != TargetOpcode::PHI || 495fe6060f1SDimitry Andric Phi->getNumOperands() != 5 || 496fe6060f1SDimitry Andric (Phi->getOperand(2).getMBB() != ML->getLoopLatch() && 497fe6060f1SDimitry Andric Phi->getOperand(4).getMBB() != ML->getLoopLatch())) { 498fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " cannot determine VCTP Count\n"); 499fe6060f1SDimitry Andric return false; 500fe6060f1SDimitry Andric } 501fe6060f1SDimitry Andric CountReg = Phi->getOperand(2).getMBB() == ML->getLoopLatch() 502fe6060f1SDimitry Andric ? Phi->getOperand(3).getReg() 503fe6060f1SDimitry Andric : Phi->getOperand(1).getReg(); 504fe6060f1SDimitry Andric 505fe6060f1SDimitry Andric // Replace the t2DoLoopStart with the t2DoLoopStartTP, move it to the end of 506fe6060f1SDimitry Andric // the preheader and add the new CountReg to it. We attempt to place it late 507fe6060f1SDimitry Andric // in the preheader, but may need to move that earlier based on uses. 
508fe6060f1SDimitry Andric MachineBasicBlock *MBB = LoopStart->getParent(); 509fe6060f1SDimitry Andric MachineBasicBlock::iterator InsertPt = MBB->getFirstTerminator(); 510fe6060f1SDimitry Andric for (MachineInstr &Use : 511fe6060f1SDimitry Andric MRI->use_instructions(LoopStart->getOperand(0).getReg())) 512fe6060f1SDimitry Andric if ((InsertPt != MBB->end() && !DT->dominates(&*InsertPt, &Use)) || 513fe6060f1SDimitry Andric !DT->dominates(ML->getHeader(), Use.getParent())) { 514fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " InsertPt could not be a terminator!\n"); 515fe6060f1SDimitry Andric return false; 516fe6060f1SDimitry Andric } 517fe6060f1SDimitry Andric 518fe6060f1SDimitry Andric unsigned NewOpc = LoopStart->getOpcode() == ARM::t2DoLoopStart 519fe6060f1SDimitry Andric ? ARM::t2DoLoopStartTP 520fe6060f1SDimitry Andric : ARM::t2WhileLoopStartTP; 521fe6060f1SDimitry Andric MachineInstrBuilder MI = 522fe6060f1SDimitry Andric BuildMI(*MBB, InsertPt, LoopStart->getDebugLoc(), TII->get(NewOpc)) 523fe6060f1SDimitry Andric .add(LoopStart->getOperand(0)) 524fe6060f1SDimitry Andric .add(LoopStart->getOperand(1)) 525fe6060f1SDimitry Andric .addReg(CountReg); 526fe6060f1SDimitry Andric if (NewOpc == ARM::t2WhileLoopStartTP) 527fe6060f1SDimitry Andric MI.add(LoopStart->getOperand(2)); 528fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing " << *LoopStart << " with " 529fe6060f1SDimitry Andric << *MI.getInstr()); 530fe6060f1SDimitry Andric MRI->constrainRegClass(CountReg, &ARM::rGPRRegClass); 531fe6060f1SDimitry Andric LoopStart->eraseFromParent(); 532fe6060f1SDimitry Andric 533349cc55cSDimitry Andric if (SetLRPredicate) { 534349cc55cSDimitry Andric // Each instruction in the loop needs to be using LR as the predicate from 535349cc55cSDimitry Andric // the Phi as the predicate. 
536349cc55cSDimitry Andric Register LR = LoopPhi->getOperand(0).getReg(); 537349cc55cSDimitry Andric for (MachineInstr *MI : MVEInstrs) { 538349cc55cSDimitry Andric int Idx = findFirstVPTPredOperandIdx(*MI); 539349cc55cSDimitry Andric MI->getOperand(Idx + 2).setReg(LR); 540349cc55cSDimitry Andric } 541349cc55cSDimitry Andric } 542349cc55cSDimitry Andric 543fe6060f1SDimitry Andric return true; 544fe6060f1SDimitry Andric } 545fe6060f1SDimitry Andric 546fe6060f1SDimitry Andric // Returns true if Opcode is any VCMP Opcode. 547fe6060f1SDimitry Andric static bool IsVCMP(unsigned Opcode) { return VCMPOpcodeToVPT(Opcode) != 0; } 548fe6060f1SDimitry Andric 549fe6060f1SDimitry Andric // Returns true if a VCMP with this Opcode can have its operands swapped. 550fe6060f1SDimitry Andric // There is 2 kind of VCMP that can't have their operands swapped: Float VCMPs, 551fe6060f1SDimitry Andric // and VCMPr instructions (since the r is always on the right). 552fe6060f1SDimitry Andric static bool CanHaveSwappedOperands(unsigned Opcode) { 553fe6060f1SDimitry Andric switch (Opcode) { 554fe6060f1SDimitry Andric default: 555fe6060f1SDimitry Andric return true; 556fe6060f1SDimitry Andric case ARM::MVE_VCMPf32: 557fe6060f1SDimitry Andric case ARM::MVE_VCMPf16: 558fe6060f1SDimitry Andric case ARM::MVE_VCMPf32r: 559fe6060f1SDimitry Andric case ARM::MVE_VCMPf16r: 560fe6060f1SDimitry Andric case ARM::MVE_VCMPi8r: 561fe6060f1SDimitry Andric case ARM::MVE_VCMPi16r: 562fe6060f1SDimitry Andric case ARM::MVE_VCMPi32r: 563fe6060f1SDimitry Andric case ARM::MVE_VCMPu8r: 564fe6060f1SDimitry Andric case ARM::MVE_VCMPu16r: 565fe6060f1SDimitry Andric case ARM::MVE_VCMPu32r: 566fe6060f1SDimitry Andric case ARM::MVE_VCMPs8r: 567fe6060f1SDimitry Andric case ARM::MVE_VCMPs16r: 568fe6060f1SDimitry Andric case ARM::MVE_VCMPs32r: 569fe6060f1SDimitry Andric return false; 570fe6060f1SDimitry Andric } 571fe6060f1SDimitry Andric } 572fe6060f1SDimitry Andric 573fe6060f1SDimitry Andric // Returns the CondCode of a 
VCMP Instruction. 574fe6060f1SDimitry Andric static ARMCC::CondCodes GetCondCode(MachineInstr &Instr) { 575fe6060f1SDimitry Andric assert(IsVCMP(Instr.getOpcode()) && "Inst must be a VCMP"); 576fe6060f1SDimitry Andric return ARMCC::CondCodes(Instr.getOperand(3).getImm()); 577fe6060f1SDimitry Andric } 578fe6060f1SDimitry Andric 579fe6060f1SDimitry Andric // Returns true if Cond is equivalent to a VPNOT instruction on the result of 580fe6060f1SDimitry Andric // Prev. Cond and Prev must be VCMPs. 581fe6060f1SDimitry Andric static bool IsVPNOTEquivalent(MachineInstr &Cond, MachineInstr &Prev) { 582fe6060f1SDimitry Andric assert(IsVCMP(Cond.getOpcode()) && IsVCMP(Prev.getOpcode())); 583fe6060f1SDimitry Andric 584fe6060f1SDimitry Andric // Opcodes must match. 585fe6060f1SDimitry Andric if (Cond.getOpcode() != Prev.getOpcode()) 586fe6060f1SDimitry Andric return false; 587fe6060f1SDimitry Andric 588fe6060f1SDimitry Andric MachineOperand &CondOP1 = Cond.getOperand(1), &CondOP2 = Cond.getOperand(2); 589fe6060f1SDimitry Andric MachineOperand &PrevOP1 = Prev.getOperand(1), &PrevOP2 = Prev.getOperand(2); 590fe6060f1SDimitry Andric 591fe6060f1SDimitry Andric // If the VCMP has the opposite condition with the same operands, we can 592fe6060f1SDimitry Andric // replace it with a VPNOT 593fe6060f1SDimitry Andric ARMCC::CondCodes ExpectedCode = GetCondCode(Cond); 594fe6060f1SDimitry Andric ExpectedCode = ARMCC::getOppositeCondition(ExpectedCode); 595fe6060f1SDimitry Andric if (ExpectedCode == GetCondCode(Prev)) 596fe6060f1SDimitry Andric if (CondOP1.isIdenticalTo(PrevOP1) && CondOP2.isIdenticalTo(PrevOP2)) 597fe6060f1SDimitry Andric return true; 598fe6060f1SDimitry Andric // Check again with operands swapped if possible 599fe6060f1SDimitry Andric if (!CanHaveSwappedOperands(Cond.getOpcode())) 600fe6060f1SDimitry Andric return false; 601fe6060f1SDimitry Andric ExpectedCode = ARMCC::getSwappedCondition(ExpectedCode); 602fe6060f1SDimitry Andric return ExpectedCode == GetCondCode(Prev) 
&& CondOP1.isIdenticalTo(PrevOP2) && 603fe6060f1SDimitry Andric CondOP2.isIdenticalTo(PrevOP1); 604fe6060f1SDimitry Andric } 605fe6060f1SDimitry Andric 606fe6060f1SDimitry Andric // Returns true if Instr writes to VCCR. 607fe6060f1SDimitry Andric static bool IsWritingToVCCR(MachineInstr &Instr) { 608fe6060f1SDimitry Andric if (Instr.getNumOperands() == 0) 609fe6060f1SDimitry Andric return false; 610fe6060f1SDimitry Andric MachineOperand &Dst = Instr.getOperand(0); 611fe6060f1SDimitry Andric if (!Dst.isReg()) 612fe6060f1SDimitry Andric return false; 613fe6060f1SDimitry Andric Register DstReg = Dst.getReg(); 614fe6060f1SDimitry Andric if (!DstReg.isVirtual()) 615fe6060f1SDimitry Andric return false; 616fe6060f1SDimitry Andric MachineRegisterInfo &RegInfo = Instr.getMF()->getRegInfo(); 617fe6060f1SDimitry Andric const TargetRegisterClass *RegClass = RegInfo.getRegClassOrNull(DstReg); 618fe6060f1SDimitry Andric return RegClass && (RegClass->getID() == ARM::VCCRRegClassID); 619fe6060f1SDimitry Andric } 620fe6060f1SDimitry Andric 621fe6060f1SDimitry Andric // Transforms 622fe6060f1SDimitry Andric // <Instr that uses %A ('User' Operand)> 623fe6060f1SDimitry Andric // Into 624fe6060f1SDimitry Andric // %K = VPNOT %Target 625fe6060f1SDimitry Andric // <Instr that uses %K ('User' Operand)> 626fe6060f1SDimitry Andric // And returns the newly inserted VPNOT. 627fe6060f1SDimitry Andric // This optimization is done in the hopes of preventing spills/reloads of VPR by 628fe6060f1SDimitry Andric // reducing the number of VCCR values with overlapping lifetimes. 
629fe6060f1SDimitry Andric MachineInstr &MVETPAndVPTOptimisations::ReplaceRegisterUseWithVPNOT( 630fe6060f1SDimitry Andric MachineBasicBlock &MBB, MachineInstr &Instr, MachineOperand &User, 631fe6060f1SDimitry Andric Register Target) { 632fe6060f1SDimitry Andric Register NewResult = MRI->createVirtualRegister(MRI->getRegClass(Target)); 633fe6060f1SDimitry Andric 634fe6060f1SDimitry Andric MachineInstrBuilder MIBuilder = 635fe6060f1SDimitry Andric BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT)) 636fe6060f1SDimitry Andric .addDef(NewResult) 637fe6060f1SDimitry Andric .addReg(Target); 638fe6060f1SDimitry Andric addUnpredicatedMveVpredNOp(MIBuilder); 639fe6060f1SDimitry Andric 640fe6060f1SDimitry Andric // Make the user use NewResult instead, and clear its kill flag. 641fe6060f1SDimitry Andric User.setReg(NewResult); 642fe6060f1SDimitry Andric User.setIsKill(false); 643fe6060f1SDimitry Andric 644fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << " Inserting VPNOT (for spill prevention): "; 645fe6060f1SDimitry Andric MIBuilder.getInstr()->dump()); 646fe6060f1SDimitry Andric 647fe6060f1SDimitry Andric return *MIBuilder.getInstr(); 648fe6060f1SDimitry Andric } 649fe6060f1SDimitry Andric 650fe6060f1SDimitry Andric // Moves a VPNOT before its first user if an instruction that uses Reg is found 651fe6060f1SDimitry Andric // in-between the VPNOT and its user. 652fe6060f1SDimitry Andric // Returns true if there is at least one user of the VPNOT in the block. 
653fe6060f1SDimitry Andric static bool MoveVPNOTBeforeFirstUser(MachineBasicBlock &MBB, 654fe6060f1SDimitry Andric MachineBasicBlock::iterator Iter, 655fe6060f1SDimitry Andric Register Reg) { 656fe6060f1SDimitry Andric assert(Iter->getOpcode() == ARM::MVE_VPNOT && "Not a VPNOT!"); 657fe6060f1SDimitry Andric assert(getVPTInstrPredicate(*Iter) == ARMVCC::None && 658fe6060f1SDimitry Andric "The VPNOT cannot be predicated"); 659fe6060f1SDimitry Andric 660fe6060f1SDimitry Andric MachineInstr &VPNOT = *Iter; 661fe6060f1SDimitry Andric Register VPNOTResult = VPNOT.getOperand(0).getReg(); 662fe6060f1SDimitry Andric Register VPNOTOperand = VPNOT.getOperand(1).getReg(); 663fe6060f1SDimitry Andric 664fe6060f1SDimitry Andric // Whether the VPNOT will need to be moved, and whether we found a user of the 665fe6060f1SDimitry Andric // VPNOT. 666fe6060f1SDimitry Andric bool MustMove = false, HasUser = false; 667fe6060f1SDimitry Andric MachineOperand *VPNOTOperandKiller = nullptr; 668fe6060f1SDimitry Andric for (; Iter != MBB.end(); ++Iter) { 669fe6060f1SDimitry Andric if (MachineOperand *MO = 670*0fca6ea1SDimitry Andric Iter->findRegisterUseOperand(VPNOTOperand, /*TRI=*/nullptr, 671*0fca6ea1SDimitry Andric /*isKill*/ true)) { 672fe6060f1SDimitry Andric // If we find the operand that kills the VPNOTOperand's result, save it. 
673fe6060f1SDimitry Andric VPNOTOperandKiller = MO; 674fe6060f1SDimitry Andric } 675fe6060f1SDimitry Andric 676*0fca6ea1SDimitry Andric if (Iter->findRegisterUseOperandIdx(Reg, /*TRI=*/nullptr) != -1) { 677fe6060f1SDimitry Andric MustMove = true; 678fe6060f1SDimitry Andric continue; 679fe6060f1SDimitry Andric } 680fe6060f1SDimitry Andric 681*0fca6ea1SDimitry Andric if (Iter->findRegisterUseOperandIdx(VPNOTResult, /*TRI=*/nullptr) == -1) 682fe6060f1SDimitry Andric continue; 683fe6060f1SDimitry Andric 684fe6060f1SDimitry Andric HasUser = true; 685fe6060f1SDimitry Andric if (!MustMove) 686fe6060f1SDimitry Andric break; 687fe6060f1SDimitry Andric 688fe6060f1SDimitry Andric // Move the VPNOT right before Iter 689fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Moving: "; VPNOT.dump(); dbgs() << " Before: "; 690fe6060f1SDimitry Andric Iter->dump()); 691fe6060f1SDimitry Andric MBB.splice(Iter, &MBB, VPNOT.getIterator()); 692fe6060f1SDimitry Andric // If we move the instr, and its operand was killed earlier, remove the kill 693fe6060f1SDimitry Andric // flag. 694fe6060f1SDimitry Andric if (VPNOTOperandKiller) 695fe6060f1SDimitry Andric VPNOTOperandKiller->setIsKill(false); 696fe6060f1SDimitry Andric 697fe6060f1SDimitry Andric break; 698fe6060f1SDimitry Andric } 699fe6060f1SDimitry Andric return HasUser; 700fe6060f1SDimitry Andric } 701fe6060f1SDimitry Andric 702fe6060f1SDimitry Andric // This optimisation attempts to reduce the number of overlapping lifetimes of 703fe6060f1SDimitry Andric // VCCR values by replacing uses of old VCCR values with VPNOTs. 
For example, 704fe6060f1SDimitry Andric // this replaces 705fe6060f1SDimitry Andric // %A:vccr = (something) 706fe6060f1SDimitry Andric // %B:vccr = VPNOT %A 707fe6060f1SDimitry Andric // %Foo = (some op that uses %B) 708fe6060f1SDimitry Andric // %Bar = (some op that uses %A) 709fe6060f1SDimitry Andric // With 710fe6060f1SDimitry Andric // %A:vccr = (something) 711fe6060f1SDimitry Andric // %B:vccr = VPNOT %A 712fe6060f1SDimitry Andric // %Foo = (some op that uses %B) 713fe6060f1SDimitry Andric // %TMP2:vccr = VPNOT %B 714fe6060f1SDimitry Andric // %Bar = (some op that uses %A) 715fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReduceOldVCCRValueUses(MachineBasicBlock &MBB) { 716fe6060f1SDimitry Andric MachineBasicBlock::iterator Iter = MBB.begin(), End = MBB.end(); 717fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> DeadInstructions; 718fe6060f1SDimitry Andric bool Modified = false; 719fe6060f1SDimitry Andric 720fe6060f1SDimitry Andric while (Iter != End) { 721fe6060f1SDimitry Andric Register VCCRValue, OppositeVCCRValue; 722fe6060f1SDimitry Andric // The first loop looks for 2 unpredicated instructions: 723fe6060f1SDimitry Andric // %A:vccr = (instr) ; A is stored in VCCRValue 724fe6060f1SDimitry Andric // %B:vccr = VPNOT %A ; B is stored in OppositeVCCRValue 725fe6060f1SDimitry Andric for (; Iter != End; ++Iter) { 726fe6060f1SDimitry Andric // We're only interested in unpredicated instructions that write to VCCR. 727fe6060f1SDimitry Andric if (!IsWritingToVCCR(*Iter) || 728fe6060f1SDimitry Andric getVPTInstrPredicate(*Iter) != ARMVCC::None) 729fe6060f1SDimitry Andric continue; 730fe6060f1SDimitry Andric Register Dst = Iter->getOperand(0).getReg(); 731fe6060f1SDimitry Andric 732fe6060f1SDimitry Andric // If we already have a VCCRValue, and this is a VPNOT on VCCRValue, we've 733fe6060f1SDimitry Andric // found what we were looking for. 
734fe6060f1SDimitry Andric if (VCCRValue && Iter->getOpcode() == ARM::MVE_VPNOT && 735*0fca6ea1SDimitry Andric Iter->findRegisterUseOperandIdx(VCCRValue, /*TRI=*/nullptr) != -1) { 736fe6060f1SDimitry Andric // Move the VPNOT closer to its first user if needed, and ignore if it 737fe6060f1SDimitry Andric // has no users. 738fe6060f1SDimitry Andric if (!MoveVPNOTBeforeFirstUser(MBB, Iter, VCCRValue)) 739fe6060f1SDimitry Andric continue; 740fe6060f1SDimitry Andric 741fe6060f1SDimitry Andric OppositeVCCRValue = Dst; 742fe6060f1SDimitry Andric ++Iter; 743fe6060f1SDimitry Andric break; 744fe6060f1SDimitry Andric } 745fe6060f1SDimitry Andric 746fe6060f1SDimitry Andric // Else, just set VCCRValue. 747fe6060f1SDimitry Andric VCCRValue = Dst; 748fe6060f1SDimitry Andric } 749fe6060f1SDimitry Andric 750fe6060f1SDimitry Andric // If the first inner loop didn't find anything, stop here. 751fe6060f1SDimitry Andric if (Iter == End) 752fe6060f1SDimitry Andric break; 753fe6060f1SDimitry Andric 754fe6060f1SDimitry Andric assert(VCCRValue && OppositeVCCRValue && 755fe6060f1SDimitry Andric "VCCRValue and OppositeVCCRValue shouldn't be empty if the loop " 756fe6060f1SDimitry Andric "stopped before the end of the block!"); 757fe6060f1SDimitry Andric assert(VCCRValue != OppositeVCCRValue && 758fe6060f1SDimitry Andric "VCCRValue should not be equal to OppositeVCCRValue!"); 759fe6060f1SDimitry Andric 760fe6060f1SDimitry Andric // LastVPNOTResult always contains the same value as OppositeVCCRValue. 761fe6060f1SDimitry Andric Register LastVPNOTResult = OppositeVCCRValue; 762fe6060f1SDimitry Andric 763fe6060f1SDimitry Andric // This second loop tries to optimize the remaining instructions. 
764fe6060f1SDimitry Andric for (; Iter != End; ++Iter) { 765fe6060f1SDimitry Andric bool IsInteresting = false; 766fe6060f1SDimitry Andric 767*0fca6ea1SDimitry Andric if (MachineOperand *MO = 768*0fca6ea1SDimitry Andric Iter->findRegisterUseOperand(VCCRValue, /*TRI=*/nullptr)) { 769fe6060f1SDimitry Andric IsInteresting = true; 770fe6060f1SDimitry Andric 771fe6060f1SDimitry Andric // - If the instruction is a VPNOT, it can be removed, and we can just 772fe6060f1SDimitry Andric // replace its uses with LastVPNOTResult. 773fe6060f1SDimitry Andric // - Else, insert a new VPNOT on LastVPNOTResult to recompute VCCRValue. 774fe6060f1SDimitry Andric if (Iter->getOpcode() == ARM::MVE_VPNOT) { 775fe6060f1SDimitry Andric Register Result = Iter->getOperand(0).getReg(); 776fe6060f1SDimitry Andric 777fe6060f1SDimitry Andric MRI->replaceRegWith(Result, LastVPNOTResult); 778fe6060f1SDimitry Andric DeadInstructions.push_back(&*Iter); 779fe6060f1SDimitry Andric Modified = true; 780fe6060f1SDimitry Andric 781fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() 782fe6060f1SDimitry Andric << "Replacing all uses of '" << printReg(Result) 783fe6060f1SDimitry Andric << "' with '" << printReg(LastVPNOTResult) << "'\n"); 784fe6060f1SDimitry Andric } else { 785fe6060f1SDimitry Andric MachineInstr &VPNOT = 786fe6060f1SDimitry Andric ReplaceRegisterUseWithVPNOT(MBB, *Iter, *MO, LastVPNOTResult); 787fe6060f1SDimitry Andric Modified = true; 788fe6060f1SDimitry Andric 789fe6060f1SDimitry Andric LastVPNOTResult = VPNOT.getOperand(0).getReg(); 790fe6060f1SDimitry Andric std::swap(VCCRValue, OppositeVCCRValue); 791fe6060f1SDimitry Andric 792fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing use of '" << printReg(VCCRValue) 793fe6060f1SDimitry Andric << "' with '" << printReg(LastVPNOTResult) 794fe6060f1SDimitry Andric << "' in instr: " << *Iter); 795fe6060f1SDimitry Andric } 796fe6060f1SDimitry Andric } else { 797fe6060f1SDimitry Andric // If the instr uses OppositeVCCRValue, make it use LastVPNOTResult 
798fe6060f1SDimitry Andric // instead as they contain the same value. 799*0fca6ea1SDimitry Andric if (MachineOperand *MO = Iter->findRegisterUseOperand( 800*0fca6ea1SDimitry Andric OppositeVCCRValue, /*TRI=*/nullptr)) { 801fe6060f1SDimitry Andric IsInteresting = true; 802fe6060f1SDimitry Andric 803fe6060f1SDimitry Andric // This is pointless if LastVPNOTResult == OppositeVCCRValue. 804fe6060f1SDimitry Andric if (LastVPNOTResult != OppositeVCCRValue) { 805fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing usage of '" 806fe6060f1SDimitry Andric << printReg(OppositeVCCRValue) << "' with '" 807fe6060f1SDimitry Andric << printReg(LastVPNOTResult) << " for instr: "; 808fe6060f1SDimitry Andric Iter->dump()); 809fe6060f1SDimitry Andric MO->setReg(LastVPNOTResult); 810fe6060f1SDimitry Andric Modified = true; 811fe6060f1SDimitry Andric } 812fe6060f1SDimitry Andric 813fe6060f1SDimitry Andric MO->setIsKill(false); 814fe6060f1SDimitry Andric } 815fe6060f1SDimitry Andric 816fe6060f1SDimitry Andric // If this is an unpredicated VPNOT on 817fe6060f1SDimitry Andric // LastVPNOTResult/OppositeVCCRValue, we can act like we inserted it. 818fe6060f1SDimitry Andric if (Iter->getOpcode() == ARM::MVE_VPNOT && 819fe6060f1SDimitry Andric getVPTInstrPredicate(*Iter) == ARMVCC::None) { 820fe6060f1SDimitry Andric Register VPNOTOperand = Iter->getOperand(1).getReg(); 821fe6060f1SDimitry Andric if (VPNOTOperand == LastVPNOTResult || 822fe6060f1SDimitry Andric VPNOTOperand == OppositeVCCRValue) { 823fe6060f1SDimitry Andric IsInteresting = true; 824fe6060f1SDimitry Andric 825fe6060f1SDimitry Andric std::swap(VCCRValue, OppositeVCCRValue); 826fe6060f1SDimitry Andric LastVPNOTResult = Iter->getOperand(0).getReg(); 827fe6060f1SDimitry Andric } 828fe6060f1SDimitry Andric } 829fe6060f1SDimitry Andric } 830fe6060f1SDimitry Andric 831fe6060f1SDimitry Andric // If this instruction was not interesting, and it writes to VCCR, stop. 
832fe6060f1SDimitry Andric if (!IsInteresting && IsWritingToVCCR(*Iter)) 833fe6060f1SDimitry Andric break; 834fe6060f1SDimitry Andric } 835fe6060f1SDimitry Andric } 836fe6060f1SDimitry Andric 837fe6060f1SDimitry Andric for (MachineInstr *DeadInstruction : DeadInstructions) 838fe6060f1SDimitry Andric DeadInstruction->eraseFromParent(); 839fe6060f1SDimitry Andric 840fe6060f1SDimitry Andric return Modified; 841fe6060f1SDimitry Andric } 842fe6060f1SDimitry Andric 843fe6060f1SDimitry Andric // This optimisation replaces VCMPs with VPNOTs when they are equivalent. 844fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReplaceVCMPsByVPNOTs(MachineBasicBlock &MBB) { 845fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> DeadInstructions; 846fe6060f1SDimitry Andric 847fe6060f1SDimitry Andric // The last VCMP that we have seen and that couldn't be replaced. 848fe6060f1SDimitry Andric // This is reset when an instruction that writes to VCCR/VPR is found, or when 849fe6060f1SDimitry Andric // a VCMP is replaced with a VPNOT. 850fe6060f1SDimitry Andric // We'll only replace VCMPs with VPNOTs when this is not null, and when the 851fe6060f1SDimitry Andric // current VCMP is the opposite of PrevVCMP. 852fe6060f1SDimitry Andric MachineInstr *PrevVCMP = nullptr; 853fe6060f1SDimitry Andric // If we find an instruction that kills the result of PrevVCMP, we save the 854fe6060f1SDimitry Andric // operand here to remove the kill flag in case we need to use PrevVCMP's 855fe6060f1SDimitry Andric // result. 
856fe6060f1SDimitry Andric MachineOperand *PrevVCMPResultKiller = nullptr; 857fe6060f1SDimitry Andric 858fe6060f1SDimitry Andric for (MachineInstr &Instr : MBB.instrs()) { 859fe6060f1SDimitry Andric if (PrevVCMP) { 860*0fca6ea1SDimitry Andric if (MachineOperand *MO = 861*0fca6ea1SDimitry Andric Instr.findRegisterUseOperand(PrevVCMP->getOperand(0).getReg(), 862*0fca6ea1SDimitry Andric /*TRI=*/nullptr, /*isKill*/ true)) { 863fe6060f1SDimitry Andric // If we come accross the instr that kills PrevVCMP's result, record it 864fe6060f1SDimitry Andric // so we can remove the kill flag later if we need to. 865fe6060f1SDimitry Andric PrevVCMPResultKiller = MO; 866fe6060f1SDimitry Andric } 867fe6060f1SDimitry Andric } 868fe6060f1SDimitry Andric 869fe6060f1SDimitry Andric // Ignore predicated instructions. 870fe6060f1SDimitry Andric if (getVPTInstrPredicate(Instr) != ARMVCC::None) 871fe6060f1SDimitry Andric continue; 872fe6060f1SDimitry Andric 873fe6060f1SDimitry Andric // Only look at VCMPs 874fe6060f1SDimitry Andric if (!IsVCMP(Instr.getOpcode())) { 875fe6060f1SDimitry Andric // If the instruction writes to VCCR, forget the previous VCMP. 876fe6060f1SDimitry Andric if (IsWritingToVCCR(Instr)) 877fe6060f1SDimitry Andric PrevVCMP = nullptr; 878fe6060f1SDimitry Andric continue; 879fe6060f1SDimitry Andric } 880fe6060f1SDimitry Andric 881fe6060f1SDimitry Andric if (!PrevVCMP || !IsVPNOTEquivalent(Instr, *PrevVCMP)) { 882fe6060f1SDimitry Andric PrevVCMP = &Instr; 883fe6060f1SDimitry Andric continue; 884fe6060f1SDimitry Andric } 885fe6060f1SDimitry Andric 886fe6060f1SDimitry Andric // The register containing the result of the VCMP that we're going to 887fe6060f1SDimitry Andric // replace. 888fe6060f1SDimitry Andric Register PrevVCMPResultReg = PrevVCMP->getOperand(0).getReg(); 889fe6060f1SDimitry Andric 890fe6060f1SDimitry Andric // Build a VPNOT to replace the VCMP, reusing its operands. 
891fe6060f1SDimitry Andric MachineInstrBuilder MIBuilder = 892fe6060f1SDimitry Andric BuildMI(MBB, &Instr, Instr.getDebugLoc(), TII->get(ARM::MVE_VPNOT)) 893fe6060f1SDimitry Andric .add(Instr.getOperand(0)) 894fe6060f1SDimitry Andric .addReg(PrevVCMPResultReg); 895fe6060f1SDimitry Andric addUnpredicatedMveVpredNOp(MIBuilder); 896fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Inserting VPNOT (to replace VCMP): "; 897fe6060f1SDimitry Andric MIBuilder.getInstr()->dump(); dbgs() << " Removed VCMP: "; 898fe6060f1SDimitry Andric Instr.dump()); 899fe6060f1SDimitry Andric 900fe6060f1SDimitry Andric // If we found an instruction that uses, and kills PrevVCMP's result, 901fe6060f1SDimitry Andric // remove the kill flag. 902fe6060f1SDimitry Andric if (PrevVCMPResultKiller) 903fe6060f1SDimitry Andric PrevVCMPResultKiller->setIsKill(false); 904fe6060f1SDimitry Andric 905fe6060f1SDimitry Andric // Finally, mark the old VCMP for removal and reset 906fe6060f1SDimitry Andric // PrevVCMP/PrevVCMPResultKiller. 907fe6060f1SDimitry Andric DeadInstructions.push_back(&Instr); 908fe6060f1SDimitry Andric PrevVCMP = nullptr; 909fe6060f1SDimitry Andric PrevVCMPResultKiller = nullptr; 910fe6060f1SDimitry Andric } 911fe6060f1SDimitry Andric 912fe6060f1SDimitry Andric for (MachineInstr *DeadInstruction : DeadInstructions) 913fe6060f1SDimitry Andric DeadInstruction->eraseFromParent(); 914fe6060f1SDimitry Andric 915fe6060f1SDimitry Andric return !DeadInstructions.empty(); 916fe6060f1SDimitry Andric } 917fe6060f1SDimitry Andric 918fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ReplaceConstByVPNOTs(MachineBasicBlock &MBB, 919fe6060f1SDimitry Andric MachineDominatorTree *DT) { 920fe6060f1SDimitry Andric // Scan through the block, looking for instructions that use constants moves 921fe6060f1SDimitry Andric // into VPR that are the negative of one another. These are expected to be 922fe6060f1SDimitry Andric // COPY's to VCCRRegClass, from a t2MOVi or t2MOVi16. 
The last seen constant 923fe6060f1SDimitry Andric // mask is kept it or and VPNOT's of it are added or reused as we scan through 924fe6060f1SDimitry Andric // the function. 925fe6060f1SDimitry Andric unsigned LastVPTImm = 0; 926fe6060f1SDimitry Andric Register LastVPTReg = 0; 927fe6060f1SDimitry Andric SmallSet<MachineInstr *, 4> DeadInstructions; 928fe6060f1SDimitry Andric 929fe6060f1SDimitry Andric for (MachineInstr &Instr : MBB.instrs()) { 930fe6060f1SDimitry Andric // Look for predicated MVE instructions. 931fe6060f1SDimitry Andric int PIdx = llvm::findFirstVPTPredOperandIdx(Instr); 932fe6060f1SDimitry Andric if (PIdx == -1) 933fe6060f1SDimitry Andric continue; 934fe6060f1SDimitry Andric Register VPR = Instr.getOperand(PIdx + 1).getReg(); 935fe6060f1SDimitry Andric if (!VPR.isVirtual()) 936fe6060f1SDimitry Andric continue; 937fe6060f1SDimitry Andric 938fe6060f1SDimitry Andric // From that we are looking for an instruction like %11:vccr = COPY %9:rgpr. 939fe6060f1SDimitry Andric MachineInstr *Copy = MRI->getVRegDef(VPR); 940fe6060f1SDimitry Andric if (!Copy || Copy->getOpcode() != TargetOpcode::COPY || 941fe6060f1SDimitry Andric !Copy->getOperand(1).getReg().isVirtual() || 942fe6060f1SDimitry Andric MRI->getRegClass(Copy->getOperand(1).getReg()) == &ARM::VCCRRegClass) { 943fe6060f1SDimitry Andric LastVPTReg = 0; 944fe6060f1SDimitry Andric continue; 945fe6060f1SDimitry Andric } 946fe6060f1SDimitry Andric Register GPR = Copy->getOperand(1).getReg(); 947fe6060f1SDimitry Andric 948fe6060f1SDimitry Andric // Find the Immediate used by the copy. 
949fe6060f1SDimitry Andric auto getImm = [&](Register GPR) -> unsigned { 950fe6060f1SDimitry Andric MachineInstr *Def = MRI->getVRegDef(GPR); 951fe6060f1SDimitry Andric if (Def && (Def->getOpcode() == ARM::t2MOVi || 952fe6060f1SDimitry Andric Def->getOpcode() == ARM::t2MOVi16)) 953fe6060f1SDimitry Andric return Def->getOperand(1).getImm(); 954fe6060f1SDimitry Andric return -1U; 955fe6060f1SDimitry Andric }; 956fe6060f1SDimitry Andric unsigned Imm = getImm(GPR); 957fe6060f1SDimitry Andric if (Imm == -1U) { 958fe6060f1SDimitry Andric LastVPTReg = 0; 959fe6060f1SDimitry Andric continue; 960fe6060f1SDimitry Andric } 961fe6060f1SDimitry Andric 962fe6060f1SDimitry Andric unsigned NotImm = ~Imm & 0xffff; 963fe6060f1SDimitry Andric if (LastVPTReg != 0 && LastVPTReg != VPR && LastVPTImm == Imm) { 964*0fca6ea1SDimitry Andric MRI->clearKillFlags(LastVPTReg); 965fe6060f1SDimitry Andric Instr.getOperand(PIdx + 1).setReg(LastVPTReg); 966fe6060f1SDimitry Andric if (MRI->use_empty(VPR)) { 967fe6060f1SDimitry Andric DeadInstructions.insert(Copy); 968fe6060f1SDimitry Andric if (MRI->hasOneUse(GPR)) 969fe6060f1SDimitry Andric DeadInstructions.insert(MRI->getVRegDef(GPR)); 970fe6060f1SDimitry Andric } 971fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Reusing predicate: in " << Instr); 97206c3fb27SDimitry Andric VPR = LastVPTReg; 973fe6060f1SDimitry Andric } else if (LastVPTReg != 0 && LastVPTImm == NotImm) { 974fe6060f1SDimitry Andric // We have found the not of a previous constant. Create a VPNot of the 975fe6060f1SDimitry Andric // earlier predicate reg and use it instead of the copy. 
976fe6060f1SDimitry Andric Register NewVPR = MRI->createVirtualRegister(&ARM::VCCRRegClass); 977fe6060f1SDimitry Andric auto VPNot = BuildMI(MBB, &Instr, Instr.getDebugLoc(), 978fe6060f1SDimitry Andric TII->get(ARM::MVE_VPNOT), NewVPR) 979fe6060f1SDimitry Andric .addReg(LastVPTReg); 980fe6060f1SDimitry Andric addUnpredicatedMveVpredNOp(VPNot); 981fe6060f1SDimitry Andric 982fe6060f1SDimitry Andric // Use the new register and check if the def is now dead. 983fe6060f1SDimitry Andric Instr.getOperand(PIdx + 1).setReg(NewVPR); 984fe6060f1SDimitry Andric if (MRI->use_empty(VPR)) { 985fe6060f1SDimitry Andric DeadInstructions.insert(Copy); 986fe6060f1SDimitry Andric if (MRI->hasOneUse(GPR)) 987fe6060f1SDimitry Andric DeadInstructions.insert(MRI->getVRegDef(GPR)); 988fe6060f1SDimitry Andric } 989fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Adding VPNot: " << *VPNot << " to replace use at " 990fe6060f1SDimitry Andric << Instr); 991fe6060f1SDimitry Andric VPR = NewVPR; 992fe6060f1SDimitry Andric } 993fe6060f1SDimitry Andric 994fe6060f1SDimitry Andric LastVPTImm = Imm; 995fe6060f1SDimitry Andric LastVPTReg = VPR; 996fe6060f1SDimitry Andric } 997fe6060f1SDimitry Andric 998fe6060f1SDimitry Andric for (MachineInstr *DI : DeadInstructions) 999fe6060f1SDimitry Andric DI->eraseFromParent(); 1000fe6060f1SDimitry Andric 1001fe6060f1SDimitry Andric return !DeadInstructions.empty(); 1002fe6060f1SDimitry Andric } 1003fe6060f1SDimitry Andric 1004fe6060f1SDimitry Andric // Replace VPSEL with a predicated VMOV in blocks with a VCTP. This is a 1005fe6060f1SDimitry Andric // somewhat blunt approximation to allow tail predicated with vpsel 1006fe6060f1SDimitry Andric // instructions. We turn a vselect into a VPSEL in ISEL, but they have slightly 1007fe6060f1SDimitry Andric // different semantics under tail predication. Until that is modelled we just 1008fe6060f1SDimitry Andric // convert to a VMOVT (via a predicated VORR) instead. 
1009fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::ConvertVPSEL(MachineBasicBlock &MBB) { 1010fe6060f1SDimitry Andric bool HasVCTP = false; 1011fe6060f1SDimitry Andric SmallVector<MachineInstr *, 4> DeadInstructions; 1012fe6060f1SDimitry Andric 1013fe6060f1SDimitry Andric for (MachineInstr &MI : MBB.instrs()) { 1014fe6060f1SDimitry Andric if (isVCTP(&MI)) { 1015fe6060f1SDimitry Andric HasVCTP = true; 1016fe6060f1SDimitry Andric continue; 1017fe6060f1SDimitry Andric } 1018fe6060f1SDimitry Andric 1019fe6060f1SDimitry Andric if (!HasVCTP || MI.getOpcode() != ARM::MVE_VPSEL) 1020fe6060f1SDimitry Andric continue; 1021fe6060f1SDimitry Andric 1022fe6060f1SDimitry Andric MachineInstrBuilder MIBuilder = 1023fe6060f1SDimitry Andric BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(ARM::MVE_VORR)) 1024fe6060f1SDimitry Andric .add(MI.getOperand(0)) 1025fe6060f1SDimitry Andric .add(MI.getOperand(1)) 1026fe6060f1SDimitry Andric .add(MI.getOperand(1)) 1027fe6060f1SDimitry Andric .addImm(ARMVCC::Then) 1028fe6060f1SDimitry Andric .add(MI.getOperand(4)) 1029349cc55cSDimitry Andric .add(MI.getOperand(5)) 1030fe6060f1SDimitry Andric .add(MI.getOperand(2)); 1031fe6060f1SDimitry Andric // Silence unused variable warning in release builds. 
1032fe6060f1SDimitry Andric (void)MIBuilder; 1033fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "Replacing VPSEL: "; MI.dump(); 1034fe6060f1SDimitry Andric dbgs() << " with VMOVT: "; MIBuilder.getInstr()->dump()); 1035fe6060f1SDimitry Andric DeadInstructions.push_back(&MI); 1036fe6060f1SDimitry Andric } 1037fe6060f1SDimitry Andric 1038fe6060f1SDimitry Andric for (MachineInstr *DeadInstruction : DeadInstructions) 1039fe6060f1SDimitry Andric DeadInstruction->eraseFromParent(); 1040fe6060f1SDimitry Andric 1041fe6060f1SDimitry Andric return !DeadInstructions.empty(); 1042fe6060f1SDimitry Andric } 1043fe6060f1SDimitry Andric 1044fe6060f1SDimitry Andric // Add a registry allocation hint for t2DoLoopStart to hint it towards LR, as 1045fe6060f1SDimitry Andric // the instruction may be removable as a noop. 1046fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::HintDoLoopStartReg(MachineBasicBlock &MBB) { 1047fe6060f1SDimitry Andric bool Changed = false; 1048fe6060f1SDimitry Andric for (MachineInstr &MI : MBB.instrs()) { 1049fe6060f1SDimitry Andric if (MI.getOpcode() != ARM::t2DoLoopStart) 1050fe6060f1SDimitry Andric continue; 1051fe6060f1SDimitry Andric Register R = MI.getOperand(1).getReg(); 1052fe6060f1SDimitry Andric MachineFunction *MF = MI.getParent()->getParent(); 1053fe6060f1SDimitry Andric MF->getRegInfo().setRegAllocationHint(R, ARMRI::RegLR, 0); 1054fe6060f1SDimitry Andric Changed = true; 1055fe6060f1SDimitry Andric } 1056fe6060f1SDimitry Andric return Changed; 1057fe6060f1SDimitry Andric } 1058fe6060f1SDimitry Andric 1059fe6060f1SDimitry Andric bool MVETPAndVPTOptimisations::runOnMachineFunction(MachineFunction &Fn) { 106081ad6265SDimitry Andric const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); 1061fe6060f1SDimitry Andric 1062fe6060f1SDimitry Andric if (!STI.isThumb2() || !STI.hasLOB()) 1063fe6060f1SDimitry Andric return false; 1064fe6060f1SDimitry Andric 1065fe6060f1SDimitry Andric TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); 
1066fe6060f1SDimitry Andric MRI = &Fn.getRegInfo(); 1067*0fca6ea1SDimitry Andric MachineLoopInfo *MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI(); 1068*0fca6ea1SDimitry Andric MachineDominatorTree *DT = 1069*0fca6ea1SDimitry Andric &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); 1070fe6060f1SDimitry Andric 1071fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "********** ARM MVE VPT Optimisations **********\n" 1072fe6060f1SDimitry Andric << "********** Function: " << Fn.getName() << '\n'); 1073fe6060f1SDimitry Andric 1074fe6060f1SDimitry Andric bool Modified = false; 1075*0fca6ea1SDimitry Andric for (MachineLoop *ML : MLI->getLoopsInPreorder()) { 1076fe6060f1SDimitry Andric Modified |= LowerWhileLoopStart(ML); 1077fe6060f1SDimitry Andric Modified |= MergeLoopEnd(ML); 1078fe6060f1SDimitry Andric Modified |= ConvertTailPredLoop(ML, DT); 1079fe6060f1SDimitry Andric } 1080fe6060f1SDimitry Andric 1081fe6060f1SDimitry Andric for (MachineBasicBlock &MBB : Fn) { 1082fe6060f1SDimitry Andric Modified |= HintDoLoopStartReg(MBB); 1083fe6060f1SDimitry Andric Modified |= ReplaceConstByVPNOTs(MBB, DT); 1084fe6060f1SDimitry Andric Modified |= ReplaceVCMPsByVPNOTs(MBB); 1085fe6060f1SDimitry Andric Modified |= ReduceOldVCCRValueUses(MBB); 1086fe6060f1SDimitry Andric Modified |= ConvertVPSEL(MBB); 1087fe6060f1SDimitry Andric } 1088fe6060f1SDimitry Andric 1089fe6060f1SDimitry Andric LLVM_DEBUG(dbgs() << "**************************************\n"); 1090fe6060f1SDimitry Andric return Modified; 1091fe6060f1SDimitry Andric } 1092fe6060f1SDimitry Andric 1093fe6060f1SDimitry Andric /// createMVETPAndVPTOptimisationsPass 1094fe6060f1SDimitry Andric FunctionPass *llvm::createMVETPAndVPTOptimisationsPass() { 1095fe6060f1SDimitry Andric return new MVETPAndVPTOptimisations(); 1096fe6060f1SDimitry Andric } 1097