//===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMBaseInfo.h"
#include "Thumb2InstrInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineInstrBundle.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/Debug.h"
#include <cassert>
#include <iterator>
#include <new>

using namespace llvm;

348bcb0991SDimitry Andric #define DEBUG_TYPE "arm-mve-vpt" 358bcb0991SDimitry Andric 368bcb0991SDimitry Andric namespace { 378bcb0991SDimitry Andric class MVEVPTBlock : public MachineFunctionPass { 388bcb0991SDimitry Andric public: 398bcb0991SDimitry Andric static char ID; 40*5ffd83dbSDimitry Andric const Thumb2InstrInfo *TII; 41*5ffd83dbSDimitry Andric const TargetRegisterInfo *TRI; 428bcb0991SDimitry Andric 438bcb0991SDimitry Andric MVEVPTBlock() : MachineFunctionPass(ID) {} 448bcb0991SDimitry Andric 458bcb0991SDimitry Andric bool runOnMachineFunction(MachineFunction &Fn) override; 468bcb0991SDimitry Andric 478bcb0991SDimitry Andric MachineFunctionProperties getRequiredProperties() const override { 488bcb0991SDimitry Andric return MachineFunctionProperties().set( 49*5ffd83dbSDimitry Andric MachineFunctionProperties::Property::NoVRegs); 508bcb0991SDimitry Andric } 518bcb0991SDimitry Andric 528bcb0991SDimitry Andric StringRef getPassName() const override { 538bcb0991SDimitry Andric return "MVE VPT block insertion pass"; 548bcb0991SDimitry Andric } 558bcb0991SDimitry Andric 568bcb0991SDimitry Andric private: 578bcb0991SDimitry Andric bool InsertVPTBlocks(MachineBasicBlock &MBB); 588bcb0991SDimitry Andric }; 598bcb0991SDimitry Andric 608bcb0991SDimitry Andric char MVEVPTBlock::ID = 0; 618bcb0991SDimitry Andric 628bcb0991SDimitry Andric } // end anonymous namespace 638bcb0991SDimitry Andric 648bcb0991SDimitry Andric INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) 658bcb0991SDimitry Andric 66*5ffd83dbSDimitry Andric static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, 67*5ffd83dbSDimitry Andric const TargetRegisterInfo *TRI, 688bcb0991SDimitry Andric unsigned &NewOpcode) { 69*5ffd83dbSDimitry Andric // Search backwards to the instruction that defines VPR. This may or not 70*5ffd83dbSDimitry Andric // be a VCMP, we check that after this loop. 
If we find another instruction 71*5ffd83dbSDimitry Andric // that reads cpsr, we return nullptr. 72*5ffd83dbSDimitry Andric MachineBasicBlock::iterator CmpMI = MI; 73*5ffd83dbSDimitry Andric while (CmpMI != MI->getParent()->begin()) { 74*5ffd83dbSDimitry Andric --CmpMI; 75*5ffd83dbSDimitry Andric if (CmpMI->modifiesRegister(ARM::VPR, TRI)) 76*5ffd83dbSDimitry Andric break; 77*5ffd83dbSDimitry Andric if (CmpMI->readsRegister(ARM::VPR, TRI)) 78*5ffd83dbSDimitry Andric break; 79*5ffd83dbSDimitry Andric } 80*5ffd83dbSDimitry Andric 81*5ffd83dbSDimitry Andric if (CmpMI == MI) 82*5ffd83dbSDimitry Andric return nullptr; 83*5ffd83dbSDimitry Andric NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); 84*5ffd83dbSDimitry Andric if (NewOpcode == 0) 858bcb0991SDimitry Andric return nullptr; 868bcb0991SDimitry Andric 87*5ffd83dbSDimitry Andric // Search forward from CmpMI to MI, checking if either register was def'd 88*5ffd83dbSDimitry Andric if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), 89*5ffd83dbSDimitry Andric MI, TRI)) 908bcb0991SDimitry Andric return nullptr; 91*5ffd83dbSDimitry Andric if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), 92*5ffd83dbSDimitry Andric MI, TRI)) 938bcb0991SDimitry Andric return nullptr; 94*5ffd83dbSDimitry Andric return &*CmpMI; 95*5ffd83dbSDimitry Andric } 96480093f4SDimitry Andric 97*5ffd83dbSDimitry Andric // Advances Iter past a block of predicated instructions. 98*5ffd83dbSDimitry Andric // Returns true if it successfully skipped the whole block of predicated 99*5ffd83dbSDimitry Andric // instructions. Returns false when it stopped early (due to MaxSteps), or if 100*5ffd83dbSDimitry Andric // Iter didn't point to a predicated instruction. 
101*5ffd83dbSDimitry Andric static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, 102*5ffd83dbSDimitry Andric MachineBasicBlock::instr_iterator EndIter, 103*5ffd83dbSDimitry Andric unsigned MaxSteps, 104*5ffd83dbSDimitry Andric unsigned &NumInstrsSteppedOver) { 105*5ffd83dbSDimitry Andric ARMVCC::VPTCodes NextPred = ARMVCC::None; 106*5ffd83dbSDimitry Andric Register PredReg; 107*5ffd83dbSDimitry Andric NumInstrsSteppedOver = 0; 108*5ffd83dbSDimitry Andric 109*5ffd83dbSDimitry Andric while (Iter != EndIter) { 110*5ffd83dbSDimitry Andric NextPred = getVPTInstrPredicate(*Iter, PredReg); 111*5ffd83dbSDimitry Andric assert(NextPred != ARMVCC::Else && 112*5ffd83dbSDimitry Andric "VPT block pass does not expect Else preds"); 113*5ffd83dbSDimitry Andric if (NextPred == ARMVCC::None || MaxSteps == 0) 114*5ffd83dbSDimitry Andric break; 115*5ffd83dbSDimitry Andric --MaxSteps; 116*5ffd83dbSDimitry Andric ++Iter; 117*5ffd83dbSDimitry Andric ++NumInstrsSteppedOver; 118*5ffd83dbSDimitry Andric }; 119*5ffd83dbSDimitry Andric 120*5ffd83dbSDimitry Andric return NumInstrsSteppedOver != 0 && 121*5ffd83dbSDimitry Andric (NextPred == ARMVCC::None || Iter == EndIter); 122*5ffd83dbSDimitry Andric } 123*5ffd83dbSDimitry Andric 124*5ffd83dbSDimitry Andric // Returns true if at least one instruction in the range [Iter, End) defines 125*5ffd83dbSDimitry Andric // or kills VPR. 126*5ffd83dbSDimitry Andric static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, 127*5ffd83dbSDimitry Andric MachineBasicBlock::iterator End) { 128*5ffd83dbSDimitry Andric for (; Iter != End; ++Iter) 129*5ffd83dbSDimitry Andric if (Iter->definesRegister(ARM::VPR) || Iter->killsRegister(ARM::VPR)) 130*5ffd83dbSDimitry Andric return true; 131*5ffd83dbSDimitry Andric return false; 132*5ffd83dbSDimitry Andric } 133*5ffd83dbSDimitry Andric 134*5ffd83dbSDimitry Andric // Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize. 
135*5ffd83dbSDimitry Andric static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) { 136*5ffd83dbSDimitry Andric switch (BlockSize) { 137*5ffd83dbSDimitry Andric case 1: 138*5ffd83dbSDimitry Andric return ARM::PredBlockMask::T; 139*5ffd83dbSDimitry Andric case 2: 140*5ffd83dbSDimitry Andric return ARM::PredBlockMask::TT; 141*5ffd83dbSDimitry Andric case 3: 142*5ffd83dbSDimitry Andric return ARM::PredBlockMask::TTT; 143*5ffd83dbSDimitry Andric case 4: 144*5ffd83dbSDimitry Andric return ARM::PredBlockMask::TTTT; 145*5ffd83dbSDimitry Andric default: 146*5ffd83dbSDimitry Andric llvm_unreachable("Invalid BlockSize!"); 147*5ffd83dbSDimitry Andric } 148*5ffd83dbSDimitry Andric } 149*5ffd83dbSDimitry Andric 150*5ffd83dbSDimitry Andric // Given an iterator (Iter) that points at an instruction with a "Then" 151*5ffd83dbSDimitry Andric // predicate, tries to create the largest block of continuous predicated 152*5ffd83dbSDimitry Andric // instructions possible, and returns the VPT Block Mask of that block. 153*5ffd83dbSDimitry Andric // 154*5ffd83dbSDimitry Andric // This will try to perform some minor optimization in order to maximize the 155*5ffd83dbSDimitry Andric // size of the block. 
156*5ffd83dbSDimitry Andric static ARM::PredBlockMask 157*5ffd83dbSDimitry Andric CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, 158*5ffd83dbSDimitry Andric MachineBasicBlock::instr_iterator EndIter, 159*5ffd83dbSDimitry Andric SmallVectorImpl<MachineInstr *> &DeadInstructions) { 160*5ffd83dbSDimitry Andric MachineBasicBlock::instr_iterator BlockBeg = Iter; 161*5ffd83dbSDimitry Andric (void)BlockBeg; 162*5ffd83dbSDimitry Andric assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then && 163*5ffd83dbSDimitry Andric "Expected a Predicated Instruction"); 164*5ffd83dbSDimitry Andric 165*5ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump()); 166*5ffd83dbSDimitry Andric 167*5ffd83dbSDimitry Andric unsigned BlockSize; 168*5ffd83dbSDimitry Andric StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize); 169*5ffd83dbSDimitry Andric 170*5ffd83dbSDimitry Andric LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = 171*5ffd83dbSDimitry Andric std::next(BlockBeg); 172*5ffd83dbSDimitry Andric AddedInstIter != Iter; ++AddedInstIter) { 173*5ffd83dbSDimitry Andric dbgs() << " adding: "; 174*5ffd83dbSDimitry Andric AddedInstIter->dump(); 175*5ffd83dbSDimitry Andric }); 176*5ffd83dbSDimitry Andric 177*5ffd83dbSDimitry Andric // Generate the initial BlockMask 178*5ffd83dbSDimitry Andric ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize); 179*5ffd83dbSDimitry Andric 180*5ffd83dbSDimitry Andric // Remove VPNOTs while there's still room in the block, so we can make the 181*5ffd83dbSDimitry Andric // largest block possible. 182*5ffd83dbSDimitry Andric ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else; 183*5ffd83dbSDimitry Andric while (BlockSize < 4 && Iter != EndIter && 184*5ffd83dbSDimitry Andric Iter->getOpcode() == ARM::MVE_VPNOT) { 185*5ffd83dbSDimitry Andric 186*5ffd83dbSDimitry Andric // Try to skip all of the predicated instructions after the VPNOT, stopping 187*5ffd83dbSDimitry Andric // after (4 - BlockSize). 
If we can't skip them all, stop. 188*5ffd83dbSDimitry Andric unsigned ElseInstCnt = 0; 189*5ffd83dbSDimitry Andric MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter); 190*5ffd83dbSDimitry Andric if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize), 191*5ffd83dbSDimitry Andric ElseInstCnt)) 192*5ffd83dbSDimitry Andric break; 193*5ffd83dbSDimitry Andric 194*5ffd83dbSDimitry Andric // Check if this VPNOT can be removed or not: It can only be removed if at 195*5ffd83dbSDimitry Andric // least one of the predicated instruction that follows it kills or sets 196*5ffd83dbSDimitry Andric // VPR. 197*5ffd83dbSDimitry Andric if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter)) 198*5ffd83dbSDimitry Andric break; 199*5ffd83dbSDimitry Andric 200*5ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump();); 201*5ffd83dbSDimitry Andric 202*5ffd83dbSDimitry Andric // Record the new size of the block 203*5ffd83dbSDimitry Andric BlockSize += ElseInstCnt; 204*5ffd83dbSDimitry Andric assert(BlockSize <= 4 && "Block is too large!"); 205*5ffd83dbSDimitry Andric 206*5ffd83dbSDimitry Andric // Record the VPNot to remove it later. 207*5ffd83dbSDimitry Andric DeadInstructions.push_back(&*Iter); 208*5ffd83dbSDimitry Andric ++Iter; 209*5ffd83dbSDimitry Andric 210*5ffd83dbSDimitry Andric // Replace the predicates of the instructions we're adding. 211*5ffd83dbSDimitry Andric // Note that we are using "Iter" to iterate over the block so we can update 212*5ffd83dbSDimitry Andric // it at the same time. 
213*5ffd83dbSDimitry Andric for (; Iter != VPNOTBlockEndIter; ++Iter) { 214*5ffd83dbSDimitry Andric // Find the register in which the predicate is 215*5ffd83dbSDimitry Andric int OpIdx = findFirstVPTPredOperandIdx(*Iter); 216*5ffd83dbSDimitry Andric assert(OpIdx != -1); 217*5ffd83dbSDimitry Andric 218*5ffd83dbSDimitry Andric // Change the predicate and update the mask 219*5ffd83dbSDimitry Andric Iter->getOperand(OpIdx).setImm(CurrentPredicate); 220*5ffd83dbSDimitry Andric BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate); 221*5ffd83dbSDimitry Andric 222*5ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " adding : "; Iter->dump()); 223*5ffd83dbSDimitry Andric } 224*5ffd83dbSDimitry Andric 225*5ffd83dbSDimitry Andric CurrentPredicate = 226*5ffd83dbSDimitry Andric (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then); 227*5ffd83dbSDimitry Andric } 228*5ffd83dbSDimitry Andric return BlockMask; 2298bcb0991SDimitry Andric } 2308bcb0991SDimitry Andric 2318bcb0991SDimitry Andric bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { 2328bcb0991SDimitry Andric bool Modified = false; 2338bcb0991SDimitry Andric MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); 2348bcb0991SDimitry Andric MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); 235*5ffd83dbSDimitry Andric 236*5ffd83dbSDimitry Andric SmallVector<MachineInstr *, 4> DeadInstructions; 2378bcb0991SDimitry Andric 2388bcb0991SDimitry Andric while (MBIter != EndIter) { 2398bcb0991SDimitry Andric MachineInstr *MI = &*MBIter; 240*5ffd83dbSDimitry Andric Register PredReg; 241*5ffd83dbSDimitry Andric DebugLoc DL = MI->getDebugLoc(); 2428bcb0991SDimitry Andric 2438bcb0991SDimitry Andric ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); 2448bcb0991SDimitry Andric 2458bcb0991SDimitry Andric // The idea of the predicate is that None, Then and Else are for use when 2468bcb0991SDimitry Andric // handling assembly language: they correspond to the three possible 
2478bcb0991SDimitry Andric // suffixes "", "t" and "e" on the mnemonic. So when instructions are read 248*5ffd83dbSDimitry Andric // from assembly source or disassembled from object code, you expect to 249*5ffd83dbSDimitry Andric // see a mixture whenever there's a long VPT block. But in code 250*5ffd83dbSDimitry Andric // generation, we hope we'll never generate an Else as input to this pass. 2518bcb0991SDimitry Andric assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); 2528bcb0991SDimitry Andric 2538bcb0991SDimitry Andric if (Pred == ARMVCC::None) { 2548bcb0991SDimitry Andric ++MBIter; 2558bcb0991SDimitry Andric continue; 2568bcb0991SDimitry Andric } 2578bcb0991SDimitry Andric 258*5ffd83dbSDimitry Andric ARM::PredBlockMask BlockMask = 259*5ffd83dbSDimitry Andric CreateVPTBlock(MBIter, EndIter, DeadInstructions); 2608bcb0991SDimitry Andric 261*5ffd83dbSDimitry Andric // Search back for a VCMP that can be folded to create a VPT, or else 262*5ffd83dbSDimitry Andric // create a VPST directly 2638bcb0991SDimitry Andric MachineInstrBuilder MIBuilder; 2648bcb0991SDimitry Andric unsigned NewOpcode; 265*5ffd83dbSDimitry Andric LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n"); 266*5ffd83dbSDimitry Andric if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { 2678bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); 268*5ffd83dbSDimitry Andric MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode)); 269*5ffd83dbSDimitry Andric MIBuilder.addImm((uint64_t)BlockMask); 2708bcb0991SDimitry Andric MIBuilder.add(VCMP->getOperand(1)); 2718bcb0991SDimitry Andric MIBuilder.add(VCMP->getOperand(2)); 2728bcb0991SDimitry Andric MIBuilder.add(VCMP->getOperand(3)); 273*5ffd83dbSDimitry Andric VCMP->eraseFromParent(); 2748bcb0991SDimitry Andric } else { 275*5ffd83dbSDimitry Andric MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST)); 276*5ffd83dbSDimitry Andric 
MIBuilder.addImm((uint64_t)BlockMask); 2778bcb0991SDimitry Andric } 2788bcb0991SDimitry Andric 2798bcb0991SDimitry Andric finalizeBundle( 2808bcb0991SDimitry Andric Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); 2818bcb0991SDimitry Andric 2828bcb0991SDimitry Andric Modified = true; 2838bcb0991SDimitry Andric } 284480093f4SDimitry Andric 285*5ffd83dbSDimitry Andric // Erase all dead instructions 286*5ffd83dbSDimitry Andric for (MachineInstr *DeadMI : DeadInstructions) { 287*5ffd83dbSDimitry Andric if (DeadMI->isInsideBundle()) 288*5ffd83dbSDimitry Andric DeadMI->eraseFromBundle(); 289*5ffd83dbSDimitry Andric else 290*5ffd83dbSDimitry Andric DeadMI->eraseFromParent(); 291*5ffd83dbSDimitry Andric } 292480093f4SDimitry Andric 2938bcb0991SDimitry Andric return Modified; 2948bcb0991SDimitry Andric } 2958bcb0991SDimitry Andric 2968bcb0991SDimitry Andric bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { 2978bcb0991SDimitry Andric const ARMSubtarget &STI = 2988bcb0991SDimitry Andric static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 2998bcb0991SDimitry Andric 3008bcb0991SDimitry Andric if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) 3018bcb0991SDimitry Andric return false; 3028bcb0991SDimitry Andric 3038bcb0991SDimitry Andric TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); 304*5ffd83dbSDimitry Andric TRI = STI.getRegisterInfo(); 3058bcb0991SDimitry Andric 3068bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" 3078bcb0991SDimitry Andric << "********** Function: " << Fn.getName() << '\n'); 3088bcb0991SDimitry Andric 3098bcb0991SDimitry Andric bool Modified = false; 3108bcb0991SDimitry Andric for (MachineBasicBlock &MBB : Fn) 3118bcb0991SDimitry Andric Modified |= InsertVPTBlocks(MBB); 3128bcb0991SDimitry Andric 3138bcb0991SDimitry Andric LLVM_DEBUG(dbgs() << "**************************************\n"); 3148bcb0991SDimitry Andric return Modified; 3158bcb0991SDimitry Andric } 
3168bcb0991SDimitry Andric 3178bcb0991SDimitry Andric /// createMVEVPTBlock - Returns an instance of the MVE VPT block 3188bcb0991SDimitry Andric /// insertion pass. 3198bcb0991SDimitry Andric FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } 320