1 //===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ARM.h" 10 #include "ARMMachineFunctionInfo.h" 11 #include "ARMSubtarget.h" 12 #include "Thumb2InstrInfo.h" 13 #include "llvm/ADT/SmallVector.h" 14 #include "llvm/ADT/Statistic.h" 15 #include "llvm/ADT/StringRef.h" 16 #include "llvm/CodeGen/MachineBasicBlock.h" 17 #include "llvm/CodeGen/MachineFunction.h" 18 #include "llvm/CodeGen/MachineFunctionPass.h" 19 #include "llvm/CodeGen/MachineInstr.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineInstrBundle.h" 22 #include "llvm/CodeGen/MachineOperand.h" 23 #include "llvm/IR/DebugLoc.h" 24 #include "llvm/Support/Debug.h" 25 #include <cassert> 26 #include <new> 27 28 using namespace llvm; 29 30 #define DEBUG_TYPE "arm-mve-vpt" 31 32 namespace { 33 class MVEVPTBlock : public MachineFunctionPass { 34 public: 35 static char ID; 36 const Thumb2InstrInfo *TII; 37 const TargetRegisterInfo *TRI; 38 39 MVEVPTBlock() : MachineFunctionPass(ID) {} 40 41 bool runOnMachineFunction(MachineFunction &Fn) override; 42 43 MachineFunctionProperties getRequiredProperties() const override { 44 return MachineFunctionProperties().set( 45 MachineFunctionProperties::Property::NoVRegs); 46 } 47 48 StringRef getPassName() const override { 49 return "MVE VPT block insertion pass"; 50 } 51 52 private: 53 bool InsertVPTBlocks(MachineBasicBlock &MBB); 54 }; 55 56 char MVEVPTBlock::ID = 0; 57 58 } // end anonymous namespace 59 60 INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) 61 62 static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, 63 const TargetRegisterInfo *TRI, 64 unsigned &NewOpcode) { 65 // Search backwards to the instruction that defines VPR. This may or not 66 // be a VCMP, we check that after this loop. If we find another instruction 67 // that reads cpsr, we return nullptr. 68 MachineBasicBlock::iterator CmpMI = MI; 69 while (CmpMI != MI->getParent()->begin()) { 70 --CmpMI; 71 if (CmpMI->modifiesRegister(ARM::VPR, TRI)) 72 break; 73 if (CmpMI->readsRegister(ARM::VPR, TRI)) 74 break; 75 } 76 77 if (CmpMI == MI) 78 return nullptr; 79 NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); 80 if (NewOpcode == 0) 81 return nullptr; 82 83 // Search forward from CmpMI to MI, checking if either register was def'd 84 if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), 85 MI, TRI)) 86 return nullptr; 87 if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), 88 MI, TRI)) 89 return nullptr; 90 return &*CmpMI; 91 } 92 93 // Advances Iter past a block of predicated instructions. 94 // Returns true if it successfully skipped the whole block of predicated 95 // instructions. Returns false when it stopped early (due to MaxSteps), or if 96 // Iter didn't point to a predicated instruction. 97 static bool StepOverPredicatedInstrs(MachineBasicBlock::instr_iterator &Iter, 98 MachineBasicBlock::instr_iterator EndIter, 99 unsigned MaxSteps, 100 unsigned &NumInstrsSteppedOver) { 101 ARMVCC::VPTCodes NextPred = ARMVCC::None; 102 Register PredReg; 103 NumInstrsSteppedOver = 0; 104 105 while (Iter != EndIter) { 106 if (Iter->isDebugInstr()) { 107 // Skip debug instructions 108 ++Iter; 109 continue; 110 } 111 112 NextPred = getVPTInstrPredicate(*Iter, PredReg); 113 assert(NextPred != ARMVCC::Else && 114 "VPT block pass does not expect Else preds"); 115 if (NextPred == ARMVCC::None || MaxSteps == 0) 116 break; 117 --MaxSteps; 118 ++Iter; 119 ++NumInstrsSteppedOver; 120 }; 121 122 return NumInstrsSteppedOver != 0 && 123 (NextPred == ARMVCC::None || Iter == EndIter); 124 } 125 126 // Returns true if at least one instruction in the range [Iter, End) defines 127 // or kills VPR. 128 static bool IsVPRDefinedOrKilledByBlock(MachineBasicBlock::iterator Iter, 129 MachineBasicBlock::iterator End) { 130 for (; Iter != End; ++Iter) 131 if (Iter->definesRegister(ARM::VPR, /*TRI=*/nullptr) || 132 Iter->killsRegister(ARM::VPR, /*TRI=*/nullptr)) 133 return true; 134 return false; 135 } 136 137 // Creates a T, TT, TTT or TTTT BlockMask depending on BlockSize. 138 static ARM::PredBlockMask GetInitialBlockMask(unsigned BlockSize) { 139 switch (BlockSize) { 140 case 1: 141 return ARM::PredBlockMask::T; 142 case 2: 143 return ARM::PredBlockMask::TT; 144 case 3: 145 return ARM::PredBlockMask::TTT; 146 case 4: 147 return ARM::PredBlockMask::TTTT; 148 default: 149 llvm_unreachable("Invalid BlockSize!"); 150 } 151 } 152 153 // Given an iterator (Iter) that points at an instruction with a "Then" 154 // predicate, tries to create the largest block of continuous predicated 155 // instructions possible, and returns the VPT Block Mask of that block. 156 // 157 // This will try to perform some minor optimization in order to maximize the 158 // size of the block. 159 static ARM::PredBlockMask 160 CreateVPTBlock(MachineBasicBlock::instr_iterator &Iter, 161 MachineBasicBlock::instr_iterator EndIter, 162 SmallVectorImpl<MachineInstr *> &DeadInstructions) { 163 MachineBasicBlock::instr_iterator BlockBeg = Iter; 164 (void)BlockBeg; 165 assert(getVPTInstrPredicate(*Iter) == ARMVCC::Then && 166 "Expected a Predicated Instruction"); 167 168 LLVM_DEBUG(dbgs() << "VPT block created for: "; Iter->dump()); 169 170 unsigned BlockSize; 171 StepOverPredicatedInstrs(Iter, EndIter, 4, BlockSize); 172 173 LLVM_DEBUG(for (MachineBasicBlock::instr_iterator AddedInstIter = 174 std::next(BlockBeg); 175 AddedInstIter != Iter; ++AddedInstIter) { 176 if (AddedInstIter->isDebugInstr()) 177 continue; 178 dbgs() << " adding: "; 179 AddedInstIter->dump(); 180 }); 181 182 // Generate the initial BlockMask 183 ARM::PredBlockMask BlockMask = GetInitialBlockMask(BlockSize); 184 185 // Remove VPNOTs while there's still room in the block, so we can make the 186 // largest block possible. 187 ARMVCC::VPTCodes CurrentPredicate = ARMVCC::Else; 188 while (BlockSize < 4 && Iter != EndIter && 189 Iter->getOpcode() == ARM::MVE_VPNOT) { 190 191 // Try to skip all of the predicated instructions after the VPNOT, stopping 192 // after (4 - BlockSize). If we can't skip them all, stop. 193 unsigned ElseInstCnt = 0; 194 MachineBasicBlock::instr_iterator VPNOTBlockEndIter = std::next(Iter); 195 if (!StepOverPredicatedInstrs(VPNOTBlockEndIter, EndIter, (4 - BlockSize), 196 ElseInstCnt)) 197 break; 198 199 // Check if this VPNOT can be removed or not: It can only be removed if at 200 // least one of the predicated instruction that follows it kills or sets 201 // VPR. 202 if (!IsVPRDefinedOrKilledByBlock(Iter, VPNOTBlockEndIter)) 203 break; 204 205 LLVM_DEBUG(dbgs() << " removing VPNOT: "; Iter->dump()); 206 207 // Record the new size of the block 208 BlockSize += ElseInstCnt; 209 assert(BlockSize <= 4 && "Block is too large!"); 210 211 // Record the VPNot to remove it later. 212 DeadInstructions.push_back(&*Iter); 213 ++Iter; 214 215 // Replace the predicates of the instructions we're adding. 216 // Note that we are using "Iter" to iterate over the block so we can update 217 // it at the same time. 218 for (; Iter != VPNOTBlockEndIter; ++Iter) { 219 if (Iter->isDebugInstr()) 220 continue; 221 222 // Find the register in which the predicate is 223 int OpIdx = findFirstVPTPredOperandIdx(*Iter); 224 assert(OpIdx != -1); 225 226 // Change the predicate and update the mask 227 Iter->getOperand(OpIdx).setImm(CurrentPredicate); 228 BlockMask = expandPredBlockMask(BlockMask, CurrentPredicate); 229 230 LLVM_DEBUG(dbgs() << " adding : "; Iter->dump()); 231 } 232 233 CurrentPredicate = 234 (CurrentPredicate == ARMVCC::Then ? ARMVCC::Else : ARMVCC::Then); 235 } 236 return BlockMask; 237 } 238 239 bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { 240 bool Modified = false; 241 MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); 242 MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); 243 244 SmallVector<MachineInstr *, 4> DeadInstructions; 245 246 while (MBIter != EndIter) { 247 MachineInstr *MI = &*MBIter; 248 Register PredReg; 249 DebugLoc DL = MI->getDebugLoc(); 250 251 ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); 252 253 // The idea of the predicate is that None, Then and Else are for use when 254 // handling assembly language: they correspond to the three possible 255 // suffixes "", "t" and "e" on the mnemonic. So when instructions are read 256 // from assembly source or disassembled from object code, you expect to 257 // see a mixture whenever there's a long VPT block. But in code 258 // generation, we hope we'll never generate an Else as input to this pass. 259 assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); 260 261 if (Pred == ARMVCC::None) { 262 ++MBIter; 263 continue; 264 } 265 266 ARM::PredBlockMask BlockMask = 267 CreateVPTBlock(MBIter, EndIter, DeadInstructions); 268 269 // Search back for a VCMP that can be folded to create a VPT, or else 270 // create a VPST directly 271 MachineInstrBuilder MIBuilder; 272 unsigned NewOpcode; 273 LLVM_DEBUG(dbgs() << " final block mask: " << (unsigned)BlockMask << "\n"); 274 if (MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode)) { 275 LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); 276 MIBuilder = BuildMI(Block, MI, DL, TII->get(NewOpcode)); 277 MIBuilder.addImm((uint64_t)BlockMask); 278 MIBuilder.add(VCMP->getOperand(1)); 279 MIBuilder.add(VCMP->getOperand(2)); 280 MIBuilder.add(VCMP->getOperand(3)); 281 282 // We need to remove any kill flags between the original VCMP and the new 283 // insertion point. 284 for (MachineInstr &MII : 285 make_range(VCMP->getIterator(), MI->getIterator())) { 286 MII.clearRegisterKills(VCMP->getOperand(1).getReg(), TRI); 287 MII.clearRegisterKills(VCMP->getOperand(2).getReg(), TRI); 288 } 289 290 VCMP->eraseFromParent(); 291 } else { 292 MIBuilder = BuildMI(Block, MI, DL, TII->get(ARM::MVE_VPST)); 293 MIBuilder.addImm((uint64_t)BlockMask); 294 } 295 296 // Erase all dead instructions (VPNOT's). Do that now so that they do not 297 // mess with the bundle creation. 298 for (MachineInstr *DeadMI : DeadInstructions) 299 DeadMI->eraseFromParent(); 300 DeadInstructions.clear(); 301 302 finalizeBundle( 303 Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); 304 305 Modified = true; 306 } 307 308 return Modified; 309 } 310 311 bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { 312 const ARMSubtarget &STI = Fn.getSubtarget<ARMSubtarget>(); 313 314 if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) 315 return false; 316 317 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); 318 TRI = STI.getRegisterInfo(); 319 320 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" 321 << "********** Function: " << Fn.getName() << '\n'); 322 323 bool Modified = false; 324 for (MachineBasicBlock &MBB : Fn) 325 Modified |= InsertVPTBlocks(MBB); 326 327 LLVM_DEBUG(dbgs() << "**************************************\n"); 328 return Modified; 329 } 330 331 /// createMVEVPTBlock - Returns an instance of the MVE VPT block 332 /// insertion pass. 333 FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } 334