1 //===-- MVEVPTBlockPass.cpp - Insert MVE VPT blocks -----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 9 #include "ARM.h" 10 #include "ARMMachineFunctionInfo.h" 11 #include "ARMSubtarget.h" 12 #include "MCTargetDesc/ARMBaseInfo.h" 13 #include "Thumb2InstrInfo.h" 14 #include "llvm/ADT/SmallSet.h" 15 #include "llvm/ADT/SmallVector.h" 16 #include "llvm/ADT/Statistic.h" 17 #include "llvm/ADT/StringRef.h" 18 #include "llvm/CodeGen/MachineBasicBlock.h" 19 #include "llvm/CodeGen/MachineFunction.h" 20 #include "llvm/CodeGen/MachineFunctionPass.h" 21 #include "llvm/CodeGen/MachineInstr.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineInstrBundle.h" 24 #include "llvm/CodeGen/MachineOperand.h" 25 #include "llvm/IR/DebugLoc.h" 26 #include "llvm/MC/MCInstrDesc.h" 27 #include "llvm/MC/MCRegisterInfo.h" 28 #include "llvm/Support/Debug.h" 29 #include <cassert> 30 #include <new> 31 32 using namespace llvm; 33 34 #define DEBUG_TYPE "arm-mve-vpt" 35 36 namespace { 37 class MVEVPTBlock : public MachineFunctionPass { 38 public: 39 static char ID; 40 const Thumb2InstrInfo *TII; 41 const TargetRegisterInfo *TRI; 42 43 MVEVPTBlock() : MachineFunctionPass(ID) {} 44 45 bool runOnMachineFunction(MachineFunction &Fn) override; 46 47 MachineFunctionProperties getRequiredProperties() const override { 48 return MachineFunctionProperties().set( 49 MachineFunctionProperties::Property::NoVRegs); 50 } 51 52 StringRef getPassName() const override { 53 return "MVE VPT block insertion pass"; 54 } 55 56 private: 57 bool InsertVPTBlocks(MachineBasicBlock &MBB); 58 }; 59 60 char MVEVPTBlock::ID = 0; 61 62 } // end anonymous namespace 63 64 INITIALIZE_PASS(MVEVPTBlock, DEBUG_TYPE, "ARM MVE VPT block pass", false, false) 65 66 enum VPTMaskValue { 67 T = 8, // 0b1000 68 TT = 4, // 0b0100 69 TE = 12, // 0b1100 70 TTT = 2, // 0b0010 71 TTE = 6, // 0b0110 72 TEE = 10, // 0b1010 73 TET = 14, // 0b1110 74 TTTT = 1, // 0b0001 75 TTTE = 3, // 0b0011 76 TTEE = 5, // 0b0101 77 TTET = 7, // 0b0111 78 TEEE = 9, // 0b1001 79 TEET = 11, // 0b1011 80 TETT = 13, // 0b1101 81 TETE = 15 // 0b1111 82 }; 83 84 static unsigned VCMPOpcodeToVPT(unsigned Opcode) { 85 switch (Opcode) { 86 case ARM::MVE_VCMPf32: 87 return ARM::MVE_VPTv4f32; 88 case ARM::MVE_VCMPf16: 89 return ARM::MVE_VPTv8f16; 90 case ARM::MVE_VCMPi8: 91 return ARM::MVE_VPTv16i8; 92 case ARM::MVE_VCMPi16: 93 return ARM::MVE_VPTv8i16; 94 case ARM::MVE_VCMPi32: 95 return ARM::MVE_VPTv4i32; 96 case ARM::MVE_VCMPu8: 97 return ARM::MVE_VPTv16u8; 98 case ARM::MVE_VCMPu16: 99 return ARM::MVE_VPTv8u16; 100 case ARM::MVE_VCMPu32: 101 return ARM::MVE_VPTv4u32; 102 case ARM::MVE_VCMPs8: 103 return ARM::MVE_VPTv16s8; 104 case ARM::MVE_VCMPs16: 105 return ARM::MVE_VPTv8s16; 106 case ARM::MVE_VCMPs32: 107 return ARM::MVE_VPTv4s32; 108 109 case ARM::MVE_VCMPf32r: 110 return ARM::MVE_VPTv4f32r; 111 case ARM::MVE_VCMPf16r: 112 return ARM::MVE_VPTv8f16r; 113 case ARM::MVE_VCMPi8r: 114 return ARM::MVE_VPTv16i8r; 115 case ARM::MVE_VCMPi16r: 116 return ARM::MVE_VPTv8i16r; 117 case ARM::MVE_VCMPi32r: 118 return ARM::MVE_VPTv4i32r; 119 case ARM::MVE_VCMPu8r: 120 return ARM::MVE_VPTv16u8r; 121 case ARM::MVE_VCMPu16r: 122 return ARM::MVE_VPTv8u16r; 123 case ARM::MVE_VCMPu32r: 124 return ARM::MVE_VPTv4u32r; 125 case ARM::MVE_VCMPs8r: 126 return ARM::MVE_VPTv16s8r; 127 case ARM::MVE_VCMPs16r: 128 return ARM::MVE_VPTv8s16r; 129 case ARM::MVE_VCMPs32r: 130 return ARM::MVE_VPTv4s32r; 131 132 default: 133 return 0; 134 } 135 } 136 137 static MachineInstr *findVCMPToFoldIntoVPST(MachineBasicBlock::iterator MI, 138 const TargetRegisterInfo *TRI, 139 unsigned &NewOpcode) { 140 // Search backwards to the instruction that defines VPR. This may or not 141 // be a VCMP, we check that after this loop. If we find another instruction 142 // that reads cpsr, we return nullptr. 143 MachineBasicBlock::iterator CmpMI = MI; 144 while (CmpMI != MI->getParent()->begin()) { 145 --CmpMI; 146 if (CmpMI->modifiesRegister(ARM::VPR, TRI)) 147 break; 148 if (CmpMI->readsRegister(ARM::VPR, TRI)) 149 break; 150 } 151 152 if (CmpMI == MI) 153 return nullptr; 154 NewOpcode = VCMPOpcodeToVPT(CmpMI->getOpcode()); 155 if (NewOpcode == 0) 156 return nullptr; 157 158 // Search forward from CmpMI to MI, checking if either register was def'd 159 if (registerDefinedBetween(CmpMI->getOperand(1).getReg(), std::next(CmpMI), 160 MI, TRI)) 161 return nullptr; 162 if (registerDefinedBetween(CmpMI->getOperand(2).getReg(), std::next(CmpMI), 163 MI, TRI)) 164 return nullptr; 165 return &*CmpMI; 166 } 167 168 bool MVEVPTBlock::InsertVPTBlocks(MachineBasicBlock &Block) { 169 bool Modified = false; 170 MachineBasicBlock::instr_iterator MBIter = Block.instr_begin(); 171 MachineBasicBlock::instr_iterator EndIter = Block.instr_end(); 172 173 while (MBIter != EndIter) { 174 MachineInstr *MI = &*MBIter; 175 unsigned PredReg = 0; 176 DebugLoc dl = MI->getDebugLoc(); 177 178 ARMVCC::VPTCodes Pred = getVPTInstrPredicate(*MI, PredReg); 179 180 // The idea of the predicate is that None, Then and Else are for use when 181 // handling assembly language: they correspond to the three possible 182 // suffixes "", "t" and "e" on the mnemonic. So when instructions are read 183 // from assembly source or disassembled from object code, you expect to see 184 // a mixture whenever there's a long VPT block. But in code generation, we 185 // hope we'll never generate an Else as input to this pass. 186 assert(Pred != ARMVCC::Else && "VPT block pass does not expect Else preds"); 187 188 if (Pred == ARMVCC::None) { 189 ++MBIter; 190 continue; 191 } 192 193 LLVM_DEBUG(dbgs() << "VPT block created for: "; MI->dump()); 194 int VPTInstCnt = 1; 195 ARMVCC::VPTCodes NextPred; 196 197 // Look at subsequent instructions, checking if they can be in the same VPT 198 // block. 199 ++MBIter; 200 while (MBIter != EndIter && VPTInstCnt < 4) { 201 NextPred = getVPTInstrPredicate(*MBIter, PredReg); 202 assert(NextPred != ARMVCC::Else && 203 "VPT block pass does not expect Else preds"); 204 if (NextPred != Pred) 205 break; 206 LLVM_DEBUG(dbgs() << " adding : "; MBIter->dump()); 207 ++VPTInstCnt; 208 ++MBIter; 209 }; 210 211 unsigned BlockMask = 0; 212 switch (VPTInstCnt) { 213 case 1: 214 BlockMask = VPTMaskValue::T; 215 break; 216 case 2: 217 BlockMask = VPTMaskValue::TT; 218 break; 219 case 3: 220 BlockMask = VPTMaskValue::TTT; 221 break; 222 case 4: 223 BlockMask = VPTMaskValue::TTTT; 224 break; 225 default: 226 llvm_unreachable("Unexpected number of instruction in a VPT block"); 227 }; 228 229 // Search back for a VCMP that can be folded to create a VPT, or else create 230 // a VPST directly 231 MachineInstrBuilder MIBuilder; 232 unsigned NewOpcode; 233 MachineInstr *VCMP = findVCMPToFoldIntoVPST(MI, TRI, NewOpcode); 234 if (VCMP) { 235 LLVM_DEBUG(dbgs() << " folding VCMP into VPST: "; VCMP->dump()); 236 MIBuilder = BuildMI(Block, MI, dl, TII->get(NewOpcode)); 237 MIBuilder.addImm(BlockMask); 238 MIBuilder.add(VCMP->getOperand(1)); 239 MIBuilder.add(VCMP->getOperand(2)); 240 MIBuilder.add(VCMP->getOperand(3)); 241 VCMP->eraseFromParent(); 242 } else { 243 MIBuilder = BuildMI(Block, MI, dl, TII->get(ARM::MVE_VPST)); 244 MIBuilder.addImm(BlockMask); 245 } 246 247 finalizeBundle( 248 Block, MachineBasicBlock::instr_iterator(MIBuilder.getInstr()), MBIter); 249 250 Modified = true; 251 } 252 return Modified; 253 } 254 255 bool MVEVPTBlock::runOnMachineFunction(MachineFunction &Fn) { 256 const ARMSubtarget &STI = 257 static_cast<const ARMSubtarget &>(Fn.getSubtarget()); 258 259 if (!STI.isThumb2() || !STI.hasMVEIntegerOps()) 260 return false; 261 262 TII = static_cast<const Thumb2InstrInfo *>(STI.getInstrInfo()); 263 TRI = STI.getRegisterInfo(); 264 265 LLVM_DEBUG(dbgs() << "********** ARM MVE VPT BLOCKS **********\n" 266 << "********** Function: " << Fn.getName() << '\n'); 267 268 bool Modified = false; 269 for (MachineBasicBlock &MBB : Fn) 270 Modified |= InsertVPTBlocks(MBB); 271 272 LLVM_DEBUG(dbgs() << "**************************************\n"); 273 return Modified; 274 } 275 276 /// createMVEVPTBlock - Returns an instance of the MVE VPT block 277 /// insertion pass. 278 FunctionPass *llvm::createMVEVPTBlockPass() { return new MVEVPTBlock(); } 279