//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling, if-conversion, and other late
// optimizations. This pass should be run after register allocation but before
// the post-regalloc scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "ARM.h"
#include "ARMBaseInstrInfo.h"
#include "ARMBaseRegisterInfo.h"
#include "ARMConstantPoolValue.h"
#include "ARMMachineFunctionInfo.h"
#include "ARMSubtarget.h"
#include "MCTargetDesc/ARMAddressingModes.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Support/Debug.h"

using namespace llvm;

#define DEBUG_TYPE "arm-pseudo"

// Debugging aid: when set, the machine verifier is run on each function after
// this pass expands its pseudos (see runOnMachineFunction).
static cl::opt<bool>
VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
                cl::desc("Verify machine code after expanding ARM pseudos"));

#define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass"

namespace {
/// Post-RA pass that rewrites ARM/Thumb pseudo instructions into real target
/// instructions, one basic block at a time.
class ARMExpandPseudo : public MachineFunctionPass {
public:
  static char ID;
  ARMExpandPseudo() : MachineFunctionPass(ID) {}

  // Per-function state cached at the start of runOnMachineFunction and used
  // by all the expansion helpers below.
  const ARMBaseInstrInfo *TII;
  const TargetRegisterInfo *TRI;
  const ARMSubtarget *STI;
  ARMFunctionInfo *AFI;

  bool runOnMachineFunction(MachineFunction &Fn) override;

  // This pass runs after register allocation; no virtual registers may
  // remain by the time it executes.
  MachineFunctionProperties getRequiredProperties() const override {
    return MachineFunctionProperties().set(
        MachineFunctionProperties::Property::NoVRegs);
  }

  StringRef getPassName() const override {
    return ARM_EXPAND_PSEUDO_NAME;
  }

private:
  // Expand the single pseudo at MBBI; NextMBBI is updated if the expansion
  // inserts or removes instructions around it. Returns true on change.
  bool ExpandMI(MachineBasicBlock &MBB,
                MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  // Expand every pseudo in MBB; returns true if anything changed.
  bool ExpandMBB(MachineBasicBlock &MBB);

  // NEON load/store pseudo expansion (driven by NEONLdStTable below).
  void ExpandVLD(MachineBasicBlock::iterator &MBBI);
  void ExpandVST(MachineBasicBlock::iterator &MBBI);
  void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
  void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
                  unsigned Opc, bool IsExt);
  void ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI);

  // 32-bit immediate materialization (Thumb1 and ARM/Thumb2 variants).
  void ExpandTMOV32BitImm(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator &MBBI);
  void ExpandMOV32BitImm(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator &MBBI);

  // CMSE (Armv8-M Security Extensions) register clearing/save/restore around
  // non-secure calls and entry functions.
  void CMSEClearGPRegs(MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
                       const SmallVectorImpl<unsigned> &ClearRegs,
                       unsigned ClobberReg);
  MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
  MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       const BitVector &ClearRegs);
  MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        const BitVector &ClearRegs);
  void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                           const LivePhysRegs &LiveRegs,
                           SmallVectorImpl<unsigned> &AvailableRegs);
  void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
                             MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                             const LivePhysRegs &LiveRegs,
                             SmallVectorImpl<unsigned> &ScratchRegs);
  void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                              const LivePhysRegs &LiveRegs);
  void CMSERestoreFPRegs(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                         SmallVectorImpl<unsigned> &AvailableRegs);
  void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                           SmallVectorImpl<unsigned> &AvailableRegs);
  void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, DebugLoc &DL,
                            SmallVectorImpl<unsigned> &AvailableRegs);

  // Atomic compare-and-swap pseudo expansion into LDREX/STREX loops
  // (32-bit-or-narrower and 64-bit paired variants).
  bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
                      MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
                      unsigned StrexOp, unsigned UxtOp,
                      MachineBasicBlock::iterator &NextMBBI);

  bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI,
                         MachineBasicBlock::iterator &NextMBBI);
};
char ARMExpandPseudo::ID = 0;
}

INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false,
                false)

namespace {
// Constants for register spacing in NEON load/store instructions.
// For quad-register load-lane and store-lane pseudo instructions, the
// spacing is initially assumed to be EvenDblSpc, and that is changed to
// OddDblSpc depending on the lane number operand.
enum NEONRegSpacing {
  SingleSpc,
  SingleLowSpc ,  // Single spacing, low registers, three and four vectors.
  SingleHighQSpc, // Single spacing, high registers, four vectors.
  SingleHighTSpc, // Single spacing, high registers, three vectors.
  EvenDblSpc,
  OddDblSpc
};

// Entries for NEON load/store information table. The table is sorted by
// PseudoOpc for fast binary-search lookups.
140 struct NEONLdStTableEntry { 141 uint16_t PseudoOpc; 142 uint16_t RealOpc; 143 bool IsLoad; 144 bool isUpdating; 145 bool hasWritebackOperand; 146 uint8_t RegSpacing; // One of type NEONRegSpacing 147 uint8_t NumRegs; // D registers loaded or stored 148 uint8_t RegElts; // elements per D register; used for lane ops 149 // FIXME: Temporary flag to denote whether the real instruction takes 150 // a single register (like the encoding) or all of the registers in 151 // the list (like the asm syntax and the isel DAG). When all definitions 152 // are converted to take only the single encoded register, this will 153 // go away. 154 bool copyAllListRegs; 155 156 // Comparison methods for binary search of the table. 157 bool operator<(const NEONLdStTableEntry &TE) const { 158 return PseudoOpc < TE.PseudoOpc; 159 } 160 friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) { 161 return TE.PseudoOpc < PseudoOpc; 162 } 163 friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc, 164 const NEONLdStTableEntry &TE) { 165 return PseudoOpc < TE.PseudoOpc; 166 } 167 }; 168 } 169 170 static const NEONLdStTableEntry NEONLdStTable[] = { 171 { ARM::VLD1LNq16Pseudo, ARM::VLD1LNd16, true, false, false, EvenDblSpc, 1, 4 ,true}, 172 { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true, EvenDblSpc, 1, 4 ,true}, 173 { ARM::VLD1LNq32Pseudo, ARM::VLD1LNd32, true, false, false, EvenDblSpc, 1, 2 ,true}, 174 { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true, EvenDblSpc, 1, 2 ,true}, 175 { ARM::VLD1LNq8Pseudo, ARM::VLD1LNd8, true, false, false, EvenDblSpc, 1, 8 ,true}, 176 { ARM::VLD1LNq8Pseudo_UPD, ARM::VLD1LNd8_UPD, true, true, true, EvenDblSpc, 1, 8 ,true}, 177 178 { ARM::VLD1d16QPseudo, ARM::VLD1d16Q, true, false, false, SingleSpc, 4, 4 ,false}, 179 { ARM::VLD1d16QPseudoWB_fixed, ARM::VLD1d16Qwb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, 180 { ARM::VLD1d16QPseudoWB_register, ARM::VLD1d16Qwb_register, true, true, true, SingleSpc, 4, 4 
,false}, 181 { ARM::VLD1d16TPseudo, ARM::VLD1d16T, true, false, false, SingleSpc, 3, 4 ,false}, 182 { ARM::VLD1d16TPseudoWB_fixed, ARM::VLD1d16Twb_fixed, true, true, false, SingleSpc, 3, 4 ,false}, 183 { ARM::VLD1d16TPseudoWB_register, ARM::VLD1d16Twb_register, true, true, true, SingleSpc, 3, 4 ,false}, 184 185 { ARM::VLD1d32QPseudo, ARM::VLD1d32Q, true, false, false, SingleSpc, 4, 2 ,false}, 186 { ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d32Qwb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, 187 { ARM::VLD1d32QPseudoWB_register, ARM::VLD1d32Qwb_register, true, true, true, SingleSpc, 4, 2 ,false}, 188 { ARM::VLD1d32TPseudo, ARM::VLD1d32T, true, false, false, SingleSpc, 3, 2 ,false}, 189 { ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d32Twb_fixed, true, true, false, SingleSpc, 3, 2 ,false}, 190 { ARM::VLD1d32TPseudoWB_register, ARM::VLD1d32Twb_register, true, true, true, SingleSpc, 3, 2 ,false}, 191 192 { ARM::VLD1d64QPseudo, ARM::VLD1d64Q, true, false, false, SingleSpc, 4, 1 ,false}, 193 { ARM::VLD1d64QPseudoWB_fixed, ARM::VLD1d64Qwb_fixed, true, true, false, SingleSpc, 4, 1 ,false}, 194 { ARM::VLD1d64QPseudoWB_register, ARM::VLD1d64Qwb_register, true, true, true, SingleSpc, 4, 1 ,false}, 195 { ARM::VLD1d64TPseudo, ARM::VLD1d64T, true, false, false, SingleSpc, 3, 1 ,false}, 196 { ARM::VLD1d64TPseudoWB_fixed, ARM::VLD1d64Twb_fixed, true, true, false, SingleSpc, 3, 1 ,false}, 197 { ARM::VLD1d64TPseudoWB_register, ARM::VLD1d64Twb_register, true, true, true, SingleSpc, 3, 1 ,false}, 198 199 { ARM::VLD1d8QPseudo, ARM::VLD1d8Q, true, false, false, SingleSpc, 4, 8 ,false}, 200 { ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d8Qwb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, 201 { ARM::VLD1d8QPseudoWB_register, ARM::VLD1d8Qwb_register, true, true, true, SingleSpc, 4, 8 ,false}, 202 { ARM::VLD1d8TPseudo, ARM::VLD1d8T, true, false, false, SingleSpc, 3, 8 ,false}, 203 { ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d8Twb_fixed, true, true, false, SingleSpc, 3, 8 ,false}, 204 { 
ARM::VLD1d8TPseudoWB_register, ARM::VLD1d8Twb_register, true, true, true, SingleSpc, 3, 8 ,false}, 205 206 { ARM::VLD1q16HighQPseudo, ARM::VLD1d16Q, true, false, false, SingleHighQSpc, 4, 4 ,false}, 207 { ARM::VLD1q16HighQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleHighQSpc, 4, 4 ,false}, 208 { ARM::VLD1q16HighTPseudo, ARM::VLD1d16T, true, false, false, SingleHighTSpc, 3, 4 ,false}, 209 { ARM::VLD1q16HighTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleHighTSpc, 3, 4 ,false}, 210 { ARM::VLD1q16LowQPseudo_UPD, ARM::VLD1d16Qwb_fixed, true, true, true, SingleLowSpc, 4, 4 ,false}, 211 { ARM::VLD1q16LowTPseudo_UPD, ARM::VLD1d16Twb_fixed, true, true, true, SingleLowSpc, 3, 4 ,false}, 212 213 { ARM::VLD1q32HighQPseudo, ARM::VLD1d32Q, true, false, false, SingleHighQSpc, 4, 2 ,false}, 214 { ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleHighQSpc, 4, 2 ,false}, 215 { ARM::VLD1q32HighTPseudo, ARM::VLD1d32T, true, false, false, SingleHighTSpc, 3, 2 ,false}, 216 { ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleHighTSpc, 3, 2 ,false}, 217 { ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1d32Qwb_fixed, true, true, true, SingleLowSpc, 4, 2 ,false}, 218 { ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1d32Twb_fixed, true, true, true, SingleLowSpc, 3, 2 ,false}, 219 220 { ARM::VLD1q64HighQPseudo, ARM::VLD1d64Q, true, false, false, SingleHighQSpc, 4, 1 ,false}, 221 { ARM::VLD1q64HighQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleHighQSpc, 4, 1 ,false}, 222 { ARM::VLD1q64HighTPseudo, ARM::VLD1d64T, true, false, false, SingleHighTSpc, 3, 1 ,false}, 223 { ARM::VLD1q64HighTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleHighTSpc, 3, 1 ,false}, 224 { ARM::VLD1q64LowQPseudo_UPD, ARM::VLD1d64Qwb_fixed, true, true, true, SingleLowSpc, 4, 1 ,false}, 225 { ARM::VLD1q64LowTPseudo_UPD, ARM::VLD1d64Twb_fixed, true, true, true, SingleLowSpc, 3, 1 ,false}, 226 227 { ARM::VLD1q8HighQPseudo, ARM::VLD1d8Q, true, 
false, false, SingleHighQSpc, 4, 8 ,false}, 228 { ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleHighQSpc, 4, 8 ,false}, 229 { ARM::VLD1q8HighTPseudo, ARM::VLD1d8T, true, false, false, SingleHighTSpc, 3, 8 ,false}, 230 { ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleHighTSpc, 3, 8 ,false}, 231 { ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1d8Qwb_fixed, true, true, true, SingleLowSpc, 4, 8 ,false}, 232 { ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1d8Twb_fixed, true, true, true, SingleLowSpc, 3, 8 ,false}, 233 234 { ARM::VLD2DUPq16EvenPseudo, ARM::VLD2DUPd16x2, true, false, false, EvenDblSpc, 2, 4 ,false}, 235 { ARM::VLD2DUPq16OddPseudo, ARM::VLD2DUPd16x2, true, false, false, OddDblSpc, 2, 4 ,false}, 236 { ARM::VLD2DUPq16OddPseudoWB_fixed, ARM::VLD2DUPd16x2wb_fixed, true, true, false, OddDblSpc, 2, 4 ,false}, 237 { ARM::VLD2DUPq16OddPseudoWB_register, ARM::VLD2DUPd16x2wb_register, true, true, true, OddDblSpc, 2, 4 ,false}, 238 { ARM::VLD2DUPq32EvenPseudo, ARM::VLD2DUPd32x2, true, false, false, EvenDblSpc, 2, 2 ,false}, 239 { ARM::VLD2DUPq32OddPseudo, ARM::VLD2DUPd32x2, true, false, false, OddDblSpc, 2, 2 ,false}, 240 { ARM::VLD2DUPq32OddPseudoWB_fixed, ARM::VLD2DUPd32x2wb_fixed, true, true, false, OddDblSpc, 2, 2 ,false}, 241 { ARM::VLD2DUPq32OddPseudoWB_register, ARM::VLD2DUPd32x2wb_register, true, true, true, OddDblSpc, 2, 2 ,false}, 242 { ARM::VLD2DUPq8EvenPseudo, ARM::VLD2DUPd8x2, true, false, false, EvenDblSpc, 2, 8 ,false}, 243 { ARM::VLD2DUPq8OddPseudo, ARM::VLD2DUPd8x2, true, false, false, OddDblSpc, 2, 8 ,false}, 244 { ARM::VLD2DUPq8OddPseudoWB_fixed, ARM::VLD2DUPd8x2wb_fixed, true, true, false, OddDblSpc, 2, 8 ,false}, 245 { ARM::VLD2DUPq8OddPseudoWB_register, ARM::VLD2DUPd8x2wb_register, true, true, true, OddDblSpc, 2, 8 ,false}, 246 247 { ARM::VLD2LNd16Pseudo, ARM::VLD2LNd16, true, false, false, SingleSpc, 2, 4 ,true}, 248 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true, SingleSpc, 2, 4 ,true}, 249 { 
ARM::VLD2LNd32Pseudo, ARM::VLD2LNd32, true, false, false, SingleSpc, 2, 2 ,true}, 250 { ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true, SingleSpc, 2, 2 ,true}, 251 { ARM::VLD2LNd8Pseudo, ARM::VLD2LNd8, true, false, false, SingleSpc, 2, 8 ,true}, 252 { ARM::VLD2LNd8Pseudo_UPD, ARM::VLD2LNd8_UPD, true, true, true, SingleSpc, 2, 8 ,true}, 253 { ARM::VLD2LNq16Pseudo, ARM::VLD2LNq16, true, false, false, EvenDblSpc, 2, 4 ,true}, 254 { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true, EvenDblSpc, 2, 4 ,true}, 255 { ARM::VLD2LNq32Pseudo, ARM::VLD2LNq32, true, false, false, EvenDblSpc, 2, 2 ,true}, 256 { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true, EvenDblSpc, 2, 2 ,true}, 257 258 { ARM::VLD2q16Pseudo, ARM::VLD2q16, true, false, false, SingleSpc, 4, 4 ,false}, 259 { ARM::VLD2q16PseudoWB_fixed, ARM::VLD2q16wb_fixed, true, true, false, SingleSpc, 4, 4 ,false}, 260 { ARM::VLD2q16PseudoWB_register, ARM::VLD2q16wb_register, true, true, true, SingleSpc, 4, 4 ,false}, 261 { ARM::VLD2q32Pseudo, ARM::VLD2q32, true, false, false, SingleSpc, 4, 2 ,false}, 262 { ARM::VLD2q32PseudoWB_fixed, ARM::VLD2q32wb_fixed, true, true, false, SingleSpc, 4, 2 ,false}, 263 { ARM::VLD2q32PseudoWB_register, ARM::VLD2q32wb_register, true, true, true, SingleSpc, 4, 2 ,false}, 264 { ARM::VLD2q8Pseudo, ARM::VLD2q8, true, false, false, SingleSpc, 4, 8 ,false}, 265 { ARM::VLD2q8PseudoWB_fixed, ARM::VLD2q8wb_fixed, true, true, false, SingleSpc, 4, 8 ,false}, 266 { ARM::VLD2q8PseudoWB_register, ARM::VLD2q8wb_register, true, true, true, SingleSpc, 4, 8 ,false}, 267 268 { ARM::VLD3DUPd16Pseudo, ARM::VLD3DUPd16, true, false, false, SingleSpc, 3, 4,true}, 269 { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true, SingleSpc, 3, 4,true}, 270 { ARM::VLD3DUPd32Pseudo, ARM::VLD3DUPd32, true, false, false, SingleSpc, 3, 2,true}, 271 { ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true, SingleSpc, 3, 2,true}, 272 { ARM::VLD3DUPd8Pseudo, 
ARM::VLD3DUPd8, true, false, false, SingleSpc, 3, 8,true}, 273 { ARM::VLD3DUPd8Pseudo_UPD, ARM::VLD3DUPd8_UPD, true, true, true, SingleSpc, 3, 8,true}, 274 { ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16, true, false, false, EvenDblSpc, 3, 4 ,true}, 275 { ARM::VLD3DUPq16OddPseudo, ARM::VLD3DUPq16, true, false, false, OddDblSpc, 3, 4 ,true}, 276 { ARM::VLD3DUPq16OddPseudo_UPD, ARM::VLD3DUPq16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, 277 { ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32, true, false, false, EvenDblSpc, 3, 2 ,true}, 278 { ARM::VLD3DUPq32OddPseudo, ARM::VLD3DUPq32, true, false, false, OddDblSpc, 3, 2 ,true}, 279 { ARM::VLD3DUPq32OddPseudo_UPD, ARM::VLD3DUPq32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, 280 { ARM::VLD3DUPq8EvenPseudo, ARM::VLD3DUPq8, true, false, false, EvenDblSpc, 3, 8 ,true}, 281 { ARM::VLD3DUPq8OddPseudo, ARM::VLD3DUPq8, true, false, false, OddDblSpc, 3, 8 ,true}, 282 { ARM::VLD3DUPq8OddPseudo_UPD, ARM::VLD3DUPq8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, 283 284 { ARM::VLD3LNd16Pseudo, ARM::VLD3LNd16, true, false, false, SingleSpc, 3, 4 ,true}, 285 { ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, 286 { ARM::VLD3LNd32Pseudo, ARM::VLD3LNd32, true, false, false, SingleSpc, 3, 2 ,true}, 287 { ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, 288 { ARM::VLD3LNd8Pseudo, ARM::VLD3LNd8, true, false, false, SingleSpc, 3, 8 ,true}, 289 { ARM::VLD3LNd8Pseudo_UPD, ARM::VLD3LNd8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, 290 { ARM::VLD3LNq16Pseudo, ARM::VLD3LNq16, true, false, false, EvenDblSpc, 3, 4 ,true}, 291 { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, 292 { ARM::VLD3LNq32Pseudo, ARM::VLD3LNq32, true, false, false, EvenDblSpc, 3, 2 ,true}, 293 { ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, 294 295 { ARM::VLD3d16Pseudo, ARM::VLD3d16, true, false, false, SingleSpc, 3, 
4 ,true}, 296 { ARM::VLD3d16Pseudo_UPD, ARM::VLD3d16_UPD, true, true, true, SingleSpc, 3, 4 ,true}, 297 { ARM::VLD3d32Pseudo, ARM::VLD3d32, true, false, false, SingleSpc, 3, 2 ,true}, 298 { ARM::VLD3d32Pseudo_UPD, ARM::VLD3d32_UPD, true, true, true, SingleSpc, 3, 2 ,true}, 299 { ARM::VLD3d8Pseudo, ARM::VLD3d8, true, false, false, SingleSpc, 3, 8 ,true}, 300 { ARM::VLD3d8Pseudo_UPD, ARM::VLD3d8_UPD, true, true, true, SingleSpc, 3, 8 ,true}, 301 302 { ARM::VLD3q16Pseudo_UPD, ARM::VLD3q16_UPD, true, true, true, EvenDblSpc, 3, 4 ,true}, 303 { ARM::VLD3q16oddPseudo, ARM::VLD3q16, true, false, false, OddDblSpc, 3, 4 ,true}, 304 { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true, OddDblSpc, 3, 4 ,true}, 305 { ARM::VLD3q32Pseudo_UPD, ARM::VLD3q32_UPD, true, true, true, EvenDblSpc, 3, 2 ,true}, 306 { ARM::VLD3q32oddPseudo, ARM::VLD3q32, true, false, false, OddDblSpc, 3, 2 ,true}, 307 { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true, OddDblSpc, 3, 2 ,true}, 308 { ARM::VLD3q8Pseudo_UPD, ARM::VLD3q8_UPD, true, true, true, EvenDblSpc, 3, 8 ,true}, 309 { ARM::VLD3q8oddPseudo, ARM::VLD3q8, true, false, false, OddDblSpc, 3, 8 ,true}, 310 { ARM::VLD3q8oddPseudo_UPD, ARM::VLD3q8_UPD, true, true, true, OddDblSpc, 3, 8 ,true}, 311 312 { ARM::VLD4DUPd16Pseudo, ARM::VLD4DUPd16, true, false, false, SingleSpc, 4, 4,true}, 313 { ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true, SingleSpc, 4, 4,true}, 314 { ARM::VLD4DUPd32Pseudo, ARM::VLD4DUPd32, true, false, false, SingleSpc, 4, 2,true}, 315 { ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true, SingleSpc, 4, 2,true}, 316 { ARM::VLD4DUPd8Pseudo, ARM::VLD4DUPd8, true, false, false, SingleSpc, 4, 8,true}, 317 { ARM::VLD4DUPd8Pseudo_UPD, ARM::VLD4DUPd8_UPD, true, true, true, SingleSpc, 4, 8,true}, 318 { ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16, true, false, false, EvenDblSpc, 4, 4 ,true}, 319 { ARM::VLD4DUPq16OddPseudo, ARM::VLD4DUPq16, true, false, false, OddDblSpc, 4, 4 ,true}, 320 
{ ARM::VLD4DUPq16OddPseudo_UPD, ARM::VLD4DUPq16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, 321 { ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32, true, false, false, EvenDblSpc, 4, 2 ,true}, 322 { ARM::VLD4DUPq32OddPseudo, ARM::VLD4DUPq32, true, false, false, OddDblSpc, 4, 2 ,true}, 323 { ARM::VLD4DUPq32OddPseudo_UPD, ARM::VLD4DUPq32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, 324 { ARM::VLD4DUPq8EvenPseudo, ARM::VLD4DUPq8, true, false, false, EvenDblSpc, 4, 8 ,true}, 325 { ARM::VLD4DUPq8OddPseudo, ARM::VLD4DUPq8, true, false, false, OddDblSpc, 4, 8 ,true}, 326 { ARM::VLD4DUPq8OddPseudo_UPD, ARM::VLD4DUPq8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, 327 328 { ARM::VLD4LNd16Pseudo, ARM::VLD4LNd16, true, false, false, SingleSpc, 4, 4 ,true}, 329 { ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, 330 { ARM::VLD4LNd32Pseudo, ARM::VLD4LNd32, true, false, false, SingleSpc, 4, 2 ,true}, 331 { ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, 332 { ARM::VLD4LNd8Pseudo, ARM::VLD4LNd8, true, false, false, SingleSpc, 4, 8 ,true}, 333 { ARM::VLD4LNd8Pseudo_UPD, ARM::VLD4LNd8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, 334 { ARM::VLD4LNq16Pseudo, ARM::VLD4LNq16, true, false, false, EvenDblSpc, 4, 4 ,true}, 335 { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, 336 { ARM::VLD4LNq32Pseudo, ARM::VLD4LNq32, true, false, false, EvenDblSpc, 4, 2 ,true}, 337 { ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, 338 339 { ARM::VLD4d16Pseudo, ARM::VLD4d16, true, false, false, SingleSpc, 4, 4 ,true}, 340 { ARM::VLD4d16Pseudo_UPD, ARM::VLD4d16_UPD, true, true, true, SingleSpc, 4, 4 ,true}, 341 { ARM::VLD4d32Pseudo, ARM::VLD4d32, true, false, false, SingleSpc, 4, 2 ,true}, 342 { ARM::VLD4d32Pseudo_UPD, ARM::VLD4d32_UPD, true, true, true, SingleSpc, 4, 2 ,true}, 343 { ARM::VLD4d8Pseudo, ARM::VLD4d8, true, false, false, SingleSpc, 
4, 8 ,true}, 344 { ARM::VLD4d8Pseudo_UPD, ARM::VLD4d8_UPD, true, true, true, SingleSpc, 4, 8 ,true}, 345 346 { ARM::VLD4q16Pseudo_UPD, ARM::VLD4q16_UPD, true, true, true, EvenDblSpc, 4, 4 ,true}, 347 { ARM::VLD4q16oddPseudo, ARM::VLD4q16, true, false, false, OddDblSpc, 4, 4 ,true}, 348 { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true, OddDblSpc, 4, 4 ,true}, 349 { ARM::VLD4q32Pseudo_UPD, ARM::VLD4q32_UPD, true, true, true, EvenDblSpc, 4, 2 ,true}, 350 { ARM::VLD4q32oddPseudo, ARM::VLD4q32, true, false, false, OddDblSpc, 4, 2 ,true}, 351 { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true, OddDblSpc, 4, 2 ,true}, 352 { ARM::VLD4q8Pseudo_UPD, ARM::VLD4q8_UPD, true, true, true, EvenDblSpc, 4, 8 ,true}, 353 { ARM::VLD4q8oddPseudo, ARM::VLD4q8, true, false, false, OddDblSpc, 4, 8 ,true}, 354 { ARM::VLD4q8oddPseudo_UPD, ARM::VLD4q8_UPD, true, true, true, OddDblSpc, 4, 8 ,true}, 355 356 { ARM::VST1LNq16Pseudo, ARM::VST1LNd16, false, false, false, EvenDblSpc, 1, 4 ,true}, 357 { ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true, EvenDblSpc, 1, 4 ,true}, 358 { ARM::VST1LNq32Pseudo, ARM::VST1LNd32, false, false, false, EvenDblSpc, 1, 2 ,true}, 359 { ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true, EvenDblSpc, 1, 2 ,true}, 360 { ARM::VST1LNq8Pseudo, ARM::VST1LNd8, false, false, false, EvenDblSpc, 1, 8 ,true}, 361 { ARM::VST1LNq8Pseudo_UPD, ARM::VST1LNd8_UPD, false, true, true, EvenDblSpc, 1, 8 ,true}, 362 363 { ARM::VST1d16QPseudo, ARM::VST1d16Q, false, false, false, SingleSpc, 4, 4 ,false}, 364 { ARM::VST1d16QPseudoWB_fixed, ARM::VST1d16Qwb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, 365 { ARM::VST1d16QPseudoWB_register, ARM::VST1d16Qwb_register, false, true, true, SingleSpc, 4, 4 ,false}, 366 { ARM::VST1d16TPseudo, ARM::VST1d16T, false, false, false, SingleSpc, 3, 4 ,false}, 367 { ARM::VST1d16TPseudoWB_fixed, ARM::VST1d16Twb_fixed, false, true, false, SingleSpc, 3, 4 ,false}, 368 { 
ARM::VST1d16TPseudoWB_register, ARM::VST1d16Twb_register, false, true, true, SingleSpc, 3, 4 ,false}, 369 370 { ARM::VST1d32QPseudo, ARM::VST1d32Q, false, false, false, SingleSpc, 4, 2 ,false}, 371 { ARM::VST1d32QPseudoWB_fixed, ARM::VST1d32Qwb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, 372 { ARM::VST1d32QPseudoWB_register, ARM::VST1d32Qwb_register, false, true, true, SingleSpc, 4, 2 ,false}, 373 { ARM::VST1d32TPseudo, ARM::VST1d32T, false, false, false, SingleSpc, 3, 2 ,false}, 374 { ARM::VST1d32TPseudoWB_fixed, ARM::VST1d32Twb_fixed, false, true, false, SingleSpc, 3, 2 ,false}, 375 { ARM::VST1d32TPseudoWB_register, ARM::VST1d32Twb_register, false, true, true, SingleSpc, 3, 2 ,false}, 376 377 { ARM::VST1d64QPseudo, ARM::VST1d64Q, false, false, false, SingleSpc, 4, 1 ,false}, 378 { ARM::VST1d64QPseudoWB_fixed, ARM::VST1d64Qwb_fixed, false, true, false, SingleSpc, 4, 1 ,false}, 379 { ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true, SingleSpc, 4, 1 ,false}, 380 { ARM::VST1d64TPseudo, ARM::VST1d64T, false, false, false, SingleSpc, 3, 1 ,false}, 381 { ARM::VST1d64TPseudoWB_fixed, ARM::VST1d64Twb_fixed, false, true, false, SingleSpc, 3, 1 ,false}, 382 { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true, SingleSpc, 3, 1 ,false}, 383 384 { ARM::VST1d8QPseudo, ARM::VST1d8Q, false, false, false, SingleSpc, 4, 8 ,false}, 385 { ARM::VST1d8QPseudoWB_fixed, ARM::VST1d8Qwb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, 386 { ARM::VST1d8QPseudoWB_register, ARM::VST1d8Qwb_register, false, true, true, SingleSpc, 4, 8 ,false}, 387 { ARM::VST1d8TPseudo, ARM::VST1d8T, false, false, false, SingleSpc, 3, 8 ,false}, 388 { ARM::VST1d8TPseudoWB_fixed, ARM::VST1d8Twb_fixed, false, true, false, SingleSpc, 3, 8 ,false}, 389 { ARM::VST1d8TPseudoWB_register, ARM::VST1d8Twb_register, false, true, true, SingleSpc, 3, 8 ,false}, 390 391 { ARM::VST1q16HighQPseudo, ARM::VST1d16Q, false, false, false, SingleHighQSpc, 4, 4 ,false}, 
392 { ARM::VST1q16HighQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 393 { ARM::VST1q16HighTPseudo, ARM::VST1d16T, false, false, false, SingleHighTSpc, 3, 4 ,false}, 394 { ARM::VST1q16HighTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleHighTSpc, 3, 4 ,false}, 395 { ARM::VST1q16LowQPseudo_UPD, ARM::VST1d16Qwb_fixed, false, true, true, SingleLowSpc, 4, 4 ,false}, 396 { ARM::VST1q16LowTPseudo_UPD, ARM::VST1d16Twb_fixed, false, true, true, SingleLowSpc, 3, 4 ,false}, 397 398 { ARM::VST1q32HighQPseudo, ARM::VST1d32Q, false, false, false, SingleHighQSpc, 4, 2 ,false}, 399 { ARM::VST1q32HighQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 400 { ARM::VST1q32HighTPseudo, ARM::VST1d32T, false, false, false, SingleHighTSpc, 3, 2 ,false}, 401 { ARM::VST1q32HighTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleHighTSpc, 3, 2 ,false}, 402 { ARM::VST1q32LowQPseudo_UPD, ARM::VST1d32Qwb_fixed, false, true, true, SingleLowSpc, 4, 2 ,false}, 403 { ARM::VST1q32LowTPseudo_UPD, ARM::VST1d32Twb_fixed, false, true, true, SingleLowSpc, 3, 2 ,false}, 404 405 { ARM::VST1q64HighQPseudo, ARM::VST1d64Q, false, false, false, SingleHighQSpc, 4, 1 ,false}, 406 { ARM::VST1q64HighQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 407 { ARM::VST1q64HighTPseudo, ARM::VST1d64T, false, false, false, SingleHighTSpc, 3, 1 ,false}, 408 { ARM::VST1q64HighTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleHighTSpc, 3, 1 ,false}, 409 { ARM::VST1q64LowQPseudo_UPD, ARM::VST1d64Qwb_fixed, false, true, true, SingleLowSpc, 4, 1 ,false}, 410 { ARM::VST1q64LowTPseudo_UPD, ARM::VST1d64Twb_fixed, false, true, true, SingleLowSpc, 3, 1 ,false}, 411 412 { ARM::VST1q8HighQPseudo, ARM::VST1d8Q, false, false, false, SingleHighQSpc, 4, 8 ,false}, 413 { ARM::VST1q8HighQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleHighQSpc, 4, 8 ,false}, 414 { ARM::VST1q8HighTPseudo, 
ARM::VST1d8T, false, false, false, SingleHighTSpc, 3, 8 ,false}, 415 { ARM::VST1q8HighTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleHighTSpc, 3, 8 ,false}, 416 { ARM::VST1q8LowQPseudo_UPD, ARM::VST1d8Qwb_fixed, false, true, true, SingleLowSpc, 4, 8 ,false}, 417 { ARM::VST1q8LowTPseudo_UPD, ARM::VST1d8Twb_fixed, false, true, true, SingleLowSpc, 3, 8 ,false}, 418 419 { ARM::VST2LNd16Pseudo, ARM::VST2LNd16, false, false, false, SingleSpc, 2, 4 ,true}, 420 { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true, SingleSpc, 2, 4 ,true}, 421 { ARM::VST2LNd32Pseudo, ARM::VST2LNd32, false, false, false, SingleSpc, 2, 2 ,true}, 422 { ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true, SingleSpc, 2, 2 ,true}, 423 { ARM::VST2LNd8Pseudo, ARM::VST2LNd8, false, false, false, SingleSpc, 2, 8 ,true}, 424 { ARM::VST2LNd8Pseudo_UPD, ARM::VST2LNd8_UPD, false, true, true, SingleSpc, 2, 8 ,true}, 425 { ARM::VST2LNq16Pseudo, ARM::VST2LNq16, false, false, false, EvenDblSpc, 2, 4,true}, 426 { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true, EvenDblSpc, 2, 4,true}, 427 { ARM::VST2LNq32Pseudo, ARM::VST2LNq32, false, false, false, EvenDblSpc, 2, 2,true}, 428 { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true, EvenDblSpc, 2, 2,true}, 429 430 { ARM::VST2q16Pseudo, ARM::VST2q16, false, false, false, SingleSpc, 4, 4 ,false}, 431 { ARM::VST2q16PseudoWB_fixed, ARM::VST2q16wb_fixed, false, true, false, SingleSpc, 4, 4 ,false}, 432 { ARM::VST2q16PseudoWB_register, ARM::VST2q16wb_register, false, true, true, SingleSpc, 4, 4 ,false}, 433 { ARM::VST2q32Pseudo, ARM::VST2q32, false, false, false, SingleSpc, 4, 2 ,false}, 434 { ARM::VST2q32PseudoWB_fixed, ARM::VST2q32wb_fixed, false, true, false, SingleSpc, 4, 2 ,false}, 435 { ARM::VST2q32PseudoWB_register, ARM::VST2q32wb_register, false, true, true, SingleSpc, 4, 2 ,false}, 436 { ARM::VST2q8Pseudo, ARM::VST2q8, false, false, false, SingleSpc, 4, 8 ,false}, 437 { 
ARM::VST2q8PseudoWB_fixed, ARM::VST2q8wb_fixed, false, true, false, SingleSpc, 4, 8 ,false}, 438 { ARM::VST2q8PseudoWB_register, ARM::VST2q8wb_register, false, true, true, SingleSpc, 4, 8 ,false}, 439 440 { ARM::VST3LNd16Pseudo, ARM::VST3LNd16, false, false, false, SingleSpc, 3, 4 ,true}, 441 { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, 442 { ARM::VST3LNd32Pseudo, ARM::VST3LNd32, false, false, false, SingleSpc, 3, 2 ,true}, 443 { ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, 444 { ARM::VST3LNd8Pseudo, ARM::VST3LNd8, false, false, false, SingleSpc, 3, 8 ,true}, 445 { ARM::VST3LNd8Pseudo_UPD, ARM::VST3LNd8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, 446 { ARM::VST3LNq16Pseudo, ARM::VST3LNq16, false, false, false, EvenDblSpc, 3, 4,true}, 447 { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true, EvenDblSpc, 3, 4,true}, 448 { ARM::VST3LNq32Pseudo, ARM::VST3LNq32, false, false, false, EvenDblSpc, 3, 2,true}, 449 { ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true, EvenDblSpc, 3, 2,true}, 450 451 { ARM::VST3d16Pseudo, ARM::VST3d16, false, false, false, SingleSpc, 3, 4 ,true}, 452 { ARM::VST3d16Pseudo_UPD, ARM::VST3d16_UPD, false, true, true, SingleSpc, 3, 4 ,true}, 453 { ARM::VST3d32Pseudo, ARM::VST3d32, false, false, false, SingleSpc, 3, 2 ,true}, 454 { ARM::VST3d32Pseudo_UPD, ARM::VST3d32_UPD, false, true, true, SingleSpc, 3, 2 ,true}, 455 { ARM::VST3d8Pseudo, ARM::VST3d8, false, false, false, SingleSpc, 3, 8 ,true}, 456 { ARM::VST3d8Pseudo_UPD, ARM::VST3d8_UPD, false, true, true, SingleSpc, 3, 8 ,true}, 457 458 { ARM::VST3q16Pseudo_UPD, ARM::VST3q16_UPD, false, true, true, EvenDblSpc, 3, 4 ,true}, 459 { ARM::VST3q16oddPseudo, ARM::VST3q16, false, false, false, OddDblSpc, 3, 4 ,true}, 460 { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true, OddDblSpc, 3, 4 ,true}, 461 { ARM::VST3q32Pseudo_UPD, ARM::VST3q32_UPD, false, true, true, 
EvenDblSpc, 3, 2 ,true}, 462 { ARM::VST3q32oddPseudo, ARM::VST3q32, false, false, false, OddDblSpc, 3, 2 ,true}, 463 { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true, OddDblSpc, 3, 2 ,true}, 464 { ARM::VST3q8Pseudo_UPD, ARM::VST3q8_UPD, false, true, true, EvenDblSpc, 3, 8 ,true}, 465 { ARM::VST3q8oddPseudo, ARM::VST3q8, false, false, false, OddDblSpc, 3, 8 ,true}, 466 { ARM::VST3q8oddPseudo_UPD, ARM::VST3q8_UPD, false, true, true, OddDblSpc, 3, 8 ,true}, 467 468 { ARM::VST4LNd16Pseudo, ARM::VST4LNd16, false, false, false, SingleSpc, 4, 4 ,true}, 469 { ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, 470 { ARM::VST4LNd32Pseudo, ARM::VST4LNd32, false, false, false, SingleSpc, 4, 2 ,true}, 471 { ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, 472 { ARM::VST4LNd8Pseudo, ARM::VST4LNd8, false, false, false, SingleSpc, 4, 8 ,true}, 473 { ARM::VST4LNd8Pseudo_UPD, ARM::VST4LNd8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, 474 { ARM::VST4LNq16Pseudo, ARM::VST4LNq16, false, false, false, EvenDblSpc, 4, 4,true}, 475 { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true, EvenDblSpc, 4, 4,true}, 476 { ARM::VST4LNq32Pseudo, ARM::VST4LNq32, false, false, false, EvenDblSpc, 4, 2,true}, 477 { ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true, EvenDblSpc, 4, 2,true}, 478 479 { ARM::VST4d16Pseudo, ARM::VST4d16, false, false, false, SingleSpc, 4, 4 ,true}, 480 { ARM::VST4d16Pseudo_UPD, ARM::VST4d16_UPD, false, true, true, SingleSpc, 4, 4 ,true}, 481 { ARM::VST4d32Pseudo, ARM::VST4d32, false, false, false, SingleSpc, 4, 2 ,true}, 482 { ARM::VST4d32Pseudo_UPD, ARM::VST4d32_UPD, false, true, true, SingleSpc, 4, 2 ,true}, 483 { ARM::VST4d8Pseudo, ARM::VST4d8, false, false, false, SingleSpc, 4, 8 ,true}, 484 { ARM::VST4d8Pseudo_UPD, ARM::VST4d8_UPD, false, true, true, SingleSpc, 4, 8 ,true}, 485 486 { ARM::VST4q16Pseudo_UPD, ARM::VST4q16_UPD, false, true, true, 
EvenDblSpc, 4, 4 ,true},
{ ARM::VST4q16oddPseudo, ARM::VST4q16, false, false, false, OddDblSpc, 4, 4 ,true},
{ ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true, OddDblSpc, 4, 4 ,true},
{ ARM::VST4q32Pseudo_UPD, ARM::VST4q32_UPD, false, true, true, EvenDblSpc, 4, 2 ,true},
{ ARM::VST4q32oddPseudo, ARM::VST4q32, false, false, false, OddDblSpc, 4, 2 ,true},
{ ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true, OddDblSpc, 4, 2 ,true},
{ ARM::VST4q8Pseudo_UPD, ARM::VST4q8_UPD, false, true, true, EvenDblSpc, 4, 8 ,true},
{ ARM::VST4q8oddPseudo, ARM::VST4q8, false, false, false, OddDblSpc, 4, 8 ,true},
{ ARM::VST4q8oddPseudo_UPD, ARM::VST4q8_UPD, false, true, true, OddDblSpc, 4, 8 ,true}
};

/// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
/// load or store pseudo instruction.
/// Returns the table entry for \p Opcode, or nullptr when the opcode is not
/// one of the NEON load/store pseudos listed in the table.
static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
#ifndef NDEBUG
  // Make sure the table is sorted. Sortedness is a precondition of the
  // binary search below; it is verified once per process, and only in
  // asserts builds.
  static std::atomic<bool> TableChecked(false);
  if (!TableChecked.load(std::memory_order_relaxed)) {
    assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!");
    TableChecked.store(true, std::memory_order_relaxed);
  }
#endif

  // Binary search on the pseudo opcode (entries compare by PseudoOpc).
  auto I = llvm::lower_bound(NEONLdStTable, Opcode);
  if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
    return I;
  return nullptr;
}

/// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
/// corresponding to the specified register spacing. Not all of the results
/// are necessarily valid, e.g., a Q register only has 2 D subregisters.
518 static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc, 519 const TargetRegisterInfo *TRI, MCRegister &D0, 520 MCRegister &D1, MCRegister &D2, MCRegister &D3) { 521 if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) { 522 D0 = TRI->getSubReg(Reg, ARM::dsub_0); 523 D1 = TRI->getSubReg(Reg, ARM::dsub_1); 524 D2 = TRI->getSubReg(Reg, ARM::dsub_2); 525 D3 = TRI->getSubReg(Reg, ARM::dsub_3); 526 } else if (RegSpc == SingleHighQSpc) { 527 D0 = TRI->getSubReg(Reg, ARM::dsub_4); 528 D1 = TRI->getSubReg(Reg, ARM::dsub_5); 529 D2 = TRI->getSubReg(Reg, ARM::dsub_6); 530 D3 = TRI->getSubReg(Reg, ARM::dsub_7); 531 } else if (RegSpc == SingleHighTSpc) { 532 D0 = TRI->getSubReg(Reg, ARM::dsub_3); 533 D1 = TRI->getSubReg(Reg, ARM::dsub_4); 534 D2 = TRI->getSubReg(Reg, ARM::dsub_5); 535 D3 = TRI->getSubReg(Reg, ARM::dsub_6); 536 } else if (RegSpc == EvenDblSpc) { 537 D0 = TRI->getSubReg(Reg, ARM::dsub_0); 538 D1 = TRI->getSubReg(Reg, ARM::dsub_2); 539 D2 = TRI->getSubReg(Reg, ARM::dsub_4); 540 D3 = TRI->getSubReg(Reg, ARM::dsub_6); 541 } else { 542 assert(RegSpc == OddDblSpc && "unknown register spacing"); 543 D0 = TRI->getSubReg(Reg, ARM::dsub_1); 544 D1 = TRI->getSubReg(Reg, ARM::dsub_3); 545 D2 = TRI->getSubReg(Reg, ARM::dsub_5); 546 D3 = TRI->getSubReg(Reg, ARM::dsub_7); 547 } 548 } 549 550 /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register 551 /// operands to real VLD instructions with D register operands. 
void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;

  // Build the real instruction in place before the pseudo; the pseudo is
  // erased at the end.
  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  unsigned OpIdx = 0;

  bool DstIsDead = MI.getOperand(OpIdx).isDead();
  Register DstReg = MI.getOperand(OpIdx++).getReg();

  // The VLD2DUP "x2" forms take a D-register pair list operand rather than
  // individual D registers.
  bool IsVLD2DUP = TableEntry->RealOpc == ARM::VLD2DUPd8x2 ||
                   TableEntry->RealOpc == ARM::VLD2DUPd16x2 ||
                   TableEntry->RealOpc == ARM::VLD2DUPd32x2 ||
                   TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed ||
                   TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed ||
                   TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed ||
                   TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_register ||
                   TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_register ||
                   TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_register;

  if (IsVLD2DUP) {
    // Select the even or odd D subreg of the pseudo's destination and find
    // the spaced D-pair super-register containing it for the list operand.
    unsigned SubRegIndex;
    if (RegSpc == EvenDblSpc) {
      SubRegIndex = ARM::dsub_0;
    } else {
      assert(RegSpc == OddDblSpc && "Unexpected spacing!");
      SubRegIndex = ARM::dsub_1;
    }
    Register SubReg = TRI->getSubReg(DstReg, SubRegIndex);
    MCRegister DstRegPair =
        TRI->getMatchingSuperReg(SubReg, ARM::dsub_0, &ARM::DPairSpcRegClass);
    MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead));
  } else {
    // Add the individual D subregs as explicit defs. Some real opcodes only
    // name the first register of the list (copyAllListRegs == false).
    MCRegister D0, D1, D2, D3;
    GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
    MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 1 && TableEntry->copyAllListRegs)
      MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 2 && TableEntry->copyAllListRegs)
      MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 3 && TableEntry->copyAllListRegs)
      MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
  }

  // Writeback forms define the updated base register first.
  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the am6offset operand.
  if (TableEntry->hasWritebackOperand) {
    // TODO: The writing-back pseudo instructions we translate here are all
    // defined to take am6offset nodes that are capable to represent both fixed
    // and register forms. Some real instructions, however, do not rely on
    // am6offset and have separate definitions for such forms. When this is the
    // case, fixed forms do not take any offset nodes, so here we skip them for
    // such instructions. Once all real and pseudo writing-back instructions are
    // rewritten without use of am6offset nodes, this code will go away.
    const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
    if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d8Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d16Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d32Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD1d64Twb_fixed ||
        TableEntry->RealOpc == ARM::VLD2DUPd8x2wb_fixed ||
        TableEntry->RealOpc == ARM::VLD2DUPd16x2wb_fixed ||
        TableEntry->RealOpc == ARM::VLD2DUPd32x2wb_fixed) {
      assert(AM6Offset.getReg() == 0 &&
             "A fixed writing-back pseudo instruction provides an offset "
             "register!");
    } else {
      MIB.add(AM6Offset);
    }
  }

  // For an instruction writing double-spaced subregs, the pseudo instruction
  // has an extra operand that is a use of the super-register. Record the
  // operand index and skip over it.
  unsigned SrcOpIdx = 0;
  if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc || RegSpc == SingleLowSpc ||
      RegSpc == SingleHighQSpc || RegSpc == SingleHighTSpc)
    SrcOpIdx = OpIdx++;

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the super-register source operand used for double-spaced subregs over
  // to the new instruction as an implicit operand.
  if (SrcOpIdx != 0) {
    MachineOperand MO = MI.getOperand(SrcOpIdx);
    MO.setImplicit(true);
    MIB.add(MO);
  }
  // Add an implicit def for the super-register, so liveness of the full Q/QQ/
  // QQQQ register is preserved after the pseudo disappears.
  MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
  MIB.copyImplicitOps(MI);

  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}

/// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
/// operands to real VST instructions with D register operands.
void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  unsigned OpIdx = 0;
  // Writeback forms define the updated base register first.
  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  if (TableEntry->hasWritebackOperand) {
    // TODO: The writing-back pseudo instructions we translate here are all
    // defined to take am6offset nodes that are capable to represent both fixed
    // and register forms. Some real instructions, however, do not rely on
    // am6offset and have separate definitions for such forms. When this is the
    // case, fixed forms do not take any offset nodes, so here we skip them for
    // such instructions. Once all real and pseudo writing-back instructions are
    // rewritten without use of am6offset nodes, this code will go away.
    const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
    if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d16Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d32Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d64Qwb_fixed ||
        TableEntry->RealOpc == ARM::VST1d8Twb_fixed ||
        TableEntry->RealOpc == ARM::VST1d16Twb_fixed ||
        TableEntry->RealOpc == ARM::VST1d32Twb_fixed ||
        TableEntry->RealOpc == ARM::VST1d64Twb_fixed) {
      assert(AM6Offset.getReg() == 0 &&
             "A fixed writing-back pseudo instruction provides an offset "
             "register!");
    } else {
      MIB.add(AM6Offset);
    }
  }

  // Add the source D subregs as explicit uses of the register list.
  bool SrcIsKill = MI.getOperand(OpIdx).isKill();
  bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
  Register SrcReg = MI.getOperand(OpIdx++).getReg();
  MCRegister D0, D1, D2, D3;
  GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
  MIB.addReg(D0, getUndefRegState(SrcIsUndef));
  if (NumRegs > 1 && TableEntry->copyAllListRegs)
    MIB.addReg(D1, getUndefRegState(SrcIsUndef));
  if (NumRegs > 2 && TableEntry->copyAllListRegs)
    MIB.addReg(D2, getUndefRegState(SrcIsUndef));
  if (NumRegs > 3 && TableEntry->copyAllListRegs)
    MIB.addReg(D3, getUndefRegState(SrcIsUndef));

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
    MIB->addRegisterKilled(SrcReg, TRI, true);
  else if (!SrcIsUndef)
    MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
  MIB.copyImplicitOps(MI);

  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}

/// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
/// register operands to real instructions with D register operands.
void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
  assert(TableEntry && "NEONLdStTable lookup failed");
  NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
  unsigned NumRegs = TableEntry->NumRegs;
  unsigned RegElts = TableEntry->RegElts;

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                    TII->get(TableEntry->RealOpc));
  unsigned OpIdx = 0;
  // The lane operand is always the 3rd from last operand, before the 2
  // predicate operands.
  unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();

  // Adjust the lane and spacing as needed for Q registers: lanes in the upper
  // half of a Q register live in the odd D subregisters.
  assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
  if (RegSpc == EvenDblSpc && Lane >= RegElts) {
    RegSpc = OddDblSpc;
    Lane -= RegElts;
  }
  assert(Lane < RegElts && "out of range lane for VLD/VST-lane");

  MCRegister D0, D1, D2, D3;
  unsigned DstReg = 0;
  bool DstIsDead = false;
  if (TableEntry->IsLoad) {
    // Loads define the D subregs of the destination super-register.
    DstIsDead = MI.getOperand(OpIdx).isDead();
    DstReg = MI.getOperand(OpIdx++).getReg();
    GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
    MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 1)
      MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 2)
      MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
    if (NumRegs > 3)
      MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
  }

  if (TableEntry->isUpdating)
    MIB.add(MI.getOperand(OpIdx++));

  // Copy the addrmode6 operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));
  // Copy the am6offset operand.
  if (TableEntry->hasWritebackOperand)
    MIB.add(MI.getOperand(OpIdx++));

  // Grab the super-register source. Both loads and stores carry it as an
  // explicit use on the pseudo; for stores it supplies the D subregs below.
  MachineOperand MO = MI.getOperand(OpIdx++);
  if (!TableEntry->IsLoad)
    GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);

  // Add the subregs as sources of the new instruction.
  unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
                       getKillRegState(MO.isKill()));
  MIB.addReg(D0, SrcFlags);
  if (NumRegs > 1)
    MIB.addReg(D1, SrcFlags);
  if (NumRegs > 2)
    MIB.addReg(D2, SrcFlags);
  if (NumRegs > 3)
    MIB.addReg(D3, SrcFlags);

  // Add the (possibly adjusted) lane number operand; skip the pseudo's
  // original lane operand.
  MIB.addImm(Lane);
  OpIdx += 1;

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Copy the super-register source to be an implicit source.
  MO.setImplicit(true);
  MIB.add(MO);
  if (TableEntry->IsLoad)
    // Add an implicit def for the super-register.
    MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
  MIB.copyImplicitOps(MI);
  // Transfer memoperands.
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
}

/// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
/// register operands to real instructions with D register operands.
/// \p Opc is the real opcode to emit; \p IsExt is true for VTBX (which reads
/// its destination as an extra source).
void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
                                 unsigned Opc, bool IsExt) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
  unsigned OpIdx = 0;

  // Transfer the destination register operand.
  MIB.add(MI.getOperand(OpIdx++));
  if (IsExt) {
    // VTBX also reads the previous destination value.
    MachineOperand VdSrc(MI.getOperand(OpIdx++));
    MIB.add(VdSrc);
  }

  // The table register list: add the first D subreg explicitly; the rest of
  // the super-register is covered by the implicit use added below.
  bool SrcIsKill = MI.getOperand(OpIdx).isKill();
  Register SrcReg = MI.getOperand(OpIdx++).getReg();
  MCRegister D0, D1, D2, D3;
  GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
  MIB.addReg(D0);

  // Copy the other source register operand.
  MachineOperand VmSrc(MI.getOperand(OpIdx++));
  MIB.add(VmSrc);

  // Copy the predicate operands.
  MIB.add(MI.getOperand(OpIdx++));
  MIB.add(MI.getOperand(OpIdx++));

  // Add an implicit kill and use for the super-reg.
  MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
  MIB.copyImplicitOps(MI);
  MI.eraseFromParent();
  LLVM_DEBUG(dbgs() << "To: "; MIB.getInstr()->dump(););
}

/// Expand the MQQPR/MQQQQPR load and store pseudos into VLDMDIA/VSTMDIA over
/// the D subregisters of the QQ (4 D regs) or QQQQ (8 D regs) operand.
void ARMExpandPseudo::ExpandMQQPRLoadStore(MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  MachineBasicBlock &MBB = *MI.getParent();
  unsigned NewOpc =
      MI.getOpcode() == ARM::MQQPRStore || MI.getOpcode() == ARM::MQQQQPRStore
          ? ARM::VSTMDIA
          : ARM::VLDMDIA;
  MachineInstrBuilder MIB =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));

  // Carry the kill/def state of the value operand over to each D subreg.
  unsigned Flags = getKillRegState(MI.getOperand(0).isKill()) |
                   getDefRegState(MI.getOperand(0).isDef());
  Register SrcReg = MI.getOperand(0).getReg();

  // Copy the destination register.
  MIB.add(MI.getOperand(1));
  MIB.add(predOps(ARMCC::AL));
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_0), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_1), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_2), Flags);
  MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_3), Flags);
  if (MI.getOpcode() == ARM::MQQQQPRStore ||
      MI.getOpcode() == ARM::MQQQQPRLoad) {
    // QQQQ operands cover eight D registers.
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_4), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_5), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_6), Flags);
    MIB.addReg(TRI->getSubReg(SrcReg, ARM::dsub_7), Flags);
  }

  // For stores, keep the super-register alive across the expansion.
  if (NewOpc == ARM::VSTMDIA)
    MIB.addReg(SrcReg, RegState::Implicit);

  MIB.copyImplicitOps(MI);
  MIB.cloneMemRefs(MI);
  MI.eraseFromParent();
}

static bool IsAnAddressOperand(const MachineOperand &MO) {
  // This check is overly conservative. Unless we are certain that the machine
  // operand is not a symbol reference, we return that it is a symbol reference.
  // This is important as the load pair may not be split up Windows.
  switch (MO.getType()) {
  case MachineOperand::MO_Register:
  case MachineOperand::MO_Immediate:
  case MachineOperand::MO_CImmediate:
  case MachineOperand::MO_FPImmediate:
  case MachineOperand::MO_ShuffleMask:
    return false;
  case MachineOperand::MO_MachineBasicBlock:
    return true;
  case MachineOperand::MO_FrameIndex:
    return false;
  case MachineOperand::MO_ConstantPoolIndex:
  case MachineOperand::MO_TargetIndex:
  case MachineOperand::MO_JumpTableIndex:
  case MachineOperand::MO_ExternalSymbol:
  case MachineOperand::MO_GlobalAddress:
  case MachineOperand::MO_BlockAddress:
    return true;
  case MachineOperand::MO_RegisterMask:
  case MachineOperand::MO_RegisterLiveOut:
    return false;
  case MachineOperand::MO_Metadata:
  case MachineOperand::MO_MCSymbol:
    return true;
  case MachineOperand::MO_DbgInstrRef:
  case MachineOperand::MO_CFIIndex:
    return false;
  case MachineOperand::MO_IntrinsicID:
  case MachineOperand::MO_Predicate:
    llvm_unreachable("should not exist post-isel");
  }
  llvm_unreachable("unhandled machine operand type");
}

/// Return a copy of \p MO marked as an implicit operand.
static MachineOperand makeImplicit(const MachineOperand &MO) {
  MachineOperand NewMO = MO;
  NewMO.setImplicit();
  return NewMO;
}

/// Build the operand for one MOV/MOVT (or Thumb1 byte-wise mov) of a 32-bit
/// value: for immediates, extract the bit-field selected by \p TargetFlag;
/// for symbolic operands, rebuild the operand with \p TargetFlag OR'ed into
/// its target flags so the relocation selects the right part.
static MachineOperand getMovOperand(const MachineOperand &MO,
                                    unsigned TargetFlag) {
  unsigned TF = MO.getTargetFlags() | TargetFlag;
  switch (MO.getType()) {
  case MachineOperand::MO_Immediate: {
    unsigned Imm = MO.getImm();
    switch (TargetFlag) {
    case ARMII::MO_HI_8_15:
      Imm = (Imm >> 24) & 0xff;
      break;
    case ARMII::MO_HI_0_7:
      Imm = (Imm >> 16) & 0xff;
      break;
    case ARMII::MO_LO_8_15:
      Imm = (Imm >> 8) & 0xff;
      break;
    case ARMII::MO_LO_0_7:
      Imm = Imm & 0xff;
      break;
    case ARMII::MO_HI16:
      Imm = (Imm >> 16) & 0xffff;
      break;
    case ARMII::MO_LO16:
      Imm = Imm & 0xffff;
      break;
    default:
      llvm_unreachable("Only HI/LO target flags are expected");
    }
    return MachineOperand::CreateImm(Imm);
  }
  case MachineOperand::MO_ExternalSymbol:
    return MachineOperand::CreateES(MO.getSymbolName(), TF);
  case MachineOperand::MO_JumpTableIndex:
    return MachineOperand::CreateJTI(MO.getIndex(), TF);
  default:
    return MachineOperand::CreateGA(MO.getGlobal(), MO.getOffset(), TF);
  }
}

/// Expand a Thumb1 32-bit immediate move pseudo (no MOVW/MOVT available)
/// into a sequence of tMOVi8/tADDi8/tLSLri building the value a byte at a
/// time. Operand 0 is the destination, operand 1 the 32-bit source.
void ARMExpandPseudo::ExpandTMOV32BitImm(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  const MachineOperand &MO = MI.getOperand(1);
  unsigned MIFlags = MI.getFlags();

  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  // Expand the mov into a sequence of mov/add+lsl of the individual bytes. We
  // want to avoid emitting any zero bytes, as they won't change the result, and
  // also don't want any pointless shifts, so instead of immediately emitting
  // the shift for a byte we keep track of how much we will need to shift and do
  // it before the next nonzero byte.
  unsigned PendingShift = 0;
  for (unsigned Byte = 0; Byte < 4; ++Byte) {
    // Walk the bytes most-significant first.
    unsigned Flag = Byte == 0   ? ARMII::MO_HI_8_15
                    : Byte == 1 ? ARMII::MO_HI_0_7
                    : Byte == 2 ? ARMII::MO_LO_8_15
                                : ARMII::MO_LO_0_7;
    MachineOperand Operand = getMovOperand(MO, Flag);
    bool ZeroImm = Operand.isImm() && Operand.getImm() == 0;
    // The first emitted byte is a plain move; later bytes are added in.
    unsigned Op = PendingShift ? ARM::tADDi8 : ARM::tMOVi8;

    // Emit the pending shift if we're going to emit this byte or if we've
    // reached the end.
    if (PendingShift && (!ZeroImm || Byte == 3)) {
      MachineInstr *Lsl =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tLSLri), DstReg)
              .add(t1CondCodeOp(true))
              .addReg(DstReg)
              .addImm(PendingShift)
              .add(predOps(ARMCC::AL))
              .setMIFlags(MIFlags);
      (void)Lsl;
      LLVM_DEBUG(dbgs() << "And: "; Lsl->dump(););
      PendingShift = 0;
    }

    // Emit this byte if it's nonzero.
    if (!ZeroImm) {
      MachineInstrBuilder MIB =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Op), DstReg)
              .add(t1CondCodeOp(true));
      if (Op == ARM::tADDi8)
        MIB.addReg(DstReg);
      MIB.add(Operand);
      MIB.add(predOps(ARMCC::AL));
      MIB.setMIFlags(MIFlags);
      LLVM_DEBUG(dbgs() << (Op == ARM::tMOVi8 ? "To: " : "And:") << " ";
                 MIB.getInstr()->dump(););
    }

    // Don't accumulate the shift value if we've not yet seen a nonzero byte.
    if (PendingShift || !ZeroImm)
      PendingShift += 8;
  }

  // The dest is dead on the last instruction we emitted if it was dead on the
  // original instruction.
  (--MBBI)->getOperand(0).setIsDead(DstIsDead);

  MI.eraseFromParent();
}

/// Expand MOVi32imm / t2MOVi32imm (and their conditional MOVCC variants) into
/// either a MOVW/MOVT pair, or on pre-v6T2 ARM into mov+orr / mvn+sub using
/// two shifter-operand immediates. On Windows, address-forming pairs are
/// bundled so they are not split apart.
void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator &MBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();
  Register PredReg;
  ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
  // Conditional forms carry the false-value register at operand 1, so the
  // immediate/symbol source sits one operand later.
  const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
  bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
  MachineInstrBuilder LO16, HI16;
  LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());

  if (!STI->hasV6T2Ops() &&
      (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
    // No MOVW/MOVT available: build the value from two shifter-operand
    // immediates instead.
    // FIXME Windows CE supports older ARM CPUs
    assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");

    assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
    unsigned ImmVal = (unsigned)MO.getImm();
    unsigned SOImmValV1 = 0, SOImmValV2 = 0;

    if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
                 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
                 .addReg(DstReg);
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
    } else { // Expand into a mvn + sub.
      LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
      HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
                 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
                 .addReg(DstReg);
      SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
      SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
      // MVN writes the bitwise NOT of its operand, so pre-invert the first
      // part accordingly.
      SOImmValV1 = ~(-SOImmValV1);
    }

    unsigned MIFlags = MI.getFlags();
    LO16 = LO16.addImm(SOImmValV1);
    HI16 = HI16.addImm(SOImmValV2);
    LO16.cloneMemRefs(MI);
    HI16.cloneMemRefs(MI);
    LO16.setMIFlags(MIFlags);
    HI16.setMIFlags(MIFlags);
    LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
    if (isCC)
      // Keep the false-value register alive as an implicit use.
      LO16.add(makeImplicit(MI.getOperand(1)));
    LO16.copyImplicitOps(MI);
    HI16.copyImplicitOps(MI);
    MI.eraseFromParent();
    return;
  }

  unsigned LO16Opc = 0;
  unsigned HI16Opc = 0;
  unsigned MIFlags = MI.getFlags();
  if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
    LO16Opc = ARM::t2MOVi16;
    HI16Opc = ARM::t2MOVTi16;
  } else {
    LO16Opc = ARM::MOVi16;
    HI16Opc = ARM::MOVTi16;
  }

  LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
  LO16.setMIFlags(MIFlags);
  LO16.add(getMovOperand(MO, ARMII::MO_LO16));
  LO16.cloneMemRefs(MI);
  LO16.addImm(Pred).addReg(PredReg);
  if (isCC)
    LO16.add(makeImplicit(MI.getOperand(1)));
  LO16.copyImplicitOps(MI);
  LLVM_DEBUG(dbgs() << "To: "; LO16.getInstr()->dump(););

  // Skip the MOVT when the high half is a known-zero immediate.
  MachineOperand HIOperand = getMovOperand(MO, ARMII::MO_HI16);
  if (!(HIOperand.isImm() && HIOperand.getImm() == 0)) {
    HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
               .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
               .addReg(DstReg);
    HI16.setMIFlags(MIFlags);
    HI16.add(HIOperand);
    HI16.cloneMemRefs(MI);
    HI16.addImm(Pred).addReg(PredReg);
    HI16.copyImplicitOps(MI);
    LLVM_DEBUG(dbgs() << "And: "; HI16.getInstr()->dump(););
  } else {
    // Only one instruction was emitted; it inherits the dead flag.
    LO16->getOperand(0).setIsDead(DstIsDead);
  }

  if (RequiresBundling)
    finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());

  MI.eraseFromParent();
}

// The size of the area, accessed by that VLSTM/VLLDM
// S0-S31 + FPSCR + 8 more bytes (VPR + pad, or just pad)
static const int CMSE_FP_SAVE_SIZE = 136;

/// Compute which of \p Regs need clearing before a non-secure transition:
/// the set difference of \p Regs (sorted) and the registers \p MI already
/// uses (which carry live values and must be preserved).
static void determineGPRegsToClear(const MachineInstr &MI,
                                   const std::initializer_list<unsigned> &Regs,
                                   SmallVectorImpl<unsigned> &ClearRegs) {
  SmallVector<unsigned, 4> OpRegs;
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isUse())
      continue;
    OpRegs.push_back(Op.getReg());
  }
  // set_difference requires both ranges sorted; Regs is assumed sorted by
  // the caller.
  llvm::sort(OpRegs);

  std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
                      std::back_inserter(ClearRegs));
}

/// Clear the general-purpose registers in \p ClearRegs (plus the flags) at
/// \p MBBI. On v8.1-M this is a single CLRM; otherwise each register is
/// overwritten with \p ClobberReg and APSR is cleared via MSR.
void ARMExpandPseudo::CMSEClearGPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
    unsigned ClobberReg) {

  if (STI->hasV8_1MMainlineOps()) {
    // Clear the registers using the CLRM instruction.
    MachineInstrBuilder CLRM =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
    for (unsigned R : ClearRegs)
      CLRM.addReg(R, RegState::Define);
    CLRM.addReg(ARM::APSR, RegState::Define);
    CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
  } else {
    // Clear the registers and flags by copying ClobberReg into them.
    // (Baseline can't do a high register clear in one instruction).
    for (unsigned Reg : ClearRegs) {
      // ClobberReg itself already holds the clearing value.
      if (Reg == ClobberReg)
        continue;
      BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
          .addReg(ClobberReg)
          .add(predOps(ARMCC::AL));
    }

    // Clear the flags by writing ClobberReg to APSR (0xc00 also covers the
    // GE bits when the DSP extension is present).
    BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
        .addImm(STI->hasDSP() ? 0xc00 : 0x800)
        .addReg(ClobberReg)
        .add(predOps(ARMCC::AL));
  }
}

// Find which FP registers need to be cleared. The parameter `ClearRegs` is
// initialised with all elements set to true, and this function resets all the
// bits, which correspond to register uses. Returns true if any floating point
// register is defined, false otherwise.
static bool determineFPRegsToClear(const MachineInstr &MI,
                                   BitVector &ClearRegs) {
  bool DefFP = false;
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg())
      continue;

    Register Reg = Op.getReg();
    if (Op.isDef()) {
      if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
          (Reg >= ARM::D0 && Reg <= ARM::D15) ||
          (Reg >= ARM::S0 && Reg <= ARM::S31))
        DefFP = true;
      continue;
    }

    // Map each used Q/D/S register onto the S-register bits it covers and
    // clear them (one Q == 4 S regs, one D == 2 S regs).
    if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
      int R = Reg - ARM::Q0;
      ClearRegs.reset(R * 4, (R + 1) * 4);
    } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
      int R = Reg - ARM::D0;
      ClearRegs.reset(R * 2, (R + 1) * 2);
    } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
      ClearRegs[Reg - ARM::S0] = false;
    }
  }
  return DefFP;
}

/// Clear the FP registers not used by the instruction at \p MBBI, dispatching
/// to the v8.1-M (VSCCLRM) or v8.0-M (copy-from-LR) implementation.
MachineBasicBlock &
ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI) {
  // One bit per S0-S15; bits still set after the scan must be cleared.
  BitVector ClearRegs(16, true);
  (void)determineFPRegsToClear(*MBBI, ClearRegs);

  if (STI->hasV8_1MMainlineOps())
    return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
  else
    return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
}

// Clear the FP registers for v8.0-M, by copying over the content
// of LR. Uses R12 as a scratch register.
MachineBasicBlock &
ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   const BitVector &ClearRegs) {
  if (!STI->hasFPRegs())
    return MBB;

  auto &RetI = *MBBI;
  const DebugLoc &DL = RetI.getDebugLoc();

  // If optimising for minimum size, clear FP registers unconditionally.
  // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
  // don't clear them if they belong to the non-secure state.
  MachineBasicBlock *ClearBB, *DoneBB;
  if (STI->hasMinSize()) {
    ClearBB = DoneBB = &MBB;
  } else {
    // Split the block: MBB tests SFPA, ClearBB does the clearing, DoneBB
    // receives the rest of the original block (including the return).
    MachineFunction *MF = MBB.getParent();
    ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
    DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

    MF->insert(++MBB.getIterator(), ClearBB);
    MF->insert(++ClearBB->getIterator(), DoneBB);

    DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end());
    DoneBB->transferSuccessors(&MBB);
    MBB.addSuccessor(ClearBB);
    MBB.addSuccessor(DoneBB);
    ClearBB->addSuccessor(DoneBB);

    // At the new basic blocks we need to have live-in the registers, used
    // for the return value as well as LR, used to clear registers.
    for (const MachineOperand &Op : RetI.operands()) {
      if (!Op.isReg())
        continue;
      Register Reg = Op.getReg();
      if (Reg == ARM::NoRegister || Reg == ARM::LR)
        continue;
      assert(Reg.isPhysical() && "Unallocated register");
      ClearBB->addLiveIn(Reg);
      DoneBB->addLiveIn(Reg);
    }
    ClearBB->addLiveIn(ARM::LR);
    DoneBB->addLiveIn(ARM::LR);

    // Read the CONTROL register.
    BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12)
        .addImm(20)
        .add(predOps(ARMCC::AL));
    // Check bit 3 (SFPA).
    BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri))
        .addReg(ARM::R12)
        .addImm(8)
        .add(predOps(ARMCC::AL));
    // If SFPA is clear, jump over ClearBB to DoneBB.
    BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc))
        .addMBB(DoneBB)
        .addImm(ARMCC::EQ)
        .addReg(ARM::CPSR, RegState::Kill);
  }

  // Emit the clearing sequence: overwrite each selected register with the
  // contents of LR (a value the non-secure caller is entitled to see).
  for (unsigned D = 0; D < 8; D++) {
    // Attempt to clear as double
    if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) {
      unsigned Reg = ARM::D0 + D;
      BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg)
          .addReg(ARM::LR)
          .addReg(ARM::LR)
          .add(predOps(ARMCC::AL));
    } else {
      // Clear first part as single
      if (ClearRegs[D * 2 + 0]) {
        unsigned Reg = ARM::S0 + D * 2;
        BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
            .addReg(ARM::LR)
            .add(predOps(ARMCC::AL));
      }
      // Clear second part as single
      if (ClearRegs[D * 2 + 1]) {
        unsigned Reg = ARM::S0 + D * 2 + 1;
        BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
            .addReg(ARM::LR)
            .add(predOps(ARMCC::AL));
      }
    }
  }

  // Clear FPSCR bits 0-4, 7, 28-31
  // The other bits are program global according to the AAPCS
  BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12)
      .add(predOps(ARMCC::AL));
  BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
      .addReg(ARM::R12)
      .addImm(0x0000009F)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
  BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
      .addReg(ARM::R12)
      .addImm(0xF0000000)
      .add(predOps(ARMCC::AL))
      .add(condCodeOp());
  BuildMI(ClearBB, DL, TII->get(ARM::VMSR))
      .addReg(ARM::R12)
      .add(predOps(ARMCC::AL));

  return *DoneBB;
}

/// Clear the FP registers for v8.1-M using VSCCLRM, which also clears VPR
/// and is only performed in the secure state by hardware.
MachineBasicBlock &
ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI,
                                    const BitVector &ClearRegs) {
  auto &RetI = *MBBI;

  // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
  // each contiguous sequence of S-registers.
  // `Start` points one register before the current range; `End` is the last
  // register in it.
  int Start = -1, End = -1;
  for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
    if (ClearRegs[S] && S == End + 1) {
      End = S; // extend range
      continue;
    }
    // Emit current range.
    if (Start < End) {
      MachineInstrBuilder VSCCLRM =
          BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
              .add(predOps(ARMCC::AL));
      while (++Start <= End)
        VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
      VSCCLRM.addReg(ARM::VPR, RegState::Define);
    }
    Start = End = S;
  }
  // Emit last range.
  if (Start < End) {
    MachineInstrBuilder VSCCLRM =
        BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
            .add(predOps(ARMCC::AL));
    while (++Start <= End)
      VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
    VSCCLRM.addReg(ARM::VPR, RegState::Define);
  }

  return MBB;
}

/// Save and clear the FP register state before a non-secure call, choosing
/// the v8.1-M or v8.0-M sequence based on the subtarget.
void ARMExpandPseudo::CMSESaveClearFPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
  if (STI->hasV8_1MMainlineOps())
    CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
  else if (STI->hasV8MMainlineOps())
    CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
}

// Save and clear FP registers if present
void ARMExpandPseudo::CMSESaveClearFPRegsV8(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {

  // Store an available register for FPSCR clearing
  assert(!ScratchRegs.empty());
  unsigned SpareReg = ScratchRegs.front();

  // save space on stack for VLSTM
  BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
      .addReg(ARM::SP)
      .addImm(CMSE_FP_SAVE_SIZE >> 2)
.add(predOps(ARMCC::AL)); 1425 1426 // Use ScratchRegs to store the fp regs 1427 std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs; 1428 std::vector<unsigned> NonclearedFPRegs; 1429 bool ReturnsFPReg = false; 1430 for (const MachineOperand &Op : MBBI->operands()) { 1431 if (Op.isReg() && Op.isUse()) { 1432 Register Reg = Op.getReg(); 1433 assert(!ARM::DPRRegClass.contains(Reg) || 1434 ARM::DPR_VFP2RegClass.contains(Reg)); 1435 assert(!ARM::QPRRegClass.contains(Reg)); 1436 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1437 if (ScratchRegs.size() >= 2) { 1438 unsigned SaveReg2 = ScratchRegs.pop_back_val(); 1439 unsigned SaveReg1 = ScratchRegs.pop_back_val(); 1440 ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2); 1441 1442 // Save the fp register to the normal registers 1443 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD)) 1444 .addReg(SaveReg1, RegState::Define) 1445 .addReg(SaveReg2, RegState::Define) 1446 .addReg(Reg) 1447 .add(predOps(ARMCC::AL)); 1448 } else { 1449 NonclearedFPRegs.push_back(Reg); 1450 } 1451 } else if (ARM::SPRRegClass.contains(Reg)) { 1452 if (ScratchRegs.size() >= 1) { 1453 unsigned SaveReg = ScratchRegs.pop_back_val(); 1454 ClearedFPRegs.emplace_back(Reg, SaveReg, 0); 1455 1456 // Save the fp register to the normal registers 1457 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg) 1458 .addReg(Reg) 1459 .add(predOps(ARMCC::AL)); 1460 } else { 1461 NonclearedFPRegs.push_back(Reg); 1462 } 1463 } 1464 } else if (Op.isReg() && Op.isDef()) { 1465 Register Reg = Op.getReg(); 1466 if (ARM::SPRRegClass.contains(Reg) || ARM::DPRRegClass.contains(Reg) || 1467 ARM::QPRRegClass.contains(Reg)) 1468 ReturnsFPReg = true; 1469 } 1470 } 1471 1472 bool PassesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty()); 1473 1474 if (PassesFPReg || ReturnsFPReg) 1475 assert(STI->hasFPRegs() && "Subtarget needs fpregs"); 1476 1477 // CVE-2024-7883 1478 // 1479 // The VLLDM/VLSTM instructions set up lazy state preservation, but they 1480 // 
execute as NOPs if the FP register file is not considered to contain 1481 // secure data, represented by the CONTROL_S.SFPA bit. This means that the 1482 // state of CONTROL_S.SFPA must be the same when these two instructions are 1483 // executed. That might not be the case if we haven't used any FP 1484 // instructions before the VLSTM, so CONTROL_S.SFPA is clear, but do have one 1485 // before the VLLDM, which sets it.. 1486 // 1487 // If we can't prove that SFPA will be the same for the VLSTM and VLLDM, we 1488 // execute a "vmov s0, s0" instruction before the VLSTM to ensure that 1489 // CONTROL_S.SFPA is set for both. 1490 // 1491 // That can only happen for callees which take no FP arguments (or we'd have 1492 // inserted a VMOV above) and which return values in FP regs (so that we need 1493 // to use a VMOV to back-up the return value before the VLLDM). It also can't 1494 // happen if the call is dominated by other existing floating-point 1495 // instructions, but we don't currently check for that case. 1496 // 1497 // These conditions mean that we only emit this instruction when using the 1498 // hard-float ABI, which means we can assume that FP instructions are 1499 // available, and don't need to make it conditional like we do for the 1500 // CVE-2021-35465 workaround. 1501 if (ReturnsFPReg && !PassesFPReg) { 1502 bool S0Dead = !LiveRegs.contains(ARM::S0); 1503 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVS)) 1504 .addReg(ARM::S0, RegState::Define | getDeadRegState(S0Dead)) 1505 .addReg(ARM::S0, getUndefRegState(S0Dead)) 1506 .add(predOps(ARMCC::AL)); 1507 } 1508 1509 // Lazy store all fp registers to the stack. 1510 // This executes as NOP in the absence of floating-point support. 1511 MachineInstrBuilder VLSTM = 1512 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) 1513 .addReg(ARM::SP) 1514 .add(predOps(ARMCC::AL)) 1515 .addImm(0); // Represents a pseoudo register list, has no effect on 1516 // the encoding. 
1517 // Mark non-live registers as undef 1518 for (MachineOperand &MO : VLSTM->implicit_operands()) { 1519 if (MO.isReg() && !MO.isDef()) { 1520 Register Reg = MO.getReg(); 1521 MO.setIsUndef(!LiveRegs.contains(Reg)); 1522 } 1523 } 1524 1525 // Restore all arguments 1526 for (const auto &Regs : ClearedFPRegs) { 1527 unsigned Reg, SaveReg1, SaveReg2; 1528 std::tie(Reg, SaveReg1, SaveReg2) = Regs; 1529 if (ARM::DPR_VFP2RegClass.contains(Reg)) 1530 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg) 1531 .addReg(SaveReg1) 1532 .addReg(SaveReg2) 1533 .add(predOps(ARMCC::AL)); 1534 else if (ARM::SPRRegClass.contains(Reg)) 1535 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg) 1536 .addReg(SaveReg1) 1537 .add(predOps(ARMCC::AL)); 1538 } 1539 1540 for (unsigned Reg : NonclearedFPRegs) { 1541 if (ARM::DPR_VFP2RegClass.contains(Reg)) { 1542 if (STI->isLittle()) { 1543 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg) 1544 .addReg(ARM::SP) 1545 .addImm((Reg - ARM::D0) * 2) 1546 .add(predOps(ARMCC::AL)); 1547 } else { 1548 // For big-endian targets we need to load the two subregisters of Reg 1549 // manually because VLDRD would load them in wrong order 1550 MCRegister SReg0 = TRI->getSubReg(Reg, ARM::ssub_0); 1551 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0) 1552 .addReg(ARM::SP) 1553 .addImm((Reg - ARM::D0) * 2) 1554 .add(predOps(ARMCC::AL)); 1555 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1) 1556 .addReg(ARM::SP) 1557 .addImm((Reg - ARM::D0) * 2 + 1) 1558 .add(predOps(ARMCC::AL)); 1559 } 1560 } else if (ARM::SPRRegClass.contains(Reg)) { 1561 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg) 1562 .addReg(ARM::SP) 1563 .addImm(Reg - ARM::S0) 1564 .add(predOps(ARMCC::AL)); 1565 } 1566 } 1567 // restore FPSCR from stack and clear bits 0-4, 7, 28-31 1568 // The other bits are program global according to the AAPCS 1569 if (PassesFPReg) { 1570 BuildMI(MBB, MBBI, DL, TII->get(ARM::tLDRspi), SpareReg) 1571 .addReg(ARM::SP) 1572 .addImm(0x10) 1573 
.add(predOps(ARMCC::AL)); 1574 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg) 1575 .addReg(SpareReg) 1576 .addImm(0x0000009F) 1577 .add(predOps(ARMCC::AL)) 1578 .add(condCodeOp()); 1579 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg) 1580 .addReg(SpareReg) 1581 .addImm(0xF0000000) 1582 .add(predOps(ARMCC::AL)) 1583 .add(condCodeOp()); 1584 BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR)) 1585 .addReg(SpareReg) 1586 .add(predOps(ARMCC::AL)); 1587 // The ldr must happen after a floating point instruction. To prevent the 1588 // post-ra scheduler to mess with the order, we create a bundle. 1589 finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator()); 1590 } 1591 } 1592 1593 void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB, 1594 MachineBasicBlock::iterator MBBI, 1595 DebugLoc &DL, 1596 const LivePhysRegs &LiveRegs) { 1597 BitVector ClearRegs(32, true); 1598 bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs); 1599 1600 // If the instruction does not write to a FP register and no elements were 1601 // removed from the set, then no FP registers were used to pass 1602 // arguments/returns. 1603 if (!DefFP && ClearRegs.count() == ClearRegs.size()) { 1604 // save space on stack for VLSTM 1605 BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP) 1606 .addReg(ARM::SP) 1607 .addImm(CMSE_FP_SAVE_SIZE >> 2) 1608 .add(predOps(ARMCC::AL)); 1609 1610 // Lazy store all FP registers to the stack 1611 MachineInstrBuilder VLSTM = 1612 BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM)) 1613 .addReg(ARM::SP) 1614 .add(predOps(ARMCC::AL)) 1615 .addImm(0); // Represents a pseoudo register list, has no effect on 1616 // the encoding. 1617 // Mark non-live registers as undef 1618 for (MachineOperand &MO : VLSTM->implicit_operands()) { 1619 if (MO.isReg() && !MO.isDef()) { 1620 Register Reg = MO.getReg(); 1621 MO.setIsUndef(!LiveRegs.contains(Reg)); 1622 } 1623 } 1624 } else { 1625 // Push all the callee-saved registers (s16-s31). 
    MachineInstrBuilder VPUSH =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
      VPUSH.addReg(Reg);

    // Clear FP registers with a VSCCLRM.
    (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);

    // Save floating-point context.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP)
        .addReg(ARM::SP)
        .addImm(-8)
        .add(predOps(ARMCC::AL));
  }
}

// Restore FP registers if present. Dispatches to the v8.1-M or v8-M
// implementation; no code is emitted on other subtargets.
void ARMExpandPseudo::CMSERestoreFPRegs(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    SmallVectorImpl<unsigned> &AvailableRegs) {
  if (STI->hasV8_1MMainlineOps())
    CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
  else if (STI->hasV8MMainlineOps())
    CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
}

// v8-M: preserve the callee's FP return values across the lazy-restore VLLDM
// (parking them in GPRs, or spilling to the stack when GPRs run out), with an
// optional CVE-2021-35465 mitigation bundle, then pop the VLSTM save area.
void ARMExpandPseudo::CMSERestoreFPRegsV8(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    SmallVectorImpl<unsigned> &AvailableRegs) {

  // Keep a scratch register for the mitigation sequence.
  unsigned ScratchReg = ARM::NoRegister;
  if (STI->fixCMSE_CVE_2021_35465())
    ScratchReg = AvailableRegs.pop_back_val();

  // Use AvailableRegs to store the fp regs
  std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
  std::vector<unsigned> NonclearedFPRegs;
  for (const MachineOperand &Op : MBBI->operands()) {
    if (Op.isReg() && Op.isDef()) {
      Register Reg = Op.getReg();
      assert(!ARM::DPRRegClass.contains(Reg) ||
             ARM::DPR_VFP2RegClass.contains(Reg));
      assert(!ARM::QPRRegClass.contains(Reg));
      if (ARM::DPR_VFP2RegClass.contains(Reg)) {
        if (AvailableRegs.size() >= 2) {
          unsigned SaveReg2 = AvailableRegs.pop_back_val();
          unsigned SaveReg1 = AvailableRegs.pop_back_val();
          ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);

          // Save the fp register to the normal registers
          BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
              .addReg(SaveReg1, RegState::Define)
              .addReg(SaveReg2, RegState::Define)
              .addReg(Reg)
              .add(predOps(ARMCC::AL));
        } else {
          NonclearedFPRegs.push_back(Reg);
        }
      } else if (ARM::SPRRegClass.contains(Reg)) {
        if (AvailableRegs.size() >= 1) {
          unsigned SaveReg = AvailableRegs.pop_back_val();
          ClearedFPRegs.emplace_back(Reg, SaveReg, 0);

          // Save the fp register to the normal registers
          BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
              .addReg(Reg)
              .add(predOps(ARMCC::AL));
        } else {
          NonclearedFPRegs.push_back(Reg);
        }
      }
    }
  }

  bool returnsFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());

  if (returnsFPReg)
    assert(STI->hasFPRegs() && "Subtarget needs fpregs");

  // Push FP regs that cannot be restored via normal registers on the stack
  for (unsigned Reg : NonclearedFPRegs) {
    if (ARM::DPR_VFP2RegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD))
          .addReg(Reg)
          .addReg(ARM::SP)
          .addImm((Reg - ARM::D0) * 2)
          .add(predOps(ARMCC::AL));
    else if (ARM::SPRRegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS))
          .addReg(Reg)
          .addReg(ARM::SP)
          .addImm(Reg - ARM::S0)
          .add(predOps(ARMCC::AL));
  }

  // Lazy load fp regs from stack.
  // This executes as NOP in the absence of floating-point support.
  MachineInstrBuilder VLLDM =
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
          .addReg(ARM::SP)
          .add(predOps(ARMCC::AL))
          .addImm(0); // Represents a pseudo register list, has no effect on
                      // the encoding.

  if (STI->fixCMSE_CVE_2021_35465()) {
    // CVE-2021-35465 mitigation: force FP context creation (set SFPA) before
    // the VLLDM if it is not already set, inside a single bundle so the
    // scheduler cannot separate the sequence.
    auto Bundler = MIBundleBuilder(MBB, VLLDM);
    // Read the CONTROL register.
    Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2MRS_M))
                       .addReg(ScratchReg, RegState::Define)
                       .addImm(20)
                       .add(predOps(ARMCC::AL)));
    // Check bit 3 (SFPA).
    Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2TSTri))
                       .addReg(ScratchReg)
                       .addImm(8)
                       .add(predOps(ARMCC::AL)));
    // Emit the IT block.
    Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::t2IT))
                       .addImm(ARMCC::NE)
                       .addImm(8));
    // If SFPA is clear jump over to VLLDM, otherwise execute an instruction
    // which has no functional effect apart from causing context creation:
    // vmovne s0, s0. In the absence of FPU we emit .inst.w 0xeeb00a40,
    // which is defined as NOP if not executed.
    if (STI->hasFPRegs())
      Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::VMOVS))
                         .addReg(ARM::S0, RegState::Define)
                         .addReg(ARM::S0, RegState::Undef)
                         .add(predOps(ARMCC::NE)));
    else
      Bundler.append(BuildMI(*MBB.getParent(), DL, TII->get(ARM::INLINEASM))
                         .addExternalSymbol(".inst.w 0xeeb00a40")
                         .addImm(InlineAsm::Extra_HasSideEffects));
    finalizeBundle(MBB, Bundler.begin(), Bundler.end());
  }

  // Restore all FP registers via normal registers
  for (const auto &Regs : ClearedFPRegs) {
    unsigned Reg, SaveReg1, SaveReg2;
    std::tie(Reg, SaveReg1, SaveReg2) = Regs;
    if (ARM::DPR_VFP2RegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
          .addReg(SaveReg1)
          .addReg(SaveReg2)
          .add(predOps(ARMCC::AL));
    else if (ARM::SPRRegClass.contains(Reg))
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
          .addReg(SaveReg1)
          .add(predOps(ARMCC::AL));
  }

  // Pop the stack space
  BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
      .addReg(ARM::SP)
      .addImm(CMSE_FP_SAVE_SIZE >> 2)
      .add(predOps(ARMCC::AL));
}

// Returns true if MI has any register operand in the Q0-Q7, D0-D15 or
// S0-S31 ranges (i.e. it defines or uses an FP/vector register).
static bool definesOrUsesFPReg(const MachineInstr &MI) {
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg())
      continue;
    Register Reg = Op.getReg();
    if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
        (Reg >= ARM::D0 && Reg <= ARM::D15) ||
        (Reg >= ARM::S0 && Reg <= ARM::S31))
      return true;
  }
  return false;
}

// v8.1-M: undo the save sequence emitted by CMSESaveClearFPRegsV81 — either
// VLLDM + stack pop (call used no FP regs), or VLDR FPCXTS + pop of s16-s31.
void ARMExpandPseudo::CMSERestoreFPRegsV81(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
    SmallVectorImpl<unsigned> &AvailableRegs) {
  if (!definesOrUsesFPReg(*MBBI)) {
    if (STI->fixCMSE_CVE_2021_35465()) {
      // CVE-2021-35465 mitigation: clear VPR before the lazy restore.
      BuildMI(MBB, MBBI, DL, TII->get(ARM::VSCCLRMS))
          .add(predOps(ARMCC::AL))
          .addReg(ARM::VPR, RegState::Define);
    }

    // Load FP registers from stack.
    BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
        .addReg(ARM::SP)
        .add(predOps(ARMCC::AL))
        .addImm(0); // Represents a pseudo register list, has no effect on the
                    // encoding.

    // Pop the stack space
    BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
        .addReg(ARM::SP)
        .addImm(CMSE_FP_SAVE_SIZE >> 2)
        .add(predOps(ARMCC::AL));
  } else {
    // Restore the floating point context.
    BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
            ARM::SP)
        .addReg(ARM::SP)
        .addImm(8)
        .add(predOps(ARMCC::AL));

    // Pop all the callee-saved registers (s16-s31).
    MachineInstrBuilder VPOP =
        BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (unsigned Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
      VPOP.addReg(Reg, RegState::Define);
  }
}

/// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
/// possible. This only gets used at -O0 so we don't care about efficiency of
/// the generated code.
bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI,
                                     unsigned LdrexOp, unsigned StrexOp,
                                     unsigned UxtOp,
                                     MachineBasicBlock::iterator &NextMBBI) {
  bool IsThumb = STI->isThumb();
  bool IsThumb1Only = STI->isThumb1Only();
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  const MachineOperand &Dest = MI.getOperand(0);
  Register TempReg = MI.getOperand(1).getReg();
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; However undef should be replaced by xzr anyway.
  // (NOTE(review): "xzr" is an AArch64 register; this comment appears to be
  // inherited from the AArch64 expansion.)
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  if (IsThumb) {
    assert(STI->hasV8MBaselineOps() &&
           "CMP_SWAP not expected to be custom expanded for Thumb1");
    assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
           "ARMv8-M.baseline does not have t2UXTB/t2UXTH");
    assert((UxtOp == 0 || ARM::tGPRRegClass.contains(DesiredReg)) &&
           "DesiredReg used for UXT op must be tGPR");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // For sub-word widths, zero-extend the desired value so the comparison
  // against the (zero-extended) loaded value is meaningful.
  if (UxtOp) {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg)
            .addReg(DesiredReg, RegState::Kill);
    if (!IsThumb)
      MIB.addImm(0);
    MIB.add(predOps(ARMCC::AL));
  }

  // .Lloadcmp:
  //     ldrex rDest, [rAddr]
  //     cmp rDest, rDesired
  //     bne .Ldone
  MachineInstrBuilder MIB;
  MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
  MIB.addReg(AddrReg);
  if (LdrexOp == ARM::t2LDREX)
    MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
  MIB.add(predOps(ARMCC::AL));

  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
  BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .add(predOps(ARMCC::AL));
  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
      .addMBB(DoneBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     strex rTempReg, rNew, [rAddr]
  //     cmp rTempReg, #0
  //     bne .Lloadcmp
  MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
            .addReg(NewReg)
            .addReg(AddrReg);
  if (StrexOp == ARM::t2STREX)
    MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
  MIB.add(predOps(ARMCC::AL));

  unsigned CMPri =
      IsThumb ? (IsThumb1Only ? ARM::tCMPi8 : ARM::t2CMPri) : ARM::CMPri;
  BuildMI(StoreBB, DL, TII->get(CMPri))
      .addReg(TempReg, RegState::Kill)
      .addImm(0)
      .add(predOps(ARMCC::AL));
  BuildMI(StoreBB, DL, TII->get(Bcc))
      .addMBB(LoadCmpBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Move everything after the pseudo into DoneBB and rewire the CFG.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// ARM's ldrexd/strexd take a consecutive register pair (represented as a
/// single GPRPair register), Thumb's take two separate registers so we need to
/// extract the subregs from the pair.
static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
                                unsigned Flags, bool IsThumb,
                                const TargetRegisterInfo *TRI) {
  if (IsThumb) {
    // Thumb ldrexd/strexd want the two halves as separate operands.
    Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
    Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
    MIB.addReg(RegLo, Flags);
    MIB.addReg(RegHi, Flags);
  } else
    MIB.addReg(Reg.getReg(), Flags);
}

/// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI,
                                        MachineBasicBlock::iterator &NextMBBI) {
  bool IsThumb = STI->isThumb();
  assert(!STI->isThumb1Only() && "CMP_SWAP_64 unsupported under Thumb1!");
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  MachineOperand &Dest = MI.getOperand(0);
  // Duplicating undef operands into 2 instructions does not guarantee the same
  // value on both; However undef should be replaced by xzr anyway.
  // (NOTE(review): "xzr" is an AArch64 register; this comment appears to be
  // inherited from the AArch64 expansion.)
  assert(!MI.getOperand(1).isUndef() && "cannot handle undef");
  // The address and the strex status register share one GPRPair operand.
  Register AddrAndTempReg = MI.getOperand(1).getReg();
  Register AddrReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_0);
  Register TempReg = TRI->getSubReg(AddrAndTempReg, ARM::gsub_1);
  assert(MI.getOperand(1).getReg() == MI.getOperand(2).getReg() &&
         "tied operands have different registers");
  Register DesiredReg = MI.getOperand(3).getReg();
  MachineOperand New = MI.getOperand(4);
  New.setIsKill(false);

  Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
  Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
  Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
  Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldrexd rDestLo, rDestHi, [rAddr]
  //     cmp rDestLo, rDesiredLo
  //     cmpeq rDestHi, rDesiredHi   (second CMP below is EQ-predicated)
  //     bne .Ldone
  unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
  MachineInstrBuilder MIB;
  MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
  addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
  MIB.addReg(AddrReg).add(predOps(ARMCC::AL));

  unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
  BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
      .addReg(DestLo, getKillRegState(Dest.isDead()))
      .addReg(DesiredLo)
      .add(predOps(ARMCC::AL));

  // Compare the high halves only if the low halves were equal.
  BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
      .addReg(DestHi, getKillRegState(Dest.isDead()))
      .addReg(DesiredHi)
      .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);

  unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
  BuildMI(LoadCmpBB, DL, TII->get(Bcc))
      .addMBB(DoneBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     strexd rTempReg, rNewLo, rNewHi, [rAddr]
  //     cmp rTempReg, #0
  //     bne .Lloadcmp
  unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
  MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
  unsigned Flags = getKillRegState(New.isDead());
  addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
  MIB.addReg(AddrReg).add(predOps(ARMCC::AL));

  unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
  BuildMI(StoreBB, DL, TII->get(CMPri))
      .addReg(TempReg, RegState::Kill)
      .addImm(0)
      .add(predOps(ARMCC::AL));
  BuildMI(StoreBB, DL, TII->get(Bcc))
      .addMBB(LoadCmpBB)
      .addImm(ARMCC::NE)
      .addReg(ARM::CPSR, RegState::Kill);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // Move everything after the pseudo into DoneBB and rewire the CFG.
  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

// Push the GPR callee-saves r4-r11 before a CMSE non-secure call. On Thumb1
// the high registers cannot be pushed directly, so they are shuffled through
// the already-saved low registers; on Thumb2 a single STMDB suffices.
// Registers not live (and not JumpReg) are pushed as undef.
static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
                                MachineBasicBlock &MBB,
                                MachineBasicBlock::iterator MBBI,
                                Register JumpReg, const LivePhysRegs &LiveRegs,
                                bool Thumb1Only) {
  const DebugLoc &DL = MBBI->getDebugLoc();
  if (Thumb1Only) { // push Lo and Hi regs separately
    MachineInstrBuilder PushMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
    for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
      PushMIB.addReg(
          Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
    }

    // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
    // regs that we just saved and push the low regs again, taking care to
    // not clobber JumpReg. If JumpReg is one of the low registers, push first
    // the values of r9-r11, and then r8. That would leave them ordered in
    // memory, and allow us to later pop them with a single instruction.
    // FIXME: Could also use any of r0-r3 that are free (including in the
    // first PUSH above).
    for (unsigned LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4;
         --LoReg) {
      if (JumpReg == LoReg)
        continue;
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
          .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
          .add(predOps(ARMCC::AL));
      --HiReg;
    }
    MachineInstrBuilder PushMIB2 =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
    for (unsigned Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
      if (Reg == JumpReg)
        continue;
      PushMIB2.addReg(Reg, RegState::Kill);
    }

    // If we couldn't use a low register for temporary storage (because it was
    // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
    // saved.
    if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
      Register LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
          .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
          .add(predOps(ARMCC::AL));
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
          .add(predOps(ARMCC::AL))
          .addReg(LoReg, RegState::Kill);
    }
  } else { // push Lo and Hi registers with a single instruction
    MachineInstrBuilder PushMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
      PushMIB.addReg(
          Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
    }
  }
}

// Reload r4-r11 pushed by CMSEPushCalleeSaves. On Thumb1 this pops r8-r11 via
// the low registers first (matching the push layout), then pops r4-r7; on
// Thumb2 a single LDMIA restores all of them.
static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
                               MachineBasicBlock &MBB,
                               MachineBasicBlock::iterator MBBI, int JumpReg,
                               bool Thumb1Only) {
  const DebugLoc &DL = MBBI->getDebugLoc();
  if (Thumb1Only) {
    MachineInstrBuilder PopMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
    for (int R = 0; R < 4; ++R) {
      // Pop into r4-r7, then move each value up to r8-r11.
      PopMIB.addReg(ARM::R4 + R, RegState::Define);
      BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
          .addReg(ARM::R4 + R, RegState::Kill)
          .add(predOps(ARMCC::AL));
    }
    MachineInstrBuilder PopMIB2 =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
    for (int R = 0; R < 4; ++R)
      PopMIB2.addReg(ARM::R4 + R, RegState::Define);
  } else { // pop Lo and Hi registers with a single instruction
    MachineInstrBuilder PopMIB =
        BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
            .addReg(ARM::SP)
            .add(predOps(ARMCC::AL));
    for (unsigned Reg = ARM::R4; Reg < ARM::R12; ++Reg)
      PopMIB.addReg(Reg, RegState::Define);
  }
}

// Expand a single pseudo instruction; returns true if MI was expanded.
bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
2174 MachineBasicBlock::iterator MBBI, 2175 MachineBasicBlock::iterator &NextMBBI) { 2176 MachineInstr &MI = *MBBI; 2177 unsigned Opcode = MI.getOpcode(); 2178 switch (Opcode) { 2179 default: 2180 return false; 2181 2182 case ARM::VBSPd: 2183 case ARM::VBSPq: { 2184 Register DstReg = MI.getOperand(0).getReg(); 2185 if (DstReg == MI.getOperand(3).getReg()) { 2186 // Expand to VBIT 2187 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq; 2188 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2189 .add(MI.getOperand(0)) 2190 .add(MI.getOperand(3)) 2191 .add(MI.getOperand(2)) 2192 .add(MI.getOperand(1)) 2193 .addImm(MI.getOperand(4).getImm()) 2194 .add(MI.getOperand(5)); 2195 } else if (DstReg == MI.getOperand(2).getReg()) { 2196 // Expand to VBIF 2197 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq; 2198 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2199 .add(MI.getOperand(0)) 2200 .add(MI.getOperand(2)) 2201 .add(MI.getOperand(3)) 2202 .add(MI.getOperand(1)) 2203 .addImm(MI.getOperand(4).getImm()) 2204 .add(MI.getOperand(5)); 2205 } else { 2206 // Expand to VBSL 2207 unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq; 2208 if (DstReg == MI.getOperand(1).getReg()) { 2209 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2210 .add(MI.getOperand(0)) 2211 .add(MI.getOperand(1)) 2212 .add(MI.getOperand(2)) 2213 .add(MI.getOperand(3)) 2214 .addImm(MI.getOperand(4).getImm()) 2215 .add(MI.getOperand(5)); 2216 } else { 2217 // Use move to satisfy constraints 2218 unsigned MoveOpc = Opcode == ARM::VBSPd ? 
ARM::VORRd : ARM::VORRq; 2219 unsigned MO1Flags = getRegState(MI.getOperand(1)) & ~RegState::Kill; 2220 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc)) 2221 .addReg(DstReg, 2222 RegState::Define | 2223 getRenamableRegState(MI.getOperand(0).isRenamable())) 2224 .addReg(MI.getOperand(1).getReg(), MO1Flags) 2225 .addReg(MI.getOperand(1).getReg(), MO1Flags) 2226 .addImm(MI.getOperand(4).getImm()) 2227 .add(MI.getOperand(5)); 2228 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)) 2229 .add(MI.getOperand(0)) 2230 .addReg(DstReg, 2231 RegState::Kill | 2232 getRenamableRegState(MI.getOperand(0).isRenamable())) 2233 .add(MI.getOperand(2)) 2234 .add(MI.getOperand(3)) 2235 .addImm(MI.getOperand(4).getImm()) 2236 .add(MI.getOperand(5)); 2237 } 2238 } 2239 MI.eraseFromParent(); 2240 return true; 2241 } 2242 2243 case ARM::TCRETURNdi: 2244 case ARM::TCRETURNri: 2245 case ARM::TCRETURNrinotr12: { 2246 MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr(); 2247 if (MBBI->getOpcode() == ARM::SEH_EpilogEnd) 2248 MBBI--; 2249 if (MBBI->getOpcode() == ARM::SEH_Nop_Ret) 2250 MBBI--; 2251 assert(MBBI->isReturn() && 2252 "Can only insert epilog into returning blocks"); 2253 unsigned RetOpcode = MBBI->getOpcode(); 2254 DebugLoc dl = MBBI->getDebugLoc(); 2255 const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>( 2256 MBB.getParent()->getSubtarget().getInstrInfo()); 2257 2258 // Tail call return: adjust the stack pointer and jump to callee. 2259 MBBI = MBB.getLastNonDebugInstr(); 2260 if (MBBI->getOpcode() == ARM::SEH_EpilogEnd) 2261 MBBI--; 2262 if (MBBI->getOpcode() == ARM::SEH_Nop_Ret) 2263 MBBI--; 2264 MachineOperand &JumpTarget = MBBI->getOperand(0); 2265 2266 // Jump to label or value in register. 
2267 if (RetOpcode == ARM::TCRETURNdi) { 2268 MachineFunction *MF = MBB.getParent(); 2269 bool NeedsWinCFI = MF->getTarget().getMCAsmInfo()->usesWindowsCFI() && 2270 MF->getFunction().needsUnwindTableEntry(); 2271 unsigned TCOpcode = 2272 STI->isThumb() 2273 ? ((STI->isTargetMachO() || NeedsWinCFI) ? ARM::tTAILJMPd 2274 : ARM::tTAILJMPdND) 2275 : ARM::TAILJMPd; 2276 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); 2277 if (JumpTarget.isGlobal()) 2278 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), 2279 JumpTarget.getTargetFlags()); 2280 else { 2281 assert(JumpTarget.isSymbol()); 2282 MIB.addExternalSymbol(JumpTarget.getSymbolName(), 2283 JumpTarget.getTargetFlags()); 2284 } 2285 2286 // Add the default predicate in Thumb mode. 2287 if (STI->isThumb()) 2288 MIB.add(predOps(ARMCC::AL)); 2289 } else if (RetOpcode == ARM::TCRETURNri || 2290 RetOpcode == ARM::TCRETURNrinotr12) { 2291 unsigned Opcode = 2292 STI->isThumb() ? ARM::tTAILJMPr 2293 : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4); 2294 BuildMI(MBB, MBBI, dl, 2295 TII.get(Opcode)) 2296 .addReg(JumpTarget.getReg(), RegState::Kill); 2297 } 2298 2299 auto NewMI = std::prev(MBBI); 2300 for (unsigned i = 2, e = MBBI->getNumOperands(); i != e; ++i) 2301 NewMI->addOperand(MBBI->getOperand(i)); 2302 2303 // Update call info and delete the pseudo instruction TCRETURN. 2304 if (MI.isCandidateForAdditionalCallInfo()) 2305 MI.getMF()->moveAdditionalCallInfo(&MI, &*NewMI); 2306 // Copy nomerge flag over to new instruction. 2307 if (MI.getFlag(MachineInstr::NoMerge)) 2308 NewMI->setFlag(MachineInstr::NoMerge); 2309 MBB.erase(MBBI); 2310 2311 MBBI = NewMI; 2312 return true; 2313 } 2314 case ARM::tBXNS_RET: { 2315 // For v8.0-M.Main we need to authenticate LR before clearing FPRs, which 2316 // uses R12 as a scratch register. 
2317 if (!STI->hasV8_1MMainlineOps() && AFI->shouldSignReturnAddress()) 2318 BuildMI(MBB, MBBI, DebugLoc(), TII->get(ARM::t2AUT)); 2319 2320 MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI); 2321 2322 if (STI->hasV8_1MMainlineOps()) { 2323 // Restore the non-secure floating point context. 2324 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), 2325 TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP) 2326 .addReg(ARM::SP) 2327 .addImm(4) 2328 .add(predOps(ARMCC::AL)); 2329 2330 if (AFI->shouldSignReturnAddress()) 2331 BuildMI(AfterBB, AfterBB.end(), DebugLoc(), TII->get(ARM::t2AUT)); 2332 } 2333 2334 // Clear all GPR that are not a use of the return instruction. 2335 assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) { 2336 return !Op.isReg() || Op.getReg() != ARM::R12; 2337 })); 2338 SmallVector<unsigned, 5> ClearRegs; 2339 determineGPRegsToClear( 2340 *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs); 2341 CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs, 2342 ARM::LR); 2343 2344 MachineInstrBuilder NewMI = 2345 BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), 2346 TII->get(ARM::tBXNS)) 2347 .addReg(ARM::LR) 2348 .add(predOps(ARMCC::AL)); 2349 for (const MachineOperand &Op : MI.operands()) 2350 NewMI->addOperand(Op); 2351 MI.eraseFromParent(); 2352 return true; 2353 } 2354 case ARM::tBLXNS_CALL: { 2355 DebugLoc DL = MBBI->getDebugLoc(); 2356 Register JumpReg = MBBI->getOperand(0).getReg(); 2357 2358 // Figure out which registers are live at the point immediately before the 2359 // call. When we indiscriminately push a set of registers, the live 2360 // registers are added as ordinary use operands, whereas dead registers 2361 // are "undef". 
2362 LivePhysRegs LiveRegs(*TRI); 2363 LiveRegs.addLiveOuts(MBB); 2364 for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse())) 2365 LiveRegs.stepBackward(MI); 2366 LiveRegs.stepBackward(*MBBI); 2367 2368 CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs, 2369 AFI->isThumb1OnlyFunction()); 2370 2371 SmallVector<unsigned, 16> ClearRegs; 2372 determineGPRegsToClear(*MBBI, 2373 {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, 2374 ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9, 2375 ARM::R10, ARM::R11, ARM::R12}, 2376 ClearRegs); 2377 auto OriginalClearRegs = ClearRegs; 2378 2379 // Get the first cleared register as a scratch (to use later with tBIC). 2380 // We need to use the first so we can ensure it is a low register. 2381 unsigned ScratchReg = ClearRegs.front(); 2382 2383 // Clear LSB of JumpReg 2384 if (AFI->isThumb2Function()) { 2385 BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg) 2386 .addReg(JumpReg) 2387 .addImm(1) 2388 .add(predOps(ARMCC::AL)) 2389 .add(condCodeOp()); 2390 } else { 2391 // We need to use an extra register to cope with 8M Baseline, 2392 // since we have saved all of the registers we are ok to trash a non 2393 // argument register here. 
2394 BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg) 2395 .add(condCodeOp()) 2396 .addImm(1) 2397 .add(predOps(ARMCC::AL)); 2398 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg) 2399 .addReg(ARM::CPSR, RegState::Define) 2400 .addReg(JumpReg) 2401 .addReg(ScratchReg) 2402 .add(predOps(ARMCC::AL)); 2403 } 2404 2405 CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs, 2406 ClearRegs); // save+clear FP regs with ClearRegs 2407 CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg); 2408 2409 const MachineInstrBuilder NewCall = 2410 BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr)) 2411 .add(predOps(ARMCC::AL)) 2412 .addReg(JumpReg, RegState::Kill); 2413 2414 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 2415 NewCall->addOperand(MO); 2416 if (MI.isCandidateForAdditionalCallInfo()) 2417 MI.getMF()->moveAdditionalCallInfo(&MI, NewCall.getInstr()); 2418 2419 CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers 2420 2421 CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction()); 2422 2423 MI.eraseFromParent(); 2424 return true; 2425 } 2426 case ARM::VMOVHcc: 2427 case ARM::VMOVScc: 2428 case ARM::VMOVDcc: { 2429 unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD; 2430 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), 2431 MI.getOperand(1).getReg()) 2432 .add(MI.getOperand(2)) 2433 .addImm(MI.getOperand(3).getImm()) // 'pred' 2434 .add(MI.getOperand(4)) 2435 .add(makeImplicit(MI.getOperand(1))); 2436 2437 MI.eraseFromParent(); 2438 return true; 2439 } 2440 case ARM::t2MOVCCr: 2441 case ARM::MOVCCr: { 2442 unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MOVr : ARM::MOVr; 2443 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2444 MI.getOperand(1).getReg()) 2445 .add(MI.getOperand(2)) 2446 .addImm(MI.getOperand(3).getImm()) // 'pred' 2447 .add(MI.getOperand(4)) 2448 .add(condCodeOp()) // 's' bit 2449 .add(makeImplicit(MI.getOperand(1))); 2450 2451 MI.eraseFromParent(); 2452 return true; 2453 } 2454 case ARM::MOVCCsi: { 2455 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2456 (MI.getOperand(1).getReg())) 2457 .add(MI.getOperand(2)) 2458 .addImm(MI.getOperand(3).getImm()) 2459 .addImm(MI.getOperand(4).getImm()) // 'pred' 2460 .add(MI.getOperand(5)) 2461 .add(condCodeOp()) // 's' bit 2462 .add(makeImplicit(MI.getOperand(1))); 2463 2464 MI.eraseFromParent(); 2465 return true; 2466 } 2467 case ARM::MOVCCsr: { 2468 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), 2469 (MI.getOperand(1).getReg())) 2470 .add(MI.getOperand(2)) 2471 .add(MI.getOperand(3)) 2472 .addImm(MI.getOperand(4).getImm()) 2473 .addImm(MI.getOperand(5).getImm()) // 'pred' 2474 .add(MI.getOperand(6)) 2475 .add(condCodeOp()) // 's' bit 2476 .add(makeImplicit(MI.getOperand(1))); 2477 2478 MI.eraseFromParent(); 2479 return true; 2480 } 2481 case ARM::t2MOVCCi16: 2482 case ARM::MOVCCi16: { 2483 unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16; 2484 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), 2485 MI.getOperand(1).getReg()) 2486 .addImm(MI.getOperand(2).getImm()) 2487 .addImm(MI.getOperand(3).getImm()) // 'pred' 2488 .add(MI.getOperand(4)) 2489 .add(makeImplicit(MI.getOperand(1))); 2490 MI.eraseFromParent(); 2491 return true; 2492 } 2493 case ARM::t2MOVCCi: 2494 case ARM::MOVCCi: { 2495 unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MOVi : ARM::MOVi; 2496 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2497 MI.getOperand(1).getReg()) 2498 .addImm(MI.getOperand(2).getImm()) 2499 .addImm(MI.getOperand(3).getImm()) // 'pred' 2500 .add(MI.getOperand(4)) 2501 .add(condCodeOp()) // 's' bit 2502 .add(makeImplicit(MI.getOperand(1))); 2503 2504 MI.eraseFromParent(); 2505 return true; 2506 } 2507 case ARM::t2MVNCCi: 2508 case ARM::MVNCCi: { 2509 unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi; 2510 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), 2511 MI.getOperand(1).getReg()) 2512 .addImm(MI.getOperand(2).getImm()) 2513 .addImm(MI.getOperand(3).getImm()) // 'pred' 2514 .add(MI.getOperand(4)) 2515 .add(condCodeOp()) // 's' bit 2516 .add(makeImplicit(MI.getOperand(1))); 2517 2518 MI.eraseFromParent(); 2519 return true; 2520 } 2521 case ARM::t2MOVCClsl: 2522 case ARM::t2MOVCClsr: 2523 case ARM::t2MOVCCasr: 2524 case ARM::t2MOVCCror: { 2525 unsigned NewOpc; 2526 switch (Opcode) { 2527 case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break; 2528 case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break; 2529 case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break; 2530 case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break; 2531 default: llvm_unreachable("unexpeced conditional move"); 2532 } 2533 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), 2534 MI.getOperand(1).getReg()) 2535 .add(MI.getOperand(2)) 2536 .addImm(MI.getOperand(3).getImm()) 2537 .addImm(MI.getOperand(4).getImm()) // 'pred' 2538 .add(MI.getOperand(5)) 2539 .add(condCodeOp()) // 's' bit 2540 .add(makeImplicit(MI.getOperand(1))); 2541 MI.eraseFromParent(); 2542 return true; 2543 } 2544 case ARM::Int_eh_sjlj_dispatchsetup: { 2545 MachineFunction &MF = *MI.getParent()->getParent(); 2546 const ARMBaseInstrInfo *AII = 2547 static_cast<const ARMBaseInstrInfo*>(TII); 2548 const ARMBaseRegisterInfo &RI = AII->getRegisterInfo(); 2549 // For functions using a base pointer, we rematerialize it (via the frame 2550 // pointer) here 
since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it 2551 // for us. Otherwise, expand to nothing. 2552 if (RI.hasBasePointer(MF)) { 2553 int32_t NumBytes = AFI->getFramePtrSpillOffset(); 2554 Register FramePtr = RI.getFrameRegister(MF); 2555 assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) && 2556 "base pointer without frame pointer?"); 2557 2558 if (AFI->isThumb2Function()) { 2559 emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2560 FramePtr, -NumBytes, ARMCC::AL, 0, *TII); 2561 } else if (AFI->isThumbFunction()) { 2562 emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2563 FramePtr, -NumBytes, *TII, RI); 2564 } else { 2565 emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6, 2566 FramePtr, -NumBytes, ARMCC::AL, 0, 2567 *TII); 2568 } 2569 // If there's dynamic realignment, adjust for it. 2570 if (RI.hasStackRealignment(MF)) { 2571 MachineFrameInfo &MFI = MF.getFrameInfo(); 2572 Align MaxAlign = MFI.getMaxAlign(); 2573 assert (!AFI->isThumb1OnlyFunction()); 2574 // Emit bic r6, r6, MaxAlign 2575 assert(MaxAlign <= Align(256) && 2576 "The BIC instruction cannot encode " 2577 "immediates larger than 256 with all lower " 2578 "bits set."); 2579 unsigned bicOpc = AFI->isThumbFunction() ? 2580 ARM::t2BICri : ARM::BICri; 2581 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6) 2582 .addReg(ARM::R6, RegState::Kill) 2583 .addImm(MaxAlign.value() - 1) 2584 .add(predOps(ARMCC::AL)) 2585 .add(condCodeOp()); 2586 } 2587 } 2588 MI.eraseFromParent(); 2589 return true; 2590 } 2591 2592 case ARM::LSRs1: 2593 case ARM::ASRs1: { 2594 // These are just fancy MOVs instructions. 2595 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2596 MI.getOperand(0).getReg()) 2597 .add(MI.getOperand(1)) 2598 .addImm(ARM_AM::getSORegOpc( 2599 (Opcode == ARM::LSRs1 ? 
ARM_AM::lsr : ARM_AM::asr), 1)) 2600 .add(predOps(ARMCC::AL)) 2601 .addReg(ARM::CPSR, RegState::Define); 2602 MI.eraseFromParent(); 2603 return true; 2604 } 2605 case ARM::RRX: { 2606 // This encodes as "MOVs Rd, Rm, rrx 2607 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), 2608 MI.getOperand(0).getReg()) 2609 .add(MI.getOperand(1)) 2610 .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0)) 2611 .add(predOps(ARMCC::AL)) 2612 .add(condCodeOp()) 2613 .copyImplicitOps(MI); 2614 MI.eraseFromParent(); 2615 return true; 2616 } 2617 case ARM::tTPsoft: 2618 case ARM::TPsoft: { 2619 const bool Thumb = Opcode == ARM::tTPsoft; 2620 2621 MachineInstrBuilder MIB; 2622 MachineFunction *MF = MBB.getParent(); 2623 if (STI->genLongCalls()) { 2624 MachineConstantPool *MCP = MF->getConstantPool(); 2625 unsigned PCLabelID = AFI->createPICLabelUId(); 2626 MachineConstantPoolValue *CPV = 2627 ARMConstantPoolSymbol::Create(MF->getFunction().getContext(), 2628 "__aeabi_read_tp", PCLabelID, 0); 2629 Register Reg = MI.getOperand(0).getReg(); 2630 MIB = 2631 BuildMI(MBB, MBBI, MI.getDebugLoc(), 2632 TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg) 2633 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4))); 2634 if (!Thumb) 2635 MIB.addImm(0); 2636 MIB.add(predOps(ARMCC::AL)); 2637 2638 MIB = 2639 BuildMI(MBB, MBBI, MI.getDebugLoc(), 2640 TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF))); 2641 if (Thumb) 2642 MIB.add(predOps(ARMCC::AL)); 2643 MIB.addReg(Reg, RegState::Kill); 2644 } else { 2645 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), 2646 TII->get(Thumb ? ARM::tBL : ARM::BL)); 2647 if (Thumb) 2648 MIB.add(predOps(ARMCC::AL)); 2649 MIB.addExternalSymbol("__aeabi_read_tp", 0); 2650 } 2651 2652 MIB.cloneMemRefs(MI); 2653 MIB.copyImplicitOps(MI); 2654 // Update the call info. 
2655 if (MI.isCandidateForAdditionalCallInfo()) 2656 MF->moveAdditionalCallInfo(&MI, &*MIB); 2657 MI.eraseFromParent(); 2658 return true; 2659 } 2660 case ARM::tLDRpci_pic: 2661 case ARM::t2LDRpci_pic: { 2662 unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic) 2663 ? ARM::tLDRpci : ARM::t2LDRpci; 2664 Register DstReg = MI.getOperand(0).getReg(); 2665 bool DstIsDead = MI.getOperand(0).isDead(); 2666 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg) 2667 .add(MI.getOperand(1)) 2668 .add(predOps(ARMCC::AL)) 2669 .cloneMemRefs(MI) 2670 .copyImplicitOps(MI); 2671 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD)) 2672 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2673 .addReg(DstReg) 2674 .add(MI.getOperand(2)) 2675 .copyImplicitOps(MI); 2676 MI.eraseFromParent(); 2677 return true; 2678 } 2679 2680 case ARM::LDRLIT_ga_abs: 2681 case ARM::LDRLIT_ga_pcrel: 2682 case ARM::LDRLIT_ga_pcrel_ldr: 2683 case ARM::tLDRLIT_ga_abs: 2684 case ARM::t2LDRLIT_ga_pcrel: 2685 case ARM::tLDRLIT_ga_pcrel: { 2686 Register DstReg = MI.getOperand(0).getReg(); 2687 bool DstIsDead = MI.getOperand(0).isDead(); 2688 const MachineOperand &MO1 = MI.getOperand(1); 2689 auto Flags = MO1.getTargetFlags(); 2690 const GlobalValue *GV = MO1.getGlobal(); 2691 bool IsARM = Opcode != ARM::tLDRLIT_ga_pcrel && 2692 Opcode != ARM::tLDRLIT_ga_abs && 2693 Opcode != ARM::t2LDRLIT_ga_pcrel; 2694 bool IsPIC = 2695 Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs; 2696 unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci; 2697 if (Opcode == ARM::t2LDRLIT_ga_pcrel) 2698 LDRLITOpc = ARM::t2LDRpci; 2699 unsigned PICAddOpc = 2700 IsARM 2701 ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) 2702 : ARM::tPICADD; 2703 2704 // We need a new const-pool entry to load from. 
2705 MachineConstantPool *MCP = MBB.getParent()->getConstantPool(); 2706 unsigned ARMPCLabelIndex = 0; 2707 MachineConstantPoolValue *CPV; 2708 2709 if (IsPIC) { 2710 unsigned PCAdj = IsARM ? 8 : 4; 2711 auto Modifier = (Flags & ARMII::MO_GOT) 2712 ? ARMCP::GOT_PREL 2713 : ARMCP::no_modifier; 2714 ARMPCLabelIndex = AFI->createPICLabelUId(); 2715 CPV = ARMConstantPoolConstant::Create( 2716 GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier, 2717 /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL); 2718 } else 2719 CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier); 2720 2721 MachineInstrBuilder MIB = 2722 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg) 2723 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4))); 2724 if (IsARM) 2725 MIB.addImm(0); 2726 MIB.add(predOps(ARMCC::AL)); 2727 2728 if (IsPIC) { 2729 MachineInstrBuilder MIB = 2730 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc)) 2731 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2732 .addReg(DstReg) 2733 .addImm(ARMPCLabelIndex); 2734 2735 if (IsARM) 2736 MIB.add(predOps(ARMCC::AL)); 2737 } 2738 2739 MI.eraseFromParent(); 2740 return true; 2741 } 2742 case ARM::MOV_ga_pcrel: 2743 case ARM::MOV_ga_pcrel_ldr: 2744 case ARM::t2MOV_ga_pcrel: { 2745 // Expand into movw + movw. Also "add pc" / ldr [pc] in PIC mode. 2746 unsigned LabelId = AFI->createPICLabelUId(); 2747 Register DstReg = MI.getOperand(0).getReg(); 2748 bool DstIsDead = MI.getOperand(0).isDead(); 2749 const MachineOperand &MO1 = MI.getOperand(1); 2750 const GlobalValue *GV = MO1.getGlobal(); 2751 unsigned TF = MO1.getTargetFlags(); 2752 bool isARM = Opcode != ARM::t2MOV_ga_pcrel; 2753 unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel; 2754 unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel; 2755 unsigned LO16TF = TF | ARMII::MO_LO16; 2756 unsigned HI16TF = TF | ARMII::MO_HI16; 2757 unsigned PICAddOpc = isARM 2758 ? 
(Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD) 2759 : ARM::tPICADD; 2760 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg) 2761 .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF) 2762 .addImm(LabelId) 2763 .copyImplicitOps(MI); 2764 2765 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg) 2766 .addReg(DstReg) 2767 .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF) 2768 .addImm(LabelId) 2769 .copyImplicitOps(MI); 2770 2771 MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(), 2772 TII->get(PICAddOpc)) 2773 .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead)) 2774 .addReg(DstReg).addImm(LabelId); 2775 if (isARM) { 2776 MIB3.add(predOps(ARMCC::AL)); 2777 if (Opcode == ARM::MOV_ga_pcrel_ldr) 2778 MIB3.cloneMemRefs(MI); 2779 } 2780 MIB3.copyImplicitOps(MI); 2781 MI.eraseFromParent(); 2782 return true; 2783 } 2784 2785 case ARM::MOVi32imm: 2786 case ARM::MOVCCi32imm: 2787 case ARM::t2MOVi32imm: 2788 case ARM::t2MOVCCi32imm: 2789 ExpandMOV32BitImm(MBB, MBBI); 2790 return true; 2791 2792 case ARM::tMOVi32imm: 2793 ExpandTMOV32BitImm(MBB, MBBI); 2794 return true; 2795 2796 case ARM::tLEApcrelJT: 2797 // Inline jump tables are handled in ARMAsmPrinter. 2798 if (MI.getMF()->getJumpTableInfo()->getEntryKind() == 2799 MachineJumpTableInfo::EK_Inline) 2800 return false; 2801 2802 // Use a 32-bit immediate move to generate the address of the jump table. 
2803 assert(STI->isThumb() && "Non-inline jump tables expected only in thumb"); 2804 ExpandTMOV32BitImm(MBB, MBBI); 2805 return true; 2806 2807 case ARM::SUBS_PC_LR: { 2808 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) 2809 .addReg(ARM::LR) 2810 .add(MI.getOperand(0)) 2811 .add(MI.getOperand(1)) 2812 .add(MI.getOperand(2)) 2813 .addReg(ARM::CPSR, RegState::Undef) 2814 .copyImplicitOps(MI); 2815 MI.eraseFromParent(); 2816 return true; 2817 } 2818 case ARM::VLDMQIA: { 2819 unsigned NewOpc = ARM::VLDMDIA; 2820 MachineInstrBuilder MIB = 2821 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); 2822 unsigned OpIdx = 0; 2823 2824 // Grab the Q register destination. 2825 bool DstIsDead = MI.getOperand(OpIdx).isDead(); 2826 Register DstReg = MI.getOperand(OpIdx++).getReg(); 2827 2828 // Copy the source register. 2829 MIB.add(MI.getOperand(OpIdx++)); 2830 2831 // Copy the predicate operands. 2832 MIB.add(MI.getOperand(OpIdx++)); 2833 MIB.add(MI.getOperand(OpIdx++)); 2834 2835 // Add the destination operands (D subregs). 2836 Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0); 2837 Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1); 2838 MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead)) 2839 .addReg(D1, RegState::Define | getDeadRegState(DstIsDead)); 2840 2841 // Add an implicit def for the super-register. 2842 MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead)); 2843 MIB.copyImplicitOps(MI); 2844 MIB.cloneMemRefs(MI); 2845 MI.eraseFromParent(); 2846 return true; 2847 } 2848 2849 case ARM::VSTMQIA: { 2850 unsigned NewOpc = ARM::VSTMDIA; 2851 MachineInstrBuilder MIB = 2852 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc)); 2853 unsigned OpIdx = 0; 2854 2855 // Grab the Q register source. 2856 bool SrcIsKill = MI.getOperand(OpIdx).isKill(); 2857 Register SrcReg = MI.getOperand(OpIdx++).getReg(); 2858 2859 // Copy the destination register. 
2860 MachineOperand Dst(MI.getOperand(OpIdx++)); 2861 MIB.add(Dst); 2862 2863 // Copy the predicate operands. 2864 MIB.add(MI.getOperand(OpIdx++)); 2865 MIB.add(MI.getOperand(OpIdx++)); 2866 2867 // Add the source operands (D subregs). 2868 Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0); 2869 Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1); 2870 MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0) 2871 .addReg(D1, SrcIsKill ? RegState::Kill : 0); 2872 2873 if (SrcIsKill) // Add an implicit kill for the Q register. 2874 MIB->addRegisterKilled(SrcReg, TRI, true); 2875 2876 MIB.copyImplicitOps(MI); 2877 MIB.cloneMemRefs(MI); 2878 MI.eraseFromParent(); 2879 return true; 2880 } 2881 2882 case ARM::VLD2q8Pseudo: 2883 case ARM::VLD2q16Pseudo: 2884 case ARM::VLD2q32Pseudo: 2885 case ARM::VLD2q8PseudoWB_fixed: 2886 case ARM::VLD2q16PseudoWB_fixed: 2887 case ARM::VLD2q32PseudoWB_fixed: 2888 case ARM::VLD2q8PseudoWB_register: 2889 case ARM::VLD2q16PseudoWB_register: 2890 case ARM::VLD2q32PseudoWB_register: 2891 case ARM::VLD3d8Pseudo: 2892 case ARM::VLD3d16Pseudo: 2893 case ARM::VLD3d32Pseudo: 2894 case ARM::VLD1d8TPseudo: 2895 case ARM::VLD1d8TPseudoWB_fixed: 2896 case ARM::VLD1d8TPseudoWB_register: 2897 case ARM::VLD1d16TPseudo: 2898 case ARM::VLD1d16TPseudoWB_fixed: 2899 case ARM::VLD1d16TPseudoWB_register: 2900 case ARM::VLD1d32TPseudo: 2901 case ARM::VLD1d32TPseudoWB_fixed: 2902 case ARM::VLD1d32TPseudoWB_register: 2903 case ARM::VLD1d64TPseudo: 2904 case ARM::VLD1d64TPseudoWB_fixed: 2905 case ARM::VLD1d64TPseudoWB_register: 2906 case ARM::VLD3d8Pseudo_UPD: 2907 case ARM::VLD3d16Pseudo_UPD: 2908 case ARM::VLD3d32Pseudo_UPD: 2909 case ARM::VLD3q8Pseudo_UPD: 2910 case ARM::VLD3q16Pseudo_UPD: 2911 case ARM::VLD3q32Pseudo_UPD: 2912 case ARM::VLD3q8oddPseudo: 2913 case ARM::VLD3q16oddPseudo: 2914 case ARM::VLD3q32oddPseudo: 2915 case ARM::VLD3q8oddPseudo_UPD: 2916 case ARM::VLD3q16oddPseudo_UPD: 2917 case ARM::VLD3q32oddPseudo_UPD: 2918 case ARM::VLD4d8Pseudo: 2919 case 
ARM::VLD4d16Pseudo: 2920 case ARM::VLD4d32Pseudo: 2921 case ARM::VLD1d8QPseudo: 2922 case ARM::VLD1d8QPseudoWB_fixed: 2923 case ARM::VLD1d8QPseudoWB_register: 2924 case ARM::VLD1d16QPseudo: 2925 case ARM::VLD1d16QPseudoWB_fixed: 2926 case ARM::VLD1d16QPseudoWB_register: 2927 case ARM::VLD1d32QPseudo: 2928 case ARM::VLD1d32QPseudoWB_fixed: 2929 case ARM::VLD1d32QPseudoWB_register: 2930 case ARM::VLD1d64QPseudo: 2931 case ARM::VLD1d64QPseudoWB_fixed: 2932 case ARM::VLD1d64QPseudoWB_register: 2933 case ARM::VLD1q8HighQPseudo: 2934 case ARM::VLD1q8HighQPseudo_UPD: 2935 case ARM::VLD1q8LowQPseudo_UPD: 2936 case ARM::VLD1q8HighTPseudo: 2937 case ARM::VLD1q8HighTPseudo_UPD: 2938 case ARM::VLD1q8LowTPseudo_UPD: 2939 case ARM::VLD1q16HighQPseudo: 2940 case ARM::VLD1q16HighQPseudo_UPD: 2941 case ARM::VLD1q16LowQPseudo_UPD: 2942 case ARM::VLD1q16HighTPseudo: 2943 case ARM::VLD1q16HighTPseudo_UPD: 2944 case ARM::VLD1q16LowTPseudo_UPD: 2945 case ARM::VLD1q32HighQPseudo: 2946 case ARM::VLD1q32HighQPseudo_UPD: 2947 case ARM::VLD1q32LowQPseudo_UPD: 2948 case ARM::VLD1q32HighTPseudo: 2949 case ARM::VLD1q32HighTPseudo_UPD: 2950 case ARM::VLD1q32LowTPseudo_UPD: 2951 case ARM::VLD1q64HighQPseudo: 2952 case ARM::VLD1q64HighQPseudo_UPD: 2953 case ARM::VLD1q64LowQPseudo_UPD: 2954 case ARM::VLD1q64HighTPseudo: 2955 case ARM::VLD1q64HighTPseudo_UPD: 2956 case ARM::VLD1q64LowTPseudo_UPD: 2957 case ARM::VLD4d8Pseudo_UPD: 2958 case ARM::VLD4d16Pseudo_UPD: 2959 case ARM::VLD4d32Pseudo_UPD: 2960 case ARM::VLD4q8Pseudo_UPD: 2961 case ARM::VLD4q16Pseudo_UPD: 2962 case ARM::VLD4q32Pseudo_UPD: 2963 case ARM::VLD4q8oddPseudo: 2964 case ARM::VLD4q16oddPseudo: 2965 case ARM::VLD4q32oddPseudo: 2966 case ARM::VLD4q8oddPseudo_UPD: 2967 case ARM::VLD4q16oddPseudo_UPD: 2968 case ARM::VLD4q32oddPseudo_UPD: 2969 case ARM::VLD3DUPd8Pseudo: 2970 case ARM::VLD3DUPd16Pseudo: 2971 case ARM::VLD3DUPd32Pseudo: 2972 case ARM::VLD3DUPd8Pseudo_UPD: 2973 case ARM::VLD3DUPd16Pseudo_UPD: 2974 case 
ARM::VLD3DUPd32Pseudo_UPD: 2975 case ARM::VLD4DUPd8Pseudo: 2976 case ARM::VLD4DUPd16Pseudo: 2977 case ARM::VLD4DUPd32Pseudo: 2978 case ARM::VLD4DUPd8Pseudo_UPD: 2979 case ARM::VLD4DUPd16Pseudo_UPD: 2980 case ARM::VLD4DUPd32Pseudo_UPD: 2981 case ARM::VLD2DUPq8EvenPseudo: 2982 case ARM::VLD2DUPq8OddPseudo: 2983 case ARM::VLD2DUPq16EvenPseudo: 2984 case ARM::VLD2DUPq16OddPseudo: 2985 case ARM::VLD2DUPq32EvenPseudo: 2986 case ARM::VLD2DUPq32OddPseudo: 2987 case ARM::VLD2DUPq8OddPseudoWB_fixed: 2988 case ARM::VLD2DUPq8OddPseudoWB_register: 2989 case ARM::VLD2DUPq16OddPseudoWB_fixed: 2990 case ARM::VLD2DUPq16OddPseudoWB_register: 2991 case ARM::VLD2DUPq32OddPseudoWB_fixed: 2992 case ARM::VLD2DUPq32OddPseudoWB_register: 2993 case ARM::VLD3DUPq8EvenPseudo: 2994 case ARM::VLD3DUPq8OddPseudo: 2995 case ARM::VLD3DUPq16EvenPseudo: 2996 case ARM::VLD3DUPq16OddPseudo: 2997 case ARM::VLD3DUPq32EvenPseudo: 2998 case ARM::VLD3DUPq32OddPseudo: 2999 case ARM::VLD3DUPq8OddPseudo_UPD: 3000 case ARM::VLD3DUPq16OddPseudo_UPD: 3001 case ARM::VLD3DUPq32OddPseudo_UPD: 3002 case ARM::VLD4DUPq8EvenPseudo: 3003 case ARM::VLD4DUPq8OddPseudo: 3004 case ARM::VLD4DUPq16EvenPseudo: 3005 case ARM::VLD4DUPq16OddPseudo: 3006 case ARM::VLD4DUPq32EvenPseudo: 3007 case ARM::VLD4DUPq32OddPseudo: 3008 case ARM::VLD4DUPq8OddPseudo_UPD: 3009 case ARM::VLD4DUPq16OddPseudo_UPD: 3010 case ARM::VLD4DUPq32OddPseudo_UPD: 3011 ExpandVLD(MBBI); 3012 return true; 3013 3014 case ARM::VST2q8Pseudo: 3015 case ARM::VST2q16Pseudo: 3016 case ARM::VST2q32Pseudo: 3017 case ARM::VST2q8PseudoWB_fixed: 3018 case ARM::VST2q16PseudoWB_fixed: 3019 case ARM::VST2q32PseudoWB_fixed: 3020 case ARM::VST2q8PseudoWB_register: 3021 case ARM::VST2q16PseudoWB_register: 3022 case ARM::VST2q32PseudoWB_register: 3023 case ARM::VST3d8Pseudo: 3024 case ARM::VST3d16Pseudo: 3025 case ARM::VST3d32Pseudo: 3026 case ARM::VST1d8TPseudo: 3027 case ARM::VST1d8TPseudoWB_fixed: 3028 case ARM::VST1d8TPseudoWB_register: 3029 case ARM::VST1d16TPseudo: 3030 
case ARM::VST1d16TPseudoWB_fixed: 3031 case ARM::VST1d16TPseudoWB_register: 3032 case ARM::VST1d32TPseudo: 3033 case ARM::VST1d32TPseudoWB_fixed: 3034 case ARM::VST1d32TPseudoWB_register: 3035 case ARM::VST1d64TPseudo: 3036 case ARM::VST1d64TPseudoWB_fixed: 3037 case ARM::VST1d64TPseudoWB_register: 3038 case ARM::VST3d8Pseudo_UPD: 3039 case ARM::VST3d16Pseudo_UPD: 3040 case ARM::VST3d32Pseudo_UPD: 3041 case ARM::VST3q8Pseudo_UPD: 3042 case ARM::VST3q16Pseudo_UPD: 3043 case ARM::VST3q32Pseudo_UPD: 3044 case ARM::VST3q8oddPseudo: 3045 case ARM::VST3q16oddPseudo: 3046 case ARM::VST3q32oddPseudo: 3047 case ARM::VST3q8oddPseudo_UPD: 3048 case ARM::VST3q16oddPseudo_UPD: 3049 case ARM::VST3q32oddPseudo_UPD: 3050 case ARM::VST4d8Pseudo: 3051 case ARM::VST4d16Pseudo: 3052 case ARM::VST4d32Pseudo: 3053 case ARM::VST1d8QPseudo: 3054 case ARM::VST1d8QPseudoWB_fixed: 3055 case ARM::VST1d8QPseudoWB_register: 3056 case ARM::VST1d16QPseudo: 3057 case ARM::VST1d16QPseudoWB_fixed: 3058 case ARM::VST1d16QPseudoWB_register: 3059 case ARM::VST1d32QPseudo: 3060 case ARM::VST1d32QPseudoWB_fixed: 3061 case ARM::VST1d32QPseudoWB_register: 3062 case ARM::VST1d64QPseudo: 3063 case ARM::VST1d64QPseudoWB_fixed: 3064 case ARM::VST1d64QPseudoWB_register: 3065 case ARM::VST4d8Pseudo_UPD: 3066 case ARM::VST4d16Pseudo_UPD: 3067 case ARM::VST4d32Pseudo_UPD: 3068 case ARM::VST1q8HighQPseudo: 3069 case ARM::VST1q8LowQPseudo_UPD: 3070 case ARM::VST1q8HighTPseudo: 3071 case ARM::VST1q8LowTPseudo_UPD: 3072 case ARM::VST1q16HighQPseudo: 3073 case ARM::VST1q16LowQPseudo_UPD: 3074 case ARM::VST1q16HighTPseudo: 3075 case ARM::VST1q16LowTPseudo_UPD: 3076 case ARM::VST1q32HighQPseudo: 3077 case ARM::VST1q32LowQPseudo_UPD: 3078 case ARM::VST1q32HighTPseudo: 3079 case ARM::VST1q32LowTPseudo_UPD: 3080 case ARM::VST1q64HighQPseudo: 3081 case ARM::VST1q64LowQPseudo_UPD: 3082 case ARM::VST1q64HighTPseudo: 3083 case ARM::VST1q64LowTPseudo_UPD: 3084 case ARM::VST1q8HighTPseudo_UPD: 3085 case 
ARM::VST1q16HighTPseudo_UPD: 3086 case ARM::VST1q32HighTPseudo_UPD: 3087 case ARM::VST1q64HighTPseudo_UPD: 3088 case ARM::VST1q8HighQPseudo_UPD: 3089 case ARM::VST1q16HighQPseudo_UPD: 3090 case ARM::VST1q32HighQPseudo_UPD: 3091 case ARM::VST1q64HighQPseudo_UPD: 3092 case ARM::VST4q8Pseudo_UPD: 3093 case ARM::VST4q16Pseudo_UPD: 3094 case ARM::VST4q32Pseudo_UPD: 3095 case ARM::VST4q8oddPseudo: 3096 case ARM::VST4q16oddPseudo: 3097 case ARM::VST4q32oddPseudo: 3098 case ARM::VST4q8oddPseudo_UPD: 3099 case ARM::VST4q16oddPseudo_UPD: 3100 case ARM::VST4q32oddPseudo_UPD: 3101 ExpandVST(MBBI); 3102 return true; 3103 3104 case ARM::VLD1LNq8Pseudo: 3105 case ARM::VLD1LNq16Pseudo: 3106 case ARM::VLD1LNq32Pseudo: 3107 case ARM::VLD1LNq8Pseudo_UPD: 3108 case ARM::VLD1LNq16Pseudo_UPD: 3109 case ARM::VLD1LNq32Pseudo_UPD: 3110 case ARM::VLD2LNd8Pseudo: 3111 case ARM::VLD2LNd16Pseudo: 3112 case ARM::VLD2LNd32Pseudo: 3113 case ARM::VLD2LNq16Pseudo: 3114 case ARM::VLD2LNq32Pseudo: 3115 case ARM::VLD2LNd8Pseudo_UPD: 3116 case ARM::VLD2LNd16Pseudo_UPD: 3117 case ARM::VLD2LNd32Pseudo_UPD: 3118 case ARM::VLD2LNq16Pseudo_UPD: 3119 case ARM::VLD2LNq32Pseudo_UPD: 3120 case ARM::VLD3LNd8Pseudo: 3121 case ARM::VLD3LNd16Pseudo: 3122 case ARM::VLD3LNd32Pseudo: 3123 case ARM::VLD3LNq16Pseudo: 3124 case ARM::VLD3LNq32Pseudo: 3125 case ARM::VLD3LNd8Pseudo_UPD: 3126 case ARM::VLD3LNd16Pseudo_UPD: 3127 case ARM::VLD3LNd32Pseudo_UPD: 3128 case ARM::VLD3LNq16Pseudo_UPD: 3129 case ARM::VLD3LNq32Pseudo_UPD: 3130 case ARM::VLD4LNd8Pseudo: 3131 case ARM::VLD4LNd16Pseudo: 3132 case ARM::VLD4LNd32Pseudo: 3133 case ARM::VLD4LNq16Pseudo: 3134 case ARM::VLD4LNq32Pseudo: 3135 case ARM::VLD4LNd8Pseudo_UPD: 3136 case ARM::VLD4LNd16Pseudo_UPD: 3137 case ARM::VLD4LNd32Pseudo_UPD: 3138 case ARM::VLD4LNq16Pseudo_UPD: 3139 case ARM::VLD4LNq32Pseudo_UPD: 3140 case ARM::VST1LNq8Pseudo: 3141 case ARM::VST1LNq16Pseudo: 3142 case ARM::VST1LNq32Pseudo: 3143 case ARM::VST1LNq8Pseudo_UPD: 3144 case ARM::VST1LNq16Pseudo_UPD: 
3145 case ARM::VST1LNq32Pseudo_UPD: 3146 case ARM::VST2LNd8Pseudo: 3147 case ARM::VST2LNd16Pseudo: 3148 case ARM::VST2LNd32Pseudo: 3149 case ARM::VST2LNq16Pseudo: 3150 case ARM::VST2LNq32Pseudo: 3151 case ARM::VST2LNd8Pseudo_UPD: 3152 case ARM::VST2LNd16Pseudo_UPD: 3153 case ARM::VST2LNd32Pseudo_UPD: 3154 case ARM::VST2LNq16Pseudo_UPD: 3155 case ARM::VST2LNq32Pseudo_UPD: 3156 case ARM::VST3LNd8Pseudo: 3157 case ARM::VST3LNd16Pseudo: 3158 case ARM::VST3LNd32Pseudo: 3159 case ARM::VST3LNq16Pseudo: 3160 case ARM::VST3LNq32Pseudo: 3161 case ARM::VST3LNd8Pseudo_UPD: 3162 case ARM::VST3LNd16Pseudo_UPD: 3163 case ARM::VST3LNd32Pseudo_UPD: 3164 case ARM::VST3LNq16Pseudo_UPD: 3165 case ARM::VST3LNq32Pseudo_UPD: 3166 case ARM::VST4LNd8Pseudo: 3167 case ARM::VST4LNd16Pseudo: 3168 case ARM::VST4LNd32Pseudo: 3169 case ARM::VST4LNq16Pseudo: 3170 case ARM::VST4LNq32Pseudo: 3171 case ARM::VST4LNd8Pseudo_UPD: 3172 case ARM::VST4LNd16Pseudo_UPD: 3173 case ARM::VST4LNd32Pseudo_UPD: 3174 case ARM::VST4LNq16Pseudo_UPD: 3175 case ARM::VST4LNq32Pseudo_UPD: 3176 ExpandLaneOp(MBBI); 3177 return true; 3178 3179 case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true; 3180 case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true; 3181 case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true; 3182 case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true; 3183 3184 case ARM::MQQPRLoad: 3185 case ARM::MQQPRStore: 3186 case ARM::MQQQQPRLoad: 3187 case ARM::MQQQQPRStore: 3188 ExpandMQQPRLoadStore(MBBI); 3189 return true; 3190 3191 case ARM::tCMP_SWAP_8: 3192 assert(STI->isThumb()); 3193 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB, 3194 NextMBBI); 3195 case ARM::tCMP_SWAP_16: 3196 assert(STI->isThumb()); 3197 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH, 3198 NextMBBI); 3199 case ARM::tCMP_SWAP_32: 3200 assert(STI->isThumb()); 3201 return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, 
ARM::t2STREX, 0, NextMBBI); 3202 3203 case ARM::CMP_SWAP_8: 3204 assert(!STI->isThumb()); 3205 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB, 3206 NextMBBI); 3207 case ARM::CMP_SWAP_16: 3208 assert(!STI->isThumb()); 3209 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH, 3210 NextMBBI); 3211 case ARM::CMP_SWAP_32: 3212 assert(!STI->isThumb()); 3213 return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI); 3214 3215 case ARM::CMP_SWAP_64: 3216 return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI); 3217 3218 case ARM::tBL_PUSHLR: 3219 case ARM::BL_PUSHLR: { 3220 const bool Thumb = Opcode == ARM::tBL_PUSHLR; 3221 Register Reg = MI.getOperand(0).getReg(); 3222 assert(Reg == ARM::LR && "expect LR register!"); 3223 MachineInstrBuilder MIB; 3224 if (Thumb) { 3225 // push {lr} 3226 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH)) 3227 .add(predOps(ARMCC::AL)) 3228 .addReg(Reg); 3229 3230 // bl __gnu_mcount_nc 3231 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL)); 3232 } else { 3233 // stmdb sp!, {lr} 3234 BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD)) 3235 .addReg(ARM::SP, RegState::Define) 3236 .addReg(ARM::SP) 3237 .add(predOps(ARMCC::AL)) 3238 .addReg(Reg); 3239 3240 // bl __gnu_mcount_nc 3241 MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL)); 3242 } 3243 MIB.cloneMemRefs(MI); 3244 for (const MachineOperand &MO : llvm::drop_begin(MI.operands())) 3245 MIB.add(MO); 3246 MI.eraseFromParent(); 3247 return true; 3248 } 3249 case ARM::t2CALL_BTI: { 3250 MachineFunction &MF = *MI.getMF(); 3251 MachineInstrBuilder MIB = 3252 BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::tBL)); 3253 MIB.cloneMemRefs(MI); 3254 for (unsigned i = 0; i < MI.getNumOperands(); ++i) 3255 MIB.add(MI.getOperand(i)); 3256 if (MI.isCandidateForAdditionalCallInfo()) 3257 MF.moveAdditionalCallInfo(&MI, MIB.getInstr()); 3258 MIBundleBuilder Bundler(MBB, MI); 3259 Bundler.append(MIB); 3260 
    // Tail of the t2CALL_BTI expansion: append the BTI landing pad right
    // after the call and fuse {tBL, t2BTI} into one bundle so later passes
    // cannot separate the call from its branch-target instruction.
    Bundler.append(BuildMI(MF, MI.getDebugLoc(), TII->get(ARM::t2BTI)));
    finalizeBundle(MBB, Bundler.begin(), Bundler.end());
    MI.eraseFromParent();
    return true;
  }
  case ARM::LOADDUAL:
  case ARM::STOREDUAL: {
    // Expand the LOADDUAL/STOREDUAL pseudo into a real LDRD/STRD operating
    // on the two GPR halves (gsub_0/gsub_1) of the 64-bit pair register.
    Register PairReg = MI.getOperand(0).getReg();

    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
            // Both halves are defs for a load, plain uses for a store.
            .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
                    Opcode == ARM::LOADDUAL ? RegState::Define : 0)
            .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
                    Opcode == ARM::LOADDUAL ? RegState::Define : 0);
    // Copy the remaining (addressing) operands from the pseudo, then
    // predicate the new instruction and carry over its memory operands.
    for (const MachineOperand &MO : llvm::drop_begin(MI.operands()))
      MIB.add(MO);
    MIB.add(predOps(ARMCC::AL));
    MIB.cloneMemRefs(MI);
    MI.eraseFromParent();
    return true;
  }
  }
}

/// Expand all pseudo instructions contained in basic block \p MBB.
/// \returns true if any instruction was rewritten.
bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  // Capture the successor iterator before each call: ExpandMI may erase the
  // current instruction, and it takes NMBBI by reference so an expansion can
  // also adjust where iteration resumes.
  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= ExpandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  // Cache the subtarget plus its instruction/register info and the
  // ARM-specific function info for use by the expansion helpers.
  STI = &MF.getSubtarget<ARMSubtarget>();
  TII = STI->getInstrInfo();
  TRI = STI->getRegisterInfo();
  AFI = MF.getInfo<ARMFunctionInfo>();

  LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
                    << "********** Function: " << MF.getName() << '\n');

  bool Modified = false;
  for (MachineBasicBlock &MBB : MF)
    Modified |= ExpandMBB(MBB);
  // Optionally run the machine verifier (-verify-arm-pseudo-expand) to catch
  // malformed machine code produced by the expansions above.
  if (VerifyARMPseudo)
    MF.verify(this, "After expanding ARM pseudo instructions.");

  LLVM_DEBUG(dbgs() << "***************************************************\n");
  return Modified;
}

/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
/// expansion pass.
FunctionPass *llvm::createARMExpandPseudoPass() {
  return new ARMExpandPseudo();
}