xref: /netbsd-src/external/apache2/llvm/dist/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp (revision 82d56013d7b633d116a93943de88e08335357a7c)
1 //===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling, if-conversion, and other late
11 // optimizations. This pass should be run after register allocation but before
12 // the post-regalloc scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "ARM.h"
17 #include "ARMBaseInstrInfo.h"
18 #include "ARMBaseRegisterInfo.h"
19 #include "ARMConstantPoolValue.h"
20 #include "ARMMachineFunctionInfo.h"
21 #include "ARMSubtarget.h"
22 #include "MCTargetDesc/ARMAddressingModes.h"
23 #include "llvm/CodeGen/LivePhysRegs.h"
24 #include "llvm/CodeGen/MachineFrameInfo.h"
25 #include "llvm/CodeGen/MachineFunctionPass.h"
26 #include "llvm/Support/Debug.h"
27 
28 using namespace llvm;
29 
30 #define DEBUG_TYPE "arm-pseudo"
31 
32 static cl::opt<bool>
33 VerifyARMPseudo("verify-arm-pseudo-expand", cl::Hidden,
34                 cl::desc("Verify machine code after expanding ARM pseudos"));
35 
36 #define ARM_EXPAND_PSEUDO_NAME "ARM pseudo instruction expansion pass"
37 
38 namespace {
39   class ARMExpandPseudo : public MachineFunctionPass {
40   public:
41     static char ID;
ARMExpandPseudo()42     ARMExpandPseudo() : MachineFunctionPass(ID) {}
43 
44     const ARMBaseInstrInfo *TII;
45     const TargetRegisterInfo *TRI;
46     const ARMSubtarget *STI;
47     ARMFunctionInfo *AFI;
48 
49     bool runOnMachineFunction(MachineFunction &Fn) override;
50 
getRequiredProperties() const51     MachineFunctionProperties getRequiredProperties() const override {
52       return MachineFunctionProperties().set(
53           MachineFunctionProperties::Property::NoVRegs);
54     }
55 
getPassName() const56     StringRef getPassName() const override {
57       return ARM_EXPAND_PSEUDO_NAME;
58     }
59 
60   private:
61     void TransferImpOps(MachineInstr &OldMI,
62                         MachineInstrBuilder &UseMI, MachineInstrBuilder &DefMI);
63     bool ExpandMI(MachineBasicBlock &MBB,
64                   MachineBasicBlock::iterator MBBI,
65                   MachineBasicBlock::iterator &NextMBBI);
66     bool ExpandMBB(MachineBasicBlock &MBB);
67     void ExpandVLD(MachineBasicBlock::iterator &MBBI);
68     void ExpandVST(MachineBasicBlock::iterator &MBBI);
69     void ExpandLaneOp(MachineBasicBlock::iterator &MBBI);
70     void ExpandVTBL(MachineBasicBlock::iterator &MBBI,
71                     unsigned Opc, bool IsExt);
72     void ExpandMOV32BitImm(MachineBasicBlock &MBB,
73                            MachineBasicBlock::iterator &MBBI);
74     void CMSEClearGPRegs(MachineBasicBlock &MBB,
75                          MachineBasicBlock::iterator MBBI, const DebugLoc &DL,
76                          const SmallVectorImpl<unsigned> &ClearRegs,
77                          unsigned ClobberReg);
78     MachineBasicBlock &CMSEClearFPRegs(MachineBasicBlock &MBB,
79                                        MachineBasicBlock::iterator MBBI);
80     MachineBasicBlock &CMSEClearFPRegsV8(MachineBasicBlock &MBB,
81                                          MachineBasicBlock::iterator MBBI,
82                                          const BitVector &ClearRegs);
83     MachineBasicBlock &CMSEClearFPRegsV81(MachineBasicBlock &MBB,
84                                           MachineBasicBlock::iterator MBBI,
85                                           const BitVector &ClearRegs);
86     void CMSESaveClearFPRegs(MachineBasicBlock &MBB,
87                              MachineBasicBlock::iterator MBBI, DebugLoc &DL,
88                              const LivePhysRegs &LiveRegs,
89                              SmallVectorImpl<unsigned> &AvailableRegs);
90     void CMSESaveClearFPRegsV8(MachineBasicBlock &MBB,
91                                MachineBasicBlock::iterator MBBI, DebugLoc &DL,
92                                const LivePhysRegs &LiveRegs,
93                                SmallVectorImpl<unsigned> &ScratchRegs);
94     void CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
95                                 MachineBasicBlock::iterator MBBI, DebugLoc &DL,
96                                 const LivePhysRegs &LiveRegs);
97     void CMSERestoreFPRegs(MachineBasicBlock &MBB,
98                            MachineBasicBlock::iterator MBBI, DebugLoc &DL,
99                            SmallVectorImpl<unsigned> &AvailableRegs);
100     void CMSERestoreFPRegsV8(MachineBasicBlock &MBB,
101                              MachineBasicBlock::iterator MBBI, DebugLoc &DL,
102                              SmallVectorImpl<unsigned> &AvailableRegs);
103     void CMSERestoreFPRegsV81(MachineBasicBlock &MBB,
104                               MachineBasicBlock::iterator MBBI, DebugLoc &DL,
105                               SmallVectorImpl<unsigned> &AvailableRegs);
106     bool ExpandCMP_SWAP(MachineBasicBlock &MBB,
107                         MachineBasicBlock::iterator MBBI, unsigned LdrexOp,
108                         unsigned StrexOp, unsigned UxtOp,
109                         MachineBasicBlock::iterator &NextMBBI);
110 
111     bool ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
112                            MachineBasicBlock::iterator MBBI,
113                            MachineBasicBlock::iterator &NextMBBI);
114   };
115   char ARMExpandPseudo::ID = 0;
116 }
117 
// Register ARMExpandPseudo with the pass registry, using DEBUG_TYPE as the
// pass argument and ARM_EXPAND_PSEUDO_NAME as its description. The two
// trailing flags mark it as neither CFG-only nor an analysis pass.
INITIALIZE_PASS(ARMExpandPseudo, DEBUG_TYPE, ARM_EXPAND_PSEUDO_NAME, false,
                false)
120 
121 /// TransferImpOps - Transfer implicit operands on the pseudo instruction to
122 /// the instructions created from the expansion.
123 void ARMExpandPseudo::TransferImpOps(MachineInstr &OldMI,
124                                      MachineInstrBuilder &UseMI,
125                                      MachineInstrBuilder &DefMI) {
126   const MCInstrDesc &Desc = OldMI.getDesc();
127   for (unsigned i = Desc.getNumOperands(), e = OldMI.getNumOperands();
128        i != e; ++i) {
129     const MachineOperand &MO = OldMI.getOperand(i);
130     assert(MO.isReg() && MO.getReg());
131     if (MO.isUse())
132       UseMI.add(MO);
133     else
134       DefMI.add(MO);
135   }
136 }
137 
138 namespace {
139   // Constants for register spacing in NEON load/store instructions.
140   // For quad-register load-lane and store-lane pseudo instructors, the
141   // spacing is initially assumed to be EvenDblSpc, and that is changed to
142   // OddDblSpc depending on the lane number operand.
143   enum NEONRegSpacing {
144     SingleSpc,
145     SingleLowSpc ,  // Single spacing, low registers, three and four vectors.
146     SingleHighQSpc, // Single spacing, high registers, four vectors.
147     SingleHighTSpc, // Single spacing, high registers, three vectors.
148     EvenDblSpc,
149     OddDblSpc
150   };
151 
152   // Entries for NEON load/store information table.  The table is sorted by
153   // PseudoOpc for fast binary-search lookups.
154   struct NEONLdStTableEntry {
155     uint16_t PseudoOpc;
156     uint16_t RealOpc;
157     bool IsLoad;
158     bool isUpdating;
159     bool hasWritebackOperand;
160     uint8_t RegSpacing; // One of type NEONRegSpacing
161     uint8_t NumRegs; // D registers loaded or stored
162     uint8_t RegElts; // elements per D register; used for lane ops
163     // FIXME: Temporary flag to denote whether the real instruction takes
164     // a single register (like the encoding) or all of the registers in
165     // the list (like the asm syntax and the isel DAG). When all definitions
166     // are converted to take only the single encoded register, this will
167     // go away.
168     bool copyAllListRegs;
169 
170     // Comparison methods for binary search of the table.
operator <__anond11e20210211::NEONLdStTableEntry171     bool operator<(const NEONLdStTableEntry &TE) const {
172       return PseudoOpc < TE.PseudoOpc;
173     }
operator <(const NEONLdStTableEntry & TE,unsigned PseudoOpc)174     friend bool operator<(const NEONLdStTableEntry &TE, unsigned PseudoOpc) {
175       return TE.PseudoOpc < PseudoOpc;
176     }
operator <(unsigned PseudoOpc,const NEONLdStTableEntry & TE)177     friend bool LLVM_ATTRIBUTE_UNUSED operator<(unsigned PseudoOpc,
178                                                 const NEONLdStTableEntry &TE) {
179       return PseudoOpc < TE.PseudoOpc;
180     }
181   };
182 }
183 
184 static const NEONLdStTableEntry NEONLdStTable[] = {
185 { ARM::VLD1LNq16Pseudo,     ARM::VLD1LNd16,     true, false, false, EvenDblSpc, 1, 4 ,true},
186 { ARM::VLD1LNq16Pseudo_UPD, ARM::VLD1LNd16_UPD, true, true, true,  EvenDblSpc, 1, 4 ,true},
187 { ARM::VLD1LNq32Pseudo,     ARM::VLD1LNd32,     true, false, false, EvenDblSpc, 1, 2 ,true},
188 { ARM::VLD1LNq32Pseudo_UPD, ARM::VLD1LNd32_UPD, true, true, true,  EvenDblSpc, 1, 2 ,true},
189 { ARM::VLD1LNq8Pseudo,      ARM::VLD1LNd8,      true, false, false, EvenDblSpc, 1, 8 ,true},
190 { ARM::VLD1LNq8Pseudo_UPD,  ARM::VLD1LNd8_UPD, true, true, true,  EvenDblSpc, 1, 8 ,true},
191 
192 { ARM::VLD1d16QPseudo,      ARM::VLD1d16Q,     true,  false, false, SingleSpc,  4, 4 ,false},
193 { ARM::VLD1d16TPseudo,      ARM::VLD1d16T,     true,  false, false, SingleSpc,  3, 4 ,false},
194 { ARM::VLD1d32QPseudo,      ARM::VLD1d32Q,     true,  false, false, SingleSpc,  4, 2 ,false},
195 { ARM::VLD1d32TPseudo,      ARM::VLD1d32T,     true,  false, false, SingleSpc,  3, 2 ,false},
196 { ARM::VLD1d64QPseudo,      ARM::VLD1d64Q,     true,  false, false, SingleSpc,  4, 1 ,false},
197 { ARM::VLD1d64QPseudoWB_fixed,  ARM::VLD1d64Qwb_fixed,   true,  true, false, SingleSpc,  4, 1 ,false},
198 { ARM::VLD1d64QPseudoWB_register,  ARM::VLD1d64Qwb_register,   true,  true, true, SingleSpc,  4, 1 ,false},
199 { ARM::VLD1d64TPseudo,      ARM::VLD1d64T,     true,  false, false, SingleSpc,  3, 1 ,false},
200 { ARM::VLD1d64TPseudoWB_fixed,  ARM::VLD1d64Twb_fixed,   true,  true, false, SingleSpc,  3, 1 ,false},
201 { ARM::VLD1d64TPseudoWB_register,  ARM::VLD1d64Twb_register, true, true, true,  SingleSpc,  3, 1 ,false},
202 { ARM::VLD1d8QPseudo,       ARM::VLD1d8Q,      true,  false, false, SingleSpc,  4, 8 ,false},
203 { ARM::VLD1d8TPseudo,       ARM::VLD1d8T,      true,  false, false, SingleSpc,  3, 8 ,false},
204 { ARM::VLD1q16HighQPseudo,  ARM::VLD1d16Q,     true,  false, false, SingleHighQSpc,  4, 4 ,false},
205 { ARM::VLD1q16HighTPseudo,  ARM::VLD1d16T,     true,  false, false, SingleHighTSpc,  3, 4 ,false},
206 { ARM::VLD1q16LowQPseudo_UPD,  ARM::VLD1d16Qwb_fixed,   true,  true, true, SingleLowSpc,  4, 4 ,false},
207 { ARM::VLD1q16LowTPseudo_UPD,  ARM::VLD1d16Twb_fixed,   true,  true, true, SingleLowSpc,  3, 4 ,false},
208 { ARM::VLD1q32HighQPseudo,  ARM::VLD1d32Q,     true,  false, false, SingleHighQSpc,  4, 2 ,false},
209 { ARM::VLD1q32HighTPseudo,  ARM::VLD1d32T,     true,  false, false, SingleHighTSpc,  3, 2 ,false},
210 { ARM::VLD1q32LowQPseudo_UPD,  ARM::VLD1d32Qwb_fixed,   true,  true, true, SingleLowSpc,  4, 2 ,false},
211 { ARM::VLD1q32LowTPseudo_UPD,  ARM::VLD1d32Twb_fixed,   true,  true, true, SingleLowSpc,  3, 2 ,false},
212 { ARM::VLD1q64HighQPseudo,  ARM::VLD1d64Q,     true,  false, false, SingleHighQSpc,  4, 1 ,false},
213 { ARM::VLD1q64HighTPseudo,  ARM::VLD1d64T,     true,  false, false, SingleHighTSpc,  3, 1 ,false},
214 { ARM::VLD1q64LowQPseudo_UPD,  ARM::VLD1d64Qwb_fixed,   true,  true, true, SingleLowSpc,  4, 1 ,false},
215 { ARM::VLD1q64LowTPseudo_UPD,  ARM::VLD1d64Twb_fixed,   true,  true, true, SingleLowSpc,  3, 1 ,false},
216 { ARM::VLD1q8HighQPseudo,   ARM::VLD1d8Q,     true,  false, false, SingleHighQSpc,  4, 8 ,false},
217 { ARM::VLD1q8HighTPseudo,   ARM::VLD1d8T,     true,  false, false, SingleHighTSpc,  3, 8 ,false},
218 { ARM::VLD1q8LowQPseudo_UPD,  ARM::VLD1d8Qwb_fixed,   true,  true, true, SingleLowSpc,  4, 8 ,false},
219 { ARM::VLD1q8LowTPseudo_UPD,  ARM::VLD1d8Twb_fixed,   true,  true, true, SingleLowSpc,  3, 8 ,false},
220 
221 { ARM::VLD2DUPq16EvenPseudo,  ARM::VLD2DUPd16x2,  true, false, false, EvenDblSpc, 2, 4 ,false},
222 { ARM::VLD2DUPq16OddPseudo,   ARM::VLD2DUPd16x2,  true, false, false, OddDblSpc,  2, 4 ,false},
223 { ARM::VLD2DUPq32EvenPseudo,  ARM::VLD2DUPd32x2,  true, false, false, EvenDblSpc, 2, 2 ,false},
224 { ARM::VLD2DUPq32OddPseudo,   ARM::VLD2DUPd32x2,  true, false, false, OddDblSpc,  2, 2 ,false},
225 { ARM::VLD2DUPq8EvenPseudo,   ARM::VLD2DUPd8x2,   true, false, false, EvenDblSpc, 2, 8 ,false},
226 { ARM::VLD2DUPq8OddPseudo,    ARM::VLD2DUPd8x2,   true, false, false, OddDblSpc,  2, 8 ,false},
227 
228 { ARM::VLD2LNd16Pseudo,     ARM::VLD2LNd16,     true, false, false, SingleSpc,  2, 4 ,true},
229 { ARM::VLD2LNd16Pseudo_UPD, ARM::VLD2LNd16_UPD, true, true, true,  SingleSpc,  2, 4 ,true},
230 { ARM::VLD2LNd32Pseudo,     ARM::VLD2LNd32,     true, false, false, SingleSpc,  2, 2 ,true},
231 { ARM::VLD2LNd32Pseudo_UPD, ARM::VLD2LNd32_UPD, true, true, true,  SingleSpc,  2, 2 ,true},
232 { ARM::VLD2LNd8Pseudo,      ARM::VLD2LNd8,      true, false, false, SingleSpc,  2, 8 ,true},
233 { ARM::VLD2LNd8Pseudo_UPD,  ARM::VLD2LNd8_UPD, true, true, true,  SingleSpc,  2, 8 ,true},
234 { ARM::VLD2LNq16Pseudo,     ARM::VLD2LNq16,     true, false, false, EvenDblSpc, 2, 4 ,true},
235 { ARM::VLD2LNq16Pseudo_UPD, ARM::VLD2LNq16_UPD, true, true, true,  EvenDblSpc, 2, 4 ,true},
236 { ARM::VLD2LNq32Pseudo,     ARM::VLD2LNq32,     true, false, false, EvenDblSpc, 2, 2 ,true},
237 { ARM::VLD2LNq32Pseudo_UPD, ARM::VLD2LNq32_UPD, true, true, true,  EvenDblSpc, 2, 2 ,true},
238 
239 { ARM::VLD2q16Pseudo,       ARM::VLD2q16,      true,  false, false, SingleSpc,  4, 4 ,false},
240 { ARM::VLD2q16PseudoWB_fixed,   ARM::VLD2q16wb_fixed, true, true, false,  SingleSpc,  4, 4 ,false},
241 { ARM::VLD2q16PseudoWB_register,   ARM::VLD2q16wb_register, true, true, true,  SingleSpc,  4, 4 ,false},
242 { ARM::VLD2q32Pseudo,       ARM::VLD2q32,      true,  false, false, SingleSpc,  4, 2 ,false},
243 { ARM::VLD2q32PseudoWB_fixed,   ARM::VLD2q32wb_fixed, true, true, false,  SingleSpc,  4, 2 ,false},
244 { ARM::VLD2q32PseudoWB_register,   ARM::VLD2q32wb_register, true, true, true,  SingleSpc,  4, 2 ,false},
245 { ARM::VLD2q8Pseudo,        ARM::VLD2q8,       true,  false, false, SingleSpc,  4, 8 ,false},
246 { ARM::VLD2q8PseudoWB_fixed,    ARM::VLD2q8wb_fixed, true, true, false,  SingleSpc,  4, 8 ,false},
247 { ARM::VLD2q8PseudoWB_register,    ARM::VLD2q8wb_register, true, true, true,  SingleSpc,  4, 8 ,false},
248 
249 { ARM::VLD3DUPd16Pseudo,     ARM::VLD3DUPd16,     true, false, false, SingleSpc, 3, 4,true},
250 { ARM::VLD3DUPd16Pseudo_UPD, ARM::VLD3DUPd16_UPD, true, true, true,  SingleSpc, 3, 4,true},
251 { ARM::VLD3DUPd32Pseudo,     ARM::VLD3DUPd32,     true, false, false, SingleSpc, 3, 2,true},
252 { ARM::VLD3DUPd32Pseudo_UPD, ARM::VLD3DUPd32_UPD, true, true, true,  SingleSpc, 3, 2,true},
253 { ARM::VLD3DUPd8Pseudo,      ARM::VLD3DUPd8,      true, false, false, SingleSpc, 3, 8,true},
254 { ARM::VLD3DUPd8Pseudo_UPD,  ARM::VLD3DUPd8_UPD, true, true, true,  SingleSpc, 3, 8,true},
255 { ARM::VLD3DUPq16EvenPseudo, ARM::VLD3DUPq16,     true, false, false, EvenDblSpc, 3, 4 ,true},
256 { ARM::VLD3DUPq16OddPseudo,  ARM::VLD3DUPq16,     true, false, false, OddDblSpc,  3, 4 ,true},
257 { ARM::VLD3DUPq32EvenPseudo, ARM::VLD3DUPq32,     true, false, false, EvenDblSpc, 3, 2 ,true},
258 { ARM::VLD3DUPq32OddPseudo,  ARM::VLD3DUPq32,     true, false, false, OddDblSpc,  3, 2 ,true},
259 { ARM::VLD3DUPq8EvenPseudo,  ARM::VLD3DUPq8,      true, false, false, EvenDblSpc, 3, 8 ,true},
260 { ARM::VLD3DUPq8OddPseudo,   ARM::VLD3DUPq8,      true, false, false, OddDblSpc,  3, 8 ,true},
261 
262 { ARM::VLD3LNd16Pseudo,     ARM::VLD3LNd16,     true, false, false, SingleSpc,  3, 4 ,true},
263 { ARM::VLD3LNd16Pseudo_UPD, ARM::VLD3LNd16_UPD, true, true, true,  SingleSpc,  3, 4 ,true},
264 { ARM::VLD3LNd32Pseudo,     ARM::VLD3LNd32,     true, false, false, SingleSpc,  3, 2 ,true},
265 { ARM::VLD3LNd32Pseudo_UPD, ARM::VLD3LNd32_UPD, true, true, true,  SingleSpc,  3, 2 ,true},
266 { ARM::VLD3LNd8Pseudo,      ARM::VLD3LNd8,      true, false, false, SingleSpc,  3, 8 ,true},
267 { ARM::VLD3LNd8Pseudo_UPD,  ARM::VLD3LNd8_UPD, true, true, true,  SingleSpc,  3, 8 ,true},
268 { ARM::VLD3LNq16Pseudo,     ARM::VLD3LNq16,     true, false, false, EvenDblSpc, 3, 4 ,true},
269 { ARM::VLD3LNq16Pseudo_UPD, ARM::VLD3LNq16_UPD, true, true, true,  EvenDblSpc, 3, 4 ,true},
270 { ARM::VLD3LNq32Pseudo,     ARM::VLD3LNq32,     true, false, false, EvenDblSpc, 3, 2 ,true},
271 { ARM::VLD3LNq32Pseudo_UPD, ARM::VLD3LNq32_UPD, true, true, true,  EvenDblSpc, 3, 2 ,true},
272 
273 { ARM::VLD3d16Pseudo,       ARM::VLD3d16,      true,  false, false, SingleSpc,  3, 4 ,true},
274 { ARM::VLD3d16Pseudo_UPD,   ARM::VLD3d16_UPD, true, true, true,  SingleSpc,  3, 4 ,true},
275 { ARM::VLD3d32Pseudo,       ARM::VLD3d32,      true,  false, false, SingleSpc,  3, 2 ,true},
276 { ARM::VLD3d32Pseudo_UPD,   ARM::VLD3d32_UPD, true, true, true,  SingleSpc,  3, 2 ,true},
277 { ARM::VLD3d8Pseudo,        ARM::VLD3d8,       true,  false, false, SingleSpc,  3, 8 ,true},
278 { ARM::VLD3d8Pseudo_UPD,    ARM::VLD3d8_UPD, true, true, true,  SingleSpc,  3, 8 ,true},
279 
280 { ARM::VLD3q16Pseudo_UPD,    ARM::VLD3q16_UPD, true, true, true,  EvenDblSpc, 3, 4 ,true},
281 { ARM::VLD3q16oddPseudo,     ARM::VLD3q16,     true,  false, false, OddDblSpc,  3, 4 ,true},
282 { ARM::VLD3q16oddPseudo_UPD, ARM::VLD3q16_UPD, true, true, true,  OddDblSpc,  3, 4 ,true},
283 { ARM::VLD3q32Pseudo_UPD,    ARM::VLD3q32_UPD, true, true, true,  EvenDblSpc, 3, 2 ,true},
284 { ARM::VLD3q32oddPseudo,     ARM::VLD3q32,     true,  false, false, OddDblSpc,  3, 2 ,true},
285 { ARM::VLD3q32oddPseudo_UPD, ARM::VLD3q32_UPD, true, true, true,  OddDblSpc,  3, 2 ,true},
286 { ARM::VLD3q8Pseudo_UPD,     ARM::VLD3q8_UPD, true, true, true,  EvenDblSpc, 3, 8 ,true},
287 { ARM::VLD3q8oddPseudo,      ARM::VLD3q8,      true,  false, false, OddDblSpc,  3, 8 ,true},
288 { ARM::VLD3q8oddPseudo_UPD,  ARM::VLD3q8_UPD, true, true, true,  OddDblSpc,  3, 8 ,true},
289 
290 { ARM::VLD4DUPd16Pseudo,     ARM::VLD4DUPd16,     true, false, false, SingleSpc, 4, 4,true},
291 { ARM::VLD4DUPd16Pseudo_UPD, ARM::VLD4DUPd16_UPD, true, true, true,  SingleSpc, 4, 4,true},
292 { ARM::VLD4DUPd32Pseudo,     ARM::VLD4DUPd32,     true, false, false, SingleSpc, 4, 2,true},
293 { ARM::VLD4DUPd32Pseudo_UPD, ARM::VLD4DUPd32_UPD, true, true, true,  SingleSpc, 4, 2,true},
294 { ARM::VLD4DUPd8Pseudo,      ARM::VLD4DUPd8,      true, false, false, SingleSpc, 4, 8,true},
295 { ARM::VLD4DUPd8Pseudo_UPD,  ARM::VLD4DUPd8_UPD, true, true, true,  SingleSpc, 4, 8,true},
296 { ARM::VLD4DUPq16EvenPseudo, ARM::VLD4DUPq16,     true, false, false, EvenDblSpc, 4, 4 ,true},
297 { ARM::VLD4DUPq16OddPseudo,  ARM::VLD4DUPq16,     true, false, false, OddDblSpc,  4, 4 ,true},
298 { ARM::VLD4DUPq32EvenPseudo, ARM::VLD4DUPq32,     true, false, false, EvenDblSpc, 4, 2 ,true},
299 { ARM::VLD4DUPq32OddPseudo,  ARM::VLD4DUPq32,     true, false, false, OddDblSpc,  4, 2 ,true},
300 { ARM::VLD4DUPq8EvenPseudo,  ARM::VLD4DUPq8,      true, false, false, EvenDblSpc, 4, 8 ,true},
301 { ARM::VLD4DUPq8OddPseudo,   ARM::VLD4DUPq8,      true, false, false, OddDblSpc,  4, 8 ,true},
302 
303 { ARM::VLD4LNd16Pseudo,     ARM::VLD4LNd16,     true, false, false, SingleSpc,  4, 4 ,true},
304 { ARM::VLD4LNd16Pseudo_UPD, ARM::VLD4LNd16_UPD, true, true, true,  SingleSpc,  4, 4 ,true},
305 { ARM::VLD4LNd32Pseudo,     ARM::VLD4LNd32,     true, false, false, SingleSpc,  4, 2 ,true},
306 { ARM::VLD4LNd32Pseudo_UPD, ARM::VLD4LNd32_UPD, true, true, true,  SingleSpc,  4, 2 ,true},
307 { ARM::VLD4LNd8Pseudo,      ARM::VLD4LNd8,      true, false, false, SingleSpc,  4, 8 ,true},
308 { ARM::VLD4LNd8Pseudo_UPD,  ARM::VLD4LNd8_UPD, true, true, true,  SingleSpc,  4, 8 ,true},
309 { ARM::VLD4LNq16Pseudo,     ARM::VLD4LNq16,     true, false, false, EvenDblSpc, 4, 4 ,true},
310 { ARM::VLD4LNq16Pseudo_UPD, ARM::VLD4LNq16_UPD, true, true, true,  EvenDblSpc, 4, 4 ,true},
311 { ARM::VLD4LNq32Pseudo,     ARM::VLD4LNq32,     true, false, false, EvenDblSpc, 4, 2 ,true},
312 { ARM::VLD4LNq32Pseudo_UPD, ARM::VLD4LNq32_UPD, true, true, true,  EvenDblSpc, 4, 2 ,true},
313 
314 { ARM::VLD4d16Pseudo,       ARM::VLD4d16,      true,  false, false, SingleSpc,  4, 4 ,true},
315 { ARM::VLD4d16Pseudo_UPD,   ARM::VLD4d16_UPD, true, true, true,  SingleSpc,  4, 4 ,true},
316 { ARM::VLD4d32Pseudo,       ARM::VLD4d32,      true,  false, false, SingleSpc,  4, 2 ,true},
317 { ARM::VLD4d32Pseudo_UPD,   ARM::VLD4d32_UPD, true, true, true,  SingleSpc,  4, 2 ,true},
318 { ARM::VLD4d8Pseudo,        ARM::VLD4d8,       true,  false, false, SingleSpc,  4, 8 ,true},
319 { ARM::VLD4d8Pseudo_UPD,    ARM::VLD4d8_UPD, true, true, true,  SingleSpc,  4, 8 ,true},
320 
321 { ARM::VLD4q16Pseudo_UPD,    ARM::VLD4q16_UPD, true, true, true,  EvenDblSpc, 4, 4 ,true},
322 { ARM::VLD4q16oddPseudo,     ARM::VLD4q16,     true,  false, false, OddDblSpc,  4, 4 ,true},
323 { ARM::VLD4q16oddPseudo_UPD, ARM::VLD4q16_UPD, true, true, true,  OddDblSpc,  4, 4 ,true},
324 { ARM::VLD4q32Pseudo_UPD,    ARM::VLD4q32_UPD, true, true, true,  EvenDblSpc, 4, 2 ,true},
325 { ARM::VLD4q32oddPseudo,     ARM::VLD4q32,     true,  false, false, OddDblSpc,  4, 2 ,true},
326 { ARM::VLD4q32oddPseudo_UPD, ARM::VLD4q32_UPD, true, true, true,  OddDblSpc,  4, 2 ,true},
327 { ARM::VLD4q8Pseudo_UPD,     ARM::VLD4q8_UPD, true, true, true,  EvenDblSpc, 4, 8 ,true},
328 { ARM::VLD4q8oddPseudo,      ARM::VLD4q8,      true,  false, false, OddDblSpc,  4, 8 ,true},
329 { ARM::VLD4q8oddPseudo_UPD,  ARM::VLD4q8_UPD, true, true, true,  OddDblSpc,  4, 8 ,true},
330 
331 { ARM::VST1LNq16Pseudo,     ARM::VST1LNd16,    false, false, false, EvenDblSpc, 1, 4 ,true},
332 { ARM::VST1LNq16Pseudo_UPD, ARM::VST1LNd16_UPD, false, true, true,  EvenDblSpc, 1, 4 ,true},
333 { ARM::VST1LNq32Pseudo,     ARM::VST1LNd32,    false, false, false, EvenDblSpc, 1, 2 ,true},
334 { ARM::VST1LNq32Pseudo_UPD, ARM::VST1LNd32_UPD, false, true, true,  EvenDblSpc, 1, 2 ,true},
335 { ARM::VST1LNq8Pseudo,      ARM::VST1LNd8,     false, false, false, EvenDblSpc, 1, 8 ,true},
336 { ARM::VST1LNq8Pseudo_UPD,  ARM::VST1LNd8_UPD, false, true, true,  EvenDblSpc, 1, 8 ,true},
337 
338 { ARM::VST1d16QPseudo,      ARM::VST1d16Q,     false, false, false, SingleSpc,  4, 4 ,false},
339 { ARM::VST1d16QPseudoWB_fixed,  ARM::VST1d16Qwb_fixed, false, true, false, SingleSpc,  4, 4 ,false},
340 { ARM::VST1d16QPseudoWB_register, ARM::VST1d16Qwb_register, false, true, true, SingleSpc,  4, 4 ,false},
341 { ARM::VST1d16TPseudo,      ARM::VST1d16T,     false, false, false, SingleSpc,  3, 4 ,false},
342 { ARM::VST1d16TPseudoWB_fixed,  ARM::VST1d16Twb_fixed, false, true, false, SingleSpc,  3, 4 ,false},
343 { ARM::VST1d16TPseudoWB_register, ARM::VST1d16Twb_register, false, true, true, SingleSpc,  3, 4 ,false},
344 
345 { ARM::VST1d32QPseudo,      ARM::VST1d32Q,     false, false, false, SingleSpc,  4, 2 ,false},
346 { ARM::VST1d32QPseudoWB_fixed,  ARM::VST1d32Qwb_fixed, false, true, false, SingleSpc,  4, 2 ,false},
347 { ARM::VST1d32QPseudoWB_register, ARM::VST1d32Qwb_register, false, true, true, SingleSpc,  4, 2 ,false},
348 { ARM::VST1d32TPseudo,      ARM::VST1d32T,     false, false, false, SingleSpc,  3, 2 ,false},
349 { ARM::VST1d32TPseudoWB_fixed,  ARM::VST1d32Twb_fixed, false, true, false, SingleSpc,  3, 2 ,false},
350 { ARM::VST1d32TPseudoWB_register, ARM::VST1d32Twb_register, false, true, true, SingleSpc,  3, 2 ,false},
351 
352 { ARM::VST1d64QPseudo,      ARM::VST1d64Q,     false, false, false, SingleSpc,  4, 1 ,false},
353 { ARM::VST1d64QPseudoWB_fixed,  ARM::VST1d64Qwb_fixed, false, true, false,  SingleSpc,  4, 1 ,false},
354 { ARM::VST1d64QPseudoWB_register, ARM::VST1d64Qwb_register, false, true, true,  SingleSpc,  4, 1 ,false},
355 { ARM::VST1d64TPseudo,      ARM::VST1d64T,     false, false, false, SingleSpc,  3, 1 ,false},
356 { ARM::VST1d64TPseudoWB_fixed,  ARM::VST1d64Twb_fixed, false, true, false,  SingleSpc,  3, 1 ,false},
357 { ARM::VST1d64TPseudoWB_register, ARM::VST1d64Twb_register, false, true, true,  SingleSpc,  3, 1 ,false},
358 
359 { ARM::VST1d8QPseudo,       ARM::VST1d8Q,      false, false, false, SingleSpc,  4, 8 ,false},
360 { ARM::VST1d8QPseudoWB_fixed,   ARM::VST1d8Qwb_fixed, false, true, false, SingleSpc,  4, 8 ,false},
361 { ARM::VST1d8QPseudoWB_register,  ARM::VST1d8Qwb_register, false, true, true, SingleSpc,  4, 8 ,false},
362 { ARM::VST1d8TPseudo,       ARM::VST1d8T,      false, false, false, SingleSpc,  3, 8 ,false},
363 { ARM::VST1d8TPseudoWB_fixed,   ARM::VST1d8Twb_fixed, false, true, false, SingleSpc,  3, 8 ,false},
364 { ARM::VST1d8TPseudoWB_register,  ARM::VST1d8Twb_register, false, true, true, SingleSpc,  3, 8 ,false},
365 
366 { ARM::VST1q16HighQPseudo,  ARM::VST1d16Q,     false, false, false, SingleHighQSpc,   4, 4 ,false},
367 { ARM::VST1q16HighQPseudo_UPD,  ARM::VST1d16Qwb_fixed,  false, true, true, SingleHighQSpc,   4, 8 ,false},
368 { ARM::VST1q16HighTPseudo,  ARM::VST1d16T,     false, false, false, SingleHighTSpc,   3, 4 ,false},
369 { ARM::VST1q16HighTPseudo_UPD,  ARM::VST1d16Twb_fixed,  false, true, true, SingleHighTSpc,   3, 4 ,false},
370 { ARM::VST1q16LowQPseudo_UPD,   ARM::VST1d16Qwb_fixed,  false, true, true, SingleLowSpc,   4, 4 ,false},
371 { ARM::VST1q16LowTPseudo_UPD,   ARM::VST1d16Twb_fixed,  false, true, true, SingleLowSpc,   3, 4 ,false},
372 
373 { ARM::VST1q32HighQPseudo,  ARM::VST1d32Q,     false, false, false, SingleHighQSpc,   4, 2 ,false},
374 { ARM::VST1q32HighQPseudo_UPD,  ARM::VST1d32Qwb_fixed,  false, true, true, SingleHighQSpc,   4, 8 ,false},
375 { ARM::VST1q32HighTPseudo,  ARM::VST1d32T,     false, false, false, SingleHighTSpc,   3, 2 ,false},
376 { ARM::VST1q32HighTPseudo_UPD,  ARM::VST1d32Twb_fixed,  false, true, true, SingleHighTSpc,   3, 2 ,false},
377 { ARM::VST1q32LowQPseudo_UPD,   ARM::VST1d32Qwb_fixed,  false, true, true, SingleLowSpc,   4, 2 ,false},
378 { ARM::VST1q32LowTPseudo_UPD,   ARM::VST1d32Twb_fixed,  false, true, true, SingleLowSpc,   3, 2 ,false},
379 
380 { ARM::VST1q64HighQPseudo,  ARM::VST1d64Q,     false, false, false, SingleHighQSpc,   4, 1 ,false},
381 { ARM::VST1q64HighQPseudo_UPD,  ARM::VST1d64Qwb_fixed,  false, true, true, SingleHighQSpc,   4, 8 ,false},
382 { ARM::VST1q64HighTPseudo,  ARM::VST1d64T,     false, false, false, SingleHighTSpc,   3, 1 ,false},
383 { ARM::VST1q64HighTPseudo_UPD,  ARM::VST1d64Twb_fixed,  false, true, true, SingleHighTSpc,   3, 1 ,false},
384 { ARM::VST1q64LowQPseudo_UPD,   ARM::VST1d64Qwb_fixed,  false, true, true, SingleLowSpc,   4, 1 ,false},
385 { ARM::VST1q64LowTPseudo_UPD,   ARM::VST1d64Twb_fixed,  false, true, true, SingleLowSpc,   3, 1 ,false},
386 
387 { ARM::VST1q8HighQPseudo,   ARM::VST1d8Q,      false, false, false, SingleHighQSpc,   4, 8 ,false},
388 { ARM::VST1q8HighQPseudo_UPD,  ARM::VST1d8Qwb_fixed,  false, true, true, SingleHighQSpc,   4, 8 ,false},
389 { ARM::VST1q8HighTPseudo,   ARM::VST1d8T,      false, false, false, SingleHighTSpc,   3, 8 ,false},
390 { ARM::VST1q8HighTPseudo_UPD,  ARM::VST1d8Twb_fixed,  false, true, true, SingleHighTSpc,   3, 8 ,false},
391 { ARM::VST1q8LowQPseudo_UPD,   ARM::VST1d8Qwb_fixed,  false, true, true, SingleLowSpc,   4, 8 ,false},
392 { ARM::VST1q8LowTPseudo_UPD,   ARM::VST1d8Twb_fixed,  false, true, true, SingleLowSpc,   3, 8 ,false},
393 
394 { ARM::VST2LNd16Pseudo,     ARM::VST2LNd16,     false, false, false, SingleSpc, 2, 4 ,true},
395 { ARM::VST2LNd16Pseudo_UPD, ARM::VST2LNd16_UPD, false, true, true,  SingleSpc, 2, 4 ,true},
396 { ARM::VST2LNd32Pseudo,     ARM::VST2LNd32,     false, false, false, SingleSpc, 2, 2 ,true},
397 { ARM::VST2LNd32Pseudo_UPD, ARM::VST2LNd32_UPD, false, true, true,  SingleSpc, 2, 2 ,true},
398 { ARM::VST2LNd8Pseudo,      ARM::VST2LNd8,      false, false, false, SingleSpc, 2, 8 ,true},
399 { ARM::VST2LNd8Pseudo_UPD,  ARM::VST2LNd8_UPD, false, true, true,  SingleSpc, 2, 8 ,true},
400 { ARM::VST2LNq16Pseudo,     ARM::VST2LNq16,     false, false, false, EvenDblSpc, 2, 4,true},
401 { ARM::VST2LNq16Pseudo_UPD, ARM::VST2LNq16_UPD, false, true, true,  EvenDblSpc, 2, 4,true},
402 { ARM::VST2LNq32Pseudo,     ARM::VST2LNq32,     false, false, false, EvenDblSpc, 2, 2,true},
403 { ARM::VST2LNq32Pseudo_UPD, ARM::VST2LNq32_UPD, false, true, true,  EvenDblSpc, 2, 2,true},
404 
405 { ARM::VST2q16Pseudo,       ARM::VST2q16,      false, false, false, SingleSpc,  4, 4 ,false},
406 { ARM::VST2q16PseudoWB_fixed,   ARM::VST2q16wb_fixed, false, true, false,  SingleSpc,  4, 4 ,false},
407 { ARM::VST2q16PseudoWB_register,   ARM::VST2q16wb_register, false, true, true,  SingleSpc,  4, 4 ,false},
408 { ARM::VST2q32Pseudo,       ARM::VST2q32,      false, false, false, SingleSpc,  4, 2 ,false},
409 { ARM::VST2q32PseudoWB_fixed,   ARM::VST2q32wb_fixed, false, true, false,  SingleSpc,  4, 2 ,false},
410 { ARM::VST2q32PseudoWB_register,   ARM::VST2q32wb_register, false, true, true,  SingleSpc,  4, 2 ,false},
411 { ARM::VST2q8Pseudo,        ARM::VST2q8,       false, false, false, SingleSpc,  4, 8 ,false},
412 { ARM::VST2q8PseudoWB_fixed,    ARM::VST2q8wb_fixed, false, true, false,  SingleSpc,  4, 8 ,false},
413 { ARM::VST2q8PseudoWB_register,    ARM::VST2q8wb_register, false, true, true,  SingleSpc,  4, 8 ,false},
414 
415 { ARM::VST3LNd16Pseudo,     ARM::VST3LNd16,     false, false, false, SingleSpc, 3, 4 ,true},
416 { ARM::VST3LNd16Pseudo_UPD, ARM::VST3LNd16_UPD, false, true, true,  SingleSpc, 3, 4 ,true},
417 { ARM::VST3LNd32Pseudo,     ARM::VST3LNd32,     false, false, false, SingleSpc, 3, 2 ,true},
418 { ARM::VST3LNd32Pseudo_UPD, ARM::VST3LNd32_UPD, false, true, true,  SingleSpc, 3, 2 ,true},
419 { ARM::VST3LNd8Pseudo,      ARM::VST3LNd8,      false, false, false, SingleSpc, 3, 8 ,true},
420 { ARM::VST3LNd8Pseudo_UPD,  ARM::VST3LNd8_UPD, false, true, true,  SingleSpc, 3, 8 ,true},
421 { ARM::VST3LNq16Pseudo,     ARM::VST3LNq16,     false, false, false, EvenDblSpc, 3, 4,true},
422 { ARM::VST3LNq16Pseudo_UPD, ARM::VST3LNq16_UPD, false, true, true,  EvenDblSpc, 3, 4,true},
423 { ARM::VST3LNq32Pseudo,     ARM::VST3LNq32,     false, false, false, EvenDblSpc, 3, 2,true},
424 { ARM::VST3LNq32Pseudo_UPD, ARM::VST3LNq32_UPD, false, true, true,  EvenDblSpc, 3, 2,true},
425 
426 { ARM::VST3d16Pseudo,       ARM::VST3d16,      false, false, false, SingleSpc,  3, 4 ,true},
427 { ARM::VST3d16Pseudo_UPD,   ARM::VST3d16_UPD, false, true, true,  SingleSpc,  3, 4 ,true},
428 { ARM::VST3d32Pseudo,       ARM::VST3d32,      false, false, false, SingleSpc,  3, 2 ,true},
429 { ARM::VST3d32Pseudo_UPD,   ARM::VST3d32_UPD, false, true, true,  SingleSpc,  3, 2 ,true},
430 { ARM::VST3d8Pseudo,        ARM::VST3d8,       false, false, false, SingleSpc,  3, 8 ,true},
431 { ARM::VST3d8Pseudo_UPD,    ARM::VST3d8_UPD, false, true, true,  SingleSpc,  3, 8 ,true},
432 
433 { ARM::VST3q16Pseudo_UPD,    ARM::VST3q16_UPD, false, true, true,  EvenDblSpc, 3, 4 ,true},
434 { ARM::VST3q16oddPseudo,     ARM::VST3q16,     false, false, false, OddDblSpc,  3, 4 ,true},
435 { ARM::VST3q16oddPseudo_UPD, ARM::VST3q16_UPD, false, true, true,  OddDblSpc,  3, 4 ,true},
436 { ARM::VST3q32Pseudo_UPD,    ARM::VST3q32_UPD, false, true, true,  EvenDblSpc, 3, 2 ,true},
437 { ARM::VST3q32oddPseudo,     ARM::VST3q32,     false, false, false, OddDblSpc,  3, 2 ,true},
438 { ARM::VST3q32oddPseudo_UPD, ARM::VST3q32_UPD, false, true, true,  OddDblSpc,  3, 2 ,true},
439 { ARM::VST3q8Pseudo_UPD,     ARM::VST3q8_UPD, false, true, true,  EvenDblSpc, 3, 8 ,true},
440 { ARM::VST3q8oddPseudo,      ARM::VST3q8,      false, false, false, OddDblSpc,  3, 8 ,true},
441 { ARM::VST3q8oddPseudo_UPD,  ARM::VST3q8_UPD, false, true, true,  OddDblSpc,  3, 8 ,true},
442 
443 { ARM::VST4LNd16Pseudo,     ARM::VST4LNd16,     false, false, false, SingleSpc, 4, 4 ,true},
444 { ARM::VST4LNd16Pseudo_UPD, ARM::VST4LNd16_UPD, false, true, true,  SingleSpc, 4, 4 ,true},
445 { ARM::VST4LNd32Pseudo,     ARM::VST4LNd32,     false, false, false, SingleSpc, 4, 2 ,true},
446 { ARM::VST4LNd32Pseudo_UPD, ARM::VST4LNd32_UPD, false, true, true,  SingleSpc, 4, 2 ,true},
447 { ARM::VST4LNd8Pseudo,      ARM::VST4LNd8,      false, false, false, SingleSpc, 4, 8 ,true},
448 { ARM::VST4LNd8Pseudo_UPD,  ARM::VST4LNd8_UPD, false, true, true,  SingleSpc, 4, 8 ,true},
449 { ARM::VST4LNq16Pseudo,     ARM::VST4LNq16,     false, false, false, EvenDblSpc, 4, 4,true},
450 { ARM::VST4LNq16Pseudo_UPD, ARM::VST4LNq16_UPD, false, true, true,  EvenDblSpc, 4, 4,true},
451 { ARM::VST4LNq32Pseudo,     ARM::VST4LNq32,     false, false, false, EvenDblSpc, 4, 2,true},
452 { ARM::VST4LNq32Pseudo_UPD, ARM::VST4LNq32_UPD, false, true, true,  EvenDblSpc, 4, 2,true},
453 
454 { ARM::VST4d16Pseudo,       ARM::VST4d16,      false, false, false, SingleSpc,  4, 4 ,true},
455 { ARM::VST4d16Pseudo_UPD,   ARM::VST4d16_UPD, false, true, true,  SingleSpc,  4, 4 ,true},
456 { ARM::VST4d32Pseudo,       ARM::VST4d32,      false, false, false, SingleSpc,  4, 2 ,true},
457 { ARM::VST4d32Pseudo_UPD,   ARM::VST4d32_UPD, false, true, true,  SingleSpc,  4, 2 ,true},
458 { ARM::VST4d8Pseudo,        ARM::VST4d8,       false, false, false, SingleSpc,  4, 8 ,true},
459 { ARM::VST4d8Pseudo_UPD,    ARM::VST4d8_UPD, false, true, true,  SingleSpc,  4, 8 ,true},
460 
461 { ARM::VST4q16Pseudo_UPD,    ARM::VST4q16_UPD, false, true, true,  EvenDblSpc, 4, 4 ,true},
462 { ARM::VST4q16oddPseudo,     ARM::VST4q16,     false, false, false, OddDblSpc,  4, 4 ,true},
463 { ARM::VST4q16oddPseudo_UPD, ARM::VST4q16_UPD, false, true, true,  OddDblSpc,  4, 4 ,true},
464 { ARM::VST4q32Pseudo_UPD,    ARM::VST4q32_UPD, false, true, true,  EvenDblSpc, 4, 2 ,true},
465 { ARM::VST4q32oddPseudo,     ARM::VST4q32,     false, false, false, OddDblSpc,  4, 2 ,true},
466 { ARM::VST4q32oddPseudo_UPD, ARM::VST4q32_UPD, false, true, true,  OddDblSpc,  4, 2 ,true},
467 { ARM::VST4q8Pseudo_UPD,     ARM::VST4q8_UPD, false, true, true,  EvenDblSpc, 4, 8 ,true},
468 { ARM::VST4q8oddPseudo,      ARM::VST4q8,      false, false, false, OddDblSpc,  4, 8 ,true},
469 { ARM::VST4q8oddPseudo_UPD,  ARM::VST4q8_UPD, false, true, true,  OddDblSpc,  4, 8 ,true}
470 };
471 
472 /// LookupNEONLdSt - Search the NEONLdStTable for information about a NEON
473 /// load or store pseudo instruction.
LookupNEONLdSt(unsigned Opcode)474 static const NEONLdStTableEntry *LookupNEONLdSt(unsigned Opcode) {
475 #ifndef NDEBUG
476   // Make sure the table is sorted.
477   static std::atomic<bool> TableChecked(false);
478   if (!TableChecked.load(std::memory_order_relaxed)) {
479     assert(llvm::is_sorted(NEONLdStTable) && "NEONLdStTable is not sorted!");
480     TableChecked.store(true, std::memory_order_relaxed);
481   }
482 #endif
483 
484   auto I = llvm::lower_bound(NEONLdStTable, Opcode);
485   if (I != std::end(NEONLdStTable) && I->PseudoOpc == Opcode)
486     return I;
487   return nullptr;
488 }
489 
490 /// GetDSubRegs - Get 4 D subregisters of a Q, QQ, or QQQQ register,
491 /// corresponding to the specified register spacing.  Not all of the results
492 /// are necessarily valid, e.g., a Q register only has 2 D subregisters.
GetDSubRegs(unsigned Reg,NEONRegSpacing RegSpc,const TargetRegisterInfo * TRI,unsigned & D0,unsigned & D1,unsigned & D2,unsigned & D3)493 static void GetDSubRegs(unsigned Reg, NEONRegSpacing RegSpc,
494                         const TargetRegisterInfo *TRI, unsigned &D0,
495                         unsigned &D1, unsigned &D2, unsigned &D3) {
496   if (RegSpc == SingleSpc || RegSpc == SingleLowSpc) {
497     D0 = TRI->getSubReg(Reg, ARM::dsub_0);
498     D1 = TRI->getSubReg(Reg, ARM::dsub_1);
499     D2 = TRI->getSubReg(Reg, ARM::dsub_2);
500     D3 = TRI->getSubReg(Reg, ARM::dsub_3);
501   } else if (RegSpc == SingleHighQSpc) {
502     D0 = TRI->getSubReg(Reg, ARM::dsub_4);
503     D1 = TRI->getSubReg(Reg, ARM::dsub_5);
504     D2 = TRI->getSubReg(Reg, ARM::dsub_6);
505     D3 = TRI->getSubReg(Reg, ARM::dsub_7);
506   } else if (RegSpc == SingleHighTSpc) {
507     D0 = TRI->getSubReg(Reg, ARM::dsub_3);
508     D1 = TRI->getSubReg(Reg, ARM::dsub_4);
509     D2 = TRI->getSubReg(Reg, ARM::dsub_5);
510     D3 = TRI->getSubReg(Reg, ARM::dsub_6);
511   } else if (RegSpc == EvenDblSpc) {
512     D0 = TRI->getSubReg(Reg, ARM::dsub_0);
513     D1 = TRI->getSubReg(Reg, ARM::dsub_2);
514     D2 = TRI->getSubReg(Reg, ARM::dsub_4);
515     D3 = TRI->getSubReg(Reg, ARM::dsub_6);
516   } else {
517     assert(RegSpc == OddDblSpc && "unknown register spacing");
518     D0 = TRI->getSubReg(Reg, ARM::dsub_1);
519     D1 = TRI->getSubReg(Reg, ARM::dsub_3);
520     D2 = TRI->getSubReg(Reg, ARM::dsub_5);
521     D3 = TRI->getSubReg(Reg, ARM::dsub_7);
522   }
523 }
524 
525 /// ExpandVLD - Translate VLD pseudo instructions with Q, QQ or QQQQ register
526 /// operands to real VLD instructions with D register operands.
ExpandVLD(MachineBasicBlock::iterator & MBBI)527 void ARMExpandPseudo::ExpandVLD(MachineBasicBlock::iterator &MBBI) {
528   MachineInstr &MI = *MBBI;
529   MachineBasicBlock &MBB = *MI.getParent();
530   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
531 
532   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
533   assert(TableEntry && TableEntry->IsLoad && "NEONLdStTable lookup failed");
534   NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
535   unsigned NumRegs = TableEntry->NumRegs;
536 
537   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
538                                     TII->get(TableEntry->RealOpc));
539   unsigned OpIdx = 0;
540 
541   bool DstIsDead = MI.getOperand(OpIdx).isDead();
542   Register DstReg = MI.getOperand(OpIdx++).getReg();
543   if(TableEntry->RealOpc == ARM::VLD2DUPd8x2 ||
544      TableEntry->RealOpc == ARM::VLD2DUPd16x2 ||
545      TableEntry->RealOpc == ARM::VLD2DUPd32x2) {
546     unsigned SubRegIndex;
547     if (RegSpc == EvenDblSpc) {
548       SubRegIndex = ARM::dsub_0;
549     } else {
550       assert(RegSpc == OddDblSpc && "Unexpected spacing!");
551       SubRegIndex = ARM::dsub_1;
552     }
553     Register SubReg = TRI->getSubReg(DstReg, SubRegIndex);
554     unsigned DstRegPair = TRI->getMatchingSuperReg(SubReg, ARM::dsub_0,
555                                                    &ARM::DPairSpcRegClass);
556     MIB.addReg(DstRegPair, RegState::Define | getDeadRegState(DstIsDead));
557   } else {
558     unsigned D0, D1, D2, D3;
559     GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
560     MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
561     if (NumRegs > 1 && TableEntry->copyAllListRegs)
562       MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
563     if (NumRegs > 2 && TableEntry->copyAllListRegs)
564       MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
565     if (NumRegs > 3 && TableEntry->copyAllListRegs)
566       MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
567   }
568 
569   if (TableEntry->isUpdating)
570     MIB.add(MI.getOperand(OpIdx++));
571 
572   // Copy the addrmode6 operands.
573   MIB.add(MI.getOperand(OpIdx++));
574   MIB.add(MI.getOperand(OpIdx++));
575 
576   // Copy the am6offset operand.
577   if (TableEntry->hasWritebackOperand) {
578     // TODO: The writing-back pseudo instructions we translate here are all
579     // defined to take am6offset nodes that are capable to represent both fixed
580     // and register forms. Some real instructions, however, do not rely on
581     // am6offset and have separate definitions for such forms. When this is the
582     // case, fixed forms do not take any offset nodes, so here we skip them for
583     // such instructions. Once all real and pseudo writing-back instructions are
584     // rewritten without use of am6offset nodes, this code will go away.
585     const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
586     if (TableEntry->RealOpc == ARM::VLD1d8Qwb_fixed ||
587         TableEntry->RealOpc == ARM::VLD1d16Qwb_fixed ||
588         TableEntry->RealOpc == ARM::VLD1d32Qwb_fixed ||
589         TableEntry->RealOpc == ARM::VLD1d64Qwb_fixed ||
590         TableEntry->RealOpc == ARM::VLD1d8Twb_fixed ||
591         TableEntry->RealOpc == ARM::VLD1d16Twb_fixed ||
592         TableEntry->RealOpc == ARM::VLD1d32Twb_fixed ||
593         TableEntry->RealOpc == ARM::VLD1d64Twb_fixed) {
594       assert(AM6Offset.getReg() == 0 &&
595              "A fixed writing-back pseudo instruction provides an offset "
596              "register!");
597     } else {
598       MIB.add(AM6Offset);
599     }
600   }
601 
602   // For an instruction writing double-spaced subregs, the pseudo instruction
603   // has an extra operand that is a use of the super-register.  Record the
604   // operand index and skip over it.
605   unsigned SrcOpIdx = 0;
606   if(TableEntry->RealOpc != ARM::VLD2DUPd8x2 &&
607      TableEntry->RealOpc != ARM::VLD2DUPd16x2 &&
608      TableEntry->RealOpc != ARM::VLD2DUPd32x2) {
609     if (RegSpc == EvenDblSpc || RegSpc == OddDblSpc ||
610         RegSpc == SingleLowSpc || RegSpc == SingleHighQSpc ||
611         RegSpc == SingleHighTSpc)
612       SrcOpIdx = OpIdx++;
613   }
614 
615   // Copy the predicate operands.
616   MIB.add(MI.getOperand(OpIdx++));
617   MIB.add(MI.getOperand(OpIdx++));
618 
619   // Copy the super-register source operand used for double-spaced subregs over
620   // to the new instruction as an implicit operand.
621   if (SrcOpIdx != 0) {
622     MachineOperand MO = MI.getOperand(SrcOpIdx);
623     MO.setImplicit(true);
624     MIB.add(MO);
625   }
626   // Add an implicit def for the super-register.
627   MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
628   TransferImpOps(MI, MIB, MIB);
629 
630   // Transfer memoperands.
631   MIB.cloneMemRefs(MI);
632   MI.eraseFromParent();
633   LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
634 }
635 
636 /// ExpandVST - Translate VST pseudo instructions with Q, QQ or QQQQ register
637 /// operands to real VST instructions with D register operands.
ExpandVST(MachineBasicBlock::iterator & MBBI)638 void ARMExpandPseudo::ExpandVST(MachineBasicBlock::iterator &MBBI) {
639   MachineInstr &MI = *MBBI;
640   MachineBasicBlock &MBB = *MI.getParent();
641   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
642 
643   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
644   assert(TableEntry && !TableEntry->IsLoad && "NEONLdStTable lookup failed");
645   NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
646   unsigned NumRegs = TableEntry->NumRegs;
647 
648   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
649                                     TII->get(TableEntry->RealOpc));
650   unsigned OpIdx = 0;
651   if (TableEntry->isUpdating)
652     MIB.add(MI.getOperand(OpIdx++));
653 
654   // Copy the addrmode6 operands.
655   MIB.add(MI.getOperand(OpIdx++));
656   MIB.add(MI.getOperand(OpIdx++));
657 
658   if (TableEntry->hasWritebackOperand) {
659     // TODO: The writing-back pseudo instructions we translate here are all
660     // defined to take am6offset nodes that are capable to represent both fixed
661     // and register forms. Some real instructions, however, do not rely on
662     // am6offset and have separate definitions for such forms. When this is the
663     // case, fixed forms do not take any offset nodes, so here we skip them for
664     // such instructions. Once all real and pseudo writing-back instructions are
665     // rewritten without use of am6offset nodes, this code will go away.
666     const MachineOperand &AM6Offset = MI.getOperand(OpIdx++);
667     if (TableEntry->RealOpc == ARM::VST1d8Qwb_fixed ||
668         TableEntry->RealOpc == ARM::VST1d16Qwb_fixed ||
669         TableEntry->RealOpc == ARM::VST1d32Qwb_fixed ||
670         TableEntry->RealOpc == ARM::VST1d64Qwb_fixed ||
671         TableEntry->RealOpc == ARM::VST1d8Twb_fixed ||
672         TableEntry->RealOpc == ARM::VST1d16Twb_fixed ||
673         TableEntry->RealOpc == ARM::VST1d32Twb_fixed ||
674         TableEntry->RealOpc == ARM::VST1d64Twb_fixed) {
675       assert(AM6Offset.getReg() == 0 &&
676              "A fixed writing-back pseudo instruction provides an offset "
677              "register!");
678     } else {
679       MIB.add(AM6Offset);
680     }
681   }
682 
683   bool SrcIsKill = MI.getOperand(OpIdx).isKill();
684   bool SrcIsUndef = MI.getOperand(OpIdx).isUndef();
685   Register SrcReg = MI.getOperand(OpIdx++).getReg();
686   unsigned D0, D1, D2, D3;
687   GetDSubRegs(SrcReg, RegSpc, TRI, D0, D1, D2, D3);
688   MIB.addReg(D0, getUndefRegState(SrcIsUndef));
689   if (NumRegs > 1 && TableEntry->copyAllListRegs)
690     MIB.addReg(D1, getUndefRegState(SrcIsUndef));
691   if (NumRegs > 2 && TableEntry->copyAllListRegs)
692     MIB.addReg(D2, getUndefRegState(SrcIsUndef));
693   if (NumRegs > 3 && TableEntry->copyAllListRegs)
694     MIB.addReg(D3, getUndefRegState(SrcIsUndef));
695 
696   // Copy the predicate operands.
697   MIB.add(MI.getOperand(OpIdx++));
698   MIB.add(MI.getOperand(OpIdx++));
699 
700   if (SrcIsKill && !SrcIsUndef) // Add an implicit kill for the super-reg.
701     MIB->addRegisterKilled(SrcReg, TRI, true);
702   else if (!SrcIsUndef)
703     MIB.addReg(SrcReg, RegState::Implicit); // Add implicit uses for src reg.
704   TransferImpOps(MI, MIB, MIB);
705 
706   // Transfer memoperands.
707   MIB.cloneMemRefs(MI);
708   MI.eraseFromParent();
709   LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
710 }
711 
712 /// ExpandLaneOp - Translate VLD*LN and VST*LN instructions with Q, QQ or QQQQ
713 /// register operands to real instructions with D register operands.
ExpandLaneOp(MachineBasicBlock::iterator & MBBI)714 void ARMExpandPseudo::ExpandLaneOp(MachineBasicBlock::iterator &MBBI) {
715   MachineInstr &MI = *MBBI;
716   MachineBasicBlock &MBB = *MI.getParent();
717   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
718 
719   const NEONLdStTableEntry *TableEntry = LookupNEONLdSt(MI.getOpcode());
720   assert(TableEntry && "NEONLdStTable lookup failed");
721   NEONRegSpacing RegSpc = (NEONRegSpacing)TableEntry->RegSpacing;
722   unsigned NumRegs = TableEntry->NumRegs;
723   unsigned RegElts = TableEntry->RegElts;
724 
725   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
726                                     TII->get(TableEntry->RealOpc));
727   unsigned OpIdx = 0;
728   // The lane operand is always the 3rd from last operand, before the 2
729   // predicate operands.
730   unsigned Lane = MI.getOperand(MI.getDesc().getNumOperands() - 3).getImm();
731 
732   // Adjust the lane and spacing as needed for Q registers.
733   assert(RegSpc != OddDblSpc && "unexpected register spacing for VLD/VST-lane");
734   if (RegSpc == EvenDblSpc && Lane >= RegElts) {
735     RegSpc = OddDblSpc;
736     Lane -= RegElts;
737   }
738   assert(Lane < RegElts && "out of range lane for VLD/VST-lane");
739 
740   unsigned D0 = 0, D1 = 0, D2 = 0, D3 = 0;
741   unsigned DstReg = 0;
742   bool DstIsDead = false;
743   if (TableEntry->IsLoad) {
744     DstIsDead = MI.getOperand(OpIdx).isDead();
745     DstReg = MI.getOperand(OpIdx++).getReg();
746     GetDSubRegs(DstReg, RegSpc, TRI, D0, D1, D2, D3);
747     MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead));
748     if (NumRegs > 1)
749       MIB.addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
750     if (NumRegs > 2)
751       MIB.addReg(D2, RegState::Define | getDeadRegState(DstIsDead));
752     if (NumRegs > 3)
753       MIB.addReg(D3, RegState::Define | getDeadRegState(DstIsDead));
754   }
755 
756   if (TableEntry->isUpdating)
757     MIB.add(MI.getOperand(OpIdx++));
758 
759   // Copy the addrmode6 operands.
760   MIB.add(MI.getOperand(OpIdx++));
761   MIB.add(MI.getOperand(OpIdx++));
762   // Copy the am6offset operand.
763   if (TableEntry->hasWritebackOperand)
764     MIB.add(MI.getOperand(OpIdx++));
765 
766   // Grab the super-register source.
767   MachineOperand MO = MI.getOperand(OpIdx++);
768   if (!TableEntry->IsLoad)
769     GetDSubRegs(MO.getReg(), RegSpc, TRI, D0, D1, D2, D3);
770 
771   // Add the subregs as sources of the new instruction.
772   unsigned SrcFlags = (getUndefRegState(MO.isUndef()) |
773                        getKillRegState(MO.isKill()));
774   MIB.addReg(D0, SrcFlags);
775   if (NumRegs > 1)
776     MIB.addReg(D1, SrcFlags);
777   if (NumRegs > 2)
778     MIB.addReg(D2, SrcFlags);
779   if (NumRegs > 3)
780     MIB.addReg(D3, SrcFlags);
781 
782   // Add the lane number operand.
783   MIB.addImm(Lane);
784   OpIdx += 1;
785 
786   // Copy the predicate operands.
787   MIB.add(MI.getOperand(OpIdx++));
788   MIB.add(MI.getOperand(OpIdx++));
789 
790   // Copy the super-register source to be an implicit source.
791   MO.setImplicit(true);
792   MIB.add(MO);
793   if (TableEntry->IsLoad)
794     // Add an implicit def for the super-register.
795     MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
796   TransferImpOps(MI, MIB, MIB);
797   // Transfer memoperands.
798   MIB.cloneMemRefs(MI);
799   MI.eraseFromParent();
800 }
801 
802 /// ExpandVTBL - Translate VTBL and VTBX pseudo instructions with Q or QQ
803 /// register operands to real instructions with D register operands.
ExpandVTBL(MachineBasicBlock::iterator & MBBI,unsigned Opc,bool IsExt)804 void ARMExpandPseudo::ExpandVTBL(MachineBasicBlock::iterator &MBBI,
805                                  unsigned Opc, bool IsExt) {
806   MachineInstr &MI = *MBBI;
807   MachineBasicBlock &MBB = *MI.getParent();
808   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
809 
810   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc));
811   unsigned OpIdx = 0;
812 
813   // Transfer the destination register operand.
814   MIB.add(MI.getOperand(OpIdx++));
815   if (IsExt) {
816     MachineOperand VdSrc(MI.getOperand(OpIdx++));
817     MIB.add(VdSrc);
818   }
819 
820   bool SrcIsKill = MI.getOperand(OpIdx).isKill();
821   Register SrcReg = MI.getOperand(OpIdx++).getReg();
822   unsigned D0, D1, D2, D3;
823   GetDSubRegs(SrcReg, SingleSpc, TRI, D0, D1, D2, D3);
824   MIB.addReg(D0);
825 
826   // Copy the other source register operand.
827   MachineOperand VmSrc(MI.getOperand(OpIdx++));
828   MIB.add(VmSrc);
829 
830   // Copy the predicate operands.
831   MIB.add(MI.getOperand(OpIdx++));
832   MIB.add(MI.getOperand(OpIdx++));
833 
834   // Add an implicit kill and use for the super-reg.
835   MIB.addReg(SrcReg, RegState::Implicit | getKillRegState(SrcIsKill));
836   TransferImpOps(MI, MIB, MIB);
837   MI.eraseFromParent();
838   LLVM_DEBUG(dbgs() << "To:        "; MIB.getInstr()->dump(););
839 }
840 
IsAnAddressOperand(const MachineOperand & MO)841 static bool IsAnAddressOperand(const MachineOperand &MO) {
842   // This check is overly conservative.  Unless we are certain that the machine
843   // operand is not a symbol reference, we return that it is a symbol reference.
844   // This is important as the load pair may not be split up Windows.
845   switch (MO.getType()) {
846   case MachineOperand::MO_Register:
847   case MachineOperand::MO_Immediate:
848   case MachineOperand::MO_CImmediate:
849   case MachineOperand::MO_FPImmediate:
850   case MachineOperand::MO_ShuffleMask:
851     return false;
852   case MachineOperand::MO_MachineBasicBlock:
853     return true;
854   case MachineOperand::MO_FrameIndex:
855     return false;
856   case MachineOperand::MO_ConstantPoolIndex:
857   case MachineOperand::MO_TargetIndex:
858   case MachineOperand::MO_JumpTableIndex:
859   case MachineOperand::MO_ExternalSymbol:
860   case MachineOperand::MO_GlobalAddress:
861   case MachineOperand::MO_BlockAddress:
862     return true;
863   case MachineOperand::MO_RegisterMask:
864   case MachineOperand::MO_RegisterLiveOut:
865     return false;
866   case MachineOperand::MO_Metadata:
867   case MachineOperand::MO_MCSymbol:
868     return true;
869   case MachineOperand::MO_CFIIndex:
870     return false;
871   case MachineOperand::MO_IntrinsicID:
872   case MachineOperand::MO_Predicate:
873     llvm_unreachable("should not exist post-isel");
874   }
875   llvm_unreachable("unhandled machine operand type");
876 }
877 
/// Return a copy of \p MO with its implicit flag set; \p MO itself is left
/// untouched.
static MachineOperand makeImplicit(const MachineOperand &MO) {
  MachineOperand ImplicitCopy(MO);
  ImplicitCopy.setImplicit();
  return ImplicitCopy;
}
883 
ExpandMOV32BitImm(MachineBasicBlock & MBB,MachineBasicBlock::iterator & MBBI)884 void ARMExpandPseudo::ExpandMOV32BitImm(MachineBasicBlock &MBB,
885                                         MachineBasicBlock::iterator &MBBI) {
886   MachineInstr &MI = *MBBI;
887   unsigned Opcode = MI.getOpcode();
888   Register PredReg;
889   ARMCC::CondCodes Pred = getInstrPredicate(MI, PredReg);
890   Register DstReg = MI.getOperand(0).getReg();
891   bool DstIsDead = MI.getOperand(0).isDead();
892   bool isCC = Opcode == ARM::MOVCCi32imm || Opcode == ARM::t2MOVCCi32imm;
893   const MachineOperand &MO = MI.getOperand(isCC ? 2 : 1);
894   bool RequiresBundling = STI->isTargetWindows() && IsAnAddressOperand(MO);
895   MachineInstrBuilder LO16, HI16;
896   LLVM_DEBUG(dbgs() << "Expanding: "; MI.dump());
897 
898   if (!STI->hasV6T2Ops() &&
899       (Opcode == ARM::MOVi32imm || Opcode == ARM::MOVCCi32imm)) {
900     // FIXME Windows CE supports older ARM CPUs
901     assert(!STI->isTargetWindows() && "Windows on ARM requires ARMv7+");
902 
903     assert (MO.isImm() && "MOVi32imm w/ non-immediate source operand!");
904     unsigned ImmVal = (unsigned)MO.getImm();
905     unsigned SOImmValV1 = 0, SOImmValV2 = 0;
906 
907     if (ARM_AM::isSOImmTwoPartVal(ImmVal)) { // Expand into a movi + orr.
908       LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi), DstReg);
909       HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::ORRri))
910           .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
911           .addReg(DstReg);
912       SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(ImmVal);
913       SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(ImmVal);
914     } else { // Expand into a mvn + sub.
915       LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), DstReg);
916       HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri))
917           .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
918           .addReg(DstReg);
919       SOImmValV1 = ARM_AM::getSOImmTwoPartFirst(-ImmVal);
920       SOImmValV2 = ARM_AM::getSOImmTwoPartSecond(-ImmVal);
921       SOImmValV1 = ~(-SOImmValV1);
922     }
923 
924     unsigned MIFlags = MI.getFlags();
925     LO16 = LO16.addImm(SOImmValV1);
926     HI16 = HI16.addImm(SOImmValV2);
927     LO16.cloneMemRefs(MI);
928     HI16.cloneMemRefs(MI);
929     LO16.setMIFlags(MIFlags);
930     HI16.setMIFlags(MIFlags);
931     LO16.addImm(Pred).addReg(PredReg).add(condCodeOp());
932     HI16.addImm(Pred).addReg(PredReg).add(condCodeOp());
933     if (isCC)
934       LO16.add(makeImplicit(MI.getOperand(1)));
935     TransferImpOps(MI, LO16, HI16);
936     MI.eraseFromParent();
937     return;
938   }
939 
940   unsigned LO16Opc = 0;
941   unsigned HI16Opc = 0;
942   unsigned MIFlags = MI.getFlags();
943   if (Opcode == ARM::t2MOVi32imm || Opcode == ARM::t2MOVCCi32imm) {
944     LO16Opc = ARM::t2MOVi16;
945     HI16Opc = ARM::t2MOVTi16;
946   } else {
947     LO16Opc = ARM::MOVi16;
948     HI16Opc = ARM::MOVTi16;
949   }
950 
951   LO16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LO16Opc), DstReg);
952   HI16 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc))
953     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
954     .addReg(DstReg);
955 
956   LO16.setMIFlags(MIFlags);
957   HI16.setMIFlags(MIFlags);
958 
959   switch (MO.getType()) {
960   case MachineOperand::MO_Immediate: {
961     unsigned Imm = MO.getImm();
962     unsigned Lo16 = Imm & 0xffff;
963     unsigned Hi16 = (Imm >> 16) & 0xffff;
964     LO16 = LO16.addImm(Lo16);
965     HI16 = HI16.addImm(Hi16);
966     break;
967   }
968   case MachineOperand::MO_ExternalSymbol: {
969     const char *ES = MO.getSymbolName();
970     unsigned TF = MO.getTargetFlags();
971     LO16 = LO16.addExternalSymbol(ES, TF | ARMII::MO_LO16);
972     HI16 = HI16.addExternalSymbol(ES, TF | ARMII::MO_HI16);
973     break;
974   }
975   default: {
976     const GlobalValue *GV = MO.getGlobal();
977     unsigned TF = MO.getTargetFlags();
978     LO16 = LO16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_LO16);
979     HI16 = HI16.addGlobalAddress(GV, MO.getOffset(), TF | ARMII::MO_HI16);
980     break;
981   }
982   }
983 
984   LO16.cloneMemRefs(MI);
985   HI16.cloneMemRefs(MI);
986   LO16.addImm(Pred).addReg(PredReg);
987   HI16.addImm(Pred).addReg(PredReg);
988 
989   if (RequiresBundling)
990     finalizeBundle(MBB, LO16->getIterator(), MBBI->getIterator());
991 
992   if (isCC)
993     LO16.add(makeImplicit(MI.getOperand(1)));
994   TransferImpOps(MI, LO16, HI16);
995   MI.eraseFromParent();
996   LLVM_DEBUG(dbgs() << "To:        "; LO16.getInstr()->dump(););
997   LLVM_DEBUG(dbgs() << "And:       "; HI16.getInstr()->dump(););
998 }
999 
// Size of the memory area accessed by VLSTM/VLLDM.  The area holds S0-S31 and
// FPSCR together with the VPR/padding bytes (VPR + pad, or just pad).
// NOTE(review): S0-S31 alone presumably account for 128 bytes, which would
// leave 8 bytes for FPSCR and the VPR/padding combined; the earlier wording
// "FPSCR + 8 more bytes" would add up to 140 instead.  Confirm the exact
// layout against the architecture manual.
static constexpr int CMSE_FP_SAVE_SIZE = 136;
1003 
determineGPRegsToClear(const MachineInstr & MI,const std::initializer_list<unsigned> & Regs,SmallVectorImpl<unsigned> & ClearRegs)1004 static void determineGPRegsToClear(const MachineInstr &MI,
1005                                    const std::initializer_list<unsigned> &Regs,
1006                                    SmallVectorImpl<unsigned> &ClearRegs) {
1007   SmallVector<unsigned, 4> OpRegs;
1008   for (const MachineOperand &Op : MI.operands()) {
1009     if (!Op.isReg() || !Op.isUse())
1010       continue;
1011     OpRegs.push_back(Op.getReg());
1012   }
1013   llvm::sort(OpRegs);
1014 
1015   std::set_difference(Regs.begin(), Regs.end(), OpRegs.begin(), OpRegs.end(),
1016                       std::back_inserter(ClearRegs));
1017 }
1018 
CMSEClearGPRegs(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const DebugLoc & DL,const SmallVectorImpl<unsigned> & ClearRegs,unsigned ClobberReg)1019 void ARMExpandPseudo::CMSEClearGPRegs(
1020     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1021     const DebugLoc &DL, const SmallVectorImpl<unsigned> &ClearRegs,
1022     unsigned ClobberReg) {
1023 
1024   if (STI->hasV8_1MMainlineOps()) {
1025     // Clear the registers using the CLRM instruction.
1026     MachineInstrBuilder CLRM =
1027         BuildMI(MBB, MBBI, DL, TII->get(ARM::t2CLRM)).add(predOps(ARMCC::AL));
1028     for (unsigned R : ClearRegs)
1029       CLRM.addReg(R, RegState::Define);
1030     CLRM.addReg(ARM::APSR, RegState::Define);
1031     CLRM.addReg(ARM::CPSR, RegState::Define | RegState::Implicit);
1032   } else {
1033     // Clear the registers and flags by copying ClobberReg into them.
1034     // (Baseline can't do a high register clear in one instruction).
1035     for (unsigned Reg : ClearRegs) {
1036       if (Reg == ClobberReg)
1037         continue;
1038       BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVr), Reg)
1039           .addReg(ClobberReg)
1040           .add(predOps(ARMCC::AL));
1041     }
1042 
1043     BuildMI(MBB, MBBI, DL, TII->get(ARM::t2MSR_M))
1044         .addImm(STI->hasDSP() ? 0xc00 : 0x800)
1045         .addReg(ClobberReg)
1046         .add(predOps(ARMCC::AL));
1047   }
1048 }
1049 
1050 // Find which FP registers need to be cleared.  The parameter `ClearRegs` is
1051 // initialised with all elements set to true, and this function resets all the
1052 // bits, which correspond to register uses. Returns true if any floating point
1053 // register is defined, false otherwise.
determineFPRegsToClear(const MachineInstr & MI,BitVector & ClearRegs)1054 static bool determineFPRegsToClear(const MachineInstr &MI,
1055                                    BitVector &ClearRegs) {
1056   bool DefFP = false;
1057   for (const MachineOperand &Op : MI.operands()) {
1058     if (!Op.isReg())
1059       continue;
1060 
1061     unsigned Reg = Op.getReg();
1062     if (Op.isDef()) {
1063       if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
1064           (Reg >= ARM::D0 && Reg <= ARM::D15) ||
1065           (Reg >= ARM::S0 && Reg <= ARM::S31))
1066         DefFP = true;
1067       continue;
1068     }
1069 
1070     if (Reg >= ARM::Q0 && Reg <= ARM::Q7) {
1071       int R = Reg - ARM::Q0;
1072       ClearRegs.reset(R * 4, (R + 1) * 4);
1073     } else if (Reg >= ARM::D0 && Reg <= ARM::D15) {
1074       int R = Reg - ARM::D0;
1075       ClearRegs.reset(R * 2, (R + 1) * 2);
1076     } else if (Reg >= ARM::S0 && Reg <= ARM::S31) {
1077       ClearRegs[Reg - ARM::S0] = false;
1078     }
1079   }
1080   return DefFP;
1081 }
1082 
1083 MachineBasicBlock &
CMSEClearFPRegs(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI)1084 ARMExpandPseudo::CMSEClearFPRegs(MachineBasicBlock &MBB,
1085                                  MachineBasicBlock::iterator MBBI) {
1086   BitVector ClearRegs(16, true);
1087   (void)determineFPRegsToClear(*MBBI, ClearRegs);
1088 
1089   if (STI->hasV8_1MMainlineOps())
1090     return CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
1091   else
1092     return CMSEClearFPRegsV8(MBB, MBBI, ClearRegs);
1093 }
1094 
1095 // Clear the FP registers for v8.0-M, by copying over the content
1096 // of LR. Uses R12 as a scratch register.
1097 MachineBasicBlock &
CMSEClearFPRegsV8(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const BitVector & ClearRegs)1098 ARMExpandPseudo::CMSEClearFPRegsV8(MachineBasicBlock &MBB,
1099                                    MachineBasicBlock::iterator MBBI,
1100                                    const BitVector &ClearRegs) {
1101   if (!STI->hasFPRegs())
1102     return MBB;
1103 
1104   auto &RetI = *MBBI;
1105   const DebugLoc &DL = RetI.getDebugLoc();
1106 
1107   // If optimising for minimum size, clear FP registers unconditionally.
1108   // Otherwise, check the CONTROL.SFPA (Secure Floating-Point Active) bit and
1109   // don't clear them if they belong to the non-secure state.
1110   MachineBasicBlock *ClearBB, *DoneBB;
1111   if (STI->hasMinSize()) {
1112     ClearBB = DoneBB = &MBB;
1113   } else {
1114     MachineFunction *MF = MBB.getParent();
1115     ClearBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1116     DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1117 
1118     MF->insert(++MBB.getIterator(), ClearBB);
1119     MF->insert(++ClearBB->getIterator(), DoneBB);
1120 
1121     DoneBB->splice(DoneBB->end(), &MBB, MBBI, MBB.end());
1122     DoneBB->transferSuccessors(&MBB);
1123     MBB.addSuccessor(ClearBB);
1124     MBB.addSuccessor(DoneBB);
1125     ClearBB->addSuccessor(DoneBB);
1126 
1127     // At the new basic blocks we need to have live-in the registers, used
1128     // for the return value as well as LR, used to clear registers.
1129     for (const MachineOperand &Op : RetI.operands()) {
1130       if (!Op.isReg())
1131         continue;
1132       Register Reg = Op.getReg();
1133       if (Reg == ARM::NoRegister || Reg == ARM::LR)
1134         continue;
1135       assert(Register::isPhysicalRegister(Reg) && "Unallocated register");
1136       ClearBB->addLiveIn(Reg);
1137       DoneBB->addLiveIn(Reg);
1138     }
1139     ClearBB->addLiveIn(ARM::LR);
1140     DoneBB->addLiveIn(ARM::LR);
1141 
1142     // Read the CONTROL register.
1143     BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2MRS_M), ARM::R12)
1144         .addImm(20)
1145         .add(predOps(ARMCC::AL));
1146     // Check bit 3 (SFPA).
1147     BuildMI(MBB, MBB.end(), DL, TII->get(ARM::t2TSTri))
1148         .addReg(ARM::R12)
1149         .addImm(8)
1150         .add(predOps(ARMCC::AL));
1151     // If SFPA is clear, jump over ClearBB to DoneBB.
1152     BuildMI(MBB, MBB.end(), DL, TII->get(ARM::tBcc))
1153         .addMBB(DoneBB)
1154         .addImm(ARMCC::EQ)
1155         .addReg(ARM::CPSR, RegState::Kill);
1156   }
1157 
1158   // Emit the clearing sequence
1159   for (unsigned D = 0; D < 8; D++) {
1160     // Attempt to clear as double
1161     if (ClearRegs[D * 2 + 0] && ClearRegs[D * 2 + 1]) {
1162       unsigned Reg = ARM::D0 + D;
1163       BuildMI(ClearBB, DL, TII->get(ARM::VMOVDRR), Reg)
1164           .addReg(ARM::LR)
1165           .addReg(ARM::LR)
1166           .add(predOps(ARMCC::AL));
1167     } else {
1168       // Clear first part as single
1169       if (ClearRegs[D * 2 + 0]) {
1170         unsigned Reg = ARM::S0 + D * 2;
1171         BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
1172             .addReg(ARM::LR)
1173             .add(predOps(ARMCC::AL));
1174       }
1175       // Clear second part as single
1176       if (ClearRegs[D * 2 + 1]) {
1177         unsigned Reg = ARM::S0 + D * 2 + 1;
1178         BuildMI(ClearBB, DL, TII->get(ARM::VMOVSR), Reg)
1179             .addReg(ARM::LR)
1180             .add(predOps(ARMCC::AL));
1181       }
1182     }
1183   }
1184 
1185   // Clear FPSCR bits 0-4, 7, 28-31
1186   // The other bits are program global according to the AAPCS
1187   BuildMI(ClearBB, DL, TII->get(ARM::VMRS), ARM::R12)
1188       .add(predOps(ARMCC::AL));
1189   BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
1190       .addReg(ARM::R12)
1191       .addImm(0x0000009F)
1192       .add(predOps(ARMCC::AL))
1193       .add(condCodeOp());
1194   BuildMI(ClearBB, DL, TII->get(ARM::t2BICri), ARM::R12)
1195       .addReg(ARM::R12)
1196       .addImm(0xF0000000)
1197       .add(predOps(ARMCC::AL))
1198       .add(condCodeOp());
1199   BuildMI(ClearBB, DL, TII->get(ARM::VMSR))
1200       .addReg(ARM::R12)
1201       .add(predOps(ARMCC::AL));
1202 
1203   return *DoneBB;
1204 }
1205 
1206 MachineBasicBlock &
CMSEClearFPRegsV81(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,const BitVector & ClearRegs)1207 ARMExpandPseudo::CMSEClearFPRegsV81(MachineBasicBlock &MBB,
1208                                     MachineBasicBlock::iterator MBBI,
1209                                     const BitVector &ClearRegs) {
1210   auto &RetI = *MBBI;
1211 
1212   // Emit a sequence of VSCCLRM <sreglist> instructions, one instruction for
1213   // each contiguous sequence of S-registers.
1214   int Start = -1, End = -1;
1215   for (int S = 0, E = ClearRegs.size(); S != E; ++S) {
1216     if (ClearRegs[S] && S == End + 1) {
1217       End = S; // extend range
1218       continue;
1219     }
1220     // Emit current range.
1221     if (Start < End) {
1222       MachineInstrBuilder VSCCLRM =
1223           BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
1224               .add(predOps(ARMCC::AL));
1225       while (++Start <= End)
1226         VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
1227       VSCCLRM.addReg(ARM::VPR, RegState::Define);
1228     }
1229     Start = End = S;
1230   }
1231   // Emit last range.
1232   if (Start < End) {
1233     MachineInstrBuilder VSCCLRM =
1234         BuildMI(MBB, MBBI, RetI.getDebugLoc(), TII->get(ARM::VSCCLRMS))
1235             .add(predOps(ARMCC::AL));
1236     while (++Start <= End)
1237       VSCCLRM.addReg(ARM::S0 + Start, RegState::Define);
1238     VSCCLRM.addReg(ARM::VPR, RegState::Define);
1239   }
1240 
1241   return MBB;
1242 }
1243 
CMSESaveClearFPRegs(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,const LivePhysRegs & LiveRegs,SmallVectorImpl<unsigned> & ScratchRegs)1244 void ARMExpandPseudo::CMSESaveClearFPRegs(
1245     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1246     const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
1247   if (STI->hasV8_1MMainlineOps())
1248     CMSESaveClearFPRegsV81(MBB, MBBI, DL, LiveRegs);
1249   else
1250     CMSESaveClearFPRegsV8(MBB, MBBI, DL, LiveRegs, ScratchRegs);
1251 }
1252 
1253 // Save and clear FP registers if present
CMSESaveClearFPRegsV8(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,const LivePhysRegs & LiveRegs,SmallVectorImpl<unsigned> & ScratchRegs)1254 void ARMExpandPseudo::CMSESaveClearFPRegsV8(
1255     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1256     const LivePhysRegs &LiveRegs, SmallVectorImpl<unsigned> &ScratchRegs) {
1257   if (!STI->hasFPRegs())
1258     return;
1259 
1260   // Store an available register for FPSCR clearing
1261   assert(!ScratchRegs.empty());
1262   unsigned SpareReg = ScratchRegs.front();
1263 
1264   // save space on stack for VLSTM
1265   BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
1266       .addReg(ARM::SP)
1267       .addImm(CMSE_FP_SAVE_SIZE >> 2)
1268       .add(predOps(ARMCC::AL));
1269 
1270   // Use ScratchRegs to store the fp regs
1271   std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
1272   std::vector<unsigned> NonclearedFPRegs;
1273   for (const MachineOperand &Op : MBBI->operands()) {
1274     if (Op.isReg() && Op.isUse()) {
1275       unsigned Reg = Op.getReg();
1276       assert(!ARM::DPRRegClass.contains(Reg) ||
1277              ARM::DPR_VFP2RegClass.contains(Reg));
1278       assert(!ARM::QPRRegClass.contains(Reg));
1279       if (ARM::DPR_VFP2RegClass.contains(Reg)) {
1280         if (ScratchRegs.size() >= 2) {
1281           unsigned SaveReg2 = ScratchRegs.pop_back_val();
1282           unsigned SaveReg1 = ScratchRegs.pop_back_val();
1283           ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
1284 
1285           // Save the fp register to the normal registers
1286           BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
1287               .addReg(SaveReg1, RegState::Define)
1288               .addReg(SaveReg2, RegState::Define)
1289               .addReg(Reg)
1290               .add(predOps(ARMCC::AL));
1291         } else {
1292           NonclearedFPRegs.push_back(Reg);
1293         }
1294       } else if (ARM::SPRRegClass.contains(Reg)) {
1295         if (ScratchRegs.size() >= 1) {
1296           unsigned SaveReg = ScratchRegs.pop_back_val();
1297           ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
1298 
1299           // Save the fp register to the normal registers
1300           BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
1301               .addReg(Reg)
1302               .add(predOps(ARMCC::AL));
1303         } else {
1304           NonclearedFPRegs.push_back(Reg);
1305         }
1306       }
1307     }
1308   }
1309 
1310   bool passesFPReg = (!NonclearedFPRegs.empty() || !ClearedFPRegs.empty());
1311 
1312   // Lazy store all fp registers to the stack
1313   MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
1314                                   .addReg(ARM::SP)
1315                                   .add(predOps(ARMCC::AL));
1316   for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
1317                  ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
1318     VLSTM.addReg(R, RegState::Implicit |
1319                         (LiveRegs.contains(R) ? 0 : RegState::Undef));
1320 
1321   // Restore all arguments
1322   for (const auto &Regs : ClearedFPRegs) {
1323     unsigned Reg, SaveReg1, SaveReg2;
1324     std::tie(Reg, SaveReg1, SaveReg2) = Regs;
1325     if (ARM::DPR_VFP2RegClass.contains(Reg))
1326       BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
1327           .addReg(SaveReg1)
1328           .addReg(SaveReg2)
1329           .add(predOps(ARMCC::AL));
1330     else if (ARM::SPRRegClass.contains(Reg))
1331       BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
1332           .addReg(SaveReg1)
1333           .add(predOps(ARMCC::AL));
1334   }
1335 
1336   for (unsigned Reg : NonclearedFPRegs) {
1337     if (ARM::DPR_VFP2RegClass.contains(Reg)) {
1338       if (STI->isLittle()) {
1339         BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRD), Reg)
1340             .addReg(ARM::SP)
1341             .addImm((Reg - ARM::D0) * 2)
1342             .add(predOps(ARMCC::AL));
1343       } else {
1344         // For big-endian targets we need to load the two subregisters of Reg
1345         // manually because VLDRD would load them in wrong order
1346         unsigned SReg0 = TRI->getSubReg(Reg, ARM::ssub_0);
1347         BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0)
1348             .addReg(ARM::SP)
1349             .addImm((Reg - ARM::D0) * 2)
1350             .add(predOps(ARMCC::AL));
1351         BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), SReg0 + 1)
1352             .addReg(ARM::SP)
1353             .addImm((Reg - ARM::D0) * 2 + 1)
1354             .add(predOps(ARMCC::AL));
1355       }
1356     } else if (ARM::SPRRegClass.contains(Reg)) {
1357       BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDRS), Reg)
1358           .addReg(ARM::SP)
1359           .addImm(Reg - ARM::S0)
1360           .add(predOps(ARMCC::AL));
1361     }
1362   }
1363   // restore FPSCR from stack and clear bits 0-4, 7, 28-31
1364   // The other bits are program global according to the AAPCS
1365   if (passesFPReg) {
1366     BuildMI(MBB, MBBI, DL, TII->get(ARM::t2LDRi8), SpareReg)
1367         .addReg(ARM::SP)
1368         .addImm(0x40)
1369         .add(predOps(ARMCC::AL));
1370     BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
1371         .addReg(SpareReg)
1372         .addImm(0x0000009F)
1373         .add(predOps(ARMCC::AL))
1374         .add(condCodeOp());
1375     BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), SpareReg)
1376         .addReg(SpareReg)
1377         .addImm(0xF0000000)
1378         .add(predOps(ARMCC::AL))
1379         .add(condCodeOp());
1380     BuildMI(MBB, MBBI, DL, TII->get(ARM::VMSR))
1381         .addReg(SpareReg)
1382         .add(predOps(ARMCC::AL));
1383     // The ldr must happen after a floating point instruction. To prevent the
1384     // post-ra scheduler to mess with the order, we create a bundle.
1385     finalizeBundle(MBB, VLSTM->getIterator(), MBBI->getIterator());
1386   }
1387 }
1388 
CMSESaveClearFPRegsV81(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,const LivePhysRegs & LiveRegs)1389 void ARMExpandPseudo::CMSESaveClearFPRegsV81(MachineBasicBlock &MBB,
1390                                              MachineBasicBlock::iterator MBBI,
1391                                              DebugLoc &DL,
1392                                              const LivePhysRegs &LiveRegs) {
1393   BitVector ClearRegs(32, true);
1394   bool DefFP = determineFPRegsToClear(*MBBI, ClearRegs);
1395 
1396   // If the instruction does not write to a FP register and no elements were
1397   // removed from the set, then no FP registers were used to pass
1398   // arguments/returns.
1399   if (!DefFP && ClearRegs.count() == ClearRegs.size()) {
1400     // save space on stack for VLSTM
1401     BuildMI(MBB, MBBI, DL, TII->get(ARM::tSUBspi), ARM::SP)
1402         .addReg(ARM::SP)
1403         .addImm(CMSE_FP_SAVE_SIZE >> 2)
1404         .add(predOps(ARMCC::AL));
1405 
1406     // Lazy store all FP registers to the stack
1407     MachineInstrBuilder VLSTM = BuildMI(MBB, MBBI, DL, TII->get(ARM::VLSTM))
1408                                     .addReg(ARM::SP)
1409                                     .add(predOps(ARMCC::AL));
1410     for (auto R : {ARM::VPR, ARM::FPSCR, ARM::FPSCR_NZCV, ARM::Q0, ARM::Q1,
1411                    ARM::Q2, ARM::Q3, ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7})
1412       VLSTM.addReg(R, RegState::Implicit |
1413                           (LiveRegs.contains(R) ? 0 : RegState::Undef));
1414   } else {
1415     // Push all the callee-saved registers (s16-s31).
1416     MachineInstrBuilder VPUSH =
1417         BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTMSDB_UPD), ARM::SP)
1418             .addReg(ARM::SP)
1419             .add(predOps(ARMCC::AL));
1420     for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
1421       VPUSH.addReg(Reg);
1422 
1423     // Clear FP registers with a VSCCLRM.
1424     (void)CMSEClearFPRegsV81(MBB, MBBI, ClearRegs);
1425 
1426     // Save floating-point context.
1427     BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTR_FPCXTS_pre), ARM::SP)
1428         .addReg(ARM::SP)
1429         .addImm(-8)
1430         .add(predOps(ARMCC::AL));
1431   }
1432 }
1433 
1434 // Restore FP registers if present
CMSERestoreFPRegs(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,SmallVectorImpl<unsigned> & AvailableRegs)1435 void ARMExpandPseudo::CMSERestoreFPRegs(
1436     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1437     SmallVectorImpl<unsigned> &AvailableRegs) {
1438   if (STI->hasV8_1MMainlineOps())
1439     CMSERestoreFPRegsV81(MBB, MBBI, DL, AvailableRegs);
1440   else
1441     CMSERestoreFPRegsV8(MBB, MBBI, DL, AvailableRegs);
1442 }
1443 
CMSERestoreFPRegsV8(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,SmallVectorImpl<unsigned> & AvailableRegs)1444 void ARMExpandPseudo::CMSERestoreFPRegsV8(
1445     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1446     SmallVectorImpl<unsigned> &AvailableRegs) {
1447   if (!STI->hasFPRegs())
1448     return;
1449 
1450   // Use AvailableRegs to store the fp regs
1451   std::vector<std::tuple<unsigned, unsigned, unsigned>> ClearedFPRegs;
1452   std::vector<unsigned> NonclearedFPRegs;
1453   for (const MachineOperand &Op : MBBI->operands()) {
1454     if (Op.isReg() && Op.isDef()) {
1455       unsigned Reg = Op.getReg();
1456       assert(!ARM::DPRRegClass.contains(Reg) ||
1457              ARM::DPR_VFP2RegClass.contains(Reg));
1458       assert(!ARM::QPRRegClass.contains(Reg));
1459       if (ARM::DPR_VFP2RegClass.contains(Reg)) {
1460         if (AvailableRegs.size() >= 2) {
1461           unsigned SaveReg2 = AvailableRegs.pop_back_val();
1462           unsigned SaveReg1 = AvailableRegs.pop_back_val();
1463           ClearedFPRegs.emplace_back(Reg, SaveReg1, SaveReg2);
1464 
1465           // Save the fp register to the normal registers
1466           BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRRD))
1467               .addReg(SaveReg1, RegState::Define)
1468               .addReg(SaveReg2, RegState::Define)
1469               .addReg(Reg)
1470               .add(predOps(ARMCC::AL));
1471         } else {
1472           NonclearedFPRegs.push_back(Reg);
1473         }
1474       } else if (ARM::SPRRegClass.contains(Reg)) {
1475         if (AvailableRegs.size() >= 1) {
1476           unsigned SaveReg = AvailableRegs.pop_back_val();
1477           ClearedFPRegs.emplace_back(Reg, SaveReg, 0);
1478 
1479           // Save the fp register to the normal registers
1480           BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVRS), SaveReg)
1481               .addReg(Reg)
1482               .add(predOps(ARMCC::AL));
1483         } else {
1484           NonclearedFPRegs.push_back(Reg);
1485         }
1486       }
1487     }
1488   }
1489 
1490   // Push FP regs that cannot be restored via normal registers on the stack
1491   for (unsigned Reg : NonclearedFPRegs) {
1492     if (ARM::DPR_VFP2RegClass.contains(Reg))
1493       BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRD), Reg)
1494           .addReg(ARM::SP)
1495           .addImm((Reg - ARM::D0) * 2)
1496           .add(predOps(ARMCC::AL));
1497     else if (ARM::SPRRegClass.contains(Reg))
1498       BuildMI(MBB, MBBI, DL, TII->get(ARM::VSTRS), Reg)
1499           .addReg(ARM::SP)
1500           .addImm(Reg - ARM::S0)
1501           .add(predOps(ARMCC::AL));
1502   }
1503 
1504   // Lazy load fp regs from stack
1505   BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
1506       .addReg(ARM::SP)
1507       .add(predOps(ARMCC::AL));
1508 
1509   // Restore all FP registers via normal registers
1510   for (const auto &Regs : ClearedFPRegs) {
1511     unsigned Reg, SaveReg1, SaveReg2;
1512     std::tie(Reg, SaveReg1, SaveReg2) = Regs;
1513     if (ARM::DPR_VFP2RegClass.contains(Reg))
1514       BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVDRR), Reg)
1515           .addReg(SaveReg1)
1516           .addReg(SaveReg2)
1517           .add(predOps(ARMCC::AL));
1518     else if (ARM::SPRRegClass.contains(Reg))
1519       BuildMI(MBB, MBBI, DL, TII->get(ARM::VMOVSR), Reg)
1520           .addReg(SaveReg1)
1521           .add(predOps(ARMCC::AL));
1522   }
1523 
1524   // Pop the stack space
1525   BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
1526       .addReg(ARM::SP)
1527       .addImm(CMSE_FP_SAVE_SIZE >> 2)
1528       .add(predOps(ARMCC::AL));
1529 }
1530 
definesOrUsesFPReg(const MachineInstr & MI)1531 static bool definesOrUsesFPReg(const MachineInstr &MI) {
1532   for (const MachineOperand &Op : MI.operands()) {
1533     if (!Op.isReg())
1534       continue;
1535     unsigned Reg = Op.getReg();
1536     if ((Reg >= ARM::Q0 && Reg <= ARM::Q7) ||
1537         (Reg >= ARM::D0 && Reg <= ARM::D15) ||
1538         (Reg >= ARM::S0 && Reg <= ARM::S31))
1539       return true;
1540   }
1541   return false;
1542 }
1543 
CMSERestoreFPRegsV81(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,DebugLoc & DL,SmallVectorImpl<unsigned> & AvailableRegs)1544 void ARMExpandPseudo::CMSERestoreFPRegsV81(
1545     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc &DL,
1546     SmallVectorImpl<unsigned> &AvailableRegs) {
1547   if (!definesOrUsesFPReg(*MBBI)) {
1548     // Load FP registers from stack.
1549     BuildMI(MBB, MBBI, DL, TII->get(ARM::VLLDM))
1550         .addReg(ARM::SP)
1551         .add(predOps(ARMCC::AL));
1552 
1553     // Pop the stack space
1554     BuildMI(MBB, MBBI, DL, TII->get(ARM::tADDspi), ARM::SP)
1555         .addReg(ARM::SP)
1556         .addImm(CMSE_FP_SAVE_SIZE >> 2)
1557         .add(predOps(ARMCC::AL));
1558   } else {
1559     // Restore the floating point context.
1560     BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(ARM::VLDR_FPCXTS_post),
1561             ARM::SP)
1562         .addReg(ARM::SP)
1563         .addImm(8)
1564         .add(predOps(ARMCC::AL));
1565 
1566     // Pop all the callee-saved registers (s16-s31).
1567     MachineInstrBuilder VPOP =
1568         BuildMI(MBB, MBBI, DL, TII->get(ARM::VLDMSIA_UPD), ARM::SP)
1569             .addReg(ARM::SP)
1570             .add(predOps(ARMCC::AL));
1571     for (int Reg = ARM::S16; Reg <= ARM::S31; ++Reg)
1572       VPOP.addReg(Reg, RegState::Define);
1573   }
1574 }
1575 
1576 /// Expand a CMP_SWAP pseudo-inst to an ldrex/strex loop as simply as
1577 /// possible. This only gets used at -O0 so we don't care about efficiency of
1578 /// the generated code.
ExpandCMP_SWAP(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,unsigned LdrexOp,unsigned StrexOp,unsigned UxtOp,MachineBasicBlock::iterator & NextMBBI)1579 bool ARMExpandPseudo::ExpandCMP_SWAP(MachineBasicBlock &MBB,
1580                                      MachineBasicBlock::iterator MBBI,
1581                                      unsigned LdrexOp, unsigned StrexOp,
1582                                      unsigned UxtOp,
1583                                      MachineBasicBlock::iterator &NextMBBI) {
1584   bool IsThumb = STI->isThumb();
1585   MachineInstr &MI = *MBBI;
1586   DebugLoc DL = MI.getDebugLoc();
1587   const MachineOperand &Dest = MI.getOperand(0);
1588   Register TempReg = MI.getOperand(1).getReg();
1589   // Duplicating undef operands into 2 instructions does not guarantee the same
1590   // value on both; However undef should be replaced by xzr anyway.
1591   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
1592   Register AddrReg = MI.getOperand(2).getReg();
1593   Register DesiredReg = MI.getOperand(3).getReg();
1594   Register NewReg = MI.getOperand(4).getReg();
1595 
1596   if (IsThumb) {
1597     assert(STI->hasV8MBaselineOps() &&
1598            "CMP_SWAP not expected to be custom expanded for Thumb1");
1599     assert((UxtOp == 0 || UxtOp == ARM::tUXTB || UxtOp == ARM::tUXTH) &&
1600            "ARMv8-M.baseline does not have t2UXTB/t2UXTH");
1601     assert(ARM::tGPRRegClass.contains(DesiredReg) &&
1602            "DesiredReg used for UXT op must be tGPR");
1603   }
1604 
1605   MachineFunction *MF = MBB.getParent();
1606   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1607   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1608   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1609 
1610   MF->insert(++MBB.getIterator(), LoadCmpBB);
1611   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
1612   MF->insert(++StoreBB->getIterator(), DoneBB);
1613 
1614   if (UxtOp) {
1615     MachineInstrBuilder MIB =
1616         BuildMI(MBB, MBBI, DL, TII->get(UxtOp), DesiredReg)
1617             .addReg(DesiredReg, RegState::Kill);
1618     if (!IsThumb)
1619       MIB.addImm(0);
1620     MIB.add(predOps(ARMCC::AL));
1621   }
1622 
1623   // .Lloadcmp:
1624   //     ldrex rDest, [rAddr]
1625   //     cmp rDest, rDesired
1626   //     bne .Ldone
1627 
1628   MachineInstrBuilder MIB;
1629   MIB = BuildMI(LoadCmpBB, DL, TII->get(LdrexOp), Dest.getReg());
1630   MIB.addReg(AddrReg);
1631   if (LdrexOp == ARM::t2LDREX)
1632     MIB.addImm(0); // a 32-bit Thumb ldrex (only) allows an offset.
1633   MIB.add(predOps(ARMCC::AL));
1634 
1635   unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
1636   BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
1637       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
1638       .addReg(DesiredReg)
1639       .add(predOps(ARMCC::AL));
1640   unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
1641   BuildMI(LoadCmpBB, DL, TII->get(Bcc))
1642       .addMBB(DoneBB)
1643       .addImm(ARMCC::NE)
1644       .addReg(ARM::CPSR, RegState::Kill);
1645   LoadCmpBB->addSuccessor(DoneBB);
1646   LoadCmpBB->addSuccessor(StoreBB);
1647 
1648   // .Lstore:
1649   //     strex rTempReg, rNew, [rAddr]
1650   //     cmp rTempReg, #0
1651   //     bne .Lloadcmp
1652   MIB = BuildMI(StoreBB, DL, TII->get(StrexOp), TempReg)
1653     .addReg(NewReg)
1654     .addReg(AddrReg);
1655   if (StrexOp == ARM::t2STREX)
1656     MIB.addImm(0); // a 32-bit Thumb strex (only) allows an offset.
1657   MIB.add(predOps(ARMCC::AL));
1658 
1659   unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
1660   BuildMI(StoreBB, DL, TII->get(CMPri))
1661       .addReg(TempReg, RegState::Kill)
1662       .addImm(0)
1663       .add(predOps(ARMCC::AL));
1664   BuildMI(StoreBB, DL, TII->get(Bcc))
1665       .addMBB(LoadCmpBB)
1666       .addImm(ARMCC::NE)
1667       .addReg(ARM::CPSR, RegState::Kill);
1668   StoreBB->addSuccessor(LoadCmpBB);
1669   StoreBB->addSuccessor(DoneBB);
1670 
1671   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
1672   DoneBB->transferSuccessors(&MBB);
1673 
1674   MBB.addSuccessor(LoadCmpBB);
1675 
1676   NextMBBI = MBB.end();
1677   MI.eraseFromParent();
1678 
1679   // Recompute livein lists.
1680   LivePhysRegs LiveRegs;
1681   computeAndAddLiveIns(LiveRegs, *DoneBB);
1682   computeAndAddLiveIns(LiveRegs, *StoreBB);
1683   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
1684   // Do an extra pass around the loop to get loop carried registers right.
1685   StoreBB->clearLiveIns();
1686   computeAndAddLiveIns(LiveRegs, *StoreBB);
1687   LoadCmpBB->clearLiveIns();
1688   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
1689 
1690   return true;
1691 }
1692 
1693 /// ARM's ldrexd/strexd take a consecutive register pair (represented as a
1694 /// single GPRPair register), Thumb's take two separate registers so we need to
1695 /// extract the subregs from the pair.
addExclusiveRegPair(MachineInstrBuilder & MIB,MachineOperand & Reg,unsigned Flags,bool IsThumb,const TargetRegisterInfo * TRI)1696 static void addExclusiveRegPair(MachineInstrBuilder &MIB, MachineOperand &Reg,
1697                                 unsigned Flags, bool IsThumb,
1698                                 const TargetRegisterInfo *TRI) {
1699   if (IsThumb) {
1700     Register RegLo = TRI->getSubReg(Reg.getReg(), ARM::gsub_0);
1701     Register RegHi = TRI->getSubReg(Reg.getReg(), ARM::gsub_1);
1702     MIB.addReg(RegLo, Flags);
1703     MIB.addReg(RegHi, Flags);
1704   } else
1705     MIB.addReg(Reg.getReg(), Flags);
1706 }
1707 
1708 /// Expand a 64-bit CMP_SWAP to an ldrexd/strexd loop.
ExpandCMP_SWAP_64(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineBasicBlock::iterator & NextMBBI)1709 bool ARMExpandPseudo::ExpandCMP_SWAP_64(MachineBasicBlock &MBB,
1710                                         MachineBasicBlock::iterator MBBI,
1711                                         MachineBasicBlock::iterator &NextMBBI) {
1712   bool IsThumb = STI->isThumb();
1713   MachineInstr &MI = *MBBI;
1714   DebugLoc DL = MI.getDebugLoc();
1715   MachineOperand &Dest = MI.getOperand(0);
1716   Register TempReg = MI.getOperand(1).getReg();
1717   // Duplicating undef operands into 2 instructions does not guarantee the same
1718   // value on both; However undef should be replaced by xzr anyway.
1719   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
1720   Register AddrReg = MI.getOperand(2).getReg();
1721   Register DesiredReg = MI.getOperand(3).getReg();
1722   MachineOperand New = MI.getOperand(4);
1723   New.setIsKill(false);
1724 
1725   Register DestLo = TRI->getSubReg(Dest.getReg(), ARM::gsub_0);
1726   Register DestHi = TRI->getSubReg(Dest.getReg(), ARM::gsub_1);
1727   Register DesiredLo = TRI->getSubReg(DesiredReg, ARM::gsub_0);
1728   Register DesiredHi = TRI->getSubReg(DesiredReg, ARM::gsub_1);
1729 
1730   MachineFunction *MF = MBB.getParent();
1731   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1732   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1733   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
1734 
1735   MF->insert(++MBB.getIterator(), LoadCmpBB);
1736   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
1737   MF->insert(++StoreBB->getIterator(), DoneBB);
1738 
1739   // .Lloadcmp:
1740   //     ldrexd rDestLo, rDestHi, [rAddr]
1741   //     cmp rDestLo, rDesiredLo
1742   //     sbcs dead rTempReg, rDestHi, rDesiredHi
1743   //     bne .Ldone
1744   unsigned LDREXD = IsThumb ? ARM::t2LDREXD : ARM::LDREXD;
1745   MachineInstrBuilder MIB;
1746   MIB = BuildMI(LoadCmpBB, DL, TII->get(LDREXD));
1747   addExclusiveRegPair(MIB, Dest, RegState::Define, IsThumb, TRI);
1748   MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
1749 
1750   unsigned CMPrr = IsThumb ? ARM::tCMPhir : ARM::CMPrr;
1751   BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
1752       .addReg(DestLo, getKillRegState(Dest.isDead()))
1753       .addReg(DesiredLo)
1754       .add(predOps(ARMCC::AL));
1755 
1756   BuildMI(LoadCmpBB, DL, TII->get(CMPrr))
1757       .addReg(DestHi, getKillRegState(Dest.isDead()))
1758       .addReg(DesiredHi)
1759       .addImm(ARMCC::EQ).addReg(ARM::CPSR, RegState::Kill);
1760 
1761   unsigned Bcc = IsThumb ? ARM::tBcc : ARM::Bcc;
1762   BuildMI(LoadCmpBB, DL, TII->get(Bcc))
1763       .addMBB(DoneBB)
1764       .addImm(ARMCC::NE)
1765       .addReg(ARM::CPSR, RegState::Kill);
1766   LoadCmpBB->addSuccessor(DoneBB);
1767   LoadCmpBB->addSuccessor(StoreBB);
1768 
1769   // .Lstore:
1770   //     strexd rTempReg, rNewLo, rNewHi, [rAddr]
1771   //     cmp rTempReg, #0
1772   //     bne .Lloadcmp
1773   unsigned STREXD = IsThumb ? ARM::t2STREXD : ARM::STREXD;
1774   MIB = BuildMI(StoreBB, DL, TII->get(STREXD), TempReg);
1775   unsigned Flags = getKillRegState(New.isDead());
1776   addExclusiveRegPair(MIB, New, Flags, IsThumb, TRI);
1777   MIB.addReg(AddrReg).add(predOps(ARMCC::AL));
1778 
1779   unsigned CMPri = IsThumb ? ARM::t2CMPri : ARM::CMPri;
1780   BuildMI(StoreBB, DL, TII->get(CMPri))
1781       .addReg(TempReg, RegState::Kill)
1782       .addImm(0)
1783       .add(predOps(ARMCC::AL));
1784   BuildMI(StoreBB, DL, TII->get(Bcc))
1785       .addMBB(LoadCmpBB)
1786       .addImm(ARMCC::NE)
1787       .addReg(ARM::CPSR, RegState::Kill);
1788   StoreBB->addSuccessor(LoadCmpBB);
1789   StoreBB->addSuccessor(DoneBB);
1790 
1791   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
1792   DoneBB->transferSuccessors(&MBB);
1793 
1794   MBB.addSuccessor(LoadCmpBB);
1795 
1796   NextMBBI = MBB.end();
1797   MI.eraseFromParent();
1798 
1799   // Recompute livein lists.
1800   LivePhysRegs LiveRegs;
1801   computeAndAddLiveIns(LiveRegs, *DoneBB);
1802   computeAndAddLiveIns(LiveRegs, *StoreBB);
1803   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
1804   // Do an extra pass around the loop to get loop carried registers right.
1805   StoreBB->clearLiveIns();
1806   computeAndAddLiveIns(LiveRegs, *StoreBB);
1807   LoadCmpBB->clearLiveIns();
1808   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
1809 
1810   return true;
1811 }
1812 
CMSEPushCalleeSaves(const TargetInstrInfo & TII,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,int JumpReg,const LivePhysRegs & LiveRegs,bool Thumb1Only)1813 static void CMSEPushCalleeSaves(const TargetInstrInfo &TII,
1814                                 MachineBasicBlock &MBB,
1815                                 MachineBasicBlock::iterator MBBI, int JumpReg,
1816                                 const LivePhysRegs &LiveRegs, bool Thumb1Only) {
1817   const DebugLoc &DL = MBBI->getDebugLoc();
1818   if (Thumb1Only) { // push Lo and Hi regs separately
1819     MachineInstrBuilder PushMIB =
1820         BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
1821     for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
1822       PushMIB.addReg(
1823           Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
1824     }
1825 
1826     // Thumb1 can only tPUSH low regs, so we copy the high regs to the low
1827     // regs that we just saved and push the low regs again, taking care to
1828     // not clobber JumpReg. If JumpReg is one of the low registers, push first
1829     // the values of r9-r11, and then r8. That would leave them ordered in
1830     // memory, and allow us to later pop them with a single instructions.
1831     // FIXME: Could also use any of r0-r3 that are free (including in the
1832     // first PUSH above).
1833     for (int LoReg = ARM::R7, HiReg = ARM::R11; LoReg >= ARM::R4; --LoReg) {
1834       if (JumpReg == LoReg)
1835         continue;
1836       BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
1837           .addReg(HiReg, LiveRegs.contains(HiReg) ? 0 : RegState::Undef)
1838           .add(predOps(ARMCC::AL));
1839       --HiReg;
1840     }
1841     MachineInstrBuilder PushMIB2 =
1842         BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL));
1843     for (int Reg = ARM::R4; Reg < ARM::R8; ++Reg) {
1844       if (Reg == JumpReg)
1845         continue;
1846       PushMIB2.addReg(Reg, RegState::Kill);
1847     }
1848 
1849     // If we couldn't use a low register for temporary storage (because it was
1850     // the JumpReg), use r4 or r5, whichever is not JumpReg. It has already been
1851     // saved.
1852     if (JumpReg >= ARM::R4 && JumpReg <= ARM::R7) {
1853       int LoReg = JumpReg == ARM::R4 ? ARM::R5 : ARM::R4;
1854       BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), LoReg)
1855           .addReg(ARM::R8, LiveRegs.contains(ARM::R8) ? 0 : RegState::Undef)
1856           .add(predOps(ARMCC::AL));
1857       BuildMI(MBB, MBBI, DL, TII.get(ARM::tPUSH))
1858           .add(predOps(ARMCC::AL))
1859           .addReg(LoReg, RegState::Kill);
1860     }
1861   } else { // push Lo and Hi registers with a single instruction
1862     MachineInstrBuilder PushMIB =
1863         BuildMI(MBB, MBBI, DL, TII.get(ARM::t2STMDB_UPD), ARM::SP)
1864             .addReg(ARM::SP)
1865             .add(predOps(ARMCC::AL));
1866     for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg) {
1867       PushMIB.addReg(
1868           Reg, Reg == JumpReg || LiveRegs.contains(Reg) ? 0 : RegState::Undef);
1869     }
1870   }
1871 }
1872 
CMSEPopCalleeSaves(const TargetInstrInfo & TII,MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,int JumpReg,bool Thumb1Only)1873 static void CMSEPopCalleeSaves(const TargetInstrInfo &TII,
1874                                MachineBasicBlock &MBB,
1875                                MachineBasicBlock::iterator MBBI, int JumpReg,
1876                                bool Thumb1Only) {
1877   const DebugLoc &DL = MBBI->getDebugLoc();
1878   if (Thumb1Only) {
1879     MachineInstrBuilder PopMIB =
1880         BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
1881     for (int R = 0; R < 4; ++R) {
1882       PopMIB.addReg(ARM::R4 + R, RegState::Define);
1883       BuildMI(MBB, MBBI, DL, TII.get(ARM::tMOVr), ARM::R8 + R)
1884           .addReg(ARM::R4 + R, RegState::Kill)
1885           .add(predOps(ARMCC::AL));
1886     }
1887     MachineInstrBuilder PopMIB2 =
1888         BuildMI(MBB, MBBI, DL, TII.get(ARM::tPOP)).add(predOps(ARMCC::AL));
1889     for (int R = 0; R < 4; ++R)
1890       PopMIB2.addReg(ARM::R4 + R, RegState::Define);
1891   } else { // pop Lo and Hi registers with a single instruction
1892     MachineInstrBuilder PopMIB =
1893         BuildMI(MBB, MBBI, DL, TII.get(ARM::t2LDMIA_UPD), ARM::SP)
1894             .addReg(ARM::SP)
1895             .add(predOps(ARMCC::AL));
1896     for (int Reg = ARM::R4; Reg < ARM::R12; ++Reg)
1897       PopMIB.addReg(Reg, RegState::Define);
1898   }
1899 }
1900 
ExpandMI(MachineBasicBlock & MBB,MachineBasicBlock::iterator MBBI,MachineBasicBlock::iterator & NextMBBI)1901 bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB,
1902                                MachineBasicBlock::iterator MBBI,
1903                                MachineBasicBlock::iterator &NextMBBI) {
1904   MachineInstr &MI = *MBBI;
1905   unsigned Opcode = MI.getOpcode();
1906   switch (Opcode) {
1907     default:
1908       return false;
1909 
1910     case ARM::VBSPd:
1911     case ARM::VBSPq: {
1912       Register DstReg = MI.getOperand(0).getReg();
1913       if (DstReg == MI.getOperand(3).getReg()) {
1914         // Expand to VBIT
1915         unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBITd : ARM::VBITq;
1916         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
1917             .add(MI.getOperand(0))
1918             .add(MI.getOperand(3))
1919             .add(MI.getOperand(2))
1920             .add(MI.getOperand(1))
1921             .addImm(MI.getOperand(4).getImm())
1922             .add(MI.getOperand(5));
1923       } else if (DstReg == MI.getOperand(2).getReg()) {
1924         // Expand to VBIF
1925         unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBIFd : ARM::VBIFq;
1926         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
1927             .add(MI.getOperand(0))
1928             .add(MI.getOperand(2))
1929             .add(MI.getOperand(3))
1930             .add(MI.getOperand(1))
1931             .addImm(MI.getOperand(4).getImm())
1932             .add(MI.getOperand(5));
1933       } else {
1934         // Expand to VBSL
1935         unsigned NewOpc = Opcode == ARM::VBSPd ? ARM::VBSLd : ARM::VBSLq;
1936         if (DstReg == MI.getOperand(1).getReg()) {
1937           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
1938               .add(MI.getOperand(0))
1939               .add(MI.getOperand(1))
1940               .add(MI.getOperand(2))
1941               .add(MI.getOperand(3))
1942               .addImm(MI.getOperand(4).getImm())
1943               .add(MI.getOperand(5));
1944         } else {
1945           // Use move to satisfy constraints
1946           unsigned MoveOpc = Opcode == ARM::VBSPd ? ARM::VORRd : ARM::VORRq;
1947           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MoveOpc))
1948               .addReg(DstReg,
1949                       RegState::Define |
1950                           getRenamableRegState(MI.getOperand(0).isRenamable()))
1951               .add(MI.getOperand(1))
1952               .add(MI.getOperand(1))
1953               .addImm(MI.getOperand(4).getImm())
1954               .add(MI.getOperand(5));
1955           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc))
1956               .add(MI.getOperand(0))
1957               .addReg(DstReg,
1958                       RegState::Kill |
1959                           getRenamableRegState(MI.getOperand(0).isRenamable()))
1960               .add(MI.getOperand(2))
1961               .add(MI.getOperand(3))
1962               .addImm(MI.getOperand(4).getImm())
1963               .add(MI.getOperand(5));
1964         }
1965       }
1966       MI.eraseFromParent();
1967       return true;
1968     }
1969 
1970     case ARM::TCRETURNdi:
1971     case ARM::TCRETURNri: {
1972       MachineBasicBlock::iterator MBBI = MBB.getLastNonDebugInstr();
1973       assert(MBBI->isReturn() &&
1974              "Can only insert epilog into returning blocks");
1975       unsigned RetOpcode = MBBI->getOpcode();
1976       DebugLoc dl = MBBI->getDebugLoc();
1977       const ARMBaseInstrInfo &TII = *static_cast<const ARMBaseInstrInfo *>(
1978           MBB.getParent()->getSubtarget().getInstrInfo());
1979 
1980       // Tail call return: adjust the stack pointer and jump to callee.
1981       MBBI = MBB.getLastNonDebugInstr();
1982       MachineOperand &JumpTarget = MBBI->getOperand(0);
1983 
1984       // Jump to label or value in register.
1985       if (RetOpcode == ARM::TCRETURNdi) {
1986         unsigned TCOpcode =
1987             STI->isThumb()
1988                 ? (STI->isTargetMachO() ? ARM::tTAILJMPd : ARM::tTAILJMPdND)
1989                 : ARM::TAILJMPd;
1990         MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode));
1991         if (JumpTarget.isGlobal())
1992           MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(),
1993                                JumpTarget.getTargetFlags());
1994         else {
1995           assert(JumpTarget.isSymbol());
1996           MIB.addExternalSymbol(JumpTarget.getSymbolName(),
1997                                 JumpTarget.getTargetFlags());
1998         }
1999 
2000         // Add the default predicate in Thumb mode.
2001         if (STI->isThumb())
2002           MIB.add(predOps(ARMCC::AL));
2003       } else if (RetOpcode == ARM::TCRETURNri) {
2004         unsigned Opcode =
2005           STI->isThumb() ? ARM::tTAILJMPr
2006                          : (STI->hasV4TOps() ? ARM::TAILJMPr : ARM::TAILJMPr4);
2007         BuildMI(MBB, MBBI, dl,
2008                 TII.get(Opcode))
2009             .addReg(JumpTarget.getReg(), RegState::Kill);
2010       }
2011 
2012       auto NewMI = std::prev(MBBI);
2013       for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i)
2014         NewMI->addOperand(MBBI->getOperand(i));
2015 
2016 
2017       // Update call site info and delete the pseudo instruction TCRETURN.
2018       if (MI.isCandidateForCallSiteEntry())
2019         MI.getMF()->moveCallSiteInfo(&MI, &*NewMI);
2020       MBB.erase(MBBI);
2021 
2022       MBBI = NewMI;
2023       return true;
2024     }
2025     case ARM::tBXNS_RET: {
2026       MachineBasicBlock &AfterBB = CMSEClearFPRegs(MBB, MBBI);
2027 
2028       if (STI->hasV8_1MMainlineOps()) {
2029         // Restore the non-secure floating point context.
2030         BuildMI(MBB, MBBI, MBBI->getDebugLoc(),
2031                 TII->get(ARM::VLDR_FPCXTNS_post), ARM::SP)
2032             .addReg(ARM::SP)
2033             .addImm(4)
2034             .add(predOps(ARMCC::AL));
2035       }
2036 
2037       // Clear all GPR that are not a use of the return instruction.
2038       assert(llvm::all_of(MBBI->operands(), [](const MachineOperand &Op) {
2039         return !Op.isReg() || Op.getReg() != ARM::R12;
2040       }));
2041       SmallVector<unsigned, 5> ClearRegs;
2042       determineGPRegsToClear(
2043           *MBBI, {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R12}, ClearRegs);
2044       CMSEClearGPRegs(AfterBB, AfterBB.end(), MBBI->getDebugLoc(), ClearRegs,
2045                       ARM::LR);
2046 
2047       MachineInstrBuilder NewMI =
2048           BuildMI(AfterBB, AfterBB.end(), MBBI->getDebugLoc(),
2049                   TII->get(ARM::tBXNS))
2050               .addReg(ARM::LR)
2051               .add(predOps(ARMCC::AL));
2052       for (const MachineOperand &Op : MI.operands())
2053         NewMI->addOperand(Op);
2054       MI.eraseFromParent();
2055       return true;
2056     }
2057     case ARM::tBLXNS_CALL: {
2058       DebugLoc DL = MBBI->getDebugLoc();
2059       unsigned JumpReg = MBBI->getOperand(0).getReg();
2060 
2061       // Figure out which registers are live at the point immediately before the
2062       // call. When we indiscriminately push a set of registers, the live
2063       // registers are added as ordinary use operands, whereas dead registers
2064       // are "undef".
2065       LivePhysRegs LiveRegs(*TRI);
2066       LiveRegs.addLiveOuts(MBB);
2067       for (const MachineInstr &MI : make_range(MBB.rbegin(), MBBI.getReverse()))
2068         LiveRegs.stepBackward(MI);
2069       LiveRegs.stepBackward(*MBBI);
2070 
2071       CMSEPushCalleeSaves(*TII, MBB, MBBI, JumpReg, LiveRegs,
2072                           AFI->isThumb1OnlyFunction());
2073 
2074       SmallVector<unsigned, 16> ClearRegs;
2075       determineGPRegsToClear(*MBBI,
2076                              {ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4,
2077                               ARM::R5, ARM::R6, ARM::R7, ARM::R8, ARM::R9,
2078                               ARM::R10, ARM::R11, ARM::R12},
2079                              ClearRegs);
2080       auto OriginalClearRegs = ClearRegs;
2081 
2082       // Get the first cleared register as a scratch (to use later with tBIC).
2083       // We need to use the first so we can ensure it is a low register.
2084       unsigned ScratchReg = ClearRegs.front();
2085 
2086       // Clear LSB of JumpReg
2087       if (AFI->isThumb2Function()) {
2088         BuildMI(MBB, MBBI, DL, TII->get(ARM::t2BICri), JumpReg)
2089             .addReg(JumpReg)
2090             .addImm(1)
2091             .add(predOps(ARMCC::AL))
2092             .add(condCodeOp());
2093       } else {
2094         // We need to use an extra register to cope with 8M Baseline,
2095         // since we have saved all of the registers we are ok to trash a non
2096         // argument register here.
2097         BuildMI(MBB, MBBI, DL, TII->get(ARM::tMOVi8), ScratchReg)
2098             .add(condCodeOp())
2099             .addImm(1)
2100             .add(predOps(ARMCC::AL));
2101         BuildMI(MBB, MBBI, DL, TII->get(ARM::tBIC), JumpReg)
2102             .addReg(ARM::CPSR, RegState::Define)
2103             .addReg(JumpReg)
2104             .addReg(ScratchReg)
2105             .add(predOps(ARMCC::AL));
2106       }
2107 
2108       CMSESaveClearFPRegs(MBB, MBBI, DL, LiveRegs,
2109                           ClearRegs); // save+clear FP regs with ClearRegs
2110       CMSEClearGPRegs(MBB, MBBI, DL, ClearRegs, JumpReg);
2111 
2112       const MachineInstrBuilder NewCall =
2113           BuildMI(MBB, MBBI, DL, TII->get(ARM::tBLXNSr))
2114               .add(predOps(ARMCC::AL))
2115               .addReg(JumpReg, RegState::Kill);
2116 
2117       for (int I = 1, E = MI.getNumOperands(); I != E; ++I)
2118         NewCall->addOperand(MI.getOperand(I));
2119       if (MI.isCandidateForCallSiteEntry())
2120         MI.getMF()->moveCallSiteInfo(&MI, NewCall.getInstr());
2121 
2122       CMSERestoreFPRegs(MBB, MBBI, DL, OriginalClearRegs); // restore FP registers
2123 
2124       CMSEPopCalleeSaves(*TII, MBB, MBBI, JumpReg, AFI->isThumb1OnlyFunction());
2125 
2126       MI.eraseFromParent();
2127       return true;
2128     }
2129     case ARM::VMOVHcc:
2130     case ARM::VMOVScc:
2131     case ARM::VMOVDcc: {
2132       unsigned newOpc = Opcode != ARM::VMOVDcc ? ARM::VMOVS : ARM::VMOVD;
2133       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc),
2134               MI.getOperand(1).getReg())
2135           .add(MI.getOperand(2))
2136           .addImm(MI.getOperand(3).getImm()) // 'pred'
2137           .add(MI.getOperand(4))
2138           .add(makeImplicit(MI.getOperand(1)));
2139 
2140       MI.eraseFromParent();
2141       return true;
2142     }
2143     case ARM::t2MOVCCr:
2144     case ARM::MOVCCr: {
2145       unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVr : ARM::MOVr;
2146       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
2147               MI.getOperand(1).getReg())
2148           .add(MI.getOperand(2))
2149           .addImm(MI.getOperand(3).getImm()) // 'pred'
2150           .add(MI.getOperand(4))
2151           .add(condCodeOp()) // 's' bit
2152           .add(makeImplicit(MI.getOperand(1)));
2153 
2154       MI.eraseFromParent();
2155       return true;
2156     }
2157     case ARM::MOVCCsi: {
2158       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
2159               (MI.getOperand(1).getReg()))
2160           .add(MI.getOperand(2))
2161           .addImm(MI.getOperand(3).getImm())
2162           .addImm(MI.getOperand(4).getImm()) // 'pred'
2163           .add(MI.getOperand(5))
2164           .add(condCodeOp()) // 's' bit
2165           .add(makeImplicit(MI.getOperand(1)));
2166 
2167       MI.eraseFromParent();
2168       return true;
2169     }
2170     case ARM::MOVCCsr: {
2171       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr),
2172               (MI.getOperand(1).getReg()))
2173           .add(MI.getOperand(2))
2174           .add(MI.getOperand(3))
2175           .addImm(MI.getOperand(4).getImm())
2176           .addImm(MI.getOperand(5).getImm()) // 'pred'
2177           .add(MI.getOperand(6))
2178           .add(condCodeOp()) // 's' bit
2179           .add(makeImplicit(MI.getOperand(1)));
2180 
2181       MI.eraseFromParent();
2182       return true;
2183     }
2184     case ARM::t2MOVCCi16:
2185     case ARM::MOVCCi16: {
2186       unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16;
2187       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
2188               MI.getOperand(1).getReg())
2189           .addImm(MI.getOperand(2).getImm())
2190           .addImm(MI.getOperand(3).getImm()) // 'pred'
2191           .add(MI.getOperand(4))
2192           .add(makeImplicit(MI.getOperand(1)));
2193       MI.eraseFromParent();
2194       return true;
2195     }
2196     case ARM::t2MOVCCi:
2197     case ARM::MOVCCi: {
2198       unsigned Opc = AFI->isThumbFunction() ? ARM::t2MOVi : ARM::MOVi;
2199       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
2200               MI.getOperand(1).getReg())
2201           .addImm(MI.getOperand(2).getImm())
2202           .addImm(MI.getOperand(3).getImm()) // 'pred'
2203           .add(MI.getOperand(4))
2204           .add(condCodeOp()) // 's' bit
2205           .add(makeImplicit(MI.getOperand(1)));
2206 
2207       MI.eraseFromParent();
2208       return true;
2209     }
2210     case ARM::t2MVNCCi:
2211     case ARM::MVNCCi: {
2212       unsigned Opc = AFI->isThumbFunction() ? ARM::t2MVNi : ARM::MVNi;
2213       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc),
2214               MI.getOperand(1).getReg())
2215           .addImm(MI.getOperand(2).getImm())
2216           .addImm(MI.getOperand(3).getImm()) // 'pred'
2217           .add(MI.getOperand(4))
2218           .add(condCodeOp()) // 's' bit
2219           .add(makeImplicit(MI.getOperand(1)));
2220 
2221       MI.eraseFromParent();
2222       return true;
2223     }
2224     case ARM::t2MOVCClsl:
2225     case ARM::t2MOVCClsr:
2226     case ARM::t2MOVCCasr:
2227     case ARM::t2MOVCCror: {
2228       unsigned NewOpc;
2229       switch (Opcode) {
2230       case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break;
2231       case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break;
2232       case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break;
2233       case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break;
2234       default: llvm_unreachable("unexpeced conditional move");
2235       }
2236       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc),
2237               MI.getOperand(1).getReg())
2238           .add(MI.getOperand(2))
2239           .addImm(MI.getOperand(3).getImm())
2240           .addImm(MI.getOperand(4).getImm()) // 'pred'
2241           .add(MI.getOperand(5))
2242           .add(condCodeOp()) // 's' bit
2243           .add(makeImplicit(MI.getOperand(1)));
2244       MI.eraseFromParent();
2245       return true;
2246     }
2247     case ARM::Int_eh_sjlj_dispatchsetup: {
2248       MachineFunction &MF = *MI.getParent()->getParent();
2249       const ARMBaseInstrInfo *AII =
2250         static_cast<const ARMBaseInstrInfo*>(TII);
2251       const ARMBaseRegisterInfo &RI = AII->getRegisterInfo();
2252       // For functions using a base pointer, we rematerialize it (via the frame
2253       // pointer) here since eh.sjlj.setjmp and eh.sjlj.longjmp don't do it
2254       // for us. Otherwise, expand to nothing.
2255       if (RI.hasBasePointer(MF)) {
2256         int32_t NumBytes = AFI->getFramePtrSpillOffset();
2257         Register FramePtr = RI.getFrameRegister(MF);
2258         assert(MF.getSubtarget().getFrameLowering()->hasFP(MF) &&
2259                "base pointer without frame pointer?");
2260 
2261         if (AFI->isThumb2Function()) {
2262           emitT2RegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
2263                                  FramePtr, -NumBytes, ARMCC::AL, 0, *TII);
2264         } else if (AFI->isThumbFunction()) {
2265           emitThumbRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
2266                                     FramePtr, -NumBytes, *TII, RI);
2267         } else {
2268           emitARMRegPlusImmediate(MBB, MBBI, MI.getDebugLoc(), ARM::R6,
2269                                   FramePtr, -NumBytes, ARMCC::AL, 0,
2270                                   *TII);
2271         }
2272         // If there's dynamic realignment, adjust for it.
2273         if (RI.hasStackRealignment(MF)) {
2274           MachineFrameInfo &MFI = MF.getFrameInfo();
2275           Align MaxAlign = MFI.getMaxAlign();
2276           assert (!AFI->isThumb1OnlyFunction());
2277           // Emit bic r6, r6, MaxAlign
2278           assert(MaxAlign <= Align(256) &&
2279                  "The BIC instruction cannot encode "
2280                  "immediates larger than 256 with all lower "
2281                  "bits set.");
2282           unsigned bicOpc = AFI->isThumbFunction() ?
2283             ARM::t2BICri : ARM::BICri;
2284           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(bicOpc), ARM::R6)
2285               .addReg(ARM::R6, RegState::Kill)
2286               .addImm(MaxAlign.value() - 1)
2287               .add(predOps(ARMCC::AL))
2288               .add(condCodeOp());
2289         }
2290       }
2291       MI.eraseFromParent();
2292       return true;
2293     }
2294 
2295     case ARM::MOVsrl_flag:
2296     case ARM::MOVsra_flag: {
2297       // These are just fancy MOVs instructions.
2298       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
2299               MI.getOperand(0).getReg())
2300           .add(MI.getOperand(1))
2301           .addImm(ARM_AM::getSORegOpc(
2302               (Opcode == ARM::MOVsrl_flag ? ARM_AM::lsr : ARM_AM::asr), 1))
2303           .add(predOps(ARMCC::AL))
2304           .addReg(ARM::CPSR, RegState::Define);
2305       MI.eraseFromParent();
2306       return true;
2307     }
2308     case ARM::RRX: {
2309       // This encodes as "MOVs Rd, Rm, rrx
2310       MachineInstrBuilder MIB =
2311           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi),
2312                   MI.getOperand(0).getReg())
2313               .add(MI.getOperand(1))
2314               .addImm(ARM_AM::getSORegOpc(ARM_AM::rrx, 0))
2315               .add(predOps(ARMCC::AL))
2316               .add(condCodeOp());
2317       TransferImpOps(MI, MIB, MIB);
2318       MI.eraseFromParent();
2319       return true;
2320     }
2321     case ARM::tTPsoft:
2322     case ARM::TPsoft: {
2323       const bool Thumb = Opcode == ARM::tTPsoft;
2324 
2325       MachineInstrBuilder MIB;
2326       MachineFunction *MF = MBB.getParent();
2327       if (STI->genLongCalls()) {
2328         MachineConstantPool *MCP = MF->getConstantPool();
2329         unsigned PCLabelID = AFI->createPICLabelUId();
2330         MachineConstantPoolValue *CPV =
2331             ARMConstantPoolSymbol::Create(MF->getFunction().getContext(),
2332                                           "__aeabi_read_tp", PCLabelID, 0);
2333         Register Reg = MI.getOperand(0).getReg();
2334         MIB =
2335             BuildMI(MBB, MBBI, MI.getDebugLoc(),
2336                     TII->get(Thumb ? ARM::tLDRpci : ARM::LDRi12), Reg)
2337                 .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
2338         if (!Thumb)
2339           MIB.addImm(0);
2340         MIB.add(predOps(ARMCC::AL));
2341 
2342         MIB =
2343             BuildMI(MBB, MBBI, MI.getDebugLoc(),
2344                     TII->get(Thumb ? gettBLXrOpcode(*MF) : getBLXOpcode(*MF)));
2345         if (Thumb)
2346           MIB.add(predOps(ARMCC::AL));
2347         MIB.addReg(Reg, RegState::Kill);
2348       } else {
2349         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
2350                       TII->get(Thumb ? ARM::tBL : ARM::BL));
2351         if (Thumb)
2352           MIB.add(predOps(ARMCC::AL));
2353         MIB.addExternalSymbol("__aeabi_read_tp", 0);
2354       }
2355 
2356       MIB.cloneMemRefs(MI);
2357       TransferImpOps(MI, MIB, MIB);
2358       // Update the call site info.
2359       if (MI.isCandidateForCallSiteEntry())
2360         MF->moveCallSiteInfo(&MI, &*MIB);
2361       MI.eraseFromParent();
2362       return true;
2363     }
2364     case ARM::tLDRpci_pic:
2365     case ARM::t2LDRpci_pic: {
2366       unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
2367         ? ARM::tLDRpci : ARM::t2LDRpci;
2368       Register DstReg = MI.getOperand(0).getReg();
2369       bool DstIsDead = MI.getOperand(0).isDead();
2370       MachineInstrBuilder MIB1 =
2371           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewLdOpc), DstReg)
2372               .add(MI.getOperand(1))
2373               .add(predOps(ARMCC::AL));
2374       MIB1.cloneMemRefs(MI);
2375       MachineInstrBuilder MIB2 =
2376           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
2377               .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
2378               .addReg(DstReg)
2379               .add(MI.getOperand(2));
2380       TransferImpOps(MI, MIB1, MIB2);
2381       MI.eraseFromParent();
2382       return true;
2383     }
2384 
2385     case ARM::LDRLIT_ga_abs:
2386     case ARM::LDRLIT_ga_pcrel:
2387     case ARM::LDRLIT_ga_pcrel_ldr:
2388     case ARM::tLDRLIT_ga_abs:
2389     case ARM::tLDRLIT_ga_pcrel: {
2390       Register DstReg = MI.getOperand(0).getReg();
2391       bool DstIsDead = MI.getOperand(0).isDead();
2392       const MachineOperand &MO1 = MI.getOperand(1);
2393       auto Flags = MO1.getTargetFlags();
2394       const GlobalValue *GV = MO1.getGlobal();
2395       bool IsARM =
2396           Opcode != ARM::tLDRLIT_ga_pcrel && Opcode != ARM::tLDRLIT_ga_abs;
2397       bool IsPIC =
2398           Opcode != ARM::LDRLIT_ga_abs && Opcode != ARM::tLDRLIT_ga_abs;
2399       unsigned LDRLITOpc = IsARM ? ARM::LDRi12 : ARM::tLDRpci;
2400       unsigned PICAddOpc =
2401           IsARM
2402               ? (Opcode == ARM::LDRLIT_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
2403               : ARM::tPICADD;
2404 
2405       // We need a new const-pool entry to load from.
2406       MachineConstantPool *MCP = MBB.getParent()->getConstantPool();
2407       unsigned ARMPCLabelIndex = 0;
2408       MachineConstantPoolValue *CPV;
2409 
2410       if (IsPIC) {
2411         unsigned PCAdj = IsARM ? 8 : 4;
2412         auto Modifier = (Flags & ARMII::MO_GOT)
2413                             ? ARMCP::GOT_PREL
2414                             : ARMCP::no_modifier;
2415         ARMPCLabelIndex = AFI->createPICLabelUId();
2416         CPV = ARMConstantPoolConstant::Create(
2417             GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj, Modifier,
2418             /*AddCurrentAddr*/ Modifier == ARMCP::GOT_PREL);
2419       } else
2420         CPV = ARMConstantPoolConstant::Create(GV, ARMCP::no_modifier);
2421 
2422       MachineInstrBuilder MIB =
2423           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LDRLITOpc), DstReg)
2424               .addConstantPoolIndex(MCP->getConstantPoolIndex(CPV, Align(4)));
2425       if (IsARM)
2426         MIB.addImm(0);
2427       MIB.add(predOps(ARMCC::AL));
2428 
2429       if (IsPIC) {
2430         MachineInstrBuilder MIB =
2431           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(PICAddOpc))
2432             .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
2433             .addReg(DstReg)
2434             .addImm(ARMPCLabelIndex);
2435 
2436         if (IsARM)
2437           MIB.add(predOps(ARMCC::AL));
2438       }
2439 
2440       MI.eraseFromParent();
2441       return true;
2442     }
2443     case ARM::MOV_ga_pcrel:
2444     case ARM::MOV_ga_pcrel_ldr:
2445     case ARM::t2MOV_ga_pcrel: {
2446       // Expand into movw + movt. Also "add pc" / ldr [pc] in PIC mode.
2447       unsigned LabelId = AFI->createPICLabelUId();
2448       Register DstReg = MI.getOperand(0).getReg();
2449       bool DstIsDead = MI.getOperand(0).isDead();
2450       const MachineOperand &MO1 = MI.getOperand(1);
2451       const GlobalValue *GV = MO1.getGlobal();
2452       unsigned TF = MO1.getTargetFlags();
2453       bool isARM = Opcode != ARM::t2MOV_ga_pcrel;
2454       unsigned LO16Opc = isARM ? ARM::MOVi16_ga_pcrel : ARM::t2MOVi16_ga_pcrel;
2455       unsigned HI16Opc = isARM ? ARM::MOVTi16_ga_pcrel :ARM::t2MOVTi16_ga_pcrel;
2456       unsigned LO16TF = TF | ARMII::MO_LO16;
2457       unsigned HI16TF = TF | ARMII::MO_HI16;
2458       unsigned PICAddOpc = isARM
2459         ? (Opcode == ARM::MOV_ga_pcrel_ldr ? ARM::PICLDR : ARM::PICADD)
2460         : ARM::tPICADD;
2461       MachineInstrBuilder MIB1 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
2462                                          TII->get(LO16Opc), DstReg)
2463         .addGlobalAddress(GV, MO1.getOffset(), TF | LO16TF)
2464         .addImm(LabelId);
2465 
2466       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(HI16Opc), DstReg)
2467         .addReg(DstReg)
2468         .addGlobalAddress(GV, MO1.getOffset(), TF | HI16TF)
2469         .addImm(LabelId);
2470 
2471       MachineInstrBuilder MIB3 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
2472                                          TII->get(PICAddOpc))
2473         .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead))
2474         .addReg(DstReg).addImm(LabelId);
2475       if (isARM) {
2476         MIB3.add(predOps(ARMCC::AL));
2477         if (Opcode == ARM::MOV_ga_pcrel_ldr)
2478           MIB3.cloneMemRefs(MI);
2479       }
2480       TransferImpOps(MI, MIB1, MIB3);
2481       MI.eraseFromParent();
2482       return true;
2483     }
2484 
2485     case ARM::MOVi32imm:
2486     case ARM::MOVCCi32imm:
2487     case ARM::t2MOVi32imm:
2488     case ARM::t2MOVCCi32imm:
2489       ExpandMOV32BitImm(MBB, MBBI);
2490       return true;
2491 
2492     case ARM::SUBS_PC_LR: {
2493       MachineInstrBuilder MIB =
2494           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC)
2495               .addReg(ARM::LR)
2496               .add(MI.getOperand(0))
2497               .add(MI.getOperand(1))
2498               .add(MI.getOperand(2))
2499               .addReg(ARM::CPSR, RegState::Undef);
2500       TransferImpOps(MI, MIB, MIB);
2501       MI.eraseFromParent();
2502       return true;
2503     }
2504     case ARM::VLDMQIA: {
2505       unsigned NewOpc = ARM::VLDMDIA;
2506       MachineInstrBuilder MIB =
2507         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
2508       unsigned OpIdx = 0;
2509 
2510       // Grab the Q register destination.
2511       bool DstIsDead = MI.getOperand(OpIdx).isDead();
2512       Register DstReg = MI.getOperand(OpIdx++).getReg();
2513 
2514       // Copy the source register.
2515       MIB.add(MI.getOperand(OpIdx++));
2516 
2517       // Copy the predicate operands.
2518       MIB.add(MI.getOperand(OpIdx++));
2519       MIB.add(MI.getOperand(OpIdx++));
2520 
2521       // Add the destination operands (D subregs).
2522       Register D0 = TRI->getSubReg(DstReg, ARM::dsub_0);
2523       Register D1 = TRI->getSubReg(DstReg, ARM::dsub_1);
2524       MIB.addReg(D0, RegState::Define | getDeadRegState(DstIsDead))
2525         .addReg(D1, RegState::Define | getDeadRegState(DstIsDead));
2526 
2527       // Add an implicit def for the super-register.
2528       MIB.addReg(DstReg, RegState::ImplicitDefine | getDeadRegState(DstIsDead));
2529       TransferImpOps(MI, MIB, MIB);
2530       MIB.cloneMemRefs(MI);
2531       MI.eraseFromParent();
2532       return true;
2533     }
2534 
2535     case ARM::VSTMQIA: {
2536       unsigned NewOpc = ARM::VSTMDIA;
2537       MachineInstrBuilder MIB =
2538         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc));
2539       unsigned OpIdx = 0;
2540 
2541       // Grab the Q register source.
2542       bool SrcIsKill = MI.getOperand(OpIdx).isKill();
2543       Register SrcReg = MI.getOperand(OpIdx++).getReg();
2544 
2545       // Copy the destination register.
2546       MachineOperand Dst(MI.getOperand(OpIdx++));
2547       MIB.add(Dst);
2548 
2549       // Copy the predicate operands.
2550       MIB.add(MI.getOperand(OpIdx++));
2551       MIB.add(MI.getOperand(OpIdx++));
2552 
2553       // Add the source operands (D subregs).
2554       Register D0 = TRI->getSubReg(SrcReg, ARM::dsub_0);
2555       Register D1 = TRI->getSubReg(SrcReg, ARM::dsub_1);
2556       MIB.addReg(D0, SrcIsKill ? RegState::Kill : 0)
2557          .addReg(D1, SrcIsKill ? RegState::Kill : 0);
2558 
2559       if (SrcIsKill)      // Add an implicit kill for the Q register.
2560         MIB->addRegisterKilled(SrcReg, TRI, true);
2561 
2562       TransferImpOps(MI, MIB, MIB);
2563       MIB.cloneMemRefs(MI);
2564       MI.eraseFromParent();
2565       return true;
2566     }
2567 
2568     case ARM::VLD2q8Pseudo:
2569     case ARM::VLD2q16Pseudo:
2570     case ARM::VLD2q32Pseudo:
2571     case ARM::VLD2q8PseudoWB_fixed:
2572     case ARM::VLD2q16PseudoWB_fixed:
2573     case ARM::VLD2q32PseudoWB_fixed:
2574     case ARM::VLD2q8PseudoWB_register:
2575     case ARM::VLD2q16PseudoWB_register:
2576     case ARM::VLD2q32PseudoWB_register:
2577     case ARM::VLD3d8Pseudo:
2578     case ARM::VLD3d16Pseudo:
2579     case ARM::VLD3d32Pseudo:
2580     case ARM::VLD1d8TPseudo:
2581     case ARM::VLD1d16TPseudo:
2582     case ARM::VLD1d32TPseudo:
2583     case ARM::VLD1d64TPseudo:
2584     case ARM::VLD1d64TPseudoWB_fixed:
2585     case ARM::VLD1d64TPseudoWB_register:
2586     case ARM::VLD3d8Pseudo_UPD:
2587     case ARM::VLD3d16Pseudo_UPD:
2588     case ARM::VLD3d32Pseudo_UPD:
2589     case ARM::VLD3q8Pseudo_UPD:
2590     case ARM::VLD3q16Pseudo_UPD:
2591     case ARM::VLD3q32Pseudo_UPD:
2592     case ARM::VLD3q8oddPseudo:
2593     case ARM::VLD3q16oddPseudo:
2594     case ARM::VLD3q32oddPseudo:
2595     case ARM::VLD3q8oddPseudo_UPD:
2596     case ARM::VLD3q16oddPseudo_UPD:
2597     case ARM::VLD3q32oddPseudo_UPD:
2598     case ARM::VLD4d8Pseudo:
2599     case ARM::VLD4d16Pseudo:
2600     case ARM::VLD4d32Pseudo:
2601     case ARM::VLD1d8QPseudo:
2602     case ARM::VLD1d16QPseudo:
2603     case ARM::VLD1d32QPseudo:
2604     case ARM::VLD1d64QPseudo:
2605     case ARM::VLD1d64QPseudoWB_fixed:
2606     case ARM::VLD1d64QPseudoWB_register:
2607     case ARM::VLD1q8HighQPseudo:
2608     case ARM::VLD1q8LowQPseudo_UPD:
2609     case ARM::VLD1q8HighTPseudo:
2610     case ARM::VLD1q8LowTPseudo_UPD:
2611     case ARM::VLD1q16HighQPseudo:
2612     case ARM::VLD1q16LowQPseudo_UPD:
2613     case ARM::VLD1q16HighTPseudo:
2614     case ARM::VLD1q16LowTPseudo_UPD:
2615     case ARM::VLD1q32HighQPseudo:
2616     case ARM::VLD1q32LowQPseudo_UPD:
2617     case ARM::VLD1q32HighTPseudo:
2618     case ARM::VLD1q32LowTPseudo_UPD:
2619     case ARM::VLD1q64HighQPseudo:
2620     case ARM::VLD1q64LowQPseudo_UPD:
2621     case ARM::VLD1q64HighTPseudo:
2622     case ARM::VLD1q64LowTPseudo_UPD:
2623     case ARM::VLD4d8Pseudo_UPD:
2624     case ARM::VLD4d16Pseudo_UPD:
2625     case ARM::VLD4d32Pseudo_UPD:
2626     case ARM::VLD4q8Pseudo_UPD:
2627     case ARM::VLD4q16Pseudo_UPD:
2628     case ARM::VLD4q32Pseudo_UPD:
2629     case ARM::VLD4q8oddPseudo:
2630     case ARM::VLD4q16oddPseudo:
2631     case ARM::VLD4q32oddPseudo:
2632     case ARM::VLD4q8oddPseudo_UPD:
2633     case ARM::VLD4q16oddPseudo_UPD:
2634     case ARM::VLD4q32oddPseudo_UPD:
2635     case ARM::VLD3DUPd8Pseudo:
2636     case ARM::VLD3DUPd16Pseudo:
2637     case ARM::VLD3DUPd32Pseudo:
2638     case ARM::VLD3DUPd8Pseudo_UPD:
2639     case ARM::VLD3DUPd16Pseudo_UPD:
2640     case ARM::VLD3DUPd32Pseudo_UPD:
2641     case ARM::VLD4DUPd8Pseudo:
2642     case ARM::VLD4DUPd16Pseudo:
2643     case ARM::VLD4DUPd32Pseudo:
2644     case ARM::VLD4DUPd8Pseudo_UPD:
2645     case ARM::VLD4DUPd16Pseudo_UPD:
2646     case ARM::VLD4DUPd32Pseudo_UPD:
2647     case ARM::VLD2DUPq8EvenPseudo:
2648     case ARM::VLD2DUPq8OddPseudo:
2649     case ARM::VLD2DUPq16EvenPseudo:
2650     case ARM::VLD2DUPq16OddPseudo:
2651     case ARM::VLD2DUPq32EvenPseudo:
2652     case ARM::VLD2DUPq32OddPseudo:
2653     case ARM::VLD3DUPq8EvenPseudo:
2654     case ARM::VLD3DUPq8OddPseudo:
2655     case ARM::VLD3DUPq16EvenPseudo:
2656     case ARM::VLD3DUPq16OddPseudo:
2657     case ARM::VLD3DUPq32EvenPseudo:
2658     case ARM::VLD3DUPq32OddPseudo:
2659     case ARM::VLD4DUPq8EvenPseudo:
2660     case ARM::VLD4DUPq8OddPseudo:
2661     case ARM::VLD4DUPq16EvenPseudo:
2662     case ARM::VLD4DUPq16OddPseudo:
2663     case ARM::VLD4DUPq32EvenPseudo:
2664     case ARM::VLD4DUPq32OddPseudo:
2665       ExpandVLD(MBBI);
2666       return true;
2667 
2668     case ARM::VST2q8Pseudo:
2669     case ARM::VST2q16Pseudo:
2670     case ARM::VST2q32Pseudo:
2671     case ARM::VST2q8PseudoWB_fixed:
2672     case ARM::VST2q16PseudoWB_fixed:
2673     case ARM::VST2q32PseudoWB_fixed:
2674     case ARM::VST2q8PseudoWB_register:
2675     case ARM::VST2q16PseudoWB_register:
2676     case ARM::VST2q32PseudoWB_register:
2677     case ARM::VST3d8Pseudo:
2678     case ARM::VST3d16Pseudo:
2679     case ARM::VST3d32Pseudo:
2680     case ARM::VST1d8TPseudo:
2681     case ARM::VST1d8TPseudoWB_fixed:
2682     case ARM::VST1d8TPseudoWB_register:
2683     case ARM::VST1d16TPseudo:
2684     case ARM::VST1d16TPseudoWB_fixed:
2685     case ARM::VST1d16TPseudoWB_register:
2686     case ARM::VST1d32TPseudo:
2687     case ARM::VST1d32TPseudoWB_fixed:
2688     case ARM::VST1d32TPseudoWB_register:
2689     case ARM::VST1d64TPseudo:
2690     case ARM::VST1d64TPseudoWB_fixed:
2691     case ARM::VST1d64TPseudoWB_register:
2692     case ARM::VST3d8Pseudo_UPD:
2693     case ARM::VST3d16Pseudo_UPD:
2694     case ARM::VST3d32Pseudo_UPD:
2695     case ARM::VST3q8Pseudo_UPD:
2696     case ARM::VST3q16Pseudo_UPD:
2697     case ARM::VST3q32Pseudo_UPD:
2698     case ARM::VST3q8oddPseudo:
2699     case ARM::VST3q16oddPseudo:
2700     case ARM::VST3q32oddPseudo:
2701     case ARM::VST3q8oddPseudo_UPD:
2702     case ARM::VST3q16oddPseudo_UPD:
2703     case ARM::VST3q32oddPseudo_UPD:
2704     case ARM::VST4d8Pseudo:
2705     case ARM::VST4d16Pseudo:
2706     case ARM::VST4d32Pseudo:
2707     case ARM::VST1d8QPseudo:
2708     case ARM::VST1d8QPseudoWB_fixed:
2709     case ARM::VST1d8QPseudoWB_register:
2710     case ARM::VST1d16QPseudo:
2711     case ARM::VST1d16QPseudoWB_fixed:
2712     case ARM::VST1d16QPseudoWB_register:
2713     case ARM::VST1d32QPseudo:
2714     case ARM::VST1d32QPseudoWB_fixed:
2715     case ARM::VST1d32QPseudoWB_register:
2716     case ARM::VST1d64QPseudo:
2717     case ARM::VST1d64QPseudoWB_fixed:
2718     case ARM::VST1d64QPseudoWB_register:
2719     case ARM::VST4d8Pseudo_UPD:
2720     case ARM::VST4d16Pseudo_UPD:
2721     case ARM::VST4d32Pseudo_UPD:
2722     case ARM::VST1q8HighQPseudo:
2723     case ARM::VST1q8LowQPseudo_UPD:
2724     case ARM::VST1q8HighTPseudo:
2725     case ARM::VST1q8LowTPseudo_UPD:
2726     case ARM::VST1q16HighQPseudo:
2727     case ARM::VST1q16LowQPseudo_UPD:
2728     case ARM::VST1q16HighTPseudo:
2729     case ARM::VST1q16LowTPseudo_UPD:
2730     case ARM::VST1q32HighQPseudo:
2731     case ARM::VST1q32LowQPseudo_UPD:
2732     case ARM::VST1q32HighTPseudo:
2733     case ARM::VST1q32LowTPseudo_UPD:
2734     case ARM::VST1q64HighQPseudo:
2735     case ARM::VST1q64LowQPseudo_UPD:
2736     case ARM::VST1q64HighTPseudo:
2737     case ARM::VST1q64LowTPseudo_UPD:
2738     case ARM::VST1q8HighTPseudo_UPD:
2739     case ARM::VST1q16HighTPseudo_UPD:
2740     case ARM::VST1q32HighTPseudo_UPD:
2741     case ARM::VST1q64HighTPseudo_UPD:
2742     case ARM::VST1q8HighQPseudo_UPD:
2743     case ARM::VST1q16HighQPseudo_UPD:
2744     case ARM::VST1q32HighQPseudo_UPD:
2745     case ARM::VST1q64HighQPseudo_UPD:
2746     case ARM::VST4q8Pseudo_UPD:
2747     case ARM::VST4q16Pseudo_UPD:
2748     case ARM::VST4q32Pseudo_UPD:
2749     case ARM::VST4q8oddPseudo:
2750     case ARM::VST4q16oddPseudo:
2751     case ARM::VST4q32oddPseudo:
2752     case ARM::VST4q8oddPseudo_UPD:
2753     case ARM::VST4q16oddPseudo_UPD:
2754     case ARM::VST4q32oddPseudo_UPD:
2755       ExpandVST(MBBI);
2756       return true;
2757 
2758     case ARM::VLD1LNq8Pseudo:
2759     case ARM::VLD1LNq16Pseudo:
2760     case ARM::VLD1LNq32Pseudo:
2761     case ARM::VLD1LNq8Pseudo_UPD:
2762     case ARM::VLD1LNq16Pseudo_UPD:
2763     case ARM::VLD1LNq32Pseudo_UPD:
2764     case ARM::VLD2LNd8Pseudo:
2765     case ARM::VLD2LNd16Pseudo:
2766     case ARM::VLD2LNd32Pseudo:
2767     case ARM::VLD2LNq16Pseudo:
2768     case ARM::VLD2LNq32Pseudo:
2769     case ARM::VLD2LNd8Pseudo_UPD:
2770     case ARM::VLD2LNd16Pseudo_UPD:
2771     case ARM::VLD2LNd32Pseudo_UPD:
2772     case ARM::VLD2LNq16Pseudo_UPD:
2773     case ARM::VLD2LNq32Pseudo_UPD:
2774     case ARM::VLD3LNd8Pseudo:
2775     case ARM::VLD3LNd16Pseudo:
2776     case ARM::VLD3LNd32Pseudo:
2777     case ARM::VLD3LNq16Pseudo:
2778     case ARM::VLD3LNq32Pseudo:
2779     case ARM::VLD3LNd8Pseudo_UPD:
2780     case ARM::VLD3LNd16Pseudo_UPD:
2781     case ARM::VLD3LNd32Pseudo_UPD:
2782     case ARM::VLD3LNq16Pseudo_UPD:
2783     case ARM::VLD3LNq32Pseudo_UPD:
2784     case ARM::VLD4LNd8Pseudo:
2785     case ARM::VLD4LNd16Pseudo:
2786     case ARM::VLD4LNd32Pseudo:
2787     case ARM::VLD4LNq16Pseudo:
2788     case ARM::VLD4LNq32Pseudo:
2789     case ARM::VLD4LNd8Pseudo_UPD:
2790     case ARM::VLD4LNd16Pseudo_UPD:
2791     case ARM::VLD4LNd32Pseudo_UPD:
2792     case ARM::VLD4LNq16Pseudo_UPD:
2793     case ARM::VLD4LNq32Pseudo_UPD:
2794     case ARM::VST1LNq8Pseudo:
2795     case ARM::VST1LNq16Pseudo:
2796     case ARM::VST1LNq32Pseudo:
2797     case ARM::VST1LNq8Pseudo_UPD:
2798     case ARM::VST1LNq16Pseudo_UPD:
2799     case ARM::VST1LNq32Pseudo_UPD:
2800     case ARM::VST2LNd8Pseudo:
2801     case ARM::VST2LNd16Pseudo:
2802     case ARM::VST2LNd32Pseudo:
2803     case ARM::VST2LNq16Pseudo:
2804     case ARM::VST2LNq32Pseudo:
2805     case ARM::VST2LNd8Pseudo_UPD:
2806     case ARM::VST2LNd16Pseudo_UPD:
2807     case ARM::VST2LNd32Pseudo_UPD:
2808     case ARM::VST2LNq16Pseudo_UPD:
2809     case ARM::VST2LNq32Pseudo_UPD:
2810     case ARM::VST3LNd8Pseudo:
2811     case ARM::VST3LNd16Pseudo:
2812     case ARM::VST3LNd32Pseudo:
2813     case ARM::VST3LNq16Pseudo:
2814     case ARM::VST3LNq32Pseudo:
2815     case ARM::VST3LNd8Pseudo_UPD:
2816     case ARM::VST3LNd16Pseudo_UPD:
2817     case ARM::VST3LNd32Pseudo_UPD:
2818     case ARM::VST3LNq16Pseudo_UPD:
2819     case ARM::VST3LNq32Pseudo_UPD:
2820     case ARM::VST4LNd8Pseudo:
2821     case ARM::VST4LNd16Pseudo:
2822     case ARM::VST4LNd32Pseudo:
2823     case ARM::VST4LNq16Pseudo:
2824     case ARM::VST4LNq32Pseudo:
2825     case ARM::VST4LNd8Pseudo_UPD:
2826     case ARM::VST4LNd16Pseudo_UPD:
2827     case ARM::VST4LNd32Pseudo_UPD:
2828     case ARM::VST4LNq16Pseudo_UPD:
2829     case ARM::VST4LNq32Pseudo_UPD:
2830       ExpandLaneOp(MBBI);
2831       return true;
2832 
2833     case ARM::VTBL3Pseudo: ExpandVTBL(MBBI, ARM::VTBL3, false); return true;
2834     case ARM::VTBL4Pseudo: ExpandVTBL(MBBI, ARM::VTBL4, false); return true;
2835     case ARM::VTBX3Pseudo: ExpandVTBL(MBBI, ARM::VTBX3, true); return true;
2836     case ARM::VTBX4Pseudo: ExpandVTBL(MBBI, ARM::VTBX4, true); return true;
2837 
2838     case ARM::tCMP_SWAP_8:
2839       assert(STI->isThumb());
2840       return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXB, ARM::t2STREXB, ARM::tUXTB,
2841                             NextMBBI);
2842     case ARM::tCMP_SWAP_16:
2843       assert(STI->isThumb());
2844       return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREXH, ARM::t2STREXH, ARM::tUXTH,
2845                             NextMBBI);
2846 
2847     case ARM::CMP_SWAP_8:
2848       assert(!STI->isThumb());
2849       return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXB, ARM::STREXB, ARM::UXTB,
2850                             NextMBBI);
2851     case ARM::CMP_SWAP_16:
2852       assert(!STI->isThumb());
2853       return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREXH, ARM::STREXH, ARM::UXTH,
2854                             NextMBBI);
2855     case ARM::CMP_SWAP_32:
2856       if (STI->isThumb())
2857         return ExpandCMP_SWAP(MBB, MBBI, ARM::t2LDREX, ARM::t2STREX, 0,
2858                               NextMBBI);
2859       else
2860         return ExpandCMP_SWAP(MBB, MBBI, ARM::LDREX, ARM::STREX, 0, NextMBBI);
2861 
2862     case ARM::CMP_SWAP_64:
2863       return ExpandCMP_SWAP_64(MBB, MBBI, NextMBBI);
2864 
2865     case ARM::tBL_PUSHLR:
2866     case ARM::BL_PUSHLR: {
2867       const bool Thumb = Opcode == ARM::tBL_PUSHLR;
2868       Register Reg = MI.getOperand(0).getReg();
2869       assert(Reg == ARM::LR && "expect LR register!");
2870       MachineInstrBuilder MIB;
2871       if (Thumb) {
2872         // push {lr}
2873         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPUSH))
2874             .add(predOps(ARMCC::AL))
2875             .addReg(Reg);
2876 
2877         // bl __gnu_mcount_nc
2878         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tBL));
2879       } else {
2880         // stmdb   sp!, {lr}
2881         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::STMDB_UPD))
2882             .addReg(ARM::SP, RegState::Define)
2883             .addReg(ARM::SP)
2884             .add(predOps(ARMCC::AL))
2885             .addReg(Reg);
2886 
2887         // bl __gnu_mcount_nc
2888         MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::BL));
2889       }
2890       MIB.cloneMemRefs(MI);
2891       for (unsigned i = 1; i < MI.getNumOperands(); ++i) MIB.add(MI.getOperand(i));
2892       MI.eraseFromParent();
2893       return true;
2894     }
2895     case ARM::LOADDUAL:
2896     case ARM::STOREDUAL: {
2897       Register PairReg = MI.getOperand(0).getReg();
2898 
2899       MachineInstrBuilder MIB =
2900           BuildMI(MBB, MBBI, MI.getDebugLoc(),
2901                   TII->get(Opcode == ARM::LOADDUAL ? ARM::LDRD : ARM::STRD))
2902               .addReg(TRI->getSubReg(PairReg, ARM::gsub_0),
2903                       Opcode == ARM::LOADDUAL ? RegState::Define : 0)
2904               .addReg(TRI->getSubReg(PairReg, ARM::gsub_1),
2905                       Opcode == ARM::LOADDUAL ? RegState::Define : 0);
2906       for (unsigned i = 1; i < MI.getNumOperands(); i++)
2907         MIB.add(MI.getOperand(i));
2908       MIB.add(predOps(ARMCC::AL));
2909       MIB.cloneMemRefs(MI);
2910       MI.eraseFromParent();
2911       return true;
2912     }
2913   }
2914 }
2915 
ExpandMBB(MachineBasicBlock & MBB)2916 bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
2917   bool Modified = false;
2918 
2919   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
2920   while (MBBI != E) {
2921     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
2922     Modified |= ExpandMI(MBB, MBBI, NMBBI);
2923     MBBI = NMBBI;
2924   }
2925 
2926   return Modified;
2927 }
2928 
runOnMachineFunction(MachineFunction & MF)2929 bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
2930   STI = &static_cast<const ARMSubtarget &>(MF.getSubtarget());
2931   TII = STI->getInstrInfo();
2932   TRI = STI->getRegisterInfo();
2933   AFI = MF.getInfo<ARMFunctionInfo>();
2934 
2935   LLVM_DEBUG(dbgs() << "********** ARM EXPAND PSEUDO INSTRUCTIONS **********\n"
2936                     << "********** Function: " << MF.getName() << '\n');
2937 
2938   bool Modified = false;
2939   for (MachineBasicBlock &MBB : MF)
2940     Modified |= ExpandMBB(MBB);
2941   if (VerifyARMPseudo)
2942     MF.verify(this, "After expanding ARM pseudo instructions.");
2943 
2944   LLVM_DEBUG(dbgs() << "***************************************************\n");
2945   return Modified;
2946 }
2947 
2948 /// createARMExpandPseudoPass - returns an instance of the pseudo instruction
2949 /// expansion pass.
createARMExpandPseudoPass()2950 FunctionPass *llvm::createARMExpandPseudoPass() {
2951   return new ARMExpandPseudo();
2952 }
2953