xref: /llvm-project/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp (revision bd261ecc5aeefd62150cb5f04e4a4f0cb7a12e1c)
1 //===-- ARMISelDAGToDAG.cpp - A dag to dag inst selector for ARM ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file defines an instruction selector for the ARM target.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "ARM.h"
14 #include "ARMBaseInstrInfo.h"
15 #include "ARMTargetMachine.h"
16 #include "MCTargetDesc/ARMAddressingModes.h"
17 #include "Utils/ARMBaseInfo.h"
18 #include "llvm/ADT/APSInt.h"
19 #include "llvm/ADT/StringSwitch.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineRegisterInfo.h"
24 #include "llvm/CodeGen/SelectionDAG.h"
25 #include "llvm/CodeGen/SelectionDAGISel.h"
26 #include "llvm/CodeGen/TargetLowering.h"
27 #include "llvm/IR/Constants.h"
28 #include "llvm/IR/DerivedTypes.h"
29 #include "llvm/IR/Function.h"
30 #include "llvm/IR/Intrinsics.h"
31 #include "llvm/IR/IntrinsicsARM.h"
32 #include "llvm/IR/LLVMContext.h"
33 #include "llvm/Support/CommandLine.h"
34 #include "llvm/Support/ErrorHandling.h"
35 #include "llvm/Target/TargetOptions.h"
36 #include <optional>
37 
38 using namespace llvm;
39 
40 #define DEBUG_TYPE "arm-isel"
41 #define PASS_NAME "ARM Instruction Selection"
42 
// Debugging/tuning aid: when set, none of the Select*ShifterOperand helpers
// below will match, so shifts are never folded into a using instruction as a
// shifter operand.
static cl::opt<bool>
DisableShifterOp("disable-shifter-op", cl::Hidden,
  cl::desc("Disable isel of shifter-op"),
  cl::init(false));
47 
48 //===--------------------------------------------------------------------===//
49 /// ARMDAGToDAGISel - ARM specific code to select ARM machine
50 /// instructions for SelectionDAG operations.
51 ///
52 namespace {
53 
class ARMDAGToDAGISel : public SelectionDAGISel {
  /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can
  /// make the right decision when generating code for different targets.
  const ARMSubtarget *Subtarget;

public:
  // A target machine is required; no default construction.
  ARMDAGToDAGISel() = delete;

  explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISel(tm, OptLevel) {}

  bool runOnMachineFunction(MachineFunction &MF) override {
    // Reset the subtarget each time through.
    Subtarget = &MF.getSubtarget<ARMSubtarget>();
    SelectionDAGISel::runOnMachineFunction(MF);
    // The base-class return value is ignored; selection is always reported
    // as having changed the function.
    return true;
  }

  void PreprocessISelDAG() override;

  /// getI32Imm - Return a target constant of type i32 with the specified
  /// value.
  inline SDValue getI32Imm(unsigned Imm, const SDLoc &dl) {
    return CurDAG->getTargetConstant(Imm, dl, MVT::i32);
  }

  void Select(SDNode *N) override;

  /// Return true as some complex patterns, like those that call
  /// canExtractShiftFromMul can modify the DAG inplace.
  bool ComplexPatternFuncMutatesDAG() const override { return true; }

  bool hasNoVMLxHazardUse(SDNode *N) const;
  bool isShifterOpProfitable(const SDValue &Shift,
                             ARM_AM::ShiftOpc ShOpcVal, unsigned ShAmt);
  bool SelectRegShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, SDValue &C,
                               bool CheckProfitability = true);
  bool SelectImmShifterOperand(SDValue N, SDValue &A,
                               SDValue &B, bool CheckProfitability = true);
  bool SelectShiftRegShifterOperand(SDValue N, SDValue &A, SDValue &B,
                                    SDValue &C) {
    // Don't apply the profitability check
    return SelectRegShifterOperand(N, A, B, C, false);
  }
  bool SelectShiftImmShifterOperand(SDValue N, SDValue &A, SDValue &B) {
    // Don't apply the profitability check
    return SelectImmShifterOperand(N, A, B, false);
  }
  bool SelectShiftImmShifterOperandOneUse(SDValue N, SDValue &A, SDValue &B) {
    // Same as above, but only match single-use shifts.
    if (!N.hasOneUse())
      return false;
    return SelectImmShifterOperand(N, A, B, false);
  }

  bool SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out);

  // ARM Addressing Modes:
  bool SelectAddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset, SDValue &Opc);

  bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool SelectAddrOffsetNone(SDValue N, SDValue &Base);
  bool SelectAddrMode3(SDValue N, SDValue &Base,
                       SDValue &Offset, SDValue &Opc);
  bool SelectAddrMode3Offset(SDNode *Op, SDValue N,
                             SDValue &Offset, SDValue &Opc);
  bool IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset, bool FP16);
  bool SelectAddrMode5(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode5FP16(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,SDValue &Align);
  bool SelectAddrMode6Offset(SDNode *Op, SDValue N, SDValue &Offset);

  bool SelectAddrModePC(SDValue N, SDValue &Offset, SDValue &Label);

  // Thumb Addressing Modes:
  bool SelectThumbAddrModeRR(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeRRSext(SDValue N, SDValue &Base, SDValue &Offset);
  bool SelectThumbAddrModeImm5S(SDValue N, unsigned Scale, SDValue &Base,
                                SDValue &OffImm);
  bool SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                 SDValue &OffImm);
  bool SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectTAddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);

  // Thumb 2 Addressing Modes:
  bool SelectT2AddrModeImm12(SDValue N, SDValue &Base, SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                            SDValue &OffImm);
  bool SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
                                 SDValue &OffImm);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm);
  bool SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N, SDValue &OffImm,
                                  unsigned Shift);
  template <unsigned Shift>
  bool SelectT2AddrModeImm7(SDValue N, SDValue &Base, SDValue &OffImm);
  bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base,
                             SDValue &OffReg, SDValue &ShImm);
  bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm);

  // Match a constant in the half-open range [Min, Max).
  template<int Min, int Max>
  bool SelectImmediateInRange(SDValue N, SDValue &OffImm);

  // Immediate-encodability predicates used by the generated matcher.
  inline bool is_so_imm(unsigned Imm) const {
    return ARM_AM::getSOImmVal(Imm) != -1;
  }

  inline bool is_so_imm_not(unsigned Imm) const {
    return ARM_AM::getSOImmVal(~Imm) != -1;
  }

  inline bool is_t2_so_imm(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(Imm) != -1;
  }

  inline bool is_t2_so_imm_not(unsigned Imm) const {
    return ARM_AM::getT2SOImmVal(~Imm) != -1;
  }

  // Include the pieces autogenerated from the target description.
#include "ARMGenDAGISel.inc"

private:
  void transferMemOperands(SDNode *Src, SDNode *Dst);

  /// Indexed (pre/post inc/dec) load matching code for ARM.
  bool tryARMIndexedLoad(SDNode *N);
  bool tryT1IndexedLoad(SDNode *N);
  bool tryT2IndexedLoad(SDNode *N);
  bool tryMVEIndexedLoad(SDNode *N);
  bool tryFMULFixed(SDNode *N, SDLoc dl);
  bool tryFP_TO_INT(SDNode *N, SDLoc dl);
  bool transformFixedFloatingPointConversion(SDNode *N, SDNode *FMul,
                                             bool IsUnsigned,
                                             bool FixedToFloat);

  /// SelectVLD - Select NEON load intrinsics.  NumVecs should be
  /// 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// loads of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVST - Select NEON store intrinsics.  NumVecs should
  /// be 1, 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// stores of D registers and even subregs and odd subregs of Q registers.
  /// For NumVecs <= 2, QOpcodes1 is not used.
  void SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                 const uint16_t *DOpcodes, const uint16_t *QOpcodes0,
                 const uint16_t *QOpcodes1);

  /// SelectVLDSTLane - Select NEON load/store lane intrinsics.  NumVecs should
  /// be 2, 3 or 4.  The opcode arrays specify the instructions used for
  /// load/store of D registers and Q registers.
  void SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                       unsigned NumVecs, const uint16_t *DOpcodes,
                       const uint16_t *QOpcodes);

  /// Helper functions for setting up clusters of MVE predication operands.
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask);
  template <typename SDValueVector>
  void AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
                            SDValue PredicateMask, SDValue Inactive);

  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc);
  template <typename SDValueVector>
  void AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc, EVT InactiveTy);

  /// SelectMVE_WB - Select MVE writeback load/store intrinsics.
  void SelectMVE_WB(SDNode *N, const uint16_t *Opcodes, bool Predicated);

  /// SelectMVE_LongShift - Select MVE 64-bit scalar shift intrinsics.
  void SelectMVE_LongShift(SDNode *N, uint16_t Opcode, bool Immediate,
                           bool HasSaturationOperand);

  /// SelectMVE_VADCSBC - Select MVE vector add/sub-with-carry intrinsics.
  void SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                         uint16_t OpcodeWithNoCarry, bool Add, bool Predicated);

  /// SelectMVE_VSHLC - Select MVE intrinsics for a shift that carries between
  /// vector lanes.
  void SelectMVE_VSHLC(SDNode *N, bool Predicated);

  /// Select long MVE vector reductions with two vector operands
  /// Stride is the number of vector element widths the instruction can operate
  /// on:
  /// 2 for long non-rounding variants, vml{a,s}ldav[a][x]: [i16, i32]
  /// 1 for long rounding variants: vrml{a,s}ldavh[a][x]: [i32]
  /// Stride is used when addressing the OpcodesS array which contains multiple
  /// opcodes for each element width.
  /// TySize is the index into the list of element types listed above
  void SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                             const uint16_t *OpcodesS, const uint16_t *OpcodesU,
                             size_t Stride, size_t TySize);

  /// Select a 64-bit MVE vector reduction with two vector operands
  /// arm_mve_vmlldava_[predicated]
  void SelectMVE_VMLLDAV(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                         const uint16_t *OpcodesU);
  /// Select a 72-bit MVE vector rounding reduction with two vector operands
  /// int_arm_mve_vrmlldavha[_predicated]
  void SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated, const uint16_t *OpcodesS,
                           const uint16_t *OpcodesU);

  /// SelectMVE_VLD - Select MVE interleaving load intrinsics. NumVecs
  /// should be 2 or 4. The opcode array specifies the instructions
  /// used for 8, 16 and 32-bit lane sizes respectively, and each
  /// pointer points to a set of NumVecs sub-opcodes used for the
  /// different stages (e.g. VLD20 versus VLD21) of each load family.
  void SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                     const uint16_t *const *Opcodes, bool HasWriteback);

  /// SelectMVE_VxDUP - Select MVE incrementing-dup instructions. Opcodes is an
  /// array of 3 elements for the 8, 16 and 32-bit lane sizes.
  void SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                       bool Wrapping, bool Predicated);

  /// Select SelectCDE_CXxD - Select CDE dual-GPR instruction (one of CX1D,
  /// CX1DA, CX2D, CX2DA, CX3, CX3DA).
  /// \arg \c NumExtraOps number of extra operands besides the coprocossor,
  ///                     the accumulator and the immediate operand, i.e. 0
  ///                     for CX1*, 1 for CX2*, 2 for CX3*
  /// \arg \c HasAccum whether the instruction has an accumulator operand
  void SelectCDE_CXxD(SDNode *N, uint16_t Opcode, size_t NumExtraOps,
                      bool HasAccum);

  /// SelectVLDDup - Select NEON load-duplicate intrinsics.  NumVecs
  /// should be 1, 2, 3 or 4.  The opcode array specifies the instructions used
  /// for loading D registers.
  void SelectVLDDup(SDNode *N, bool IsIntrinsic, bool isUpdating,
                    unsigned NumVecs, const uint16_t *DOpcodes,
                    const uint16_t *QOpcodes0 = nullptr,
                    const uint16_t *QOpcodes1 = nullptr);

  /// Try to select SBFX/UBFX instructions for ARM.
  bool tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned);

  bool tryInsertVectorElt(SDNode *N);

  // Select special operations if node forms integer ABS pattern
  bool tryABSOp(SDNode *N);

  bool tryReadRegister(SDNode *N);
  bool tryWriteRegister(SDNode *N);

  bool tryInlineAsm(SDNode *N);

  void SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI);

  void SelectCMP_SWAP(SDNode *N);

  /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
  /// inline asm expressions.
  bool SelectInlineAsmMemoryOperand(const SDValue &Op,
                                    InlineAsm::ConstraintCode ConstraintID,
                                    std::vector<SDValue> &OutOps) override;

  // Form pairs of consecutive R, S, D, or Q registers.
  SDNode *createGPRPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createSRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createDRegPairNode(EVT VT, SDValue V0, SDValue V1);
  SDNode *createQRegPairNode(EVT VT, SDValue V0, SDValue V1);

  // Form sequences of 4 consecutive S, D, or Q registers.
  SDNode *createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);
  SDNode *createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3);

  // Get the alignment operand for a NEON VLD or VST instruction.
  SDValue GetVLDSTAlign(SDValue Align, const SDLoc &dl, unsigned NumVecs,
                        bool is64BitVector);

  /// Checks if N is a multiplication by a constant where we can extract out a
  /// power of two from the constant so that it can be used in a shift, but only
  /// if it simplifies the materialization of the constant. Returns true if it
  /// is, and assigns to PowerOfTwo the power of two that should be extracted
  /// out and to NewMulConst the new constant to be multiplied by.
  bool canExtractShiftFromMul(const SDValue &N, unsigned MaxShift,
                              unsigned &PowerOfTwo, SDValue &NewMulConst) const;

  /// Replace N with M in CurDAG, in a way that also ensures that M gets
  /// selected when N would have been selected.
  void replaceDAGValue(const SDValue &N, SDValue M);
};
354 
/// Legacy pass-manager wrapper that owns an ARMDAGToDAGISel instance and
/// forwards to it.
class ARMDAGToDAGISelLegacy : public SelectionDAGISelLegacy {
public:
  static char ID; // Pass identification.
  ARMDAGToDAGISelLegacy(ARMBaseTargetMachine &tm, CodeGenOptLevel OptLevel)
      : SelectionDAGISelLegacy(
            ID, std::make_unique<ARMDAGToDAGISel>(tm, OptLevel)) {}
};
362 }
363 
// Pass identification token used by the legacy pass manager.
char ARMDAGToDAGISelLegacy::ID = 0;

// Register the pass under the "arm-isel" debug type with its display name.
INITIALIZE_PASS(ARMDAGToDAGISelLegacy, DEBUG_TYPE, PASS_NAME, false, false)
367 
368 /// isInt32Immediate - This method tests to see if the node is a 32-bit constant
369 /// operand. If so Imm will receive the 32-bit value.
370 static bool isInt32Immediate(SDNode *N, unsigned &Imm) {
371   if (N->getOpcode() == ISD::Constant && N->getValueType(0) == MVT::i32) {
372     Imm = N->getAsZExtVal();
373     return true;
374   }
375   return false;
376 }
377 
// isInt32Immediate - Convenience overload of the SDNode* variant above: tests
// whether the value is a 32-bit constant operand. If so Imm will receive the
// 32 bit value.
static bool isInt32Immediate(SDValue N, unsigned &Imm) {
  return isInt32Immediate(N.getNode(), Imm);
}
383 
384 // isOpcWithIntImmediate - This method tests to see if the node is a specific
385 // opcode and that it has a immediate integer right operand.
386 // If so Imm will receive the 32 bit value.
387 static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
388   return N->getOpcode() == Opc &&
389          isInt32Immediate(N->getOperand(1).getNode(), Imm);
390 }
391 
392 /// Check whether a particular node is a constant value representable as
393 /// (N * Scale) where (N in [\p RangeMin, \p RangeMax).
394 ///
395 /// \param ScaledConstant [out] - On success, the pre-scaled constant value.
396 static bool isScaledConstantInRange(SDValue Node, int Scale,
397                                     int RangeMin, int RangeMax,
398                                     int &ScaledConstant) {
399   assert(Scale > 0 && "Invalid scale!");
400 
401   // Check that this is a constant.
402   const ConstantSDNode *C = dyn_cast<ConstantSDNode>(Node);
403   if (!C)
404     return false;
405 
406   ScaledConstant = (int) C->getZExtValue();
407   if ((ScaledConstant % Scale) != 0)
408     return false;
409 
410   ScaledConstant /= Scale;
411   return ScaledConstant >= RangeMin && ScaledConstant < RangeMax;
412 }
413 
void ARMDAGToDAGISel::PreprocessISelDAG() {
  // The rewrite below creates (and (srl ...)) patterns that only pay off when
  // they can later become a UBFX, which requires v6t2.
  if (!Subtarget->hasV6T2Ops())
    return;

  // NOTE(review): isThumb() (not an explicit Thumb2 query) feeds isThumb2;
  // presumably any Thumb target passing the hasV6T2Ops() guard above is
  // Thumb2-capable -- confirm.
  bool isThumb2 = Subtarget->isThumb();
  // We use make_early_inc_range to avoid invalidation issues.
  for (SDNode &N : llvm::make_early_inc_range(CurDAG->allnodes())) {
    if (N.getOpcode() != ISD::ADD)
      continue;

    // Look for (add X1, (and (srl X2, c1), c2)) where c2 is constant with
    // leading zeros, followed by consecutive set bits, followed by 1 or 2
    // trailing zeros, e.g. 1020.
    // Transform the expression to
    // (add X1, (shl (and (srl X2, c1), (c2>>tz)), tz)) where tz is the number
    // of trailing zeros of c2. The left shift would be folded as an shifter
    // operand of 'add' and the 'and' and 'srl' would become a bits extraction
    // node (UBFX).

    SDValue N0 = N.getOperand(0);
    SDValue N1 = N.getOperand(1);
    unsigned And_imm = 0;
    // Canonicalize so that the AND-of-immediate (if present) ends up in N1.
    if (!isOpcWithIntImmediate(N1.getNode(), ISD::AND, And_imm)) {
      if (isOpcWithIntImmediate(N0.getNode(), ISD::AND, And_imm))
        std::swap(N0, N1);
    }
    if (!And_imm)
      continue;

    // Check if the AND mask is an immediate of the form: 000.....1111111100
    unsigned TZ = llvm::countr_zero(And_imm);
    if (TZ != 1 && TZ != 2)
      // Be conservative here. Shifter operands aren't always free. e.g. On
      // Swift, left shifter operand of 1 / 2 for free but others are not.
      // e.g.
      //  ubfx   r3, r1, #16, #8
      //  ldr.w  r3, [r0, r3, lsl #2]
      // vs.
      //  mov.w  r9, #1020
      //  and.w  r2, r9, r1, lsr #14
      //  ldr    r2, [r0, r2]
      continue;
    And_imm >>= TZ;
    // After dropping the trailing zeros the mask must be a contiguous run of
    // set bits (2^n - 1); x & (x+1) == 0 tests exactly that.
    if (And_imm & (And_imm + 1))
      continue;

    // Look for (and (srl X, c1), c2).
    SDValue Srl = N1.getOperand(0);
    unsigned Srl_imm = 0;
    if (!isOpcWithIntImmediate(Srl.getNode(), ISD::SRL, Srl_imm) ||
        (Srl_imm <= 2))
      continue;

    // Make sure first operand is not a shifter operand which would prevent
    // folding of the left shift.
    SDValue CPTmp0;
    SDValue CPTmp1;
    SDValue CPTmp2;
    if (isThumb2) {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1))
        continue;
    } else {
      if (SelectImmShifterOperand(N0, CPTmp0, CPTmp1) ||
          SelectRegShifterOperand(N0, CPTmp0, CPTmp1, CPTmp2))
        continue;
    }

    // Now make the transformation: fold the mask's trailing zeros into the
    // shift amount, AND with the shrunken mask, then shift the result back.
    Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32,
                          Srl.getOperand(0),
                          CurDAG->getConstant(Srl_imm + TZ, SDLoc(Srl),
                                              MVT::i32));
    N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32,
                         Srl,
                         CurDAG->getConstant(And_imm, SDLoc(Srl), MVT::i32));
    N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32,
                         N1, CurDAG->getConstant(TZ, SDLoc(Srl), MVT::i32));
    CurDAG->UpdateNodeOperands(&N, N0, N1);
  }
}
494 
/// hasNoVMLxHazardUse - Return true if it's desirable to select a FP MLA / MLS
/// node. VFP / NEON fp VMLA / VMLS instructions have special RAW hazards (at
/// least on current ARM implementations) which should be avoided.
bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const {
  // At -O0 don't bother modelling the hazard.
  if (OptLevel == CodeGenOptLevel::None)
    return true;

  if (!Subtarget->hasVMLxHazards())
    return true;

  // With more than one use we cannot reason about the single consumer below,
  // so conservatively report a hazard.
  if (!N->hasOneUse())
    return false;

  SDNode *User = *N->user_begin();
  if (User->getOpcode() == ISD::CopyToReg)
    return true;
  if (User->isMachineOpcode()) {
    const ARMBaseInstrInfo *TII = static_cast<const ARMBaseInstrInfo *>(
        CurDAG->getSubtarget().getInstrInfo());

    const MCInstrDesc &MCID = TII->get(User->getMachineOpcode());
    if (MCID.mayStore())
      return true;
    unsigned Opcode = MCID.getOpcode();
    if (Opcode == ARM::VMOVRS || Opcode == ARM::VMOVRRD)
      return true;
    // vmlx feeding into another vmlx. We actually want to unfold
    // the use later in the MLxExpansion pass. e.g.
    // vmla
    // vmla (stall 8 cycles)
    //
    // vmul (5 cycles)
    // vadd (5 cycles)
    // vmla
    // This adds up to about 18 - 19 cycles.
    //
    // vmla
    // vmul (stall 4 cycles)
    // vadd adds up to about 14 cycles.
    return TII->isFpMLxInstruction(Opcode);
  }

  // Non-machine, non-CopyToReg user: conservatively report a hazard.
  return false;
}
539 
540 bool ARMDAGToDAGISel::isShifterOpProfitable(const SDValue &Shift,
541                                             ARM_AM::ShiftOpc ShOpcVal,
542                                             unsigned ShAmt) {
543   if (!Subtarget->isLikeA9() && !Subtarget->isSwift())
544     return true;
545   if (Shift.hasOneUse())
546     return true;
547   // R << 2 is free.
548   return ShOpcVal == ARM_AM::lsl &&
549          (ShAmt == 2 || (Subtarget->isSwift() && ShAmt == 1));
550 }
551 
552 bool ARMDAGToDAGISel::canExtractShiftFromMul(const SDValue &N,
553                                              unsigned MaxShift,
554                                              unsigned &PowerOfTwo,
555                                              SDValue &NewMulConst) const {
556   assert(N.getOpcode() == ISD::MUL);
557   assert(MaxShift > 0);
558 
559   // If the multiply is used in more than one place then changing the constant
560   // will make other uses incorrect, so don't.
561   if (!N.hasOneUse()) return false;
562   // Check if the multiply is by a constant
563   ConstantSDNode *MulConst = dyn_cast<ConstantSDNode>(N.getOperand(1));
564   if (!MulConst) return false;
565   // If the constant is used in more than one place then modifying it will mean
566   // we need to materialize two constants instead of one, which is a bad idea.
567   if (!MulConst->hasOneUse()) return false;
568   unsigned MulConstVal = MulConst->getZExtValue();
569   if (MulConstVal == 0) return false;
570 
571   // Find the largest power of 2 that MulConstVal is a multiple of
572   PowerOfTwo = MaxShift;
573   while ((MulConstVal % (1 << PowerOfTwo)) != 0) {
574     --PowerOfTwo;
575     if (PowerOfTwo == 0) return false;
576   }
577 
578   // Only optimise if the new cost is better
579   unsigned NewMulConstVal = MulConstVal / (1 << PowerOfTwo);
580   NewMulConst = CurDAG->getConstant(NewMulConstVal, SDLoc(N), MVT::i32);
581   unsigned OldCost = ConstantMaterializationCost(MulConstVal, Subtarget);
582   unsigned NewCost = ConstantMaterializationCost(NewMulConstVal, Subtarget);
583   return NewCost < OldCost;
584 }
585 
void ARMDAGToDAGISel::replaceDAGValue(const SDValue &N, SDValue M) {
  // Move M into N's slot in the DAG's node ordering before rewiring uses, so
  // that M is visited (selected) in N's place.
  CurDAG->RepositionNode(N.getNode()->getIterator(), M.getNode());
  ReplaceUses(N, M);
}
590 
// Match N as a register shifted by a constant amount, producing the base
// register and the encoded shift operand.
// NOTE(review): CheckProfitability is unused in this immediate form (compare
// SelectRegShifterOperand) -- confirm whether that is intentional.
bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  // If N is a multiply-by-constant and it's profitable to extract a shift and
  // use it in a shifted operand do so.
  if (N.getOpcode() == ISD::MUL) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) {
      // Hold on to N across the DAG mutation below, then encode the extracted
      // power of two as "lsl #PowerOfTwo".
      HandleSDNode Handle(N);
      SDLoc Loc(N);
      replaceDAGValue(N.getOperand(1), NewMulConst);
      BaseReg = Handle.getValue();
      Opc = CurDAG->getTargetConstant(
          ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), Loc, MVT::i32);
      return true;
    }
  }

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  unsigned ShImmVal = 0;
  // Only constant shift amounts match here; register-shifted forms are
  // handled by SelectRegShifterOperand.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!RHS) return false;
  ShImmVal = RHS->getZExtValue() & 31;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
629 
// Match N as a register shifted by another register, producing the base
// register, the shift-amount register, and the encoded shift operand.
bool ARMDAGToDAGISel::SelectRegShifterOperand(SDValue N,
                                              SDValue &BaseReg,
                                              SDValue &ShReg,
                                              SDValue &Opc,
                                              bool CheckProfitability) {
  if (DisableShifterOp)
    return false;

  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());

  // Don't match base register only case. That is matched to a separate
  // lower complexity pattern with explicit register operand.
  if (ShOpcVal == ARM_AM::no_shift) return false;

  BaseReg = N.getOperand(0);
  // ShImmVal stays 0 for the register-shifted form; only the shift kind is
  // encoded into the operand.
  unsigned ShImmVal = 0;
  // A constant shift amount belongs to the immediate form
  // (SelectImmShifterOperand), so reject it here.
  ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (RHS) return false;

  ShReg = N.getOperand(1);
  if (CheckProfitability && !isShifterOpProfitable(N, ShOpcVal, ShImmVal))
    return false;
  Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, ShImmVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
656 
657 // Determine whether an ISD::OR's operands are suitable to turn the operation
658 // into an addition, which often has more compact encodings.
659 bool ARMDAGToDAGISel::SelectAddLikeOr(SDNode *Parent, SDValue N, SDValue &Out) {
660   assert(Parent->getOpcode() == ISD::OR && "unexpected parent");
661   Out = N;
662   return CurDAG->haveNoCommonBitsSet(N, Parent->getOperand(1));
663 }
664 
665 
bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N,
                                          SDValue &Base,
                                          SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through an ARMISD::Wrapper, except when it wraps a target address
    // that must stay attached to the wrapper node.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    // Fold a subtract into a negative offset so both cases share one range
    // check.
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC > -0x1000 && RHSC < 0x1000) { // 12 bits
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
716 
717 
718 
/// SelectLdStSOReg - Select an ARM addrmode2 "register +/- (possibly
/// shifted) register" operand for a load/store. On success fills in \p Base
/// and \p Offset and encodes the add/sub direction, shift amount and shift
/// kind into \p Opc via ARM_AM::getAM2Opc.
bool ARMDAGToDAGISel::SelectLdStSOReg(SDValue N, SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  // Fold an odd multiply-by-constant into base + shifted-base, but on
  // A9-like and Swift cores only when the multiply has a single use.
  if (N.getOpcode() == ISD::MUL &&
      ((!Subtarget->isLikeA9() && !Subtarget->isSwift()) || N.hasOneUse())) {
    if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      // X * [3,5,9] -> X + X * [2,4,8] etc.
      int RHSC = (int)RHS->getZExtValue();
      if (RHSC & 1) {
        RHSC = RHSC & ~1;
        ARM_AM::AddrOpc AddSub = ARM_AM::add;
        if (RHSC < 0) {
          AddSub = ARM_AM::sub;
          RHSC = - RHSC;
        }
        if (isPowerOf2_32(RHSC)) {
          unsigned ShAmt = Log2_32(RHSC);
          Base = Offset = N.getOperand(0);
          Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt,
                                                            ARM_AM::lsl),
                                          SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      // ISD::OR that is equivalent to an ISD::ADD.
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave simple R +/- imm12 operands for LDRi12
  if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                                -0x1000+1, 0x1000, RHSC)) // 12 bits.
      return false;
  }

  // Otherwise this is R +/- [possibly shifted] R.
  ARM_AM::AddrOpc AddSub = N.getOpcode() == ISD::SUB ? ARM_AM::sub:ARM_AM::add;
  ARM_AM::ShiftOpc ShOpcVal =
    ARM_AM::getShiftOpcForNode(N.getOperand(1).getOpcode());
  unsigned ShAmt = 0;

  Base   = N.getOperand(0);
  Offset = N.getOperand(1);

  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh =
           dyn_cast<ConstantSDNode>(N.getOperand(1).getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(Offset, ShOpcVal, ShAmt))
        Offset = N.getOperand(1).getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  // Try matching (R shl C) + (R).
  if (N.getOpcode() != ISD::SUB && ShOpcVal == ARM_AM::no_shift &&
      !(Subtarget->isLikeA9() || Subtarget->isSwift() ||
        N.getOperand(0).hasOneUse())) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOperand(0).getOpcode());
    if (ShOpcVal != ARM_AM::no_shift) {
      // Check to see if the RHS of the shift is a constant, if not, we can't
      // fold it.
      if (ConstantSDNode *Sh =
          dyn_cast<ConstantSDNode>(N.getOperand(0).getOperand(1))) {
        ShAmt = Sh->getZExtValue();
        if (isShifterOpProfitable(N.getOperand(0), ShOpcVal, ShAmt)) {
          Offset = N.getOperand(0).getOperand(0);
          Base = N.getOperand(1);
        } else {
          ShAmt = 0;
          ShOpcVal = ARM_AM::no_shift;
        }
      } else {
        ShOpcVal = ARM_AM::no_shift;
      }
    }
  }

  // If Offset is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (Offset.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(Offset, 31, PowerOfTwo, NewMulConst)) {
      // The handle keeps Offset alive while replaceDAGValue may update/CSE
      // nodes in the DAG.
      HandleSDNode Handle(Offset);
      replaceDAGValue(Offset.getOperand(1), NewMulConst);
      Offset = Handle.getValue();
      ShAmt = PowerOfTwo;
      ShOpcVal = ARM_AM::lsl;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
826 
/// SelectAddrMode2OffsetReg - Select a register (possibly shifted) pre/post
/// indexed offset for load/store \p Op. Declines plain constants in
/// [0, 0x1000), which the immediate forms handle instead.
bool ARMDAGToDAGISel::SelectAddrMode2OffsetReg(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  // Increment forms add the offset, decrement forms subtract it.
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val))
    return false;

  Offset = N;
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(N.getOpcode());
  unsigned ShAmt = 0;
  if (ShOpcVal != ARM_AM::no_shift) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      if (isShifterOpProfitable(N, ShOpcVal, ShAmt))
        Offset = N.getOperand(0);
      else {
        ShAmt = 0;
        ShOpcVal = ARM_AM::no_shift;
      }
    } else {
      ShOpcVal = ARM_AM::no_shift;
    }
  }

  Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, ShAmt, ShOpcVal),
                                  SDLoc(N), MVT::i32);
  return true;
}
862 
863 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImmPre(SDNode *Op, SDValue N,
864                                             SDValue &Offset, SDValue &Opc) {
865   unsigned Opcode = Op->getOpcode();
866   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
867     ? cast<LoadSDNode>(Op)->getAddressingMode()
868     : cast<StoreSDNode>(Op)->getAddressingMode();
869   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
870     ? ARM_AM::add : ARM_AM::sub;
871   int Val;
872   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
873     if (AddSub == ARM_AM::sub) Val *= -1;
874     Offset = CurDAG->getRegister(0, MVT::i32);
875     Opc = CurDAG->getSignedTargetConstant(Val, SDLoc(Op), MVT::i32);
876     return true;
877   }
878 
879   return false;
880 }
881 
882 
883 bool ARMDAGToDAGISel::SelectAddrMode2OffsetImm(SDNode *Op, SDValue N,
884                                             SDValue &Offset, SDValue &Opc) {
885   unsigned Opcode = Op->getOpcode();
886   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
887     ? cast<LoadSDNode>(Op)->getAddressingMode()
888     : cast<StoreSDNode>(Op)->getAddressingMode();
889   ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
890     ? ARM_AM::add : ARM_AM::sub;
891   int Val;
892   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x1000, Val)) { // 12 bits.
893     Offset = CurDAG->getRegister(0, MVT::i32);
894     Opc = CurDAG->getTargetConstant(ARM_AM::getAM2Opc(AddSub, Val,
895                                                       ARM_AM::no_shift),
896                                     SDLoc(Op), MVT::i32);
897     return true;
898   }
899 
900   return false;
901 }
902 
/// SelectAddrOffsetNone - Trivial addressing mode with no offset; any value
/// is accepted as the base.
bool ARMDAGToDAGISel::SelectAddrOffsetNone(SDValue N, SDValue &Base) {
  Base = N;
  return true;
}
907 
/// SelectAddrMode3 - Select an ARM addrmode3 operand: register base plus
/// either a register offset or an 8-bit immediate. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N,
                                      SDValue &Base, SDValue &Offset,
                                      SDValue &Opc) {
  if (N.getOpcode() == ISD::SUB) {
    // X - C is canonicalized to X + -C, no need to handle it here.
    Base = N.getOperand(0);
    Offset = N.getOperand(1);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::sub, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    // No constant offset: use the whole value as the base (lowering a frame
    // index to its target form) with a zero register offset.
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/1,
                              -256 + 1, 256, RHSC)) { // 8 bits.
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }
    Offset = CurDAG->getRegister(0, MVT::i32);

    // Negative offsets are encoded as a positive immediate with a sub opcode.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, RHSC), SDLoc(N),
                                    MVT::i32);
    return true;
  }

  // Constant offset out of immediate range: use it as a register offset.
  Base = N.getOperand(0);
  Offset = N.getOperand(1);
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0), SDLoc(N),
                                  MVT::i32);
  return true;
}
961 
/// SelectAddrMode3Offset - Select the addrmode3 pre/post-indexed offset for
/// load/store \p Op. Constants in [0, 256) fold into the immediate form;
/// anything else is used as a register offset. Always succeeds.
bool ARMDAGToDAGISel::SelectAddrMode3Offset(SDNode *Op, SDValue N,
                                            SDValue &Offset, SDValue &Opc) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
    ? cast<LoadSDNode>(Op)->getAddressingMode()
    : cast<StoreSDNode>(Op)->getAddressingMode();
  ARM_AM::AddrOpc AddSub = (AM == ISD::PRE_INC || AM == ISD::POST_INC)
    ? ARM_AM::add : ARM_AM::sub;
  int Val;
  if (isScaledConstantInRange(N, /*Scale=*/1, 0, 256, Val)) { // 8 bits.
    Offset = CurDAG->getRegister(0, MVT::i32);
    Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, Val), SDLoc(Op),
                                    MVT::i32);
    return true;
  }

  Offset = N;
  Opc = CurDAG->getTargetConstant(ARM_AM::getAM3Opc(AddSub, 0), SDLoc(Op),
                                  MVT::i32);
  return true;
}
983 
/// IsAddressingMode5 - Shared implementation for addrmode5 selection: a base
/// register plus an immediate in [-255, 255] scaled by the access size.
/// \p FP16 selects the half-precision variant (scale 2, AM5FP16 encoding)
/// instead of the standard one (scale 4, AM5 encoding). Always succeeds.
bool ARMDAGToDAGISel::IsAddressingMode5(SDValue N, SDValue &Base, SDValue &Offset,
                                        bool FP16) {
  if (!CurDAG->isBaseWithConstantOffset(N)) {
    Base = N;
    if (N.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    } else if (N.getOpcode() == ARMISD::Wrapper &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
               N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
               N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Look through the wrapper, except for address nodes that must be
      // materialised separately.
      Base = N.getOperand(0);
    }
    // NOTE(review): this branch uses the AM5 encoding even when FP16 is set;
    // presumably (add, 0) encodes identically in both schemes — confirm.
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is +/- imm8, fold into addr mode.
  int RHSC;
  const int Scale = FP16 ? 2 : 4;

  if (isScaledConstantInRange(N.getOperand(1), Scale, -255, 256, RHSC)) {
    Base = N.getOperand(0);
    if (Base.getOpcode() == ISD::FrameIndex) {
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    }

    // Negative offsets are encoded as a positive immediate with a sub opcode.
    ARM_AM::AddrOpc AddSub = ARM_AM::add;
    if (RHSC < 0) {
      AddSub = ARM_AM::sub;
      RHSC = -RHSC;
    }

    if (FP16)
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);
    else
      Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(AddSub, RHSC),
                                         SDLoc(N), MVT::i32);

    return true;
  }

  // Constant offset out of range: treat the whole expression as the base.
  Base = N;

  if (FP16)
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5FP16Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);
  else
    Offset = CurDAG->getTargetConstant(ARM_AM::getAM5Opc(ARM_AM::add, 0),
                                       SDLoc(N), MVT::i32);

  return true;
}
1042 
/// SelectAddrMode5 - Standard (non-FP16) addrmode5 selection; immediate
/// offsets are scaled by 4.
bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N,
                                      SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ false);
}
1047 
/// SelectAddrMode5FP16 - Half-precision addrmode5 selection; immediate
/// offsets are scaled by 2 and use the AM5FP16 encoding.
bool ARMDAGToDAGISel::SelectAddrMode5FP16(SDValue N,
                                          SDValue &Base, SDValue &Offset) {
  return IsAddressingMode5(N, Base, Offset, /*FP16=*/ true);
}
1052 
/// SelectAddrMode6 - Select an addrmode6 operand: the address itself plus an
/// alignment immediate derived from \p Parent's memory operand. Always
/// succeeds.
bool ARMDAGToDAGISel::SelectAddrMode6(SDNode *Parent, SDValue N, SDValue &Addr,
                                      SDValue &Align) {
  Addr = N;

  unsigned Alignment = 0;

  MemSDNode *MemN = cast<MemSDNode>(Parent);

  if (isa<LSBaseSDNode>(MemN) ||
      ((MemN->getOpcode() == ARMISD::VST1_UPD ||
        MemN->getOpcode() == ARMISD::VLD1_UPD) &&
       MemN->getConstantOperandVal(MemN->getNumOperands() - 1) == 1)) {
    // This case occurs only for VLD1-lane/dup and VST1-lane instructions.
    // The maximum alignment is equal to the memory size being referenced.
    llvm::Align MMOAlign = MemN->getAlign();
    unsigned MemSize = MemN->getMemoryVT().getSizeInBits() / 8;
    if (MMOAlign.value() >= MemSize && MemSize > 1)
      Alignment = MemSize;
  } else {
    // All other uses of addrmode6 are for intrinsics.  For now just record
    // the raw alignment value; it will be refined later based on the legal
    // alignment operands for the intrinsic.
    Alignment = MemN->getAlign().value();
  }

  Align = CurDAG->getTargetConstant(Alignment, SDLoc(N), MVT::i32);
  return true;
}
1081 
1082 bool ARMDAGToDAGISel::SelectAddrMode6Offset(SDNode *Op, SDValue N,
1083                                             SDValue &Offset) {
1084   LSBaseSDNode *LdSt = cast<LSBaseSDNode>(Op);
1085   ISD::MemIndexedMode AM = LdSt->getAddressingMode();
1086   if (AM != ISD::POST_INC)
1087     return false;
1088   Offset = N;
1089   if (ConstantSDNode *NC = dyn_cast<ConstantSDNode>(N)) {
1090     if (NC->getZExtValue() * 8 == LdSt->getMemoryVT().getSizeInBits())
1091       Offset = CurDAG->getRegister(0, MVT::i32);
1092   }
1093   return true;
1094 }
1095 
1096 bool ARMDAGToDAGISel::SelectAddrModePC(SDValue N,
1097                                        SDValue &Offset, SDValue &Label) {
1098   if (N.getOpcode() == ARMISD::PIC_ADD && N.hasOneUse()) {
1099     Offset = N.getOperand(0);
1100     SDValue N1 = N.getOperand(1);
1101     Label = CurDAG->getTargetConstant(N1->getAsZExtVal(), SDLoc(N), MVT::i32);
1102     return true;
1103   }
1104 
1105   return false;
1106 }
1107 
1108 
1109 //===----------------------------------------------------------------------===//
1110 //                         Thumb Addressing Modes
1111 //===----------------------------------------------------------------------===//
1112 
1113 static bool shouldUseZeroOffsetLdSt(SDValue N) {
1114   // Negative numbers are difficult to materialise in thumb1. If we are
1115   // selecting the add of a negative, instead try to select ri with a zero
1116   // offset, so create the add node directly which will become a sub.
1117   if (N.getOpcode() != ISD::ADD)
1118     return false;
1119 
1120   // Look for an imm which is not legal for ld/st, but is legal for sub.
1121   if (auto C = dyn_cast<ConstantSDNode>(N.getOperand(1)))
1122     return C->getSExtValue() < 0 && C->getSExtValue() >= -255;
1123 
1124   return false;
1125 }
1126 
1127 bool ARMDAGToDAGISel::SelectThumbAddrModeRRSext(SDValue N, SDValue &Base,
1128                                                 SDValue &Offset) {
1129   if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N)) {
1130     if (!isNullConstant(N))
1131       return false;
1132 
1133     Base = Offset = N;
1134     return true;
1135   }
1136 
1137   Base = N.getOperand(0);
1138   Offset = N.getOperand(1);
1139   return true;
1140 }
1141 
/// SelectThumbAddrModeRR - Register+register Thumb addressing, unless the
/// node is an add of a small negative constant that selects better as an
/// ri access with a zero offset.
bool ARMDAGToDAGISel::SelectThumbAddrModeRR(SDValue N, SDValue &Base,
                                            SDValue &Offset) {
  if (shouldUseZeroOffsetLdSt(N))
    return false; // Select ri instead
  return SelectThumbAddrModeRRSext(N, Base, Offset);
}
1148 
/// SelectThumbAddrModeImm5S - Select a Thumb base plus an unsigned 5-bit
/// immediate scaled by \p Scale (the access size in bytes).
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S(SDValue N, unsigned Scale,
                                          SDValue &Base, SDValue &OffImm) {
  if (shouldUseZeroOffsetLdSt(N)) {
    // An add of a small negative constant selects better as a separate sub
    // feeding a zero-offset access.
    Base = N;
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::ADD) {
      return false; // We want to select register offset instead
    } else if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetConstantPool &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      // Look through the wrapper, except for address nodes that must be
      // materialised separately.
      Base = N.getOperand(0);
    } else {
      Base = N;
    }

    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  // If the RHS is + imm5 * scale, fold into addr mode.
  int RHSC;
  if (isScaledConstantInRange(N.getOperand(1), Scale, 0, 32, RHSC)) {
    Base = N.getOperand(0);
    OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
    return true;
  }

  // Offset is too large, so use register offset instead.
  return false;
}
1186 
/// SelectThumbAddrModeImm5S4 - Imm5S addressing with a word (4-byte) scale.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S4(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 4, Base, OffImm);
}
1192 
/// SelectThumbAddrModeImm5S2 - Imm5S addressing with a halfword (2-byte)
/// scale.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S2(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 2, Base, OffImm);
}
1198 
/// SelectThumbAddrModeImm5S1 - Imm5S addressing with a byte scale.
bool
ARMDAGToDAGISel::SelectThumbAddrModeImm5S1(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  return SelectThumbAddrModeImm5S(N, 1, Base, OffImm);
}
1204 
/// SelectThumbAddrModeSP - Select a Thumb frame-index-relative address with
/// an optional offset that is a multiple of 4 (unsigned imm8, scale 4). May
/// raise the frame object's alignment to 4 so the scaled offset stays valid.
bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  if (N.getOpcode() == ISD::FrameIndex) {
    int FI = cast<FrameIndexSDNode>(N)->getIndex();
    // Only multiples of 4 are allowed for the offset, so the frame object
    // alignment must be at least 4.
    MachineFrameInfo &MFI = MF->getFrameInfo();
    if (MFI.getObjectAlign(FI) < Align(4))
      MFI.setObjectAlignment(FI, Align(4));
    Base = CurDAG->getTargetFrameIndex(
        FI, TLI->getPointerTy(CurDAG->getDataLayout()));
    OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (!CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (N.getOperand(0).getOpcode() == ISD::FrameIndex) {
    // If the RHS is + imm8 * scale, fold into addr mode.
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), /*Scale=*/4, 0, 256, RHSC)) {
      Base = N.getOperand(0);
      int FI = cast<FrameIndexSDNode>(Base)->getIndex();
      // Make sure the offset is inside the object, or we might fail to
      // allocate an emergency spill slot. (An out-of-range access is UB, but
      // it could show up anyway.)
      MachineFrameInfo &MFI = MF->getFrameInfo();
      if (RHSC * 4 < MFI.getObjectSize(FI)) {
        // For LHS+RHS to result in an offset that's a multiple of 4 the object
        // indexed by the LHS must be 4-byte aligned.
        if (!MFI.isFixedObjectIndex(FI) && MFI.getObjectAlign(FI) < Align(4))
          MFI.setObjectAlignment(FI, Align(4));
        if (MFI.getObjectAlign(FI) >= Align(4)) {
          Base = CurDAG->getTargetFrameIndex(
              FI, TLI->getPointerTy(CurDAG->getDataLayout()));
          OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
          return true;
        }
      }
    }
  }

  return false;
}
1250 
/// SelectTAddrModeImm7 - Thumb base plus a signed 7-bit immediate scaled by
/// (1 << Shift). Falls back to a base-only match (zero offset) when the
/// offset is not a foldable constant. Always succeeds.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectTAddrModeImm7(SDValue N, SDValue &Base,
                                          SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      // A SUB carries its constant unnegated on the RHS; negate it here.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1272 
1273 
1274 //===----------------------------------------------------------------------===//
1275 //                        Thumb 2 Addressing Modes
1276 //===----------------------------------------------------------------------===//
1277 
1278 
/// SelectT2AddrModeImm12 - Select a Thumb2 base plus unsigned 12-bit
/// immediate address (the t2LDRi12 form). Declines matches better served by
/// t2LDRi8 (negative offsets) or t2LDRpci (constant-pool addresses).
bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N,
                                            SDValue &Base, SDValue &OffImm) {
  // Match simple R + imm12 operands.

  // Base only.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N)) {
    if (N.getOpcode() == ISD::FrameIndex) {
      // Match frame index.
      int FI = cast<FrameIndexSDNode>(N)->getIndex();
      Base = CurDAG->getTargetFrameIndex(
          FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
      return true;
    }

    // Look through the wrapper, except for address nodes that must be
    // materialised separately.
    if (N.getOpcode() == ARMISD::Wrapper &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalAddress &&
        N.getOperand(0).getOpcode() != ISD::TargetExternalSymbol &&
        N.getOperand(0).getOpcode() != ISD::TargetGlobalTLSAddress) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::TargetConstantPool)
        return false;  // We want to select t2LDRpci instead.
    } else
      Base = N;
    OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
    return true;
  }

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    if (SelectT2AddrModeImm8(N, Base, OffImm))
      // Let t2LDRi8 handle (R - imm8).
      return false;

    int RHSC = (int)RHS->getZExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if (RHSC >= 0 && RHSC < 0x1000) { // 12 bits (unsigned)
      Base   = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm  = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1334 
/// SelectT2AddrModeImm8 (template form) - Base plus a signed 8-bit immediate
/// scaled by (1 << Shift). Rewrites frame-index bases to their target form
/// and falls back to a base-only (zero offset) match. Always succeeds.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -255, 256, RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      // A SUB carries its constant unnegated on the RHS; negate it here.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1361 
/// SelectT2AddrModeImm8 - Match (R - imm8): a base plus a strictly negative
/// 8-bit offset, the t2LDRi8 form. Non-negative offsets are left for
/// t2LDRi12.
bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N,
                                           SDValue &Base, SDValue &OffImm) {
  // Match simple R - imm8 operands.
  if (N.getOpcode() != ISD::ADD && N.getOpcode() != ISD::SUB &&
      !CurDAG->isBaseWithConstantOffset(N))
    return false;

  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getSExtValue();
    if (N.getOpcode() == ISD::SUB)
      RHSC = -RHSC;

    if ((RHSC >= -255) && (RHSC < 0)) { // 8 bits (always negative)
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }
      OffImm = CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32);
      return true;
    }
  }

  return false;
}
1388 
1389 bool ARMDAGToDAGISel::SelectT2AddrModeImm8Offset(SDNode *Op, SDValue N,
1390                                                  SDValue &OffImm){
1391   unsigned Opcode = Op->getOpcode();
1392   ISD::MemIndexedMode AM = (Opcode == ISD::LOAD)
1393     ? cast<LoadSDNode>(Op)->getAddressingMode()
1394     : cast<StoreSDNode>(Op)->getAddressingMode();
1395   int RHSC;
1396   if (isScaledConstantInRange(N, /*Scale=*/1, 0, 0x100, RHSC)) { // 8 bits.
1397     OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
1398                  ? CurDAG->getSignedTargetConstant(RHSC, SDLoc(N), MVT::i32)
1399                  : CurDAG->getSignedTargetConstant(-RHSC, SDLoc(N), MVT::i32);
1400     return true;
1401   }
1402 
1403   return false;
1404 }
1405 
/// SelectT2AddrModeImm7 - Thumb2 base plus a signed 7-bit immediate scaled
/// by (1 << Shift). Rewrites frame-index bases and falls back to a
/// base-only (zero offset) match. Always succeeds.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7(SDValue N, SDValue &Base,
                                           SDValue &OffImm) {
  if (N.getOpcode() == ISD::SUB || CurDAG->isBaseWithConstantOffset(N)) {
    int RHSC;
    if (isScaledConstantInRange(N.getOperand(1), 1 << Shift, -0x7f, 0x80,
                                RHSC)) {
      Base = N.getOperand(0);
      if (Base.getOpcode() == ISD::FrameIndex) {
        int FI = cast<FrameIndexSDNode>(Base)->getIndex();
        Base = CurDAG->getTargetFrameIndex(
            FI, TLI->getPointerTy(CurDAG->getDataLayout()));
      }

      // A SUB carries its constant unnegated on the RHS; negate it here.
      if (N.getOpcode() == ISD::SUB)
        RHSC = -RHSC;
      OffImm = CurDAG->getSignedTargetConstant(RHSC * (1 << Shift), SDLoc(N),
                                               MVT::i32);
      return true;
    }
  }

  // Base only.
  Base = N;
  OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
  return true;
}
1433 
/// Template wrapper that forwards the compile-time Shift to the runtime
/// implementation.
template <unsigned Shift>
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm) {
  return SelectT2AddrModeImm7Offset(Op, N, OffImm, Shift);
}
1439 
/// SelectT2AddrModeImm7Offset - Fold a pre/post index offset that is an
/// unsigned 7-bit constant scaled by (1 << Shift); decrement modes encode
/// the negated value. Handles plain and masked loads/stores.
bool ARMDAGToDAGISel::SelectT2AddrModeImm7Offset(SDNode *Op, SDValue N,
                                                 SDValue &OffImm,
                                                 unsigned Shift) {
  unsigned Opcode = Op->getOpcode();
  ISD::MemIndexedMode AM;
  switch (Opcode) {
  case ISD::LOAD:
    AM = cast<LoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::STORE:
    AM = cast<StoreSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MLOAD:
    AM = cast<MaskedLoadSDNode>(Op)->getAddressingMode();
    break;
  case ISD::MSTORE:
    AM = cast<MaskedStoreSDNode>(Op)->getAddressingMode();
    break;
  default:
    llvm_unreachable("Unexpected Opcode for Imm7Offset");
  }

  int RHSC;
  // 7 bit constant, shifted by Shift.
  if (isScaledConstantInRange(N, 1 << Shift, 0, 0x80, RHSC)) {
    OffImm = ((AM == ISD::PRE_INC) || (AM == ISD::POST_INC))
                 ? CurDAG->getSignedTargetConstant(RHSC * (1 << Shift),
                                                   SDLoc(N), MVT::i32)
                 : CurDAG->getSignedTargetConstant(-RHSC * (1 << Shift),
                                                   SDLoc(N), MVT::i32);
    return true;
  }
  return false;
}
1474 
1475 template <int Min, int Max>
1476 bool ARMDAGToDAGISel::SelectImmediateInRange(SDValue N, SDValue &OffImm) {
1477   int Val;
1478   if (isScaledConstantInRange(N, 1, Min, Max, Val)) {
1479     OffImm = CurDAG->getSignedTargetConstant(Val, SDLoc(N), MVT::i32);
1480     return true;
1481   }
1482   return false;
1483 }
1484 
/// SelectT2AddrModeSoReg - Select a Thumb2 (R + R << [0-3]) address.
/// Constant offsets matchable by t2LDRi12 or t2LDRi8 are declined so the
/// simpler immediate forms win.
bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N,
                                            SDValue &Base,
                                            SDValue &OffReg, SDValue &ShImm) {
  // (R - imm8) should be handled by t2LDRi8. The rest are handled by t2LDRi12.
  if (N.getOpcode() != ISD::ADD && !CurDAG->isBaseWithConstantOffset(N))
    return false;

  // Leave (R + imm12) for t2LDRi12, (R - imm8) for t2LDRi8.
  if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
    int RHSC = (int)RHS->getZExtValue();
    if (RHSC >= 0 && RHSC < 0x1000) // 12 bits (unsigned)
      return false;
    else if (RHSC < 0 && RHSC >= -255) // 8 bits
      return false;
  }

  // Look for (R + R) or (R + (R << [1,2,3])).
  unsigned ShAmt = 0;
  Base   = N.getOperand(0);
  OffReg = N.getOperand(1);

  // Swap if it is ((R << c) + R).
  ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(OffReg.getOpcode());
  if (ShOpcVal != ARM_AM::lsl) {
    ShOpcVal = ARM_AM::getShiftOpcForNode(Base.getOpcode());
    if (ShOpcVal == ARM_AM::lsl)
      std::swap(Base, OffReg);
  }

  if (ShOpcVal == ARM_AM::lsl) {
    // Check to see if the RHS of the shift is a constant, if not, we can't fold
    // it.
    if (ConstantSDNode *Sh = dyn_cast<ConstantSDNode>(OffReg.getOperand(1))) {
      ShAmt = Sh->getZExtValue();
      // Only shifts of 0-3 are encodable in this addressing mode.
      if (ShAmt < 4 && isShifterOpProfitable(OffReg, ShOpcVal, ShAmt))
        OffReg = OffReg.getOperand(0);
      else {
        ShAmt = 0;
      }
    }
  }

  // If OffReg is a multiply-by-constant and it's profitable to extract a shift
  // and use it in a shifted operand do so.
  if (OffReg.getOpcode() == ISD::MUL && N.hasOneUse()) {
    unsigned PowerOfTwo = 0;
    SDValue NewMulConst;
    if (canExtractShiftFromMul(OffReg, 3, PowerOfTwo, NewMulConst)) {
      // The handle keeps OffReg alive while replaceDAGValue may update/CSE
      // nodes in the DAG.
      HandleSDNode Handle(OffReg);
      replaceDAGValue(OffReg.getOperand(1), NewMulConst);
      OffReg = Handle.getValue();
      ShAmt = PowerOfTwo;
    }
  }

  ShImm = CurDAG->getTargetConstant(ShAmt, SDLoc(N), MVT::i32);

  return true;
}
1544 
1545 bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base,
1546                                                 SDValue &OffImm) {
1547   // This *must* succeed since it's used for the irreplaceable ldrex and strex
1548   // instructions.
1549   Base = N;
1550   OffImm = CurDAG->getTargetConstant(0, SDLoc(N), MVT::i32);
1551 
1552   if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N))
1553     return true;
1554 
1555   ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N.getOperand(1));
1556   if (!RHS)
1557     return true;
1558 
1559   uint32_t RHSC = (int)RHS->getZExtValue();
1560   if (RHSC > 1020 || RHSC % 4 != 0)
1561     return true;
1562 
1563   Base = N.getOperand(0);
1564   if (Base.getOpcode() == ISD::FrameIndex) {
1565     int FI = cast<FrameIndexSDNode>(Base)->getIndex();
1566     Base = CurDAG->getTargetFrameIndex(
1567         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
1568   }
1569 
1570   OffImm = CurDAG->getTargetConstant(RHSC/4, SDLoc(N), MVT::i32);
1571   return true;
1572 }
1573 
1574 //===--------------------------------------------------------------------===//
1575 
1576 /// getAL - Returns a ARMCC::AL immediate node.
1577 static inline SDValue getAL(SelectionDAG *CurDAG, const SDLoc &dl) {
1578   return CurDAG->getTargetConstant((uint64_t)ARMCC::AL, dl, MVT::i32);
1579 }
1580 
1581 void ARMDAGToDAGISel::transferMemOperands(SDNode *N, SDNode *Result) {
1582   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
1583   CurDAG->setNodeMemRefs(cast<MachineSDNode>(Result), {MemOp});
1584 }
1585 
1586 bool ARMDAGToDAGISel::tryARMIndexedLoad(SDNode *N) {
1587   LoadSDNode *LD = cast<LoadSDNode>(N);
1588   ISD::MemIndexedMode AM = LD->getAddressingMode();
1589   if (AM == ISD::UNINDEXED)
1590     return false;
1591 
1592   EVT LoadedVT = LD->getMemoryVT();
1593   SDValue Offset, AMOpc;
1594   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1595   unsigned Opcode = 0;
1596   bool Match = false;
1597   if (LoadedVT == MVT::i32 && isPre &&
1598       SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1599     Opcode = ARM::LDR_PRE_IMM;
1600     Match = true;
1601   } else if (LoadedVT == MVT::i32 && !isPre &&
1602       SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1603     Opcode = ARM::LDR_POST_IMM;
1604     Match = true;
1605   } else if (LoadedVT == MVT::i32 &&
1606       SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1607     Opcode = isPre ? ARM::LDR_PRE_REG : ARM::LDR_POST_REG;
1608     Match = true;
1609 
1610   } else if (LoadedVT == MVT::i16 &&
1611              SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1612     Match = true;
1613     Opcode = (LD->getExtensionType() == ISD::SEXTLOAD)
1614       ? (isPre ? ARM::LDRSH_PRE : ARM::LDRSH_POST)
1615       : (isPre ? ARM::LDRH_PRE : ARM::LDRH_POST);
1616   } else if (LoadedVT == MVT::i8 || LoadedVT == MVT::i1) {
1617     if (LD->getExtensionType() == ISD::SEXTLOAD) {
1618       if (SelectAddrMode3Offset(N, LD->getOffset(), Offset, AMOpc)) {
1619         Match = true;
1620         Opcode = isPre ? ARM::LDRSB_PRE : ARM::LDRSB_POST;
1621       }
1622     } else {
1623       if (isPre &&
1624           SelectAddrMode2OffsetImmPre(N, LD->getOffset(), Offset, AMOpc)) {
1625         Match = true;
1626         Opcode = ARM::LDRB_PRE_IMM;
1627       } else if (!isPre &&
1628                   SelectAddrMode2OffsetImm(N, LD->getOffset(), Offset, AMOpc)) {
1629         Match = true;
1630         Opcode = ARM::LDRB_POST_IMM;
1631       } else if (SelectAddrMode2OffsetReg(N, LD->getOffset(), Offset, AMOpc)) {
1632         Match = true;
1633         Opcode = isPre ? ARM::LDRB_PRE_REG : ARM::LDRB_POST_REG;
1634       }
1635     }
1636   }
1637 
1638   if (Match) {
1639     if (Opcode == ARM::LDR_PRE_IMM || Opcode == ARM::LDRB_PRE_IMM) {
1640       SDValue Chain = LD->getChain();
1641       SDValue Base = LD->getBasePtr();
1642       SDValue Ops[]= { Base, AMOpc, getAL(CurDAG, SDLoc(N)),
1643                        CurDAG->getRegister(0, MVT::i32), Chain };
1644       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1645                                            MVT::Other, Ops);
1646       transferMemOperands(N, New);
1647       ReplaceNode(N, New);
1648       return true;
1649     } else {
1650       SDValue Chain = LD->getChain();
1651       SDValue Base = LD->getBasePtr();
1652       SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG, SDLoc(N)),
1653                        CurDAG->getRegister(0, MVT::i32), Chain };
1654       SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1655                                            MVT::Other, Ops);
1656       transferMemOperands(N, New);
1657       ReplaceNode(N, New);
1658       return true;
1659     }
1660   }
1661 
1662   return false;
1663 }
1664 
1665 bool ARMDAGToDAGISel::tryT1IndexedLoad(SDNode *N) {
1666   LoadSDNode *LD = cast<LoadSDNode>(N);
1667   EVT LoadedVT = LD->getMemoryVT();
1668   ISD::MemIndexedMode AM = LD->getAddressingMode();
1669   if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD ||
1670       LoadedVT.getSimpleVT().SimpleTy != MVT::i32)
1671     return false;
1672 
1673   auto *COffs = dyn_cast<ConstantSDNode>(LD->getOffset());
1674   if (!COffs || COffs->getZExtValue() != 4)
1675     return false;
1676 
1677   // A T1 post-indexed load is just a single register LDM: LDM r0!, {r1}.
1678   // The encoding of LDM is not how the rest of ISel expects a post-inc load to
1679   // look however, so we use a pseudo here and switch it for a tLDMIA_UPD after
1680   // ISel.
1681   SDValue Chain = LD->getChain();
1682   SDValue Base = LD->getBasePtr();
1683   SDValue Ops[]= { Base, getAL(CurDAG, SDLoc(N)),
1684                    CurDAG->getRegister(0, MVT::i32), Chain };
1685   SDNode *New = CurDAG->getMachineNode(ARM::tLDR_postidx, SDLoc(N), MVT::i32,
1686                                        MVT::i32, MVT::Other, Ops);
1687   transferMemOperands(N, New);
1688   ReplaceNode(N, New);
1689   return true;
1690 }
1691 
1692 bool ARMDAGToDAGISel::tryT2IndexedLoad(SDNode *N) {
1693   LoadSDNode *LD = cast<LoadSDNode>(N);
1694   ISD::MemIndexedMode AM = LD->getAddressingMode();
1695   if (AM == ISD::UNINDEXED)
1696     return false;
1697 
1698   EVT LoadedVT = LD->getMemoryVT();
1699   bool isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
1700   SDValue Offset;
1701   bool isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
1702   unsigned Opcode = 0;
1703   bool Match = false;
1704   if (SelectT2AddrModeImm8Offset(N, LD->getOffset(), Offset)) {
1705     switch (LoadedVT.getSimpleVT().SimpleTy) {
1706     case MVT::i32:
1707       Opcode = isPre ? ARM::t2LDR_PRE : ARM::t2LDR_POST;
1708       break;
1709     case MVT::i16:
1710       if (isSExtLd)
1711         Opcode = isPre ? ARM::t2LDRSH_PRE : ARM::t2LDRSH_POST;
1712       else
1713         Opcode = isPre ? ARM::t2LDRH_PRE : ARM::t2LDRH_POST;
1714       break;
1715     case MVT::i8:
1716     case MVT::i1:
1717       if (isSExtLd)
1718         Opcode = isPre ? ARM::t2LDRSB_PRE : ARM::t2LDRSB_POST;
1719       else
1720         Opcode = isPre ? ARM::t2LDRB_PRE : ARM::t2LDRB_POST;
1721       break;
1722     default:
1723       return false;
1724     }
1725     Match = true;
1726   }
1727 
1728   if (Match) {
1729     SDValue Chain = LD->getChain();
1730     SDValue Base = LD->getBasePtr();
1731     SDValue Ops[]= { Base, Offset, getAL(CurDAG, SDLoc(N)),
1732                      CurDAG->getRegister(0, MVT::i32), Chain };
1733     SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32,
1734                                          MVT::Other, Ops);
1735     transferMemOperands(N, New);
1736     ReplaceNode(N, New);
1737     return true;
1738   }
1739 
1740   return false;
1741 }
1742 
/// Try to select an MVE pre/post-indexed (writeback) vector load from a
/// plain or masked indexed load node. Returns true and replaces N on
/// success.
bool ARMDAGToDAGISel::tryMVEIndexedLoad(SDNode *N) {
  EVT LoadedVT;
  unsigned Opcode = 0;
  bool isSExtLd, isPre;
  Align Alignment;
  ARMVCC::VPTCodes Pred;
  SDValue PredReg;
  SDValue Chain, Base, Offset;

  // Gather the common fields from either a plain or a masked indexed load.
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Unmasked loads are selected as unpredicated instructions.
    Pred = ARMVCC::None;
    PredReg = CurDAG->getRegister(0, MVT::i32);
  } else if (MaskedLoadSDNode *LD = dyn_cast<MaskedLoadSDNode>(N)) {
    ISD::MemIndexedMode AM = LD->getAddressingMode();
    if (AM == ISD::UNINDEXED)
      return false;
    LoadedVT = LD->getMemoryVT();
    if (!LoadedVT.isVector())
      return false;

    Chain = LD->getChain();
    Base = LD->getBasePtr();
    Offset = LD->getOffset();
    Alignment = LD->getAlign();
    isSExtLd = LD->getExtensionType() == ISD::SEXTLOAD;
    isPre = (AM == ISD::PRE_INC) || (AM == ISD::PRE_DEC);
    // Masked loads become "then"-predicated loads using the mask as the
    // predicate register.
    Pred = ARMVCC::Then;
    PredReg = LD->getMask();
  } else
    llvm_unreachable("Expected a Load or a Masked Load!");

  // We allow LE non-masked loads to change the type (for example use a vldrb.8
  // as opposed to a vldrw.32). This can allow extra addressing modes or
  // alignments for what is otherwise an equivalent instruction.
  bool CanChangeType = Subtarget->isLittle() && !isa<MaskedLoadSDNode>(N);

  // Pick the opcode whose element width, extension kind and alignment
  // requirements match. The trailing 0/1/2 argument to
  // SelectT2AddrModeImm7Offset presumably scales the imm7 offset by the
  // byte/half/word element size — confirm against its definition.
  SDValue NewOffset;
  if (Alignment >= Align(2) && LoadedVT == MVT::v4i16 &&
      SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRHS32_pre : ARM::MVE_VLDRHS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRHU32_pre : ARM::MVE_VLDRHU32_post;
  } else if (LoadedVT == MVT::v8i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS16_pre : ARM::MVE_VLDRBS16_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU16_pre : ARM::MVE_VLDRBU16_post;
  } else if (LoadedVT == MVT::v4i8 &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0)) {
    if (isSExtLd)
      Opcode = isPre ? ARM::MVE_VLDRBS32_pre : ARM::MVE_VLDRBS32_post;
    else
      Opcode = isPre ? ARM::MVE_VLDRBU32_pre : ARM::MVE_VLDRBU32_post;
  } else if (Alignment >= Align(4) &&
             (CanChangeType || LoadedVT == MVT::v4i32 ||
              LoadedVT == MVT::v4f32) &&
             SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 2))
    Opcode = isPre ? ARM::MVE_VLDRWU32_pre : ARM::MVE_VLDRWU32_post;
  else if (Alignment >= Align(2) &&
           (CanChangeType || LoadedVT == MVT::v8i16 ||
            LoadedVT == MVT::v8f16) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 1))
    Opcode = isPre ? ARM::MVE_VLDRHU16_pre : ARM::MVE_VLDRHU16_post;
  else if ((CanChangeType || LoadedVT == MVT::v16i8) &&
           SelectT2AddrModeImm7Offset(N, Offset, NewOffset, 0))
    Opcode = isPre ? ARM::MVE_VLDRBU8_pre : ARM::MVE_VLDRBU8_post;
  else
    return false;

  SDValue Ops[] = {Base,
                   NewOffset,
                   CurDAG->getTargetConstant(Pred, SDLoc(N), MVT::i32),
                   PredReg,
                   CurDAG->getRegister(0, MVT::i32), // tp_reg
                   Chain};
  // The machine node's results are (i32 writeback, vector value, chain),
  // whereas the load node produces (vector value, writeback, chain), so
  // results 0 and 1 are cross-wired below.
  SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32,
                                       N->getValueType(0), MVT::Other, Ops);
  transferMemOperands(N, New);
  ReplaceUses(SDValue(N, 0), SDValue(New, 1));
  ReplaceUses(SDValue(N, 1), SDValue(New, 0));
  ReplaceUses(SDValue(N, 2), SDValue(New, 2));
  CurDAG->RemoveDeadNode(N);
  return true;
}
1842 
1843 /// Form a GPRPair pseudo register from a pair of GPR regs.
1844 SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) {
1845   SDLoc dl(V0.getNode());
1846   SDValue RegClass =
1847     CurDAG->getTargetConstant(ARM::GPRPairRegClassID, dl, MVT::i32);
1848   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
1849   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
1850   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1851   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1852 }
1853 
1854 /// Form a D register from a pair of S registers.
1855 SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1856   SDLoc dl(V0.getNode());
1857   SDValue RegClass =
1858     CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, dl, MVT::i32);
1859   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1860   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1861   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1862   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1863 }
1864 
1865 /// Form a quad register from a pair of D registers.
1866 SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1867   SDLoc dl(V0.getNode());
1868   SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, dl,
1869                                                MVT::i32);
1870   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1871   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1872   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1873   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1874 }
1875 
1876 /// Form 4 consecutive D registers from a pair of Q registers.
1877 SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) {
1878   SDLoc dl(V0.getNode());
1879   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1880                                                MVT::i32);
1881   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1882   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1883   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1 };
1884   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1885 }
1886 
1887 /// Form 4 consecutive S registers.
1888 SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1,
1889                                    SDValue V2, SDValue V3) {
1890   SDLoc dl(V0.getNode());
1891   SDValue RegClass =
1892     CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, dl, MVT::i32);
1893   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, dl, MVT::i32);
1894   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::ssub_1, dl, MVT::i32);
1895   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::ssub_2, dl, MVT::i32);
1896   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::ssub_3, dl, MVT::i32);
1897   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1898                                     V2, SubReg2, V3, SubReg3 };
1899   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1900 }
1901 
1902 /// Form 4 consecutive D registers.
1903 SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1,
1904                                    SDValue V2, SDValue V3) {
1905   SDLoc dl(V0.getNode());
1906   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, dl,
1907                                                MVT::i32);
1908   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, dl, MVT::i32);
1909   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, dl, MVT::i32);
1910   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::dsub_2, dl, MVT::i32);
1911   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::dsub_3, dl, MVT::i32);
1912   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1913                                     V2, SubReg2, V3, SubReg3 };
1914   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1915 }
1916 
1917 /// Form 4 consecutive Q registers.
1918 SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1,
1919                                    SDValue V2, SDValue V3) {
1920   SDLoc dl(V0.getNode());
1921   SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, dl,
1922                                                MVT::i32);
1923   SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, dl, MVT::i32);
1924   SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, dl, MVT::i32);
1925   SDValue SubReg2 = CurDAG->getTargetConstant(ARM::qsub_2, dl, MVT::i32);
1926   SDValue SubReg3 = CurDAG->getTargetConstant(ARM::qsub_3, dl, MVT::i32);
1927   const SDValue Ops[] = { RegClass, V0, SubReg0, V1, SubReg1,
1928                                     V2, SubReg2, V3, SubReg3 };
1929   return CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, dl, VT, Ops);
1930 }
1931 
1932 /// GetVLDSTAlign - Get the alignment (in bytes) for the alignment operand
1933 /// of a NEON VLD or VST instruction.  The supported values depend on the
1934 /// number of registers being loaded.
1935 SDValue ARMDAGToDAGISel::GetVLDSTAlign(SDValue Align, const SDLoc &dl,
1936                                        unsigned NumVecs, bool is64BitVector) {
1937   unsigned NumRegs = NumVecs;
1938   if (!is64BitVector && NumVecs < 3)
1939     NumRegs *= 2;
1940 
1941   unsigned Alignment = Align->getAsZExtVal();
1942   if (Alignment >= 32 && NumRegs == 4)
1943     Alignment = 32;
1944   else if (Alignment >= 16 && (NumRegs == 2 || NumRegs == 4))
1945     Alignment = 16;
1946   else if (Alignment >= 8)
1947     Alignment = 8;
1948   else
1949     Alignment = 0;
1950 
1951   return CurDAG->getTargetConstant(Alignment, dl, MVT::i32);
1952 }
1953 
1954 static bool isVLDfixed(unsigned Opc)
1955 {
1956   switch (Opc) {
1957   default: return false;
1958   case ARM::VLD1d8wb_fixed : return true;
1959   case ARM::VLD1d16wb_fixed : return true;
1960   case ARM::VLD1d64Qwb_fixed : return true;
1961   case ARM::VLD1d32wb_fixed : return true;
1962   case ARM::VLD1d64wb_fixed : return true;
1963   case ARM::VLD1d8TPseudoWB_fixed : return true;
1964   case ARM::VLD1d16TPseudoWB_fixed : return true;
1965   case ARM::VLD1d32TPseudoWB_fixed : return true;
1966   case ARM::VLD1d64TPseudoWB_fixed : return true;
1967   case ARM::VLD1d8QPseudoWB_fixed : return true;
1968   case ARM::VLD1d16QPseudoWB_fixed : return true;
1969   case ARM::VLD1d32QPseudoWB_fixed : return true;
1970   case ARM::VLD1d64QPseudoWB_fixed : return true;
1971   case ARM::VLD1q8wb_fixed : return true;
1972   case ARM::VLD1q16wb_fixed : return true;
1973   case ARM::VLD1q32wb_fixed : return true;
1974   case ARM::VLD1q64wb_fixed : return true;
1975   case ARM::VLD1DUPd8wb_fixed : return true;
1976   case ARM::VLD1DUPd16wb_fixed : return true;
1977   case ARM::VLD1DUPd32wb_fixed : return true;
1978   case ARM::VLD1DUPq8wb_fixed : return true;
1979   case ARM::VLD1DUPq16wb_fixed : return true;
1980   case ARM::VLD1DUPq32wb_fixed : return true;
1981   case ARM::VLD2d8wb_fixed : return true;
1982   case ARM::VLD2d16wb_fixed : return true;
1983   case ARM::VLD2d32wb_fixed : return true;
1984   case ARM::VLD2q8PseudoWB_fixed : return true;
1985   case ARM::VLD2q16PseudoWB_fixed : return true;
1986   case ARM::VLD2q32PseudoWB_fixed : return true;
1987   case ARM::VLD2DUPd8wb_fixed : return true;
1988   case ARM::VLD2DUPd16wb_fixed : return true;
1989   case ARM::VLD2DUPd32wb_fixed : return true;
1990   case ARM::VLD2DUPq8OddPseudoWB_fixed: return true;
1991   case ARM::VLD2DUPq16OddPseudoWB_fixed: return true;
1992   case ARM::VLD2DUPq32OddPseudoWB_fixed: return true;
1993   }
1994 }
1995 
1996 static bool isVSTfixed(unsigned Opc)
1997 {
1998   switch (Opc) {
1999   default: return false;
2000   case ARM::VST1d8wb_fixed : return true;
2001   case ARM::VST1d16wb_fixed : return true;
2002   case ARM::VST1d32wb_fixed : return true;
2003   case ARM::VST1d64wb_fixed : return true;
2004   case ARM::VST1q8wb_fixed : return true;
2005   case ARM::VST1q16wb_fixed : return true;
2006   case ARM::VST1q32wb_fixed : return true;
2007   case ARM::VST1q64wb_fixed : return true;
2008   case ARM::VST1d8TPseudoWB_fixed : return true;
2009   case ARM::VST1d16TPseudoWB_fixed : return true;
2010   case ARM::VST1d32TPseudoWB_fixed : return true;
2011   case ARM::VST1d64TPseudoWB_fixed : return true;
2012   case ARM::VST1d8QPseudoWB_fixed : return true;
2013   case ARM::VST1d16QPseudoWB_fixed : return true;
2014   case ARM::VST1d32QPseudoWB_fixed : return true;
2015   case ARM::VST1d64QPseudoWB_fixed : return true;
2016   case ARM::VST2d8wb_fixed : return true;
2017   case ARM::VST2d16wb_fixed : return true;
2018   case ARM::VST2d32wb_fixed : return true;
2019   case ARM::VST2q8PseudoWB_fixed : return true;
2020   case ARM::VST2q16PseudoWB_fixed : return true;
2021   case ARM::VST2q32PseudoWB_fixed : return true;
2022   }
2023 }
2024 
// Get the register stride update opcode of a VLD/VST instruction that
// is otherwise equivalent to the given fixed stride updating instruction.
// Opcodes with no register-stride twin are returned unchanged.
static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) {
  assert((isVLDfixed(Opc) || isVSTfixed(Opc))
    && "Incorrect fixed stride updating instruction.");
  switch (Opc) {
  default: break;
  // VLD1 writeback forms (plain, triple/quad pseudos, and dup variants):
  case ARM::VLD1d8wb_fixed: return ARM::VLD1d8wb_register;
  case ARM::VLD1d16wb_fixed: return ARM::VLD1d16wb_register;
  case ARM::VLD1d32wb_fixed: return ARM::VLD1d32wb_register;
  case ARM::VLD1d64wb_fixed: return ARM::VLD1d64wb_register;
  case ARM::VLD1q8wb_fixed: return ARM::VLD1q8wb_register;
  case ARM::VLD1q16wb_fixed: return ARM::VLD1q16wb_register;
  case ARM::VLD1q32wb_fixed: return ARM::VLD1q32wb_register;
  case ARM::VLD1q64wb_fixed: return ARM::VLD1q64wb_register;
  case ARM::VLD1d64Twb_fixed: return ARM::VLD1d64Twb_register;
  case ARM::VLD1d64Qwb_fixed: return ARM::VLD1d64Qwb_register;
  case ARM::VLD1d8TPseudoWB_fixed: return ARM::VLD1d8TPseudoWB_register;
  case ARM::VLD1d16TPseudoWB_fixed: return ARM::VLD1d16TPseudoWB_register;
  case ARM::VLD1d32TPseudoWB_fixed: return ARM::VLD1d32TPseudoWB_register;
  case ARM::VLD1d64TPseudoWB_fixed: return ARM::VLD1d64TPseudoWB_register;
  case ARM::VLD1d8QPseudoWB_fixed: return ARM::VLD1d8QPseudoWB_register;
  case ARM::VLD1d16QPseudoWB_fixed: return ARM::VLD1d16QPseudoWB_register;
  case ARM::VLD1d32QPseudoWB_fixed: return ARM::VLD1d32QPseudoWB_register;
  case ARM::VLD1d64QPseudoWB_fixed: return ARM::VLD1d64QPseudoWB_register;
  case ARM::VLD1DUPd8wb_fixed : return ARM::VLD1DUPd8wb_register;
  case ARM::VLD1DUPd16wb_fixed : return ARM::VLD1DUPd16wb_register;
  case ARM::VLD1DUPd32wb_fixed : return ARM::VLD1DUPd32wb_register;
  case ARM::VLD1DUPq8wb_fixed : return ARM::VLD1DUPq8wb_register;
  case ARM::VLD1DUPq16wb_fixed : return ARM::VLD1DUPq16wb_register;
  case ARM::VLD1DUPq32wb_fixed : return ARM::VLD1DUPq32wb_register;
  case ARM::VLD2DUPq8OddPseudoWB_fixed: return ARM::VLD2DUPq8OddPseudoWB_register;
  case ARM::VLD2DUPq16OddPseudoWB_fixed: return ARM::VLD2DUPq16OddPseudoWB_register;
  case ARM::VLD2DUPq32OddPseudoWB_fixed: return ARM::VLD2DUPq32OddPseudoWB_register;

  // VST1 writeback forms (plain and triple/quad pseudos):
  case ARM::VST1d8wb_fixed: return ARM::VST1d8wb_register;
  case ARM::VST1d16wb_fixed: return ARM::VST1d16wb_register;
  case ARM::VST1d32wb_fixed: return ARM::VST1d32wb_register;
  case ARM::VST1d64wb_fixed: return ARM::VST1d64wb_register;
  case ARM::VST1q8wb_fixed: return ARM::VST1q8wb_register;
  case ARM::VST1q16wb_fixed: return ARM::VST1q16wb_register;
  case ARM::VST1q32wb_fixed: return ARM::VST1q32wb_register;
  case ARM::VST1q64wb_fixed: return ARM::VST1q64wb_register;
  case ARM::VST1d8TPseudoWB_fixed: return ARM::VST1d8TPseudoWB_register;
  case ARM::VST1d16TPseudoWB_fixed: return ARM::VST1d16TPseudoWB_register;
  case ARM::VST1d32TPseudoWB_fixed: return ARM::VST1d32TPseudoWB_register;
  case ARM::VST1d64TPseudoWB_fixed: return ARM::VST1d64TPseudoWB_register;
  case ARM::VST1d8QPseudoWB_fixed: return ARM::VST1d8QPseudoWB_register;
  case ARM::VST1d16QPseudoWB_fixed: return ARM::VST1d16QPseudoWB_register;
  case ARM::VST1d32QPseudoWB_fixed: return ARM::VST1d32QPseudoWB_register;
  case ARM::VST1d64QPseudoWB_fixed: return ARM::VST1d64QPseudoWB_register;

  // VLD2 writeback forms:
  case ARM::VLD2d8wb_fixed: return ARM::VLD2d8wb_register;
  case ARM::VLD2d16wb_fixed: return ARM::VLD2d16wb_register;
  case ARM::VLD2d32wb_fixed: return ARM::VLD2d32wb_register;
  case ARM::VLD2q8PseudoWB_fixed: return ARM::VLD2q8PseudoWB_register;
  case ARM::VLD2q16PseudoWB_fixed: return ARM::VLD2q16PseudoWB_register;
  case ARM::VLD2q32PseudoWB_fixed: return ARM::VLD2q32PseudoWB_register;

  // VST2 writeback forms:
  case ARM::VST2d8wb_fixed: return ARM::VST2d8wb_register;
  case ARM::VST2d16wb_fixed: return ARM::VST2d16wb_register;
  case ARM::VST2d32wb_fixed: return ARM::VST2d32wb_register;
  case ARM::VST2q8PseudoWB_fixed: return ARM::VST2q8PseudoWB_register;
  case ARM::VST2q16PseudoWB_fixed: return ARM::VST2q16PseudoWB_register;
  case ARM::VST2q32PseudoWB_fixed: return ARM::VST2q32PseudoWB_register;

  // VLD2DUP writeback forms:
  case ARM::VLD2DUPd8wb_fixed: return ARM::VLD2DUPd8wb_register;
  case ARM::VLD2DUPd16wb_fixed: return ARM::VLD2DUPd16wb_register;
  case ARM::VLD2DUPd32wb_fixed: return ARM::VLD2DUPd32wb_register;
  }
  return Opc; // If not one we handle, return it unchanged.
}
2097 
2098 /// Returns true if the given increment is a Constant known to be equal to the
2099 /// access size performed by a NEON load/store. This means the "[rN]!" form can
2100 /// be used.
2101 static bool isPerfectIncrement(SDValue Inc, EVT VecTy, unsigned NumVecs) {
2102   auto C = dyn_cast<ConstantSDNode>(Inc);
2103   return C && C->getZExtValue() == VecTy.getSizeInBits() / 8 * NumVecs;
2104 }
2105 
/// SelectVLD - Select a NEON VLD1-VLD4, with or without address writeback.
/// The opcode tables are indexed by element size: DOpcodes for D-register
/// forms, QOpcodes0 for Q-register forms (or the even-subregs half of a
/// quad-register VLD3/VLD4), and QOpcodes1 for the odd-subregs half.
void ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  // Intrinsic nodes presumably carry the intrinsic ID before the address
  // operand, hence index 2 vs 1 — confirm against the callers.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // OpcodeIndex selects the entry in the opcode tables by element size.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Model the whole load as one wide i64 vector covering every loaded
  // register; VLD3 rounds up to 4 registers' worth of elements.
  EVT ResTy;
  if (NumVecs == 1)
    ResTy = VT;
  else {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);
  }
  // Result list: wide value, updated address (writeback only), chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SDNode *VLd;
  SmallVector<SDValue, 7> Ops;

  // Double registers and VLD1/VLD2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VLD1 for v1i64 even if the pseudo says vld2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVLDfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      // VLD1/VLD2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      } else if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  } else {
    // Otherwise, quad registers are loaded with two separate instructions,
    // where one loads the even registers and the other loads the odd registers.
    EVT AddrTy = MemAddr.getValueType();

    // Load the even subregs.  This is always an updating load, so that it
    // provides the address to the second load for the odd subregs.
    SDValue ImplDef =
      SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = { MemAddr, Align, Reg0, ImplDef, Pred, Reg0, Chain };
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                          ResTy, AddrTy, MVT::Other, OpsA);
    Chain = SDValue(VLdA, 2);

    // Load the odd subregs. The first load's updated address (result 1) is
    // the base, and its wide result (result 0) is passed through so the odd
    // halves merge into the same super-register.
    Ops.push_back(SDValue(VLdA, 1));
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      assert(isa<ConstantSDNode>(Inc.getNode()) &&
             "only constant post-increment update allowed for VLD3/4");
      (void)Inc;
      Ops.push_back(Reg0);
    }
    Ops.push_back(SDValue(VLdA, 0));
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    VLd = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys, Ops);
  }

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLd), {MemOp});

  if (NumVecs == 1) {
    ReplaceNode(N, VLd);
    return;
  }

  // Extract out the subregisters.
  SDValue SuperReg = SDValue(VLd, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = (is64BitVector ? ARM::dsub_0 : ARM::qsub_0);
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // N's result NumVecs maps to VLd's result 1 (the writeback value when
  // updating, otherwise the chain); when updating, N's result NumVecs+1 is
  // the chain at VLd's result 2.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2));
  CurDAG->RemoveDeadNode(N);
}
2247 
/// Select a NEON vector store (VST1/VST2/VST3/VST4), optionally with
/// post-increment address update.
///
/// \p NumVecs is the number of vectors being stored (1-4).  \p DOpcodes,
/// \p QOpcodes0 and \p QOpcodes1 are machine-opcode tables indexed by element
/// size: DOpcodes for 64-bit (D-register) vectors, the Q tables for 128-bit
/// vectors.  Quad-register VST3/VST4 are split into two instructions, one
/// taken from each Q table.
void ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs,
                                const uint16_t *DOpcodes,
                                const uint16_t *QOpcodes0,
                                const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();
  // Clamp the alignment operand to what this operation can actually use.
  Align = GetVLDSTAlign(Align, dl, NumVecs, is64BitVector);

  // Map the vector type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vst type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
  case MVT::v1i64: OpcodeIndex = 3; break;
    // Quad-register operations:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 1; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v2f64:
  case MVT::v2i64: OpcodeIndex = 3; break;
  }

  // Updating stores additionally produce the incremented address (i32)
  // before the chain result.
  std::vector<EVT> ResTys;
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
  SmallVector<SDValue, 7> Ops;

  // Double registers and VST1/VST2 quad registers are directly supported.
  if (is64BitVector || NumVecs <= 2) {
    SDValue SrcReg;
    if (NumVecs == 1) {
      SrcReg = N->getOperand(Vec0Idx);
    } else if (is64BitVector) {
      // Form a REG_SEQUENCE to force register allocation.
      SDValue V0 = N->getOperand(Vec0Idx + 0);
      SDValue V1 = N->getOperand(Vec0Idx + 1);
      if (NumVecs == 2)
        SrcReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
      else {
        SDValue V2 = N->getOperand(Vec0Idx + 2);
        // If it's a vst3, form a quad D-register and leave the last part as
        // an undef.
        SDValue V3 = (NumVecs == 3)
          ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF,dl,VT), 0)
          : N->getOperand(Vec0Idx + 3);
        SrcReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
      }
    } else {
      // Form a QQ register.
      SDValue Q0 = N->getOperand(Vec0Idx);
      SDValue Q1 = N->getOperand(Vec0Idx + 1);
      SrcReg = SDValue(createQRegPairNode(MVT::v4i64, Q0, Q1), 0);
    }

    unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                    QOpcodes0[OpcodeIndex]);
    Ops.push_back(MemAddr);
    Ops.push_back(Align);
    if (isUpdating) {
      SDValue Inc = N->getOperand(AddrOpIdx + 1);
      // isPerfectIncrement: the increment can be folded into the fixed form;
      // otherwise switch to the explicit register-update opcode.
      bool IsImmUpdate = isPerfectIncrement(Inc, VT, NumVecs);
      if (!IsImmUpdate) {
        // We use a VST1 for v1i64 even if the pseudo says VST2/3/4, so
        // check for the opcode rather than the number of vector elements.
        if (isVSTfixed(Opc))
          Opc = getVLDSTRegisterUpdateOpcode(Opc);
        Ops.push_back(Inc);
      }
      // VST1/VST2 fixed increment does not need Reg0 so only include it in
      // the operands if not such an opcode.
      else if (!isVSTfixed(Opc))
        Ops.push_back(Reg0);
    }
    Ops.push_back(SrcReg);
    Ops.push_back(Pred);
    Ops.push_back(Reg0);
    Ops.push_back(Chain);
    SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

    // Transfer memoperands.
    CurDAG->setNodeMemRefs(cast<MachineSDNode>(VSt), {MemOp});

    ReplaceNode(N, VSt);
    return;
  }

  // Otherwise, quad registers are stored with two separate instructions,
  // where one stores the even registers and the other stores the odd registers.

  // Form the QQQQ REG_SEQUENCE.
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  SDValue V2 = N->getOperand(Vec0Idx + 2);
  // A vst3 fills the fourth slot of the QQQQ sequence with an undef.
  SDValue V3 = (NumVecs == 3)
    ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
    : N->getOperand(Vec0Idx + 3);
  SDValue RegSeq = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);

  // Store the even D registers.  This is always an updating store, so that it
  // provides the address to the second store for the odd subregs.
  const SDValue OpsA[] = { MemAddr, Align, Reg0, RegSeq, Pred, Reg0, Chain };
  SDNode *VStA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl,
                                        MemAddr.getValueType(),
                                        MVT::Other, OpsA);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStA), {MemOp});
  Chain = SDValue(VStA, 1);

  // Store the odd D registers.
  Ops.push_back(SDValue(VStA, 0)); // address produced by the first store
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    assert(isa<ConstantSDNode>(Inc.getNode()) &&
           "only constant post-increment update allowed for VST3/4");
    (void)Inc;
    Ops.push_back(Reg0);
  }
  Ops.push_back(RegSeq);
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);
  SDNode *VStB = CurDAG->getMachineNode(QOpcodes1[OpcodeIndex], dl, ResTys,
                                        Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VStB), {MemOp});
  ReplaceNode(N, VStB);
}
2401 
/// Select a NEON lane-wise load or store (VLD2/3/4LN, VST2/3/4LN), optionally
/// with post-increment address update.
///
/// \p IsLoad selects between the load and store forms.  \p NumVecs is the
/// number of vectors involved (2-4).  \p DOpcodes and \p QOpcodes are opcode
/// tables indexed by element size, for 64-bit and 128-bit vector types
/// respectively.
void ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating,
                                      unsigned NumVecs,
                                      const uint16_t *DOpcodes,
                                      const uint16_t *QOpcodes) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  bool IsIntrinsic = !isUpdating;  // By coincidence, all supported updating
                                   // nodes are not intrinsics.
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  unsigned Vec0Idx = 3; // AddrOpIdx + (isUpdating ? 2 : 1)
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();

  SDValue Chain = N->getOperand(0);
  // The lane number is a constant operand immediately after the vectors.
  unsigned Lane = N->getConstantOperandVal(Vec0Idx + NumVecs);
  EVT VT = N->getOperand(Vec0Idx).getValueType();
  bool is64BitVector = VT.is64BitVector();

  // Compute the usable alignment: cap it at the total number of bytes
  // transferred, round down to a power of two, and drop it (0) when too
  // small to be useful.  The 3-vector forms take no alignment hint.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the vector type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld/vst lane type");
    // Double-register operations:
  case MVT::v8i8:  OpcodeIndex = 0; break;
  case MVT::v4f16:
  case MVT::v4bf16:
  case MVT::v4i16: OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32: OpcodeIndex = 2; break;
    // Quad-register operations:
  case MVT::v8f16:
  case MVT::v8bf16:
  case MVT::v8i16: OpcodeIndex = 0; break;
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 1; break;
  }

  // Result types: loads produce the combined super-register first, updating
  // forms add the incremented address (i32), and the chain comes last.
  std::vector<EVT> ResTys;
  if (IsLoad) {
    unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
    if (!is64BitVector)
      ResTyElts *= 2;
    ResTys.push_back(EVT::getVectorVT(*CurDAG->getContext(),
                                      MVT::i64, ResTyElts));
  }
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 8> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  if (isUpdating) {
    SDValue Inc = N->getOperand(AddrOpIdx + 1);
    // isPerfectIncrement: the increment folds into the instruction (Reg0
    // placeholder); otherwise pass the explicit increment register.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    Ops.push_back(IsImmUpdate ? Reg0 : Inc);
  }

  // Combine the input vectors into a single super-register with a
  // REG_SEQUENCE; a 3-vector operation pads the fourth slot with an undef.
  SDValue SuperReg;
  SDValue V0 = N->getOperand(Vec0Idx + 0);
  SDValue V1 = N->getOperand(Vec0Idx + 1);
  if (NumVecs == 2) {
    if (is64BitVector)
      SuperReg = SDValue(createDRegPairNode(MVT::v2i64, V0, V1), 0);
    else
      SuperReg = SDValue(createQRegPairNode(MVT::v4i64, V0, V1), 0);
  } else {
    SDValue V2 = N->getOperand(Vec0Idx + 2);
    SDValue V3 = (NumVecs == 3)
      ? SDValue(CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, VT), 0)
      : N->getOperand(Vec0Idx + 3);
    if (is64BitVector)
      SuperReg = SDValue(createQuadDRegsNode(MVT::v4i64, V0, V1, V2, V3), 0);
    else
      SuperReg = SDValue(createQuadQRegsNode(MVT::v8i64, V0, V1, V2, V3), 0);
  }
  Ops.push_back(SuperReg);
  Ops.push_back(getI32Imm(Lane, dl));
  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  unsigned Opc = (is64BitVector ? DOpcodes[OpcodeIndex] :
                                  QOpcodes[OpcodeIndex]);
  SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdLn), {MemOp});
  // For stores the new node fully replaces N; loads still need their
  // per-vector results extracted below.
  if (!IsLoad) {
    ReplaceNode(N, VLdLn);
    return;
  }

  // Extract the subregisters.
  SuperReg = SDValue(VLdLn, 0);
  static_assert(ARM::dsub_7 == ARM::dsub_0 + 7 &&
                    ARM::qsub_3 == ARM::qsub_0 + 3,
                "Unexpected subreg numbering");
  unsigned Sub0 = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
  for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
    ReplaceUses(SDValue(N, Vec),
                CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg));
  // Chain result, and updated-address result for post-increment forms.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2));
  CurDAG->RemoveDeadNode(N);
}
2530 
2531 template <typename SDValueVector>
2532 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2533                                            SDValue PredicateMask) {
2534   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2535   Ops.push_back(PredicateMask);
2536   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2537 }
2538 
2539 template <typename SDValueVector>
2540 void ARMDAGToDAGISel::AddMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2541                                            SDValue PredicateMask,
2542                                            SDValue Inactive) {
2543   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::Then, Loc, MVT::i32));
2544   Ops.push_back(PredicateMask);
2545   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2546   Ops.push_back(Inactive);
2547 }
2548 
2549 template <typename SDValueVector>
2550 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc) {
2551   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2552   Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2553   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2554 }
2555 
2556 template <typename SDValueVector>
2557 void ARMDAGToDAGISel::AddEmptyMVEPredicateToOps(SDValueVector &Ops, SDLoc Loc,
2558                                                 EVT InactiveTy) {
2559   Ops.push_back(CurDAG->getTargetConstant(ARMVCC::None, Loc, MVT::i32));
2560   Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2561   Ops.push_back(CurDAG->getRegister(0, MVT::i32)); // tp_reg
2562   Ops.push_back(SDValue(
2563       CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, InactiveTy), 0));
2564 }
2565 
2566 void ARMDAGToDAGISel::SelectMVE_WB(SDNode *N, const uint16_t *Opcodes,
2567                                    bool Predicated) {
2568   SDLoc Loc(N);
2569   SmallVector<SDValue, 8> Ops;
2570 
2571   uint16_t Opcode;
2572   switch (N->getValueType(1).getVectorElementType().getSizeInBits()) {
2573   case 32:
2574     Opcode = Opcodes[0];
2575     break;
2576   case 64:
2577     Opcode = Opcodes[1];
2578     break;
2579   default:
2580     llvm_unreachable("bad vector element size in SelectMVE_WB");
2581   }
2582 
2583   Ops.push_back(N->getOperand(2)); // vector of base addresses
2584 
2585   int32_t ImmValue = N->getConstantOperandVal(3);
2586   Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate offset
2587 
2588   if (Predicated)
2589     AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2590   else
2591     AddEmptyMVEPredicateToOps(Ops, Loc);
2592 
2593   Ops.push_back(N->getOperand(0)); // chain
2594 
2595   SmallVector<EVT, 8> VTs;
2596   VTs.push_back(N->getValueType(1));
2597   VTs.push_back(N->getValueType(0));
2598   VTs.push_back(N->getValueType(2));
2599 
2600   SDNode *New = CurDAG->getMachineNode(Opcode, SDLoc(N), VTs, Ops);
2601   ReplaceUses(SDValue(N, 0), SDValue(New, 1));
2602   ReplaceUses(SDValue(N, 1), SDValue(New, 0));
2603   ReplaceUses(SDValue(N, 2), SDValue(New, 2));
2604   transferMemOperands(N, New);
2605   CurDAG->RemoveDeadNode(N);
2606 }
2607 
2608 void ARMDAGToDAGISel::SelectMVE_LongShift(SDNode *N, uint16_t Opcode,
2609                                           bool Immediate,
2610                                           bool HasSaturationOperand) {
2611   SDLoc Loc(N);
2612   SmallVector<SDValue, 8> Ops;
2613 
2614   // Two 32-bit halves of the value to be shifted
2615   Ops.push_back(N->getOperand(1));
2616   Ops.push_back(N->getOperand(2));
2617 
2618   // The shift count
2619   if (Immediate) {
2620     int32_t ImmValue = N->getConstantOperandVal(3);
2621     Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2622   } else {
2623     Ops.push_back(N->getOperand(3));
2624   }
2625 
2626   // The immediate saturation operand, if any
2627   if (HasSaturationOperand) {
2628     int32_t SatOp = N->getConstantOperandVal(4);
2629     int SatBit = (SatOp == 64 ? 0 : 1);
2630     Ops.push_back(getI32Imm(SatBit, Loc));
2631   }
2632 
2633   // MVE scalar shifts are IT-predicable, so include the standard
2634   // predicate arguments.
2635   Ops.push_back(getAL(CurDAG, Loc));
2636   Ops.push_back(CurDAG->getRegister(0, MVT::i32));
2637 
2638   CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
2639 }
2640 
/// Select an MVE add/subtract-with-carry operation.
///
/// If the incoming carry is a compile-time constant whose bit 29 already has
/// the value the carry-less variant assumes (clear for add, set for
/// subtract), \p OpcodeWithNoCarry is selected and the carry operand is
/// dropped; otherwise \p OpcodeWithCarry is used with the carry value as an
/// extra operand.
void ARMDAGToDAGISel::SelectMVE_VADCSBC(SDNode *N, uint16_t OpcodeWithCarry,
                                        uint16_t OpcodeWithNoCarry,
                                        bool Add, bool Predicated) {
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  uint16_t Opcode;

  // Predicated intrinsics carry a leading inactive-lanes operand, so the
  // data operands start one position later.
  unsigned FirstInputOp = Predicated ? 2 : 1;

  // Two input vectors and the input carry flag
  Ops.push_back(N->getOperand(FirstInputOp));
  Ops.push_back(N->getOperand(FirstInputOp + 1));
  SDValue CarryIn = N->getOperand(FirstInputOp + 2);
  ConstantSDNode *CarryInConstant = dyn_cast<ConstantSDNode>(CarryIn);
  // Only bit 29 of the carry word is inspected here.
  uint32_t CarryMask = 1 << 29;
  uint32_t CarryExpected = Add ? 0 : CarryMask;
  if (CarryInConstant &&
      (CarryInConstant->getZExtValue() & CarryMask) == CarryExpected) {
    Opcode = OpcodeWithNoCarry;
  } else {
    Ops.push_back(CarryIn);
    Opcode = OpcodeWithCarry;
  }

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc,
                         N->getOperand(FirstInputOp + 3),  // predicate
                         N->getOperand(FirstInputOp - 1)); // inactive
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}
2674 
2675 void ARMDAGToDAGISel::SelectMVE_VSHLC(SDNode *N, bool Predicated) {
2676   SDLoc Loc(N);
2677   SmallVector<SDValue, 8> Ops;
2678 
2679   // One vector input, followed by a 32-bit word of bits to shift in
2680   // and then an immediate shift count
2681   Ops.push_back(N->getOperand(1));
2682   Ops.push_back(N->getOperand(2));
2683   int32_t ImmValue = N->getConstantOperandVal(3);
2684   Ops.push_back(getI32Imm(ImmValue, Loc)); // immediate shift count
2685 
2686   if (Predicated)
2687     AddMVEPredicateToOps(Ops, Loc, N->getOperand(4));
2688   else
2689     AddEmptyMVEPredicateToOps(Ops, Loc);
2690 
2691   CurDAG->SelectNodeTo(N, ARM::MVE_VSHLC, N->getVTList(), ArrayRef(Ops));
2692 }
2693 
2694 static bool SDValueToConstBool(SDValue SDVal) {
2695   assert(isa<ConstantSDNode>(SDVal) && "expected a compile-time constant");
2696   ConstantSDNode *SDValConstant = dyn_cast<ConstantSDNode>(SDVal);
2697   uint64_t Value = SDValConstant->getZExtValue();
2698   assert((Value == 0 || Value == 1) && "expected value 0 or 1");
2699   return Value;
2700 }
2701 
/// Shared selection logic for the MVE long multiply-accumulate-across-vector
/// family (vmlaldav/vmlsldav and, via SelectMVE_VRMLLDAVH, the rounding
/// high-half variants).
///
/// \p OpcodesS and \p OpcodesU are flat opcode tables for the signed and
/// unsigned forms, laid out so that the subtract variant is offset by
/// 4*Stride, the exchange variant by 2*Stride and the accumulating variant by
/// Stride; \p TySize (< Stride) indexes the element-size entry within a row.
void ARMDAGToDAGISel::SelectBaseMVE_VMLLDAV(SDNode *N, bool Predicated,
                                            const uint16_t *OpcodesS,
                                            const uint16_t *OpcodesU,
                                            size_t Stride, size_t TySize) {
  assert(TySize < Stride && "Invalid TySize");
  // Operands 1-3 are compile-time flags selecting the instruction variant.
  bool IsUnsigned = SDValueToConstBool(N->getOperand(1));
  bool IsSub = SDValueToConstBool(N->getOperand(2));
  bool IsExchange = SDValueToConstBool(N->getOperand(3));
  if (IsUnsigned) {
    assert(!IsSub &&
           "Unsigned versions of vmlsldav[a]/vrmlsldavh[a] do not exist");
    assert(!IsExchange &&
           "Unsigned versions of vmlaldav[a]x/vrmlaldavh[a]x do not exist");
  }

  auto OpIsZero = [N](size_t OpNo) {
    return isNullConstant(N->getOperand(OpNo));
  };

  // If the input accumulator value is not zero, select an instruction with
  // accumulator, otherwise select an instruction without accumulator
  bool IsAccum = !(OpIsZero(4) && OpIsZero(5));

  // Walk the flat opcode table using the variant flags as strided offsets.
  const uint16_t *Opcodes = IsUnsigned ? OpcodesU : OpcodesS;
  if (IsSub)
    Opcodes += 4 * Stride;
  if (IsExchange)
    Opcodes += 2 * Stride;
  if (IsAccum)
    Opcodes += Stride;
  uint16_t Opcode = Opcodes[TySize];

  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;
  // Push the accumulator operands, if they are used
  if (IsAccum) {
    Ops.push_back(N->getOperand(4));
    Ops.push_back(N->getOperand(5));
  }
  // Push the two vector operands
  Ops.push_back(N->getOperand(6));
  Ops.push_back(N->getOperand(7));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(8));
  else
    AddEmptyMVEPredicateToOps(Ops, Loc);

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}
2752 
2753 void ARMDAGToDAGISel::SelectMVE_VMLLDAV(SDNode *N, bool Predicated,
2754                                         const uint16_t *OpcodesS,
2755                                         const uint16_t *OpcodesU) {
2756   EVT VecTy = N->getOperand(6).getValueType();
2757   size_t SizeIndex;
2758   switch (VecTy.getVectorElementType().getSizeInBits()) {
2759   case 16:
2760     SizeIndex = 0;
2761     break;
2762   case 32:
2763     SizeIndex = 1;
2764     break;
2765   default:
2766     llvm_unreachable("bad vector element size");
2767   }
2768 
2769   SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 2, SizeIndex);
2770 }
2771 
2772 void ARMDAGToDAGISel::SelectMVE_VRMLLDAVH(SDNode *N, bool Predicated,
2773                                           const uint16_t *OpcodesS,
2774                                           const uint16_t *OpcodesU) {
2775   assert(
2776       N->getOperand(6).getValueType().getVectorElementType().getSizeInBits() ==
2777           32 &&
2778       "bad vector element size");
2779   SelectBaseMVE_VMLLDAV(N, Predicated, OpcodesS, OpcodesU, 1, 0);
2780 }
2781 
/// Select an MVE interleaving load (VLD2/VLD4).
///
/// \p Opcodes is a table of per-stage opcode arrays indexed by element size
/// (8/16/32 bits).  One machine instruction is emitted per stage; each stage
/// reads and rewrites the same wide tuple value.  When \p HasWriteback is
/// set, the final stage additionally produces the updated pointer.
void ARMDAGToDAGISel::SelectMVE_VLD(SDNode *N, unsigned NumVecs,
                                    const uint16_t *const *Opcodes,
                                    bool HasWriteback) {
  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // Pick the per-stage opcode array for this element size.
  const uint16_t *OurOpcodes;
  switch (VT.getVectorElementType().getSizeInBits()) {
  case 8:
    OurOpcodes = Opcodes[0];
    break;
  case 16:
    OurOpcodes = Opcodes[1];
    break;
  case 32:
    OurOpcodes = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VLD");
  }

  // The stages communicate through one wide tuple value of NumVecs * 2 i64
  // elements, i.e. NumVecs Q registers (see the qsub extraction below).
  EVT DataTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, NumVecs * 2);
  SmallVector<EVT, 4> ResultTys = {DataTy, MVT::Other};
  unsigned PtrOperand = HasWriteback ? 1 : 2;

  // Seed the tuple with an IMPLICIT_DEF; each stage fills in more of it.
  auto Data = SDValue(
      CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, Loc, DataTy), 0);
  SDValue Chain = N->getOperand(0);
  // Add a MVE_VLDn instruction for each Vec, except the last
  for (unsigned Stage = 0; Stage < NumVecs - 1; ++Stage) {
    SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
    auto LoadInst =
        CurDAG->getMachineNode(OurOpcodes[Stage], Loc, ResultTys, Ops);
    Data = SDValue(LoadInst, 0);
    Chain = SDValue(LoadInst, 1);
    transferMemOperands(N, LoadInst);
  }
  // The last may need a writeback on it
  if (HasWriteback)
    ResultTys = {DataTy, MVT::i32, MVT::Other};
  SDValue Ops[] = {Data, N->getOperand(PtrOperand), Chain};
  auto LoadInst =
      CurDAG->getMachineNode(OurOpcodes[NumVecs - 1], Loc, ResultTys, Ops);
  transferMemOperands(N, LoadInst);

  // Replace the original results: one Q-register extract per vector, then
  // the optional writeback value, then the chain.
  unsigned i;
  for (i = 0; i < NumVecs; i++)
    ReplaceUses(SDValue(N, i),
                CurDAG->getTargetExtractSubreg(ARM::qsub_0 + i, Loc, VT,
                                               SDValue(LoadInst, 0)));
  if (HasWriteback)
    ReplaceUses(SDValue(N, i++), SDValue(LoadInst, 1));
  ReplaceUses(SDValue(N, i), SDValue(LoadInst, HasWriteback ? 2 : 1));
  CurDAG->RemoveDeadNode(N);
}
2837 
/// Select an MVE increment/decrement-and-duplicate style intrinsic (the
/// "VxDUP" family).
///
/// \p Opcodes holds one machine opcode per element size (8/16/32 bits).
/// \p Wrapping selects the variant taking an extra wrap-limit operand, and
/// \p Predicated the variant with a predicate mask and inactive-lanes value.
void ARMDAGToDAGISel::SelectMVE_VxDUP(SDNode *N, const uint16_t *Opcodes,
                                      bool Wrapping, bool Predicated) {
  EVT VT = N->getValueType(0);
  SDLoc Loc(N);

  // Pick the opcode for this element size.
  uint16_t Opcode;
  switch (VT.getScalarSizeInBits()) {
  case 8:
    Opcode = Opcodes[0];
    break;
  case 16:
    Opcode = Opcodes[1];
    break;
  case 32:
    Opcode = Opcodes[2];
    break;
  default:
    llvm_unreachable("bad vector element size in SelectMVE_VxDUP");
  }

  SmallVector<SDValue, 8> Ops;
  // OpIdx walks the intrinsic's operands in order; which operands are
  // present depends on the Predicated/Wrapping flags.
  unsigned OpIdx = 1;

  SDValue Inactive;
  if (Predicated)
    Inactive = N->getOperand(OpIdx++);

  Ops.push_back(N->getOperand(OpIdx++));     // base
  if (Wrapping)
    Ops.push_back(N->getOperand(OpIdx++));   // limit

  // The step is a compile-time constant, converted to a target immediate.
  SDValue ImmOp = N->getOperand(OpIdx++);    // step
  int ImmValue = ImmOp->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmValue, Loc));

  if (Predicated)
    AddMVEPredicateToOps(Ops, Loc, N->getOperand(OpIdx), Inactive);
  else
    AddEmptyMVEPredicateToOps(Ops, Loc, N->getValueType(0));

  CurDAG->SelectNodeTo(N, Opcode, N->getVTList(), ArrayRef(Ops));
}
2880 
/// Select a dual-register CDE (Custom Datapath Extension) instruction
/// ("CXxD" forms).
///
/// \p NumExtraOps is the number of register operands copied through
/// unchanged; \p HasAccum selects the accumulating variant, whose 64-bit
/// accumulator arrives as two i32 halves.  The instruction produces a GPR
/// pair (MVT::Untyped), which is split back into the intrinsic's two i32
/// results via gsub_0/gsub_1 extracts.
void ARMDAGToDAGISel::SelectCDE_CXxD(SDNode *N, uint16_t Opcode,
                                     size_t NumExtraOps, bool HasAccum) {
  bool IsBigEndian = CurDAG->getDataLayout().isBigEndian();
  SDLoc Loc(N);
  SmallVector<SDValue, 8> Ops;

  unsigned OpIdx = 1;

  // Convert and append the immediate operand designating the coprocessor.
  SDValue ImmCorpoc = N->getOperand(OpIdx++);
  uint32_t ImmCoprocVal = ImmCorpoc->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmCoprocVal, Loc));

  // For accumulating variants copy the low and high order parts of the
  // accumulator into a register pair and add it to the operand vector.
  if (HasAccum) {
    SDValue AccLo = N->getOperand(OpIdx++);
    SDValue AccHi = N->getOperand(OpIdx++);
    // On big-endian targets the halves occupy the opposite pair slots.
    if (IsBigEndian)
      std::swap(AccLo, AccHi);
    Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, AccLo, AccHi), 0));
  }

  // Copy extra operands as-is.
  for (size_t I = 0; I < NumExtraOps; I++)
    Ops.push_back(N->getOperand(OpIdx++));

  // Convert and append the immediate operand
  SDValue Imm = N->getOperand(OpIdx);
  uint32_t ImmVal = Imm->getAsZExtVal();
  Ops.push_back(getI32Imm(ImmVal, Loc));

  // Accumulating variants are IT-predicable, add predicate operands.
  if (HasAccum) {
    SDValue Pred = getAL(CurDAG, Loc);
    SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
    Ops.push_back(Pred);
    Ops.push_back(PredReg);
  }

  // Create the CDE instruction
  SDNode *InstrNode = CurDAG->getMachineNode(Opcode, Loc, MVT::Untyped, Ops);
  SDValue ResultPair = SDValue(InstrNode, 0);

  // The original intrinsic had two outputs, and the output of the dual-register
  // CDE instruction is a register pair. We need to extract the two subregisters
  // and replace all uses of the original outputs with the extracted
  // subregisters.
  uint16_t SubRegs[2] = {ARM::gsub_0, ARM::gsub_1};
  if (IsBigEndian)
    std::swap(SubRegs[0], SubRegs[1]);

  for (size_t ResIdx = 0; ResIdx < 2; ResIdx++) {
    // Skip results nobody uses to avoid creating dead extracts.
    if (SDValue(N, ResIdx).use_empty())
      continue;
    SDValue SubReg = CurDAG->getTargetExtractSubreg(SubRegs[ResIdx], Loc,
                                                    MVT::i32, ResultPair);
    ReplaceUses(SDValue(N, ResIdx), SubReg);
  }

  CurDAG->RemoveDeadNode(N);
}
2943 
/// Select a NEON load-and-duplicate operation (VLD1DUP-VLD4DUP), optionally
/// with post-increment address update.
///
/// \p NumVecs is the number of vectors (1-4).  \p DOpcodes covers D-register
/// results; for Q-register results \p QOpcodes0 is used for single-vector
/// loads and \p QOpcodes1 for multi-vector loads, which are split into a
/// pair of instructions.
void ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool IsIntrinsic,
                                   bool isUpdating, unsigned NumVecs,
                                   const uint16_t *DOpcodes,
                                   const uint16_t *QOpcodes0,
                                   const uint16_t *QOpcodes1) {
  assert(Subtarget->hasNEON());
  assert(NumVecs >= 1 && NumVecs <= 4 && "VLDDup NumVecs out-of-range");
  SDLoc dl(N);

  SDValue MemAddr, Align;
  unsigned AddrOpIdx = IsIntrinsic ? 2 : 1;
  if (!SelectAddrMode6(N, N->getOperand(AddrOpIdx), MemAddr, Align))
    return;

  SDValue Chain = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool is64BitVector = VT.is64BitVector();

  // Compute the usable alignment: cap it at the total number of bytes
  // transferred, round down to a power of two, and drop it (0) when too
  // small to be useful.  The 3-vector forms take no alignment hint.
  unsigned Alignment = 0;
  if (NumVecs != 3) {
    Alignment = Align->getAsZExtVal();
    unsigned NumBytes = NumVecs * VT.getScalarSizeInBits() / 8;
    if (Alignment > NumBytes)
      Alignment = NumBytes;
    if (Alignment < 8 && Alignment < NumBytes)
      Alignment = 0;
    // Alignment must be a power of two; make sure of that.
    Alignment = (Alignment & -Alignment);
    if (Alignment == 1)
      Alignment = 0;
  }
  Align = CurDAG->getTargetConstant(Alignment, dl, MVT::i32);

  // Map the vector type onto an index into the opcode tables.
  unsigned OpcodeIndex;
  switch (VT.getSimpleVT().SimpleTy) {
  default: llvm_unreachable("unhandled vld-dup type");
  case MVT::v8i8:
  case MVT::v16i8: OpcodeIndex = 0; break;
  case MVT::v4i16:
  case MVT::v8i16:
  case MVT::v4f16:
  case MVT::v8f16:
  case MVT::v4bf16:
  case MVT::v8bf16:
                  OpcodeIndex = 1; break;
  case MVT::v2f32:
  case MVT::v2i32:
  case MVT::v4f32:
  case MVT::v4i32: OpcodeIndex = 2; break;
  case MVT::v1f64:
  case MVT::v1i64: OpcodeIndex = 3; break;
  }

  // The loaded vectors come back as one wide value; 3-vector loads round up
  // to 4 subregisters.
  unsigned ResTyElts = (NumVecs == 3) ? 4 : NumVecs;
  if (!is64BitVector)
    ResTyElts *= 2;
  EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, ResTyElts);

  // Result types: wide value, optional updated address (i32), then chain.
  std::vector<EVT> ResTys;
  ResTys.push_back(ResTy);
  if (isUpdating)
    ResTys.push_back(MVT::i32);
  ResTys.push_back(MVT::Other);

  SDValue Pred = getAL(CurDAG, dl);
  SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

  SmallVector<SDValue, 6> Ops;
  Ops.push_back(MemAddr);
  Ops.push_back(Align);
  unsigned Opc = is64BitVector    ? DOpcodes[OpcodeIndex]
                 : (NumVecs == 1) ? QOpcodes0[OpcodeIndex]
                                  : QOpcodes1[OpcodeIndex];
  if (isUpdating) {
    SDValue Inc = N->getOperand(2);
    // isPerfectIncrement: the increment can be folded into the fixed form;
    // otherwise switch to the explicit register-update opcode.
    bool IsImmUpdate =
        isPerfectIncrement(Inc, VT.getVectorElementType(), NumVecs);
    if (IsImmUpdate) {
      if (!isVLDfixed(Opc))
        Ops.push_back(Reg0);
    } else {
      if (isVLDfixed(Opc))
        Opc = getVLDSTRegisterUpdateOpcode(Opc);
      Ops.push_back(Inc);
    }
  }
  if (is64BitVector || NumVecs == 1) {
    // Double registers and VLD1 quad registers are directly supported.
  } else {
    // Multi-vector quad loads are split in two: emit the first instruction
    // here and pass its result into the second instruction (Opc) below.
    SDValue ImplDef = SDValue(
        CurDAG->getMachineNode(TargetOpcode::IMPLICIT_DEF, dl, ResTy), 0);
    const SDValue OpsA[] = {MemAddr, Align, ImplDef, Pred, Reg0, Chain};
    SDNode *VLdA = CurDAG->getMachineNode(QOpcodes0[OpcodeIndex], dl, ResTy,
                                          MVT::Other, OpsA);
    Ops.push_back(SDValue(VLdA, 0));
    Chain = SDValue(VLdA, 1);
  }

  Ops.push_back(Pred);
  Ops.push_back(Reg0);
  Ops.push_back(Chain);

  SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops);

  // Transfer memoperands.
  MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
  CurDAG->setNodeMemRefs(cast<MachineSDNode>(VLdDup), {MemOp});

  // Extract the subregisters.
  if (NumVecs == 1) {
    ReplaceUses(SDValue(N, 0), SDValue(VLdDup, 0));
  } else {
    SDValue SuperReg = SDValue(VLdDup, 0);
    static_assert(ARM::dsub_7 == ARM::dsub_0 + 7, "Unexpected subreg numbering");
    unsigned SubIdx = is64BitVector ? ARM::dsub_0 : ARM::qsub_0;
    for (unsigned Vec = 0; Vec != NumVecs; ++Vec) {
      ReplaceUses(SDValue(N, Vec),
                  CurDAG->getTargetExtractSubreg(SubIdx+Vec, dl, VT, SuperReg));
    }
  }
  // Chain result, and updated-address result for post-increment forms.
  ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1));
  if (isUpdating)
    ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2));
  CurDAG->RemoveDeadNode(N);
}
3069 
/// Try to lower a pair of adjacent INSERT_VECTOR_ELTs on a v8f16/v8i16 vector
/// into f32-lane operations (subreg moves, VINSH, VMOVH) on MVE targets.
/// \p N is the outer insert; its vector operand must be the inner insert.
/// Returns true if \p N was replaced.
bool ARMDAGToDAGISel::tryInsertVectorElt(SDNode *N) {
  if (!Subtarget->hasMVEIntegerOps())
    return false;

  SDLoc dl(N);

  // We are trying to use VMOV/VMOVX/VINS to more efficiently lower insert and
  // extracts of v8f16 and v8i16 vectors. Check that we have two adjacent
  // inserts of the correct type:
  SDValue Ins1 = SDValue(N, 0);
  SDValue Ins2 = N->getOperand(0);
  EVT VT = Ins1.getValueType();
  if (Ins2.getOpcode() != ISD::INSERT_VECTOR_ELT || !Ins2.hasOneUse() ||
      !isa<ConstantSDNode>(Ins1.getOperand(2)) ||
      !isa<ConstantSDNode>(Ins2.getOperand(2)) ||
      (VT != MVT::v8f16 && VT != MVT::v8i16) || (Ins2.getValueType() != VT))
    return false;

  // The two inserts must target an even/odd lane pair (Ins2 writes the even
  // lane, Ins1 the odd lane above it) so together they fill one f32 lane.
  unsigned Lane1 = Ins1.getConstantOperandVal(2);
  unsigned Lane2 = Ins2.getConstantOperandVal(2);
  if (Lane2 % 2 != 0 || Lane1 != Lane2 + 1)
    return false;

  // If the inserted values will be able to use T/B already, leave it to the
  // existing tablegen patterns. For example VCVTT/VCVTB.
  SDValue Val1 = Ins1.getOperand(1);
  SDValue Val2 = Ins2.getOperand(1);
  if (Val1.getOpcode() == ISD::FP_ROUND || Val2.getOpcode() == ISD::FP_ROUND)
    return false;

  // Check if the inserted values are both extracts.
  if ((Val1.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val1.getOpcode() == ARMISD::VGETLANEu) &&
      (Val2.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
       Val2.getOpcode() == ARMISD::VGETLANEu) &&
      isa<ConstantSDNode>(Val1.getOperand(1)) &&
      isa<ConstantSDNode>(Val2.getOperand(1)) &&
      (Val1.getOperand(0).getValueType() == MVT::v8f16 ||
       Val1.getOperand(0).getValueType() == MVT::v8i16) &&
      (Val2.getOperand(0).getValueType() == MVT::v8f16 ||
       Val2.getOperand(0).getValueType() == MVT::v8i16)) {
    unsigned ExtractLane1 = Val1.getConstantOperandVal(1);
    unsigned ExtractLane2 = Val2.getConstantOperandVal(1);

    // If the two extracted lanes are from the same place and adjacent, this
    // simplifies into a f32 lane move.
    if (Val1.getOperand(0) == Val2.getOperand(0) && ExtractLane2 % 2 == 0 &&
        ExtractLane1 == ExtractLane2 + 1) {
      SDValue NewExt = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue NewIns = CurDAG->getTargetInsertSubreg(
          ARM::ssub_0 + Lane2 / 2, dl, VT, Ins2.getOperand(0),
          NewExt);
      ReplaceUses(Ins1, NewIns);
      return true;
    }

    // Else v8i16 pattern of an extract and an insert, with a optional vmovx for
    // extracting odd lanes.
    if (VT == MVT::v8i16 && Subtarget->hasFullFP16()) {
      // Pull each source half out as the f32 lane containing it; VMOVH moves
      // an odd half down into the bottom position so VINSH can combine them.
      SDValue Inp1 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane1 / 2, dl, MVT::f32, Val1.getOperand(0));
      SDValue Inp2 = CurDAG->getTargetExtractSubreg(
          ARM::ssub_0 + ExtractLane2 / 2, dl, MVT::f32, Val2.getOperand(0));
      if (ExtractLane1 % 2 != 0)
        Inp1 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp1), 0);
      if (ExtractLane2 % 2 != 0)
        Inp2 = SDValue(CurDAG->getMachineNode(ARM::VMOVH, dl, MVT::f32, Inp2), 0);
      SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Inp2, Inp1);
      SDValue NewIns =
          CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                        Ins2.getOperand(0), SDValue(VINS, 0));
      ReplaceUses(Ins1, NewIns);
      return true;
    }
  }

  // The inserted values are not extracted - if they are f16 then insert them
  // directly using a VINS.
  if (VT == MVT::v8f16 && Subtarget->hasFullFP16()) {
    SDNode *VINS = CurDAG->getMachineNode(ARM::VINSH, dl, MVT::f32, Val2, Val1);
    SDValue NewIns =
        CurDAG->getTargetInsertSubreg(ARM::ssub_0 + Lane2 / 2, dl, MVT::v4f32,
                                      Ins2.getOperand(0), SDValue(VINS, 0));
    ReplaceUses(Ins1, NewIns);
    return true;
  }

  return false;
}
3160 
/// Replace \p N with a single MVE fixed-point VCVT when the multiply \p FMul
/// scales by an exact power of two (the conversion factor 2^n or 2^-n).
/// \p FMul may be \p N itself (fixed-to-float case, see tryFMULFixed) or its
/// operand (float-to-fixed case, see tryFP_TO_INT). \p IsUnsigned selects the
/// unsigned variant and \p FixedToFloat the direction. Returns true on
/// success.
bool ARMDAGToDAGISel::transformFixedFloatingPointConversion(SDNode *N,
                                                            SDNode *FMul,
                                                            bool IsUnsigned,
                                                            bool FixedToFloat) {
  // Only 16- and 32-bit scalars can map onto the MVE VCVT opcodes chosen
  // below.
  auto Type = N->getValueType(0);
  unsigned ScalarBits = Type.getScalarSizeInBits();
  if (ScalarBits > 32)
    return false;

  SDNodeFlags FMulFlags = FMul->getFlags();
  // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
  // allowed in 16 bit unsigned floats
  if (ScalarBits == 16 && !FMulFlags.hasNoInfs() && IsUnsigned)
    return false;

  SDValue ImmNode = FMul->getOperand(1);
  SDValue VecVal = FMul->getOperand(0);
  // Look through an int-to-float conversion to the raw integer input (the
  // fixed-to-float case).
  if (VecVal->getOpcode() == ISD::UINT_TO_FP ||
      VecVal->getOpcode() == ISD::SINT_TO_FP)
    VecVal = VecVal->getOperand(0);

  if (VecVal.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Peek through a bitcast of the scale constant, provided the element width
  // is unchanged.
  if (ImmNode.getOpcode() == ISD::BITCAST) {
    if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
      return false;
    ImmNode = ImmNode.getOperand(0);
  }

  if (ImmNode.getValueType().getScalarSizeInBits() != ScalarBits)
    return false;

  // Recover the splatted scale factor as an APFloat from whichever constant
  // form the DAG used.
  APFloat ImmAPF(0.0f);
  switch (ImmNode.getOpcode()) {
  case ARMISD::VMOVIMM:
  case ARMISD::VDUP: {
    if (!isa<ConstantSDNode>(ImmNode.getOperand(0)))
      return false;
    unsigned Imm = ImmNode.getConstantOperandVal(0);
    // VMOVIMM carries an encoded modified-immediate; decode it to raw bits.
    if (ImmNode.getOpcode() == ARMISD::VMOVIMM)
      Imm = ARM_AM::decodeVMOVModImm(Imm, ScalarBits);
    ImmAPF =
        APFloat(ScalarBits == 32 ? APFloat::IEEEsingle() : APFloat::IEEEhalf(),
                APInt(ScalarBits, Imm));
    break;
  }
  case ARMISD::VMOVFPIMM: {
    ImmAPF = APFloat(ARM_AM::getFPImmFloat(ImmNode.getConstantOperandVal(0)));
    break;
  }
  default:
    return false;
  }

  // Where n is the number of fractional bits, multiplying by 2^n will convert
  // from float to fixed and multiplying by 2^-n will convert from fixed to
  // float. Taking log2 of the factor (after taking the inverse in the case of
  // float to fixed) will give n.
  APFloat ToConvert = ImmAPF;
  if (FixedToFloat) {
    if (!ImmAPF.getExactInverse(&ToConvert))
      return false;
  }
  APSInt Converted(64, false);
  bool IsExact;
  ToConvert.convertToInteger(Converted, llvm::RoundingMode::NearestTiesToEven,
                             &IsExact);
  // The scale must be an exact power of two for a fixed-point VCVT to apply.
  if (!IsExact || !Converted.isPowerOf2())
    return false;

  unsigned FracBits = Converted.logBase2();
  if (FracBits > ScalarBits)
    return false;

  SmallVector<SDValue, 3> Ops{
      VecVal, CurDAG->getConstant(FracBits, SDLoc(N), MVT::i32)};
  AddEmptyMVEPredicateToOps(Ops, SDLoc(N), Type);

  unsigned int Opcode;
  switch (ScalarBits) {
  case 16:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf16u16_fix : ARM::MVE_VCVTf16s16_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
    break;
  case 32:
    if (FixedToFloat)
      Opcode = IsUnsigned ? ARM::MVE_VCVTf32u32_fix : ARM::MVE_VCVTf32s32_fix;
    else
      Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
    break;
  default:
    llvm_unreachable("unexpected number of scalar bits");
    break;
  }

  ReplaceNode(N, CurDAG->getMachineNode(Opcode, SDLoc(N), Type, Ops));
  return true;
}
3262 
3263 bool ARMDAGToDAGISel::tryFP_TO_INT(SDNode *N, SDLoc dl) {
3264   // Transform a floating-point to fixed-point conversion to a VCVT
3265   if (!Subtarget->hasMVEFloatOps())
3266     return false;
3267   EVT Type = N->getValueType(0);
3268   if (!Type.isVector())
3269     return false;
3270   unsigned int ScalarBits = Type.getScalarSizeInBits();
3271 
3272   bool IsUnsigned = N->getOpcode() == ISD::FP_TO_UINT ||
3273                     N->getOpcode() == ISD::FP_TO_UINT_SAT;
3274   SDNode *Node = N->getOperand(0).getNode();
3275 
3276   // floating-point to fixed-point with one fractional bit gets turned into an
3277   // FP_TO_[U|S]INT(FADD (x, x)) rather than an FP_TO_[U|S]INT(FMUL (x, y))
3278   if (Node->getOpcode() == ISD::FADD) {
3279     if (Node->getOperand(0) != Node->getOperand(1))
3280       return false;
3281     SDNodeFlags Flags = Node->getFlags();
3282     // The fixed-point vcvt and vcvt+vmul are not always equivalent if inf is
3283     // allowed in 16 bit unsigned floats
3284     if (ScalarBits == 16 && !Flags.hasNoInfs() && IsUnsigned)
3285       return false;
3286 
3287     unsigned Opcode;
3288     switch (ScalarBits) {
3289     case 16:
3290       Opcode = IsUnsigned ? ARM::MVE_VCVTu16f16_fix : ARM::MVE_VCVTs16f16_fix;
3291       break;
3292     case 32:
3293       Opcode = IsUnsigned ? ARM::MVE_VCVTu32f32_fix : ARM::MVE_VCVTs32f32_fix;
3294       break;
3295     }
3296     SmallVector<SDValue, 3> Ops{Node->getOperand(0),
3297                                 CurDAG->getConstant(1, dl, MVT::i32)};
3298     AddEmptyMVEPredicateToOps(Ops, dl, Type);
3299 
3300     ReplaceNode(N, CurDAG->getMachineNode(Opcode, dl, Type, Ops));
3301     return true;
3302   }
3303 
3304   if (Node->getOpcode() != ISD::FMUL)
3305     return false;
3306 
3307   return transformFixedFloatingPointConversion(N, Node, IsUnsigned, false);
3308 }
3309 
3310 bool ARMDAGToDAGISel::tryFMULFixed(SDNode *N, SDLoc dl) {
3311   // Transform a fixed-point to floating-point conversion to a VCVT
3312   if (!Subtarget->hasMVEFloatOps())
3313     return false;
3314   auto Type = N->getValueType(0);
3315   if (!Type.isVector())
3316     return false;
3317 
3318   auto LHS = N->getOperand(0);
3319   if (LHS.getOpcode() != ISD::SINT_TO_FP && LHS.getOpcode() != ISD::UINT_TO_FP)
3320     return false;
3321 
3322   return transformFixedFloatingPointConversion(
3323       N, N, LHS.getOpcode() == ISD::UINT_TO_FP, true);
3324 }
3325 
/// Try to select \p N as a UBFX/SBFX bitfield extract (or a cheaper plain
/// shift) on ARMv6T2+ targets. Four input shapes are handled:
///   and(srl(x, lsb), low-mask), srl(shl(x, c1), c2),
///   srl(and(x, shifted-mask), lsb) and sign_extend_inreg(sr[la](x, lsb)).
/// \p isSigned selects SBFX over UBFX. Returns true if \p N was replaced.
bool ARMDAGToDAGISel::tryV6T2BitfieldExtractOp(SDNode *N, bool isSigned) {
  if (!Subtarget->hasV6T2Ops())
    return false;

  unsigned Opc = isSigned
    ? (Subtarget->isThumb() ? ARM::t2SBFX : ARM::SBFX)
    : (Subtarget->isThumb() ? ARM::t2UBFX : ARM::UBFX);
  SDLoc dl(N);

  // For unsigned extracts, check for a shift right and mask
  unsigned And_imm = 0;
  if (N->getOpcode() == ISD::AND) {
    if (isOpcWithIntImmediate(N, ISD::AND, And_imm)) {

      // The immediate is a mask of the low bits iff imm & (imm+1) == 0
      if (And_imm & (And_imm + 1))
        return false;

      unsigned Srl_imm = 0;
      if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL,
                                Srl_imm)) {
        assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");

        // Mask off the unnecessary bits of the AND immediate; normally
        // DAGCombine will do this, but that might not happen if
        // targetShrinkDemandedConstant chooses a different immediate.
        And_imm &= -1U >> Srl_imm;

        // Note: The width operand is encoded as width-1.
        unsigned Width = llvm::countr_one(And_imm) - 1;
        unsigned LSB = Srl_imm;

        SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);

        if ((LSB + Width + 1) == N->getValueType(0).getSizeInBits()) {
          // It's cheaper to use a right shift to extract the top bits.
          if (Subtarget->isThumb()) {
            Opc = isSigned ? ARM::t2ASRri : ARM::t2LSRri;
            SDValue Ops[] = { N->getOperand(0).getOperand(0),
                              CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                              getAL(CurDAG, dl), Reg0, Reg0 };
            CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
            return true;
          }

          // ARM models shift instructions as MOVsi with shifter operand.
          ARM_AM::ShiftOpc ShOpcVal = ARM_AM::getShiftOpcForNode(ISD::SRL);
          SDValue ShOpc =
            CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ShOpcVal, LSB), dl,
                                      MVT::i32);
          SDValue Ops[] = { N->getOperand(0).getOperand(0), ShOpc,
                            getAL(CurDAG, dl), Reg0, Reg0 };
          CurDAG->SelectNodeTo(N, ARM::MOVsi, MVT::i32, Ops);
          return true;
        }

        assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
        SDValue Ops[] = { N->getOperand(0).getOperand(0),
                          CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                          CurDAG->getTargetConstant(Width, dl, MVT::i32),
                          getAL(CurDAG, dl), Reg0 };
        CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
        return true;
      }
    }
    return false;
  }

  // Otherwise, we're looking for a shift of a shift
  unsigned Shl_imm = 0;
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SHL, Shl_imm)) {
    assert(Shl_imm > 0 && Shl_imm < 32 && "bad amount in shift node!");
    unsigned Srl_imm = 0;
    if (isInt32Immediate(N->getOperand(1), Srl_imm)) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      // Note: The width operand is encoded as width-1.
      unsigned Width = 32 - Srl_imm - 1;
      // srl(shl(x, c1), c2) extracts the field starting at bit c2-c1; a
      // negative start means the pattern isn't a plain extract.
      int LSB = Srl_imm - Shl_imm;
      if (LSB < 0)
        return false;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(LSB + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // Or we are looking for a shift of an and, with a mask operand
  if (isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::AND, And_imm) &&
      isShiftedMask_32(And_imm)) {
    unsigned Srl_imm = 0;
    unsigned LSB = llvm::countr_zero(And_imm);
    // Shift must be the same as the ands lsb
    if (isInt32Immediate(N->getOperand(1), Srl_imm) && Srl_imm == LSB) {
      assert(Srl_imm > 0 && Srl_imm < 32 && "bad amount in shift node!");
      unsigned MSB = llvm::Log2_32(And_imm);
      // Note: The width operand is encoded as width-1.
      unsigned Width = MSB - LSB;
      SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
      assert(Srl_imm + Width + 1 <= 32 && "Shouldn't create an invalid ubfx");
      SDValue Ops[] = { N->getOperand(0).getOperand(0),
                        CurDAG->getTargetConstant(Srl_imm, dl, MVT::i32),
                        CurDAG->getTargetConstant(Width, dl, MVT::i32),
                        getAL(CurDAG, dl), Reg0 };
      CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
      return true;
    }
  }

  // sign_extend_inreg of a right shift: extract Width bits starting at the
  // shift amount (width here is NOT encoded as width-1 until the operand is
  // built below).
  if (N->getOpcode() == ISD::SIGN_EXTEND_INREG) {
    unsigned Width = cast<VTSDNode>(N->getOperand(1))->getVT().getSizeInBits();
    unsigned LSB = 0;
    if (!isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRL, LSB) &&
        !isOpcWithIntImmediate(N->getOperand(0).getNode(), ISD::SRA, LSB))
      return false;

    if (LSB + Width > 32)
      return false;

    SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
    assert(LSB + Width <= 32 && "Shouldn't create an invalid ubfx");
    SDValue Ops[] = { N->getOperand(0).getOperand(0),
                      CurDAG->getTargetConstant(LSB, dl, MVT::i32),
                      CurDAG->getTargetConstant(Width - 1, dl, MVT::i32),
                      getAL(CurDAG, dl), Reg0 };
    CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
    return true;
  }

  return false;
}
3461 
3462 /// Target-specific DAG combining for ISD::SUB.
3463 /// Target-independent combining lowers SELECT_CC nodes of the form
3464 /// select_cc setg[ge] X,  0,  X, -X
3465 /// select_cc setgt    X, -1,  X, -X
3466 /// select_cc setl[te] X,  0, -X,  X
3467 /// select_cc setlt    X,  1, -X,  X
3468 /// which represent Integer ABS into:
3469 /// Y = sra (X, size(X)-1); sub (xor (X, Y), Y)
3470 /// ARM instruction selection detects the latter and matches it to
3471 /// ARM::ABS or ARM::t2ABS machine node.
3472 bool ARMDAGToDAGISel::tryABSOp(SDNode *N){
3473   SDValue SUBSrc0 = N->getOperand(0);
3474   SDValue SUBSrc1 = N->getOperand(1);
3475   EVT VT = N->getValueType(0);
3476 
3477   if (Subtarget->isThumb1Only())
3478     return false;
3479 
3480   if (SUBSrc0.getOpcode() != ISD::XOR || SUBSrc1.getOpcode() != ISD::SRA)
3481     return false;
3482 
3483   SDValue XORSrc0 = SUBSrc0.getOperand(0);
3484   SDValue XORSrc1 = SUBSrc0.getOperand(1);
3485   SDValue SRASrc0 = SUBSrc1.getOperand(0);
3486   SDValue SRASrc1 = SUBSrc1.getOperand(1);
3487   ConstantSDNode *SRAConstant =  dyn_cast<ConstantSDNode>(SRASrc1);
3488   EVT XType = SRASrc0.getValueType();
3489   unsigned Size = XType.getSizeInBits() - 1;
3490 
3491   if (XORSrc1 == SUBSrc1 && XORSrc0 == SRASrc0 && XType.isInteger() &&
3492       SRAConstant != nullptr && Size == SRAConstant->getZExtValue()) {
3493     unsigned Opcode = Subtarget->isThumb2() ? ARM::t2ABS : ARM::ABS;
3494     CurDAG->SelectNodeTo(N, Opcode, VT, XORSrc0);
3495     return true;
3496   }
3497 
3498   return false;
3499 }
3500 
3501 /// We've got special pseudo-instructions for these
3502 void ARMDAGToDAGISel::SelectCMP_SWAP(SDNode *N) {
3503   unsigned Opcode;
3504   EVT MemTy = cast<MemSDNode>(N)->getMemoryVT();
3505   if (MemTy == MVT::i8)
3506     Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_8 : ARM::CMP_SWAP_8;
3507   else if (MemTy == MVT::i16)
3508     Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_16 : ARM::CMP_SWAP_16;
3509   else if (MemTy == MVT::i32)
3510     Opcode = Subtarget->isThumb() ? ARM::tCMP_SWAP_32 : ARM::CMP_SWAP_32;
3511   else
3512     llvm_unreachable("Unknown AtomicCmpSwap type");
3513 
3514   SDValue Ops[] = {N->getOperand(1), N->getOperand(2), N->getOperand(3),
3515                    N->getOperand(0)};
3516   SDNode *CmpSwap = CurDAG->getMachineNode(
3517       Opcode, SDLoc(N),
3518       CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other), Ops);
3519 
3520   MachineMemOperand *MemOp = cast<MemSDNode>(N)->getMemOperand();
3521   CurDAG->setNodeMemRefs(cast<MachineSDNode>(CmpSwap), {MemOp});
3522 
3523   ReplaceUses(SDValue(N, 0), SDValue(CmpSwap, 0));
3524   ReplaceUses(SDValue(N, 1), SDValue(CmpSwap, 2));
3525   CurDAG->RemoveDeadNode(N);
3526 }
3527 
3528 static std::optional<std::pair<unsigned, unsigned>>
3529 getContiguousRangeOfSetBits(const APInt &A) {
3530   unsigned FirstOne = A.getBitWidth() - A.countl_zero() - 1;
3531   unsigned LastOne = A.countr_zero();
3532   if (A.popcount() != (FirstOne - LastOne + 1))
3533     return std::nullopt;
3534   return std::make_pair(FirstOne, LastOne);
3535 }
3536 
/// Select a CMPZ whose operand is (and X, C) with a contiguous-bit mask C,
/// replacing the AND with flag-setting Thumb shifts that move the masked
/// field against the top or bottom of the register.  Sets
/// \p SwitchEQNEToPLMI when the single-bit case is used, so the caller must
/// rewrite EQ/NE condition codes to PL/MI.
void ARMDAGToDAGISel::SelectCMPZ(SDNode *N, bool &SwitchEQNEToPLMI) {
  assert(N->getOpcode() == ARMISD::CMPZ);
  SwitchEQNEToPLMI = false;

  if (!Subtarget->isThumb())
    // FIXME: Work out whether it is profitable to do this in A32 mode - LSL and
    // LSR don't exist as standalone instructions - they need the barrel shifter.
    return;

  // select (cmpz (and X, C), #0) -> (LSLS X) or (LSRS X) or (LSRS (LSLS X))
  SDValue And = N->getOperand(0);
  if (!And->hasOneUse())
    return;

  SDValue Zero = N->getOperand(1);
  if (!isNullConstant(Zero) || And->getOpcode() != ISD::AND)
    return;
  SDValue X = And.getOperand(0);
  auto C = dyn_cast<ConstantSDNode>(And.getOperand(1));

  if (!C)
    return;
  // Range is (index of highest set bit, index of lowest set bit) of the mask.
  auto Range = getContiguousRangeOfSetBits(C->getAPIntValue());
  if (!Range)
    return;

  // There are several ways to lower this:
  SDNode *NewN;
  SDLoc dl(N);

  // Emit a flag-setting immediate shift: t2LSLri/t2LSRri on Thumb-2, or the
  // Thumb-1 tLSLri/tLSRri (which take CPSR as their first operand) otherwise.
  auto EmitShift = [&](unsigned Opc, SDValue Src, unsigned Imm) -> SDNode* {
    if (Subtarget->isThumb2()) {
      Opc = (Opc == ARM::tLSLri) ? ARM::t2LSLri : ARM::t2LSRri;
      SDValue Ops[] = { Src, CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                        getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
                        CurDAG->getRegister(0, MVT::i32) };
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    } else {
      SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), Src,
                       CurDAG->getTargetConstant(Imm, dl, MVT::i32),
                       getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
      return CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
    }
  };

  if (Range->second == 0) {
    //  1. Mask includes the LSB -> Simply shift the top N bits off
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == 31) {
    //  2. Mask includes the MSB -> Simply shift the bottom N bits off
    NewN = EmitShift(ARM::tLSRri, X, Range->second);
    ReplaceNode(And.getNode(), NewN);
  } else if (Range->first == Range->second) {
    //  3. Only one bit is set. We can shift this into the sign bit and use a
    //     PL/MI comparison. This is not safe if CMPZ has multiple uses because
    //     only one of them (the one currently being selected) will be switched
    //     to use the new condition code.
    if (!N->hasOneUse())
      return;
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    ReplaceNode(And.getNode(), NewN);

    SwitchEQNEToPLMI = true;
  } else if (!Subtarget->hasV6T2Ops()) {
    //  4. Do a double shift to clear bottom and top bits, but only in
    //     thumb-1 mode as in thumb-2 we can use UBFX.
    NewN = EmitShift(ARM::tLSLri, X, 31 - Range->first);
    NewN = EmitShift(ARM::tLSRri, SDValue(NewN, 0),
                     Range->second + (31 - Range->first));
    ReplaceNode(And.getNode(), NewN);
  }
}
3610 
3611 static unsigned getVectorShuffleOpcode(EVT VT, unsigned Opc64[3],
3612                                        unsigned Opc128[3]) {
3613   assert((VT.is64BitVector() || VT.is128BitVector()) &&
3614          "Unexpected vector shuffle length");
3615   switch (VT.getScalarSizeInBits()) {
3616   default:
3617     llvm_unreachable("Unexpected vector shuffle element size");
3618   case 8:
3619     return VT.is64BitVector() ? Opc64[0] : Opc128[0];
3620   case 16:
3621     return VT.is64BitVector() ? Opc64[1] : Opc128[1];
3622   case 32:
3623     return VT.is64BitVector() ? Opc64[2] : Opc128[2];
3624   }
3625 }
3626 
3627 void ARMDAGToDAGISel::Select(SDNode *N) {
3628   SDLoc dl(N);
3629 
3630   if (N->isMachineOpcode()) {
3631     N->setNodeId(-1);
3632     return;   // Already selected.
3633   }
3634 
3635   switch (N->getOpcode()) {
3636   default: break;
3637   case ISD::STORE: {
3638     // For Thumb1, match an sp-relative store in C++. This is a little
3639     // unfortunate, but I don't think I can make the chain check work
3640     // otherwise.  (The chain of the store has to be the same as the chain
3641     // of the CopyFromReg, or else we can't replace the CopyFromReg with
3642     // a direct reference to "SP".)
3643     //
3644     // This is only necessary on Thumb1 because Thumb1 sp-relative stores use
3645     // a different addressing mode from other four-byte stores.
3646     //
3647     // This pattern usually comes up with call arguments.
3648     StoreSDNode *ST = cast<StoreSDNode>(N);
3649     SDValue Ptr = ST->getBasePtr();
3650     if (Subtarget->isThumb1Only() && ST->isUnindexed()) {
3651       int RHSC = 0;
3652       if (Ptr.getOpcode() == ISD::ADD &&
3653           isScaledConstantInRange(Ptr.getOperand(1), /*Scale=*/4, 0, 256, RHSC))
3654         Ptr = Ptr.getOperand(0);
3655 
3656       if (Ptr.getOpcode() == ISD::CopyFromReg &&
3657           cast<RegisterSDNode>(Ptr.getOperand(1))->getReg() == ARM::SP &&
3658           Ptr.getOperand(0) == ST->getChain()) {
3659         SDValue Ops[] = {ST->getValue(),
3660                          CurDAG->getRegister(ARM::SP, MVT::i32),
3661                          CurDAG->getTargetConstant(RHSC, dl, MVT::i32),
3662                          getAL(CurDAG, dl),
3663                          CurDAG->getRegister(0, MVT::i32),
3664                          ST->getChain()};
3665         MachineSDNode *ResNode =
3666             CurDAG->getMachineNode(ARM::tSTRspi, dl, MVT::Other, Ops);
3667         MachineMemOperand *MemOp = ST->getMemOperand();
3668         CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3669         ReplaceNode(N, ResNode);
3670         return;
3671       }
3672     }
3673     break;
3674   }
3675   case ISD::WRITE_REGISTER:
3676     if (tryWriteRegister(N))
3677       return;
3678     break;
3679   case ISD::READ_REGISTER:
3680     if (tryReadRegister(N))
3681       return;
3682     break;
3683   case ISD::INLINEASM:
3684   case ISD::INLINEASM_BR:
3685     if (tryInlineAsm(N))
3686       return;
3687     break;
3688   case ISD::SUB:
3689     // Select special operations if SUB node forms integer ABS pattern
3690     if (tryABSOp(N))
3691       return;
3692     // Other cases are autogenerated.
3693     break;
3694   case ISD::Constant: {
3695     unsigned Val = N->getAsZExtVal();
3696     // If we can't materialize the constant we need to use a literal pool
3697     if (ConstantMaterializationCost(Val, Subtarget) > 2 &&
3698         !Subtarget->genExecuteOnly()) {
3699       SDValue CPIdx = CurDAG->getTargetConstantPool(
3700           ConstantInt::get(Type::getInt32Ty(*CurDAG->getContext()), Val),
3701           TLI->getPointerTy(CurDAG->getDataLayout()));
3702 
3703       SDNode *ResNode;
3704       if (Subtarget->isThumb()) {
3705         SDValue Ops[] = {
3706           CPIdx,
3707           getAL(CurDAG, dl),
3708           CurDAG->getRegister(0, MVT::i32),
3709           CurDAG->getEntryNode()
3710         };
3711         ResNode = CurDAG->getMachineNode(ARM::tLDRpci, dl, MVT::i32, MVT::Other,
3712                                          Ops);
3713       } else {
3714         SDValue Ops[] = {
3715           CPIdx,
3716           CurDAG->getTargetConstant(0, dl, MVT::i32),
3717           getAL(CurDAG, dl),
3718           CurDAG->getRegister(0, MVT::i32),
3719           CurDAG->getEntryNode()
3720         };
3721         ResNode = CurDAG->getMachineNode(ARM::LDRcp, dl, MVT::i32, MVT::Other,
3722                                          Ops);
3723       }
3724       // Annotate the Node with memory operand information so that MachineInstr
3725       // queries work properly. This e.g. gives the register allocation the
3726       // required information for rematerialization.
3727       MachineFunction& MF = CurDAG->getMachineFunction();
3728       MachineMemOperand *MemOp =
3729           MF.getMachineMemOperand(MachinePointerInfo::getConstantPool(MF),
3730                                   MachineMemOperand::MOLoad, 4, Align(4));
3731 
3732       CurDAG->setNodeMemRefs(cast<MachineSDNode>(ResNode), {MemOp});
3733 
3734       ReplaceNode(N, ResNode);
3735       return;
3736     }
3737 
3738     // Other cases are autogenerated.
3739     break;
3740   }
3741   case ISD::FrameIndex: {
3742     // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm.
3743     int FI = cast<FrameIndexSDNode>(N)->getIndex();
3744     SDValue TFI = CurDAG->getTargetFrameIndex(
3745         FI, TLI->getPointerTy(CurDAG->getDataLayout()));
3746     if (Subtarget->isThumb1Only()) {
3747       // Set the alignment of the frame object to 4, to avoid having to generate
3748       // more than one ADD
3749       MachineFrameInfo &MFI = MF->getFrameInfo();
3750       if (MFI.getObjectAlign(FI) < Align(4))
3751         MFI.setObjectAlignment(FI, Align(4));
3752       CurDAG->SelectNodeTo(N, ARM::tADDframe, MVT::i32, TFI,
3753                            CurDAG->getTargetConstant(0, dl, MVT::i32));
3754       return;
3755     } else {
3756       unsigned Opc = ((Subtarget->isThumb() && Subtarget->hasThumb2()) ?
3757                       ARM::t2ADDri : ARM::ADDri);
3758       SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, dl, MVT::i32),
3759                         getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
3760                         CurDAG->getRegister(0, MVT::i32) };
3761       CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops);
3762       return;
3763     }
3764   }
3765   case ISD::INSERT_VECTOR_ELT: {
3766     if (tryInsertVectorElt(N))
3767       return;
3768     break;
3769   }
3770   case ISD::SRL:
3771     if (tryV6T2BitfieldExtractOp(N, false))
3772       return;
3773     break;
3774   case ISD::SIGN_EXTEND_INREG:
3775   case ISD::SRA:
3776     if (tryV6T2BitfieldExtractOp(N, true))
3777       return;
3778     break;
3779   case ISD::FP_TO_UINT:
3780   case ISD::FP_TO_SINT:
3781   case ISD::FP_TO_UINT_SAT:
3782   case ISD::FP_TO_SINT_SAT:
3783     if (tryFP_TO_INT(N, dl))
3784       return;
3785     break;
3786   case ISD::FMUL:
3787     if (tryFMULFixed(N, dl))
3788       return;
3789     break;
3790   case ISD::MUL:
3791     if (Subtarget->isThumb1Only())
3792       break;
3793     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1))) {
3794       unsigned RHSV = C->getZExtValue();
3795       if (!RHSV) break;
3796       if (isPowerOf2_32(RHSV-1)) {  // 2^n+1?
3797         unsigned ShImm = Log2_32(RHSV-1);
3798         if (ShImm >= 32)
3799           break;
3800         SDValue V = N->getOperand(0);
3801         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3802         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3803         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3804         if (Subtarget->isThumb()) {
3805           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3806           CurDAG->SelectNodeTo(N, ARM::t2ADDrs, MVT::i32, Ops);
3807           return;
3808         } else {
3809           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3810                             Reg0 };
3811           CurDAG->SelectNodeTo(N, ARM::ADDrsi, MVT::i32, Ops);
3812           return;
3813         }
3814       }
3815       if (isPowerOf2_32(RHSV+1)) {  // 2^n-1?
3816         unsigned ShImm = Log2_32(RHSV+1);
3817         if (ShImm >= 32)
3818           break;
3819         SDValue V = N->getOperand(0);
3820         ShImm = ARM_AM::getSORegOpc(ARM_AM::lsl, ShImm);
3821         SDValue ShImmOp = CurDAG->getTargetConstant(ShImm, dl, MVT::i32);
3822         SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
3823         if (Subtarget->isThumb()) {
3824           SDValue Ops[] = { V, V, ShImmOp, getAL(CurDAG, dl), Reg0, Reg0 };
3825           CurDAG->SelectNodeTo(N, ARM::t2RSBrs, MVT::i32, Ops);
3826           return;
3827         } else {
3828           SDValue Ops[] = { V, V, Reg0, ShImmOp, getAL(CurDAG, dl), Reg0,
3829                             Reg0 };
3830           CurDAG->SelectNodeTo(N, ARM::RSBrsi, MVT::i32, Ops);
3831           return;
3832         }
3833       }
3834     }
3835     break;
3836   case ISD::AND: {
3837     // Check for unsigned bitfield extract
3838     if (tryV6T2BitfieldExtractOp(N, false))
3839       return;
3840 
3841     // If an immediate is used in an AND node, it is possible that the immediate
3842     // can be more optimally materialized when negated. If this is the case we
3843     // can negate the immediate and use a BIC instead.
3844     auto *N1C = dyn_cast<ConstantSDNode>(N->getOperand(1));
3845     if (N1C && N1C->hasOneUse() && Subtarget->isThumb()) {
3846       uint32_t Imm = (uint32_t) N1C->getZExtValue();
3847 
3848       // In Thumb2 mode, an AND can take a 12-bit immediate. If this
3849       // immediate can be negated and fit in the immediate operand of
3850       // a t2BIC, don't do any manual transform here as this can be
3851       // handled by the generic ISel machinery.
3852       bool PreferImmediateEncoding =
3853         Subtarget->hasThumb2() && (is_t2_so_imm(Imm) || is_t2_so_imm_not(Imm));
3854       if (!PreferImmediateEncoding &&
3855           ConstantMaterializationCost(Imm, Subtarget) >
3856               ConstantMaterializationCost(~Imm, Subtarget)) {
3857         // The current immediate costs more to materialize than a negated
3858         // immediate, so negate the immediate and use a BIC.
3859         SDValue NewImm = CurDAG->getConstant(~Imm, dl, MVT::i32);
3860         // If the new constant didn't exist before, reposition it in the topological
3861         // ordering so it is just before N. Otherwise, don't touch its location.
3862         if (NewImm->getNodeId() == -1)
3863           CurDAG->RepositionNode(N->getIterator(), NewImm.getNode());
3864 
3865         if (!Subtarget->hasThumb2()) {
3866           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32),
3867                            N->getOperand(0), NewImm, getAL(CurDAG, dl),
3868                            CurDAG->getRegister(0, MVT::i32)};
3869           ReplaceNode(N, CurDAG->getMachineNode(ARM::tBIC, dl, MVT::i32, Ops));
3870           return;
3871         } else {
3872           SDValue Ops[] = {N->getOperand(0), NewImm, getAL(CurDAG, dl),
3873                            CurDAG->getRegister(0, MVT::i32),
3874                            CurDAG->getRegister(0, MVT::i32)};
3875           ReplaceNode(N,
3876                       CurDAG->getMachineNode(ARM::t2BICrr, dl, MVT::i32, Ops));
3877           return;
3878         }
3879       }
3880     }
3881 
3882     // (and (or x, c2), c1) and top 16-bits of c1 and c2 match, lower 16-bits
3883     // of c1 are 0xffff, and lower 16-bit of c2 are 0. That is, the top 16-bits
3884     // are entirely contributed by c2 and lower 16-bits are entirely contributed
3885     // by x. That's equal to (or (and x, 0xffff), (and c1, 0xffff0000)).
    // Select it to: "movt x, ((c2 & 0xffff0000) >> 16)"
3887     EVT VT = N->getValueType(0);
3888     if (VT != MVT::i32)
3889       break;
3890     unsigned Opc = (Subtarget->isThumb() && Subtarget->hasThumb2())
3891       ? ARM::t2MOVTi16
3892       : (Subtarget->hasV6T2Ops() ? ARM::MOVTi16 : 0);
3893     if (!Opc)
3894       break;
3895     SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
3896     N1C = dyn_cast<ConstantSDNode>(N1);
3897     if (!N1C)
3898       break;
3899     if (N0.getOpcode() == ISD::OR && N0.getNode()->hasOneUse()) {
3900       SDValue N2 = N0.getOperand(1);
3901       ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
3902       if (!N2C)
3903         break;
3904       unsigned N1CVal = N1C->getZExtValue();
3905       unsigned N2CVal = N2C->getZExtValue();
3906       if ((N1CVal & 0xffff0000U) == (N2CVal & 0xffff0000U) &&
3907           (N1CVal & 0xffffU) == 0xffffU &&
3908           (N2CVal & 0xffffU) == 0x0U) {
3909         SDValue Imm16 = CurDAG->getTargetConstant((N2CVal & 0xFFFF0000U) >> 16,
3910                                                   dl, MVT::i32);
3911         SDValue Ops[] = { N0.getOperand(0), Imm16,
3912                           getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32) };
3913         ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, Ops));
3914         return;
3915       }
3916     }
3917 
3918     break;
3919   }
3920   case ARMISD::UMAAL: {
3921     unsigned Opc = Subtarget->isThumb() ? ARM::t2UMAAL : ARM::UMAAL;
3922     SDValue Ops[] = { N->getOperand(0), N->getOperand(1),
3923                       N->getOperand(2), N->getOperand(3),
3924                       getAL(CurDAG, dl),
3925                       CurDAG->getRegister(0, MVT::i32) };
3926     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, MVT::i32, Ops));
3927     return;
3928   }
3929   case ARMISD::UMLAL:{
3930     if (Subtarget->isThumb()) {
3931       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3932                         N->getOperand(3), getAL(CurDAG, dl),
3933                         CurDAG->getRegister(0, MVT::i32)};
3934       ReplaceNode(
3935           N, CurDAG->getMachineNode(ARM::t2UMLAL, dl, MVT::i32, MVT::i32, Ops));
3936       return;
3937     }else{
3938       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3939                         N->getOperand(3), getAL(CurDAG, dl),
3940                         CurDAG->getRegister(0, MVT::i32),
3941                         CurDAG->getRegister(0, MVT::i32) };
3942       ReplaceNode(N, CurDAG->getMachineNode(
3943                          Subtarget->hasV6Ops() ? ARM::UMLAL : ARM::UMLALv5, dl,
3944                          MVT::i32, MVT::i32, Ops));
3945       return;
3946     }
3947   }
3948   case ARMISD::SMLAL:{
3949     if (Subtarget->isThumb()) {
3950       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3951                         N->getOperand(3), getAL(CurDAG, dl),
3952                         CurDAG->getRegister(0, MVT::i32)};
3953       ReplaceNode(
3954           N, CurDAG->getMachineNode(ARM::t2SMLAL, dl, MVT::i32, MVT::i32, Ops));
3955       return;
3956     }else{
3957       SDValue Ops[] = { N->getOperand(0), N->getOperand(1), N->getOperand(2),
3958                         N->getOperand(3), getAL(CurDAG, dl),
3959                         CurDAG->getRegister(0, MVT::i32),
3960                         CurDAG->getRegister(0, MVT::i32) };
3961       ReplaceNode(N, CurDAG->getMachineNode(
3962                          Subtarget->hasV6Ops() ? ARM::SMLAL : ARM::SMLALv5, dl,
3963                          MVT::i32, MVT::i32, Ops));
3964       return;
3965     }
3966   }
3967   case ARMISD::SUBE: {
3968     if (!Subtarget->hasV6Ops() || !Subtarget->hasDSP())
3969       break;
3970     // Look for a pattern to match SMMLS
3971     // (sube a, (smul_loHi a, b), (subc 0, (smul_LOhi(a, b))))
3972     if (N->getOperand(1).getOpcode() != ISD::SMUL_LOHI ||
3973         N->getOperand(2).getOpcode() != ARMISD::SUBC ||
3974         !SDValue(N, 1).use_empty())
3975       break;
3976 
3977     if (Subtarget->isThumb())
3978       assert(Subtarget->hasThumb2() &&
3979              "This pattern should not be generated for Thumb");
3980 
3981     SDValue SmulLoHi = N->getOperand(1);
3982     SDValue Subc = N->getOperand(2);
3983     SDValue Zero = Subc.getOperand(0);
3984 
3985     if (!isNullConstant(Zero) || Subc.getOperand(1) != SmulLoHi.getValue(0) ||
3986         N->getOperand(1) != SmulLoHi.getValue(1) ||
3987         N->getOperand(2) != Subc.getValue(1))
3988       break;
3989 
3990     unsigned Opc = Subtarget->isThumb2() ? ARM::t2SMMLS : ARM::SMMLS;
3991     SDValue Ops[] = { SmulLoHi.getOperand(0), SmulLoHi.getOperand(1),
3992                       N->getOperand(0), getAL(CurDAG, dl),
3993                       CurDAG->getRegister(0, MVT::i32) };
3994     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops));
3995     return;
3996   }
3997   case ISD::LOAD: {
3998     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
3999       return;
4000     if (Subtarget->isThumb() && Subtarget->hasThumb2()) {
4001       if (tryT2IndexedLoad(N))
4002         return;
4003     } else if (Subtarget->isThumb()) {
4004       if (tryT1IndexedLoad(N))
4005         return;
4006     } else if (tryARMIndexedLoad(N))
4007       return;
4008     // Other cases are autogenerated.
4009     break;
4010   }
4011   case ISD::MLOAD:
4012     if (Subtarget->hasMVEIntegerOps() && tryMVEIndexedLoad(N))
4013       return;
4014     // Other cases are autogenerated.
4015     break;
4016   case ARMISD::WLSSETUP: {
4017     SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopSetup, dl, MVT::i32,
4018                                          N->getOperand(0));
4019     ReplaceUses(N, New);
4020     CurDAG->RemoveDeadNode(N);
4021     return;
4022   }
4023   case ARMISD::WLS: {
4024     SDNode *New = CurDAG->getMachineNode(ARM::t2WhileLoopStart, dl, MVT::Other,
4025                                          N->getOperand(1), N->getOperand(2),
4026                                          N->getOperand(0));
4027     ReplaceUses(N, New);
4028     CurDAG->RemoveDeadNode(N);
4029     return;
4030   }
4031   case ARMISD::LE: {
4032     SDValue Ops[] = { N->getOperand(1),
4033                       N->getOperand(2),
4034                       N->getOperand(0) };
4035     unsigned Opc = ARM::t2LoopEnd;
4036     SDNode *New = CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops);
4037     ReplaceUses(N, New);
4038     CurDAG->RemoveDeadNode(N);
4039     return;
4040   }
4041   case ARMISD::LDRD: {
4042     if (Subtarget->isThumb2())
4043       break; // TableGen handles isel in this case.
4044     SDValue Base, RegOffset, ImmOffset;
4045     const SDValue &Chain = N->getOperand(0);
4046     const SDValue &Addr = N->getOperand(1);
4047     SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4048     if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4049       // The register-offset variant of LDRD mandates that the register
4050       // allocated to RegOffset is not reused in any of the remaining operands.
4051       // This restriction is currently not enforced. Therefore emitting this
4052       // variant is explicitly avoided.
4053       Base = Addr;
4054       RegOffset = CurDAG->getRegister(0, MVT::i32);
4055     }
4056     SDValue Ops[] = {Base, RegOffset, ImmOffset, Chain};
4057     SDNode *New = CurDAG->getMachineNode(ARM::LOADDUAL, dl,
4058                                          {MVT::Untyped, MVT::Other}, Ops);
4059     SDValue Lo = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
4060                                                 SDValue(New, 0));
4061     SDValue Hi = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
4062                                                 SDValue(New, 0));
4063     transferMemOperands(N, New);
4064     ReplaceUses(SDValue(N, 0), Lo);
4065     ReplaceUses(SDValue(N, 1), Hi);
4066     ReplaceUses(SDValue(N, 2), SDValue(New, 1));
4067     CurDAG->RemoveDeadNode(N);
4068     return;
4069   }
4070   case ARMISD::STRD: {
4071     if (Subtarget->isThumb2())
4072       break; // TableGen handles isel in this case.
4073     SDValue Base, RegOffset, ImmOffset;
4074     const SDValue &Chain = N->getOperand(0);
4075     const SDValue &Addr = N->getOperand(3);
4076     SelectAddrMode3(Addr, Base, RegOffset, ImmOffset);
4077     if (RegOffset != CurDAG->getRegister(0, MVT::i32)) {
4078       // The register-offset variant of STRD mandates that the register
4079       // allocated to RegOffset is not reused in any of the remaining operands.
4080       // This restriction is currently not enforced. Therefore emitting this
4081       // variant is explicitly avoided.
4082       Base = Addr;
4083       RegOffset = CurDAG->getRegister(0, MVT::i32);
4084     }
4085     SDNode *RegPair =
4086         createGPRPairNode(MVT::Untyped, N->getOperand(1), N->getOperand(2));
4087     SDValue Ops[] = {SDValue(RegPair, 0), Base, RegOffset, ImmOffset, Chain};
4088     SDNode *New = CurDAG->getMachineNode(ARM::STOREDUAL, dl, MVT::Other, Ops);
4089     transferMemOperands(N, New);
4090     ReplaceUses(SDValue(N, 0), SDValue(New, 0));
4091     CurDAG->RemoveDeadNode(N);
4092     return;
4093   }
4094   case ARMISD::LOOP_DEC: {
4095     SDValue Ops[] = { N->getOperand(1),
4096                       N->getOperand(2),
4097                       N->getOperand(0) };
4098     SDNode *Dec =
4099       CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4100                              CurDAG->getVTList(MVT::i32, MVT::Other), Ops);
4101     ReplaceUses(N, Dec);
4102     CurDAG->RemoveDeadNode(N);
4103     return;
4104   }
4105   case ARMISD::BRCOND: {
4106     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4107     // Emits: (Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4108     // Pattern complexity = 6  cost = 1  size = 0
4109 
4110     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4111     // Emits: (tBcc:void (bb:Other):$dst, (imm:i32):$cc)
4112     // Pattern complexity = 6  cost = 1  size = 0
4113 
4114     // Pattern: (ARMbrcond:void (bb:Other):$dst, (imm:i32):$cc)
4115     // Emits: (t2Bcc:void (bb:Other):$dst, (imm:i32):$cc)
4116     // Pattern complexity = 6  cost = 1  size = 0
4117 
4118     unsigned Opc = Subtarget->isThumb() ?
4119       ((Subtarget->hasThumb2()) ? ARM::t2Bcc : ARM::tBcc) : ARM::Bcc;
4120     SDValue Chain = N->getOperand(0);
4121     SDValue N1 = N->getOperand(1);
4122     SDValue N2 = N->getOperand(2);
4123     SDValue Flags = N->getOperand(3);
4124     assert(N1.getOpcode() == ISD::BasicBlock);
4125     assert(N2.getOpcode() == ISD::Constant);
4126 
4127     unsigned CC = (unsigned)N2->getAsZExtVal();
4128 
4129     if (Flags.getOpcode() == ARMISD::CMPZ) {
4130       if (Flags.getOperand(0).getOpcode() == ISD::INTRINSIC_W_CHAIN) {
4131         SDValue Int = Flags.getOperand(0);
4132         uint64_t ID = Int->getConstantOperandVal(1);
4133 
4134         // Handle low-overhead loops.
4135         if (ID == Intrinsic::loop_decrement_reg) {
4136           SDValue Elements = Int.getOperand(2);
4137           SDValue Size = CurDAG->getTargetConstant(Int.getConstantOperandVal(3),
4138                                                    dl, MVT::i32);
4139 
4140           SDValue Args[] = { Elements, Size, Int.getOperand(0) };
4141           SDNode *LoopDec =
4142             CurDAG->getMachineNode(ARM::t2LoopDec, dl,
4143                                    CurDAG->getVTList(MVT::i32, MVT::Other),
4144                                    Args);
4145           ReplaceUses(Int.getNode(), LoopDec);
4146 
4147           SDValue EndArgs[] = { SDValue(LoopDec, 0), N1, Chain };
4148           SDNode *LoopEnd =
4149             CurDAG->getMachineNode(ARM::t2LoopEnd, dl, MVT::Other, EndArgs);
4150 
4151           ReplaceUses(N, LoopEnd);
4152           CurDAG->RemoveDeadNode(N);
4153           CurDAG->RemoveDeadNode(Flags.getNode());
4154           CurDAG->RemoveDeadNode(Int.getNode());
4155           return;
4156         }
4157       }
4158 
4159       bool SwitchEQNEToPLMI;
4160       SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4161       Flags = N->getOperand(3);
4162 
4163       if (SwitchEQNEToPLMI) {
4164         switch ((ARMCC::CondCodes)CC) {
4165         default: llvm_unreachable("CMPZ must be either NE or EQ!");
4166         case ARMCC::NE:
4167           CC = (unsigned)ARMCC::MI;
4168           break;
4169         case ARMCC::EQ:
4170           CC = (unsigned)ARMCC::PL;
4171           break;
4172         }
4173       }
4174     }
4175 
4176     SDValue Tmp2 = CurDAG->getTargetConstant(CC, dl, MVT::i32);
4177     Chain = CurDAG->getCopyToReg(Chain, dl, ARM::CPSR, Flags, SDValue());
4178     SDValue Ops[] = {N1, Tmp2, CurDAG->getRegister(ARM::CPSR, MVT::i32), Chain,
4179                      Chain.getValue(1)};
4180     CurDAG->SelectNodeTo(N, Opc, MVT::Other, Ops);
4181     return;
4182   }
4183 
4184   case ARMISD::CMPZ: {
4185     // select (CMPZ X, #-C) -> (CMPZ (ADDS X, #C), #0)
4186     //   This allows us to avoid materializing the expensive negative constant.
4187     //   The CMPZ #0 is useless and will be peepholed away but we need to keep
4188     //   it for its flags output.
4189     SDValue X = N->getOperand(0);
4190     auto *C = dyn_cast<ConstantSDNode>(N->getOperand(1).getNode());
4191     if (C && C->getSExtValue() < 0 && Subtarget->isThumb()) {
4192       int64_t Addend = -C->getSExtValue();
4193 
4194       SDNode *Add = nullptr;
4195       // ADDS can be better than CMN if the immediate fits in a
4196       // 16-bit ADDS, which means either [0,256) for tADDi8 or [0,8) for tADDi3.
4197       // Outside that range we can just use a CMN which is 32-bit but has a
4198       // 12-bit immediate range.
4199       if (Addend < 1<<8) {
4200         if (Subtarget->isThumb2()) {
4201           SDValue Ops[] = { X, CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4202                             getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32),
4203                             CurDAG->getRegister(0, MVT::i32) };
4204           Add = CurDAG->getMachineNode(ARM::t2ADDri, dl, MVT::i32, Ops);
4205         } else {
4206           unsigned Opc = (Addend < 1<<3) ? ARM::tADDi3 : ARM::tADDi8;
4207           SDValue Ops[] = {CurDAG->getRegister(ARM::CPSR, MVT::i32), X,
4208                            CurDAG->getTargetConstant(Addend, dl, MVT::i32),
4209                            getAL(CurDAG, dl), CurDAG->getRegister(0, MVT::i32)};
4210           Add = CurDAG->getMachineNode(Opc, dl, MVT::i32, Ops);
4211         }
4212       }
4213       if (Add) {
4214         SDValue Ops2[] = {SDValue(Add, 0), CurDAG->getConstant(0, dl, MVT::i32)};
4215         CurDAG->MorphNodeTo(N, ARMISD::CMPZ, N->getVTList(), Ops2);
4216       }
4217     }
4218     // Other cases are autogenerated.
4219     break;
4220   }
4221 
4222   case ARMISD::CMOV: {
4223     SDValue Flags = N->getOperand(3);
4224 
4225     if (Flags.getOpcode() == ARMISD::CMPZ) {
4226       bool SwitchEQNEToPLMI;
4227       SelectCMPZ(Flags.getNode(), SwitchEQNEToPLMI);
4228 
4229       if (SwitchEQNEToPLMI) {
4230         SDValue ARMcc = N->getOperand(2);
4231         ARMCC::CondCodes CC = (ARMCC::CondCodes)ARMcc->getAsZExtVal();
4232 
4233         switch (CC) {
4234         default: llvm_unreachable("CMPZ must be either NE or EQ!");
4235         case ARMCC::NE:
4236           CC = ARMCC::MI;
4237           break;
4238         case ARMCC::EQ:
4239           CC = ARMCC::PL;
4240           break;
4241         }
4242         SDValue NewARMcc = CurDAG->getConstant((unsigned)CC, dl, MVT::i32);
4243         SDValue Ops[] = {N->getOperand(0), N->getOperand(1), NewARMcc,
4244                          N->getOperand(3)};
4245         CurDAG->MorphNodeTo(N, ARMISD::CMOV, N->getVTList(), Ops);
4246       }
4247     }
4248     // Other cases are autogenerated.
4249     break;
4250   }
4251   case ARMISD::VZIP: {
4252     EVT VT = N->getValueType(0);
4253     // vzip.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4254     unsigned Opc64[] = {ARM::VZIPd8, ARM::VZIPd16, ARM::VTRNd32};
4255     unsigned Opc128[] = {ARM::VZIPq8, ARM::VZIPq16, ARM::VZIPq32};
4256     unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4257     SDValue Pred = getAL(CurDAG, dl);
4258     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4259     SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4260     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4261     return;
4262   }
4263   case ARMISD::VUZP: {
4264     EVT VT = N->getValueType(0);
4265     // vuzp.32 Dd, Dm is a pseudo-instruction expanded to vtrn.32 Dd, Dm.
4266     unsigned Opc64[] = {ARM::VUZPd8, ARM::VUZPd16, ARM::VTRNd32};
4267     unsigned Opc128[] = {ARM::VUZPq8, ARM::VUZPq16, ARM::VUZPq32};
4268     unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4269     SDValue Pred = getAL(CurDAG, dl);
4270     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4271     SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4272     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4273     return;
4274   }
4275   case ARMISD::VTRN: {
4276     EVT VT = N->getValueType(0);
4277     unsigned Opc64[] = {ARM::VTRNd8, ARM::VTRNd16, ARM::VTRNd32};
4278     unsigned Opc128[] = {ARM::VTRNq8, ARM::VTRNq16, ARM::VTRNq32};
4279     unsigned Opc = getVectorShuffleOpcode(VT, Opc64, Opc128);
4280     SDValue Pred = getAL(CurDAG, dl);
4281     SDValue PredReg = CurDAG->getRegister(0, MVT::i32);
4282     SDValue Ops[] = {N->getOperand(0), N->getOperand(1), Pred, PredReg};
4283     ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, VT, VT, Ops));
4284     return;
4285   }
4286   case ARMISD::BUILD_VECTOR: {
4287     EVT VecVT = N->getValueType(0);
4288     EVT EltVT = VecVT.getVectorElementType();
4289     unsigned NumElts = VecVT.getVectorNumElements();
4290     if (EltVT == MVT::f64) {
4291       assert(NumElts == 2 && "unexpected type for BUILD_VECTOR");
4292       ReplaceNode(
4293           N, createDRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4294       return;
4295     }
4296     assert(EltVT == MVT::f32 && "unexpected type for BUILD_VECTOR");
4297     if (NumElts == 2) {
4298       ReplaceNode(
4299           N, createSRegPairNode(VecVT, N->getOperand(0), N->getOperand(1)));
4300       return;
4301     }
4302     assert(NumElts == 4 && "unexpected type for BUILD_VECTOR");
4303     ReplaceNode(N,
4304                 createQuadSRegsNode(VecVT, N->getOperand(0), N->getOperand(1),
4305                                     N->getOperand(2), N->getOperand(3)));
4306     return;
4307   }
4308 
4309   case ARMISD::VLD1DUP: {
4310     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8, ARM::VLD1DUPd16,
4311                                          ARM::VLD1DUPd32 };
4312     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8, ARM::VLD1DUPq16,
4313                                          ARM::VLD1DUPq32 };
4314     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 1, DOpcodes, QOpcodes);
4315     return;
4316   }
4317 
4318   case ARMISD::VLD2DUP: {
4319     static const uint16_t Opcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
4320                                         ARM::VLD2DUPd32 };
4321     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 2, Opcodes);
4322     return;
4323   }
4324 
4325   case ARMISD::VLD3DUP: {
4326     static const uint16_t Opcodes[] = { ARM::VLD3DUPd8Pseudo,
4327                                         ARM::VLD3DUPd16Pseudo,
4328                                         ARM::VLD3DUPd32Pseudo };
4329     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 3, Opcodes);
4330     return;
4331   }
4332 
4333   case ARMISD::VLD4DUP: {
4334     static const uint16_t Opcodes[] = { ARM::VLD4DUPd8Pseudo,
4335                                         ARM::VLD4DUPd16Pseudo,
4336                                         ARM::VLD4DUPd32Pseudo };
4337     SelectVLDDup(N, /* IsIntrinsic= */ false, false, 4, Opcodes);
4338     return;
4339   }
4340 
4341   case ARMISD::VLD1DUP_UPD: {
4342     static const uint16_t DOpcodes[] = { ARM::VLD1DUPd8wb_fixed,
4343                                          ARM::VLD1DUPd16wb_fixed,
4344                                          ARM::VLD1DUPd32wb_fixed };
4345     static const uint16_t QOpcodes[] = { ARM::VLD1DUPq8wb_fixed,
4346                                          ARM::VLD1DUPq16wb_fixed,
4347                                          ARM::VLD1DUPq32wb_fixed };
4348     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 1, DOpcodes, QOpcodes);
4349     return;
4350   }
4351 
4352   case ARMISD::VLD2DUP_UPD: {
4353     static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8wb_fixed,
4354                                          ARM::VLD2DUPd16wb_fixed,
4355                                          ARM::VLD2DUPd32wb_fixed,
4356                                          ARM::VLD1q64wb_fixed };
4357     static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
4358                                           ARM::VLD2DUPq16EvenPseudo,
4359                                           ARM::VLD2DUPq32EvenPseudo };
4360     static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudoWB_fixed,
4361                                           ARM::VLD2DUPq16OddPseudoWB_fixed,
4362                                           ARM::VLD2DUPq32OddPseudoWB_fixed };
4363     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 2, DOpcodes, QOpcodes0, QOpcodes1);
4364     return;
4365   }
4366 
4367   case ARMISD::VLD3DUP_UPD: {
4368     static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo_UPD,
4369                                          ARM::VLD3DUPd16Pseudo_UPD,
4370                                          ARM::VLD3DUPd32Pseudo_UPD,
4371                                          ARM::VLD1d64TPseudoWB_fixed };
4372     static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
4373                                           ARM::VLD3DUPq16EvenPseudo,
4374                                           ARM::VLD3DUPq32EvenPseudo };
4375     static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo_UPD,
4376                                           ARM::VLD3DUPq16OddPseudo_UPD,
4377                                           ARM::VLD3DUPq32OddPseudo_UPD };
4378     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4379     return;
4380   }
4381 
4382   case ARMISD::VLD4DUP_UPD: {
4383     static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo_UPD,
4384                                          ARM::VLD4DUPd16Pseudo_UPD,
4385                                          ARM::VLD4DUPd32Pseudo_UPD,
4386                                          ARM::VLD1d64QPseudoWB_fixed };
4387     static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
4388                                           ARM::VLD4DUPq16EvenPseudo,
4389                                           ARM::VLD4DUPq32EvenPseudo };
4390     static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo_UPD,
4391                                           ARM::VLD4DUPq16OddPseudo_UPD,
4392                                           ARM::VLD4DUPq32OddPseudo_UPD };
4393     SelectVLDDup(N, /* IsIntrinsic= */ false, true, 4, DOpcodes, QOpcodes0, QOpcodes1);
4394     return;
4395   }
4396 
4397   case ARMISD::VLD1_UPD: {
4398     static const uint16_t DOpcodes[] = { ARM::VLD1d8wb_fixed,
4399                                          ARM::VLD1d16wb_fixed,
4400                                          ARM::VLD1d32wb_fixed,
4401                                          ARM::VLD1d64wb_fixed };
4402     static const uint16_t QOpcodes[] = { ARM::VLD1q8wb_fixed,
4403                                          ARM::VLD1q16wb_fixed,
4404                                          ARM::VLD1q32wb_fixed,
4405                                          ARM::VLD1q64wb_fixed };
4406     SelectVLD(N, true, 1, DOpcodes, QOpcodes, nullptr);
4407     return;
4408   }
4409 
4410   case ARMISD::VLD2_UPD: {
4411     if (Subtarget->hasNEON()) {
4412       static const uint16_t DOpcodes[] = {
4413           ARM::VLD2d8wb_fixed, ARM::VLD2d16wb_fixed, ARM::VLD2d32wb_fixed,
4414           ARM::VLD1q64wb_fixed};
4415       static const uint16_t QOpcodes[] = {ARM::VLD2q8PseudoWB_fixed,
4416                                           ARM::VLD2q16PseudoWB_fixed,
4417                                           ARM::VLD2q32PseudoWB_fixed};
4418       SelectVLD(N, true, 2, DOpcodes, QOpcodes, nullptr);
4419     } else {
4420       static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8,
4421                                           ARM::MVE_VLD21_8_wb};
4422       static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
4423                                            ARM::MVE_VLD21_16_wb};
4424       static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
4425                                            ARM::MVE_VLD21_32_wb};
4426       static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
4427       SelectMVE_VLD(N, 2, Opcodes, true);
4428     }
4429     return;
4430   }
4431 
4432   case ARMISD::VLD3_UPD: {
4433     static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo_UPD,
4434                                          ARM::VLD3d16Pseudo_UPD,
4435                                          ARM::VLD3d32Pseudo_UPD,
4436                                          ARM::VLD1d64TPseudoWB_fixed};
4437     static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
4438                                           ARM::VLD3q16Pseudo_UPD,
4439                                           ARM::VLD3q32Pseudo_UPD };
4440     static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo_UPD,
4441                                           ARM::VLD3q16oddPseudo_UPD,
4442                                           ARM::VLD3q32oddPseudo_UPD };
4443     SelectVLD(N, true, 3, DOpcodes, QOpcodes0, QOpcodes1);
4444     return;
4445   }
4446 
  // Post-increment (writeback) 4-element interleaved load.  Opcode tables are
  // indexed by element size (8/16/32, plus a 64-bit entry in the D table).
  // On NEON this selects VLD4 pseudos; otherwise the MVE VLD4x sequence is
  // used, with the writeback (_wb) form on the final beat.
  case ARMISD::VLD4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD4d8Pseudo_UPD, ARM::VLD4d16Pseudo_UPD, ARM::VLD4d32Pseudo_UPD,
          ARM::VLD1d64QPseudoWB_fixed};
      // Q-register forms are selected as two pseudos (even/odd halves).
      static const uint16_t QOpcodes0[] = {ARM::VLD4q8Pseudo_UPD,
                                           ARM::VLD4q16Pseudo_UPD,
                                           ARM::VLD4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VLD4q8oddPseudo_UPD,
                                           ARM::VLD4q16oddPseudo_UPD,
                                           ARM::VLD4q32oddPseudo_UPD};
      SelectVLD(N, /*isUpdating=*/true, 4, DOpcodes, QOpcodes0, QOpcodes1);
    } else {
      // MVE: one opcode table per element size; each table holds the four
      // VLD4x beats, the last one in its writeback form.
      static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
                                          ARM::MVE_VLD42_8,
                                          ARM::MVE_VLD43_8_wb};
      static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
                                           ARM::MVE_VLD42_16,
                                           ARM::MVE_VLD43_16_wb};
      static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
                                           ARM::MVE_VLD42_32,
                                           ARM::MVE_VLD43_32_wb};
      static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
      SelectMVE_VLD(N, 4, Opcodes, true);
    }
    return;
  }

  // Post-increment VLD1 of two consecutive registers.  Only custom-selected
  // for NEON; otherwise fall out of the switch for default handling.
  case ARMISD::VLD1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1q8wb_fixed, ARM::VLD1q16wb_fixed, ARM::VLD1q32wb_fixed,
          ARM::VLD1q64wb_fixed};
      static const uint16_t QOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      SelectVLD(N, /*isUpdating=*/true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  // Post-increment VLD1 of three consecutive registers (NEON only).
  case ARMISD::VLD1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8TPseudoWB_fixed, ARM::VLD1d16TPseudoWB_fixed,
          ARM::VLD1d32TPseudoWB_fixed, ARM::VLD1d64TPseudoWB_fixed};
      // Q forms split across Low/High pseudo pairs.
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowTPseudo_UPD, ARM::VLD1q16LowTPseudo_UPD,
          ARM::VLD1q32LowTPseudo_UPD, ARM::VLD1q64LowTPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighTPseudo_UPD, ARM::VLD1q16HighTPseudo_UPD,
          ARM::VLD1q32HighTPseudo_UPD, ARM::VLD1q64HighTPseudo_UPD};
      SelectVLD(N, /*isUpdating=*/true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  // Post-increment VLD1 of four consecutive registers (NEON only).
  case ARMISD::VLD1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VLD1d8QPseudoWB_fixed, ARM::VLD1d16QPseudoWB_fixed,
          ARM::VLD1d32QPseudoWB_fixed, ARM::VLD1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {
          ARM::VLD1q8LowQPseudo_UPD, ARM::VLD1q16LowQPseudo_UPD,
          ARM::VLD1q32LowQPseudo_UPD, ARM::VLD1q64LowQPseudo_UPD};
      static const uint16_t QOpcodes1[] = {
          ARM::VLD1q8HighQPseudo_UPD, ARM::VLD1q16HighQPseudo_UPD,
          ARM::VLD1q32HighQPseudo_UPD, ARM::VLD1q64HighQPseudo_UPD};
      SelectVLD(N, /*isUpdating=*/true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
4522 
  // Post-increment single-lane interleaved loads (VLDnLN).  D tables are
  // indexed by element size 8/16/32; Q tables cover only 16/32 — an 8-bit
  // element lane load never needs a Q register.
  case ARMISD::VLD2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo_UPD,
                                         ARM::VLD2LNd16Pseudo_UPD,
                                         ARM::VLD2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo_UPD,
                                         ARM::VLD2LNq32Pseudo_UPD };
    // (load, updating) lane operation on 2 vectors.
    SelectVLDSTLane(N, true, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo_UPD,
                                         ARM::VLD3LNd16Pseudo_UPD,
                                         ARM::VLD3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo_UPD,
                                         ARM::VLD3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VLD4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo_UPD,
                                         ARM::VLD4LNd16Pseudo_UPD,
                                         ARM::VLD4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo_UPD,
                                         ARM::VLD4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, true, true, 4, DOpcodes, QOpcodes);
    return;
  }
4552 
  // Post-increment (writeback) vector stores.  Mirrors the VLD*_UPD cases
  // above: opcode tables indexed by element size; Q-register forms for the
  // multi-vector stores are split into two pseudo tables.
  case ARMISD::VST1_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST1d8wb_fixed,
                                         ARM::VST1d16wb_fixed,
                                         ARM::VST1d32wb_fixed,
                                         ARM::VST1d64wb_fixed };
    static const uint16_t QOpcodes[] = { ARM::VST1q8wb_fixed,
                                         ARM::VST1q16wb_fixed,
                                         ARM::VST1q32wb_fixed,
                                         ARM::VST1q64wb_fixed };
    SelectVST(N, /*isUpdating=*/true, 1, DOpcodes, QOpcodes, nullptr);
    return;
  }

  // Only custom-selected with NEON; otherwise break out for default handling.
  case ARMISD::VST2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST2d8wb_fixed, ARM::VST2d16wb_fixed, ARM::VST2d32wb_fixed,
          ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = {ARM::VST2q8PseudoWB_fixed,
                                          ARM::VST2q16PseudoWB_fixed,
                                          ARM::VST2q32PseudoWB_fixed};
      SelectVST(N, /*isUpdating=*/true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  case ARMISD::VST3_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo_UPD,
                                         ARM::VST3d16Pseudo_UPD,
                                         ARM::VST3d32Pseudo_UPD,
                                         ARM::VST1d64TPseudoWB_fixed};
    // Q forms selected as even/odd pseudo pairs.
    static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                          ARM::VST3q16Pseudo_UPD,
                                          ARM::VST3q32Pseudo_UPD };
    static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo_UPD,
                                          ARM::VST3q16oddPseudo_UPD,
                                          ARM::VST3q32oddPseudo_UPD };
    SelectVST(N, /*isUpdating=*/true, 3, DOpcodes, QOpcodes0, QOpcodes1);
    return;
  }

  case ARMISD::VST4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = {
          ARM::VST4d8Pseudo_UPD, ARM::VST4d16Pseudo_UPD, ARM::VST4d32Pseudo_UPD,
          ARM::VST1d64QPseudoWB_fixed};
      static const uint16_t QOpcodes0[] = {ARM::VST4q8Pseudo_UPD,
                                           ARM::VST4q16Pseudo_UPD,
                                           ARM::VST4q32Pseudo_UPD};
      static const uint16_t QOpcodes1[] = {ARM::VST4q8oddPseudo_UPD,
                                           ARM::VST4q16oddPseudo_UPD,
                                           ARM::VST4q32oddPseudo_UPD};
      SelectVST(N, /*isUpdating=*/true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  // Post-increment VST1 of two consecutive registers (NEON only).
  case ARMISD::VST1x2_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1q8wb_fixed,
                                           ARM::VST1q16wb_fixed,
                                           ARM::VST1q32wb_fixed,
                                           ARM::VST1q64wb_fixed};
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      SelectVST(N, /*isUpdating=*/true, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }
    break;
  }

  // Post-increment VST1 of three consecutive registers (NEON only).
  case ARMISD::VST1x3_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudoWB_fixed,
                                           ARM::VST1d16TPseudoWB_fixed,
                                           ARM::VST1d32TPseudoWB_fixed,
                                           ARM::VST1d64TPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo_UPD,
                                            ARM::VST1q16HighTPseudo_UPD,
                                            ARM::VST1q32HighTPseudo_UPD,
                                            ARM::VST1q64HighTPseudo_UPD };
      SelectVST(N, /*isUpdating=*/true, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }

  // Post-increment VST1 of four consecutive registers (NEON only).
  case ARMISD::VST1x4_UPD: {
    if (Subtarget->hasNEON()) {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudoWB_fixed,
                                           ARM::VST1d16QPseudoWB_fixed,
                                           ARM::VST1d32QPseudoWB_fixed,
                                           ARM::VST1d64QPseudoWB_fixed };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo_UPD,
                                            ARM::VST1q16HighQPseudo_UPD,
                                            ARM::VST1q32HighQPseudo_UPD,
                                            ARM::VST1q64HighQPseudo_UPD };
      SelectVST(N, /*isUpdating=*/true, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
    break;
  }
  // Post-increment single-lane interleaved stores.  Q tables cover only
  // 16/32-bit elements, as with the VLDnLN_UPD cases.
  case ARMISD::VST2LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo_UPD,
                                         ARM::VST2LNd16Pseudo_UPD,
                                         ARM::VST2LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo_UPD,
                                         ARM::VST2LNq32Pseudo_UPD };
    // (store, updating) lane operation on 2 vectors.
    SelectVLDSTLane(N, false, true, 2, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST3LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo_UPD,
                                         ARM::VST3LNd16Pseudo_UPD,
                                         ARM::VST3LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo_UPD,
                                         ARM::VST3LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 3, DOpcodes, QOpcodes);
    return;
  }

  case ARMISD::VST4LN_UPD: {
    static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo_UPD,
                                         ARM::VST4LNd16Pseudo_UPD,
                                         ARM::VST4LNd32Pseudo_UPD };
    static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo_UPD,
                                         ARM::VST4LNq32Pseudo_UPD };
    SelectVLDSTLane(N, false, true, 4, DOpcodes, QOpcodes);
    return;
  }
4696 
4697   case ISD::INTRINSIC_VOID:
4698   case ISD::INTRINSIC_W_CHAIN: {
4699     unsigned IntNo = N->getConstantOperandVal(1);
4700     switch (IntNo) {
4701     default:
4702       break;
4703 
    // Coprocessor double-register move-from (MRRC/MRRC2), Thumb2 or ARM
    // encoding depending on the subtarget mode.
    case Intrinsic::arm_mrrc:
    case Intrinsic::arm_mrrc2: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      unsigned Opc;

      if (Subtarget->isThumb())
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::t2MRRC : ARM::t2MRRC2);
      else
        Opc = (IntNo == Intrinsic::arm_mrrc ? ARM::MRRC : ARM::MRRC2);

      // Immediate operands 2..4 of the intrinsic become the instruction's
      // coprocessor number, opcode, and CRm fields.
      SmallVector<SDValue, 5> Ops;
      Ops.push_back(getI32Imm(N->getConstantOperandVal(2), dl)); /* coproc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(3), dl)); /* opc */
      Ops.push_back(getI32Imm(N->getConstantOperandVal(4), dl)); /* CRm */

      // The ARM-mode mrrc2 instruction doesn't allow predicates: the top 4
      // bits of the encoded instruction are always '1111'.  Assembly syntax
      // still accepts an AL predicate on mrrc2, but it makes no difference to
      // the encoding, so no predicate operands are appended in that case.
      if (Opc != ARM::MRRC2) {
        Ops.push_back(getAL(CurDAG, dl));
        Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      }

      Ops.push_back(Chain);

      // Writes to two registers.
      const EVT RetType[] = {MVT::i32, MVT::i32, MVT::Other};

      ReplaceNode(N, CurDAG->getMachineNode(Opc, dl, RetType, Ops));
      return;
    }
    // 64-bit exclusive loads (LDREXD / acquire variant LDAEXD).
    case Intrinsic::arm_ldaexd:
    case Intrinsic::arm_ldrexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue MemAddr = N->getOperand(2);
      bool isThumb = Subtarget->isThumb() && Subtarget->hasV8MBaselineOps();

      bool IsAcquire = IntNo == Intrinsic::arm_ldaexd;
      unsigned NewOpc = isThumb ? (IsAcquire ? ARM::t2LDAEXD : ARM::t2LDREXD)
                                : (IsAcquire ? ARM::LDAEXD : ARM::LDREXD);

      // arm_ldrexd returns a i64 value in {i32, i32}.  The Thumb2 form
      // produces two separate i32 results; the ARM form produces a single
      // register pair (Untyped) that is split below with EXTRACT_SUBREG.
      std::vector<EVT> ResTys;
      if (isThumb) {
        ResTys.push_back(MVT::i32);
        ResTys.push_back(MVT::i32);
      } else
        ResTys.push_back(MVT::Untyped);
      ResTys.push_back(MVT::Other);

      // Place arguments in the right order.
      SDValue Ops[] = {MemAddr, getAL(CurDAG, dl),
                       CurDAG->getRegister(0, MVT::i32), Chain};
      SDNode *Ld = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(Ld), {MemOp});

      // Remap uses.  The chain is result 2 on Thumb (after the two i32s) but
      // result 1 on ARM (after the single Untyped pair).
      SDValue OutChain = isThumb ? SDValue(Ld, 2) : SDValue(Ld, 1);
      if (!SDValue(N, 0).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 0);
        else {
          // Low half of the pair: gsub_0.
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(ARM::gsub_0, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(SDValue(N, 0), Result);
      }
      if (!SDValue(N, 1).use_empty()) {
        SDValue Result;
        if (isThumb)
          Result = SDValue(Ld, 1);
        else {
          // High half of the pair: gsub_1.
          SDValue SubRegIdx =
            CurDAG->getTargetConstant(ARM::gsub_1, dl, MVT::i32);
          SDNode *ResNode = CurDAG->getMachineNode(TargetOpcode::EXTRACT_SUBREG,
              dl, MVT::i32, SDValue(Ld, 0), SubRegIdx);
          Result = SDValue(ResNode,0);
        }
        ReplaceUses(SDValue(N, 1), Result);
      }
      ReplaceUses(SDValue(N, 2), OutChain);
      CurDAG->RemoveDeadNode(N);
      return;
    }
    // 64-bit exclusive stores (STREXD / release variant STLEXD).
    case Intrinsic::arm_stlexd:
    case Intrinsic::arm_strexd: {
      SDLoc dl(N);
      SDValue Chain = N->getOperand(0);
      SDValue Val0 = N->getOperand(2);
      SDValue Val1 = N->getOperand(3);
      SDValue MemAddr = N->getOperand(4);

      // Store exclusive double return a i32 value which is the return status
      // of the issued store.
      const EVT ResTys[] = {MVT::i32, MVT::Other};

      bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2();
      // Place arguments in the right order.  Thumb2 takes the two halves as
      // separate operands; ARM mode needs them bundled into a GPRPair.
      SmallVector<SDValue, 7> Ops;
      if (isThumb) {
        Ops.push_back(Val0);
        Ops.push_back(Val1);
      } else
        // arm_strexd uses GPRPair.
        Ops.push_back(SDValue(createGPRPairNode(MVT::Untyped, Val0, Val1), 0));
      Ops.push_back(MemAddr);
      Ops.push_back(getAL(CurDAG, dl));
      Ops.push_back(CurDAG->getRegister(0, MVT::i32));
      Ops.push_back(Chain);

      bool IsRelease = IntNo == Intrinsic::arm_stlexd;
      unsigned NewOpc = isThumb ? (IsRelease ? ARM::t2STLEXD : ARM::t2STREXD)
                                : (IsRelease ? ARM::STLEXD : ARM::STREXD);

      SDNode *St = CurDAG->getMachineNode(NewOpc, dl, ResTys, Ops);
      // Transfer memoperands.
      MachineMemOperand *MemOp = cast<MemIntrinsicSDNode>(N)->getMemOperand();
      CurDAG->setNodeMemRefs(cast<MachineSDNode>(St), {MemOp});

      ReplaceNode(N, St);
      return;
    }
4834 
    // Non-writeback NEON interleaved-load intrinsics.  Same table layout as
    // the ARMISD::VLD*_UPD cases, but SelectVLD is called with
    // isUpdating=false and the non-writeback opcodes.
    case Intrinsic::arm_neon_vld1: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8, ARM::VLD1d16,
                                           ARM::VLD1d32, ARM::VLD1d64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64};
      SelectVLD(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x2: {
      static const uint16_t DOpcodes[] = { ARM::VLD1q8, ARM::VLD1q16,
                                           ARM::VLD1q32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld1x3: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8TPseudo,
                                           ARM::VLD1d16TPseudo,
                                           ARM::VLD1d32TPseudo,
                                           ARM::VLD1d64TPseudo };
      // Q form selected as Low (updating) + High pseudo pair.
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowTPseudo_UPD,
                                            ARM::VLD1q16LowTPseudo_UPD,
                                            ARM::VLD1q32LowTPseudo_UPD,
                                            ARM::VLD1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighTPseudo,
                                            ARM::VLD1q16HighTPseudo,
                                            ARM::VLD1q32HighTPseudo,
                                            ARM::VLD1q64HighTPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld1x4: {
      static const uint16_t DOpcodes[] = { ARM::VLD1d8QPseudo,
                                           ARM::VLD1d16QPseudo,
                                           ARM::VLD1d32QPseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD1q8LowQPseudo_UPD,
                                            ARM::VLD1q16LowQPseudo_UPD,
                                            ARM::VLD1q32LowQPseudo_UPD,
                                            ARM::VLD1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD1q8HighQPseudo,
                                            ARM::VLD1q16HighQPseudo,
                                            ARM::VLD1q32HighQPseudo,
                                            ARM::VLD1q64HighQPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld2: {
      static const uint16_t DOpcodes[] = { ARM::VLD2d8, ARM::VLD2d16,
                                           ARM::VLD2d32, ARM::VLD1q64 };
      static const uint16_t QOpcodes[] = { ARM::VLD2q8Pseudo, ARM::VLD2q16Pseudo,
                                           ARM::VLD2q32Pseudo };
      SelectVLD(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vld3: {
      static const uint16_t DOpcodes[] = { ARM::VLD3d8Pseudo,
                                           ARM::VLD3d16Pseudo,
                                           ARM::VLD3d32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3q8Pseudo_UPD,
                                            ARM::VLD3q16Pseudo_UPD,
                                            ARM::VLD3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD3q8oddPseudo,
                                            ARM::VLD3q16oddPseudo,
                                            ARM::VLD3q32oddPseudo };
      SelectVLD(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4: {
      static const uint16_t DOpcodes[] = { ARM::VLD4d8Pseudo,
                                           ARM::VLD4d16Pseudo,
                                           ARM::VLD4d32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4q8Pseudo_UPD,
                                            ARM::VLD4q16Pseudo_UPD,
                                            ARM::VLD4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VLD4q8oddPseudo,
                                            ARM::VLD4q16oddPseudo,
                                            ARM::VLD4q32oddPseudo };
      SelectVLD(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }
4927 
    // NEON load-and-duplicate intrinsics: load one element per vector and
    // replicate it across all lanes.  Q forms use Even/Odd pseudo pairs.
    case Intrinsic::arm_neon_vld2dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD2DUPd8, ARM::VLD2DUPd16,
                                           ARM::VLD2DUPd32, ARM::VLD1q64 };
      static const uint16_t QOpcodes0[] = { ARM::VLD2DUPq8EvenPseudo,
                                            ARM::VLD2DUPq16EvenPseudo,
                                            ARM::VLD2DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD2DUPq8OddPseudo,
                                            ARM::VLD2DUPq16OddPseudo,
                                            ARM::VLD2DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 2,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld3dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD3DUPd8Pseudo,
                                           ARM::VLD3DUPd16Pseudo,
                                           ARM::VLD3DUPd32Pseudo,
                                           ARM::VLD1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD3DUPq8EvenPseudo,
                                            ARM::VLD3DUPq16EvenPseudo,
                                            ARM::VLD3DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD3DUPq8OddPseudo,
                                            ARM::VLD3DUPq16OddPseudo,
                                            ARM::VLD3DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 3,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vld4dup: {
      static const uint16_t DOpcodes[] = { ARM::VLD4DUPd8Pseudo,
                                           ARM::VLD4DUPd16Pseudo,
                                           ARM::VLD4DUPd32Pseudo,
                                           ARM::VLD1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VLD4DUPq8EvenPseudo,
                                            ARM::VLD4DUPq16EvenPseudo,
                                            ARM::VLD4DUPq32EvenPseudo };
      static const uint16_t QOpcodes1[] = { ARM::VLD4DUPq8OddPseudo,
                                            ARM::VLD4DUPq16OddPseudo,
                                            ARM::VLD4DUPq32OddPseudo };
      SelectVLDDup(N, /* IsIntrinsic= */ true, false, 4,
                   DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // Non-writeback single-lane interleaved loads; Q tables cover only
    // 16/32-bit elements, as in the VLDnLN_UPD cases above.
    case Intrinsic::arm_neon_vld2lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD2LNd8Pseudo,
                                           ARM::VLD2LNd16Pseudo,
                                           ARM::VLD2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD2LNq16Pseudo,
                                           ARM::VLD2LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld3lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD3LNd8Pseudo,
                                           ARM::VLD3LNd16Pseudo,
                                           ARM::VLD3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD3LNq16Pseudo,
                                           ARM::VLD3LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vld4lane: {
      static const uint16_t DOpcodes[] = { ARM::VLD4LNd8Pseudo,
                                           ARM::VLD4LNd16Pseudo,
                                           ARM::VLD4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VLD4LNq16Pseudo,
                                           ARM::VLD4LNq32Pseudo };
      SelectVLDSTLane(N, true, false, 4, DOpcodes, QOpcodes);
      return;
    }
5003 
    // Non-writeback NEON interleaved-store intrinsics; mirror of the vldN
    // intrinsic cases above, selected through SelectVST with isUpdating=false.
    case Intrinsic::arm_neon_vst1: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8, ARM::VST1d16,
                                           ARM::VST1d32, ARM::VST1d64 };
      static const uint16_t QOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      SelectVST(N, false, 1, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x2: {
      static const uint16_t DOpcodes[] = { ARM::VST1q8, ARM::VST1q16,
                                           ARM::VST1q32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst1x3: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8TPseudo,
                                           ARM::VST1d16TPseudo,
                                           ARM::VST1d32TPseudo,
                                           ARM::VST1d64TPseudo };
      // Q form selected as Low (updating) + High pseudo pair.
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowTPseudo_UPD,
                                            ARM::VST1q16LowTPseudo_UPD,
                                            ARM::VST1q32LowTPseudo_UPD,
                                            ARM::VST1q64LowTPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighTPseudo,
                                            ARM::VST1q16HighTPseudo,
                                            ARM::VST1q32HighTPseudo,
                                            ARM::VST1q64HighTPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst1x4: {
      static const uint16_t DOpcodes[] = { ARM::VST1d8QPseudo,
                                           ARM::VST1d16QPseudo,
                                           ARM::VST1d32QPseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST1q8LowQPseudo_UPD,
                                            ARM::VST1q16LowQPseudo_UPD,
                                            ARM::VST1q32LowQPseudo_UPD,
                                            ARM::VST1q64LowQPseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST1q8HighQPseudo,
                                            ARM::VST1q16HighQPseudo,
                                            ARM::VST1q32HighQPseudo,
                                            ARM::VST1q64HighQPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst2: {
      static const uint16_t DOpcodes[] = { ARM::VST2d8, ARM::VST2d16,
                                           ARM::VST2d32, ARM::VST1q64 };
      static const uint16_t QOpcodes[] = { ARM::VST2q8Pseudo, ARM::VST2q16Pseudo,
                                           ARM::VST2q32Pseudo };
      SelectVST(N, false, 2, DOpcodes, QOpcodes, nullptr);
      return;
    }

    case Intrinsic::arm_neon_vst3: {
      static const uint16_t DOpcodes[] = { ARM::VST3d8Pseudo,
                                           ARM::VST3d16Pseudo,
                                           ARM::VST3d32Pseudo,
                                           ARM::VST1d64TPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST3q8Pseudo_UPD,
                                            ARM::VST3q16Pseudo_UPD,
                                            ARM::VST3q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST3q8oddPseudo,
                                            ARM::VST3q16oddPseudo,
                                            ARM::VST3q32oddPseudo };
      SelectVST(N, false, 3, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    case Intrinsic::arm_neon_vst4: {
      static const uint16_t DOpcodes[] = { ARM::VST4d8Pseudo,
                                           ARM::VST4d16Pseudo,
                                           ARM::VST4d32Pseudo,
                                           ARM::VST1d64QPseudo };
      static const uint16_t QOpcodes0[] = { ARM::VST4q8Pseudo_UPD,
                                            ARM::VST4q16Pseudo_UPD,
                                            ARM::VST4q32Pseudo_UPD };
      static const uint16_t QOpcodes1[] = { ARM::VST4q8oddPseudo,
                                            ARM::VST4q16oddPseudo,
                                            ARM::VST4q32oddPseudo };
      SelectVST(N, false, 4, DOpcodes, QOpcodes0, QOpcodes1);
      return;
    }

    // Non-writeback single-lane interleaved stores.
    case Intrinsic::arm_neon_vst2lane: {
      static const uint16_t DOpcodes[] = { ARM::VST2LNd8Pseudo,
                                           ARM::VST2LNd16Pseudo,
                                           ARM::VST2LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST2LNq16Pseudo,
                                           ARM::VST2LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 2, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst3lane: {
      static const uint16_t DOpcodes[] = { ARM::VST3LNd8Pseudo,
                                           ARM::VST3LNd16Pseudo,
                                           ARM::VST3LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST3LNq16Pseudo,
                                           ARM::VST3LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 3, DOpcodes, QOpcodes);
      return;
    }

    case Intrinsic::arm_neon_vst4lane: {
      static const uint16_t DOpcodes[] = { ARM::VST4LNd8Pseudo,
                                           ARM::VST4LNd16Pseudo,
                                           ARM::VST4LNd32Pseudo };
      static const uint16_t QOpcodes[] = { ARM::VST4LNq16Pseudo,
                                           ARM::VST4LNq32Pseudo };
      SelectVLDSTLane(N, false, false, 4, DOpcodes, QOpcodes);
      return;
    }
5126 
5127     case Intrinsic::arm_mve_vldr_gather_base_wb:
5128     case Intrinsic::arm_mve_vldr_gather_base_wb_predicated: {
5129       static const uint16_t Opcodes[] = {ARM::MVE_VLDRWU32_qi_pre,
5130                                          ARM::MVE_VLDRDU64_qi_pre};
5131       SelectMVE_WB(N, Opcodes,
5132                    IntNo == Intrinsic::arm_mve_vldr_gather_base_wb_predicated);
5133       return;
5134     }
5135 
5136     case Intrinsic::arm_mve_vld2q: {
5137       static const uint16_t Opcodes8[] = {ARM::MVE_VLD20_8, ARM::MVE_VLD21_8};
5138       static const uint16_t Opcodes16[] = {ARM::MVE_VLD20_16,
5139                                            ARM::MVE_VLD21_16};
5140       static const uint16_t Opcodes32[] = {ARM::MVE_VLD20_32,
5141                                            ARM::MVE_VLD21_32};
5142       static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5143       SelectMVE_VLD(N, 2, Opcodes, false);
5144       return;
5145     }
5146 
5147     case Intrinsic::arm_mve_vld4q: {
5148       static const uint16_t Opcodes8[] = {ARM::MVE_VLD40_8, ARM::MVE_VLD41_8,
5149                                           ARM::MVE_VLD42_8, ARM::MVE_VLD43_8};
5150       static const uint16_t Opcodes16[] = {ARM::MVE_VLD40_16, ARM::MVE_VLD41_16,
5151                                            ARM::MVE_VLD42_16,
5152                                            ARM::MVE_VLD43_16};
5153       static const uint16_t Opcodes32[] = {ARM::MVE_VLD40_32, ARM::MVE_VLD41_32,
5154                                            ARM::MVE_VLD42_32,
5155                                            ARM::MVE_VLD43_32};
5156       static const uint16_t *const Opcodes[] = {Opcodes8, Opcodes16, Opcodes32};
5157       SelectMVE_VLD(N, 4, Opcodes, false);
5158       return;
5159     }
5160     }
5161     break;
5162   }
5163 
5164   case ISD::INTRINSIC_WO_CHAIN: {
5165     unsigned IntNo = N->getConstantOperandVal(0);
5166     switch (IntNo) {
5167     default:
5168       break;
5169 
5170     // Scalar f32 -> bf16
5171     case Intrinsic::arm_neon_vcvtbfp2bf: {
5172       SDLoc dl(N);
5173       const SDValue &Src = N->getOperand(1);
5174       llvm::EVT DestTy = N->getValueType(0);
5175       SDValue Pred = getAL(CurDAG, dl);
5176       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5177       SDValue Ops[] = { Src, Src, Pred, Reg0 };
5178       CurDAG->SelectNodeTo(N, ARM::BF16_VCVTB, DestTy, Ops);
5179       return;
5180     }
5181 
5182     // Vector v4f32 -> v4bf16
5183     case Intrinsic::arm_neon_vcvtfp2bf: {
5184       SDLoc dl(N);
5185       const SDValue &Src = N->getOperand(1);
5186       SDValue Pred = getAL(CurDAG, dl);
5187       SDValue Reg0 = CurDAG->getRegister(0, MVT::i32);
5188       SDValue Ops[] = { Src, Pred, Reg0 };
5189       CurDAG->SelectNodeTo(N, ARM::BF16_VCVT, MVT::v4bf16, Ops);
5190       return;
5191     }
5192 
5193     case Intrinsic::arm_mve_urshrl:
5194       SelectMVE_LongShift(N, ARM::MVE_URSHRL, true, false);
5195       return;
5196     case Intrinsic::arm_mve_uqshll:
5197       SelectMVE_LongShift(N, ARM::MVE_UQSHLL, true, false);
5198       return;
5199     case Intrinsic::arm_mve_srshrl:
5200       SelectMVE_LongShift(N, ARM::MVE_SRSHRL, true, false);
5201       return;
5202     case Intrinsic::arm_mve_sqshll:
5203       SelectMVE_LongShift(N, ARM::MVE_SQSHLL, true, false);
5204       return;
5205     case Intrinsic::arm_mve_uqrshll:
5206       SelectMVE_LongShift(N, ARM::MVE_UQRSHLL, false, true);
5207       return;
5208     case Intrinsic::arm_mve_sqrshrl:
5209       SelectMVE_LongShift(N, ARM::MVE_SQRSHRL, false, true);
5210       return;
5211 
5212     case Intrinsic::arm_mve_vadc:
5213     case Intrinsic::arm_mve_vadc_predicated:
5214       SelectMVE_VADCSBC(N, ARM::MVE_VADC, ARM::MVE_VADCI, true,
5215                         IntNo == Intrinsic::arm_mve_vadc_predicated);
5216       return;
5217     case Intrinsic::arm_mve_vsbc:
5218     case Intrinsic::arm_mve_vsbc_predicated:
5219       SelectMVE_VADCSBC(N, ARM::MVE_VSBC, ARM::MVE_VSBCI, false,
5220                         IntNo == Intrinsic::arm_mve_vsbc_predicated);
5221       return;
5222     case Intrinsic::arm_mve_vshlc:
5223     case Intrinsic::arm_mve_vshlc_predicated:
5224       SelectMVE_VSHLC(N, IntNo == Intrinsic::arm_mve_vshlc_predicated);
5225       return;
5226 
5227     case Intrinsic::arm_mve_vmlldava:
5228     case Intrinsic::arm_mve_vmlldava_predicated: {
5229       static const uint16_t OpcodesU[] = {
5230           ARM::MVE_VMLALDAVu16,   ARM::MVE_VMLALDAVu32,
5231           ARM::MVE_VMLALDAVau16,  ARM::MVE_VMLALDAVau32,
5232       };
5233       static const uint16_t OpcodesS[] = {
5234           ARM::MVE_VMLALDAVs16,   ARM::MVE_VMLALDAVs32,
5235           ARM::MVE_VMLALDAVas16,  ARM::MVE_VMLALDAVas32,
5236           ARM::MVE_VMLALDAVxs16,  ARM::MVE_VMLALDAVxs32,
5237           ARM::MVE_VMLALDAVaxs16, ARM::MVE_VMLALDAVaxs32,
5238           ARM::MVE_VMLSLDAVs16,   ARM::MVE_VMLSLDAVs32,
5239           ARM::MVE_VMLSLDAVas16,  ARM::MVE_VMLSLDAVas32,
5240           ARM::MVE_VMLSLDAVxs16,  ARM::MVE_VMLSLDAVxs32,
5241           ARM::MVE_VMLSLDAVaxs16, ARM::MVE_VMLSLDAVaxs32,
5242       };
5243       SelectMVE_VMLLDAV(N, IntNo == Intrinsic::arm_mve_vmlldava_predicated,
5244                         OpcodesS, OpcodesU);
5245       return;
5246     }
5247 
5248     case Intrinsic::arm_mve_vrmlldavha:
5249     case Intrinsic::arm_mve_vrmlldavha_predicated: {
5250       static const uint16_t OpcodesU[] = {
5251           ARM::MVE_VRMLALDAVHu32,  ARM::MVE_VRMLALDAVHau32,
5252       };
5253       static const uint16_t OpcodesS[] = {
5254           ARM::MVE_VRMLALDAVHs32,  ARM::MVE_VRMLALDAVHas32,
5255           ARM::MVE_VRMLALDAVHxs32, ARM::MVE_VRMLALDAVHaxs32,
5256           ARM::MVE_VRMLSLDAVHs32,  ARM::MVE_VRMLSLDAVHas32,
5257           ARM::MVE_VRMLSLDAVHxs32, ARM::MVE_VRMLSLDAVHaxs32,
5258       };
5259       SelectMVE_VRMLLDAVH(N, IntNo == Intrinsic::arm_mve_vrmlldavha_predicated,
5260                           OpcodesS, OpcodesU);
5261       return;
5262     }
5263 
5264     case Intrinsic::arm_mve_vidup:
5265     case Intrinsic::arm_mve_vidup_predicated: {
5266       static const uint16_t Opcodes[] = {
5267           ARM::MVE_VIDUPu8, ARM::MVE_VIDUPu16, ARM::MVE_VIDUPu32,
5268       };
5269       SelectMVE_VxDUP(N, Opcodes, false,
5270                       IntNo == Intrinsic::arm_mve_vidup_predicated);
5271       return;
5272     }
5273 
5274     case Intrinsic::arm_mve_vddup:
5275     case Intrinsic::arm_mve_vddup_predicated: {
5276       static const uint16_t Opcodes[] = {
5277           ARM::MVE_VDDUPu8, ARM::MVE_VDDUPu16, ARM::MVE_VDDUPu32,
5278       };
5279       SelectMVE_VxDUP(N, Opcodes, false,
5280                       IntNo == Intrinsic::arm_mve_vddup_predicated);
5281       return;
5282     }
5283 
5284     case Intrinsic::arm_mve_viwdup:
5285     case Intrinsic::arm_mve_viwdup_predicated: {
5286       static const uint16_t Opcodes[] = {
5287           ARM::MVE_VIWDUPu8, ARM::MVE_VIWDUPu16, ARM::MVE_VIWDUPu32,
5288       };
5289       SelectMVE_VxDUP(N, Opcodes, true,
5290                       IntNo == Intrinsic::arm_mve_viwdup_predicated);
5291       return;
5292     }
5293 
5294     case Intrinsic::arm_mve_vdwdup:
5295     case Intrinsic::arm_mve_vdwdup_predicated: {
5296       static const uint16_t Opcodes[] = {
5297           ARM::MVE_VDWDUPu8, ARM::MVE_VDWDUPu16, ARM::MVE_VDWDUPu32,
5298       };
5299       SelectMVE_VxDUP(N, Opcodes, true,
5300                       IntNo == Intrinsic::arm_mve_vdwdup_predicated);
5301       return;
5302     }
5303 
5304     case Intrinsic::arm_cde_cx1d:
5305     case Intrinsic::arm_cde_cx1da:
5306     case Intrinsic::arm_cde_cx2d:
5307     case Intrinsic::arm_cde_cx2da:
5308     case Intrinsic::arm_cde_cx3d:
5309     case Intrinsic::arm_cde_cx3da: {
5310       bool HasAccum = IntNo == Intrinsic::arm_cde_cx1da ||
5311                       IntNo == Intrinsic::arm_cde_cx2da ||
5312                       IntNo == Intrinsic::arm_cde_cx3da;
5313       size_t NumExtraOps;
5314       uint16_t Opcode;
5315       switch (IntNo) {
5316       case Intrinsic::arm_cde_cx1d:
5317       case Intrinsic::arm_cde_cx1da:
5318         NumExtraOps = 0;
5319         Opcode = HasAccum ? ARM::CDE_CX1DA : ARM::CDE_CX1D;
5320         break;
5321       case Intrinsic::arm_cde_cx2d:
5322       case Intrinsic::arm_cde_cx2da:
5323         NumExtraOps = 1;
5324         Opcode = HasAccum ? ARM::CDE_CX2DA : ARM::CDE_CX2D;
5325         break;
5326       case Intrinsic::arm_cde_cx3d:
5327       case Intrinsic::arm_cde_cx3da:
5328         NumExtraOps = 2;
5329         Opcode = HasAccum ? ARM::CDE_CX3DA : ARM::CDE_CX3D;
5330         break;
5331       default:
5332         llvm_unreachable("Unexpected opcode");
5333       }
5334       SelectCDE_CXxD(N, Opcode, NumExtraOps, HasAccum);
5335       return;
5336     }
5337     }
5338     break;
5339   }
5340 
5341   case ISD::ATOMIC_CMP_SWAP:
5342     SelectCMP_SWAP(N);
5343     return;
5344   }
5345 
5346   SelectCode(N);
5347 }
5348 
5349 // Inspect a register string of the form
5350 // cp<coprocessor>:<opc1>:c<CRn>:c<CRm>:<opc2> (32bit) or
5351 // cp<coprocessor>:<opc1>:c<CRm> (64bit) inspect the fields of the string
5352 // and obtain the integer operands from them, adding these operands to the
5353 // provided vector.
5354 static void getIntOperandsFromRegisterString(StringRef RegString,
5355                                              SelectionDAG *CurDAG,
5356                                              const SDLoc &DL,
5357                                              std::vector<SDValue> &Ops) {
5358   SmallVector<StringRef, 5> Fields;
5359   RegString.split(Fields, ':');
5360 
5361   if (Fields.size() > 1) {
5362     bool AllIntFields = true;
5363 
5364     for (StringRef Field : Fields) {
5365       // Need to trim out leading 'cp' characters and get the integer field.
5366       unsigned IntField;
5367       AllIntFields &= !Field.trim("CPcp").getAsInteger(10, IntField);
5368       Ops.push_back(CurDAG->getTargetConstant(IntField, DL, MVT::i32));
5369     }
5370 
5371     assert(AllIntFields &&
5372             "Unexpected non-integer value in special register string.");
5373     (void)AllIntFields;
5374   }
5375 }
5376 
5377 // Maps a Banked Register string to its mask value. The mask value returned is
5378 // for use in the MRSbanked / MSRbanked instruction nodes as the Banked Register
5379 // mask operand, which expresses which register is to be used, e.g. r8, and in
5380 // which mode it is to be used, e.g. usr. Returns -1 to signify that the string
5381 // was invalid.
5382 static inline int getBankedRegisterMask(StringRef RegString) {
5383   auto TheReg = ARMBankedReg::lookupBankedRegByName(RegString.lower());
5384   if (!TheReg)
5385      return -1;
5386   return TheReg->Encoding;
5387 }
5388 
5389 // The flags here are common to those allowed for apsr in the A class cores and
5390 // those allowed for the special registers in the M class cores. Returns a
5391 // value representing which flags were present, -1 if invalid.
5392 static inline int getMClassFlagsMask(StringRef Flags) {
5393   return StringSwitch<int>(Flags)
5394           .Case("", 0x2) // no flags means nzcvq for psr registers, and 0x2 is
5395                          // correct when flags are not permitted
5396           .Case("g", 0x1)
5397           .Case("nzcvq", 0x2)
5398           .Case("nzcvqg", 0x3)
5399           .Default(-1);
5400 }
5401 
5402 // Maps MClass special registers string to its value for use in the
5403 // t2MRS_M/t2MSR_M instruction nodes as the SYSm value operand.
5404 // Returns -1 to signify that the string was invalid.
5405 static int getMClassRegisterMask(StringRef Reg, const ARMSubtarget *Subtarget) {
5406   auto TheReg = ARMSysReg::lookupMClassSysRegByName(Reg);
5407   const FeatureBitset &FeatureBits = Subtarget->getFeatureBits();
5408   if (!TheReg || !TheReg->hasRequiredFeatures(FeatureBits))
5409     return -1;
5410   return (int)(TheReg->Encoding & 0xFFF); // SYSm value
5411 }
5412 
5413 static int getARClassRegisterMask(StringRef Reg, StringRef Flags) {
5414   // The mask operand contains the special register (R Bit) in bit 4, whether
5415   // the register is spsr (R bit is 1) or one of cpsr/apsr (R bit is 0), and
5416   // bits 3-0 contains the fields to be accessed in the special register, set by
5417   // the flags provided with the register.
5418   int Mask = 0;
5419   if (Reg == "apsr") {
5420     // The flags permitted for apsr are the same flags that are allowed in
5421     // M class registers. We get the flag value and then shift the flags into
5422     // the correct place to combine with the mask.
5423     Mask = getMClassFlagsMask(Flags);
5424     if (Mask == -1)
5425       return -1;
5426     return Mask << 2;
5427   }
5428 
5429   if (Reg != "cpsr" && Reg != "spsr") {
5430     return -1;
5431   }
5432 
5433   // This is the same as if the flags were "fc"
5434   if (Flags.empty() || Flags == "all")
5435     return Mask | 0x9;
5436 
5437   // Inspect the supplied flags string and set the bits in the mask for
5438   // the relevant and valid flags allowed for cpsr and spsr.
5439   for (char Flag : Flags) {
5440     int FlagVal;
5441     switch (Flag) {
5442       case 'c':
5443         FlagVal = 0x1;
5444         break;
5445       case 'x':
5446         FlagVal = 0x2;
5447         break;
5448       case 's':
5449         FlagVal = 0x4;
5450         break;
5451       case 'f':
5452         FlagVal = 0x8;
5453         break;
5454       default:
5455         FlagVal = 0;
5456     }
5457 
5458     // This avoids allowing strings where the same flag bit appears twice.
5459     if (!FlagVal || (Mask & FlagVal))
5460       return -1;
5461     Mask |= FlagVal;
5462   }
5463 
5464   // If the register is spsr then we need to set the R bit.
5465   if (Reg == "spsr")
5466     Mask |= 0x10;
5467 
5468   return Mask;
5469 }
5470 
// Lower the read_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to construct as operands for the node.
// Returns true if N was replaced with a machine node, false if the register
// string was not recognized for this subtarget.
bool ARMDAGToDAGISel::tryReadRegister(SDNode *N){
  // Operand 1 is the metadata node carrying the register-name string.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // First try the ACLE coprocessor-register syntax
  // (cp<n>:<opc1>:c<CRn>:c<CRm>:<opc2>, or the 3-field 64-bit form); on
  // success Ops receives the parsed integer fields.
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MRC node (32 bit) or
    // MRRC node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    SmallVector<EVT, 3> ResTypes;
    if (Ops.size() == 5){
      Opcode = IsThumb2 ? ARM::t2MRC : ARM::MRC;
      ResTypes.append({ MVT::i32, MVT::Other });
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MRRC : ARM::MRRC;
      ResTypes.append({ MVT::i32, MVT::i32, MVT::Other });
    }

    // Append the predicate operands (AL condition plus a null register) and
    // the incoming chain, then replace the intrinsic with the machine node.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, ResTypes, Ops));
    return true;
  }

  // Otherwise match the string against named special registers,
  // case-insensitively.
  std::string SpecialReg = RegString->getString().lower();

  // Banked registers (e.g. "r8_usr") lower to MRSbanked with the register's
  // encoded mask as the first operand.
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSbanked : ARM::MRSbanked,
                                  DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are read by creating SelectionDAG nodes with opcodes
  // corresponding to the register that is being read from. So we switch on the
  // string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMRS)
                    .Case("fpexc", ARM::VMRS_FPEXC)
                    .Case("fpsid", ARM::VMRS_FPSID)
                    .Case("mvfr0", ARM::VMRS_MVFR0)
                    .Case("mvfr1", ARM::VMRS_MVFR1)
                    .Case("mvfr2", ARM::VMRS_MVFR2)
                    .Case("fpinst", ARM::VMRS_FPINST)
                    .Case("fpinst2", ARM::VMRS_FPINST2)
                    .Default(0);

  // If an opcode was found then we can lower the read to a VFP instruction.
  if (Opcode) {
    // All VFP reads need a VFP2 base; mvfr2 additionally requires FP-ARMv8.
    if (!Subtarget->hasVFP2Base())
      return false;
    if (Opcode == ARM::VMRS_MVFR2 && !Subtarget->hasFPARMv8Base())
      return false;

    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N,
                CurDAG->getMachineNode(Opcode, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // If the target is M Class then need to validate that the register string
  // is an acceptable value, so check that a mask can be constructed from the
  // string.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
                      N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(ARM::t2MRS_M, DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Here we know the target is not M Class so we need to check if it is one
  // of the remaining possible values which are apsr, cpsr or spsr.
  if (SpecialReg == "apsr" || SpecialReg == "cpsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRS_AR : ARM::MRS,
                                          DL, MVT::i32, MVT::Other, Ops));
    return true;
  }

  if (SpecialReg == "spsr") {
    Ops = { getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MRSsys_AR : ARM::MRSsys, DL,
                                  MVT::i32, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string.
  return false;
}
5585 
// Lower the write_register intrinsic to ARM specific DAG nodes
// using the supplied metadata string to select the instruction node to use
// and the registers/masks to use in the nodes
// Returns true if N was replaced with a machine node, false if the register
// string was not recognized for this subtarget.
bool ARMDAGToDAGISel::tryWriteRegister(SDNode *N){
  // Operand 1 is the metadata node carrying the register-name string;
  // operand 2 (and 3, for a 64-bit write) holds the value(s) to be written.
  const auto *MD = cast<MDNodeSDNode>(N->getOperand(1));
  const auto *RegString = cast<MDString>(MD->getMD()->getOperand(0));
  bool IsThumb2 = Subtarget->isThumb2();
  SDLoc DL(N);

  // First try the ACLE coprocessor-register syntax (see
  // getIntOperandsFromRegisterString for the accepted forms).
  std::vector<SDValue> Ops;
  getIntOperandsFromRegisterString(RegString->getString(), CurDAG, DL, Ops);

  if (!Ops.empty()) {
    // If the special register string was constructed of fields (as defined
    // in the ACLE) then need to lower to MCR node (32 bit) or
    // MCRR node(64 bit), we can make the distinction based on the number of
    // operands we have.
    unsigned Opcode;
    if (Ops.size() == 5) {
      Opcode = IsThumb2 ? ARM::t2MCR : ARM::MCR;
      // Insert the value to write after the first two parsed fields.
      Ops.insert(Ops.begin()+2, N->getOperand(2));
    } else {
      assert(Ops.size() == 3 &&
              "Invalid number of fields in special register string.");
      Opcode = IsThumb2 ? ARM::t2MCRR : ARM::MCRR;
      // 64-bit write: both halves of the value go after the first two fields.
      SDValue WriteValue[] = { N->getOperand(2), N->getOperand(3) };
      Ops.insert(Ops.begin()+2, WriteValue, WriteValue+2);
    }

    // Append the predicate operands (AL condition plus a null register) and
    // the incoming chain.
    Ops.push_back(getAL(CurDAG, DL));
    Ops.push_back(CurDAG->getRegister(0, MVT::i32));
    Ops.push_back(N->getOperand(0));

    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Otherwise match named special registers, case-insensitively.
  std::string SpecialReg = RegString->getString().lower();
  int BankedReg = getBankedRegisterMask(SpecialReg);
  if (BankedReg != -1) {
    Ops = { CurDAG->getTargetConstant(BankedReg, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(
        N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSRbanked : ARM::MSRbanked,
                                  DL, MVT::Other, Ops));
    return true;
  }

  // The VFP registers are written to by creating SelectionDAG nodes with
  // opcodes corresponding to the register that is being written. So we switch
  // on the string to find which opcode we need to use.
  unsigned Opcode = StringSwitch<unsigned>(SpecialReg)
                    .Case("fpscr", ARM::VMSR)
                    .Case("fpexc", ARM::VMSR_FPEXC)
                    .Case("fpsid", ARM::VMSR_FPSID)
                    .Case("fpinst", ARM::VMSR_FPINST)
                    .Case("fpinst2", ARM::VMSR_FPINST2)
                    .Default(0);

  if (Opcode) {
    // Writing VFP registers needs at least a VFP2 base.
    if (!Subtarget->hasVFP2Base())
      return false;
    Ops = { N->getOperand(2), getAL(CurDAG, DL),
            CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(Opcode, DL, MVT::Other, Ops));
    return true;
  }

  // Split an optional "_<flags>" suffix (e.g. "spsr_fc") off the register
  // name; the flags select which fields of the register are written.
  std::pair<StringRef, StringRef> Fields;
  Fields = StringRef(SpecialReg).rsplit('_');
  std::string Reg = Fields.first.str();
  StringRef Flags = Fields.second;

  // If the target was M Class then need to validate the special register value
  // and retrieve the mask for use in the instruction node.
  if (Subtarget->isMClass()) {
    int SYSmValue = getMClassRegisterMask(SpecialReg, Subtarget);
    if (SYSmValue == -1)
      return false;

    SDValue Ops[] = { CurDAG->getTargetConstant(SYSmValue, DL, MVT::i32),
                      N->getOperand(2), getAL(CurDAG, DL),
                      CurDAG->getRegister(0, MVT::i32), N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(ARM::t2MSR_M, DL, MVT::Other, Ops));
    return true;
  }

  // We then check to see if a valid mask can be constructed for one of the
  // register string values permitted for the A and R class cores. These values
  // are apsr, spsr and cpsr; these are also valid on older cores.
  int Mask = getARClassRegisterMask(Reg, Flags);
  if (Mask != -1) {
    Ops = { CurDAG->getTargetConstant(Mask, DL, MVT::i32), N->getOperand(2),
            getAL(CurDAG, DL), CurDAG->getRegister(0, MVT::i32),
            N->getOperand(0) };
    ReplaceNode(N, CurDAG->getMachineNode(IsThumb2 ? ARM::t2MSR_AR : ARM::MSR,
                                          DL, MVT::Other, Ops));
    return true;
  }

  // Unrecognized register string.
  return false;
}
5689 
// Rewrite an inline-asm node so that any operand group constrained to two
// GPR-class registers (64-bit data under the "%r" constraint) uses a single
// GPRPair virtual register instead, inserting the copies needed to split or
// join the pair. Returns true (after replacing N) if any group was
// rewritten, false if the node was left untouched.
bool ARMDAGToDAGISel::tryInlineAsm(SDNode *N){
  std::vector<SDValue> AsmNodeOperands;
  InlineAsm::Flag Flag;
  bool Changed = false;
  unsigned NumOps = N->getNumOperands();

  // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint.
  // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require
  // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs
  // respectively. Since there is no constraint to explicitly specify a
  // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb,
  // the 64-bit data may be referred by H, Q, R modifiers, so we still pack
  // them into a GPRPair.

  SDLoc dl(N);
  // The glue operand, when present, is always last.
  SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps - 1) : SDValue();

  // One entry per register operand group; records whether that group was
  // rewritten, so later tied uses can match their (possibly rewritten) defs.
  SmallVector<bool, 8> OpChanged;
  // Glue node will be appended late.
  for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) {
    SDValue op = N->getOperand(i);
    AsmNodeOperands.push_back(op);

    // Skip the fixed leading operands before the first constraint group.
    if (i < InlineAsm::Op_FirstOperand)
      continue;

    // Flag words describing a constraint group are constants; any other
    // operand just passes through unchanged.
    if (const auto *C = dyn_cast<ConstantSDNode>(N->getOperand(i)))
      Flag = InlineAsm::Flag(C->getZExtValue());
    else
      continue;

    // Immediate operands to inline asm in the SelectionDAG are modeled with
    // two operands. The first is a constant of value InlineAsm::Kind::Imm, and
    // the second is a constant with the value of the immediate. If we get here
    // and we have a Kind::Imm, skip the next operand, and continue.
    if (Flag.isImmKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    const unsigned NumRegs = Flag.getNumOperandRegisters();
    if (NumRegs)
      OpChanged.push_back(false);

    unsigned DefIdx = 0;
    bool IsTiedToChangedOp = false;
    // If it's a use that is tied with a previous def, it has no
    // reg class constraint.
    if (Changed && Flag.isUseOperandTiedToDef(DefIdx))
      IsTiedToChangedOp = OpChanged[DefIdx];

    // Memory operands to inline asm in the SelectionDAG are modeled with two
    // operands: a constant of value InlineAsm::Kind::Mem followed by the input
    // operand. If we get here and we have a Kind::Mem, skip the next operand
    // (so it doesn't get misinterpreted), and continue. We do this here because
    // it's important to update the OpChanged array correctly before moving on.
    if (Flag.isMemKind()) {
      SDValue op = N->getOperand(++i);
      AsmNodeOperands.push_back(op);
      continue;
    }

    if (!Flag.isRegUseKind() && !Flag.isRegDefKind() &&
        !Flag.isRegDefEarlyClobberKind())
      continue;

    // Only rewrite groups of exactly two GPR-class registers (a 64-bit value
    // split across a register pair); everything else passes through.
    unsigned RC;
    const bool HasRC = Flag.hasRegClassConstraint(RC);
    if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID))
        || NumRegs != 2)
      continue;

    assert((i+2 < NumOps) && "Invalid number of operands in inline asm");
    SDValue V0 = N->getOperand(i+1);
    SDValue V1 = N->getOperand(i+2);
    Register Reg0 = cast<RegisterSDNode>(V0)->getReg();
    Register Reg1 = cast<RegisterSDNode>(V1)->getReg();
    SDValue PairedReg;
    MachineRegisterInfo &MRI = MF->getRegInfo();

    if (Flag.isRegDefKind() || Flag.isRegDefEarlyClobberKind()) {
      // Replace the two GPRs with 1 GPRPair and copy values from GPRPair to
      // the original GPRs.

      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      SDValue Chain = SDValue(N,0);

      SDNode *GU = N->getGluedUser();
      SDValue RegCopy = CurDAG->getCopyFromReg(Chain, dl, GPVR, MVT::Untyped,
                                               Chain.getValue(1));

      // Extract values from a GPRPair reg and copy to the original GPR reg.
      SDValue Sub0 = CurDAG->getTargetExtractSubreg(ARM::gsub_0, dl, MVT::i32,
                                                    RegCopy);
      SDValue Sub1 = CurDAG->getTargetExtractSubreg(ARM::gsub_1, dl, MVT::i32,
                                                    RegCopy);
      SDValue T0 = CurDAG->getCopyToReg(Sub0, dl, Reg0, Sub0,
                                        RegCopy.getValue(1));
      SDValue T1 = CurDAG->getCopyToReg(Sub1, dl, Reg1, Sub1, T0.getValue(1));

      // Update the original glue user.
      std::vector<SDValue> Ops(GU->op_begin(), GU->op_end()-1);
      Ops.push_back(T1.getValue(1));
      CurDAG->UpdateNodeOperands(GU, Ops);
    } else {
      // For Kind  == InlineAsm::Kind::RegUse, we first copy two GPRs into a
      // GPRPair and then pass the GPRPair to the inline asm.
      SDValue Chain = AsmNodeOperands[InlineAsm::Op_InputChain];

      // As REG_SEQ doesn't take RegisterSDNode, we copy them first.
      SDValue T0 = CurDAG->getCopyFromReg(Chain, dl, Reg0, MVT::i32,
                                          Chain.getValue(1));
      SDValue T1 = CurDAG->getCopyFromReg(Chain, dl, Reg1, MVT::i32,
                                          T0.getValue(1));
      SDValue Pair = SDValue(createGPRPairNode(MVT::Untyped, T0, T1), 0);

      // Copy REG_SEQ into a GPRPair-typed VR and replace the original two
      // i32 VRs of inline asm with it.
      Register GPVR = MRI.createVirtualRegister(&ARM::GPRPairRegClass);
      PairedReg = CurDAG->getRegister(GPVR, MVT::Untyped);
      Chain = CurDAG->getCopyToReg(T1, dl, GPVR, Pair, T1.getValue(1));

      AsmNodeOperands[InlineAsm::Op_InputChain] = Chain;
      Glue = Chain.getValue(1);
    }

    Changed = true;

    // Patch the flag word to describe a single GPRPair register, append the
    // pair, and skip over the two original GPR operands.
    if(PairedReg.getNode()) {
      OpChanged[OpChanged.size() -1 ] = true;
      Flag = InlineAsm::Flag(Flag.getKind(), 1 /* RegNum*/);
      if (IsTiedToChangedOp)
        Flag.setMatchingOp(DefIdx);
      else
        Flag.setRegClass(ARM::GPRPairRegClassID);
      // Replace the current flag.
      AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant(
          Flag, dl, MVT::i32);
      // Add the new register node and skip the original two GPRs.
      AsmNodeOperands.push_back(PairedReg);
      // Skip the next two GPRs.
      i += 2;
    }
  }

  if (Glue.getNode())
    AsmNodeOperands.push_back(Glue);
  if (!Changed)
    return false;

  // Rebuild the inline-asm node with the rewritten operand list and replace
  // the original.
  SDValue New = CurDAG->getNode(N->getOpcode(), SDLoc(N),
      CurDAG->getVTList(MVT::Other, MVT::Glue), AsmNodeOperands);
  New->setNodeId(-1);
  ReplaceNode(N, New.getNode());
  return true;
}
5848 
5849 bool ARMDAGToDAGISel::SelectInlineAsmMemoryOperand(
5850     const SDValue &Op, InlineAsm::ConstraintCode ConstraintID,
5851     std::vector<SDValue> &OutOps) {
5852   switch(ConstraintID) {
5853   default:
5854     llvm_unreachable("Unexpected asm memory constraint");
5855   case InlineAsm::ConstraintCode::m:
5856   case InlineAsm::ConstraintCode::o:
5857   case InlineAsm::ConstraintCode::Q:
5858   case InlineAsm::ConstraintCode::Um:
5859   case InlineAsm::ConstraintCode::Un:
5860   case InlineAsm::ConstraintCode::Uq:
5861   case InlineAsm::ConstraintCode::Us:
5862   case InlineAsm::ConstraintCode::Ut:
5863   case InlineAsm::ConstraintCode::Uv:
5864   case InlineAsm::ConstraintCode::Uy:
5865     // Require the address to be in a register.  That is safe for all ARM
5866     // variants and it is hard to do anything much smarter without knowing
5867     // how the operand is used.
5868     OutOps.push_back(Op);
5869     return false;
5870   }
5871   return true;
5872 }
5873 
5874 /// createARMISelDag - This pass converts a legalized DAG into a
5875 /// ARM-specific DAG, ready for instruction scheduling.
5876 ///
5877 FunctionPass *llvm::createARMISelDag(ARMBaseTargetMachine &TM,
5878                                      CodeGenOptLevel OptLevel) {
5879   return new ARMDAGToDAGISelLegacy(TM, OptLevel);
5880 }
5881