1 //===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that expands pseudo instructions into target
10 // instructions to allow proper scheduling and other late optimizations.  This
11 // pass should be run after register allocation but before the post-regalloc
12 // scheduling pass.
13 //
14 //===----------------------------------------------------------------------===//
15 
16 #include "AArch64ExpandImm.h"
17 #include "AArch64InstrInfo.h"
18 #include "AArch64MachineFunctionInfo.h"
19 #include "AArch64Subtarget.h"
20 #include "MCTargetDesc/AArch64AddressingModes.h"
21 #include "Utils/AArch64BaseInfo.h"
22 #include "llvm/CodeGen/LivePhysRegs.h"
23 #include "llvm/CodeGen/MachineBasicBlock.h"
24 #include "llvm/CodeGen/MachineConstantPool.h"
25 #include "llvm/CodeGen/MachineFunction.h"
26 #include "llvm/CodeGen/MachineFunctionPass.h"
27 #include "llvm/CodeGen/MachineInstr.h"
28 #include "llvm/CodeGen/MachineInstrBuilder.h"
29 #include "llvm/CodeGen/MachineOperand.h"
30 #include "llvm/CodeGen/TargetSubtargetInfo.h"
31 #include "llvm/IR/DebugLoc.h"
32 #include "llvm/MC/MCInstrDesc.h"
33 #include "llvm/Pass.h"
34 #include "llvm/Support/CodeGen.h"
35 #include "llvm/Target/TargetMachine.h"
36 #include "llvm/TargetParser/Triple.h"
37 #include <cassert>
38 #include <cstdint>
39 #include <iterator>
40 
41 using namespace llvm;
42 
43 #define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"
44 
45 namespace {
46 
47 class AArch64ExpandPseudo : public MachineFunctionPass {
48 public:
49   const AArch64InstrInfo *TII;
50 
51   static char ID;
52 
53   AArch64ExpandPseudo() : MachineFunctionPass(ID) {
54     initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
55   }
56 
57   bool runOnMachineFunction(MachineFunction &Fn) override;
58 
59   StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }
60 
61 private:
62   bool expandMBB(MachineBasicBlock &MBB);
63   bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
64                 MachineBasicBlock::iterator &NextMBBI);
65   bool expandMultiVecPseudo(MachineBasicBlock &MBB,
66                             MachineBasicBlock::iterator MBBI,
67                             TargetRegisterClass ContiguousClass,
68                             TargetRegisterClass StridedClass,
69                             unsigned ContiguousOpc, unsigned StridedOpc);
70   bool expandFormTuplePseudo(MachineBasicBlock &MBB,
71                              MachineBasicBlock::iterator MBBI,
72                              MachineBasicBlock::iterator &NextMBBI,
73                              unsigned Size);
74   bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
75                     unsigned BitSize);
76 
77   bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
78                             MachineBasicBlock::iterator MBBI);
79   bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
80                       unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
81                       unsigned ExtendImm, unsigned ZeroReg,
82                       MachineBasicBlock::iterator &NextMBBI);
83   bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
84                           MachineBasicBlock::iterator MBBI,
85                           MachineBasicBlock::iterator &NextMBBI);
86   bool expandSetTagLoop(MachineBasicBlock &MBB,
87                         MachineBasicBlock::iterator MBBI,
88                         MachineBasicBlock::iterator &NextMBBI);
89   bool expandSVESpillFill(MachineBasicBlock &MBB,
90                           MachineBasicBlock::iterator MBBI, unsigned Opc,
91                           unsigned N);
92   bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
93                            MachineBasicBlock::iterator MBBI);
94   bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
95   bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
96                                     MachineBasicBlock::iterator MBBI);
97   MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
98                                      MachineBasicBlock::iterator MBBI);
99   MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
100                                         MachineBasicBlock::iterator MBBI);
101 };
102 
103 } // end anonymous namespace
104 
105 char AArch64ExpandPseudo::ID = 0;
106 
107 INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
108                 AARCH64_EXPAND_PSEUDO_NAME, false, false)
109 
110 /// Transfer implicit operands on the pseudo instruction to the
111 /// instructions created from the expansion.
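/// Implicit operands are those beyond the MCInstrDesc's declared operand
/// count; implicit uses are attached to the first expanded instruction
/// (UseMI) and implicit defs to the last (DefMI).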
112 static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
113                            MachineInstrBuilder &DefMI) {
114   const MCInstrDesc &Desc = OldMI.getDesc();
115   for (const MachineOperand &MO :
116        llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
117     assert(MO.isReg() && MO.getReg());
118     if (MO.isUse())
119       UseMI.add(MO);
120     else
121       DefMI.add(MO);
122   }
123 }
124 
125 /// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
126 /// real move-immediate instructions to synthesize the immediate.
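///
/// For example (illustrative only), a MOVi32imm of 0x12345678 is typically
/// materialized as:
///   movz w0, #0x5678
///   movk w0, #0x1234, lsl #16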
127 bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
128                                        MachineBasicBlock::iterator MBBI,
129                                        unsigned BitSize) {
130   MachineInstr &MI = *MBBI;
131   Register DstReg = MI.getOperand(0).getReg();
132   uint64_t RenamableState =
133       MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
134   uint64_t Imm = MI.getOperand(1).getImm();
135 
136   if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
137     // Useless def, and we don't want to risk creating an invalid ORR (which
138     // would really write to sp).
139     MI.eraseFromParent();
140     return true;
141   }
142 
143   SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
144   AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
145   assert(Insn.size() != 0);
146 
147   SmallVector<MachineInstrBuilder, 4> MIBS;
148   for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
149     bool LastItem = std::next(I) == E;
150     switch (I->Opcode)
151     {
152     default: llvm_unreachable("unhandled!"); break;
153 
154     case AArch64::ORRWri:
155     case AArch64::ORRXri:
156       if (I->Op1 == 0) {
157         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
158                            .add(MI.getOperand(0))
159                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
160                            .addImm(I->Op2));
161       } else {
162         Register DstReg = MI.getOperand(0).getReg();
163         bool DstIsDead = MI.getOperand(0).isDead();
164         MIBS.push_back(
165             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
166                 .addReg(DstReg, RegState::Define |
167                                     getDeadRegState(DstIsDead && LastItem) |
168                                     RenamableState)
169                 .addReg(DstReg)
170                 .addImm(I->Op2));
171       }
172       break;
173     case AArch64::ORRWrs:
174     case AArch64::ORRXrs: {
175       Register DstReg = MI.getOperand(0).getReg();
176       bool DstIsDead = MI.getOperand(0).isDead();
177       MIBS.push_back(
178           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
179               .addReg(DstReg, RegState::Define |
180                                   getDeadRegState(DstIsDead && LastItem) |
181                                   RenamableState)
182               .addReg(DstReg)
183               .addReg(DstReg)
184               .addImm(I->Op2));
185     } break;
186     case AArch64::ANDXri:
187     case AArch64::EORXri:
188       if (I->Op1 == 0) {
189         MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
190                            .add(MI.getOperand(0))
191                            .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
192                            .addImm(I->Op2));
193       } else {
194         Register DstReg = MI.getOperand(0).getReg();
195         bool DstIsDead = MI.getOperand(0).isDead();
196         MIBS.push_back(
197             BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
198                 .addReg(DstReg, RegState::Define |
199                                     getDeadRegState(DstIsDead && LastItem) |
200                                     RenamableState)
201                 .addReg(DstReg)
202                 .addImm(I->Op2));
203       }
204       break;
205     case AArch64::MOVNWi:
206     case AArch64::MOVNXi:
207     case AArch64::MOVZWi:
208     case AArch64::MOVZXi: {
209       bool DstIsDead = MI.getOperand(0).isDead();
210       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
211         .addReg(DstReg, RegState::Define |
212                 getDeadRegState(DstIsDead && LastItem) |
213                 RenamableState)
214         .addImm(I->Op1)
215         .addImm(I->Op2));
216       } break;
217     case AArch64::MOVKWi:
218     case AArch64::MOVKXi: {
219       Register DstReg = MI.getOperand(0).getReg();
220       bool DstIsDead = MI.getOperand(0).isDead();
221       MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
222         .addReg(DstReg,
223                 RegState::Define |
224                 getDeadRegState(DstIsDead && LastItem) |
225                 RenamableState)
226         .addReg(DstReg)
227         .addImm(I->Op1)
228         .addImm(I->Op2));
229       } break;
230     }
231   }
232   transferImpOps(MI, MIBS.front(), MIBS.back());
233   MI.eraseFromParent();
234   return true;
235 }
236 
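// Expand a CMP_SWAP pseudo into a load-exclusive / store-exclusive loop.
// Operand order, as read below: (dest, status, addr, desired, new); the
// emitted control flow is sketched in the block comments inside.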
237 bool AArch64ExpandPseudo::expandCMP_SWAP(
238     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
239     unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
240     MachineBasicBlock::iterator &NextMBBI) {
241   MachineInstr &MI = *MBBI;
242   MIMetadata MIMD(MI);
243   const MachineOperand &Dest = MI.getOperand(0);
244   Register StatusReg = MI.getOperand(1).getReg();
245   bool StatusDead = MI.getOperand(1).isDead();
246   // Duplicating undef operands into 2 instructions does not guarantee the same
247   // value on both; however, undef should be replaced by xzr anyway.
248   assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
249   Register AddrReg = MI.getOperand(2).getReg();
250   Register DesiredReg = MI.getOperand(3).getReg();
251   Register NewReg = MI.getOperand(4).getReg();
252 
253   MachineFunction *MF = MBB.getParent();
254   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
255   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
256   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
257 
258   MF->insert(++MBB.getIterator(), LoadCmpBB);
259   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
260   MF->insert(++StoreBB->getIterator(), DoneBB);
261 
262   // .Lloadcmp:
263   //     mov wStatus, 0
264   //     ldaxr xDest, [xAddr]
265   //     cmp xDest, xDesired
266   //     b.ne .Ldone
267   if (!StatusDead)
268     BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
269       .addImm(0).addImm(0);
270   BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
271       .addReg(AddrReg);
272   BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
273       .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
274       .addReg(DesiredReg)
275       .addImm(ExtendImm);
276   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
277       .addImm(AArch64CC::NE)
278       .addMBB(DoneBB)
279       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
280   LoadCmpBB->addSuccessor(DoneBB);
281   LoadCmpBB->addSuccessor(StoreBB);
282 
283   // .Lstore:
284   //     stlxr wStatus, xNew, [xAddr]
285   //     cbnz wStatus, .Lloadcmp
286   BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
287       .addReg(NewReg)
288       .addReg(AddrReg);
289   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
290       .addReg(StatusReg, getKillRegState(StatusDead))
291       .addMBB(LoadCmpBB);
292   StoreBB->addSuccessor(LoadCmpBB);
293   StoreBB->addSuccessor(DoneBB);
294 
295   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
296   DoneBB->transferSuccessors(&MBB);
297 
298   MBB.addSuccessor(LoadCmpBB);
299 
300   NextMBBI = MBB.end();
301   MI.eraseFromParent();
302 
303   // Recompute livein lists.
304   LivePhysRegs LiveRegs;
305   computeAndAddLiveIns(LiveRegs, *DoneBB);
306   computeAndAddLiveIns(LiveRegs, *StoreBB);
307   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
308   // Do an extra pass around the loop to get loop-carried registers right.
309   StoreBB->clearLiveIns();
310   computeAndAddLiveIns(LiveRegs, *StoreBB);
311   LoadCmpBB->clearLiveIns();
312   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
313 
314   return true;
315 }
316 
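// Expand a 128-bit CMP_SWAP pseudo. Operand order, as read below:
// (destLo, destHi, status, addr, desiredLo, desiredHi, newLo, newHi).
// Acquire/release semantics are selected by picking LDXP/LDAXP and
// STXP/STLXP based on the pseudo's opcode.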
317 bool AArch64ExpandPseudo::expandCMP_SWAP_128(
318     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
319     MachineBasicBlock::iterator &NextMBBI) {
320   MachineInstr &MI = *MBBI;
321   MIMetadata MIMD(MI);
322   MachineOperand &DestLo = MI.getOperand(0);
323   MachineOperand &DestHi = MI.getOperand(1);
324   Register StatusReg = MI.getOperand(2).getReg();
325   bool StatusDead = MI.getOperand(2).isDead();
326   // Duplicating undef operands into 2 instructions does not guarantee the same
327   // value on both; however, undef should be replaced by xzr anyway.
328   assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
329   Register AddrReg = MI.getOperand(3).getReg();
330   Register DesiredLoReg = MI.getOperand(4).getReg();
331   Register DesiredHiReg = MI.getOperand(5).getReg();
332   Register NewLoReg = MI.getOperand(6).getReg();
333   Register NewHiReg = MI.getOperand(7).getReg();
334 
335   unsigned LdxpOp, StxpOp;
336 
337   switch (MI.getOpcode()) {
338   case AArch64::CMP_SWAP_128_MONOTONIC:
339     LdxpOp = AArch64::LDXPX;
340     StxpOp = AArch64::STXPX;
341     break;
342   case AArch64::CMP_SWAP_128_RELEASE:
343     LdxpOp = AArch64::LDXPX;
344     StxpOp = AArch64::STLXPX;
345     break;
346   case AArch64::CMP_SWAP_128_ACQUIRE:
347     LdxpOp = AArch64::LDAXPX;
348     StxpOp = AArch64::STXPX;
349     break;
350   case AArch64::CMP_SWAP_128:
351     LdxpOp = AArch64::LDAXPX;
352     StxpOp = AArch64::STLXPX;
353     break;
354   default:
355     llvm_unreachable("Unexpected opcode");
356   }
357 
358   MachineFunction *MF = MBB.getParent();
359   auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
360   auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
361   auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
362   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
363 
364   MF->insert(++MBB.getIterator(), LoadCmpBB);
365   MF->insert(++LoadCmpBB->getIterator(), StoreBB);
366   MF->insert(++StoreBB->getIterator(), FailBB);
367   MF->insert(++FailBB->getIterator(), DoneBB);
368 
369   // .Lloadcmp:
370   //     ldaxp xDestLo, xDestHi, [xAddr]
371   //     cmp xDestLo, xDesiredLo; csinc wStatus, wzr, wzr, eq
372   //     cmp xDestHi, xDesiredHi; csinc wStatus, wStatus, wStatus, eq
373   //     cbnz wStatus, .Lfail
374   BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
375       .addReg(DestLo.getReg(), RegState::Define)
376       .addReg(DestHi.getReg(), RegState::Define)
377       .addReg(AddrReg);
378   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
379       .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
380       .addReg(DesiredLoReg)
381       .addImm(0);
382   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
383     .addUse(AArch64::WZR)
384     .addUse(AArch64::WZR)
385     .addImm(AArch64CC::EQ);
386   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
387       .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
388       .addReg(DesiredHiReg)
389       .addImm(0);
390   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
391       .addUse(StatusReg, RegState::Kill)
392       .addUse(StatusReg, RegState::Kill)
393       .addImm(AArch64CC::EQ);
394   BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
395       .addUse(StatusReg, getKillRegState(StatusDead))
396       .addMBB(FailBB);
397   LoadCmpBB->addSuccessor(FailBB);
398   LoadCmpBB->addSuccessor(StoreBB);
399 
400   // .Lstore:
401   //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
402   //     cbnz wStatus, .Lloadcmp
403   BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
404       .addReg(NewLoReg)
405       .addReg(NewHiReg)
406       .addReg(AddrReg);
407   BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
408       .addReg(StatusReg, getKillRegState(StatusDead))
409       .addMBB(LoadCmpBB);
410   BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
411   StoreBB->addSuccessor(LoadCmpBB);
412   StoreBB->addSuccessor(DoneBB);
413 
414   // .Lfail:
415   //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
416   //     cbnz wStatus, .Lloadcmp
417   BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
418       .addReg(DestLo.getReg())
419       .addReg(DestHi.getReg())
420       .addReg(AddrReg);
421   BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
422       .addReg(StatusReg, getKillRegState(StatusDead))
423       .addMBB(LoadCmpBB);
424   FailBB->addSuccessor(LoadCmpBB);
425   FailBB->addSuccessor(DoneBB);
426 
427   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
428   DoneBB->transferSuccessors(&MBB);
429 
430   MBB.addSuccessor(LoadCmpBB);
431 
432   NextMBBI = MBB.end();
433   MI.eraseFromParent();
434 
435   // Recompute liveness bottom up.
436   LivePhysRegs LiveRegs;
437   computeAndAddLiveIns(LiveRegs, *DoneBB);
438   computeAndAddLiveIns(LiveRegs, *FailBB);
439   computeAndAddLiveIns(LiveRegs, *StoreBB);
440   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
441 
442   // Do an extra pass in the loop to get the loop-carried dependencies right.
443   FailBB->clearLiveIns();
444   computeAndAddLiveIns(LiveRegs, *FailBB);
445   StoreBB->clearLiveIns();
446   computeAndAddLiveIns(LiveRegs, *StoreBB);
447   LoadCmpBB->clearLiveIns();
448   computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
449 
450   return true;
451 }
452 
453 /// \brief Expand Pseudos to Instructions with destructive operands.
454 ///
455 /// This mechanism uses MOVPRFX instructions for zeroing the false lanes
456 /// or for fixing relaxed register allocation conditions to comply with
457 /// the instruction's register constraints. The latter case may be cheaper
458 /// than setting the register constraints in the register allocator,
459 /// since that will insert regular MOV instructions rather than MOVPRFX.
460 ///
461 /// Example (after register allocation):
462 ///
463 ///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
464 ///
465 /// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
466 /// * We cannot map directly to FSUB_ZPmZ_B because the register
467 ///   constraints of the instruction are not met.
468 /// * Also, the _ZERO suffix specifies that the false lanes need to be zeroed.
469 ///
470 /// We first check whether the destructive operand == the result operand;
471 /// if not, we try to swap the operands, e.g.
472 ///
473 ///   FSUB_ZPmZ_B  Z0, Pg/m, Z0, Z1
474 ///
475 /// But because FSUB_ZPmZ is not commutative, this is semantically
476 /// different, so we need a reverse instruction:
477 ///
478 ///   FSUBR_ZPmZ_B  Z0, Pg/m, Z0, Z1
479 ///
480 /// Then we implement the zeroing of the false lanes of Z0 by adding
481 /// a zeroing MOVPRFX instruction:
482 ///
483 ///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
484 ///   FSUBR_ZPmZ_B   Z0, Pg/m, Z0, Z1
485 ///
486 /// Note that this can only be done for the _ZERO or _UNDEF variants, where
487 /// we can guarantee that the false lanes are zeroed (by implementing this)
488 /// or that they are undef (don't care / not used); otherwise swapping the
489 /// operands is illegal because the operation is not (and cannot be emulated
490 /// to be) fully commutative.
491 bool AArch64ExpandPseudo::expand_DestructiveOp(
492                             MachineInstr &MI,
493                             MachineBasicBlock &MBB,
494                             MachineBasicBlock::iterator MBBI) {
495   unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
496   uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
497   uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
498   bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
499   Register DstReg = MI.getOperand(0).getReg();
500   bool DstIsDead = MI.getOperand(0).isDead();
501   bool UseRev = false;
502   unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;
503 
504   switch (DType) {
505   case AArch64::DestructiveBinaryComm:
506   case AArch64::DestructiveBinaryCommWithRev:
507     if (DstReg == MI.getOperand(3).getReg()) {
508       // FSUB Zd, Pg, Zs1, Zd  ==> FSUBR   Zd, Pg/m, Zd, Zs1
509       std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
510       UseRev = true;
511       break;
512     }
513     [[fallthrough]];
514   case AArch64::DestructiveBinary:
515   case AArch64::DestructiveBinaryImm:
516     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
517     break;
518   case AArch64::DestructiveUnaryPassthru:
519     std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
520     break;
521   case AArch64::DestructiveTernaryCommWithRev:
522     std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
523     if (DstReg == MI.getOperand(3).getReg()) {
524       // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
525       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
526       UseRev = true;
527     } else if (DstReg == MI.getOperand(4).getReg()) {
528       // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
529       std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
530       UseRev = true;
531     }
532     break;
533   default:
534     llvm_unreachable("Unsupported Destructive Operand type");
535   }
536 
537   // MOVPRFX can only be used if the destination operand is also the
538   // destructive operand and is not used as any other source operand,
539   // so the destructive operand must be unique.
540   bool DOPRegIsUnique = false;
541   switch (DType) {
542   case AArch64::DestructiveBinary:
543     DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
544     break;
545   case AArch64::DestructiveBinaryComm:
546   case AArch64::DestructiveBinaryCommWithRev:
547     DOPRegIsUnique =
548       DstReg != MI.getOperand(DOPIdx).getReg() ||
549       MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
550     break;
551   case AArch64::DestructiveUnaryPassthru:
552   case AArch64::DestructiveBinaryImm:
553     DOPRegIsUnique = true;
554     break;
555   case AArch64::DestructiveTernaryCommWithRev:
556     DOPRegIsUnique =
557         DstReg != MI.getOperand(DOPIdx).getReg() ||
558         (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
559          MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
560     break;
561   }
562 
563   // Resolve the reverse opcode
564   if (UseRev) {
565     int NewOpcode;
566     // e.g. DIV -> DIVR
567     if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
568       Opcode = NewOpcode;
569     // e.g. DIVR -> DIV
570     else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
571       Opcode = NewOpcode;
572   }
573 
574   // Get the right MOVPRFX
575   uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
576   unsigned MovPrfx, LSLZero, MovPrfxZero;
577   switch (ElementSize) {
578   case AArch64::ElementSizeNone:
579   case AArch64::ElementSizeB:
580     MovPrfx = AArch64::MOVPRFX_ZZ;
581     LSLZero = AArch64::LSL_ZPmI_B;
582     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
583     break;
584   case AArch64::ElementSizeH:
585     MovPrfx = AArch64::MOVPRFX_ZZ;
586     LSLZero = AArch64::LSL_ZPmI_H;
587     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
588     break;
589   case AArch64::ElementSizeS:
590     MovPrfx = AArch64::MOVPRFX_ZZ;
591     LSLZero = AArch64::LSL_ZPmI_S;
592     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
593     break;
594   case AArch64::ElementSizeD:
595     MovPrfx = AArch64::MOVPRFX_ZZ;
596     LSLZero = AArch64::LSL_ZPmI_D;
597     MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
598     break;
599   default:
600     llvm_unreachable("Unsupported ElementSize");
601   }
602 
603   //
604   // Create the MOVPRFX (if required)
605   //
606   MachineInstrBuilder PRFX, DOP;
607   if (FalseZero) {
608     // If we cannot prefix the requested instruction we'll instead emit a
609     // If we cannot prefix the requested instruction, we'll instead emit a
610     assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
611             DType == AArch64::DestructiveBinaryComm ||
612             DType == AArch64::DestructiveBinaryCommWithRev) &&
613            "The destructive operand should be unique");
614     assert(ElementSize != AArch64::ElementSizeNone &&
615            "This instruction is unpredicated");
616 
617     // Merge source operand into destination register
618     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
619                .addReg(DstReg, RegState::Define)
620                .addReg(MI.getOperand(PredIdx).getReg())
621                .addReg(MI.getOperand(DOPIdx).getReg());
622 
623     // After the movprfx, the destructive operand is the same as Dst
624     DOPIdx = 0;
625 
626     // Create the additional LSL to zero the lanes when the DstReg is not
627     // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
628     // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
629     if ((DType == AArch64::DestructiveBinary ||
630          DType == AArch64::DestructiveBinaryComm ||
631          DType == AArch64::DestructiveBinaryCommWithRev) &&
632         !DOPRegIsUnique) {
633       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
634           .addReg(DstReg, RegState::Define)
635           .add(MI.getOperand(PredIdx))
636           .addReg(DstReg)
637           .addImm(0);
638     }
639   } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
640     assert(DOPRegIsUnique && "The destructive operand should be unique");
641     PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
642                .addReg(DstReg, RegState::Define)
643                .addReg(MI.getOperand(DOPIdx).getReg());
644     DOPIdx = 0;
645   }
646 
647   //
648   // Create the destructive operation
649   //
650   DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
651     .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));
652 
653   switch (DType) {
654   case AArch64::DestructiveUnaryPassthru:
655     DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
656         .add(MI.getOperand(PredIdx))
657         .add(MI.getOperand(SrcIdx));
658     break;
659   case AArch64::DestructiveBinary:
660   case AArch64::DestructiveBinaryImm:
661   case AArch64::DestructiveBinaryComm:
662   case AArch64::DestructiveBinaryCommWithRev:
663     DOP.add(MI.getOperand(PredIdx))
664        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
665        .add(MI.getOperand(SrcIdx));
666     break;
667   case AArch64::DestructiveTernaryCommWithRev:
668     DOP.add(MI.getOperand(PredIdx))
669         .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
670         .add(MI.getOperand(SrcIdx))
671         .add(MI.getOperand(Src2Idx));
672     break;
673   }
674 
675   if (PRFX) {
676     finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
677     transferImpOps(MI, PRFX, DOP);
678   } else
679     transferImpOps(MI, DOP, DOP);
680 
681   MI.eraseFromParent();
682   return true;
683 }
684 
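// Expand a set-tag loop pseudo (the zeroing variant is STZGloop_wback, as
// checked below) into a loop of post-indexed ST2G/STZ2G stores, preceded by
// a single STG/STZG when the size is an odd multiple of 16 bytes, with the
// size register decremented by 32 per iteration.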
685 bool AArch64ExpandPseudo::expandSetTagLoop(
686     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
687     MachineBasicBlock::iterator &NextMBBI) {
688   MachineInstr &MI = *MBBI;
689   DebugLoc DL = MI.getDebugLoc();
690   Register SizeReg = MI.getOperand(0).getReg();
691   Register AddressReg = MI.getOperand(1).getReg();
692 
693   MachineFunction *MF = MBB.getParent();
694 
695   bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
696   const unsigned OpCode1 =
697       ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
698   const unsigned OpCode2 =
699       ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;
700 
701   unsigned Size = MI.getOperand(2).getImm();
702   assert(Size > 0 && Size % 16 == 0);
703   if (Size % (16 * 2) != 0) {
704     BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
705         .addReg(AddressReg)
706         .addReg(AddressReg)
707         .addImm(1);
708     Size -= 16;
709   }
710   MachineBasicBlock::iterator I =
711       BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
712           .addImm(Size);
713   expandMOVImm(MBB, I, 64);
714 
715   auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
716   auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
717 
718   MF->insert(++MBB.getIterator(), LoopBB);
719   MF->insert(++LoopBB->getIterator(), DoneBB);
720 
721   BuildMI(LoopBB, DL, TII->get(OpCode2))
722       .addDef(AddressReg)
723       .addReg(AddressReg)
724       .addReg(AddressReg)
725       .addImm(2)
726       .cloneMemRefs(MI)
727       .setMIFlags(MI.getFlags());
728   BuildMI(LoopBB, DL, TII->get(AArch64::SUBSXri))
729       .addDef(SizeReg)
730       .addReg(SizeReg)
731       .addImm(16 * 2)
732       .addImm(0);
733   BuildMI(LoopBB, DL, TII->get(AArch64::Bcc))
734       .addImm(AArch64CC::NE)
735       .addMBB(LoopBB)
736       .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
737 
738   LoopBB->addSuccessor(LoopBB);
739   LoopBB->addSuccessor(DoneBB);
740 
741   DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
742   DoneBB->transferSuccessors(&MBB);
743 
744   MBB.addSuccessor(LoopBB);
745 
746   NextMBBI = MBB.end();
747   MI.eraseFromParent();
748   // Recompute liveness bottom up.
749   LivePhysRegs LiveRegs;
750   computeAndAddLiveIns(LiveRegs, *DoneBB);
751   computeAndAddLiveIns(LiveRegs, *LoopBB);
752   // Do an extra pass in the loop to get the loop-carried dependencies right.
753   // FIXME: is this necessary?
754   LoopBB->clearLiveIns();
755   computeAndAddLiveIns(LiveRegs, *LoopBB);
756   DoneBB->clearLiveIns();
757   computeAndAddLiveIns(LiveRegs, *DoneBB);
758 
759   return true;
760 }
761 
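// Expand a multi-register SVE spill/fill pseudo into N consecutive LDR/STR
// instructions on individual Z or P registers, taking sub-registers
// zsub0+I / psub0+I of the tuple operand and bumping the immediate offset
// by one register slot per iteration.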
762 bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
763                                              MachineBasicBlock::iterator MBBI,
764                                              unsigned Opc, unsigned N) {
765   assert((Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI ||
766           Opc == AArch64::LDR_PXI || Opc == AArch64::STR_PXI) &&
767          "Unexpected opcode");
768   unsigned RState = (Opc == AArch64::LDR_ZXI || Opc == AArch64::LDR_PXI)
769                         ? RegState::Define
770                         : 0;
771   unsigned sub0 = (Opc == AArch64::LDR_ZXI || Opc == AArch64::STR_ZXI)
772                       ? AArch64::zsub0
773                       : AArch64::psub0;
774   const TargetRegisterInfo *TRI =
775       MBB.getParent()->getSubtarget().getRegisterInfo();
776   MachineInstr &MI = *MBBI;
777   for (unsigned Offset = 0; Offset < N; ++Offset) {
778     int ImmOffset = MI.getOperand(2).getImm() + Offset;
779     bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
780     assert(ImmOffset >= -256 && ImmOffset < 256 &&
781            "Immediate spill offset out of range");
782     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
783         .addReg(TRI->getSubReg(MI.getOperand(0).getReg(), sub0 + Offset),
784                 RState)
785         .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
786         .addImm(ImmOffset);
787   }
788   MI.eraseFromParent();
789   return true;
790 }
791 
792 // Create a call with the passed opcode and explicit operands, copying over all
793 // the implicit operands from *MBBI, starting at the regmask.
794 static MachineInstr *createCallWithOps(MachineBasicBlock &MBB,
795                                        MachineBasicBlock::iterator MBBI,
796                                        const AArch64InstrInfo *TII,
797                                        unsigned Opcode,
798                                        ArrayRef<MachineOperand> ExplicitOps,
799                                        unsigned RegMaskStartIdx) {
800   // Build the MI, with explicit operands first (including the call target).
801   MachineInstr *Call = BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII->get(Opcode))
802                            .add(ExplicitOps)
803                            .getInstr();
804 
805   // Register arguments are added during ISel, but cannot be added as explicit
806   // operands of the branch, as it expects to be B <target>, which has only
807   // one operand. Instead they are implicit operands used by the branch.
808   while (!MBBI->getOperand(RegMaskStartIdx).isRegMask()) {
809     const MachineOperand &MOP = MBBI->getOperand(RegMaskStartIdx);
810     assert(MOP.isReg() && "can only add register operands");
811     Call->addOperand(MachineOperand::CreateReg(
812         MOP.getReg(), /*Def=*/false, /*Implicit=*/true, /*isKill=*/false,
813         /*isDead=*/false, /*isUndef=*/MOP.isUndef()));
814     RegMaskStartIdx++;
815   }
816   for (const MachineOperand &MO :
817        llvm::drop_begin(MBBI->operands(), RegMaskStartIdx))
818     Call->addOperand(MO);
819 
820   return Call;
821 }
822 
823 // Create a call to CallTarget, copying over all the operands from *MBBI,
824 // starting at the regmask.
825 static MachineInstr *createCall(MachineBasicBlock &MBB,
826                                 MachineBasicBlock::iterator MBBI,
827                                 const AArch64InstrInfo *TII,
828                                 MachineOperand &CallTarget,
829                                 unsigned RegMaskStartIdx) {
830   unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
831 
832   assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
833          "invalid operand for regular call");
834   return createCallWithOps(MBB, MBBI, TII, Opc, CallTarget, RegMaskStartIdx);
835 }
836 
837 bool AArch64ExpandPseudo::expandCALL_RVMARKER(
838     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
839   // Expand CALL_RVMARKER pseudo to:
840   // - a branch to the call target, followed by
841   // - the special `mov x29, x29` marker, and
842   // - another branch, to the runtime function
843   // Mark the sequence as a bundle, to avoid passes moving other code in between.
844   MachineInstr &MI = *MBBI;
845   MachineOperand &RVTarget = MI.getOperand(0);
846   assert(RVTarget.isGlobal() && "invalid operand for attached call");
847 
848   MachineInstr *OriginalCall = nullptr;
849 
850   if (MI.getOpcode() == AArch64::BLRA_RVMARKER) {
851     // ptrauth call.
852     const MachineOperand &CallTarget = MI.getOperand(1);
853     const MachineOperand &Key = MI.getOperand(2);
854     const MachineOperand &IntDisc = MI.getOperand(3);
855     const MachineOperand &AddrDisc = MI.getOperand(4);
856 
857     assert((Key.getImm() == AArch64PACKey::IA ||
858             Key.getImm() == AArch64PACKey::IB) &&
859            "Invalid auth call key");
860 
861     MachineOperand Ops[] = {CallTarget, Key, IntDisc, AddrDisc};
862 
863     OriginalCall = createCallWithOps(MBB, MBBI, TII, AArch64::BLRA, Ops,
864                                      /*RegMaskStartIdx=*/5);
865   } else {
866     assert(MI.getOpcode() == AArch64::BLR_RVMARKER && "unknown rvmarker MI");
867     OriginalCall = createCall(MBB, MBBI, TII, MI.getOperand(1),
868                               // Regmask starts after the RV and call targets.
869                               /*RegMaskStartIdx=*/2);
870   }
871 
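  // Emit the special `mov x29, x29` marker between the two calls; it is
  // encoded here as ORR x29, xzr, x29 (a canonical MOV alias).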
872   BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
873                      .addReg(AArch64::FP, RegState::Define)
874                      .addReg(AArch64::XZR)
875                      .addReg(AArch64::FP)
876                      .addImm(0);
877 
878   auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
879                      .add(RVTarget)
880                      .getInstr();
881 
882   if (MI.shouldUpdateAdditionalCallInfo())
883     MBB.getParent()->moveAdditionalCallInfo(&MI, OriginalCall);
884 
885   MI.eraseFromParent();
886   finalizeBundle(MBB, OriginalCall->getIterator(),
887                  std::next(RVCall->getIterator()));
888   return true;
889 }
890 
891 bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
892                                          MachineBasicBlock::iterator MBBI) {
893   // Expand CALL_BTI pseudo to:
894   // - a branch to the call target
895   // - a BTI instruction
896   // Mark the sequence as a bundle, to avoid passes moving other code in
897   // between.
898   MachineInstr &MI = *MBBI;
899   MachineInstr *Call = createCall(MBB, MBBI, TII, MI.getOperand(0),
900                                   // Regmask starts after the call target.
901                                   /*RegMaskStartIdx=*/1);
902 
903   Call->setCFIType(*MBB.getParent(), MI.getCFIType());
904 
905   MachineInstr *BTI =
906       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
907           // BTI J so that setjmp can BR to this.
908           .addImm(36)
909           .getInstr();
910 
911   if (MI.shouldUpdateAdditionalCallInfo())
912     MBB.getParent()->moveAdditionalCallInfo(&MI, Call);
913 
914   MI.eraseFromParent();
915   finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
916   return true;
917 }
918 
919 bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
920     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
921   Register CtxReg = MBBI->getOperand(0).getReg();
922   Register BaseReg = MBBI->getOperand(1).getReg();
923   int Offset = MBBI->getOperand(2).getImm();
924   DebugLoc DL(MBBI->getDebugLoc());
925   auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();
926 
927   if (STI.getTargetTriple().getArchName() != "arm64e") {
928     BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
929         .addUse(CtxReg)
930         .addUse(BaseReg)
931         .addImm(Offset / 8)
932         .setMIFlag(MachineInstr::FrameSetup);
933     MBBI->eraseFromParent();
934     return true;
935   }
936 
937   // We need to sign the context in an address-discriminated way. 0xc31a is a
938   // fixed random value, chosen as part of the ABI.
939   //     add x16, xBase, #Offset
940   //     movk x16, #0xc31a, lsl #48
941   //     mov x17, x22/xzr
942   //     pacdb x17, x16
943   //     str x17, [xBase, #Offset]
944   unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
945   BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
946       .addUse(BaseReg)
947       .addImm(abs(Offset))
948       .addImm(0)
949       .setMIFlag(MachineInstr::FrameSetup);
950   BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
951       .addUse(AArch64::X16)
952       .addImm(0xc31a)
953       .addImm(48)
954       .setMIFlag(MachineInstr::FrameSetup);
955   // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried), so
956   // move it somewhere before signing.
957   BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
958       .addUse(AArch64::XZR)
959       .addUse(CtxReg)
960       .addImm(0)
961       .setMIFlag(MachineInstr::FrameSetup);
962   BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
963       .addUse(AArch64::X17)
964       .addUse(AArch64::X16)
965       .setMIFlag(MachineInstr::FrameSetup);
966   BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
967       .addUse(AArch64::X17)
968       .addUse(BaseReg)
969       .addImm(Offset / 8)
970       .setMIFlag(MachineInstr::FrameSetup);
971 
972   MBBI->eraseFromParent();
973   return true;
974 }
975 
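// Expand the RestoreZA pseudo: test the saved TPIDR2_EL0 value (operand 0)
// and, when it is zero (typically meaning a callee committed the lazy save),
// branch to a split block that performs the restore call; otherwise skip it.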
976 MachineBasicBlock *
977 AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
978                                      MachineBasicBlock::iterator MBBI) {
979   MachineInstr &MI = *MBBI;
980   assert((std::next(MBBI) != MBB.end() ||
981           MI.getParent()->successors().begin() !=
982               MI.getParent()->successors().end()) &&
983          "Unexpected unreachable in block that restores ZA");
984 
985   // Compare TPIDR2_EL0 value against 0.
986   DebugLoc DL = MI.getDebugLoc();
987   MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
988                                 .add(MI.getOperand(0));
989 
990   // Split MBB and create two new blocks:
991   //  - MBB now contains all instructions before RestoreZAPseudo.
992   //  - SMBB contains the RestoreZAPseudo instruction only.
993   //  - EndBB contains all instructions after RestoreZAPseudo.
994   MachineInstr &PrevMI = *std::prev(MBBI);
995   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
996   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
997                                  ? *SMBB->successors().begin()
998                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
999 
1000   // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
1001   Cbz.addMBB(SMBB);
1002   BuildMI(&MBB, DL, TII->get(AArch64::B))
1003       .addMBB(EndBB);
1004   MBB.addSuccessor(EndBB);
1005 
1006   // Replace the pseudo with a call (BL).
1007   MachineInstrBuilder MIB =
1008       BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
1009   MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
1010   for (unsigned I = 2; I < MI.getNumOperands(); ++I)
1011     MIB.add(MI.getOperand(I));
1012   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1013 
1014   MI.eraseFromParent();
1015   return EndBB;
1016 }
1017 
1018 MachineBasicBlock *
1019 AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
1020                                         MachineBasicBlock::iterator MBBI) {
1021   MachineInstr &MI = *MBBI;
1022   // In the case of a smstart/smstop before an unreachable, just remove the
1023   // pseudo. Exception handling code generated by Clang may introduce
1024   // unreachables and it seems unnecessary to restore pstate.sm when that
1025   // happens. Note that this is not just an optimisation: the code below
1026   // expects a successor instruction/block in order to split the block at MBBI.
1027   if (std::next(MBBI) == MBB.end() &&
1028       MI.getParent()->successors().begin() ==
1029           MI.getParent()->successors().end()) {
1030     MI.eraseFromParent();
1031     return &MBB;
1032   }
1033 
1034   // Expand the pseudo into smstart or smstop instruction. The pseudo has the
1035   // following operands:
1036   //
1037   //   MSRpstatePseudo <za|sm|both>, <0|1>, condition[, pstate.sm], <regmask>
1038   //
1039   // The pseudo is expanded into a conditional smstart/smstop, with a
1040   // check if pstate.sm (register) equals the expected value, and if not,
1041   // invokes the smstart/smstop.
1042   //
1043   // As an example, the following block contains a normal call from a
1044   // streaming-compatible function:
1045   //
1046   // OrigBB:
1047   //   MSRpstatePseudo 3, 0, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTOP
1048   //   bl @normal_callee
1049   //   MSRpstatePseudo 3, 1, IfCallerIsStreaming, %0, <regmask>  <- Cond SMSTART
1050   //
1051   // ...which will be transformed into:
1052   //
1053   // OrigBB:
1054   //   TBNZx %0:gpr64, 0, SMBB
1055   //   b EndBB
1056   //
1057   // SMBB:
1058   //   MSRpstatesvcrImm1 3, 0, <regmask>                  <- SMSTOP
1059   //
1060   // EndBB:
1061   //   bl @normal_callee
1062   //   MSRcond_pstatesvcrImm1 3, 1, <regmask>             <- SMSTART
1063   //
1064   DebugLoc DL = MI.getDebugLoc();
1065 
1066   // Create the conditional branch based on the third operand of the
1067   // instruction, which tells us if we are wrapping a normal or streaming
1068   // function.
1069   // We test the live value of pstate.sm and toggle pstate.sm if this is not the
1070   // expected value for the callee (0 for a normal callee and 1 for a streaming
1071   // callee).
1072   unsigned Opc;
1073   switch (MI.getOperand(2).getImm()) {
1074   case AArch64SME::Always:
1075     llvm_unreachable("Should have matched to instruction directly");
1076   case AArch64SME::IfCallerIsStreaming:
1077     Opc = AArch64::TBNZW;
1078     break;
1079   case AArch64SME::IfCallerIsNonStreaming:
1080     Opc = AArch64::TBZW;
1081     break;
1082   }
1083   auto PStateSM = MI.getOperand(3).getReg();
1084   auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1085   unsigned SMReg32 = TRI->getSubReg(PStateSM, AArch64::sub_32);
1086   MachineInstrBuilder Tbx =
1087       BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(SMReg32).addImm(0);
1088 
1089   // Split MBB and create two new blocks:
1090   //  - MBB now contains all instructions before MSRcond_pstatesvcrImm1.
1091   //  - SMBB contains the MSRcond_pstatesvcrImm1 instruction only.
1092   //  - EndBB contains all instructions after MSRcond_pstatesvcrImm1.
1093   MachineInstr &PrevMI = *std::prev(MBBI);
1094   MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
1095   MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
1096                                  ? *SMBB->successors().begin()
1097                                  : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
1098 
1099   // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
1100   Tbx.addMBB(SMBB);
1101   BuildMI(&MBB, DL, TII->get(AArch64::B))
1102       .addMBB(EndBB);
1103   MBB.addSuccessor(EndBB);
1104 
1105   // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
1106   MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
1107                                     TII->get(AArch64::MSRpstatesvcrImm1));
1108   // Copy all operands except the condition (operand 2) and the pstate.sm
1109   // register (operand 3); those two are only used above to decide whether
1110   // the smstart/smstop executes and are not part of MSRpstatesvcrImm1.
1111   MIB.add(MI.getOperand(0));
1112   MIB.add(MI.getOperand(1));
1113   for (unsigned i = 4; i < MI.getNumOperands(); ++i)
1114     MIB.add(MI.getOperand(i));
1115 
1116   BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);
1117 
1118   MI.eraseFromParent();
1119   return EndBB;
1120 }
1121 
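// Expand a multi-vector load/store pseudo by picking either the contiguous
// or the strided opcode, depending on which register class the destination
// tuple was allocated to.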
1122 bool AArch64ExpandPseudo::expandMultiVecPseudo(
1123     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1124     TargetRegisterClass ContiguousClass, TargetRegisterClass StridedClass,
1125     unsigned ContiguousOp, unsigned StridedOpc) {
1126   MachineInstr &MI = *MBBI;
1127   Register Tuple = MI.getOperand(0).getReg();
1128 
1129   auto ContiguousRange = ContiguousClass.getRegisters();
1130   auto StridedRange = StridedClass.getRegisters();
1131   unsigned Opc;
1132   if (llvm::is_contained(ContiguousRange, Tuple.asMCReg())) {
1133     Opc = ContiguousOp;
1134   } else if (llvm::is_contained(StridedRange, Tuple.asMCReg())) {
1135     Opc = StridedOpc;
1136   } else
1137     llvm_unreachable("Cannot expand Multi-Vector pseudo");
1138 
1139   MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
1140                                 .add(MI.getOperand(0))
1141                                 .add(MI.getOperand(1))
1142                                 .add(MI.getOperand(2))
1143                                 .add(MI.getOperand(3));
1144   transferImpOps(MI, MIB, MIB);
1145   MI.eraseFromParent();
1146   return true;
1147 }
1148 
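// Expand a form-tuple pseudo of Size 2 or 4: copy each input Z register into
// the corresponding zsubN sub-register of the result tuple (using ORR_ZZZ as
// a vector move) whenever it is not already in place.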
1149 bool AArch64ExpandPseudo::expandFormTuplePseudo(
1150     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
1151     MachineBasicBlock::iterator &NextMBBI, unsigned Size) {
1152   assert((Size == 2 || Size == 4) && "Invalid Tuple Size");
1153   MachineInstr &MI = *MBBI;
1154   Register ReturnTuple = MI.getOperand(0).getReg();
1155 
1156   const TargetRegisterInfo *TRI =
1157       MBB.getParent()->getSubtarget().getRegisterInfo();
1158   for (unsigned I = 0; I < Size; ++I) {
1159     Register FormTupleOpReg = MI.getOperand(I + 1).getReg();
1160     Register ReturnTupleSubReg =
1161         TRI->getSubReg(ReturnTuple, AArch64::zsub0 + I);
1162     // Add copies to ensure the subregisters remain in the correct order
1163     // for any contiguous operation they are used by.
1164     if (FormTupleOpReg != ReturnTupleSubReg)
1165       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORR_ZZZ))
1166           .addReg(ReturnTupleSubReg, RegState::Define)
1167           .addReg(FormTupleOpReg)
1168           .addReg(FormTupleOpReg);
1169   }
1170 
1171   MI.eraseFromParent();
1172   return true;
1173 }
1174 
1175 /// If MBBI references a pseudo instruction that should be expanded here,
1176 /// do the expansion and return true.  Otherwise return false.
1177 bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
1178                                    MachineBasicBlock::iterator MBBI,
1179                                    MachineBasicBlock::iterator &NextMBBI) {
1180   MachineInstr &MI = *MBBI;
1181   unsigned Opcode = MI.getOpcode();
1182 
1183   // Check if we can expand the destructive op
1184   int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
1185   if (OrigInstr != -1) {
1186     auto &Orig = TII->get(OrigInstr);
1187     if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
1188         AArch64::NotDestructive) {
1189       return expand_DestructiveOp(MI, MBB, MBBI);
1190     }
1191   }
1192 
1193   switch (Opcode) {
1194   default:
1195     break;
1196 
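  // BSPv8i8/BSPv16i8 are pseudos for a NEON bitwise select with operands
  // roughly (dst, mask, value-if-set, value-if-clear). Depending on which
  // source aliases the destination register, they lower to BIT, BIF, or BSL,
  // with an extra ORR (vector mov) first when the mask must be moved into dst.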
1197   case AArch64::BSPv8i8:
1198   case AArch64::BSPv16i8: {
1199     Register DstReg = MI.getOperand(0).getReg();
1200     if (DstReg == MI.getOperand(3).getReg()) {
1201       // Expand to BIT
1202       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1203               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
1204                                                   : AArch64::BITv16i8))
1205           .add(MI.getOperand(0))
1206           .add(MI.getOperand(3))
1207           .add(MI.getOperand(2))
1208           .add(MI.getOperand(1));
1209     } else if (DstReg == MI.getOperand(2).getReg()) {
1210       // Expand to BIF
1211       BuildMI(MBB, MBBI, MI.getDebugLoc(),
1212               TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
1213                                                   : AArch64::BIFv16i8))
1214           .add(MI.getOperand(0))
1215           .add(MI.getOperand(2))
1216           .add(MI.getOperand(3))
1217           .add(MI.getOperand(1));
1218     } else {
1219       // Expand to BSL, use additional move if required
1220       if (DstReg == MI.getOperand(1).getReg()) {
1221         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1222                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1223                                                     : AArch64::BSLv16i8))
1224             .add(MI.getOperand(0))
1225             .add(MI.getOperand(1))
1226             .add(MI.getOperand(2))
1227             .add(MI.getOperand(3));
1228       } else {
1229         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1230                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
1231                                                     : AArch64::ORRv16i8))
1232             .addReg(DstReg,
1233                     RegState::Define |
1234                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1235             .add(MI.getOperand(1))
1236             .add(MI.getOperand(1));
1237         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1238                 TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
1239                                                     : AArch64::BSLv16i8))
1240             .add(MI.getOperand(0))
1241             .addReg(DstReg,
1242                     RegState::Kill |
1243                         getRenamableRegState(MI.getOperand(0).isRenamable()))
1244             .add(MI.getOperand(2))
1245             .add(MI.getOperand(3));
1246       }
1247     }
1248     MI.eraseFromParent();
1249     return true;
1250   }
1251 
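  // The register-register forms of these ALU ops carry no shift operand;
  // expand them to the equivalent shifted-register opcodes with an explicit
  // LSL #0, preserving PCSections metadata and the debug instruction number
  // as done below.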
1252   case AArch64::ADDWrr:
1253   case AArch64::SUBWrr:
1254   case AArch64::ADDXrr:
1255   case AArch64::SUBXrr:
1256   case AArch64::ADDSWrr:
1257   case AArch64::SUBSWrr:
1258   case AArch64::ADDSXrr:
1259   case AArch64::SUBSXrr:
1260   case AArch64::ANDWrr:
1261   case AArch64::ANDXrr:
1262   case AArch64::BICWrr:
1263   case AArch64::BICXrr:
1264   case AArch64::ANDSWrr:
1265   case AArch64::ANDSXrr:
1266   case AArch64::BICSWrr:
1267   case AArch64::BICSXrr:
1268   case AArch64::EONWrr:
1269   case AArch64::EONXrr:
1270   case AArch64::EORWrr:
1271   case AArch64::EORXrr:
1272   case AArch64::ORNWrr:
1273   case AArch64::ORNXrr:
1274   case AArch64::ORRWrr:
1275   case AArch64::ORRXrr: {
1276     unsigned Opcode;
1277     switch (MI.getOpcode()) {
1278     default:
1279       return false;
1280     case AArch64::ADDWrr:      Opcode = AArch64::ADDWrs; break;
1281     case AArch64::SUBWrr:      Opcode = AArch64::SUBWrs; break;
1282     case AArch64::ADDXrr:      Opcode = AArch64::ADDXrs; break;
1283     case AArch64::SUBXrr:      Opcode = AArch64::SUBXrs; break;
1284     case AArch64::ADDSWrr:     Opcode = AArch64::ADDSWrs; break;
1285     case AArch64::SUBSWrr:     Opcode = AArch64::SUBSWrs; break;
1286     case AArch64::ADDSXrr:     Opcode = AArch64::ADDSXrs; break;
1287     case AArch64::SUBSXrr:     Opcode = AArch64::SUBSXrs; break;
1288     case AArch64::ANDWrr:      Opcode = AArch64::ANDWrs; break;
1289     case AArch64::ANDXrr:      Opcode = AArch64::ANDXrs; break;
1290     case AArch64::BICWrr:      Opcode = AArch64::BICWrs; break;
1291     case AArch64::BICXrr:      Opcode = AArch64::BICXrs; break;
1292     case AArch64::ANDSWrr:     Opcode = AArch64::ANDSWrs; break;
1293     case AArch64::ANDSXrr:     Opcode = AArch64::ANDSXrs; break;
1294     case AArch64::BICSWrr:     Opcode = AArch64::BICSWrs; break;
1295     case AArch64::BICSXrr:     Opcode = AArch64::BICSXrs; break;
1296     case AArch64::EONWrr:      Opcode = AArch64::EONWrs; break;
1297     case AArch64::EONXrr:      Opcode = AArch64::EONXrs; break;
1298     case AArch64::EORWrr:      Opcode = AArch64::EORWrs; break;
1299     case AArch64::EORXrr:      Opcode = AArch64::EORXrs; break;
1300     case AArch64::ORNWrr:      Opcode = AArch64::ORNWrs; break;
1301     case AArch64::ORNXrr:      Opcode = AArch64::ORNXrs; break;
1302     case AArch64::ORRWrr:      Opcode = AArch64::ORRWrs; break;
1303     case AArch64::ORRXrr:      Opcode = AArch64::ORRXrs; break;
1304     }
1305     MachineFunction &MF = *MBB.getParent();
1306     // Try to create the new instruction without implicit operands added.
1307     MachineInstr *NewMI = MF.CreateMachineInstr(
1308         TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
1309     MBB.insert(MBBI, NewMI);
1310     MachineInstrBuilder MIB1(MF, NewMI);
1311     MIB1->setPCSections(MF, MI.getPCSections());
1312     MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
1313         .add(MI.getOperand(1))
1314         .add(MI.getOperand(2))
1315         .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
1316     transferImpOps(MI, MIB1, MIB1);
1317     if (auto DebugNumber = MI.peekDebugInstrNum())
1318       NewMI->setDebugInstrNum(DebugNumber);
1319     MI.eraseFromParent();
1320     return true;
1321   }
1322 
1323   case AArch64::LOADgot: {
1324     MachineFunction *MF = MBB.getParent();
1325     Register DstReg = MI.getOperand(0).getReg();
1326     const MachineOperand &MO1 = MI.getOperand(1);
1327     unsigned Flags = MO1.getTargetFlags();
1328 
1329     if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
1330       // Tiny code model: expand to LDR.
1331       MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1332                                         TII->get(AArch64::LDRXl), DstReg);
1333 
1334       if (MO1.isGlobal()) {
1335         MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
1336       } else if (MO1.isSymbol()) {
1337         MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
1338       } else {
1339         assert(MO1.isCPI() &&
1340                "Only expect globals, external symbols, or constant pools");
1341         MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
1342       }
1343     } else {
1344       // Small code model: expand into ADRP + LDR.
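           // Illustrative result for a global G on LP64 (roughly):
           //   adrp xD, :got:G
           //   ldr  xD, [xD, :got_lo12:G]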
1345       MachineFunction &MF = *MI.getParent()->getParent();
1346       DebugLoc DL = MI.getDebugLoc();
1347       MachineInstrBuilder MIB1 =
1348           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);
1349 
1350       MachineInstrBuilder MIB2;
1351       if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
1352         auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
1353         unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
1354         unsigned DstFlags = MI.getOperand(0).getTargetFlags();
1355         MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
1356                    .addDef(Reg32)
1357                    .addReg(DstReg, RegState::Kill)
1358                    .addReg(DstReg, DstFlags | RegState::Implicit);
1359       } else {
1360         Register DstReg = MI.getOperand(0).getReg();
1361         MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
1362                    .add(MI.getOperand(0))
1363                    .addUse(DstReg, RegState::Kill);
1364       }
1365 
1366       if (MO1.isGlobal()) {
1367         MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
1368         MIB2.addGlobalAddress(MO1.getGlobal(), 0,
1369                               Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1370       } else if (MO1.isSymbol()) {
1371         MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
1372         MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
1373                                                         AArch64II::MO_PAGEOFF |
1374                                                         AArch64II::MO_NC);
1375       } else {
1376         assert(MO1.isCPI() &&
1377                "Only expect globals, external symbols, or constant pools");
1378         MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1379                                   Flags | AArch64II::MO_PAGE);
1380         MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
1381                                   Flags | AArch64II::MO_PAGEOFF |
1382                                       AArch64II::MO_NC);
1383       }
1384 
1385       transferImpOps(MI, MIB1, MIB2);
1386     }
1387     MI.eraseFromParent();
1388     return true;
1389   }
1390   case AArch64::MOVaddrBA: {
1391     MachineFunction &MF = *MI.getParent()->getParent();
1392     if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
1393       // blockaddress expressions have to come from a constant pool because the
1394       // largest addend (and hence offset within a function) allowed for ADRP is
1395       // only 8MB.
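           // The expansion below therefore emits an ADRP of the constant-pool
           // entry's page followed by an LDR of the blockaddress value from
           // its page offset.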
1396       const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
1397       assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");
1398 
1399       MachineConstantPool *MCP = MF.getConstantPool();
1400       unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));
1401 
1402       Register DstReg = MI.getOperand(0).getReg();
1403       auto MIB1 =
1404           BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1405               .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
1406       auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
1407                           TII->get(AArch64::LDRXui), DstReg)
1408                       .addUse(DstReg)
1409                       .addConstantPoolIndex(
1410                           CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
1411       transferImpOps(MI, MIB1, MIB2);
1412       MI.eraseFromParent();
1413       return true;
1414     }
1415   }
1416     [[fallthrough]];
1417   case AArch64::MOVaddr:
1418   case AArch64::MOVaddrJT:
1419   case AArch64::MOVaddrCP:
1420   case AArch64::MOVaddrTLS:
1421   case AArch64::MOVaddrEXT: {
1422     // Expand into ADRP + ADD.
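         // Illustrative result for a symbol S (roughly):
         //   adrp xD, S
         //   add  xD, xD, :lo12:S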
1423     Register DstReg = MI.getOperand(0).getReg();
1424     assert(DstReg != AArch64::XZR);
1425     MachineInstrBuilder MIB1 =
1426         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
1427             .add(MI.getOperand(1));
1428 
1429     if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
1430       // MO_TAGGED on the page indicates a tagged address. Set the tag now.
1431       // We do so by creating a MOVK that sets bits 48-63 of the register to
1432       // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
1433       // the small code model so we can assume a binary size of <= 4GB, which
1434       // makes the untagged PC relative offset positive. The binary must also be
1435       // loaded into address range [0, 2^48). Both of these properties need to
1436       // be ensured at runtime when using tagged addresses.
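           // The resulting sequence is therefore ADRP (page address), MOVK
           // (tag into bits 48-63 via a PC-relative G3 operand with a 2^32
           // addend), and then the ADD of the low 12 bits emitted below.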
1437       auto Tag = MI.getOperand(1);
1438       Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
1439       Tag.setOffset(0x100000000);
1440       BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
1441           .addReg(DstReg)
1442           .add(Tag)
1443           .addImm(48);
1444     }
1445 
1446     MachineInstrBuilder MIB2 =
1447         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1448             .add(MI.getOperand(0))
1449             .addReg(DstReg)
1450             .add(MI.getOperand(2))
1451             .addImm(0);
1452 
1453     transferImpOps(MI, MIB1, MIB2);
1454     MI.eraseFromParent();
1455     return true;
1456   }
1457   case AArch64::ADDlowTLS:
1458     // Produce a plain ADD (ADDXri with a zero shift).
1459     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
1460         .add(MI.getOperand(0))
1461         .add(MI.getOperand(1))
1462         .add(MI.getOperand(2))
1463         .addImm(0);
1464     MI.eraseFromParent();
1465     return true;
1466 
1467   case AArch64::MOVbaseTLS: {
1468     Register DstReg = MI.getOperand(0).getReg();
1469     auto SysReg = AArch64SysReg::TPIDR_EL0;
1470     MachineFunction *MF = MBB.getParent();
1471     if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
1472       SysReg = AArch64SysReg::TPIDR_EL3;
1473     else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
1474       SysReg = AArch64SysReg::TPIDR_EL2;
1475     else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
1476       SysReg = AArch64SysReg::TPIDR_EL1;
1477     else if (MF->getSubtarget<AArch64Subtarget>().useROEL0ForTP())
1478       SysReg = AArch64SysReg::TPIDRRO_EL0;
1479     BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
1480         .addImm(SysReg);
1481     MI.eraseFromParent();
1482     return true;
1483   }
1484 
1485   case AArch64::MOVi32imm:
1486     return expandMOVImm(MBB, MBBI, 32);
1487   case AArch64::MOVi64imm:
1488     return expandMOVImm(MBB, MBBI, 64);
1489   case AArch64::RET_ReallyLR: {
1490     // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
1491     // function and missing live-ins. We are fine in practice because callee
1492     // saved register handling ensures the register value is restored before
1493     // RET, but we need the undef flag here to appease the MachineVerifier
1494     // liveness checks.
1495     MachineInstrBuilder MIB =
1496         BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
1497           .addReg(AArch64::LR, RegState::Undef);
1498     transferImpOps(MI, MIB, MIB);
1499     MI.eraseFromParent();
1500     return true;
1501   }
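       // The CMP_SWAP_* pseudos expand (in expandCMP_SWAP) to the usual
       // load-exclusive / compare / store-exclusive retry loop, parameterized
       // by the acquire/release and comparison opcodes passed below.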
1502   case AArch64::CMP_SWAP_8:
1503     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
1504                           AArch64::SUBSWrx,
1505                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
1506                           AArch64::WZR, NextMBBI);
1507   case AArch64::CMP_SWAP_16:
1508     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
1509                           AArch64::SUBSWrx,
1510                           AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
1511                           AArch64::WZR, NextMBBI);
1512   case AArch64::CMP_SWAP_32:
1513     return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
1514                           AArch64::SUBSWrs,
1515                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1516                           AArch64::WZR, NextMBBI);
1517   case AArch64::CMP_SWAP_64:
1518     return expandCMP_SWAP(MBB, MBBI,
1519                           AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
1520                           AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
1521                           AArch64::XZR, NextMBBI);
1522   case AArch64::CMP_SWAP_128:
1523   case AArch64::CMP_SWAP_128_RELEASE:
1524   case AArch64::CMP_SWAP_128_ACQUIRE:
1525   case AArch64::CMP_SWAP_128_MONOTONIC:
1526     return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);
1527 
1528   case AArch64::AESMCrrTied:
1529   case AArch64::AESIMCrrTied: {
1530     MachineInstrBuilder MIB =
1531         BuildMI(MBB, MBBI, MI.getDebugLoc(),
1532                 TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
1533                                                         : AArch64::AESIMCrr))
1534             .add(MI.getOperand(0))
1535             .add(MI.getOperand(1));
1536     transferImpOps(MI, MIB, MIB);
1537     MI.eraseFromParent();
1538     return true;
1539    }
1540    case AArch64::IRGstack: {
1541      MachineFunction &MF = *MBB.getParent();
1542      const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
1543      const AArch64FrameLowering *TFI =
1544          MF.getSubtarget<AArch64Subtarget>().getFrameLowering();
1545 
1546      // IRG does not allow an immediate offset. getTaggedBasePointerOffset should
1547      // almost always point to SP-after-prologue; if not, emit a longer
1548      // instruction sequence.
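          // That is, if the tagged base cannot be addressed directly from
          // FrameReg, the offset is first materialized into the destination
          // register (via emitFrameOffset below) and IRG then uses that
          // register as its source.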
1549      int BaseOffset = -AFI->getTaggedBasePointerOffset();
1550      Register FrameReg;
1551      StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
1552          MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
1553          /*PreferFP=*/false,
1554          /*ForSimm=*/true);
1555      Register SrcReg = FrameReg;
1556      if (FrameRegOffset) {
1557        // Use output register as temporary.
1558        SrcReg = MI.getOperand(0).getReg();
1559        emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
1560                        FrameRegOffset, TII);
1561      }
1562      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
1563          .add(MI.getOperand(0))
1564          .addUse(SrcReg)
1565          .add(MI.getOperand(2));
1566      MI.eraseFromParent();
1567      return true;
1568    }
1569    case AArch64::TAGPstack: {
1570      int64_t Offset = MI.getOperand(2).getImm();
1571      BuildMI(MBB, MBBI, MI.getDebugLoc(),
1572              TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
1573          .add(MI.getOperand(0))
1574          .add(MI.getOperand(1))
1575          .addImm(std::abs(Offset))
1576          .add(MI.getOperand(4));
1577      MI.eraseFromParent();
1578      return true;
1579    }
1580    case AArch64::STGloop_wback:
1581    case AArch64::STZGloop_wback:
1582      return expandSetTagLoop(MBB, MBBI, NextMBBI);
1583    case AArch64::STGloop:
1584    case AArch64::STZGloop:
1585      report_fatal_error(
1586          "Non-writeback variants of STGloop / STZGloop should not "
1587          "survive past PrologEpilogInserter.");
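        // Multi-register SVE spill/fill pseudos expand to the given number of
        // consecutive single-vector (or predicate) stores/loads; see
        // expandSVESpillFill.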
1588    case AArch64::STR_ZZZZXI:
1589      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
1590    case AArch64::STR_ZZZXI:
1591      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
1592    case AArch64::STR_ZZXI:
1593      return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
1594    case AArch64::STR_PPXI:
1595      return expandSVESpillFill(MBB, MBBI, AArch64::STR_PXI, 2);
1596    case AArch64::LDR_ZZZZXI:
1597      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
1598    case AArch64::LDR_ZZZXI:
1599      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
1600    case AArch64::LDR_ZZXI:
1601      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
1602    case AArch64::LDR_PPXI:
1603      return expandSVESpillFill(MBB, MBBI, AArch64::LDR_PXI, 2);
1604    case AArch64::BLR_RVMARKER:
1605    case AArch64::BLRA_RVMARKER:
1606      return expandCALL_RVMARKER(MBB, MBBI);
1607    case AArch64::BLR_BTI:
1608      return expandCALL_BTI(MBB, MBBI);
1609    case AArch64::StoreSwiftAsyncContext:
1610      return expandStoreSwiftAsyncContext(MBB, MBBI);
1611    case AArch64::RestoreZAPseudo: {
1612      auto *NewMBB = expandRestoreZA(MBB, MBBI);
1613      if (NewMBB != &MBB)
1614        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1615      return true;
1616    }
1617    case AArch64::MSRpstatePseudo: {
1618      auto *NewMBB = expandCondSMToggle(MBB, MBBI);
1619      if (NewMBB != &MBB)
1620        NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
1621      return true;
1622    }
1623    case AArch64::COALESCER_BARRIER_FPR16:
1624    case AArch64::COALESCER_BARRIER_FPR32:
1625    case AArch64::COALESCER_BARRIER_FPR64:
1626    case AArch64::COALESCER_BARRIER_FPR128:
1627      MI.eraseFromParent();
1628      return true;
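        // The multi-vector load pseudos below choose between the contiguous
        // and strided register-class variants of each load; see
        // expandMultiVecPseudo.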
1629    case AArch64::LD1B_2Z_IMM_PSEUDO:
1630      return expandMultiVecPseudo(
1631          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1632          AArch64::LD1B_2Z_IMM, AArch64::LD1B_2Z_STRIDED_IMM);
1633    case AArch64::LD1H_2Z_IMM_PSEUDO:
1634      return expandMultiVecPseudo(
1635          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1636          AArch64::LD1H_2Z_IMM, AArch64::LD1H_2Z_STRIDED_IMM);
1637    case AArch64::LD1W_2Z_IMM_PSEUDO:
1638      return expandMultiVecPseudo(
1639          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1640          AArch64::LD1W_2Z_IMM, AArch64::LD1W_2Z_STRIDED_IMM);
1641    case AArch64::LD1D_2Z_IMM_PSEUDO:
1642      return expandMultiVecPseudo(
1643          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1644          AArch64::LD1D_2Z_IMM, AArch64::LD1D_2Z_STRIDED_IMM);
1645    case AArch64::LDNT1B_2Z_IMM_PSEUDO:
1646      return expandMultiVecPseudo(
1647          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1648          AArch64::LDNT1B_2Z_IMM, AArch64::LDNT1B_2Z_STRIDED_IMM);
1649    case AArch64::LDNT1H_2Z_IMM_PSEUDO:
1650      return expandMultiVecPseudo(
1651          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1652          AArch64::LDNT1H_2Z_IMM, AArch64::LDNT1H_2Z_STRIDED_IMM);
1653    case AArch64::LDNT1W_2Z_IMM_PSEUDO:
1654      return expandMultiVecPseudo(
1655          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1656          AArch64::LDNT1W_2Z_IMM, AArch64::LDNT1W_2Z_STRIDED_IMM);
1657    case AArch64::LDNT1D_2Z_IMM_PSEUDO:
1658      return expandMultiVecPseudo(
1659          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1660          AArch64::LDNT1D_2Z_IMM, AArch64::LDNT1D_2Z_STRIDED_IMM);
1661    case AArch64::LD1B_2Z_PSEUDO:
1662      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1663                                  AArch64::ZPR2StridedRegClass, AArch64::LD1B_2Z,
1664                                  AArch64::LD1B_2Z_STRIDED);
1665    case AArch64::LD1H_2Z_PSEUDO:
1666      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1667                                  AArch64::ZPR2StridedRegClass, AArch64::LD1H_2Z,
1668                                  AArch64::LD1H_2Z_STRIDED);
1669    case AArch64::LD1W_2Z_PSEUDO:
1670      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1671                                  AArch64::ZPR2StridedRegClass, AArch64::LD1W_2Z,
1672                                  AArch64::LD1W_2Z_STRIDED);
1673    case AArch64::LD1D_2Z_PSEUDO:
1674      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR2RegClass,
1675                                  AArch64::ZPR2StridedRegClass, AArch64::LD1D_2Z,
1676                                  AArch64::LD1D_2Z_STRIDED);
1677    case AArch64::LDNT1B_2Z_PSEUDO:
1678      return expandMultiVecPseudo(
1679          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1680          AArch64::LDNT1B_2Z, AArch64::LDNT1B_2Z_STRIDED);
1681    case AArch64::LDNT1H_2Z_PSEUDO:
1682      return expandMultiVecPseudo(
1683          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1684          AArch64::LDNT1H_2Z, AArch64::LDNT1H_2Z_STRIDED);
1685    case AArch64::LDNT1W_2Z_PSEUDO:
1686      return expandMultiVecPseudo(
1687          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1688          AArch64::LDNT1W_2Z, AArch64::LDNT1W_2Z_STRIDED);
1689    case AArch64::LDNT1D_2Z_PSEUDO:
1690      return expandMultiVecPseudo(
1691          MBB, MBBI, AArch64::ZPR2RegClass, AArch64::ZPR2StridedRegClass,
1692          AArch64::LDNT1D_2Z, AArch64::LDNT1D_2Z_STRIDED);
1693    case AArch64::LD1B_4Z_IMM_PSEUDO:
1694      return expandMultiVecPseudo(
1695          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1696          AArch64::LD1B_4Z_IMM, AArch64::LD1B_4Z_STRIDED_IMM);
1697    case AArch64::LD1H_4Z_IMM_PSEUDO:
1698      return expandMultiVecPseudo(
1699          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1700          AArch64::LD1H_4Z_IMM, AArch64::LD1H_4Z_STRIDED_IMM);
1701    case AArch64::LD1W_4Z_IMM_PSEUDO:
1702      return expandMultiVecPseudo(
1703          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1704          AArch64::LD1W_4Z_IMM, AArch64::LD1W_4Z_STRIDED_IMM);
1705    case AArch64::LD1D_4Z_IMM_PSEUDO:
1706      return expandMultiVecPseudo(
1707          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1708          AArch64::LD1D_4Z_IMM, AArch64::LD1D_4Z_STRIDED_IMM);
1709    case AArch64::LDNT1B_4Z_IMM_PSEUDO:
1710      return expandMultiVecPseudo(
1711          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1712          AArch64::LDNT1B_4Z_IMM, AArch64::LDNT1B_4Z_STRIDED_IMM);
1713    case AArch64::LDNT1H_4Z_IMM_PSEUDO:
1714      return expandMultiVecPseudo(
1715          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1716          AArch64::LDNT1H_4Z_IMM, AArch64::LDNT1H_4Z_STRIDED_IMM);
1717    case AArch64::LDNT1W_4Z_IMM_PSEUDO:
1718      return expandMultiVecPseudo(
1719          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1720          AArch64::LDNT1W_4Z_IMM, AArch64::LDNT1W_4Z_STRIDED_IMM);
1721    case AArch64::LDNT1D_4Z_IMM_PSEUDO:
1722      return expandMultiVecPseudo(
1723          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1724          AArch64::LDNT1D_4Z_IMM, AArch64::LDNT1D_4Z_STRIDED_IMM);
1725    case AArch64::LD1B_4Z_PSEUDO:
1726      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1727                                  AArch64::ZPR4StridedRegClass, AArch64::LD1B_4Z,
1728                                  AArch64::LD1B_4Z_STRIDED);
1729    case AArch64::LD1H_4Z_PSEUDO:
1730      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1731                                  AArch64::ZPR4StridedRegClass, AArch64::LD1H_4Z,
1732                                  AArch64::LD1H_4Z_STRIDED);
1733    case AArch64::LD1W_4Z_PSEUDO:
1734      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1735                                  AArch64::ZPR4StridedRegClass, AArch64::LD1W_4Z,
1736                                  AArch64::LD1W_4Z_STRIDED);
1737    case AArch64::LD1D_4Z_PSEUDO:
1738      return expandMultiVecPseudo(MBB, MBBI, AArch64::ZPR4RegClass,
1739                                  AArch64::ZPR4StridedRegClass, AArch64::LD1D_4Z,
1740                                  AArch64::LD1D_4Z_STRIDED);
1741    case AArch64::LDNT1B_4Z_PSEUDO:
1742      return expandMultiVecPseudo(
1743          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1744          AArch64::LDNT1B_4Z, AArch64::LDNT1B_4Z_STRIDED);
1745    case AArch64::LDNT1H_4Z_PSEUDO:
1746      return expandMultiVecPseudo(
1747          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1748          AArch64::LDNT1H_4Z, AArch64::LDNT1H_4Z_STRIDED);
1749    case AArch64::LDNT1W_4Z_PSEUDO:
1750      return expandMultiVecPseudo(
1751          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1752          AArch64::LDNT1W_4Z, AArch64::LDNT1W_4Z_STRIDED);
1753    case AArch64::LDNT1D_4Z_PSEUDO:
1754      return expandMultiVecPseudo(
1755          MBB, MBBI, AArch64::ZPR4RegClass, AArch64::ZPR4StridedRegClass,
1756          AArch64::LDNT1D_4Z, AArch64::LDNT1D_4Z_STRIDED);
1757    case AArch64::FORM_TRANSPOSED_REG_TUPLE_X2_PSEUDO:
1758      return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 2);
1759    case AArch64::FORM_TRANSPOSED_REG_TUPLE_X4_PSEUDO:
1760      return expandFormTuplePseudo(MBB, MBBI, NextMBBI, 4);
1761   }
1762   return false;
1763 }
1764 
1765 /// Iterate over the instructions in basic block MBB and expand any
1766 /// pseudo instructions.  Return true if anything was modified.
1767 bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
1768   bool Modified = false;
1769 
1770   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
1771   while (MBBI != E) {
1772     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
1773     Modified |= expandMI(MBB, MBBI, NMBBI);
1774     MBBI = NMBBI;
1775   }
1776 
1777   return Modified;
1778 }
1779 
1780 bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
1781   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
1782 
1783   bool Modified = false;
1784   for (auto &MBB : MF)
1785     Modified |= expandMBB(MBB);
1786   return Modified;
1787 }
1788 
1789 /// Returns an instance of the pseudo instruction expansion pass.
1790 FunctionPass *llvm::createAArch64ExpandPseudoPass() {
1791   return new AArch64ExpandPseudo();
1792 }
1793