//===- AArch64ExpandPseudoInsts.cpp - Expand pseudo instructions ----------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains a pass that expands pseudo instructions into target
// instructions to allow proper scheduling and other late optimizations. This
// pass should be run after register allocation but before the post-regalloc
// scheduling pass.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "AArch64MachineFunctionInfo.h"
#include "AArch64Subtarget.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "Utils/AArch64BaseInfo.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Triple.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/Pass.h"
#include "llvm/Support/CodeGen.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Target/TargetMachine.h"
#include <cassert>
#include <cstdint>
#include <iterator>
#include <limits>
#include <utility>

using namespace llvm;

#define AARCH64_EXPAND_PSEUDO_NAME "AArch64 pseudo instruction expansion pass"

namespace {

class AArch64ExpandPseudo : public MachineFunctionPass {
public:
  const AArch64InstrInfo *TII;

  static char ID;

  AArch64ExpandPseudo() : MachineFunctionPass(ID) {
    initializeAArch64ExpandPseudoPass(*PassRegistry::getPassRegistry());
  }

  bool runOnMachineFunction(MachineFunction &Fn) override;

  StringRef getPassName() const override { return AARCH64_EXPAND_PSEUDO_NAME; }

private:
  bool expandMBB(MachineBasicBlock &MBB);
  bool expandMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                MachineBasicBlock::iterator &NextMBBI);
  bool expandMOVImm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                    unsigned BitSize);

  bool expand_DestructiveOp(MachineInstr &MI, MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI);
  bool expandCMP_SWAP(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
                      unsigned LdarOp, unsigned StlrOp, unsigned CmpOp,
                      unsigned ExtendImm, unsigned ZeroReg,
                      MachineBasicBlock::iterator &NextMBBI);
  bool expandCMP_SWAP_128(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI,
                          MachineBasicBlock::iterator &NextMBBI);
  bool expandSetTagLoop(MachineBasicBlock &MBB,
                        MachineBasicBlock::iterator MBBI,
                        MachineBasicBlock::iterator &NextMBBI);
  bool expandSVESpillFill(MachineBasicBlock &MBB,
                          MachineBasicBlock::iterator MBBI, unsigned Opc,
                          unsigned N);
  bool expandCALL_RVMARKER(MachineBasicBlock &MBB,
                           MachineBasicBlock::iterator MBBI);
  bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI);
  bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI);
  MachineBasicBlock *expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI);
};

} // end anonymous namespace

char AArch64ExpandPseudo::ID = 0;

INITIALIZE_PASS(AArch64ExpandPseudo, "aarch64-expand-pseudo",
                AARCH64_EXPAND_PSEUDO_NAME, false, false)

/// Transfer implicit operands on the pseudo instruction to the
/// instructions created from the expansion.
static void transferImpOps(MachineInstr &OldMI, MachineInstrBuilder &UseMI,
                           MachineInstrBuilder &DefMI) {
  const MCInstrDesc &Desc = OldMI.getDesc();
  for (const MachineOperand &MO :
       llvm::drop_begin(OldMI.operands(), Desc.getNumOperands())) {
    assert(MO.isReg() && MO.getReg());
    if (MO.isUse())
      UseMI.add(MO);
    else
      DefMI.add(MO);
  }
}

/// Expand a MOVi32imm or MOVi64imm pseudo instruction to one or more
/// real move-immediate instructions to synthesize the immediate.
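///
/// For example (illustrative only; the actual sequence is chosen by
/// AArch64_IMM::expandMOVImm):
///
///   MOVi64imm x0, #0x12340000abcd0000
///     =>
///   movz x0, #0xabcd, lsl #16
///   movk x0, #0x1234, lsl #48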
bool AArch64ExpandPseudo::expandMOVImm(MachineBasicBlock &MBB,
                                       MachineBasicBlock::iterator MBBI,
                                       unsigned BitSize) {
  MachineInstr &MI = *MBBI;
  Register DstReg = MI.getOperand(0).getReg();
  uint64_t RenamableState =
      MI.getOperand(0).isRenamable() ? RegState::Renamable : 0;
  uint64_t Imm = MI.getOperand(1).getImm();

  if (DstReg == AArch64::XZR || DstReg == AArch64::WZR) {
    // Useless def, and we don't want to risk creating an invalid ORR (which
    // would really write to sp).
    MI.eraseFromParent();
    return true;
  }

  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, BitSize, Insn);
  assert(Insn.size() != 0);

  SmallVector<MachineInstrBuilder, 4> MIBS;
  for (auto I = Insn.begin(), E = Insn.end(); I != E; ++I) {
    bool LastItem = std::next(I) == E;
    switch (I->Opcode)
    {
    default: llvm_unreachable("unhandled!"); break;

    case AArch64::ORRWri:
    case AArch64::ORRXri:
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .add(MI.getOperand(0))
        .addReg(BitSize == 32 ? AArch64::WZR : AArch64::XZR)
        .addImm(I->Op2));
      break;
    case AArch64::MOVNWi:
    case AArch64::MOVNXi:
    case AArch64::MOVZWi:
    case AArch64::MOVZXi: {
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .addReg(DstReg, RegState::Define |
                getDeadRegState(DstIsDead && LastItem) |
                RenamableState)
        .addImm(I->Op1)
        .addImm(I->Op2));
      } break;
    case AArch64::MOVKWi:
    case AArch64::MOVKXi: {
      Register DstReg = MI.getOperand(0).getReg();
      bool DstIsDead = MI.getOperand(0).isDead();
      MIBS.push_back(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(I->Opcode))
        .addReg(DstReg,
                RegState::Define |
                getDeadRegState(DstIsDead && LastItem) |
                RenamableState)
        .addReg(DstReg)
        .addImm(I->Op1)
        .addImm(I->Op2));
      } break;
    }
  }
  transferImpOps(MI, MIBS.front(), MIBS.back());
  MI.eraseFromParent();
  return true;
}

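/// Expand a CMP_SWAP_{8,16,32,64} pseudo into an explicit load-exclusive /
/// compare / store-exclusive retry loop spread over three new basic blocks.
/// The width-specific load-acquire, store-release and compare opcodes are
/// passed in by the caller (see expandMI), so this one routine serves all
/// scalar widths.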
bool AArch64ExpandPseudo::expandCMP_SWAP(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, unsigned LdarOp,
    unsigned StlrOp, unsigned CmpOp, unsigned ExtendImm, unsigned ZeroReg,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  const MachineOperand &Dest = MI.getOperand(0);
  Register StatusReg = MI.getOperand(1).getReg();
  bool StatusDead = MI.getOperand(1).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the
  // same value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(2).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(2).getReg();
  Register DesiredReg = MI.getOperand(3).getReg();
  Register NewReg = MI.getOperand(4).getReg();

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     mov wStatus, 0
  //     ldaxr xDest, [xAddr]
  //     cmp xDest, xDesired
  //     b.ne .Ldone
  if (!StatusDead)
    BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::MOVZWi), StatusReg)
      .addImm(0).addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(LdarOp), Dest.getReg())
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(CmpOp), ZeroReg)
      .addReg(Dest.getReg(), getKillRegState(Dest.isDead()))
      .addReg(DesiredReg)
      .addImm(ExtendImm);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::Bcc))
      .addImm(AArch64CC::NE)
      .addMBB(DoneBB)
      .addReg(AArch64::NZCV, RegState::Implicit | RegState::Kill);
  LoadCmpBB->addSuccessor(DoneBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxr wStatus, xNew, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StlrOp), StatusReg)
      .addReg(NewReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute livein lists.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);
  // Do an extra pass around the loop to get loop carried registers right.
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

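/// Expand a 128-bit CMP_SWAP_128* pseudo into an LDXP/STXP loop. The four
/// pseudo variants differ only in the acquire/release semantics of the
/// paired load/store opcodes chosen below. A dedicated failure block stores
/// the loaded value back even when the comparison fails; without a
/// successful paired store, the 128-bit exclusive load would not be
/// guaranteed to read the pair atomically.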
bool AArch64ExpandPseudo::expandCMP_SWAP_128(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  MIMetadata MIMD(MI);
  MachineOperand &DestLo = MI.getOperand(0);
  MachineOperand &DestHi = MI.getOperand(1);
  Register StatusReg = MI.getOperand(2).getReg();
  bool StatusDead = MI.getOperand(2).isDead();
  // Duplicating undef operands into 2 instructions does not guarantee the
  // same value on both; however, undef should be replaced by xzr anyway.
  assert(!MI.getOperand(3).isUndef() && "cannot handle undef");
  Register AddrReg = MI.getOperand(3).getReg();
  Register DesiredLoReg = MI.getOperand(4).getReg();
  Register DesiredHiReg = MI.getOperand(5).getReg();
  Register NewLoReg = MI.getOperand(6).getReg();
  Register NewHiReg = MI.getOperand(7).getReg();

  unsigned LdxpOp, StxpOp;

  switch (MI.getOpcode()) {
  case AArch64::CMP_SWAP_128_MONOTONIC:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128_RELEASE:
    LdxpOp = AArch64::LDXPX;
    StxpOp = AArch64::STLXPX;
    break;
  case AArch64::CMP_SWAP_128_ACQUIRE:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STXPX;
    break;
  case AArch64::CMP_SWAP_128:
    LdxpOp = AArch64::LDAXPX;
    StxpOp = AArch64::STLXPX;
    break;
  default:
    llvm_unreachable("Unexpected opcode");
  }

  MachineFunction *MF = MBB.getParent();
  auto LoadCmpBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto StoreBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto FailBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoadCmpBB);
  MF->insert(++LoadCmpBB->getIterator(), StoreBB);
  MF->insert(++StoreBB->getIterator(), FailBB);
  MF->insert(++FailBB->getIterator(), DoneBB);

  // .Lloadcmp:
  //     ldaxp xDestLo, xDestHi, [xAddr]
  //     cmp xDestLo, xDesiredLo
  //     sbcs xDestHi, xDesiredHi
  //     b.ne .Ldone
  BuildMI(LoadCmpBB, MIMD, TII->get(LdxpOp))
      .addReg(DestLo.getReg(), RegState::Define)
      .addReg(DestHi.getReg(), RegState::Define)
      .addReg(AddrReg);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestLo.getReg(), getKillRegState(DestLo.isDead()))
      .addReg(DesiredLoReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(AArch64::WZR)
      .addUse(AArch64::WZR)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::SUBSXrs), AArch64::XZR)
      .addReg(DestHi.getReg(), getKillRegState(DestHi.isDead()))
      .addReg(DesiredHiReg)
      .addImm(0);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CSINCWr), StatusReg)
      .addUse(StatusReg, RegState::Kill)
      .addUse(StatusReg, RegState::Kill)
      .addImm(AArch64CC::EQ);
  BuildMI(LoadCmpBB, MIMD, TII->get(AArch64::CBNZW))
      .addUse(StatusReg, getKillRegState(StatusDead))
      .addMBB(FailBB);
  LoadCmpBB->addSuccessor(FailBB);
  LoadCmpBB->addSuccessor(StoreBB);

  // .Lstore:
  //     stlxp wStatus, xNewLo, xNewHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(StoreBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(NewLoReg)
      .addReg(NewHiReg)
      .addReg(AddrReg);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  BuildMI(StoreBB, MIMD, TII->get(AArch64::B)).addMBB(DoneBB);
  StoreBB->addSuccessor(LoadCmpBB);
  StoreBB->addSuccessor(DoneBB);

  // .Lfail:
  //     stlxp wStatus, xDestLo, xDestHi, [xAddr]
  //     cbnz wStatus, .Lloadcmp
  BuildMI(FailBB, MIMD, TII->get(StxpOp), StatusReg)
      .addReg(DestLo.getReg())
      .addReg(DestHi.getReg())
      .addReg(AddrReg);
  BuildMI(FailBB, MIMD, TII->get(AArch64::CBNZW))
      .addReg(StatusReg, getKillRegState(StatusDead))
      .addMBB(LoadCmpBB);
  FailBB->addSuccessor(LoadCmpBB);
  FailBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoadCmpBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();

  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *FailBB);
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  // Do an extra pass in the loop to get the loop carried dependencies right.
  FailBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *FailBB);
  StoreBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *StoreBB);
  LoadCmpBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoadCmpBB);

  return true;
}

/// \brief Expand Pseudos to Instructions with destructive operands.
///
/// This mechanism uses MOVPRFX instructions for zeroing the false lanes
/// or for fixing relaxed register allocation conditions to comply with
/// the instruction's register constraints. The latter case may be cheaper
/// than setting the register constraints in the register allocator,
/// since that will insert regular MOV instructions rather than MOVPRFX.
///
/// Example (after register allocation):
///
///   FSUB_ZPZZ_ZERO_B Z0, Pg, Z1, Z0
///
/// * The Pseudo FSUB_ZPZZ_ZERO_B maps to FSUB_ZPmZ_B.
/// * We cannot map directly to FSUB_ZPmZ_B because the register
///   constraints of the instruction are not met.
/// * Also the _ZERO specifies the false lanes need to be zeroed.
///
/// We first try to see if the destructive operand == result operand,
/// if not, we try to swap the operands, e.g.
///
///   FSUB_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// But because FSUB_ZPmZ is not commutative, this is semantically
/// different, so we need a reverse instruction:
///
///   FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Then we implement the zeroing of the false lanes of Z0 by adding
/// a zeroing MOVPRFX instruction:
///
///   MOVPRFX_ZPzZ_B Z0, Pg/z, Z0
///   FSUBR_ZPmZ_B Z0, Pg/m, Z0, Z1
///
/// Note that this can only be done for _ZERO or _UNDEF variants where
/// we can guarantee the false lanes to be zeroed (by implementing this)
/// or that they are undef (don't care / not used), otherwise the
/// swapping of operands is illegal because the operation is not
/// (or cannot be emulated to be) fully commutative.
bool AArch64ExpandPseudo::expand_DestructiveOp(
    MachineInstr &MI,
    MachineBasicBlock &MBB,
    MachineBasicBlock::iterator MBBI) {
  unsigned Opcode = AArch64::getSVEPseudoMap(MI.getOpcode());
  uint64_t DType = TII->get(Opcode).TSFlags & AArch64::DestructiveInstTypeMask;
  uint64_t FalseLanes = MI.getDesc().TSFlags & AArch64::FalseLanesMask;
  bool FalseZero = FalseLanes == AArch64::FalseLanesZero;
  Register DstReg = MI.getOperand(0).getReg();
  bool DstIsDead = MI.getOperand(0).isDead();
  bool UseRev = false;
  unsigned PredIdx, DOPIdx, SrcIdx, Src2Idx;

  switch (DType) {
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    if (DstReg == MI.getOperand(3).getReg()) {
      // FSUB Zd, Pg, Zs1, Zd ==> FSUBR Zd, Pg/m, Zd, Zs1
      std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 3, 2);
      UseRev = true;
      break;
    }
    [[fallthrough]];
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(1, 2, 3);
    break;
  case AArch64::DestructiveUnaryPassthru:
    std::tie(PredIdx, DOPIdx, SrcIdx) = std::make_tuple(2, 3, 3);
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 2, 3, 4);
    if (DstReg == MI.getOperand(3).getReg()) {
      // FMLA Zd, Pg, Za, Zd, Zm ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 3, 4, 2);
      UseRev = true;
    } else if (DstReg == MI.getOperand(4).getReg()) {
      // FMLA Zd, Pg, Za, Zm, Zd ==> FMAD Zdn, Pg, Zm, Za
      std::tie(PredIdx, DOPIdx, SrcIdx, Src2Idx) = std::make_tuple(1, 4, 3, 2);
      UseRev = true;
    }
    break;
  default:
    llvm_unreachable("Unsupported Destructive Operand type");
  }

  // MOVPRFX can only be used if the destination operand
  // is the destructive operand, not as any other operand,
  // so the Destructive Operand must be unique.
  bool DOPRegIsUnique = false;
  switch (DType) {
  case AArch64::DestructiveBinary:
    DOPRegIsUnique = DstReg != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg();
    break;
  case AArch64::DestructiveUnaryPassthru:
  case AArch64::DestructiveBinaryImm:
    DOPRegIsUnique = true;
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOPRegIsUnique =
        DstReg != MI.getOperand(DOPIdx).getReg() ||
        (MI.getOperand(DOPIdx).getReg() != MI.getOperand(SrcIdx).getReg() &&
         MI.getOperand(DOPIdx).getReg() != MI.getOperand(Src2Idx).getReg());
    break;
  }

  // Resolve the reverse opcode
  if (UseRev) {
    int NewOpcode;
    // e.g. DIV -> DIVR
    if ((NewOpcode = AArch64::getSVERevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
    // e.g. DIVR -> DIV
    else if ((NewOpcode = AArch64::getSVENonRevInstr(Opcode)) != -1)
      Opcode = NewOpcode;
  }

  // Get the right MOVPRFX
  uint64_t ElementSize = TII->getElementSizeForOpcode(Opcode);
  unsigned MovPrfx, LSLZero, MovPrfxZero;
  switch (ElementSize) {
  case AArch64::ElementSizeNone:
  case AArch64::ElementSizeB:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_B;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_B;
    break;
  case AArch64::ElementSizeH:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_H;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_H;
    break;
  case AArch64::ElementSizeS:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_S;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_S;
    break;
  case AArch64::ElementSizeD:
    MovPrfx = AArch64::MOVPRFX_ZZ;
    LSLZero = AArch64::LSL_ZPmI_D;
    MovPrfxZero = AArch64::MOVPRFX_ZPzZ_D;
    break;
  default:
    llvm_unreachable("Unsupported ElementSize");
  }

  //
  // Create the destructive operation (if required)
  //
  MachineInstrBuilder PRFX, DOP;
  if (FalseZero) {
    // If we cannot prefix the requested instruction we'll instead emit a
    // prefixed_zeroing_mov for DestructiveBinary.
    assert((DOPRegIsUnique || DType == AArch64::DestructiveBinary ||
            DType == AArch64::DestructiveBinaryComm) &&
           "The destructive operand should be unique");
    assert(ElementSize != AArch64::ElementSizeNone &&
           "This instruction is unpredicated");

    // Merge source operand into destination register
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfxZero))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(PredIdx).getReg())
               .addReg(MI.getOperand(DOPIdx).getReg());
    // After the movprfx, the destructive operand is the same as Dst
    DOPIdx = 0;

    // Create the additional LSL to zero the lanes when the DstReg is not
    // unique. Zeros the lanes in z0 that aren't active in p0 with sequence
    // movprfx z0.b, p0/z, z0.b; lsl z0.b, p0/m, z0.b, #0;
    if ((DType == AArch64::DestructiveBinary ||
         DType == AArch64::DestructiveBinaryComm) &&
        !DOPRegIsUnique) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(LSLZero))
          .addReg(DstReg, RegState::Define)
          .add(MI.getOperand(PredIdx))
          .addReg(DstReg)
          .addImm(0);
    }
  } else if (DstReg != MI.getOperand(DOPIdx).getReg()) {
    assert(DOPRegIsUnique && "The destructive operand should be unique");
    PRFX = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(MovPrfx))
               .addReg(DstReg, RegState::Define)
               .addReg(MI.getOperand(DOPIdx).getReg());
    DOPIdx = 0;
  }

  //
  // Create the destructive operation
  //
  DOP = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opcode))
            .addReg(DstReg, RegState::Define | getDeadRegState(DstIsDead));

  switch (DType) {
  case AArch64::DestructiveUnaryPassthru:
    DOP.addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(PredIdx))
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveBinary:
  case AArch64::DestructiveBinaryImm:
  case AArch64::DestructiveBinaryComm:
  case AArch64::DestructiveBinaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx));
    break;
  case AArch64::DestructiveTernaryCommWithRev:
    DOP.add(MI.getOperand(PredIdx))
        .addReg(MI.getOperand(DOPIdx).getReg(), RegState::Kill)
        .add(MI.getOperand(SrcIdx))
        .add(MI.getOperand(Src2Idx));
    break;
  }

  if (PRFX) {
    finalizeBundle(MBB, PRFX->getIterator(), MBBI->getIterator());
    transferImpOps(MI, PRFX, DOP);
  } else
    transferImpOps(MI, DOP, DOP);

  MI.eraseFromParent();
  return true;
}

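/// Expand an STGloop_wback / STZGloop_wback pseudo into a loop of paired
/// tag-store instructions. Roughly, for the zeroing variant (register names
/// illustrative; the leading single store only appears when Size is not a
/// multiple of 32):
///
///   stzg  x1, [x1], #16
///   mov   x0, #AlignedSize
/// .Lloop:
///   stz2g x1, [x1], #32
///   sub   x0, x0, #32
///   cbnz  x0, .Lloop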
bool AArch64ExpandPseudo::expandSetTagLoop(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
    MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  DebugLoc DL = MI.getDebugLoc();
  Register SizeReg = MI.getOperand(0).getReg();
  Register AddressReg = MI.getOperand(1).getReg();

  MachineFunction *MF = MBB.getParent();

  bool ZeroData = MI.getOpcode() == AArch64::STZGloop_wback;
  const unsigned OpCode1 =
      ZeroData ? AArch64::STZGPostIndex : AArch64::STGPostIndex;
  const unsigned OpCode2 =
      ZeroData ? AArch64::STZ2GPostIndex : AArch64::ST2GPostIndex;

  unsigned Size = MI.getOperand(2).getImm();
  assert(Size > 0 && Size % 16 == 0);
  if (Size % (16 * 2) != 0) {
    BuildMI(MBB, MBBI, DL, TII->get(OpCode1), AddressReg)
        .addReg(AddressReg)
        .addReg(AddressReg)
        .addImm(1);
    Size -= 16;
  }
  MachineBasicBlock::iterator I =
      BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVi64imm), SizeReg)
          .addImm(Size);
  expandMOVImm(MBB, I, 64);

  auto LoopBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());
  auto DoneBB = MF->CreateMachineBasicBlock(MBB.getBasicBlock());

  MF->insert(++MBB.getIterator(), LoopBB);
  MF->insert(++LoopBB->getIterator(), DoneBB);

  BuildMI(LoopBB, DL, TII->get(OpCode2))
      .addDef(AddressReg)
      .addReg(AddressReg)
      .addReg(AddressReg)
      .addImm(2)
      .cloneMemRefs(MI)
      .setMIFlags(MI.getFlags());
  BuildMI(LoopBB, DL, TII->get(AArch64::SUBXri))
      .addDef(SizeReg)
      .addReg(SizeReg)
      .addImm(16 * 2)
      .addImm(0);
  BuildMI(LoopBB, DL, TII->get(AArch64::CBNZX)).addUse(SizeReg).addMBB(LoopBB);

  LoopBB->addSuccessor(LoopBB);
  LoopBB->addSuccessor(DoneBB);

  DoneBB->splice(DoneBB->end(), &MBB, MI, MBB.end());
  DoneBB->transferSuccessors(&MBB);

  MBB.addSuccessor(LoopBB);

  NextMBBI = MBB.end();
  MI.eraseFromParent();
  // Recompute liveness bottom up.
  LivePhysRegs LiveRegs;
  computeAndAddLiveIns(LiveRegs, *DoneBB);
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  // Do an extra pass in the loop to get the loop carried dependencies right.
  // FIXME: is this necessary?
  LoopBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *LoopBB);
  DoneBB->clearLiveIns();
  computeAndAddLiveIns(LiveRegs, *DoneBB);

  return true;
}

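/// Expand a multi-vector SVE spill/fill pseudo (e.g. STR_ZZZZXI) into N
/// consecutive single-vector STR_ZXI / LDR_ZXI instructions operating on
/// adjacent z-registers, incrementing the immediate offset by one
/// vector-length slot per register.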
bool AArch64ExpandPseudo::expandSVESpillFill(MachineBasicBlock &MBB,
                                             MachineBasicBlock::iterator MBBI,
                                             unsigned Opc, unsigned N) {
  const TargetRegisterInfo *TRI =
      MBB.getParent()->getSubtarget().getRegisterInfo();
  MachineInstr &MI = *MBBI;
  for (unsigned Offset = 0; Offset < N; ++Offset) {
    int ImmOffset = MI.getOperand(2).getImm() + Offset;
    bool Kill = (Offset + 1 == N) ? MI.getOperand(1).isKill() : false;
    assert(ImmOffset >= -256 && ImmOffset < 256 &&
           "Immediate spill offset out of range");
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc))
        .addReg(
            TRI->getSubReg(MI.getOperand(0).getReg(), AArch64::zsub0 + Offset),
            Opc == AArch64::LDR_ZXI ? RegState::Define : 0)
        .addReg(MI.getOperand(1).getReg(), getKillRegState(Kill))
        .addImm(ImmOffset);
  }
  MI.eraseFromParent();
  return true;
}

bool AArch64ExpandPseudo::expandCALL_RVMARKER(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  // Expand CALL_RVMARKER pseudo to:
  // - a branch to the call target, followed by
  // - the special `mov x29, x29` marker, and
  // - another branch, to the runtime function.
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
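  //
  // For example, for an attached call the emitted bundle looks roughly like
  // (names illustrative):
  //
  //   bl _target
  //   mov x29, x29
  //   bl _objc_retainAutoreleasedReturnValue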
  MachineInstr &MI = *MBBI;

  MachineInstr *OriginalCall;
  MachineOperand &RVTarget = MI.getOperand(0);
  MachineOperand &CallTarget = MI.getOperand(1);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  assert(RVTarget.isGlobal() && "invalid operand for attached call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  OriginalCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  OriginalCall->addOperand(CallTarget);

  unsigned RegMaskStartIdx = 2;
  // Register arguments are added during ISel as explicit operands, but the
  // concrete branch only needs them as implicit uses; re-add them that way
  // until the regmask is reached.
  while (!MI.getOperand(RegMaskStartIdx).isRegMask()) {
    auto MOP = MI.getOperand(RegMaskStartIdx);
    assert(MOP.isReg() && "can only add register operands");
    OriginalCall->addOperand(MachineOperand::CreateReg(
        MOP.getReg(), /*Def=*/false, /*Implicit=*/true));
    RegMaskStartIdx++;
  }
  for (const MachineOperand &MO :
       llvm::drop_begin(MI.operands(), RegMaskStartIdx))
    OriginalCall->addOperand(MO);

  BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
      .addReg(AArch64::FP, RegState::Define)
      .addReg(AArch64::XZR)
      .addReg(AArch64::FP)
      .addImm(0);

  auto *RVCall = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::BL))
                     .add(RVTarget)
                     .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, OriginalCall);

  MI.eraseFromParent();
  finalizeBundle(MBB, OriginalCall->getIterator(),
                 std::next(RVCall->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB,
                                         MachineBasicBlock::iterator MBBI) {
  // Expand CALL_BTI pseudo to:
  // - a branch to the call target
  // - a BTI instruction
  // Mark the sequence as a bundle, to avoid passes moving other code in
  // between.
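  //
  // For example:
  //
  //   bl _setjmp
  //   bti j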

  MachineInstr &MI = *MBBI;
  MachineOperand &CallTarget = MI.getOperand(0);
  assert((CallTarget.isGlobal() || CallTarget.isReg()) &&
         "invalid operand for regular call");
  unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR;
  MachineInstr *Call =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr();
  Call->addOperand(CallTarget);
  Call->setCFIType(*MBB.getParent(), MI.getCFIType());

  MachineInstr *BTI =
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT))
          // BTI J so that setjmp can BR to this.
          .addImm(36)
          .getInstr();

  if (MI.shouldUpdateCallSiteInfo())
    MBB.getParent()->moveCallSiteInfo(&MI, Call);

  MI.eraseFromParent();
  finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator()));
  return true;
}

bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) {
  Register CtxReg = MBBI->getOperand(0).getReg();
  Register BaseReg = MBBI->getOperand(1).getReg();
  int Offset = MBBI->getOperand(2).getImm();
  DebugLoc DL(MBBI->getDebugLoc());
  auto &STI = MBB.getParent()->getSubtarget<AArch64Subtarget>();

  if (STI.getTargetTriple().getArchName() != "arm64e") {
    BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
        .addUse(CtxReg)
        .addUse(BaseReg)
        .addImm(Offset / 8)
        .setMIFlag(MachineInstr::FrameSetup);
    MBBI->eraseFromParent();
    return true;
  }

  // We need to sign the context in an address-discriminated way. 0xc31a is a
  // fixed random value, chosen as part of the ABI.
  //     add x16, xBase, #Offset
  //     movk x16, #0xc31a, lsl #48
  //     mov x17, x22/xzr
  //     pacdb x17, x16
  //     str x17, [xBase, #Offset]
  unsigned Opc = Offset >= 0 ? AArch64::ADDXri : AArch64::SUBXri;
  BuildMI(MBB, MBBI, DL, TII->get(Opc), AArch64::X16)
      .addUse(BaseReg)
      .addImm(abs(Offset))
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::MOVKXi), AArch64::X16)
      .addUse(AArch64::X16)
      .addImm(0xc31a)
      .addImm(48)
      .setMIFlag(MachineInstr::FrameSetup);
  // We're not allowed to clobber X22 (and couldn't clobber XZR if we tried),
  // so move it somewhere before signing.
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrs), AArch64::X17)
      .addUse(AArch64::XZR)
      .addUse(CtxReg)
      .addImm(0)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::PACDB), AArch64::X17)
      .addUse(AArch64::X17)
      .addUse(AArch64::X16)
      .setMIFlag(MachineInstr::FrameSetup);
  BuildMI(MBB, MBBI, DL, TII->get(AArch64::STRXui))
      .addUse(AArch64::X17)
      .addUse(BaseReg)
      .addImm(Offset / 8)
      .setMIFlag(MachineInstr::FrameSetup);

  MBBI->eraseFromParent();
  return true;
}

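/// Expand RestoreZAPseudo into a conditional call. Operand 0 holds the live
/// value of TPIDR2_EL0; if it is zero, a callee has committed the lazy save
/// and ZA must be restored, so the restore routine carried in the pseudo's
/// remaining operands is called. A rough sketch (names illustrative):
///
///   OrigBB:
///     cbz x8, SMBB
///     b   EndBB
///
///   SMBB:
///     bl  __arm_tpidr2_restore
///
///   EndBB:
///     ...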
MachineBasicBlock *
AArch64ExpandPseudo::expandRestoreZA(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  assert((std::next(MBBI) != MBB.end() ||
          MI.getParent()->successors().begin() !=
              MI.getParent()->successors().end()) &&
         "Unexpected unreachable in block that restores ZA");

  // Compare TPIDR2_EL0 value against 0.
  DebugLoc DL = MI.getDebugLoc();
  MachineInstrBuilder Cbz = BuildMI(MBB, MBBI, DL, TII->get(AArch64::CBZX))
                                .add(MI.getOperand(0));

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before RestoreZAPseudo.
  //  - SMBB contains the RestoreZAPseudo instruction only.
  //  - EndBB contains all instructions after RestoreZAPseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);
  // Add the SMBB label to the CBZ instruction & create a branch to EndBB.
  Cbz.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Replace the pseudo with a call (BL).
  MachineInstrBuilder MIB =
      BuildMI(*SMBB, SMBB->end(), DL, TII->get(AArch64::BL));
  MIB.addReg(MI.getOperand(1).getReg(), RegState::Implicit);
  for (unsigned I = 2; I < MI.getNumOperands(); ++I)
    MIB.add(MI.getOperand(I));
  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

MachineBasicBlock *
AArch64ExpandPseudo::expandCondSMToggle(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MBBI) {
  MachineInstr &MI = *MBBI;
  // In the case of a smstart/smstop before an unreachable, just remove the
  // pseudo. Exception handling code generated by Clang may introduce
  // unreachables and it seems unnecessary to restore pstate.sm when that
  // happens. Note that this is not just an optimisation: the code below
  // expects a successor instruction/block in order to split the block at MBBI.
  if (std::next(MBBI) == MBB.end() &&
      MI.getParent()->successors().begin() ==
          MI.getParent()->successors().end()) {
    MI.eraseFromParent();
    return &MBB;
  }

  // Expand the pseudo into smstart or smstop instruction. The pseudo has the
  // following operands:
  //
  //   MSRpstatePseudo <za|sm|both>, <0|1>, pstate.sm, expectedval, <regmask>
  //
  // The pseudo is expanded into a conditional smstart/smstop, with a
  // check if pstate.sm (register) equals the expected value, and if not,
  // invokes the smstart/smstop.
  //
  // As an example, the following block contains a normal call from a
  // streaming-compatible function:
  //
  // OrigBB:
  //   MSRpstatePseudo 3, 0, %0, 0, <regmask>            <- Conditional SMSTOP
  //   bl @normal_callee
  //   MSRpstatePseudo 3, 1, %0, 0, <regmask>            <- Conditional SMSTART
  //
  // ...which will be transformed into:
  //
  // OrigBB:
  //   TBNZx %0:gpr64, 0, SMBB
  //   b EndBB
  //
  // SMBB:
  //   MSRpstatesvcrImm1 3, 0, <regmask>                 <- SMSTOP
  //
  // EndBB:
  //   bl @normal_callee
  //   MSRcond_pstatesvcrImm1 3, 1, <regmask>            <- SMSTART
  //
  DebugLoc DL = MI.getDebugLoc();

  // Create the conditional branch based on the third operand of the
  // instruction, which tells us if we are wrapping a normal or streaming
  // function.
  // We test the live value of pstate.sm and toggle pstate.sm if this is not
  // the expected value for the callee (0 for a normal callee and 1 for a
  // streaming callee).
  auto PStateSM = MI.getOperand(2).getReg();
  bool IsStreamingCallee = MI.getOperand(3).getImm();
  unsigned Opc = IsStreamingCallee ? AArch64::TBZX : AArch64::TBNZX;
  MachineInstrBuilder Tbx =
      BuildMI(MBB, MBBI, DL, TII->get(Opc)).addReg(PStateSM).addImm(0);

  // Split MBB and create two new blocks:
  //  - MBB now contains all instructions before MSRpstatePseudo.
  //  - SMBB contains the MSRpstatePseudo instruction only.
  //  - EndBB contains all instructions after MSRpstatePseudo.
  MachineInstr &PrevMI = *std::prev(MBBI);
  MachineBasicBlock *SMBB = MBB.splitAt(PrevMI, /*UpdateLiveIns*/ true);
  MachineBasicBlock *EndBB = std::next(MI.getIterator()) == SMBB->end()
                                 ? *SMBB->successors().begin()
                                 : SMBB->splitAt(MI, /*UpdateLiveIns*/ true);

  // Add the SMBB label to the TB[N]Z instruction & create a branch to EndBB.
  Tbx.addMBB(SMBB);
  BuildMI(&MBB, DL, TII->get(AArch64::B))
      .addMBB(EndBB);
  MBB.addSuccessor(EndBB);

  // Create the SMSTART/SMSTOP (MSRpstatesvcrImm1) instruction in SMBB.
  MachineInstrBuilder MIB = BuildMI(*SMBB, SMBB->begin(), MI.getDebugLoc(),
                                    TII->get(AArch64::MSRpstatesvcrImm1));
  // Copy all but the pstate.sm register and expected-value operands (the
  // third and fourth operands), as these contain the CopyFromReg for the
  // first argument and the flag to indicate whether the callee is streaming
  // or normal.
  MIB.add(MI.getOperand(0));
  MIB.add(MI.getOperand(1));
  for (unsigned i = 4; i < MI.getNumOperands(); ++i)
    MIB.add(MI.getOperand(i));

  BuildMI(SMBB, DL, TII->get(AArch64::B)).addMBB(EndBB);

  MI.eraseFromParent();
  return EndBB;
}

/// If MBBI references a pseudo instruction that should be expanded here,
/// do the expansion and return true. Otherwise return false.
bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB,
                                   MachineBasicBlock::iterator MBBI,
                                   MachineBasicBlock::iterator &NextMBBI) {
  MachineInstr &MI = *MBBI;
  unsigned Opcode = MI.getOpcode();

  // Check if we can expand the destructive op
  int OrigInstr = AArch64::getSVEPseudoMap(MI.getOpcode());
  if (OrigInstr != -1) {
    auto &Orig = TII->get(OrigInstr);
    if ((Orig.TSFlags & AArch64::DestructiveInstTypeMask) !=
        AArch64::NotDestructive) {
      return expand_DestructiveOp(MI, MBB, MBBI);
    }
  }

  switch (Opcode) {
  default:
    break;

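  // Vector bitwise select: BSL, BIT and BIF compute the same result but
  // differ in which source operand is tied to (and destroyed by) the
  // destination, so pick the variant whose tied operand already equals the
  // destination register; otherwise fall back to BSL, with an extra move
  // when needed.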
  case AArch64::BSPv8i8:
  case AArch64::BSPv16i8: {
    Register DstReg = MI.getOperand(0).getReg();
    if (DstReg == MI.getOperand(3).getReg()) {
      // Expand to BIT
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BITv8i8
                                                  : AArch64::BITv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(3))
          .add(MI.getOperand(2))
          .add(MI.getOperand(1));
    } else if (DstReg == MI.getOperand(2).getReg()) {
      // Expand to BIF
      BuildMI(MBB, MBBI, MI.getDebugLoc(),
              TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BIFv8i8
                                                  : AArch64::BIFv16i8))
          .add(MI.getOperand(0))
          .add(MI.getOperand(2))
          .add(MI.getOperand(3))
          .add(MI.getOperand(1));
    } else {
      // Expand to BSL, use additional move if required
      if (DstReg == MI.getOperand(1).getReg()) {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      } else {
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::ORRv8i8
                                                    : AArch64::ORRv16i8))
            .addReg(DstReg,
                    RegState::Define |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(1))
            .add(MI.getOperand(1));
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::BSPv8i8 ? AArch64::BSLv8i8
                                                    : AArch64::BSLv16i8))
            .add(MI.getOperand(0))
            .addReg(DstReg,
                    RegState::Kill |
                        getRenamableRegState(MI.getOperand(0).isRenamable()))
            .add(MI.getOperand(2))
            .add(MI.getOperand(3));
      }
    }
    MI.eraseFromParent();
    return true;
  }

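  // These register-register forms carry no shift operand; rewrite them to
  // the canonical shifted-register encoding with an LSL #0 shift amount.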
  case AArch64::ADDWrr:
  case AArch64::SUBWrr:
  case AArch64::ADDXrr:
  case AArch64::SUBXrr:
  case AArch64::ADDSWrr:
  case AArch64::SUBSWrr:
  case AArch64::ADDSXrr:
  case AArch64::SUBSXrr:
  case AArch64::ANDWrr:
  case AArch64::ANDXrr:
  case AArch64::BICWrr:
  case AArch64::BICXrr:
  case AArch64::ANDSWrr:
  case AArch64::ANDSXrr:
  case AArch64::BICSWrr:
  case AArch64::BICSXrr:
  case AArch64::EONWrr:
  case AArch64::EONXrr:
  case AArch64::EORWrr:
  case AArch64::EORXrr:
  case AArch64::ORNWrr:
  case AArch64::ORNXrr:
  case AArch64::ORRWrr:
  case AArch64::ORRXrr: {
    unsigned Opcode;
    switch (MI.getOpcode()) {
    default:
      return false;
    case AArch64::ADDWrr: Opcode = AArch64::ADDWrs; break;
    case AArch64::SUBWrr: Opcode = AArch64::SUBWrs; break;
    case AArch64::ADDXrr: Opcode = AArch64::ADDXrs; break;
    case AArch64::SUBXrr: Opcode = AArch64::SUBXrs; break;
    case AArch64::ADDSWrr: Opcode = AArch64::ADDSWrs; break;
    case AArch64::SUBSWrr: Opcode = AArch64::SUBSWrs; break;
    case AArch64::ADDSXrr: Opcode = AArch64::ADDSXrs; break;
    case AArch64::SUBSXrr: Opcode = AArch64::SUBSXrs; break;
    case AArch64::ANDWrr: Opcode = AArch64::ANDWrs; break;
    case AArch64::ANDXrr: Opcode = AArch64::ANDXrs; break;
    case AArch64::BICWrr: Opcode = AArch64::BICWrs; break;
    case AArch64::BICXrr: Opcode = AArch64::BICXrs; break;
    case AArch64::ANDSWrr: Opcode = AArch64::ANDSWrs; break;
    case AArch64::ANDSXrr: Opcode = AArch64::ANDSXrs; break;
    case AArch64::BICSWrr: Opcode = AArch64::BICSWrs; break;
    case AArch64::BICSXrr: Opcode = AArch64::BICSXrs; break;
    case AArch64::EONWrr: Opcode = AArch64::EONWrs; break;
    case AArch64::EONXrr: Opcode = AArch64::EONXrs; break;
    case AArch64::EORWrr: Opcode = AArch64::EORWrs; break;
    case AArch64::EORXrr: Opcode = AArch64::EORXrs; break;
    case AArch64::ORNWrr: Opcode = AArch64::ORNWrs; break;
    case AArch64::ORNXrr: Opcode = AArch64::ORNXrs; break;
    case AArch64::ORRWrr: Opcode = AArch64::ORRWrs; break;
    case AArch64::ORRXrr: Opcode = AArch64::ORRXrs; break;
    }
    MachineFunction &MF = *MBB.getParent();
    // Try to create new inst without implicit operands added.
    MachineInstr *NewMI = MF.CreateMachineInstr(
        TII->get(Opcode), MI.getDebugLoc(), /*NoImplicit=*/true);
    MBB.insert(MBBI, NewMI);
    MachineInstrBuilder MIB1(MF, NewMI);
    MIB1->setPCSections(MF, MI.getPCSections());
    MIB1.addReg(MI.getOperand(0).getReg(), RegState::Define)
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(AArch64_AM::getShifterImm(AArch64_AM::LSL, 0));
    transferImpOps(MI, MIB1, MIB1);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::LOADgot: {
    MachineFunction *MF = MBB.getParent();
    Register DstReg = MI.getOperand(0).getReg();
    const MachineOperand &MO1 = MI.getOperand(1);
    unsigned Flags = MO1.getTargetFlags();

    if (MF->getTarget().getCodeModel() == CodeModel::Tiny) {
      // The tiny code model expands to a single LDR.
      MachineInstrBuilder MIB = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                                        TII->get(AArch64::LDRXl), DstReg);

      if (MO1.isGlobal()) {
        MIB.addGlobalAddress(MO1.getGlobal(), 0, Flags);
      } else if (MO1.isSymbol()) {
        MIB.addExternalSymbol(MO1.getSymbolName(), Flags);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(), Flags);
      }
    } else {
      // The small code model expands into ADRP + LDR.
      MachineFunction &MF = *MI.getParent()->getParent();
      DebugLoc DL = MI.getDebugLoc();
      MachineInstrBuilder MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg);

      MachineInstrBuilder MIB2;
      if (MF.getSubtarget<AArch64Subtarget>().isTargetILP32()) {
        auto TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
        unsigned Reg32 = TRI->getSubReg(DstReg, AArch64::sub_32);
        unsigned DstFlags = MI.getOperand(0).getTargetFlags();
        MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::LDRWui))
                   .addDef(Reg32)
                   .addReg(DstReg, RegState::Kill)
                   .addReg(DstReg, DstFlags | RegState::Implicit);
      } else {
        Register DstReg = MI.getOperand(0).getReg();
        MIB2 = BuildMI(MBB, MBBI, DL, TII->get(AArch64::LDRXui))
                   .add(MI.getOperand(0))
                   .addUse(DstReg, RegState::Kill);
      }

      if (MO1.isGlobal()) {
        MIB1.addGlobalAddress(MO1.getGlobal(), 0, Flags | AArch64II::MO_PAGE);
        MIB2.addGlobalAddress(MO1.getGlobal(), 0,
                              Flags | AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      } else if (MO1.isSymbol()) {
        MIB1.addExternalSymbol(MO1.getSymbolName(), Flags | AArch64II::MO_PAGE);
        MIB2.addExternalSymbol(MO1.getSymbolName(), Flags |
                                                        AArch64II::MO_PAGEOFF |
                                                        AArch64II::MO_NC);
      } else {
        assert(MO1.isCPI() &&
               "Only expect globals, externalsymbols, or constant pools");
        MIB1.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGE);
        MIB2.addConstantPoolIndex(MO1.getIndex(), MO1.getOffset(),
                                  Flags | AArch64II::MO_PAGEOFF |
                                      AArch64II::MO_NC);
      }

      transferImpOps(MI, MIB1, MIB2);
    }
    MI.eraseFromParent();
    return true;
  }
  case AArch64::MOVaddrBA: {
    MachineFunction &MF = *MI.getParent()->getParent();
    if (MF.getSubtarget<AArch64Subtarget>().isTargetMachO()) {
      // blockaddress expressions have to come from a constant pool because
      // the largest addend (and hence offset within a function) allowed for
      // ADRP is only 8MB.
      const BlockAddress *BA = MI.getOperand(1).getBlockAddress();
      assert(MI.getOperand(1).getOffset() == 0 && "unexpected offset");

      MachineConstantPool *MCP = MF.getConstantPool();
      unsigned CPIdx = MCP->getConstantPoolIndex(BA, Align(8));

      Register DstReg = MI.getOperand(0).getReg();
      auto MIB1 =
          BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
              .addConstantPoolIndex(CPIdx, 0, AArch64II::MO_PAGE);
      auto MIB2 = BuildMI(MBB, MBBI, MI.getDebugLoc(),
                          TII->get(AArch64::LDRXui), DstReg)
                      .addUse(DstReg)
                      .addConstantPoolIndex(
                          CPIdx, 0, AArch64II::MO_PAGEOFF | AArch64II::MO_NC);
      transferImpOps(MI, MIB1, MIB2);
      MI.eraseFromParent();
      return true;
    }
  }
    [[fallthrough]];
  case AArch64::MOVaddr:
  case AArch64::MOVaddrJT:
  case AArch64::MOVaddrCP:
  case AArch64::MOVaddrTLS:
  case AArch64::MOVaddrEXT: {
    // Expand into ADRP + ADD.
    Register DstReg = MI.getOperand(0).getReg();
    assert(DstReg != AArch64::XZR);
    MachineInstrBuilder MIB1 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADRP), DstReg)
            .add(MI.getOperand(1));

    if (MI.getOperand(1).getTargetFlags() & AArch64II::MO_TAGGED) {
      // MO_TAGGED on the page indicates a tagged address. Set the tag now.
      // We do so by creating a MOVK that sets bits 48-63 of the register to
      // (global address + 0x100000000 - PC) >> 48. This assumes that we're in
      // the small code model so we can assume a binary size of <= 4GB, which
      // makes the untagged PC relative offset positive. The binary must also
      // be loaded into address range [0, 2^48). Both of these properties need
      // to be ensured at runtime when using tagged addresses.
      auto Tag = MI.getOperand(1);
      Tag.setTargetFlags(AArch64II::MO_PREL | AArch64II::MO_G3);
      Tag.setOffset(0x100000000);
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MOVKXi), DstReg)
          .addReg(DstReg)
          .add(Tag)
          .addImm(48);
    }

    MachineInstrBuilder MIB2 =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
            .add(MI.getOperand(0))
            .addReg(DstReg)
            .add(MI.getOperand(2))
            .addImm(0);

    transferImpOps(MI, MIB1, MIB2);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::ADDlowTLS:
    // Produce a plain ADD
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ADDXri))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .add(MI.getOperand(2))
        .addImm(0);
    MI.eraseFromParent();
    return true;

  case AArch64::MOVbaseTLS: {
    Register DstReg = MI.getOperand(0).getReg();
    auto SysReg = AArch64SysReg::TPIDR_EL0;
    MachineFunction *MF = MBB.getParent();
    if (MF->getSubtarget<AArch64Subtarget>().useEL3ForTP())
      SysReg = AArch64SysReg::TPIDR_EL3;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL2ForTP())
      SysReg = AArch64SysReg::TPIDR_EL2;
    else if (MF->getSubtarget<AArch64Subtarget>().useEL1ForTP())
      SysReg = AArch64SysReg::TPIDR_EL1;
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::MRS), DstReg)
        .addImm(SysReg);
    MI.eraseFromParent();
    return true;
  }

  case AArch64::MOVi32imm:
    return expandMOVImm(MBB, MBBI, 32);
  case AArch64::MOVi64imm:
    return expandMOVImm(MBB, MBBI, 64);
  case AArch64::RET_ReallyLR: {
    // Hiding the LR use with RET_ReallyLR may lead to extra kills in the
    // function and missing live-ins. We are fine in practice because callee
    // saved register handling ensures the register value is restored before
    // RET, but we need the undef flag here to appease the MachineVerifier
    // liveness checks.
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::RET))
            .addReg(AArch64::LR, RegState::Undef);
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::CMP_SWAP_8:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRB, AArch64::STLXRB,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTB, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_16:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRH, AArch64::STLXRH,
                          AArch64::SUBSWrx,
                          AArch64_AM::getArithExtendImm(AArch64_AM::UXTH, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_32:
    return expandCMP_SWAP(MBB, MBBI, AArch64::LDAXRW, AArch64::STLXRW,
                          AArch64::SUBSWrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::WZR, NextMBBI);
  case AArch64::CMP_SWAP_64:
    return expandCMP_SWAP(MBB, MBBI,
                          AArch64::LDAXRX, AArch64::STLXRX, AArch64::SUBSXrs,
                          AArch64_AM::getShifterImm(AArch64_AM::LSL, 0),
                          AArch64::XZR, NextMBBI);
  case AArch64::CMP_SWAP_128:
  case AArch64::CMP_SWAP_128_RELEASE:
  case AArch64::CMP_SWAP_128_ACQUIRE:
  case AArch64::CMP_SWAP_128_MONOTONIC:
    return expandCMP_SWAP_128(MBB, MBBI, NextMBBI);

  case AArch64::AESMCrrTied:
  case AArch64::AESIMCrrTied: {
    MachineInstrBuilder MIB =
        BuildMI(MBB, MBBI, MI.getDebugLoc(),
                TII->get(Opcode == AArch64::AESMCrrTied ? AArch64::AESMCrr
                                                        : AArch64::AESIMCrr))
            .add(MI.getOperand(0))
            .add(MI.getOperand(1));
    transferImpOps(MI, MIB, MIB);
    MI.eraseFromParent();
    return true;
  }
  case AArch64::IRGstack: {
    MachineFunction &MF = *MBB.getParent();
    const AArch64FunctionInfo *AFI = MF.getInfo<AArch64FunctionInfo>();
    const AArch64FrameLowering *TFI =
        MF.getSubtarget<AArch64Subtarget>().getFrameLowering();

    // IRG does not allow immediate offset. getTaggedBasePointerOffset should
    // almost always point to SP-after-prologue; if not, emit a longer
    // instruction sequence.
    int BaseOffset = -AFI->getTaggedBasePointerOffset();
    Register FrameReg;
    StackOffset FrameRegOffset = TFI->resolveFrameOffsetReference(
        MF, BaseOffset, false /*isFixed*/, false /*isSVE*/, FrameReg,
        /*PreferFP=*/false,
        /*ForSimm=*/true);
    Register SrcReg = FrameReg;
    if (FrameRegOffset) {
      // Use output register as temporary.
      SrcReg = MI.getOperand(0).getReg();
      emitFrameOffset(MBB, &MI, MI.getDebugLoc(), SrcReg, FrameReg,
                      FrameRegOffset, TII);
    }
    BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::IRG))
        .add(MI.getOperand(0))
        .addUse(SrcReg)
        .add(MI.getOperand(2));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::TAGPstack: {
    int64_t Offset = MI.getOperand(2).getImm();
    BuildMI(MBB, MBBI, MI.getDebugLoc(),
            TII->get(Offset >= 0 ? AArch64::ADDG : AArch64::SUBG))
        .add(MI.getOperand(0))
        .add(MI.getOperand(1))
        .addImm(std::abs(Offset))
        .add(MI.getOperand(4));
    MI.eraseFromParent();
    return true;
  }
  case AArch64::STGloop_wback:
  case AArch64::STZGloop_wback:
    return expandSetTagLoop(MBB, MBBI, NextMBBI);
  case AArch64::STGloop:
  case AArch64::STZGloop:
    report_fatal_error(
        "Non-writeback variants of STGloop / STZGloop should not "
        "survive past PrologEpilogInserter.");
  case AArch64::STR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 4);
  case AArch64::STR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 3);
  case AArch64::STR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::STR_ZXI, 2);
  case AArch64::LDR_ZZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 4);
  case AArch64::LDR_ZZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 3);
  case AArch64::LDR_ZZXI:
    return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2);
  case AArch64::BLR_RVMARKER:
    return expandCALL_RVMARKER(MBB, MBBI);
  case AArch64::BLR_BTI:
    return expandCALL_BTI(MBB, MBBI);
  case AArch64::StoreSwiftAsyncContext:
    return expandStoreSwiftAsyncContext(MBB, MBBI);
  case AArch64::RestoreZAPseudo: {
    auto *NewMBB = expandRestoreZA(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::MSRpstatePseudo: {
    auto *NewMBB = expandCondSMToggle(MBB, MBBI);
    if (NewMBB != &MBB)
      NextMBBI = MBB.end(); // The NextMBBI iterator is invalidated.
    return true;
  }
  case AArch64::OBSCURE_COPY: {
    if (MI.getOperand(0).getReg() != MI.getOperand(1).getReg()) {
      BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::ORRXrs))
          .add(MI.getOperand(0))
          .addReg(AArch64::XZR)
          .add(MI.getOperand(1))
          .addImm(0);
    }
    MI.eraseFromParent();
    return true;
  }
  }
  return false;
}

/// Iterate over the instructions in basic block MBB and expand any
/// pseudo instructions. Return true if anything was modified.
bool AArch64ExpandPseudo::expandMBB(MachineBasicBlock &MBB) {
  bool Modified = false;

  MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
  while (MBBI != E) {
    MachineBasicBlock::iterator NMBBI = std::next(MBBI);
    Modified |= expandMI(MBB, MBBI, NMBBI);
    MBBI = NMBBI;
  }

  return Modified;
}

bool AArch64ExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());

  bool Modified = false;
  for (auto &MBB : MF)
    Modified |= expandMBB(MBB);
  return Modified;
}

/// Returns an instance of the pseudo instruction expansion pass.
FunctionPass *llvm::createAArch64ExpandPseudoPass() {
  return new AArch64ExpandPseudo();
}