xref: /llvm-project/llvm/lib/Target/AArch64/AArch64LowerHomogeneousPrologEpilog.cpp (revision 60db321081be2324bec7e18eb76421cc566625fd)
1 //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains a pass that lowers homogeneous prolog/epilog instructions.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "AArch64InstrInfo.h"
14 #include "AArch64Subtarget.h"
15 #include "MCTargetDesc/AArch64InstPrinter.h"
16 #include "llvm/CodeGen/MachineBasicBlock.h"
17 #include "llvm/CodeGen/MachineFunction.h"
18 #include "llvm/CodeGen/MachineInstr.h"
19 #include "llvm/CodeGen/MachineInstrBuilder.h"
20 #include "llvm/CodeGen/MachineModuleInfo.h"
21 #include "llvm/CodeGen/MachineOperand.h"
22 #include "llvm/CodeGen/TargetSubtargetInfo.h"
23 #include "llvm/IR/DebugLoc.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/Module.h"
26 #include "llvm/Pass.h"
27 #include <optional>
28 #include <sstream>
29 
30 using namespace llvm;
31 
32 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME                           \
33   "AArch64 homogeneous prolog/epilog lowering pass"
34 
35 cl::opt<int> FrameHelperSizeThreshold(
36     "frame-helper-size-threshold", cl::init(2), cl::Hidden,
37     cl::desc("The minimum number of instructions that are outlined in a frame "
38              "helper (default = 2)"));
39 
40 namespace {
41 
42 class AArch64LowerHomogeneousPE {
43 public:
44   const AArch64InstrInfo *TII;
45 
46   AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI)
47       : M(M), MMI(MMI) {}
48 
49   bool run();
50   bool runOnMachineFunction(MachineFunction &Fn);
51 
52 private:
53   Module *M;
54   MachineModuleInfo *MMI;
55 
56   bool runOnMBB(MachineBasicBlock &MBB);
57   bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
58                MachineBasicBlock::iterator &NextMBBI);
59 
60   /// Lower a HOM_Prolog pseudo instruction into a helper call
61   /// or a sequence of homogeneous stores.
62   /// When a fp setup follows, it can be optimized.
63   bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
64                    MachineBasicBlock::iterator &NextMBBI);
65   /// Lower a HOM_Epilog pseudo instruction into a helper call
66   /// or a sequence of homogeneous loads.
67   /// When a return follow, it can be optimized.
68   bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
69                    MachineBasicBlock::iterator &NextMBBI);
70 };
71 
72 class AArch64LowerHomogeneousPrologEpilog : public ModulePass {
73 public:
74   static char ID;
75 
76   AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) {
77     initializeAArch64LowerHomogeneousPrologEpilogPass(
78         *PassRegistry::getPassRegistry());
79   }
80   void getAnalysisUsage(AnalysisUsage &AU) const override {
81     AU.addRequired<MachineModuleInfoWrapperPass>();
82     AU.addPreserved<MachineModuleInfoWrapperPass>();
83     AU.setPreservesAll();
84     ModulePass::getAnalysisUsage(AU);
85   }
86   bool runOnModule(Module &M) override;
87 
88   StringRef getPassName() const override {
89     return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME;
90   }
91 };
92 
93 } // end anonymous namespace
94 
95 char AArch64LowerHomogeneousPrologEpilog::ID = 0;
96 
97 INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog,
98                 "aarch64-lower-homogeneous-prolog-epilog",
99                 AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false)
100 
101 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) {
102   if (skipModule(M))
103     return false;
104 
105   MachineModuleInfo *MMI =
106       &getAnalysis<MachineModuleInfoWrapperPass>().getMMI();
107   return AArch64LowerHomogeneousPE(&M, MMI).run();
108 }
109 
110 bool AArch64LowerHomogeneousPE::run() {
111   bool Changed = false;
112   for (auto &F : *M) {
113     if (F.empty())
114       continue;
115 
116     MachineFunction *MF = MMI->getMachineFunction(F);
117     if (!MF)
118       continue;
119     Changed |= runOnMachineFunction(*MF);
120   }
121 
122   return Changed;
123 }
/// Kinds of outlined frame helpers.
enum FrameHelperType {
  Prolog,      ///< Saves callee-saved registers.
  PrologFrame, ///< Saves callee-saved registers and also sets up FP.
  Epilog,      ///< Restores callee-saved registers and returns to its caller.
  EpilogTail   ///< Restores callee-saved registers and performs the return.
};
125 
126 /// Return a frame helper name with the given CSRs and the helper type.
127 /// For instance, a prolog helper that saves x19 and x20 is named as
128 /// OUTLINED_FUNCTION_PROLOG_x19x20.
129 static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs,
130                                       FrameHelperType Type, unsigned FpOffset) {
131   std::ostringstream RegStream;
132   switch (Type) {
133   case FrameHelperType::Prolog:
134     RegStream << "OUTLINED_FUNCTION_PROLOG_";
135     break;
136   case FrameHelperType::PrologFrame:
137     RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_";
138     break;
139   case FrameHelperType::Epilog:
140     RegStream << "OUTLINED_FUNCTION_EPILOG_";
141     break;
142   case FrameHelperType::EpilogTail:
143     RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_";
144     break;
145   }
146 
147   for (auto Reg : Regs) {
148     if (Reg == AArch64::NoRegister)
149       continue;
150     RegStream << AArch64InstPrinter::getRegisterName(Reg);
151   }
152 
153   return RegStream.str();
154 }
155 
156 /// Create a Function for the unique frame helper with the given name.
157 /// Return a newly created MachineFunction with an empty MachineBasicBlock.
158 static MachineFunction &createFrameHelperMachineFunction(Module *M,
159                                                          MachineModuleInfo *MMI,
160                                                          StringRef Name) {
161   LLVMContext &C = M->getContext();
162   Function *F = M->getFunction(Name);
163   assert(F == nullptr && "Function has been created before");
164   F = Function::Create(FunctionType::get(Type::getVoidTy(C), false),
165                        Function::ExternalLinkage, Name, M);
166   assert(F && "Function was null!");
167 
168   // Use ODR linkage to avoid duplication.
169   F->setLinkage(GlobalValue::LinkOnceODRLinkage);
170   F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
171 
172   // Set minsize, so we don't insert padding between outlined functions.
173   F->addFnAttr(Attribute::NoInline);
174   F->addFnAttr(Attribute::MinSize);
175   F->addFnAttr(Attribute::Naked);
176 
177   MachineFunction &MF = MMI->getOrCreateMachineFunction(*F);
178   // Remove unnecessary register liveness and set NoVRegs.
179   MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness);
180   MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA);
181   MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs);
182   MF.getRegInfo().freezeReservedRegs();
183 
184   // Create entry block.
185   BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F);
186   IRBuilder<> Builder(EntryBB);
187   Builder.CreateRetVoid();
188 
189   // Insert the new block into the function.
190   MachineBasicBlock *MBB = MF.CreateMachineBasicBlock();
191   MF.insert(MF.begin(), MBB);
192 
193   return MF;
194 }
195 
196 /// Emit a store-pair instruction for frame-setup.
197 /// If Reg2 is AArch64::NoRegister, emit STR instead.
198 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB,
199                       MachineBasicBlock::iterator Pos,
200                       const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
201                       int Offset, bool IsPreDec) {
202   assert(Reg1 != AArch64::NoRegister);
203   const bool IsPaired = Reg2 != AArch64::NoRegister;
204   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
205   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
206   unsigned Opc;
207   if (IsPreDec) {
208     if (IsFloat)
209       Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre;
210     else
211       Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre;
212   } else {
213     if (IsFloat)
214       Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui;
215     else
216       Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui;
217   }
218   // The implicit scale for Offset is 8.
219   TypeSize Scale(0U, false), Width(0U, false);
220   int64_t MinOffset, MaxOffset;
221   [[maybe_unused]] bool Success =
222       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
223   assert(Success && "Invalid Opcode");
224   Offset *= (8 / (int)Scale);
225 
226   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
227   if (IsPreDec)
228     MIB.addDef(AArch64::SP);
229   if (IsPaired)
230     MIB.addReg(Reg2);
231   MIB.addReg(Reg1)
232       .addReg(AArch64::SP)
233       .addImm(Offset)
234       .setMIFlag(MachineInstr::FrameSetup);
235 }
236 
237 /// Emit a load-pair instruction for frame-destroy.
238 /// If Reg2 is AArch64::NoRegister, emit LDR instead.
239 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB,
240                      MachineBasicBlock::iterator Pos,
241                      const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2,
242                      int Offset, bool IsPostDec) {
243   assert(Reg1 != AArch64::NoRegister);
244   const bool IsPaired = Reg2 != AArch64::NoRegister;
245   bool IsFloat = AArch64::FPR64RegClass.contains(Reg1);
246   assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2)));
247   unsigned Opc;
248   if (IsPostDec) {
249     if (IsFloat)
250       Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost;
251     else
252       Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost;
253   } else {
254     if (IsFloat)
255       Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui;
256     else
257       Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui;
258   }
259   // The implicit scale for Offset is 8.
260   TypeSize Scale(0U, false), Width(0U, false);
261   int64_t MinOffset, MaxOffset;
262   [[maybe_unused]] bool Success =
263       AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset);
264   assert(Success && "Invalid Opcode");
265   Offset *= (8 / (int)Scale);
266 
267   MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc));
268   if (IsPostDec)
269     MIB.addDef(AArch64::SP);
270   if (IsPaired)
271     MIB.addReg(Reg2, getDefRegState(true));
272   MIB.addReg(Reg1, getDefRegState(true))
273       .addReg(AArch64::SP)
274       .addImm(Offset)
275       .setMIFlag(MachineInstr::FrameDestroy);
276 }
277 
278 /// Return a unique function if a helper can be formed with the given Regs
279 /// and frame type.
280 /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22:
281 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
282 ///    stp x20, x19, [sp, #16]
283 ///    ret
284 ///
285 /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22:
286 ///    stp x22, x21, [sp, #-32]!    ; x29/x30 has been stored at the caller
287 ///    stp x20, x19, [sp, #16]
288 ///    add fp, sp, #32
289 ///    ret
290 ///
291 /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22:
292 ///    mov x16, x30
293 ///    ldp x29, x30, [sp, #32]
294 ///    ldp x20, x19, [sp, #16]
295 ///    ldp x22, x21, [sp], #48
296 ///    ret x16
297 ///
298 /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22:
299 ///    ldp x29, x30, [sp, #32]
300 ///    ldp x20, x19, [sp, #16]
301 ///    ldp x22, x21, [sp], #48
302 ///    ret
303 /// @param M module
304 /// @param MMI machine module info
305 /// @param Regs callee save regs that the helper will handle
306 /// @param Type frame helper type
307 /// @return a helper function
308 static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI,
309                                         SmallVectorImpl<unsigned> &Regs,
310                                         FrameHelperType Type,
311                                         unsigned FpOffset = 0) {
312   assert(Regs.size() >= 2);
313   auto Name = getFrameHelperName(Regs, Type, FpOffset);
314   auto *F = M->getFunction(Name);
315   if (F)
316     return F;
317 
318   auto &MF = createFrameHelperMachineFunction(M, MMI, Name);
319   MachineBasicBlock &MBB = *MF.begin();
320   const TargetSubtargetInfo &STI = MF.getSubtarget();
321   const TargetInstrInfo &TII = *STI.getInstrInfo();
322 
323   int Size = (int)Regs.size();
324   switch (Type) {
325   case FrameHelperType::Prolog:
326   case FrameHelperType::PrologFrame: {
327     // Compute the remaining SP adjust beyond FP/LR.
328     auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR));
329 
330     // If the register stored to the lowest address is not LR, we must subtract
331     // more from SP here.
332     if (LRIdx != Size - 2) {
333       assert(Regs[Size - 2] != AArch64::LR);
334       emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1],
335                 LRIdx - Size + 2, true);
336     }
337 
338     // Store CSRs in the reverse order.
339     for (int I = Size - 3; I >= 0; I -= 2) {
340       // FP/LR has been stored at call-site.
341       if (Regs[I - 1] == AArch64::LR)
342         continue;
343       emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1,
344                 false);
345     }
346     if (Type == FrameHelperType::PrologFrame)
347       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri))
348           .addDef(AArch64::FP)
349           .addUse(AArch64::SP)
350           .addImm(FpOffset)
351           .addImm(0)
352           .setMIFlag(MachineInstr::FrameSetup);
353 
354     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
355         .addReg(AArch64::LR);
356     break;
357   }
358   case FrameHelperType::Epilog:
359   case FrameHelperType::EpilogTail:
360     if (Type == FrameHelperType::Epilog)
361       // Stash LR to X16
362       BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs))
363           .addDef(AArch64::X16)
364           .addReg(AArch64::XZR)
365           .addUse(AArch64::LR)
366           .addImm(0);
367 
368     for (int I = 0; I < Size - 2; I += 2)
369       emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2,
370                false);
371     // Restore the last CSR with post-increment of SP.
372     emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size,
373              true);
374 
375     BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET))
376         .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR);
377     break;
378   }
379 
380   return M->getFunction(Name);
381 }
382 
383 /// This function checks if a frame helper should be used for
384 /// HOM_Prolog/HOM_Epilog pseudo instruction expansion.
385 /// @param MBB machine basic block
386 /// @param NextMBBI  next instruction following HOM_Prolog/HOM_Epilog
387 /// @param Regs callee save registers that are saved or restored.
388 /// @param Type frame helper type
389 /// @return True if a use of helper is qualified.
390 static bool shouldUseFrameHelper(MachineBasicBlock &MBB,
391                                  MachineBasicBlock::iterator &NextMBBI,
392                                  SmallVectorImpl<unsigned> &Regs,
393                                  FrameHelperType Type) {
394   const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo();
395   auto RegCount = Regs.size();
396   assert(RegCount > 0 && (RegCount % 2 == 0));
397   // # of instructions that will be outlined.
398   int InstCount = RegCount / 2;
399 
400   // Do not use a helper call when not saving LR.
401   if (!llvm::is_contained(Regs, AArch64::LR))
402     return false;
403 
404   switch (Type) {
405   case FrameHelperType::Prolog:
406     // Prolog helper cannot save FP/LR.
407     InstCount--;
408     break;
409   case FrameHelperType::PrologFrame: {
410     // Effecitvely no change in InstCount since FpAdjusment is included.
411     break;
412   }
413   case FrameHelperType::Epilog:
414     // Bail-out if X16 is live across the epilog helper because it is used in
415     // the helper to handle X30.
416     for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) {
417       if (NextMI->readsRegister(AArch64::W16, TRI))
418         return false;
419     }
420     // Epilog may not be in the last block. Check the liveness in successors.
421     for (const MachineBasicBlock *SuccMBB : MBB.successors()) {
422       if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16))
423         return false;
424     }
425     // No change in InstCount for the regular epilog case.
426     break;
427   case FrameHelperType::EpilogTail: {
428     // EpilogTail helper includes the caller's return.
429     if (NextMBBI == MBB.end())
430       return false;
431     if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR)
432       return false;
433     InstCount++;
434     break;
435   }
436   }
437 
438   return InstCount >= FrameHelperSizeThreshold;
439 }
440 
441 /// Lower a HOM_Epilog pseudo instruction into a helper call while
442 /// creating the helper on demand. Or emit a sequence of loads in place when not
443 /// using a helper call.
444 ///
445 /// 1. With a helper including ret
446 ///    HOM_Epilog x30, x29, x19, x20, x21, x22              ; MBBI
447 ///    ret                                                  ; NextMBBI
448 ///    =>
449 ///    b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22
450 ///    ...                                                  ; NextMBBI
451 ///
452 /// 2. With a helper
453 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
454 ///    =>
455 ///    bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22
456 ///
457 /// 3. Without a helper
458 ///    HOM_Epilog x30, x29, x19, x20, x21, x22
459 ///    =>
460 ///    ldp x29, x30, [sp, #32]
461 ///    ldp x20, x19, [sp, #16]
462 ///    ldp x22, x21, [sp], #48
463 bool AArch64LowerHomogeneousPE::lowerEpilog(
464     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
465     MachineBasicBlock::iterator &NextMBBI) {
466   auto &MF = *MBB.getParent();
467   MachineInstr &MI = *MBBI;
468 
469   DebugLoc DL = MI.getDebugLoc();
470   SmallVector<unsigned, 8> Regs;
471   bool HasUnpairedReg = false;
472   for (auto &MO : MI.operands())
473     if (MO.isReg()) {
474       if (!MO.getReg().isValid()) {
475         // For now we are only expecting unpaired GP registers which should
476         // occur exactly once.
477         assert(!HasUnpairedReg);
478         HasUnpairedReg = true;
479       }
480       Regs.push_back(MO.getReg());
481     }
482   (void)HasUnpairedReg;
483   int Size = (int)Regs.size();
484   if (Size == 0)
485     return false;
486   // Registers are in pair.
487   assert(Size % 2 == 0);
488   assert(MI.getOpcode() == AArch64::HOM_Epilog);
489 
490   auto Return = NextMBBI;
491   if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) {
492     // When MBB ends with a return, emit a tail-call to the epilog helper
493     auto *EpilogTailHelper =
494         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail);
495     BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi))
496         .addGlobalAddress(EpilogTailHelper)
497         .addImm(0)
498         .setMIFlag(MachineInstr::FrameDestroy)
499         .copyImplicitOps(MI)
500         .copyImplicitOps(*Return);
501     NextMBBI = std::next(Return);
502     Return->removeFromParent();
503   } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs,
504                                   FrameHelperType::Epilog)) {
505     // The default epilog helper case.
506     auto *EpilogHelper =
507         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog);
508     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
509         .addGlobalAddress(EpilogHelper)
510         .setMIFlag(MachineInstr::FrameDestroy)
511         .copyImplicitOps(MI);
512   } else {
513     // Fall back to no-helper.
514     for (int I = 0; I < Size - 2; I += 2)
515       emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false);
516     // Restore the last CSR with post-increment of SP.
517     emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true);
518   }
519 
520   MBBI->removeFromParent();
521   return true;
522 }
523 
524 /// Lower a HOM_Prolog pseudo instruction into a helper call while
525 /// creating the helper on demand. Or emit a sequence of stores in place when
526 /// not using a helper call.
527 ///
528 /// 1. With a helper including frame-setup
529 ///    HOM_Prolog x30, x29, x19, x20, x21, x22, 32
530 ///    =>
531 ///    stp x29, x30, [sp, #-16]!
532 ///    bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22
533 ///
534 /// 2. With a helper
535 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
536 ///    =>
537 ///    stp x29, x30, [sp, #-16]!
538 ///    bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22
539 ///
540 /// 3. Without a helper
541 ///    HOM_Prolog x30, x29, x19, x20, x21, x22
542 ///    =>
543 ///    stp	x22, x21, [sp, #-48]!
544 ///    stp	x20, x19, [sp, #16]
545 ///    stp	x29, x30, [sp, #32]
546 bool AArch64LowerHomogeneousPE::lowerProlog(
547     MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
548     MachineBasicBlock::iterator &NextMBBI) {
549   auto &MF = *MBB.getParent();
550   MachineInstr &MI = *MBBI;
551 
552   DebugLoc DL = MI.getDebugLoc();
553   SmallVector<unsigned, 8> Regs;
554   bool HasUnpairedReg = false;
555   int LRIdx = 0;
556   std::optional<int> FpOffset;
557   for (auto &MO : MI.operands()) {
558     if (MO.isReg()) {
559       if (MO.getReg().isValid()) {
560         if (MO.getReg() == AArch64::LR)
561           LRIdx = Regs.size();
562       } else {
563         // For now we are only expecting unpaired GP registers which should
564         // occur exactly once.
565         assert(!HasUnpairedReg);
566         HasUnpairedReg = true;
567       }
568       Regs.push_back(MO.getReg());
569     } else if (MO.isImm()) {
570       FpOffset = MO.getImm();
571     }
572   }
573   (void)HasUnpairedReg;
574   int Size = (int)Regs.size();
575   if (Size == 0)
576     return false;
577   // Allow compact unwind case only for oww.
578   assert(Size % 2 == 0);
579   assert(MI.getOpcode() == AArch64::HOM_Prolog);
580 
581   if (FpOffset &&
582       shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) {
583     // FP/LR is stored at the top of stack before the prolog helper call.
584     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
585     auto *PrologFrameHelper = getOrCreateFrameHelper(
586         M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset);
587     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
588         .addGlobalAddress(PrologFrameHelper)
589         .setMIFlag(MachineInstr::FrameSetup)
590         .copyImplicitOps(MI)
591         .addReg(AArch64::FP, RegState::Implicit | RegState::Define)
592         .addReg(AArch64::SP, RegState::Implicit);
593   } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs,
594                                                FrameHelperType::Prolog)) {
595     // FP/LR is stored at the top of stack before the prolog helper call.
596     emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true);
597     auto *PrologHelper =
598         getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog);
599     BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL))
600         .addGlobalAddress(PrologHelper)
601         .setMIFlag(MachineInstr::FrameSetup)
602         .copyImplicitOps(MI);
603   } else {
604     // Fall back to no-helper.
605     emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true);
606     for (int I = Size - 3; I >= 0; I -= 2)
607       emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false);
608     if (FpOffset) {
609       BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri))
610           .addDef(AArch64::FP)
611           .addUse(AArch64::SP)
612           .addImm(*FpOffset)
613           .addImm(0)
614           .setMIFlag(MachineInstr::FrameSetup);
615     }
616   }
617 
618   MBBI->removeFromParent();
619   return true;
620 }
621 
622 /// Process each machine instruction
623 /// @param MBB machine basic block
624 /// @param MBBI current instruction iterator
625 /// @param NextMBBI next instruction iterator which can be updated
626 /// @return True when IR is changed.
627 bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB,
628                                         MachineBasicBlock::iterator MBBI,
629                                         MachineBasicBlock::iterator &NextMBBI) {
630   MachineInstr &MI = *MBBI;
631   unsigned Opcode = MI.getOpcode();
632   switch (Opcode) {
633   default:
634     break;
635   case AArch64::HOM_Prolog:
636     return lowerProlog(MBB, MBBI, NextMBBI);
637   case AArch64::HOM_Epilog:
638     return lowerEpilog(MBB, MBBI, NextMBBI);
639   }
640   return false;
641 }
642 
643 bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) {
644   bool Modified = false;
645 
646   MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
647   while (MBBI != E) {
648     MachineBasicBlock::iterator NMBBI = std::next(MBBI);
649     Modified |= runOnMI(MBB, MBBI, NMBBI);
650     MBBI = NMBBI;
651   }
652 
653   return Modified;
654 }
655 
656 bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) {
657   TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
658 
659   bool Modified = false;
660   for (auto &MBB : MF)
661     Modified |= runOnMBB(MBB);
662   return Modified;
663 }
664 
665 ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() {
666   return new AArch64LowerHomogeneousPrologEpilog();
667 }
668