1 //===- AArch64LowerHomogeneousPrologEpilog.cpp ----------------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains a pass that lowers homogeneous prolog/epilog instructions. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "AArch64InstrInfo.h" 14 #include "AArch64Subtarget.h" 15 #include "MCTargetDesc/AArch64InstPrinter.h" 16 #include "llvm/CodeGen/MachineBasicBlock.h" 17 #include "llvm/CodeGen/MachineFunction.h" 18 #include "llvm/CodeGen/MachineInstr.h" 19 #include "llvm/CodeGen/MachineInstrBuilder.h" 20 #include "llvm/CodeGen/MachineModuleInfo.h" 21 #include "llvm/CodeGen/MachineOperand.h" 22 #include "llvm/CodeGen/TargetSubtargetInfo.h" 23 #include "llvm/IR/DebugLoc.h" 24 #include "llvm/IR/IRBuilder.h" 25 #include "llvm/IR/Module.h" 26 #include "llvm/Pass.h" 27 #include <optional> 28 #include <sstream> 29 30 using namespace llvm; 31 32 #define AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME \ 33 "AArch64 homogeneous prolog/epilog lowering pass" 34 35 cl::opt<int> FrameHelperSizeThreshold( 36 "frame-helper-size-threshold", cl::init(2), cl::Hidden, 37 cl::desc("The minimum number of instructions that are outlined in a frame " 38 "helper (default = 2)")); 39 40 namespace { 41 42 class AArch64LowerHomogeneousPE { 43 public: 44 const AArch64InstrInfo *TII; 45 46 AArch64LowerHomogeneousPE(Module *M, MachineModuleInfo *MMI) 47 : M(M), MMI(MMI) {} 48 49 bool run(); 50 bool runOnMachineFunction(MachineFunction &Fn); 51 52 private: 53 Module *M; 54 MachineModuleInfo *MMI; 55 56 bool runOnMBB(MachineBasicBlock &MBB); 57 bool runOnMI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 58 MachineBasicBlock::iterator &NextMBBI); 59 60 /// Lower a HOM_Prolog pseudo instruction into a helper call 61 /// or a sequence of homogeneous stores. 62 /// When a fp setup follows, it can be optimized. 63 bool lowerProlog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 64 MachineBasicBlock::iterator &NextMBBI); 65 /// Lower a HOM_Epilog pseudo instruction into a helper call 66 /// or a sequence of homogeneous loads. 67 /// When a return follow, it can be optimized. 68 bool lowerEpilog(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 69 MachineBasicBlock::iterator &NextMBBI); 70 }; 71 72 class AArch64LowerHomogeneousPrologEpilog : public ModulePass { 73 public: 74 static char ID; 75 76 AArch64LowerHomogeneousPrologEpilog() : ModulePass(ID) { 77 initializeAArch64LowerHomogeneousPrologEpilogPass( 78 *PassRegistry::getPassRegistry()); 79 } 80 void getAnalysisUsage(AnalysisUsage &AU) const override { 81 AU.addRequired<MachineModuleInfoWrapperPass>(); 82 AU.addPreserved<MachineModuleInfoWrapperPass>(); 83 AU.setPreservesAll(); 84 ModulePass::getAnalysisUsage(AU); 85 } 86 bool runOnModule(Module &M) override; 87 88 StringRef getPassName() const override { 89 return AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME; 90 } 91 }; 92 93 } // end anonymous namespace 94 95 char AArch64LowerHomogeneousPrologEpilog::ID = 0; 96 97 INITIALIZE_PASS(AArch64LowerHomogeneousPrologEpilog, 98 "aarch64-lower-homogeneous-prolog-epilog", 99 AARCH64_LOWER_HOMOGENEOUS_PROLOG_EPILOG_NAME, false, false) 100 101 bool AArch64LowerHomogeneousPrologEpilog::runOnModule(Module &M) { 102 if (skipModule(M)) 103 return false; 104 105 MachineModuleInfo *MMI = 106 &getAnalysis<MachineModuleInfoWrapperPass>().getMMI(); 107 return AArch64LowerHomogeneousPE(&M, MMI).run(); 108 } 109 110 bool AArch64LowerHomogeneousPE::run() { 111 bool Changed = false; 112 for (auto &F : *M) { 113 if (F.empty()) 114 continue; 115 116 MachineFunction *MF = MMI->getMachineFunction(F); 117 if (!MF) 118 continue; 119 Changed |= runOnMachineFunction(*MF); 120 } 121 122 return Changed; 123 } 124 enum FrameHelperType { Prolog, PrologFrame, Epilog, EpilogTail }; 125 126 /// Return a frame helper name with the given CSRs and the helper type. 127 /// For instance, a prolog helper that saves x19 and x20 is named as 128 /// OUTLINED_FUNCTION_PROLOG_x19x20. 129 static std::string getFrameHelperName(SmallVectorImpl<unsigned> &Regs, 130 FrameHelperType Type, unsigned FpOffset) { 131 std::ostringstream RegStream; 132 switch (Type) { 133 case FrameHelperType::Prolog: 134 RegStream << "OUTLINED_FUNCTION_PROLOG_"; 135 break; 136 case FrameHelperType::PrologFrame: 137 RegStream << "OUTLINED_FUNCTION_PROLOG_FRAME" << FpOffset << "_"; 138 break; 139 case FrameHelperType::Epilog: 140 RegStream << "OUTLINED_FUNCTION_EPILOG_"; 141 break; 142 case FrameHelperType::EpilogTail: 143 RegStream << "OUTLINED_FUNCTION_EPILOG_TAIL_"; 144 break; 145 } 146 147 for (auto Reg : Regs) { 148 if (Reg == AArch64::NoRegister) 149 continue; 150 RegStream << AArch64InstPrinter::getRegisterName(Reg); 151 } 152 153 return RegStream.str(); 154 } 155 156 /// Create a Function for the unique frame helper with the given name. 157 /// Return a newly created MachineFunction with an empty MachineBasicBlock. 158 static MachineFunction &createFrameHelperMachineFunction(Module *M, 159 MachineModuleInfo *MMI, 160 StringRef Name) { 161 LLVMContext &C = M->getContext(); 162 Function *F = M->getFunction(Name); 163 assert(F == nullptr && "Function has been created before"); 164 F = Function::Create(FunctionType::get(Type::getVoidTy(C), false), 165 Function::ExternalLinkage, Name, M); 166 assert(F && "Function was null!"); 167 168 // Use ODR linkage to avoid duplication. 169 F->setLinkage(GlobalValue::LinkOnceODRLinkage); 170 F->setUnnamedAddr(GlobalValue::UnnamedAddr::Global); 171 172 // Set minsize, so we don't insert padding between outlined functions. 173 F->addFnAttr(Attribute::NoInline); 174 F->addFnAttr(Attribute::MinSize); 175 F->addFnAttr(Attribute::Naked); 176 177 MachineFunction &MF = MMI->getOrCreateMachineFunction(*F); 178 // Remove unnecessary register liveness and set NoVRegs. 179 MF.getProperties().reset(MachineFunctionProperties::Property::TracksLiveness); 180 MF.getProperties().reset(MachineFunctionProperties::Property::IsSSA); 181 MF.getProperties().set(MachineFunctionProperties::Property::NoVRegs); 182 MF.getRegInfo().freezeReservedRegs(); 183 184 // Create entry block. 185 BasicBlock *EntryBB = BasicBlock::Create(C, "entry", F); 186 IRBuilder<> Builder(EntryBB); 187 Builder.CreateRetVoid(); 188 189 // Insert the new block into the function. 190 MachineBasicBlock *MBB = MF.CreateMachineBasicBlock(); 191 MF.insert(MF.begin(), MBB); 192 193 return MF; 194 } 195 196 /// Emit a store-pair instruction for frame-setup. 197 /// If Reg2 is AArch64::NoRegister, emit STR instead. 198 static void emitStore(MachineFunction &MF, MachineBasicBlock &MBB, 199 MachineBasicBlock::iterator Pos, 200 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 201 int Offset, bool IsPreDec) { 202 assert(Reg1 != AArch64::NoRegister); 203 const bool IsPaired = Reg2 != AArch64::NoRegister; 204 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 205 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 206 unsigned Opc; 207 if (IsPreDec) { 208 if (IsFloat) 209 Opc = IsPaired ? AArch64::STPDpre : AArch64::STRDpre; 210 else 211 Opc = IsPaired ? AArch64::STPXpre : AArch64::STRXpre; 212 } else { 213 if (IsFloat) 214 Opc = IsPaired ? AArch64::STPDi : AArch64::STRDui; 215 else 216 Opc = IsPaired ? AArch64::STPXi : AArch64::STRXui; 217 } 218 // The implicit scale for Offset is 8. 219 TypeSize Scale(0U, false), Width(0U, false); 220 int64_t MinOffset, MaxOffset; 221 [[maybe_unused]] bool Success = 222 AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); 223 assert(Success && "Invalid Opcode"); 224 Offset *= (8 / (int)Scale); 225 226 MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 227 if (IsPreDec) 228 MIB.addDef(AArch64::SP); 229 if (IsPaired) 230 MIB.addReg(Reg2); 231 MIB.addReg(Reg1) 232 .addReg(AArch64::SP) 233 .addImm(Offset) 234 .setMIFlag(MachineInstr::FrameSetup); 235 } 236 237 /// Emit a load-pair instruction for frame-destroy. 238 /// If Reg2 is AArch64::NoRegister, emit LDR instead. 239 static void emitLoad(MachineFunction &MF, MachineBasicBlock &MBB, 240 MachineBasicBlock::iterator Pos, 241 const TargetInstrInfo &TII, unsigned Reg1, unsigned Reg2, 242 int Offset, bool IsPostDec) { 243 assert(Reg1 != AArch64::NoRegister); 244 const bool IsPaired = Reg2 != AArch64::NoRegister; 245 bool IsFloat = AArch64::FPR64RegClass.contains(Reg1); 246 assert(!(IsFloat ^ AArch64::FPR64RegClass.contains(Reg2))); 247 unsigned Opc; 248 if (IsPostDec) { 249 if (IsFloat) 250 Opc = IsPaired ? AArch64::LDPDpost : AArch64::LDRDpost; 251 else 252 Opc = IsPaired ? AArch64::LDPXpost : AArch64::LDRXpost; 253 } else { 254 if (IsFloat) 255 Opc = IsPaired ? AArch64::LDPDi : AArch64::LDRDui; 256 else 257 Opc = IsPaired ? AArch64::LDPXi : AArch64::LDRXui; 258 } 259 // The implicit scale for Offset is 8. 260 TypeSize Scale(0U, false), Width(0U, false); 261 int64_t MinOffset, MaxOffset; 262 [[maybe_unused]] bool Success = 263 AArch64InstrInfo::getMemOpInfo(Opc, Scale, Width, MinOffset, MaxOffset); 264 assert(Success && "Invalid Opcode"); 265 Offset *= (8 / (int)Scale); 266 267 MachineInstrBuilder MIB = BuildMI(MBB, Pos, DebugLoc(), TII.get(Opc)); 268 if (IsPostDec) 269 MIB.addDef(AArch64::SP); 270 if (IsPaired) 271 MIB.addReg(Reg2, getDefRegState(true)); 272 MIB.addReg(Reg1, getDefRegState(true)) 273 .addReg(AArch64::SP) 274 .addImm(Offset) 275 .setMIFlag(MachineInstr::FrameDestroy); 276 } 277 278 /// Return a unique function if a helper can be formed with the given Regs 279 /// and frame type. 280 /// 1) _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22: 281 /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 282 /// stp x20, x19, [sp, #16] 283 /// ret 284 /// 285 /// 2) _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22: 286 /// stp x22, x21, [sp, #-32]! ; x29/x30 has been stored at the caller 287 /// stp x20, x19, [sp, #16] 288 /// add fp, sp, #32 289 /// ret 290 /// 291 /// 3) _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22: 292 /// mov x16, x30 293 /// ldp x29, x30, [sp, #32] 294 /// ldp x20, x19, [sp, #16] 295 /// ldp x22, x21, [sp], #48 296 /// ret x16 297 /// 298 /// 4) _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22: 299 /// ldp x29, x30, [sp, #32] 300 /// ldp x20, x19, [sp, #16] 301 /// ldp x22, x21, [sp], #48 302 /// ret 303 /// @param M module 304 /// @param MMI machine module info 305 /// @param Regs callee save regs that the helper will handle 306 /// @param Type frame helper type 307 /// @return a helper function 308 static Function *getOrCreateFrameHelper(Module *M, MachineModuleInfo *MMI, 309 SmallVectorImpl<unsigned> &Regs, 310 FrameHelperType Type, 311 unsigned FpOffset = 0) { 312 assert(Regs.size() >= 2); 313 auto Name = getFrameHelperName(Regs, Type, FpOffset); 314 auto *F = M->getFunction(Name); 315 if (F) 316 return F; 317 318 auto &MF = createFrameHelperMachineFunction(M, MMI, Name); 319 MachineBasicBlock &MBB = *MF.begin(); 320 const TargetSubtargetInfo &STI = MF.getSubtarget(); 321 const TargetInstrInfo &TII = *STI.getInstrInfo(); 322 323 int Size = (int)Regs.size(); 324 switch (Type) { 325 case FrameHelperType::Prolog: 326 case FrameHelperType::PrologFrame: { 327 // Compute the remaining SP adjust beyond FP/LR. 328 auto LRIdx = std::distance(Regs.begin(), llvm::find(Regs, AArch64::LR)); 329 330 // If the register stored to the lowest address is not LR, we must subtract 331 // more from SP here. 332 if (LRIdx != Size - 2) { 333 assert(Regs[Size - 2] != AArch64::LR); 334 emitStore(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], 335 LRIdx - Size + 2, true); 336 } 337 338 // Store CSRs in the reverse order. 339 for (int I = Size - 3; I >= 0; I -= 2) { 340 // FP/LR has been stored at call-site. 341 if (Regs[I - 1] == AArch64::LR) 342 continue; 343 emitStore(MF, MBB, MBB.end(), TII, Regs[I - 1], Regs[I], Size - I - 1, 344 false); 345 } 346 if (Type == FrameHelperType::PrologFrame) 347 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ADDXri)) 348 .addDef(AArch64::FP) 349 .addUse(AArch64::SP) 350 .addImm(FpOffset) 351 .addImm(0) 352 .setMIFlag(MachineInstr::FrameSetup); 353 354 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 355 .addReg(AArch64::LR); 356 break; 357 } 358 case FrameHelperType::Epilog: 359 case FrameHelperType::EpilogTail: 360 if (Type == FrameHelperType::Epilog) 361 // Stash LR to X16 362 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::ORRXrs)) 363 .addDef(AArch64::X16) 364 .addReg(AArch64::XZR) 365 .addUse(AArch64::LR) 366 .addImm(0); 367 368 for (int I = 0; I < Size - 2; I += 2) 369 emitLoad(MF, MBB, MBB.end(), TII, Regs[I], Regs[I + 1], Size - I - 2, 370 false); 371 // Restore the last CSR with post-increment of SP. 372 emitLoad(MF, MBB, MBB.end(), TII, Regs[Size - 2], Regs[Size - 1], Size, 373 true); 374 375 BuildMI(MBB, MBB.end(), DebugLoc(), TII.get(AArch64::RET)) 376 .addReg(Type == FrameHelperType::Epilog ? AArch64::X16 : AArch64::LR); 377 break; 378 } 379 380 return M->getFunction(Name); 381 } 382 383 /// This function checks if a frame helper should be used for 384 /// HOM_Prolog/HOM_Epilog pseudo instruction expansion. 385 /// @param MBB machine basic block 386 /// @param NextMBBI next instruction following HOM_Prolog/HOM_Epilog 387 /// @param Regs callee save registers that are saved or restored. 388 /// @param Type frame helper type 389 /// @return True if a use of helper is qualified. 390 static bool shouldUseFrameHelper(MachineBasicBlock &MBB, 391 MachineBasicBlock::iterator &NextMBBI, 392 SmallVectorImpl<unsigned> &Regs, 393 FrameHelperType Type) { 394 const auto *TRI = MBB.getParent()->getSubtarget().getRegisterInfo(); 395 auto RegCount = Regs.size(); 396 assert(RegCount > 0 && (RegCount % 2 == 0)); 397 // # of instructions that will be outlined. 398 int InstCount = RegCount / 2; 399 400 // Do not use a helper call when not saving LR. 401 if (!llvm::is_contained(Regs, AArch64::LR)) 402 return false; 403 404 switch (Type) { 405 case FrameHelperType::Prolog: 406 // Prolog helper cannot save FP/LR. 407 InstCount--; 408 break; 409 case FrameHelperType::PrologFrame: { 410 // Effecitvely no change in InstCount since FpAdjusment is included. 411 break; 412 } 413 case FrameHelperType::Epilog: 414 // Bail-out if X16 is live across the epilog helper because it is used in 415 // the helper to handle X30. 416 for (auto NextMI = NextMBBI; NextMI != MBB.end(); NextMI++) { 417 if (NextMI->readsRegister(AArch64::W16, TRI)) 418 return false; 419 } 420 // Epilog may not be in the last block. Check the liveness in successors. 421 for (const MachineBasicBlock *SuccMBB : MBB.successors()) { 422 if (SuccMBB->isLiveIn(AArch64::W16) || SuccMBB->isLiveIn(AArch64::X16)) 423 return false; 424 } 425 // No change in InstCount for the regular epilog case. 426 break; 427 case FrameHelperType::EpilogTail: { 428 // EpilogTail helper includes the caller's return. 429 if (NextMBBI == MBB.end()) 430 return false; 431 if (NextMBBI->getOpcode() != AArch64::RET_ReallyLR) 432 return false; 433 InstCount++; 434 break; 435 } 436 } 437 438 return InstCount >= FrameHelperSizeThreshold; 439 } 440 441 /// Lower a HOM_Epilog pseudo instruction into a helper call while 442 /// creating the helper on demand. Or emit a sequence of loads in place when not 443 /// using a helper call. 444 /// 445 /// 1. With a helper including ret 446 /// HOM_Epilog x30, x29, x19, x20, x21, x22 ; MBBI 447 /// ret ; NextMBBI 448 /// => 449 /// b _OUTLINED_FUNCTION_EPILOG_TAIL_x30x29x19x20x21x22 450 /// ... ; NextMBBI 451 /// 452 /// 2. With a helper 453 /// HOM_Epilog x30, x29, x19, x20, x21, x22 454 /// => 455 /// bl _OUTLINED_FUNCTION_EPILOG_x30x29x19x20x21x22 456 /// 457 /// 3. Without a helper 458 /// HOM_Epilog x30, x29, x19, x20, x21, x22 459 /// => 460 /// ldp x29, x30, [sp, #32] 461 /// ldp x20, x19, [sp, #16] 462 /// ldp x22, x21, [sp], #48 463 bool AArch64LowerHomogeneousPE::lowerEpilog( 464 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 465 MachineBasicBlock::iterator &NextMBBI) { 466 auto &MF = *MBB.getParent(); 467 MachineInstr &MI = *MBBI; 468 469 DebugLoc DL = MI.getDebugLoc(); 470 SmallVector<unsigned, 8> Regs; 471 bool HasUnpairedReg = false; 472 for (auto &MO : MI.operands()) 473 if (MO.isReg()) { 474 if (!MO.getReg().isValid()) { 475 // For now we are only expecting unpaired GP registers which should 476 // occur exactly once. 477 assert(!HasUnpairedReg); 478 HasUnpairedReg = true; 479 } 480 Regs.push_back(MO.getReg()); 481 } 482 (void)HasUnpairedReg; 483 int Size = (int)Regs.size(); 484 if (Size == 0) 485 return false; 486 // Registers are in pair. 487 assert(Size % 2 == 0); 488 assert(MI.getOpcode() == AArch64::HOM_Epilog); 489 490 auto Return = NextMBBI; 491 if (shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::EpilogTail)) { 492 // When MBB ends with a return, emit a tail-call to the epilog helper 493 auto *EpilogTailHelper = 494 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::EpilogTail); 495 BuildMI(MBB, MBBI, DL, TII->get(AArch64::TCRETURNdi)) 496 .addGlobalAddress(EpilogTailHelper) 497 .addImm(0) 498 .setMIFlag(MachineInstr::FrameDestroy) 499 .copyImplicitOps(MI) 500 .copyImplicitOps(*Return); 501 NextMBBI = std::next(Return); 502 Return->removeFromParent(); 503 } else if (shouldUseFrameHelper(MBB, NextMBBI, Regs, 504 FrameHelperType::Epilog)) { 505 // The default epilog helper case. 506 auto *EpilogHelper = 507 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Epilog); 508 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 509 .addGlobalAddress(EpilogHelper) 510 .setMIFlag(MachineInstr::FrameDestroy) 511 .copyImplicitOps(MI); 512 } else { 513 // Fall back to no-helper. 514 for (int I = 0; I < Size - 2; I += 2) 515 emitLoad(MF, MBB, MBBI, *TII, Regs[I], Regs[I + 1], Size - I - 2, false); 516 // Restore the last CSR with post-increment of SP. 517 emitLoad(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], Size, true); 518 } 519 520 MBBI->removeFromParent(); 521 return true; 522 } 523 524 /// Lower a HOM_Prolog pseudo instruction into a helper call while 525 /// creating the helper on demand. Or emit a sequence of stores in place when 526 /// not using a helper call. 527 /// 528 /// 1. With a helper including frame-setup 529 /// HOM_Prolog x30, x29, x19, x20, x21, x22, 32 530 /// => 531 /// stp x29, x30, [sp, #-16]! 532 /// bl _OUTLINED_FUNCTION_PROLOG_FRAME32_x30x29x19x20x21x22 533 /// 534 /// 2. With a helper 535 /// HOM_Prolog x30, x29, x19, x20, x21, x22 536 /// => 537 /// stp x29, x30, [sp, #-16]! 538 /// bl _OUTLINED_FUNCTION_PROLOG_x30x29x19x20x21x22 539 /// 540 /// 3. Without a helper 541 /// HOM_Prolog x30, x29, x19, x20, x21, x22 542 /// => 543 /// stp x22, x21, [sp, #-48]! 544 /// stp x20, x19, [sp, #16] 545 /// stp x29, x30, [sp, #32] 546 bool AArch64LowerHomogeneousPE::lowerProlog( 547 MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, 548 MachineBasicBlock::iterator &NextMBBI) { 549 auto &MF = *MBB.getParent(); 550 MachineInstr &MI = *MBBI; 551 552 DebugLoc DL = MI.getDebugLoc(); 553 SmallVector<unsigned, 8> Regs; 554 bool HasUnpairedReg = false; 555 int LRIdx = 0; 556 std::optional<int> FpOffset; 557 for (auto &MO : MI.operands()) { 558 if (MO.isReg()) { 559 if (MO.getReg().isValid()) { 560 if (MO.getReg() == AArch64::LR) 561 LRIdx = Regs.size(); 562 } else { 563 // For now we are only expecting unpaired GP registers which should 564 // occur exactly once. 565 assert(!HasUnpairedReg); 566 HasUnpairedReg = true; 567 } 568 Regs.push_back(MO.getReg()); 569 } else if (MO.isImm()) { 570 FpOffset = MO.getImm(); 571 } 572 } 573 (void)HasUnpairedReg; 574 int Size = (int)Regs.size(); 575 if (Size == 0) 576 return false; 577 // Allow compact unwind case only for oww. 578 assert(Size % 2 == 0); 579 assert(MI.getOpcode() == AArch64::HOM_Prolog); 580 581 if (FpOffset && 582 shouldUseFrameHelper(MBB, NextMBBI, Regs, FrameHelperType::PrologFrame)) { 583 // FP/LR is stored at the top of stack before the prolog helper call. 584 emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 585 auto *PrologFrameHelper = getOrCreateFrameHelper( 586 M, MMI, Regs, FrameHelperType::PrologFrame, *FpOffset); 587 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 588 .addGlobalAddress(PrologFrameHelper) 589 .setMIFlag(MachineInstr::FrameSetup) 590 .copyImplicitOps(MI) 591 .addReg(AArch64::FP, RegState::Implicit | RegState::Define) 592 .addReg(AArch64::SP, RegState::Implicit); 593 } else if (!FpOffset && shouldUseFrameHelper(MBB, NextMBBI, Regs, 594 FrameHelperType::Prolog)) { 595 // FP/LR is stored at the top of stack before the prolog helper call. 596 emitStore(MF, MBB, MBBI, *TII, AArch64::LR, AArch64::FP, -LRIdx - 2, true); 597 auto *PrologHelper = 598 getOrCreateFrameHelper(M, MMI, Regs, FrameHelperType::Prolog); 599 BuildMI(MBB, MBBI, DL, TII->get(AArch64::BL)) 600 .addGlobalAddress(PrologHelper) 601 .setMIFlag(MachineInstr::FrameSetup) 602 .copyImplicitOps(MI); 603 } else { 604 // Fall back to no-helper. 605 emitStore(MF, MBB, MBBI, *TII, Regs[Size - 2], Regs[Size - 1], -Size, true); 606 for (int I = Size - 3; I >= 0; I -= 2) 607 emitStore(MF, MBB, MBBI, *TII, Regs[I - 1], Regs[I], Size - I - 1, false); 608 if (FpOffset) { 609 BuildMI(MBB, MBBI, DL, TII->get(AArch64::ADDXri)) 610 .addDef(AArch64::FP) 611 .addUse(AArch64::SP) 612 .addImm(*FpOffset) 613 .addImm(0) 614 .setMIFlag(MachineInstr::FrameSetup); 615 } 616 } 617 618 MBBI->removeFromParent(); 619 return true; 620 } 621 622 /// Process each machine instruction 623 /// @param MBB machine basic block 624 /// @param MBBI current instruction iterator 625 /// @param NextMBBI next instruction iterator which can be updated 626 /// @return True when IR is changed. 627 bool AArch64LowerHomogeneousPE::runOnMI(MachineBasicBlock &MBB, 628 MachineBasicBlock::iterator MBBI, 629 MachineBasicBlock::iterator &NextMBBI) { 630 MachineInstr &MI = *MBBI; 631 unsigned Opcode = MI.getOpcode(); 632 switch (Opcode) { 633 default: 634 break; 635 case AArch64::HOM_Prolog: 636 return lowerProlog(MBB, MBBI, NextMBBI); 637 case AArch64::HOM_Epilog: 638 return lowerEpilog(MBB, MBBI, NextMBBI); 639 } 640 return false; 641 } 642 643 bool AArch64LowerHomogeneousPE::runOnMBB(MachineBasicBlock &MBB) { 644 bool Modified = false; 645 646 MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end(); 647 while (MBBI != E) { 648 MachineBasicBlock::iterator NMBBI = std::next(MBBI); 649 Modified |= runOnMI(MBB, MBBI, NMBBI); 650 MBBI = NMBBI; 651 } 652 653 return Modified; 654 } 655 656 bool AArch64LowerHomogeneousPE::runOnMachineFunction(MachineFunction &MF) { 657 TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo()); 658 659 bool Modified = false; 660 for (auto &MBB : MF) 661 Modified |= runOnMBB(MBB); 662 return Modified; 663 } 664 665 ModulePass *llvm::createAArch64LowerHomogeneousPrologEpilogPass() { 666 return new AArch64LowerHomogeneousPrologEpilog(); 667 } 668