1 //===- Thumb1FrameLowering.cpp - Thumb1 Frame Information -----------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the Thumb1 implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "Thumb1FrameLowering.h" 14 #include "ARMBaseInstrInfo.h" 15 #include "ARMBaseRegisterInfo.h" 16 #include "ARMMachineFunctionInfo.h" 17 #include "ARMSubtarget.h" 18 #include "Thumb1InstrInfo.h" 19 #include "ThumbRegisterInfo.h" 20 #include "Utils/ARMBaseInfo.h" 21 #include "llvm/ADT/BitVector.h" 22 #include "llvm/ADT/STLExtras.h" 23 #include "llvm/ADT/SmallVector.h" 24 #include "llvm/CodeGen/LivePhysRegs.h" 25 #include "llvm/CodeGen/MachineBasicBlock.h" 26 #include "llvm/CodeGen/MachineFrameInfo.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineInstr.h" 29 #include "llvm/CodeGen/MachineInstrBuilder.h" 30 #include "llvm/CodeGen/MachineModuleInfo.h" 31 #include "llvm/CodeGen/MachineOperand.h" 32 #include "llvm/CodeGen/MachineRegisterInfo.h" 33 #include "llvm/CodeGen/TargetInstrInfo.h" 34 #include "llvm/CodeGen/TargetOpcodes.h" 35 #include "llvm/CodeGen/TargetSubtargetInfo.h" 36 #include "llvm/IR/DebugLoc.h" 37 #include "llvm/MC/MCContext.h" 38 #include "llvm/MC/MCDwarf.h" 39 #include "llvm/MC/MCRegisterInfo.h" 40 #include "llvm/Support/Compiler.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include <cassert> 43 #include <iterator> 44 #include <vector> 45 46 #define DEBUG_TYPE "arm-frame-lowering" 47 48 using namespace llvm; 49 50 Thumb1FrameLowering::Thumb1FrameLowering(const ARMSubtarget &sti) 51 : ARMFrameLowering(sti) {} 52 53 bool Thumb1FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const{ 54 const MachineFrameInfo &MFI = MF.getFrameInfo(); 55 unsigned CFSize = MFI.getMaxCallFrameSize(); 56 // It's not always a good idea to include the call frame as part of the 57 // stack frame. ARM (especially Thumb) has small immediate offset to 58 // address the stack frame. So a large call frame can cause poor codegen 59 // and may even makes it impossible to scavenge a register. 60 if (CFSize >= ((1 << 8) - 1) * 4 / 2) // Half of imm8 * 4 61 return false; 62 63 return !MFI.hasVarSizedObjects(); 64 } 65 66 static void 67 emitPrologueEpilogueSPUpdate(MachineBasicBlock &MBB, 68 MachineBasicBlock::iterator &MBBI, 69 const TargetInstrInfo &TII, const DebugLoc &dl, 70 const ThumbRegisterInfo &MRI, int NumBytes, 71 unsigned ScratchReg, unsigned MIFlags) { 72 // If it would take more than three instructions to adjust the stack pointer 73 // using tADDspi/tSUBspi, load an immediate instead. 74 if (std::abs(NumBytes) > 508 * 3) { 75 // We use a different codepath here from the normal 76 // emitThumbRegPlusImmediate so we don't have to deal with register 77 // scavenging. (Scavenging could try to use the emergency spill slot 78 // before we've actually finished setting up the stack.) 79 if (ScratchReg == ARM::NoRegister) 80 report_fatal_error("Failed to emit Thumb1 stack adjustment"); 81 MachineFunction &MF = *MBB.getParent(); 82 const ARMSubtarget &ST = MF.getSubtarget<ARMSubtarget>(); 83 if (ST.genExecuteOnly()) { 84 unsigned XOInstr = ST.useMovt() ? ARM::t2MOVi32imm : ARM::tMOVi32imm; 85 BuildMI(MBB, MBBI, dl, TII.get(XOInstr), ScratchReg) 86 .addImm(NumBytes).setMIFlags(MIFlags); 87 } else { 88 MRI.emitLoadConstPool(MBB, MBBI, dl, ScratchReg, 0, NumBytes, ARMCC::AL, 89 0, MIFlags); 90 } 91 BuildMI(MBB, MBBI, dl, TII.get(ARM::tADDhirr), ARM::SP) 92 .addReg(ARM::SP) 93 .addReg(ScratchReg, RegState::Kill) 94 .add(predOps(ARMCC::AL)) 95 .setMIFlags(MIFlags); 96 return; 97 } 98 // FIXME: This is assuming the heuristics in emitThumbRegPlusImmediate 99 // won't change. 100 emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 101 MRI, MIFlags); 102 103 } 104 105 static void emitCallSPUpdate(MachineBasicBlock &MBB, 106 MachineBasicBlock::iterator &MBBI, 107 const TargetInstrInfo &TII, const DebugLoc &dl, 108 const ThumbRegisterInfo &MRI, int NumBytes, 109 unsigned MIFlags = MachineInstr::NoFlags) { 110 emitThumbRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, TII, 111 MRI, MIFlags); 112 } 113 114 115 MachineBasicBlock::iterator Thumb1FrameLowering:: 116 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 117 MachineBasicBlock::iterator I) const { 118 const Thumb1InstrInfo &TII = 119 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 120 const ThumbRegisterInfo *RegInfo = 121 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 122 if (!hasReservedCallFrame(MF)) { 123 // If we have alloca, convert as follows: 124 // ADJCALLSTACKDOWN -> sub, sp, sp, amount 125 // ADJCALLSTACKUP -> add, sp, sp, amount 126 MachineInstr &Old = *I; 127 DebugLoc dl = Old.getDebugLoc(); 128 unsigned Amount = TII.getFrameSize(Old); 129 if (Amount != 0) { 130 // We need to keep the stack aligned properly. To do this, we round the 131 // amount of space needed for the outgoing arguments up to the next 132 // alignment boundary. 133 Amount = alignTo(Amount, getStackAlign()); 134 135 // Replace the pseudo instruction with a new instruction... 136 unsigned Opc = Old.getOpcode(); 137 if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) { 138 emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, -Amount); 139 } else { 140 assert(Opc == ARM::ADJCALLSTACKUP || Opc == ARM::tADJCALLSTACKUP); 141 emitCallSPUpdate(MBB, I, TII, dl, *RegInfo, Amount); 142 } 143 } 144 } 145 return MBB.erase(I); 146 } 147 148 void Thumb1FrameLowering::emitPrologue(MachineFunction &MF, 149 MachineBasicBlock &MBB) const { 150 MachineBasicBlock::iterator MBBI = MBB.begin(); 151 MachineFrameInfo &MFI = MF.getFrameInfo(); 152 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 153 const MCRegisterInfo *MRI = MF.getContext().getRegisterInfo(); 154 const ThumbRegisterInfo *RegInfo = 155 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 156 const Thumb1InstrInfo &TII = 157 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 158 159 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 160 unsigned NumBytes = MFI.getStackSize(); 161 assert(NumBytes >= ArgRegsSaveSize && 162 "ArgRegsSaveSize is included in NumBytes"); 163 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 164 assert(STI.getPushPopSplitVariation(MF) == ARMSubtarget::SplitR7 && 165 "Must use R7 spilt for Thumb1"); 166 167 // Debug location must be unknown since the first debug location is used 168 // to determine the end of the prologue. 169 DebugLoc dl; 170 171 Register FramePtr = RegInfo->getFrameRegister(MF); 172 Register BasePtr = RegInfo->getBaseRegister(); 173 int CFAOffset = 0; 174 175 // Thumb add/sub sp, imm8 instructions implicitly multiply the offset by 4. 176 NumBytes = (NumBytes + 3) & ~3; 177 MFI.setStackSize(NumBytes); 178 179 // Determine the sizes of each callee-save spill areas and record which frame 180 // belongs to which callee-save spill areas. 181 unsigned FRSize = 0, GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; 182 int FramePtrSpillFI = 0; 183 184 if (ArgRegsSaveSize) { 185 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -ArgRegsSaveSize, 186 ARM::NoRegister, MachineInstr::FrameSetup); 187 CFAOffset += ArgRegsSaveSize; 188 unsigned CFIIndex = 189 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); 190 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 191 .addCFIIndex(CFIIndex) 192 .setMIFlags(MachineInstr::FrameSetup); 193 } 194 195 if (!AFI->hasStackFrame()) { 196 if (NumBytes - ArgRegsSaveSize != 0) { 197 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 198 -(NumBytes - ArgRegsSaveSize), 199 ARM::NoRegister, MachineInstr::FrameSetup); 200 CFAOffset += NumBytes - ArgRegsSaveSize; 201 unsigned CFIIndex = MF.addFrameInst( 202 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); 203 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 204 .addCFIIndex(CFIIndex) 205 .setMIFlags(MachineInstr::FrameSetup); 206 } 207 return; 208 } 209 210 bool HasFrameRecordArea = hasFP(MF) && ARM::hGPRRegClass.contains(FramePtr); 211 212 for (const CalleeSavedInfo &I : CSI) { 213 Register Reg = I.getReg(); 214 int FI = I.getFrameIdx(); 215 if (Reg == FramePtr) 216 FramePtrSpillFI = FI; 217 switch (Reg) { 218 case ARM::R11: 219 if (HasFrameRecordArea) { 220 FRSize += 4; 221 break; 222 } 223 [[fallthrough]]; 224 case ARM::R8: 225 case ARM::R9: 226 case ARM::R10: 227 GPRCS2Size += 4; 228 break; 229 case ARM::LR: 230 if (HasFrameRecordArea) { 231 FRSize += 4; 232 break; 233 } 234 [[fallthrough]]; 235 case ARM::R4: 236 case ARM::R5: 237 case ARM::R6: 238 case ARM::R7: 239 GPRCS1Size += 4; 240 break; 241 default: 242 DPRCSSize += 8; 243 } 244 } 245 246 MachineBasicBlock::iterator FRPush, GPRCS1Push, GPRCS2Push; 247 if (HasFrameRecordArea) { 248 // Skip Frame Record setup: 249 // push {lr} 250 // mov lr, r11 251 // push {lr} 252 std::advance(MBBI, 2); 253 FRPush = MBBI++; 254 } 255 256 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH) { 257 GPRCS1Push = MBBI; 258 ++MBBI; 259 } 260 261 // Find last push instruction for GPRCS2 - spilling of high registers 262 // (r8-r11) could consist of multiple tPUSH and tMOVr instructions. 263 while (true) { 264 MachineBasicBlock::iterator OldMBBI = MBBI; 265 // Skip a run of tMOVr instructions 266 while (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && 267 MBBI->getFlag(MachineInstr::FrameSetup)) 268 MBBI++; 269 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPUSH && 270 MBBI->getFlag(MachineInstr::FrameSetup)) { 271 GPRCS2Push = MBBI; 272 MBBI++; 273 } else { 274 // We have reached an instruction which is not a push, so the previous 275 // run of tMOVr instructions (which may have been empty) was not part of 276 // the prologue. Reset MBBI back to the last PUSH of the prologue. 277 MBBI = OldMBBI; 278 break; 279 } 280 } 281 282 // Skip past this code sequence, which is emitted to restore the LR if it is 283 // live-in and clobbered by the frame record setup code: 284 // ldr rX, [sp, #Y] 285 // mov lr, rX 286 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tLDRspi && 287 MBBI->getFlag(MachineInstr::FrameSetup)) { 288 ++MBBI; 289 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tMOVr && 290 MBBI->getOperand(0).getReg() == ARM::LR && 291 MBBI->getFlag(MachineInstr::FrameSetup)) { 292 ++MBBI; 293 } 294 } 295 296 // Determine starting offsets of spill areas. 297 unsigned DPRCSOffset = NumBytes - ArgRegsSaveSize - 298 (FRSize + GPRCS1Size + GPRCS2Size + DPRCSSize); 299 unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; 300 unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; 301 bool HasFP = hasFP(MF); 302 if (HasFP) 303 AFI->setFramePtrSpillOffset(MFI.getObjectOffset(FramePtrSpillFI) + 304 NumBytes); 305 if (HasFrameRecordArea) 306 AFI->setFrameRecordSavedAreaSize(FRSize); 307 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); 308 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); 309 AFI->setDPRCalleeSavedArea1Offset(DPRCSOffset); 310 NumBytes = DPRCSOffset; 311 312 int FramePtrOffsetInBlock = 0; 313 unsigned adjustedGPRCS1Size = GPRCS1Size; 314 if (GPRCS1Size > 0 && GPRCS2Size == 0 && 315 tryFoldSPUpdateIntoPushPop(STI, MF, &*(GPRCS1Push), NumBytes)) { 316 FramePtrOffsetInBlock = NumBytes; 317 adjustedGPRCS1Size += NumBytes; 318 NumBytes = 0; 319 } 320 CFAOffset += adjustedGPRCS1Size; 321 322 // Adjust FP so it point to the stack slot that contains the previous FP. 323 if (HasFP) { 324 MachineBasicBlock::iterator AfterPush = 325 HasFrameRecordArea ? std::next(FRPush) : std::next(GPRCS1Push); 326 if (HasFrameRecordArea) { 327 // We have just finished pushing the previous FP into the stack, 328 // so simply capture the SP value as the new Frame Pointer. 329 BuildMI(MBB, AfterPush, dl, TII.get(ARM::tMOVr), FramePtr) 330 .addReg(ARM::SP) 331 .setMIFlags(MachineInstr::FrameSetup) 332 .add(predOps(ARMCC::AL)); 333 } else { 334 FramePtrOffsetInBlock += 335 MFI.getObjectOffset(FramePtrSpillFI) + GPRCS1Size + ArgRegsSaveSize; 336 BuildMI(MBB, AfterPush, dl, TII.get(ARM::tADDrSPi), FramePtr) 337 .addReg(ARM::SP) 338 .addImm(FramePtrOffsetInBlock / 4) 339 .setMIFlags(MachineInstr::FrameSetup) 340 .add(predOps(ARMCC::AL)); 341 } 342 343 if(FramePtrOffsetInBlock) { 344 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::cfiDefCfa( 345 nullptr, MRI->getDwarfRegNum(FramePtr, true), (CFAOffset - FramePtrOffsetInBlock))); 346 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 347 .addCFIIndex(CFIIndex) 348 .setMIFlags(MachineInstr::FrameSetup); 349 } else { 350 unsigned CFIIndex = 351 MF.addFrameInst(MCCFIInstruction::createDefCfaRegister( 352 nullptr, MRI->getDwarfRegNum(FramePtr, true))); 353 BuildMI(MBB, AfterPush, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 354 .addCFIIndex(CFIIndex) 355 .setMIFlags(MachineInstr::FrameSetup); 356 } 357 if (NumBytes > 508) 358 // If offset is > 508 then sp cannot be adjusted in a single instruction, 359 // try restoring from fp instead. 360 AFI->setShouldRestoreSPFromFP(true); 361 } 362 363 // Emit call frame information for the callee-saved low registers. 364 if (GPRCS1Size > 0) { 365 MachineBasicBlock::iterator Pos = std::next(GPRCS1Push); 366 if (adjustedGPRCS1Size) { 367 unsigned CFIIndex = 368 MF.addFrameInst(MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); 369 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 370 .addCFIIndex(CFIIndex) 371 .setMIFlags(MachineInstr::FrameSetup); 372 } 373 for (const CalleeSavedInfo &I : CSI) { 374 Register Reg = I.getReg(); 375 int FI = I.getFrameIdx(); 376 switch (Reg) { 377 case ARM::R8: 378 case ARM::R9: 379 case ARM::R10: 380 case ARM::R11: 381 case ARM::R12: 382 break; 383 case ARM::R0: 384 case ARM::R1: 385 case ARM::R2: 386 case ARM::R3: 387 case ARM::R4: 388 case ARM::R5: 389 case ARM::R6: 390 case ARM::R7: 391 case ARM::LR: 392 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 393 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); 394 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 395 .addCFIIndex(CFIIndex) 396 .setMIFlags(MachineInstr::FrameSetup); 397 break; 398 } 399 } 400 } 401 402 // Emit call frame information for the callee-saved high registers. 403 if (GPRCS2Size > 0) { 404 MachineBasicBlock::iterator Pos = std::next(GPRCS2Push); 405 for (auto &I : CSI) { 406 Register Reg = I.getReg(); 407 int FI = I.getFrameIdx(); 408 switch (Reg) { 409 case ARM::R8: 410 case ARM::R9: 411 case ARM::R10: 412 case ARM::R11: 413 case ARM::R12: { 414 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 415 nullptr, MRI->getDwarfRegNum(Reg, true), MFI.getObjectOffset(FI))); 416 BuildMI(MBB, Pos, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 417 .addCFIIndex(CFIIndex) 418 .setMIFlags(MachineInstr::FrameSetup); 419 break; 420 } 421 default: 422 break; 423 } 424 } 425 } 426 427 if (NumBytes) { 428 // Insert it after all the callee-save spills. 429 // 430 // For a large stack frame, we might need a scratch register to store 431 // the size of the frame. We know all callee-save registers are free 432 // at this point in the prologue, so pick one. 433 unsigned ScratchRegister = ARM::NoRegister; 434 for (auto &I : CSI) { 435 Register Reg = I.getReg(); 436 if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { 437 ScratchRegister = Reg; 438 break; 439 } 440 } 441 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, -NumBytes, 442 ScratchRegister, MachineInstr::FrameSetup); 443 if (!HasFP) { 444 CFAOffset += NumBytes; 445 unsigned CFIIndex = MF.addFrameInst( 446 MCCFIInstruction::cfiDefCfaOffset(nullptr, CFAOffset)); 447 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 448 .addCFIIndex(CFIIndex) 449 .setMIFlags(MachineInstr::FrameSetup); 450 } 451 } 452 453 if (STI.isTargetELF() && HasFP) 454 MFI.setOffsetAdjustment(MFI.getOffsetAdjustment() - 455 AFI->getFramePtrSpillOffset()); 456 457 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); 458 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); 459 AFI->setDPRCalleeSavedArea1Size(DPRCSSize); 460 461 if (RegInfo->hasStackRealignment(MF)) { 462 const unsigned NrBitsToZero = Log2(MFI.getMaxAlign()); 463 // Emit the following sequence, using R4 as a temporary, since we cannot use 464 // SP as a source or destination register for the shifts: 465 // mov r4, sp 466 // lsrs r4, r4, #NrBitsToZero 467 // lsls r4, r4, #NrBitsToZero 468 // mov sp, r4 469 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::R4) 470 .addReg(ARM::SP, RegState::Kill) 471 .add(predOps(ARMCC::AL)); 472 473 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSRri), ARM::R4) 474 .addDef(ARM::CPSR) 475 .addReg(ARM::R4, RegState::Kill) 476 .addImm(NrBitsToZero) 477 .add(predOps(ARMCC::AL)); 478 479 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLSLri), ARM::R4) 480 .addDef(ARM::CPSR) 481 .addReg(ARM::R4, RegState::Kill) 482 .addImm(NrBitsToZero) 483 .add(predOps(ARMCC::AL)); 484 485 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 486 .addReg(ARM::R4, RegState::Kill) 487 .add(predOps(ARMCC::AL)); 488 489 AFI->setShouldRestoreSPFromFP(true); 490 } 491 492 // If we need a base pointer, set it up here. It's whatever the value 493 // of the stack pointer is at this point. Any variable size objects 494 // will be allocated after this, so we can still use the base pointer 495 // to reference locals. 496 if (RegInfo->hasBasePointer(MF)) 497 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), BasePtr) 498 .addReg(ARM::SP) 499 .add(predOps(ARMCC::AL)); 500 501 // If the frame has variable sized objects then the epilogue must restore 502 // the sp from fp. We can assume there's an FP here since hasFP already 503 // checks for hasVarSizedObjects. 504 if (MFI.hasVarSizedObjects()) 505 AFI->setShouldRestoreSPFromFP(true); 506 507 // In some cases, virtual registers have been introduced, e.g. by uses of 508 // emitThumbRegPlusImmInReg. 509 MF.getProperties().reset(MachineFunctionProperties::Property::NoVRegs); 510 } 511 512 void Thumb1FrameLowering::emitEpilogue(MachineFunction &MF, 513 MachineBasicBlock &MBB) const { 514 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 515 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); 516 MachineFrameInfo &MFI = MF.getFrameInfo(); 517 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 518 const ThumbRegisterInfo *RegInfo = 519 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 520 const Thumb1InstrInfo &TII = 521 *static_cast<const Thumb1InstrInfo *>(STI.getInstrInfo()); 522 523 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 524 int NumBytes = (int)MFI.getStackSize(); 525 assert((unsigned)NumBytes >= ArgRegsSaveSize && 526 "ArgRegsSaveSize is included in NumBytes"); 527 Register FramePtr = RegInfo->getFrameRegister(MF); 528 529 if (!AFI->hasStackFrame()) { 530 if (NumBytes - ArgRegsSaveSize != 0) 531 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 532 NumBytes - ArgRegsSaveSize, ARM::NoRegister, 533 MachineInstr::FrameDestroy); 534 } else { 535 // Unwind MBBI to point to first LDR / VLDRD. 536 if (MBBI != MBB.begin()) { 537 do 538 --MBBI; 539 while (MBBI != MBB.begin() && MBBI->getFlag(MachineInstr::FrameDestroy)); 540 if (!MBBI->getFlag(MachineInstr::FrameDestroy)) 541 ++MBBI; 542 } 543 544 // Move SP to start of FP callee save spill area. 545 NumBytes -= 546 (AFI->getFrameRecordSavedAreaSize() + 547 AFI->getGPRCalleeSavedArea1Size() + AFI->getGPRCalleeSavedArea2Size() + 548 AFI->getDPRCalleeSavedArea1Size() + ArgRegsSaveSize); 549 550 // We are likely to need a scratch register and we know all callee-save 551 // registers are free at this point in the epilogue, so pick one. 552 unsigned ScratchRegister = ARM::NoRegister; 553 bool HasFP = hasFP(MF); 554 for (auto &I : MFI.getCalleeSavedInfo()) { 555 Register Reg = I.getReg(); 556 if (isARMLowRegister(Reg) && !(HasFP && Reg == FramePtr)) { 557 ScratchRegister = Reg; 558 break; 559 } 560 } 561 562 if (AFI->shouldRestoreSPFromFP()) { 563 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; 564 // Reset SP based on frame pointer only if the stack frame extends beyond 565 // frame pointer stack slot, the target is ELF and the function has FP, or 566 // the target uses var sized objects. 567 if (NumBytes) { 568 assert(ScratchRegister != ARM::NoRegister && 569 "No scratch register to restore SP from FP!"); 570 emitThumbRegPlusImmediate(MBB, MBBI, dl, ScratchRegister, FramePtr, -NumBytes, 571 TII, *RegInfo, MachineInstr::FrameDestroy); 572 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 573 .addReg(ScratchRegister) 574 .add(predOps(ARMCC::AL)) 575 .setMIFlag(MachineInstr::FrameDestroy); 576 } else 577 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr), ARM::SP) 578 .addReg(FramePtr) 579 .add(predOps(ARMCC::AL)) 580 .setMIFlag(MachineInstr::FrameDestroy); 581 } else { 582 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tBX_RET && 583 &MBB.front() != &*MBBI && std::prev(MBBI)->getOpcode() == ARM::tPOP) { 584 MachineBasicBlock::iterator PMBBI = std::prev(MBBI); 585 if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*PMBBI, NumBytes)) 586 emitPrologueEpilogueSPUpdate(MBB, PMBBI, TII, dl, *RegInfo, NumBytes, 587 ScratchRegister, MachineInstr::FrameDestroy); 588 } else if (!tryFoldSPUpdateIntoPushPop(STI, MF, &*MBBI, NumBytes)) 589 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, NumBytes, 590 ScratchRegister, MachineInstr::FrameDestroy); 591 } 592 } 593 594 if (needPopSpecialFixUp(MF)) { 595 bool Done = emitPopSpecialFixUp(MBB, /* DoIt */ true); 596 (void)Done; 597 assert(Done && "Emission of the special fixup failed!?"); 598 } 599 } 600 601 bool Thumb1FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 602 if (!needPopSpecialFixUp(*MBB.getParent())) 603 return true; 604 605 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 606 return emitPopSpecialFixUp(*TmpMBB, /* DoIt */ false); 607 } 608 609 bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { 610 ARMFunctionInfo *AFI = 611 const_cast<MachineFunction *>(&MF)->getInfo<ARMFunctionInfo>(); 612 if (AFI->getArgRegsSaveSize()) 613 return true; 614 615 // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. 616 for (const CalleeSavedInfo &CSI : MF.getFrameInfo().getCalleeSavedInfo()) 617 if (CSI.getReg() == ARM::LR) 618 return true; 619 620 return false; 621 } 622 623 static void findTemporariesForLR(const BitVector &GPRsNoLRSP, 624 const BitVector &PopFriendly, 625 const LiveRegUnits &UsedRegs, unsigned &PopReg, 626 unsigned &TmpReg, MachineRegisterInfo &MRI) { 627 PopReg = TmpReg = 0; 628 for (auto Reg : GPRsNoLRSP.set_bits()) { 629 if (UsedRegs.available(Reg)) { 630 // Remember the first pop-friendly register and exit. 631 if (PopFriendly.test(Reg)) { 632 PopReg = Reg; 633 TmpReg = 0; 634 break; 635 } 636 // Otherwise, remember that the register will be available to 637 // save a pop-friendly register. 638 TmpReg = Reg; 639 } 640 } 641 } 642 643 bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, 644 bool DoIt) const { 645 MachineFunction &MF = *MBB.getParent(); 646 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 647 unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); 648 const TargetInstrInfo &TII = *STI.getInstrInfo(); 649 const ThumbRegisterInfo *RegInfo = 650 static_cast<const ThumbRegisterInfo *>(STI.getRegisterInfo()); 651 652 // If MBBI is a return instruction, or is a tPOP followed by a return 653 // instruction in the successor BB, we may be able to directly restore 654 // LR in the PC. 655 // This is only possible with v5T ops (v4T can't change the Thumb bit via 656 // a POP PC instruction), and only if we do not need to emit any SP update. 657 // Otherwise, we need a temporary register to pop the value 658 // and copy that value into LR. 659 auto MBBI = MBB.getFirstTerminator(); 660 bool CanRestoreDirectly = STI.hasV5TOps() && !ArgRegsSaveSize; 661 if (CanRestoreDirectly) { 662 if (MBBI != MBB.end() && MBBI->getOpcode() != ARM::tB) 663 CanRestoreDirectly = (MBBI->getOpcode() == ARM::tBX_RET || 664 MBBI->getOpcode() == ARM::tPOP_RET); 665 else { 666 auto MBBI_prev = MBBI; 667 MBBI_prev--; 668 assert(MBBI_prev->getOpcode() == ARM::tPOP); 669 assert(MBB.succ_size() == 1); 670 if ((*MBB.succ_begin())->begin()->getOpcode() == ARM::tBX_RET) 671 MBBI = MBBI_prev; // Replace the final tPOP with a tPOP_RET. 672 else 673 CanRestoreDirectly = false; 674 } 675 } 676 677 if (CanRestoreDirectly) { 678 if (!DoIt || MBBI->getOpcode() == ARM::tPOP_RET) 679 return true; 680 MachineInstrBuilder MIB = 681 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP_RET)) 682 .add(predOps(ARMCC::AL)) 683 .setMIFlag(MachineInstr::FrameDestroy); 684 // Copy implicit ops and popped registers, if any. 685 for (auto MO: MBBI->operands()) 686 if (MO.isReg() && (MO.isImplicit() || MO.isDef())) 687 MIB.add(MO); 688 MIB.addReg(ARM::PC, RegState::Define); 689 // Erase the old instruction (tBX_RET or tPOP). 690 MBB.erase(MBBI); 691 return true; 692 } 693 694 // Look for a temporary register to use. 695 // First, compute the liveness information. 696 const TargetRegisterInfo &TRI = *STI.getRegisterInfo(); 697 LiveRegUnits UsedRegs(TRI); 698 UsedRegs.addLiveOuts(MBB); 699 // The semantic of pristines changed recently and now, 700 // the callee-saved registers that are touched in the function 701 // are not part of the pristines set anymore. 702 // Add those callee-saved now. 703 const MCPhysReg *CSRegs = TRI.getCalleeSavedRegs(&MF); 704 for (unsigned i = 0; CSRegs[i]; ++i) 705 UsedRegs.addReg(CSRegs[i]); 706 707 DebugLoc dl = DebugLoc(); 708 if (MBBI != MBB.end()) { 709 dl = MBBI->getDebugLoc(); 710 auto InstUpToMBBI = MBB.end(); 711 while (InstUpToMBBI != MBBI) 712 // The pre-decrement is on purpose here. 713 // We want to have the liveness right before MBBI. 714 UsedRegs.stepBackward(*--InstUpToMBBI); 715 } 716 717 // Look for a register that can be directly use in the POP. 718 unsigned PopReg = 0; 719 // And some temporary register, just in case. 720 unsigned TemporaryReg = 0; 721 BitVector PopFriendly = 722 TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::tGPRRegClassID)); 723 724 assert(PopFriendly.any() && "No allocatable pop-friendly register?!"); 725 // Rebuild the GPRs from the high registers because they are removed 726 // form the GPR reg class for thumb1. 727 BitVector GPRsNoLRSP = 728 TRI.getAllocatableSet(MF, TRI.getRegClass(ARM::hGPRRegClassID)); 729 GPRsNoLRSP |= PopFriendly; 730 GPRsNoLRSP.reset(ARM::LR); 731 GPRsNoLRSP.reset(ARM::SP); 732 GPRsNoLRSP.reset(ARM::PC); 733 findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, TemporaryReg, 734 MF.getRegInfo()); 735 736 // If we couldn't find a pop-friendly register, try restoring LR before 737 // popping the other callee-saved registers, so we could use one of them as a 738 // temporary. 739 bool UseLDRSP = false; 740 if (!PopReg && MBBI != MBB.begin()) { 741 auto PrevMBBI = MBBI; 742 PrevMBBI--; 743 if (PrevMBBI->getOpcode() == ARM::tPOP) { 744 UsedRegs.stepBackward(*PrevMBBI); 745 findTemporariesForLR(GPRsNoLRSP, PopFriendly, UsedRegs, PopReg, 746 TemporaryReg, MF.getRegInfo()); 747 if (PopReg) { 748 MBBI = PrevMBBI; 749 UseLDRSP = true; 750 } 751 } 752 } 753 754 if (!DoIt && !PopReg && !TemporaryReg) 755 return false; 756 757 assert((PopReg || TemporaryReg) && "Cannot get LR"); 758 759 if (UseLDRSP) { 760 assert(PopReg && "Do not know how to get LR"); 761 // Load the LR via LDR tmp, [SP, #off] 762 BuildMI(MBB, MBBI, dl, TII.get(ARM::tLDRspi)) 763 .addReg(PopReg, RegState::Define) 764 .addReg(ARM::SP) 765 .addImm(MBBI->getNumExplicitOperands() - 2) 766 .add(predOps(ARMCC::AL)) 767 .setMIFlag(MachineInstr::FrameDestroy); 768 // Move from the temporary register to the LR. 769 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 770 .addReg(ARM::LR, RegState::Define) 771 .addReg(PopReg, RegState::Kill) 772 .add(predOps(ARMCC::AL)) 773 .setMIFlag(MachineInstr::FrameDestroy); 774 // Advance past the pop instruction. 775 MBBI++; 776 // Increment the SP. 777 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, 778 ArgRegsSaveSize + 4, ARM::NoRegister, 779 MachineInstr::FrameDestroy); 780 return true; 781 } 782 783 if (TemporaryReg) { 784 assert(!PopReg && "Unnecessary MOV is about to be inserted"); 785 PopReg = PopFriendly.find_first(); 786 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 787 .addReg(TemporaryReg, RegState::Define) 788 .addReg(PopReg, RegState::Kill) 789 .add(predOps(ARMCC::AL)) 790 .setMIFlag(MachineInstr::FrameDestroy); 791 } 792 793 if (MBBI != MBB.end() && MBBI->getOpcode() == ARM::tPOP_RET) { 794 // We couldn't use the direct restoration above, so 795 // perform the opposite conversion: tPOP_RET to tPOP. 796 MachineInstrBuilder MIB = 797 BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP)) 798 .add(predOps(ARMCC::AL)) 799 .setMIFlag(MachineInstr::FrameDestroy); 800 bool Popped = false; 801 for (auto MO: MBBI->operands()) 802 if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && 803 MO.getReg() != ARM::PC) { 804 MIB.add(MO); 805 if (!MO.isImplicit()) 806 Popped = true; 807 } 808 // Is there anything left to pop? 809 if (!Popped) 810 MBB.erase(MIB.getInstr()); 811 // Erase the old instruction. 812 MBB.erase(MBBI); 813 MBBI = BuildMI(MBB, MBB.end(), dl, TII.get(ARM::tBX_RET)) 814 .add(predOps(ARMCC::AL)) 815 .setMIFlag(MachineInstr::FrameDestroy); 816 } 817 818 assert(PopReg && "Do not know how to get LR"); 819 BuildMI(MBB, MBBI, dl, TII.get(ARM::tPOP)) 820 .add(predOps(ARMCC::AL)) 821 .addReg(PopReg, RegState::Define) 822 .setMIFlag(MachineInstr::FrameDestroy); 823 824 emitPrologueEpilogueSPUpdate(MBB, MBBI, TII, dl, *RegInfo, ArgRegsSaveSize, 825 ARM::NoRegister, MachineInstr::FrameDestroy); 826 827 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 828 .addReg(ARM::LR, RegState::Define) 829 .addReg(PopReg, RegState::Kill) 830 .add(predOps(ARMCC::AL)) 831 .setMIFlag(MachineInstr::FrameDestroy); 832 833 if (TemporaryReg) 834 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) 835 .addReg(PopReg, RegState::Define) 836 .addReg(TemporaryReg, RegState::Kill) 837 .add(predOps(ARMCC::AL)) 838 .setMIFlag(MachineInstr::FrameDestroy); 839 840 return true; 841 } 842 843 static const SmallVector<Register> OrderedLowRegs = {ARM::R4, ARM::R5, ARM::R6, 844 ARM::R7, ARM::LR}; 845 static const SmallVector<Register> OrderedHighRegs = {ARM::R8, ARM::R9, 846 ARM::R10, ARM::R11}; 847 static const SmallVector<Register> OrderedCopyRegs = { 848 ARM::R0, ARM::R1, ARM::R2, ARM::R3, ARM::R4, 849 ARM::R5, ARM::R6, ARM::R7, ARM::LR}; 850 851 static void splitLowAndHighRegs(const std::set<Register> &Regs, 852 std::set<Register> &LowRegs, 853 std::set<Register> &HighRegs) { 854 for (Register Reg : Regs) { 855 if (ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) { 856 LowRegs.insert(Reg); 857 } else if (ARM::hGPRRegClass.contains(Reg) && Reg != ARM::LR) { 858 HighRegs.insert(Reg); 859 } else { 860 llvm_unreachable("callee-saved register of unexpected class"); 861 } 862 } 863 } 864 865 template <typename It> 866 It getNextOrderedReg(It OrderedStartIt, It OrderedEndIt, 867 const std::set<Register> &RegSet) { 868 return std::find_if(OrderedStartIt, OrderedEndIt, 869 [&](Register Reg) { return RegSet.count(Reg); }); 870 } 871 872 static void pushRegsToStack(MachineBasicBlock &MBB, 873 MachineBasicBlock::iterator MI, 874 const TargetInstrInfo &TII, 875 const std::set<Register> &RegsToSave, 876 const std::set<Register> &CopyRegs, 877 bool &UsedLRAsTemp) { 878 MachineFunction &MF = *MBB.getParent(); 879 const MachineRegisterInfo &MRI = MF.getRegInfo(); 880 DebugLoc DL; 881 882 std::set<Register> LowRegs, HighRegs; 883 splitLowAndHighRegs(RegsToSave, LowRegs, HighRegs); 884 885 // Push low regs first 886 if (!LowRegs.empty()) { 887 MachineInstrBuilder MIB = 888 BuildMI(MBB, MI, DL, TII.get(ARM::tPUSH)).add(predOps(ARMCC::AL)); 889 for (unsigned Reg : OrderedLowRegs) { 890 if (LowRegs.count(Reg)) { 891 bool isKill = !MRI.isLiveIn(Reg); 892 if (isKill && !MRI.isReserved(Reg)) 893 MBB.addLiveIn(Reg); 894 895 MIB.addReg(Reg, getKillRegState(isKill)); 896 } 897 } 898 MIB.setMIFlags(MachineInstr::FrameSetup); 899 } 900 901 // Now push the high registers 902 // There are no store instructions that can access high registers directly, 903 // so we have to move them to low registers, and push them. 904 // This might take multiple pushes, as it is possible for there to 905 // be fewer low registers available than high registers which need saving. 906 907 // Find the first register to save. 908 // Registers must be processed in reverse order so that in case we need to use 909 // multiple PUSH instructions, the order of the registers on the stack still 910 // matches the unwind info. They need to be swicthed back to ascending order 911 // before adding to the PUSH instruction. 912 auto HiRegToSave = getNextOrderedReg(OrderedHighRegs.rbegin(), 913 OrderedHighRegs.rend(), 914 HighRegs); 915 916 while (HiRegToSave != OrderedHighRegs.rend()) { 917 // Find the first low register to use. 918 auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), 919 OrderedCopyRegs.rend(), 920 CopyRegs); 921 922 // Create the PUSH, but don't insert it yet (the MOVs need to come first). 923 MachineInstrBuilder PushMIB = BuildMI(MF, DL, TII.get(ARM::tPUSH)) 924 .add(predOps(ARMCC::AL)) 925 .setMIFlags(MachineInstr::FrameSetup); 926 927 SmallVector<unsigned, 4> RegsToPush; 928 while (HiRegToSave != OrderedHighRegs.rend() && 929 CopyRegIt != OrderedCopyRegs.rend()) { 930 if (HighRegs.count(*HiRegToSave)) { 931 bool isKill = !MRI.isLiveIn(*HiRegToSave); 932 if (isKill && !MRI.isReserved(*HiRegToSave)) 933 MBB.addLiveIn(*HiRegToSave); 934 if (*CopyRegIt == ARM::LR) 935 UsedLRAsTemp = true; 936 937 // Emit a MOV from the high reg to the low reg. 938 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 939 .addReg(*CopyRegIt, RegState::Define) 940 .addReg(*HiRegToSave, getKillRegState(isKill)) 941 .add(predOps(ARMCC::AL)) 942 .setMIFlags(MachineInstr::FrameSetup); 943 944 // Record the register that must be added to the PUSH. 945 RegsToPush.push_back(*CopyRegIt); 946 947 CopyRegIt = getNextOrderedReg(std::next(CopyRegIt), 948 OrderedCopyRegs.rend(), 949 CopyRegs); 950 HiRegToSave = getNextOrderedReg(std::next(HiRegToSave), 951 OrderedHighRegs.rend(), 952 HighRegs); 953 } 954 } 955 956 // Add the low registers to the PUSH, in ascending order. 957 for (unsigned Reg : llvm::reverse(RegsToPush)) 958 PushMIB.addReg(Reg, RegState::Kill); 959 960 // Insert the PUSH instruction after the MOVs. 961 MBB.insert(MI, PushMIB); 962 } 963 } 964 965 static void popRegsFromStack(MachineBasicBlock &MBB, 966 MachineBasicBlock::iterator &MI, 967 const TargetInstrInfo &TII, 968 const std::set<Register> &RegsToRestore, 969 const std::set<Register> &AvailableCopyRegs, 970 bool IsVarArg, bool HasV5Ops) { 971 if (RegsToRestore.empty()) 972 return; 973 974 MachineFunction &MF = *MBB.getParent(); 975 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 976 DebugLoc DL = MI != MBB.end() ? MI->getDebugLoc() : DebugLoc(); 977 978 std::set<Register> LowRegs, HighRegs; 979 splitLowAndHighRegs(RegsToRestore, LowRegs, HighRegs); 980 981 // Pop the high registers first 982 // There are no store instructions that can access high registers directly, 983 // so we have to pop into low registers and them move to the high registers. 984 // This might take multiple pops, as it is possible for there to 985 // be fewer low registers available than high registers which need restoring. 986 987 // Find the first register to restore. 988 auto HiRegToRestore = getNextOrderedReg(OrderedHighRegs.begin(), 989 OrderedHighRegs.end(), 990 HighRegs); 991 992 std::set<Register> CopyRegs = AvailableCopyRegs; 993 Register LowScratchReg; 994 if (!HighRegs.empty() && CopyRegs.empty()) { 995 // No copy regs are available to pop high regs. Let's make use of a return 996 // register and the scratch register (IP/R12) to copy things around. 997 LowScratchReg = ARM::R0; 998 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 999 .addReg(ARM::R12, RegState::Define) 1000 .addReg(LowScratchReg, RegState::Kill) 1001 .add(predOps(ARMCC::AL)) 1002 .setMIFlag(MachineInstr::FrameDestroy); 1003 CopyRegs.insert(LowScratchReg); 1004 } 1005 1006 while (HiRegToRestore != OrderedHighRegs.end()) { 1007 assert(!CopyRegs.empty()); 1008 // Find the first low register to use. 1009 auto CopyReg = getNextOrderedReg(OrderedCopyRegs.begin(), 1010 OrderedCopyRegs.end(), 1011 CopyRegs); 1012 1013 // Create the POP instruction. 1014 MachineInstrBuilder PopMIB = BuildMI(MBB, MI, DL, TII.get(ARM::tPOP)) 1015 .add(predOps(ARMCC::AL)) 1016 .setMIFlag(MachineInstr::FrameDestroy); 1017 1018 while (HiRegToRestore != OrderedHighRegs.end() && 1019 CopyReg != OrderedCopyRegs.end()) { 1020 // Add the low register to the POP. 1021 PopMIB.addReg(*CopyReg, RegState::Define); 1022 1023 // Create the MOV from low to high register. 1024 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 1025 .addReg(*HiRegToRestore, RegState::Define) 1026 .addReg(*CopyReg, RegState::Kill) 1027 .add(predOps(ARMCC::AL)) 1028 .setMIFlag(MachineInstr::FrameDestroy); 1029 1030 CopyReg = getNextOrderedReg(std::next(CopyReg), 1031 OrderedCopyRegs.end(), 1032 CopyRegs); 1033 HiRegToRestore = getNextOrderedReg(std::next(HiRegToRestore), 1034 OrderedHighRegs.end(), 1035 HighRegs); 1036 } 1037 } 1038 1039 // Restore low register used as scratch if necessary 1040 if (LowScratchReg.isValid()) { 1041 BuildMI(MBB, MI, DL, TII.get(ARM::tMOVr)) 1042 .addReg(LowScratchReg, RegState::Define) 1043 .addReg(ARM::R12, RegState::Kill) 1044 .add(predOps(ARMCC::AL)) 1045 .setMIFlag(MachineInstr::FrameDestroy); 1046 } 1047 1048 // Now pop the low registers 1049 if (!LowRegs.empty()) { 1050 MachineInstrBuilder MIB = BuildMI(MF, DL, TII.get(ARM::tPOP)) 1051 .add(predOps(ARMCC::AL)) 1052 .setMIFlag(MachineInstr::FrameDestroy); 1053 1054 bool NeedsPop = false; 1055 for (Register Reg : OrderedLowRegs) { 1056 if (!LowRegs.count(Reg)) 1057 continue; 1058 1059 if (Reg == ARM::LR) { 1060 if (!MBB.succ_empty() || MI->getOpcode() == ARM::TCRETURNdi || 1061 MI->getOpcode() == ARM::TCRETURNri || 1062 MI->getOpcode() == ARM::TCRETURNrinotr12) 1063 // LR may only be popped into PC, as part of return sequence. 1064 // If this isn't the return sequence, we'll need emitPopSpecialFixUp 1065 // to restore LR the hard way. 1066 // FIXME: if we don't pass any stack arguments it would be actually 1067 // advantageous *and* correct to do the conversion to an ordinary call 1068 // instruction here. 1069 continue; 1070 // Special epilogue for vararg functions. See emitEpilogue 1071 if (IsVarArg) 1072 continue; 1073 // ARMv4T requires BX, see emitEpilogue 1074 if (!HasV5Ops) 1075 continue; 1076 1077 // CMSE entry functions must return via BXNS, see emitEpilogue. 1078 if (AFI->isCmseNSEntryFunction()) 1079 continue; 1080 1081 // Pop LR into PC. 1082 Reg = ARM::PC; 1083 (*MIB).setDesc(TII.get(ARM::tPOP_RET)); 1084 if (MI != MBB.end()) 1085 MIB.copyImplicitOps(*MI); 1086 MI = MBB.erase(MI); 1087 } 1088 MIB.addReg(Reg, getDefRegState(true)); 1089 NeedsPop = true; 1090 } 1091 1092 // It's illegal to emit pop instruction without operands. 1093 if (NeedsPop) 1094 MBB.insert(MI, &*MIB); 1095 else 1096 MF.deleteMachineInstr(MIB); 1097 } 1098 } 1099 1100 bool Thumb1FrameLowering::spillCalleeSavedRegisters( 1101 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1102 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 1103 if (CSI.empty()) 1104 return false; 1105 1106 const TargetInstrInfo &TII = *STI.getInstrInfo(); 1107 MachineFunction &MF = *MBB.getParent(); 1108 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 1109 MF.getSubtarget().getRegisterInfo()); 1110 Register FPReg = RegInfo->getFrameRegister(MF); 1111 1112 // In case FP is a high reg, we need a separate push sequence to generate 1113 // a correct Frame Record 1114 bool NeedsFrameRecordPush = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); 1115 bool LRLiveIn = MF.getRegInfo().isLiveIn(ARM::LR); 1116 bool UsedLRAsTemp = false; 1117 1118 std::set<Register> FrameRecord; 1119 std::set<Register> SpilledGPRs; 1120 for (const CalleeSavedInfo &I : CSI) { 1121 Register Reg = I.getReg(); 1122 if (NeedsFrameRecordPush && (Reg == FPReg || Reg == ARM::LR)) 1123 FrameRecord.insert(Reg); 1124 else 1125 SpilledGPRs.insert(Reg); 1126 } 1127 1128 // Determine intermediate registers which can be used for pushing the frame 1129 // record: 1130 // - Unused argument registers 1131 // - LR: This is possible because the first PUSH will save it on the stack, 1132 // so it is free to be used as a temporary for the second. However, it 1133 // is possible for LR to be live-in to the function, in which case we 1134 // will need to restore it later in the prologue, so we only use this 1135 // if there are no free argument registers. 1136 std::set<Register> FrameRecordCopyRegs; 1137 for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) 1138 if (!MF.getRegInfo().isLiveIn(ArgReg)) 1139 FrameRecordCopyRegs.insert(ArgReg); 1140 if (FrameRecordCopyRegs.empty()) 1141 FrameRecordCopyRegs.insert(ARM::LR); 1142 1143 pushRegsToStack(MBB, MI, TII, FrameRecord, FrameRecordCopyRegs, UsedLRAsTemp); 1144 1145 // Determine intermediate registers which can be used for pushing high regs: 1146 // - Spilled low regs 1147 // - Unused argument registers 1148 std::set<Register> CopyRegs; 1149 for (Register Reg : SpilledGPRs) 1150 if ((ARM::tGPRRegClass.contains(Reg) || Reg == ARM::LR) && 1151 !MF.getRegInfo().isLiveIn(Reg) && !(hasFP(MF) && Reg == FPReg)) 1152 CopyRegs.insert(Reg); 1153 for (unsigned ArgReg : {ARM::R0, ARM::R1, ARM::R2, ARM::R3}) 1154 if (!MF.getRegInfo().isLiveIn(ArgReg)) 1155 CopyRegs.insert(ArgReg); 1156 1157 pushRegsToStack(MBB, MI, TII, SpilledGPRs, CopyRegs, UsedLRAsTemp); 1158 1159 // If the push sequence used LR as a temporary, and LR is live-in (for 1160 // example because it is used by the llvm.returnaddress intrinsic), then we 1161 // need to reload it from the stack. Thumb1 does not have a load instruction 1162 // which can use LR, so we need to load into a temporary low register and 1163 // copy to LR. 1164 if (LRLiveIn && UsedLRAsTemp) { 1165 auto CopyRegIt = getNextOrderedReg(OrderedCopyRegs.rbegin(), 1166 OrderedCopyRegs.rend(), CopyRegs); 1167 assert(CopyRegIt != OrderedCopyRegs.rend()); 1168 unsigned NumRegsPushed = FrameRecord.size() + SpilledGPRs.size(); 1169 LLVM_DEBUG( 1170 dbgs() << "LR is live-in but clobbered in prologue, restoring via " 1171 << RegInfo->getName(*CopyRegIt) << "\n"); 1172 1173 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::tLDRspi), *CopyRegIt) 1174 .addReg(ARM::SP) 1175 .addImm(NumRegsPushed - 1) 1176 .add(predOps(ARMCC::AL)) 1177 .setMIFlags(MachineInstr::FrameSetup); 1178 1179 BuildMI(MBB, MI, DebugLoc(), TII.get(ARM::tMOVr), ARM::LR) 1180 .addReg(*CopyRegIt) 1181 .add(predOps(ARMCC::AL)) 1182 .setMIFlags(MachineInstr::FrameSetup); 1183 } 1184 1185 return true; 1186 } 1187 1188 bool Thumb1FrameLowering::restoreCalleeSavedRegisters( 1189 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 1190 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 1191 if (CSI.empty()) 1192 return false; 1193 1194 MachineFunction &MF = *MBB.getParent(); 1195 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 1196 const TargetInstrInfo &TII = *STI.getInstrInfo(); 1197 const ARMBaseRegisterInfo *RegInfo = static_cast<const ARMBaseRegisterInfo *>( 1198 MF.getSubtarget().getRegisterInfo()); 1199 bool IsVarArg = AFI->getArgRegsSaveSize() > 0; 1200 Register FPReg = RegInfo->getFrameRegister(MF); 1201 1202 // In case FP is a high reg, we need a separate pop sequence to generate 1203 // a correct Frame Record 1204 bool NeedsFrameRecordPop = hasFP(MF) && ARM::hGPRRegClass.contains(FPReg); 1205 1206 std::set<Register> FrameRecord; 1207 std::set<Register> SpilledGPRs; 1208 for (CalleeSavedInfo &I : CSI) { 1209 Register Reg = I.getReg(); 1210 if (NeedsFrameRecordPop && (Reg == FPReg || Reg == ARM::LR)) 1211 FrameRecord.insert(Reg); 1212 else 1213 SpilledGPRs.insert(Reg); 1214 1215 if (Reg == ARM::LR) 1216 I.setRestored(false); 1217 } 1218 1219 // Determine intermidiate registers which can be used for popping high regs: 1220 // - Spilled low regs 1221 // - Unused return registers 1222 std::set<Register> CopyRegs; 1223 std::set<Register> UnusedReturnRegs; 1224 for (Register Reg : SpilledGPRs) 1225 if ((ARM::tGPRRegClass.contains(Reg)) && !(hasFP(MF) && Reg == FPReg)) 1226 CopyRegs.insert(Reg); 1227 auto Terminator = MBB.getFirstTerminator(); 1228 if (Terminator != MBB.end() && Terminator->getOpcode() == ARM::tBX_RET) { 1229 UnusedReturnRegs.insert(ARM::R0); 1230 UnusedReturnRegs.insert(ARM::R1); 1231 UnusedReturnRegs.insert(ARM::R2); 1232 UnusedReturnRegs.insert(ARM::R3); 1233 for (auto Op : Terminator->implicit_operands()) { 1234 if (Op.isReg()) 1235 UnusedReturnRegs.erase(Op.getReg()); 1236 } 1237 } 1238 CopyRegs.insert(UnusedReturnRegs.begin(), UnusedReturnRegs.end()); 1239 1240 // First pop regular spilled regs. 1241 popRegsFromStack(MBB, MI, TII, SpilledGPRs, CopyRegs, IsVarArg, 1242 STI.hasV5TOps()); 1243 1244 // LR may only be popped into pc, as part of a return sequence. 1245 // Check that no other pop instructions are inserted after that. 1246 assert((!SpilledGPRs.count(ARM::LR) || FrameRecord.empty()) && 1247 "Can't insert pop after return sequence"); 1248 1249 // Now pop Frame Record regs. 1250 // Only unused return registers can be used as copy regs at this point. 1251 popRegsFromStack(MBB, MI, TII, FrameRecord, UnusedReturnRegs, IsVarArg, 1252 STI.hasV5TOps()); 1253 1254 return true; 1255 } 1256