1 //=======- ARMFrameLowering.cpp - ARM Frame Information --------*- C++ -*-====// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the ARM implementation of TargetFrameLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "ARMFrameLowering.h" 15 #include "ARMAddressingModes.h" 16 #include "ARMBaseInstrInfo.h" 17 #include "ARMMachineFunctionInfo.h" 18 #include "llvm/CodeGen/MachineFrameInfo.h" 19 #include "llvm/CodeGen/MachineFunction.h" 20 #include "llvm/CodeGen/MachineInstrBuilder.h" 21 #include "llvm/CodeGen/MachineRegisterInfo.h" 22 #include "llvm/CodeGen/RegisterScavenging.h" 23 #include "llvm/Target/TargetOptions.h" 24 25 using namespace llvm; 26 27 /// hasFP - Return true if the specified function should have a dedicated frame 28 /// pointer register. This is true if the function has variable sized allocas 29 /// or if frame pointer elimination is disabled. 30 /// 31 bool ARMFrameLowering::hasFP(const MachineFunction &MF) const { 32 const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); 33 34 // Mac OS X requires FP not to be clobbered for backtracing purpose. 35 if (STI.isTargetDarwin()) 36 return true; 37 38 const MachineFrameInfo *MFI = MF.getFrameInfo(); 39 // Always eliminate non-leaf frame pointers. 40 return ((DisableFramePointerElim(MF) && MFI->hasCalls()) || 41 RegInfo->needsStackRealignment(MF) || 42 MFI->hasVarSizedObjects() || 43 MFI->isFrameAddressTaken()); 44 } 45 46 // hasReservedCallFrame - Under normal circumstances, when a frame pointer is 47 // not required, we reserve argument space for call sites in the function 48 // immediately on entry to the current function. This eliminates the need for 49 // add/sub sp brackets around call sites. Returns true if the call frame is 50 // included as part of the stack frame. 51 bool ARMFrameLowering::hasReservedCallFrame(const MachineFunction &MF) const { 52 const MachineFrameInfo *FFI = MF.getFrameInfo(); 53 unsigned CFSize = FFI->getMaxCallFrameSize(); 54 // It's not always a good idea to include the call frame as part of the 55 // stack frame. ARM (especially Thumb) has small immediate offset to 56 // address the stack frame. So a large call frame can cause poor codegen 57 // and may even makes it impossible to scavenge a register. 58 if (CFSize >= ((1 << 12) - 1) / 2) // Half of imm12 59 return false; 60 61 return !MF.getFrameInfo()->hasVarSizedObjects(); 62 } 63 64 // canSimplifyCallFramePseudos - If there is a reserved call frame, the 65 // call frame pseudos can be simplified. Unlike most targets, having a FP 66 // is not sufficient here since we still may reference some objects via SP 67 // even when FP is available in Thumb2 mode. 68 bool ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF)const { 69 return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); 70 } 71 72 static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) { 73 for (unsigned i = 0; CSRegs[i]; ++i) 74 if (Reg == CSRegs[i]) 75 return true; 76 return false; 77 } 78 79 static bool isCSRestore(MachineInstr *MI, 80 const ARMBaseInstrInfo &TII, 81 const unsigned *CSRegs) { 82 // Integer spill area is handled with "pop". 83 if (MI->getOpcode() == ARM::LDMIA_RET || 84 MI->getOpcode() == ARM::t2LDMIA_RET || 85 MI->getOpcode() == ARM::LDMIA_UPD || 86 MI->getOpcode() == ARM::t2LDMIA_UPD || 87 MI->getOpcode() == ARM::VLDMDIA_UPD) { 88 // The first two operands are predicates. The last two are 89 // imp-def and imp-use of SP. Check everything in between. 90 for (int i = 5, e = MI->getNumOperands(); i != e; ++i) 91 if (!isCalleeSavedRegister(MI->getOperand(i).getReg(), CSRegs)) 92 return false; 93 return true; 94 } 95 if ((MI->getOpcode() == ARM::LDR_POST || 96 MI->getOpcode() == ARM::t2LDR_POST) && 97 isCalleeSavedRegister(MI->getOperand(0).getReg(), CSRegs) && 98 MI->getOperand(1).getReg() == ARM::SP) 99 return true; 100 101 return false; 102 } 103 104 static void 105 emitSPUpdate(bool isARM, 106 MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, 107 DebugLoc dl, const ARMBaseInstrInfo &TII, 108 int NumBytes, 109 ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { 110 if (isARM) 111 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, 112 Pred, PredReg, TII); 113 else 114 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, 115 Pred, PredReg, TII); 116 } 117 118 void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { 119 MachineBasicBlock &MBB = MF.front(); 120 MachineBasicBlock::iterator MBBI = MBB.begin(); 121 MachineFrameInfo *MFI = MF.getFrameInfo(); 122 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 123 const ARMBaseRegisterInfo *RegInfo = 124 static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); 125 const ARMBaseInstrInfo &TII = 126 *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); 127 assert(!AFI->isThumb1OnlyFunction() && 128 "This emitPrologue does not support Thumb1!"); 129 bool isARM = !AFI->isThumbFunction(); 130 unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); 131 unsigned NumBytes = MFI->getStackSize(); 132 const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo(); 133 DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); 134 unsigned FramePtr = RegInfo->getFrameRegister(MF); 135 136 // Determine the sizes of each callee-save spill areas and record which frame 137 // belongs to which callee-save spill areas. 138 unsigned GPRCS1Size = 0, GPRCS2Size = 0, DPRCSSize = 0; 139 int FramePtrSpillFI = 0; 140 141 // Allocate the vararg register save area. This is not counted in NumBytes. 142 if (VARegSaveSize) 143 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -VARegSaveSize); 144 145 if (!AFI->hasStackFrame()) { 146 if (NumBytes != 0) 147 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); 148 return; 149 } 150 151 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 152 unsigned Reg = CSI[i].getReg(); 153 int FI = CSI[i].getFrameIdx(); 154 switch (Reg) { 155 case ARM::R4: 156 case ARM::R5: 157 case ARM::R6: 158 case ARM::R7: 159 case ARM::LR: 160 if (Reg == FramePtr) 161 FramePtrSpillFI = FI; 162 AFI->addGPRCalleeSavedArea1Frame(FI); 163 GPRCS1Size += 4; 164 break; 165 case ARM::R8: 166 case ARM::R9: 167 case ARM::R10: 168 case ARM::R11: 169 if (Reg == FramePtr) 170 FramePtrSpillFI = FI; 171 if (STI.isTargetDarwin()) { 172 AFI->addGPRCalleeSavedArea2Frame(FI); 173 GPRCS2Size += 4; 174 } else { 175 AFI->addGPRCalleeSavedArea1Frame(FI); 176 GPRCS1Size += 4; 177 } 178 break; 179 default: 180 AFI->addDPRCalleeSavedAreaFrame(FI); 181 DPRCSSize += 8; 182 } 183 } 184 185 // Move past area 1. 186 if (GPRCS1Size > 0) MBBI++; 187 188 // Set FP to point to the stack slot that contains the previous FP. 189 // For Darwin, FP is R7, which has now been stored in spill area 1. 190 // Otherwise, if this is not Darwin, all the callee-saved registers go 191 // into spill area 1, including the FP in R11. In either case, it is 192 // now safe to emit this assignment. 193 bool HasFP = hasFP(MF); 194 if (HasFP) { 195 unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; 196 MachineInstrBuilder MIB = 197 BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) 198 .addFrameIndex(FramePtrSpillFI).addImm(0); 199 AddDefaultCC(AddDefaultPred(MIB)); 200 } 201 202 // Move past area 2. 203 if (GPRCS2Size > 0) MBBI++; 204 205 // Determine starting offsets of spill areas. 206 unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); 207 unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; 208 unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; 209 if (HasFP) 210 AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + 211 NumBytes); 212 AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); 213 AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); 214 AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); 215 216 // Move past area 3. 217 if (DPRCSSize > 0) MBBI++; 218 219 NumBytes = DPRCSOffset; 220 if (NumBytes) { 221 // Adjust SP after all the callee-save spills. 222 emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes); 223 if (HasFP && isARM) 224 // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 225 // Note it's not safe to do this in Thumb2 mode because it would have 226 // taken two instructions: 227 // mov sp, r7 228 // sub sp, #24 229 // If an interrupt is taken between the two instructions, then sp is in 230 // an inconsistent state (pointing to the middle of callee-saved area). 231 // The interrupt handler can end up clobbering the registers. 232 AFI->setShouldRestoreSPFromFP(true); 233 } 234 235 if (STI.isTargetELF() && hasFP(MF)) 236 MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - 237 AFI->getFramePtrSpillOffset()); 238 239 AFI->setGPRCalleeSavedArea1Size(GPRCS1Size); 240 AFI->setGPRCalleeSavedArea2Size(GPRCS2Size); 241 AFI->setDPRCalleeSavedAreaSize(DPRCSSize); 242 243 // If we need dynamic stack realignment, do it here. Be paranoid and make 244 // sure if we also have VLAs, we have a base pointer for frame access. 245 if (RegInfo->needsStackRealignment(MF)) { 246 unsigned MaxAlign = MFI->getMaxAlignment(); 247 assert (!AFI->isThumb1OnlyFunction()); 248 if (!AFI->isThumbFunction()) { 249 // Emit bic sp, sp, MaxAlign 250 AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, 251 TII.get(ARM::BICri), ARM::SP) 252 .addReg(ARM::SP, RegState::Kill) 253 .addImm(MaxAlign-1))); 254 } else { 255 // We cannot use sp as source/dest register here, thus we're emitting the 256 // following sequence: 257 // mov r4, sp 258 // bic r4, r4, MaxAlign 259 // mov sp, r4 260 // FIXME: It will be better just to find spare register here. 261 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2tgpr), ARM::R4) 262 .addReg(ARM::SP, RegState::Kill); 263 AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, 264 TII.get(ARM::t2BICri), ARM::R4) 265 .addReg(ARM::R4, RegState::Kill) 266 .addImm(MaxAlign-1))); 267 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP) 268 .addReg(ARM::R4, RegState::Kill); 269 } 270 271 AFI->setShouldRestoreSPFromFP(true); 272 } 273 274 // If we need a base pointer, set it up here. It's whatever the value 275 // of the stack pointer is at this point. Any variable size objects 276 // will be allocated after this, so we can still use the base pointer 277 // to reference locals. 278 if (RegInfo->hasBasePointer(MF)) { 279 if (isARM) 280 BuildMI(MBB, MBBI, dl, 281 TII.get(ARM::MOVr), RegInfo->getBaseRegister()) 282 .addReg(ARM::SP) 283 .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 284 else 285 BuildMI(MBB, MBBI, dl, 286 TII.get(ARM::tMOVgpr2gpr), RegInfo->getBaseRegister()) 287 .addReg(ARM::SP); 288 } 289 290 // If the frame has variable sized objects then the epilogue must restore 291 // the sp from fp. We can assume there's an FP here since hasFP already 292 // checks for hasVarSizedObjects. 293 if (MFI->hasVarSizedObjects()) 294 AFI->setShouldRestoreSPFromFP(true); 295 } 296 297 void ARMFrameLowering::emitEpilogue(MachineFunction &MF, 298 MachineBasicBlock &MBB) const { 299 MachineBasicBlock::iterator MBBI = prior(MBB.end()); 300 assert(MBBI->getDesc().isReturn() && 301 "Can only insert epilog into returning blocks"); 302 unsigned RetOpcode = MBBI->getOpcode(); 303 DebugLoc dl = MBBI->getDebugLoc(); 304 MachineFrameInfo *MFI = MF.getFrameInfo(); 305 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 306 const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); 307 const ARMBaseInstrInfo &TII = 308 *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); 309 assert(!AFI->isThumb1OnlyFunction() && 310 "This emitEpilogue does not support Thumb1!"); 311 bool isARM = !AFI->isThumbFunction(); 312 313 unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize(); 314 int NumBytes = (int)MFI->getStackSize(); 315 unsigned FramePtr = RegInfo->getFrameRegister(MF); 316 317 if (!AFI->hasStackFrame()) { 318 if (NumBytes != 0) 319 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); 320 } else { 321 // Unwind MBBI to point to first LDR / VLDRD. 322 const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); 323 if (MBBI != MBB.begin()) { 324 do 325 --MBBI; 326 while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); 327 if (!isCSRestore(MBBI, TII, CSRegs)) 328 ++MBBI; 329 } 330 331 // Move SP to start of FP callee save spill area. 332 NumBytes -= (AFI->getGPRCalleeSavedArea1Size() + 333 AFI->getGPRCalleeSavedArea2Size() + 334 AFI->getDPRCalleeSavedAreaSize()); 335 336 // Reset SP based on frame pointer only if the stack frame extends beyond 337 // frame pointer stack slot or target is ELF and the function has FP. 338 if (AFI->shouldRestoreSPFromFP()) { 339 NumBytes = AFI->getFramePtrSpillOffset() - NumBytes; 340 if (NumBytes) { 341 if (isARM) 342 emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, 343 ARMCC::AL, 0, TII); 344 else { 345 // It's not possible to restore SP from FP in a single instruction. 346 // For Darwin, this looks like: 347 // mov sp, r7 348 // sub sp, #24 349 // This is bad, if an interrupt is taken after the mov, sp is in an 350 // inconsistent state. 351 // Use the first callee-saved register as a scratch register. 352 assert(MF.getRegInfo().isPhysRegUsed(ARM::R4) && 353 "No scratch register to restore SP from FP!"); 354 emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::R4, FramePtr, -NumBytes, 355 ARMCC::AL, 0, TII); 356 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) 357 .addReg(ARM::R4); 358 } 359 } else { 360 // Thumb2 or ARM. 361 if (isARM) 362 BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), ARM::SP) 363 .addReg(FramePtr).addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); 364 else 365 BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr), ARM::SP) 366 .addReg(FramePtr); 367 } 368 } else if (NumBytes) 369 emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); 370 371 // Increment past our save areas. 372 if (AFI->getDPRCalleeSavedAreaSize()) MBBI++; 373 if (AFI->getGPRCalleeSavedArea2Size()) MBBI++; 374 if (AFI->getGPRCalleeSavedArea1Size()) MBBI++; 375 } 376 377 if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND || 378 RetOpcode == ARM::TCRETURNri || RetOpcode == ARM::TCRETURNriND) { 379 // Tail call return: adjust the stack pointer and jump to callee. 380 MBBI = prior(MBB.end()); 381 MachineOperand &JumpTarget = MBBI->getOperand(0); 382 383 // Jump to label or value in register. 384 if (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNdiND) { 385 unsigned TCOpcode = (RetOpcode == ARM::TCRETURNdi) 386 ? (STI.isThumb() ? ARM::TAILJMPdt : ARM::TAILJMPd) 387 : (STI.isThumb() ? ARM::TAILJMPdNDt : ARM::TAILJMPdND); 388 MachineInstrBuilder MIB = BuildMI(MBB, MBBI, dl, TII.get(TCOpcode)); 389 if (JumpTarget.isGlobal()) 390 MIB.addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset(), 391 JumpTarget.getTargetFlags()); 392 else { 393 assert(JumpTarget.isSymbol()); 394 MIB.addExternalSymbol(JumpTarget.getSymbolName(), 395 JumpTarget.getTargetFlags()); 396 } 397 } else if (RetOpcode == ARM::TCRETURNri) { 398 BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPr)). 399 addReg(JumpTarget.getReg(), RegState::Kill); 400 } else if (RetOpcode == ARM::TCRETURNriND) { 401 BuildMI(MBB, MBBI, dl, TII.get(ARM::TAILJMPrND)). 402 addReg(JumpTarget.getReg(), RegState::Kill); 403 } 404 405 MachineInstr *NewMI = prior(MBBI); 406 for (unsigned i = 1, e = MBBI->getNumOperands(); i != e; ++i) 407 NewMI->addOperand(MBBI->getOperand(i)); 408 409 // Delete the pseudo instruction TCRETURN. 410 MBB.erase(MBBI); 411 } 412 413 if (VARegSaveSize) 414 emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize); 415 } 416 417 // Provide a base+offset reference to an FI slot for debug info. It's the 418 // same as what we use for resolving the code-gen references for now. 419 // FIXME: This can go wrong when references are SP-relative and simple call 420 // frames aren't used. 421 int 422 ARMFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, 423 unsigned &FrameReg) const { 424 return ResolveFrameIndexReference(MF, FI, FrameReg, 0); 425 } 426 427 int 428 ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, 429 int FI, 430 unsigned &FrameReg, 431 int SPAdj) const { 432 const MachineFrameInfo *MFI = MF.getFrameInfo(); 433 const ARMBaseRegisterInfo *RegInfo = 434 static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); 435 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 436 int Offset = MFI->getObjectOffset(FI) + MFI->getStackSize(); 437 int FPOffset = Offset - AFI->getFramePtrSpillOffset(); 438 bool isFixed = MFI->isFixedObjectIndex(FI); 439 440 FrameReg = ARM::SP; 441 Offset += SPAdj; 442 if (AFI->isGPRCalleeSavedArea1Frame(FI)) 443 return Offset - AFI->getGPRCalleeSavedArea1Offset(); 444 else if (AFI->isGPRCalleeSavedArea2Frame(FI)) 445 return Offset - AFI->getGPRCalleeSavedArea2Offset(); 446 else if (AFI->isDPRCalleeSavedAreaFrame(FI)) 447 return Offset - AFI->getDPRCalleeSavedAreaOffset(); 448 449 // When dynamically realigning the stack, use the frame pointer for 450 // parameters, and the stack/base pointer for locals. 451 if (RegInfo->needsStackRealignment(MF)) { 452 assert (hasFP(MF) && "dynamic stack realignment without a FP!"); 453 if (isFixed) { 454 FrameReg = RegInfo->getFrameRegister(MF); 455 Offset = FPOffset; 456 } else if (MFI->hasVarSizedObjects()) { 457 assert(RegInfo->hasBasePointer(MF) && 458 "VLAs and dynamic stack alignment, but missing base pointer!"); 459 FrameReg = RegInfo->getBaseRegister(); 460 } 461 return Offset; 462 } 463 464 // If there is a frame pointer, use it when we can. 465 if (hasFP(MF) && AFI->hasStackFrame()) { 466 // Use frame pointer to reference fixed objects. Use it for locals if 467 // there are VLAs (and thus the SP isn't reliable as a base). 468 if (isFixed || (MFI->hasVarSizedObjects() && 469 !RegInfo->hasBasePointer(MF))) { 470 FrameReg = RegInfo->getFrameRegister(MF); 471 return FPOffset; 472 } else if (MFI->hasVarSizedObjects()) { 473 assert(RegInfo->hasBasePointer(MF) && "missing base pointer!"); 474 // Try to use the frame pointer if we can, else use the base pointer 475 // since it's available. This is handy for the emergency spill slot, in 476 // particular. 477 if (AFI->isThumb2Function()) { 478 if (FPOffset >= -255 && FPOffset < 0) { 479 FrameReg = RegInfo->getFrameRegister(MF); 480 return FPOffset; 481 } 482 } else 483 FrameReg = RegInfo->getBaseRegister(); 484 } else if (AFI->isThumb2Function()) { 485 // In Thumb2 mode, the negative offset is very limited. Try to avoid 486 // out of range references. 487 if (FPOffset >= -255 && FPOffset < 0) { 488 FrameReg = RegInfo->getFrameRegister(MF); 489 return FPOffset; 490 } 491 } else if (Offset > (FPOffset < 0 ? -FPOffset : FPOffset)) { 492 // Otherwise, use SP or FP, whichever is closer to the stack slot. 493 FrameReg = RegInfo->getFrameRegister(MF); 494 return FPOffset; 495 } 496 } 497 // Use the base pointer if we have one. 498 if (RegInfo->hasBasePointer(MF)) 499 FrameReg = RegInfo->getBaseRegister(); 500 return Offset; 501 } 502 503 int ARMFrameLowering::getFrameIndexOffset(const MachineFunction &MF, int FI) const { 504 unsigned FrameReg; 505 return getFrameIndexReference(MF, FI, FrameReg); 506 } 507 508 void ARMFrameLowering::emitPushInst(MachineBasicBlock &MBB, 509 MachineBasicBlock::iterator MI, 510 const std::vector<CalleeSavedInfo> &CSI, 511 unsigned StmOpc, unsigned StrOpc, bool NoGap, 512 bool(*Func)(unsigned, bool)) const { 513 MachineFunction &MF = *MBB.getParent(); 514 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 515 516 DebugLoc DL; 517 if (MI != MBB.end()) DL = MI->getDebugLoc(); 518 519 SmallVector<std::pair<unsigned,bool>, 4> Regs; 520 unsigned i = CSI.size(); 521 while (i != 0) { 522 unsigned LastReg = 0; 523 for (; i != 0; --i) { 524 unsigned Reg = CSI[i-1].getReg(); 525 if (!(Func)(Reg, STI.isTargetDarwin())) continue; 526 527 // Add the callee-saved register as live-in unless it's LR and 528 // @llvm.returnaddress is called. If LR is returned for 529 // @llvm.returnaddress then it's already added to the function and 530 // entry block live-in sets. 531 bool isKill = true; 532 if (Reg == ARM::LR) { 533 if (MF.getFrameInfo()->isReturnAddressTaken() && 534 MF.getRegInfo().isLiveIn(Reg)) 535 isKill = false; 536 } 537 538 if (isKill) 539 MBB.addLiveIn(Reg); 540 541 // If NoGap is true, push consecutive registers and then leave the rest 542 // for other instructions. e.g. 543 // vpush {d8, d10, d11} -> vpush {d8}, vpush {d10, d11} 544 if (NoGap && LastReg && LastReg != Reg-1) 545 break; 546 LastReg = Reg; 547 Regs.push_back(std::make_pair(Reg, isKill)); 548 } 549 550 if (Regs.empty()) 551 continue; 552 if (Regs.size() > 1 || StrOpc== 0) { 553 MachineInstrBuilder MIB = 554 AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(StmOpc), ARM::SP) 555 .addReg(ARM::SP)); 556 for (unsigned i = 0, e = Regs.size(); i < e; ++i) 557 MIB.addReg(Regs[i].first, getKillRegState(Regs[i].second)); 558 } else if (Regs.size() == 1) { 559 MachineInstrBuilder MIB = BuildMI(MBB, MI, DL, TII.get(StrOpc), 560 ARM::SP) 561 .addReg(Regs[0].first, getKillRegState(Regs[0].second)) 562 .addReg(ARM::SP); 563 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once 564 // that refactoring is complete (eventually). 565 if (StrOpc == ARM::STR_PRE) { 566 MIB.addReg(0); 567 MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::sub, 4, ARM_AM::no_shift)); 568 } else 569 MIB.addImm(-4); 570 AddDefaultPred(MIB); 571 } 572 Regs.clear(); 573 } 574 } 575 576 void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, 577 MachineBasicBlock::iterator MI, 578 const std::vector<CalleeSavedInfo> &CSI, 579 unsigned LdmOpc, unsigned LdrOpc, 580 bool isVarArg, bool NoGap, 581 bool(*Func)(unsigned, bool)) const { 582 MachineFunction &MF = *MBB.getParent(); 583 const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); 584 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 585 DebugLoc DL = MI->getDebugLoc(); 586 587 SmallVector<unsigned, 4> Regs; 588 unsigned i = CSI.size(); 589 while (i != 0) { 590 unsigned LastReg = 0; 591 bool DeleteRet = false; 592 for (; i != 0; --i) { 593 unsigned Reg = CSI[i-1].getReg(); 594 if (!(Func)(Reg, STI.isTargetDarwin())) continue; 595 596 if (Reg == ARM::LR && !isVarArg && STI.hasV5TOps()) { 597 Reg = ARM::PC; 598 LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; 599 // Fold the return instruction into the LDM. 600 DeleteRet = true; 601 } 602 603 // If NoGap is true, pop consecutive registers and then leave the rest 604 // for other instructions. e.g. 605 // vpop {d8, d10, d11} -> vpop {d8}, vpop {d10, d11} 606 if (NoGap && LastReg && LastReg != Reg-1) 607 break; 608 609 LastReg = Reg; 610 Regs.push_back(Reg); 611 } 612 613 if (Regs.empty()) 614 continue; 615 if (Regs.size() > 1 || LdrOpc == 0) { 616 MachineInstrBuilder MIB = 617 AddDefaultPred(BuildMI(MBB, MI, DL, TII.get(LdmOpc), ARM::SP) 618 .addReg(ARM::SP)); 619 for (unsigned i = 0, e = Regs.size(); i < e; ++i) 620 MIB.addReg(Regs[i], getDefRegState(true)); 621 if (DeleteRet) 622 MI->eraseFromParent(); 623 MI = MIB; 624 } else if (Regs.size() == 1) { 625 // If we adjusted the reg to PC from LR above, switch it back here. We 626 // only do that for LDM. 627 if (Regs[0] == ARM::PC) 628 Regs[0] = ARM::LR; 629 MachineInstrBuilder MIB = 630 BuildMI(MBB, MI, DL, TII.get(LdrOpc), Regs[0]) 631 .addReg(ARM::SP, RegState::Define) 632 .addReg(ARM::SP); 633 // ARM mode needs an extra reg0 here due to addrmode2. Will go away once 634 // that refactoring is complete (eventually). 635 if (LdrOpc == ARM::LDR_POST) { 636 MIB.addReg(0); 637 MIB.addImm(ARM_AM::getAM2Opc(ARM_AM::add, 4, ARM_AM::no_shift)); 638 } else 639 MIB.addImm(4); 640 AddDefaultPred(MIB); 641 } 642 Regs.clear(); 643 } 644 } 645 646 bool ARMFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 647 MachineBasicBlock::iterator MI, 648 const std::vector<CalleeSavedInfo> &CSI, 649 const TargetRegisterInfo *TRI) const { 650 if (CSI.empty()) 651 return false; 652 653 MachineFunction &MF = *MBB.getParent(); 654 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 655 DebugLoc DL = MI->getDebugLoc(); 656 657 unsigned PushOpc = AFI->isThumbFunction() ? ARM::t2STMDB_UPD : ARM::STMDB_UPD; 658 unsigned PushOneOpc = AFI->isThumbFunction() ? ARM::t2STR_PRE : ARM::STR_PRE; 659 unsigned FltOpc = ARM::VSTMDDB_UPD; 660 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea1Register); 661 emitPushInst(MBB, MI, CSI, PushOpc, PushOneOpc, false, &isARMArea2Register); 662 emitPushInst(MBB, MI, CSI, FltOpc, 0, true, &isARMArea3Register); 663 664 return true; 665 } 666 667 bool ARMFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 668 MachineBasicBlock::iterator MI, 669 const std::vector<CalleeSavedInfo> &CSI, 670 const TargetRegisterInfo *TRI) const { 671 if (CSI.empty()) 672 return false; 673 674 MachineFunction &MF = *MBB.getParent(); 675 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 676 bool isVarArg = AFI->getVarArgsRegSaveSize() > 0; 677 DebugLoc DL = MI->getDebugLoc(); 678 679 unsigned PopOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_UPD : ARM::LDMIA_UPD; 680 unsigned LdrOpc = AFI->isThumbFunction() ? ARM::t2LDR_POST : ARM::LDR_POST; 681 unsigned FltOpc = ARM::VLDMDIA_UPD; 682 emitPopInst(MBB, MI, CSI, FltOpc, 0, isVarArg, true, &isARMArea3Register); 683 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, 684 &isARMArea2Register); 685 emitPopInst(MBB, MI, CSI, PopOpc, LdrOpc, isVarArg, false, 686 &isARMArea1Register); 687 688 return true; 689 } 690 691 // FIXME: Make generic? 692 static unsigned GetFunctionSizeInBytes(const MachineFunction &MF, 693 const ARMBaseInstrInfo &TII) { 694 unsigned FnSize = 0; 695 for (MachineFunction::const_iterator MBBI = MF.begin(), E = MF.end(); 696 MBBI != E; ++MBBI) { 697 const MachineBasicBlock &MBB = *MBBI; 698 for (MachineBasicBlock::const_iterator I = MBB.begin(),E = MBB.end(); 699 I != E; ++I) 700 FnSize += TII.GetInstSizeInBytes(I); 701 } 702 return FnSize; 703 } 704 705 /// estimateStackSize - Estimate and return the size of the frame. 706 /// FIXME: Make generic? 707 static unsigned estimateStackSize(MachineFunction &MF) { 708 const MachineFrameInfo *FFI = MF.getFrameInfo(); 709 int Offset = 0; 710 for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) { 711 int FixedOff = -FFI->getObjectOffset(i); 712 if (FixedOff > Offset) Offset = FixedOff; 713 } 714 for (unsigned i = 0, e = FFI->getObjectIndexEnd(); i != e; ++i) { 715 if (FFI->isDeadObjectIndex(i)) 716 continue; 717 Offset += FFI->getObjectSize(i); 718 unsigned Align = FFI->getObjectAlignment(i); 719 // Adjust to alignment boundary 720 Offset = (Offset+Align-1)/Align*Align; 721 } 722 return (unsigned)Offset; 723 } 724 725 /// estimateRSStackSizeLimit - Look at each instruction that references stack 726 /// frames and return the stack size limit beyond which some of these 727 /// instructions will require a scratch register during their expansion later. 728 // FIXME: Move to TII? 729 static unsigned estimateRSStackSizeLimit(MachineFunction &MF, 730 const TargetFrameLowering *TFI) { 731 const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 732 unsigned Limit = (1 << 12) - 1; 733 for (MachineFunction::iterator BB = MF.begin(),E = MF.end(); BB != E; ++BB) { 734 for (MachineBasicBlock::iterator I = BB->begin(), E = BB->end(); 735 I != E; ++I) { 736 for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) { 737 if (!I->getOperand(i).isFI()) continue; 738 739 // When using ADDri to get the address of a stack object, 255 is the 740 // largest offset guaranteed to fit in the immediate offset. 741 if (I->getOpcode() == ARM::ADDri) { 742 Limit = std::min(Limit, (1U << 8) - 1); 743 break; 744 } 745 746 // Otherwise check the addressing mode. 747 switch (I->getDesc().TSFlags & ARMII::AddrModeMask) { 748 case ARMII::AddrMode3: 749 case ARMII::AddrModeT2_i8: 750 Limit = std::min(Limit, (1U << 8) - 1); 751 break; 752 case ARMII::AddrMode5: 753 case ARMII::AddrModeT2_i8s4: 754 Limit = std::min(Limit, ((1U << 8) - 1) * 4); 755 break; 756 case ARMII::AddrModeT2_i12: 757 // i12 supports only positive offset so these will be converted to 758 // i8 opcodes. See llvm::rewriteT2FrameIndex. 759 if (TFI->hasFP(MF) && AFI->hasStackFrame()) 760 Limit = std::min(Limit, (1U << 8) - 1); 761 break; 762 case ARMII::AddrMode4: 763 case ARMII::AddrMode6: 764 // Addressing modes 4 & 6 (load/store) instructions can't encode an 765 // immediate offset for stack references. 766 return 0; 767 default: 768 break; 769 } 770 break; // At most one FI per instruction 771 } 772 } 773 } 774 775 return Limit; 776 } 777 778 void 779 ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, 780 RegScavenger *RS) const { 781 // This tells PEI to spill the FP as if it is any other callee-save register 782 // to take advantage the eliminateFrameIndex machinery. This also ensures it 783 // is spilled in the order specified by getCalleeSavedRegs() to make it easier 784 // to combine multiple loads / stores. 785 bool CanEliminateFrame = true; 786 bool CS1Spilled = false; 787 bool LRSpilled = false; 788 unsigned NumGPRSpills = 0; 789 SmallVector<unsigned, 4> UnspilledCS1GPRs; 790 SmallVector<unsigned, 4> UnspilledCS2GPRs; 791 const ARMBaseRegisterInfo *RegInfo = 792 static_cast<const ARMBaseRegisterInfo*>(MF.getTarget().getRegisterInfo()); 793 const ARMBaseInstrInfo &TII = 794 *static_cast<const ARMBaseInstrInfo*>(MF.getTarget().getInstrInfo()); 795 ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>(); 796 MachineFrameInfo *MFI = MF.getFrameInfo(); 797 unsigned FramePtr = RegInfo->getFrameRegister(MF); 798 799 // Spill R4 if Thumb2 function requires stack realignment - it will be used as 800 // scratch register. Also spill R4 if Thumb2 function has varsized objects, 801 // since it's always posible to restore sp from fp in a single instruction. 802 // FIXME: It will be better just to find spare register here. 803 if (AFI->isThumb2Function() && 804 (MFI->hasVarSizedObjects() || RegInfo->needsStackRealignment(MF))) 805 MF.getRegInfo().setPhysRegUsed(ARM::R4); 806 807 // Spill LR if Thumb1 function uses variable length argument lists. 808 if (AFI->isThumb1OnlyFunction() && AFI->getVarArgsRegSaveSize() > 0) 809 MF.getRegInfo().setPhysRegUsed(ARM::LR); 810 811 // Spill the BasePtr if it's used. 812 if (RegInfo->hasBasePointer(MF)) 813 MF.getRegInfo().setPhysRegUsed(RegInfo->getBaseRegister()); 814 815 // Don't spill FP if the frame can be eliminated. This is determined 816 // by scanning the callee-save registers to see if any is used. 817 const unsigned *CSRegs = RegInfo->getCalleeSavedRegs(); 818 for (unsigned i = 0; CSRegs[i]; ++i) { 819 unsigned Reg = CSRegs[i]; 820 bool Spilled = false; 821 if (MF.getRegInfo().isPhysRegUsed(Reg)) { 822 AFI->setCSRegisterIsSpilled(Reg); 823 Spilled = true; 824 CanEliminateFrame = false; 825 } else { 826 // Check alias registers too. 827 for (const unsigned *Aliases = 828 RegInfo->getAliasSet(Reg); *Aliases; ++Aliases) { 829 if (MF.getRegInfo().isPhysRegUsed(*Aliases)) { 830 Spilled = true; 831 CanEliminateFrame = false; 832 } 833 } 834 } 835 836 if (!ARM::GPRRegisterClass->contains(Reg)) 837 continue; 838 839 if (Spilled) { 840 NumGPRSpills++; 841 842 if (!STI.isTargetDarwin()) { 843 if (Reg == ARM::LR) 844 LRSpilled = true; 845 CS1Spilled = true; 846 continue; 847 } 848 849 // Keep track if LR and any of R4, R5, R6, and R7 is spilled. 850 switch (Reg) { 851 case ARM::LR: 852 LRSpilled = true; 853 // Fallthrough 854 case ARM::R4: case ARM::R5: 855 case ARM::R6: case ARM::R7: 856 CS1Spilled = true; 857 break; 858 default: 859 break; 860 } 861 } else { 862 if (!STI.isTargetDarwin()) { 863 UnspilledCS1GPRs.push_back(Reg); 864 continue; 865 } 866 867 switch (Reg) { 868 case ARM::R4: case ARM::R5: 869 case ARM::R6: case ARM::R7: 870 case ARM::LR: 871 UnspilledCS1GPRs.push_back(Reg); 872 break; 873 default: 874 UnspilledCS2GPRs.push_back(Reg); 875 break; 876 } 877 } 878 } 879 880 bool ForceLRSpill = false; 881 if (!LRSpilled && AFI->isThumb1OnlyFunction()) { 882 unsigned FnSize = GetFunctionSizeInBytes(MF, TII); 883 // Force LR to be spilled if the Thumb function size is > 2048. This enables 884 // use of BL to implement far jump. If it turns out that it's not needed 885 // then the branch fix up path will undo it. 886 if (FnSize >= (1 << 11)) { 887 CanEliminateFrame = false; 888 ForceLRSpill = true; 889 } 890 } 891 892 // If any of the stack slot references may be out of range of an immediate 893 // offset, make sure a register (or a spill slot) is available for the 894 // register scavenger. Note that if we're indexing off the frame pointer, the 895 // effective stack size is 4 bytes larger since the FP points to the stack 896 // slot of the previous FP. Also, if we have variable sized objects in the 897 // function, stack slot references will often be negative, and some of 898 // our instructions are positive-offset only, so conservatively consider 899 // that case to want a spill slot (or register) as well. Similarly, if 900 // the function adjusts the stack pointer during execution and the 901 // adjustments aren't already part of our stack size estimate, our offset 902 // calculations may be off, so be conservative. 903 // FIXME: We could add logic to be more precise about negative offsets 904 // and which instructions will need a scratch register for them. Is it 905 // worth the effort and added fragility? 906 bool BigStack = 907 (RS && 908 (estimateStackSize(MF) + ((hasFP(MF) && AFI->hasStackFrame()) ? 4:0) >= 909 estimateRSStackSizeLimit(MF, this))) 910 || MFI->hasVarSizedObjects() 911 || (MFI->adjustsStack() && !canSimplifyCallFramePseudos(MF)); 912 913 bool ExtraCSSpill = false; 914 if (BigStack || !CanEliminateFrame || RegInfo->cannotEliminateFrame(MF)) { 915 AFI->setHasStackFrame(true); 916 917 // If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled. 918 // Spill LR as well so we can fold BX_RET to the registers restore (LDM). 919 if (!LRSpilled && CS1Spilled) { 920 MF.getRegInfo().setPhysRegUsed(ARM::LR); 921 AFI->setCSRegisterIsSpilled(ARM::LR); 922 NumGPRSpills++; 923 UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), 924 UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); 925 ForceLRSpill = false; 926 ExtraCSSpill = true; 927 } 928 929 if (hasFP(MF)) { 930 MF.getRegInfo().setPhysRegUsed(FramePtr); 931 NumGPRSpills++; 932 } 933 934 // If stack and double are 8-byte aligned and we are spilling an odd number 935 // of GPRs, spill one extra callee save GPR so we won't have to pad between 936 // the integer and double callee save areas. 937 unsigned TargetAlign = getStackAlignment(); 938 if (TargetAlign == 8 && (NumGPRSpills & 1)) { 939 if (CS1Spilled && !UnspilledCS1GPRs.empty()) { 940 for (unsigned i = 0, e = UnspilledCS1GPRs.size(); i != e; ++i) { 941 unsigned Reg = UnspilledCS1GPRs[i]; 942 // Don't spill high register if the function is thumb1 943 if (!AFI->isThumb1OnlyFunction() || 944 isARMLowRegister(Reg) || Reg == ARM::LR) { 945 MF.getRegInfo().setPhysRegUsed(Reg); 946 AFI->setCSRegisterIsSpilled(Reg); 947 if (!RegInfo->isReservedReg(MF, Reg)) 948 ExtraCSSpill = true; 949 break; 950 } 951 } 952 } else if (!UnspilledCS2GPRs.empty() && !AFI->isThumb1OnlyFunction()) { 953 unsigned Reg = UnspilledCS2GPRs.front(); 954 MF.getRegInfo().setPhysRegUsed(Reg); 955 AFI->setCSRegisterIsSpilled(Reg); 956 if (!RegInfo->isReservedReg(MF, Reg)) 957 ExtraCSSpill = true; 958 } 959 } 960 961 // Estimate if we might need to scavenge a register at some point in order 962 // to materialize a stack offset. If so, either spill one additional 963 // callee-saved register or reserve a special spill slot to facilitate 964 // register scavenging. Thumb1 needs a spill slot for stack pointer 965 // adjustments also, even when the frame itself is small. 966 if (BigStack && !ExtraCSSpill) { 967 // If any non-reserved CS register isn't spilled, just spill one or two 968 // extra. That should take care of it! 969 unsigned NumExtras = TargetAlign / 4; 970 SmallVector<unsigned, 2> Extras; 971 while (NumExtras && !UnspilledCS1GPRs.empty()) { 972 unsigned Reg = UnspilledCS1GPRs.back(); 973 UnspilledCS1GPRs.pop_back(); 974 if (!RegInfo->isReservedReg(MF, Reg) && 975 (!AFI->isThumb1OnlyFunction() || isARMLowRegister(Reg) || 976 Reg == ARM::LR)) { 977 Extras.push_back(Reg); 978 NumExtras--; 979 } 980 } 981 // For non-Thumb1 functions, also check for hi-reg CS registers 982 if (!AFI->isThumb1OnlyFunction()) { 983 while (NumExtras && !UnspilledCS2GPRs.empty()) { 984 unsigned Reg = UnspilledCS2GPRs.back(); 985 UnspilledCS2GPRs.pop_back(); 986 if (!RegInfo->isReservedReg(MF, Reg)) { 987 Extras.push_back(Reg); 988 NumExtras--; 989 } 990 } 991 } 992 if (Extras.size() && NumExtras == 0) { 993 for (unsigned i = 0, e = Extras.size(); i != e; ++i) { 994 MF.getRegInfo().setPhysRegUsed(Extras[i]); 995 AFI->setCSRegisterIsSpilled(Extras[i]); 996 } 997 } else if (!AFI->isThumb1OnlyFunction()) { 998 // note: Thumb1 functions spill to R12, not the stack. Reserve a slot 999 // closest to SP or frame pointer. 1000 const TargetRegisterClass *RC = ARM::GPRRegisterClass; 1001 RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(), 1002 RC->getAlignment(), 1003 false)); 1004 } 1005 } 1006 } 1007 1008 if (ForceLRSpill) { 1009 MF.getRegInfo().setPhysRegUsed(ARM::LR); 1010 AFI->setCSRegisterIsSpilled(ARM::LR); 1011 AFI->setLRIsSpilledForFarJump(true); 1012 } 1013 } 1014