1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file contains the PPC implementation of TargetFrameLowering class. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PPCFrameLowering.h" 15 #include "PPCInstrBuilder.h" 16 #include "PPCInstrInfo.h" 17 #include "PPCMachineFunctionInfo.h" 18 #include "PPCSubtarget.h" 19 #include "PPCTargetMachine.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 /// VRRegNo - Map from a numbered VR register to its enum value. 32 /// 33 static const MCPhysReg VRRegNo[] = { 34 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 35 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 36 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 37 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 38 }; 39 40 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 41 if (STI.isDarwinABI()) 42 return STI.isPPC64() ? 16 : 8; 43 // SVR4 ABI: 44 return STI.isPPC64() ? 16 : 4; 45 } 46 47 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 48 return STI.isELFv2ABI() ? 24 : 40; 49 } 50 51 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 52 // For the Darwin ABI: 53 // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area 54 // for saving the frame pointer (if needed.) While the published ABI has 55 // not used this slot since at least MacOSX 10.2, there is older code 56 // around that does use it, and that needs to continue to work. 57 if (STI.isDarwinABI()) 58 return STI.isPPC64() ? -8U : -4U; 59 60 // SVR4 ABI: First slot in the general register save area. 61 return STI.isPPC64() ? -8U : -4U; 62 } 63 64 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 65 if (STI.isDarwinABI() || STI.isPPC64()) 66 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 67 68 // SVR4 ABI: 69 return 8; 70 } 71 72 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 73 if (STI.isDarwinABI()) 74 return STI.isPPC64() ? -16U : -8U; 75 76 // SVR4 ABI: First slot in the general register save area. 77 return STI.isPPC64() 78 ? -16U 79 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 80 } 81 82 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 83 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 84 STI.getPlatformStackAlignment(), 0), 85 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 86 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 87 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 88 LinkageSize(computeLinkageSize(Subtarget)), 89 BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {} 90 91 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 92 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 93 unsigned &NumEntries) const { 94 if (Subtarget.isDarwinABI()) { 95 NumEntries = 1; 96 if (Subtarget.isPPC64()) { 97 static const SpillSlot darwin64Offsets = {PPC::X31, -8}; 98 return &darwin64Offsets; 99 } else { 100 static const SpillSlot darwinOffsets = {PPC::R31, -4}; 101 return &darwinOffsets; 102 } 103 } 104 105 // Early exit if not using the SVR4 ABI. 106 if (!Subtarget.isSVR4ABI()) { 107 NumEntries = 0; 108 return nullptr; 109 } 110 111 // Note that the offsets here overlap, but this is fixed up in 112 // processFunctionBeforeFrameFinalized. 113 114 static const SpillSlot Offsets[] = { 115 // Floating-point register save area offsets. 116 {PPC::F31, -8}, 117 {PPC::F30, -16}, 118 {PPC::F29, -24}, 119 {PPC::F28, -32}, 120 {PPC::F27, -40}, 121 {PPC::F26, -48}, 122 {PPC::F25, -56}, 123 {PPC::F24, -64}, 124 {PPC::F23, -72}, 125 {PPC::F22, -80}, 126 {PPC::F21, -88}, 127 {PPC::F20, -96}, 128 {PPC::F19, -104}, 129 {PPC::F18, -112}, 130 {PPC::F17, -120}, 131 {PPC::F16, -128}, 132 {PPC::F15, -136}, 133 {PPC::F14, -144}, 134 135 // General register save area offsets. 136 {PPC::R31, -4}, 137 {PPC::R30, -8}, 138 {PPC::R29, -12}, 139 {PPC::R28, -16}, 140 {PPC::R27, -20}, 141 {PPC::R26, -24}, 142 {PPC::R25, -28}, 143 {PPC::R24, -32}, 144 {PPC::R23, -36}, 145 {PPC::R22, -40}, 146 {PPC::R21, -44}, 147 {PPC::R20, -48}, 148 {PPC::R19, -52}, 149 {PPC::R18, -56}, 150 {PPC::R17, -60}, 151 {PPC::R16, -64}, 152 {PPC::R15, -68}, 153 {PPC::R14, -72}, 154 155 // CR save area offset. We map each of the nonvolatile CR fields 156 // to the slot for CR2, which is the first of the nonvolatile CR 157 // fields to be assigned, so that we only allocate one save slot. 158 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 159 {PPC::CR2, -4}, 160 161 // VRSAVE save area offset. 162 {PPC::VRSAVE, -4}, 163 164 // Vector register save area 165 {PPC::V31, -16}, 166 {PPC::V30, -32}, 167 {PPC::V29, -48}, 168 {PPC::V28, -64}, 169 {PPC::V27, -80}, 170 {PPC::V26, -96}, 171 {PPC::V25, -112}, 172 {PPC::V24, -128}, 173 {PPC::V23, -144}, 174 {PPC::V22, -160}, 175 {PPC::V21, -176}, 176 {PPC::V20, -192}}; 177 178 static const SpillSlot Offsets64[] = { 179 // Floating-point register save area offsets. 180 {PPC::F31, -8}, 181 {PPC::F30, -16}, 182 {PPC::F29, -24}, 183 {PPC::F28, -32}, 184 {PPC::F27, -40}, 185 {PPC::F26, -48}, 186 {PPC::F25, -56}, 187 {PPC::F24, -64}, 188 {PPC::F23, -72}, 189 {PPC::F22, -80}, 190 {PPC::F21, -88}, 191 {PPC::F20, -96}, 192 {PPC::F19, -104}, 193 {PPC::F18, -112}, 194 {PPC::F17, -120}, 195 {PPC::F16, -128}, 196 {PPC::F15, -136}, 197 {PPC::F14, -144}, 198 199 // General register save area offsets. 200 {PPC::X31, -8}, 201 {PPC::X30, -16}, 202 {PPC::X29, -24}, 203 {PPC::X28, -32}, 204 {PPC::X27, -40}, 205 {PPC::X26, -48}, 206 {PPC::X25, -56}, 207 {PPC::X24, -64}, 208 {PPC::X23, -72}, 209 {PPC::X22, -80}, 210 {PPC::X21, -88}, 211 {PPC::X20, -96}, 212 {PPC::X19, -104}, 213 {PPC::X18, -112}, 214 {PPC::X17, -120}, 215 {PPC::X16, -128}, 216 {PPC::X15, -136}, 217 {PPC::X14, -144}, 218 219 // VRSAVE save area offset. 220 {PPC::VRSAVE, -4}, 221 222 // Vector register save area 223 {PPC::V31, -16}, 224 {PPC::V30, -32}, 225 {PPC::V29, -48}, 226 {PPC::V28, -64}, 227 {PPC::V27, -80}, 228 {PPC::V26, -96}, 229 {PPC::V25, -112}, 230 {PPC::V24, -128}, 231 {PPC::V23, -144}, 232 {PPC::V22, -160}, 233 {PPC::V21, -176}, 234 {PPC::V20, -192}}; 235 236 if (Subtarget.isPPC64()) { 237 NumEntries = array_lengthof(Offsets64); 238 239 return Offsets64; 240 } else { 241 NumEntries = array_lengthof(Offsets); 242 243 return Offsets; 244 } 245 } 246 247 /// RemoveVRSaveCode - We have found that this function does not need any code 248 /// to manipulate the VRSAVE register, even though it uses vector registers. 249 /// This can happen when the only registers used are known to be live in or out 250 /// of the function. Remove all of the VRSAVE related code from the function. 251 /// FIXME: The removal of the code results in a compile failure at -O0 when the 252 /// function contains a function call, as the GPR containing original VRSAVE 253 /// contents is spilled and reloaded around the call. Without the prolog code, 254 /// the spill instruction refers to an undefined register. This code needs 255 /// to account for all uses of that GPR. 256 static void RemoveVRSaveCode(MachineInstr &MI) { 257 MachineBasicBlock *Entry = MI.getParent(); 258 MachineFunction *MF = Entry->getParent(); 259 260 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 261 MachineBasicBlock::iterator MBBI = MI; 262 ++MBBI; 263 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 264 MBBI->eraseFromParent(); 265 266 bool RemovedAllMTVRSAVEs = true; 267 // See if we can find and remove the MTVRSAVE instruction from all of the 268 // epilog blocks. 269 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 270 // If last instruction is a return instruction, add an epilogue 271 if (I->isReturnBlock()) { 272 bool FoundIt = false; 273 for (MBBI = I->end(); MBBI != I->begin(); ) { 274 --MBBI; 275 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 276 MBBI->eraseFromParent(); // remove it. 277 FoundIt = true; 278 break; 279 } 280 } 281 RemovedAllMTVRSAVEs &= FoundIt; 282 } 283 } 284 285 // If we found and removed all MTVRSAVE instructions, remove the read of 286 // VRSAVE as well. 287 if (RemovedAllMTVRSAVEs) { 288 MBBI = MI; 289 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 290 --MBBI; 291 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 292 MBBI->eraseFromParent(); 293 } 294 295 // Finally, nuke the UPDATE_VRSAVE. 296 MI.eraseFromParent(); 297 } 298 299 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 300 // instruction selector. Based on the vector registers that have been used, 301 // transform this into the appropriate ORI instruction. 302 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 303 MachineFunction *MF = MI.getParent()->getParent(); 304 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 305 DebugLoc dl = MI.getDebugLoc(); 306 307 const MachineRegisterInfo &MRI = MF->getRegInfo(); 308 unsigned UsedRegMask = 0; 309 for (unsigned i = 0; i != 32; ++i) 310 if (MRI.isPhysRegModified(VRRegNo[i])) 311 UsedRegMask |= 1 << (31-i); 312 313 // Live in and live out values already must be in the mask, so don't bother 314 // marking them. 315 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 316 unsigned RegNo = TRI->getEncodingValue(LI.first); 317 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 318 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 319 } 320 321 // Live out registers appear as use operands on return instructions. 322 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 323 UsedRegMask != 0 && BI != BE; ++BI) { 324 const MachineBasicBlock &MBB = *BI; 325 if (!MBB.isReturnBlock()) 326 continue; 327 const MachineInstr &Ret = MBB.back(); 328 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 329 const MachineOperand &MO = Ret.getOperand(I); 330 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 331 continue; 332 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 333 UsedRegMask &= ~(1 << (31-RegNo)); 334 } 335 } 336 337 // If no registers are used, turn this into a copy. 338 if (UsedRegMask == 0) { 339 // Remove all VRSAVE code. 340 RemoveVRSaveCode(MI); 341 return; 342 } 343 344 unsigned SrcReg = MI.getOperand(1).getReg(); 345 unsigned DstReg = MI.getOperand(0).getReg(); 346 347 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 348 if (DstReg != SrcReg) 349 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 350 .addReg(SrcReg) 351 .addImm(UsedRegMask); 352 else 353 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 354 .addReg(SrcReg, RegState::Kill) 355 .addImm(UsedRegMask); 356 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 357 if (DstReg != SrcReg) 358 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 359 .addReg(SrcReg) 360 .addImm(UsedRegMask >> 16); 361 else 362 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 363 .addReg(SrcReg, RegState::Kill) 364 .addImm(UsedRegMask >> 16); 365 } else { 366 if (DstReg != SrcReg) 367 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 368 .addReg(SrcReg) 369 .addImm(UsedRegMask >> 16); 370 else 371 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 372 .addReg(SrcReg, RegState::Kill) 373 .addImm(UsedRegMask >> 16); 374 375 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 376 .addReg(DstReg, RegState::Kill) 377 .addImm(UsedRegMask & 0xFFFF); 378 } 379 380 // Remove the old UPDATE_VRSAVE instruction. 381 MI.eraseFromParent(); 382 } 383 384 static bool spillsCR(const MachineFunction &MF) { 385 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 386 return FuncInfo->isCRSpilled(); 387 } 388 389 static bool spillsVRSAVE(const MachineFunction &MF) { 390 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 391 return FuncInfo->isVRSAVESpilled(); 392 } 393 394 static bool hasSpills(const MachineFunction &MF) { 395 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 396 return FuncInfo->hasSpills(); 397 } 398 399 static bool hasNonRISpills(const MachineFunction &MF) { 400 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 401 return FuncInfo->hasNonRISpills(); 402 } 403 404 /// MustSaveLR - Return true if this function requires that we save the LR 405 /// register onto the stack in the prolog and restore it in the epilog of the 406 /// function. 407 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 408 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 409 410 // We need a save/restore of LR if there is any def of LR (which is 411 // defined by calls, including the PIC setup sequence), or if there is 412 // some use of the LR stack slot (e.g. for builtin_return_address). 413 // (LR comes in 32 and 64 bit versions.) 414 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 415 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 416 } 417 418 /// determineFrameLayout - Determine the size of the frame and maximum call 419 /// frame size. 420 unsigned PPCFrameLowering::determineFrameLayout(MachineFunction &MF, 421 bool UpdateMF, 422 bool UseEstimate) const { 423 MachineFrameInfo &MFI = MF.getFrameInfo(); 424 425 // Get the number of bytes to allocate from the FrameInfo 426 unsigned FrameSize = 427 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 428 429 // Get stack alignments. The frame must be aligned to the greatest of these: 430 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI 431 unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame 432 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; 433 434 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 435 436 unsigned LR = RegInfo->getRARegister(); 437 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 438 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 439 !MFI.adjustsStack() && // No calls. 440 !MustSaveLR(MF, LR) && // No need to save LR. 441 !RegInfo->hasBasePointer(MF); // No special alignment. 442 443 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 444 // code if all local vars are reg-allocated. 445 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 446 447 // Check whether we can skip adjusting the stack pointer (by using red zone) 448 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 449 // No need for frame 450 if (UpdateMF) 451 MFI.setStackSize(0); 452 return 0; 453 } 454 455 // Get the maximum call frame size of all the calls. 456 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 457 458 // Maximum call frame needs to be at least big enough for linkage area. 459 unsigned minCallFrameSize = getLinkageSize(); 460 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 461 462 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 463 // that allocations will be aligned. 464 if (MFI.hasVarSizedObjects()) 465 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; 466 467 // Update maximum call frame size. 468 if (UpdateMF) 469 MFI.setMaxCallFrameSize(maxCallFrameSize); 470 471 // Include call frame size in total. 472 FrameSize += maxCallFrameSize; 473 474 // Make sure the frame is aligned. 475 FrameSize = (FrameSize + AlignMask) & ~AlignMask; 476 477 // Update frame info. 478 if (UpdateMF) 479 MFI.setStackSize(FrameSize); 480 481 return FrameSize; 482 } 483 484 // hasFP - Return true if the specified function actually has a dedicated frame 485 // pointer register. 486 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 487 const MachineFrameInfo &MFI = MF.getFrameInfo(); 488 // FIXME: This is pretty much broken by design: hasFP() might be called really 489 // early, before the stack layout was calculated and thus hasFP() might return 490 // true or false here depending on the time of call. 491 return (MFI.getStackSize()) && needsFP(MF); 492 } 493 494 // needsFP - Return true if the specified function should have a dedicated frame 495 // pointer register. This is true if the function has variable sized allocas or 496 // if frame pointer elimination is disabled. 497 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 498 const MachineFrameInfo &MFI = MF.getFrameInfo(); 499 500 // Naked functions have no stack frame pushed, so we don't have a frame 501 // pointer. 502 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 503 return false; 504 505 return MF.getTarget().Options.DisableFramePointerElim(MF) || 506 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 507 (MF.getTarget().Options.GuaranteedTailCallOpt && 508 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 509 } 510 511 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 512 bool is31 = needsFP(MF); 513 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 514 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 515 516 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 517 bool HasBP = RegInfo->hasBasePointer(MF); 518 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 519 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 520 521 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 522 BI != BE; ++BI) 523 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 524 --MBBI; 525 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 526 MachineOperand &MO = MBBI->getOperand(I); 527 if (!MO.isReg()) 528 continue; 529 530 switch (MO.getReg()) { 531 case PPC::FP: 532 MO.setReg(FPReg); 533 break; 534 case PPC::FP8: 535 MO.setReg(FP8Reg); 536 break; 537 case PPC::BP: 538 MO.setReg(BPReg); 539 break; 540 case PPC::BP8: 541 MO.setReg(BP8Reg); 542 break; 543 544 } 545 } 546 } 547 } 548 549 /* This function will do the following: 550 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 551 respectively (defaults recommended by the ABI) and return true 552 - If MBB is not an entry block, initialize the register scavenger and look 553 for available registers. 554 - If the defaults (R0/R12) are available, return true 555 - If TwoUniqueRegsRequired is set to true, it looks for two unique 556 registers. Otherwise, look for a single available register. 557 - If the required registers are found, set SR1 and SR2 and return true. 558 - If the required registers are not found, set SR2 or both SR1 and SR2 to 559 PPC::NoRegister and return false. 560 561 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 562 is not set, this function will attempt to find two different registers, but 563 still return true if only one register is available (and set SR1 == SR2). 564 */ 565 bool 566 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 567 bool UseAtEnd, 568 bool TwoUniqueRegsRequired, 569 unsigned *SR1, 570 unsigned *SR2) const { 571 RegScavenger RS; 572 unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 573 unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 574 575 // Set the defaults for the two scratch registers. 576 if (SR1) 577 *SR1 = R0; 578 579 if (SR2) { 580 assert (SR1 && "Asking for the second scratch register but not the first?"); 581 *SR2 = R12; 582 } 583 584 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 585 if ((UseAtEnd && MBB->isReturnBlock()) || 586 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 587 return true; 588 589 RS.enterBasicBlock(*MBB); 590 591 if (UseAtEnd && !MBB->empty()) { 592 // The scratch register will be used at the end of the block, so must 593 // consider all registers used within the block 594 595 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 596 // If no terminator, back iterator up to previous instruction. 597 if (MBBI == MBB->end()) 598 MBBI = std::prev(MBBI); 599 600 if (MBBI != MBB->begin()) 601 RS.forward(MBBI); 602 } 603 604 // If the two registers are available, we're all good. 605 // Note that we only return here if both R0 and R12 are available because 606 // although the function may not require two unique registers, it may benefit 607 // from having two so we should try to provide them. 608 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 609 return true; 610 611 // Get the list of callee-saved registers for the target. 612 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 613 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 614 615 // Get all the available registers in the block. 616 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 617 &PPC::GPRCRegClass); 618 619 // We shouldn't use callee-saved registers as scratch registers as they may be 620 // available when looking for a candidate block for shrink wrapping but not 621 // available when the actual prologue/epilogue is being emitted because they 622 // were added as live-in to the prologue block by PrologueEpilogueInserter. 623 for (int i = 0; CSRegs[i]; ++i) 624 BV.reset(CSRegs[i]); 625 626 // Set the first scratch register to the first available one. 627 if (SR1) { 628 int FirstScratchReg = BV.find_first(); 629 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 630 } 631 632 // If there is another one available, set the second scratch register to that. 633 // Otherwise, set it to either PPC::NoRegister if this function requires two 634 // or to whatever SR1 is set to if this function doesn't require two. 635 if (SR2) { 636 int SecondScratchReg = BV.find_next(*SR1); 637 if (SecondScratchReg != -1) 638 *SR2 = SecondScratchReg; 639 else 640 *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; 641 } 642 643 // Now that we've done our best to provide both registers, double check 644 // whether we were unable to provide enough. 645 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 646 return false; 647 648 return true; 649 } 650 651 // We need a scratch register for spilling LR and for spilling CR. By default, 652 // we use two scratch registers to hide latency. However, if only one scratch 653 // register is available, we can adjust for that by not overlapping the spill 654 // code. However, if we need to realign the stack (i.e. have a base pointer) 655 // and the stack frame is large, we need two scratch registers. 656 bool 657 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 658 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 659 MachineFunction &MF = *(MBB->getParent()); 660 bool HasBP = RegInfo->hasBasePointer(MF); 661 unsigned FrameSize = determineFrameLayout(MF, false); 662 int NegFrameSize = -FrameSize; 663 bool IsLargeFrame = !isInt<16>(NegFrameSize); 664 MachineFrameInfo &MFI = MF.getFrameInfo(); 665 unsigned MaxAlign = MFI.getMaxAlignment(); 666 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 667 668 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 669 } 670 671 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 672 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 673 674 return findScratchRegister(TmpMBB, false, 675 twoUniqueScratchRegsRequired(TmpMBB)); 676 } 677 678 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 679 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 680 681 return findScratchRegister(TmpMBB, true); 682 } 683 684 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 685 MachineBasicBlock &MBB) const { 686 MachineBasicBlock::iterator MBBI = MBB.begin(); 687 MachineFrameInfo &MFI = MF.getFrameInfo(); 688 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 689 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 690 691 MachineModuleInfo &MMI = MF.getMMI(); 692 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 693 DebugLoc dl; 694 bool needsCFI = MMI.hasDebugInfo() || 695 MF.getFunction().needsUnwindTableEntry(); 696 697 // Get processor type. 698 bool isPPC64 = Subtarget.isPPC64(); 699 // Get the ABI. 700 bool isSVR4ABI = Subtarget.isSVR4ABI(); 701 bool isELFv2ABI = Subtarget.isELFv2ABI(); 702 assert((Subtarget.isDarwinABI() || isSVR4ABI) && 703 "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); 704 705 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 706 // process it. 707 if (!isSVR4ABI) 708 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 709 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 710 HandleVRSaveUpdate(*MBBI, TII); 711 break; 712 } 713 } 714 715 // Move MBBI back to the beginning of the prologue block. 716 MBBI = MBB.begin(); 717 718 // Work out frame sizes. 719 unsigned FrameSize = determineFrameLayout(MF); 720 int NegFrameSize = -FrameSize; 721 if (!isInt<32>(NegFrameSize)) 722 llvm_unreachable("Unhandled stack size!"); 723 724 if (MFI.isFrameAddressTaken()) 725 replaceFPWithRealFP(MF); 726 727 // Check if the link register (LR) must be saved. 728 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 729 bool MustSaveLR = FI->mustSaveLR(); 730 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 731 bool MustSaveCR = !MustSaveCRs.empty(); 732 // Do we have a frame pointer and/or base pointer for this function? 733 bool HasFP = hasFP(MF); 734 bool HasBP = RegInfo->hasBasePointer(MF); 735 bool HasRedZone = isPPC64 || !isSVR4ABI; 736 737 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 738 unsigned BPReg = RegInfo->getBaseRegister(MF); 739 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 740 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 741 unsigned ScratchReg = 0; 742 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 743 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 744 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 745 : PPC::MFLR ); 746 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 747 : PPC::STW ); 748 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 749 : PPC::STWU ); 750 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 751 : PPC::STWUX); 752 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 753 : PPC::LIS ); 754 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 755 : PPC::ORI ); 756 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 757 : PPC::OR ); 758 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 759 : PPC::SUBFC); 760 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 761 : PPC::SUBFIC); 762 763 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 764 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 765 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 766 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 767 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 768 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 769 770 // Using the same bool variable as below to suppress compiler warnings. 771 bool SingleScratchReg = 772 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 773 &ScratchReg, &TempReg); 774 assert(SingleScratchReg && 775 "Required number of registers not available in this block"); 776 777 SingleScratchReg = ScratchReg == TempReg; 778 779 int LROffset = getReturnSaveOffset(); 780 781 int FPOffset = 0; 782 if (HasFP) { 783 if (isSVR4ABI) { 784 MachineFrameInfo &MFI = MF.getFrameInfo(); 785 int FPIndex = FI->getFramePointerSaveIndex(); 786 assert(FPIndex && "No Frame Pointer Save Slot!"); 787 FPOffset = MFI.getObjectOffset(FPIndex); 788 } else { 789 FPOffset = getFramePointerSaveOffset(); 790 } 791 } 792 793 int BPOffset = 0; 794 if (HasBP) { 795 if (isSVR4ABI) { 796 MachineFrameInfo &MFI = MF.getFrameInfo(); 797 int BPIndex = FI->getBasePointerSaveIndex(); 798 assert(BPIndex && "No Base Pointer Save Slot!"); 799 BPOffset = MFI.getObjectOffset(BPIndex); 800 } else { 801 BPOffset = getBasePointerSaveOffset(); 802 } 803 } 804 805 int PBPOffset = 0; 806 if (FI->usesPICBase()) { 807 MachineFrameInfo &MFI = MF.getFrameInfo(); 808 int PBPIndex = FI->getPICBasePointerSaveIndex(); 809 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 810 PBPOffset = MFI.getObjectOffset(PBPIndex); 811 } 812 813 // Get stack alignments. 814 unsigned MaxAlign = MFI.getMaxAlignment(); 815 if (HasBP && MaxAlign > 1) 816 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && 817 "Invalid alignment!"); 818 819 // Frames of 32KB & larger require special handling because they cannot be 820 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 821 bool isLargeFrame = !isInt<16>(NegFrameSize); 822 823 assert((isPPC64 || !MustSaveCR) && 824 "Prologue CR saving supported only in 64-bit mode"); 825 826 // Check if we can move the stack update instruction (stdu) down the prologue 827 // past the callee saves. Hopefully this will avoid the situation where the 828 // saves are waiting for the update on the store with update to complete. 829 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 830 bool MovingStackUpdateDown = false; 831 // This optimization has a number of guards. At this point we are being very 832 // cautious and we do not try to do this when we have a fast call or 833 // we are using PIC base or we are using a frame pointer or a base pointer. 834 // It would be possible to turn on this optimization under these conditions 835 // as well but it would require further modifications to the prologue and 836 // epilogue. For example, if we want to turn on this optimization for 837 // functions that use frame pointers we would have to take into consideration 838 // the fact that spills to the stack may be using r30 instead of r1. 839 // Aside form that we need to have a non-zero frame and we need to have a 840 // non-large frame size. Notice that we did not use !isLargeFrame but we used 841 // isInt<16>(FrameSize) instead. This is important because this guard has to 842 // be identical to the one in the epilogue and in the epilogue the variable 843 // is defined as bool isLargeFrame = !isInt<16>(FrameSize); 844 if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP && 845 !HasBP && isInt<16>(FrameSize)) { 846 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 847 for (int i=0; i<Info.size(); i++) { 848 int FrIdx = Info[i].getFrameIdx(); 849 if (FrIdx < 0) { 850 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 851 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 852 StackUpdateLoc++; 853 MovingStackUpdateDown = true; 854 } 855 } 856 } 857 } 858 859 // If we need to spill the CR and the LR but we don't have two separate 860 // registers available, we must spill them one at a time 861 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 862 // In the ELFv2 ABI, we are not required to save all CR fields. 863 // If only one or two CR fields are clobbered, it is more efficient to use 864 // mfocrf to selectively save just those fields, because mfocrf has short 865 // latency compares to mfcr. 866 unsigned MfcrOpcode = PPC::MFCR8; 867 unsigned CrState = RegState::ImplicitKill; 868 if (isELFv2ABI && MustSaveCRs.size() == 1) { 869 MfcrOpcode = PPC::MFOCRF8; 870 CrState = RegState::Kill; 871 } 872 MachineInstrBuilder MIB = 873 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 874 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 875 MIB.addReg(MustSaveCRs[i], CrState); 876 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 877 .addReg(TempReg, getKillRegState(true)) 878 .addImm(8) 879 .addReg(SPReg); 880 } 881 882 if (MustSaveLR) 883 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 884 885 if (MustSaveCR && 886 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 887 // In the ELFv2 ABI, we are not required to save all CR fields. 888 // If only one or two CR fields are clobbered, it is more efficient to use 889 // mfocrf to selectively save just those fields, because mfocrf has short 890 // latency compares to mfcr. 891 unsigned MfcrOpcode = PPC::MFCR8; 892 unsigned CrState = RegState::ImplicitKill; 893 if (isELFv2ABI && MustSaveCRs.size() == 1) { 894 MfcrOpcode = PPC::MFOCRF8; 895 CrState = RegState::Kill; 896 } 897 MachineInstrBuilder MIB = 898 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 899 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 900 MIB.addReg(MustSaveCRs[i], CrState); 901 } 902 903 if (HasRedZone) { 904 if (HasFP) 905 BuildMI(MBB, MBBI, dl, StoreInst) 906 .addReg(FPReg) 907 .addImm(FPOffset) 908 .addReg(SPReg); 909 if (FI->usesPICBase()) 910 BuildMI(MBB, MBBI, dl, StoreInst) 911 .addReg(PPC::R30) 912 .addImm(PBPOffset) 913 .addReg(SPReg); 914 if (HasBP) 915 BuildMI(MBB, MBBI, dl, StoreInst) 916 .addReg(BPReg) 917 .addImm(BPOffset) 918 .addReg(SPReg); 919 } 920 921 if (MustSaveLR) 922 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 923 .addReg(ScratchReg, getKillRegState(true)) 924 .addImm(LROffset) 925 .addReg(SPReg); 926 927 if (MustSaveCR && 928 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 929 assert(HasRedZone && "A red zone is always available on PPC64"); 930 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 931 .addReg(TempReg, getKillRegState(true)) 932 .addImm(8) 933 .addReg(SPReg); 934 } 935 936 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 937 if (!FrameSize) 938 return; 939 940 // Adjust stack pointer: r1 += NegFrameSize. 941 // If there is a preferred stack alignment, align R1 now 942 943 if (HasBP && HasRedZone) { 944 // Save a copy of r1 as the base pointer. 945 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 946 .addReg(SPReg) 947 .addReg(SPReg); 948 } 949 950 // Have we generated a STUX instruction to claim stack frame? If so, 951 // the negated frame size will be placed in ScratchReg. 952 bool HasSTUX = false; 953 954 // This condition must be kept in sync with canUseAsPrologue. 955 if (HasBP && MaxAlign > 1) { 956 if (isPPC64) 957 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 958 .addReg(SPReg) 959 .addImm(0) 960 .addImm(64 - Log2_32(MaxAlign)); 961 else // PPC32... 962 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 963 .addReg(SPReg) 964 .addImm(0) 965 .addImm(32 - Log2_32(MaxAlign)) 966 .addImm(31); 967 if (!isLargeFrame) { 968 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 969 .addReg(ScratchReg, RegState::Kill) 970 .addImm(NegFrameSize); 971 } else { 972 assert(!SingleScratchReg && "Only a single scratch reg available"); 973 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 974 .addImm(NegFrameSize >> 16); 975 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 976 .addReg(TempReg, RegState::Kill) 977 .addImm(NegFrameSize & 0xFFFF); 978 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 979 .addReg(ScratchReg, RegState::Kill) 980 .addReg(TempReg, RegState::Kill); 981 } 982 983 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 984 .addReg(SPReg, RegState::Kill) 985 .addReg(SPReg) 986 .addReg(ScratchReg); 987 HasSTUX = true; 988 989 } else if (!isLargeFrame) { 990 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 991 .addReg(SPReg) 992 .addImm(NegFrameSize) 993 .addReg(SPReg); 994 995 } else { 996 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 997 .addImm(NegFrameSize >> 16); 998 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 999 .addReg(ScratchReg, RegState::Kill) 1000 .addImm(NegFrameSize & 0xFFFF); 1001 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1002 .addReg(SPReg, RegState::Kill) 1003 .addReg(SPReg) 1004 .addReg(ScratchReg); 1005 HasSTUX = true; 1006 } 1007 1008 if (!HasRedZone) { 1009 assert(!isPPC64 && "A red zone is always available on PPC64"); 1010 if (HasSTUX) { 1011 // The negated frame size is in ScratchReg, and the SPReg has been 1012 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1013 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1014 // the stack frame (i.e. the old SP), ideally, we would put the old 1015 // SP into a register and use it as the base for the stores. The 1016 // problem is that the only available register may be ScratchReg, 1017 // which could be R0, and R0 cannot be used as a base address. 1018 1019 // First, set ScratchReg to the old SP. This may need to be modified 1020 // later. 1021 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1022 .addReg(ScratchReg, RegState::Kill) 1023 .addReg(SPReg); 1024 1025 if (ScratchReg == PPC::R0) { 1026 // R0 cannot be used as a base register, but it can be used as an 1027 // index in a store-indexed. 1028 int LastOffset = 0; 1029 if (HasFP) { 1030 // R0 += (FPOffset-LastOffset). 1031 // Need addic, since addi treats R0 as 0. 1032 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1033 .addReg(ScratchReg) 1034 .addImm(FPOffset-LastOffset); 1035 LastOffset = FPOffset; 1036 // Store FP into *R0. 1037 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1038 .addReg(FPReg, RegState::Kill) // Save FP. 1039 .addReg(PPC::ZERO) 1040 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1041 } 1042 if (FI->usesPICBase()) { 1043 // R0 += (PBPOffset-LastOffset). 1044 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1045 .addReg(ScratchReg) 1046 .addImm(PBPOffset-LastOffset); 1047 LastOffset = PBPOffset; 1048 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1049 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1050 .addReg(PPC::ZERO) 1051 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1052 } 1053 if (HasBP) { 1054 // R0 += (BPOffset-LastOffset). 1055 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1056 .addReg(ScratchReg) 1057 .addImm(BPOffset-LastOffset); 1058 LastOffset = BPOffset; 1059 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1060 .addReg(BPReg, RegState::Kill) // Save BP. 1061 .addReg(PPC::ZERO) 1062 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1063 // BP = R0-LastOffset 1064 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1065 .addReg(ScratchReg, RegState::Kill) 1066 .addImm(-LastOffset); 1067 } 1068 } else { 1069 // ScratchReg is not R0, so use it as the base register. It is 1070 // already set to the old SP, so we can use the offsets directly. 1071 1072 // Now that the stack frame has been allocated, save all the necessary 1073 // registers using ScratchReg as the base address. 1074 if (HasFP) 1075 BuildMI(MBB, MBBI, dl, StoreInst) 1076 .addReg(FPReg) 1077 .addImm(FPOffset) 1078 .addReg(ScratchReg); 1079 if (FI->usesPICBase()) 1080 BuildMI(MBB, MBBI, dl, StoreInst) 1081 .addReg(PPC::R30) 1082 .addImm(PBPOffset) 1083 .addReg(ScratchReg); 1084 if (HasBP) { 1085 BuildMI(MBB, MBBI, dl, StoreInst) 1086 .addReg(BPReg) 1087 .addImm(BPOffset) 1088 .addReg(ScratchReg); 1089 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1090 .addReg(ScratchReg, RegState::Kill) 1091 .addReg(ScratchReg); 1092 } 1093 } 1094 } else { 1095 // The frame size is a known 16-bit constant (fitting in the immediate 1096 // field of STWU). To be here we have to be compiling for PPC32. 1097 // Since the SPReg has been decreased by FrameSize, add it back to each 1098 // offset. 1099 if (HasFP) 1100 BuildMI(MBB, MBBI, dl, StoreInst) 1101 .addReg(FPReg) 1102 .addImm(FrameSize + FPOffset) 1103 .addReg(SPReg); 1104 if (FI->usesPICBase()) 1105 BuildMI(MBB, MBBI, dl, StoreInst) 1106 .addReg(PPC::R30) 1107 .addImm(FrameSize + PBPOffset) 1108 .addReg(SPReg); 1109 if (HasBP) { 1110 BuildMI(MBB, MBBI, dl, StoreInst) 1111 .addReg(BPReg) 1112 .addImm(FrameSize + BPOffset) 1113 .addReg(SPReg); 1114 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1115 .addReg(SPReg) 1116 .addImm(FrameSize); 1117 } 1118 } 1119 } 1120 1121 // Add Call Frame Information for the instructions we generated above. 1122 if (needsCFI) { 1123 unsigned CFIIndex; 1124 1125 if (HasBP) { 1126 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1127 // because if the stack needed aligning then CFA won't be at a fixed 1128 // offset from FP/SP. 1129 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1130 CFIIndex = MF.addFrameInst( 1131 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1132 } else { 1133 // Adjust the definition of CFA to account for the change in SP. 1134 assert(NegFrameSize); 1135 CFIIndex = MF.addFrameInst( 1136 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 1137 } 1138 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1139 .addCFIIndex(CFIIndex); 1140 1141 if (HasFP) { 1142 // Describe where FP was saved, at a fixed offset from CFA. 1143 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1144 CFIIndex = MF.addFrameInst( 1145 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1146 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1147 .addCFIIndex(CFIIndex); 1148 } 1149 1150 if (FI->usesPICBase()) { 1151 // Describe where FP was saved, at a fixed offset from CFA. 1152 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1153 CFIIndex = MF.addFrameInst( 1154 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1155 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1156 .addCFIIndex(CFIIndex); 1157 } 1158 1159 if (HasBP) { 1160 // Describe where BP was saved, at a fixed offset from CFA. 1161 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1162 CFIIndex = MF.addFrameInst( 1163 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1164 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1165 .addCFIIndex(CFIIndex); 1166 } 1167 1168 if (MustSaveLR) { 1169 // Describe where LR was saved, at a fixed offset from CFA. 1170 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1171 CFIIndex = MF.addFrameInst( 1172 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1173 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1174 .addCFIIndex(CFIIndex); 1175 } 1176 } 1177 1178 // If there is a frame pointer, copy R1 into R31 1179 if (HasFP) { 1180 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1181 .addReg(SPReg) 1182 .addReg(SPReg); 1183 1184 if (!HasBP && needsCFI) { 1185 // Change the definition of CFA from SP+offset to FP+offset, because SP 1186 // will change at every alloca. 1187 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1188 unsigned CFIIndex = MF.addFrameInst( 1189 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1190 1191 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1192 .addCFIIndex(CFIIndex); 1193 } 1194 } 1195 1196 if (needsCFI) { 1197 // Describe where callee saved registers were saved, at fixed offsets from 1198 // CFA. 1199 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1200 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1201 unsigned Reg = CSI[I].getReg(); 1202 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1203 1204 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1205 // subregisters of CR2. We just need to emit a move of CR2. 1206 if (PPC::CRBITRCRegClass.contains(Reg)) 1207 continue; 1208 1209 // For SVR4, don't emit a move for the CR spill slot if we haven't 1210 // spilled CRs. 1211 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1212 && !MustSaveCR) 1213 continue; 1214 1215 // For 64-bit SVR4 when we have spilled CRs, the spill location 1216 // is SP+8, not a frame-relative slot. 1217 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1218 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1219 // the whole CR word. In the ELFv2 ABI, every CR that was 1220 // actually saved gets its own CFI record. 1221 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1222 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1223 nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); 1224 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1225 .addCFIIndex(CFIIndex); 1226 continue; 1227 } 1228 1229 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1230 // We have changed the object offset above but we do not want to change 1231 // the actual offsets in the CFI instruction so we have to undo the 1232 // offset change here. 1233 if (MovingStackUpdateDown) 1234 Offset -= NegFrameSize; 1235 1236 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1237 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1238 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1239 .addCFIIndex(CFIIndex); 1240 } 1241 } 1242 } 1243 1244 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1245 MachineBasicBlock &MBB) const { 1246 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1247 DebugLoc dl; 1248 1249 if (MBBI != MBB.end()) 1250 dl = MBBI->getDebugLoc(); 1251 1252 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1253 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1254 1255 // Get alignment info so we know how to restore the SP. 1256 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1257 1258 // Get the number of bytes allocated from the FrameInfo. 1259 int FrameSize = MFI.getStackSize(); 1260 1261 // Get processor type. 1262 bool isPPC64 = Subtarget.isPPC64(); 1263 // Get the ABI. 1264 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1265 1266 // Check if the link register (LR) has been saved. 1267 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1268 bool MustSaveLR = FI->mustSaveLR(); 1269 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 1270 bool MustSaveCR = !MustSaveCRs.empty(); 1271 // Do we have a frame pointer and/or base pointer for this function? 1272 bool HasFP = hasFP(MF); 1273 bool HasBP = RegInfo->hasBasePointer(MF); 1274 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1275 1276 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1277 unsigned BPReg = RegInfo->getBaseRegister(MF); 1278 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1279 unsigned ScratchReg = 0; 1280 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1281 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1282 : PPC::MTLR ); 1283 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1284 : PPC::LWZ ); 1285 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1286 : PPC::LIS ); 1287 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1288 : PPC::OR ); 1289 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1290 : PPC::ORI ); 1291 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1292 : PPC::ADDI ); 1293 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1294 : PPC::ADD4 ); 1295 1296 int LROffset = getReturnSaveOffset(); 1297 1298 int FPOffset = 0; 1299 1300 // Using the same bool variable as below to suppress compiler warnings. 1301 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1302 &TempReg); 1303 assert(SingleScratchReg && 1304 "Could not find an available scratch register"); 1305 1306 SingleScratchReg = ScratchReg == TempReg; 1307 1308 if (HasFP) { 1309 if (isSVR4ABI) { 1310 int FPIndex = FI->getFramePointerSaveIndex(); 1311 assert(FPIndex && "No Frame Pointer Save Slot!"); 1312 FPOffset = MFI.getObjectOffset(FPIndex); 1313 } else { 1314 FPOffset = getFramePointerSaveOffset(); 1315 } 1316 } 1317 1318 int BPOffset = 0; 1319 if (HasBP) { 1320 if (isSVR4ABI) { 1321 int BPIndex = FI->getBasePointerSaveIndex(); 1322 assert(BPIndex && "No Base Pointer Save Slot!"); 1323 BPOffset = MFI.getObjectOffset(BPIndex); 1324 } else { 1325 BPOffset = getBasePointerSaveOffset(); 1326 } 1327 } 1328 1329 int PBPOffset = 0; 1330 if (FI->usesPICBase()) { 1331 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1332 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1333 PBPOffset = MFI.getObjectOffset(PBPIndex); 1334 } 1335 1336 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1337 1338 if (IsReturnBlock) { 1339 unsigned RetOpcode = MBBI->getOpcode(); 1340 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1341 RetOpcode == PPC::TCRETURNdi || 1342 RetOpcode == PPC::TCRETURNai || 1343 RetOpcode == PPC::TCRETURNri8 || 1344 RetOpcode == PPC::TCRETURNdi8 || 1345 RetOpcode == PPC::TCRETURNai8; 1346 1347 if (UsesTCRet) { 1348 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1349 MachineOperand &StackAdjust = MBBI->getOperand(1); 1350 assert(StackAdjust.isImm() && "Expecting immediate value."); 1351 // Adjust stack pointer. 1352 int StackAdj = StackAdjust.getImm(); 1353 int Delta = StackAdj - MaxTCRetDelta; 1354 assert((Delta >= 0) && "Delta must be positive"); 1355 if (MaxTCRetDelta>0) 1356 FrameSize += (StackAdj +Delta); 1357 else 1358 FrameSize += StackAdj; 1359 } 1360 } 1361 1362 // Frames of 32KB & larger require special handling because they cannot be 1363 // indexed into with a simple LD/LWZ immediate offset operand. 1364 bool isLargeFrame = !isInt<16>(FrameSize); 1365 1366 // On targets without red zone, the SP needs to be restored last, so that 1367 // all live contents of the stack frame are upwards of the SP. This means 1368 // that we cannot restore SP just now, since there may be more registers 1369 // to restore from the stack frame (e.g. R31). If the frame size is not 1370 // a simple immediate value, we will need a spare register to hold the 1371 // restored SP. If the frame size is known and small, we can simply adjust 1372 // the offsets of the registers to be restored, and still use SP to restore 1373 // them. In such case, the final update of SP will be to add the frame 1374 // size to it. 1375 // To simplify the code, set RBReg to the base register used to restore 1376 // values from the stack, and set SPAdd to the value that needs to be added 1377 // to the SP at the end. The default values are as if red zone was present. 1378 unsigned RBReg = SPReg; 1379 unsigned SPAdd = 0; 1380 1381 // Check if we can move the stack update instruction up the epilogue 1382 // past the callee saves. This will allow the move to LR instruction 1383 // to be executed before the restores of the callee saves which means 1384 // that the callee saves can hide the latency from the MTLR instrcution. 1385 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1386 bool MovingStackUpdateUp = false; 1387 if (FrameSize && !FI->hasFastCall() && !FI->usesPICBase() && !HasFP && 1388 !HasBP && !isLargeFrame) { 1389 const std::vector< CalleeSavedInfo > & Info = MFI.getCalleeSavedInfo(); 1390 for (int i=0; i<Info.size(); i++) { 1391 int FrIdx = Info[i].getFrameIdx(); 1392 if (FrIdx < 0) { 1393 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 1394 StackUpdateLoc--; 1395 MovingStackUpdateUp = true; 1396 } 1397 } 1398 } 1399 } 1400 1401 if (FrameSize) { 1402 // In the prologue, the loaded (or persistent) stack pointer value is 1403 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1404 // zone add this offset back now. 1405 1406 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1407 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1408 // call which invalidates the stack pointer value in SP(0). So we use the 1409 // value of R31 in this case. 1410 if (FI->hasFastCall()) { 1411 assert(HasFP && "Expecting a valid frame pointer."); 1412 if (!HasRedZone) 1413 RBReg = FPReg; 1414 if (!isLargeFrame) { 1415 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1416 .addReg(FPReg).addImm(FrameSize); 1417 } else { 1418 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1419 .addImm(FrameSize >> 16); 1420 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1421 .addReg(ScratchReg, RegState::Kill) 1422 .addImm(FrameSize & 0xFFFF); 1423 BuildMI(MBB, MBBI, dl, AddInst) 1424 .addReg(RBReg) 1425 .addReg(FPReg) 1426 .addReg(ScratchReg); 1427 } 1428 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1429 if (HasRedZone) { 1430 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1431 .addReg(SPReg) 1432 .addImm(FrameSize); 1433 } else { 1434 // Make sure that adding FrameSize will not overflow the max offset 1435 // size. 1436 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1437 "Local offsets should be negative"); 1438 SPAdd = FrameSize; 1439 FPOffset += FrameSize; 1440 BPOffset += FrameSize; 1441 PBPOffset += FrameSize; 1442 } 1443 } else { 1444 // We don't want to use ScratchReg as a base register, because it 1445 // could happen to be R0. Use FP instead, but make sure to preserve it. 1446 if (!HasRedZone) { 1447 // If FP is not saved, copy it to ScratchReg. 1448 if (!HasFP) 1449 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1450 .addReg(FPReg) 1451 .addReg(FPReg); 1452 RBReg = FPReg; 1453 } 1454 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1455 .addImm(0) 1456 .addReg(SPReg); 1457 } 1458 } 1459 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1460 // If there is no red zone, ScratchReg may be needed for holding a useful 1461 // value (although not the base register). Make sure it is not overwritten 1462 // too early. 1463 1464 assert((isPPC64 || !MustSaveCR) && 1465 "Epilogue CR restoring supported only in 64-bit mode"); 1466 1467 // If we need to restore both the LR and the CR and we only have one 1468 // available scratch register, we must do them one at a time. 1469 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1470 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1471 // is live here. 1472 assert(HasRedZone && "Expecting red zone"); 1473 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1474 .addImm(8) 1475 .addReg(SPReg); 1476 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1477 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1478 .addReg(TempReg, getKillRegState(i == e-1)); 1479 } 1480 1481 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1482 // LR is stored in the caller's stack frame. ScratchReg will be needed 1483 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1484 // a base register anyway, because it may happen to be R0. 1485 bool LoadedLR = false; 1486 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1487 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1488 .addImm(LROffset+SPAdd) 1489 .addReg(RBReg); 1490 LoadedLR = true; 1491 } 1492 1493 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1494 // This will only occur for PPC64. 1495 assert(isPPC64 && "Expecting 64-bit mode"); 1496 assert(RBReg == SPReg && "Should be using SP as a base register"); 1497 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1498 .addImm(8) 1499 .addReg(RBReg); 1500 } 1501 1502 if (HasFP) { 1503 // If there is red zone, restore FP directly, since SP has already been 1504 // restored. Otherwise, restore the value of FP into ScratchReg. 1505 if (HasRedZone || RBReg == SPReg) 1506 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1507 .addImm(FPOffset) 1508 .addReg(SPReg); 1509 else 1510 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1511 .addImm(FPOffset) 1512 .addReg(RBReg); 1513 } 1514 1515 if (FI->usesPICBase()) 1516 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1517 .addImm(PBPOffset) 1518 .addReg(RBReg); 1519 1520 if (HasBP) 1521 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1522 .addImm(BPOffset) 1523 .addReg(RBReg); 1524 1525 // There is nothing more to be loaded from the stack, so now we can 1526 // restore SP: SP = RBReg + SPAdd. 1527 if (RBReg != SPReg || SPAdd != 0) { 1528 assert(!HasRedZone && "This should not happen with red zone"); 1529 // If SPAdd is 0, generate a copy. 1530 if (SPAdd == 0) 1531 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1532 .addReg(RBReg) 1533 .addReg(RBReg); 1534 else 1535 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1536 .addReg(RBReg) 1537 .addImm(SPAdd); 1538 1539 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1540 if (RBReg == FPReg) 1541 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1542 .addReg(ScratchReg) 1543 .addReg(ScratchReg); 1544 1545 // Now load the LR from the caller's stack frame. 1546 if (MustSaveLR && !LoadedLR) 1547 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1548 .addImm(LROffset) 1549 .addReg(SPReg); 1550 } 1551 1552 if (MustSaveCR && 1553 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 1554 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1555 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1556 .addReg(TempReg, getKillRegState(i == e-1)); 1557 1558 if (MustSaveLR) 1559 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1560 1561 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1562 // call optimization 1563 if (IsReturnBlock) { 1564 unsigned RetOpcode = MBBI->getOpcode(); 1565 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1566 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1567 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1568 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1569 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1570 1571 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1572 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1573 .addReg(SPReg).addImm(CallerAllocatedAmt); 1574 } else { 1575 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1576 .addImm(CallerAllocatedAmt >> 16); 1577 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1578 .addReg(ScratchReg, RegState::Kill) 1579 .addImm(CallerAllocatedAmt & 0xFFFF); 1580 BuildMI(MBB, MBBI, dl, AddInst) 1581 .addReg(SPReg) 1582 .addReg(FPReg) 1583 .addReg(ScratchReg); 1584 } 1585 } else { 1586 createTailCallBranchInstr(MBB); 1587 } 1588 } 1589 } 1590 1591 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1592 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1593 1594 // If we got this far a first terminator should exist. 1595 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1596 1597 DebugLoc dl = MBBI->getDebugLoc(); 1598 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1599 1600 // Create branch instruction for pseudo tail call return instruction 1601 unsigned RetOpcode = MBBI->getOpcode(); 1602 if (RetOpcode == PPC::TCRETURNdi) { 1603 MBBI = MBB.getLastNonDebugInstr(); 1604 MachineOperand &JumpTarget = MBBI->getOperand(0); 1605 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1606 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1607 } else if (RetOpcode == PPC::TCRETURNri) { 1608 MBBI = MBB.getLastNonDebugInstr(); 1609 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1610 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1611 } else if (RetOpcode == PPC::TCRETURNai) { 1612 MBBI = MBB.getLastNonDebugInstr(); 1613 MachineOperand &JumpTarget = MBBI->getOperand(0); 1614 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1615 } else if (RetOpcode == PPC::TCRETURNdi8) { 1616 MBBI = MBB.getLastNonDebugInstr(); 1617 MachineOperand &JumpTarget = MBBI->getOperand(0); 1618 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1619 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1620 } else if (RetOpcode == PPC::TCRETURNri8) { 1621 MBBI = MBB.getLastNonDebugInstr(); 1622 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1623 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1624 } else if (RetOpcode == PPC::TCRETURNai8) { 1625 MBBI = MBB.getLastNonDebugInstr(); 1626 MachineOperand &JumpTarget = MBBI->getOperand(0); 1627 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1628 } 1629 } 1630 1631 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1632 BitVector &SavedRegs, 1633 RegScavenger *RS) const { 1634 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1635 1636 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1637 1638 // Save and clear the LR state. 1639 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1640 unsigned LR = RegInfo->getRARegister(); 1641 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1642 SavedRegs.reset(LR); 1643 1644 // Save R31 if necessary 1645 int FPSI = FI->getFramePointerSaveIndex(); 1646 bool isPPC64 = Subtarget.isPPC64(); 1647 bool isDarwinABI = Subtarget.isDarwinABI(); 1648 MachineFrameInfo &MFI = MF.getFrameInfo(); 1649 1650 // If the frame pointer save index hasn't been defined yet. 1651 if (!FPSI && needsFP(MF)) { 1652 // Find out what the fix offset of the frame pointer save area. 1653 int FPOffset = getFramePointerSaveOffset(); 1654 // Allocate the frame index for frame pointer save area. 1655 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1656 // Save the result. 1657 FI->setFramePointerSaveIndex(FPSI); 1658 } 1659 1660 int BPSI = FI->getBasePointerSaveIndex(); 1661 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1662 int BPOffset = getBasePointerSaveOffset(); 1663 // Allocate the frame index for the base pointer save area. 1664 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1665 // Save the result. 1666 FI->setBasePointerSaveIndex(BPSI); 1667 } 1668 1669 // Reserve stack space for the PIC Base register (R30). 1670 // Only used in SVR4 32-bit. 1671 if (FI->usesPICBase()) { 1672 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1673 FI->setPICBasePointerSaveIndex(PBPSI); 1674 } 1675 1676 // Make sure we don't explicitly spill r31, because, for example, we have 1677 // some inline asm which explicity clobbers it, when we otherwise have a 1678 // frame pointer and are using r31's spill slot for the prologue/epilogue 1679 // code. Same goes for the base pointer and the PIC base register. 1680 if (needsFP(MF)) 1681 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1682 if (RegInfo->hasBasePointer(MF)) 1683 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1684 if (FI->usesPICBase()) 1685 SavedRegs.reset(PPC::R30); 1686 1687 // Reserve stack space to move the linkage area to in case of a tail call. 1688 int TCSPDelta = 0; 1689 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1690 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1691 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1692 } 1693 1694 // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the 1695 // function uses CR 2, 3, or 4. 1696 if (!isPPC64 && !isDarwinABI && 1697 (SavedRegs.test(PPC::CR2) || 1698 SavedRegs.test(PPC::CR3) || 1699 SavedRegs.test(PPC::CR4))) { 1700 int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true); 1701 FI->setCRSpillFrameIndex(FrameIdx); 1702 } 1703 } 1704 1705 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1706 RegScavenger *RS) const { 1707 // Early exit if not using the SVR4 ABI. 1708 if (!Subtarget.isSVR4ABI()) { 1709 addScavengingSpillSlot(MF, RS); 1710 return; 1711 } 1712 1713 // Get callee saved register information. 1714 MachineFrameInfo &MFI = MF.getFrameInfo(); 1715 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1716 1717 // If the function is shrink-wrapped, and if the function has a tail call, the 1718 // tail call might not be in the new RestoreBlock, so real branch instruction 1719 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1720 // RestoreBlock. So we handle this case here. 1721 if (MFI.getSavePoint() && MFI.hasTailCall()) { 1722 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1723 for (MachineBasicBlock &MBB : MF) { 1724 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1725 createTailCallBranchInstr(MBB); 1726 } 1727 } 1728 1729 // Early exit if no callee saved registers are modified! 1730 if (CSI.empty() && !needsFP(MF)) { 1731 addScavengingSpillSlot(MF, RS); 1732 return; 1733 } 1734 1735 unsigned MinGPR = PPC::R31; 1736 unsigned MinG8R = PPC::X31; 1737 unsigned MinFPR = PPC::F31; 1738 unsigned MinVR = PPC::V31; 1739 1740 bool HasGPSaveArea = false; 1741 bool HasG8SaveArea = false; 1742 bool HasFPSaveArea = false; 1743 bool HasVRSAVESaveArea = false; 1744 bool HasVRSaveArea = false; 1745 1746 SmallVector<CalleeSavedInfo, 18> GPRegs; 1747 SmallVector<CalleeSavedInfo, 18> G8Regs; 1748 SmallVector<CalleeSavedInfo, 18> FPRegs; 1749 SmallVector<CalleeSavedInfo, 18> VRegs; 1750 1751 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1752 unsigned Reg = CSI[i].getReg(); 1753 if (PPC::GPRCRegClass.contains(Reg)) { 1754 HasGPSaveArea = true; 1755 1756 GPRegs.push_back(CSI[i]); 1757 1758 if (Reg < MinGPR) { 1759 MinGPR = Reg; 1760 } 1761 } else if (PPC::G8RCRegClass.contains(Reg)) { 1762 HasG8SaveArea = true; 1763 1764 G8Regs.push_back(CSI[i]); 1765 1766 if (Reg < MinG8R) { 1767 MinG8R = Reg; 1768 } 1769 } else if (PPC::F8RCRegClass.contains(Reg)) { 1770 HasFPSaveArea = true; 1771 1772 FPRegs.push_back(CSI[i]); 1773 1774 if (Reg < MinFPR) { 1775 MinFPR = Reg; 1776 } 1777 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1778 PPC::CRRCRegClass.contains(Reg)) { 1779 ; // do nothing, as we already know whether CRs are spilled 1780 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 1781 HasVRSAVESaveArea = true; 1782 } else if (PPC::VRRCRegClass.contains(Reg)) { 1783 HasVRSaveArea = true; 1784 1785 VRegs.push_back(CSI[i]); 1786 1787 if (Reg < MinVR) { 1788 MinVR = Reg; 1789 } 1790 } else { 1791 llvm_unreachable("Unknown RegisterClass!"); 1792 } 1793 } 1794 1795 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1796 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1797 1798 int64_t LowerBound = 0; 1799 1800 // Take into account stack space reserved for tail calls. 1801 int TCSPDelta = 0; 1802 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1803 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1804 LowerBound = TCSPDelta; 1805 } 1806 1807 // The Floating-point register save area is right below the back chain word 1808 // of the previous stack frame. 1809 if (HasFPSaveArea) { 1810 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1811 int FI = FPRegs[i].getFrameIdx(); 1812 1813 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1814 } 1815 1816 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1817 } 1818 1819 // Check whether the frame pointer register is allocated. If so, make sure it 1820 // is spilled to the correct offset. 1821 if (needsFP(MF)) { 1822 int FI = PFI->getFramePointerSaveIndex(); 1823 assert(FI && "No Frame Pointer Save Slot!"); 1824 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1825 // FP is R31/X31, so no need to update MinGPR/MinG8R. 1826 HasGPSaveArea = true; 1827 } 1828 1829 if (PFI->usesPICBase()) { 1830 int FI = PFI->getPICBasePointerSaveIndex(); 1831 assert(FI && "No PIC Base Pointer Save Slot!"); 1832 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1833 1834 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 1835 HasGPSaveArea = true; 1836 } 1837 1838 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1839 if (RegInfo->hasBasePointer(MF)) { 1840 int FI = PFI->getBasePointerSaveIndex(); 1841 assert(FI && "No Base Pointer Save Slot!"); 1842 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1843 1844 unsigned BP = RegInfo->getBaseRegister(MF); 1845 if (PPC::G8RCRegClass.contains(BP)) { 1846 MinG8R = std::min<unsigned>(MinG8R, BP); 1847 HasG8SaveArea = true; 1848 } else if (PPC::GPRCRegClass.contains(BP)) { 1849 MinGPR = std::min<unsigned>(MinGPR, BP); 1850 HasGPSaveArea = true; 1851 } 1852 } 1853 1854 // General register save area starts right below the Floating-point 1855 // register save area. 1856 if (HasGPSaveArea || HasG8SaveArea) { 1857 // Move general register save area spill slots down, taking into account 1858 // the size of the Floating-point register save area. 1859 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1860 int FI = GPRegs[i].getFrameIdx(); 1861 1862 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1863 } 1864 1865 // Move general register save area spill slots down, taking into account 1866 // the size of the Floating-point register save area. 1867 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 1868 int FI = G8Regs[i].getFrameIdx(); 1869 1870 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1871 } 1872 1873 unsigned MinReg = 1874 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 1875 TRI->getEncodingValue(MinG8R)); 1876 1877 if (Subtarget.isPPC64()) { 1878 LowerBound -= (31 - MinReg + 1) * 8; 1879 } else { 1880 LowerBound -= (31 - MinReg + 1) * 4; 1881 } 1882 } 1883 1884 // For 32-bit only, the CR save area is below the general register 1885 // save area. For 64-bit SVR4, the CR save area is addressed relative 1886 // to the stack pointer and hence does not need an adjustment here. 1887 // Only CR2 (the first nonvolatile spilled) has an associated frame 1888 // index so that we have a single uniform save area. 1889 if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { 1890 // Adjust the frame index of the CR spill slot. 1891 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1892 unsigned Reg = CSI[i].getReg(); 1893 1894 if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) 1895 // Leave Darwin logic as-is. 1896 || (!Subtarget.isSVR4ABI() && 1897 (PPC::CRBITRCRegClass.contains(Reg) || 1898 PPC::CRRCRegClass.contains(Reg)))) { 1899 int FI = CSI[i].getFrameIdx(); 1900 1901 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1902 } 1903 } 1904 1905 LowerBound -= 4; // The CR save area is always 4 bytes long. 1906 } 1907 1908 if (HasVRSAVESaveArea) { 1909 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 1910 // which have the VRSAVE register class? 1911 // Adjust the frame index of the VRSAVE spill slot. 1912 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1913 unsigned Reg = CSI[i].getReg(); 1914 1915 if (PPC::VRSAVERCRegClass.contains(Reg)) { 1916 int FI = CSI[i].getFrameIdx(); 1917 1918 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1919 } 1920 } 1921 1922 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 1923 } 1924 1925 if (HasVRSaveArea) { 1926 // Insert alignment padding, we need 16-byte alignment. Note: for postive 1927 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 1928 // we are using negative number here (the stack grows downward). We should 1929 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 1930 // is the alignment size ( n = 16 here) and y is the size after aligning. 1931 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 1932 LowerBound &= ~(15); 1933 1934 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 1935 int FI = VRegs[i].getFrameIdx(); 1936 1937 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1938 } 1939 } 1940 1941 addScavengingSpillSlot(MF, RS); 1942 } 1943 1944 void 1945 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 1946 RegScavenger *RS) const { 1947 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 1948 // a large stack, which will require scavenging a register to materialize a 1949 // large offset. 1950 1951 // We need to have a scavenger spill slot for spills if the frame size is 1952 // large. In case there is no free register for large-offset addressing, 1953 // this slot is used for the necessary emergency spill. Also, we need the 1954 // slot for dynamic stack allocations. 1955 1956 // The scavenger might be invoked if the frame offset does not fit into 1957 // the 16-bit immediate. We don't know the complete frame size here 1958 // because we've not yet computed callee-saved register spills or the 1959 // needed alignment padding. 1960 unsigned StackSize = determineFrameLayout(MF, false, true); 1961 MachineFrameInfo &MFI = MF.getFrameInfo(); 1962 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 1963 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 1964 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 1965 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 1966 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 1967 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 1968 unsigned Size = TRI.getSpillSize(RC); 1969 unsigned Align = TRI.getSpillAlignment(RC); 1970 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 1971 1972 // Might we have over-aligned allocas? 1973 bool HasAlVars = MFI.hasVarSizedObjects() && 1974 MFI.getMaxAlignment() > getStackAlignment(); 1975 1976 // These kinds of spills might need two registers. 1977 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 1978 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 1979 1980 } 1981 } 1982 1983 bool 1984 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 1985 MachineBasicBlock::iterator MI, 1986 const std::vector<CalleeSavedInfo> &CSI, 1987 const TargetRegisterInfo *TRI) const { 1988 1989 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 1990 // Return false otherwise to maintain pre-existing behavior. 1991 if (!Subtarget.isSVR4ABI()) 1992 return false; 1993 1994 MachineFunction *MF = MBB.getParent(); 1995 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1996 DebugLoc DL; 1997 bool CRSpilled = false; 1998 MachineInstrBuilder CRMIB; 1999 2000 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2001 unsigned Reg = CSI[i].getReg(); 2002 // Only Darwin actually uses the VRSAVE register, but it can still appear 2003 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 2004 // Darwin, ignore it. 2005 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 2006 continue; 2007 2008 // CR2 through CR4 are the nonvolatile CR fields. 2009 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2010 2011 // Add the callee-saved register as live-in; it's killed at the spill. 2012 MBB.addLiveIn(Reg); 2013 2014 if (CRSpilled && IsCRField) { 2015 CRMIB.addReg(Reg, RegState::ImplicitKill); 2016 continue; 2017 } 2018 2019 // Insert the spill to the stack frame. 2020 if (IsCRField) { 2021 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2022 if (Subtarget.isPPC64()) { 2023 // The actual spill will happen at the start of the prologue. 2024 FuncInfo->addMustSaveCR(Reg); 2025 } else { 2026 CRSpilled = true; 2027 FuncInfo->setSpillsCR(); 2028 2029 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2030 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2031 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2032 .addReg(Reg, RegState::ImplicitKill); 2033 2034 MBB.insert(MI, CRMIB); 2035 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2036 .addReg(PPC::R12, 2037 getKillRegState(true)), 2038 CSI[i].getFrameIdx())); 2039 } 2040 } else { 2041 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2042 TII.storeRegToStackSlot(MBB, MI, Reg, true, 2043 CSI[i].getFrameIdx(), RC, TRI); 2044 } 2045 } 2046 return true; 2047 } 2048 2049 static void 2050 restoreCRs(bool isPPC64, bool is31, 2051 bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, 2052 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2053 const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { 2054 2055 MachineFunction *MF = MBB.getParent(); 2056 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2057 DebugLoc DL; 2058 unsigned RestoreOp, MoveReg; 2059 2060 if (isPPC64) 2061 // This is handled during epilogue generation. 2062 return; 2063 else { 2064 // 32-bit: FP-relative 2065 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), 2066 PPC::R12), 2067 CSI[CSIIndex].getFrameIdx())); 2068 RestoreOp = PPC::MTOCRF; 2069 MoveReg = PPC::R12; 2070 } 2071 2072 if (CR2Spilled) 2073 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2074 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2075 2076 if (CR3Spilled) 2077 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2078 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2079 2080 if (CR4Spilled) 2081 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2082 .addReg(MoveReg, getKillRegState(true))); 2083 } 2084 2085 MachineBasicBlock::iterator PPCFrameLowering:: 2086 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2087 MachineBasicBlock::iterator I) const { 2088 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2089 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2090 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2091 // Add (actually subtract) back the amount the callee popped on return. 2092 if (int CalleeAmt = I->getOperand(1).getImm()) { 2093 bool is64Bit = Subtarget.isPPC64(); 2094 CalleeAmt *= -1; 2095 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2096 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2097 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2098 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2099 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2100 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2101 const DebugLoc &dl = I->getDebugLoc(); 2102 2103 if (isInt<16>(CalleeAmt)) { 2104 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2105 .addReg(StackReg, RegState::Kill) 2106 .addImm(CalleeAmt); 2107 } else { 2108 MachineBasicBlock::iterator MBBI = I; 2109 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2110 .addImm(CalleeAmt >> 16); 2111 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2112 .addReg(TmpReg, RegState::Kill) 2113 .addImm(CalleeAmt & 0xFFFF); 2114 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2115 .addReg(StackReg, RegState::Kill) 2116 .addReg(TmpReg); 2117 } 2118 } 2119 } 2120 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2121 return MBB.erase(I); 2122 } 2123 2124 bool 2125 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2126 MachineBasicBlock::iterator MI, 2127 std::vector<CalleeSavedInfo> &CSI, 2128 const TargetRegisterInfo *TRI) const { 2129 2130 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 2131 // Return false otherwise to maintain pre-existing behavior. 2132 if (!Subtarget.isSVR4ABI()) 2133 return false; 2134 2135 MachineFunction *MF = MBB.getParent(); 2136 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2137 bool CR2Spilled = false; 2138 bool CR3Spilled = false; 2139 bool CR4Spilled = false; 2140 unsigned CSIIndex = 0; 2141 2142 // Initialize insertion-point logic; we will be restoring in reverse 2143 // order of spill. 2144 MachineBasicBlock::iterator I = MI, BeforeI = I; 2145 bool AtStart = I == MBB.begin(); 2146 2147 if (!AtStart) 2148 --BeforeI; 2149 2150 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2151 unsigned Reg = CSI[i].getReg(); 2152 2153 // Only Darwin actually uses the VRSAVE register, but it can still appear 2154 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 2155 // Darwin, ignore it. 2156 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 2157 continue; 2158 2159 if (Reg == PPC::CR2) { 2160 CR2Spilled = true; 2161 // The spill slot is associated only with CR2, which is the 2162 // first nonvolatile spilled. Save it here. 2163 CSIIndex = i; 2164 continue; 2165 } else if (Reg == PPC::CR3) { 2166 CR3Spilled = true; 2167 continue; 2168 } else if (Reg == PPC::CR4) { 2169 CR4Spilled = true; 2170 continue; 2171 } else { 2172 // When we first encounter a non-CR register after seeing at 2173 // least one CR register, restore all spilled CRs together. 2174 if ((CR2Spilled || CR3Spilled || CR4Spilled) 2175 && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 2176 bool is31 = needsFP(*MF); 2177 restoreCRs(Subtarget.isPPC64(), is31, 2178 CR2Spilled, CR3Spilled, CR4Spilled, 2179 MBB, I, CSI, CSIIndex); 2180 CR2Spilled = CR3Spilled = CR4Spilled = false; 2181 } 2182 2183 // Default behavior for non-CR saves. 2184 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2185 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), 2186 RC, TRI); 2187 assert(I != MBB.begin() && 2188 "loadRegFromStackSlot didn't insert any code!"); 2189 } 2190 2191 // Insert in reverse order. 2192 if (AtStart) 2193 I = MBB.begin(); 2194 else { 2195 I = BeforeI; 2196 ++I; 2197 } 2198 } 2199 2200 // If we haven't yet spilled the CRs, do so now. 2201 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2202 bool is31 = needsFP(*MF); 2203 restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, 2204 MBB, I, CSI, CSIIndex); 2205 } 2206 2207 return true; 2208 } 2209 2210 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2211 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2212 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2213 } 2214