1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCFrameLowering.h" 14 #include "PPCInstrBuilder.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCMachineFunctionInfo.h" 17 #include "PPCSubtarget.h" 18 #include "PPCTargetMachine.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "framelowering" 32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 34 35 static cl::opt<bool> 36 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 37 cl::desc("Enable spills in prologue to vector registers."), 38 cl::init(false), cl::Hidden); 39 40 /// VRRegNo - Map from a numbered VR register to its enum value. 
41 /// 42 static const MCPhysReg VRRegNo[] = { 43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 47 }; 48 49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 50 if (STI.isDarwinABI()) 51 return STI.isPPC64() ? 16 : 8; 52 // SVR4 ABI: 53 return STI.isPPC64() ? 16 : 4; 54 } 55 56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 57 return STI.isELFv2ABI() ? 24 : 40; 58 } 59 60 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 61 // For the Darwin ABI: 62 // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area 63 // for saving the frame pointer (if needed.) While the published ABI has 64 // not used this slot since at least MacOSX 10.2, there is older code 65 // around that does use it, and that needs to continue to work. 66 if (STI.isDarwinABI()) 67 return STI.isPPC64() ? -8U : -4U; 68 69 // SVR4 ABI: First slot in the general register save area. 70 return STI.isPPC64() ? -8U : -4U; 71 } 72 73 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 74 if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64()) 75 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 76 77 // 32-bit SVR4 ABI: 78 return 8; 79 } 80 81 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 82 if (STI.isDarwinABI()) 83 return STI.isPPC64() ? -16U : -8U; 84 85 // SVR4 ABI: First slot in the general register save area. 86 return STI.isPPC64() 87 ? -16U 88 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 89 } 90 91 static unsigned computeCRSaveOffset() { 92 // The condition register save offset needs to be updated for AIX PPC32. 
93 return 8; 94 } 95 96 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 97 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 98 STI.getPlatformStackAlignment(), 0), 99 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 100 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 101 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 102 LinkageSize(computeLinkageSize(Subtarget)), 103 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 104 CRSaveOffset(computeCRSaveOffset()) {} 105 106 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 107 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 108 unsigned &NumEntries) const { 109 if (Subtarget.isDarwinABI()) { 110 NumEntries = 1; 111 if (Subtarget.isPPC64()) { 112 static const SpillSlot darwin64Offsets = {PPC::X31, -8}; 113 return &darwin64Offsets; 114 } else { 115 static const SpillSlot darwinOffsets = {PPC::R31, -4}; 116 return &darwinOffsets; 117 } 118 } 119 120 // Early exit if not using the SVR4 ABI. 121 if (!Subtarget.isSVR4ABI()) { 122 NumEntries = 0; 123 return nullptr; 124 } 125 126 // Note that the offsets here overlap, but this is fixed up in 127 // processFunctionBeforeFrameFinalized. 128 129 static const SpillSlot Offsets[] = { 130 // Floating-point register save area offsets. 131 {PPC::F31, -8}, 132 {PPC::F30, -16}, 133 {PPC::F29, -24}, 134 {PPC::F28, -32}, 135 {PPC::F27, -40}, 136 {PPC::F26, -48}, 137 {PPC::F25, -56}, 138 {PPC::F24, -64}, 139 {PPC::F23, -72}, 140 {PPC::F22, -80}, 141 {PPC::F21, -88}, 142 {PPC::F20, -96}, 143 {PPC::F19, -104}, 144 {PPC::F18, -112}, 145 {PPC::F17, -120}, 146 {PPC::F16, -128}, 147 {PPC::F15, -136}, 148 {PPC::F14, -144}, 149 150 // General register save area offsets. 
151 {PPC::R31, -4}, 152 {PPC::R30, -8}, 153 {PPC::R29, -12}, 154 {PPC::R28, -16}, 155 {PPC::R27, -20}, 156 {PPC::R26, -24}, 157 {PPC::R25, -28}, 158 {PPC::R24, -32}, 159 {PPC::R23, -36}, 160 {PPC::R22, -40}, 161 {PPC::R21, -44}, 162 {PPC::R20, -48}, 163 {PPC::R19, -52}, 164 {PPC::R18, -56}, 165 {PPC::R17, -60}, 166 {PPC::R16, -64}, 167 {PPC::R15, -68}, 168 {PPC::R14, -72}, 169 170 // CR save area offset. We map each of the nonvolatile CR fields 171 // to the slot for CR2, which is the first of the nonvolatile CR 172 // fields to be assigned, so that we only allocate one save slot. 173 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 174 {PPC::CR2, -4}, 175 176 // VRSAVE save area offset. 177 {PPC::VRSAVE, -4}, 178 179 // Vector register save area 180 {PPC::V31, -16}, 181 {PPC::V30, -32}, 182 {PPC::V29, -48}, 183 {PPC::V28, -64}, 184 {PPC::V27, -80}, 185 {PPC::V26, -96}, 186 {PPC::V25, -112}, 187 {PPC::V24, -128}, 188 {PPC::V23, -144}, 189 {PPC::V22, -160}, 190 {PPC::V21, -176}, 191 {PPC::V20, -192}, 192 193 // SPE register save area (overlaps Vector save area). 194 {PPC::S31, -8}, 195 {PPC::S30, -16}, 196 {PPC::S29, -24}, 197 {PPC::S28, -32}, 198 {PPC::S27, -40}, 199 {PPC::S26, -48}, 200 {PPC::S25, -56}, 201 {PPC::S24, -64}, 202 {PPC::S23, -72}, 203 {PPC::S22, -80}, 204 {PPC::S21, -88}, 205 {PPC::S20, -96}, 206 {PPC::S19, -104}, 207 {PPC::S18, -112}, 208 {PPC::S17, -120}, 209 {PPC::S16, -128}, 210 {PPC::S15, -136}, 211 {PPC::S14, -144}}; 212 213 static const SpillSlot Offsets64[] = { 214 // Floating-point register save area offsets. 
215 {PPC::F31, -8}, 216 {PPC::F30, -16}, 217 {PPC::F29, -24}, 218 {PPC::F28, -32}, 219 {PPC::F27, -40}, 220 {PPC::F26, -48}, 221 {PPC::F25, -56}, 222 {PPC::F24, -64}, 223 {PPC::F23, -72}, 224 {PPC::F22, -80}, 225 {PPC::F21, -88}, 226 {PPC::F20, -96}, 227 {PPC::F19, -104}, 228 {PPC::F18, -112}, 229 {PPC::F17, -120}, 230 {PPC::F16, -128}, 231 {PPC::F15, -136}, 232 {PPC::F14, -144}, 233 234 // General register save area offsets. 235 {PPC::X31, -8}, 236 {PPC::X30, -16}, 237 {PPC::X29, -24}, 238 {PPC::X28, -32}, 239 {PPC::X27, -40}, 240 {PPC::X26, -48}, 241 {PPC::X25, -56}, 242 {PPC::X24, -64}, 243 {PPC::X23, -72}, 244 {PPC::X22, -80}, 245 {PPC::X21, -88}, 246 {PPC::X20, -96}, 247 {PPC::X19, -104}, 248 {PPC::X18, -112}, 249 {PPC::X17, -120}, 250 {PPC::X16, -128}, 251 {PPC::X15, -136}, 252 {PPC::X14, -144}, 253 254 // VRSAVE save area offset. 255 {PPC::VRSAVE, -4}, 256 257 // Vector register save area 258 {PPC::V31, -16}, 259 {PPC::V30, -32}, 260 {PPC::V29, -48}, 261 {PPC::V28, -64}, 262 {PPC::V27, -80}, 263 {PPC::V26, -96}, 264 {PPC::V25, -112}, 265 {PPC::V24, -128}, 266 {PPC::V23, -144}, 267 {PPC::V22, -160}, 268 {PPC::V21, -176}, 269 {PPC::V20, -192}}; 270 271 if (Subtarget.isPPC64()) { 272 NumEntries = array_lengthof(Offsets64); 273 274 return Offsets64; 275 } else { 276 NumEntries = array_lengthof(Offsets); 277 278 return Offsets; 279 } 280 } 281 282 /// RemoveVRSaveCode - We have found that this function does not need any code 283 /// to manipulate the VRSAVE register, even though it uses vector registers. 284 /// This can happen when the only registers used are known to be live in or out 285 /// of the function. Remove all of the VRSAVE related code from the function. 286 /// FIXME: The removal of the code results in a compile failure at -O0 when the 287 /// function contains a function call, as the GPR containing original VRSAVE 288 /// contents is spilled and reloaded around the call. 
Without the prolog code, 289 /// the spill instruction refers to an undefined register. This code needs 290 /// to account for all uses of that GPR. 291 static void RemoveVRSaveCode(MachineInstr &MI) { 292 MachineBasicBlock *Entry = MI.getParent(); 293 MachineFunction *MF = Entry->getParent(); 294 295 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 296 MachineBasicBlock::iterator MBBI = MI; 297 ++MBBI; 298 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 299 MBBI->eraseFromParent(); 300 301 bool RemovedAllMTVRSAVEs = true; 302 // See if we can find and remove the MTVRSAVE instruction from all of the 303 // epilog blocks. 304 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 305 // If last instruction is a return instruction, add an epilogue 306 if (I->isReturnBlock()) { 307 bool FoundIt = false; 308 for (MBBI = I->end(); MBBI != I->begin(); ) { 309 --MBBI; 310 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 311 MBBI->eraseFromParent(); // remove it. 312 FoundIt = true; 313 break; 314 } 315 } 316 RemovedAllMTVRSAVEs &= FoundIt; 317 } 318 } 319 320 // If we found and removed all MTVRSAVE instructions, remove the read of 321 // VRSAVE as well. 322 if (RemovedAllMTVRSAVEs) { 323 MBBI = MI; 324 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 325 --MBBI; 326 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 327 MBBI->eraseFromParent(); 328 } 329 330 // Finally, nuke the UPDATE_VRSAVE. 331 MI.eraseFromParent(); 332 } 333 334 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 335 // instruction selector. Based on the vector registers that have been used, 336 // transform this into the appropriate ORI instruction. 
337 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 338 MachineFunction *MF = MI.getParent()->getParent(); 339 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 340 DebugLoc dl = MI.getDebugLoc(); 341 342 const MachineRegisterInfo &MRI = MF->getRegInfo(); 343 unsigned UsedRegMask = 0; 344 for (unsigned i = 0; i != 32; ++i) 345 if (MRI.isPhysRegModified(VRRegNo[i])) 346 UsedRegMask |= 1 << (31-i); 347 348 // Live in and live out values already must be in the mask, so don't bother 349 // marking them. 350 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 351 unsigned RegNo = TRI->getEncodingValue(LI.first); 352 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 353 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 354 } 355 356 // Live out registers appear as use operands on return instructions. 357 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 358 UsedRegMask != 0 && BI != BE; ++BI) { 359 const MachineBasicBlock &MBB = *BI; 360 if (!MBB.isReturnBlock()) 361 continue; 362 const MachineInstr &Ret = MBB.back(); 363 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 364 const MachineOperand &MO = Ret.getOperand(I); 365 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 366 continue; 367 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 368 UsedRegMask &= ~(1 << (31-RegNo)); 369 } 370 } 371 372 // If no registers are used, turn this into a copy. 373 if (UsedRegMask == 0) { 374 // Remove all VRSAVE code. 
375 RemoveVRSaveCode(MI); 376 return; 377 } 378 379 unsigned SrcReg = MI.getOperand(1).getReg(); 380 unsigned DstReg = MI.getOperand(0).getReg(); 381 382 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 383 if (DstReg != SrcReg) 384 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 385 .addReg(SrcReg) 386 .addImm(UsedRegMask); 387 else 388 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 389 .addReg(SrcReg, RegState::Kill) 390 .addImm(UsedRegMask); 391 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 392 if (DstReg != SrcReg) 393 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 394 .addReg(SrcReg) 395 .addImm(UsedRegMask >> 16); 396 else 397 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 398 .addReg(SrcReg, RegState::Kill) 399 .addImm(UsedRegMask >> 16); 400 } else { 401 if (DstReg != SrcReg) 402 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 403 .addReg(SrcReg) 404 .addImm(UsedRegMask >> 16); 405 else 406 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 407 .addReg(SrcReg, RegState::Kill) 408 .addImm(UsedRegMask >> 16); 409 410 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 411 .addReg(DstReg, RegState::Kill) 412 .addImm(UsedRegMask & 0xFFFF); 413 } 414 415 // Remove the old UPDATE_VRSAVE instruction. 
416 MI.eraseFromParent(); 417 } 418 419 static bool spillsCR(const MachineFunction &MF) { 420 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 421 return FuncInfo->isCRSpilled(); 422 } 423 424 static bool spillsVRSAVE(const MachineFunction &MF) { 425 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 426 return FuncInfo->isVRSAVESpilled(); 427 } 428 429 static bool hasSpills(const MachineFunction &MF) { 430 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 431 return FuncInfo->hasSpills(); 432 } 433 434 static bool hasNonRISpills(const MachineFunction &MF) { 435 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 436 return FuncInfo->hasNonRISpills(); 437 } 438 439 /// MustSaveLR - Return true if this function requires that we save the LR 440 /// register onto the stack in the prolog and restore it in the epilog of the 441 /// function. 442 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 443 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 444 445 // We need a save/restore of LR if there is any def of LR (which is 446 // defined by calls, including the PIC setup sequence), or if there is 447 // some use of the LR stack slot (e.g. for builtin_return_address). 448 // (LR comes in 32 and 64 bit versions.) 449 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 450 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 451 } 452 453 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 454 /// call frame size. Update the MachineFunction object with the stack size. 
455 unsigned 456 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 457 bool UseEstimate) const { 458 unsigned NewMaxCallFrameSize = 0; 459 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 460 &NewMaxCallFrameSize); 461 MF.getFrameInfo().setStackSize(FrameSize); 462 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 463 return FrameSize; 464 } 465 466 /// determineFrameLayout - Determine the size of the frame and maximum call 467 /// frame size. 468 unsigned 469 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 470 bool UseEstimate, 471 unsigned *NewMaxCallFrameSize) const { 472 const MachineFrameInfo &MFI = MF.getFrameInfo(); 473 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 474 475 // Get the number of bytes to allocate from the FrameInfo 476 unsigned FrameSize = 477 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 478 479 // Get stack alignments. The frame must be aligned to the greatest of these: 480 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI 481 unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame 482 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; 483 484 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 485 486 unsigned LR = RegInfo->getRARegister(); 487 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 488 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 489 !MFI.adjustsStack() && // No calls. 490 !MustSaveLR(MF, LR) && // No need to save LR. 491 !FI->mustSaveTOC() && // No need to save TOC. 492 !RegInfo->hasBasePointer(MF); // No special alignment. 493 494 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 495 // code if all local vars are reg-allocated. 
496 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 497 498 // Check whether we can skip adjusting the stack pointer (by using red zone) 499 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 500 // No need for frame 501 return 0; 502 } 503 504 // Get the maximum call frame size of all the calls. 505 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 506 507 // Maximum call frame needs to be at least big enough for linkage area. 508 unsigned minCallFrameSize = getLinkageSize(); 509 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 510 511 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 512 // that allocations will be aligned. 513 if (MFI.hasVarSizedObjects()) 514 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; 515 516 // Update the new max call frame size if the caller passes in a valid pointer. 517 if (NewMaxCallFrameSize) 518 *NewMaxCallFrameSize = maxCallFrameSize; 519 520 // Include call frame size in total. 521 FrameSize += maxCallFrameSize; 522 523 // Make sure the frame is aligned. 524 FrameSize = (FrameSize + AlignMask) & ~AlignMask; 525 526 return FrameSize; 527 } 528 529 // hasFP - Return true if the specified function actually has a dedicated frame 530 // pointer register. 531 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 532 const MachineFrameInfo &MFI = MF.getFrameInfo(); 533 // FIXME: This is pretty much broken by design: hasFP() might be called really 534 // early, before the stack layout was calculated and thus hasFP() might return 535 // true or false here depending on the time of call. 536 return (MFI.getStackSize()) && needsFP(MF); 537 } 538 539 // needsFP - Return true if the specified function should have a dedicated frame 540 // pointer register. This is true if the function has variable sized allocas or 541 // if frame pointer elimination is disabled. 
542 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 543 const MachineFrameInfo &MFI = MF.getFrameInfo(); 544 545 // Naked functions have no stack frame pushed, so we don't have a frame 546 // pointer. 547 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 548 return false; 549 550 return MF.getTarget().Options.DisableFramePointerElim(MF) || 551 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 552 (MF.getTarget().Options.GuaranteedTailCallOpt && 553 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 554 } 555 556 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 557 bool is31 = needsFP(MF); 558 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 559 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 560 561 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 562 bool HasBP = RegInfo->hasBasePointer(MF); 563 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 564 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 565 566 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 567 BI != BE; ++BI) 568 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 569 --MBBI; 570 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 571 MachineOperand &MO = MBBI->getOperand(I); 572 if (!MO.isReg()) 573 continue; 574 575 switch (MO.getReg()) { 576 case PPC::FP: 577 MO.setReg(FPReg); 578 break; 579 case PPC::FP8: 580 MO.setReg(FP8Reg); 581 break; 582 case PPC::BP: 583 MO.setReg(BPReg); 584 break; 585 case PPC::BP8: 586 MO.setReg(BP8Reg); 587 break; 588 589 } 590 } 591 } 592 } 593 594 /* This function will do the following: 595 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 596 respectively (defaults recommended by the ABI) and return true 597 - If MBB is not an entry block, initialize the register scavenger and look 598 for available registers. 
- If the defaults (R0/R12) are available, return true
- If TwoUniqueRegsRequired is set to true, it looks for two unique
  registers. Otherwise, look for a single available register.
  - If the required registers are found, set SR1 and SR2 and return true.
  - If the required registers are not found, set SR2 or both SR1 and SR2 to
    PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).

   UseAtEnd selects where in MBB the registers are wanted: at the end of the
   block when true, at its start when false.  SR1 and SR2 may each be null, in
   which case nothing is written through them; SR2 may only be non-null when
   SR1 is.
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      unsigned *SR1,
                                      unsigned *SR2) const {
  RegScavenger RS;
  unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  // "Exit" only counts when the registers are wanted at the end of the block,
  // "entry" only when they are wanted at its start.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    // Advance the scavenger to that point unless it is already the first
    // instruction of the block.
    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  // The callee-saved list is walked until its zero terminator.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  // NOTE(review): relies on the default arguments of determineFrameLayout,
  // which are declared outside this file.
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  // A frame is "large" when its negated size does not fit a signed 16-bit
  // immediate.
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned MaxAlign = MFI.getMaxAlignment();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  // Two registers are needed only with a base pointer and an alignment above
  // 1, and then only if the frame is large or there is no red zone.
  return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
}

// canUseAsPrologue - MBB can host the prologue if findScratchRegister succeeds
// for the start of the block with the number of registers this function needs.
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  // findScratchRegister takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

// canUseAsEpilogue - MBB can host the epilogue if findScratchRegister succeeds
// for the end of the block.
// NOTE(review): the remaining arguments take their defaults, which are
// declared outside this file.
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

// stackUpdateCanBeMoved - Return true if the stack pointer update in the
// prologue of MF may be moved; each check below rules out one situation in
// which it may not.
bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
770 return !RegInfo->requiresFrameIndexScavenging(MF); 771 } 772 773 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 774 MachineBasicBlock &MBB) const { 775 MachineBasicBlock::iterator MBBI = MBB.begin(); 776 MachineFrameInfo &MFI = MF.getFrameInfo(); 777 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 778 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 779 780 MachineModuleInfo &MMI = MF.getMMI(); 781 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 782 DebugLoc dl; 783 bool needsCFI = MMI.hasDebugInfo() || 784 MF.getFunction().needsUnwindTableEntry(); 785 786 // Get processor type. 787 bool isPPC64 = Subtarget.isPPC64(); 788 // Get the ABI. 789 bool isSVR4ABI = Subtarget.isSVR4ABI(); 790 bool isELFv2ABI = Subtarget.isELFv2ABI(); 791 assert((Subtarget.isDarwinABI() || isSVR4ABI) && 792 "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); 793 794 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 795 // process it. 796 if (!isSVR4ABI) 797 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 798 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 799 HandleVRSaveUpdate(*MBBI, TII); 800 break; 801 } 802 } 803 804 // Move MBBI back to the beginning of the prologue block. 805 MBBI = MBB.begin(); 806 807 // Work out frame sizes. 808 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 809 int NegFrameSize = -FrameSize; 810 if (!isInt<32>(NegFrameSize)) 811 llvm_unreachable("Unhandled stack size!"); 812 813 if (MFI.isFrameAddressTaken()) 814 replaceFPWithRealFP(MF); 815 816 // Check if the link register (LR) must be saved. 817 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 818 bool MustSaveLR = FI->mustSaveLR(); 819 bool MustSaveTOC = FI->mustSaveTOC(); 820 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 821 bool MustSaveCR = !MustSaveCRs.empty(); 822 // Do we have a frame pointer and/or base pointer for this function? 
823 bool HasFP = hasFP(MF); 824 bool HasBP = RegInfo->hasBasePointer(MF); 825 bool HasRedZone = isPPC64 || !isSVR4ABI; 826 827 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 828 unsigned BPReg = RegInfo->getBaseRegister(MF); 829 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 830 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 831 unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 832 unsigned ScratchReg = 0; 833 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 834 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 835 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 836 : PPC::MFLR ); 837 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 838 : PPC::STW ); 839 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 840 : PPC::STWU ); 841 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 842 : PPC::STWUX); 843 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 844 : PPC::LIS ); 845 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 846 : PPC::ORI ); 847 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 848 : PPC::OR ); 849 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 850 : PPC::SUBFC); 851 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 852 : PPC::SUBFIC); 853 854 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 855 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 856 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 857 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 858 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 859 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 860 861 // Using the same bool variable as below to suppress compiler warnings. 
862 bool SingleScratchReg = 863 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 864 &ScratchReg, &TempReg); 865 assert(SingleScratchReg && 866 "Required number of registers not available in this block"); 867 868 SingleScratchReg = ScratchReg == TempReg; 869 870 int LROffset = getReturnSaveOffset(); 871 872 int FPOffset = 0; 873 if (HasFP) { 874 if (isSVR4ABI) { 875 MachineFrameInfo &MFI = MF.getFrameInfo(); 876 int FPIndex = FI->getFramePointerSaveIndex(); 877 assert(FPIndex && "No Frame Pointer Save Slot!"); 878 FPOffset = MFI.getObjectOffset(FPIndex); 879 } else { 880 FPOffset = getFramePointerSaveOffset(); 881 } 882 } 883 884 int BPOffset = 0; 885 if (HasBP) { 886 if (isSVR4ABI) { 887 MachineFrameInfo &MFI = MF.getFrameInfo(); 888 int BPIndex = FI->getBasePointerSaveIndex(); 889 assert(BPIndex && "No Base Pointer Save Slot!"); 890 BPOffset = MFI.getObjectOffset(BPIndex); 891 } else { 892 BPOffset = getBasePointerSaveOffset(); 893 } 894 } 895 896 int PBPOffset = 0; 897 if (FI->usesPICBase()) { 898 MachineFrameInfo &MFI = MF.getFrameInfo(); 899 int PBPIndex = FI->getPICBasePointerSaveIndex(); 900 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 901 PBPOffset = MFI.getObjectOffset(PBPIndex); 902 } 903 904 // Get stack alignments. 905 unsigned MaxAlign = MFI.getMaxAlignment(); 906 if (HasBP && MaxAlign > 1) 907 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && 908 "Invalid alignment!"); 909 910 // Frames of 32KB & larger require special handling because they cannot be 911 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 912 bool isLargeFrame = !isInt<16>(NegFrameSize); 913 914 assert((isPPC64 || !MustSaveCR) && 915 "Prologue CR saving supported only in 64-bit mode"); 916 917 // Check if we can move the stack update instruction (stdu) down the prologue 918 // past the callee saves. Hopefully this will avoid the situation where the 919 // saves are waiting for the update on the store with update to complete. 
920 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 921 bool MovingStackUpdateDown = false; 922 923 // Check if we can move the stack update. 924 if (stackUpdateCanBeMoved(MF)) { 925 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 926 for (CalleeSavedInfo CSI : Info) { 927 int FrIdx = CSI.getFrameIdx(); 928 // If the frame index is not negative the callee saved info belongs to a 929 // stack object that is not a fixed stack object. We ignore non-fixed 930 // stack objects because we won't move the stack update pointer past them. 931 if (FrIdx >= 0) 932 continue; 933 934 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 935 StackUpdateLoc++; 936 MovingStackUpdateDown = true; 937 } else { 938 // We need all of the Frame Indices to meet these conditions. 939 // If they do not, abort the whole operation. 940 StackUpdateLoc = MBBI; 941 MovingStackUpdateDown = false; 942 break; 943 } 944 } 945 946 // If the operation was not aborted then update the object offset. 947 if (MovingStackUpdateDown) { 948 for (CalleeSavedInfo CSI : Info) { 949 int FrIdx = CSI.getFrameIdx(); 950 if (FrIdx < 0) 951 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 952 } 953 } 954 } 955 956 // If we need to spill the CR and the LR but we don't have two separate 957 // registers available, we must spill them one at a time 958 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 959 // In the ELFv2 ABI, we are not required to save all CR fields. 960 // If only one or two CR fields are clobbered, it is more efficient to use 961 // mfocrf to selectively save just those fields, because mfocrf has short 962 // latency compares to mfcr. 
963 unsigned MfcrOpcode = PPC::MFCR8; 964 unsigned CrState = RegState::ImplicitKill; 965 if (isELFv2ABI && MustSaveCRs.size() == 1) { 966 MfcrOpcode = PPC::MFOCRF8; 967 CrState = RegState::Kill; 968 } 969 MachineInstrBuilder MIB = 970 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 971 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 972 MIB.addReg(MustSaveCRs[i], CrState); 973 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 974 .addReg(TempReg, getKillRegState(true)) 975 .addImm(getCRSaveOffset()) 976 .addReg(SPReg); 977 } 978 979 if (MustSaveLR) 980 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 981 982 if (MustSaveCR && 983 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 984 // In the ELFv2 ABI, we are not required to save all CR fields. 985 // If only one or two CR fields are clobbered, it is more efficient to use 986 // mfocrf to selectively save just those fields, because mfocrf has short 987 // latency compares to mfcr. 988 unsigned MfcrOpcode = PPC::MFCR8; 989 unsigned CrState = RegState::ImplicitKill; 990 if (isELFv2ABI && MustSaveCRs.size() == 1) { 991 MfcrOpcode = PPC::MFOCRF8; 992 CrState = RegState::Kill; 993 } 994 MachineInstrBuilder MIB = 995 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 996 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 997 MIB.addReg(MustSaveCRs[i], CrState); 998 } 999 1000 if (HasRedZone) { 1001 if (HasFP) 1002 BuildMI(MBB, MBBI, dl, StoreInst) 1003 .addReg(FPReg) 1004 .addImm(FPOffset) 1005 .addReg(SPReg); 1006 if (FI->usesPICBase()) 1007 BuildMI(MBB, MBBI, dl, StoreInst) 1008 .addReg(PPC::R30) 1009 .addImm(PBPOffset) 1010 .addReg(SPReg); 1011 if (HasBP) 1012 BuildMI(MBB, MBBI, dl, StoreInst) 1013 .addReg(BPReg) 1014 .addImm(BPOffset) 1015 .addReg(SPReg); 1016 } 1017 1018 if (MustSaveLR) 1019 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 1020 .addReg(ScratchReg, getKillRegState(true)) 1021 .addImm(LROffset) 1022 .addReg(SPReg); 1023 1024 if (MustSaveCR && 1025 !(SingleScratchReg && 
MustSaveLR)) { // will only occur for PPC64 1026 assert(HasRedZone && "A red zone is always available on PPC64"); 1027 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 1028 .addReg(TempReg, getKillRegState(true)) 1029 .addImm(getCRSaveOffset()) 1030 .addReg(SPReg); 1031 } 1032 1033 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 1034 if (!FrameSize) 1035 return; 1036 1037 // Adjust stack pointer: r1 += NegFrameSize. 1038 // If there is a preferred stack alignment, align R1 now 1039 1040 if (HasBP && HasRedZone) { 1041 // Save a copy of r1 as the base pointer. 1042 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1043 .addReg(SPReg) 1044 .addReg(SPReg); 1045 } 1046 1047 // Have we generated a STUX instruction to claim stack frame? If so, 1048 // the negated frame size will be placed in ScratchReg. 1049 bool HasSTUX = false; 1050 1051 // This condition must be kept in sync with canUseAsPrologue. 1052 if (HasBP && MaxAlign > 1) { 1053 if (isPPC64) 1054 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1055 .addReg(SPReg) 1056 .addImm(0) 1057 .addImm(64 - Log2_32(MaxAlign)); 1058 else // PPC32... 
1059 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1060 .addReg(SPReg) 1061 .addImm(0) 1062 .addImm(32 - Log2_32(MaxAlign)) 1063 .addImm(31); 1064 if (!isLargeFrame) { 1065 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1066 .addReg(ScratchReg, RegState::Kill) 1067 .addImm(NegFrameSize); 1068 } else { 1069 assert(!SingleScratchReg && "Only a single scratch reg available"); 1070 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1071 .addImm(NegFrameSize >> 16); 1072 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1073 .addReg(TempReg, RegState::Kill) 1074 .addImm(NegFrameSize & 0xFFFF); 1075 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1076 .addReg(ScratchReg, RegState::Kill) 1077 .addReg(TempReg, RegState::Kill); 1078 } 1079 1080 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1081 .addReg(SPReg, RegState::Kill) 1082 .addReg(SPReg) 1083 .addReg(ScratchReg); 1084 HasSTUX = true; 1085 1086 } else if (!isLargeFrame) { 1087 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1088 .addReg(SPReg) 1089 .addImm(NegFrameSize) 1090 .addReg(SPReg); 1091 1092 } else { 1093 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1094 .addImm(NegFrameSize >> 16); 1095 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1096 .addReg(ScratchReg, RegState::Kill) 1097 .addImm(NegFrameSize & 0xFFFF); 1098 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1099 .addReg(SPReg, RegState::Kill) 1100 .addReg(SPReg) 1101 .addReg(ScratchReg); 1102 HasSTUX = true; 1103 } 1104 1105 // Save the TOC register after the stack pointer update if a prologue TOC 1106 // save is required for the function. 
1107 if (MustSaveTOC) { 1108 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1109 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1110 .addReg(TOCReg, getKillRegState(true)) 1111 .addImm(TOCSaveOffset) 1112 .addReg(SPReg); 1113 } 1114 1115 if (!HasRedZone) { 1116 assert(!isPPC64 && "A red zone is always available on PPC64"); 1117 if (HasSTUX) { 1118 // The negated frame size is in ScratchReg, and the SPReg has been 1119 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1120 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1121 // the stack frame (i.e. the old SP), ideally, we would put the old 1122 // SP into a register and use it as the base for the stores. The 1123 // problem is that the only available register may be ScratchReg, 1124 // which could be R0, and R0 cannot be used as a base address. 1125 1126 // First, set ScratchReg to the old SP. This may need to be modified 1127 // later. 1128 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1129 .addReg(ScratchReg, RegState::Kill) 1130 .addReg(SPReg); 1131 1132 if (ScratchReg == PPC::R0) { 1133 // R0 cannot be used as a base register, but it can be used as an 1134 // index in a store-indexed. 1135 int LastOffset = 0; 1136 if (HasFP) { 1137 // R0 += (FPOffset-LastOffset). 1138 // Need addic, since addi treats R0 as 0. 1139 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1140 .addReg(ScratchReg) 1141 .addImm(FPOffset-LastOffset); 1142 LastOffset = FPOffset; 1143 // Store FP into *R0. 1144 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1145 .addReg(FPReg, RegState::Kill) // Save FP. 1146 .addReg(PPC::ZERO) 1147 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1148 } 1149 if (FI->usesPICBase()) { 1150 // R0 += (PBPOffset-LastOffset). 
1151 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1152 .addReg(ScratchReg) 1153 .addImm(PBPOffset-LastOffset); 1154 LastOffset = PBPOffset; 1155 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1156 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1157 .addReg(PPC::ZERO) 1158 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1159 } 1160 if (HasBP) { 1161 // R0 += (BPOffset-LastOffset). 1162 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1163 .addReg(ScratchReg) 1164 .addImm(BPOffset-LastOffset); 1165 LastOffset = BPOffset; 1166 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1167 .addReg(BPReg, RegState::Kill) // Save BP. 1168 .addReg(PPC::ZERO) 1169 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1170 // BP = R0-LastOffset 1171 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1172 .addReg(ScratchReg, RegState::Kill) 1173 .addImm(-LastOffset); 1174 } 1175 } else { 1176 // ScratchReg is not R0, so use it as the base register. It is 1177 // already set to the old SP, so we can use the offsets directly. 1178 1179 // Now that the stack frame has been allocated, save all the necessary 1180 // registers using ScratchReg as the base address. 1181 if (HasFP) 1182 BuildMI(MBB, MBBI, dl, StoreInst) 1183 .addReg(FPReg) 1184 .addImm(FPOffset) 1185 .addReg(ScratchReg); 1186 if (FI->usesPICBase()) 1187 BuildMI(MBB, MBBI, dl, StoreInst) 1188 .addReg(PPC::R30) 1189 .addImm(PBPOffset) 1190 .addReg(ScratchReg); 1191 if (HasBP) { 1192 BuildMI(MBB, MBBI, dl, StoreInst) 1193 .addReg(BPReg) 1194 .addImm(BPOffset) 1195 .addReg(ScratchReg); 1196 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1197 .addReg(ScratchReg, RegState::Kill) 1198 .addReg(ScratchReg); 1199 } 1200 } 1201 } else { 1202 // The frame size is a known 16-bit constant (fitting in the immediate 1203 // field of STWU). To be here we have to be compiling for PPC32. 1204 // Since the SPReg has been decreased by FrameSize, add it back to each 1205 // offset. 
1206 if (HasFP) 1207 BuildMI(MBB, MBBI, dl, StoreInst) 1208 .addReg(FPReg) 1209 .addImm(FrameSize + FPOffset) 1210 .addReg(SPReg); 1211 if (FI->usesPICBase()) 1212 BuildMI(MBB, MBBI, dl, StoreInst) 1213 .addReg(PPC::R30) 1214 .addImm(FrameSize + PBPOffset) 1215 .addReg(SPReg); 1216 if (HasBP) { 1217 BuildMI(MBB, MBBI, dl, StoreInst) 1218 .addReg(BPReg) 1219 .addImm(FrameSize + BPOffset) 1220 .addReg(SPReg); 1221 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1222 .addReg(SPReg) 1223 .addImm(FrameSize); 1224 } 1225 } 1226 } 1227 1228 // Add Call Frame Information for the instructions we generated above. 1229 if (needsCFI) { 1230 unsigned CFIIndex; 1231 1232 if (HasBP) { 1233 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1234 // because if the stack needed aligning then CFA won't be at a fixed 1235 // offset from FP/SP. 1236 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1237 CFIIndex = MF.addFrameInst( 1238 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1239 } else { 1240 // Adjust the definition of CFA to account for the change in SP. 1241 assert(NegFrameSize); 1242 CFIIndex = MF.addFrameInst( 1243 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 1244 } 1245 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1246 .addCFIIndex(CFIIndex); 1247 1248 if (HasFP) { 1249 // Describe where FP was saved, at a fixed offset from CFA. 1250 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1251 CFIIndex = MF.addFrameInst( 1252 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1253 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1254 .addCFIIndex(CFIIndex); 1255 } 1256 1257 if (FI->usesPICBase()) { 1258 // Describe where FP was saved, at a fixed offset from CFA. 
1259 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1260 CFIIndex = MF.addFrameInst( 1261 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1262 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1263 .addCFIIndex(CFIIndex); 1264 } 1265 1266 if (HasBP) { 1267 // Describe where BP was saved, at a fixed offset from CFA. 1268 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1269 CFIIndex = MF.addFrameInst( 1270 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1271 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1272 .addCFIIndex(CFIIndex); 1273 } 1274 1275 if (MustSaveLR) { 1276 // Describe where LR was saved, at a fixed offset from CFA. 1277 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1278 CFIIndex = MF.addFrameInst( 1279 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1280 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1281 .addCFIIndex(CFIIndex); 1282 } 1283 } 1284 1285 // If there is a frame pointer, copy R1 into R31 1286 if (HasFP) { 1287 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1288 .addReg(SPReg) 1289 .addReg(SPReg); 1290 1291 if (!HasBP && needsCFI) { 1292 // Change the definition of CFA from SP+offset to FP+offset, because SP 1293 // will change at every alloca. 1294 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1295 unsigned CFIIndex = MF.addFrameInst( 1296 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1297 1298 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1299 .addCFIIndex(CFIIndex); 1300 } 1301 } 1302 1303 if (needsCFI) { 1304 // Describe where callee saved registers were saved, at fixed offsets from 1305 // CFA. 1306 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1307 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1308 unsigned Reg = CSI[I].getReg(); 1309 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1310 1311 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1312 // subregisters of CR2. 
We just need to emit a move of CR2. 1313 if (PPC::CRBITRCRegClass.contains(Reg)) 1314 continue; 1315 1316 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1317 continue; 1318 1319 // For SVR4, don't emit a move for the CR spill slot if we haven't 1320 // spilled CRs. 1321 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1322 && !MustSaveCR) 1323 continue; 1324 1325 // For 64-bit SVR4 when we have spilled CRs, the spill location 1326 // is SP+8, not a frame-relative slot. 1327 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1328 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1329 // the whole CR word. In the ELFv2 ABI, every CR that was 1330 // actually saved gets its own CFI record. 1331 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1332 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1333 nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset())); 1334 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1335 .addCFIIndex(CFIIndex); 1336 continue; 1337 } 1338 1339 if (CSI[I].isSpilledToReg()) { 1340 unsigned SpilledReg = CSI[I].getDstReg(); 1341 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1342 nullptr, MRI->getDwarfRegNum(Reg, true), 1343 MRI->getDwarfRegNum(SpilledReg, true))); 1344 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1345 .addCFIIndex(CFIRegister); 1346 } else { 1347 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1348 // We have changed the object offset above but we do not want to change 1349 // the actual offsets in the CFI instruction so we have to undo the 1350 // offset change here. 
1351 if (MovingStackUpdateDown) 1352 Offset -= NegFrameSize; 1353 1354 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1355 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1356 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1357 .addCFIIndex(CFIIndex); 1358 } 1359 } 1360 } 1361 } 1362 1363 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1364 MachineBasicBlock &MBB) const { 1365 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1366 DebugLoc dl; 1367 1368 if (MBBI != MBB.end()) 1369 dl = MBBI->getDebugLoc(); 1370 1371 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1372 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1373 1374 // Get alignment info so we know how to restore the SP. 1375 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1376 1377 // Get the number of bytes allocated from the FrameInfo. 1378 int FrameSize = MFI.getStackSize(); 1379 1380 // Get processor type. 1381 bool isPPC64 = Subtarget.isPPC64(); 1382 // Get the ABI. 1383 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1384 1385 // Check if the link register (LR) has been saved. 1386 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1387 bool MustSaveLR = FI->mustSaveLR(); 1388 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 1389 bool MustSaveCR = !MustSaveCRs.empty(); 1390 // Do we have a frame pointer and/or base pointer for this function? 1391 bool HasFP = hasFP(MF); 1392 bool HasBP = RegInfo->hasBasePointer(MF); 1393 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1394 1395 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1396 unsigned BPReg = RegInfo->getBaseRegister(MF); 1397 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1398 unsigned ScratchReg = 0; 1399 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1400 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1401 : PPC::MTLR ); 1402 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? 
PPC::LD 1403 : PPC::LWZ ); 1404 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1405 : PPC::LIS ); 1406 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1407 : PPC::OR ); 1408 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1409 : PPC::ORI ); 1410 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1411 : PPC::ADDI ); 1412 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1413 : PPC::ADD4 ); 1414 1415 int LROffset = getReturnSaveOffset(); 1416 1417 int FPOffset = 0; 1418 1419 // Using the same bool variable as below to suppress compiler warnings. 1420 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1421 &TempReg); 1422 assert(SingleScratchReg && 1423 "Could not find an available scratch register"); 1424 1425 SingleScratchReg = ScratchReg == TempReg; 1426 1427 if (HasFP) { 1428 if (isSVR4ABI) { 1429 int FPIndex = FI->getFramePointerSaveIndex(); 1430 assert(FPIndex && "No Frame Pointer Save Slot!"); 1431 FPOffset = MFI.getObjectOffset(FPIndex); 1432 } else { 1433 FPOffset = getFramePointerSaveOffset(); 1434 } 1435 } 1436 1437 int BPOffset = 0; 1438 if (HasBP) { 1439 if (isSVR4ABI) { 1440 int BPIndex = FI->getBasePointerSaveIndex(); 1441 assert(BPIndex && "No Base Pointer Save Slot!"); 1442 BPOffset = MFI.getObjectOffset(BPIndex); 1443 } else { 1444 BPOffset = getBasePointerSaveOffset(); 1445 } 1446 } 1447 1448 int PBPOffset = 0; 1449 if (FI->usesPICBase()) { 1450 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1451 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1452 PBPOffset = MFI.getObjectOffset(PBPIndex); 1453 } 1454 1455 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1456 1457 if (IsReturnBlock) { 1458 unsigned RetOpcode = MBBI->getOpcode(); 1459 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1460 RetOpcode == PPC::TCRETURNdi || 1461 RetOpcode == PPC::TCRETURNai || 1462 RetOpcode == PPC::TCRETURNri8 || 1463 RetOpcode == PPC::TCRETURNdi8 || 1464 RetOpcode 
== PPC::TCRETURNai8; 1465 1466 if (UsesTCRet) { 1467 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1468 MachineOperand &StackAdjust = MBBI->getOperand(1); 1469 assert(StackAdjust.isImm() && "Expecting immediate value."); 1470 // Adjust stack pointer. 1471 int StackAdj = StackAdjust.getImm(); 1472 int Delta = StackAdj - MaxTCRetDelta; 1473 assert((Delta >= 0) && "Delta must be positive"); 1474 if (MaxTCRetDelta>0) 1475 FrameSize += (StackAdj +Delta); 1476 else 1477 FrameSize += StackAdj; 1478 } 1479 } 1480 1481 // Frames of 32KB & larger require special handling because they cannot be 1482 // indexed into with a simple LD/LWZ immediate offset operand. 1483 bool isLargeFrame = !isInt<16>(FrameSize); 1484 1485 // On targets without red zone, the SP needs to be restored last, so that 1486 // all live contents of the stack frame are upwards of the SP. This means 1487 // that we cannot restore SP just now, since there may be more registers 1488 // to restore from the stack frame (e.g. R31). If the frame size is not 1489 // a simple immediate value, we will need a spare register to hold the 1490 // restored SP. If the frame size is known and small, we can simply adjust 1491 // the offsets of the registers to be restored, and still use SP to restore 1492 // them. In such case, the final update of SP will be to add the frame 1493 // size to it. 1494 // To simplify the code, set RBReg to the base register used to restore 1495 // values from the stack, and set SPAdd to the value that needs to be added 1496 // to the SP at the end. The default values are as if red zone was present. 1497 unsigned RBReg = SPReg; 1498 unsigned SPAdd = 0; 1499 1500 // Check if we can move the stack update instruction up the epilogue 1501 // past the callee saves. This will allow the move to LR instruction 1502 // to be executed before the restores of the callee saves which means 1503 // that the callee saves can hide the latency from the MTLR instrcution. 
1504 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1505 if (stackUpdateCanBeMoved(MF)) { 1506 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1507 for (CalleeSavedInfo CSI : Info) { 1508 int FrIdx = CSI.getFrameIdx(); 1509 // If the frame index is not negative the callee saved info belongs to a 1510 // stack object that is not a fixed stack object. We ignore non-fixed 1511 // stack objects because we won't move the update of the stack pointer 1512 // past them. 1513 if (FrIdx >= 0) 1514 continue; 1515 1516 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1517 StackUpdateLoc--; 1518 else { 1519 // Abort the operation as we can't update all CSR restores. 1520 StackUpdateLoc = MBBI; 1521 break; 1522 } 1523 } 1524 } 1525 1526 if (FrameSize) { 1527 // In the prologue, the loaded (or persistent) stack pointer value is 1528 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1529 // zone add this offset back now. 1530 1531 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1532 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1533 // call which invalidates the stack pointer value in SP(0). So we use the 1534 // value of R31 in this case. 
1535 if (FI->hasFastCall()) { 1536 assert(HasFP && "Expecting a valid frame pointer."); 1537 if (!HasRedZone) 1538 RBReg = FPReg; 1539 if (!isLargeFrame) { 1540 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1541 .addReg(FPReg).addImm(FrameSize); 1542 } else { 1543 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1544 .addImm(FrameSize >> 16); 1545 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1546 .addReg(ScratchReg, RegState::Kill) 1547 .addImm(FrameSize & 0xFFFF); 1548 BuildMI(MBB, MBBI, dl, AddInst) 1549 .addReg(RBReg) 1550 .addReg(FPReg) 1551 .addReg(ScratchReg); 1552 } 1553 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1554 if (HasRedZone) { 1555 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1556 .addReg(SPReg) 1557 .addImm(FrameSize); 1558 } else { 1559 // Make sure that adding FrameSize will not overflow the max offset 1560 // size. 1561 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1562 "Local offsets should be negative"); 1563 SPAdd = FrameSize; 1564 FPOffset += FrameSize; 1565 BPOffset += FrameSize; 1566 PBPOffset += FrameSize; 1567 } 1568 } else { 1569 // We don't want to use ScratchReg as a base register, because it 1570 // could happen to be R0. Use FP instead, but make sure to preserve it. 1571 if (!HasRedZone) { 1572 // If FP is not saved, copy it to ScratchReg. 1573 if (!HasFP) 1574 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1575 .addReg(FPReg) 1576 .addReg(FPReg); 1577 RBReg = FPReg; 1578 } 1579 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1580 .addImm(0) 1581 .addReg(SPReg); 1582 } 1583 } 1584 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1585 // If there is no red zone, ScratchReg may be needed for holding a useful 1586 // value (although not the base register). Make sure it is not overwritten 1587 // too early. 
1588 1589 assert((isPPC64 || !MustSaveCR) && 1590 "Epilogue CR restoring supported only in 64-bit mode"); 1591 1592 // If we need to restore both the LR and the CR and we only have one 1593 // available scratch register, we must do them one at a time. 1594 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1595 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1596 // is live here. 1597 assert(HasRedZone && "Expecting red zone"); 1598 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1599 .addImm(getCRSaveOffset()) 1600 .addReg(SPReg); 1601 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1602 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1603 .addReg(TempReg, getKillRegState(i == e-1)); 1604 } 1605 1606 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1607 // LR is stored in the caller's stack frame. ScratchReg will be needed 1608 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1609 // a base register anyway, because it may happen to be R0. 1610 bool LoadedLR = false; 1611 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1612 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1613 .addImm(LROffset+SPAdd) 1614 .addReg(RBReg); 1615 LoadedLR = true; 1616 } 1617 1618 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1619 // This will only occur for PPC64. 1620 assert(isPPC64 && "Expecting 64-bit mode"); 1621 assert(RBReg == SPReg && "Should be using SP as a base register"); 1622 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1623 .addImm(getCRSaveOffset()) 1624 .addReg(RBReg); 1625 } 1626 1627 if (HasFP) { 1628 // If there is red zone, restore FP directly, since SP has already been 1629 // restored. Otherwise, restore the value of FP into ScratchReg. 
1630 if (HasRedZone || RBReg == SPReg) 1631 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1632 .addImm(FPOffset) 1633 .addReg(SPReg); 1634 else 1635 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1636 .addImm(FPOffset) 1637 .addReg(RBReg); 1638 } 1639 1640 if (FI->usesPICBase()) 1641 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1642 .addImm(PBPOffset) 1643 .addReg(RBReg); 1644 1645 if (HasBP) 1646 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1647 .addImm(BPOffset) 1648 .addReg(RBReg); 1649 1650 // There is nothing more to be loaded from the stack, so now we can 1651 // restore SP: SP = RBReg + SPAdd. 1652 if (RBReg != SPReg || SPAdd != 0) { 1653 assert(!HasRedZone && "This should not happen with red zone"); 1654 // If SPAdd is 0, generate a copy. 1655 if (SPAdd == 0) 1656 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1657 .addReg(RBReg) 1658 .addReg(RBReg); 1659 else 1660 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1661 .addReg(RBReg) 1662 .addImm(SPAdd); 1663 1664 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1665 if (RBReg == FPReg) 1666 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1667 .addReg(ScratchReg) 1668 .addReg(ScratchReg); 1669 1670 // Now load the LR from the caller's stack frame. 1671 if (MustSaveLR && !LoadedLR) 1672 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1673 .addImm(LROffset) 1674 .addReg(SPReg); 1675 } 1676 1677 if (MustSaveCR && 1678 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 1679 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1680 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1681 .addReg(TempReg, getKillRegState(i == e-1)); 1682 1683 if (MustSaveLR) 1684 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1685 1686 // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail 1687 // call optimization 1688 if (IsReturnBlock) { 1689 unsigned RetOpcode = MBBI->getOpcode(); 1690 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1691 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1692 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1693 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1694 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1695 1696 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1697 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1698 .addReg(SPReg).addImm(CallerAllocatedAmt); 1699 } else { 1700 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1701 .addImm(CallerAllocatedAmt >> 16); 1702 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1703 .addReg(ScratchReg, RegState::Kill) 1704 .addImm(CallerAllocatedAmt & 0xFFFF); 1705 BuildMI(MBB, MBBI, dl, AddInst) 1706 .addReg(SPReg) 1707 .addReg(FPReg) 1708 .addReg(ScratchReg); 1709 } 1710 } else { 1711 createTailCallBranchInstr(MBB); 1712 } 1713 } 1714 } 1715 1716 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1717 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1718 1719 // If we got this far a first terminator should exist. 1720 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1721 1722 DebugLoc dl = MBBI->getDebugLoc(); 1723 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1724 1725 // Create branch instruction for pseudo tail call return instruction 1726 unsigned RetOpcode = MBBI->getOpcode(); 1727 if (RetOpcode == PPC::TCRETURNdi) { 1728 MBBI = MBB.getLastNonDebugInstr(); 1729 MachineOperand &JumpTarget = MBBI->getOperand(0); 1730 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 
1731 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1732 } else if (RetOpcode == PPC::TCRETURNri) { 1733 MBBI = MBB.getLastNonDebugInstr(); 1734 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1735 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1736 } else if (RetOpcode == PPC::TCRETURNai) { 1737 MBBI = MBB.getLastNonDebugInstr(); 1738 MachineOperand &JumpTarget = MBBI->getOperand(0); 1739 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1740 } else if (RetOpcode == PPC::TCRETURNdi8) { 1741 MBBI = MBB.getLastNonDebugInstr(); 1742 MachineOperand &JumpTarget = MBBI->getOperand(0); 1743 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1744 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1745 } else if (RetOpcode == PPC::TCRETURNri8) { 1746 MBBI = MBB.getLastNonDebugInstr(); 1747 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1748 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1749 } else if (RetOpcode == PPC::TCRETURNai8) { 1750 MBBI = MBB.getLastNonDebugInstr(); 1751 MachineOperand &JumpTarget = MBBI->getOperand(0); 1752 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1753 } 1754 } 1755 1756 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1757 BitVector &SavedRegs, 1758 RegScavenger *RS) const { 1759 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1760 1761 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1762 1763 // Save and clear the LR state. 
1764 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1765 unsigned LR = RegInfo->getRARegister(); 1766 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1767 SavedRegs.reset(LR); 1768 1769 // Save R31 if necessary 1770 int FPSI = FI->getFramePointerSaveIndex(); 1771 bool isPPC64 = Subtarget.isPPC64(); 1772 bool isDarwinABI = Subtarget.isDarwinABI(); 1773 MachineFrameInfo &MFI = MF.getFrameInfo(); 1774 1775 // If the frame pointer save index hasn't been defined yet. 1776 if (!FPSI && needsFP(MF)) { 1777 // Find out what the fix offset of the frame pointer save area. 1778 int FPOffset = getFramePointerSaveOffset(); 1779 // Allocate the frame index for frame pointer save area. 1780 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1781 // Save the result. 1782 FI->setFramePointerSaveIndex(FPSI); 1783 } 1784 1785 int BPSI = FI->getBasePointerSaveIndex(); 1786 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1787 int BPOffset = getBasePointerSaveOffset(); 1788 // Allocate the frame index for the base pointer save area. 1789 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1790 // Save the result. 1791 FI->setBasePointerSaveIndex(BPSI); 1792 } 1793 1794 // Reserve stack space for the PIC Base register (R30). 1795 // Only used in SVR4 32-bit. 1796 if (FI->usesPICBase()) { 1797 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1798 FI->setPICBasePointerSaveIndex(PBPSI); 1799 } 1800 1801 // Make sure we don't explicitly spill r31, because, for example, we have 1802 // some inline asm which explicitly clobbers it, when we otherwise have a 1803 // frame pointer and are using r31's spill slot for the prologue/epilogue 1804 // code. Same goes for the base pointer and the PIC base register. 1805 if (needsFP(MF)) 1806 SavedRegs.reset(isPPC64 ? 
PPC::X31 : PPC::R31); 1807 if (RegInfo->hasBasePointer(MF)) 1808 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1809 if (FI->usesPICBase()) 1810 SavedRegs.reset(PPC::R30); 1811 1812 // Reserve stack space to move the linkage area to in case of a tail call. 1813 int TCSPDelta = 0; 1814 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1815 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1816 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1817 } 1818 1819 // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the 1820 // function uses CR 2, 3, or 4. 1821 if (!isPPC64 && !isDarwinABI && 1822 (SavedRegs.test(PPC::CR2) || 1823 SavedRegs.test(PPC::CR3) || 1824 SavedRegs.test(PPC::CR4))) { 1825 int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true); 1826 FI->setCRSpillFrameIndex(FrameIdx); 1827 } 1828 } 1829 1830 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1831 RegScavenger *RS) const { 1832 // Early exit if not using the SVR4 ABI. 1833 if (!Subtarget.isSVR4ABI()) { 1834 addScavengingSpillSlot(MF, RS); 1835 return; 1836 } 1837 1838 // Get callee saved register information. 1839 MachineFrameInfo &MFI = MF.getFrameInfo(); 1840 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1841 1842 // If the function is shrink-wrapped, and if the function has a tail call, the 1843 // tail call might not be in the new RestoreBlock, so real branch instruction 1844 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1845 // RestoreBlock. So we handle this case here. 1846 if (MFI.getSavePoint() && MFI.hasTailCall()) { 1847 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1848 for (MachineBasicBlock &MBB : MF) { 1849 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1850 createTailCallBranchInstr(MBB); 1851 } 1852 } 1853 1854 // Early exit if no callee saved registers are modified! 
1855 if (CSI.empty() && !needsFP(MF)) { 1856 addScavengingSpillSlot(MF, RS); 1857 return; 1858 } 1859 1860 unsigned MinGPR = PPC::R31; 1861 unsigned MinG8R = PPC::X31; 1862 unsigned MinFPR = PPC::F31; 1863 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31; 1864 1865 bool HasGPSaveArea = false; 1866 bool HasG8SaveArea = false; 1867 bool HasFPSaveArea = false; 1868 bool HasVRSAVESaveArea = false; 1869 bool HasVRSaveArea = false; 1870 1871 SmallVector<CalleeSavedInfo, 18> GPRegs; 1872 SmallVector<CalleeSavedInfo, 18> G8Regs; 1873 SmallVector<CalleeSavedInfo, 18> FPRegs; 1874 SmallVector<CalleeSavedInfo, 18> VRegs; 1875 1876 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1877 unsigned Reg = CSI[i].getReg(); 1878 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 1879 (Reg != PPC::X2 && Reg != PPC::R2)) && 1880 "Not expecting to try to spill R2 in a function that must save TOC"); 1881 if (PPC::GPRCRegClass.contains(Reg) || 1882 PPC::SPE4RCRegClass.contains(Reg)) { 1883 HasGPSaveArea = true; 1884 1885 GPRegs.push_back(CSI[i]); 1886 1887 if (Reg < MinGPR) { 1888 MinGPR = Reg; 1889 } 1890 } else if (PPC::G8RCRegClass.contains(Reg)) { 1891 HasG8SaveArea = true; 1892 1893 G8Regs.push_back(CSI[i]); 1894 1895 if (Reg < MinG8R) { 1896 MinG8R = Reg; 1897 } 1898 } else if (PPC::F8RCRegClass.contains(Reg)) { 1899 HasFPSaveArea = true; 1900 1901 FPRegs.push_back(CSI[i]); 1902 1903 if (Reg < MinFPR) { 1904 MinFPR = Reg; 1905 } 1906 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1907 PPC::CRRCRegClass.contains(Reg)) { 1908 ; // do nothing, as we already know whether CRs are spilled 1909 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 1910 HasVRSAVESaveArea = true; 1911 } else if (PPC::VRRCRegClass.contains(Reg) || 1912 PPC::SPERCRegClass.contains(Reg)) { 1913 // Altivec and SPE are mutually exclusive, but have the same stack 1914 // alignment requirements, so overload the save area for both cases. 
1915 HasVRSaveArea = true; 1916 1917 VRegs.push_back(CSI[i]); 1918 1919 if (Reg < MinVR) { 1920 MinVR = Reg; 1921 } 1922 } else { 1923 llvm_unreachable("Unknown RegisterClass!"); 1924 } 1925 } 1926 1927 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1928 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1929 1930 int64_t LowerBound = 0; 1931 1932 // Take into account stack space reserved for tail calls. 1933 int TCSPDelta = 0; 1934 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1935 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1936 LowerBound = TCSPDelta; 1937 } 1938 1939 // The Floating-point register save area is right below the back chain word 1940 // of the previous stack frame. 1941 if (HasFPSaveArea) { 1942 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1943 int FI = FPRegs[i].getFrameIdx(); 1944 1945 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1946 } 1947 1948 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1949 } 1950 1951 // Check whether the frame pointer register is allocated. If so, make sure it 1952 // is spilled to the correct offset. 1953 if (needsFP(MF)) { 1954 int FI = PFI->getFramePointerSaveIndex(); 1955 assert(FI && "No Frame Pointer Save Slot!"); 1956 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1957 // FP is R31/X31, so no need to update MinGPR/MinG8R. 
1958 HasGPSaveArea = true; 1959 } 1960 1961 if (PFI->usesPICBase()) { 1962 int FI = PFI->getPICBasePointerSaveIndex(); 1963 assert(FI && "No PIC Base Pointer Save Slot!"); 1964 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1965 1966 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 1967 HasGPSaveArea = true; 1968 } 1969 1970 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1971 if (RegInfo->hasBasePointer(MF)) { 1972 int FI = PFI->getBasePointerSaveIndex(); 1973 assert(FI && "No Base Pointer Save Slot!"); 1974 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1975 1976 unsigned BP = RegInfo->getBaseRegister(MF); 1977 if (PPC::G8RCRegClass.contains(BP)) { 1978 MinG8R = std::min<unsigned>(MinG8R, BP); 1979 HasG8SaveArea = true; 1980 } else if (PPC::GPRCRegClass.contains(BP)) { 1981 MinGPR = std::min<unsigned>(MinGPR, BP); 1982 HasGPSaveArea = true; 1983 } 1984 } 1985 1986 // General register save area starts right below the Floating-point 1987 // register save area. 1988 if (HasGPSaveArea || HasG8SaveArea) { 1989 // Move general register save area spill slots down, taking into account 1990 // the size of the Floating-point register save area. 1991 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1992 if (!GPRegs[i].isSpilledToReg()) { 1993 int FI = GPRegs[i].getFrameIdx(); 1994 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1995 } 1996 } 1997 1998 // Move general register save area spill slots down, taking into account 1999 // the size of the Floating-point register save area. 
2000 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 2001 if (!G8Regs[i].isSpilledToReg()) { 2002 int FI = G8Regs[i].getFrameIdx(); 2003 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2004 } 2005 } 2006 2007 unsigned MinReg = 2008 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 2009 TRI->getEncodingValue(MinG8R)); 2010 2011 if (Subtarget.isPPC64()) { 2012 LowerBound -= (31 - MinReg + 1) * 8; 2013 } else { 2014 LowerBound -= (31 - MinReg + 1) * 4; 2015 } 2016 } 2017 2018 // For 32-bit only, the CR save area is below the general register 2019 // save area. For 64-bit SVR4, the CR save area is addressed relative 2020 // to the stack pointer and hence does not need an adjustment here. 2021 // Only CR2 (the first nonvolatile spilled) has an associated frame 2022 // index so that we have a single uniform save area. 2023 if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { 2024 // Adjust the frame index of the CR spill slot. 2025 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2026 unsigned Reg = CSI[i].getReg(); 2027 2028 if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) 2029 // Leave Darwin logic as-is. 2030 || (!Subtarget.isSVR4ABI() && 2031 (PPC::CRBITRCRegClass.contains(Reg) || 2032 PPC::CRRCRegClass.contains(Reg)))) { 2033 int FI = CSI[i].getFrameIdx(); 2034 2035 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2036 } 2037 } 2038 2039 LowerBound -= 4; // The CR save area is always 4 bytes long. 2040 } 2041 2042 if (HasVRSAVESaveArea) { 2043 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 2044 // which have the VRSAVE register class? 2045 // Adjust the frame index of the VRSAVE spill slot. 
2046 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2047 unsigned Reg = CSI[i].getReg(); 2048 2049 if (PPC::VRSAVERCRegClass.contains(Reg)) { 2050 int FI = CSI[i].getFrameIdx(); 2051 2052 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2053 } 2054 } 2055 2056 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 2057 } 2058 2059 // Both Altivec and SPE have the same alignment and padding requirements 2060 // within the stack frame. 2061 if (HasVRSaveArea) { 2062 // Insert alignment padding, we need 16-byte alignment. Note: for positive 2063 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2064 // we are using negative number here (the stack grows downward). We should 2065 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2066 // is the alignment size ( n = 16 here) and y is the size after aligning. 2067 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2068 LowerBound &= ~(15); 2069 2070 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2071 int FI = VRegs[i].getFrameIdx(); 2072 2073 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2074 } 2075 } 2076 2077 addScavengingSpillSlot(MF, RS); 2078 } 2079 2080 void 2081 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2082 RegScavenger *RS) const { 2083 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2084 // a large stack, which will require scavenging a register to materialize a 2085 // large offset. 2086 2087 // We need to have a scavenger spill slot for spills if the frame size is 2088 // large. In case there is no free register for large-offset addressing, 2089 // this slot is used for the necessary emergency spill. Also, we need the 2090 // slot for dynamic stack allocations. 2091 2092 // The scavenger might be invoked if the frame offset does not fit into 2093 // the 16-bit immediate. 
We don't know the complete frame size here 2094 // because we've not yet computed callee-saved register spills or the 2095 // needed alignment padding. 2096 unsigned StackSize = determineFrameLayout(MF, true); 2097 MachineFrameInfo &MFI = MF.getFrameInfo(); 2098 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 2099 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 2100 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2101 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2102 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2103 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2104 unsigned Size = TRI.getSpillSize(RC); 2105 unsigned Align = TRI.getSpillAlignment(RC); 2106 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2107 2108 // Might we have over-aligned allocas? 2109 bool HasAlVars = MFI.hasVarSizedObjects() && 2110 MFI.getMaxAlignment() > getStackAlignment(); 2111 2112 // These kinds of spills might need two registers. 2113 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 2114 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2115 2116 } 2117 } 2118 2119 // This function checks if a callee saved gpr can be spilled to a volatile 2120 // vector register. This occurs for leaf functions when the option 2121 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2122 // which were not spilled to vectors, return false so the target independent 2123 // code can handle them by assigning a FrameIdx to a stack slot. 2124 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2125 MachineFunction &MF, const TargetRegisterInfo *TRI, 2126 std::vector<CalleeSavedInfo> &CSI) const { 2127 2128 if (CSI.empty()) 2129 return true; // Early exit if no callee saved registers are modified! 2130 2131 // Early exit if cannot spill gprs to volatile vector registers. 
2132 MachineFrameInfo &MFI = MF.getFrameInfo(); 2133 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2134 return false; 2135 2136 // Build a BitVector of VSRs that can be used for spilling GPRs. 2137 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2138 BitVector BVCalleeSaved(TRI->getNumRegs()); 2139 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2140 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2141 for (unsigned i = 0; CSRegs[i]; ++i) 2142 BVCalleeSaved.set(CSRegs[i]); 2143 2144 for (unsigned Reg : BVAllocatable.set_bits()) { 2145 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2146 // used in the function. 2147 if (BVCalleeSaved[Reg] || 2148 (!PPC::F8RCRegClass.contains(Reg) && 2149 !PPC::VFRCRegClass.contains(Reg)) || 2150 (MF.getRegInfo().isPhysRegUsed(Reg))) 2151 BVAllocatable.reset(Reg); 2152 } 2153 2154 bool AllSpilledToReg = true; 2155 for (auto &CS : CSI) { 2156 if (BVAllocatable.none()) 2157 return false; 2158 2159 unsigned Reg = CS.getReg(); 2160 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2161 AllSpilledToReg = false; 2162 continue; 2163 } 2164 2165 unsigned VolatileVFReg = BVAllocatable.find_first(); 2166 if (VolatileVFReg < BVAllocatable.size()) { 2167 CS.setDstReg(VolatileVFReg); 2168 BVAllocatable.reset(VolatileVFReg); 2169 } else { 2170 AllSpilledToReg = false; 2171 } 2172 } 2173 return AllSpilledToReg; 2174 } 2175 2176 2177 bool 2178 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 2179 MachineBasicBlock::iterator MI, 2180 const std::vector<CalleeSavedInfo> &CSI, 2181 const TargetRegisterInfo *TRI) const { 2182 2183 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 2184 // Return false otherwise to maintain pre-existing behavior. 
2185 if (!Subtarget.isSVR4ABI()) 2186 return false; 2187 2188 MachineFunction *MF = MBB.getParent(); 2189 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2190 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2191 bool MustSaveTOC = FI->mustSaveTOC(); 2192 DebugLoc DL; 2193 bool CRSpilled = false; 2194 MachineInstrBuilder CRMIB; 2195 2196 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2197 unsigned Reg = CSI[i].getReg(); 2198 // Only Darwin actually uses the VRSAVE register, but it can still appear 2199 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 2200 // Darwin, ignore it. 2201 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 2202 continue; 2203 2204 // CR2 through CR4 are the nonvolatile CR fields. 2205 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2206 2207 // Add the callee-saved register as live-in; it's killed at the spill. 2208 // Do not do this for callee-saved registers that are live-in to the 2209 // function because they will already be marked live-in and this will be 2210 // adding it for a second time. It is an error to add the same register 2211 // to the set more than once. 2212 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2213 bool IsLiveIn = MRI.isLiveIn(Reg); 2214 if (!IsLiveIn) 2215 MBB.addLiveIn(Reg); 2216 2217 if (CRSpilled && IsCRField) { 2218 CRMIB.addReg(Reg, RegState::ImplicitKill); 2219 continue; 2220 } 2221 2222 // The actual spill will happen in the prologue. 2223 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2224 continue; 2225 2226 // Insert the spill to the stack frame. 2227 if (IsCRField) { 2228 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2229 if (Subtarget.isPPC64()) { 2230 // The actual spill will happen at the start of the prologue. 2231 FuncInfo->addMustSaveCR(Reg); 2232 } else { 2233 CRSpilled = true; 2234 FuncInfo->setSpillsCR(); 2235 2236 // 32-bit: FP-relative. 
Note that we made sure CR2-CR4 all have 2237 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2238 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2239 .addReg(Reg, RegState::ImplicitKill); 2240 2241 MBB.insert(MI, CRMIB); 2242 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2243 .addReg(PPC::R12, 2244 getKillRegState(true)), 2245 CSI[i].getFrameIdx())); 2246 } 2247 } else { 2248 if (CSI[i].isSpilledToReg()) { 2249 NumPESpillVSR++; 2250 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2251 .addReg(Reg, getKillRegState(true)); 2252 } else { 2253 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2254 // Use !IsLiveIn for the kill flag. 2255 // We do not want to kill registers that are live in this function 2256 // before their use because they will become undefined registers. 2257 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, 2258 CSI[i].getFrameIdx(), RC, TRI); 2259 } 2260 } 2261 } 2262 return true; 2263 } 2264 2265 static void 2266 restoreCRs(bool isPPC64, bool is31, 2267 bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, 2268 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2269 const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { 2270 2271 MachineFunction *MF = MBB.getParent(); 2272 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2273 DebugLoc DL; 2274 unsigned RestoreOp, MoveReg; 2275 2276 if (isPPC64) 2277 // This is handled during epilogue generation. 
2278 return; 2279 else { 2280 // 32-bit: FP-relative 2281 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), 2282 PPC::R12), 2283 CSI[CSIIndex].getFrameIdx())); 2284 RestoreOp = PPC::MTOCRF; 2285 MoveReg = PPC::R12; 2286 } 2287 2288 if (CR2Spilled) 2289 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2290 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2291 2292 if (CR3Spilled) 2293 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2294 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2295 2296 if (CR4Spilled) 2297 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2298 .addReg(MoveReg, getKillRegState(true))); 2299 } 2300 2301 MachineBasicBlock::iterator PPCFrameLowering:: 2302 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2303 MachineBasicBlock::iterator I) const { 2304 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2305 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2306 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2307 // Add (actually subtract) back the amount the callee popped on return. 2308 if (int CalleeAmt = I->getOperand(1).getImm()) { 2309 bool is64Bit = Subtarget.isPPC64(); 2310 CalleeAmt *= -1; 2311 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2312 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2313 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2314 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2315 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2316 unsigned ORIInstr = is64Bit ? 
PPC::ORI8 : PPC::ORI; 2317 const DebugLoc &dl = I->getDebugLoc(); 2318 2319 if (isInt<16>(CalleeAmt)) { 2320 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2321 .addReg(StackReg, RegState::Kill) 2322 .addImm(CalleeAmt); 2323 } else { 2324 MachineBasicBlock::iterator MBBI = I; 2325 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2326 .addImm(CalleeAmt >> 16); 2327 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2328 .addReg(TmpReg, RegState::Kill) 2329 .addImm(CalleeAmt & 0xFFFF); 2330 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2331 .addReg(StackReg, RegState::Kill) 2332 .addReg(TmpReg); 2333 } 2334 } 2335 } 2336 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2337 return MBB.erase(I); 2338 } 2339 2340 bool 2341 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2342 MachineBasicBlock::iterator MI, 2343 std::vector<CalleeSavedInfo> &CSI, 2344 const TargetRegisterInfo *TRI) const { 2345 2346 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 2347 // Return false otherwise to maintain pre-existing behavior. 2348 if (!Subtarget.isSVR4ABI()) 2349 return false; 2350 2351 MachineFunction *MF = MBB.getParent(); 2352 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2353 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2354 bool MustSaveTOC = FI->mustSaveTOC(); 2355 bool CR2Spilled = false; 2356 bool CR3Spilled = false; 2357 bool CR4Spilled = false; 2358 unsigned CSIIndex = 0; 2359 2360 // Initialize insertion-point logic; we will be restoring in reverse 2361 // order of spill. 2362 MachineBasicBlock::iterator I = MI, BeforeI = I; 2363 bool AtStart = I == MBB.begin(); 2364 2365 if (!AtStart) 2366 --BeforeI; 2367 2368 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2369 unsigned Reg = CSI[i].getReg(); 2370 2371 // Only Darwin actually uses the VRSAVE register, but it can still appear 2372 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 2373 // Darwin, ignore it. 
2374 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 2375 continue; 2376 2377 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2378 continue; 2379 2380 if (Reg == PPC::CR2) { 2381 CR2Spilled = true; 2382 // The spill slot is associated only with CR2, which is the 2383 // first nonvolatile spilled. Save it here. 2384 CSIIndex = i; 2385 continue; 2386 } else if (Reg == PPC::CR3) { 2387 CR3Spilled = true; 2388 continue; 2389 } else if (Reg == PPC::CR4) { 2390 CR4Spilled = true; 2391 continue; 2392 } else { 2393 // When we first encounter a non-CR register after seeing at 2394 // least one CR register, restore all spilled CRs together. 2395 if ((CR2Spilled || CR3Spilled || CR4Spilled) 2396 && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 2397 bool is31 = needsFP(*MF); 2398 restoreCRs(Subtarget.isPPC64(), is31, 2399 CR2Spilled, CR3Spilled, CR4Spilled, 2400 MBB, I, CSI, CSIIndex); 2401 CR2Spilled = CR3Spilled = CR4Spilled = false; 2402 } 2403 2404 if (CSI[i].isSpilledToReg()) { 2405 DebugLoc DL; 2406 NumPEReloadVSR++; 2407 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2408 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2409 } else { 2410 // Default behavior for non-CR saves. 2411 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2412 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2413 assert(I != MBB.begin() && 2414 "loadRegFromStackSlot didn't insert any code!"); 2415 } 2416 } 2417 2418 // Insert in reverse order. 2419 if (AtStart) 2420 I = MBB.begin(); 2421 else { 2422 I = BeforeI; 2423 ++I; 2424 } 2425 } 2426 2427 // If we haven't yet spilled the CRs, do so now. 
2428 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2429 bool is31 = needsFP(*MF); 2430 restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, 2431 MBB, I, CSI, CSIIndex); 2432 } 2433 2434 return true; 2435 } 2436 2437 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2438 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2439 return false; 2440 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2441 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2442 } 2443