1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCFrameLowering.h" 14 #include "PPCInstrBuilder.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCMachineFunctionInfo.h" 17 #include "PPCSubtarget.h" 18 #include "PPCTargetMachine.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "framelowering" 32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 34 35 static cl::opt<bool> 36 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 37 cl::desc("Enable spills in prologue to vector registers."), 38 cl::init(false), cl::Hidden); 39 40 /// VRRegNo - Map from a numbered VR register to its enum value. 41 /// 42 static const MCPhysReg VRRegNo[] = { 43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 47 }; 48 49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 50 if (STI.isDarwinABI()) 51 return STI.isPPC64() ? 16 : 8; 52 // SVR4 ABI: 53 return STI.isPPC64() ? 16 : 4; 54 } 55 56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 57 return STI.isELFv2ABI() ? 24 : 40; 58 } 59 60 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 61 // For the Darwin ABI: 62 // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area 63 // for saving the frame pointer (if needed.) While the published ABI has 64 // not used this slot since at least MacOSX 10.2, there is older code 65 // around that does use it, and that needs to continue to work. 66 if (STI.isDarwinABI()) 67 return STI.isPPC64() ? -8U : -4U; 68 69 // SVR4 ABI: First slot in the general register save area. 70 return STI.isPPC64() ? -8U : -4U; 71 } 72 73 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 74 if (STI.isDarwinABI() || STI.isPPC64()) 75 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 76 77 // SVR4 ABI: 78 return 8; 79 } 80 81 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 82 if (STI.isDarwinABI()) 83 return STI.isPPC64() ? -16U : -8U; 84 85 // SVR4 ABI: First slot in the general register save area. 86 return STI.isPPC64() 87 ? -16U 88 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 89 } 90 91 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 92 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 93 STI.getPlatformStackAlignment(), 0), 94 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 95 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 96 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 97 LinkageSize(computeLinkageSize(Subtarget)), 98 BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {} 99 100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 102 unsigned &NumEntries) const { 103 if (Subtarget.isDarwinABI()) { 104 NumEntries = 1; 105 if (Subtarget.isPPC64()) { 106 static const SpillSlot darwin64Offsets = {PPC::X31, -8}; 107 return &darwin64Offsets; 108 } else { 109 static const SpillSlot darwinOffsets = {PPC::R31, -4}; 110 return &darwinOffsets; 111 } 112 } 113 114 // Early exit if not using the SVR4 ABI. 115 if (!Subtarget.isSVR4ABI()) { 116 NumEntries = 0; 117 return nullptr; 118 } 119 120 // Note that the offsets here overlap, but this is fixed up in 121 // processFunctionBeforeFrameFinalized. 122 123 static const SpillSlot Offsets[] = { 124 // Floating-point register save area offsets. 125 {PPC::F31, -8}, 126 {PPC::F30, -16}, 127 {PPC::F29, -24}, 128 {PPC::F28, -32}, 129 {PPC::F27, -40}, 130 {PPC::F26, -48}, 131 {PPC::F25, -56}, 132 {PPC::F24, -64}, 133 {PPC::F23, -72}, 134 {PPC::F22, -80}, 135 {PPC::F21, -88}, 136 {PPC::F20, -96}, 137 {PPC::F19, -104}, 138 {PPC::F18, -112}, 139 {PPC::F17, -120}, 140 {PPC::F16, -128}, 141 {PPC::F15, -136}, 142 {PPC::F14, -144}, 143 144 // General register save area offsets. 145 {PPC::R31, -4}, 146 {PPC::R30, -8}, 147 {PPC::R29, -12}, 148 {PPC::R28, -16}, 149 {PPC::R27, -20}, 150 {PPC::R26, -24}, 151 {PPC::R25, -28}, 152 {PPC::R24, -32}, 153 {PPC::R23, -36}, 154 {PPC::R22, -40}, 155 {PPC::R21, -44}, 156 {PPC::R20, -48}, 157 {PPC::R19, -52}, 158 {PPC::R18, -56}, 159 {PPC::R17, -60}, 160 {PPC::R16, -64}, 161 {PPC::R15, -68}, 162 {PPC::R14, -72}, 163 164 // CR save area offset. We map each of the nonvolatile CR fields 165 // to the slot for CR2, which is the first of the nonvolatile CR 166 // fields to be assigned, so that we only allocate one save slot. 167 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 168 {PPC::CR2, -4}, 169 170 // VRSAVE save area offset. 171 {PPC::VRSAVE, -4}, 172 173 // Vector register save area 174 {PPC::V31, -16}, 175 {PPC::V30, -32}, 176 {PPC::V29, -48}, 177 {PPC::V28, -64}, 178 {PPC::V27, -80}, 179 {PPC::V26, -96}, 180 {PPC::V25, -112}, 181 {PPC::V24, -128}, 182 {PPC::V23, -144}, 183 {PPC::V22, -160}, 184 {PPC::V21, -176}, 185 {PPC::V20, -192}, 186 187 // SPE register save area (overlaps Vector save area). 188 {PPC::S31, -8}, 189 {PPC::S30, -16}, 190 {PPC::S29, -24}, 191 {PPC::S28, -32}, 192 {PPC::S27, -40}, 193 {PPC::S26, -48}, 194 {PPC::S25, -56}, 195 {PPC::S24, -64}, 196 {PPC::S23, -72}, 197 {PPC::S22, -80}, 198 {PPC::S21, -88}, 199 {PPC::S20, -96}, 200 {PPC::S19, -104}, 201 {PPC::S18, -112}, 202 {PPC::S17, -120}, 203 {PPC::S16, -128}, 204 {PPC::S15, -136}, 205 {PPC::S14, -144}}; 206 207 static const SpillSlot Offsets64[] = { 208 // Floating-point register save area offsets. 209 {PPC::F31, -8}, 210 {PPC::F30, -16}, 211 {PPC::F29, -24}, 212 {PPC::F28, -32}, 213 {PPC::F27, -40}, 214 {PPC::F26, -48}, 215 {PPC::F25, -56}, 216 {PPC::F24, -64}, 217 {PPC::F23, -72}, 218 {PPC::F22, -80}, 219 {PPC::F21, -88}, 220 {PPC::F20, -96}, 221 {PPC::F19, -104}, 222 {PPC::F18, -112}, 223 {PPC::F17, -120}, 224 {PPC::F16, -128}, 225 {PPC::F15, -136}, 226 {PPC::F14, -144}, 227 228 // General register save area offsets. 229 {PPC::X31, -8}, 230 {PPC::X30, -16}, 231 {PPC::X29, -24}, 232 {PPC::X28, -32}, 233 {PPC::X27, -40}, 234 {PPC::X26, -48}, 235 {PPC::X25, -56}, 236 {PPC::X24, -64}, 237 {PPC::X23, -72}, 238 {PPC::X22, -80}, 239 {PPC::X21, -88}, 240 {PPC::X20, -96}, 241 {PPC::X19, -104}, 242 {PPC::X18, -112}, 243 {PPC::X17, -120}, 244 {PPC::X16, -128}, 245 {PPC::X15, -136}, 246 {PPC::X14, -144}, 247 248 // VRSAVE save area offset. 249 {PPC::VRSAVE, -4}, 250 251 // Vector register save area 252 {PPC::V31, -16}, 253 {PPC::V30, -32}, 254 {PPC::V29, -48}, 255 {PPC::V28, -64}, 256 {PPC::V27, -80}, 257 {PPC::V26, -96}, 258 {PPC::V25, -112}, 259 {PPC::V24, -128}, 260 {PPC::V23, -144}, 261 {PPC::V22, -160}, 262 {PPC::V21, -176}, 263 {PPC::V20, -192}}; 264 265 if (Subtarget.isPPC64()) { 266 NumEntries = array_lengthof(Offsets64); 267 268 return Offsets64; 269 } else { 270 NumEntries = array_lengthof(Offsets); 271 272 return Offsets; 273 } 274 } 275 276 /// RemoveVRSaveCode - We have found that this function does not need any code 277 /// to manipulate the VRSAVE register, even though it uses vector registers. 278 /// This can happen when the only registers used are known to be live in or out 279 /// of the function. Remove all of the VRSAVE related code from the function. 280 /// FIXME: The removal of the code results in a compile failure at -O0 when the 281 /// function contains a function call, as the GPR containing original VRSAVE 282 /// contents is spilled and reloaded around the call. Without the prolog code, 283 /// the spill instruction refers to an undefined register. This code needs 284 /// to account for all uses of that GPR. 285 static void RemoveVRSaveCode(MachineInstr &MI) { 286 MachineBasicBlock *Entry = MI.getParent(); 287 MachineFunction *MF = Entry->getParent(); 288 289 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 290 MachineBasicBlock::iterator MBBI = MI; 291 ++MBBI; 292 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 293 MBBI->eraseFromParent(); 294 295 bool RemovedAllMTVRSAVEs = true; 296 // See if we can find and remove the MTVRSAVE instruction from all of the 297 // epilog blocks. 298 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 299 // If last instruction is a return instruction, add an epilogue 300 if (I->isReturnBlock()) { 301 bool FoundIt = false; 302 for (MBBI = I->end(); MBBI != I->begin(); ) { 303 --MBBI; 304 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 305 MBBI->eraseFromParent(); // remove it. 306 FoundIt = true; 307 break; 308 } 309 } 310 RemovedAllMTVRSAVEs &= FoundIt; 311 } 312 } 313 314 // If we found and removed all MTVRSAVE instructions, remove the read of 315 // VRSAVE as well. 316 if (RemovedAllMTVRSAVEs) { 317 MBBI = MI; 318 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 319 --MBBI; 320 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 321 MBBI->eraseFromParent(); 322 } 323 324 // Finally, nuke the UPDATE_VRSAVE. 325 MI.eraseFromParent(); 326 } 327 328 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 329 // instruction selector. Based on the vector registers that have been used, 330 // transform this into the appropriate ORI instruction. 331 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 332 MachineFunction *MF = MI.getParent()->getParent(); 333 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 334 DebugLoc dl = MI.getDebugLoc(); 335 336 const MachineRegisterInfo &MRI = MF->getRegInfo(); 337 unsigned UsedRegMask = 0; 338 for (unsigned i = 0; i != 32; ++i) 339 if (MRI.isPhysRegModified(VRRegNo[i])) 340 UsedRegMask |= 1 << (31-i); 341 342 // Live in and live out values already must be in the mask, so don't bother 343 // marking them. 344 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 345 unsigned RegNo = TRI->getEncodingValue(LI.first); 346 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 347 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 348 } 349 350 // Live out registers appear as use operands on return instructions. 351 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 352 UsedRegMask != 0 && BI != BE; ++BI) { 353 const MachineBasicBlock &MBB = *BI; 354 if (!MBB.isReturnBlock()) 355 continue; 356 const MachineInstr &Ret = MBB.back(); 357 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 358 const MachineOperand &MO = Ret.getOperand(I); 359 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 360 continue; 361 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 362 UsedRegMask &= ~(1 << (31-RegNo)); 363 } 364 } 365 366 // If no registers are used, turn this into a copy. 367 if (UsedRegMask == 0) { 368 // Remove all VRSAVE code. 369 RemoveVRSaveCode(MI); 370 return; 371 } 372 373 unsigned SrcReg = MI.getOperand(1).getReg(); 374 unsigned DstReg = MI.getOperand(0).getReg(); 375 376 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 377 if (DstReg != SrcReg) 378 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 379 .addReg(SrcReg) 380 .addImm(UsedRegMask); 381 else 382 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 383 .addReg(SrcReg, RegState::Kill) 384 .addImm(UsedRegMask); 385 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 386 if (DstReg != SrcReg) 387 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 388 .addReg(SrcReg) 389 .addImm(UsedRegMask >> 16); 390 else 391 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 392 .addReg(SrcReg, RegState::Kill) 393 .addImm(UsedRegMask >> 16); 394 } else { 395 if (DstReg != SrcReg) 396 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 397 .addReg(SrcReg) 398 .addImm(UsedRegMask >> 16); 399 else 400 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 401 .addReg(SrcReg, RegState::Kill) 402 .addImm(UsedRegMask >> 16); 403 404 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 405 .addReg(DstReg, RegState::Kill) 406 .addImm(UsedRegMask & 0xFFFF); 407 } 408 409 // Remove the old UPDATE_VRSAVE instruction. 410 MI.eraseFromParent(); 411 } 412 413 static bool spillsCR(const MachineFunction &MF) { 414 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 415 return FuncInfo->isCRSpilled(); 416 } 417 418 static bool spillsVRSAVE(const MachineFunction &MF) { 419 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 420 return FuncInfo->isVRSAVESpilled(); 421 } 422 423 static bool hasSpills(const MachineFunction &MF) { 424 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 425 return FuncInfo->hasSpills(); 426 } 427 428 static bool hasNonRISpills(const MachineFunction &MF) { 429 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 430 return FuncInfo->hasNonRISpills(); 431 } 432 433 /// MustSaveLR - Return true if this function requires that we save the LR 434 /// register onto the stack in the prolog and restore it in the epilog of the 435 /// function. 436 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 437 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 438 439 // We need a save/restore of LR if there is any def of LR (which is 440 // defined by calls, including the PIC setup sequence), or if there is 441 // some use of the LR stack slot (e.g. for builtin_return_address). 442 // (LR comes in 32 and 64 bit versions.) 443 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 444 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 445 } 446 447 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 448 /// call frame size. Update the MachineFunction object with the stack size. 449 unsigned 450 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 451 bool UseEstimate) const { 452 unsigned NewMaxCallFrameSize = 0; 453 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 454 &NewMaxCallFrameSize); 455 MF.getFrameInfo().setStackSize(FrameSize); 456 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 457 return FrameSize; 458 } 459 460 /// determineFrameLayout - Determine the size of the frame and maximum call 461 /// frame size. 462 unsigned 463 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 464 bool UseEstimate, 465 unsigned *NewMaxCallFrameSize) const { 466 const MachineFrameInfo &MFI = MF.getFrameInfo(); 467 468 // Get the number of bytes to allocate from the FrameInfo 469 unsigned FrameSize = 470 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 471 472 // Get stack alignments. The frame must be aligned to the greatest of these: 473 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI 474 unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame 475 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; 476 477 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 478 479 unsigned LR = RegInfo->getRARegister(); 480 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 481 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 482 !MFI.adjustsStack() && // No calls. 483 !MustSaveLR(MF, LR) && // No need to save LR. 484 !RegInfo->hasBasePointer(MF); // No special alignment. 485 486 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 487 // code if all local vars are reg-allocated. 488 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 489 490 // Check whether we can skip adjusting the stack pointer (by using red zone) 491 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 492 // No need for frame 493 return 0; 494 } 495 496 // Get the maximum call frame size of all the calls. 497 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 498 499 // Maximum call frame needs to be at least big enough for linkage area. 500 unsigned minCallFrameSize = getLinkageSize(); 501 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 502 503 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 504 // that allocations will be aligned. 505 if (MFI.hasVarSizedObjects()) 506 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; 507 508 // Update the new max call frame size if the caller passes in a valid pointer. 509 if (NewMaxCallFrameSize) 510 *NewMaxCallFrameSize = maxCallFrameSize; 511 512 // Include call frame size in total. 513 FrameSize += maxCallFrameSize; 514 515 // Make sure the frame is aligned. 516 FrameSize = (FrameSize + AlignMask) & ~AlignMask; 517 518 return FrameSize; 519 } 520 521 // hasFP - Return true if the specified function actually has a dedicated frame 522 // pointer register. 523 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 524 const MachineFrameInfo &MFI = MF.getFrameInfo(); 525 // FIXME: This is pretty much broken by design: hasFP() might be called really 526 // early, before the stack layout was calculated and thus hasFP() might return 527 // true or false here depending on the time of call. 528 return (MFI.getStackSize()) && needsFP(MF); 529 } 530 531 // needsFP - Return true if the specified function should have a dedicated frame 532 // pointer register. This is true if the function has variable sized allocas or 533 // if frame pointer elimination is disabled. 534 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 535 const MachineFrameInfo &MFI = MF.getFrameInfo(); 536 537 // Naked functions have no stack frame pushed, so we don't have a frame 538 // pointer. 539 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 540 return false; 541 542 return MF.getTarget().Options.DisableFramePointerElim(MF) || 543 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 544 (MF.getTarget().Options.GuaranteedTailCallOpt && 545 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 546 } 547 548 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 549 bool is31 = needsFP(MF); 550 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 551 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 552 553 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 554 bool HasBP = RegInfo->hasBasePointer(MF); 555 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 556 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 557 558 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 559 BI != BE; ++BI) 560 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 561 --MBBI; 562 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 563 MachineOperand &MO = MBBI->getOperand(I); 564 if (!MO.isReg()) 565 continue; 566 567 switch (MO.getReg()) { 568 case PPC::FP: 569 MO.setReg(FPReg); 570 break; 571 case PPC::FP8: 572 MO.setReg(FP8Reg); 573 break; 574 case PPC::BP: 575 MO.setReg(BPReg); 576 break; 577 case PPC::BP8: 578 MO.setReg(BP8Reg); 579 break; 580 581 } 582 } 583 } 584 } 585 586 /* This function will do the following: 587 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 588 respectively (defaults recommended by the ABI) and return true 589 - If MBB is not an entry block, initialize the register scavenger and look 590 for available registers. 591 - If the defaults (R0/R12) are available, return true 592 - If TwoUniqueRegsRequired is set to true, it looks for two unique 593 registers. Otherwise, look for a single available register. 594 - If the required registers are found, set SR1 and SR2 and return true. 595 - If the required registers are not found, set SR2 or both SR1 and SR2 to 596 PPC::NoRegister and return false. 597 598 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 599 is not set, this function will attempt to find two different registers, but 600 still return true if only one register is available (and set SR1 == SR2). 601 */ 602 bool 603 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 604 bool UseAtEnd, 605 bool TwoUniqueRegsRequired, 606 unsigned *SR1, 607 unsigned *SR2) const { 608 RegScavenger RS; 609 unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 610 unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 611 612 // Set the defaults for the two scratch registers. 613 if (SR1) 614 *SR1 = R0; 615 616 if (SR2) { 617 assert (SR1 && "Asking for the second scratch register but not the first?"); 618 *SR2 = R12; 619 } 620 621 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 622 if ((UseAtEnd && MBB->isReturnBlock()) || 623 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 624 return true; 625 626 RS.enterBasicBlock(*MBB); 627 628 if (UseAtEnd && !MBB->empty()) { 629 // The scratch register will be used at the end of the block, so must 630 // consider all registers used within the block 631 632 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 633 // If no terminator, back iterator up to previous instruction. 634 if (MBBI == MBB->end()) 635 MBBI = std::prev(MBBI); 636 637 if (MBBI != MBB->begin()) 638 RS.forward(MBBI); 639 } 640 641 // If the two registers are available, we're all good. 642 // Note that we only return here if both R0 and R12 are available because 643 // although the function may not require two unique registers, it may benefit 644 // from having two so we should try to provide them. 645 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 646 return true; 647 648 // Get the list of callee-saved registers for the target. 649 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 650 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 651 652 // Get all the available registers in the block. 653 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 654 &PPC::GPRCRegClass); 655 656 // We shouldn't use callee-saved registers as scratch registers as they may be 657 // available when looking for a candidate block for shrink wrapping but not 658 // available when the actual prologue/epilogue is being emitted because they 659 // were added as live-in to the prologue block by PrologueEpilogueInserter. 660 for (int i = 0; CSRegs[i]; ++i) 661 BV.reset(CSRegs[i]); 662 663 // Set the first scratch register to the first available one. 664 if (SR1) { 665 int FirstScratchReg = BV.find_first(); 666 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 667 } 668 669 // If there is another one available, set the second scratch register to that. 670 // Otherwise, set it to either PPC::NoRegister if this function requires two 671 // or to whatever SR1 is set to if this function doesn't require two. 672 if (SR2) { 673 int SecondScratchReg = BV.find_next(*SR1); 674 if (SecondScratchReg != -1) 675 *SR2 = SecondScratchReg; 676 else 677 *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; 678 } 679 680 // Now that we've done our best to provide both registers, double check 681 // whether we were unable to provide enough. 682 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 683 return false; 684 685 return true; 686 } 687 688 // We need a scratch register for spilling LR and for spilling CR. By default, 689 // we use two scratch registers to hide latency. However, if only one scratch 690 // register is available, we can adjust for that by not overlapping the spill 691 // code. However, if we need to realign the stack (i.e. have a base pointer) 692 // and the stack frame is large, we need two scratch registers. 693 bool 694 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 695 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 696 MachineFunction &MF = *(MBB->getParent()); 697 bool HasBP = RegInfo->hasBasePointer(MF); 698 unsigned FrameSize = determineFrameLayout(MF); 699 int NegFrameSize = -FrameSize; 700 bool IsLargeFrame = !isInt<16>(NegFrameSize); 701 MachineFrameInfo &MFI = MF.getFrameInfo(); 702 unsigned MaxAlign = MFI.getMaxAlignment(); 703 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 704 705 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 706 } 707 708 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 709 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 710 711 return findScratchRegister(TmpMBB, false, 712 twoUniqueScratchRegsRequired(TmpMBB)); 713 } 714 715 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 716 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 717 718 return findScratchRegister(TmpMBB, true); 719 } 720 721 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { 722 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 723 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 724 725 // Abort if there is no register info or function info. 726 if (!RegInfo || !FI) 727 return false; 728 729 // Only move the stack update on ELFv2 ABI and PPC64. 730 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) 731 return false; 732 733 // Check the frame size first and return false if it does not fit the 734 // requirements. 735 // We need a non-zero frame size as well as a frame that will fit in the red 736 // zone. This is because by moving the stack pointer update we are now storing 737 // to the red zone until the stack pointer is updated. If we get an interrupt 738 // inside the prologue but before the stack update we now have a number of 739 // stores to the red zone and those stores must all fit. 740 MachineFrameInfo &MFI = MF.getFrameInfo(); 741 unsigned FrameSize = MFI.getStackSize(); 742 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 743 return false; 744 745 // Frame pointers and base pointers complicate matters so don't do anything 746 // if we have them. For example having a frame pointer will sometimes require 747 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 748 // difficult. 749 if (hasFP(MF) || RegInfo->hasBasePointer(MF)) 750 return false; 751 752 // Calls to fast_cc functions use different rules for passing parameters on 753 // the stack from the ABI and using PIC base in the function imposes 754 // similar restrictions to using the base pointer. It is not generally safe 755 // to move the stack pointer update in these situations. 756 if (FI->hasFastCall() || FI->usesPICBase()) 757 return false; 758 759 // Finally we can move the stack update if we do not require regiser 760 // scavenging. Register scavenging can introduce more spills and so 761 // may make the frame size larger than we have computed. 762 return !RegInfo->requiresFrameIndexScavenging(MF); 763 } 764 765 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 766 MachineBasicBlock &MBB) const { 767 MachineBasicBlock::iterator MBBI = MBB.begin(); 768 MachineFrameInfo &MFI = MF.getFrameInfo(); 769 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 770 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 771 772 MachineModuleInfo &MMI = MF.getMMI(); 773 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 774 DebugLoc dl; 775 bool needsCFI = MMI.hasDebugInfo() || 776 MF.getFunction().needsUnwindTableEntry(); 777 778 // Get processor type. 779 bool isPPC64 = Subtarget.isPPC64(); 780 // Get the ABI. 781 bool isSVR4ABI = Subtarget.isSVR4ABI(); 782 bool isELFv2ABI = Subtarget.isELFv2ABI(); 783 assert((Subtarget.isDarwinABI() || isSVR4ABI) && 784 "Currently only Darwin and SVR4 ABIs are supported for PowerPC."); 785 786 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 787 // process it. 788 if (!isSVR4ABI) 789 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 790 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 791 HandleVRSaveUpdate(*MBBI, TII); 792 break; 793 } 794 } 795 796 // Move MBBI back to the beginning of the prologue block. 797 MBBI = MBB.begin(); 798 799 // Work out frame sizes. 800 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 801 int NegFrameSize = -FrameSize; 802 if (!isInt<32>(NegFrameSize)) 803 llvm_unreachable("Unhandled stack size!"); 804 805 if (MFI.isFrameAddressTaken()) 806 replaceFPWithRealFP(MF); 807 808 // Check if the link register (LR) must be saved. 809 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 810 bool MustSaveLR = FI->mustSaveLR(); 811 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 812 bool MustSaveCR = !MustSaveCRs.empty(); 813 // Do we have a frame pointer and/or base pointer for this function? 814 bool HasFP = hasFP(MF); 815 bool HasBP = RegInfo->hasBasePointer(MF); 816 bool HasRedZone = isPPC64 || !isSVR4ABI; 817 818 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 819 unsigned BPReg = RegInfo->getBaseRegister(MF); 820 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 821 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 822 unsigned ScratchReg = 0; 823 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 824 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 825 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 826 : PPC::MFLR ); 827 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 828 : PPC::STW ); 829 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 830 : PPC::STWU ); 831 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 832 : PPC::STWUX); 833 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 834 : PPC::LIS ); 835 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 836 : PPC::ORI ); 837 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 838 : PPC::OR ); 839 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 840 : PPC::SUBFC); 841 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 842 : PPC::SUBFIC); 843 844 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 845 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 846 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 847 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 848 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 849 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 850 851 // Using the same bool variable as below to suppress compiler warnings. 852 bool SingleScratchReg = 853 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 854 &ScratchReg, &TempReg); 855 assert(SingleScratchReg && 856 "Required number of registers not available in this block"); 857 858 SingleScratchReg = ScratchReg == TempReg; 859 860 int LROffset = getReturnSaveOffset(); 861 862 int FPOffset = 0; 863 if (HasFP) { 864 if (isSVR4ABI) { 865 MachineFrameInfo &MFI = MF.getFrameInfo(); 866 int FPIndex = FI->getFramePointerSaveIndex(); 867 assert(FPIndex && "No Frame Pointer Save Slot!"); 868 FPOffset = MFI.getObjectOffset(FPIndex); 869 } else { 870 FPOffset = getFramePointerSaveOffset(); 871 } 872 } 873 874 int BPOffset = 0; 875 if (HasBP) { 876 if (isSVR4ABI) { 877 MachineFrameInfo &MFI = MF.getFrameInfo(); 878 int BPIndex = FI->getBasePointerSaveIndex(); 879 assert(BPIndex && "No Base Pointer Save Slot!"); 880 BPOffset = MFI.getObjectOffset(BPIndex); 881 } else { 882 BPOffset = getBasePointerSaveOffset(); 883 } 884 } 885 886 int PBPOffset = 0; 887 if (FI->usesPICBase()) { 888 MachineFrameInfo &MFI = MF.getFrameInfo(); 889 int PBPIndex = FI->getPICBasePointerSaveIndex(); 890 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 891 PBPOffset = MFI.getObjectOffset(PBPIndex); 892 } 893 894 // Get stack alignments. 895 unsigned MaxAlign = MFI.getMaxAlignment(); 896 if (HasBP && MaxAlign > 1) 897 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && 898 "Invalid alignment!"); 899 900 // Frames of 32KB & larger require special handling because they cannot be 901 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 902 bool isLargeFrame = !isInt<16>(NegFrameSize); 903 904 assert((isPPC64 || !MustSaveCR) && 905 "Prologue CR saving supported only in 64-bit mode"); 906 907 // Check if we can move the stack update instruction (stdu) down the prologue 908 // past the callee saves. Hopefully this will avoid the situation where the 909 // saves are waiting for the update on the store with update to complete. 910 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 911 bool MovingStackUpdateDown = false; 912 913 // Check if we can move the stack update. 914 if (stackUpdateCanBeMoved(MF)) { 915 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 916 for (CalleeSavedInfo CSI : Info) { 917 int FrIdx = CSI.getFrameIdx(); 918 // If the frame index is not negative the callee saved info belongs to a 919 // stack object that is not a fixed stack object. We ignore non-fixed 920 // stack objects because we won't move the stack update pointer past them. 921 if (FrIdx >= 0) 922 continue; 923 924 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 925 StackUpdateLoc++; 926 MovingStackUpdateDown = true; 927 } else { 928 // We need all of the Frame Indices to meet these conditions. 929 // If they do not, abort the whole operation. 930 StackUpdateLoc = MBBI; 931 MovingStackUpdateDown = false; 932 break; 933 } 934 } 935 936 // If the operation was not aborted then update the object offset. 937 if (MovingStackUpdateDown) { 938 for (CalleeSavedInfo CSI : Info) { 939 int FrIdx = CSI.getFrameIdx(); 940 if (FrIdx < 0) 941 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 942 } 943 } 944 } 945 946 // If we need to spill the CR and the LR but we don't have two separate 947 // registers available, we must spill them one at a time 948 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 949 // In the ELFv2 ABI, we are not required to save all CR fields. 950 // If only one or two CR fields are clobbered, it is more efficient to use 951 // mfocrf to selectively save just those fields, because mfocrf has short 952 // latency compares to mfcr. 953 unsigned MfcrOpcode = PPC::MFCR8; 954 unsigned CrState = RegState::ImplicitKill; 955 if (isELFv2ABI && MustSaveCRs.size() == 1) { 956 MfcrOpcode = PPC::MFOCRF8; 957 CrState = RegState::Kill; 958 } 959 MachineInstrBuilder MIB = 960 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 961 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 962 MIB.addReg(MustSaveCRs[i], CrState); 963 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 964 .addReg(TempReg, getKillRegState(true)) 965 .addImm(8) 966 .addReg(SPReg); 967 } 968 969 if (MustSaveLR) 970 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 971 972 if (MustSaveCR && 973 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 974 // In the ELFv2 ABI, we are not required to save all CR fields. 975 // If only one or two CR fields are clobbered, it is more efficient to use 976 // mfocrf to selectively save just those fields, because mfocrf has short 977 // latency compares to mfcr. 978 unsigned MfcrOpcode = PPC::MFCR8; 979 unsigned CrState = RegState::ImplicitKill; 980 if (isELFv2ABI && MustSaveCRs.size() == 1) { 981 MfcrOpcode = PPC::MFOCRF8; 982 CrState = RegState::Kill; 983 } 984 MachineInstrBuilder MIB = 985 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 986 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 987 MIB.addReg(MustSaveCRs[i], CrState); 988 } 989 990 if (HasRedZone) { 991 if (HasFP) 992 BuildMI(MBB, MBBI, dl, StoreInst) 993 .addReg(FPReg) 994 .addImm(FPOffset) 995 .addReg(SPReg); 996 if (FI->usesPICBase()) 997 BuildMI(MBB, MBBI, dl, StoreInst) 998 .addReg(PPC::R30) 999 .addImm(PBPOffset) 1000 .addReg(SPReg); 1001 if (HasBP) 1002 BuildMI(MBB, MBBI, dl, StoreInst) 1003 .addReg(BPReg) 1004 .addImm(BPOffset) 1005 .addReg(SPReg); 1006 } 1007 1008 if (MustSaveLR) 1009 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 1010 .addReg(ScratchReg, getKillRegState(true)) 1011 .addImm(LROffset) 1012 .addReg(SPReg); 1013 1014 if (MustSaveCR && 1015 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 1016 assert(HasRedZone && "A red zone is always available on PPC64"); 1017 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 1018 .addReg(TempReg, getKillRegState(true)) 1019 .addImm(8) 1020 .addReg(SPReg); 1021 } 1022 1023 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 1024 if (!FrameSize) 1025 return; 1026 1027 // Adjust stack pointer: r1 += NegFrameSize. 1028 // If there is a preferred stack alignment, align R1 now 1029 1030 if (HasBP && HasRedZone) { 1031 // Save a copy of r1 as the base pointer. 1032 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1033 .addReg(SPReg) 1034 .addReg(SPReg); 1035 } 1036 1037 // Have we generated a STUX instruction to claim stack frame? If so, 1038 // the negated frame size will be placed in ScratchReg. 1039 bool HasSTUX = false; 1040 1041 // This condition must be kept in sync with canUseAsPrologue. 1042 if (HasBP && MaxAlign > 1) { 1043 if (isPPC64) 1044 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1045 .addReg(SPReg) 1046 .addImm(0) 1047 .addImm(64 - Log2_32(MaxAlign)); 1048 else // PPC32... 1049 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1050 .addReg(SPReg) 1051 .addImm(0) 1052 .addImm(32 - Log2_32(MaxAlign)) 1053 .addImm(31); 1054 if (!isLargeFrame) { 1055 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1056 .addReg(ScratchReg, RegState::Kill) 1057 .addImm(NegFrameSize); 1058 } else { 1059 assert(!SingleScratchReg && "Only a single scratch reg available"); 1060 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1061 .addImm(NegFrameSize >> 16); 1062 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1063 .addReg(TempReg, RegState::Kill) 1064 .addImm(NegFrameSize & 0xFFFF); 1065 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1066 .addReg(ScratchReg, RegState::Kill) 1067 .addReg(TempReg, RegState::Kill); 1068 } 1069 1070 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1071 .addReg(SPReg, RegState::Kill) 1072 .addReg(SPReg) 1073 .addReg(ScratchReg); 1074 HasSTUX = true; 1075 1076 } else if (!isLargeFrame) { 1077 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1078 .addReg(SPReg) 1079 .addImm(NegFrameSize) 1080 .addReg(SPReg); 1081 1082 } else { 1083 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1084 .addImm(NegFrameSize >> 16); 1085 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1086 .addReg(ScratchReg, RegState::Kill) 1087 .addImm(NegFrameSize & 0xFFFF); 1088 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1089 .addReg(SPReg, RegState::Kill) 1090 .addReg(SPReg) 1091 .addReg(ScratchReg); 1092 HasSTUX = true; 1093 } 1094 1095 if (!HasRedZone) { 1096 assert(!isPPC64 && "A red zone is always available on PPC64"); 1097 if (HasSTUX) { 1098 // The negated frame size is in ScratchReg, and the SPReg has been 1099 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1100 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1101 // the stack frame (i.e. the old SP), ideally, we would put the old 1102 // SP into a register and use it as the base for the stores. The 1103 // problem is that the only available register may be ScratchReg, 1104 // which could be R0, and R0 cannot be used as a base address. 1105 1106 // First, set ScratchReg to the old SP. This may need to be modified 1107 // later. 1108 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1109 .addReg(ScratchReg, RegState::Kill) 1110 .addReg(SPReg); 1111 1112 if (ScratchReg == PPC::R0) { 1113 // R0 cannot be used as a base register, but it can be used as an 1114 // index in a store-indexed. 1115 int LastOffset = 0; 1116 if (HasFP) { 1117 // R0 += (FPOffset-LastOffset). 1118 // Need addic, since addi treats R0 as 0. 1119 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1120 .addReg(ScratchReg) 1121 .addImm(FPOffset-LastOffset); 1122 LastOffset = FPOffset; 1123 // Store FP into *R0. 1124 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1125 .addReg(FPReg, RegState::Kill) // Save FP. 1126 .addReg(PPC::ZERO) 1127 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1128 } 1129 if (FI->usesPICBase()) { 1130 // R0 += (PBPOffset-LastOffset). 1131 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1132 .addReg(ScratchReg) 1133 .addImm(PBPOffset-LastOffset); 1134 LastOffset = PBPOffset; 1135 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1136 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1137 .addReg(PPC::ZERO) 1138 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1139 } 1140 if (HasBP) { 1141 // R0 += (BPOffset-LastOffset). 1142 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1143 .addReg(ScratchReg) 1144 .addImm(BPOffset-LastOffset); 1145 LastOffset = BPOffset; 1146 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1147 .addReg(BPReg, RegState::Kill) // Save BP. 1148 .addReg(PPC::ZERO) 1149 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1150 // BP = R0-LastOffset 1151 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1152 .addReg(ScratchReg, RegState::Kill) 1153 .addImm(-LastOffset); 1154 } 1155 } else { 1156 // ScratchReg is not R0, so use it as the base register. It is 1157 // already set to the old SP, so we can use the offsets directly. 1158 1159 // Now that the stack frame has been allocated, save all the necessary 1160 // registers using ScratchReg as the base address. 1161 if (HasFP) 1162 BuildMI(MBB, MBBI, dl, StoreInst) 1163 .addReg(FPReg) 1164 .addImm(FPOffset) 1165 .addReg(ScratchReg); 1166 if (FI->usesPICBase()) 1167 BuildMI(MBB, MBBI, dl, StoreInst) 1168 .addReg(PPC::R30) 1169 .addImm(PBPOffset) 1170 .addReg(ScratchReg); 1171 if (HasBP) { 1172 BuildMI(MBB, MBBI, dl, StoreInst) 1173 .addReg(BPReg) 1174 .addImm(BPOffset) 1175 .addReg(ScratchReg); 1176 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1177 .addReg(ScratchReg, RegState::Kill) 1178 .addReg(ScratchReg); 1179 } 1180 } 1181 } else { 1182 // The frame size is a known 16-bit constant (fitting in the immediate 1183 // field of STWU). To be here we have to be compiling for PPC32. 1184 // Since the SPReg has been decreased by FrameSize, add it back to each 1185 // offset. 1186 if (HasFP) 1187 BuildMI(MBB, MBBI, dl, StoreInst) 1188 .addReg(FPReg) 1189 .addImm(FrameSize + FPOffset) 1190 .addReg(SPReg); 1191 if (FI->usesPICBase()) 1192 BuildMI(MBB, MBBI, dl, StoreInst) 1193 .addReg(PPC::R30) 1194 .addImm(FrameSize + PBPOffset) 1195 .addReg(SPReg); 1196 if (HasBP) { 1197 BuildMI(MBB, MBBI, dl, StoreInst) 1198 .addReg(BPReg) 1199 .addImm(FrameSize + BPOffset) 1200 .addReg(SPReg); 1201 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1202 .addReg(SPReg) 1203 .addImm(FrameSize); 1204 } 1205 } 1206 } 1207 1208 // Add Call Frame Information for the instructions we generated above. 1209 if (needsCFI) { 1210 unsigned CFIIndex; 1211 1212 if (HasBP) { 1213 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1214 // because if the stack needed aligning then CFA won't be at a fixed 1215 // offset from FP/SP. 1216 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1217 CFIIndex = MF.addFrameInst( 1218 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1219 } else { 1220 // Adjust the definition of CFA to account for the change in SP. 1221 assert(NegFrameSize); 1222 CFIIndex = MF.addFrameInst( 1223 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 1224 } 1225 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1226 .addCFIIndex(CFIIndex); 1227 1228 if (HasFP) { 1229 // Describe where FP was saved, at a fixed offset from CFA. 1230 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1231 CFIIndex = MF.addFrameInst( 1232 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1233 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1234 .addCFIIndex(CFIIndex); 1235 } 1236 1237 if (FI->usesPICBase()) { 1238 // Describe where FP was saved, at a fixed offset from CFA. 1239 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1240 CFIIndex = MF.addFrameInst( 1241 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1242 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1243 .addCFIIndex(CFIIndex); 1244 } 1245 1246 if (HasBP) { 1247 // Describe where BP was saved, at a fixed offset from CFA. 1248 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1249 CFIIndex = MF.addFrameInst( 1250 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1251 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1252 .addCFIIndex(CFIIndex); 1253 } 1254 1255 if (MustSaveLR) { 1256 // Describe where LR was saved, at a fixed offset from CFA. 1257 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1258 CFIIndex = MF.addFrameInst( 1259 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1260 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1261 .addCFIIndex(CFIIndex); 1262 } 1263 } 1264 1265 // If there is a frame pointer, copy R1 into R31 1266 if (HasFP) { 1267 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1268 .addReg(SPReg) 1269 .addReg(SPReg); 1270 1271 if (!HasBP && needsCFI) { 1272 // Change the definition of CFA from SP+offset to FP+offset, because SP 1273 // will change at every alloca. 1274 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1275 unsigned CFIIndex = MF.addFrameInst( 1276 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1277 1278 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1279 .addCFIIndex(CFIIndex); 1280 } 1281 } 1282 1283 if (needsCFI) { 1284 // Describe where callee saved registers were saved, at fixed offsets from 1285 // CFA. 1286 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1287 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1288 unsigned Reg = CSI[I].getReg(); 1289 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1290 1291 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1292 // subregisters of CR2. We just need to emit a move of CR2. 1293 if (PPC::CRBITRCRegClass.contains(Reg)) 1294 continue; 1295 1296 // For SVR4, don't emit a move for the CR spill slot if we haven't 1297 // spilled CRs. 1298 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1299 && !MustSaveCR) 1300 continue; 1301 1302 // For 64-bit SVR4 when we have spilled CRs, the spill location 1303 // is SP+8, not a frame-relative slot. 1304 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1305 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1306 // the whole CR word. In the ELFv2 ABI, every CR that was 1307 // actually saved gets its own CFI record. 1308 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1309 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1310 nullptr, MRI->getDwarfRegNum(CRReg, true), 8)); 1311 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1312 .addCFIIndex(CFIIndex); 1313 continue; 1314 } 1315 1316 if (CSI[I].isSpilledToReg()) { 1317 unsigned SpilledReg = CSI[I].getDstReg(); 1318 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1319 nullptr, MRI->getDwarfRegNum(Reg, true), 1320 MRI->getDwarfRegNum(SpilledReg, true))); 1321 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1322 .addCFIIndex(CFIRegister); 1323 } else { 1324 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1325 // We have changed the object offset above but we do not want to change 1326 // the actual offsets in the CFI instruction so we have to undo the 1327 // offset change here. 1328 if (MovingStackUpdateDown) 1329 Offset -= NegFrameSize; 1330 1331 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1332 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1333 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1334 .addCFIIndex(CFIIndex); 1335 } 1336 } 1337 } 1338 } 1339 1340 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1341 MachineBasicBlock &MBB) const { 1342 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1343 DebugLoc dl; 1344 1345 if (MBBI != MBB.end()) 1346 dl = MBBI->getDebugLoc(); 1347 1348 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1349 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1350 1351 // Get alignment info so we know how to restore the SP. 1352 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1353 1354 // Get the number of bytes allocated from the FrameInfo. 1355 int FrameSize = MFI.getStackSize(); 1356 1357 // Get processor type. 1358 bool isPPC64 = Subtarget.isPPC64(); 1359 // Get the ABI. 1360 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1361 1362 // Check if the link register (LR) has been saved. 1363 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1364 bool MustSaveLR = FI->mustSaveLR(); 1365 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 1366 bool MustSaveCR = !MustSaveCRs.empty(); 1367 // Do we have a frame pointer and/or base pointer for this function? 1368 bool HasFP = hasFP(MF); 1369 bool HasBP = RegInfo->hasBasePointer(MF); 1370 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1371 1372 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1373 unsigned BPReg = RegInfo->getBaseRegister(MF); 1374 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1375 unsigned ScratchReg = 0; 1376 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1377 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1378 : PPC::MTLR ); 1379 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1380 : PPC::LWZ ); 1381 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1382 : PPC::LIS ); 1383 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1384 : PPC::OR ); 1385 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1386 : PPC::ORI ); 1387 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1388 : PPC::ADDI ); 1389 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1390 : PPC::ADD4 ); 1391 1392 int LROffset = getReturnSaveOffset(); 1393 1394 int FPOffset = 0; 1395 1396 // Using the same bool variable as below to suppress compiler warnings. 1397 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1398 &TempReg); 1399 assert(SingleScratchReg && 1400 "Could not find an available scratch register"); 1401 1402 SingleScratchReg = ScratchReg == TempReg; 1403 1404 if (HasFP) { 1405 if (isSVR4ABI) { 1406 int FPIndex = FI->getFramePointerSaveIndex(); 1407 assert(FPIndex && "No Frame Pointer Save Slot!"); 1408 FPOffset = MFI.getObjectOffset(FPIndex); 1409 } else { 1410 FPOffset = getFramePointerSaveOffset(); 1411 } 1412 } 1413 1414 int BPOffset = 0; 1415 if (HasBP) { 1416 if (isSVR4ABI) { 1417 int BPIndex = FI->getBasePointerSaveIndex(); 1418 assert(BPIndex && "No Base Pointer Save Slot!"); 1419 BPOffset = MFI.getObjectOffset(BPIndex); 1420 } else { 1421 BPOffset = getBasePointerSaveOffset(); 1422 } 1423 } 1424 1425 int PBPOffset = 0; 1426 if (FI->usesPICBase()) { 1427 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1428 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1429 PBPOffset = MFI.getObjectOffset(PBPIndex); 1430 } 1431 1432 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1433 1434 if (IsReturnBlock) { 1435 unsigned RetOpcode = MBBI->getOpcode(); 1436 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1437 RetOpcode == PPC::TCRETURNdi || 1438 RetOpcode == PPC::TCRETURNai || 1439 RetOpcode == PPC::TCRETURNri8 || 1440 RetOpcode == PPC::TCRETURNdi8 || 1441 RetOpcode == PPC::TCRETURNai8; 1442 1443 if (UsesTCRet) { 1444 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1445 MachineOperand &StackAdjust = MBBI->getOperand(1); 1446 assert(StackAdjust.isImm() && "Expecting immediate value."); 1447 // Adjust stack pointer. 1448 int StackAdj = StackAdjust.getImm(); 1449 int Delta = StackAdj - MaxTCRetDelta; 1450 assert((Delta >= 0) && "Delta must be positive"); 1451 if (MaxTCRetDelta>0) 1452 FrameSize += (StackAdj +Delta); 1453 else 1454 FrameSize += StackAdj; 1455 } 1456 } 1457 1458 // Frames of 32KB & larger require special handling because they cannot be 1459 // indexed into with a simple LD/LWZ immediate offset operand. 1460 bool isLargeFrame = !isInt<16>(FrameSize); 1461 1462 // On targets without red zone, the SP needs to be restored last, so that 1463 // all live contents of the stack frame are upwards of the SP. This means 1464 // that we cannot restore SP just now, since there may be more registers 1465 // to restore from the stack frame (e.g. R31). If the frame size is not 1466 // a simple immediate value, we will need a spare register to hold the 1467 // restored SP. If the frame size is known and small, we can simply adjust 1468 // the offsets of the registers to be restored, and still use SP to restore 1469 // them. In such case, the final update of SP will be to add the frame 1470 // size to it. 1471 // To simplify the code, set RBReg to the base register used to restore 1472 // values from the stack, and set SPAdd to the value that needs to be added 1473 // to the SP at the end. The default values are as if red zone was present. 1474 unsigned RBReg = SPReg; 1475 unsigned SPAdd = 0; 1476 1477 // Check if we can move the stack update instruction up the epilogue 1478 // past the callee saves. This will allow the move to LR instruction 1479 // to be executed before the restores of the callee saves which means 1480 // that the callee saves can hide the latency from the MTLR instrcution. 1481 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1482 if (stackUpdateCanBeMoved(MF)) { 1483 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1484 for (CalleeSavedInfo CSI : Info) { 1485 int FrIdx = CSI.getFrameIdx(); 1486 // If the frame index is not negative the callee saved info belongs to a 1487 // stack object that is not a fixed stack object. We ignore non-fixed 1488 // stack objects because we won't move the update of the stack pointer 1489 // past them. 1490 if (FrIdx >= 0) 1491 continue; 1492 1493 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1494 StackUpdateLoc--; 1495 else { 1496 // Abort the operation as we can't update all CSR restores. 1497 StackUpdateLoc = MBBI; 1498 break; 1499 } 1500 } 1501 } 1502 1503 if (FrameSize) { 1504 // In the prologue, the loaded (or persistent) stack pointer value is 1505 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1506 // zone add this offset back now. 1507 1508 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1509 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1510 // call which invalidates the stack pointer value in SP(0). So we use the 1511 // value of R31 in this case. 1512 if (FI->hasFastCall()) { 1513 assert(HasFP && "Expecting a valid frame pointer."); 1514 if (!HasRedZone) 1515 RBReg = FPReg; 1516 if (!isLargeFrame) { 1517 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1518 .addReg(FPReg).addImm(FrameSize); 1519 } else { 1520 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1521 .addImm(FrameSize >> 16); 1522 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1523 .addReg(ScratchReg, RegState::Kill) 1524 .addImm(FrameSize & 0xFFFF); 1525 BuildMI(MBB, MBBI, dl, AddInst) 1526 .addReg(RBReg) 1527 .addReg(FPReg) 1528 .addReg(ScratchReg); 1529 } 1530 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1531 if (HasRedZone) { 1532 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1533 .addReg(SPReg) 1534 .addImm(FrameSize); 1535 } else { 1536 // Make sure that adding FrameSize will not overflow the max offset 1537 // size. 1538 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1539 "Local offsets should be negative"); 1540 SPAdd = FrameSize; 1541 FPOffset += FrameSize; 1542 BPOffset += FrameSize; 1543 PBPOffset += FrameSize; 1544 } 1545 } else { 1546 // We don't want to use ScratchReg as a base register, because it 1547 // could happen to be R0. Use FP instead, but make sure to preserve it. 1548 if (!HasRedZone) { 1549 // If FP is not saved, copy it to ScratchReg. 1550 if (!HasFP) 1551 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1552 .addReg(FPReg) 1553 .addReg(FPReg); 1554 RBReg = FPReg; 1555 } 1556 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1557 .addImm(0) 1558 .addReg(SPReg); 1559 } 1560 } 1561 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1562 // If there is no red zone, ScratchReg may be needed for holding a useful 1563 // value (although not the base register). Make sure it is not overwritten 1564 // too early. 1565 1566 assert((isPPC64 || !MustSaveCR) && 1567 "Epilogue CR restoring supported only in 64-bit mode"); 1568 1569 // If we need to restore both the LR and the CR and we only have one 1570 // available scratch register, we must do them one at a time. 1571 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1572 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1573 // is live here. 1574 assert(HasRedZone && "Expecting red zone"); 1575 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1576 .addImm(8) 1577 .addReg(SPReg); 1578 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1579 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1580 .addReg(TempReg, getKillRegState(i == e-1)); 1581 } 1582 1583 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1584 // LR is stored in the caller's stack frame. ScratchReg will be needed 1585 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1586 // a base register anyway, because it may happen to be R0. 1587 bool LoadedLR = false; 1588 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1589 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1590 .addImm(LROffset+SPAdd) 1591 .addReg(RBReg); 1592 LoadedLR = true; 1593 } 1594 1595 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1596 // This will only occur for PPC64. 1597 assert(isPPC64 && "Expecting 64-bit mode"); 1598 assert(RBReg == SPReg && "Should be using SP as a base register"); 1599 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1600 .addImm(8) 1601 .addReg(RBReg); 1602 } 1603 1604 if (HasFP) { 1605 // If there is red zone, restore FP directly, since SP has already been 1606 // restored. Otherwise, restore the value of FP into ScratchReg. 1607 if (HasRedZone || RBReg == SPReg) 1608 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1609 .addImm(FPOffset) 1610 .addReg(SPReg); 1611 else 1612 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1613 .addImm(FPOffset) 1614 .addReg(RBReg); 1615 } 1616 1617 if (FI->usesPICBase()) 1618 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1619 .addImm(PBPOffset) 1620 .addReg(RBReg); 1621 1622 if (HasBP) 1623 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1624 .addImm(BPOffset) 1625 .addReg(RBReg); 1626 1627 // There is nothing more to be loaded from the stack, so now we can 1628 // restore SP: SP = RBReg + SPAdd. 1629 if (RBReg != SPReg || SPAdd != 0) { 1630 assert(!HasRedZone && "This should not happen with red zone"); 1631 // If SPAdd is 0, generate a copy. 1632 if (SPAdd == 0) 1633 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1634 .addReg(RBReg) 1635 .addReg(RBReg); 1636 else 1637 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1638 .addReg(RBReg) 1639 .addImm(SPAdd); 1640 1641 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1642 if (RBReg == FPReg) 1643 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1644 .addReg(ScratchReg) 1645 .addReg(ScratchReg); 1646 1647 // Now load the LR from the caller's stack frame. 1648 if (MustSaveLR && !LoadedLR) 1649 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1650 .addImm(LROffset) 1651 .addReg(SPReg); 1652 } 1653 1654 if (MustSaveCR && 1655 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 1656 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1657 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1658 .addReg(TempReg, getKillRegState(i == e-1)); 1659 1660 if (MustSaveLR) 1661 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1662 1663 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1664 // call optimization 1665 if (IsReturnBlock) { 1666 unsigned RetOpcode = MBBI->getOpcode(); 1667 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1668 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1669 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1670 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1671 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1672 1673 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1674 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1675 .addReg(SPReg).addImm(CallerAllocatedAmt); 1676 } else { 1677 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1678 .addImm(CallerAllocatedAmt >> 16); 1679 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1680 .addReg(ScratchReg, RegState::Kill) 1681 .addImm(CallerAllocatedAmt & 0xFFFF); 1682 BuildMI(MBB, MBBI, dl, AddInst) 1683 .addReg(SPReg) 1684 .addReg(FPReg) 1685 .addReg(ScratchReg); 1686 } 1687 } else { 1688 createTailCallBranchInstr(MBB); 1689 } 1690 } 1691 } 1692 1693 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1694 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1695 1696 // If we got this far a first terminator should exist. 1697 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1698 1699 DebugLoc dl = MBBI->getDebugLoc(); 1700 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1701 1702 // Create branch instruction for pseudo tail call return instruction 1703 unsigned RetOpcode = MBBI->getOpcode(); 1704 if (RetOpcode == PPC::TCRETURNdi) { 1705 MBBI = MBB.getLastNonDebugInstr(); 1706 MachineOperand &JumpTarget = MBBI->getOperand(0); 1707 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1708 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1709 } else if (RetOpcode == PPC::TCRETURNri) { 1710 MBBI = MBB.getLastNonDebugInstr(); 1711 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1712 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1713 } else if (RetOpcode == PPC::TCRETURNai) { 1714 MBBI = MBB.getLastNonDebugInstr(); 1715 MachineOperand &JumpTarget = MBBI->getOperand(0); 1716 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1717 } else if (RetOpcode == PPC::TCRETURNdi8) { 1718 MBBI = MBB.getLastNonDebugInstr(); 1719 MachineOperand &JumpTarget = MBBI->getOperand(0); 1720 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1721 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1722 } else if (RetOpcode == PPC::TCRETURNri8) { 1723 MBBI = MBB.getLastNonDebugInstr(); 1724 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1725 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1726 } else if (RetOpcode == PPC::TCRETURNai8) { 1727 MBBI = MBB.getLastNonDebugInstr(); 1728 MachineOperand &JumpTarget = MBBI->getOperand(0); 1729 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1730 } 1731 } 1732 1733 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1734 BitVector &SavedRegs, 1735 RegScavenger *RS) const { 1736 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1737 1738 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1739 1740 // Save and clear the LR state. 1741 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1742 unsigned LR = RegInfo->getRARegister(); 1743 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1744 SavedRegs.reset(LR); 1745 1746 // Save R31 if necessary 1747 int FPSI = FI->getFramePointerSaveIndex(); 1748 bool isPPC64 = Subtarget.isPPC64(); 1749 bool isDarwinABI = Subtarget.isDarwinABI(); 1750 MachineFrameInfo &MFI = MF.getFrameInfo(); 1751 1752 // If the frame pointer save index hasn't been defined yet. 1753 if (!FPSI && needsFP(MF)) { 1754 // Find out what the fix offset of the frame pointer save area. 1755 int FPOffset = getFramePointerSaveOffset(); 1756 // Allocate the frame index for frame pointer save area. 1757 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1758 // Save the result. 1759 FI->setFramePointerSaveIndex(FPSI); 1760 } 1761 1762 int BPSI = FI->getBasePointerSaveIndex(); 1763 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1764 int BPOffset = getBasePointerSaveOffset(); 1765 // Allocate the frame index for the base pointer save area. 1766 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1767 // Save the result. 1768 FI->setBasePointerSaveIndex(BPSI); 1769 } 1770 1771 // Reserve stack space for the PIC Base register (R30). 1772 // Only used in SVR4 32-bit. 1773 if (FI->usesPICBase()) { 1774 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1775 FI->setPICBasePointerSaveIndex(PBPSI); 1776 } 1777 1778 // Make sure we don't explicitly spill r31, because, for example, we have 1779 // some inline asm which explicitly clobbers it, when we otherwise have a 1780 // frame pointer and are using r31's spill slot for the prologue/epilogue 1781 // code. Same goes for the base pointer and the PIC base register. 1782 if (needsFP(MF)) 1783 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1784 if (RegInfo->hasBasePointer(MF)) 1785 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1786 if (FI->usesPICBase()) 1787 SavedRegs.reset(PPC::R30); 1788 1789 // Reserve stack space to move the linkage area to in case of a tail call. 1790 int TCSPDelta = 0; 1791 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1792 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1793 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1794 } 1795 1796 // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the 1797 // function uses CR 2, 3, or 4. 1798 if (!isPPC64 && !isDarwinABI && 1799 (SavedRegs.test(PPC::CR2) || 1800 SavedRegs.test(PPC::CR3) || 1801 SavedRegs.test(PPC::CR4))) { 1802 int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true); 1803 FI->setCRSpillFrameIndex(FrameIdx); 1804 } 1805 } 1806 1807 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1808 RegScavenger *RS) const { 1809 // Early exit if not using the SVR4 ABI. 1810 if (!Subtarget.isSVR4ABI()) { 1811 addScavengingSpillSlot(MF, RS); 1812 return; 1813 } 1814 1815 // Get callee saved register information. 1816 MachineFrameInfo &MFI = MF.getFrameInfo(); 1817 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1818 1819 // If the function is shrink-wrapped, and if the function has a tail call, the 1820 // tail call might not be in the new RestoreBlock, so real branch instruction 1821 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1822 // RestoreBlock. So we handle this case here. 1823 if (MFI.getSavePoint() && MFI.hasTailCall()) { 1824 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1825 for (MachineBasicBlock &MBB : MF) { 1826 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1827 createTailCallBranchInstr(MBB); 1828 } 1829 } 1830 1831 // Early exit if no callee saved registers are modified! 1832 if (CSI.empty() && !needsFP(MF)) { 1833 addScavengingSpillSlot(MF, RS); 1834 return; 1835 } 1836 1837 unsigned MinGPR = PPC::R31; 1838 unsigned MinG8R = PPC::X31; 1839 unsigned MinFPR = PPC::F31; 1840 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31; 1841 1842 bool HasGPSaveArea = false; 1843 bool HasG8SaveArea = false; 1844 bool HasFPSaveArea = false; 1845 bool HasVRSAVESaveArea = false; 1846 bool HasVRSaveArea = false; 1847 1848 SmallVector<CalleeSavedInfo, 18> GPRegs; 1849 SmallVector<CalleeSavedInfo, 18> G8Regs; 1850 SmallVector<CalleeSavedInfo, 18> FPRegs; 1851 SmallVector<CalleeSavedInfo, 18> VRegs; 1852 1853 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1854 unsigned Reg = CSI[i].getReg(); 1855 if (PPC::GPRCRegClass.contains(Reg) || 1856 PPC::SPE4RCRegClass.contains(Reg)) { 1857 HasGPSaveArea = true; 1858 1859 GPRegs.push_back(CSI[i]); 1860 1861 if (Reg < MinGPR) { 1862 MinGPR = Reg; 1863 } 1864 } else if (PPC::G8RCRegClass.contains(Reg)) { 1865 HasG8SaveArea = true; 1866 1867 G8Regs.push_back(CSI[i]); 1868 1869 if (Reg < MinG8R) { 1870 MinG8R = Reg; 1871 } 1872 } else if (PPC::F8RCRegClass.contains(Reg)) { 1873 HasFPSaveArea = true; 1874 1875 FPRegs.push_back(CSI[i]); 1876 1877 if (Reg < MinFPR) { 1878 MinFPR = Reg; 1879 } 1880 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1881 PPC::CRRCRegClass.contains(Reg)) { 1882 ; // do nothing, as we already know whether CRs are spilled 1883 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 1884 HasVRSAVESaveArea = true; 1885 } else if (PPC::VRRCRegClass.contains(Reg) || 1886 PPC::SPERCRegClass.contains(Reg)) { 1887 // Altivec and SPE are mutually exclusive, but have the same stack 1888 // alignment requirements, so overload the save area for both cases. 1889 HasVRSaveArea = true; 1890 1891 VRegs.push_back(CSI[i]); 1892 1893 if (Reg < MinVR) { 1894 MinVR = Reg; 1895 } 1896 } else { 1897 llvm_unreachable("Unknown RegisterClass!"); 1898 } 1899 } 1900 1901 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1902 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1903 1904 int64_t LowerBound = 0; 1905 1906 // Take into account stack space reserved for tail calls. 1907 int TCSPDelta = 0; 1908 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1909 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1910 LowerBound = TCSPDelta; 1911 } 1912 1913 // The Floating-point register save area is right below the back chain word 1914 // of the previous stack frame. 1915 if (HasFPSaveArea) { 1916 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1917 int FI = FPRegs[i].getFrameIdx(); 1918 1919 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1920 } 1921 1922 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1923 } 1924 1925 // Check whether the frame pointer register is allocated. If so, make sure it 1926 // is spilled to the correct offset. 1927 if (needsFP(MF)) { 1928 int FI = PFI->getFramePointerSaveIndex(); 1929 assert(FI && "No Frame Pointer Save Slot!"); 1930 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1931 // FP is R31/X31, so no need to update MinGPR/MinG8R. 1932 HasGPSaveArea = true; 1933 } 1934 1935 if (PFI->usesPICBase()) { 1936 int FI = PFI->getPICBasePointerSaveIndex(); 1937 assert(FI && "No PIC Base Pointer Save Slot!"); 1938 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1939 1940 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 1941 HasGPSaveArea = true; 1942 } 1943 1944 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1945 if (RegInfo->hasBasePointer(MF)) { 1946 int FI = PFI->getBasePointerSaveIndex(); 1947 assert(FI && "No Base Pointer Save Slot!"); 1948 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1949 1950 unsigned BP = RegInfo->getBaseRegister(MF); 1951 if (PPC::G8RCRegClass.contains(BP)) { 1952 MinG8R = std::min<unsigned>(MinG8R, BP); 1953 HasG8SaveArea = true; 1954 } else if (PPC::GPRCRegClass.contains(BP)) { 1955 MinGPR = std::min<unsigned>(MinGPR, BP); 1956 HasGPSaveArea = true; 1957 } 1958 } 1959 1960 // General register save area starts right below the Floating-point 1961 // register save area. 1962 if (HasGPSaveArea || HasG8SaveArea) { 1963 // Move general register save area spill slots down, taking into account 1964 // the size of the Floating-point register save area. 1965 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1966 if (!GPRegs[i].isSpilledToReg()) { 1967 int FI = GPRegs[i].getFrameIdx(); 1968 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1969 } 1970 } 1971 1972 // Move general register save area spill slots down, taking into account 1973 // the size of the Floating-point register save area. 1974 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 1975 if (!G8Regs[i].isSpilledToReg()) { 1976 int FI = G8Regs[i].getFrameIdx(); 1977 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1978 } 1979 } 1980 1981 unsigned MinReg = 1982 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 1983 TRI->getEncodingValue(MinG8R)); 1984 1985 if (Subtarget.isPPC64()) { 1986 LowerBound -= (31 - MinReg + 1) * 8; 1987 } else { 1988 LowerBound -= (31 - MinReg + 1) * 4; 1989 } 1990 } 1991 1992 // For 32-bit only, the CR save area is below the general register 1993 // save area. For 64-bit SVR4, the CR save area is addressed relative 1994 // to the stack pointer and hence does not need an adjustment here. 1995 // Only CR2 (the first nonvolatile spilled) has an associated frame 1996 // index so that we have a single uniform save area. 1997 if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) { 1998 // Adjust the frame index of the CR spill slot. 1999 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2000 unsigned Reg = CSI[i].getReg(); 2001 2002 if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2) 2003 // Leave Darwin logic as-is. 2004 || (!Subtarget.isSVR4ABI() && 2005 (PPC::CRBITRCRegClass.contains(Reg) || 2006 PPC::CRRCRegClass.contains(Reg)))) { 2007 int FI = CSI[i].getFrameIdx(); 2008 2009 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2010 } 2011 } 2012 2013 LowerBound -= 4; // The CR save area is always 4 bytes long. 2014 } 2015 2016 if (HasVRSAVESaveArea) { 2017 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 2018 // which have the VRSAVE register class? 2019 // Adjust the frame index of the VRSAVE spill slot. 2020 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2021 unsigned Reg = CSI[i].getReg(); 2022 2023 if (PPC::VRSAVERCRegClass.contains(Reg)) { 2024 int FI = CSI[i].getFrameIdx(); 2025 2026 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2027 } 2028 } 2029 2030 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 2031 } 2032 2033 // Both Altivec and SPE have the same alignment and padding requirements 2034 // within the stack frame. 2035 if (HasVRSaveArea) { 2036 // Insert alignment padding, we need 16-byte alignment. Note: for positive 2037 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2038 // we are using negative number here (the stack grows downward). We should 2039 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2040 // is the alignment size ( n = 16 here) and y is the size after aligning. 2041 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2042 LowerBound &= ~(15); 2043 2044 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2045 int FI = VRegs[i].getFrameIdx(); 2046 2047 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2048 } 2049 } 2050 2051 addScavengingSpillSlot(MF, RS); 2052 } 2053 2054 void 2055 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2056 RegScavenger *RS) const { 2057 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2058 // a large stack, which will require scavenging a register to materialize a 2059 // large offset. 2060 2061 // We need to have a scavenger spill slot for spills if the frame size is 2062 // large. In case there is no free register for large-offset addressing, 2063 // this slot is used for the necessary emergency spill. Also, we need the 2064 // slot for dynamic stack allocations. 2065 2066 // The scavenger might be invoked if the frame offset does not fit into 2067 // the 16-bit immediate. We don't know the complete frame size here 2068 // because we've not yet computed callee-saved register spills or the 2069 // needed alignment padding. 2070 unsigned StackSize = determineFrameLayout(MF, true); 2071 MachineFrameInfo &MFI = MF.getFrameInfo(); 2072 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 2073 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 2074 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2075 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2076 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2077 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2078 unsigned Size = TRI.getSpillSize(RC); 2079 unsigned Align = TRI.getSpillAlignment(RC); 2080 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2081 2082 // Might we have over-aligned allocas? 2083 bool HasAlVars = MFI.hasVarSizedObjects() && 2084 MFI.getMaxAlignment() > getStackAlignment(); 2085 2086 // These kinds of spills might need two registers. 2087 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 2088 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2089 2090 } 2091 } 2092 2093 // This function checks if a callee saved gpr can be spilled to a volatile 2094 // vector register. This occurs for leaf functions when the option 2095 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2096 // which were not spilled to vectors, return false so the target independent 2097 // code can handle them by assigning a FrameIdx to a stack slot. 2098 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2099 MachineFunction &MF, const TargetRegisterInfo *TRI, 2100 std::vector<CalleeSavedInfo> &CSI) const { 2101 2102 if (CSI.empty()) 2103 return true; // Early exit if no callee saved registers are modified! 2104 2105 // Early exit if cannot spill gprs to volatile vector registers. 2106 MachineFrameInfo &MFI = MF.getFrameInfo(); 2107 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2108 return false; 2109 2110 // Build a BitVector of VSRs that can be used for spilling GPRs. 2111 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2112 BitVector BVCalleeSaved(TRI->getNumRegs()); 2113 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2114 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2115 for (unsigned i = 0; CSRegs[i]; ++i) 2116 BVCalleeSaved.set(CSRegs[i]); 2117 2118 for (unsigned Reg : BVAllocatable.set_bits()) { 2119 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2120 // used in the function. 2121 if (BVCalleeSaved[Reg] || 2122 (!PPC::F8RCRegClass.contains(Reg) && 2123 !PPC::VFRCRegClass.contains(Reg)) || 2124 (MF.getRegInfo().isPhysRegUsed(Reg))) 2125 BVAllocatable.reset(Reg); 2126 } 2127 2128 bool AllSpilledToReg = true; 2129 for (auto &CS : CSI) { 2130 if (BVAllocatable.none()) 2131 return false; 2132 2133 unsigned Reg = CS.getReg(); 2134 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2135 AllSpilledToReg = false; 2136 continue; 2137 } 2138 2139 unsigned VolatileVFReg = BVAllocatable.find_first(); 2140 if (VolatileVFReg < BVAllocatable.size()) { 2141 CS.setDstReg(VolatileVFReg); 2142 BVAllocatable.reset(VolatileVFReg); 2143 } else { 2144 AllSpilledToReg = false; 2145 } 2146 } 2147 return AllSpilledToReg; 2148 } 2149 2150 2151 bool 2152 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 2153 MachineBasicBlock::iterator MI, 2154 const std::vector<CalleeSavedInfo> &CSI, 2155 const TargetRegisterInfo *TRI) const { 2156 2157 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 2158 // Return false otherwise to maintain pre-existing behavior. 2159 if (!Subtarget.isSVR4ABI()) 2160 return false; 2161 2162 MachineFunction *MF = MBB.getParent(); 2163 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2164 DebugLoc DL; 2165 bool CRSpilled = false; 2166 MachineInstrBuilder CRMIB; 2167 2168 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2169 unsigned Reg = CSI[i].getReg(); 2170 // Only Darwin actually uses the VRSAVE register, but it can still appear 2171 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 2172 // Darwin, ignore it. 2173 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 2174 continue; 2175 2176 // CR2 through CR4 are the nonvolatile CR fields. 2177 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2178 2179 // Add the callee-saved register as live-in; it's killed at the spill. 2180 // Do not do this for callee-saved registers that are live-in to the 2181 // function because they will already be marked live-in and this will be 2182 // adding it for a second time. It is an error to add the same register 2183 // to the set more than once. 2184 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2185 bool IsLiveIn = MRI.isLiveIn(Reg); 2186 if (!IsLiveIn) 2187 MBB.addLiveIn(Reg); 2188 2189 if (CRSpilled && IsCRField) { 2190 CRMIB.addReg(Reg, RegState::ImplicitKill); 2191 continue; 2192 } 2193 2194 // Insert the spill to the stack frame. 2195 if (IsCRField) { 2196 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2197 if (Subtarget.isPPC64()) { 2198 // The actual spill will happen at the start of the prologue. 2199 FuncInfo->addMustSaveCR(Reg); 2200 } else { 2201 CRSpilled = true; 2202 FuncInfo->setSpillsCR(); 2203 2204 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2205 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2206 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2207 .addReg(Reg, RegState::ImplicitKill); 2208 2209 MBB.insert(MI, CRMIB); 2210 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2211 .addReg(PPC::R12, 2212 getKillRegState(true)), 2213 CSI[i].getFrameIdx())); 2214 } 2215 } else { 2216 if (CSI[i].isSpilledToReg()) { 2217 NumPESpillVSR++; 2218 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2219 .addReg(Reg, getKillRegState(true)); 2220 } else { 2221 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2222 // Use !IsLiveIn for the kill flag. 2223 // We do not want to kill registers that are live in this function 2224 // before their use because they will become undefined registers. 2225 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, 2226 CSI[i].getFrameIdx(), RC, TRI); 2227 } 2228 } 2229 } 2230 return true; 2231 } 2232 2233 static void 2234 restoreCRs(bool isPPC64, bool is31, 2235 bool CR2Spilled, bool CR3Spilled, bool CR4Spilled, 2236 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2237 const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) { 2238 2239 MachineFunction *MF = MBB.getParent(); 2240 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2241 DebugLoc DL; 2242 unsigned RestoreOp, MoveReg; 2243 2244 if (isPPC64) 2245 // This is handled during epilogue generation. 2246 return; 2247 else { 2248 // 32-bit: FP-relative 2249 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), 2250 PPC::R12), 2251 CSI[CSIIndex].getFrameIdx())); 2252 RestoreOp = PPC::MTOCRF; 2253 MoveReg = PPC::R12; 2254 } 2255 2256 if (CR2Spilled) 2257 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2258 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2259 2260 if (CR3Spilled) 2261 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2262 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2263 2264 if (CR4Spilled) 2265 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2266 .addReg(MoveReg, getKillRegState(true))); 2267 } 2268 2269 MachineBasicBlock::iterator PPCFrameLowering:: 2270 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2271 MachineBasicBlock::iterator I) const { 2272 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2273 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2274 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2275 // Add (actually subtract) back the amount the callee popped on return. 2276 if (int CalleeAmt = I->getOperand(1).getImm()) { 2277 bool is64Bit = Subtarget.isPPC64(); 2278 CalleeAmt *= -1; 2279 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2280 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2281 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2282 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2283 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2284 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2285 const DebugLoc &dl = I->getDebugLoc(); 2286 2287 if (isInt<16>(CalleeAmt)) { 2288 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2289 .addReg(StackReg, RegState::Kill) 2290 .addImm(CalleeAmt); 2291 } else { 2292 MachineBasicBlock::iterator MBBI = I; 2293 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2294 .addImm(CalleeAmt >> 16); 2295 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2296 .addReg(TmpReg, RegState::Kill) 2297 .addImm(CalleeAmt & 0xFFFF); 2298 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2299 .addReg(StackReg, RegState::Kill) 2300 .addReg(TmpReg); 2301 } 2302 } 2303 } 2304 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2305 return MBB.erase(I); 2306 } 2307 2308 bool 2309 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2310 MachineBasicBlock::iterator MI, 2311 std::vector<CalleeSavedInfo> &CSI, 2312 const TargetRegisterInfo *TRI) const { 2313 2314 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 2315 // Return false otherwise to maintain pre-existing behavior. 2316 if (!Subtarget.isSVR4ABI()) 2317 return false; 2318 2319 MachineFunction *MF = MBB.getParent(); 2320 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2321 bool CR2Spilled = false; 2322 bool CR3Spilled = false; 2323 bool CR4Spilled = false; 2324 unsigned CSIIndex = 0; 2325 2326 // Initialize insertion-point logic; we will be restoring in reverse 2327 // order of spill. 2328 MachineBasicBlock::iterator I = MI, BeforeI = I; 2329 bool AtStart = I == MBB.begin(); 2330 2331 if (!AtStart) 2332 --BeforeI; 2333 2334 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2335 unsigned Reg = CSI[i].getReg(); 2336 2337 // Only Darwin actually uses the VRSAVE register, but it can still appear 2338 // here if, for example, @llvm.eh.unwind.init() is used. If we're not on 2339 // Darwin, ignore it. 2340 if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI()) 2341 continue; 2342 2343 if (Reg == PPC::CR2) { 2344 CR2Spilled = true; 2345 // The spill slot is associated only with CR2, which is the 2346 // first nonvolatile spilled. Save it here. 2347 CSIIndex = i; 2348 continue; 2349 } else if (Reg == PPC::CR3) { 2350 CR3Spilled = true; 2351 continue; 2352 } else if (Reg == PPC::CR4) { 2353 CR4Spilled = true; 2354 continue; 2355 } else { 2356 // When we first encounter a non-CR register after seeing at 2357 // least one CR register, restore all spilled CRs together. 2358 if ((CR2Spilled || CR3Spilled || CR4Spilled) 2359 && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 2360 bool is31 = needsFP(*MF); 2361 restoreCRs(Subtarget.isPPC64(), is31, 2362 CR2Spilled, CR3Spilled, CR4Spilled, 2363 MBB, I, CSI, CSIIndex); 2364 CR2Spilled = CR3Spilled = CR4Spilled = false; 2365 } 2366 2367 if (CSI[i].isSpilledToReg()) { 2368 DebugLoc DL; 2369 NumPEReloadVSR++; 2370 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2371 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2372 } else { 2373 // Default behavior for non-CR saves. 2374 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2375 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2376 assert(I != MBB.begin() && 2377 "loadRegFromStackSlot didn't insert any code!"); 2378 } 2379 } 2380 2381 // Insert in reverse order. 2382 if (AtStart) 2383 I = MBB.begin(); 2384 else { 2385 I = BeforeI; 2386 ++I; 2387 } 2388 } 2389 2390 // If we haven't yet spilled the CRs, do so now. 2391 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2392 bool is31 = needsFP(*MF); 2393 restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, 2394 MBB, I, CSI, CSIIndex); 2395 } 2396 2397 return true; 2398 } 2399 2400 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2401 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2402 return false; 2403 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2404 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2405 } 2406