1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCFrameLowering.h" 14 #include "PPCInstrBuilder.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCMachineFunctionInfo.h" 17 #include "PPCSubtarget.h" 18 #include "PPCTargetMachine.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "framelowering" 32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 34 35 static cl::opt<bool> 36 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 37 cl::desc("Enable spills in prologue to vector registers."), 38 cl::init(false), cl::Hidden); 39 40 /// VRRegNo - Map from a numbered VR register to its enum value. 
41 /// 42 static const MCPhysReg VRRegNo[] = { 43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 47 }; 48 49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 50 if (STI.isAIXABI()) 51 return STI.isPPC64() ? 16 : 8; 52 // SVR4 ABI: 53 return STI.isPPC64() ? 16 : 4; 54 } 55 56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 57 if (STI.isAIXABI()) 58 return STI.isPPC64() ? 40 : 20; 59 return STI.isELFv2ABI() ? 24 : 40; 60 } 61 62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 63 // First slot in the general register save area. 64 return STI.isPPC64() ? -8U : -4U; 65 } 66 67 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 68 if (STI.isAIXABI() || STI.isPPC64()) 69 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 70 71 // 32-bit SVR4 ABI: 72 return 8; 73 } 74 75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 76 // SVR4 ABI: First slot in the general register save area. 77 return STI.isPPC64() 78 ? -16U 79 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 80 } 81 82 static unsigned computeCRSaveOffset() { 83 // The condition register save offset needs to be updated for AIX PPC32. 
84 return 8; 85 } 86 87 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 88 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 89 STI.getPlatformStackAlignment(), 0), 90 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 91 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 92 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 93 LinkageSize(computeLinkageSize(Subtarget)), 94 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 95 CRSaveOffset(computeCRSaveOffset()) {} 96 97 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 98 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 99 unsigned &NumEntries) const { 100 // Early exit if not using the SVR4 ABI. 101 if (!Subtarget.isSVR4ABI()) { 102 NumEntries = 0; 103 return nullptr; 104 } 105 106 // Floating-point register save area offsets. 107 #define CALLEE_SAVED_FPRS \ 108 {PPC::F31, -8}, \ 109 {PPC::F30, -16}, \ 110 {PPC::F29, -24}, \ 111 {PPC::F28, -32}, \ 112 {PPC::F27, -40}, \ 113 {PPC::F26, -48}, \ 114 {PPC::F25, -56}, \ 115 {PPC::F24, -64}, \ 116 {PPC::F23, -72}, \ 117 {PPC::F22, -80}, \ 118 {PPC::F21, -88}, \ 119 {PPC::F20, -96}, \ 120 {PPC::F19, -104}, \ 121 {PPC::F18, -112}, \ 122 {PPC::F17, -120}, \ 123 {PPC::F16, -128}, \ 124 {PPC::F15, -136}, \ 125 {PPC::F14, -144} 126 127 // 32-bit general purpose register save area offsets. 128 #define CALLEE_SAVED_GPRS32 \ 129 {PPC::R31, -4}, \ 130 {PPC::R30, -8}, \ 131 {PPC::R29, -12}, \ 132 {PPC::R28, -16}, \ 133 {PPC::R27, -20}, \ 134 {PPC::R26, -24}, \ 135 {PPC::R25, -28}, \ 136 {PPC::R24, -32}, \ 137 {PPC::R23, -36}, \ 138 {PPC::R22, -40}, \ 139 {PPC::R21, -44}, \ 140 {PPC::R20, -48}, \ 141 {PPC::R19, -52}, \ 142 {PPC::R18, -56}, \ 143 {PPC::R17, -60}, \ 144 {PPC::R16, -64}, \ 145 {PPC::R15, -68}, \ 146 {PPC::R14, -72} 147 148 // 64-bit general purpose register save area offsets. 
149 #define CALLEE_SAVED_GPRS64 \ 150 {PPC::X31, -8}, \ 151 {PPC::X30, -16}, \ 152 {PPC::X29, -24}, \ 153 {PPC::X28, -32}, \ 154 {PPC::X27, -40}, \ 155 {PPC::X26, -48}, \ 156 {PPC::X25, -56}, \ 157 {PPC::X24, -64}, \ 158 {PPC::X23, -72}, \ 159 {PPC::X22, -80}, \ 160 {PPC::X21, -88}, \ 161 {PPC::X20, -96}, \ 162 {PPC::X19, -104}, \ 163 {PPC::X18, -112}, \ 164 {PPC::X17, -120}, \ 165 {PPC::X16, -128}, \ 166 {PPC::X15, -136}, \ 167 {PPC::X14, -144} 168 169 // Vector register save area offsets. 170 #define CALLEE_SAVED_VRS \ 171 {PPC::V31, -16}, \ 172 {PPC::V30, -32}, \ 173 {PPC::V29, -48}, \ 174 {PPC::V28, -64}, \ 175 {PPC::V27, -80}, \ 176 {PPC::V26, -96}, \ 177 {PPC::V25, -112}, \ 178 {PPC::V24, -128}, \ 179 {PPC::V23, -144}, \ 180 {PPC::V22, -160}, \ 181 {PPC::V21, -176}, \ 182 {PPC::V20, -192} 183 184 // Note that the offsets here overlap, but this is fixed up in 185 // processFunctionBeforeFrameFinalized. 186 187 static const SpillSlot Offsets[] = { 188 CALLEE_SAVED_FPRS, 189 CALLEE_SAVED_GPRS32, 190 191 // CR save area offset. We map each of the nonvolatile CR fields 192 // to the slot for CR2, which is the first of the nonvolatile CR 193 // fields to be assigned, so that we only allocate one save slot. 194 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 195 {PPC::CR2, -4}, 196 197 // VRSAVE save area offset. 198 {PPC::VRSAVE, -4}, 199 200 CALLEE_SAVED_VRS, 201 202 // SPE register save area (overlaps Vector save area). 203 {PPC::S31, -8}, 204 {PPC::S30, -16}, 205 {PPC::S29, -24}, 206 {PPC::S28, -32}, 207 {PPC::S27, -40}, 208 {PPC::S26, -48}, 209 {PPC::S25, -56}, 210 {PPC::S24, -64}, 211 {PPC::S23, -72}, 212 {PPC::S22, -80}, 213 {PPC::S21, -88}, 214 {PPC::S20, -96}, 215 {PPC::S19, -104}, 216 {PPC::S18, -112}, 217 {PPC::S17, -120}, 218 {PPC::S16, -128}, 219 {PPC::S15, -136}, 220 {PPC::S14, -144}}; 221 222 static const SpillSlot Offsets64[] = { 223 CALLEE_SAVED_FPRS, 224 CALLEE_SAVED_GPRS64, 225 226 // VRSAVE save area offset. 
227 {PPC::VRSAVE, -4}, 228 229 CALLEE_SAVED_VRS 230 }; 231 232 if (Subtarget.isPPC64()) { 233 NumEntries = array_lengthof(Offsets64); 234 235 return Offsets64; 236 } else { 237 NumEntries = array_lengthof(Offsets); 238 239 return Offsets; 240 } 241 } 242 243 /// RemoveVRSaveCode - We have found that this function does not need any code 244 /// to manipulate the VRSAVE register, even though it uses vector registers. 245 /// This can happen when the only registers used are known to be live in or out 246 /// of the function. Remove all of the VRSAVE related code from the function. 247 /// FIXME: The removal of the code results in a compile failure at -O0 when the 248 /// function contains a function call, as the GPR containing original VRSAVE 249 /// contents is spilled and reloaded around the call. Without the prolog code, 250 /// the spill instruction refers to an undefined register. This code needs 251 /// to account for all uses of that GPR. 252 static void RemoveVRSaveCode(MachineInstr &MI) { 253 MachineBasicBlock *Entry = MI.getParent(); 254 MachineFunction *MF = Entry->getParent(); 255 256 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 257 MachineBasicBlock::iterator MBBI = MI; 258 ++MBBI; 259 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 260 MBBI->eraseFromParent(); 261 262 bool RemovedAllMTVRSAVEs = true; 263 // See if we can find and remove the MTVRSAVE instruction from all of the 264 // epilog blocks. 265 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 266 // If last instruction is a return instruction, add an epilogue 267 if (I->isReturnBlock()) { 268 bool FoundIt = false; 269 for (MBBI = I->end(); MBBI != I->begin(); ) { 270 --MBBI; 271 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 272 MBBI->eraseFromParent(); // remove it. 
273 FoundIt = true; 274 break; 275 } 276 } 277 RemovedAllMTVRSAVEs &= FoundIt; 278 } 279 } 280 281 // If we found and removed all MTVRSAVE instructions, remove the read of 282 // VRSAVE as well. 283 if (RemovedAllMTVRSAVEs) { 284 MBBI = MI; 285 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 286 --MBBI; 287 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 288 MBBI->eraseFromParent(); 289 } 290 291 // Finally, nuke the UPDATE_VRSAVE. 292 MI.eraseFromParent(); 293 } 294 295 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 296 // instruction selector. Based on the vector registers that have been used, 297 // transform this into the appropriate ORI instruction. 298 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 299 MachineFunction *MF = MI.getParent()->getParent(); 300 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 301 DebugLoc dl = MI.getDebugLoc(); 302 303 const MachineRegisterInfo &MRI = MF->getRegInfo(); 304 unsigned UsedRegMask = 0; 305 for (unsigned i = 0; i != 32; ++i) 306 if (MRI.isPhysRegModified(VRRegNo[i])) 307 UsedRegMask |= 1 << (31-i); 308 309 // Live in and live out values already must be in the mask, so don't bother 310 // marking them. 311 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 312 unsigned RegNo = TRI->getEncodingValue(LI.first); 313 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 314 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 315 } 316 317 // Live out registers appear as use operands on return instructions. 
318 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 319 UsedRegMask != 0 && BI != BE; ++BI) { 320 const MachineBasicBlock &MBB = *BI; 321 if (!MBB.isReturnBlock()) 322 continue; 323 const MachineInstr &Ret = MBB.back(); 324 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 325 const MachineOperand &MO = Ret.getOperand(I); 326 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 327 continue; 328 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 329 UsedRegMask &= ~(1 << (31-RegNo)); 330 } 331 } 332 333 // If no registers are used, turn this into a copy. 334 if (UsedRegMask == 0) { 335 // Remove all VRSAVE code. 336 RemoveVRSaveCode(MI); 337 return; 338 } 339 340 Register SrcReg = MI.getOperand(1).getReg(); 341 Register DstReg = MI.getOperand(0).getReg(); 342 343 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 344 if (DstReg != SrcReg) 345 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 346 .addReg(SrcReg) 347 .addImm(UsedRegMask); 348 else 349 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 350 .addReg(SrcReg, RegState::Kill) 351 .addImm(UsedRegMask); 352 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 353 if (DstReg != SrcReg) 354 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 355 .addReg(SrcReg) 356 .addImm(UsedRegMask >> 16); 357 else 358 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 359 .addReg(SrcReg, RegState::Kill) 360 .addImm(UsedRegMask >> 16); 361 } else { 362 if (DstReg != SrcReg) 363 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 364 .addReg(SrcReg) 365 .addImm(UsedRegMask >> 16); 366 else 367 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 368 .addReg(SrcReg, RegState::Kill) 369 .addImm(UsedRegMask >> 16); 370 371 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 372 .addReg(DstReg, RegState::Kill) 373 .addImm(UsedRegMask & 0xFFFF); 374 } 375 376 // Remove the old UPDATE_VRSAVE instruction. 
377 MI.eraseFromParent(); 378 } 379 380 static bool spillsCR(const MachineFunction &MF) { 381 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 382 return FuncInfo->isCRSpilled(); 383 } 384 385 static bool spillsVRSAVE(const MachineFunction &MF) { 386 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 387 return FuncInfo->isVRSAVESpilled(); 388 } 389 390 static bool hasSpills(const MachineFunction &MF) { 391 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 392 return FuncInfo->hasSpills(); 393 } 394 395 static bool hasNonRISpills(const MachineFunction &MF) { 396 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 397 return FuncInfo->hasNonRISpills(); 398 } 399 400 /// MustSaveLR - Return true if this function requires that we save the LR 401 /// register onto the stack in the prolog and restore it in the epilog of the 402 /// function. 403 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 404 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 405 406 // We need a save/restore of LR if there is any def of LR (which is 407 // defined by calls, including the PIC setup sequence), or if there is 408 // some use of the LR stack slot (e.g. for builtin_return_address). 409 // (LR comes in 32 and 64 bit versions.) 410 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 411 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 412 } 413 414 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 415 /// call frame size. Update the MachineFunction object with the stack size. 
416 unsigned 417 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 418 bool UseEstimate) const { 419 unsigned NewMaxCallFrameSize = 0; 420 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 421 &NewMaxCallFrameSize); 422 MF.getFrameInfo().setStackSize(FrameSize); 423 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 424 return FrameSize; 425 } 426 427 /// determineFrameLayout - Determine the size of the frame and maximum call 428 /// frame size. 429 unsigned 430 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 431 bool UseEstimate, 432 unsigned *NewMaxCallFrameSize) const { 433 const MachineFrameInfo &MFI = MF.getFrameInfo(); 434 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 435 436 // Get the number of bytes to allocate from the FrameInfo 437 unsigned FrameSize = 438 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 439 440 // Get stack alignments. The frame must be aligned to the greatest of these: 441 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI 442 unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame 443 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; 444 445 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 446 447 unsigned LR = RegInfo->getRARegister(); 448 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 449 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 450 !MFI.adjustsStack() && // No calls. 451 !MustSaveLR(MF, LR) && // No need to save LR. 452 !FI->mustSaveTOC() && // No need to save TOC. 453 !RegInfo->hasBasePointer(MF); // No special alignment. 454 455 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 456 // code if all local vars are reg-allocated. 
457 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 458 459 // Check whether we can skip adjusting the stack pointer (by using red zone) 460 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 461 // No need for frame 462 return 0; 463 } 464 465 // Get the maximum call frame size of all the calls. 466 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 467 468 // Maximum call frame needs to be at least big enough for linkage area. 469 unsigned minCallFrameSize = getLinkageSize(); 470 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 471 472 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 473 // that allocations will be aligned. 474 if (MFI.hasVarSizedObjects()) 475 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; 476 477 // Update the new max call frame size if the caller passes in a valid pointer. 478 if (NewMaxCallFrameSize) 479 *NewMaxCallFrameSize = maxCallFrameSize; 480 481 // Include call frame size in total. 482 FrameSize += maxCallFrameSize; 483 484 // Make sure the frame is aligned. 485 FrameSize = (FrameSize + AlignMask) & ~AlignMask; 486 487 return FrameSize; 488 } 489 490 // hasFP - Return true if the specified function actually has a dedicated frame 491 // pointer register. 492 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 493 const MachineFrameInfo &MFI = MF.getFrameInfo(); 494 // FIXME: This is pretty much broken by design: hasFP() might be called really 495 // early, before the stack layout was calculated and thus hasFP() might return 496 // true or false here depending on the time of call. 497 return (MFI.getStackSize()) && needsFP(MF); 498 } 499 500 // needsFP - Return true if the specified function should have a dedicated frame 501 // pointer register. This is true if the function has variable sized allocas or 502 // if frame pointer elimination is disabled. 
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Naked functions have no stack frame pushed, so we don't have a frame
  // pointer.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  // Besides disabled frame-pointer elimination and variable sized objects, a
  // frame pointer is also wanted for stack maps, patch points, and fast calls
  // when guaranteed tail call optimization is enabled.
  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
    MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
    (MF.getTarget().Options.GuaranteedTailCallOpt &&
     MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}

// replaceFPWithRealFP - Rewrite every register operand in MF that names one of
// the placeholder registers PPC::FP, PPC::FP8, PPC::BP or PPC::BP8 to a
// concrete register:
//   - FP/FP8 become R31/X31 when needsFP(MF) is true, otherwise R1/X1.
//   - BP/BP8 become getBaseRegister(MF)/X30 when the function has a base
//     pointer, otherwise the same registers chosen for FP/FP8.
void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
  bool is31 = needsFP(MF);
  unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
  unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;

  // Visit every instruction of every block (each block is walked from its end
  // towards its beginning) and patch the operands in place.
  for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
       BI != BE; ++BI)
    for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
      --MBBI;
      for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
        MachineOperand &MO = MBBI->getOperand(I);
        if (!MO.isReg())
          continue;

        // Any register other than the four placeholders is left untouched.
        switch (MO.getReg()) {
        case PPC::FP:
          MO.setReg(FPReg);
          break;
        case PPC::FP8:
          MO.setReg(FP8Reg);
          break;
        case PPC::BP:
          MO.setReg(BPReg);
          break;
        case PPC::BP8:
          MO.setReg(BP8Reg);
          break;

        }
      }
    }
}

/*  This function will do the following:
    - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
      respectively (defaults recommended by the ABI) and return true
    - If MBB is not an entry block, initialize the register scavenger and look
      for available registers.
      - If the defaults (R0/R12) are available, return true
      - If TwoUniqueRegsRequired is set to true, it looks for two unique
        registers. Otherwise, look for a single available register.
        - If the required registers are found, set SR1 and SR2 and return true.
        - If the required registers are not found, set SR2 or both SR1 and SR2 to
          PPC::NoRegister and return false.

    Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
    is not set, this function will attempt to find two different registers, but
    still return true if only one register is available (and set SR1 == SR2).

    UseAtEnd selects where in MBB the registers are needed: when false, the
    state on entry to the block is inspected; when true, the scavenger is first
    advanced to the first terminator (or to the last instruction if the block
    has no terminator). SR1 and SR2 may be null; SR2 must not be passed without
    SR1 (asserted). On 64-bit targets the X registers are used instead of the R
    registers.
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      unsigned *SR1,
                                      unsigned *SR2) const {
  RegScavenger RS;
  unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  // "Exit" is only checked when the registers are needed at the end of the
  // block, "entry" only when they are needed at the start.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    // Nothing to step over if that instruction is the first in the block.
    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  // The CSRegs list is terminated by a zero entry.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
//
// Concretely this returns true when the function has a base pointer, its
// maximum alignment is greater than 1, and either the negated frame size does
// not fit in a signed 16-bit immediate or the target has no red zone (i.e. it
// is 32-bit SVR4).
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned MaxAlign = MFI.getMaxAlignment();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
}

// canUseAsPrologue - Return true if the scratch registers a prologue needs can
// be found at the start of MBB. Only the availability answer is used; no
// output pointers are passed to findScratchRegister (their defaults are
// declared outside this file section).
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  // findScratchRegister takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

// canUseAsEpilogue - Return true if a scratch register can be found at the end
// of MBB. The remaining findScratchRegister arguments are left at their
// declared defaults (not visible in this file section).
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  // findScratchRegister takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

// stackUpdateCanBeMoved - Return true if the stack pointer update may be moved
// within the prologue for MF. All of the following must hold: register and
// function info exist, the target is 64-bit ELFv2, the frame is non-empty and
// no larger than the red zone, there is no frame or base pointer, the function
// has no fast calls and does not use the PIC base, and frame index scavenging
// is not required.
bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  DebugLoc dl;
  bool needsCFI = MF.needsFrameMoves();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isAIXABI = Subtarget.isAIXABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");

  // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
  // process it.
  if (!isSVR4ABI)
    for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
      if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
        if (isAIXABI)
          report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
        HandleVRSaveUpdate(*MBBI, TII);
        break;
      }
    }

  // Move MBBI back to the beginning of the prologue block.
  MBBI = MBB.begin();

  // Work out frame sizes.
  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
  int NegFrameSize = -FrameSize;
  if (!isInt<32>(NegFrameSize))
    llvm_unreachable("Unhandled stack size!");

  if (MFI.isFrameAddressTaken())
    replaceFPWithRealFP(MF);

  // Check if the link register (LR) must be saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  bool MustSaveTOC = FI->mustSaveTOC();
  const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
785 bool HasFP = hasFP(MF); 786 bool HasBP = RegInfo->hasBasePointer(MF); 787 bool HasRedZone = isPPC64 || !isSVR4ABI; 788 789 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 790 Register BPReg = RegInfo->getBaseRegister(MF); 791 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 792 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 793 unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 794 unsigned ScratchReg = 0; 795 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 796 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 797 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 798 : PPC::MFLR ); 799 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 800 : PPC::STW ); 801 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 802 : PPC::STWU ); 803 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 804 : PPC::STWUX); 805 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 806 : PPC::LIS ); 807 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 808 : PPC::ORI ); 809 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 810 : PPC::OR ); 811 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 812 : PPC::SUBFC); 813 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 814 : PPC::SUBFIC); 815 816 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 817 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 818 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 819 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 820 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 821 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 822 823 // Using the same bool variable as below to suppress compiler warnings. 
824 bool SingleScratchReg = 825 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 826 &ScratchReg, &TempReg); 827 assert(SingleScratchReg && 828 "Required number of registers not available in this block"); 829 830 SingleScratchReg = ScratchReg == TempReg; 831 832 int LROffset = getReturnSaveOffset(); 833 834 int FPOffset = 0; 835 if (HasFP) { 836 if (isSVR4ABI) { 837 MachineFrameInfo &MFI = MF.getFrameInfo(); 838 int FPIndex = FI->getFramePointerSaveIndex(); 839 assert(FPIndex && "No Frame Pointer Save Slot!"); 840 FPOffset = MFI.getObjectOffset(FPIndex); 841 } else { 842 FPOffset = getFramePointerSaveOffset(); 843 } 844 } 845 846 int BPOffset = 0; 847 if (HasBP) { 848 if (isSVR4ABI) { 849 MachineFrameInfo &MFI = MF.getFrameInfo(); 850 int BPIndex = FI->getBasePointerSaveIndex(); 851 assert(BPIndex && "No Base Pointer Save Slot!"); 852 BPOffset = MFI.getObjectOffset(BPIndex); 853 } else { 854 BPOffset = getBasePointerSaveOffset(); 855 } 856 } 857 858 int PBPOffset = 0; 859 if (FI->usesPICBase()) { 860 MachineFrameInfo &MFI = MF.getFrameInfo(); 861 int PBPIndex = FI->getPICBasePointerSaveIndex(); 862 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 863 PBPOffset = MFI.getObjectOffset(PBPIndex); 864 } 865 866 // Get stack alignments. 867 unsigned MaxAlign = MFI.getMaxAlignment(); 868 if (HasBP && MaxAlign > 1) 869 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && 870 "Invalid alignment!"); 871 872 // Frames of 32KB & larger require special handling because they cannot be 873 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 874 bool isLargeFrame = !isInt<16>(NegFrameSize); 875 876 assert((isPPC64 || !MustSaveCR) && 877 "Prologue CR saving supported only in 64-bit mode"); 878 879 if (MustSaveCR && isAIXABI) 880 report_fatal_error("Prologue CR saving is unimplemented on AIX."); 881 882 // Check if we can move the stack update instruction (stdu) down the prologue 883 // past the callee saves. 
Hopefully this will avoid the situation where the 884 // saves are waiting for the update on the store with update to complete. 885 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 886 bool MovingStackUpdateDown = false; 887 888 // Check if we can move the stack update. 889 if (stackUpdateCanBeMoved(MF)) { 890 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 891 for (CalleeSavedInfo CSI : Info) { 892 int FrIdx = CSI.getFrameIdx(); 893 // If the frame index is not negative the callee saved info belongs to a 894 // stack object that is not a fixed stack object. We ignore non-fixed 895 // stack objects because we won't move the stack update pointer past them. 896 if (FrIdx >= 0) 897 continue; 898 899 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 900 StackUpdateLoc++; 901 MovingStackUpdateDown = true; 902 } else { 903 // We need all of the Frame Indices to meet these conditions. 904 // If they do not, abort the whole operation. 905 StackUpdateLoc = MBBI; 906 MovingStackUpdateDown = false; 907 break; 908 } 909 } 910 911 // If the operation was not aborted then update the object offset. 912 if (MovingStackUpdateDown) { 913 for (CalleeSavedInfo CSI : Info) { 914 int FrIdx = CSI.getFrameIdx(); 915 if (FrIdx < 0) 916 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 917 } 918 } 919 } 920 921 // If we need to spill the CR and the LR but we don't have two separate 922 // registers available, we must spill them one at a time 923 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 924 // In the ELFv2 ABI, we are not required to save all CR fields. 925 // If only one or two CR fields are clobbered, it is more efficient to use 926 // mfocrf to selectively save just those fields, because mfocrf has short 927 // latency compares to mfcr. 
928 unsigned MfcrOpcode = PPC::MFCR8; 929 unsigned CrState = RegState::ImplicitKill; 930 if (isELFv2ABI && MustSaveCRs.size() == 1) { 931 MfcrOpcode = PPC::MFOCRF8; 932 CrState = RegState::Kill; 933 } 934 MachineInstrBuilder MIB = 935 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 936 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 937 MIB.addReg(MustSaveCRs[i], CrState); 938 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 939 .addReg(TempReg, getKillRegState(true)) 940 .addImm(getCRSaveOffset()) 941 .addReg(SPReg); 942 } 943 944 if (MustSaveLR) 945 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 946 947 if (MustSaveCR && 948 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 949 // In the ELFv2 ABI, we are not required to save all CR fields. 950 // If only one or two CR fields are clobbered, it is more efficient to use 951 // mfocrf to selectively save just those fields, because mfocrf has short 952 // latency compares to mfcr. 953 unsigned MfcrOpcode = PPC::MFCR8; 954 unsigned CrState = RegState::ImplicitKill; 955 if (isELFv2ABI && MustSaveCRs.size() == 1) { 956 MfcrOpcode = PPC::MFOCRF8; 957 CrState = RegState::Kill; 958 } 959 MachineInstrBuilder MIB = 960 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 961 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 962 MIB.addReg(MustSaveCRs[i], CrState); 963 } 964 965 if (HasRedZone) { 966 if (HasFP) 967 BuildMI(MBB, MBBI, dl, StoreInst) 968 .addReg(FPReg) 969 .addImm(FPOffset) 970 .addReg(SPReg); 971 if (FI->usesPICBase()) 972 BuildMI(MBB, MBBI, dl, StoreInst) 973 .addReg(PPC::R30) 974 .addImm(PBPOffset) 975 .addReg(SPReg); 976 if (HasBP) 977 BuildMI(MBB, MBBI, dl, StoreInst) 978 .addReg(BPReg) 979 .addImm(BPOffset) 980 .addReg(SPReg); 981 } 982 983 if (MustSaveLR) 984 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 985 .addReg(ScratchReg, getKillRegState(true)) 986 .addImm(LROffset) 987 .addReg(SPReg); 988 989 if (MustSaveCR && 990 !(SingleScratchReg && MustSaveLR)) { // will only 
occur for PPC64 991 assert(HasRedZone && "A red zone is always available on PPC64"); 992 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 993 .addReg(TempReg, getKillRegState(true)) 994 .addImm(getCRSaveOffset()) 995 .addReg(SPReg); 996 } 997 998 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 999 if (!FrameSize) 1000 return; 1001 1002 // Adjust stack pointer: r1 += NegFrameSize. 1003 // If there is a preferred stack alignment, align R1 now 1004 1005 if (HasBP && HasRedZone) { 1006 // Save a copy of r1 as the base pointer. 1007 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1008 .addReg(SPReg) 1009 .addReg(SPReg); 1010 } 1011 1012 // Have we generated a STUX instruction to claim stack frame? If so, 1013 // the negated frame size will be placed in ScratchReg. 1014 bool HasSTUX = false; 1015 1016 // This condition must be kept in sync with canUseAsPrologue. 1017 if (HasBP && MaxAlign > 1) { 1018 if (isPPC64) 1019 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1020 .addReg(SPReg) 1021 .addImm(0) 1022 .addImm(64 - Log2_32(MaxAlign)); 1023 else // PPC32... 
1024 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1025 .addReg(SPReg) 1026 .addImm(0) 1027 .addImm(32 - Log2_32(MaxAlign)) 1028 .addImm(31); 1029 if (!isLargeFrame) { 1030 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1031 .addReg(ScratchReg, RegState::Kill) 1032 .addImm(NegFrameSize); 1033 } else { 1034 assert(!SingleScratchReg && "Only a single scratch reg available"); 1035 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1036 .addImm(NegFrameSize >> 16); 1037 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1038 .addReg(TempReg, RegState::Kill) 1039 .addImm(NegFrameSize & 0xFFFF); 1040 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1041 .addReg(ScratchReg, RegState::Kill) 1042 .addReg(TempReg, RegState::Kill); 1043 } 1044 1045 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1046 .addReg(SPReg, RegState::Kill) 1047 .addReg(SPReg) 1048 .addReg(ScratchReg); 1049 HasSTUX = true; 1050 1051 } else if (!isLargeFrame) { 1052 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1053 .addReg(SPReg) 1054 .addImm(NegFrameSize) 1055 .addReg(SPReg); 1056 1057 } else { 1058 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1059 .addImm(NegFrameSize >> 16); 1060 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1061 .addReg(ScratchReg, RegState::Kill) 1062 .addImm(NegFrameSize & 0xFFFF); 1063 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1064 .addReg(SPReg, RegState::Kill) 1065 .addReg(SPReg) 1066 .addReg(ScratchReg); 1067 HasSTUX = true; 1068 } 1069 1070 // Save the TOC register after the stack pointer update if a prologue TOC 1071 // save is required for the function. 
1072 if (MustSaveTOC) { 1073 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1074 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1075 .addReg(TOCReg, getKillRegState(true)) 1076 .addImm(TOCSaveOffset) 1077 .addReg(SPReg); 1078 } 1079 1080 if (!HasRedZone) { 1081 assert(!isPPC64 && "A red zone is always available on PPC64"); 1082 if (HasSTUX) { 1083 // The negated frame size is in ScratchReg, and the SPReg has been 1084 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1085 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1086 // the stack frame (i.e. the old SP), ideally, we would put the old 1087 // SP into a register and use it as the base for the stores. The 1088 // problem is that the only available register may be ScratchReg, 1089 // which could be R0, and R0 cannot be used as a base address. 1090 1091 // First, set ScratchReg to the old SP. This may need to be modified 1092 // later. 1093 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1094 .addReg(ScratchReg, RegState::Kill) 1095 .addReg(SPReg); 1096 1097 if (ScratchReg == PPC::R0) { 1098 // R0 cannot be used as a base register, but it can be used as an 1099 // index in a store-indexed. 1100 int LastOffset = 0; 1101 if (HasFP) { 1102 // R0 += (FPOffset-LastOffset). 1103 // Need addic, since addi treats R0 as 0. 1104 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1105 .addReg(ScratchReg) 1106 .addImm(FPOffset-LastOffset); 1107 LastOffset = FPOffset; 1108 // Store FP into *R0. 1109 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1110 .addReg(FPReg, RegState::Kill) // Save FP. 1111 .addReg(PPC::ZERO) 1112 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1113 } 1114 if (FI->usesPICBase()) { 1115 // R0 += (PBPOffset-LastOffset). 
1116 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1117 .addReg(ScratchReg) 1118 .addImm(PBPOffset-LastOffset); 1119 LastOffset = PBPOffset; 1120 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1121 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1122 .addReg(PPC::ZERO) 1123 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1124 } 1125 if (HasBP) { 1126 // R0 += (BPOffset-LastOffset). 1127 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1128 .addReg(ScratchReg) 1129 .addImm(BPOffset-LastOffset); 1130 LastOffset = BPOffset; 1131 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1132 .addReg(BPReg, RegState::Kill) // Save BP. 1133 .addReg(PPC::ZERO) 1134 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1135 // BP = R0-LastOffset 1136 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1137 .addReg(ScratchReg, RegState::Kill) 1138 .addImm(-LastOffset); 1139 } 1140 } else { 1141 // ScratchReg is not R0, so use it as the base register. It is 1142 // already set to the old SP, so we can use the offsets directly. 1143 1144 // Now that the stack frame has been allocated, save all the necessary 1145 // registers using ScratchReg as the base address. 1146 if (HasFP) 1147 BuildMI(MBB, MBBI, dl, StoreInst) 1148 .addReg(FPReg) 1149 .addImm(FPOffset) 1150 .addReg(ScratchReg); 1151 if (FI->usesPICBase()) 1152 BuildMI(MBB, MBBI, dl, StoreInst) 1153 .addReg(PPC::R30) 1154 .addImm(PBPOffset) 1155 .addReg(ScratchReg); 1156 if (HasBP) { 1157 BuildMI(MBB, MBBI, dl, StoreInst) 1158 .addReg(BPReg) 1159 .addImm(BPOffset) 1160 .addReg(ScratchReg); 1161 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1162 .addReg(ScratchReg, RegState::Kill) 1163 .addReg(ScratchReg); 1164 } 1165 } 1166 } else { 1167 // The frame size is a known 16-bit constant (fitting in the immediate 1168 // field of STWU). To be here we have to be compiling for PPC32. 1169 // Since the SPReg has been decreased by FrameSize, add it back to each 1170 // offset. 
1171 if (HasFP) 1172 BuildMI(MBB, MBBI, dl, StoreInst) 1173 .addReg(FPReg) 1174 .addImm(FrameSize + FPOffset) 1175 .addReg(SPReg); 1176 if (FI->usesPICBase()) 1177 BuildMI(MBB, MBBI, dl, StoreInst) 1178 .addReg(PPC::R30) 1179 .addImm(FrameSize + PBPOffset) 1180 .addReg(SPReg); 1181 if (HasBP) { 1182 BuildMI(MBB, MBBI, dl, StoreInst) 1183 .addReg(BPReg) 1184 .addImm(FrameSize + BPOffset) 1185 .addReg(SPReg); 1186 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1187 .addReg(SPReg) 1188 .addImm(FrameSize); 1189 } 1190 } 1191 } 1192 1193 // Add Call Frame Information for the instructions we generated above. 1194 if (needsCFI) { 1195 unsigned CFIIndex; 1196 1197 if (HasBP) { 1198 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1199 // because if the stack needed aligning then CFA won't be at a fixed 1200 // offset from FP/SP. 1201 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1202 CFIIndex = MF.addFrameInst( 1203 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1204 } else { 1205 // Adjust the definition of CFA to account for the change in SP. 1206 assert(NegFrameSize); 1207 CFIIndex = MF.addFrameInst( 1208 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 1209 } 1210 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1211 .addCFIIndex(CFIIndex); 1212 1213 if (HasFP) { 1214 // Describe where FP was saved, at a fixed offset from CFA. 1215 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1216 CFIIndex = MF.addFrameInst( 1217 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1218 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1219 .addCFIIndex(CFIIndex); 1220 } 1221 1222 if (FI->usesPICBase()) { 1223 // Describe where FP was saved, at a fixed offset from CFA. 
1224 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1225 CFIIndex = MF.addFrameInst( 1226 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1227 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1228 .addCFIIndex(CFIIndex); 1229 } 1230 1231 if (HasBP) { 1232 // Describe where BP was saved, at a fixed offset from CFA. 1233 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1234 CFIIndex = MF.addFrameInst( 1235 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1236 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1237 .addCFIIndex(CFIIndex); 1238 } 1239 1240 if (MustSaveLR) { 1241 // Describe where LR was saved, at a fixed offset from CFA. 1242 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1243 CFIIndex = MF.addFrameInst( 1244 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1245 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1246 .addCFIIndex(CFIIndex); 1247 } 1248 } 1249 1250 // If there is a frame pointer, copy R1 into R31 1251 if (HasFP) { 1252 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1253 .addReg(SPReg) 1254 .addReg(SPReg); 1255 1256 if (!HasBP && needsCFI) { 1257 // Change the definition of CFA from SP+offset to FP+offset, because SP 1258 // will change at every alloca. 1259 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1260 unsigned CFIIndex = MF.addFrameInst( 1261 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1262 1263 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1264 .addCFIIndex(CFIIndex); 1265 } 1266 } 1267 1268 if (needsCFI) { 1269 // Describe where callee saved registers were saved, at fixed offsets from 1270 // CFA. 1271 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1272 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1273 unsigned Reg = CSI[I].getReg(); 1274 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1275 1276 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1277 // subregisters of CR2. 
We just need to emit a move of CR2. 1278 if (PPC::CRBITRCRegClass.contains(Reg)) 1279 continue; 1280 1281 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1282 continue; 1283 1284 // For SVR4, don't emit a move for the CR spill slot if we haven't 1285 // spilled CRs. 1286 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1287 && !MustSaveCR) 1288 continue; 1289 1290 // For 64-bit SVR4 when we have spilled CRs, the spill location 1291 // is SP+8, not a frame-relative slot. 1292 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1293 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1294 // the whole CR word. In the ELFv2 ABI, every CR that was 1295 // actually saved gets its own CFI record. 1296 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1297 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1298 nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset())); 1299 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1300 .addCFIIndex(CFIIndex); 1301 continue; 1302 } 1303 1304 if (CSI[I].isSpilledToReg()) { 1305 unsigned SpilledReg = CSI[I].getDstReg(); 1306 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1307 nullptr, MRI->getDwarfRegNum(Reg, true), 1308 MRI->getDwarfRegNum(SpilledReg, true))); 1309 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1310 .addCFIIndex(CFIRegister); 1311 } else { 1312 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1313 // We have changed the object offset above but we do not want to change 1314 // the actual offsets in the CFI instruction so we have to undo the 1315 // offset change here. 
1316 if (MovingStackUpdateDown) 1317 Offset -= NegFrameSize; 1318 1319 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1320 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1321 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1322 .addCFIIndex(CFIIndex); 1323 } 1324 } 1325 } 1326 } 1327 1328 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1329 MachineBasicBlock &MBB) const { 1330 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1331 DebugLoc dl; 1332 1333 if (MBBI != MBB.end()) 1334 dl = MBBI->getDebugLoc(); 1335 1336 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1337 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1338 1339 // Get alignment info so we know how to restore the SP. 1340 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1341 1342 // Get the number of bytes allocated from the FrameInfo. 1343 int FrameSize = MFI.getStackSize(); 1344 1345 // Get processor type. 1346 bool isPPC64 = Subtarget.isPPC64(); 1347 // Get the ABI. 1348 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1349 1350 // Check if the link register (LR) has been saved. 1351 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1352 bool MustSaveLR = FI->mustSaveLR(); 1353 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 1354 bool MustSaveCR = !MustSaveCRs.empty(); 1355 // Do we have a frame pointer and/or base pointer for this function? 1356 bool HasFP = hasFP(MF); 1357 bool HasBP = RegInfo->hasBasePointer(MF); 1358 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1359 1360 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1361 Register BPReg = RegInfo->getBaseRegister(MF); 1362 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1363 unsigned ScratchReg = 0; 1364 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1365 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1366 : PPC::MTLR ); 1367 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? 
PPC::LD 1368 : PPC::LWZ ); 1369 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1370 : PPC::LIS ); 1371 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1372 : PPC::OR ); 1373 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1374 : PPC::ORI ); 1375 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1376 : PPC::ADDI ); 1377 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1378 : PPC::ADD4 ); 1379 1380 int LROffset = getReturnSaveOffset(); 1381 1382 int FPOffset = 0; 1383 1384 // Using the same bool variable as below to suppress compiler warnings. 1385 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1386 &TempReg); 1387 assert(SingleScratchReg && 1388 "Could not find an available scratch register"); 1389 1390 SingleScratchReg = ScratchReg == TempReg; 1391 1392 if (HasFP) { 1393 if (isSVR4ABI) { 1394 int FPIndex = FI->getFramePointerSaveIndex(); 1395 assert(FPIndex && "No Frame Pointer Save Slot!"); 1396 FPOffset = MFI.getObjectOffset(FPIndex); 1397 } else { 1398 FPOffset = getFramePointerSaveOffset(); 1399 } 1400 } 1401 1402 int BPOffset = 0; 1403 if (HasBP) { 1404 if (isSVR4ABI) { 1405 int BPIndex = FI->getBasePointerSaveIndex(); 1406 assert(BPIndex && "No Base Pointer Save Slot!"); 1407 BPOffset = MFI.getObjectOffset(BPIndex); 1408 } else { 1409 BPOffset = getBasePointerSaveOffset(); 1410 } 1411 } 1412 1413 int PBPOffset = 0; 1414 if (FI->usesPICBase()) { 1415 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1416 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1417 PBPOffset = MFI.getObjectOffset(PBPIndex); 1418 } 1419 1420 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1421 1422 if (IsReturnBlock) { 1423 unsigned RetOpcode = MBBI->getOpcode(); 1424 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1425 RetOpcode == PPC::TCRETURNdi || 1426 RetOpcode == PPC::TCRETURNai || 1427 RetOpcode == PPC::TCRETURNri8 || 1428 RetOpcode == PPC::TCRETURNdi8 || 1429 RetOpcode 
== PPC::TCRETURNai8; 1430 1431 if (UsesTCRet) { 1432 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1433 MachineOperand &StackAdjust = MBBI->getOperand(1); 1434 assert(StackAdjust.isImm() && "Expecting immediate value."); 1435 // Adjust stack pointer. 1436 int StackAdj = StackAdjust.getImm(); 1437 int Delta = StackAdj - MaxTCRetDelta; 1438 assert((Delta >= 0) && "Delta must be positive"); 1439 if (MaxTCRetDelta>0) 1440 FrameSize += (StackAdj +Delta); 1441 else 1442 FrameSize += StackAdj; 1443 } 1444 } 1445 1446 // Frames of 32KB & larger require special handling because they cannot be 1447 // indexed into with a simple LD/LWZ immediate offset operand. 1448 bool isLargeFrame = !isInt<16>(FrameSize); 1449 1450 // On targets without red zone, the SP needs to be restored last, so that 1451 // all live contents of the stack frame are upwards of the SP. This means 1452 // that we cannot restore SP just now, since there may be more registers 1453 // to restore from the stack frame (e.g. R31). If the frame size is not 1454 // a simple immediate value, we will need a spare register to hold the 1455 // restored SP. If the frame size is known and small, we can simply adjust 1456 // the offsets of the registers to be restored, and still use SP to restore 1457 // them. In such case, the final update of SP will be to add the frame 1458 // size to it. 1459 // To simplify the code, set RBReg to the base register used to restore 1460 // values from the stack, and set SPAdd to the value that needs to be added 1461 // to the SP at the end. The default values are as if red zone was present. 1462 unsigned RBReg = SPReg; 1463 unsigned SPAdd = 0; 1464 1465 // Check if we can move the stack update instruction up the epilogue 1466 // past the callee saves. This will allow the move to LR instruction 1467 // to be executed before the restores of the callee saves which means 1468 // that the callee saves can hide the latency from the MTLR instrcution. 
1469 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1470 if (stackUpdateCanBeMoved(MF)) { 1471 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1472 for (CalleeSavedInfo CSI : Info) { 1473 int FrIdx = CSI.getFrameIdx(); 1474 // If the frame index is not negative the callee saved info belongs to a 1475 // stack object that is not a fixed stack object. We ignore non-fixed 1476 // stack objects because we won't move the update of the stack pointer 1477 // past them. 1478 if (FrIdx >= 0) 1479 continue; 1480 1481 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1482 StackUpdateLoc--; 1483 else { 1484 // Abort the operation as we can't update all CSR restores. 1485 StackUpdateLoc = MBBI; 1486 break; 1487 } 1488 } 1489 } 1490 1491 if (FrameSize) { 1492 // In the prologue, the loaded (or persistent) stack pointer value is 1493 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1494 // zone add this offset back now. 1495 1496 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1497 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1498 // call which invalidates the stack pointer value in SP(0). So we use the 1499 // value of R31 in this case. 
1500 if (FI->hasFastCall()) { 1501 assert(HasFP && "Expecting a valid frame pointer."); 1502 if (!HasRedZone) 1503 RBReg = FPReg; 1504 if (!isLargeFrame) { 1505 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1506 .addReg(FPReg).addImm(FrameSize); 1507 } else { 1508 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1509 .addImm(FrameSize >> 16); 1510 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1511 .addReg(ScratchReg, RegState::Kill) 1512 .addImm(FrameSize & 0xFFFF); 1513 BuildMI(MBB, MBBI, dl, AddInst) 1514 .addReg(RBReg) 1515 .addReg(FPReg) 1516 .addReg(ScratchReg); 1517 } 1518 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1519 if (HasRedZone) { 1520 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1521 .addReg(SPReg) 1522 .addImm(FrameSize); 1523 } else { 1524 // Make sure that adding FrameSize will not overflow the max offset 1525 // size. 1526 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1527 "Local offsets should be negative"); 1528 SPAdd = FrameSize; 1529 FPOffset += FrameSize; 1530 BPOffset += FrameSize; 1531 PBPOffset += FrameSize; 1532 } 1533 } else { 1534 // We don't want to use ScratchReg as a base register, because it 1535 // could happen to be R0. Use FP instead, but make sure to preserve it. 1536 if (!HasRedZone) { 1537 // If FP is not saved, copy it to ScratchReg. 1538 if (!HasFP) 1539 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1540 .addReg(FPReg) 1541 .addReg(FPReg); 1542 RBReg = FPReg; 1543 } 1544 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1545 .addImm(0) 1546 .addReg(SPReg); 1547 } 1548 } 1549 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1550 // If there is no red zone, ScratchReg may be needed for holding a useful 1551 // value (although not the base register). Make sure it is not overwritten 1552 // too early. 
1553 1554 assert((isPPC64 || !MustSaveCR) && 1555 "Epilogue CR restoring supported only in 64-bit mode"); 1556 1557 // If we need to restore both the LR and the CR and we only have one 1558 // available scratch register, we must do them one at a time. 1559 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1560 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1561 // is live here. 1562 assert(HasRedZone && "Expecting red zone"); 1563 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1564 .addImm(getCRSaveOffset()) 1565 .addReg(SPReg); 1566 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1567 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1568 .addReg(TempReg, getKillRegState(i == e-1)); 1569 } 1570 1571 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1572 // LR is stored in the caller's stack frame. ScratchReg will be needed 1573 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1574 // a base register anyway, because it may happen to be R0. 1575 bool LoadedLR = false; 1576 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1577 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1578 .addImm(LROffset+SPAdd) 1579 .addReg(RBReg); 1580 LoadedLR = true; 1581 } 1582 1583 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1584 // This will only occur for PPC64. 1585 assert(isPPC64 && "Expecting 64-bit mode"); 1586 assert(RBReg == SPReg && "Should be using SP as a base register"); 1587 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1588 .addImm(getCRSaveOffset()) 1589 .addReg(RBReg); 1590 } 1591 1592 if (HasFP) { 1593 // If there is red zone, restore FP directly, since SP has already been 1594 // restored. Otherwise, restore the value of FP into ScratchReg. 
1595 if (HasRedZone || RBReg == SPReg) 1596 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1597 .addImm(FPOffset) 1598 .addReg(SPReg); 1599 else 1600 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1601 .addImm(FPOffset) 1602 .addReg(RBReg); 1603 } 1604 1605 if (FI->usesPICBase()) 1606 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1607 .addImm(PBPOffset) 1608 .addReg(RBReg); 1609 1610 if (HasBP) 1611 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1612 .addImm(BPOffset) 1613 .addReg(RBReg); 1614 1615 // There is nothing more to be loaded from the stack, so now we can 1616 // restore SP: SP = RBReg + SPAdd. 1617 if (RBReg != SPReg || SPAdd != 0) { 1618 assert(!HasRedZone && "This should not happen with red zone"); 1619 // If SPAdd is 0, generate a copy. 1620 if (SPAdd == 0) 1621 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1622 .addReg(RBReg) 1623 .addReg(RBReg); 1624 else 1625 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1626 .addReg(RBReg) 1627 .addImm(SPAdd); 1628 1629 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1630 if (RBReg == FPReg) 1631 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1632 .addReg(ScratchReg) 1633 .addReg(ScratchReg); 1634 1635 // Now load the LR from the caller's stack frame. 1636 if (MustSaveLR && !LoadedLR) 1637 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1638 .addImm(LROffset) 1639 .addReg(SPReg); 1640 } 1641 1642 if (MustSaveCR && 1643 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 1644 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1645 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1646 .addReg(TempReg, getKillRegState(i == e-1)); 1647 1648 if (MustSaveLR) 1649 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1650 1651 // Callee pop calling convention. Pop parameter/linkage area. 
// Used for tail call optimization.
  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    // Only a plain return (BLR/BLR8) from a fastcc function compiled with
    // GuaranteedTailCallOpt pops the caller-allocated area here; every other
    // return block is handed to createTailCallBranchInstr().
    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
        MF.getFunction().getCallingConv() == CallingConv::Fast) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      unsigned CallerAllocatedAmt = FI->getMinReservedArea();

      // NOTE(review): a zero CallerAllocatedAmt fails this test and therefore
      // takes the multi-instruction path below -- confirm that is intended.
      if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
          .addReg(SPReg).addImm(CallerAllocatedAmt);
      } else {
        // Materialize the amount in ScratchReg as (hi16 << 16) | lo16.
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(CallerAllocatedAmt >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(CallerAllocatedAmt & 0xFFFF);
        // NOTE(review): unlike the immediate form above, this BuildMI is given
        // no destination register and adds SPReg, FPReg and ScratchReg as
        // operands -- confirm the operand list is what AddInst expects.
        BuildMI(MBB, MBBI, dl, AddInst)
          .addReg(SPReg)
          .addReg(FPReg)
          .addReg(ScratchReg);
      }
    } else {
      createTailCallBranchInstr(MBB);
    }
  }
}

/// Emit the real tail-call branch that corresponds to the TCRETURN* pseudo
/// terminating \p MBB.
///
/// The pseudo's opcode is read from the block's first terminator. The branch
/// is inserted in front of the block's last non-debug instruction, and that
/// instruction's operand 0 supplies the branch target (global address or
/// immediate; for the register forms it is only asserted to be a register).
/// If the first terminator is not one of the six TCRETURN pseudos handled
/// below, the block is left untouched. The pseudo itself is not erased here.
void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // If we got this far a first terminator should exist.
  assert(MBBI != MBB.end() && "Failed to find the first terminator.");

  DebugLoc dl = MBBI->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  // Create the branch instruction for the pseudo tail call return
  // instruction. The 32-bit forms come first, then their 64-bit ("8")
  // counterparts.
  unsigned RetOpcode = MBBI->getOpcode();
  if (RetOpcode == PPC::TCRETURNdi) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
  } else if (RetOpcode == PPC::TCRETURNri) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

/// Refine the generic callee-saved set in \p SavedRegs for PPC and create the
/// fixed stack objects that the prologue/epilogue code relies on.
///
/// After delegating to TargetFrameLowering::determineCalleeSaves this:
///  - records whether LR must be saved and removes LR from \p SavedRegs;
///  - creates (once) the frame-pointer and base-pointer save slots and, when
///    the PIC base is used, its save slot;
///  - removes R31/X31, the base register and R30 from \p SavedRegs when they
///    are handled through those dedicated slots;
///  - reserves room for moving the linkage area when GuaranteedTailCallOpt is
///    on and the tail-call SP delta is negative;
///  - creates the CR spill slot on SVR4 when any of CR2..CR4 is in
///    \p SavedRegs.
void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  // Save R31 if necessary
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet (a zero index is
  // treated as "not yet created").
  if (!FPSI && needsFP(MF)) {
    // Find out the fixed offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  // Same scheme for the base pointer save slot.
  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  // TCSPDelta is negative here, so the object has size -TCSPDelta and sits at
  // offset TCSPDelta.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
  // function uses CR 2, 3, or 4. For 64-bit SVR4 we create a FixedStack
  // object at the offset of the CR-save slot in the linkage area. The actual
  // save and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
  if (Subtarget.isSVR4ABI() &&
      (SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    const int64_t SpillOffset = Subtarget.isPPC64() ? 8 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(SpillSize, SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}

/// Assign final offsets to the callee-saved spill slots of an SVR4 function
/// and then reserve register-scavenger slots.
///
/// The save areas are laid out downward from a running \c LowerBound in this
/// order: floating-point registers, general registers (together with the
/// frame pointer, PIC base and base pointer slots), the 32-bit CR save word,
/// the VRSAVE word and finally the 16-byte aligned vector/SPE area. Each
/// slot's existing offset is shifted by the \c LowerBound in effect when its
/// area is processed. For non-SVR4 ABIs only the scavenging slot is added.
///
/// When the function is shrink-wrapped and contains a tail call, this also
/// emits the tail-call branch for every return block other than the restore
/// block.
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  // Early exit if not using the SVR4 ABI.
  if (!Subtarget.isSVR4ABI()) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped, and if the function has a tail call, the
  // tail call might not be in the new RestoreBlock, so real branch instruction
  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
  // RestoreBlock. So we handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Lowest-numbered register seen per class; start at register 31 so that an
  // empty class contributes a one-register-sized area at most.
  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSAVESaveArea = false;
  bool HasVRSaveArea = false;

  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  // Bucket every callee-saved register by register class and track the
  // minimum register of each class.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC");
    if (PPC::GPRCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(CSI[i]);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(CSI[i]);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(CSI[i]);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // do nothing, as we already know whether CRs are spilled
    } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
      HasVRSAVESaveArea = true;
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(CSI[i]);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // Running (non-positive) offset of the bottom of the areas laid out so far.
  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }

    // The area spans MinFPR..F31, 8 bytes per register.
    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
    HasGPSaveArea = true;
  }

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    // The base register extends whichever general register area it belongs to.
    Register BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(BP)) {
      MinG8R = std::min<unsigned>(MinG8R, BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(BP)) {
      MinGPR = std::min<unsigned>(MinGPR, BP);
      HasGPSaveArea = true;
    }
  }

  // General register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area. Registers that were
    // spilled to another register have no stack slot to move.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // Do the same for the G8RC (64-bit general register) spill slots.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // The area spans the lowest saved general register up to register 31.
    unsigned MinReg =
      std::min<unsigned>(TRI->getEncodingValue(MinGPR),
                         TRI->getEncodingValue(MinG8R));

    if (Subtarget.isPPC64()) {
      LowerBound -= (31 - MinReg + 1) * 8;
    } else {
      LowerBound -= (31 - MinReg + 1) * 4;
    }
  }

  // For 32-bit only, the CR save area is below the general register
  // save area. For 64-bit SVR4, the CR save area is addressed relative
  // to the stack pointer and hence does not need an adjustment here.
  // Only CR2 (the first nonvolatile spilled) has an associated frame
  // index so that we have a single uniform save area.
  if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
    // Adjust the frame index of the CR spill slot.
    for (const auto &CSInfo : CSI) {
      if (CSInfo.getReg() == PPC::CR2) {
        int FI = CSInfo.getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
        break;
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  if (HasVRSAVESaveArea) {
    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
    //             which have the VRSAVE register class?
    // Adjust the frame index of the VRSAVE spill slot.
    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      unsigned Reg = CSI[i].getReg();

      if (PPC::VRSAVERCRegClass.contains(Reg)) {
        int FI = CSI[i].getFrameIdx();

        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding, we need 16-byte alignment. Note: for a
    // positive number the alignment formula is: y = (x + (n-1)) & (~(n-1)).
    // But since we are using a negative number here (the stack grows
    // downward), we should use the formula: y = x & (~(n-1)), where x is the
    // size before aligning, n is the alignment size (n = 16 here) and y is the
    // size after aligning.
    assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }
  }

  addScavengingSpillSlot(MF, RS);
}

/// Reserve one or two emergency spill slots for the register scavenger \p RS.
///
/// A pointer-sized slot is created when the function has variable-sized
/// objects, spills CR or VRSAVE, has non-RI spills, or has spills and a frame
/// size that does not fit a signed 16-bit immediate. A second slot is added
/// when CR or VRSAVE is spilled or when there are variable-sized objects
/// together with a maximum alignment above the stack alignment.
void
PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
                                         RegScavenger *RS) const {
  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
  // a large stack, which will require scavenging a register to materialize a
  // large offset.

  // We need to have a scavenger spill slot for spills if the frame size is
  // large. In case there is no free register for large-offset addressing,
  // this slot is used for the necessary emergency spill. Also, we need the
  // slot for dynamic stack allocations.

  // The scavenger might be invoked if the frame offset does not fit into
  // the 16-bit immediate. We don't know the complete frame size here
  // because we've not yet computed callee-saved register spills or the
  // needed alignment padding.
  unsigned StackSize = determineFrameLayout(MF, true);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
      hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
    // The slot is sized and aligned for a general register of the target's
    // pointer width.
    const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
    const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
    const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
    const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
    unsigned Size = TRI.getSpillSize(RC);
    unsigned Align = TRI.getSpillAlignment(RC);
    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));

    // Might we have over-aligned allocas?
    bool HasAlVars = MFI.hasVarSizedObjects() &&
                     MFI.getMaxAlignment() > getStackAlignment();

    // These kinds of spills might need two registers.
    if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
      RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));

  }
}

// This function checks if a callee saved gpr can be spilled to a volatile
// vector register. This is attempted only when the option
// ppc-enable-pe-vector-spills is enabled, the function makes no calls and the
// subtarget has P9 vector support. If there are any remaining registers
// which were not spilled to vectors, return false so the target independent
// code can handle them by assigning a FrameIdx to a stack slot.
bool PPCFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  // Early exit if cannot spill gprs to volatile vector registers.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
    return false;

  // Build a BitVector of VSRs that can be used for spilling GPRs.
  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
  BitVector BVCalleeSaved(TRI->getNumRegs());
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  // CSRegs is walked until its zero terminator.
  for (unsigned i = 0; CSRegs[i]; ++i)
    BVCalleeSaved.set(CSRegs[i]);

  for (unsigned Reg : BVAllocatable.set_bits()) {
    // Set to 0 if the register is not a volatile VF/F8 register, or if it is
    // used in the function.
    if (BVCalleeSaved[Reg] ||
        (!PPC::F8RCRegClass.contains(Reg) &&
         !PPC::VFRCRegClass.contains(Reg)) ||
        (MF.getRegInfo().isPhysRegUsed(Reg)))
      BVAllocatable.reset(Reg);
  }

  bool AllSpilledToReg = true;
  for (auto &CS : CSI) {
    // NOTE(review): entries that were given a DstReg on earlier iterations
    // keep it when we bail out here -- confirm callers cope with a partially
    // assigned CSI.
    if (BVAllocatable.none())
      return false;

    // Only general purpose registers are candidates; anything else must go to
    // a stack slot.
    unsigned Reg = CS.getReg();
    if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
      AllSpilledToReg = false;
      continue;
    }

    // Take the first still-available register and mark it as consumed.
    unsigned VolatileVFReg = BVAllocatable.find_first();
    if (VolatileVFReg < BVAllocatable.size()) {
      CS.setDstReg(VolatileVFReg);
      BVAllocatable.reset(VolatileVFReg);
    } else {
      AllSpilledToReg = false;
    }
  }
  return AllSpilledToReg;
}

/// Insert the callee-saved register spills for \p CSI before \p MI in \p MBB.
///
/// Returns false without emitting anything for non-SVR4 ABIs; otherwise
/// returns true. VRSAVE is skipped, and so is R2/X2 when the function must
/// save the TOC. On 64-bit targets CR fields are only recorded via
/// addMustSaveCR; on 32-bit targets the first CR field seen emits an
/// MFCR/STW pair through R12 and later CR fields are attached to that MFCR as
/// implicit-kill operands. Other registers are either moved to their
/// destination register with MTVSRD or stored to their stack slot.
bool PPCFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
  // Return false otherwise to maintain pre-existing behavior.
  if (!Subtarget.isSVR4ABI())
    return false;

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
    if (Reg == PPC::VRSAVE)
      continue;

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
       MBB.addLiveIn(Reg);

    // A CR spill was already emitted: just record this field on the MFCR.
    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (Subtarget.isPPC64()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
          .addReg(Reg, RegState::ImplicitKill);

        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                         .addReg(PPC::R12,
                                                 getKillRegState(true)),
                                         CSI[i].getFrameIdx()));
      }
    } else {
      if (CSI[i].isSpilledToReg()) {
        // Register-to-register spill chosen by assignCalleeSavedSpillSlots.
        NumPESpillVSR++;
        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
          .addReg(Reg, getKillRegState(true));
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       CSI[i].getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
                                  RC, TRI);
      }
    }
  }
  return true;
}

/// Reload the saved CR word into R12 and move it back into each of CR2, CR3
/// and CR4 that was spilled, inserting the instructions before \p MI.
///
/// The frame index is taken from \p CSI[\p CSIIndex]. R12 is marked killed on
/// the last MTOCRF that is emitted. \p is31 is not used by this function.
static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
                       bool CR4Spilled, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MI,
                       ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DebugLoc DL;
  unsigned MoveReg = PPC::R12;

  // 32-bit:  FP-relative
  MBB.insert(MI,
             addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
                               CSI[CSIIndex].getFrameIdx()));

  unsigned RestoreOp = PPC::MTOCRF;
  if (CR2Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
               .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));

  if (CR3Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
               .addReg(MoveReg, getKillRegState(!CR4Spilled)));

  if (CR4Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
               .addReg(MoveReg, getKillRegState(true)));
}

/// Erase the call-frame pseudo instruction \p I and return the iterator that
/// follows it.
///
/// When GuaranteedTailCallOpt is enabled and \p I is an ADJCALLSTACKUP whose
/// operand 1 is non-zero, the stack pointer is first adjusted by the negated
/// amount: with a single add-immediate when it fits in 16 signed bits,
/// otherwise by materializing it in R0/X0 with LIS/ORI and adding that.
MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      I->getOpcode() == PPC::ADJCALLSTACKUP) {
    // Add (actually subtract) back the amount the callee popped on return.
    if (int CalleeAmt =  I->getOperand(1).getImm()) {
      bool is64Bit = Subtarget.isPPC64();
      CalleeAmt *= -1;
      // Pick the 32- or 64-bit registers and opcodes.
      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
      const DebugLoc &dl = I->getDebugLoc();

      if (isInt<16>(CalleeAmt)) {
        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
          .addReg(StackReg, RegState::Kill)
          .addImm(CalleeAmt);
      } else {
        // Build the amount in TmpReg as (hi16 << 16) | lo16, then add it.
        MachineBasicBlock::iterator MBBI = I;
        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
          .addImm(CalleeAmt >> 16);
        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
          .addReg(TmpReg, RegState::Kill)
          .addImm(CalleeAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
          .addReg(StackReg, RegState::Kill)
          .addReg(TmpReg);
      }
    }
  }
  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
  return MBB.erase(I);
}

/// Return true if \p Reg is one of CR2, CR3 or CR4.
static bool isCalleeSavedCR(unsigned Reg) {
  return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
}

/// Insert the callee-saved register reloads for \p CSI before \p MI in
/// \p MBB, in reverse order of the spills.
///
/// Returns false without emitting anything for non-SVR4 ABIs; otherwise
/// returns true. VRSAVE is skipped, as is R2/X2 when the function must save
/// the TOC, and CR fields are skipped unless the target uses the 32-bit ELF
/// ABI. On 32-bit ELF the CR fields are collected and restored together via
/// restoreCRs(). Other registers are either moved back from their
/// destination register with MFVSRD or loaded from their stack slot.
bool
PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {

  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
  // Return false otherwise to maintain pre-existing behavior.
  if (!Subtarget.isSVR4ABI())
    return false;

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill. BeforeI remembers the instruction preceding the
  // insertion point (when there is one) so that I can be re-derived after
  // every insertion.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
    if (Reg == PPC::VRSAVE)
      continue;

    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled.  Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // When we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(*MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
                   CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        // Register-to-register reload matching the MTVSRD spill.
        DebugLoc DL;
        NumPEReloadVSR++;
        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
            .addReg(CSI[i].getDstReg(), getKillRegState(true));
      } else {
       // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order: move I back to just after BeforeI (or to the
    // start of the block) so the next reload lands ahead of this one.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If the CR fields have not been restored yet, do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    assert(Subtarget.is32BitELFABI() &&
           "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
    bool is31 = needsFP(*MF);
    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
  }

  return true;
}

/// Return the stored TOC save offset.
unsigned PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}

/// Return the stored frame pointer save offset.
unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}

/// Return the stored base pointer save offset. Reports a fatal error on the
/// AIX ABI, where the base pointer is not implemented.
unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
  if (Subtarget.isAIXABI())
    report_fatal_error("BasePointer is not implemented on AIX yet.");
  return BasePointerSaveOffset;
}

/// Return true if shrink wrapping may be applied to \p MF: it must not have
/// been disabled in the function's PPCFunctionInfo, and the subtarget must be
/// 64-bit and use the SVR4 ABI.
bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
  if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
    return false;
  return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
          MF.getSubtarget<PPCSubtarget>().isPPC64());
}