//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PPC implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCFrameLowering.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "framelowering"
// Counters for spills to / reloads from vector registers in the prologue and
// epilogue.
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");

// Hidden boolean option "ppc-enable-pe-vector-spills"; initialized to false.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);

/// VRRegNo - Map from a numbered VR register to its enum value.
41 /// 42 static const MCPhysReg VRRegNo[] = { 43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 47 }; 48 49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 50 if (STI.isAIXABI()) 51 return STI.isPPC64() ? 16 : 8; 52 // SVR4 ABI: 53 return STI.isPPC64() ? 16 : 4; 54 } 55 56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 57 if (STI.isAIXABI()) 58 return STI.isPPC64() ? 40 : 20; 59 return STI.isELFv2ABI() ? 24 : 40; 60 } 61 62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 63 // First slot in the general register save area. 64 return STI.isPPC64() ? -8U : -4U; 65 } 66 67 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 68 if (STI.isAIXABI() || STI.isPPC64()) 69 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 70 71 // 32-bit SVR4 ABI: 72 return 8; 73 } 74 75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 76 // SVR4 ABI: First slot in the general register save area. 77 return STI.isPPC64() 78 ? -16U 79 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 80 } 81 82 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { 83 return (STI.isAIXABI() && !STI.isPPC64()) ? 
4 : 8; 84 } 85 86 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 87 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 88 STI.getPlatformStackAlignment(), 0), 89 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 90 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 91 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 92 LinkageSize(computeLinkageSize(Subtarget)), 93 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 94 CRSaveOffset(computeCRSaveOffset(Subtarget)) {} 95 96 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 97 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 98 unsigned &NumEntries) const { 99 // Early exit if not using the SVR4 ABI. 100 if (!Subtarget.isSVR4ABI()) { 101 NumEntries = 0; 102 return nullptr; 103 } 104 105 // Floating-point register save area offsets. 106 #define CALLEE_SAVED_FPRS \ 107 {PPC::F31, -8}, \ 108 {PPC::F30, -16}, \ 109 {PPC::F29, -24}, \ 110 {PPC::F28, -32}, \ 111 {PPC::F27, -40}, \ 112 {PPC::F26, -48}, \ 113 {PPC::F25, -56}, \ 114 {PPC::F24, -64}, \ 115 {PPC::F23, -72}, \ 116 {PPC::F22, -80}, \ 117 {PPC::F21, -88}, \ 118 {PPC::F20, -96}, \ 119 {PPC::F19, -104}, \ 120 {PPC::F18, -112}, \ 121 {PPC::F17, -120}, \ 122 {PPC::F16, -128}, \ 123 {PPC::F15, -136}, \ 124 {PPC::F14, -144} 125 126 // 32-bit general purpose register save area offsets. 127 #define CALLEE_SAVED_GPRS32 \ 128 {PPC::R31, -4}, \ 129 {PPC::R30, -8}, \ 130 {PPC::R29, -12}, \ 131 {PPC::R28, -16}, \ 132 {PPC::R27, -20}, \ 133 {PPC::R26, -24}, \ 134 {PPC::R25, -28}, \ 135 {PPC::R24, -32}, \ 136 {PPC::R23, -36}, \ 137 {PPC::R22, -40}, \ 138 {PPC::R21, -44}, \ 139 {PPC::R20, -48}, \ 140 {PPC::R19, -52}, \ 141 {PPC::R18, -56}, \ 142 {PPC::R17, -60}, \ 143 {PPC::R16, -64}, \ 144 {PPC::R15, -68}, \ 145 {PPC::R14, -72} 146 147 // 64-bit general purpose register save area offsets. 
148 #define CALLEE_SAVED_GPRS64 \ 149 {PPC::X31, -8}, \ 150 {PPC::X30, -16}, \ 151 {PPC::X29, -24}, \ 152 {PPC::X28, -32}, \ 153 {PPC::X27, -40}, \ 154 {PPC::X26, -48}, \ 155 {PPC::X25, -56}, \ 156 {PPC::X24, -64}, \ 157 {PPC::X23, -72}, \ 158 {PPC::X22, -80}, \ 159 {PPC::X21, -88}, \ 160 {PPC::X20, -96}, \ 161 {PPC::X19, -104}, \ 162 {PPC::X18, -112}, \ 163 {PPC::X17, -120}, \ 164 {PPC::X16, -128}, \ 165 {PPC::X15, -136}, \ 166 {PPC::X14, -144} 167 168 // Vector register save area offsets. 169 #define CALLEE_SAVED_VRS \ 170 {PPC::V31, -16}, \ 171 {PPC::V30, -32}, \ 172 {PPC::V29, -48}, \ 173 {PPC::V28, -64}, \ 174 {PPC::V27, -80}, \ 175 {PPC::V26, -96}, \ 176 {PPC::V25, -112}, \ 177 {PPC::V24, -128}, \ 178 {PPC::V23, -144}, \ 179 {PPC::V22, -160}, \ 180 {PPC::V21, -176}, \ 181 {PPC::V20, -192} 182 183 // Note that the offsets here overlap, but this is fixed up in 184 // processFunctionBeforeFrameFinalized. 185 186 static const SpillSlot Offsets[] = { 187 CALLEE_SAVED_FPRS, 188 CALLEE_SAVED_GPRS32, 189 190 // CR save area offset. We map each of the nonvolatile CR fields 191 // to the slot for CR2, which is the first of the nonvolatile CR 192 // fields to be assigned, so that we only allocate one save slot. 193 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 194 {PPC::CR2, -4}, 195 196 // VRSAVE save area offset. 197 {PPC::VRSAVE, -4}, 198 199 CALLEE_SAVED_VRS, 200 201 // SPE register save area (overlaps Vector save area). 202 {PPC::S31, -8}, 203 {PPC::S30, -16}, 204 {PPC::S29, -24}, 205 {PPC::S28, -32}, 206 {PPC::S27, -40}, 207 {PPC::S26, -48}, 208 {PPC::S25, -56}, 209 {PPC::S24, -64}, 210 {PPC::S23, -72}, 211 {PPC::S22, -80}, 212 {PPC::S21, -88}, 213 {PPC::S20, -96}, 214 {PPC::S19, -104}, 215 {PPC::S18, -112}, 216 {PPC::S17, -120}, 217 {PPC::S16, -128}, 218 {PPC::S15, -136}, 219 {PPC::S14, -144}}; 220 221 static const SpillSlot Offsets64[] = { 222 CALLEE_SAVED_FPRS, 223 CALLEE_SAVED_GPRS64, 224 225 // VRSAVE save area offset. 
226 {PPC::VRSAVE, -4}, 227 228 CALLEE_SAVED_VRS 229 }; 230 231 if (Subtarget.isPPC64()) { 232 NumEntries = array_lengthof(Offsets64); 233 234 return Offsets64; 235 } else { 236 NumEntries = array_lengthof(Offsets); 237 238 return Offsets; 239 } 240 } 241 242 /// RemoveVRSaveCode - We have found that this function does not need any code 243 /// to manipulate the VRSAVE register, even though it uses vector registers. 244 /// This can happen when the only registers used are known to be live in or out 245 /// of the function. Remove all of the VRSAVE related code from the function. 246 /// FIXME: The removal of the code results in a compile failure at -O0 when the 247 /// function contains a function call, as the GPR containing original VRSAVE 248 /// contents is spilled and reloaded around the call. Without the prolog code, 249 /// the spill instruction refers to an undefined register. This code needs 250 /// to account for all uses of that GPR. 251 static void RemoveVRSaveCode(MachineInstr &MI) { 252 MachineBasicBlock *Entry = MI.getParent(); 253 MachineFunction *MF = Entry->getParent(); 254 255 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 256 MachineBasicBlock::iterator MBBI = MI; 257 ++MBBI; 258 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 259 MBBI->eraseFromParent(); 260 261 bool RemovedAllMTVRSAVEs = true; 262 // See if we can find and remove the MTVRSAVE instruction from all of the 263 // epilog blocks. 264 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 265 // If last instruction is a return instruction, add an epilogue 266 if (I->isReturnBlock()) { 267 bool FoundIt = false; 268 for (MBBI = I->end(); MBBI != I->begin(); ) { 269 --MBBI; 270 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 271 MBBI->eraseFromParent(); // remove it. 
272 FoundIt = true; 273 break; 274 } 275 } 276 RemovedAllMTVRSAVEs &= FoundIt; 277 } 278 } 279 280 // If we found and removed all MTVRSAVE instructions, remove the read of 281 // VRSAVE as well. 282 if (RemovedAllMTVRSAVEs) { 283 MBBI = MI; 284 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 285 --MBBI; 286 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 287 MBBI->eraseFromParent(); 288 } 289 290 // Finally, nuke the UPDATE_VRSAVE. 291 MI.eraseFromParent(); 292 } 293 294 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 295 // instruction selector. Based on the vector registers that have been used, 296 // transform this into the appropriate ORI instruction. 297 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 298 MachineFunction *MF = MI.getParent()->getParent(); 299 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 300 DebugLoc dl = MI.getDebugLoc(); 301 302 const MachineRegisterInfo &MRI = MF->getRegInfo(); 303 unsigned UsedRegMask = 0; 304 for (unsigned i = 0; i != 32; ++i) 305 if (MRI.isPhysRegModified(VRRegNo[i])) 306 UsedRegMask |= 1 << (31-i); 307 308 // Live in and live out values already must be in the mask, so don't bother 309 // marking them. 310 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 311 unsigned RegNo = TRI->getEncodingValue(LI.first); 312 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 313 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 314 } 315 316 // Live out registers appear as use operands on return instructions. 
317 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 318 UsedRegMask != 0 && BI != BE; ++BI) { 319 const MachineBasicBlock &MBB = *BI; 320 if (!MBB.isReturnBlock()) 321 continue; 322 const MachineInstr &Ret = MBB.back(); 323 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 324 const MachineOperand &MO = Ret.getOperand(I); 325 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 326 continue; 327 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 328 UsedRegMask &= ~(1 << (31-RegNo)); 329 } 330 } 331 332 // If no registers are used, turn this into a copy. 333 if (UsedRegMask == 0) { 334 // Remove all VRSAVE code. 335 RemoveVRSaveCode(MI); 336 return; 337 } 338 339 Register SrcReg = MI.getOperand(1).getReg(); 340 Register DstReg = MI.getOperand(0).getReg(); 341 342 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 343 if (DstReg != SrcReg) 344 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 345 .addReg(SrcReg) 346 .addImm(UsedRegMask); 347 else 348 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 349 .addReg(SrcReg, RegState::Kill) 350 .addImm(UsedRegMask); 351 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 352 if (DstReg != SrcReg) 353 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 354 .addReg(SrcReg) 355 .addImm(UsedRegMask >> 16); 356 else 357 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 358 .addReg(SrcReg, RegState::Kill) 359 .addImm(UsedRegMask >> 16); 360 } else { 361 if (DstReg != SrcReg) 362 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 363 .addReg(SrcReg) 364 .addImm(UsedRegMask >> 16); 365 else 366 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 367 .addReg(SrcReg, RegState::Kill) 368 .addImm(UsedRegMask >> 16); 369 370 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 371 .addReg(DstReg, RegState::Kill) 372 .addImm(UsedRegMask & 0xFFFF); 373 } 374 375 // Remove the old UPDATE_VRSAVE instruction. 
376 MI.eraseFromParent(); 377 } 378 379 static bool spillsCR(const MachineFunction &MF) { 380 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 381 return FuncInfo->isCRSpilled(); 382 } 383 384 static bool spillsVRSAVE(const MachineFunction &MF) { 385 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 386 return FuncInfo->isVRSAVESpilled(); 387 } 388 389 static bool hasSpills(const MachineFunction &MF) { 390 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 391 return FuncInfo->hasSpills(); 392 } 393 394 static bool hasNonRISpills(const MachineFunction &MF) { 395 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 396 return FuncInfo->hasNonRISpills(); 397 } 398 399 /// MustSaveLR - Return true if this function requires that we save the LR 400 /// register onto the stack in the prolog and restore it in the epilog of the 401 /// function. 402 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 403 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 404 405 // We need a save/restore of LR if there is any def of LR (which is 406 // defined by calls, including the PIC setup sequence), or if there is 407 // some use of the LR stack slot (e.g. for builtin_return_address). 408 // (LR comes in 32 and 64 bit versions.) 409 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 410 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 411 } 412 413 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 414 /// call frame size. Update the MachineFunction object with the stack size. 
415 unsigned 416 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 417 bool UseEstimate) const { 418 unsigned NewMaxCallFrameSize = 0; 419 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 420 &NewMaxCallFrameSize); 421 MF.getFrameInfo().setStackSize(FrameSize); 422 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 423 return FrameSize; 424 } 425 426 /// determineFrameLayout - Determine the size of the frame and maximum call 427 /// frame size. 428 unsigned 429 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 430 bool UseEstimate, 431 unsigned *NewMaxCallFrameSize) const { 432 const MachineFrameInfo &MFI = MF.getFrameInfo(); 433 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 434 435 // Get the number of bytes to allocate from the FrameInfo 436 unsigned FrameSize = 437 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 438 439 // Get stack alignments. The frame must be aligned to the greatest of these: 440 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI 441 unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame 442 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; 443 444 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 445 446 unsigned LR = RegInfo->getRARegister(); 447 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 448 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 449 !MFI.adjustsStack() && // No calls. 450 !MustSaveLR(MF, LR) && // No need to save LR. 451 !FI->mustSaveTOC() && // No need to save TOC. 452 !RegInfo->hasBasePointer(MF); // No special alignment. 453 454 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 455 // code if all local vars are reg-allocated. 
456 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 457 458 // Check whether we can skip adjusting the stack pointer (by using red zone) 459 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 460 // No need for frame 461 return 0; 462 } 463 464 // Get the maximum call frame size of all the calls. 465 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 466 467 // Maximum call frame needs to be at least big enough for linkage area. 468 unsigned minCallFrameSize = getLinkageSize(); 469 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 470 471 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 472 // that allocations will be aligned. 473 if (MFI.hasVarSizedObjects()) 474 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; 475 476 // Update the new max call frame size if the caller passes in a valid pointer. 477 if (NewMaxCallFrameSize) 478 *NewMaxCallFrameSize = maxCallFrameSize; 479 480 // Include call frame size in total. 481 FrameSize += maxCallFrameSize; 482 483 // Make sure the frame is aligned. 484 FrameSize = (FrameSize + AlignMask) & ~AlignMask; 485 486 return FrameSize; 487 } 488 489 // hasFP - Return true if the specified function actually has a dedicated frame 490 // pointer register. 491 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 492 const MachineFrameInfo &MFI = MF.getFrameInfo(); 493 // FIXME: This is pretty much broken by design: hasFP() might be called really 494 // early, before the stack layout was calculated and thus hasFP() might return 495 // true or false here depending on the time of call. 496 return (MFI.getStackSize()) && needsFP(MF); 497 } 498 499 // needsFP - Return true if the specified function should have a dedicated frame 500 // pointer register. This is true if the function has variable sized allocas or 501 // if frame pointer elimination is disabled. 
502 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 503 const MachineFrameInfo &MFI = MF.getFrameInfo(); 504 505 // Naked functions have no stack frame pushed, so we don't have a frame 506 // pointer. 507 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 508 return false; 509 510 return MF.getTarget().Options.DisableFramePointerElim(MF) || 511 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 512 (MF.getTarget().Options.GuaranteedTailCallOpt && 513 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 514 } 515 516 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 517 bool is31 = needsFP(MF); 518 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 519 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 520 521 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 522 bool HasBP = RegInfo->hasBasePointer(MF); 523 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 524 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 525 526 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 527 BI != BE; ++BI) 528 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 529 --MBBI; 530 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 531 MachineOperand &MO = MBBI->getOperand(I); 532 if (!MO.isReg()) 533 continue; 534 535 switch (MO.getReg()) { 536 case PPC::FP: 537 MO.setReg(FPReg); 538 break; 539 case PPC::FP8: 540 MO.setReg(FP8Reg); 541 break; 542 case PPC::BP: 543 MO.setReg(BPReg); 544 break; 545 case PPC::BP8: 546 MO.setReg(BP8Reg); 547 break; 548 549 } 550 } 551 } 552 } 553 554 /* This function will do the following: 555 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 556 respectively (defaults recommended by the ABI) and return true 557 - If MBB is not an entry block, initialize the register scavenger and look 558 for available registers. 
- If the defaults (R0/R12) are available, return true
      - If TwoUniqueRegsRequired is set to true, it looks for two unique
        registers. Otherwise, look for a single available register.
        - If the required registers are found, set SR1 and SR2 and return true.
        - If the required registers are not found, set SR2 or both SR1 and SR2 to
          PPC::NoRegister and return false.

    Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
    is not set, this function will attempt to find two different registers, but
    still return true if only one register is available (and set SR1 == SR2).

    UseAtEnd selects whether the registers are wanted at the end of MBB (the
    scavenger is advanced to the first terminator, or the last instruction if
    there is none) or at its start. SR1 and SR2 may be null; SR2 may only be
    non-null when SR1 is.
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      unsigned *SR1,
                                      unsigned *SR2) const {
  RegScavenger RS;
  unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    // Nothing to step over when the chosen point is the first instruction.
    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  // (CSRegs is walked until its zero terminator.)
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  // "Large" means the negated frame size does not fit a signed 16-bit value.
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned MaxAlign = MFI.getMaxAlignment();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
}

// canUseAsPrologue - Return true if findScratchRegister can provide the scratch
// registers required at the start of MBB.
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  // findScratchRegister takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

// canUseAsEpilogue - Return true if findScratchRegister can provide a scratch
// register at the end of MBB.
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  // findScratchRegister takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

// stackUpdateCanBeMoved - Return true if every condition checked below allows
// the prologue's stack pointer update to be moved.
bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  DebugLoc dl;
  // AIX assembler does not support cfi directives.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();
  bool isAIXABI = Subtarget.isAIXABI();
  bool isELFv2ABI = Subtarget.isELFv2ABI();
  assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");

  // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
  // process it.
  if (!isSVR4ABI)
    for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
      if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
        if (isAIXABI)
          report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
        HandleVRSaveUpdate(*MBBI, TII);
        break;
      }
    }

  // Move MBBI back to the beginning of the prologue block.
  MBBI = MBB.begin();

  // Work out frame sizes.
  unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
  int NegFrameSize = -FrameSize;
  if (!isInt<32>(NegFrameSize))
    llvm_unreachable("Unhandled stack size!");

  if (MFI.isFrameAddressTaken())
    replaceFPWithRealFP(MF);

  // Check if the link register (LR) must be saved.
779 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 780 bool MustSaveLR = FI->mustSaveLR(); 781 bool MustSaveTOC = FI->mustSaveTOC(); 782 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 783 bool MustSaveCR = !MustSaveCRs.empty(); 784 // Do we have a frame pointer and/or base pointer for this function? 785 bool HasFP = hasFP(MF); 786 bool HasBP = RegInfo->hasBasePointer(MF); 787 bool HasRedZone = isPPC64 || !isSVR4ABI; 788 789 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 790 Register BPReg = RegInfo->getBaseRegister(MF); 791 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 792 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 793 unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 794 unsigned ScratchReg = 0; 795 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 796 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 797 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 798 : PPC::MFLR ); 799 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 800 : PPC::STW ); 801 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 802 : PPC::STWU ); 803 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 804 : PPC::STWUX); 805 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 806 : PPC::LIS ); 807 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 808 : PPC::ORI ); 809 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 810 : PPC::OR ); 811 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 812 : PPC::SUBFC); 813 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 814 : PPC::SUBFIC); 815 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 816 : PPC::MFCR); 817 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 818 819 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 820 // LROffset is positive), that slot is callee-owned. 
Because PPC32 SVR4 has no 821 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 822 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 823 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 824 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 825 826 // Using the same bool variable as below to suppress compiler warnings. 827 bool SingleScratchReg = 828 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 829 &ScratchReg, &TempReg); 830 assert(SingleScratchReg && 831 "Required number of registers not available in this block"); 832 833 SingleScratchReg = ScratchReg == TempReg; 834 835 int LROffset = getReturnSaveOffset(); 836 837 int FPOffset = 0; 838 if (HasFP) { 839 if (isSVR4ABI) { 840 MachineFrameInfo &MFI = MF.getFrameInfo(); 841 int FPIndex = FI->getFramePointerSaveIndex(); 842 assert(FPIndex && "No Frame Pointer Save Slot!"); 843 FPOffset = MFI.getObjectOffset(FPIndex); 844 } else { 845 FPOffset = getFramePointerSaveOffset(); 846 } 847 } 848 849 int BPOffset = 0; 850 if (HasBP) { 851 if (isSVR4ABI) { 852 MachineFrameInfo &MFI = MF.getFrameInfo(); 853 int BPIndex = FI->getBasePointerSaveIndex(); 854 assert(BPIndex && "No Base Pointer Save Slot!"); 855 BPOffset = MFI.getObjectOffset(BPIndex); 856 } else { 857 BPOffset = getBasePointerSaveOffset(); 858 } 859 } 860 861 int PBPOffset = 0; 862 if (FI->usesPICBase()) { 863 MachineFrameInfo &MFI = MF.getFrameInfo(); 864 int PBPIndex = FI->getPICBasePointerSaveIndex(); 865 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 866 PBPOffset = MFI.getObjectOffset(PBPIndex); 867 } 868 869 // Get stack alignments. 
870 unsigned MaxAlign = MFI.getMaxAlignment(); 871 if (HasBP && MaxAlign > 1) 872 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && 873 "Invalid alignment!"); 874 875 // Frames of 32KB & larger require special handling because they cannot be 876 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 877 bool isLargeFrame = !isInt<16>(NegFrameSize); 878 879 // Check if we can move the stack update instruction (stdu) down the prologue 880 // past the callee saves. Hopefully this will avoid the situation where the 881 // saves are waiting for the update on the store with update to complete. 882 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 883 bool MovingStackUpdateDown = false; 884 885 // Check if we can move the stack update. 886 if (stackUpdateCanBeMoved(MF)) { 887 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 888 for (CalleeSavedInfo CSI : Info) { 889 int FrIdx = CSI.getFrameIdx(); 890 // If the frame index is not negative the callee saved info belongs to a 891 // stack object that is not a fixed stack object. We ignore non-fixed 892 // stack objects because we won't move the stack update pointer past them. 893 if (FrIdx >= 0) 894 continue; 895 896 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 897 StackUpdateLoc++; 898 MovingStackUpdateDown = true; 899 } else { 900 // We need all of the Frame Indices to meet these conditions. 901 // If they do not, abort the whole operation. 902 StackUpdateLoc = MBBI; 903 MovingStackUpdateDown = false; 904 break; 905 } 906 } 907 908 // If the operation was not aborted then update the object offset. 
909 if (MovingStackUpdateDown) { 910 for (CalleeSavedInfo CSI : Info) { 911 int FrIdx = CSI.getFrameIdx(); 912 if (FrIdx < 0) 913 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 914 } 915 } 916 } 917 918 // Where in the prologue we move the CR fields depends on how many scratch 919 // registers we have, and if we need to save the link register or not. This 920 // lambda is to avoid duplicating the logic in 2 places. 921 auto BuildMoveFromCR = [&]() { 922 if (isELFv2ABI && MustSaveCRs.size() == 1) { 923 // In the ELFv2 ABI, we are not required to save all CR fields. 924 // If only one CR field is clobbered, it is more efficient to use 925 // mfocrf to selectively save just that field, because mfocrf has short 926 // latency compares to mfcr. 927 assert(isPPC64 && "V2 ABI is 64-bit only."); 928 MachineInstrBuilder MIB = 929 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 930 MIB.addReg(MustSaveCRs[0], RegState::Kill); 931 } else { 932 MachineInstrBuilder MIB = 933 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 934 for (unsigned CRfield : MustSaveCRs) 935 MIB.addReg(CRfield, RegState::ImplicitKill); 936 } 937 }; 938 939 // If we need to spill the CR and the LR but we don't have two separate 940 // registers available, we must spill them one at a time 941 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 942 BuildMoveFromCR(); 943 BuildMI(MBB, MBBI, dl, StoreWordInst) 944 .addReg(TempReg, getKillRegState(true)) 945 .addImm(CRSaveOffset) 946 .addReg(SPReg); 947 } 948 949 if (MustSaveLR) 950 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 951 952 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 953 BuildMoveFromCR(); 954 955 if (HasRedZone) { 956 if (HasFP) 957 BuildMI(MBB, MBBI, dl, StoreInst) 958 .addReg(FPReg) 959 .addImm(FPOffset) 960 .addReg(SPReg); 961 if (FI->usesPICBase()) 962 BuildMI(MBB, MBBI, dl, StoreInst) 963 .addReg(PPC::R30) 964 .addImm(PBPOffset) 965 .addReg(SPReg); 966 if (HasBP) 967 BuildMI(MBB, MBBI, dl, 
StoreInst) 968 .addReg(BPReg) 969 .addImm(BPOffset) 970 .addReg(SPReg); 971 } 972 973 if (MustSaveLR) 974 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 975 .addReg(ScratchReg, getKillRegState(true)) 976 .addImm(LROffset) 977 .addReg(SPReg); 978 979 if (MustSaveCR && 980 !(SingleScratchReg && MustSaveLR)) { 981 assert(HasRedZone && "A red zone is always available on PPC64"); 982 BuildMI(MBB, MBBI, dl, StoreWordInst) 983 .addReg(TempReg, getKillRegState(true)) 984 .addImm(CRSaveOffset) 985 .addReg(SPReg); 986 } 987 988 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 989 if (!FrameSize) 990 return; 991 992 // Adjust stack pointer: r1 += NegFrameSize. 993 // If there is a preferred stack alignment, align R1 now 994 995 if (HasBP && HasRedZone) { 996 // Save a copy of r1 as the base pointer. 997 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 998 .addReg(SPReg) 999 .addReg(SPReg); 1000 } 1001 1002 // Have we generated a STUX instruction to claim stack frame? If so, 1003 // the negated frame size will be placed in ScratchReg. 1004 bool HasSTUX = false; 1005 1006 // This condition must be kept in sync with canUseAsPrologue. 1007 if (HasBP && MaxAlign > 1) { 1008 if (isPPC64) 1009 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1010 .addReg(SPReg) 1011 .addImm(0) 1012 .addImm(64 - Log2_32(MaxAlign)); 1013 else // PPC32... 
1014 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1015 .addReg(SPReg) 1016 .addImm(0) 1017 .addImm(32 - Log2_32(MaxAlign)) 1018 .addImm(31); 1019 if (!isLargeFrame) { 1020 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1021 .addReg(ScratchReg, RegState::Kill) 1022 .addImm(NegFrameSize); 1023 } else { 1024 assert(!SingleScratchReg && "Only a single scratch reg available"); 1025 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1026 .addImm(NegFrameSize >> 16); 1027 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1028 .addReg(TempReg, RegState::Kill) 1029 .addImm(NegFrameSize & 0xFFFF); 1030 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1031 .addReg(ScratchReg, RegState::Kill) 1032 .addReg(TempReg, RegState::Kill); 1033 } 1034 1035 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1036 .addReg(SPReg, RegState::Kill) 1037 .addReg(SPReg) 1038 .addReg(ScratchReg); 1039 HasSTUX = true; 1040 1041 } else if (!isLargeFrame) { 1042 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1043 .addReg(SPReg) 1044 .addImm(NegFrameSize) 1045 .addReg(SPReg); 1046 1047 } else { 1048 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1049 .addImm(NegFrameSize >> 16); 1050 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1051 .addReg(ScratchReg, RegState::Kill) 1052 .addImm(NegFrameSize & 0xFFFF); 1053 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1054 .addReg(SPReg, RegState::Kill) 1055 .addReg(SPReg) 1056 .addReg(ScratchReg); 1057 HasSTUX = true; 1058 } 1059 1060 // Save the TOC register after the stack pointer update if a prologue TOC 1061 // save is required for the function. 
1062 if (MustSaveTOC) { 1063 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1064 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1065 .addReg(TOCReg, getKillRegState(true)) 1066 .addImm(TOCSaveOffset) 1067 .addReg(SPReg); 1068 } 1069 1070 if (!HasRedZone) { 1071 assert(!isPPC64 && "A red zone is always available on PPC64"); 1072 if (HasSTUX) { 1073 // The negated frame size is in ScratchReg, and the SPReg has been 1074 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1075 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1076 // the stack frame (i.e. the old SP), ideally, we would put the old 1077 // SP into a register and use it as the base for the stores. The 1078 // problem is that the only available register may be ScratchReg, 1079 // which could be R0, and R0 cannot be used as a base address. 1080 1081 // First, set ScratchReg to the old SP. This may need to be modified 1082 // later. 1083 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1084 .addReg(ScratchReg, RegState::Kill) 1085 .addReg(SPReg); 1086 1087 if (ScratchReg == PPC::R0) { 1088 // R0 cannot be used as a base register, but it can be used as an 1089 // index in a store-indexed. 1090 int LastOffset = 0; 1091 if (HasFP) { 1092 // R0 += (FPOffset-LastOffset). 1093 // Need addic, since addi treats R0 as 0. 1094 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1095 .addReg(ScratchReg) 1096 .addImm(FPOffset-LastOffset); 1097 LastOffset = FPOffset; 1098 // Store FP into *R0. 1099 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1100 .addReg(FPReg, RegState::Kill) // Save FP. 1101 .addReg(PPC::ZERO) 1102 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1103 } 1104 if (FI->usesPICBase()) { 1105 // R0 += (PBPOffset-LastOffset). 
1106 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1107 .addReg(ScratchReg) 1108 .addImm(PBPOffset-LastOffset); 1109 LastOffset = PBPOffset; 1110 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1111 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1112 .addReg(PPC::ZERO) 1113 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1114 } 1115 if (HasBP) { 1116 // R0 += (BPOffset-LastOffset). 1117 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1118 .addReg(ScratchReg) 1119 .addImm(BPOffset-LastOffset); 1120 LastOffset = BPOffset; 1121 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1122 .addReg(BPReg, RegState::Kill) // Save BP. 1123 .addReg(PPC::ZERO) 1124 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1125 // BP = R0-LastOffset 1126 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1127 .addReg(ScratchReg, RegState::Kill) 1128 .addImm(-LastOffset); 1129 } 1130 } else { 1131 // ScratchReg is not R0, so use it as the base register. It is 1132 // already set to the old SP, so we can use the offsets directly. 1133 1134 // Now that the stack frame has been allocated, save all the necessary 1135 // registers using ScratchReg as the base address. 1136 if (HasFP) 1137 BuildMI(MBB, MBBI, dl, StoreInst) 1138 .addReg(FPReg) 1139 .addImm(FPOffset) 1140 .addReg(ScratchReg); 1141 if (FI->usesPICBase()) 1142 BuildMI(MBB, MBBI, dl, StoreInst) 1143 .addReg(PPC::R30) 1144 .addImm(PBPOffset) 1145 .addReg(ScratchReg); 1146 if (HasBP) { 1147 BuildMI(MBB, MBBI, dl, StoreInst) 1148 .addReg(BPReg) 1149 .addImm(BPOffset) 1150 .addReg(ScratchReg); 1151 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1152 .addReg(ScratchReg, RegState::Kill) 1153 .addReg(ScratchReg); 1154 } 1155 } 1156 } else { 1157 // The frame size is a known 16-bit constant (fitting in the immediate 1158 // field of STWU). To be here we have to be compiling for PPC32. 1159 // Since the SPReg has been decreased by FrameSize, add it back to each 1160 // offset. 
1161 if (HasFP) 1162 BuildMI(MBB, MBBI, dl, StoreInst) 1163 .addReg(FPReg) 1164 .addImm(FrameSize + FPOffset) 1165 .addReg(SPReg); 1166 if (FI->usesPICBase()) 1167 BuildMI(MBB, MBBI, dl, StoreInst) 1168 .addReg(PPC::R30) 1169 .addImm(FrameSize + PBPOffset) 1170 .addReg(SPReg); 1171 if (HasBP) { 1172 BuildMI(MBB, MBBI, dl, StoreInst) 1173 .addReg(BPReg) 1174 .addImm(FrameSize + BPOffset) 1175 .addReg(SPReg); 1176 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1177 .addReg(SPReg) 1178 .addImm(FrameSize); 1179 } 1180 } 1181 } 1182 1183 // Add Call Frame Information for the instructions we generated above. 1184 if (needsCFI) { 1185 unsigned CFIIndex; 1186 1187 if (HasBP) { 1188 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1189 // because if the stack needed aligning then CFA won't be at a fixed 1190 // offset from FP/SP. 1191 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1192 CFIIndex = MF.addFrameInst( 1193 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1194 } else { 1195 // Adjust the definition of CFA to account for the change in SP. 1196 assert(NegFrameSize); 1197 CFIIndex = MF.addFrameInst( 1198 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 1199 } 1200 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1201 .addCFIIndex(CFIIndex); 1202 1203 if (HasFP) { 1204 // Describe where FP was saved, at a fixed offset from CFA. 1205 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1206 CFIIndex = MF.addFrameInst( 1207 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1208 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1209 .addCFIIndex(CFIIndex); 1210 } 1211 1212 if (FI->usesPICBase()) { 1213 // Describe where FP was saved, at a fixed offset from CFA. 
1214 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1215 CFIIndex = MF.addFrameInst( 1216 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1217 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1218 .addCFIIndex(CFIIndex); 1219 } 1220 1221 if (HasBP) { 1222 // Describe where BP was saved, at a fixed offset from CFA. 1223 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1224 CFIIndex = MF.addFrameInst( 1225 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1226 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1227 .addCFIIndex(CFIIndex); 1228 } 1229 1230 if (MustSaveLR) { 1231 // Describe where LR was saved, at a fixed offset from CFA. 1232 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1233 CFIIndex = MF.addFrameInst( 1234 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1235 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1236 .addCFIIndex(CFIIndex); 1237 } 1238 } 1239 1240 // If there is a frame pointer, copy R1 into R31 1241 if (HasFP) { 1242 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1243 .addReg(SPReg) 1244 .addReg(SPReg); 1245 1246 if (!HasBP && needsCFI) { 1247 // Change the definition of CFA from SP+offset to FP+offset, because SP 1248 // will change at every alloca. 1249 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1250 unsigned CFIIndex = MF.addFrameInst( 1251 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1252 1253 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1254 .addCFIIndex(CFIIndex); 1255 } 1256 } 1257 1258 if (needsCFI) { 1259 // Describe where callee saved registers were saved, at fixed offsets from 1260 // CFA. 1261 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1262 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1263 unsigned Reg = CSI[I].getReg(); 1264 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1265 1266 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1267 // subregisters of CR2. 
We just need to emit a move of CR2. 1268 if (PPC::CRBITRCRegClass.contains(Reg)) 1269 continue; 1270 1271 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1272 continue; 1273 1274 // For SVR4, don't emit a move for the CR spill slot if we haven't 1275 // spilled CRs. 1276 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1277 && !MustSaveCR) 1278 continue; 1279 1280 // For 64-bit SVR4 when we have spilled CRs, the spill location 1281 // is SP+8, not a frame-relative slot. 1282 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1283 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1284 // the whole CR word. In the ELFv2 ABI, every CR that was 1285 // actually saved gets its own CFI record. 1286 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1287 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1288 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1289 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1290 .addCFIIndex(CFIIndex); 1291 continue; 1292 } 1293 1294 if (CSI[I].isSpilledToReg()) { 1295 unsigned SpilledReg = CSI[I].getDstReg(); 1296 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1297 nullptr, MRI->getDwarfRegNum(Reg, true), 1298 MRI->getDwarfRegNum(SpilledReg, true))); 1299 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1300 .addCFIIndex(CFIRegister); 1301 } else { 1302 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1303 // We have changed the object offset above but we do not want to change 1304 // the actual offsets in the CFI instruction so we have to undo the 1305 // offset change here. 
if (MovingStackUpdateDown)
          Offset -= NegFrameSize;

        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }
}

/// Emit the function epilogue into \p MBB, inserting before its first
/// terminator.
///
/// Driven by the flags recorded in PPCFunctionInfo and by the frame layout in
/// MachineFrameInfo, this restores the stack pointer and reloads whichever of
/// LR, the CR fields, the frame pointer, the base pointer and the PIC base
/// register (R30) are in use. If the block ends in a TCRETURN* pseudo, the
/// frame size is first adjusted by the tail-call stack delta. A returning
/// block is finished either with the fastcc callee-pop adjustment or by
/// calling createTailCallBranchInstr().
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl;

  // Reuse the terminator's debug location, if the block has a terminator.
  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Get alignment info so we know how to restore the SP.
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the number of bytes allocated from the FrameInfo.
  int FrameSize = MFI.getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();

  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  // Only 32-bit SVR4 is treated as having no red zone.
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
  unsigned ScratchReg = 0;
  unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  // Pick the 64-bit or 32-bit form of every opcode used below.
  const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
                                                 : PPC::MTLR );
  const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
                                                 : PPC::LWZ );
  const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
                                                           : PPC::LIS );
  const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
                                              : PPC::OR );
  const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
                                                  : PPC::ORI );
  const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
                                                   : PPC::ADDI );
  const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
                                                : PPC::ADD4 );
  const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
                                                     : PPC::LWZ);
  const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
                                                     : PPC::MTOCRF);
  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
                                              &TempReg);
  assert(SingleScratchReg &&
         "Could not find an available scratch register");

  // From here on the flag means "ScratchReg and TempReg are the same
  // register", i.e. only one distinct scratch register is available.
  SingleScratchReg = ScratchReg == TempReg;

  // Offset of the frame pointer save slot: taken from the fixed frame object
  // on SVR4, otherwise from the ABI-defined constant.
  if (HasFP) {
    if (isSVR4ABI) {
      int FPIndex = FI->getFramePointerSaveIndex();
      assert(FPIndex && "No Frame Pointer Save Slot!");
      FPOffset = MFI.getObjectOffset(FPIndex);
    } else {
      FPOffset = getFramePointerSaveOffset();
    }
  }

  // Offset of the base pointer save slot, determined the same way.
  int BPOffset = 0;
  if (HasBP) {
    if (isSVR4ABI) {
      int BPIndex = FI->getBasePointerSaveIndex();
      assert(BPIndex && "No Base Pointer Save Slot!");
      BPOffset = MFI.getObjectOffset(BPIndex);
    } else {
      BPOffset = getBasePointerSaveOffset();
    }
  }

  // Offset of the PIC base pointer (R30) save slot.
  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());

  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    bool UsesTCRet = RetOpcode == PPC::TCRETURNri ||
      RetOpcode == PPC::TCRETURNdi ||
      RetOpcode == PPC::TCRETURNai ||
      RetOpcode == PPC::TCRETURNri8 ||
      RetOpcode == PPC::TCRETURNdi8 ||
      RetOpcode == PPC::TCRETURNai8;

    // For a tail-call return, fold the stack adjustment carried in operand 1
    // of the TCRETURN pseudo into the frame size to be released.
    if (UsesTCRet) {
      int MaxTCRetDelta = FI->getTailCallSPDelta();
      MachineOperand &StackAdjust = MBBI->getOperand(1);
      assert(StackAdjust.isImm() && "Expecting immediate value.");
      // Adjust stack pointer.
      int StackAdj = StackAdjust.getImm();
      int Delta = StackAdj - MaxTCRetDelta;
      assert((Delta >= 0) && "Delta must be positive");
      if (MaxTCRetDelta>0)
        FrameSize += (StackAdj +Delta);
      else
        FrameSize += StackAdj;
    }
  }

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple LD/LWZ immediate offset operand.
  bool isLargeFrame = !isInt<16>(FrameSize);

  // On targets without red zone, the SP needs to be restored last, so that
  // all live contents of the stack frame are upwards of the SP. This means
  // that we cannot restore SP just now, since there may be more registers
  // to restore from the stack frame (e.g. R31). If the frame size is not
  // a simple immediate value, we will need a spare register to hold the
  // restored SP. If the frame size is known and small, we can simply adjust
  // the offsets of the registers to be restored, and still use SP to restore
  // them. In such case, the final update of SP will be to add the frame
  // size to it.
  // To simplify the code, set RBReg to the base register used to restore
  // values from the stack, and set SPAdd to the value that needs to be added
  // to the SP at the end. The default values are as if red zone was present.
  unsigned RBReg = SPReg;
  unsigned SPAdd = 0;

  // Check if we can move the stack update instruction up the epilogue
  // past the callee saves. This will allow the move to LR instruction
  // to be executed before the restores of the callee saves which means
  // that the callee saves can hide the latency from the MTLR instruction.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object.  We ignore non-fixed
      // stack objects because we won't move the update of the stack pointer
      // past them.
      if (FrIdx >= 0)
        continue;

      // Step the insertion point back by one instruction for every fixed
      // callee-save slot that lies at a negative offset.
      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
        StackUpdateLoc--;
      else {
        // Abort the operation as we can't update all CSR restores.
        StackUpdateLoc = MBBI;
        break;
      }
    }
  }

  if (FrameSize) {
    // In the prologue, the loaded (or persistent) stack pointer value is
    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
    // zone add this offset back now.

    // If this function contained a fastcc call and GuaranteedTailCallOpt is
    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
    // call which invalidates the stack pointer value in SP(0). So we use the
    // value of R31 in this case.
    if (FI->hasFastCall()) {
      assert(HasFP && "Expecting a valid frame pointer.");
      if (!HasRedZone)
        RBReg = FPReg;
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
          .addReg(FPReg).addImm(FrameSize);
      } else {
        // Materialize the 32-bit frame size in ScratchReg (lis + ori), then
        // add it to FP.
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(FrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(FrameSize & 0xFFFF);
        // NOTE(review): RBReg is appended with addReg() rather than passed as
        // the BuildMI destination register - confirm that it ends up marked
        // as a definition.
        BuildMI(MBB, MBBI, dl, AddInst)
          .addReg(RBReg)
          .addReg(FPReg)
          .addReg(ScratchReg);
      }
    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
      if (HasRedZone) {
        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
          .addReg(SPReg)
          .addImm(FrameSize);
      } else {
        // Make sure that adding FrameSize will not overflow the max offset
        // size.
        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
               "Local offsets should be negative");
        // Keep SP where it is for now: rebase the save-slot offsets onto the
        // current SP and remember to add FrameSize to SP at the very end.
        SPAdd = FrameSize;
        FPOffset += FrameSize;
        BPOffset += FrameSize;
        PBPOffset += FrameSize;
      }
    } else {
      // We don't want to use ScratchReg as a base register, because it
      // could happen to be R0. Use FP instead, but make sure to preserve it.
      if (!HasRedZone) {
        // If FP is not saved, copy it to ScratchReg.
        if (!HasFP)
          BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
            .addReg(FPReg)
            .addReg(FPReg);
        RBReg = FPReg;
      }
      // Reload the previous stack pointer value from offset 0 of the current
      // SP into RBReg.
      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
        .addImm(0)
        .addReg(SPReg);
    }
  }
  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
  // If there is no red zone, ScratchReg may be needed for holding a useful
  // value (although not the base register). Make sure it is not overwritten
  // too early.

  // If we need to restore both the LR and the CR and we only have one
  // available scratch register, we must do them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
    // is live here.
    assert(HasRedZone && "Expecting red zone");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
      .addImm(CRSaveOffset)
      .addReg(SPReg);
    // Move the loaded word into each saved CR field; TempReg is killed by the
    // last move.
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));
  }

  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
  // LR is stored in the caller's stack frame. ScratchReg will be needed
  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
  // a base register anyway, because it may happen to be R0.
  bool LoadedLR = false;
  if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
      .addImm(LROffset+SPAdd)
      .addReg(RBReg);
    LoadedLR = true;
  }

  // With two scratch registers (or no LR to restore), load the saved CR word
  // now; the moves into the CR fields are emitted further down.
  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
    assert(RBReg == SPReg && "Should be using SP as a base register");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
      .addImm(CRSaveOffset)
      .addReg(RBReg);
  }

  if (HasFP) {
    // If there is red zone, restore FP directly, since SP has already been
    // restored. Otherwise, restore the value of FP into ScratchReg.
    if (HasRedZone || RBReg == SPReg)
      BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
        .addImm(FPOffset)
        .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
        .addImm(FPOffset)
        .addReg(RBReg);
  }

  // Reload the PIC base register.
  if (FI->usesPICBase())
    BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
      .addImm(PBPOffset)
      .addReg(RBReg);

  // Reload the base pointer.
  if (HasBP)
    BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
      .addImm(BPOffset)
      .addReg(RBReg);

  // There is nothing more to be loaded from the stack, so now we can
  // restore SP: SP = RBReg + SPAdd.
  if (RBReg != SPReg || SPAdd != 0) {
    assert(!HasRedZone && "This should not happen with red zone");
    // If SPAdd is 0, generate a copy.
    if (SPAdd == 0)
      BuildMI(MBB, MBBI, dl, OrInst, SPReg)
        .addReg(RBReg)
        .addReg(RBReg);
    else
      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
        .addReg(RBReg)
        .addImm(SPAdd);

    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
    // FP was borrowed as the base register; copy its value back from
    // ScratchReg.
    if (RBReg == FPReg)
      BuildMI(MBB, MBBI, dl, OrInst, FPReg)
        .addReg(ScratchReg)
        .addReg(ScratchReg);

    // Now load the LR from the caller's stack frame.
    if (MustSaveLR && !LoadedLR)
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
        .addImm(LROffset)
        .addReg(SPReg);
  }

  // Second half of the two-scratch-register CR restore: move the word loaded
  // into TempReg above into each saved CR field.
  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR))
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));

  // Move the reloaded return address from ScratchReg into LR.
  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);

  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
  // call optimization
  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
        MF.getFunction().getCallingConv() == CallingConv::Fast) {
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      unsigned CallerAllocatedAmt = FI->getMinReservedArea();

      if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
          .addReg(SPReg).addImm(CallerAllocatedAmt);
      } else {
        // NOTE(review): this path is also taken when CallerAllocatedAmt is 0.
        // The final add reads FPReg (not SPReg) and appends SPReg with
        // addReg() rather than as the BuildMI destination - confirm that
        // both are intended.
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(CallerAllocatedAmt >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(CallerAllocatedAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
          .addReg(SPReg)
          .addReg(FPReg)
          .addReg(ScratchReg);
      }
    } else {
      createTailCallBranchInstr(MBB);
    }
  }
}

/// If the first terminator of \p MBB is one of the TCRETURN* pseudo
/// instructions, insert the matching tail-call branch (TAILB, TAILBCTR, TAILBA
/// or their 64-bit forms) immediately before the last non-debug instruction of
/// the block. Any other terminator opcode leaves the block unchanged. The
/// pseudo instruction itself is not removed here.
void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // If we got this far a first terminator should exist.
  assert(MBBI != MBB.end() && "Failed to find the first terminator.");

  DebugLoc dl = MBBI->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  // Create branch instruction for pseudo tail call return instruction
  unsigned RetOpcode = MBBI->getOpcode();
  if (RetOpcode == PPC::TCRETURNdi) {
    // Direct call: branch to the global taken from operand 0.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
  } else if (RetOpcode == PPC::TCRETURNri) {
    // Indirect call: the emitted TAILBCTR takes no explicit operand.
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    // Immediate target: operand 0 is passed through as the branch immediate.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    // 64-bit counterparts of the three cases above.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
      addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

/// Extend the generic callee-saved register computation with PPC-specific
/// handling. Records in PPCFunctionInfo whether LR must be saved, creates the
/// fixed stack objects for the frame pointer, base pointer, PIC base register,
/// tail-call linkage area and CR spill slot when they are needed, and clears
/// from \p SavedRegs the registers (LR, FP, BP, R30) whose save slots are
/// handled by the prologue/epilogue code.
void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  //  Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  //  Save R31 if necessary
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI && needsFP(MF)) {
    // Find out what the fix offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  // Same for the base pointer save slot, if one has not been created yet.
  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    // TCSPDelta is negative here, so the object has size -TCSPDelta and
    // starts at offset TCSPDelta.
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
  // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
  // object at the offset of the CR-save slot in the linkage area. The actual
  // save and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
  if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    // Slot offset: 8 on any 64-bit target, 4 on 32-bit AIX, -4 otherwise.
    const int64_t SpillOffset =
        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(SpillSize, SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}

void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  // Early exit if not using the SVR4 ABI.
  if (!Subtarget.isSVR4ABI()) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped, and if the function has a tail call, the
  // tail call might not be in the new RestoreBlock, so real branch instruction
  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
  // RestoreBlock. So we handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ?
PPC::S31 : PPC::V31; 1823 1824 bool HasGPSaveArea = false; 1825 bool HasG8SaveArea = false; 1826 bool HasFPSaveArea = false; 1827 bool HasVRSAVESaveArea = false; 1828 bool HasVRSaveArea = false; 1829 1830 SmallVector<CalleeSavedInfo, 18> GPRegs; 1831 SmallVector<CalleeSavedInfo, 18> G8Regs; 1832 SmallVector<CalleeSavedInfo, 18> FPRegs; 1833 SmallVector<CalleeSavedInfo, 18> VRegs; 1834 1835 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1836 unsigned Reg = CSI[i].getReg(); 1837 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 1838 (Reg != PPC::X2 && Reg != PPC::R2)) && 1839 "Not expecting to try to spill R2 in a function that must save TOC"); 1840 if (PPC::GPRCRegClass.contains(Reg)) { 1841 HasGPSaveArea = true; 1842 1843 GPRegs.push_back(CSI[i]); 1844 1845 if (Reg < MinGPR) { 1846 MinGPR = Reg; 1847 } 1848 } else if (PPC::G8RCRegClass.contains(Reg)) { 1849 HasG8SaveArea = true; 1850 1851 G8Regs.push_back(CSI[i]); 1852 1853 if (Reg < MinG8R) { 1854 MinG8R = Reg; 1855 } 1856 } else if (PPC::F8RCRegClass.contains(Reg)) { 1857 HasFPSaveArea = true; 1858 1859 FPRegs.push_back(CSI[i]); 1860 1861 if (Reg < MinFPR) { 1862 MinFPR = Reg; 1863 } 1864 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1865 PPC::CRRCRegClass.contains(Reg)) { 1866 ; // do nothing, as we already know whether CRs are spilled 1867 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 1868 HasVRSAVESaveArea = true; 1869 } else if (PPC::VRRCRegClass.contains(Reg) || 1870 PPC::SPERCRegClass.contains(Reg)) { 1871 // Altivec and SPE are mutually exclusive, but have the same stack 1872 // alignment requirements, so overload the save area for both cases. 
1873 HasVRSaveArea = true; 1874 1875 VRegs.push_back(CSI[i]); 1876 1877 if (Reg < MinVR) { 1878 MinVR = Reg; 1879 } 1880 } else { 1881 llvm_unreachable("Unknown RegisterClass!"); 1882 } 1883 } 1884 1885 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1886 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1887 1888 int64_t LowerBound = 0; 1889 1890 // Take into account stack space reserved for tail calls. 1891 int TCSPDelta = 0; 1892 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1893 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1894 LowerBound = TCSPDelta; 1895 } 1896 1897 // The Floating-point register save area is right below the back chain word 1898 // of the previous stack frame. 1899 if (HasFPSaveArea) { 1900 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1901 int FI = FPRegs[i].getFrameIdx(); 1902 1903 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1904 } 1905 1906 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1907 } 1908 1909 // Check whether the frame pointer register is allocated. If so, make sure it 1910 // is spilled to the correct offset. 1911 if (needsFP(MF)) { 1912 int FI = PFI->getFramePointerSaveIndex(); 1913 assert(FI && "No Frame Pointer Save Slot!"); 1914 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1915 // FP is R31/X31, so no need to update MinGPR/MinG8R. 
1916 HasGPSaveArea = true; 1917 } 1918 1919 if (PFI->usesPICBase()) { 1920 int FI = PFI->getPICBasePointerSaveIndex(); 1921 assert(FI && "No PIC Base Pointer Save Slot!"); 1922 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1923 1924 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 1925 HasGPSaveArea = true; 1926 } 1927 1928 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1929 if (RegInfo->hasBasePointer(MF)) { 1930 int FI = PFI->getBasePointerSaveIndex(); 1931 assert(FI && "No Base Pointer Save Slot!"); 1932 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1933 1934 Register BP = RegInfo->getBaseRegister(MF); 1935 if (PPC::G8RCRegClass.contains(BP)) { 1936 MinG8R = std::min<unsigned>(MinG8R, BP); 1937 HasG8SaveArea = true; 1938 } else if (PPC::GPRCRegClass.contains(BP)) { 1939 MinGPR = std::min<unsigned>(MinGPR, BP); 1940 HasGPSaveArea = true; 1941 } 1942 } 1943 1944 // General register save area starts right below the Floating-point 1945 // register save area. 1946 if (HasGPSaveArea || HasG8SaveArea) { 1947 // Move general register save area spill slots down, taking into account 1948 // the size of the Floating-point register save area. 1949 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1950 if (!GPRegs[i].isSpilledToReg()) { 1951 int FI = GPRegs[i].getFrameIdx(); 1952 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1953 } 1954 } 1955 1956 // Move general register save area spill slots down, taking into account 1957 // the size of the Floating-point register save area. 
1958 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 1959 if (!G8Regs[i].isSpilledToReg()) { 1960 int FI = G8Regs[i].getFrameIdx(); 1961 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1962 } 1963 } 1964 1965 unsigned MinReg = 1966 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 1967 TRI->getEncodingValue(MinG8R)); 1968 1969 if (Subtarget.isPPC64()) { 1970 LowerBound -= (31 - MinReg + 1) * 8; 1971 } else { 1972 LowerBound -= (31 - MinReg + 1) * 4; 1973 } 1974 } 1975 1976 // For 32-bit only, the CR save area is below the general register 1977 // save area. For 64-bit SVR4, the CR save area is addressed relative 1978 // to the stack pointer and hence does not need an adjustment here. 1979 // Only CR2 (the first nonvolatile spilled) has an associated frame 1980 // index so that we have a single uniform save area. 1981 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 1982 // Adjust the frame index of the CR spill slot. 1983 for (const auto &CSInfo : CSI) { 1984 if (CSInfo.getReg() == PPC::CR2) { 1985 int FI = CSInfo.getFrameIdx(); 1986 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1987 break; 1988 } 1989 } 1990 1991 LowerBound -= 4; // The CR save area is always 4 bytes long. 1992 } 1993 1994 if (HasVRSAVESaveArea) { 1995 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 1996 // which have the VRSAVE register class? 1997 // Adjust the frame index of the VRSAVE spill slot. 1998 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1999 unsigned Reg = CSI[i].getReg(); 2000 2001 if (PPC::VRSAVERCRegClass.contains(Reg)) { 2002 int FI = CSI[i].getFrameIdx(); 2003 2004 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2005 } 2006 } 2007 2008 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 2009 } 2010 2011 // Both Altivec and SPE have the same alignment and padding requirements 2012 // within the stack frame. 
2013 if (HasVRSaveArea) { 2014 // Insert alignment padding, we need 16-byte alignment. Note: for positive 2015 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2016 // we are using negative number here (the stack grows downward). We should 2017 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2018 // is the alignment size ( n = 16 here) and y is the size after aligning. 2019 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2020 LowerBound &= ~(15); 2021 2022 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2023 int FI = VRegs[i].getFrameIdx(); 2024 2025 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2026 } 2027 } 2028 2029 addScavengingSpillSlot(MF, RS); 2030 } 2031 2032 void 2033 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2034 RegScavenger *RS) const { 2035 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2036 // a large stack, which will require scavenging a register to materialize a 2037 // large offset. 2038 2039 // We need to have a scavenger spill slot for spills if the frame size is 2040 // large. In case there is no free register for large-offset addressing, 2041 // this slot is used for the necessary emergency spill. Also, we need the 2042 // slot for dynamic stack allocations. 2043 2044 // The scavenger might be invoked if the frame offset does not fit into 2045 // the 16-bit immediate. We don't know the complete frame size here 2046 // because we've not yet computed callee-saved register spills or the 2047 // needed alignment padding. 
2048 unsigned StackSize = determineFrameLayout(MF, true); 2049 MachineFrameInfo &MFI = MF.getFrameInfo(); 2050 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 2051 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 2052 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2053 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2054 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2055 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2056 unsigned Size = TRI.getSpillSize(RC); 2057 unsigned Align = TRI.getSpillAlignment(RC); 2058 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2059 2060 // Might we have over-aligned allocas? 2061 bool HasAlVars = MFI.hasVarSizedObjects() && 2062 MFI.getMaxAlignment() > getStackAlignment(); 2063 2064 // These kinds of spills might need two registers. 2065 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 2066 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2067 2068 } 2069 } 2070 2071 // This function checks if a callee saved gpr can be spilled to a volatile 2072 // vector register. This occurs for leaf functions when the option 2073 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2074 // which were not spilled to vectors, return false so the target independent 2075 // code can handle them by assigning a FrameIdx to a stack slot. 2076 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2077 MachineFunction &MF, const TargetRegisterInfo *TRI, 2078 std::vector<CalleeSavedInfo> &CSI) const { 2079 2080 if (CSI.empty()) 2081 return true; // Early exit if no callee saved registers are modified! 2082 2083 // Early exit if cannot spill gprs to volatile vector registers. 2084 MachineFrameInfo &MFI = MF.getFrameInfo(); 2085 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2086 return false; 2087 2088 // Build a BitVector of VSRs that can be used for spilling GPRs. 
2089 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2090 BitVector BVCalleeSaved(TRI->getNumRegs()); 2091 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2092 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2093 for (unsigned i = 0; CSRegs[i]; ++i) 2094 BVCalleeSaved.set(CSRegs[i]); 2095 2096 for (unsigned Reg : BVAllocatable.set_bits()) { 2097 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2098 // used in the function. 2099 if (BVCalleeSaved[Reg] || 2100 (!PPC::F8RCRegClass.contains(Reg) && 2101 !PPC::VFRCRegClass.contains(Reg)) || 2102 (MF.getRegInfo().isPhysRegUsed(Reg))) 2103 BVAllocatable.reset(Reg); 2104 } 2105 2106 bool AllSpilledToReg = true; 2107 for (auto &CS : CSI) { 2108 if (BVAllocatable.none()) 2109 return false; 2110 2111 unsigned Reg = CS.getReg(); 2112 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2113 AllSpilledToReg = false; 2114 continue; 2115 } 2116 2117 unsigned VolatileVFReg = BVAllocatable.find_first(); 2118 if (VolatileVFReg < BVAllocatable.size()) { 2119 CS.setDstReg(VolatileVFReg); 2120 BVAllocatable.reset(VolatileVFReg); 2121 } else { 2122 AllSpilledToReg = false; 2123 } 2124 } 2125 return AllSpilledToReg; 2126 } 2127 2128 bool PPCFrameLowering::spillCalleeSavedRegisters( 2129 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2130 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2131 2132 MachineFunction *MF = MBB.getParent(); 2133 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2134 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2135 bool MustSaveTOC = FI->mustSaveTOC(); 2136 DebugLoc DL; 2137 bool CRSpilled = false; 2138 MachineInstrBuilder CRMIB; 2139 2140 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2141 unsigned Reg = CSI[i].getReg(); 2142 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 
2143 if (Reg == PPC::VRSAVE) 2144 continue; 2145 2146 // CR2 through CR4 are the nonvolatile CR fields. 2147 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2148 2149 // Add the callee-saved register as live-in; it's killed at the spill. 2150 // Do not do this for callee-saved registers that are live-in to the 2151 // function because they will already be marked live-in and this will be 2152 // adding it for a second time. It is an error to add the same register 2153 // to the set more than once. 2154 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2155 bool IsLiveIn = MRI.isLiveIn(Reg); 2156 if (!IsLiveIn) 2157 MBB.addLiveIn(Reg); 2158 2159 if (CRSpilled && IsCRField) { 2160 CRMIB.addReg(Reg, RegState::ImplicitKill); 2161 continue; 2162 } 2163 2164 // The actual spill will happen in the prologue. 2165 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2166 continue; 2167 2168 // Insert the spill to the stack frame. 2169 if (IsCRField) { 2170 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2171 if (!Subtarget.is32BitELFABI()) { 2172 // The actual spill will happen at the start of the prologue. 2173 FuncInfo->addMustSaveCR(Reg); 2174 } else { 2175 CRSpilled = true; 2176 FuncInfo->setSpillsCR(); 2177 2178 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2179 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2180 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2181 .addReg(Reg, RegState::ImplicitKill); 2182 2183 MBB.insert(MI, CRMIB); 2184 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2185 .addReg(PPC::R12, 2186 getKillRegState(true)), 2187 CSI[i].getFrameIdx())); 2188 } 2189 } else { 2190 if (CSI[i].isSpilledToReg()) { 2191 NumPESpillVSR++; 2192 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2193 .addReg(Reg, getKillRegState(true)); 2194 } else { 2195 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2196 // Use !IsLiveIn for the kill flag. 
2197 // We do not want to kill registers that are live in this function 2198 // before their use because they will become undefined registers. 2199 // Functions without NoUnwind need to preserve the order of elements in 2200 // saved vector registers. 2201 if (Subtarget.needsSwapsForVSXMemOps() && 2202 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2203 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2204 CSI[i].getFrameIdx(), RC, TRI); 2205 else 2206 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), 2207 RC, TRI); 2208 } 2209 } 2210 } 2211 return true; 2212 } 2213 2214 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2215 bool CR4Spilled, MachineBasicBlock &MBB, 2216 MachineBasicBlock::iterator MI, 2217 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2218 2219 MachineFunction *MF = MBB.getParent(); 2220 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2221 DebugLoc DL; 2222 unsigned MoveReg = PPC::R12; 2223 2224 // 32-bit: FP-relative 2225 MBB.insert(MI, 2226 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2227 CSI[CSIIndex].getFrameIdx())); 2228 2229 unsigned RestoreOp = PPC::MTOCRF; 2230 if (CR2Spilled) 2231 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2232 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2233 2234 if (CR3Spilled) 2235 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2236 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2237 2238 if (CR4Spilled) 2239 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2240 .addReg(MoveReg, getKillRegState(true))); 2241 } 2242 2243 MachineBasicBlock::iterator PPCFrameLowering:: 2244 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2245 MachineBasicBlock::iterator I) const { 2246 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2247 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2248 I->getOpcode() == PPC::ADJCALLSTACKUP) 
{ 2249 // Add (actually subtract) back the amount the callee popped on return. 2250 if (int CalleeAmt = I->getOperand(1).getImm()) { 2251 bool is64Bit = Subtarget.isPPC64(); 2252 CalleeAmt *= -1; 2253 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2254 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2255 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2256 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2257 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2258 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2259 const DebugLoc &dl = I->getDebugLoc(); 2260 2261 if (isInt<16>(CalleeAmt)) { 2262 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2263 .addReg(StackReg, RegState::Kill) 2264 .addImm(CalleeAmt); 2265 } else { 2266 MachineBasicBlock::iterator MBBI = I; 2267 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2268 .addImm(CalleeAmt >> 16); 2269 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2270 .addReg(TmpReg, RegState::Kill) 2271 .addImm(CalleeAmt & 0xFFFF); 2272 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2273 .addReg(StackReg, RegState::Kill) 2274 .addReg(TmpReg); 2275 } 2276 } 2277 } 2278 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 
2279 return MBB.erase(I); 2280 } 2281 2282 static bool isCalleeSavedCR(unsigned Reg) { 2283 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2284 } 2285 2286 bool 2287 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2288 MachineBasicBlock::iterator MI, 2289 std::vector<CalleeSavedInfo> &CSI, 2290 const TargetRegisterInfo *TRI) const { 2291 MachineFunction *MF = MBB.getParent(); 2292 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2293 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2294 bool MustSaveTOC = FI->mustSaveTOC(); 2295 bool CR2Spilled = false; 2296 bool CR3Spilled = false; 2297 bool CR4Spilled = false; 2298 unsigned CSIIndex = 0; 2299 2300 // Initialize insertion-point logic; we will be restoring in reverse 2301 // order of spill. 2302 MachineBasicBlock::iterator I = MI, BeforeI = I; 2303 bool AtStart = I == MBB.begin(); 2304 2305 if (!AtStart) 2306 --BeforeI; 2307 2308 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2309 unsigned Reg = CSI[i].getReg(); 2310 2311 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2312 if (Reg == PPC::VRSAVE) 2313 continue; 2314 2315 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2316 continue; 2317 2318 // Restore of callee saved condition register field is handled during 2319 // epilogue insertion. 2320 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2321 continue; 2322 2323 if (Reg == PPC::CR2) { 2324 CR2Spilled = true; 2325 // The spill slot is associated only with CR2, which is the 2326 // first nonvolatile spilled. Save it here. 2327 CSIIndex = i; 2328 continue; 2329 } else if (Reg == PPC::CR3) { 2330 CR3Spilled = true; 2331 continue; 2332 } else if (Reg == PPC::CR4) { 2333 CR4Spilled = true; 2334 continue; 2335 } else { 2336 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2337 // least one CR register, restore all spilled CRs together. 
2338 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2339 bool is31 = needsFP(*MF); 2340 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2341 CSIIndex); 2342 CR2Spilled = CR3Spilled = CR4Spilled = false; 2343 } 2344 2345 if (CSI[i].isSpilledToReg()) { 2346 DebugLoc DL; 2347 NumPEReloadVSR++; 2348 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2349 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2350 } else { 2351 // Default behavior for non-CR saves. 2352 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2353 2354 // Functions without NoUnwind need to preserve the order of elements in 2355 // saved vector registers. 2356 if (Subtarget.needsSwapsForVSXMemOps() && 2357 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2358 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2359 TRI); 2360 else 2361 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2362 2363 assert(I != MBB.begin() && 2364 "loadRegFromStackSlot didn't insert any code!"); 2365 } 2366 } 2367 2368 // Insert in reverse order. 2369 if (AtStart) 2370 I = MBB.begin(); 2371 else { 2372 I = BeforeI; 2373 ++I; 2374 } 2375 } 2376 2377 // If we haven't yet spilled the CRs, do so now. 
2378 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2379 assert(Subtarget.is32BitELFABI() && 2380 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2381 bool is31 = needsFP(*MF); 2382 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2383 } 2384 2385 return true; 2386 } 2387 2388 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2389 return TOCSaveOffset; 2390 } 2391 2392 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2393 return FramePointerSaveOffset; 2394 } 2395 2396 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2397 if (Subtarget.isAIXABI()) 2398 report_fatal_error("BasePointer is not implemented on AIX yet."); 2399 return BasePointerSaveOffset; 2400 } 2401 2402 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2403 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2404 return false; 2405 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2406 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2407 } 2408