1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCFrameLowering.h" 14 #include "PPCInstrBuilder.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCMachineFunctionInfo.h" 17 #include "PPCSubtarget.h" 18 #include "PPCTargetMachine.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "framelowering" 32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 34 35 static cl::opt<bool> 36 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 37 cl::desc("Enable spills in prologue to vector registers."), 38 cl::init(false), cl::Hidden); 39 40 /// VRRegNo - Map from a numbered VR register to its enum value. 
41 /// 42 static const MCPhysReg VRRegNo[] = { 43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 47 }; 48 49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 50 if (STI.isAIXABI()) 51 return STI.isPPC64() ? 16 : 8; 52 // SVR4 ABI: 53 return STI.isPPC64() ? 16 : 4; 54 } 55 56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 57 if (STI.isAIXABI()) 58 return STI.isPPC64() ? 40 : 20; 59 return STI.isELFv2ABI() ? 24 : 40; 60 } 61 62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 63 // First slot in the general register save area. 64 return STI.isPPC64() ? -8U : -4U; 65 } 66 67 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 68 if (STI.isAIXABI() || STI.isPPC64()) 69 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 70 71 // 32-bit SVR4 ABI: 72 return 8; 73 } 74 75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 76 // SVR4 ABI: First slot in the general register save area. 77 return STI.isPPC64() 78 ? -16U 79 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 80 } 81 82 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { 83 return (STI.isAIXABI() && !STI.isPPC64()) ? 
4 : 8; 84 } 85 86 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 87 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 88 STI.getPlatformStackAlignment(), 0), 89 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 90 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 91 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 92 LinkageSize(computeLinkageSize(Subtarget)), 93 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 94 CRSaveOffset(computeCRSaveOffset(Subtarget)) {} 95 96 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 97 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 98 unsigned &NumEntries) const { 99 // Early exit if not using the SVR4 ABI. 100 if (!Subtarget.isSVR4ABI()) { 101 NumEntries = 0; 102 return nullptr; 103 } 104 105 // Floating-point register save area offsets. 106 #define CALLEE_SAVED_FPRS \ 107 {PPC::F31, -8}, \ 108 {PPC::F30, -16}, \ 109 {PPC::F29, -24}, \ 110 {PPC::F28, -32}, \ 111 {PPC::F27, -40}, \ 112 {PPC::F26, -48}, \ 113 {PPC::F25, -56}, \ 114 {PPC::F24, -64}, \ 115 {PPC::F23, -72}, \ 116 {PPC::F22, -80}, \ 117 {PPC::F21, -88}, \ 118 {PPC::F20, -96}, \ 119 {PPC::F19, -104}, \ 120 {PPC::F18, -112}, \ 121 {PPC::F17, -120}, \ 122 {PPC::F16, -128}, \ 123 {PPC::F15, -136}, \ 124 {PPC::F14, -144} 125 126 // 32-bit general purpose register save area offsets. 127 #define CALLEE_SAVED_GPRS32 \ 128 {PPC::R31, -4}, \ 129 {PPC::R30, -8}, \ 130 {PPC::R29, -12}, \ 131 {PPC::R28, -16}, \ 132 {PPC::R27, -20}, \ 133 {PPC::R26, -24}, \ 134 {PPC::R25, -28}, \ 135 {PPC::R24, -32}, \ 136 {PPC::R23, -36}, \ 137 {PPC::R22, -40}, \ 138 {PPC::R21, -44}, \ 139 {PPC::R20, -48}, \ 140 {PPC::R19, -52}, \ 141 {PPC::R18, -56}, \ 142 {PPC::R17, -60}, \ 143 {PPC::R16, -64}, \ 144 {PPC::R15, -68}, \ 145 {PPC::R14, -72} 146 147 // 64-bit general purpose register save area offsets. 
148 #define CALLEE_SAVED_GPRS64 \ 149 {PPC::X31, -8}, \ 150 {PPC::X30, -16}, \ 151 {PPC::X29, -24}, \ 152 {PPC::X28, -32}, \ 153 {PPC::X27, -40}, \ 154 {PPC::X26, -48}, \ 155 {PPC::X25, -56}, \ 156 {PPC::X24, -64}, \ 157 {PPC::X23, -72}, \ 158 {PPC::X22, -80}, \ 159 {PPC::X21, -88}, \ 160 {PPC::X20, -96}, \ 161 {PPC::X19, -104}, \ 162 {PPC::X18, -112}, \ 163 {PPC::X17, -120}, \ 164 {PPC::X16, -128}, \ 165 {PPC::X15, -136}, \ 166 {PPC::X14, -144} 167 168 // Vector register save area offsets. 169 #define CALLEE_SAVED_VRS \ 170 {PPC::V31, -16}, \ 171 {PPC::V30, -32}, \ 172 {PPC::V29, -48}, \ 173 {PPC::V28, -64}, \ 174 {PPC::V27, -80}, \ 175 {PPC::V26, -96}, \ 176 {PPC::V25, -112}, \ 177 {PPC::V24, -128}, \ 178 {PPC::V23, -144}, \ 179 {PPC::V22, -160}, \ 180 {PPC::V21, -176}, \ 181 {PPC::V20, -192} 182 183 // Note that the offsets here overlap, but this is fixed up in 184 // processFunctionBeforeFrameFinalized. 185 186 static const SpillSlot Offsets[] = { 187 CALLEE_SAVED_FPRS, 188 CALLEE_SAVED_GPRS32, 189 190 // CR save area offset. We map each of the nonvolatile CR fields 191 // to the slot for CR2, which is the first of the nonvolatile CR 192 // fields to be assigned, so that we only allocate one save slot. 193 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 194 {PPC::CR2, -4}, 195 196 // VRSAVE save area offset. 197 {PPC::VRSAVE, -4}, 198 199 CALLEE_SAVED_VRS, 200 201 // SPE register save area (overlaps Vector save area). 202 {PPC::S31, -8}, 203 {PPC::S30, -16}, 204 {PPC::S29, -24}, 205 {PPC::S28, -32}, 206 {PPC::S27, -40}, 207 {PPC::S26, -48}, 208 {PPC::S25, -56}, 209 {PPC::S24, -64}, 210 {PPC::S23, -72}, 211 {PPC::S22, -80}, 212 {PPC::S21, -88}, 213 {PPC::S20, -96}, 214 {PPC::S19, -104}, 215 {PPC::S18, -112}, 216 {PPC::S17, -120}, 217 {PPC::S16, -128}, 218 {PPC::S15, -136}, 219 {PPC::S14, -144}}; 220 221 static const SpillSlot Offsets64[] = { 222 CALLEE_SAVED_FPRS, 223 CALLEE_SAVED_GPRS64, 224 225 // VRSAVE save area offset. 
226 {PPC::VRSAVE, -4}, 227 228 CALLEE_SAVED_VRS 229 }; 230 231 if (Subtarget.isPPC64()) { 232 NumEntries = array_lengthof(Offsets64); 233 234 return Offsets64; 235 } else { 236 NumEntries = array_lengthof(Offsets); 237 238 return Offsets; 239 } 240 } 241 242 /// RemoveVRSaveCode - We have found that this function does not need any code 243 /// to manipulate the VRSAVE register, even though it uses vector registers. 244 /// This can happen when the only registers used are known to be live in or out 245 /// of the function. Remove all of the VRSAVE related code from the function. 246 /// FIXME: The removal of the code results in a compile failure at -O0 when the 247 /// function contains a function call, as the GPR containing original VRSAVE 248 /// contents is spilled and reloaded around the call. Without the prolog code, 249 /// the spill instruction refers to an undefined register. This code needs 250 /// to account for all uses of that GPR. 251 static void RemoveVRSaveCode(MachineInstr &MI) { 252 MachineBasicBlock *Entry = MI.getParent(); 253 MachineFunction *MF = Entry->getParent(); 254 255 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 256 MachineBasicBlock::iterator MBBI = MI; 257 ++MBBI; 258 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 259 MBBI->eraseFromParent(); 260 261 bool RemovedAllMTVRSAVEs = true; 262 // See if we can find and remove the MTVRSAVE instruction from all of the 263 // epilog blocks. 264 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 265 // If last instruction is a return instruction, add an epilogue 266 if (I->isReturnBlock()) { 267 bool FoundIt = false; 268 for (MBBI = I->end(); MBBI != I->begin(); ) { 269 --MBBI; 270 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 271 MBBI->eraseFromParent(); // remove it. 
272 FoundIt = true; 273 break; 274 } 275 } 276 RemovedAllMTVRSAVEs &= FoundIt; 277 } 278 } 279 280 // If we found and removed all MTVRSAVE instructions, remove the read of 281 // VRSAVE as well. 282 if (RemovedAllMTVRSAVEs) { 283 MBBI = MI; 284 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 285 --MBBI; 286 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 287 MBBI->eraseFromParent(); 288 } 289 290 // Finally, nuke the UPDATE_VRSAVE. 291 MI.eraseFromParent(); 292 } 293 294 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 295 // instruction selector. Based on the vector registers that have been used, 296 // transform this into the appropriate ORI instruction. 297 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 298 MachineFunction *MF = MI.getParent()->getParent(); 299 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 300 DebugLoc dl = MI.getDebugLoc(); 301 302 const MachineRegisterInfo &MRI = MF->getRegInfo(); 303 unsigned UsedRegMask = 0; 304 for (unsigned i = 0; i != 32; ++i) 305 if (MRI.isPhysRegModified(VRRegNo[i])) 306 UsedRegMask |= 1 << (31-i); 307 308 // Live in and live out values already must be in the mask, so don't bother 309 // marking them. 310 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 311 unsigned RegNo = TRI->getEncodingValue(LI.first); 312 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 313 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 314 } 315 316 // Live out registers appear as use operands on return instructions. 
317 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 318 UsedRegMask != 0 && BI != BE; ++BI) { 319 const MachineBasicBlock &MBB = *BI; 320 if (!MBB.isReturnBlock()) 321 continue; 322 const MachineInstr &Ret = MBB.back(); 323 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 324 const MachineOperand &MO = Ret.getOperand(I); 325 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 326 continue; 327 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 328 UsedRegMask &= ~(1 << (31-RegNo)); 329 } 330 } 331 332 // If no registers are used, turn this into a copy. 333 if (UsedRegMask == 0) { 334 // Remove all VRSAVE code. 335 RemoveVRSaveCode(MI); 336 return; 337 } 338 339 Register SrcReg = MI.getOperand(1).getReg(); 340 Register DstReg = MI.getOperand(0).getReg(); 341 342 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 343 if (DstReg != SrcReg) 344 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 345 .addReg(SrcReg) 346 .addImm(UsedRegMask); 347 else 348 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 349 .addReg(SrcReg, RegState::Kill) 350 .addImm(UsedRegMask); 351 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 352 if (DstReg != SrcReg) 353 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 354 .addReg(SrcReg) 355 .addImm(UsedRegMask >> 16); 356 else 357 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 358 .addReg(SrcReg, RegState::Kill) 359 .addImm(UsedRegMask >> 16); 360 } else { 361 if (DstReg != SrcReg) 362 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 363 .addReg(SrcReg) 364 .addImm(UsedRegMask >> 16); 365 else 366 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 367 .addReg(SrcReg, RegState::Kill) 368 .addImm(UsedRegMask >> 16); 369 370 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 371 .addReg(DstReg, RegState::Kill) 372 .addImm(UsedRegMask & 0xFFFF); 373 } 374 375 // Remove the old UPDATE_VRSAVE instruction. 
376 MI.eraseFromParent(); 377 } 378 379 static bool spillsCR(const MachineFunction &MF) { 380 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 381 return FuncInfo->isCRSpilled(); 382 } 383 384 static bool spillsVRSAVE(const MachineFunction &MF) { 385 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 386 return FuncInfo->isVRSAVESpilled(); 387 } 388 389 static bool hasSpills(const MachineFunction &MF) { 390 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 391 return FuncInfo->hasSpills(); 392 } 393 394 static bool hasNonRISpills(const MachineFunction &MF) { 395 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 396 return FuncInfo->hasNonRISpills(); 397 } 398 399 /// MustSaveLR - Return true if this function requires that we save the LR 400 /// register onto the stack in the prolog and restore it in the epilog of the 401 /// function. 402 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 403 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 404 405 // We need a save/restore of LR if there is any def of LR (which is 406 // defined by calls, including the PIC setup sequence), or if there is 407 // some use of the LR stack slot (e.g. for builtin_return_address). 408 // (LR comes in 32 and 64 bit versions.) 409 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 410 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 411 } 412 413 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 414 /// call frame size. Update the MachineFunction object with the stack size. 
415 unsigned 416 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 417 bool UseEstimate) const { 418 unsigned NewMaxCallFrameSize = 0; 419 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 420 &NewMaxCallFrameSize); 421 MF.getFrameInfo().setStackSize(FrameSize); 422 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 423 return FrameSize; 424 } 425 426 /// determineFrameLayout - Determine the size of the frame and maximum call 427 /// frame size. 428 unsigned 429 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 430 bool UseEstimate, 431 unsigned *NewMaxCallFrameSize) const { 432 const MachineFrameInfo &MFI = MF.getFrameInfo(); 433 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 434 435 // Get the number of bytes to allocate from the FrameInfo 436 unsigned FrameSize = 437 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 438 439 // Get stack alignments. The frame must be aligned to the greatest of these: 440 Align TargetAlign = getStackAlign(); // alignment required per the ABI 441 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 442 Align Alignment = std::max(TargetAlign, MaxAlign); 443 444 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 445 446 unsigned LR = RegInfo->getRARegister(); 447 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 448 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 449 !MFI.adjustsStack() && // No calls. 450 !MustSaveLR(MF, LR) && // No need to save LR. 451 !FI->mustSaveTOC() && // No need to save TOC. 452 !RegInfo->hasBasePointer(MF); // No special alignment. 453 454 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 455 // code if all local vars are reg-allocated. 
456 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 457 458 // Check whether we can skip adjusting the stack pointer (by using red zone) 459 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 460 // No need for frame 461 return 0; 462 } 463 464 // Get the maximum call frame size of all the calls. 465 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 466 467 // Maximum call frame needs to be at least big enough for linkage area. 468 unsigned minCallFrameSize = getLinkageSize(); 469 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 470 471 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 472 // that allocations will be aligned. 473 if (MFI.hasVarSizedObjects()) 474 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 475 476 // Update the new max call frame size if the caller passes in a valid pointer. 477 if (NewMaxCallFrameSize) 478 *NewMaxCallFrameSize = maxCallFrameSize; 479 480 // Include call frame size in total. 481 FrameSize += maxCallFrameSize; 482 483 // Make sure the frame is aligned. 484 FrameSize = alignTo(FrameSize, Alignment); 485 486 return FrameSize; 487 } 488 489 // hasFP - Return true if the specified function actually has a dedicated frame 490 // pointer register. 491 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 492 const MachineFrameInfo &MFI = MF.getFrameInfo(); 493 // FIXME: This is pretty much broken by design: hasFP() might be called really 494 // early, before the stack layout was calculated and thus hasFP() might return 495 // true or false here depending on the time of call. 496 return (MFI.getStackSize()) && needsFP(MF); 497 } 498 499 // needsFP - Return true if the specified function should have a dedicated frame 500 // pointer register. This is true if the function has variable sized allocas or 501 // if frame pointer elimination is disabled. 
502 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 503 const MachineFrameInfo &MFI = MF.getFrameInfo(); 504 505 // Naked functions have no stack frame pushed, so we don't have a frame 506 // pointer. 507 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 508 return false; 509 510 return MF.getTarget().Options.DisableFramePointerElim(MF) || 511 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 512 (MF.getTarget().Options.GuaranteedTailCallOpt && 513 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 514 } 515 516 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 517 bool is31 = needsFP(MF); 518 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 519 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 520 521 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 522 bool HasBP = RegInfo->hasBasePointer(MF); 523 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 524 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 525 526 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 527 BI != BE; ++BI) 528 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 529 --MBBI; 530 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 531 MachineOperand &MO = MBBI->getOperand(I); 532 if (!MO.isReg()) 533 continue; 534 535 switch (MO.getReg()) { 536 case PPC::FP: 537 MO.setReg(FPReg); 538 break; 539 case PPC::FP8: 540 MO.setReg(FP8Reg); 541 break; 542 case PPC::BP: 543 MO.setReg(BPReg); 544 break; 545 case PPC::BP8: 546 MO.setReg(BP8Reg); 547 break; 548 549 } 550 } 551 } 552 } 553 554 /* This function will do the following: 555 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 556 respectively (defaults recommended by the ABI) and return true 557 - If MBB is not an entry block, initialize the register scavenger and look 558 for available registers. 
559 - If the defaults (R0/R12) are available, return true 560 - If TwoUniqueRegsRequired is set to true, it looks for two unique 561 registers. Otherwise, look for a single available register. 562 - If the required registers are found, set SR1 and SR2 and return true. 563 - If the required registers are not found, set SR2 or both SR1 and SR2 to 564 PPC::NoRegister and return false. 565 566 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 567 is not set, this function will attempt to find two different registers, but 568 still return true if only one register is available (and set SR1 == SR2). 569 */ 570 bool 571 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 572 bool UseAtEnd, 573 bool TwoUniqueRegsRequired, 574 unsigned *SR1, 575 unsigned *SR2) const { 576 RegScavenger RS; 577 unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 578 unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 579 580 // Set the defaults for the two scratch registers. 581 if (SR1) 582 *SR1 = R0; 583 584 if (SR2) { 585 assert (SR1 && "Asking for the second scratch register but not the first?"); 586 *SR2 = R12; 587 } 588 589 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 590 if ((UseAtEnd && MBB->isReturnBlock()) || 591 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 592 return true; 593 594 RS.enterBasicBlock(*MBB); 595 596 if (UseAtEnd && !MBB->empty()) { 597 // The scratch register will be used at the end of the block, so must 598 // consider all registers used within the block 599 600 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 601 // If no terminator, back iterator up to previous instruction. 602 if (MBBI == MBB->end()) 603 MBBI = std::prev(MBBI); 604 605 if (MBBI != MBB->begin()) 606 RS.forward(MBBI); 607 } 608 609 // If the two registers are available, we're all good. 
610 // Note that we only return here if both R0 and R12 are available because 611 // although the function may not require two unique registers, it may benefit 612 // from having two so we should try to provide them. 613 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 614 return true; 615 616 // Get the list of callee-saved registers for the target. 617 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 618 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 619 620 // Get all the available registers in the block. 621 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 622 &PPC::GPRCRegClass); 623 624 // We shouldn't use callee-saved registers as scratch registers as they may be 625 // available when looking for a candidate block for shrink wrapping but not 626 // available when the actual prologue/epilogue is being emitted because they 627 // were added as live-in to the prologue block by PrologueEpilogueInserter. 628 for (int i = 0; CSRegs[i]; ++i) 629 BV.reset(CSRegs[i]); 630 631 // Set the first scratch register to the first available one. 632 if (SR1) { 633 int FirstScratchReg = BV.find_first(); 634 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 635 } 636 637 // If there is another one available, set the second scratch register to that. 638 // Otherwise, set it to either PPC::NoRegister if this function requires two 639 // or to whatever SR1 is set to if this function doesn't require two. 640 if (SR2) { 641 int SecondScratchReg = BV.find_next(*SR1); 642 if (SecondScratchReg != -1) 643 *SR2 = SecondScratchReg; 644 else 645 *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; 646 } 647 648 // Now that we've done our best to provide both registers, double check 649 // whether we were unable to provide enough. 650 if (BV.count() < (TwoUniqueRegsRequired ? 
2U : 1U)) 651 return false; 652 653 return true; 654 } 655 656 // We need a scratch register for spilling LR and for spilling CR. By default, 657 // we use two scratch registers to hide latency. However, if only one scratch 658 // register is available, we can adjust for that by not overlapping the spill 659 // code. However, if we need to realign the stack (i.e. have a base pointer) 660 // and the stack frame is large, we need two scratch registers. 661 bool 662 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 663 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 664 MachineFunction &MF = *(MBB->getParent()); 665 bool HasBP = RegInfo->hasBasePointer(MF); 666 unsigned FrameSize = determineFrameLayout(MF); 667 int NegFrameSize = -FrameSize; 668 bool IsLargeFrame = !isInt<16>(NegFrameSize); 669 MachineFrameInfo &MFI = MF.getFrameInfo(); 670 Align MaxAlign = MFI.getMaxAlign(); 671 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 672 673 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 674 } 675 676 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 677 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 678 679 return findScratchRegister(TmpMBB, false, 680 twoUniqueScratchRegsRequired(TmpMBB)); 681 } 682 683 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 684 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 685 686 return findScratchRegister(TmpMBB, true); 687 } 688 689 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { 690 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 691 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 692 693 // Abort if there is no register info or function info. 694 if (!RegInfo || !FI) 695 return false; 696 697 // Only move the stack update on ELFv2 ABI and PPC64. 
698 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) 699 return false; 700 701 // Check the frame size first and return false if it does not fit the 702 // requirements. 703 // We need a non-zero frame size as well as a frame that will fit in the red 704 // zone. This is because by moving the stack pointer update we are now storing 705 // to the red zone until the stack pointer is updated. If we get an interrupt 706 // inside the prologue but before the stack update we now have a number of 707 // stores to the red zone and those stores must all fit. 708 MachineFrameInfo &MFI = MF.getFrameInfo(); 709 unsigned FrameSize = MFI.getStackSize(); 710 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 711 return false; 712 713 // Frame pointers and base pointers complicate matters so don't do anything 714 // if we have them. For example having a frame pointer will sometimes require 715 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 716 // difficult. 717 if (hasFP(MF) || RegInfo->hasBasePointer(MF)) 718 return false; 719 720 // Calls to fast_cc functions use different rules for passing parameters on 721 // the stack from the ABI and using PIC base in the function imposes 722 // similar restrictions to using the base pointer. It is not generally safe 723 // to move the stack pointer update in these situations. 724 if (FI->hasFastCall() || FI->usesPICBase()) 725 return false; 726 727 // Finally we can move the stack update if we do not require register 728 // scavenging. Register scavenging can introduce more spills and so 729 // may make the frame size larger than we have computed. 
730 return !RegInfo->requiresFrameIndexScavenging(MF); 731 } 732 733 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 734 MachineBasicBlock &MBB) const { 735 MachineBasicBlock::iterator MBBI = MBB.begin(); 736 MachineFrameInfo &MFI = MF.getFrameInfo(); 737 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 738 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 739 740 MachineModuleInfo &MMI = MF.getMMI(); 741 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 742 DebugLoc dl; 743 // AIX assembler does not support cfi directives. 744 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 745 746 // Get processor type. 747 bool isPPC64 = Subtarget.isPPC64(); 748 // Get the ABI. 749 bool isSVR4ABI = Subtarget.isSVR4ABI(); 750 bool isAIXABI = Subtarget.isAIXABI(); 751 bool isELFv2ABI = Subtarget.isELFv2ABI(); 752 assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); 753 754 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 755 // process it. 756 if (!isSVR4ABI) 757 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 758 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 759 if (isAIXABI) 760 report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); 761 HandleVRSaveUpdate(*MBBI, TII); 762 break; 763 } 764 } 765 766 // Move MBBI back to the beginning of the prologue block. 767 MBBI = MBB.begin(); 768 769 // Work out frame sizes. 770 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 771 int NegFrameSize = -FrameSize; 772 if (!isInt<32>(NegFrameSize)) 773 llvm_unreachable("Unhandled stack size!"); 774 775 if (MFI.isFrameAddressTaken()) 776 replaceFPWithRealFP(MF); 777 778 // Check if the link register (LR) must be saved. 
779 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 780 bool MustSaveLR = FI->mustSaveLR(); 781 bool MustSaveTOC = FI->mustSaveTOC(); 782 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 783 bool MustSaveCR = !MustSaveCRs.empty(); 784 // Do we have a frame pointer and/or base pointer for this function? 785 bool HasFP = hasFP(MF); 786 bool HasBP = RegInfo->hasBasePointer(MF); 787 bool HasRedZone = isPPC64 || !isSVR4ABI; 788 789 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 790 Register BPReg = RegInfo->getBaseRegister(MF); 791 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 792 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 793 unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 794 unsigned ScratchReg = 0; 795 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 796 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 797 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 798 : PPC::MFLR ); 799 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 800 : PPC::STW ); 801 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 802 : PPC::STWU ); 803 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 804 : PPC::STWUX); 805 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 806 : PPC::LIS ); 807 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 808 : PPC::ORI ); 809 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 810 : PPC::OR ); 811 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 812 : PPC::SUBFC); 813 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 814 : PPC::SUBFIC); 815 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 816 : PPC::MFCR); 817 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 818 819 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 820 // LROffset is positive), that slot is callee-owned. 
Because PPC32 SVR4 has no 821 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 822 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 823 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 824 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 825 826 // Using the same bool variable as below to suppress compiler warnings. 827 bool SingleScratchReg = 828 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 829 &ScratchReg, &TempReg); 830 assert(SingleScratchReg && 831 "Required number of registers not available in this block"); 832 833 SingleScratchReg = ScratchReg == TempReg; 834 835 int LROffset = getReturnSaveOffset(); 836 837 int FPOffset = 0; 838 if (HasFP) { 839 if (isSVR4ABI) { 840 MachineFrameInfo &MFI = MF.getFrameInfo(); 841 int FPIndex = FI->getFramePointerSaveIndex(); 842 assert(FPIndex && "No Frame Pointer Save Slot!"); 843 FPOffset = MFI.getObjectOffset(FPIndex); 844 } else { 845 FPOffset = getFramePointerSaveOffset(); 846 } 847 } 848 849 int BPOffset = 0; 850 if (HasBP) { 851 if (isSVR4ABI) { 852 MachineFrameInfo &MFI = MF.getFrameInfo(); 853 int BPIndex = FI->getBasePointerSaveIndex(); 854 assert(BPIndex && "No Base Pointer Save Slot!"); 855 BPOffset = MFI.getObjectOffset(BPIndex); 856 } else { 857 BPOffset = getBasePointerSaveOffset(); 858 } 859 } 860 861 int PBPOffset = 0; 862 if (FI->usesPICBase()) { 863 MachineFrameInfo &MFI = MF.getFrameInfo(); 864 int PBPIndex = FI->getPICBasePointerSaveIndex(); 865 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 866 PBPOffset = MFI.getObjectOffset(PBPIndex); 867 } 868 869 // Get stack alignments. 870 Align MaxAlign = MFI.getMaxAlign(); 871 if (HasBP && MaxAlign > 1) 872 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 873 874 // Frames of 32KB & larger require special handling because they cannot be 875 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 
876 bool isLargeFrame = !isInt<16>(NegFrameSize); 877 878 // Check if we can move the stack update instruction (stdu) down the prologue 879 // past the callee saves. Hopefully this will avoid the situation where the 880 // saves are waiting for the update on the store with update to complete. 881 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 882 bool MovingStackUpdateDown = false; 883 884 // Check if we can move the stack update. 885 if (stackUpdateCanBeMoved(MF)) { 886 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 887 for (CalleeSavedInfo CSI : Info) { 888 int FrIdx = CSI.getFrameIdx(); 889 // If the frame index is not negative the callee saved info belongs to a 890 // stack object that is not a fixed stack object. We ignore non-fixed 891 // stack objects because we won't move the stack update pointer past them. 892 if (FrIdx >= 0) 893 continue; 894 895 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 896 StackUpdateLoc++; 897 MovingStackUpdateDown = true; 898 } else { 899 // We need all of the Frame Indices to meet these conditions. 900 // If they do not, abort the whole operation. 901 StackUpdateLoc = MBBI; 902 MovingStackUpdateDown = false; 903 break; 904 } 905 } 906 907 // If the operation was not aborted then update the object offset. 908 if (MovingStackUpdateDown) { 909 for (CalleeSavedInfo CSI : Info) { 910 int FrIdx = CSI.getFrameIdx(); 911 if (FrIdx < 0) 912 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 913 } 914 } 915 } 916 917 // Where in the prologue we move the CR fields depends on how many scratch 918 // registers we have, and if we need to save the link register or not. This 919 // lambda is to avoid duplicating the logic in 2 places. 920 auto BuildMoveFromCR = [&]() { 921 if (isELFv2ABI && MustSaveCRs.size() == 1) { 922 // In the ELFv2 ABI, we are not required to save all CR fields. 
923 // If only one CR field is clobbered, it is more efficient to use 924 // mfocrf to selectively save just that field, because mfocrf has short 925 // latency compares to mfcr. 926 assert(isPPC64 && "V2 ABI is 64-bit only."); 927 MachineInstrBuilder MIB = 928 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 929 MIB.addReg(MustSaveCRs[0], RegState::Kill); 930 } else { 931 MachineInstrBuilder MIB = 932 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 933 for (unsigned CRfield : MustSaveCRs) 934 MIB.addReg(CRfield, RegState::ImplicitKill); 935 } 936 }; 937 938 // If we need to spill the CR and the LR but we don't have two separate 939 // registers available, we must spill them one at a time 940 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 941 BuildMoveFromCR(); 942 BuildMI(MBB, MBBI, dl, StoreWordInst) 943 .addReg(TempReg, getKillRegState(true)) 944 .addImm(CRSaveOffset) 945 .addReg(SPReg); 946 } 947 948 if (MustSaveLR) 949 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 950 951 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 952 BuildMoveFromCR(); 953 954 if (HasRedZone) { 955 if (HasFP) 956 BuildMI(MBB, MBBI, dl, StoreInst) 957 .addReg(FPReg) 958 .addImm(FPOffset) 959 .addReg(SPReg); 960 if (FI->usesPICBase()) 961 BuildMI(MBB, MBBI, dl, StoreInst) 962 .addReg(PPC::R30) 963 .addImm(PBPOffset) 964 .addReg(SPReg); 965 if (HasBP) 966 BuildMI(MBB, MBBI, dl, StoreInst) 967 .addReg(BPReg) 968 .addImm(BPOffset) 969 .addReg(SPReg); 970 } 971 972 if (MustSaveLR) 973 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 974 .addReg(ScratchReg, getKillRegState(true)) 975 .addImm(LROffset) 976 .addReg(SPReg); 977 978 if (MustSaveCR && 979 !(SingleScratchReg && MustSaveLR)) { 980 assert(HasRedZone && "A red zone is always available on PPC64"); 981 BuildMI(MBB, MBBI, dl, StoreWordInst) 982 .addReg(TempReg, getKillRegState(true)) 983 .addImm(CRSaveOffset) 984 .addReg(SPReg); 985 } 986 987 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 
988 if (!FrameSize) 989 return; 990 991 // Adjust stack pointer: r1 += NegFrameSize. 992 // If there is a preferred stack alignment, align R1 now 993 994 if (HasBP && HasRedZone) { 995 // Save a copy of r1 as the base pointer. 996 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 997 .addReg(SPReg) 998 .addReg(SPReg); 999 } 1000 1001 // Have we generated a STUX instruction to claim stack frame? If so, 1002 // the negated frame size will be placed in ScratchReg. 1003 bool HasSTUX = false; 1004 1005 // This condition must be kept in sync with canUseAsPrologue. 1006 if (HasBP && MaxAlign > 1) { 1007 if (isPPC64) 1008 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1009 .addReg(SPReg) 1010 .addImm(0) 1011 .addImm(64 - Log2(MaxAlign)); 1012 else // PPC32... 1013 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1014 .addReg(SPReg) 1015 .addImm(0) 1016 .addImm(32 - Log2(MaxAlign)) 1017 .addImm(31); 1018 if (!isLargeFrame) { 1019 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1020 .addReg(ScratchReg, RegState::Kill) 1021 .addImm(NegFrameSize); 1022 } else { 1023 assert(!SingleScratchReg && "Only a single scratch reg available"); 1024 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1025 .addImm(NegFrameSize >> 16); 1026 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1027 .addReg(TempReg, RegState::Kill) 1028 .addImm(NegFrameSize & 0xFFFF); 1029 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1030 .addReg(ScratchReg, RegState::Kill) 1031 .addReg(TempReg, RegState::Kill); 1032 } 1033 1034 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1035 .addReg(SPReg, RegState::Kill) 1036 .addReg(SPReg) 1037 .addReg(ScratchReg); 1038 HasSTUX = true; 1039 1040 } else if (!isLargeFrame) { 1041 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1042 .addReg(SPReg) 1043 .addImm(NegFrameSize) 1044 .addReg(SPReg); 1045 1046 } else { 1047 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1048 .addImm(NegFrameSize >> 16); 1049 BuildMI(MBB, MBBI, dl, OrImmInst, 
ScratchReg) 1050 .addReg(ScratchReg, RegState::Kill) 1051 .addImm(NegFrameSize & 0xFFFF); 1052 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1053 .addReg(SPReg, RegState::Kill) 1054 .addReg(SPReg) 1055 .addReg(ScratchReg); 1056 HasSTUX = true; 1057 } 1058 1059 // Save the TOC register after the stack pointer update if a prologue TOC 1060 // save is required for the function. 1061 if (MustSaveTOC) { 1062 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1063 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1064 .addReg(TOCReg, getKillRegState(true)) 1065 .addImm(TOCSaveOffset) 1066 .addReg(SPReg); 1067 } 1068 1069 if (!HasRedZone) { 1070 assert(!isPPC64 && "A red zone is always available on PPC64"); 1071 if (HasSTUX) { 1072 // The negated frame size is in ScratchReg, and the SPReg has been 1073 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1074 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1075 // the stack frame (i.e. the old SP), ideally, we would put the old 1076 // SP into a register and use it as the base for the stores. The 1077 // problem is that the only available register may be ScratchReg, 1078 // which could be R0, and R0 cannot be used as a base address. 1079 1080 // First, set ScratchReg to the old SP. This may need to be modified 1081 // later. 1082 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1083 .addReg(ScratchReg, RegState::Kill) 1084 .addReg(SPReg); 1085 1086 if (ScratchReg == PPC::R0) { 1087 // R0 cannot be used as a base register, but it can be used as an 1088 // index in a store-indexed. 1089 int LastOffset = 0; 1090 if (HasFP) { 1091 // R0 += (FPOffset-LastOffset). 1092 // Need addic, since addi treats R0 as 0. 1093 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1094 .addReg(ScratchReg) 1095 .addImm(FPOffset-LastOffset); 1096 LastOffset = FPOffset; 1097 // Store FP into *R0. 
1098 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1099 .addReg(FPReg, RegState::Kill) // Save FP. 1100 .addReg(PPC::ZERO) 1101 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1102 } 1103 if (FI->usesPICBase()) { 1104 // R0 += (PBPOffset-LastOffset). 1105 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1106 .addReg(ScratchReg) 1107 .addImm(PBPOffset-LastOffset); 1108 LastOffset = PBPOffset; 1109 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1110 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1111 .addReg(PPC::ZERO) 1112 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1113 } 1114 if (HasBP) { 1115 // R0 += (BPOffset-LastOffset). 1116 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1117 .addReg(ScratchReg) 1118 .addImm(BPOffset-LastOffset); 1119 LastOffset = BPOffset; 1120 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1121 .addReg(BPReg, RegState::Kill) // Save BP. 1122 .addReg(PPC::ZERO) 1123 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1124 // BP = R0-LastOffset 1125 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1126 .addReg(ScratchReg, RegState::Kill) 1127 .addImm(-LastOffset); 1128 } 1129 } else { 1130 // ScratchReg is not R0, so use it as the base register. It is 1131 // already set to the old SP, so we can use the offsets directly. 1132 1133 // Now that the stack frame has been allocated, save all the necessary 1134 // registers using ScratchReg as the base address. 
1135 if (HasFP) 1136 BuildMI(MBB, MBBI, dl, StoreInst) 1137 .addReg(FPReg) 1138 .addImm(FPOffset) 1139 .addReg(ScratchReg); 1140 if (FI->usesPICBase()) 1141 BuildMI(MBB, MBBI, dl, StoreInst) 1142 .addReg(PPC::R30) 1143 .addImm(PBPOffset) 1144 .addReg(ScratchReg); 1145 if (HasBP) { 1146 BuildMI(MBB, MBBI, dl, StoreInst) 1147 .addReg(BPReg) 1148 .addImm(BPOffset) 1149 .addReg(ScratchReg); 1150 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1151 .addReg(ScratchReg, RegState::Kill) 1152 .addReg(ScratchReg); 1153 } 1154 } 1155 } else { 1156 // The frame size is a known 16-bit constant (fitting in the immediate 1157 // field of STWU). To be here we have to be compiling for PPC32. 1158 // Since the SPReg has been decreased by FrameSize, add it back to each 1159 // offset. 1160 if (HasFP) 1161 BuildMI(MBB, MBBI, dl, StoreInst) 1162 .addReg(FPReg) 1163 .addImm(FrameSize + FPOffset) 1164 .addReg(SPReg); 1165 if (FI->usesPICBase()) 1166 BuildMI(MBB, MBBI, dl, StoreInst) 1167 .addReg(PPC::R30) 1168 .addImm(FrameSize + PBPOffset) 1169 .addReg(SPReg); 1170 if (HasBP) { 1171 BuildMI(MBB, MBBI, dl, StoreInst) 1172 .addReg(BPReg) 1173 .addImm(FrameSize + BPOffset) 1174 .addReg(SPReg); 1175 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1176 .addReg(SPReg) 1177 .addImm(FrameSize); 1178 } 1179 } 1180 } 1181 1182 // Add Call Frame Information for the instructions we generated above. 1183 if (needsCFI) { 1184 unsigned CFIIndex; 1185 1186 if (HasBP) { 1187 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1188 // because if the stack needed aligning then CFA won't be at a fixed 1189 // offset from FP/SP. 1190 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1191 CFIIndex = MF.addFrameInst( 1192 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1193 } else { 1194 // Adjust the definition of CFA to account for the change in SP. 
1195 assert(NegFrameSize); 1196 CFIIndex = MF.addFrameInst( 1197 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 1198 } 1199 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1200 .addCFIIndex(CFIIndex); 1201 1202 if (HasFP) { 1203 // Describe where FP was saved, at a fixed offset from CFA. 1204 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1205 CFIIndex = MF.addFrameInst( 1206 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1207 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1208 .addCFIIndex(CFIIndex); 1209 } 1210 1211 if (FI->usesPICBase()) { 1212 // Describe where FP was saved, at a fixed offset from CFA. 1213 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1214 CFIIndex = MF.addFrameInst( 1215 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1216 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1217 .addCFIIndex(CFIIndex); 1218 } 1219 1220 if (HasBP) { 1221 // Describe where BP was saved, at a fixed offset from CFA. 1222 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1223 CFIIndex = MF.addFrameInst( 1224 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1225 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1226 .addCFIIndex(CFIIndex); 1227 } 1228 1229 if (MustSaveLR) { 1230 // Describe where LR was saved, at a fixed offset from CFA. 1231 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1232 CFIIndex = MF.addFrameInst( 1233 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1234 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1235 .addCFIIndex(CFIIndex); 1236 } 1237 } 1238 1239 // If there is a frame pointer, copy R1 into R31 1240 if (HasFP) { 1241 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1242 .addReg(SPReg) 1243 .addReg(SPReg); 1244 1245 if (!HasBP && needsCFI) { 1246 // Change the definition of CFA from SP+offset to FP+offset, because SP 1247 // will change at every alloca. 
1248 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1249 unsigned CFIIndex = MF.addFrameInst( 1250 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1251 1252 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1253 .addCFIIndex(CFIIndex); 1254 } 1255 } 1256 1257 if (needsCFI) { 1258 // Describe where callee saved registers were saved, at fixed offsets from 1259 // CFA. 1260 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1261 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1262 unsigned Reg = CSI[I].getReg(); 1263 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1264 1265 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1266 // subregisters of CR2. We just need to emit a move of CR2. 1267 if (PPC::CRBITRCRegClass.contains(Reg)) 1268 continue; 1269 1270 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1271 continue; 1272 1273 // For SVR4, don't emit a move for the CR spill slot if we haven't 1274 // spilled CRs. 1275 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1276 && !MustSaveCR) 1277 continue; 1278 1279 // For 64-bit SVR4 when we have spilled CRs, the spill location 1280 // is SP+8, not a frame-relative slot. 1281 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1282 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1283 // the whole CR word. In the ELFv2 ABI, every CR that was 1284 // actually saved gets its own CFI record. 1285 unsigned CRReg = isELFv2ABI? 
Reg : (unsigned) PPC::CR2;
        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
        continue;
      }

      if (CSI[I].isSpilledToReg()) {
        unsigned SpilledReg = CSI[I].getDstReg();
        unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
            nullptr, MRI->getDwarfRegNum(Reg, true),
            MRI->getDwarfRegNum(SpilledReg, true)));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
          .addCFIIndex(CFIRegister);
      } else {
        int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
        // We have changed the object offset above but we do not want to change
        // the actual offsets in the CFI instruction so we have to undo the
        // offset change here.
        if (MovingStackUpdateDown)
          Offset -= NegFrameSize;

        unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
            nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
        BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
            .addCFIIndex(CFIIndex);
      }
    }
  }
}

/// Emit the epilogue for \p MBB.
///
/// Undoes the stack-pointer adjustment and reloads LR, the saved CR fields,
/// the frame pointer, the PIC base register (R30) and the base pointer from
/// their save slots. New instructions are inserted ahead of the block's first
/// terminator; the SP update, the LR reload and the MTLR may be placed
/// earlier, above the callee-saved restores, when stackUpdateCanBeMoved()
/// allows it.
///
/// Without a red zone the stack pointer is restored last, so that nothing
/// below SP is read. If the block ends in a return, the function finally
/// either pops the caller-allocated area (fastcc under GuaranteedTailCallOpt
/// returning through BLR/BLR8) or calls createTailCallBranchInstr().
void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
  DebugLoc dl;

  // Reuse the terminator's debug location, if the block has a terminator.
  if (MBBI != MBB.end())
    dl = MBBI->getDebugLoc();

  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Get alignment info so we know how to restore the SP.
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Get the number of bytes allocated from the FrameInfo.
  int FrameSize = MFI.getStackSize();

  // Get processor type.
  bool isPPC64 = Subtarget.isPPC64();
  // Get the ABI.
  bool isSVR4ABI = Subtarget.isSVR4ABI();

  // Check if the link register (LR) has been saved.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  bool MustSaveLR = FI->mustSaveLR();
  const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
  bool MustSaveCR = !MustSaveCRs.empty();
  // Do we have a frame pointer and/or base pointer for this function?
  bool HasFP = hasFP(MF);
  bool HasBP = RegInfo->hasBasePointer(MF);
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  // Pick the 32-bit or 64-bit flavour of every register and opcode used below.
  unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register BPReg = RegInfo->getBaseRegister(MF);
  unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31;
  unsigned ScratchReg = 0;
  unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
  const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
                                                 : PPC::MTLR );
  const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
                                                 : PPC::LWZ );
  const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
                                                           : PPC::LIS );
  const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
                                              : PPC::OR );
  const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
                                                  : PPC::ORI );
  const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
                                                   : PPC::ADDI );
  const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
                                                : PPC::ADD4 );
  const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
                                                     : PPC::LWZ);
  const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
                                                     : PPC::MTOCRF);
  int LROffset = getReturnSaveOffset();

  int FPOffset = 0;

  // Using the same bool variable as below to suppress compiler warnings.
  bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
                                              &TempReg);
  assert(SingleScratchReg &&
         "Could not find an available scratch register");

  // From here on the flag means "both scratch registers are the same one".
  SingleScratchReg = ScratchReg == TempReg;

  // Offset of the FP save slot: taken from the frame object on SVR4, from the
  // fixed ABI offset otherwise.
  if (HasFP) {
    if (isSVR4ABI) {
      int FPIndex = FI->getFramePointerSaveIndex();
      assert(FPIndex && "No Frame Pointer Save Slot!");
      FPOffset = MFI.getObjectOffset(FPIndex);
    } else {
      FPOffset = getFramePointerSaveOffset();
    }
  }

  // Offset of the BP save slot, determined the same way.
  int BPOffset = 0;
  if (HasBP) {
    if (isSVR4ABI) {
      int BPIndex = FI->getBasePointerSaveIndex();
      assert(BPIndex && "No Base Pointer Save Slot!");
      BPOffset = MFI.getObjectOffset(BPIndex);
    } else {
      BPOffset = getBasePointerSaveOffset();
    }
  }

  // Offset of the PIC base (R30) save slot.
  int PBPOffset = 0;
  if (FI->usesPICBase()) {
    int PBPIndex = FI->getPICBasePointerSaveIndex();
    assert(PBPIndex && "No PIC Base Pointer Save Slot!");
    PBPOffset = MFI.getObjectOffset(PBPIndex);
  }

  bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());

  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
      RetOpcode == PPC::TCRETURNdi ||
      RetOpcode == PPC::TCRETURNai ||
      RetOpcode == PPC::TCRETURNri8 ||
      RetOpcode == PPC::TCRETURNdi8 ||
      RetOpcode == PPC::TCRETURNai8;

    if (UsesTCRet) {
      // Operand 1 of the TCRETURN* pseudo is the immediate stack adjustment;
      // fold it into the amount by which SP is moved below.
      int MaxTCRetDelta = FI->getTailCallSPDelta();
      MachineOperand &StackAdjust = MBBI->getOperand(1);
      assert(StackAdjust.isImm() && "Expecting immediate value.");
      // Adjust stack pointer.
      int StackAdj = StackAdjust.getImm();
      int Delta = StackAdj - MaxTCRetDelta;
      assert((Delta >= 0) && "Delta must be positive");
      if (MaxTCRetDelta>0)
        FrameSize += (StackAdj +Delta);
      else
        FrameSize += StackAdj;
    }
  }

  // Frames of 32KB & larger require special handling because they cannot be
  // indexed into with a simple LD/LWZ immediate offset operand.
  bool isLargeFrame = !isInt<16>(FrameSize);

  // On targets without red zone, the SP needs to be restored last, so that
  // all live contents of the stack frame are upwards of the SP. This means
  // that we cannot restore SP just now, since there may be more registers
  // to restore from the stack frame (e.g. R31). If the frame size is not
  // a simple immediate value, we will need a spare register to hold the
  // restored SP. If the frame size is known and small, we can simply adjust
  // the offsets of the registers to be restored, and still use SP to restore
  // them. In such case, the final update of SP will be to add the frame
  // size to it.
  // To simplify the code, set RBReg to the base register used to restore
  // values from the stack, and set SPAdd to the value that needs to be added
  // to the SP at the end. The default values are as if red zone was present.
  unsigned RBReg = SPReg;
  unsigned SPAdd = 0;

  // Check if we can move the stack update instruction up the epilogue
  // past the callee saves. This will allow the move to LR instruction
  // to be executed before the restores of the callee saves which means
  // that the callee saves can hide the latency from the MTLR instruction.
  MachineBasicBlock::iterator StackUpdateLoc = MBBI;
  if (stackUpdateCanBeMoved(MF)) {
    const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
    for (CalleeSavedInfo CSI : Info) {
      int FrIdx = CSI.getFrameIdx();
      // If the frame index is not negative the callee saved info belongs to a
      // stack object that is not a fixed stack object. We ignore non-fixed
      // stack objects because we won't move the update of the stack pointer
      // past them.
      if (FrIdx >= 0)
        continue;

      // Step the insertion point back by one instruction for every fixed
      // callee-saved slot that lies at a negative offset.
      if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
        StackUpdateLoc--;
      else {
        // Abort the operation as we can't update all CSR restores.
        StackUpdateLoc = MBBI;
        break;
      }
    }
  }

  if (FrameSize) {
    // In the prologue, the loaded (or persistent) stack pointer value is
    // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
    // zone add this offset back now.

    // If this function contained a fastcc call and GuaranteedTailCallOpt is
    // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
    // call which invalidates the stack pointer value in SP(0). So we use the
    // value of R31 in this case.
    if (FI->hasFastCall()) {
      assert(HasFP && "Expecting a valid frame pointer.");
      if (!HasRedZone)
        RBReg = FPReg;
      if (!isLargeFrame) {
        BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
          .addReg(FPReg).addImm(FrameSize);
      } else {
        // The frame size does not fit a 16-bit immediate: materialize it in
        // ScratchReg with lis/ori and add it to FP.
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(FrameSize >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(FrameSize & 0xFFFF);
        // NOTE(review): unlike the other BuildMI calls in this function, no
        // destination register is passed here; RBReg is appended with
        // addReg() just like the two sources. Confirm it ends up as the
        // definition of the add.
        BuildMI(MBB, MBBI, dl, AddInst)
          .addReg(RBReg)
          .addReg(FPReg)
          .addReg(ScratchReg);
      }
    } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
      if (HasRedZone) {
        BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
          .addReg(SPReg)
          .addImm(FrameSize);
      } else {
        // Make sure that adding FrameSize will not overflow the max offset
        // size.
        assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
               "Local offsets should be negative");
        // Keep SP where it is for now: rebase the save-slot offsets onto the
        // current SP and remember to add FrameSize to SP at the very end.
        SPAdd = FrameSize;
        FPOffset += FrameSize;
        BPOffset += FrameSize;
        PBPOffset += FrameSize;
      }
    } else {
      // We don't want to use ScratchReg as a base register, because it
      // could happen to be R0. Use FP instead, but make sure to preserve it.
      if (!HasRedZone) {
        // If FP is not saved, copy it to ScratchReg.
        if (!HasFP)
          BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
            .addReg(FPReg)
            .addReg(FPReg);
        RBReg = FPReg;
      }
      // Load the word stored at 0(SP) into RBReg to use as the restore base.
      BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
        .addImm(0)
        .addReg(SPReg);
    }
  }
  assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
  // If there is no red zone, ScratchReg may be needed for holding a useful
  // value (although not the base register). Make sure it is not overwritten
  // too early.

  // If we need to restore both the LR and the CR and we only have one
  // available scratch register, we must do them one at a time.
  if (MustSaveCR && SingleScratchReg && MustSaveLR) {
    // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
    // is live here.
    assert(HasRedZone && "Expecting red zone");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
      .addImm(CRSaveOffset)
      .addReg(SPReg);
    // One move-to-CR per saved field; TempReg is killed by the last one.
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));
  }

  // Delay restoring of the LR if ScratchReg is needed. This is ok, since
  // LR is stored in the caller's stack frame. ScratchReg will be needed
  // if RBReg is anything other than SP. We shouldn't use ScratchReg as
  // a base register anyway, because it may happen to be R0.
  bool LoadedLR = false;
  if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
    BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
      .addImm(LROffset+SPAdd)
      .addReg(RBReg);
    LoadedLR = true;
  }

  // With two distinct scratch registers (or no LR to restore), the saved CR
  // word is loaded into TempReg here and moved into the CR fields further
  // down, after the SP restore.
  if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
    assert(RBReg == SPReg && "Should be using SP as a base register");
    BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
      .addImm(CRSaveOffset)
      .addReg(RBReg);
  }

  if (HasFP) {
    // If there is red zone, restore FP directly, since SP has already been
    // restored. Otherwise, restore the value of FP into ScratchReg.
    if (HasRedZone || RBReg == SPReg)
      BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
        .addImm(FPOffset)
        .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
        .addImm(FPOffset)
        .addReg(RBReg);
  }

  if (FI->usesPICBase())
    BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
      .addImm(PBPOffset)
      .addReg(RBReg);

  if (HasBP)
    BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
      .addImm(BPOffset)
      .addReg(RBReg);

  // There is nothing more to be loaded from the stack, so now we can
  // restore SP: SP = RBReg + SPAdd.
  if (RBReg != SPReg || SPAdd != 0) {
    assert(!HasRedZone && "This should not happen with red zone");
    // If SPAdd is 0, generate a copy.
    if (SPAdd == 0)
      BuildMI(MBB, MBBI, dl, OrInst, SPReg)
        .addReg(RBReg)
        .addReg(RBReg);
    else
      BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
        .addReg(RBReg)
        .addImm(SPAdd);

    assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
    // FP served as the restore base, so its own value was parked in
    // ScratchReg above; copy it back now that SP has been restored.
    if (RBReg == FPReg)
      BuildMI(MBB, MBBI, dl, OrInst, FPReg)
        .addReg(ScratchReg)
        .addReg(ScratchReg);

    // Now load the LR from the caller's stack frame.
    if (MustSaveLR && !LoadedLR)
      BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
        .addImm(LROffset)
        .addReg(SPReg);
  }

  // Move the CR word loaded into TempReg above back into each saved field.
  if (MustSaveCR &&
      !(SingleScratchReg && MustSaveLR))
    for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
      BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
        .addReg(TempReg, getKillRegState(i == e-1));

  if (MustSaveLR)
    BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);

  // Callee pop calling convention. Pop parameter/linkage area. Used for tail
  // call optimization
  if (IsReturnBlock) {
    unsigned RetOpcode = MBBI->getOpcode();
    if (MF.getTarget().Options.GuaranteedTailCallOpt &&
        (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
        MF.getFunction().getCallingConv() == CallingConv::Fast) {
      // Note: this FI shadows the one declared at the top of the function;
      // it is fetched from the same MF.
      PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
      unsigned CallerAllocatedAmt = FI->getMinReservedArea();

      if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
        BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
          .addReg(SPReg).addImm(CallerAllocatedAmt);
      } else {
        // NOTE(review): this branch is reached both when the amount does not
        // fit in 16 bits and when it is 0. The add below takes FPReg as a
        // source whereas the branch above adds to SPReg, and SPReg is
        // appended with addReg() instead of being passed as the destination.
        // Confirm all of this is intended.
        BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
          .addImm(CallerAllocatedAmt >> 16);
        BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
          .addReg(ScratchReg, RegState::Kill)
          .addImm(CallerAllocatedAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, AddInst)
          .addReg(SPReg)
          .addReg(FPReg)
          .addReg(ScratchReg);
      }
    } else {
      createTailCallBranchInstr(MBB);
    }
  }
}

/// Insert the real tail-call branch matching the TCRETURN* pseudo that
/// terminates \p MBB.
///
/// The opcode of the block's first terminator selects the branch:
///   TCRETURNdi / TCRETURNdi8 -> TAILB / TAILB8 (global or external symbol),
///   TCRETURNri / TCRETURNri8 -> TAILBCTR / TAILBCTR8,
///   TCRETURNai / TCRETURNai8 -> TAILBA / TAILBA8 (immediate target).
/// Any other terminator opcode leaves the block untouched. The branch is
/// inserted in front of the block's last non-debug instruction; this function
/// does not remove the pseudo itself.
void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();

  // If we got this far a first terminator should exist.
  assert(MBBI != MBB.end() && "Failed to find the first terminator.");

  DebugLoc dl = MBBI->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  // Create branch instruction for pseudo tail call return instruction.
  // The TCRETURNdi variants are direct calls. Valid targets for those are
  // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
  // since we can tail call external functions with PC-Rel (i.e. we don't need
  // to worry about different TOC pointers). Some of the external functions will
  // be MO_GlobalAddress while others like memcpy for example, are going to
  // be MO_ExternalSymbol.
  unsigned RetOpcode = MBBI->getOpcode();
  if (RetOpcode == PPC::TCRETURNdi) {
    MBBI = MBB.getLastNonDebugInstr();
    // Operand 0 of the pseudo is the call target.
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    // The register operand is only checked, not copied onto the new branch.
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri8) {
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

/// Refine the callee-saved register set in \p SavedRegs for PPC and create
/// the fixed stack objects used by the prologue and epilogue.
///
/// After running the target-independent computation, this:
///  - records whether LR must be saved and drops LR from \p SavedRegs;
///  - creates the frame-pointer and base-pointer save slots if they do not
///    exist yet, and the PIC base (R30) save slot when the PIC base is used;
///  - drops FP, BP and R30 from \p SavedRegs in those cases, because they
///    are saved by the prologue/epilogue code instead;
///  - reserves a fixed object for moving the linkage area when
///    GuaranteedTailCallOpt is on and the tail-call SP delta is negative;
///  - creates the CR spill slot when CR2, CR3 or CR4 is in \p SavedRegs.
///
/// \param MF        Function being compiled.
/// \param SavedRegs In/out bit vector of callee-saved registers to spill.
/// \param RS        Forwarded to the base-class implementation only.
void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  //  Save and clear the LR state.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  //  Save R31 if necessary
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  // (A stored index of 0 is treated as "not created".)
  if (!FPSI && needsFP(MF)) {
    // Find the fixed offset of the frame pointer save area.
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  // NOTE(review): unlike the FP/BP slots above, there is no check for an
  // already-recorded index here, so each call creates a new fixed object.
  // Confirm this function runs only once per MF.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  // TCSPDelta is negative here, so the object is -TCSPDelta bytes large and
  // placed at offset TCSPDelta.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
  // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
  // object at the offset of the CR-save slot in the linkage area. The actual
  // save and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
  if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    // Slot offset: 8 on 64-bit targets, 4 on 32-bit AIX, -4 otherwise.
    const int64_t SpillOffset =
        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(SpillSize, SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}

void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                     RegScavenger *RS) const {
  // Early exit if not using the SVR4 ABI.
  if (!Subtarget.isSVR4ABI()) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped, and if the function has a tail call, the
  // tail call might not be in the new RestoreBlock, so real branch instruction
  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
  // RestoreBlock. So we handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ?
PPC::S31 : PPC::V31; 1840 1841 bool HasGPSaveArea = false; 1842 bool HasG8SaveArea = false; 1843 bool HasFPSaveArea = false; 1844 bool HasVRSAVESaveArea = false; 1845 bool HasVRSaveArea = false; 1846 1847 SmallVector<CalleeSavedInfo, 18> GPRegs; 1848 SmallVector<CalleeSavedInfo, 18> G8Regs; 1849 SmallVector<CalleeSavedInfo, 18> FPRegs; 1850 SmallVector<CalleeSavedInfo, 18> VRegs; 1851 1852 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1853 unsigned Reg = CSI[i].getReg(); 1854 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 1855 (Reg != PPC::X2 && Reg != PPC::R2)) && 1856 "Not expecting to try to spill R2 in a function that must save TOC"); 1857 if (PPC::GPRCRegClass.contains(Reg)) { 1858 HasGPSaveArea = true; 1859 1860 GPRegs.push_back(CSI[i]); 1861 1862 if (Reg < MinGPR) { 1863 MinGPR = Reg; 1864 } 1865 } else if (PPC::G8RCRegClass.contains(Reg)) { 1866 HasG8SaveArea = true; 1867 1868 G8Regs.push_back(CSI[i]); 1869 1870 if (Reg < MinG8R) { 1871 MinG8R = Reg; 1872 } 1873 } else if (PPC::F8RCRegClass.contains(Reg)) { 1874 HasFPSaveArea = true; 1875 1876 FPRegs.push_back(CSI[i]); 1877 1878 if (Reg < MinFPR) { 1879 MinFPR = Reg; 1880 } 1881 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1882 PPC::CRRCRegClass.contains(Reg)) { 1883 ; // do nothing, as we already know whether CRs are spilled 1884 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 1885 HasVRSAVESaveArea = true; 1886 } else if (PPC::VRRCRegClass.contains(Reg) || 1887 PPC::SPERCRegClass.contains(Reg)) { 1888 // Altivec and SPE are mutually exclusive, but have the same stack 1889 // alignment requirements, so overload the save area for both cases. 
1890 HasVRSaveArea = true; 1891 1892 VRegs.push_back(CSI[i]); 1893 1894 if (Reg < MinVR) { 1895 MinVR = Reg; 1896 } 1897 } else { 1898 llvm_unreachable("Unknown RegisterClass!"); 1899 } 1900 } 1901 1902 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1903 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1904 1905 int64_t LowerBound = 0; 1906 1907 // Take into account stack space reserved for tail calls. 1908 int TCSPDelta = 0; 1909 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1910 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1911 LowerBound = TCSPDelta; 1912 } 1913 1914 // The Floating-point register save area is right below the back chain word 1915 // of the previous stack frame. 1916 if (HasFPSaveArea) { 1917 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1918 int FI = FPRegs[i].getFrameIdx(); 1919 1920 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1921 } 1922 1923 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1924 } 1925 1926 // Check whether the frame pointer register is allocated. If so, make sure it 1927 // is spilled to the correct offset. 1928 if (needsFP(MF)) { 1929 int FI = PFI->getFramePointerSaveIndex(); 1930 assert(FI && "No Frame Pointer Save Slot!"); 1931 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1932 // FP is R31/X31, so no need to update MinGPR/MinG8R. 
1933 HasGPSaveArea = true; 1934 } 1935 1936 if (PFI->usesPICBase()) { 1937 int FI = PFI->getPICBasePointerSaveIndex(); 1938 assert(FI && "No PIC Base Pointer Save Slot!"); 1939 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1940 1941 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 1942 HasGPSaveArea = true; 1943 } 1944 1945 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1946 if (RegInfo->hasBasePointer(MF)) { 1947 int FI = PFI->getBasePointerSaveIndex(); 1948 assert(FI && "No Base Pointer Save Slot!"); 1949 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1950 1951 Register BP = RegInfo->getBaseRegister(MF); 1952 if (PPC::G8RCRegClass.contains(BP)) { 1953 MinG8R = std::min<unsigned>(MinG8R, BP); 1954 HasG8SaveArea = true; 1955 } else if (PPC::GPRCRegClass.contains(BP)) { 1956 MinGPR = std::min<unsigned>(MinGPR, BP); 1957 HasGPSaveArea = true; 1958 } 1959 } 1960 1961 // General register save area starts right below the Floating-point 1962 // register save area. 1963 if (HasGPSaveArea || HasG8SaveArea) { 1964 // Move general register save area spill slots down, taking into account 1965 // the size of the Floating-point register save area. 1966 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1967 if (!GPRegs[i].isSpilledToReg()) { 1968 int FI = GPRegs[i].getFrameIdx(); 1969 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1970 } 1971 } 1972 1973 // Move general register save area spill slots down, taking into account 1974 // the size of the Floating-point register save area. 
1975 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 1976 if (!G8Regs[i].isSpilledToReg()) { 1977 int FI = G8Regs[i].getFrameIdx(); 1978 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1979 } 1980 } 1981 1982 unsigned MinReg = 1983 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 1984 TRI->getEncodingValue(MinG8R)); 1985 1986 if (Subtarget.isPPC64()) { 1987 LowerBound -= (31 - MinReg + 1) * 8; 1988 } else { 1989 LowerBound -= (31 - MinReg + 1) * 4; 1990 } 1991 } 1992 1993 // For 32-bit only, the CR save area is below the general register 1994 // save area. For 64-bit SVR4, the CR save area is addressed relative 1995 // to the stack pointer and hence does not need an adjustment here. 1996 // Only CR2 (the first nonvolatile spilled) has an associated frame 1997 // index so that we have a single uniform save area. 1998 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 1999 // Adjust the frame index of the CR spill slot. 2000 for (const auto &CSInfo : CSI) { 2001 if (CSInfo.getReg() == PPC::CR2) { 2002 int FI = CSInfo.getFrameIdx(); 2003 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2004 break; 2005 } 2006 } 2007 2008 LowerBound -= 4; // The CR save area is always 4 bytes long. 2009 } 2010 2011 if (HasVRSAVESaveArea) { 2012 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 2013 // which have the VRSAVE register class? 2014 // Adjust the frame index of the VRSAVE spill slot. 2015 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2016 unsigned Reg = CSI[i].getReg(); 2017 2018 if (PPC::VRSAVERCRegClass.contains(Reg)) { 2019 int FI = CSI[i].getFrameIdx(); 2020 2021 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2022 } 2023 } 2024 2025 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 2026 } 2027 2028 // Both Altivec and SPE have the same alignment and padding requirements 2029 // within the stack frame. 
2030 if (HasVRSaveArea) { 2031 // Insert alignment padding, we need 16-byte alignment. Note: for positive 2032 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2033 // we are using negative number here (the stack grows downward). We should 2034 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2035 // is the alignment size ( n = 16 here) and y is the size after aligning. 2036 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2037 LowerBound &= ~(15); 2038 2039 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2040 int FI = VRegs[i].getFrameIdx(); 2041 2042 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2043 } 2044 } 2045 2046 addScavengingSpillSlot(MF, RS); 2047 } 2048 2049 void 2050 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2051 RegScavenger *RS) const { 2052 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2053 // a large stack, which will require scavenging a register to materialize a 2054 // large offset. 2055 2056 // We need to have a scavenger spill slot for spills if the frame size is 2057 // large. In case there is no free register for large-offset addressing, 2058 // this slot is used for the necessary emergency spill. Also, we need the 2059 // slot for dynamic stack allocations. 2060 2061 // The scavenger might be invoked if the frame offset does not fit into 2062 // the 16-bit immediate. We don't know the complete frame size here 2063 // because we've not yet computed callee-saved register spills or the 2064 // needed alignment padding. 
2065 unsigned StackSize = determineFrameLayout(MF, true); 2066 MachineFrameInfo &MFI = MF.getFrameInfo(); 2067 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 2068 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 2069 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2070 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2071 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2072 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2073 unsigned Size = TRI.getSpillSize(RC); 2074 unsigned Align = TRI.getSpillAlignment(RC); 2075 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2076 2077 // Might we have over-aligned allocas? 2078 bool HasAlVars = 2079 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2080 2081 // These kinds of spills might need two registers. 2082 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 2083 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2084 2085 } 2086 } 2087 2088 // This function checks if a callee saved gpr can be spilled to a volatile 2089 // vector register. This occurs for leaf functions when the option 2090 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2091 // which were not spilled to vectors, return false so the target independent 2092 // code can handle them by assigning a FrameIdx to a stack slot. 2093 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2094 MachineFunction &MF, const TargetRegisterInfo *TRI, 2095 std::vector<CalleeSavedInfo> &CSI) const { 2096 2097 if (CSI.empty()) 2098 return true; // Early exit if no callee saved registers are modified! 2099 2100 // Early exit if cannot spill gprs to volatile vector registers. 2101 MachineFrameInfo &MFI = MF.getFrameInfo(); 2102 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2103 return false; 2104 2105 // Build a BitVector of VSRs that can be used for spilling GPRs. 
2106 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2107 BitVector BVCalleeSaved(TRI->getNumRegs()); 2108 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2109 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2110 for (unsigned i = 0; CSRegs[i]; ++i) 2111 BVCalleeSaved.set(CSRegs[i]); 2112 2113 for (unsigned Reg : BVAllocatable.set_bits()) { 2114 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2115 // used in the function. 2116 if (BVCalleeSaved[Reg] || 2117 (!PPC::F8RCRegClass.contains(Reg) && 2118 !PPC::VFRCRegClass.contains(Reg)) || 2119 (MF.getRegInfo().isPhysRegUsed(Reg))) 2120 BVAllocatable.reset(Reg); 2121 } 2122 2123 bool AllSpilledToReg = true; 2124 for (auto &CS : CSI) { 2125 if (BVAllocatable.none()) 2126 return false; 2127 2128 unsigned Reg = CS.getReg(); 2129 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2130 AllSpilledToReg = false; 2131 continue; 2132 } 2133 2134 unsigned VolatileVFReg = BVAllocatable.find_first(); 2135 if (VolatileVFReg < BVAllocatable.size()) { 2136 CS.setDstReg(VolatileVFReg); 2137 BVAllocatable.reset(VolatileVFReg); 2138 } else { 2139 AllSpilledToReg = false; 2140 } 2141 } 2142 return AllSpilledToReg; 2143 } 2144 2145 bool PPCFrameLowering::spillCalleeSavedRegisters( 2146 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2147 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2148 2149 MachineFunction *MF = MBB.getParent(); 2150 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2151 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2152 bool MustSaveTOC = FI->mustSaveTOC(); 2153 DebugLoc DL; 2154 bool CRSpilled = false; 2155 MachineInstrBuilder CRMIB; 2156 2157 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2158 unsigned Reg = CSI[i].getReg(); 2159 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 
2160 if (Reg == PPC::VRSAVE) 2161 continue; 2162 2163 // CR2 through CR4 are the nonvolatile CR fields. 2164 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2165 2166 // Add the callee-saved register as live-in; it's killed at the spill. 2167 // Do not do this for callee-saved registers that are live-in to the 2168 // function because they will already be marked live-in and this will be 2169 // adding it for a second time. It is an error to add the same register 2170 // to the set more than once. 2171 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2172 bool IsLiveIn = MRI.isLiveIn(Reg); 2173 if (!IsLiveIn) 2174 MBB.addLiveIn(Reg); 2175 2176 if (CRSpilled && IsCRField) { 2177 CRMIB.addReg(Reg, RegState::ImplicitKill); 2178 continue; 2179 } 2180 2181 // The actual spill will happen in the prologue. 2182 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2183 continue; 2184 2185 // Insert the spill to the stack frame. 2186 if (IsCRField) { 2187 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2188 if (!Subtarget.is32BitELFABI()) { 2189 // The actual spill will happen at the start of the prologue. 2190 FuncInfo->addMustSaveCR(Reg); 2191 } else { 2192 CRSpilled = true; 2193 FuncInfo->setSpillsCR(); 2194 2195 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2196 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2197 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2198 .addReg(Reg, RegState::ImplicitKill); 2199 2200 MBB.insert(MI, CRMIB); 2201 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2202 .addReg(PPC::R12, 2203 getKillRegState(true)), 2204 CSI[i].getFrameIdx())); 2205 } 2206 } else { 2207 if (CSI[i].isSpilledToReg()) { 2208 NumPESpillVSR++; 2209 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2210 .addReg(Reg, getKillRegState(true)); 2211 } else { 2212 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2213 // Use !IsLiveIn for the kill flag. 
2214 // We do not want to kill registers that are live in this function 2215 // before their use because they will become undefined registers. 2216 // Functions without NoUnwind need to preserve the order of elements in 2217 // saved vector registers. 2218 if (Subtarget.needsSwapsForVSXMemOps() && 2219 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2220 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2221 CSI[i].getFrameIdx(), RC, TRI); 2222 else 2223 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), 2224 RC, TRI); 2225 } 2226 } 2227 } 2228 return true; 2229 } 2230 2231 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2232 bool CR4Spilled, MachineBasicBlock &MBB, 2233 MachineBasicBlock::iterator MI, 2234 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2235 2236 MachineFunction *MF = MBB.getParent(); 2237 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2238 DebugLoc DL; 2239 unsigned MoveReg = PPC::R12; 2240 2241 // 32-bit: FP-relative 2242 MBB.insert(MI, 2243 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2244 CSI[CSIIndex].getFrameIdx())); 2245 2246 unsigned RestoreOp = PPC::MTOCRF; 2247 if (CR2Spilled) 2248 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2249 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2250 2251 if (CR3Spilled) 2252 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2253 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2254 2255 if (CR4Spilled) 2256 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2257 .addReg(MoveReg, getKillRegState(true))); 2258 } 2259 2260 MachineBasicBlock::iterator PPCFrameLowering:: 2261 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2262 MachineBasicBlock::iterator I) const { 2263 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2264 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2265 I->getOpcode() == PPC::ADJCALLSTACKUP) 
{ 2266 // Add (actually subtract) back the amount the callee popped on return. 2267 if (int CalleeAmt = I->getOperand(1).getImm()) { 2268 bool is64Bit = Subtarget.isPPC64(); 2269 CalleeAmt *= -1; 2270 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2271 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2272 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2273 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2274 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2275 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2276 const DebugLoc &dl = I->getDebugLoc(); 2277 2278 if (isInt<16>(CalleeAmt)) { 2279 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2280 .addReg(StackReg, RegState::Kill) 2281 .addImm(CalleeAmt); 2282 } else { 2283 MachineBasicBlock::iterator MBBI = I; 2284 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2285 .addImm(CalleeAmt >> 16); 2286 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2287 .addReg(TmpReg, RegState::Kill) 2288 .addImm(CalleeAmt & 0xFFFF); 2289 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2290 .addReg(StackReg, RegState::Kill) 2291 .addReg(TmpReg); 2292 } 2293 } 2294 } 2295 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 
2296 return MBB.erase(I); 2297 } 2298 2299 static bool isCalleeSavedCR(unsigned Reg) { 2300 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2301 } 2302 2303 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2304 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2305 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2306 MachineFunction *MF = MBB.getParent(); 2307 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2308 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2309 bool MustSaveTOC = FI->mustSaveTOC(); 2310 bool CR2Spilled = false; 2311 bool CR3Spilled = false; 2312 bool CR4Spilled = false; 2313 unsigned CSIIndex = 0; 2314 2315 // Initialize insertion-point logic; we will be restoring in reverse 2316 // order of spill. 2317 MachineBasicBlock::iterator I = MI, BeforeI = I; 2318 bool AtStart = I == MBB.begin(); 2319 2320 if (!AtStart) 2321 --BeforeI; 2322 2323 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2324 unsigned Reg = CSI[i].getReg(); 2325 2326 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2327 if (Reg == PPC::VRSAVE) 2328 continue; 2329 2330 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2331 continue; 2332 2333 // Restore of callee saved condition register field is handled during 2334 // epilogue insertion. 2335 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2336 continue; 2337 2338 if (Reg == PPC::CR2) { 2339 CR2Spilled = true; 2340 // The spill slot is associated only with CR2, which is the 2341 // first nonvolatile spilled. Save it here. 2342 CSIIndex = i; 2343 continue; 2344 } else if (Reg == PPC::CR3) { 2345 CR3Spilled = true; 2346 continue; 2347 } else if (Reg == PPC::CR4) { 2348 CR4Spilled = true; 2349 continue; 2350 } else { 2351 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2352 // least one CR register, restore all spilled CRs together. 
2353 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2354 bool is31 = needsFP(*MF); 2355 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2356 CSIIndex); 2357 CR2Spilled = CR3Spilled = CR4Spilled = false; 2358 } 2359 2360 if (CSI[i].isSpilledToReg()) { 2361 DebugLoc DL; 2362 NumPEReloadVSR++; 2363 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2364 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2365 } else { 2366 // Default behavior for non-CR saves. 2367 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2368 2369 // Functions without NoUnwind need to preserve the order of elements in 2370 // saved vector registers. 2371 if (Subtarget.needsSwapsForVSXMemOps() && 2372 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2373 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2374 TRI); 2375 else 2376 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2377 2378 assert(I != MBB.begin() && 2379 "loadRegFromStackSlot didn't insert any code!"); 2380 } 2381 } 2382 2383 // Insert in reverse order. 2384 if (AtStart) 2385 I = MBB.begin(); 2386 else { 2387 I = BeforeI; 2388 ++I; 2389 } 2390 } 2391 2392 // If we haven't yet spilled the CRs, do so now. 
2393 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2394 assert(Subtarget.is32BitELFABI() && 2395 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2396 bool is31 = needsFP(*MF); 2397 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2398 } 2399 2400 return true; 2401 } 2402 2403 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2404 return TOCSaveOffset; 2405 } 2406 2407 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2408 return FramePointerSaveOffset; 2409 } 2410 2411 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2412 if (Subtarget.isAIXABI()) 2413 report_fatal_error("BasePointer is not implemented on AIX yet."); 2414 return BasePointerSaveOffset; 2415 } 2416 2417 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2418 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2419 return false; 2420 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2421 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2422 } 2423