1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCFrameLowering.h" 14 #include "PPCInstrBuilder.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCMachineFunctionInfo.h" 17 #include "PPCSubtarget.h" 18 #include "PPCTargetMachine.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "framelowering" 32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 34 35 static cl::opt<bool> 36 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 37 cl::desc("Enable spills in prologue to vector registers."), 38 cl::init(false), cl::Hidden); 39 40 /// VRRegNo - Map from a numbered VR register to its enum value. 
41 /// 42 static const MCPhysReg VRRegNo[] = { 43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 47 }; 48 49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 50 if (STI.isAIXABI()) 51 return STI.isPPC64() ? 16 : 8; 52 // SVR4 ABI: 53 return STI.isPPC64() ? 16 : 4; 54 } 55 56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 57 if (STI.isAIXABI()) 58 return STI.isPPC64() ? 40 : 20; 59 return STI.isELFv2ABI() ? 24 : 40; 60 } 61 62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 63 // SVR4 ABI: First slot in the general register save area. 64 return STI.isPPC64() ? -8U : -4U; 65 } 66 67 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 68 if (STI.isAIXABI() || STI.isPPC64()) 69 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 70 71 // 32-bit SVR4 ABI: 72 return 8; 73 } 74 75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 76 // SVR4 ABI: First slot in the general register save area. 77 return STI.isPPC64() 78 ? -16U 79 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 80 } 81 82 static unsigned computeCRSaveOffset() { 83 // The condition register save offset needs to be updated for AIX PPC32. 
84 return 8; 85 } 86 87 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 88 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 89 STI.getPlatformStackAlignment(), 0), 90 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 91 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 92 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 93 LinkageSize(computeLinkageSize(Subtarget)), 94 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 95 CRSaveOffset(computeCRSaveOffset()) {} 96 97 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 98 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 99 unsigned &NumEntries) const { 100 // Early exit if not using the SVR4 ABI. 101 if (!Subtarget.isSVR4ABI()) { 102 NumEntries = 0; 103 return nullptr; 104 } 105 106 // Note that the offsets here overlap, but this is fixed up in 107 // processFunctionBeforeFrameFinalized. 108 109 static const SpillSlot Offsets[] = { 110 // Floating-point register save area offsets. 111 {PPC::F31, -8}, 112 {PPC::F30, -16}, 113 {PPC::F29, -24}, 114 {PPC::F28, -32}, 115 {PPC::F27, -40}, 116 {PPC::F26, -48}, 117 {PPC::F25, -56}, 118 {PPC::F24, -64}, 119 {PPC::F23, -72}, 120 {PPC::F22, -80}, 121 {PPC::F21, -88}, 122 {PPC::F20, -96}, 123 {PPC::F19, -104}, 124 {PPC::F18, -112}, 125 {PPC::F17, -120}, 126 {PPC::F16, -128}, 127 {PPC::F15, -136}, 128 {PPC::F14, -144}, 129 130 // General register save area offsets. 131 {PPC::R31, -4}, 132 {PPC::R30, -8}, 133 {PPC::R29, -12}, 134 {PPC::R28, -16}, 135 {PPC::R27, -20}, 136 {PPC::R26, -24}, 137 {PPC::R25, -28}, 138 {PPC::R24, -32}, 139 {PPC::R23, -36}, 140 {PPC::R22, -40}, 141 {PPC::R21, -44}, 142 {PPC::R20, -48}, 143 {PPC::R19, -52}, 144 {PPC::R18, -56}, 145 {PPC::R17, -60}, 146 {PPC::R16, -64}, 147 {PPC::R15, -68}, 148 {PPC::R14, -72}, 149 150 // CR save area offset. 
We map each of the nonvolatile CR fields 151 // to the slot for CR2, which is the first of the nonvolatile CR 152 // fields to be assigned, so that we only allocate one save slot. 153 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 154 {PPC::CR2, -4}, 155 156 // VRSAVE save area offset. 157 {PPC::VRSAVE, -4}, 158 159 // Vector register save area 160 {PPC::V31, -16}, 161 {PPC::V30, -32}, 162 {PPC::V29, -48}, 163 {PPC::V28, -64}, 164 {PPC::V27, -80}, 165 {PPC::V26, -96}, 166 {PPC::V25, -112}, 167 {PPC::V24, -128}, 168 {PPC::V23, -144}, 169 {PPC::V22, -160}, 170 {PPC::V21, -176}, 171 {PPC::V20, -192}, 172 173 // SPE register save area (overlaps Vector save area). 174 {PPC::S31, -8}, 175 {PPC::S30, -16}, 176 {PPC::S29, -24}, 177 {PPC::S28, -32}, 178 {PPC::S27, -40}, 179 {PPC::S26, -48}, 180 {PPC::S25, -56}, 181 {PPC::S24, -64}, 182 {PPC::S23, -72}, 183 {PPC::S22, -80}, 184 {PPC::S21, -88}, 185 {PPC::S20, -96}, 186 {PPC::S19, -104}, 187 {PPC::S18, -112}, 188 {PPC::S17, -120}, 189 {PPC::S16, -128}, 190 {PPC::S15, -136}, 191 {PPC::S14, -144}}; 192 193 static const SpillSlot Offsets64[] = { 194 // Floating-point register save area offsets. 195 {PPC::F31, -8}, 196 {PPC::F30, -16}, 197 {PPC::F29, -24}, 198 {PPC::F28, -32}, 199 {PPC::F27, -40}, 200 {PPC::F26, -48}, 201 {PPC::F25, -56}, 202 {PPC::F24, -64}, 203 {PPC::F23, -72}, 204 {PPC::F22, -80}, 205 {PPC::F21, -88}, 206 {PPC::F20, -96}, 207 {PPC::F19, -104}, 208 {PPC::F18, -112}, 209 {PPC::F17, -120}, 210 {PPC::F16, -128}, 211 {PPC::F15, -136}, 212 {PPC::F14, -144}, 213 214 // General register save area offsets. 
215 {PPC::X31, -8}, 216 {PPC::X30, -16}, 217 {PPC::X29, -24}, 218 {PPC::X28, -32}, 219 {PPC::X27, -40}, 220 {PPC::X26, -48}, 221 {PPC::X25, -56}, 222 {PPC::X24, -64}, 223 {PPC::X23, -72}, 224 {PPC::X22, -80}, 225 {PPC::X21, -88}, 226 {PPC::X20, -96}, 227 {PPC::X19, -104}, 228 {PPC::X18, -112}, 229 {PPC::X17, -120}, 230 {PPC::X16, -128}, 231 {PPC::X15, -136}, 232 {PPC::X14, -144}, 233 234 // VRSAVE save area offset. 235 {PPC::VRSAVE, -4}, 236 237 // Vector register save area 238 {PPC::V31, -16}, 239 {PPC::V30, -32}, 240 {PPC::V29, -48}, 241 {PPC::V28, -64}, 242 {PPC::V27, -80}, 243 {PPC::V26, -96}, 244 {PPC::V25, -112}, 245 {PPC::V24, -128}, 246 {PPC::V23, -144}, 247 {PPC::V22, -160}, 248 {PPC::V21, -176}, 249 {PPC::V20, -192}}; 250 251 if (Subtarget.isPPC64()) { 252 NumEntries = array_lengthof(Offsets64); 253 254 return Offsets64; 255 } else { 256 NumEntries = array_lengthof(Offsets); 257 258 return Offsets; 259 } 260 } 261 262 /// RemoveVRSaveCode - We have found that this function does not need any code 263 /// to manipulate the VRSAVE register, even though it uses vector registers. 264 /// This can happen when the only registers used are known to be live in or out 265 /// of the function. Remove all of the VRSAVE related code from the function. 266 /// FIXME: The removal of the code results in a compile failure at -O0 when the 267 /// function contains a function call, as the GPR containing original VRSAVE 268 /// contents is spilled and reloaded around the call. Without the prolog code, 269 /// the spill instruction refers to an undefined register. This code needs 270 /// to account for all uses of that GPR. 271 static void RemoveVRSaveCode(MachineInstr &MI) { 272 MachineBasicBlock *Entry = MI.getParent(); 273 MachineFunction *MF = Entry->getParent(); 274 275 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 
276 MachineBasicBlock::iterator MBBI = MI; 277 ++MBBI; 278 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 279 MBBI->eraseFromParent(); 280 281 bool RemovedAllMTVRSAVEs = true; 282 // See if we can find and remove the MTVRSAVE instruction from all of the 283 // epilog blocks. 284 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 285 // If last instruction is a return instruction, add an epilogue 286 if (I->isReturnBlock()) { 287 bool FoundIt = false; 288 for (MBBI = I->end(); MBBI != I->begin(); ) { 289 --MBBI; 290 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 291 MBBI->eraseFromParent(); // remove it. 292 FoundIt = true; 293 break; 294 } 295 } 296 RemovedAllMTVRSAVEs &= FoundIt; 297 } 298 } 299 300 // If we found and removed all MTVRSAVE instructions, remove the read of 301 // VRSAVE as well. 302 if (RemovedAllMTVRSAVEs) { 303 MBBI = MI; 304 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 305 --MBBI; 306 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 307 MBBI->eraseFromParent(); 308 } 309 310 // Finally, nuke the UPDATE_VRSAVE. 311 MI.eraseFromParent(); 312 } 313 314 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 315 // instruction selector. Based on the vector registers that have been used, 316 // transform this into the appropriate ORI instruction. 317 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 318 MachineFunction *MF = MI.getParent()->getParent(); 319 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 320 DebugLoc dl = MI.getDebugLoc(); 321 322 const MachineRegisterInfo &MRI = MF->getRegInfo(); 323 unsigned UsedRegMask = 0; 324 for (unsigned i = 0; i != 32; ++i) 325 if (MRI.isPhysRegModified(VRRegNo[i])) 326 UsedRegMask |= 1 << (31-i); 327 328 // Live in and live out values already must be in the mask, so don't bother 329 // marking them. 
330 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 331 unsigned RegNo = TRI->getEncodingValue(LI.first); 332 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 333 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 334 } 335 336 // Live out registers appear as use operands on return instructions. 337 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 338 UsedRegMask != 0 && BI != BE; ++BI) { 339 const MachineBasicBlock &MBB = *BI; 340 if (!MBB.isReturnBlock()) 341 continue; 342 const MachineInstr &Ret = MBB.back(); 343 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 344 const MachineOperand &MO = Ret.getOperand(I); 345 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 346 continue; 347 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 348 UsedRegMask &= ~(1 << (31-RegNo)); 349 } 350 } 351 352 // If no registers are used, turn this into a copy. 353 if (UsedRegMask == 0) { 354 // Remove all VRSAVE code. 
355 RemoveVRSaveCode(MI); 356 return; 357 } 358 359 Register SrcReg = MI.getOperand(1).getReg(); 360 Register DstReg = MI.getOperand(0).getReg(); 361 362 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 363 if (DstReg != SrcReg) 364 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 365 .addReg(SrcReg) 366 .addImm(UsedRegMask); 367 else 368 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 369 .addReg(SrcReg, RegState::Kill) 370 .addImm(UsedRegMask); 371 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 372 if (DstReg != SrcReg) 373 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 374 .addReg(SrcReg) 375 .addImm(UsedRegMask >> 16); 376 else 377 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 378 .addReg(SrcReg, RegState::Kill) 379 .addImm(UsedRegMask >> 16); 380 } else { 381 if (DstReg != SrcReg) 382 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 383 .addReg(SrcReg) 384 .addImm(UsedRegMask >> 16); 385 else 386 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 387 .addReg(SrcReg, RegState::Kill) 388 .addImm(UsedRegMask >> 16); 389 390 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 391 .addReg(DstReg, RegState::Kill) 392 .addImm(UsedRegMask & 0xFFFF); 393 } 394 395 // Remove the old UPDATE_VRSAVE instruction. 
396 MI.eraseFromParent(); 397 } 398 399 static bool spillsCR(const MachineFunction &MF) { 400 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 401 return FuncInfo->isCRSpilled(); 402 } 403 404 static bool spillsVRSAVE(const MachineFunction &MF) { 405 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 406 return FuncInfo->isVRSAVESpilled(); 407 } 408 409 static bool hasSpills(const MachineFunction &MF) { 410 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 411 return FuncInfo->hasSpills(); 412 } 413 414 static bool hasNonRISpills(const MachineFunction &MF) { 415 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 416 return FuncInfo->hasNonRISpills(); 417 } 418 419 /// MustSaveLR - Return true if this function requires that we save the LR 420 /// register onto the stack in the prolog and restore it in the epilog of the 421 /// function. 422 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 423 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 424 425 // We need a save/restore of LR if there is any def of LR (which is 426 // defined by calls, including the PIC setup sequence), or if there is 427 // some use of the LR stack slot (e.g. for builtin_return_address). 428 // (LR comes in 32 and 64 bit versions.) 429 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 430 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 431 } 432 433 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 434 /// call frame size. Update the MachineFunction object with the stack size. 
435 unsigned 436 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 437 bool UseEstimate) const { 438 unsigned NewMaxCallFrameSize = 0; 439 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 440 &NewMaxCallFrameSize); 441 MF.getFrameInfo().setStackSize(FrameSize); 442 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 443 return FrameSize; 444 } 445 446 /// determineFrameLayout - Determine the size of the frame and maximum call 447 /// frame size. 448 unsigned 449 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 450 bool UseEstimate, 451 unsigned *NewMaxCallFrameSize) const { 452 const MachineFrameInfo &MFI = MF.getFrameInfo(); 453 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 454 455 // Get the number of bytes to allocate from the FrameInfo 456 unsigned FrameSize = 457 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 458 459 // Get stack alignments. The frame must be aligned to the greatest of these: 460 unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI 461 unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame 462 unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1; 463 464 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 465 466 unsigned LR = RegInfo->getRARegister(); 467 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 468 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 469 !MFI.adjustsStack() && // No calls. 470 !MustSaveLR(MF, LR) && // No need to save LR. 471 !FI->mustSaveTOC() && // No need to save TOC. 472 !RegInfo->hasBasePointer(MF); // No special alignment. 473 474 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 475 // code if all local vars are reg-allocated. 
476 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 477 478 // Check whether we can skip adjusting the stack pointer (by using red zone) 479 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 480 // No need for frame 481 return 0; 482 } 483 484 // Get the maximum call frame size of all the calls. 485 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 486 487 // Maximum call frame needs to be at least big enough for linkage area. 488 unsigned minCallFrameSize = getLinkageSize(); 489 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 490 491 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 492 // that allocations will be aligned. 493 if (MFI.hasVarSizedObjects()) 494 maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask; 495 496 // Update the new max call frame size if the caller passes in a valid pointer. 497 if (NewMaxCallFrameSize) 498 *NewMaxCallFrameSize = maxCallFrameSize; 499 500 // Include call frame size in total. 501 FrameSize += maxCallFrameSize; 502 503 // Make sure the frame is aligned. 504 FrameSize = (FrameSize + AlignMask) & ~AlignMask; 505 506 return FrameSize; 507 } 508 509 // hasFP - Return true if the specified function actually has a dedicated frame 510 // pointer register. 511 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 512 const MachineFrameInfo &MFI = MF.getFrameInfo(); 513 // FIXME: This is pretty much broken by design: hasFP() might be called really 514 // early, before the stack layout was calculated and thus hasFP() might return 515 // true or false here depending on the time of call. 516 return (MFI.getStackSize()) && needsFP(MF); 517 } 518 519 // needsFP - Return true if the specified function should have a dedicated frame 520 // pointer register. This is true if the function has variable sized allocas or 521 // if frame pointer elimination is disabled. 
522 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 523 const MachineFrameInfo &MFI = MF.getFrameInfo(); 524 525 // Naked functions have no stack frame pushed, so we don't have a frame 526 // pointer. 527 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 528 return false; 529 530 return MF.getTarget().Options.DisableFramePointerElim(MF) || 531 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 532 (MF.getTarget().Options.GuaranteedTailCallOpt && 533 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 534 } 535 536 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 537 bool is31 = needsFP(MF); 538 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 539 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 540 541 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 542 bool HasBP = RegInfo->hasBasePointer(MF); 543 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 544 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 545 546 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 547 BI != BE; ++BI) 548 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 549 --MBBI; 550 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 551 MachineOperand &MO = MBBI->getOperand(I); 552 if (!MO.isReg()) 553 continue; 554 555 switch (MO.getReg()) { 556 case PPC::FP: 557 MO.setReg(FPReg); 558 break; 559 case PPC::FP8: 560 MO.setReg(FP8Reg); 561 break; 562 case PPC::BP: 563 MO.setReg(BPReg); 564 break; 565 case PPC::BP8: 566 MO.setReg(BP8Reg); 567 break; 568 569 } 570 } 571 } 572 } 573 574 /* This function will do the following: 575 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 576 respectively (defaults recommended by the ABI) and return true 577 - If MBB is not an entry block, initialize the register scavenger and look 578 for available registers. 
579 - If the defaults (R0/R12) are available, return true 580 - If TwoUniqueRegsRequired is set to true, it looks for two unique 581 registers. Otherwise, look for a single available register. 582 - If the required registers are found, set SR1 and SR2 and return true. 583 - If the required registers are not found, set SR2 or both SR1 and SR2 to 584 PPC::NoRegister and return false. 585 586 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 587 is not set, this function will attempt to find two different registers, but 588 still return true if only one register is available (and set SR1 == SR2). 589 */ 590 bool 591 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 592 bool UseAtEnd, 593 bool TwoUniqueRegsRequired, 594 unsigned *SR1, 595 unsigned *SR2) const { 596 RegScavenger RS; 597 unsigned R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 598 unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 599 600 // Set the defaults for the two scratch registers. 601 if (SR1) 602 *SR1 = R0; 603 604 if (SR2) { 605 assert (SR1 && "Asking for the second scratch register but not the first?"); 606 *SR2 = R12; 607 } 608 609 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 610 if ((UseAtEnd && MBB->isReturnBlock()) || 611 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 612 return true; 613 614 RS.enterBasicBlock(*MBB); 615 616 if (UseAtEnd && !MBB->empty()) { 617 // The scratch register will be used at the end of the block, so must 618 // consider all registers used within the block 619 620 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 621 // If no terminator, back iterator up to previous instruction. 622 if (MBBI == MBB->end()) 623 MBBI = std::prev(MBBI); 624 625 if (MBBI != MBB->begin()) 626 RS.forward(MBBI); 627 } 628 629 // If the two registers are available, we're all good. 
630 // Note that we only return here if both R0 and R12 are available because 631 // although the function may not require two unique registers, it may benefit 632 // from having two so we should try to provide them. 633 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 634 return true; 635 636 // Get the list of callee-saved registers for the target. 637 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 638 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 639 640 // Get all the available registers in the block. 641 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 642 &PPC::GPRCRegClass); 643 644 // We shouldn't use callee-saved registers as scratch registers as they may be 645 // available when looking for a candidate block for shrink wrapping but not 646 // available when the actual prologue/epilogue is being emitted because they 647 // were added as live-in to the prologue block by PrologueEpilogueInserter. 648 for (int i = 0; CSRegs[i]; ++i) 649 BV.reset(CSRegs[i]); 650 651 // Set the first scratch register to the first available one. 652 if (SR1) { 653 int FirstScratchReg = BV.find_first(); 654 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 655 } 656 657 // If there is another one available, set the second scratch register to that. 658 // Otherwise, set it to either PPC::NoRegister if this function requires two 659 // or to whatever SR1 is set to if this function doesn't require two. 660 if (SR2) { 661 int SecondScratchReg = BV.find_next(*SR1); 662 if (SecondScratchReg != -1) 663 *SR2 = SecondScratchReg; 664 else 665 *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1; 666 } 667 668 // Now that we've done our best to provide both registers, double check 669 // whether we were unable to provide enough. 670 if (BV.count() < (TwoUniqueRegsRequired ? 
2U : 1U)) 671 return false; 672 673 return true; 674 } 675 676 // We need a scratch register for spilling LR and for spilling CR. By default, 677 // we use two scratch registers to hide latency. However, if only one scratch 678 // register is available, we can adjust for that by not overlapping the spill 679 // code. However, if we need to realign the stack (i.e. have a base pointer) 680 // and the stack frame is large, we need two scratch registers. 681 bool 682 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 683 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 684 MachineFunction &MF = *(MBB->getParent()); 685 bool HasBP = RegInfo->hasBasePointer(MF); 686 unsigned FrameSize = determineFrameLayout(MF); 687 int NegFrameSize = -FrameSize; 688 bool IsLargeFrame = !isInt<16>(NegFrameSize); 689 MachineFrameInfo &MFI = MF.getFrameInfo(); 690 unsigned MaxAlign = MFI.getMaxAlignment(); 691 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 692 693 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 694 } 695 696 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 697 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 698 699 return findScratchRegister(TmpMBB, false, 700 twoUniqueScratchRegsRequired(TmpMBB)); 701 } 702 703 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 704 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 705 706 return findScratchRegister(TmpMBB, true); 707 } 708 709 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { 710 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 711 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 712 713 // Abort if there is no register info or function info. 714 if (!RegInfo || !FI) 715 return false; 716 717 // Only move the stack update on ELFv2 ABI and PPC64. 
718 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) 719 return false; 720 721 // Check the frame size first and return false if it does not fit the 722 // requirements. 723 // We need a non-zero frame size as well as a frame that will fit in the red 724 // zone. This is because by moving the stack pointer update we are now storing 725 // to the red zone until the stack pointer is updated. If we get an interrupt 726 // inside the prologue but before the stack update we now have a number of 727 // stores to the red zone and those stores must all fit. 728 MachineFrameInfo &MFI = MF.getFrameInfo(); 729 unsigned FrameSize = MFI.getStackSize(); 730 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 731 return false; 732 733 // Frame pointers and base pointers complicate matters so don't do anything 734 // if we have them. For example having a frame pointer will sometimes require 735 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 736 // difficult. 737 if (hasFP(MF) || RegInfo->hasBasePointer(MF)) 738 return false; 739 740 // Calls to fast_cc functions use different rules for passing parameters on 741 // the stack from the ABI and using PIC base in the function imposes 742 // similar restrictions to using the base pointer. It is not generally safe 743 // to move the stack pointer update in these situations. 744 if (FI->hasFastCall() || FI->usesPICBase()) 745 return false; 746 747 // Finally we can move the stack update if we do not require register 748 // scavenging. Register scavenging can introduce more spills and so 749 // may make the frame size larger than we have computed. 
750 return !RegInfo->requiresFrameIndexScavenging(MF); 751 } 752 753 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 754 MachineBasicBlock &MBB) const { 755 MachineBasicBlock::iterator MBBI = MBB.begin(); 756 MachineFrameInfo &MFI = MF.getFrameInfo(); 757 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 758 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 759 760 MachineModuleInfo &MMI = MF.getMMI(); 761 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 762 DebugLoc dl; 763 bool needsCFI = MF.needsFrameMoves(); 764 765 // Get processor type. 766 bool isPPC64 = Subtarget.isPPC64(); 767 // Get the ABI. 768 bool isSVR4ABI = Subtarget.isSVR4ABI(); 769 bool isAIXABI = Subtarget.isAIXABI(); 770 bool isELFv2ABI = Subtarget.isELFv2ABI(); 771 assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); 772 773 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 774 // process it. 775 if (!isSVR4ABI) 776 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 777 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 778 if (isAIXABI) 779 report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); 780 HandleVRSaveUpdate(*MBBI, TII); 781 break; 782 } 783 } 784 785 // Move MBBI back to the beginning of the prologue block. 786 MBBI = MBB.begin(); 787 788 // Work out frame sizes. 789 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 790 int NegFrameSize = -FrameSize; 791 if (!isInt<32>(NegFrameSize)) 792 llvm_unreachable("Unhandled stack size!"); 793 794 if (MFI.isFrameAddressTaken()) 795 replaceFPWithRealFP(MF); 796 797 // Check if the link register (LR) must be saved. 798 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 799 bool MustSaveLR = FI->mustSaveLR(); 800 bool MustSaveTOC = FI->mustSaveTOC(); 801 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 802 bool MustSaveCR = !MustSaveCRs.empty(); 803 // Do we have a frame pointer and/or base pointer for this function? 
804 bool HasFP = hasFP(MF); 805 bool HasBP = RegInfo->hasBasePointer(MF); 806 bool HasRedZone = isPPC64 || !isSVR4ABI; 807 808 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 809 Register BPReg = RegInfo->getBaseRegister(MF); 810 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 811 unsigned LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 812 unsigned TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 813 unsigned ScratchReg = 0; 814 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 815 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 816 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 817 : PPC::MFLR ); 818 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 819 : PPC::STW ); 820 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 821 : PPC::STWU ); 822 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 823 : PPC::STWUX); 824 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 825 : PPC::LIS ); 826 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 827 : PPC::ORI ); 828 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 829 : PPC::OR ); 830 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 831 : PPC::SUBFC); 832 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 833 : PPC::SUBFIC); 834 835 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 836 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 837 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 838 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 839 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 840 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 841 842 // Using the same bool variable as below to suppress compiler warnings. 
843 bool SingleScratchReg = 844 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 845 &ScratchReg, &TempReg); 846 assert(SingleScratchReg && 847 "Required number of registers not available in this block"); 848 849 SingleScratchReg = ScratchReg == TempReg; 850 851 int LROffset = getReturnSaveOffset(); 852 853 int FPOffset = 0; 854 if (HasFP) { 855 if (isSVR4ABI) { 856 MachineFrameInfo &MFI = MF.getFrameInfo(); 857 int FPIndex = FI->getFramePointerSaveIndex(); 858 assert(FPIndex && "No Frame Pointer Save Slot!"); 859 FPOffset = MFI.getObjectOffset(FPIndex); 860 } else { 861 FPOffset = getFramePointerSaveOffset(); 862 } 863 } 864 865 int BPOffset = 0; 866 if (HasBP) { 867 if (isSVR4ABI) { 868 MachineFrameInfo &MFI = MF.getFrameInfo(); 869 int BPIndex = FI->getBasePointerSaveIndex(); 870 assert(BPIndex && "No Base Pointer Save Slot!"); 871 BPOffset = MFI.getObjectOffset(BPIndex); 872 } else { 873 BPOffset = getBasePointerSaveOffset(); 874 } 875 } 876 877 int PBPOffset = 0; 878 if (FI->usesPICBase()) { 879 MachineFrameInfo &MFI = MF.getFrameInfo(); 880 int PBPIndex = FI->getPICBasePointerSaveIndex(); 881 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 882 PBPOffset = MFI.getObjectOffset(PBPIndex); 883 } 884 885 // Get stack alignments. 886 unsigned MaxAlign = MFI.getMaxAlignment(); 887 if (HasBP && MaxAlign > 1) 888 assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) && 889 "Invalid alignment!"); 890 891 // Frames of 32KB & larger require special handling because they cannot be 892 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 893 bool isLargeFrame = !isInt<16>(NegFrameSize); 894 895 assert((isPPC64 || !MustSaveCR) && 896 "Prologue CR saving supported only in 64-bit mode"); 897 898 if (MustSaveCR && isAIXABI) 899 report_fatal_error("Prologue CR saving is unimplemented on AIX."); 900 901 // Check if we can move the stack update instruction (stdu) down the prologue 902 // past the callee saves. 
Hopefully this will avoid the situation where the 903 // saves are waiting for the update on the store with update to complete. 904 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 905 bool MovingStackUpdateDown = false; 906 907 // Check if we can move the stack update. 908 if (stackUpdateCanBeMoved(MF)) { 909 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 910 for (CalleeSavedInfo CSI : Info) { 911 int FrIdx = CSI.getFrameIdx(); 912 // If the frame index is not negative the callee saved info belongs to a 913 // stack object that is not a fixed stack object. We ignore non-fixed 914 // stack objects because we won't move the stack update pointer past them. 915 if (FrIdx >= 0) 916 continue; 917 918 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 919 StackUpdateLoc++; 920 MovingStackUpdateDown = true; 921 } else { 922 // We need all of the Frame Indices to meet these conditions. 923 // If they do not, abort the whole operation. 924 StackUpdateLoc = MBBI; 925 MovingStackUpdateDown = false; 926 break; 927 } 928 } 929 930 // If the operation was not aborted then update the object offset. 931 if (MovingStackUpdateDown) { 932 for (CalleeSavedInfo CSI : Info) { 933 int FrIdx = CSI.getFrameIdx(); 934 if (FrIdx < 0) 935 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 936 } 937 } 938 } 939 940 // If we need to spill the CR and the LR but we don't have two separate 941 // registers available, we must spill them one at a time 942 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 943 // In the ELFv2 ABI, we are not required to save all CR fields. 944 // If only one or two CR fields are clobbered, it is more efficient to use 945 // mfocrf to selectively save just those fields, because mfocrf has short 946 // latency compares to mfcr. 
947 unsigned MfcrOpcode = PPC::MFCR8; 948 unsigned CrState = RegState::ImplicitKill; 949 if (isELFv2ABI && MustSaveCRs.size() == 1) { 950 MfcrOpcode = PPC::MFOCRF8; 951 CrState = RegState::Kill; 952 } 953 MachineInstrBuilder MIB = 954 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 955 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 956 MIB.addReg(MustSaveCRs[i], CrState); 957 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 958 .addReg(TempReg, getKillRegState(true)) 959 .addImm(getCRSaveOffset()) 960 .addReg(SPReg); 961 } 962 963 if (MustSaveLR) 964 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 965 966 if (MustSaveCR && 967 !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64 968 // In the ELFv2 ABI, we are not required to save all CR fields. 969 // If only one or two CR fields are clobbered, it is more efficient to use 970 // mfocrf to selectively save just those fields, because mfocrf has short 971 // latency compares to mfcr. 972 unsigned MfcrOpcode = PPC::MFCR8; 973 unsigned CrState = RegState::ImplicitKill; 974 if (isELFv2ABI && MustSaveCRs.size() == 1) { 975 MfcrOpcode = PPC::MFOCRF8; 976 CrState = RegState::Kill; 977 } 978 MachineInstrBuilder MIB = 979 BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg); 980 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 981 MIB.addReg(MustSaveCRs[i], CrState); 982 } 983 984 if (HasRedZone) { 985 if (HasFP) 986 BuildMI(MBB, MBBI, dl, StoreInst) 987 .addReg(FPReg) 988 .addImm(FPOffset) 989 .addReg(SPReg); 990 if (FI->usesPICBase()) 991 BuildMI(MBB, MBBI, dl, StoreInst) 992 .addReg(PPC::R30) 993 .addImm(PBPOffset) 994 .addReg(SPReg); 995 if (HasBP) 996 BuildMI(MBB, MBBI, dl, StoreInst) 997 .addReg(BPReg) 998 .addImm(BPOffset) 999 .addReg(SPReg); 1000 } 1001 1002 if (MustSaveLR) 1003 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 1004 .addReg(ScratchReg, getKillRegState(true)) 1005 .addImm(LROffset) 1006 .addReg(SPReg); 1007 1008 if (MustSaveCR && 1009 !(SingleScratchReg && MustSaveLR)) { // 
will only occur for PPC64 1010 assert(HasRedZone && "A red zone is always available on PPC64"); 1011 BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8)) 1012 .addReg(TempReg, getKillRegState(true)) 1013 .addImm(getCRSaveOffset()) 1014 .addReg(SPReg); 1015 } 1016 1017 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 1018 if (!FrameSize) 1019 return; 1020 1021 // Adjust stack pointer: r1 += NegFrameSize. 1022 // If there is a preferred stack alignment, align R1 now 1023 1024 if (HasBP && HasRedZone) { 1025 // Save a copy of r1 as the base pointer. 1026 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1027 .addReg(SPReg) 1028 .addReg(SPReg); 1029 } 1030 1031 // Have we generated a STUX instruction to claim stack frame? If so, 1032 // the negated frame size will be placed in ScratchReg. 1033 bool HasSTUX = false; 1034 1035 // This condition must be kept in sync with canUseAsPrologue. 1036 if (HasBP && MaxAlign > 1) { 1037 if (isPPC64) 1038 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1039 .addReg(SPReg) 1040 .addImm(0) 1041 .addImm(64 - Log2_32(MaxAlign)); 1042 else // PPC32... 
1043 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1044 .addReg(SPReg) 1045 .addImm(0) 1046 .addImm(32 - Log2_32(MaxAlign)) 1047 .addImm(31); 1048 if (!isLargeFrame) { 1049 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1050 .addReg(ScratchReg, RegState::Kill) 1051 .addImm(NegFrameSize); 1052 } else { 1053 assert(!SingleScratchReg && "Only a single scratch reg available"); 1054 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1055 .addImm(NegFrameSize >> 16); 1056 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1057 .addReg(TempReg, RegState::Kill) 1058 .addImm(NegFrameSize & 0xFFFF); 1059 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1060 .addReg(ScratchReg, RegState::Kill) 1061 .addReg(TempReg, RegState::Kill); 1062 } 1063 1064 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1065 .addReg(SPReg, RegState::Kill) 1066 .addReg(SPReg) 1067 .addReg(ScratchReg); 1068 HasSTUX = true; 1069 1070 } else if (!isLargeFrame) { 1071 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1072 .addReg(SPReg) 1073 .addImm(NegFrameSize) 1074 .addReg(SPReg); 1075 1076 } else { 1077 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1078 .addImm(NegFrameSize >> 16); 1079 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1080 .addReg(ScratchReg, RegState::Kill) 1081 .addImm(NegFrameSize & 0xFFFF); 1082 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1083 .addReg(SPReg, RegState::Kill) 1084 .addReg(SPReg) 1085 .addReg(ScratchReg); 1086 HasSTUX = true; 1087 } 1088 1089 // Save the TOC register after the stack pointer update if a prologue TOC 1090 // save is required for the function. 
1091 if (MustSaveTOC) { 1092 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1093 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1094 .addReg(TOCReg, getKillRegState(true)) 1095 .addImm(TOCSaveOffset) 1096 .addReg(SPReg); 1097 } 1098 1099 if (!HasRedZone) { 1100 assert(!isPPC64 && "A red zone is always available on PPC64"); 1101 if (HasSTUX) { 1102 // The negated frame size is in ScratchReg, and the SPReg has been 1103 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1104 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1105 // the stack frame (i.e. the old SP), ideally, we would put the old 1106 // SP into a register and use it as the base for the stores. The 1107 // problem is that the only available register may be ScratchReg, 1108 // which could be R0, and R0 cannot be used as a base address. 1109 1110 // First, set ScratchReg to the old SP. This may need to be modified 1111 // later. 1112 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1113 .addReg(ScratchReg, RegState::Kill) 1114 .addReg(SPReg); 1115 1116 if (ScratchReg == PPC::R0) { 1117 // R0 cannot be used as a base register, but it can be used as an 1118 // index in a store-indexed. 1119 int LastOffset = 0; 1120 if (HasFP) { 1121 // R0 += (FPOffset-LastOffset). 1122 // Need addic, since addi treats R0 as 0. 1123 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1124 .addReg(ScratchReg) 1125 .addImm(FPOffset-LastOffset); 1126 LastOffset = FPOffset; 1127 // Store FP into *R0. 1128 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1129 .addReg(FPReg, RegState::Kill) // Save FP. 1130 .addReg(PPC::ZERO) 1131 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1132 } 1133 if (FI->usesPICBase()) { 1134 // R0 += (PBPOffset-LastOffset). 
1135 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1136 .addReg(ScratchReg) 1137 .addImm(PBPOffset-LastOffset); 1138 LastOffset = PBPOffset; 1139 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1140 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1141 .addReg(PPC::ZERO) 1142 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1143 } 1144 if (HasBP) { 1145 // R0 += (BPOffset-LastOffset). 1146 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1147 .addReg(ScratchReg) 1148 .addImm(BPOffset-LastOffset); 1149 LastOffset = BPOffset; 1150 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1151 .addReg(BPReg, RegState::Kill) // Save BP. 1152 .addReg(PPC::ZERO) 1153 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1154 // BP = R0-LastOffset 1155 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1156 .addReg(ScratchReg, RegState::Kill) 1157 .addImm(-LastOffset); 1158 } 1159 } else { 1160 // ScratchReg is not R0, so use it as the base register. It is 1161 // already set to the old SP, so we can use the offsets directly. 1162 1163 // Now that the stack frame has been allocated, save all the necessary 1164 // registers using ScratchReg as the base address. 1165 if (HasFP) 1166 BuildMI(MBB, MBBI, dl, StoreInst) 1167 .addReg(FPReg) 1168 .addImm(FPOffset) 1169 .addReg(ScratchReg); 1170 if (FI->usesPICBase()) 1171 BuildMI(MBB, MBBI, dl, StoreInst) 1172 .addReg(PPC::R30) 1173 .addImm(PBPOffset) 1174 .addReg(ScratchReg); 1175 if (HasBP) { 1176 BuildMI(MBB, MBBI, dl, StoreInst) 1177 .addReg(BPReg) 1178 .addImm(BPOffset) 1179 .addReg(ScratchReg); 1180 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1181 .addReg(ScratchReg, RegState::Kill) 1182 .addReg(ScratchReg); 1183 } 1184 } 1185 } else { 1186 // The frame size is a known 16-bit constant (fitting in the immediate 1187 // field of STWU). To be here we have to be compiling for PPC32. 1188 // Since the SPReg has been decreased by FrameSize, add it back to each 1189 // offset. 
1190 if (HasFP) 1191 BuildMI(MBB, MBBI, dl, StoreInst) 1192 .addReg(FPReg) 1193 .addImm(FrameSize + FPOffset) 1194 .addReg(SPReg); 1195 if (FI->usesPICBase()) 1196 BuildMI(MBB, MBBI, dl, StoreInst) 1197 .addReg(PPC::R30) 1198 .addImm(FrameSize + PBPOffset) 1199 .addReg(SPReg); 1200 if (HasBP) { 1201 BuildMI(MBB, MBBI, dl, StoreInst) 1202 .addReg(BPReg) 1203 .addImm(FrameSize + BPOffset) 1204 .addReg(SPReg); 1205 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1206 .addReg(SPReg) 1207 .addImm(FrameSize); 1208 } 1209 } 1210 } 1211 1212 // Add Call Frame Information for the instructions we generated above. 1213 if (needsCFI) { 1214 unsigned CFIIndex; 1215 1216 if (HasBP) { 1217 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1218 // because if the stack needed aligning then CFA won't be at a fixed 1219 // offset from FP/SP. 1220 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1221 CFIIndex = MF.addFrameInst( 1222 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1223 } else { 1224 // Adjust the definition of CFA to account for the change in SP. 1225 assert(NegFrameSize); 1226 CFIIndex = MF.addFrameInst( 1227 MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize)); 1228 } 1229 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1230 .addCFIIndex(CFIIndex); 1231 1232 if (HasFP) { 1233 // Describe where FP was saved, at a fixed offset from CFA. 1234 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1235 CFIIndex = MF.addFrameInst( 1236 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1237 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1238 .addCFIIndex(CFIIndex); 1239 } 1240 1241 if (FI->usesPICBase()) { 1242 // Describe where FP was saved, at a fixed offset from CFA. 
1243 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1244 CFIIndex = MF.addFrameInst( 1245 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1246 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1247 .addCFIIndex(CFIIndex); 1248 } 1249 1250 if (HasBP) { 1251 // Describe where BP was saved, at a fixed offset from CFA. 1252 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1253 CFIIndex = MF.addFrameInst( 1254 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1255 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1256 .addCFIIndex(CFIIndex); 1257 } 1258 1259 if (MustSaveLR) { 1260 // Describe where LR was saved, at a fixed offset from CFA. 1261 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1262 CFIIndex = MF.addFrameInst( 1263 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1264 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1265 .addCFIIndex(CFIIndex); 1266 } 1267 } 1268 1269 // If there is a frame pointer, copy R1 into R31 1270 if (HasFP) { 1271 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1272 .addReg(SPReg) 1273 .addReg(SPReg); 1274 1275 if (!HasBP && needsCFI) { 1276 // Change the definition of CFA from SP+offset to FP+offset, because SP 1277 // will change at every alloca. 1278 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1279 unsigned CFIIndex = MF.addFrameInst( 1280 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1281 1282 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1283 .addCFIIndex(CFIIndex); 1284 } 1285 } 1286 1287 if (needsCFI) { 1288 // Describe where callee saved registers were saved, at fixed offsets from 1289 // CFA. 1290 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1291 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1292 unsigned Reg = CSI[I].getReg(); 1293 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1294 1295 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1296 // subregisters of CR2. 
We just need to emit a move of CR2. 1297 if (PPC::CRBITRCRegClass.contains(Reg)) 1298 continue; 1299 1300 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1301 continue; 1302 1303 // For SVR4, don't emit a move for the CR spill slot if we haven't 1304 // spilled CRs. 1305 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1306 && !MustSaveCR) 1307 continue; 1308 1309 // For 64-bit SVR4 when we have spilled CRs, the spill location 1310 // is SP+8, not a frame-relative slot. 1311 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1312 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1313 // the whole CR word. In the ELFv2 ABI, every CR that was 1314 // actually saved gets its own CFI record. 1315 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1316 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1317 nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset())); 1318 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1319 .addCFIIndex(CFIIndex); 1320 continue; 1321 } 1322 1323 if (CSI[I].isSpilledToReg()) { 1324 unsigned SpilledReg = CSI[I].getDstReg(); 1325 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1326 nullptr, MRI->getDwarfRegNum(Reg, true), 1327 MRI->getDwarfRegNum(SpilledReg, true))); 1328 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1329 .addCFIIndex(CFIRegister); 1330 } else { 1331 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1332 // We have changed the object offset above but we do not want to change 1333 // the actual offsets in the CFI instruction so we have to undo the 1334 // offset change here. 
1335 if (MovingStackUpdateDown) 1336 Offset -= NegFrameSize; 1337 1338 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1339 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1340 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1341 .addCFIIndex(CFIIndex); 1342 } 1343 } 1344 } 1345 } 1346 1347 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1348 MachineBasicBlock &MBB) const { 1349 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1350 DebugLoc dl; 1351 1352 if (MBBI != MBB.end()) 1353 dl = MBBI->getDebugLoc(); 1354 1355 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1356 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1357 1358 // Get alignment info so we know how to restore the SP. 1359 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1360 1361 // Get the number of bytes allocated from the FrameInfo. 1362 int FrameSize = MFI.getStackSize(); 1363 1364 // Get processor type. 1365 bool isPPC64 = Subtarget.isPPC64(); 1366 // Get the ABI. 1367 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1368 1369 // Check if the link register (LR) has been saved. 1370 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1371 bool MustSaveLR = FI->mustSaveLR(); 1372 const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs(); 1373 bool MustSaveCR = !MustSaveCRs.empty(); 1374 // Do we have a frame pointer and/or base pointer for this function? 1375 bool HasFP = hasFP(MF); 1376 bool HasBP = RegInfo->hasBasePointer(MF); 1377 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1378 1379 unsigned SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1380 Register BPReg = RegInfo->getBaseRegister(MF); 1381 unsigned FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1382 unsigned ScratchReg = 0; 1383 unsigned TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1384 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1385 : PPC::MTLR ); 1386 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? 
PPC::LD 1387 : PPC::LWZ ); 1388 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1389 : PPC::LIS ); 1390 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1391 : PPC::OR ); 1392 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1393 : PPC::ORI ); 1394 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1395 : PPC::ADDI ); 1396 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1397 : PPC::ADD4 ); 1398 1399 int LROffset = getReturnSaveOffset(); 1400 1401 int FPOffset = 0; 1402 1403 // Using the same bool variable as below to suppress compiler warnings. 1404 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1405 &TempReg); 1406 assert(SingleScratchReg && 1407 "Could not find an available scratch register"); 1408 1409 SingleScratchReg = ScratchReg == TempReg; 1410 1411 if (HasFP) { 1412 if (isSVR4ABI) { 1413 int FPIndex = FI->getFramePointerSaveIndex(); 1414 assert(FPIndex && "No Frame Pointer Save Slot!"); 1415 FPOffset = MFI.getObjectOffset(FPIndex); 1416 } else { 1417 FPOffset = getFramePointerSaveOffset(); 1418 } 1419 } 1420 1421 int BPOffset = 0; 1422 if (HasBP) { 1423 if (isSVR4ABI) { 1424 int BPIndex = FI->getBasePointerSaveIndex(); 1425 assert(BPIndex && "No Base Pointer Save Slot!"); 1426 BPOffset = MFI.getObjectOffset(BPIndex); 1427 } else { 1428 BPOffset = getBasePointerSaveOffset(); 1429 } 1430 } 1431 1432 int PBPOffset = 0; 1433 if (FI->usesPICBase()) { 1434 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1435 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1436 PBPOffset = MFI.getObjectOffset(PBPIndex); 1437 } 1438 1439 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1440 1441 if (IsReturnBlock) { 1442 unsigned RetOpcode = MBBI->getOpcode(); 1443 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1444 RetOpcode == PPC::TCRETURNdi || 1445 RetOpcode == PPC::TCRETURNai || 1446 RetOpcode == PPC::TCRETURNri8 || 1447 RetOpcode == PPC::TCRETURNdi8 || 1448 RetOpcode 
== PPC::TCRETURNai8; 1449 1450 if (UsesTCRet) { 1451 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1452 MachineOperand &StackAdjust = MBBI->getOperand(1); 1453 assert(StackAdjust.isImm() && "Expecting immediate value."); 1454 // Adjust stack pointer. 1455 int StackAdj = StackAdjust.getImm(); 1456 int Delta = StackAdj - MaxTCRetDelta; 1457 assert((Delta >= 0) && "Delta must be positive"); 1458 if (MaxTCRetDelta>0) 1459 FrameSize += (StackAdj +Delta); 1460 else 1461 FrameSize += StackAdj; 1462 } 1463 } 1464 1465 // Frames of 32KB & larger require special handling because they cannot be 1466 // indexed into with a simple LD/LWZ immediate offset operand. 1467 bool isLargeFrame = !isInt<16>(FrameSize); 1468 1469 // On targets without red zone, the SP needs to be restored last, so that 1470 // all live contents of the stack frame are upwards of the SP. This means 1471 // that we cannot restore SP just now, since there may be more registers 1472 // to restore from the stack frame (e.g. R31). If the frame size is not 1473 // a simple immediate value, we will need a spare register to hold the 1474 // restored SP. If the frame size is known and small, we can simply adjust 1475 // the offsets of the registers to be restored, and still use SP to restore 1476 // them. In such case, the final update of SP will be to add the frame 1477 // size to it. 1478 // To simplify the code, set RBReg to the base register used to restore 1479 // values from the stack, and set SPAdd to the value that needs to be added 1480 // to the SP at the end. The default values are as if red zone was present. 1481 unsigned RBReg = SPReg; 1482 unsigned SPAdd = 0; 1483 1484 // Check if we can move the stack update instruction up the epilogue 1485 // past the callee saves. This will allow the move to LR instruction 1486 // to be executed before the restores of the callee saves which means 1487 // that the callee saves can hide the latency from the MTLR instrcution. 
1488 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1489 if (stackUpdateCanBeMoved(MF)) { 1490 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1491 for (CalleeSavedInfo CSI : Info) { 1492 int FrIdx = CSI.getFrameIdx(); 1493 // If the frame index is not negative the callee saved info belongs to a 1494 // stack object that is not a fixed stack object. We ignore non-fixed 1495 // stack objects because we won't move the update of the stack pointer 1496 // past them. 1497 if (FrIdx >= 0) 1498 continue; 1499 1500 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1501 StackUpdateLoc--; 1502 else { 1503 // Abort the operation as we can't update all CSR restores. 1504 StackUpdateLoc = MBBI; 1505 break; 1506 } 1507 } 1508 } 1509 1510 if (FrameSize) { 1511 // In the prologue, the loaded (or persistent) stack pointer value is 1512 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1513 // zone add this offset back now. 1514 1515 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1516 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1517 // call which invalidates the stack pointer value in SP(0). So we use the 1518 // value of R31 in this case. 
1519 if (FI->hasFastCall()) { 1520 assert(HasFP && "Expecting a valid frame pointer."); 1521 if (!HasRedZone) 1522 RBReg = FPReg; 1523 if (!isLargeFrame) { 1524 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1525 .addReg(FPReg).addImm(FrameSize); 1526 } else { 1527 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1528 .addImm(FrameSize >> 16); 1529 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1530 .addReg(ScratchReg, RegState::Kill) 1531 .addImm(FrameSize & 0xFFFF); 1532 BuildMI(MBB, MBBI, dl, AddInst) 1533 .addReg(RBReg) 1534 .addReg(FPReg) 1535 .addReg(ScratchReg); 1536 } 1537 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1538 if (HasRedZone) { 1539 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1540 .addReg(SPReg) 1541 .addImm(FrameSize); 1542 } else { 1543 // Make sure that adding FrameSize will not overflow the max offset 1544 // size. 1545 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1546 "Local offsets should be negative"); 1547 SPAdd = FrameSize; 1548 FPOffset += FrameSize; 1549 BPOffset += FrameSize; 1550 PBPOffset += FrameSize; 1551 } 1552 } else { 1553 // We don't want to use ScratchReg as a base register, because it 1554 // could happen to be R0. Use FP instead, but make sure to preserve it. 1555 if (!HasRedZone) { 1556 // If FP is not saved, copy it to ScratchReg. 1557 if (!HasFP) 1558 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1559 .addReg(FPReg) 1560 .addReg(FPReg); 1561 RBReg = FPReg; 1562 } 1563 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1564 .addImm(0) 1565 .addReg(SPReg); 1566 } 1567 } 1568 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1569 // If there is no red zone, ScratchReg may be needed for holding a useful 1570 // value (although not the base register). Make sure it is not overwritten 1571 // too early. 
1572 1573 assert((isPPC64 || !MustSaveCR) && 1574 "Epilogue CR restoring supported only in 64-bit mode"); 1575 1576 // If we need to restore both the LR and the CR and we only have one 1577 // available scratch register, we must do them one at a time. 1578 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1579 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1580 // is live here. 1581 assert(HasRedZone && "Expecting red zone"); 1582 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1583 .addImm(getCRSaveOffset()) 1584 .addReg(SPReg); 1585 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1586 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1587 .addReg(TempReg, getKillRegState(i == e-1)); 1588 } 1589 1590 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1591 // LR is stored in the caller's stack frame. ScratchReg will be needed 1592 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1593 // a base register anyway, because it may happen to be R0. 1594 bool LoadedLR = false; 1595 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1596 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1597 .addImm(LROffset+SPAdd) 1598 .addReg(RBReg); 1599 LoadedLR = true; 1600 } 1601 1602 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1603 // This will only occur for PPC64. 1604 assert(isPPC64 && "Expecting 64-bit mode"); 1605 assert(RBReg == SPReg && "Should be using SP as a base register"); 1606 BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg) 1607 .addImm(getCRSaveOffset()) 1608 .addReg(RBReg); 1609 } 1610 1611 if (HasFP) { 1612 // If there is red zone, restore FP directly, since SP has already been 1613 // restored. Otherwise, restore the value of FP into ScratchReg. 
1614 if (HasRedZone || RBReg == SPReg) 1615 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1616 .addImm(FPOffset) 1617 .addReg(SPReg); 1618 else 1619 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1620 .addImm(FPOffset) 1621 .addReg(RBReg); 1622 } 1623 1624 if (FI->usesPICBase()) 1625 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1626 .addImm(PBPOffset) 1627 .addReg(RBReg); 1628 1629 if (HasBP) 1630 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1631 .addImm(BPOffset) 1632 .addReg(RBReg); 1633 1634 // There is nothing more to be loaded from the stack, so now we can 1635 // restore SP: SP = RBReg + SPAdd. 1636 if (RBReg != SPReg || SPAdd != 0) { 1637 assert(!HasRedZone && "This should not happen with red zone"); 1638 // If SPAdd is 0, generate a copy. 1639 if (SPAdd == 0) 1640 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1641 .addReg(RBReg) 1642 .addReg(RBReg); 1643 else 1644 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1645 .addReg(RBReg) 1646 .addImm(SPAdd); 1647 1648 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1649 if (RBReg == FPReg) 1650 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1651 .addReg(ScratchReg) 1652 .addReg(ScratchReg); 1653 1654 // Now load the LR from the caller's stack frame. 1655 if (MustSaveLR && !LoadedLR) 1656 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1657 .addImm(LROffset) 1658 .addReg(SPReg); 1659 } 1660 1661 if (MustSaveCR && 1662 !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64 1663 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1664 BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i]) 1665 .addReg(TempReg, getKillRegState(i == e-1)); 1666 1667 if (MustSaveLR) 1668 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1669 1670 // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail 1671 // call optimization 1672 if (IsReturnBlock) { 1673 unsigned RetOpcode = MBBI->getOpcode(); 1674 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1675 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1676 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1677 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1678 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1679 1680 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1681 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1682 .addReg(SPReg).addImm(CallerAllocatedAmt); 1683 } else { 1684 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1685 .addImm(CallerAllocatedAmt >> 16); 1686 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1687 .addReg(ScratchReg, RegState::Kill) 1688 .addImm(CallerAllocatedAmt & 0xFFFF); 1689 BuildMI(MBB, MBBI, dl, AddInst) 1690 .addReg(SPReg) 1691 .addReg(FPReg) 1692 .addReg(ScratchReg); 1693 } 1694 } else { 1695 createTailCallBranchInstr(MBB); 1696 } 1697 } 1698 } 1699 1700 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1701 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1702 1703 // If we got this far a first terminator should exist. 1704 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1705 1706 DebugLoc dl = MBBI->getDebugLoc(); 1707 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1708 1709 // Create branch instruction for pseudo tail call return instruction 1710 unsigned RetOpcode = MBBI->getOpcode(); 1711 if (RetOpcode == PPC::TCRETURNdi) { 1712 MBBI = MBB.getLastNonDebugInstr(); 1713 MachineOperand &JumpTarget = MBBI->getOperand(0); 1714 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 
1715 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1716 } else if (RetOpcode == PPC::TCRETURNri) { 1717 MBBI = MBB.getLastNonDebugInstr(); 1718 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1719 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1720 } else if (RetOpcode == PPC::TCRETURNai) { 1721 MBBI = MBB.getLastNonDebugInstr(); 1722 MachineOperand &JumpTarget = MBBI->getOperand(0); 1723 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1724 } else if (RetOpcode == PPC::TCRETURNdi8) { 1725 MBBI = MBB.getLastNonDebugInstr(); 1726 MachineOperand &JumpTarget = MBBI->getOperand(0); 1727 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1728 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1729 } else if (RetOpcode == PPC::TCRETURNri8) { 1730 MBBI = MBB.getLastNonDebugInstr(); 1731 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1732 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1733 } else if (RetOpcode == PPC::TCRETURNai8) { 1734 MBBI = MBB.getLastNonDebugInstr(); 1735 MachineOperand &JumpTarget = MBBI->getOperand(0); 1736 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1737 } 1738 } 1739 1740 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1741 BitVector &SavedRegs, 1742 RegScavenger *RS) const { 1743 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1744 1745 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1746 1747 // Save and clear the LR state. 1748 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1749 unsigned LR = RegInfo->getRARegister(); 1750 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1751 SavedRegs.reset(LR); 1752 1753 // Save R31 if necessary 1754 int FPSI = FI->getFramePointerSaveIndex(); 1755 const bool isPPC64 = Subtarget.isPPC64(); 1756 MachineFrameInfo &MFI = MF.getFrameInfo(); 1757 1758 // If the frame pointer save index hasn't been defined yet. 
1759 if (!FPSI && needsFP(MF)) { 1760 // Find out what the fix offset of the frame pointer save area. 1761 int FPOffset = getFramePointerSaveOffset(); 1762 // Allocate the frame index for frame pointer save area. 1763 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1764 // Save the result. 1765 FI->setFramePointerSaveIndex(FPSI); 1766 } 1767 1768 int BPSI = FI->getBasePointerSaveIndex(); 1769 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1770 int BPOffset = getBasePointerSaveOffset(); 1771 // Allocate the frame index for the base pointer save area. 1772 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1773 // Save the result. 1774 FI->setBasePointerSaveIndex(BPSI); 1775 } 1776 1777 // Reserve stack space for the PIC Base register (R30). 1778 // Only used in SVR4 32-bit. 1779 if (FI->usesPICBase()) { 1780 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1781 FI->setPICBasePointerSaveIndex(PBPSI); 1782 } 1783 1784 // Make sure we don't explicitly spill r31, because, for example, we have 1785 // some inline asm which explicitly clobbers it, when we otherwise have a 1786 // frame pointer and are using r31's spill slot for the prologue/epilogue 1787 // code. Same goes for the base pointer and the PIC base register. 1788 if (needsFP(MF)) 1789 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1790 if (RegInfo->hasBasePointer(MF)) 1791 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1792 if (FI->usesPICBase()) 1793 SavedRegs.reset(PPC::R30); 1794 1795 // Reserve stack space to move the linkage area to in case of a tail call. 1796 int TCSPDelta = 0; 1797 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1798 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1799 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1800 } 1801 1802 // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the 1803 // function uses CR 2, 3, or 4. For 64-bit SVR4 we create a FixedStack 1804 // object at the offset of the CR-save slot in the linkage area. 
The actual 1805 // save and restore of the condition register will be created as part of the 1806 // prologue and epilogue insertion, but the FixedStack object is needed to 1807 // keep the CalleSavedInfo valid. 1808 if (Subtarget.isSVR4ABI() && 1809 (SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || 1810 SavedRegs.test(PPC::CR4))) { 1811 const uint64_t SpillSize = 4; // Condition register is always 4 bytes. 1812 const int64_t SpillOffset = Subtarget.isPPC64() ? 8 : -4; 1813 int FrameIdx = 1814 MFI.CreateFixedObject(SpillSize, SpillOffset, 1815 /* IsImmutable */ true, /* IsAliased */ false); 1816 FI->setCRSpillFrameIndex(FrameIdx); 1817 } 1818 } 1819 1820 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1821 RegScavenger *RS) const { 1822 // Early exit if not using the SVR4 ABI. 1823 if (!Subtarget.isSVR4ABI()) { 1824 addScavengingSpillSlot(MF, RS); 1825 return; 1826 } 1827 1828 // Get callee saved register information. 1829 MachineFrameInfo &MFI = MF.getFrameInfo(); 1830 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1831 1832 // If the function is shrink-wrapped, and if the function has a tail call, the 1833 // tail call might not be in the new RestoreBlock, so real branch instruction 1834 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1835 // RestoreBlock. So we handle this case here. 1836 if (MFI.getSavePoint() && MFI.hasTailCall()) { 1837 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1838 for (MachineBasicBlock &MBB : MF) { 1839 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1840 createTailCallBranchInstr(MBB); 1841 } 1842 } 1843 1844 // Early exit if no callee saved registers are modified! 1845 if (CSI.empty() && !needsFP(MF)) { 1846 addScavengingSpillSlot(MF, RS); 1847 return; 1848 } 1849 1850 unsigned MinGPR = PPC::R31; 1851 unsigned MinG8R = PPC::X31; 1852 unsigned MinFPR = PPC::F31; 1853 unsigned MinVR = Subtarget.hasSPE() ? 
/// Fix up the offsets of the callee-saved spill slots before the frame layout
/// is finalized, then reserve register-scavenging slots.
///
/// For non-SVR4 ABIs only the scavenging slot is handled. For SVR4 the
/// function walks the register save areas in order (floating-point, general
/// purpose, CR on 32-bit, VRSAVE, vector/SPE) and keeps a running, non-positive
/// \c LowerBound. Each spill slot's existing offset is rebased by the
/// \c LowerBound in effect when its area is reached, and \c LowerBound is then
/// lowered by the size of that area.
///
/// \param MF The function whose frame is being laid out.
/// \param RS Register scavenger, forwarded to addScavengingSpillSlot().
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                       RegScavenger *RS) const {
  // Early exit if not using the SVR4 ABI.
  if (!Subtarget.isSVR4ABI()) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped, and if the function has a tail call, the
  // tail call might not be in the new RestoreBlock, so real branch instruction
  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
  // RestoreBlock. So we handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Lowest register seen so far in each class, starting from register 31 of
  // the class. The FPR and GPR minima determine the size of their save areas
  // below.
  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSAVESaveArea = false;
  bool HasVRSaveArea = false;

  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  // Bucket every callee-saved register by register class and track the
  // minimum register of each class.
  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC");
    if (PPC::GPRCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(CSI[i]);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(CSI[i]);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(CSI[i]);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // do nothing, as we already know whether CRs are spilled
    } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
      HasVRSAVESaveArea = true;
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(CSI[i]);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // Running lower edge (a non-positive offset) of the save areas laid out so
  // far; every slot offset below is rebased by this value.
  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }

    // The area spans from MinFPR up to register 31, 8 bytes per register.
    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  // The PIC base register (R30) gets the same treatment and widens the GPR
  // save area down to R30 if it was not already that large.
  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
    HasGPSaveArea = true;
  }

  // Likewise for the base pointer, which may be a 32-bit or a 64-bit register.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    Register BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(BP)) {
      MinG8R = std::min<unsigned>(MinG8R, BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(BP)) {
      MinGPR = std::min<unsigned>(MinGPR, BP);
      HasGPSaveArea = true;
    }
  }

  // General register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area. Entries that were
    // spilled to a register are skipped here.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // Same adjustment for the 64-bit general purpose registers.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // The area is sized from the lowest-encoded register of either GPR class
    // up to register 31.
    unsigned MinReg =
      std::min<unsigned>(TRI->getEncodingValue(MinGPR),
                         TRI->getEncodingValue(MinG8R));

    if (Subtarget.isPPC64()) {
      LowerBound -= (31 - MinReg + 1) * 8;
    } else {
      LowerBound -= (31 - MinReg + 1) * 4;
    }
  }

  // For 32-bit only, the CR save area is below the general register
  // save area.  For 64-bit SVR4, the CR save area is addressed relative
  // to the stack pointer and hence does not need an adjustment here.
  // Only CR2 (the first nonvolatile spilled) has an associated frame
  // index so that we have a single uniform save area.
  if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
    // Adjust the frame index of the CR spill slot.
    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      unsigned Reg = CSI[i].getReg();

      if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
          // Leave Darwin logic as-is.
          || (!Subtarget.isSVR4ABI() &&
              (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)))) {
        int FI = CSI[i].getFrameIdx();

        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  if (HasVRSAVESaveArea) {
    // FIXME SVR4: Is it actually possible to have multiple elements in CSI
    //             which have the VRSAVE register class?
    // Adjust the frame index of the VRSAVE spill slot.
    for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
      unsigned Reg = CSI[i].getReg();

      if (PPC::VRSAVERCRegClass.contains(Reg)) {
        int FI = CSI[i].getFrameIdx();

        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding, we need 16-byte alignment. Note: for a
    // positive number the alignment formula is y = (x + (n-1)) & ~(n-1).
    // Since LowerBound is non-positive here (the stack grows downward), we
    // use y = x & ~(n-1) instead, where x is the size before aligning, n is
    // the alignment (16 here) and y is the size after aligning.
    assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }
  }

  addScavengingSpillSlot(MF, RS);
}
We don't know the complete frame size here 2083 // because we've not yet computed callee-saved register spills or the 2084 // needed alignment padding. 2085 unsigned StackSize = determineFrameLayout(MF, true); 2086 MachineFrameInfo &MFI = MF.getFrameInfo(); 2087 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 2088 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 2089 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2090 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2091 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2092 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2093 unsigned Size = TRI.getSpillSize(RC); 2094 unsigned Align = TRI.getSpillAlignment(RC); 2095 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2096 2097 // Might we have over-aligned allocas? 2098 bool HasAlVars = MFI.hasVarSizedObjects() && 2099 MFI.getMaxAlignment() > getStackAlignment(); 2100 2101 // These kinds of spills might need two registers. 2102 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 2103 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2104 2105 } 2106 } 2107 2108 // This function checks if a callee saved gpr can be spilled to a volatile 2109 // vector register. This occurs for leaf functions when the option 2110 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2111 // which were not spilled to vectors, return false so the target independent 2112 // code can handle them by assigning a FrameIdx to a stack slot. 2113 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2114 MachineFunction &MF, const TargetRegisterInfo *TRI, 2115 std::vector<CalleeSavedInfo> &CSI) const { 2116 2117 if (CSI.empty()) 2118 return true; // Early exit if no callee saved registers are modified! 2119 2120 // Early exit if cannot spill gprs to volatile vector registers. 
2121 MachineFrameInfo &MFI = MF.getFrameInfo(); 2122 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2123 return false; 2124 2125 // Build a BitVector of VSRs that can be used for spilling GPRs. 2126 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2127 BitVector BVCalleeSaved(TRI->getNumRegs()); 2128 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2129 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2130 for (unsigned i = 0; CSRegs[i]; ++i) 2131 BVCalleeSaved.set(CSRegs[i]); 2132 2133 for (unsigned Reg : BVAllocatable.set_bits()) { 2134 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2135 // used in the function. 2136 if (BVCalleeSaved[Reg] || 2137 (!PPC::F8RCRegClass.contains(Reg) && 2138 !PPC::VFRCRegClass.contains(Reg)) || 2139 (MF.getRegInfo().isPhysRegUsed(Reg))) 2140 BVAllocatable.reset(Reg); 2141 } 2142 2143 bool AllSpilledToReg = true; 2144 for (auto &CS : CSI) { 2145 if (BVAllocatable.none()) 2146 return false; 2147 2148 unsigned Reg = CS.getReg(); 2149 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2150 AllSpilledToReg = false; 2151 continue; 2152 } 2153 2154 unsigned VolatileVFReg = BVAllocatable.find_first(); 2155 if (VolatileVFReg < BVAllocatable.size()) { 2156 CS.setDstReg(VolatileVFReg); 2157 BVAllocatable.reset(VolatileVFReg); 2158 } else { 2159 AllSpilledToReg = false; 2160 } 2161 } 2162 return AllSpilledToReg; 2163 } 2164 2165 2166 bool 2167 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, 2168 MachineBasicBlock::iterator MI, 2169 const std::vector<CalleeSavedInfo> &CSI, 2170 const TargetRegisterInfo *TRI) const { 2171 2172 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 2173 // Return false otherwise to maintain pre-existing behavior. 
/// Insert the callee-saved register spills for \p CSI before \p MI in \p MBB.
///
/// Only the SVR4 ABIs are handled here; for other ABIs the function returns
/// false without emitting anything. VRSAVE is skipped, and so is R2/X2 when
/// the function must save the TOC. Nonvolatile CR fields are recorded for the
/// prologue on 64-bit, and stored through a single MFCR/STW pair on 32-bit.
/// Every other register is either moved to its assigned destination register
/// or stored to its stack slot.
///
/// \returns true if this function handled the spills, false otherwise.
bool
PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator MI,
                                     const std::vector<CalleeSavedInfo> &CSI,
                                     const TargetRegisterInfo *TRI) const {

  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
  // Return false otherwise to maintain pre-existing behavior.
  if (!Subtarget.isSVR4ABI())
    return false;

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  // Set once the 32-bit MFCR has been emitted; CRMIB then refers to it so
  // that later CR fields can be attached to the same instruction.
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();
    // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
    if (Reg == PPC::VRSAVE)
      continue;

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
       MBB.addLiveIn(Reg);

    // A CR field after the first one: no new instruction, just record it as
    // an implicit-kill operand on the MFCR emitted earlier.
    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (Subtarget.isPPC64()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                  .addReg(Reg, RegState::ImplicitKill);

        // Copy the condition register into R12, then store R12 to the slot.
        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                         .addReg(PPC::R12,
                                                 getKillRegState(true)),
                                         CSI[i].getFrameIdx()));
      }
    } else {
      if (CSI[i].isSpilledToReg()) {
        // This entry was assigned a destination register instead of a stack
        // slot; move the value there.
        NumPESpillVSR++;
        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
          .addReg(Reg, getKillRegState(true));
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       CSI[i].getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
                                  RC, TRI);
      }
    }
  }
  return true;
}
2272 return; 2273 else { 2274 // 32-bit: FP-relative 2275 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), 2276 PPC::R12), 2277 CSI[CSIIndex].getFrameIdx())); 2278 RestoreOp = PPC::MTOCRF; 2279 MoveReg = PPC::R12; 2280 } 2281 2282 if (CR2Spilled) 2283 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2284 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2285 2286 if (CR3Spilled) 2287 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2288 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2289 2290 if (CR4Spilled) 2291 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2292 .addReg(MoveReg, getKillRegState(true))); 2293 } 2294 2295 MachineBasicBlock::iterator PPCFrameLowering:: 2296 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2297 MachineBasicBlock::iterator I) const { 2298 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2299 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2300 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2301 // Add (actually subtract) back the amount the callee popped on return. 2302 if (int CalleeAmt = I->getOperand(1).getImm()) { 2303 bool is64Bit = Subtarget.isPPC64(); 2304 CalleeAmt *= -1; 2305 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2306 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2307 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2308 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2309 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2310 unsigned ORIInstr = is64Bit ? 
PPC::ORI8 : PPC::ORI; 2311 const DebugLoc &dl = I->getDebugLoc(); 2312 2313 if (isInt<16>(CalleeAmt)) { 2314 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2315 .addReg(StackReg, RegState::Kill) 2316 .addImm(CalleeAmt); 2317 } else { 2318 MachineBasicBlock::iterator MBBI = I; 2319 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2320 .addImm(CalleeAmt >> 16); 2321 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2322 .addReg(TmpReg, RegState::Kill) 2323 .addImm(CalleeAmt & 0xFFFF); 2324 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2325 .addReg(StackReg, RegState::Kill) 2326 .addReg(TmpReg); 2327 } 2328 } 2329 } 2330 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2331 return MBB.erase(I); 2332 } 2333 2334 bool 2335 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, 2336 MachineBasicBlock::iterator MI, 2337 std::vector<CalleeSavedInfo> &CSI, 2338 const TargetRegisterInfo *TRI) const { 2339 2340 // Currently, this function only handles SVR4 32- and 64-bit ABIs. 2341 // Return false otherwise to maintain pre-existing behavior. 2342 if (!Subtarget.isSVR4ABI()) 2343 return false; 2344 2345 MachineFunction *MF = MBB.getParent(); 2346 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2347 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2348 bool MustSaveTOC = FI->mustSaveTOC(); 2349 bool CR2Spilled = false; 2350 bool CR3Spilled = false; 2351 bool CR4Spilled = false; 2352 unsigned CSIIndex = 0; 2353 2354 // Initialize insertion-point logic; we will be restoring in reverse 2355 // order of spill. 2356 MachineBasicBlock::iterator I = MI, BeforeI = I; 2357 bool AtStart = I == MBB.begin(); 2358 2359 if (!AtStart) 2360 --BeforeI; 2361 2362 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2363 unsigned Reg = CSI[i].getReg(); 2364 2365 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 
/// Insert the callee-saved register restores for \p CSI before \p MI in
/// \p MBB.
///
/// Only the SVR4 ABIs are handled here; for other ABIs the function returns
/// false without emitting anything. VRSAVE is skipped, and so is R2/X2 when
/// the function must save the TOC. CR2-CR4 are only noted while walking
/// \p CSI and are restored together through restoreCRs(). Every other register
/// is either moved back from its destination register or reloaded from its
/// stack slot.
///
/// \returns true if this function handled the restores, false otherwise.
bool
PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
                                        MachineBasicBlock::iterator MI,
                                        std::vector<CalleeSavedInfo> &CSI,
                                        const TargetRegisterInfo *TRI) const {

  // Currently, this function only handles SVR4 32- and 64-bit ABIs.
  // Return false otherwise to maintain pre-existing behavior.
  if (!Subtarget.isSVR4ABI())
    return false;

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill. BeforeI remembers the instruction in front of the
  // original insertion point (when there is one) so that I can be moved back
  // to just after it at the end of every iteration.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
    if (Reg == PPC::VRSAVE)
      continue;

    // R2/X2 is not reloaded here when the function must save the TOC.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled.  Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // When we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      if ((CR2Spilled || CR3Spilled || CR4Spilled)
          && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
        bool is31 = needsFP(*MF);
        restoreCRs(Subtarget.isPPC64(), is31,
                   CR2Spilled, CR3Spilled, CR4Spilled,
                   MBB, I, CSI, CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        // The value was saved in a register rather than on the stack; move it
        // back.
        DebugLoc DL;
        NumPEReloadVSR++;
        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
            .addReg(CSI[i].getDstReg(), getKillRegState(true));
      } else {
       // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order: reset I so that the next restore is placed in
    // front of the code emitted so far.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we haven't yet restored the CRs, do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    bool is31 = needsFP(*MF);
    restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
               MBB, I, CSI, CSIIndex);
  }

  return true;
}
2429 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2430 bool is31 = needsFP(*MF); 2431 restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled, 2432 MBB, I, CSI, CSIIndex); 2433 } 2434 2435 return true; 2436 } 2437 2438 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2439 return TOCSaveOffset; 2440 } 2441 2442 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2443 if (Subtarget.isAIXABI()) 2444 report_fatal_error("FramePointer is not implemented on AIX yet."); 2445 return FramePointerSaveOffset; 2446 } 2447 2448 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2449 if (Subtarget.isAIXABI()) 2450 report_fatal_error("BasePointer is not implemented on AIX yet."); 2451 return BasePointerSaveOffset; 2452 } 2453 2454 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2455 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2456 return false; 2457 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2458 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2459 } 2460