1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCFrameLowering.h" 14 #include "PPCInstrBuilder.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCMachineFunctionInfo.h" 17 #include "PPCSubtarget.h" 18 #include "PPCTargetMachine.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "framelowering" 32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 34 35 static cl::opt<bool> 36 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 37 cl::desc("Enable spills in prologue to vector registers."), 38 cl::init(false), cl::Hidden); 39 40 /// VRRegNo - Map from a numbered VR register to its enum value. 41 /// 42 static const MCPhysReg VRRegNo[] = { 43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 47 }; 48 49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 50 if (STI.isAIXABI()) 51 return STI.isPPC64() ? 16 : 8; 52 // SVR4 ABI: 53 return STI.isPPC64() ? 16 : 4; 54 } 55 56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 57 if (STI.isAIXABI()) 58 return STI.isPPC64() ? 40 : 20; 59 return STI.isELFv2ABI() ? 24 : 40; 60 } 61 62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 63 // First slot in the general register save area. 64 return STI.isPPC64() ? -8U : -4U; 65 } 66 67 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 68 if (STI.isAIXABI() || STI.isPPC64()) 69 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 70 71 // 32-bit SVR4 ABI: 72 return 8; 73 } 74 75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 76 // Third slot in the general purpose register save area. 77 if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent()) 78 return -12U; 79 80 // Second slot in the general purpose register save area. 81 return STI.isPPC64() ? -16U : -8U; 82 } 83 84 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { 85 return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8; 86 } 87 88 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 89 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 90 STI.getPlatformStackAlignment(), 0), 91 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 92 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 93 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 94 LinkageSize(computeLinkageSize(Subtarget)), 95 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 96 CRSaveOffset(computeCRSaveOffset(Subtarget)) {} 97 98 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 99 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 100 unsigned &NumEntries) const { 101 102 // Floating-point register save area offsets. 103 #define CALLEE_SAVED_FPRS \ 104 {PPC::F31, -8}, \ 105 {PPC::F30, -16}, \ 106 {PPC::F29, -24}, \ 107 {PPC::F28, -32}, \ 108 {PPC::F27, -40}, \ 109 {PPC::F26, -48}, \ 110 {PPC::F25, -56}, \ 111 {PPC::F24, -64}, \ 112 {PPC::F23, -72}, \ 113 {PPC::F22, -80}, \ 114 {PPC::F21, -88}, \ 115 {PPC::F20, -96}, \ 116 {PPC::F19, -104}, \ 117 {PPC::F18, -112}, \ 118 {PPC::F17, -120}, \ 119 {PPC::F16, -128}, \ 120 {PPC::F15, -136}, \ 121 {PPC::F14, -144} 122 123 // 32-bit general purpose register save area offsets shared by ELF and 124 // AIX. AIX has an extra CSR with r13. 125 #define CALLEE_SAVED_GPRS32 \ 126 {PPC::R31, -4}, \ 127 {PPC::R30, -8}, \ 128 {PPC::R29, -12}, \ 129 {PPC::R28, -16}, \ 130 {PPC::R27, -20}, \ 131 {PPC::R26, -24}, \ 132 {PPC::R25, -28}, \ 133 {PPC::R24, -32}, \ 134 {PPC::R23, -36}, \ 135 {PPC::R22, -40}, \ 136 {PPC::R21, -44}, \ 137 {PPC::R20, -48}, \ 138 {PPC::R19, -52}, \ 139 {PPC::R18, -56}, \ 140 {PPC::R17, -60}, \ 141 {PPC::R16, -64}, \ 142 {PPC::R15, -68}, \ 143 {PPC::R14, -72} 144 145 // 64-bit general purpose register save area offsets. 146 #define CALLEE_SAVED_GPRS64 \ 147 {PPC::X31, -8}, \ 148 {PPC::X30, -16}, \ 149 {PPC::X29, -24}, \ 150 {PPC::X28, -32}, \ 151 {PPC::X27, -40}, \ 152 {PPC::X26, -48}, \ 153 {PPC::X25, -56}, \ 154 {PPC::X24, -64}, \ 155 {PPC::X23, -72}, \ 156 {PPC::X22, -80}, \ 157 {PPC::X21, -88}, \ 158 {PPC::X20, -96}, \ 159 {PPC::X19, -104}, \ 160 {PPC::X18, -112}, \ 161 {PPC::X17, -120}, \ 162 {PPC::X16, -128}, \ 163 {PPC::X15, -136}, \ 164 {PPC::X14, -144} 165 166 // Vector register save area offsets. 167 #define CALLEE_SAVED_VRS \ 168 {PPC::V31, -16}, \ 169 {PPC::V30, -32}, \ 170 {PPC::V29, -48}, \ 171 {PPC::V28, -64}, \ 172 {PPC::V27, -80}, \ 173 {PPC::V26, -96}, \ 174 {PPC::V25, -112}, \ 175 {PPC::V24, -128}, \ 176 {PPC::V23, -144}, \ 177 {PPC::V22, -160}, \ 178 {PPC::V21, -176}, \ 179 {PPC::V20, -192} 180 181 // Note that the offsets here overlap, but this is fixed up in 182 // processFunctionBeforeFrameFinalized. 183 184 static const SpillSlot ELFOffsets32[] = { 185 CALLEE_SAVED_FPRS, 186 CALLEE_SAVED_GPRS32, 187 188 // CR save area offset. We map each of the nonvolatile CR fields 189 // to the slot for CR2, which is the first of the nonvolatile CR 190 // fields to be assigned, so that we only allocate one save slot. 191 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 192 {PPC::CR2, -4}, 193 194 // VRSAVE save area offset. 195 {PPC::VRSAVE, -4}, 196 197 CALLEE_SAVED_VRS, 198 199 // SPE register save area (overlaps Vector save area). 200 {PPC::S31, -8}, 201 {PPC::S30, -16}, 202 {PPC::S29, -24}, 203 {PPC::S28, -32}, 204 {PPC::S27, -40}, 205 {PPC::S26, -48}, 206 {PPC::S25, -56}, 207 {PPC::S24, -64}, 208 {PPC::S23, -72}, 209 {PPC::S22, -80}, 210 {PPC::S21, -88}, 211 {PPC::S20, -96}, 212 {PPC::S19, -104}, 213 {PPC::S18, -112}, 214 {PPC::S17, -120}, 215 {PPC::S16, -128}, 216 {PPC::S15, -136}, 217 {PPC::S14, -144}}; 218 219 static const SpillSlot ELFOffsets64[] = { 220 CALLEE_SAVED_FPRS, 221 CALLEE_SAVED_GPRS64, 222 223 // VRSAVE save area offset. 224 {PPC::VRSAVE, -4}, 225 CALLEE_SAVED_VRS 226 }; 227 228 static const SpillSlot AIXOffsets32[] = { 229 CALLEE_SAVED_FPRS, 230 CALLEE_SAVED_GPRS32, 231 // Add AIX's extra CSR. 232 {PPC::R13, -76}, 233 // TODO: Update when we add vector support for AIX. 234 }; 235 236 static const SpillSlot AIXOffsets64[] = { 237 CALLEE_SAVED_FPRS, 238 CALLEE_SAVED_GPRS64, 239 // TODO: Update when we add vector support for AIX. 240 }; 241 242 if (Subtarget.is64BitELFABI()) { 243 NumEntries = array_lengthof(ELFOffsets64); 244 return ELFOffsets64; 245 } 246 247 if (Subtarget.is32BitELFABI()) { 248 NumEntries = array_lengthof(ELFOffsets32); 249 return ELFOffsets32; 250 } 251 252 assert(Subtarget.isAIXABI() && "Unexpected ABI."); 253 254 if (Subtarget.isPPC64()) { 255 NumEntries = array_lengthof(AIXOffsets64); 256 return AIXOffsets64; 257 } 258 259 NumEntries = array_lengthof(AIXOffsets32); 260 return AIXOffsets32; 261 } 262 263 /// RemoveVRSaveCode - We have found that this function does not need any code 264 /// to manipulate the VRSAVE register, even though it uses vector registers. 265 /// This can happen when the only registers used are known to be live in or out 266 /// of the function. Remove all of the VRSAVE related code from the function. 267 /// FIXME: The removal of the code results in a compile failure at -O0 when the 268 /// function contains a function call, as the GPR containing original VRSAVE 269 /// contents is spilled and reloaded around the call. Without the prolog code, 270 /// the spill instruction refers to an undefined register. This code needs 271 /// to account for all uses of that GPR. 272 static void RemoveVRSaveCode(MachineInstr &MI) { 273 MachineBasicBlock *Entry = MI.getParent(); 274 MachineFunction *MF = Entry->getParent(); 275 276 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 277 MachineBasicBlock::iterator MBBI = MI; 278 ++MBBI; 279 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 280 MBBI->eraseFromParent(); 281 282 bool RemovedAllMTVRSAVEs = true; 283 // See if we can find and remove the MTVRSAVE instruction from all of the 284 // epilog blocks. 285 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 286 // If last instruction is a return instruction, add an epilogue 287 if (I->isReturnBlock()) { 288 bool FoundIt = false; 289 for (MBBI = I->end(); MBBI != I->begin(); ) { 290 --MBBI; 291 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 292 MBBI->eraseFromParent(); // remove it. 293 FoundIt = true; 294 break; 295 } 296 } 297 RemovedAllMTVRSAVEs &= FoundIt; 298 } 299 } 300 301 // If we found and removed all MTVRSAVE instructions, remove the read of 302 // VRSAVE as well. 303 if (RemovedAllMTVRSAVEs) { 304 MBBI = MI; 305 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 306 --MBBI; 307 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 308 MBBI->eraseFromParent(); 309 } 310 311 // Finally, nuke the UPDATE_VRSAVE. 312 MI.eraseFromParent(); 313 } 314 315 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 316 // instruction selector. Based on the vector registers that have been used, 317 // transform this into the appropriate ORI instruction. 318 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 319 MachineFunction *MF = MI.getParent()->getParent(); 320 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 321 DebugLoc dl = MI.getDebugLoc(); 322 323 const MachineRegisterInfo &MRI = MF->getRegInfo(); 324 unsigned UsedRegMask = 0; 325 for (unsigned i = 0; i != 32; ++i) 326 if (MRI.isPhysRegModified(VRRegNo[i])) 327 UsedRegMask |= 1 << (31-i); 328 329 // Live in and live out values already must be in the mask, so don't bother 330 // marking them. 331 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 332 unsigned RegNo = TRI->getEncodingValue(LI.first); 333 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 334 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 335 } 336 337 // Live out registers appear as use operands on return instructions. 338 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 339 UsedRegMask != 0 && BI != BE; ++BI) { 340 const MachineBasicBlock &MBB = *BI; 341 if (!MBB.isReturnBlock()) 342 continue; 343 const MachineInstr &Ret = MBB.back(); 344 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 345 const MachineOperand &MO = Ret.getOperand(I); 346 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 347 continue; 348 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 349 UsedRegMask &= ~(1 << (31-RegNo)); 350 } 351 } 352 353 // If no registers are used, turn this into a copy. 354 if (UsedRegMask == 0) { 355 // Remove all VRSAVE code. 356 RemoveVRSaveCode(MI); 357 return; 358 } 359 360 Register SrcReg = MI.getOperand(1).getReg(); 361 Register DstReg = MI.getOperand(0).getReg(); 362 363 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 364 if (DstReg != SrcReg) 365 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 366 .addReg(SrcReg) 367 .addImm(UsedRegMask); 368 else 369 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 370 .addReg(SrcReg, RegState::Kill) 371 .addImm(UsedRegMask); 372 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 373 if (DstReg != SrcReg) 374 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 375 .addReg(SrcReg) 376 .addImm(UsedRegMask >> 16); 377 else 378 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 379 .addReg(SrcReg, RegState::Kill) 380 .addImm(UsedRegMask >> 16); 381 } else { 382 if (DstReg != SrcReg) 383 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 384 .addReg(SrcReg) 385 .addImm(UsedRegMask >> 16); 386 else 387 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 388 .addReg(SrcReg, RegState::Kill) 389 .addImm(UsedRegMask >> 16); 390 391 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 392 .addReg(DstReg, RegState::Kill) 393 .addImm(UsedRegMask & 0xFFFF); 394 } 395 396 // Remove the old UPDATE_VRSAVE instruction. 397 MI.eraseFromParent(); 398 } 399 400 static bool spillsCR(const MachineFunction &MF) { 401 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 402 return FuncInfo->isCRSpilled(); 403 } 404 405 static bool spillsVRSAVE(const MachineFunction &MF) { 406 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 407 return FuncInfo->isVRSAVESpilled(); 408 } 409 410 static bool hasSpills(const MachineFunction &MF) { 411 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 412 return FuncInfo->hasSpills(); 413 } 414 415 static bool hasNonRISpills(const MachineFunction &MF) { 416 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 417 return FuncInfo->hasNonRISpills(); 418 } 419 420 /// MustSaveLR - Return true if this function requires that we save the LR 421 /// register onto the stack in the prolog and restore it in the epilog of the 422 /// function. 423 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 424 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 425 426 // We need a save/restore of LR if there is any def of LR (which is 427 // defined by calls, including the PIC setup sequence), or if there is 428 // some use of the LR stack slot (e.g. for builtin_return_address). 429 // (LR comes in 32 and 64 bit versions.) 430 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 431 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 432 } 433 434 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 435 /// call frame size. Update the MachineFunction object with the stack size. 436 unsigned 437 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 438 bool UseEstimate) const { 439 unsigned NewMaxCallFrameSize = 0; 440 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 441 &NewMaxCallFrameSize); 442 MF.getFrameInfo().setStackSize(FrameSize); 443 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 444 return FrameSize; 445 } 446 447 /// determineFrameLayout - Determine the size of the frame and maximum call 448 /// frame size. 449 unsigned 450 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 451 bool UseEstimate, 452 unsigned *NewMaxCallFrameSize) const { 453 const MachineFrameInfo &MFI = MF.getFrameInfo(); 454 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 455 456 // Get the number of bytes to allocate from the FrameInfo 457 unsigned FrameSize = 458 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 459 460 // Get stack alignments. The frame must be aligned to the greatest of these: 461 Align TargetAlign = getStackAlign(); // alignment required per the ABI 462 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 463 Align Alignment = std::max(TargetAlign, MaxAlign); 464 465 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 466 467 unsigned LR = RegInfo->getRARegister(); 468 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 469 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 470 !MFI.adjustsStack() && // No calls. 471 !MustSaveLR(MF, LR) && // No need to save LR. 472 !FI->mustSaveTOC() && // No need to save TOC. 473 !RegInfo->hasBasePointer(MF); // No special alignment. 474 475 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 476 // code if all local vars are reg-allocated. 477 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 478 479 // Check whether we can skip adjusting the stack pointer (by using red zone) 480 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 481 // No need for frame 482 return 0; 483 } 484 485 // Get the maximum call frame size of all the calls. 486 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 487 488 // Maximum call frame needs to be at least big enough for linkage area. 489 unsigned minCallFrameSize = getLinkageSize(); 490 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 491 492 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 493 // that allocations will be aligned. 494 if (MFI.hasVarSizedObjects()) 495 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 496 497 // Update the new max call frame size if the caller passes in a valid pointer. 498 if (NewMaxCallFrameSize) 499 *NewMaxCallFrameSize = maxCallFrameSize; 500 501 // Include call frame size in total. 502 FrameSize += maxCallFrameSize; 503 504 // Make sure the frame is aligned. 505 FrameSize = alignTo(FrameSize, Alignment); 506 507 return FrameSize; 508 } 509 510 // hasFP - Return true if the specified function actually has a dedicated frame 511 // pointer register. 512 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 513 const MachineFrameInfo &MFI = MF.getFrameInfo(); 514 // FIXME: This is pretty much broken by design: hasFP() might be called really 515 // early, before the stack layout was calculated and thus hasFP() might return 516 // true or false here depending on the time of call. 517 return (MFI.getStackSize()) && needsFP(MF); 518 } 519 520 // needsFP - Return true if the specified function should have a dedicated frame 521 // pointer register. This is true if the function has variable sized allocas or 522 // if frame pointer elimination is disabled. 523 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 524 const MachineFrameInfo &MFI = MF.getFrameInfo(); 525 526 // Naked functions have no stack frame pushed, so we don't have a frame 527 // pointer. 528 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 529 return false; 530 531 return MF.getTarget().Options.DisableFramePointerElim(MF) || 532 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 533 (MF.getTarget().Options.GuaranteedTailCallOpt && 534 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 535 } 536 537 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 538 bool is31 = needsFP(MF); 539 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 540 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 541 542 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 543 bool HasBP = RegInfo->hasBasePointer(MF); 544 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 545 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 546 547 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 548 BI != BE; ++BI) 549 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 550 --MBBI; 551 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 552 MachineOperand &MO = MBBI->getOperand(I); 553 if (!MO.isReg()) 554 continue; 555 556 switch (MO.getReg()) { 557 case PPC::FP: 558 MO.setReg(FPReg); 559 break; 560 case PPC::FP8: 561 MO.setReg(FP8Reg); 562 break; 563 case PPC::BP: 564 MO.setReg(BPReg); 565 break; 566 case PPC::BP8: 567 MO.setReg(BP8Reg); 568 break; 569 570 } 571 } 572 } 573 } 574 575 /* This function will do the following: 576 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 577 respectively (defaults recommended by the ABI) and return true 578 - If MBB is not an entry block, initialize the register scavenger and look 579 for available registers. 580 - If the defaults (R0/R12) are available, return true 581 - If TwoUniqueRegsRequired is set to true, it looks for two unique 582 registers. Otherwise, look for a single available register. 583 - If the required registers are found, set SR1 and SR2 and return true. 584 - If the required registers are not found, set SR2 or both SR1 and SR2 to 585 PPC::NoRegister and return false. 586 587 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 588 is not set, this function will attempt to find two different registers, but 589 still return true if only one register is available (and set SR1 == SR2). 590 */ 591 bool 592 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 593 bool UseAtEnd, 594 bool TwoUniqueRegsRequired, 595 Register *SR1, 596 Register *SR2) const { 597 RegScavenger RS; 598 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 599 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 600 601 // Set the defaults for the two scratch registers. 602 if (SR1) 603 *SR1 = R0; 604 605 if (SR2) { 606 assert (SR1 && "Asking for the second scratch register but not the first?"); 607 *SR2 = R12; 608 } 609 610 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 611 if ((UseAtEnd && MBB->isReturnBlock()) || 612 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 613 return true; 614 615 RS.enterBasicBlock(*MBB); 616 617 if (UseAtEnd && !MBB->empty()) { 618 // The scratch register will be used at the end of the block, so must 619 // consider all registers used within the block 620 621 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 622 // If no terminator, back iterator up to previous instruction. 623 if (MBBI == MBB->end()) 624 MBBI = std::prev(MBBI); 625 626 if (MBBI != MBB->begin()) 627 RS.forward(MBBI); 628 } 629 630 // If the two registers are available, we're all good. 631 // Note that we only return here if both R0 and R12 are available because 632 // although the function may not require two unique registers, it may benefit 633 // from having two so we should try to provide them. 634 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 635 return true; 636 637 // Get the list of callee-saved registers for the target. 638 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 639 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 640 641 // Get all the available registers in the block. 642 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 643 &PPC::GPRCRegClass); 644 645 // We shouldn't use callee-saved registers as scratch registers as they may be 646 // available when looking for a candidate block for shrink wrapping but not 647 // available when the actual prologue/epilogue is being emitted because they 648 // were added as live-in to the prologue block by PrologueEpilogueInserter. 649 for (int i = 0; CSRegs[i]; ++i) 650 BV.reset(CSRegs[i]); 651 652 // Set the first scratch register to the first available one. 653 if (SR1) { 654 int FirstScratchReg = BV.find_first(); 655 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 656 } 657 658 // If there is another one available, set the second scratch register to that. 659 // Otherwise, set it to either PPC::NoRegister if this function requires two 660 // or to whatever SR1 is set to if this function doesn't require two. 661 if (SR2) { 662 int SecondScratchReg = BV.find_next(*SR1); 663 if (SecondScratchReg != -1) 664 *SR2 = SecondScratchReg; 665 else 666 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1; 667 } 668 669 // Now that we've done our best to provide both registers, double check 670 // whether we were unable to provide enough. 671 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 672 return false; 673 674 return true; 675 } 676 677 // We need a scratch register for spilling LR and for spilling CR. By default, 678 // we use two scratch registers to hide latency. However, if only one scratch 679 // register is available, we can adjust for that by not overlapping the spill 680 // code. However, if we need to realign the stack (i.e. have a base pointer) 681 // and the stack frame is large, we need two scratch registers. 682 bool 683 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 684 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 685 MachineFunction &MF = *(MBB->getParent()); 686 bool HasBP = RegInfo->hasBasePointer(MF); 687 unsigned FrameSize = determineFrameLayout(MF); 688 int NegFrameSize = -FrameSize; 689 bool IsLargeFrame = !isInt<16>(NegFrameSize); 690 MachineFrameInfo &MFI = MF.getFrameInfo(); 691 Align MaxAlign = MFI.getMaxAlign(); 692 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 693 694 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 695 } 696 697 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 698 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 699 700 return findScratchRegister(TmpMBB, false, 701 twoUniqueScratchRegsRequired(TmpMBB)); 702 } 703 704 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 705 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 706 707 return findScratchRegister(TmpMBB, true); 708 } 709 710 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { 711 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 712 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 713 714 // Abort if there is no register info or function info. 715 if (!RegInfo || !FI) 716 return false; 717 718 // Only move the stack update on ELFv2 ABI and PPC64. 719 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) 720 return false; 721 722 // Check the frame size first and return false if it does not fit the 723 // requirements. 724 // We need a non-zero frame size as well as a frame that will fit in the red 725 // zone. This is because by moving the stack pointer update we are now storing 726 // to the red zone until the stack pointer is updated. If we get an interrupt 727 // inside the prologue but before the stack update we now have a number of 728 // stores to the red zone and those stores must all fit. 729 MachineFrameInfo &MFI = MF.getFrameInfo(); 730 unsigned FrameSize = MFI.getStackSize(); 731 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 732 return false; 733 734 // Frame pointers and base pointers complicate matters so don't do anything 735 // if we have them. For example having a frame pointer will sometimes require 736 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 737 // difficult. 738 if (hasFP(MF) || RegInfo->hasBasePointer(MF)) 739 return false; 740 741 // Calls to fast_cc functions use different rules for passing parameters on 742 // the stack from the ABI and using PIC base in the function imposes 743 // similar restrictions to using the base pointer. It is not generally safe 744 // to move the stack pointer update in these situations. 745 if (FI->hasFastCall() || FI->usesPICBase()) 746 return false; 747 748 // Finally we can move the stack update if we do not require register 749 // scavenging. Register scavenging can introduce more spills and so 750 // may make the frame size larger than we have computed. 751 return !RegInfo->requiresFrameIndexScavenging(MF); 752 } 753 754 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 755 MachineBasicBlock &MBB) const { 756 MachineBasicBlock::iterator MBBI = MBB.begin(); 757 MachineFrameInfo &MFI = MF.getFrameInfo(); 758 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 759 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 760 761 MachineModuleInfo &MMI = MF.getMMI(); 762 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 763 DebugLoc dl; 764 // AIX assembler does not support cfi directives. 765 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 766 767 // Get processor type. 768 bool isPPC64 = Subtarget.isPPC64(); 769 // Get the ABI. 770 bool isSVR4ABI = Subtarget.isSVR4ABI(); 771 bool isAIXABI = Subtarget.isAIXABI(); 772 bool isELFv2ABI = Subtarget.isELFv2ABI(); 773 assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); 774 775 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 776 // process it. 777 if (!isSVR4ABI) 778 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 779 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 780 if (isAIXABI) 781 report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); 782 HandleVRSaveUpdate(*MBBI, TII); 783 break; 784 } 785 } 786 787 // Move MBBI back to the beginning of the prologue block. 788 MBBI = MBB.begin(); 789 790 // Work out frame sizes. 791 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 792 int NegFrameSize = -FrameSize; 793 if (!isInt<32>(NegFrameSize)) 794 llvm_unreachable("Unhandled stack size!"); 795 796 if (MFI.isFrameAddressTaken()) 797 replaceFPWithRealFP(MF); 798 799 // Check if the link register (LR) must be saved. 800 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 801 bool MustSaveLR = FI->mustSaveLR(); 802 bool MustSaveTOC = FI->mustSaveTOC(); 803 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 804 bool MustSaveCR = !MustSaveCRs.empty(); 805 // Do we have a frame pointer and/or base pointer for this function? 806 bool HasFP = hasFP(MF); 807 bool HasBP = RegInfo->hasBasePointer(MF); 808 bool HasRedZone = isPPC64 || !isSVR4ABI; 809 810 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 811 Register BPReg = RegInfo->getBaseRegister(MF); 812 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 813 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 814 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 815 Register ScratchReg; 816 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 817 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 818 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 819 : PPC::MFLR ); 820 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 821 : PPC::STW ); 822 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 823 : PPC::STWU ); 824 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 825 : PPC::STWUX); 826 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 827 : PPC::LIS ); 828 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 829 : PPC::ORI ); 830 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 831 : PPC::OR ); 832 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 833 : PPC::SUBFC); 834 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 835 : PPC::SUBFIC); 836 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 837 : PPC::MFCR); 838 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 839 840 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 841 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 842 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 843 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 844 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 845 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 846 847 // Using the same bool variable as below to suppress compiler warnings. 848 bool SingleScratchReg = 849 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 850 &ScratchReg, &TempReg); 851 assert(SingleScratchReg && 852 "Required number of registers not available in this block"); 853 854 SingleScratchReg = ScratchReg == TempReg; 855 856 int LROffset = getReturnSaveOffset(); 857 858 int FPOffset = 0; 859 if (HasFP) { 860 if (isSVR4ABI) { 861 MachineFrameInfo &MFI = MF.getFrameInfo(); 862 int FPIndex = FI->getFramePointerSaveIndex(); 863 assert(FPIndex && "No Frame Pointer Save Slot!"); 864 FPOffset = MFI.getObjectOffset(FPIndex); 865 } else { 866 FPOffset = getFramePointerSaveOffset(); 867 } 868 } 869 870 int BPOffset = 0; 871 if (HasBP) { 872 if (isSVR4ABI) { 873 MachineFrameInfo &MFI = MF.getFrameInfo(); 874 int BPIndex = FI->getBasePointerSaveIndex(); 875 assert(BPIndex && "No Base Pointer Save Slot!"); 876 BPOffset = MFI.getObjectOffset(BPIndex); 877 } else { 878 BPOffset = getBasePointerSaveOffset(); 879 } 880 } 881 882 int PBPOffset = 0; 883 if (FI->usesPICBase()) { 884 MachineFrameInfo &MFI = MF.getFrameInfo(); 885 int PBPIndex = FI->getPICBasePointerSaveIndex(); 886 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 887 PBPOffset = MFI.getObjectOffset(PBPIndex); 888 } 889 890 // Get stack alignments. 891 Align MaxAlign = MFI.getMaxAlign(); 892 if (HasBP && MaxAlign > 1) 893 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 894 895 // Frames of 32KB & larger require special handling because they cannot be 896 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 897 bool isLargeFrame = !isInt<16>(NegFrameSize); 898 899 // Check if we can move the stack update instruction (stdu) down the prologue 900 // past the callee saves. Hopefully this will avoid the situation where the 901 // saves are waiting for the update on the store with update to complete. 902 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 903 bool MovingStackUpdateDown = false; 904 905 // Check if we can move the stack update. 906 if (stackUpdateCanBeMoved(MF)) { 907 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 908 for (CalleeSavedInfo CSI : Info) { 909 int FrIdx = CSI.getFrameIdx(); 910 // If the frame index is not negative the callee saved info belongs to a 911 // stack object that is not a fixed stack object. We ignore non-fixed 912 // stack objects because we won't move the stack update pointer past them. 913 if (FrIdx >= 0) 914 continue; 915 916 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 917 StackUpdateLoc++; 918 MovingStackUpdateDown = true; 919 } else { 920 // We need all of the Frame Indices to meet these conditions. 921 // If they do not, abort the whole operation. 922 StackUpdateLoc = MBBI; 923 MovingStackUpdateDown = false; 924 break; 925 } 926 } 927 928 // If the operation was not aborted then update the object offset. 929 if (MovingStackUpdateDown) { 930 for (CalleeSavedInfo CSI : Info) { 931 int FrIdx = CSI.getFrameIdx(); 932 if (FrIdx < 0) 933 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 934 } 935 } 936 } 937 938 // Where in the prologue we move the CR fields depends on how many scratch 939 // registers we have, and if we need to save the link register or not. This 940 // lambda is to avoid duplicating the logic in 2 places. 941 auto BuildMoveFromCR = [&]() { 942 if (isELFv2ABI && MustSaveCRs.size() == 1) { 943 // In the ELFv2 ABI, we are not required to save all CR fields. 944 // If only one CR field is clobbered, it is more efficient to use 945 // mfocrf to selectively save just that field, because mfocrf has short 946 // latency compares to mfcr. 947 assert(isPPC64 && "V2 ABI is 64-bit only."); 948 MachineInstrBuilder MIB = 949 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 950 MIB.addReg(MustSaveCRs[0], RegState::Kill); 951 } else { 952 MachineInstrBuilder MIB = 953 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 954 for (unsigned CRfield : MustSaveCRs) 955 MIB.addReg(CRfield, RegState::ImplicitKill); 956 } 957 }; 958 959 // If we need to spill the CR and the LR but we don't have two separate 960 // registers available, we must spill them one at a time 961 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 962 BuildMoveFromCR(); 963 BuildMI(MBB, MBBI, dl, StoreWordInst) 964 .addReg(TempReg, getKillRegState(true)) 965 .addImm(CRSaveOffset) 966 .addReg(SPReg); 967 } 968 969 if (MustSaveLR) 970 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 971 972 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 973 BuildMoveFromCR(); 974 975 if (HasRedZone) { 976 if (HasFP) 977 BuildMI(MBB, MBBI, dl, StoreInst) 978 .addReg(FPReg) 979 .addImm(FPOffset) 980 .addReg(SPReg); 981 if (FI->usesPICBase()) 982 BuildMI(MBB, MBBI, dl, StoreInst) 983 .addReg(PPC::R30) 984 .addImm(PBPOffset) 985 .addReg(SPReg); 986 if (HasBP) 987 BuildMI(MBB, MBBI, dl, StoreInst) 988 .addReg(BPReg) 989 .addImm(BPOffset) 990 .addReg(SPReg); 991 } 992 993 if (MustSaveLR) 994 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 995 .addReg(ScratchReg, getKillRegState(true)) 996 .addImm(LROffset) 997 .addReg(SPReg); 998 999 if (MustSaveCR && 1000 !(SingleScratchReg && MustSaveLR)) { 1001 assert(HasRedZone && "A red zone is always available on PPC64"); 1002 BuildMI(MBB, MBBI, dl, StoreWordInst) 1003 .addReg(TempReg, getKillRegState(true)) 1004 .addImm(CRSaveOffset) 1005 .addReg(SPReg); 1006 } 1007 1008 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 1009 if (!FrameSize) 1010 return; 1011 1012 // Adjust stack pointer: r1 += NegFrameSize. 1013 // If there is a preferred stack alignment, align R1 now 1014 1015 if (HasBP && HasRedZone) { 1016 // Save a copy of r1 as the base pointer. 1017 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1018 .addReg(SPReg) 1019 .addReg(SPReg); 1020 } 1021 1022 // Have we generated a STUX instruction to claim stack frame? If so, 1023 // the negated frame size will be placed in ScratchReg. 1024 bool HasSTUX = false; 1025 1026 // This condition must be kept in sync with canUseAsPrologue. 1027 if (HasBP && MaxAlign > 1) { 1028 if (isPPC64) 1029 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1030 .addReg(SPReg) 1031 .addImm(0) 1032 .addImm(64 - Log2(MaxAlign)); 1033 else // PPC32... 1034 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1035 .addReg(SPReg) 1036 .addImm(0) 1037 .addImm(32 - Log2(MaxAlign)) 1038 .addImm(31); 1039 if (!isLargeFrame) { 1040 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1041 .addReg(ScratchReg, RegState::Kill) 1042 .addImm(NegFrameSize); 1043 } else { 1044 assert(!SingleScratchReg && "Only a single scratch reg available"); 1045 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1046 .addImm(NegFrameSize >> 16); 1047 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1048 .addReg(TempReg, RegState::Kill) 1049 .addImm(NegFrameSize & 0xFFFF); 1050 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1051 .addReg(ScratchReg, RegState::Kill) 1052 .addReg(TempReg, RegState::Kill); 1053 } 1054 1055 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1056 .addReg(SPReg, RegState::Kill) 1057 .addReg(SPReg) 1058 .addReg(ScratchReg); 1059 HasSTUX = true; 1060 1061 } else if (!isLargeFrame) { 1062 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1063 .addReg(SPReg) 1064 .addImm(NegFrameSize) 1065 .addReg(SPReg); 1066 1067 } else { 1068 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1069 .addImm(NegFrameSize >> 16); 1070 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1071 .addReg(ScratchReg, RegState::Kill) 1072 .addImm(NegFrameSize & 0xFFFF); 1073 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1074 .addReg(SPReg, RegState::Kill) 1075 .addReg(SPReg) 1076 .addReg(ScratchReg); 1077 HasSTUX = true; 1078 } 1079 1080 // Save the TOC register after the stack pointer update if a prologue TOC 1081 // save is required for the function. 1082 if (MustSaveTOC) { 1083 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1084 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1085 .addReg(TOCReg, getKillRegState(true)) 1086 .addImm(TOCSaveOffset) 1087 .addReg(SPReg); 1088 } 1089 1090 if (!HasRedZone) { 1091 assert(!isPPC64 && "A red zone is always available on PPC64"); 1092 if (HasSTUX) { 1093 // The negated frame size is in ScratchReg, and the SPReg has been 1094 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1095 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1096 // the stack frame (i.e. the old SP), ideally, we would put the old 1097 // SP into a register and use it as the base for the stores. The 1098 // problem is that the only available register may be ScratchReg, 1099 // which could be R0, and R0 cannot be used as a base address. 1100 1101 // First, set ScratchReg to the old SP. This may need to be modified 1102 // later. 1103 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1104 .addReg(ScratchReg, RegState::Kill) 1105 .addReg(SPReg); 1106 1107 if (ScratchReg == PPC::R0) { 1108 // R0 cannot be used as a base register, but it can be used as an 1109 // index in a store-indexed. 1110 int LastOffset = 0; 1111 if (HasFP) { 1112 // R0 += (FPOffset-LastOffset). 1113 // Need addic, since addi treats R0 as 0. 1114 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1115 .addReg(ScratchReg) 1116 .addImm(FPOffset-LastOffset); 1117 LastOffset = FPOffset; 1118 // Store FP into *R0. 1119 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1120 .addReg(FPReg, RegState::Kill) // Save FP. 1121 .addReg(PPC::ZERO) 1122 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1123 } 1124 if (FI->usesPICBase()) { 1125 // R0 += (PBPOffset-LastOffset). 1126 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1127 .addReg(ScratchReg) 1128 .addImm(PBPOffset-LastOffset); 1129 LastOffset = PBPOffset; 1130 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1131 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1132 .addReg(PPC::ZERO) 1133 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1134 } 1135 if (HasBP) { 1136 // R0 += (BPOffset-LastOffset). 1137 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1138 .addReg(ScratchReg) 1139 .addImm(BPOffset-LastOffset); 1140 LastOffset = BPOffset; 1141 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1142 .addReg(BPReg, RegState::Kill) // Save BP. 1143 .addReg(PPC::ZERO) 1144 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1145 // BP = R0-LastOffset 1146 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1147 .addReg(ScratchReg, RegState::Kill) 1148 .addImm(-LastOffset); 1149 } 1150 } else { 1151 // ScratchReg is not R0, so use it as the base register. It is 1152 // already set to the old SP, so we can use the offsets directly. 1153 1154 // Now that the stack frame has been allocated, save all the necessary 1155 // registers using ScratchReg as the base address. 1156 if (HasFP) 1157 BuildMI(MBB, MBBI, dl, StoreInst) 1158 .addReg(FPReg) 1159 .addImm(FPOffset) 1160 .addReg(ScratchReg); 1161 if (FI->usesPICBase()) 1162 BuildMI(MBB, MBBI, dl, StoreInst) 1163 .addReg(PPC::R30) 1164 .addImm(PBPOffset) 1165 .addReg(ScratchReg); 1166 if (HasBP) { 1167 BuildMI(MBB, MBBI, dl, StoreInst) 1168 .addReg(BPReg) 1169 .addImm(BPOffset) 1170 .addReg(ScratchReg); 1171 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1172 .addReg(ScratchReg, RegState::Kill) 1173 .addReg(ScratchReg); 1174 } 1175 } 1176 } else { 1177 // The frame size is a known 16-bit constant (fitting in the immediate 1178 // field of STWU). To be here we have to be compiling for PPC32. 1179 // Since the SPReg has been decreased by FrameSize, add it back to each 1180 // offset. 1181 if (HasFP) 1182 BuildMI(MBB, MBBI, dl, StoreInst) 1183 .addReg(FPReg) 1184 .addImm(FrameSize + FPOffset) 1185 .addReg(SPReg); 1186 if (FI->usesPICBase()) 1187 BuildMI(MBB, MBBI, dl, StoreInst) 1188 .addReg(PPC::R30) 1189 .addImm(FrameSize + PBPOffset) 1190 .addReg(SPReg); 1191 if (HasBP) { 1192 BuildMI(MBB, MBBI, dl, StoreInst) 1193 .addReg(BPReg) 1194 .addImm(FrameSize + BPOffset) 1195 .addReg(SPReg); 1196 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1197 .addReg(SPReg) 1198 .addImm(FrameSize); 1199 } 1200 } 1201 } 1202 1203 // Add Call Frame Information for the instructions we generated above. 1204 if (needsCFI) { 1205 unsigned CFIIndex; 1206 1207 if (HasBP) { 1208 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1209 // because if the stack needed aligning then CFA won't be at a fixed 1210 // offset from FP/SP. 1211 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1212 CFIIndex = MF.addFrameInst( 1213 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1214 } else { 1215 // Adjust the definition of CFA to account for the change in SP. 1216 assert(NegFrameSize); 1217 CFIIndex = MF.addFrameInst( 1218 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); 1219 } 1220 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1221 .addCFIIndex(CFIIndex); 1222 1223 if (HasFP) { 1224 // Describe where FP was saved, at a fixed offset from CFA. 1225 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1226 CFIIndex = MF.addFrameInst( 1227 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1228 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1229 .addCFIIndex(CFIIndex); 1230 } 1231 1232 if (FI->usesPICBase()) { 1233 // Describe where FP was saved, at a fixed offset from CFA. 1234 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1235 CFIIndex = MF.addFrameInst( 1236 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1237 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1238 .addCFIIndex(CFIIndex); 1239 } 1240 1241 if (HasBP) { 1242 // Describe where BP was saved, at a fixed offset from CFA. 1243 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1244 CFIIndex = MF.addFrameInst( 1245 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1246 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1247 .addCFIIndex(CFIIndex); 1248 } 1249 1250 if (MustSaveLR) { 1251 // Describe where LR was saved, at a fixed offset from CFA. 1252 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1253 CFIIndex = MF.addFrameInst( 1254 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1255 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1256 .addCFIIndex(CFIIndex); 1257 } 1258 } 1259 1260 // If there is a frame pointer, copy R1 into R31 1261 if (HasFP) { 1262 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1263 .addReg(SPReg) 1264 .addReg(SPReg); 1265 1266 if (!HasBP && needsCFI) { 1267 // Change the definition of CFA from SP+offset to FP+offset, because SP 1268 // will change at every alloca. 1269 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1270 unsigned CFIIndex = MF.addFrameInst( 1271 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1272 1273 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1274 .addCFIIndex(CFIIndex); 1275 } 1276 } 1277 1278 if (needsCFI) { 1279 // Describe where callee saved registers were saved, at fixed offsets from 1280 // CFA. 1281 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1282 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1283 unsigned Reg = CSI[I].getReg(); 1284 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1285 1286 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1287 // subregisters of CR2. We just need to emit a move of CR2. 1288 if (PPC::CRBITRCRegClass.contains(Reg)) 1289 continue; 1290 1291 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1292 continue; 1293 1294 // For SVR4, don't emit a move for the CR spill slot if we haven't 1295 // spilled CRs. 1296 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1297 && !MustSaveCR) 1298 continue; 1299 1300 // For 64-bit SVR4 when we have spilled CRs, the spill location 1301 // is SP+8, not a frame-relative slot. 1302 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1303 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1304 // the whole CR word. In the ELFv2 ABI, every CR that was 1305 // actually saved gets its own CFI record. 1306 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1307 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1308 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1309 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1310 .addCFIIndex(CFIIndex); 1311 continue; 1312 } 1313 1314 if (CSI[I].isSpilledToReg()) { 1315 unsigned SpilledReg = CSI[I].getDstReg(); 1316 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1317 nullptr, MRI->getDwarfRegNum(Reg, true), 1318 MRI->getDwarfRegNum(SpilledReg, true))); 1319 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1320 .addCFIIndex(CFIRegister); 1321 } else { 1322 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1323 // We have changed the object offset above but we do not want to change 1324 // the actual offsets in the CFI instruction so we have to undo the 1325 // offset change here. 1326 if (MovingStackUpdateDown) 1327 Offset -= NegFrameSize; 1328 1329 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1330 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1331 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1332 .addCFIIndex(CFIIndex); 1333 } 1334 } 1335 } 1336 } 1337 1338 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1339 MachineBasicBlock &MBB) const { 1340 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1341 DebugLoc dl; 1342 1343 if (MBBI != MBB.end()) 1344 dl = MBBI->getDebugLoc(); 1345 1346 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1347 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1348 1349 // Get alignment info so we know how to restore the SP. 1350 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1351 1352 // Get the number of bytes allocated from the FrameInfo. 1353 int FrameSize = MFI.getStackSize(); 1354 1355 // Get processor type. 1356 bool isPPC64 = Subtarget.isPPC64(); 1357 // Get the ABI. 1358 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1359 1360 // Check if the link register (LR) has been saved. 1361 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1362 bool MustSaveLR = FI->mustSaveLR(); 1363 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 1364 bool MustSaveCR = !MustSaveCRs.empty(); 1365 // Do we have a frame pointer and/or base pointer for this function? 1366 bool HasFP = hasFP(MF); 1367 bool HasBP = RegInfo->hasBasePointer(MF); 1368 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1369 1370 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1371 Register BPReg = RegInfo->getBaseRegister(MF); 1372 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1373 Register ScratchReg; 1374 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1375 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1376 : PPC::MTLR ); 1377 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1378 : PPC::LWZ ); 1379 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1380 : PPC::LIS ); 1381 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1382 : PPC::OR ); 1383 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1384 : PPC::ORI ); 1385 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1386 : PPC::ADDI ); 1387 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1388 : PPC::ADD4 ); 1389 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 1390 : PPC::LWZ); 1391 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 1392 : PPC::MTOCRF); 1393 int LROffset = getReturnSaveOffset(); 1394 1395 int FPOffset = 0; 1396 1397 // Using the same bool variable as below to suppress compiler warnings. 1398 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1399 &TempReg); 1400 assert(SingleScratchReg && 1401 "Could not find an available scratch register"); 1402 1403 SingleScratchReg = ScratchReg == TempReg; 1404 1405 if (HasFP) { 1406 if (isSVR4ABI) { 1407 int FPIndex = FI->getFramePointerSaveIndex(); 1408 assert(FPIndex && "No Frame Pointer Save Slot!"); 1409 FPOffset = MFI.getObjectOffset(FPIndex); 1410 } else { 1411 FPOffset = getFramePointerSaveOffset(); 1412 } 1413 } 1414 1415 int BPOffset = 0; 1416 if (HasBP) { 1417 if (isSVR4ABI) { 1418 int BPIndex = FI->getBasePointerSaveIndex(); 1419 assert(BPIndex && "No Base Pointer Save Slot!"); 1420 BPOffset = MFI.getObjectOffset(BPIndex); 1421 } else { 1422 BPOffset = getBasePointerSaveOffset(); 1423 } 1424 } 1425 1426 int PBPOffset = 0; 1427 if (FI->usesPICBase()) { 1428 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1429 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1430 PBPOffset = MFI.getObjectOffset(PBPIndex); 1431 } 1432 1433 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1434 1435 if (IsReturnBlock) { 1436 unsigned RetOpcode = MBBI->getOpcode(); 1437 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1438 RetOpcode == PPC::TCRETURNdi || 1439 RetOpcode == PPC::TCRETURNai || 1440 RetOpcode == PPC::TCRETURNri8 || 1441 RetOpcode == PPC::TCRETURNdi8 || 1442 RetOpcode == PPC::TCRETURNai8; 1443 1444 if (UsesTCRet) { 1445 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1446 MachineOperand &StackAdjust = MBBI->getOperand(1); 1447 assert(StackAdjust.isImm() && "Expecting immediate value."); 1448 // Adjust stack pointer. 1449 int StackAdj = StackAdjust.getImm(); 1450 int Delta = StackAdj - MaxTCRetDelta; 1451 assert((Delta >= 0) && "Delta must be positive"); 1452 if (MaxTCRetDelta>0) 1453 FrameSize += (StackAdj +Delta); 1454 else 1455 FrameSize += StackAdj; 1456 } 1457 } 1458 1459 // Frames of 32KB & larger require special handling because they cannot be 1460 // indexed into with a simple LD/LWZ immediate offset operand. 1461 bool isLargeFrame = !isInt<16>(FrameSize); 1462 1463 // On targets without red zone, the SP needs to be restored last, so that 1464 // all live contents of the stack frame are upwards of the SP. This means 1465 // that we cannot restore SP just now, since there may be more registers 1466 // to restore from the stack frame (e.g. R31). If the frame size is not 1467 // a simple immediate value, we will need a spare register to hold the 1468 // restored SP. If the frame size is known and small, we can simply adjust 1469 // the offsets of the registers to be restored, and still use SP to restore 1470 // them. In such case, the final update of SP will be to add the frame 1471 // size to it. 1472 // To simplify the code, set RBReg to the base register used to restore 1473 // values from the stack, and set SPAdd to the value that needs to be added 1474 // to the SP at the end. The default values are as if red zone was present. 1475 unsigned RBReg = SPReg; 1476 unsigned SPAdd = 0; 1477 1478 // Check if we can move the stack update instruction up the epilogue 1479 // past the callee saves. This will allow the move to LR instruction 1480 // to be executed before the restores of the callee saves which means 1481 // that the callee saves can hide the latency from the MTLR instrcution. 1482 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1483 if (stackUpdateCanBeMoved(MF)) { 1484 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1485 for (CalleeSavedInfo CSI : Info) { 1486 int FrIdx = CSI.getFrameIdx(); 1487 // If the frame index is not negative the callee saved info belongs to a 1488 // stack object that is not a fixed stack object. We ignore non-fixed 1489 // stack objects because we won't move the update of the stack pointer 1490 // past them. 1491 if (FrIdx >= 0) 1492 continue; 1493 1494 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1495 StackUpdateLoc--; 1496 else { 1497 // Abort the operation as we can't update all CSR restores. 1498 StackUpdateLoc = MBBI; 1499 break; 1500 } 1501 } 1502 } 1503 1504 if (FrameSize) { 1505 // In the prologue, the loaded (or persistent) stack pointer value is 1506 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1507 // zone add this offset back now. 1508 1509 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1510 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1511 // call which invalidates the stack pointer value in SP(0). So we use the 1512 // value of R31 in this case. 1513 if (FI->hasFastCall()) { 1514 assert(HasFP && "Expecting a valid frame pointer."); 1515 if (!HasRedZone) 1516 RBReg = FPReg; 1517 if (!isLargeFrame) { 1518 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1519 .addReg(FPReg).addImm(FrameSize); 1520 } else { 1521 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1522 .addImm(FrameSize >> 16); 1523 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1524 .addReg(ScratchReg, RegState::Kill) 1525 .addImm(FrameSize & 0xFFFF); 1526 BuildMI(MBB, MBBI, dl, AddInst) 1527 .addReg(RBReg) 1528 .addReg(FPReg) 1529 .addReg(ScratchReg); 1530 } 1531 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1532 if (HasRedZone) { 1533 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1534 .addReg(SPReg) 1535 .addImm(FrameSize); 1536 } else { 1537 // Make sure that adding FrameSize will not overflow the max offset 1538 // size. 1539 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1540 "Local offsets should be negative"); 1541 SPAdd = FrameSize; 1542 FPOffset += FrameSize; 1543 BPOffset += FrameSize; 1544 PBPOffset += FrameSize; 1545 } 1546 } else { 1547 // We don't want to use ScratchReg as a base register, because it 1548 // could happen to be R0. Use FP instead, but make sure to preserve it. 1549 if (!HasRedZone) { 1550 // If FP is not saved, copy it to ScratchReg. 1551 if (!HasFP) 1552 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1553 .addReg(FPReg) 1554 .addReg(FPReg); 1555 RBReg = FPReg; 1556 } 1557 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1558 .addImm(0) 1559 .addReg(SPReg); 1560 } 1561 } 1562 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1563 // If there is no red zone, ScratchReg may be needed for holding a useful 1564 // value (although not the base register). Make sure it is not overwritten 1565 // too early. 1566 1567 // If we need to restore both the LR and the CR and we only have one 1568 // available scratch register, we must do them one at a time. 1569 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1570 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1571 // is live here. 1572 assert(HasRedZone && "Expecting red zone"); 1573 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1574 .addImm(CRSaveOffset) 1575 .addReg(SPReg); 1576 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1577 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1578 .addReg(TempReg, getKillRegState(i == e-1)); 1579 } 1580 1581 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1582 // LR is stored in the caller's stack frame. ScratchReg will be needed 1583 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1584 // a base register anyway, because it may happen to be R0. 1585 bool LoadedLR = false; 1586 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1587 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1588 .addImm(LROffset+SPAdd) 1589 .addReg(RBReg); 1590 LoadedLR = true; 1591 } 1592 1593 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1594 assert(RBReg == SPReg && "Should be using SP as a base register"); 1595 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1596 .addImm(CRSaveOffset) 1597 .addReg(RBReg); 1598 } 1599 1600 if (HasFP) { 1601 // If there is red zone, restore FP directly, since SP has already been 1602 // restored. Otherwise, restore the value of FP into ScratchReg. 1603 if (HasRedZone || RBReg == SPReg) 1604 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1605 .addImm(FPOffset) 1606 .addReg(SPReg); 1607 else 1608 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1609 .addImm(FPOffset) 1610 .addReg(RBReg); 1611 } 1612 1613 if (FI->usesPICBase()) 1614 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1615 .addImm(PBPOffset) 1616 .addReg(RBReg); 1617 1618 if (HasBP) 1619 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1620 .addImm(BPOffset) 1621 .addReg(RBReg); 1622 1623 // There is nothing more to be loaded from the stack, so now we can 1624 // restore SP: SP = RBReg + SPAdd. 1625 if (RBReg != SPReg || SPAdd != 0) { 1626 assert(!HasRedZone && "This should not happen with red zone"); 1627 // If SPAdd is 0, generate a copy. 1628 if (SPAdd == 0) 1629 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1630 .addReg(RBReg) 1631 .addReg(RBReg); 1632 else 1633 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1634 .addReg(RBReg) 1635 .addImm(SPAdd); 1636 1637 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1638 if (RBReg == FPReg) 1639 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1640 .addReg(ScratchReg) 1641 .addReg(ScratchReg); 1642 1643 // Now load the LR from the caller's stack frame. 1644 if (MustSaveLR && !LoadedLR) 1645 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1646 .addImm(LROffset) 1647 .addReg(SPReg); 1648 } 1649 1650 if (MustSaveCR && 1651 !(SingleScratchReg && MustSaveLR)) 1652 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1653 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1654 .addReg(TempReg, getKillRegState(i == e-1)); 1655 1656 if (MustSaveLR) 1657 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1658 1659 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1660 // call optimization 1661 if (IsReturnBlock) { 1662 unsigned RetOpcode = MBBI->getOpcode(); 1663 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1664 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1665 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1666 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1667 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1668 1669 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1670 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1671 .addReg(SPReg).addImm(CallerAllocatedAmt); 1672 } else { 1673 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1674 .addImm(CallerAllocatedAmt >> 16); 1675 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1676 .addReg(ScratchReg, RegState::Kill) 1677 .addImm(CallerAllocatedAmt & 0xFFFF); 1678 BuildMI(MBB, MBBI, dl, AddInst) 1679 .addReg(SPReg) 1680 .addReg(FPReg) 1681 .addReg(ScratchReg); 1682 } 1683 } else { 1684 createTailCallBranchInstr(MBB); 1685 } 1686 } 1687 } 1688 1689 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1690 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1691 1692 // If we got this far a first terminator should exist. 1693 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1694 1695 DebugLoc dl = MBBI->getDebugLoc(); 1696 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1697 1698 // Create branch instruction for pseudo tail call return instruction. 1699 // The TCRETURNdi variants are direct calls. Valid targets for those are 1700 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel 1701 // since we can tail call external functions with PC-Rel (i.e. we don't need 1702 // to worry about different TOC pointers). Some of the external functions will 1703 // be MO_GlobalAddress while others like memcpy for example, are going to 1704 // be MO_ExternalSymbol. 1705 unsigned RetOpcode = MBBI->getOpcode(); 1706 if (RetOpcode == PPC::TCRETURNdi) { 1707 MBBI = MBB.getLastNonDebugInstr(); 1708 MachineOperand &JumpTarget = MBBI->getOperand(0); 1709 if (JumpTarget.isGlobal()) 1710 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1711 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1712 else if (JumpTarget.isSymbol()) 1713 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1714 addExternalSymbol(JumpTarget.getSymbolName()); 1715 else 1716 llvm_unreachable("Expecting Global or External Symbol"); 1717 } else if (RetOpcode == PPC::TCRETURNri) { 1718 MBBI = MBB.getLastNonDebugInstr(); 1719 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1720 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1721 } else if (RetOpcode == PPC::TCRETURNai) { 1722 MBBI = MBB.getLastNonDebugInstr(); 1723 MachineOperand &JumpTarget = MBBI->getOperand(0); 1724 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1725 } else if (RetOpcode == PPC::TCRETURNdi8) { 1726 MBBI = MBB.getLastNonDebugInstr(); 1727 MachineOperand &JumpTarget = MBBI->getOperand(0); 1728 if (JumpTarget.isGlobal()) 1729 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1730 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1731 else if (JumpTarget.isSymbol()) 1732 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1733 addExternalSymbol(JumpTarget.getSymbolName()); 1734 else 1735 llvm_unreachable("Expecting Global or External Symbol"); 1736 } else if (RetOpcode == PPC::TCRETURNri8) { 1737 MBBI = MBB.getLastNonDebugInstr(); 1738 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1739 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1740 } else if (RetOpcode == PPC::TCRETURNai8) { 1741 MBBI = MBB.getLastNonDebugInstr(); 1742 MachineOperand &JumpTarget = MBBI->getOperand(0); 1743 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1744 } 1745 } 1746 1747 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1748 BitVector &SavedRegs, 1749 RegScavenger *RS) const { 1750 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1751 1752 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1753 1754 // Save and clear the LR state. 1755 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1756 unsigned LR = RegInfo->getRARegister(); 1757 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1758 SavedRegs.reset(LR); 1759 1760 // Save R31 if necessary 1761 int FPSI = FI->getFramePointerSaveIndex(); 1762 const bool isPPC64 = Subtarget.isPPC64(); 1763 MachineFrameInfo &MFI = MF.getFrameInfo(); 1764 1765 // If the frame pointer save index hasn't been defined yet. 1766 if (!FPSI && needsFP(MF)) { 1767 // Find out what the fix offset of the frame pointer save area. 1768 int FPOffset = getFramePointerSaveOffset(); 1769 // Allocate the frame index for frame pointer save area. 1770 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1771 // Save the result. 1772 FI->setFramePointerSaveIndex(FPSI); 1773 } 1774 1775 int BPSI = FI->getBasePointerSaveIndex(); 1776 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1777 int BPOffset = getBasePointerSaveOffset(); 1778 // Allocate the frame index for the base pointer save area. 1779 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1780 // Save the result. 1781 FI->setBasePointerSaveIndex(BPSI); 1782 } 1783 1784 // Reserve stack space for the PIC Base register (R30). 1785 // Only used in SVR4 32-bit. 1786 if (FI->usesPICBase()) { 1787 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1788 FI->setPICBasePointerSaveIndex(PBPSI); 1789 } 1790 1791 // Make sure we don't explicitly spill r31, because, for example, we have 1792 // some inline asm which explicitly clobbers it, when we otherwise have a 1793 // frame pointer and are using r31's spill slot for the prologue/epilogue 1794 // code. Same goes for the base pointer and the PIC base register. 1795 if (needsFP(MF)) 1796 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1797 if (RegInfo->hasBasePointer(MF)) 1798 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1799 if (FI->usesPICBase()) 1800 SavedRegs.reset(PPC::R30); 1801 1802 // Reserve stack space to move the linkage area to in case of a tail call. 1803 int TCSPDelta = 0; 1804 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1805 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1806 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1807 } 1808 1809 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. 1810 // For 64-bit SVR4, and all flavors of AIX we create a FixedStack 1811 // object at the offset of the CR-save slot in the linkage area. The actual 1812 // save and restore of the condition register will be created as part of the 1813 // prologue and epilogue insertion, but the FixedStack object is needed to 1814 // keep the CalleSavedInfo valid. 1815 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || 1816 SavedRegs.test(PPC::CR4))) { 1817 const uint64_t SpillSize = 4; // Condition register is always 4 bytes. 1818 const int64_t SpillOffset = 1819 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4; 1820 int FrameIdx = 1821 MFI.CreateFixedObject(SpillSize, SpillOffset, 1822 /* IsImmutable */ true, /* IsAliased */ false); 1823 FI->setCRSpillFrameIndex(FrameIdx); 1824 } 1825 } 1826 1827 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1828 RegScavenger *RS) const { 1829 // Get callee saved register information. 1830 MachineFrameInfo &MFI = MF.getFrameInfo(); 1831 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1832 1833 // If the function is shrink-wrapped, and if the function has a tail call, the 1834 // tail call might not be in the new RestoreBlock, so real branch instruction 1835 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1836 // RestoreBlock. So we handle this case here. 1837 if (MFI.getSavePoint() && MFI.hasTailCall()) { 1838 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1839 for (MachineBasicBlock &MBB : MF) { 1840 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1841 createTailCallBranchInstr(MBB); 1842 } 1843 } 1844 1845 // Early exit if no callee saved registers are modified! 1846 if (CSI.empty() && !needsFP(MF)) { 1847 addScavengingSpillSlot(MF, RS); 1848 return; 1849 } 1850 1851 unsigned MinGPR = PPC::R31; 1852 unsigned MinG8R = PPC::X31; 1853 unsigned MinFPR = PPC::F31; 1854 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31; 1855 1856 bool HasGPSaveArea = false; 1857 bool HasG8SaveArea = false; 1858 bool HasFPSaveArea = false; 1859 bool HasVRSAVESaveArea = false; 1860 bool HasVRSaveArea = false; 1861 1862 SmallVector<CalleeSavedInfo, 18> GPRegs; 1863 SmallVector<CalleeSavedInfo, 18> G8Regs; 1864 SmallVector<CalleeSavedInfo, 18> FPRegs; 1865 SmallVector<CalleeSavedInfo, 18> VRegs; 1866 1867 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1868 unsigned Reg = CSI[i].getReg(); 1869 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 1870 (Reg != PPC::X2 && Reg != PPC::R2)) && 1871 "Not expecting to try to spill R2 in a function that must save TOC"); 1872 if (PPC::GPRCRegClass.contains(Reg)) { 1873 HasGPSaveArea = true; 1874 1875 GPRegs.push_back(CSI[i]); 1876 1877 if (Reg < MinGPR) { 1878 MinGPR = Reg; 1879 } 1880 } else if (PPC::G8RCRegClass.contains(Reg)) { 1881 HasG8SaveArea = true; 1882 1883 G8Regs.push_back(CSI[i]); 1884 1885 if (Reg < MinG8R) { 1886 MinG8R = Reg; 1887 } 1888 } else if (PPC::F8RCRegClass.contains(Reg)) { 1889 HasFPSaveArea = true; 1890 1891 FPRegs.push_back(CSI[i]); 1892 1893 if (Reg < MinFPR) { 1894 MinFPR = Reg; 1895 } 1896 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1897 PPC::CRRCRegClass.contains(Reg)) { 1898 ; // do nothing, as we already know whether CRs are spilled 1899 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 1900 HasVRSAVESaveArea = true; 1901 } else if (PPC::VRRCRegClass.contains(Reg) || 1902 PPC::SPERCRegClass.contains(Reg)) { 1903 // Altivec and SPE are mutually exclusive, but have the same stack 1904 // alignment requirements, so overload the save area for both cases. 1905 HasVRSaveArea = true; 1906 1907 VRegs.push_back(CSI[i]); 1908 1909 if (Reg < MinVR) { 1910 MinVR = Reg; 1911 } 1912 } else { 1913 llvm_unreachable("Unknown RegisterClass!"); 1914 } 1915 } 1916 1917 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1918 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1919 1920 int64_t LowerBound = 0; 1921 1922 // Take into account stack space reserved for tail calls. 1923 int TCSPDelta = 0; 1924 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1925 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1926 LowerBound = TCSPDelta; 1927 } 1928 1929 // The Floating-point register save area is right below the back chain word 1930 // of the previous stack frame. 1931 if (HasFPSaveArea) { 1932 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1933 int FI = FPRegs[i].getFrameIdx(); 1934 1935 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1936 } 1937 1938 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1939 } 1940 1941 // Check whether the frame pointer register is allocated. If so, make sure it 1942 // is spilled to the correct offset. 1943 if (needsFP(MF)) { 1944 int FI = PFI->getFramePointerSaveIndex(); 1945 assert(FI && "No Frame Pointer Save Slot!"); 1946 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1947 // FP is R31/X31, so no need to update MinGPR/MinG8R. 1948 HasGPSaveArea = true; 1949 } 1950 1951 if (PFI->usesPICBase()) { 1952 int FI = PFI->getPICBasePointerSaveIndex(); 1953 assert(FI && "No PIC Base Pointer Save Slot!"); 1954 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1955 1956 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 1957 HasGPSaveArea = true; 1958 } 1959 1960 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1961 if (RegInfo->hasBasePointer(MF)) { 1962 int FI = PFI->getBasePointerSaveIndex(); 1963 assert(FI && "No Base Pointer Save Slot!"); 1964 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1965 1966 Register BP = RegInfo->getBaseRegister(MF); 1967 if (PPC::G8RCRegClass.contains(BP)) { 1968 MinG8R = std::min<unsigned>(MinG8R, BP); 1969 HasG8SaveArea = true; 1970 } else if (PPC::GPRCRegClass.contains(BP)) { 1971 MinGPR = std::min<unsigned>(MinGPR, BP); 1972 HasGPSaveArea = true; 1973 } 1974 } 1975 1976 // General register save area starts right below the Floating-point 1977 // register save area. 1978 if (HasGPSaveArea || HasG8SaveArea) { 1979 // Move general register save area spill slots down, taking into account 1980 // the size of the Floating-point register save area. 1981 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1982 if (!GPRegs[i].isSpilledToReg()) { 1983 int FI = GPRegs[i].getFrameIdx(); 1984 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1985 } 1986 } 1987 1988 // Move general register save area spill slots down, taking into account 1989 // the size of the Floating-point register save area. 1990 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 1991 if (!G8Regs[i].isSpilledToReg()) { 1992 int FI = G8Regs[i].getFrameIdx(); 1993 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1994 } 1995 } 1996 1997 unsigned MinReg = 1998 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 1999 TRI->getEncodingValue(MinG8R)); 2000 2001 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; 2002 LowerBound -= (31 - MinReg + 1) * GPRegSize; 2003 } 2004 2005 // For 32-bit only, the CR save area is below the general register 2006 // save area. For 64-bit SVR4, the CR save area is addressed relative 2007 // to the stack pointer and hence does not need an adjustment here. 2008 // Only CR2 (the first nonvolatile spilled) has an associated frame 2009 // index so that we have a single uniform save area. 2010 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 2011 // Adjust the frame index of the CR spill slot. 2012 for (const auto &CSInfo : CSI) { 2013 if (CSInfo.getReg() == PPC::CR2) { 2014 int FI = CSInfo.getFrameIdx(); 2015 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2016 break; 2017 } 2018 } 2019 2020 LowerBound -= 4; // The CR save area is always 4 bytes long. 2021 } 2022 2023 if (HasVRSAVESaveArea) { 2024 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 2025 // which have the VRSAVE register class? 2026 // Adjust the frame index of the VRSAVE spill slot. 2027 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2028 unsigned Reg = CSI[i].getReg(); 2029 2030 if (PPC::VRSAVERCRegClass.contains(Reg)) { 2031 int FI = CSI[i].getFrameIdx(); 2032 2033 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2034 } 2035 } 2036 2037 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 2038 } 2039 2040 // Both Altivec and SPE have the same alignment and padding requirements 2041 // within the stack frame. 2042 if (HasVRSaveArea) { 2043 // Insert alignment padding, we need 16-byte alignment. Note: for positive 2044 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2045 // we are using negative number here (the stack grows downward). We should 2046 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2047 // is the alignment size ( n = 16 here) and y is the size after aligning. 2048 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2049 LowerBound &= ~(15); 2050 2051 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2052 int FI = VRegs[i].getFrameIdx(); 2053 2054 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2055 } 2056 } 2057 2058 addScavengingSpillSlot(MF, RS); 2059 } 2060 2061 void 2062 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2063 RegScavenger *RS) const { 2064 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2065 // a large stack, which will require scavenging a register to materialize a 2066 // large offset. 2067 2068 // We need to have a scavenger spill slot for spills if the frame size is 2069 // large. In case there is no free register for large-offset addressing, 2070 // this slot is used for the necessary emergency spill. Also, we need the 2071 // slot for dynamic stack allocations. 2072 2073 // The scavenger might be invoked if the frame offset does not fit into 2074 // the 16-bit immediate. We don't know the complete frame size here 2075 // because we've not yet computed callee-saved register spills or the 2076 // needed alignment padding. 2077 unsigned StackSize = determineFrameLayout(MF, true); 2078 MachineFrameInfo &MFI = MF.getFrameInfo(); 2079 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 2080 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 2081 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2082 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2083 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2084 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2085 unsigned Size = TRI.getSpillSize(RC); 2086 Align Alignment = TRI.getSpillAlign(RC); 2087 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false)); 2088 2089 // Might we have over-aligned allocas? 2090 bool HasAlVars = 2091 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2092 2093 // These kinds of spills might need two registers. 2094 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 2095 RS->addScavengingFrameIndex( 2096 MFI.CreateStackObject(Size, Alignment, false)); 2097 } 2098 } 2099 2100 // This function checks if a callee saved gpr can be spilled to a volatile 2101 // vector register. This occurs for leaf functions when the option 2102 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2103 // which were not spilled to vectors, return false so the target independent 2104 // code can handle them by assigning a FrameIdx to a stack slot. 2105 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2106 MachineFunction &MF, const TargetRegisterInfo *TRI, 2107 std::vector<CalleeSavedInfo> &CSI) const { 2108 2109 if (CSI.empty()) 2110 return true; // Early exit if no callee saved registers are modified! 2111 2112 // Early exit if cannot spill gprs to volatile vector registers. 2113 MachineFrameInfo &MFI = MF.getFrameInfo(); 2114 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2115 return false; 2116 2117 // Build a BitVector of VSRs that can be used for spilling GPRs. 2118 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2119 BitVector BVCalleeSaved(TRI->getNumRegs()); 2120 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2121 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2122 for (unsigned i = 0; CSRegs[i]; ++i) 2123 BVCalleeSaved.set(CSRegs[i]); 2124 2125 for (unsigned Reg : BVAllocatable.set_bits()) { 2126 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2127 // used in the function. 2128 if (BVCalleeSaved[Reg] || 2129 (!PPC::F8RCRegClass.contains(Reg) && 2130 !PPC::VFRCRegClass.contains(Reg)) || 2131 (MF.getRegInfo().isPhysRegUsed(Reg))) 2132 BVAllocatable.reset(Reg); 2133 } 2134 2135 bool AllSpilledToReg = true; 2136 for (auto &CS : CSI) { 2137 if (BVAllocatable.none()) 2138 return false; 2139 2140 unsigned Reg = CS.getReg(); 2141 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2142 AllSpilledToReg = false; 2143 continue; 2144 } 2145 2146 unsigned VolatileVFReg = BVAllocatable.find_first(); 2147 if (VolatileVFReg < BVAllocatable.size()) { 2148 CS.setDstReg(VolatileVFReg); 2149 BVAllocatable.reset(VolatileVFReg); 2150 } else { 2151 AllSpilledToReg = false; 2152 } 2153 } 2154 return AllSpilledToReg; 2155 } 2156 2157 bool PPCFrameLowering::spillCalleeSavedRegisters( 2158 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2159 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2160 2161 MachineFunction *MF = MBB.getParent(); 2162 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2163 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2164 bool MustSaveTOC = FI->mustSaveTOC(); 2165 DebugLoc DL; 2166 bool CRSpilled = false; 2167 MachineInstrBuilder CRMIB; 2168 2169 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2170 unsigned Reg = CSI[i].getReg(); 2171 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2172 if (Reg == PPC::VRSAVE) 2173 continue; 2174 2175 // CR2 through CR4 are the nonvolatile CR fields. 2176 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2177 2178 // Add the callee-saved register as live-in; it's killed at the spill. 2179 // Do not do this for callee-saved registers that are live-in to the 2180 // function because they will already be marked live-in and this will be 2181 // adding it for a second time. It is an error to add the same register 2182 // to the set more than once. 2183 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2184 bool IsLiveIn = MRI.isLiveIn(Reg); 2185 if (!IsLiveIn) 2186 MBB.addLiveIn(Reg); 2187 2188 if (CRSpilled && IsCRField) { 2189 CRMIB.addReg(Reg, RegState::ImplicitKill); 2190 continue; 2191 } 2192 2193 // The actual spill will happen in the prologue. 2194 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2195 continue; 2196 2197 // Insert the spill to the stack frame. 2198 if (IsCRField) { 2199 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2200 if (!Subtarget.is32BitELFABI()) { 2201 // The actual spill will happen at the start of the prologue. 2202 FuncInfo->addMustSaveCR(Reg); 2203 } else { 2204 CRSpilled = true; 2205 FuncInfo->setSpillsCR(); 2206 2207 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2208 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2209 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2210 .addReg(Reg, RegState::ImplicitKill); 2211 2212 MBB.insert(MI, CRMIB); 2213 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2214 .addReg(PPC::R12, 2215 getKillRegState(true)), 2216 CSI[i].getFrameIdx())); 2217 } 2218 } else { 2219 if (CSI[i].isSpilledToReg()) { 2220 NumPESpillVSR++; 2221 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2222 .addReg(Reg, getKillRegState(true)); 2223 } else { 2224 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2225 // Use !IsLiveIn for the kill flag. 2226 // We do not want to kill registers that are live in this function 2227 // before their use because they will become undefined registers. 2228 // Functions without NoUnwind need to preserve the order of elements in 2229 // saved vector registers. 2230 if (Subtarget.needsSwapsForVSXMemOps() && 2231 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2232 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2233 CSI[i].getFrameIdx(), RC, TRI); 2234 else 2235 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), 2236 RC, TRI); 2237 } 2238 } 2239 } 2240 return true; 2241 } 2242 2243 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2244 bool CR4Spilled, MachineBasicBlock &MBB, 2245 MachineBasicBlock::iterator MI, 2246 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2247 2248 MachineFunction *MF = MBB.getParent(); 2249 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2250 DebugLoc DL; 2251 unsigned MoveReg = PPC::R12; 2252 2253 // 32-bit: FP-relative 2254 MBB.insert(MI, 2255 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2256 CSI[CSIIndex].getFrameIdx())); 2257 2258 unsigned RestoreOp = PPC::MTOCRF; 2259 if (CR2Spilled) 2260 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2261 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2262 2263 if (CR3Spilled) 2264 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2265 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2266 2267 if (CR4Spilled) 2268 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2269 .addReg(MoveReg, getKillRegState(true))); 2270 } 2271 2272 MachineBasicBlock::iterator PPCFrameLowering:: 2273 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2274 MachineBasicBlock::iterator I) const { 2275 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2276 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2277 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2278 // Add (actually subtract) back the amount the callee popped on return. 2279 if (int CalleeAmt = I->getOperand(1).getImm()) { 2280 bool is64Bit = Subtarget.isPPC64(); 2281 CalleeAmt *= -1; 2282 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2283 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2284 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2285 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2286 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2287 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2288 const DebugLoc &dl = I->getDebugLoc(); 2289 2290 if (isInt<16>(CalleeAmt)) { 2291 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2292 .addReg(StackReg, RegState::Kill) 2293 .addImm(CalleeAmt); 2294 } else { 2295 MachineBasicBlock::iterator MBBI = I; 2296 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2297 .addImm(CalleeAmt >> 16); 2298 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2299 .addReg(TmpReg, RegState::Kill) 2300 .addImm(CalleeAmt & 0xFFFF); 2301 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2302 .addReg(StackReg, RegState::Kill) 2303 .addReg(TmpReg); 2304 } 2305 } 2306 } 2307 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2308 return MBB.erase(I); 2309 } 2310 2311 static bool isCalleeSavedCR(unsigned Reg) { 2312 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2313 } 2314 2315 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2316 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2317 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2318 MachineFunction *MF = MBB.getParent(); 2319 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2320 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2321 bool MustSaveTOC = FI->mustSaveTOC(); 2322 bool CR2Spilled = false; 2323 bool CR3Spilled = false; 2324 bool CR4Spilled = false; 2325 unsigned CSIIndex = 0; 2326 2327 // Initialize insertion-point logic; we will be restoring in reverse 2328 // order of spill. 2329 MachineBasicBlock::iterator I = MI, BeforeI = I; 2330 bool AtStart = I == MBB.begin(); 2331 2332 if (!AtStart) 2333 --BeforeI; 2334 2335 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2336 unsigned Reg = CSI[i].getReg(); 2337 2338 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2339 if (Reg == PPC::VRSAVE) 2340 continue; 2341 2342 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2343 continue; 2344 2345 // Restore of callee saved condition register field is handled during 2346 // epilogue insertion. 2347 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2348 continue; 2349 2350 if (Reg == PPC::CR2) { 2351 CR2Spilled = true; 2352 // The spill slot is associated only with CR2, which is the 2353 // first nonvolatile spilled. Save it here. 2354 CSIIndex = i; 2355 continue; 2356 } else if (Reg == PPC::CR3) { 2357 CR3Spilled = true; 2358 continue; 2359 } else if (Reg == PPC::CR4) { 2360 CR4Spilled = true; 2361 continue; 2362 } else { 2363 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2364 // least one CR register, restore all spilled CRs together. 2365 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2366 bool is31 = needsFP(*MF); 2367 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2368 CSIIndex); 2369 CR2Spilled = CR3Spilled = CR4Spilled = false; 2370 } 2371 2372 if (CSI[i].isSpilledToReg()) { 2373 DebugLoc DL; 2374 NumPEReloadVSR++; 2375 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2376 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2377 } else { 2378 // Default behavior for non-CR saves. 2379 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2380 2381 // Functions without NoUnwind need to preserve the order of elements in 2382 // saved vector registers. 2383 if (Subtarget.needsSwapsForVSXMemOps() && 2384 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2385 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2386 TRI); 2387 else 2388 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2389 2390 assert(I != MBB.begin() && 2391 "loadRegFromStackSlot didn't insert any code!"); 2392 } 2393 } 2394 2395 // Insert in reverse order. 2396 if (AtStart) 2397 I = MBB.begin(); 2398 else { 2399 I = BeforeI; 2400 ++I; 2401 } 2402 } 2403 2404 // If we haven't yet spilled the CRs, do so now. 2405 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2406 assert(Subtarget.is32BitELFABI() && 2407 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2408 bool is31 = needsFP(*MF); 2409 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2410 } 2411 2412 return true; 2413 } 2414 2415 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2416 return TOCSaveOffset; 2417 } 2418 2419 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2420 return FramePointerSaveOffset; 2421 } 2422 2423 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2424 return BasePointerSaveOffset; 2425 } 2426 2427 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2428 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2429 return false; 2430 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2431 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2432 } 2433