1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "PPCFrameLowering.h" 14 #include "PPCInstrBuilder.h" 15 #include "PPCInstrInfo.h" 16 #include "PPCMachineFunctionInfo.h" 17 #include "PPCSubtarget.h" 18 #include "PPCTargetMachine.h" 19 #include "llvm/ADT/Statistic.h" 20 #include "llvm/CodeGen/MachineFrameInfo.h" 21 #include "llvm/CodeGen/MachineFunction.h" 22 #include "llvm/CodeGen/MachineInstrBuilder.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/MachineRegisterInfo.h" 25 #include "llvm/CodeGen/RegisterScavenging.h" 26 #include "llvm/IR/Function.h" 27 #include "llvm/Target/TargetOptions.h" 28 29 using namespace llvm; 30 31 #define DEBUG_TYPE "framelowering" 32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 34 35 static cl::opt<bool> 36 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 37 cl::desc("Enable spills in prologue to vector registers."), 38 cl::init(false), cl::Hidden); 39 40 /// VRRegNo - Map from a numbered VR register to its enum value. 
41 /// 42 static const MCPhysReg VRRegNo[] = { 43 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 44 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 45 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 46 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 47 }; 48 49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 50 if (STI.isAIXABI()) 51 return STI.isPPC64() ? 16 : 8; 52 // SVR4 ABI: 53 return STI.isPPC64() ? 16 : 4; 54 } 55 56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 57 if (STI.isAIXABI()) 58 return STI.isPPC64() ? 40 : 20; 59 return STI.isELFv2ABI() ? 24 : 40; 60 } 61 62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 63 // First slot in the general register save area. 64 return STI.isPPC64() ? -8U : -4U; 65 } 66 67 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 68 if (STI.isAIXABI() || STI.isPPC64()) 69 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 70 71 // 32-bit SVR4 ABI: 72 return 8; 73 } 74 75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 76 // SVR4 ABI: First slot in the general register save area. 77 return STI.isPPC64() 78 ? -16U 79 : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U; 80 } 81 82 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { 83 return (STI.isAIXABI() && !STI.isPPC64()) ? 
4 : 8; 84 } 85 86 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 87 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 88 STI.getPlatformStackAlignment(), 0), 89 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 90 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 91 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 92 LinkageSize(computeLinkageSize(Subtarget)), 93 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 94 CRSaveOffset(computeCRSaveOffset(Subtarget)) {} 95 96 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 97 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 98 unsigned &NumEntries) const { 99 100 // Floating-point register save area offsets. 101 #define CALLEE_SAVED_FPRS \ 102 {PPC::F31, -8}, \ 103 {PPC::F30, -16}, \ 104 {PPC::F29, -24}, \ 105 {PPC::F28, -32}, \ 106 {PPC::F27, -40}, \ 107 {PPC::F26, -48}, \ 108 {PPC::F25, -56}, \ 109 {PPC::F24, -64}, \ 110 {PPC::F23, -72}, \ 111 {PPC::F22, -80}, \ 112 {PPC::F21, -88}, \ 113 {PPC::F20, -96}, \ 114 {PPC::F19, -104}, \ 115 {PPC::F18, -112}, \ 116 {PPC::F17, -120}, \ 117 {PPC::F16, -128}, \ 118 {PPC::F15, -136}, \ 119 {PPC::F14, -144} 120 121 // 32-bit general purpose register save area offsets shared by ELF and 122 // AIX. AIX has an extra CSR with r13. 123 #define CALLEE_SAVED_GPRS32 \ 124 {PPC::R31, -4}, \ 125 {PPC::R30, -8}, \ 126 {PPC::R29, -12}, \ 127 {PPC::R28, -16}, \ 128 {PPC::R27, -20}, \ 129 {PPC::R26, -24}, \ 130 {PPC::R25, -28}, \ 131 {PPC::R24, -32}, \ 132 {PPC::R23, -36}, \ 133 {PPC::R22, -40}, \ 134 {PPC::R21, -44}, \ 135 {PPC::R20, -48}, \ 136 {PPC::R19, -52}, \ 137 {PPC::R18, -56}, \ 138 {PPC::R17, -60}, \ 139 {PPC::R16, -64}, \ 140 {PPC::R15, -68}, \ 141 {PPC::R14, -72} 142 143 // 64-bit general purpose register save area offsets. 
144 #define CALLEE_SAVED_GPRS64 \ 145 {PPC::X31, -8}, \ 146 {PPC::X30, -16}, \ 147 {PPC::X29, -24}, \ 148 {PPC::X28, -32}, \ 149 {PPC::X27, -40}, \ 150 {PPC::X26, -48}, \ 151 {PPC::X25, -56}, \ 152 {PPC::X24, -64}, \ 153 {PPC::X23, -72}, \ 154 {PPC::X22, -80}, \ 155 {PPC::X21, -88}, \ 156 {PPC::X20, -96}, \ 157 {PPC::X19, -104}, \ 158 {PPC::X18, -112}, \ 159 {PPC::X17, -120}, \ 160 {PPC::X16, -128}, \ 161 {PPC::X15, -136}, \ 162 {PPC::X14, -144} 163 164 // Vector register save area offsets. 165 #define CALLEE_SAVED_VRS \ 166 {PPC::V31, -16}, \ 167 {PPC::V30, -32}, \ 168 {PPC::V29, -48}, \ 169 {PPC::V28, -64}, \ 170 {PPC::V27, -80}, \ 171 {PPC::V26, -96}, \ 172 {PPC::V25, -112}, \ 173 {PPC::V24, -128}, \ 174 {PPC::V23, -144}, \ 175 {PPC::V22, -160}, \ 176 {PPC::V21, -176}, \ 177 {PPC::V20, -192} 178 179 // Note that the offsets here overlap, but this is fixed up in 180 // processFunctionBeforeFrameFinalized. 181 182 static const SpillSlot ELFOffsets32[] = { 183 CALLEE_SAVED_FPRS, 184 CALLEE_SAVED_GPRS32, 185 186 // CR save area offset. We map each of the nonvolatile CR fields 187 // to the slot for CR2, which is the first of the nonvolatile CR 188 // fields to be assigned, so that we only allocate one save slot. 189 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 190 {PPC::CR2, -4}, 191 192 // VRSAVE save area offset. 193 {PPC::VRSAVE, -4}, 194 195 CALLEE_SAVED_VRS, 196 197 // SPE register save area (overlaps Vector save area). 198 {PPC::S31, -8}, 199 {PPC::S30, -16}, 200 {PPC::S29, -24}, 201 {PPC::S28, -32}, 202 {PPC::S27, -40}, 203 {PPC::S26, -48}, 204 {PPC::S25, -56}, 205 {PPC::S24, -64}, 206 {PPC::S23, -72}, 207 {PPC::S22, -80}, 208 {PPC::S21, -88}, 209 {PPC::S20, -96}, 210 {PPC::S19, -104}, 211 {PPC::S18, -112}, 212 {PPC::S17, -120}, 213 {PPC::S16, -128}, 214 {PPC::S15, -136}, 215 {PPC::S14, -144}}; 216 217 static const SpillSlot ELFOffsets64[] = { 218 CALLEE_SAVED_FPRS, 219 CALLEE_SAVED_GPRS64, 220 221 // VRSAVE save area offset. 
222 {PPC::VRSAVE, -4}, 223 CALLEE_SAVED_VRS 224 }; 225 226 static const SpillSlot AIXOffsets32[] = { 227 CALLEE_SAVED_FPRS, 228 CALLEE_SAVED_GPRS32, 229 // Add AIX's extra CSR. 230 {PPC::R13, -76}, 231 // TODO Update when we add vector support for AIX. 232 }; 233 234 static const SpillSlot AIXOffsets64[] = { 235 CALLEE_SAVED_FPRS, 236 CALLEE_SAVED_GPRS64, 237 // TODO Update when we add vector support for AIX. 238 }; 239 240 if (Subtarget.is64BitELFABI()) { 241 NumEntries = array_lengthof(ELFOffsets64); 242 return ELFOffsets64; 243 } 244 245 if (Subtarget.is32BitELFABI()) { 246 NumEntries = array_lengthof(ELFOffsets32); 247 return ELFOffsets32; 248 } 249 250 assert(Subtarget.isAIXABI() && "Unexpected ABI."); 251 252 if (Subtarget.isPPC64()) { 253 NumEntries = array_lengthof(AIXOffsets64); 254 return AIXOffsets64; 255 } 256 257 NumEntries = array_lengthof(AIXOffsets32); 258 return AIXOffsets32; 259 } 260 261 /// RemoveVRSaveCode - We have found that this function does not need any code 262 /// to manipulate the VRSAVE register, even though it uses vector registers. 263 /// This can happen when the only registers used are known to be live in or out 264 /// of the function. Remove all of the VRSAVE related code from the function. 265 /// FIXME: The removal of the code results in a compile failure at -O0 when the 266 /// function contains a function call, as the GPR containing original VRSAVE 267 /// contents is spilled and reloaded around the call. Without the prolog code, 268 /// the spill instruction refers to an undefined register. This code needs 269 /// to account for all uses of that GPR. 270 static void RemoveVRSaveCode(MachineInstr &MI) { 271 MachineBasicBlock *Entry = MI.getParent(); 272 MachineFunction *MF = Entry->getParent(); 273 274 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 
275 MachineBasicBlock::iterator MBBI = MI; 276 ++MBBI; 277 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 278 MBBI->eraseFromParent(); 279 280 bool RemovedAllMTVRSAVEs = true; 281 // See if we can find and remove the MTVRSAVE instruction from all of the 282 // epilog blocks. 283 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 284 // If last instruction is a return instruction, add an epilogue 285 if (I->isReturnBlock()) { 286 bool FoundIt = false; 287 for (MBBI = I->end(); MBBI != I->begin(); ) { 288 --MBBI; 289 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 290 MBBI->eraseFromParent(); // remove it. 291 FoundIt = true; 292 break; 293 } 294 } 295 RemovedAllMTVRSAVEs &= FoundIt; 296 } 297 } 298 299 // If we found and removed all MTVRSAVE instructions, remove the read of 300 // VRSAVE as well. 301 if (RemovedAllMTVRSAVEs) { 302 MBBI = MI; 303 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 304 --MBBI; 305 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 306 MBBI->eraseFromParent(); 307 } 308 309 // Finally, nuke the UPDATE_VRSAVE. 310 MI.eraseFromParent(); 311 } 312 313 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 314 // instruction selector. Based on the vector registers that have been used, 315 // transform this into the appropriate ORI instruction. 316 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 317 MachineFunction *MF = MI.getParent()->getParent(); 318 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 319 DebugLoc dl = MI.getDebugLoc(); 320 321 const MachineRegisterInfo &MRI = MF->getRegInfo(); 322 unsigned UsedRegMask = 0; 323 for (unsigned i = 0; i != 32; ++i) 324 if (MRI.isPhysRegModified(VRRegNo[i])) 325 UsedRegMask |= 1 << (31-i); 326 327 // Live in and live out values already must be in the mask, so don't bother 328 // marking them. 
329 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 330 unsigned RegNo = TRI->getEncodingValue(LI.first); 331 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 332 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 333 } 334 335 // Live out registers appear as use operands on return instructions. 336 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 337 UsedRegMask != 0 && BI != BE; ++BI) { 338 const MachineBasicBlock &MBB = *BI; 339 if (!MBB.isReturnBlock()) 340 continue; 341 const MachineInstr &Ret = MBB.back(); 342 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 343 const MachineOperand &MO = Ret.getOperand(I); 344 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 345 continue; 346 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 347 UsedRegMask &= ~(1 << (31-RegNo)); 348 } 349 } 350 351 // If no registers are used, turn this into a copy. 352 if (UsedRegMask == 0) { 353 // Remove all VRSAVE code. 
354 RemoveVRSaveCode(MI); 355 return; 356 } 357 358 Register SrcReg = MI.getOperand(1).getReg(); 359 Register DstReg = MI.getOperand(0).getReg(); 360 361 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 362 if (DstReg != SrcReg) 363 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 364 .addReg(SrcReg) 365 .addImm(UsedRegMask); 366 else 367 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 368 .addReg(SrcReg, RegState::Kill) 369 .addImm(UsedRegMask); 370 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 371 if (DstReg != SrcReg) 372 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 373 .addReg(SrcReg) 374 .addImm(UsedRegMask >> 16); 375 else 376 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 377 .addReg(SrcReg, RegState::Kill) 378 .addImm(UsedRegMask >> 16); 379 } else { 380 if (DstReg != SrcReg) 381 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 382 .addReg(SrcReg) 383 .addImm(UsedRegMask >> 16); 384 else 385 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 386 .addReg(SrcReg, RegState::Kill) 387 .addImm(UsedRegMask >> 16); 388 389 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 390 .addReg(DstReg, RegState::Kill) 391 .addImm(UsedRegMask & 0xFFFF); 392 } 393 394 // Remove the old UPDATE_VRSAVE instruction. 
395 MI.eraseFromParent(); 396 } 397 398 static bool spillsCR(const MachineFunction &MF) { 399 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 400 return FuncInfo->isCRSpilled(); 401 } 402 403 static bool spillsVRSAVE(const MachineFunction &MF) { 404 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 405 return FuncInfo->isVRSAVESpilled(); 406 } 407 408 static bool hasSpills(const MachineFunction &MF) { 409 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 410 return FuncInfo->hasSpills(); 411 } 412 413 static bool hasNonRISpills(const MachineFunction &MF) { 414 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 415 return FuncInfo->hasNonRISpills(); 416 } 417 418 /// MustSaveLR - Return true if this function requires that we save the LR 419 /// register onto the stack in the prolog and restore it in the epilog of the 420 /// function. 421 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 422 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 423 424 // We need a save/restore of LR if there is any def of LR (which is 425 // defined by calls, including the PIC setup sequence), or if there is 426 // some use of the LR stack slot (e.g. for builtin_return_address). 427 // (LR comes in 32 and 64 bit versions.) 428 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 429 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 430 } 431 432 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 433 /// call frame size. Update the MachineFunction object with the stack size. 
434 unsigned 435 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 436 bool UseEstimate) const { 437 unsigned NewMaxCallFrameSize = 0; 438 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 439 &NewMaxCallFrameSize); 440 MF.getFrameInfo().setStackSize(FrameSize); 441 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 442 return FrameSize; 443 } 444 445 /// determineFrameLayout - Determine the size of the frame and maximum call 446 /// frame size. 447 unsigned 448 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 449 bool UseEstimate, 450 unsigned *NewMaxCallFrameSize) const { 451 const MachineFrameInfo &MFI = MF.getFrameInfo(); 452 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 453 454 // Get the number of bytes to allocate from the FrameInfo 455 unsigned FrameSize = 456 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 457 458 // Get stack alignments. The frame must be aligned to the greatest of these: 459 Align TargetAlign = getStackAlign(); // alignment required per the ABI 460 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 461 Align Alignment = std::max(TargetAlign, MaxAlign); 462 463 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 464 465 unsigned LR = RegInfo->getRARegister(); 466 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 467 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 468 !MFI.adjustsStack() && // No calls. 469 !MustSaveLR(MF, LR) && // No need to save LR. 470 !FI->mustSaveTOC() && // No need to save TOC. 471 !RegInfo->hasBasePointer(MF); // No special alignment. 472 473 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 474 // code if all local vars are reg-allocated. 
475 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 476 477 // Check whether we can skip adjusting the stack pointer (by using red zone) 478 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 479 // No need for frame 480 return 0; 481 } 482 483 // Get the maximum call frame size of all the calls. 484 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 485 486 // Maximum call frame needs to be at least big enough for linkage area. 487 unsigned minCallFrameSize = getLinkageSize(); 488 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 489 490 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 491 // that allocations will be aligned. 492 if (MFI.hasVarSizedObjects()) 493 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 494 495 // Update the new max call frame size if the caller passes in a valid pointer. 496 if (NewMaxCallFrameSize) 497 *NewMaxCallFrameSize = maxCallFrameSize; 498 499 // Include call frame size in total. 500 FrameSize += maxCallFrameSize; 501 502 // Make sure the frame is aligned. 503 FrameSize = alignTo(FrameSize, Alignment); 504 505 return FrameSize; 506 } 507 508 // hasFP - Return true if the specified function actually has a dedicated frame 509 // pointer register. 510 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 511 const MachineFrameInfo &MFI = MF.getFrameInfo(); 512 // FIXME: This is pretty much broken by design: hasFP() might be called really 513 // early, before the stack layout was calculated and thus hasFP() might return 514 // true or false here depending on the time of call. 515 return (MFI.getStackSize()) && needsFP(MF); 516 } 517 518 // needsFP - Return true if the specified function should have a dedicated frame 519 // pointer register. This is true if the function has variable sized allocas or 520 // if frame pointer elimination is disabled. 
521 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 522 const MachineFrameInfo &MFI = MF.getFrameInfo(); 523 524 // Naked functions have no stack frame pushed, so we don't have a frame 525 // pointer. 526 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 527 return false; 528 529 return MF.getTarget().Options.DisableFramePointerElim(MF) || 530 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 531 (MF.getTarget().Options.GuaranteedTailCallOpt && 532 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 533 } 534 535 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 536 bool is31 = needsFP(MF); 537 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 538 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 539 540 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 541 bool HasBP = RegInfo->hasBasePointer(MF); 542 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 543 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 544 545 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 546 BI != BE; ++BI) 547 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 548 --MBBI; 549 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 550 MachineOperand &MO = MBBI->getOperand(I); 551 if (!MO.isReg()) 552 continue; 553 554 switch (MO.getReg()) { 555 case PPC::FP: 556 MO.setReg(FPReg); 557 break; 558 case PPC::FP8: 559 MO.setReg(FP8Reg); 560 break; 561 case PPC::BP: 562 MO.setReg(BPReg); 563 break; 564 case PPC::BP8: 565 MO.setReg(BP8Reg); 566 break; 567 568 } 569 } 570 } 571 } 572 573 /* This function will do the following: 574 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 575 respectively (defaults recommended by the ABI) and return true 576 - If MBB is not an entry block, initialize the register scavenger and look 577 for available registers. 
578 - If the defaults (R0/R12) are available, return true 579 - If TwoUniqueRegsRequired is set to true, it looks for two unique 580 registers. Otherwise, look for a single available register. 581 - If the required registers are found, set SR1 and SR2 and return true. 582 - If the required registers are not found, set SR2 or both SR1 and SR2 to 583 PPC::NoRegister and return false. 584 585 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 586 is not set, this function will attempt to find two different registers, but 587 still return true if only one register is available (and set SR1 == SR2). 588 */ 589 bool 590 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 591 bool UseAtEnd, 592 bool TwoUniqueRegsRequired, 593 Register *SR1, 594 Register *SR2) const { 595 RegScavenger RS; 596 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 597 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 598 599 // Set the defaults for the two scratch registers. 600 if (SR1) 601 *SR1 = R0; 602 603 if (SR2) { 604 assert (SR1 && "Asking for the second scratch register but not the first?"); 605 *SR2 = R12; 606 } 607 608 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 609 if ((UseAtEnd && MBB->isReturnBlock()) || 610 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 611 return true; 612 613 RS.enterBasicBlock(*MBB); 614 615 if (UseAtEnd && !MBB->empty()) { 616 // The scratch register will be used at the end of the block, so must 617 // consider all registers used within the block 618 619 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 620 // If no terminator, back iterator up to previous instruction. 621 if (MBBI == MBB->end()) 622 MBBI = std::prev(MBBI); 623 624 if (MBBI != MBB->begin()) 625 RS.forward(MBBI); 626 } 627 628 // If the two registers are available, we're all good. 
629 // Note that we only return here if both R0 and R12 are available because 630 // although the function may not require two unique registers, it may benefit 631 // from having two so we should try to provide them. 632 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 633 return true; 634 635 // Get the list of callee-saved registers for the target. 636 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 637 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 638 639 // Get all the available registers in the block. 640 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 641 &PPC::GPRCRegClass); 642 643 // We shouldn't use callee-saved registers as scratch registers as they may be 644 // available when looking for a candidate block for shrink wrapping but not 645 // available when the actual prologue/epilogue is being emitted because they 646 // were added as live-in to the prologue block by PrologueEpilogueInserter. 647 for (int i = 0; CSRegs[i]; ++i) 648 BV.reset(CSRegs[i]); 649 650 // Set the first scratch register to the first available one. 651 if (SR1) { 652 int FirstScratchReg = BV.find_first(); 653 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 654 } 655 656 // If there is another one available, set the second scratch register to that. 657 // Otherwise, set it to either PPC::NoRegister if this function requires two 658 // or to whatever SR1 is set to if this function doesn't require two. 659 if (SR2) { 660 int SecondScratchReg = BV.find_next(*SR1); 661 if (SecondScratchReg != -1) 662 *SR2 = SecondScratchReg; 663 else 664 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1; 665 } 666 667 // Now that we've done our best to provide both registers, double check 668 // whether we were unable to provide enough. 669 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 670 return false; 671 672 return true; 673 } 674 675 // We need a scratch register for spilling LR and for spilling CR. 
// By default, we use two scratch registers to hide latency. However, if only
// one scratch register is available, we can adjust for that by not overlapping
// the spill code. However, if we need to realign the stack (i.e. have a base
// pointer) and the stack frame is large, we need two scratch registers.
//
// As implemented, two registers are required when the function has a base
// pointer, the maximum frame alignment is greater than 1, and either the
// negated frame size does not fit a signed 16-bit immediate or the target has
// no red zone (32-bit SVR4).
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  // "Large" means the stack adjustment cannot be encoded as a 16-bit
  // immediate.
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign();
  // Same red zone rule as in emitPrologue: every target except 32-bit SVR4.
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();

  return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
}

// Return true if a prologue can be placed in MBB, i.e. if the scratch
// registers the prologue needs can be found at the start of the block.
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  // findScratchRegister takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

// Return true if an epilogue can be placed in MBB, i.e. if a scratch register
// can be found at the end of the block.
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  // findScratchRegister takes a non-const block pointer.
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

// Return true if the stack pointer update may be moved within the prologue.
// All of the following must hold: the target is 64-bit ELFv2; the frame is
// non-empty and no larger than the red zone; there is neither a frame pointer
// nor a base pointer; the function has no fast call and does not use the PIC
// base; and frame index scavenging is not required.
bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF))
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
749 return !RegInfo->requiresFrameIndexScavenging(MF); 750 } 751 752 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 753 MachineBasicBlock &MBB) const { 754 MachineBasicBlock::iterator MBBI = MBB.begin(); 755 MachineFrameInfo &MFI = MF.getFrameInfo(); 756 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 757 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 758 759 MachineModuleInfo &MMI = MF.getMMI(); 760 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 761 DebugLoc dl; 762 // AIX assembler does not support cfi directives. 763 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 764 765 // Get processor type. 766 bool isPPC64 = Subtarget.isPPC64(); 767 // Get the ABI. 768 bool isSVR4ABI = Subtarget.isSVR4ABI(); 769 bool isAIXABI = Subtarget.isAIXABI(); 770 bool isELFv2ABI = Subtarget.isELFv2ABI(); 771 assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); 772 773 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 774 // process it. 775 if (!isSVR4ABI) 776 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 777 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 778 if (isAIXABI) 779 report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); 780 HandleVRSaveUpdate(*MBBI, TII); 781 break; 782 } 783 } 784 785 // Move MBBI back to the beginning of the prologue block. 786 MBBI = MBB.begin(); 787 788 // Work out frame sizes. 789 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 790 int NegFrameSize = -FrameSize; 791 if (!isInt<32>(NegFrameSize)) 792 llvm_unreachable("Unhandled stack size!"); 793 794 if (MFI.isFrameAddressTaken()) 795 replaceFPWithRealFP(MF); 796 797 // Check if the link register (LR) must be saved. 
798 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 799 bool MustSaveLR = FI->mustSaveLR(); 800 bool MustSaveTOC = FI->mustSaveTOC(); 801 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 802 bool MustSaveCR = !MustSaveCRs.empty(); 803 // Do we have a frame pointer and/or base pointer for this function? 804 bool HasFP = hasFP(MF); 805 bool HasBP = RegInfo->hasBasePointer(MF); 806 bool HasRedZone = isPPC64 || !isSVR4ABI; 807 808 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 809 Register BPReg = RegInfo->getBaseRegister(MF); 810 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 811 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 812 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 813 Register ScratchReg; 814 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 815 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 816 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 817 : PPC::MFLR ); 818 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 819 : PPC::STW ); 820 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 821 : PPC::STWU ); 822 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 823 : PPC::STWUX); 824 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 825 : PPC::LIS ); 826 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 827 : PPC::ORI ); 828 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 829 : PPC::OR ); 830 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 831 : PPC::SUBFC); 832 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 833 : PPC::SUBFIC); 834 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 835 : PPC::MFCR); 836 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 837 838 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 839 // LROffset is positive), that slot is callee-owned. 
Because PPC32 SVR4 has no 840 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 841 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 842 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 843 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 844 845 // Using the same bool variable as below to suppress compiler warnings. 846 bool SingleScratchReg = 847 findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB), 848 &ScratchReg, &TempReg); 849 assert(SingleScratchReg && 850 "Required number of registers not available in this block"); 851 852 SingleScratchReg = ScratchReg == TempReg; 853 854 int LROffset = getReturnSaveOffset(); 855 856 int FPOffset = 0; 857 if (HasFP) { 858 if (isSVR4ABI) { 859 MachineFrameInfo &MFI = MF.getFrameInfo(); 860 int FPIndex = FI->getFramePointerSaveIndex(); 861 assert(FPIndex && "No Frame Pointer Save Slot!"); 862 FPOffset = MFI.getObjectOffset(FPIndex); 863 } else { 864 FPOffset = getFramePointerSaveOffset(); 865 } 866 } 867 868 int BPOffset = 0; 869 if (HasBP) { 870 if (isSVR4ABI) { 871 MachineFrameInfo &MFI = MF.getFrameInfo(); 872 int BPIndex = FI->getBasePointerSaveIndex(); 873 assert(BPIndex && "No Base Pointer Save Slot!"); 874 BPOffset = MFI.getObjectOffset(BPIndex); 875 } else { 876 BPOffset = getBasePointerSaveOffset(); 877 } 878 } 879 880 int PBPOffset = 0; 881 if (FI->usesPICBase()) { 882 MachineFrameInfo &MFI = MF.getFrameInfo(); 883 int PBPIndex = FI->getPICBasePointerSaveIndex(); 884 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 885 PBPOffset = MFI.getObjectOffset(PBPIndex); 886 } 887 888 // Get stack alignments. 889 Align MaxAlign = MFI.getMaxAlign(); 890 if (HasBP && MaxAlign > 1) 891 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 892 893 // Frames of 32KB & larger require special handling because they cannot be 894 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 
895 bool isLargeFrame = !isInt<16>(NegFrameSize); 896 897 // Check if we can move the stack update instruction (stdu) down the prologue 898 // past the callee saves. Hopefully this will avoid the situation where the 899 // saves are waiting for the update on the store with update to complete. 900 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 901 bool MovingStackUpdateDown = false; 902 903 // Check if we can move the stack update. 904 if (stackUpdateCanBeMoved(MF)) { 905 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 906 for (CalleeSavedInfo CSI : Info) { 907 int FrIdx = CSI.getFrameIdx(); 908 // If the frame index is not negative the callee saved info belongs to a 909 // stack object that is not a fixed stack object. We ignore non-fixed 910 // stack objects because we won't move the stack update pointer past them. 911 if (FrIdx >= 0) 912 continue; 913 914 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 915 StackUpdateLoc++; 916 MovingStackUpdateDown = true; 917 } else { 918 // We need all of the Frame Indices to meet these conditions. 919 // If they do not, abort the whole operation. 920 StackUpdateLoc = MBBI; 921 MovingStackUpdateDown = false; 922 break; 923 } 924 } 925 926 // If the operation was not aborted then update the object offset. 927 if (MovingStackUpdateDown) { 928 for (CalleeSavedInfo CSI : Info) { 929 int FrIdx = CSI.getFrameIdx(); 930 if (FrIdx < 0) 931 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 932 } 933 } 934 } 935 936 // Where in the prologue we move the CR fields depends on how many scratch 937 // registers we have, and if we need to save the link register or not. This 938 // lambda is to avoid duplicating the logic in 2 places. 939 auto BuildMoveFromCR = [&]() { 940 if (isELFv2ABI && MustSaveCRs.size() == 1) { 941 // In the ELFv2 ABI, we are not required to save all CR fields. 
942 // If only one CR field is clobbered, it is more efficient to use 943 // mfocrf to selectively save just that field, because mfocrf has short 944 // latency compares to mfcr. 945 assert(isPPC64 && "V2 ABI is 64-bit only."); 946 MachineInstrBuilder MIB = 947 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 948 MIB.addReg(MustSaveCRs[0], RegState::Kill); 949 } else { 950 MachineInstrBuilder MIB = 951 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 952 for (unsigned CRfield : MustSaveCRs) 953 MIB.addReg(CRfield, RegState::ImplicitKill); 954 } 955 }; 956 957 // If we need to spill the CR and the LR but we don't have two separate 958 // registers available, we must spill them one at a time 959 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 960 BuildMoveFromCR(); 961 BuildMI(MBB, MBBI, dl, StoreWordInst) 962 .addReg(TempReg, getKillRegState(true)) 963 .addImm(CRSaveOffset) 964 .addReg(SPReg); 965 } 966 967 if (MustSaveLR) 968 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 969 970 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 971 BuildMoveFromCR(); 972 973 if (HasRedZone) { 974 if (HasFP) 975 BuildMI(MBB, MBBI, dl, StoreInst) 976 .addReg(FPReg) 977 .addImm(FPOffset) 978 .addReg(SPReg); 979 if (FI->usesPICBase()) 980 BuildMI(MBB, MBBI, dl, StoreInst) 981 .addReg(PPC::R30) 982 .addImm(PBPOffset) 983 .addReg(SPReg); 984 if (HasBP) 985 BuildMI(MBB, MBBI, dl, StoreInst) 986 .addReg(BPReg) 987 .addImm(BPOffset) 988 .addReg(SPReg); 989 } 990 991 if (MustSaveLR) 992 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 993 .addReg(ScratchReg, getKillRegState(true)) 994 .addImm(LROffset) 995 .addReg(SPReg); 996 997 if (MustSaveCR && 998 !(SingleScratchReg && MustSaveLR)) { 999 assert(HasRedZone && "A red zone is always available on PPC64"); 1000 BuildMI(MBB, MBBI, dl, StoreWordInst) 1001 .addReg(TempReg, getKillRegState(true)) 1002 .addImm(CRSaveOffset) 1003 .addReg(SPReg); 1004 } 1005 1006 // Skip the rest if this is a leaf function & all spills fit in the Red 
Zone. 1007 if (!FrameSize) 1008 return; 1009 1010 // Adjust stack pointer: r1 += NegFrameSize. 1011 // If there is a preferred stack alignment, align R1 now 1012 1013 if (HasBP && HasRedZone) { 1014 // Save a copy of r1 as the base pointer. 1015 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1016 .addReg(SPReg) 1017 .addReg(SPReg); 1018 } 1019 1020 // Have we generated a STUX instruction to claim stack frame? If so, 1021 // the negated frame size will be placed in ScratchReg. 1022 bool HasSTUX = false; 1023 1024 // This condition must be kept in sync with canUseAsPrologue. 1025 if (HasBP && MaxAlign > 1) { 1026 if (isPPC64) 1027 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1028 .addReg(SPReg) 1029 .addImm(0) 1030 .addImm(64 - Log2(MaxAlign)); 1031 else // PPC32... 1032 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1033 .addReg(SPReg) 1034 .addImm(0) 1035 .addImm(32 - Log2(MaxAlign)) 1036 .addImm(31); 1037 if (!isLargeFrame) { 1038 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1039 .addReg(ScratchReg, RegState::Kill) 1040 .addImm(NegFrameSize); 1041 } else { 1042 assert(!SingleScratchReg && "Only a single scratch reg available"); 1043 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1044 .addImm(NegFrameSize >> 16); 1045 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1046 .addReg(TempReg, RegState::Kill) 1047 .addImm(NegFrameSize & 0xFFFF); 1048 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1049 .addReg(ScratchReg, RegState::Kill) 1050 .addReg(TempReg, RegState::Kill); 1051 } 1052 1053 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1054 .addReg(SPReg, RegState::Kill) 1055 .addReg(SPReg) 1056 .addReg(ScratchReg); 1057 HasSTUX = true; 1058 1059 } else if (!isLargeFrame) { 1060 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1061 .addReg(SPReg) 1062 .addImm(NegFrameSize) 1063 .addReg(SPReg); 1064 1065 } else { 1066 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1067 .addImm(NegFrameSize >> 16); 1068 BuildMI(MBB, 
MBBI, dl, OrImmInst, ScratchReg) 1069 .addReg(ScratchReg, RegState::Kill) 1070 .addImm(NegFrameSize & 0xFFFF); 1071 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1072 .addReg(SPReg, RegState::Kill) 1073 .addReg(SPReg) 1074 .addReg(ScratchReg); 1075 HasSTUX = true; 1076 } 1077 1078 // Save the TOC register after the stack pointer update if a prologue TOC 1079 // save is required for the function. 1080 if (MustSaveTOC) { 1081 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1082 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1083 .addReg(TOCReg, getKillRegState(true)) 1084 .addImm(TOCSaveOffset) 1085 .addReg(SPReg); 1086 } 1087 1088 if (!HasRedZone) { 1089 assert(!isPPC64 && "A red zone is always available on PPC64"); 1090 if (HasSTUX) { 1091 // The negated frame size is in ScratchReg, and the SPReg has been 1092 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1093 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1094 // the stack frame (i.e. the old SP), ideally, we would put the old 1095 // SP into a register and use it as the base for the stores. The 1096 // problem is that the only available register may be ScratchReg, 1097 // which could be R0, and R0 cannot be used as a base address. 1098 1099 // First, set ScratchReg to the old SP. This may need to be modified 1100 // later. 1101 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1102 .addReg(ScratchReg, RegState::Kill) 1103 .addReg(SPReg); 1104 1105 if (ScratchReg == PPC::R0) { 1106 // R0 cannot be used as a base register, but it can be used as an 1107 // index in a store-indexed. 1108 int LastOffset = 0; 1109 if (HasFP) { 1110 // R0 += (FPOffset-LastOffset). 1111 // Need addic, since addi treats R0 as 0. 1112 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1113 .addReg(ScratchReg) 1114 .addImm(FPOffset-LastOffset); 1115 LastOffset = FPOffset; 1116 // Store FP into *R0. 
1117 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1118 .addReg(FPReg, RegState::Kill) // Save FP. 1119 .addReg(PPC::ZERO) 1120 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1121 } 1122 if (FI->usesPICBase()) { 1123 // R0 += (PBPOffset-LastOffset). 1124 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1125 .addReg(ScratchReg) 1126 .addImm(PBPOffset-LastOffset); 1127 LastOffset = PBPOffset; 1128 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1129 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1130 .addReg(PPC::ZERO) 1131 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1132 } 1133 if (HasBP) { 1134 // R0 += (BPOffset-LastOffset). 1135 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1136 .addReg(ScratchReg) 1137 .addImm(BPOffset-LastOffset); 1138 LastOffset = BPOffset; 1139 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1140 .addReg(BPReg, RegState::Kill) // Save BP. 1141 .addReg(PPC::ZERO) 1142 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1143 // BP = R0-LastOffset 1144 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1145 .addReg(ScratchReg, RegState::Kill) 1146 .addImm(-LastOffset); 1147 } 1148 } else { 1149 // ScratchReg is not R0, so use it as the base register. It is 1150 // already set to the old SP, so we can use the offsets directly. 1151 1152 // Now that the stack frame has been allocated, save all the necessary 1153 // registers using ScratchReg as the base address. 
1154 if (HasFP) 1155 BuildMI(MBB, MBBI, dl, StoreInst) 1156 .addReg(FPReg) 1157 .addImm(FPOffset) 1158 .addReg(ScratchReg); 1159 if (FI->usesPICBase()) 1160 BuildMI(MBB, MBBI, dl, StoreInst) 1161 .addReg(PPC::R30) 1162 .addImm(PBPOffset) 1163 .addReg(ScratchReg); 1164 if (HasBP) { 1165 BuildMI(MBB, MBBI, dl, StoreInst) 1166 .addReg(BPReg) 1167 .addImm(BPOffset) 1168 .addReg(ScratchReg); 1169 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1170 .addReg(ScratchReg, RegState::Kill) 1171 .addReg(ScratchReg); 1172 } 1173 } 1174 } else { 1175 // The frame size is a known 16-bit constant (fitting in the immediate 1176 // field of STWU). To be here we have to be compiling for PPC32. 1177 // Since the SPReg has been decreased by FrameSize, add it back to each 1178 // offset. 1179 if (HasFP) 1180 BuildMI(MBB, MBBI, dl, StoreInst) 1181 .addReg(FPReg) 1182 .addImm(FrameSize + FPOffset) 1183 .addReg(SPReg); 1184 if (FI->usesPICBase()) 1185 BuildMI(MBB, MBBI, dl, StoreInst) 1186 .addReg(PPC::R30) 1187 .addImm(FrameSize + PBPOffset) 1188 .addReg(SPReg); 1189 if (HasBP) { 1190 BuildMI(MBB, MBBI, dl, StoreInst) 1191 .addReg(BPReg) 1192 .addImm(FrameSize + BPOffset) 1193 .addReg(SPReg); 1194 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1195 .addReg(SPReg) 1196 .addImm(FrameSize); 1197 } 1198 } 1199 } 1200 1201 // Add Call Frame Information for the instructions we generated above. 1202 if (needsCFI) { 1203 unsigned CFIIndex; 1204 1205 if (HasBP) { 1206 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1207 // because if the stack needed aligning then CFA won't be at a fixed 1208 // offset from FP/SP. 1209 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1210 CFIIndex = MF.addFrameInst( 1211 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1212 } else { 1213 // Adjust the definition of CFA to account for the change in SP. 
1214 assert(NegFrameSize); 1215 CFIIndex = MF.addFrameInst( 1216 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); 1217 } 1218 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1219 .addCFIIndex(CFIIndex); 1220 1221 if (HasFP) { 1222 // Describe where FP was saved, at a fixed offset from CFA. 1223 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1224 CFIIndex = MF.addFrameInst( 1225 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1226 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1227 .addCFIIndex(CFIIndex); 1228 } 1229 1230 if (FI->usesPICBase()) { 1231 // Describe where FP was saved, at a fixed offset from CFA. 1232 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1233 CFIIndex = MF.addFrameInst( 1234 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1235 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1236 .addCFIIndex(CFIIndex); 1237 } 1238 1239 if (HasBP) { 1240 // Describe where BP was saved, at a fixed offset from CFA. 1241 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1242 CFIIndex = MF.addFrameInst( 1243 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1244 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1245 .addCFIIndex(CFIIndex); 1246 } 1247 1248 if (MustSaveLR) { 1249 // Describe where LR was saved, at a fixed offset from CFA. 1250 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1251 CFIIndex = MF.addFrameInst( 1252 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1253 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1254 .addCFIIndex(CFIIndex); 1255 } 1256 } 1257 1258 // If there is a frame pointer, copy R1 into R31 1259 if (HasFP) { 1260 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1261 .addReg(SPReg) 1262 .addReg(SPReg); 1263 1264 if (!HasBP && needsCFI) { 1265 // Change the definition of CFA from SP+offset to FP+offset, because SP 1266 // will change at every alloca. 
1267 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1268 unsigned CFIIndex = MF.addFrameInst( 1269 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1270 1271 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1272 .addCFIIndex(CFIIndex); 1273 } 1274 } 1275 1276 if (needsCFI) { 1277 // Describe where callee saved registers were saved, at fixed offsets from 1278 // CFA. 1279 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1280 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1281 unsigned Reg = CSI[I].getReg(); 1282 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1283 1284 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1285 // subregisters of CR2. We just need to emit a move of CR2. 1286 if (PPC::CRBITRCRegClass.contains(Reg)) 1287 continue; 1288 1289 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1290 continue; 1291 1292 // For SVR4, don't emit a move for the CR spill slot if we haven't 1293 // spilled CRs. 1294 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1295 && !MustSaveCR) 1296 continue; 1297 1298 // For 64-bit SVR4 when we have spilled CRs, the spill location 1299 // is SP+8, not a frame-relative slot. 1300 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1301 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1302 // the whole CR word. In the ELFv2 ABI, every CR that was 1303 // actually saved gets its own CFI record. 1304 unsigned CRReg = isELFv2ABI? 
Reg : (unsigned) PPC::CR2; 1305 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1306 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1307 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1308 .addCFIIndex(CFIIndex); 1309 continue; 1310 } 1311 1312 if (CSI[I].isSpilledToReg()) { 1313 unsigned SpilledReg = CSI[I].getDstReg(); 1314 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1315 nullptr, MRI->getDwarfRegNum(Reg, true), 1316 MRI->getDwarfRegNum(SpilledReg, true))); 1317 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1318 .addCFIIndex(CFIRegister); 1319 } else { 1320 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1321 // We have changed the object offset above but we do not want to change 1322 // the actual offsets in the CFI instruction so we have to undo the 1323 // offset change here. 1324 if (MovingStackUpdateDown) 1325 Offset -= NegFrameSize; 1326 1327 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1328 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1329 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1330 .addCFIIndex(CFIIndex); 1331 } 1332 } 1333 } 1334 } 1335 1336 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1337 MachineBasicBlock &MBB) const { 1338 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1339 DebugLoc dl; 1340 1341 if (MBBI != MBB.end()) 1342 dl = MBBI->getDebugLoc(); 1343 1344 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1345 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1346 1347 // Get alignment info so we know how to restore the SP. 1348 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1349 1350 // Get the number of bytes allocated from the FrameInfo. 1351 int FrameSize = MFI.getStackSize(); 1352 1353 // Get processor type. 1354 bool isPPC64 = Subtarget.isPPC64(); 1355 // Get the ABI. 
1356 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1357 1358 // Check if the link register (LR) has been saved. 1359 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1360 bool MustSaveLR = FI->mustSaveLR(); 1361 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 1362 bool MustSaveCR = !MustSaveCRs.empty(); 1363 // Do we have a frame pointer and/or base pointer for this function? 1364 bool HasFP = hasFP(MF); 1365 bool HasBP = RegInfo->hasBasePointer(MF); 1366 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1367 1368 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1369 Register BPReg = RegInfo->getBaseRegister(MF); 1370 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1371 Register ScratchReg; 1372 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1373 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1374 : PPC::MTLR ); 1375 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1376 : PPC::LWZ ); 1377 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1378 : PPC::LIS ); 1379 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1380 : PPC::OR ); 1381 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1382 : PPC::ORI ); 1383 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1384 : PPC::ADDI ); 1385 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1386 : PPC::ADD4 ); 1387 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 1388 : PPC::LWZ); 1389 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 1390 : PPC::MTOCRF); 1391 int LROffset = getReturnSaveOffset(); 1392 1393 int FPOffset = 0; 1394 1395 // Using the same bool variable as below to suppress compiler warnings. 
1396 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1397 &TempReg); 1398 assert(SingleScratchReg && 1399 "Could not find an available scratch register"); 1400 1401 SingleScratchReg = ScratchReg == TempReg; 1402 1403 if (HasFP) { 1404 if (isSVR4ABI) { 1405 int FPIndex = FI->getFramePointerSaveIndex(); 1406 assert(FPIndex && "No Frame Pointer Save Slot!"); 1407 FPOffset = MFI.getObjectOffset(FPIndex); 1408 } else { 1409 FPOffset = getFramePointerSaveOffset(); 1410 } 1411 } 1412 1413 int BPOffset = 0; 1414 if (HasBP) { 1415 if (isSVR4ABI) { 1416 int BPIndex = FI->getBasePointerSaveIndex(); 1417 assert(BPIndex && "No Base Pointer Save Slot!"); 1418 BPOffset = MFI.getObjectOffset(BPIndex); 1419 } else { 1420 BPOffset = getBasePointerSaveOffset(); 1421 } 1422 } 1423 1424 int PBPOffset = 0; 1425 if (FI->usesPICBase()) { 1426 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1427 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1428 PBPOffset = MFI.getObjectOffset(PBPIndex); 1429 } 1430 1431 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1432 1433 if (IsReturnBlock) { 1434 unsigned RetOpcode = MBBI->getOpcode(); 1435 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1436 RetOpcode == PPC::TCRETURNdi || 1437 RetOpcode == PPC::TCRETURNai || 1438 RetOpcode == PPC::TCRETURNri8 || 1439 RetOpcode == PPC::TCRETURNdi8 || 1440 RetOpcode == PPC::TCRETURNai8; 1441 1442 if (UsesTCRet) { 1443 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1444 MachineOperand &StackAdjust = MBBI->getOperand(1); 1445 assert(StackAdjust.isImm() && "Expecting immediate value."); 1446 // Adjust stack pointer. 
1447 int StackAdj = StackAdjust.getImm(); 1448 int Delta = StackAdj - MaxTCRetDelta; 1449 assert((Delta >= 0) && "Delta must be positive"); 1450 if (MaxTCRetDelta>0) 1451 FrameSize += (StackAdj +Delta); 1452 else 1453 FrameSize += StackAdj; 1454 } 1455 } 1456 1457 // Frames of 32KB & larger require special handling because they cannot be 1458 // indexed into with a simple LD/LWZ immediate offset operand. 1459 bool isLargeFrame = !isInt<16>(FrameSize); 1460 1461 // On targets without red zone, the SP needs to be restored last, so that 1462 // all live contents of the stack frame are upwards of the SP. This means 1463 // that we cannot restore SP just now, since there may be more registers 1464 // to restore from the stack frame (e.g. R31). If the frame size is not 1465 // a simple immediate value, we will need a spare register to hold the 1466 // restored SP. If the frame size is known and small, we can simply adjust 1467 // the offsets of the registers to be restored, and still use SP to restore 1468 // them. In such case, the final update of SP will be to add the frame 1469 // size to it. 1470 // To simplify the code, set RBReg to the base register used to restore 1471 // values from the stack, and set SPAdd to the value that needs to be added 1472 // to the SP at the end. The default values are as if red zone was present. 1473 unsigned RBReg = SPReg; 1474 unsigned SPAdd = 0; 1475 1476 // Check if we can move the stack update instruction up the epilogue 1477 // past the callee saves. This will allow the move to LR instruction 1478 // to be executed before the restores of the callee saves which means 1479 // that the callee saves can hide the latency from the MTLR instrcution. 
1480 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1481 if (stackUpdateCanBeMoved(MF)) { 1482 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1483 for (CalleeSavedInfo CSI : Info) { 1484 int FrIdx = CSI.getFrameIdx(); 1485 // If the frame index is not negative the callee saved info belongs to a 1486 // stack object that is not a fixed stack object. We ignore non-fixed 1487 // stack objects because we won't move the update of the stack pointer 1488 // past them. 1489 if (FrIdx >= 0) 1490 continue; 1491 1492 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1493 StackUpdateLoc--; 1494 else { 1495 // Abort the operation as we can't update all CSR restores. 1496 StackUpdateLoc = MBBI; 1497 break; 1498 } 1499 } 1500 } 1501 1502 if (FrameSize) { 1503 // In the prologue, the loaded (or persistent) stack pointer value is 1504 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1505 // zone add this offset back now. 1506 1507 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1508 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1509 // call which invalidates the stack pointer value in SP(0). So we use the 1510 // value of R31 in this case. 
1511 if (FI->hasFastCall()) { 1512 assert(HasFP && "Expecting a valid frame pointer."); 1513 if (!HasRedZone) 1514 RBReg = FPReg; 1515 if (!isLargeFrame) { 1516 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1517 .addReg(FPReg).addImm(FrameSize); 1518 } else { 1519 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1520 .addImm(FrameSize >> 16); 1521 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1522 .addReg(ScratchReg, RegState::Kill) 1523 .addImm(FrameSize & 0xFFFF); 1524 BuildMI(MBB, MBBI, dl, AddInst) 1525 .addReg(RBReg) 1526 .addReg(FPReg) 1527 .addReg(ScratchReg); 1528 } 1529 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1530 if (HasRedZone) { 1531 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1532 .addReg(SPReg) 1533 .addImm(FrameSize); 1534 } else { 1535 // Make sure that adding FrameSize will not overflow the max offset 1536 // size. 1537 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1538 "Local offsets should be negative"); 1539 SPAdd = FrameSize; 1540 FPOffset += FrameSize; 1541 BPOffset += FrameSize; 1542 PBPOffset += FrameSize; 1543 } 1544 } else { 1545 // We don't want to use ScratchReg as a base register, because it 1546 // could happen to be R0. Use FP instead, but make sure to preserve it. 1547 if (!HasRedZone) { 1548 // If FP is not saved, copy it to ScratchReg. 1549 if (!HasFP) 1550 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1551 .addReg(FPReg) 1552 .addReg(FPReg); 1553 RBReg = FPReg; 1554 } 1555 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1556 .addImm(0) 1557 .addReg(SPReg); 1558 } 1559 } 1560 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1561 // If there is no red zone, ScratchReg may be needed for holding a useful 1562 // value (although not the base register). Make sure it is not overwritten 1563 // too early. 1564 1565 // If we need to restore both the LR and the CR and we only have one 1566 // available scratch register, we must do them one at a time. 
1567 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1568 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1569 // is live here. 1570 assert(HasRedZone && "Expecting red zone"); 1571 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1572 .addImm(CRSaveOffset) 1573 .addReg(SPReg); 1574 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1575 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1576 .addReg(TempReg, getKillRegState(i == e-1)); 1577 } 1578 1579 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1580 // LR is stored in the caller's stack frame. ScratchReg will be needed 1581 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1582 // a base register anyway, because it may happen to be R0. 1583 bool LoadedLR = false; 1584 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1585 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1586 .addImm(LROffset+SPAdd) 1587 .addReg(RBReg); 1588 LoadedLR = true; 1589 } 1590 1591 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1592 assert(RBReg == SPReg && "Should be using SP as a base register"); 1593 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1594 .addImm(CRSaveOffset) 1595 .addReg(RBReg); 1596 } 1597 1598 if (HasFP) { 1599 // If there is red zone, restore FP directly, since SP has already been 1600 // restored. Otherwise, restore the value of FP into ScratchReg. 
1601 if (HasRedZone || RBReg == SPReg) 1602 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1603 .addImm(FPOffset) 1604 .addReg(SPReg); 1605 else 1606 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1607 .addImm(FPOffset) 1608 .addReg(RBReg); 1609 } 1610 1611 if (FI->usesPICBase()) 1612 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1613 .addImm(PBPOffset) 1614 .addReg(RBReg); 1615 1616 if (HasBP) 1617 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1618 .addImm(BPOffset) 1619 .addReg(RBReg); 1620 1621 // There is nothing more to be loaded from the stack, so now we can 1622 // restore SP: SP = RBReg + SPAdd. 1623 if (RBReg != SPReg || SPAdd != 0) { 1624 assert(!HasRedZone && "This should not happen with red zone"); 1625 // If SPAdd is 0, generate a copy. 1626 if (SPAdd == 0) 1627 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1628 .addReg(RBReg) 1629 .addReg(RBReg); 1630 else 1631 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1632 .addReg(RBReg) 1633 .addImm(SPAdd); 1634 1635 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1636 if (RBReg == FPReg) 1637 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1638 .addReg(ScratchReg) 1639 .addReg(ScratchReg); 1640 1641 // Now load the LR from the caller's stack frame. 1642 if (MustSaveLR && !LoadedLR) 1643 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1644 .addImm(LROffset) 1645 .addReg(SPReg); 1646 } 1647 1648 if (MustSaveCR && 1649 !(SingleScratchReg && MustSaveLR)) 1650 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1651 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1652 .addReg(TempReg, getKillRegState(i == e-1)); 1653 1654 if (MustSaveLR) 1655 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1656 1657 // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail 1658 // call optimization 1659 if (IsReturnBlock) { 1660 unsigned RetOpcode = MBBI->getOpcode(); 1661 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1662 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1663 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1664 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1665 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1666 1667 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1668 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1669 .addReg(SPReg).addImm(CallerAllocatedAmt); 1670 } else { 1671 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1672 .addImm(CallerAllocatedAmt >> 16); 1673 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1674 .addReg(ScratchReg, RegState::Kill) 1675 .addImm(CallerAllocatedAmt & 0xFFFF); 1676 BuildMI(MBB, MBBI, dl, AddInst) 1677 .addReg(SPReg) 1678 .addReg(FPReg) 1679 .addReg(ScratchReg); 1680 } 1681 } else { 1682 createTailCallBranchInstr(MBB); 1683 } 1684 } 1685 } 1686 1687 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1688 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1689 1690 // If we got this far a first terminator should exist. 1691 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1692 1693 DebugLoc dl = MBBI->getDebugLoc(); 1694 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1695 1696 // Create branch instruction for pseudo tail call return instruction. 1697 // The TCRETURNdi variants are direct calls. Valid targets for those are 1698 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel 1699 // since we can tail call external functions with PC-Rel (i.e. we don't need 1700 // to worry about different TOC pointers). Some of the external functions will 1701 // be MO_GlobalAddress while others like memcpy for example, are going to 1702 // be MO_ExternalSymbol. 
1703 unsigned RetOpcode = MBBI->getOpcode(); 1704 if (RetOpcode == PPC::TCRETURNdi) { 1705 MBBI = MBB.getLastNonDebugInstr(); 1706 MachineOperand &JumpTarget = MBBI->getOperand(0); 1707 if (JumpTarget.isGlobal()) 1708 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1709 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1710 else if (JumpTarget.isSymbol()) 1711 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1712 addExternalSymbol(JumpTarget.getSymbolName()); 1713 else 1714 llvm_unreachable("Expecting Global or External Symbol"); 1715 } else if (RetOpcode == PPC::TCRETURNri) { 1716 MBBI = MBB.getLastNonDebugInstr(); 1717 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1718 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1719 } else if (RetOpcode == PPC::TCRETURNai) { 1720 MBBI = MBB.getLastNonDebugInstr(); 1721 MachineOperand &JumpTarget = MBBI->getOperand(0); 1722 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1723 } else if (RetOpcode == PPC::TCRETURNdi8) { 1724 MBBI = MBB.getLastNonDebugInstr(); 1725 MachineOperand &JumpTarget = MBBI->getOperand(0); 1726 if (JumpTarget.isGlobal()) 1727 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1728 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1729 else if (JumpTarget.isSymbol()) 1730 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 
1731 addExternalSymbol(JumpTarget.getSymbolName()); 1732 else 1733 llvm_unreachable("Expecting Global or External Symbol"); 1734 } else if (RetOpcode == PPC::TCRETURNri8) { 1735 MBBI = MBB.getLastNonDebugInstr(); 1736 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1737 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1738 } else if (RetOpcode == PPC::TCRETURNai8) { 1739 MBBI = MBB.getLastNonDebugInstr(); 1740 MachineOperand &JumpTarget = MBBI->getOperand(0); 1741 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1742 } 1743 } 1744 1745 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1746 BitVector &SavedRegs, 1747 RegScavenger *RS) const { 1748 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1749 1750 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1751 1752 // Save and clear the LR state. 1753 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1754 unsigned LR = RegInfo->getRARegister(); 1755 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1756 SavedRegs.reset(LR); 1757 1758 // Save R31 if necessary 1759 int FPSI = FI->getFramePointerSaveIndex(); 1760 const bool isPPC64 = Subtarget.isPPC64(); 1761 MachineFrameInfo &MFI = MF.getFrameInfo(); 1762 1763 // If the frame pointer save index hasn't been defined yet. 1764 if (!FPSI && needsFP(MF)) { 1765 // Find out what the fix offset of the frame pointer save area. 1766 int FPOffset = getFramePointerSaveOffset(); 1767 // Allocate the frame index for frame pointer save area. 1768 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1769 // Save the result. 1770 FI->setFramePointerSaveIndex(FPSI); 1771 } 1772 1773 int BPSI = FI->getBasePointerSaveIndex(); 1774 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1775 int BPOffset = getBasePointerSaveOffset(); 1776 // Allocate the frame index for the base pointer save area. 1777 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1778 // Save the result. 
1779 FI->setBasePointerSaveIndex(BPSI); 1780 } 1781 1782 // Reserve stack space for the PIC Base register (R30). 1783 // Only used in SVR4 32-bit. 1784 if (FI->usesPICBase()) { 1785 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1786 FI->setPICBasePointerSaveIndex(PBPSI); 1787 } 1788 1789 // Make sure we don't explicitly spill r31, because, for example, we have 1790 // some inline asm which explicitly clobbers it, when we otherwise have a 1791 // frame pointer and are using r31's spill slot for the prologue/epilogue 1792 // code. Same goes for the base pointer and the PIC base register. 1793 if (needsFP(MF)) 1794 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1795 if (RegInfo->hasBasePointer(MF)) 1796 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1797 if (FI->usesPICBase()) 1798 SavedRegs.reset(PPC::R30); 1799 1800 // Reserve stack space to move the linkage area to in case of a tail call. 1801 int TCSPDelta = 0; 1802 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1803 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1804 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1805 } 1806 1807 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. 1808 // For 64-bit SVR4, and all flavors of AIX we create a FixedStack 1809 // object at the offset of the CR-save slot in the linkage area. The actual 1810 // save and restore of the condition register will be created as part of the 1811 // prologue and epilogue insertion, but the FixedStack object is needed to 1812 // keep the CalleSavedInfo valid. 1813 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || 1814 SavedRegs.test(PPC::CR4))) { 1815 const uint64_t SpillSize = 4; // Condition register is always 4 bytes. 1816 const int64_t SpillOffset = 1817 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 
4 : -4; 1818 int FrameIdx = 1819 MFI.CreateFixedObject(SpillSize, SpillOffset, 1820 /* IsImmutable */ true, /* IsAliased */ false); 1821 FI->setCRSpillFrameIndex(FrameIdx); 1822 } 1823 } 1824 1825 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1826 RegScavenger *RS) const { 1827 // Get callee saved register information. 1828 MachineFrameInfo &MFI = MF.getFrameInfo(); 1829 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1830 1831 // If the function is shrink-wrapped, and if the function has a tail call, the 1832 // tail call might not be in the new RestoreBlock, so real branch instruction 1833 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1834 // RestoreBlock. So we handle this case here. 1835 if (MFI.getSavePoint() && MFI.hasTailCall()) { 1836 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1837 for (MachineBasicBlock &MBB : MF) { 1838 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1839 createTailCallBranchInstr(MBB); 1840 } 1841 } 1842 1843 // Early exit if no callee saved registers are modified! 1844 if (CSI.empty() && !needsFP(MF)) { 1845 addScavengingSpillSlot(MF, RS); 1846 return; 1847 } 1848 1849 unsigned MinGPR = PPC::R31; 1850 unsigned MinG8R = PPC::X31; 1851 unsigned MinFPR = PPC::F31; 1852 unsigned MinVR = Subtarget.hasSPE() ? 
PPC::S31 : PPC::V31; 1853 1854 bool HasGPSaveArea = false; 1855 bool HasG8SaveArea = false; 1856 bool HasFPSaveArea = false; 1857 bool HasVRSAVESaveArea = false; 1858 bool HasVRSaveArea = false; 1859 1860 SmallVector<CalleeSavedInfo, 18> GPRegs; 1861 SmallVector<CalleeSavedInfo, 18> G8Regs; 1862 SmallVector<CalleeSavedInfo, 18> FPRegs; 1863 SmallVector<CalleeSavedInfo, 18> VRegs; 1864 1865 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1866 unsigned Reg = CSI[i].getReg(); 1867 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 1868 (Reg != PPC::X2 && Reg != PPC::R2)) && 1869 "Not expecting to try to spill R2 in a function that must save TOC"); 1870 if (PPC::GPRCRegClass.contains(Reg)) { 1871 HasGPSaveArea = true; 1872 1873 GPRegs.push_back(CSI[i]); 1874 1875 if (Reg < MinGPR) { 1876 MinGPR = Reg; 1877 } 1878 } else if (PPC::G8RCRegClass.contains(Reg)) { 1879 HasG8SaveArea = true; 1880 1881 G8Regs.push_back(CSI[i]); 1882 1883 if (Reg < MinG8R) { 1884 MinG8R = Reg; 1885 } 1886 } else if (PPC::F8RCRegClass.contains(Reg)) { 1887 HasFPSaveArea = true; 1888 1889 FPRegs.push_back(CSI[i]); 1890 1891 if (Reg < MinFPR) { 1892 MinFPR = Reg; 1893 } 1894 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1895 PPC::CRRCRegClass.contains(Reg)) { 1896 ; // do nothing, as we already know whether CRs are spilled 1897 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 1898 HasVRSAVESaveArea = true; 1899 } else if (PPC::VRRCRegClass.contains(Reg) || 1900 PPC::SPERCRegClass.contains(Reg)) { 1901 // Altivec and SPE are mutually exclusive, but have the same stack 1902 // alignment requirements, so overload the save area for both cases. 
1903 HasVRSaveArea = true; 1904 1905 VRegs.push_back(CSI[i]); 1906 1907 if (Reg < MinVR) { 1908 MinVR = Reg; 1909 } 1910 } else { 1911 llvm_unreachable("Unknown RegisterClass!"); 1912 } 1913 } 1914 1915 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1916 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1917 1918 int64_t LowerBound = 0; 1919 1920 // Take into account stack space reserved for tail calls. 1921 int TCSPDelta = 0; 1922 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1923 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1924 LowerBound = TCSPDelta; 1925 } 1926 1927 // The Floating-point register save area is right below the back chain word 1928 // of the previous stack frame. 1929 if (HasFPSaveArea) { 1930 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1931 int FI = FPRegs[i].getFrameIdx(); 1932 1933 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1934 } 1935 1936 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1937 } 1938 1939 // Check whether the frame pointer register is allocated. If so, make sure it 1940 // is spilled to the correct offset. 1941 if (needsFP(MF)) { 1942 int FI = PFI->getFramePointerSaveIndex(); 1943 assert(FI && "No Frame Pointer Save Slot!"); 1944 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1945 // FP is R31/X31, so no need to update MinGPR/MinG8R. 
1946 HasGPSaveArea = true; 1947 } 1948 1949 if (PFI->usesPICBase()) { 1950 int FI = PFI->getPICBasePointerSaveIndex(); 1951 assert(FI && "No PIC Base Pointer Save Slot!"); 1952 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1953 1954 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 1955 HasGPSaveArea = true; 1956 } 1957 1958 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1959 if (RegInfo->hasBasePointer(MF)) { 1960 int FI = PFI->getBasePointerSaveIndex(); 1961 assert(FI && "No Base Pointer Save Slot!"); 1962 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1963 1964 Register BP = RegInfo->getBaseRegister(MF); 1965 if (PPC::G8RCRegClass.contains(BP)) { 1966 MinG8R = std::min<unsigned>(MinG8R, BP); 1967 HasG8SaveArea = true; 1968 } else if (PPC::GPRCRegClass.contains(BP)) { 1969 MinGPR = std::min<unsigned>(MinGPR, BP); 1970 HasGPSaveArea = true; 1971 } 1972 } 1973 1974 // General register save area starts right below the Floating-point 1975 // register save area. 1976 if (HasGPSaveArea || HasG8SaveArea) { 1977 // Move general register save area spill slots down, taking into account 1978 // the size of the Floating-point register save area. 1979 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 1980 if (!GPRegs[i].isSpilledToReg()) { 1981 int FI = GPRegs[i].getFrameIdx(); 1982 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1983 } 1984 } 1985 1986 // Move general register save area spill slots down, taking into account 1987 // the size of the Floating-point register save area. 1988 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 1989 if (!G8Regs[i].isSpilledToReg()) { 1990 int FI = G8Regs[i].getFrameIdx(); 1991 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1992 } 1993 } 1994 1995 unsigned MinReg = 1996 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 1997 TRI->getEncodingValue(MinG8R)); 1998 1999 const unsigned GPRegSize = Subtarget.isPPC64() ? 
8 : 4; 2000 LowerBound -= (31 - MinReg + 1) * GPRegSize; 2001 } 2002 2003 // For 32-bit only, the CR save area is below the general register 2004 // save area. For 64-bit SVR4, the CR save area is addressed relative 2005 // to the stack pointer and hence does not need an adjustment here. 2006 // Only CR2 (the first nonvolatile spilled) has an associated frame 2007 // index so that we have a single uniform save area. 2008 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 2009 // Adjust the frame index of the CR spill slot. 2010 for (const auto &CSInfo : CSI) { 2011 if (CSInfo.getReg() == PPC::CR2) { 2012 int FI = CSInfo.getFrameIdx(); 2013 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2014 break; 2015 } 2016 } 2017 2018 LowerBound -= 4; // The CR save area is always 4 bytes long. 2019 } 2020 2021 if (HasVRSAVESaveArea) { 2022 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 2023 // which have the VRSAVE register class? 2024 // Adjust the frame index of the VRSAVE spill slot. 2025 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2026 unsigned Reg = CSI[i].getReg(); 2027 2028 if (PPC::VRSAVERCRegClass.contains(Reg)) { 2029 int FI = CSI[i].getFrameIdx(); 2030 2031 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2032 } 2033 } 2034 2035 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 2036 } 2037 2038 // Both Altivec and SPE have the same alignment and padding requirements 2039 // within the stack frame. 2040 if (HasVRSaveArea) { 2041 // Insert alignment padding, we need 16-byte alignment. Note: for positive 2042 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2043 // we are using negative number here (the stack grows downward). We should 2044 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2045 // is the alignment size ( n = 16 here) and y is the size after aligning. 
2046 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2047 LowerBound &= ~(15); 2048 2049 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2050 int FI = VRegs[i].getFrameIdx(); 2051 2052 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2053 } 2054 } 2055 2056 addScavengingSpillSlot(MF, RS); 2057 } 2058 2059 void 2060 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2061 RegScavenger *RS) const { 2062 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2063 // a large stack, which will require scavenging a register to materialize a 2064 // large offset. 2065 2066 // We need to have a scavenger spill slot for spills if the frame size is 2067 // large. In case there is no free register for large-offset addressing, 2068 // this slot is used for the necessary emergency spill. Also, we need the 2069 // slot for dynamic stack allocations. 2070 2071 // The scavenger might be invoked if the frame offset does not fit into 2072 // the 16-bit immediate. We don't know the complete frame size here 2073 // because we've not yet computed callee-saved register spills or the 2074 // needed alignment padding. 2075 unsigned StackSize = determineFrameLayout(MF, true); 2076 MachineFrameInfo &MFI = MF.getFrameInfo(); 2077 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 2078 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 2079 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2080 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2081 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2082 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2083 unsigned Size = TRI.getSpillSize(RC); 2084 unsigned Align = TRI.getSpillAlignment(RC); 2085 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2086 2087 // Might we have over-aligned allocas? 
2088 bool HasAlVars = 2089 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2090 2091 // These kinds of spills might need two registers. 2092 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 2093 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false)); 2094 2095 } 2096 } 2097 2098 // This function checks if a callee saved gpr can be spilled to a volatile 2099 // vector register. This occurs for leaf functions when the option 2100 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2101 // which were not spilled to vectors, return false so the target independent 2102 // code can handle them by assigning a FrameIdx to a stack slot. 2103 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2104 MachineFunction &MF, const TargetRegisterInfo *TRI, 2105 std::vector<CalleeSavedInfo> &CSI) const { 2106 2107 if (CSI.empty()) 2108 return true; // Early exit if no callee saved registers are modified! 2109 2110 // Early exit if cannot spill gprs to volatile vector registers. 2111 MachineFrameInfo &MFI = MF.getFrameInfo(); 2112 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2113 return false; 2114 2115 // Build a BitVector of VSRs that can be used for spilling GPRs. 2116 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2117 BitVector BVCalleeSaved(TRI->getNumRegs()); 2118 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2119 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2120 for (unsigned i = 0; CSRegs[i]; ++i) 2121 BVCalleeSaved.set(CSRegs[i]); 2122 2123 for (unsigned Reg : BVAllocatable.set_bits()) { 2124 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2125 // used in the function. 
2126 if (BVCalleeSaved[Reg] || 2127 (!PPC::F8RCRegClass.contains(Reg) && 2128 !PPC::VFRCRegClass.contains(Reg)) || 2129 (MF.getRegInfo().isPhysRegUsed(Reg))) 2130 BVAllocatable.reset(Reg); 2131 } 2132 2133 bool AllSpilledToReg = true; 2134 for (auto &CS : CSI) { 2135 if (BVAllocatable.none()) 2136 return false; 2137 2138 unsigned Reg = CS.getReg(); 2139 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2140 AllSpilledToReg = false; 2141 continue; 2142 } 2143 2144 unsigned VolatileVFReg = BVAllocatable.find_first(); 2145 if (VolatileVFReg < BVAllocatable.size()) { 2146 CS.setDstReg(VolatileVFReg); 2147 BVAllocatable.reset(VolatileVFReg); 2148 } else { 2149 AllSpilledToReg = false; 2150 } 2151 } 2152 return AllSpilledToReg; 2153 } 2154 2155 bool PPCFrameLowering::spillCalleeSavedRegisters( 2156 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2157 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2158 2159 MachineFunction *MF = MBB.getParent(); 2160 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2161 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2162 bool MustSaveTOC = FI->mustSaveTOC(); 2163 DebugLoc DL; 2164 bool CRSpilled = false; 2165 MachineInstrBuilder CRMIB; 2166 2167 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2168 unsigned Reg = CSI[i].getReg(); 2169 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2170 if (Reg == PPC::VRSAVE) 2171 continue; 2172 2173 // CR2 through CR4 are the nonvolatile CR fields. 2174 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2175 2176 // Add the callee-saved register as live-in; it's killed at the spill. 2177 // Do not do this for callee-saved registers that are live-in to the 2178 // function because they will already be marked live-in and this will be 2179 // adding it for a second time. It is an error to add the same register 2180 // to the set more than once. 
2181 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2182 bool IsLiveIn = MRI.isLiveIn(Reg); 2183 if (!IsLiveIn) 2184 MBB.addLiveIn(Reg); 2185 2186 if (CRSpilled && IsCRField) { 2187 CRMIB.addReg(Reg, RegState::ImplicitKill); 2188 continue; 2189 } 2190 2191 // The actual spill will happen in the prologue. 2192 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2193 continue; 2194 2195 // Insert the spill to the stack frame. 2196 if (IsCRField) { 2197 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2198 if (!Subtarget.is32BitELFABI()) { 2199 // The actual spill will happen at the start of the prologue. 2200 FuncInfo->addMustSaveCR(Reg); 2201 } else { 2202 CRSpilled = true; 2203 FuncInfo->setSpillsCR(); 2204 2205 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2206 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2207 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2208 .addReg(Reg, RegState::ImplicitKill); 2209 2210 MBB.insert(MI, CRMIB); 2211 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2212 .addReg(PPC::R12, 2213 getKillRegState(true)), 2214 CSI[i].getFrameIdx())); 2215 } 2216 } else { 2217 if (CSI[i].isSpilledToReg()) { 2218 NumPESpillVSR++; 2219 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2220 .addReg(Reg, getKillRegState(true)); 2221 } else { 2222 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2223 // Use !IsLiveIn for the kill flag. 2224 // We do not want to kill registers that are live in this function 2225 // before their use because they will become undefined registers. 2226 // Functions without NoUnwind need to preserve the order of elements in 2227 // saved vector registers. 
2228 if (Subtarget.needsSwapsForVSXMemOps() && 2229 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2230 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2231 CSI[i].getFrameIdx(), RC, TRI); 2232 else 2233 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), 2234 RC, TRI); 2235 } 2236 } 2237 } 2238 return true; 2239 } 2240 2241 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2242 bool CR4Spilled, MachineBasicBlock &MBB, 2243 MachineBasicBlock::iterator MI, 2244 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2245 2246 MachineFunction *MF = MBB.getParent(); 2247 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2248 DebugLoc DL; 2249 unsigned MoveReg = PPC::R12; 2250 2251 // 32-bit: FP-relative 2252 MBB.insert(MI, 2253 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2254 CSI[CSIIndex].getFrameIdx())); 2255 2256 unsigned RestoreOp = PPC::MTOCRF; 2257 if (CR2Spilled) 2258 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2259 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2260 2261 if (CR3Spilled) 2262 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2263 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2264 2265 if (CR4Spilled) 2266 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2267 .addReg(MoveReg, getKillRegState(true))); 2268 } 2269 2270 MachineBasicBlock::iterator PPCFrameLowering:: 2271 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2272 MachineBasicBlock::iterator I) const { 2273 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2274 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2275 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2276 // Add (actually subtract) back the amount the callee popped on return. 2277 if (int CalleeAmt = I->getOperand(1).getImm()) { 2278 bool is64Bit = Subtarget.isPPC64(); 2279 CalleeAmt *= -1; 2280 unsigned StackReg = is64Bit ? 
PPC::X1 : PPC::R1; 2281 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2282 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2283 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2284 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2285 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2286 const DebugLoc &dl = I->getDebugLoc(); 2287 2288 if (isInt<16>(CalleeAmt)) { 2289 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2290 .addReg(StackReg, RegState::Kill) 2291 .addImm(CalleeAmt); 2292 } else { 2293 MachineBasicBlock::iterator MBBI = I; 2294 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2295 .addImm(CalleeAmt >> 16); 2296 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2297 .addReg(TmpReg, RegState::Kill) 2298 .addImm(CalleeAmt & 0xFFFF); 2299 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2300 .addReg(StackReg, RegState::Kill) 2301 .addReg(TmpReg); 2302 } 2303 } 2304 } 2305 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2306 return MBB.erase(I); 2307 } 2308 2309 static bool isCalleeSavedCR(unsigned Reg) { 2310 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2311 } 2312 2313 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2314 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2315 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2316 MachineFunction *MF = MBB.getParent(); 2317 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2318 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2319 bool MustSaveTOC = FI->mustSaveTOC(); 2320 bool CR2Spilled = false; 2321 bool CR3Spilled = false; 2322 bool CR4Spilled = false; 2323 unsigned CSIIndex = 0; 2324 2325 // Initialize insertion-point logic; we will be restoring in reverse 2326 // order of spill. 
2327 MachineBasicBlock::iterator I = MI, BeforeI = I; 2328 bool AtStart = I == MBB.begin(); 2329 2330 if (!AtStart) 2331 --BeforeI; 2332 2333 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2334 unsigned Reg = CSI[i].getReg(); 2335 2336 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2337 if (Reg == PPC::VRSAVE) 2338 continue; 2339 2340 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2341 continue; 2342 2343 // Restore of callee saved condition register field is handled during 2344 // epilogue insertion. 2345 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2346 continue; 2347 2348 if (Reg == PPC::CR2) { 2349 CR2Spilled = true; 2350 // The spill slot is associated only with CR2, which is the 2351 // first nonvolatile spilled. Save it here. 2352 CSIIndex = i; 2353 continue; 2354 } else if (Reg == PPC::CR3) { 2355 CR3Spilled = true; 2356 continue; 2357 } else if (Reg == PPC::CR4) { 2358 CR4Spilled = true; 2359 continue; 2360 } else { 2361 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2362 // least one CR register, restore all spilled CRs together. 2363 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2364 bool is31 = needsFP(*MF); 2365 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2366 CSIIndex); 2367 CR2Spilled = CR3Spilled = CR4Spilled = false; 2368 } 2369 2370 if (CSI[i].isSpilledToReg()) { 2371 DebugLoc DL; 2372 NumPEReloadVSR++; 2373 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2374 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2375 } else { 2376 // Default behavior for non-CR saves. 2377 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2378 2379 // Functions without NoUnwind need to preserve the order of elements in 2380 // saved vector registers. 
2381 if (Subtarget.needsSwapsForVSXMemOps() && 2382 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2383 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2384 TRI); 2385 else 2386 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2387 2388 assert(I != MBB.begin() && 2389 "loadRegFromStackSlot didn't insert any code!"); 2390 } 2391 } 2392 2393 // Insert in reverse order. 2394 if (AtStart) 2395 I = MBB.begin(); 2396 else { 2397 I = BeforeI; 2398 ++I; 2399 } 2400 } 2401 2402 // If we haven't yet spilled the CRs, do so now. 2403 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2404 assert(Subtarget.is32BitELFABI() && 2405 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2406 bool is31 = needsFP(*MF); 2407 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2408 } 2409 2410 return true; 2411 } 2412 2413 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2414 return TOCSaveOffset; 2415 } 2416 2417 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2418 return FramePointerSaveOffset; 2419 } 2420 2421 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2422 if (Subtarget.isAIXABI()) 2423 report_fatal_error("BasePointer is not implemented on AIX yet."); 2424 return BasePointerSaveOffset; 2425 } 2426 2427 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2428 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2429 return false; 2430 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2431 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2432 } 2433