1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MCTargetDesc/PPCPredicates.h" 14 #include "PPCFrameLowering.h" 15 #include "PPCInstrBuilder.h" 16 #include "PPCInstrInfo.h" 17 #include "PPCMachineFunctionInfo.h" 18 #include "PPCSubtarget.h" 19 #include "PPCTargetMachine.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineInstrBuilder.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/RegisterScavenging.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/Target/TargetOptions.h" 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "framelowering" 33 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 34 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 35 STATISTIC(NumPrologProbed, "Number of prologues probed"); 36 37 static cl::opt<bool> 38 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 39 cl::desc("Enable spills in prologue to vector registers."), 40 cl::init(false), cl::Hidden); 41 42 /// VRRegNo - Map from a numbered VR register to its enum value. 43 /// 44 static const MCPhysReg VRRegNo[] = { 45 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 46 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 47 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 48 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 49 }; 50 51 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 52 if (STI.isAIXABI()) 53 return STI.isPPC64() ? 16 : 8; 54 // SVR4 ABI: 55 return STI.isPPC64() ? 16 : 4; 56 } 57 58 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 59 if (STI.isAIXABI()) 60 return STI.isPPC64() ? 40 : 20; 61 return STI.isELFv2ABI() ? 24 : 40; 62 } 63 64 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 65 // First slot in the general register save area. 66 return STI.isPPC64() ? -8U : -4U; 67 } 68 69 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 70 if (STI.isAIXABI() || STI.isPPC64()) 71 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 72 73 // 32-bit SVR4 ABI: 74 return 8; 75 } 76 77 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 78 // Third slot in the general purpose register save area. 79 if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent()) 80 return -12U; 81 82 // Second slot in the general purpose register save area. 83 return STI.isPPC64() ? -16U : -8U; 84 } 85 86 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { 87 return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8; 88 } 89 90 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 91 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 92 STI.getPlatformStackAlignment(), 0), 93 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 94 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 95 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 96 LinkageSize(computeLinkageSize(Subtarget)), 97 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 98 CRSaveOffset(computeCRSaveOffset(Subtarget)) {} 99 100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 102 unsigned &NumEntries) const { 103 104 // Floating-point register save area offsets. 105 #define CALLEE_SAVED_FPRS \ 106 {PPC::F31, -8}, \ 107 {PPC::F30, -16}, \ 108 {PPC::F29, -24}, \ 109 {PPC::F28, -32}, \ 110 {PPC::F27, -40}, \ 111 {PPC::F26, -48}, \ 112 {PPC::F25, -56}, \ 113 {PPC::F24, -64}, \ 114 {PPC::F23, -72}, \ 115 {PPC::F22, -80}, \ 116 {PPC::F21, -88}, \ 117 {PPC::F20, -96}, \ 118 {PPC::F19, -104}, \ 119 {PPC::F18, -112}, \ 120 {PPC::F17, -120}, \ 121 {PPC::F16, -128}, \ 122 {PPC::F15, -136}, \ 123 {PPC::F14, -144} 124 125 // 32-bit general purpose register save area offsets shared by ELF and 126 // AIX. AIX has an extra CSR with r13. 127 #define CALLEE_SAVED_GPRS32 \ 128 {PPC::R31, -4}, \ 129 {PPC::R30, -8}, \ 130 {PPC::R29, -12}, \ 131 {PPC::R28, -16}, \ 132 {PPC::R27, -20}, \ 133 {PPC::R26, -24}, \ 134 {PPC::R25, -28}, \ 135 {PPC::R24, -32}, \ 136 {PPC::R23, -36}, \ 137 {PPC::R22, -40}, \ 138 {PPC::R21, -44}, \ 139 {PPC::R20, -48}, \ 140 {PPC::R19, -52}, \ 141 {PPC::R18, -56}, \ 142 {PPC::R17, -60}, \ 143 {PPC::R16, -64}, \ 144 {PPC::R15, -68}, \ 145 {PPC::R14, -72} 146 147 // 64-bit general purpose register save area offsets. 148 #define CALLEE_SAVED_GPRS64 \ 149 {PPC::X31, -8}, \ 150 {PPC::X30, -16}, \ 151 {PPC::X29, -24}, \ 152 {PPC::X28, -32}, \ 153 {PPC::X27, -40}, \ 154 {PPC::X26, -48}, \ 155 {PPC::X25, -56}, \ 156 {PPC::X24, -64}, \ 157 {PPC::X23, -72}, \ 158 {PPC::X22, -80}, \ 159 {PPC::X21, -88}, \ 160 {PPC::X20, -96}, \ 161 {PPC::X19, -104}, \ 162 {PPC::X18, -112}, \ 163 {PPC::X17, -120}, \ 164 {PPC::X16, -128}, \ 165 {PPC::X15, -136}, \ 166 {PPC::X14, -144} 167 168 // Vector register save area offsets. 169 #define CALLEE_SAVED_VRS \ 170 {PPC::V31, -16}, \ 171 {PPC::V30, -32}, \ 172 {PPC::V29, -48}, \ 173 {PPC::V28, -64}, \ 174 {PPC::V27, -80}, \ 175 {PPC::V26, -96}, \ 176 {PPC::V25, -112}, \ 177 {PPC::V24, -128}, \ 178 {PPC::V23, -144}, \ 179 {PPC::V22, -160}, \ 180 {PPC::V21, -176}, \ 181 {PPC::V20, -192} 182 183 // Note that the offsets here overlap, but this is fixed up in 184 // processFunctionBeforeFrameFinalized. 185 186 static const SpillSlot ELFOffsets32[] = { 187 CALLEE_SAVED_FPRS, 188 CALLEE_SAVED_GPRS32, 189 190 // CR save area offset. We map each of the nonvolatile CR fields 191 // to the slot for CR2, which is the first of the nonvolatile CR 192 // fields to be assigned, so that we only allocate one save slot. 193 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 194 {PPC::CR2, -4}, 195 196 // VRSAVE save area offset. 197 {PPC::VRSAVE, -4}, 198 199 CALLEE_SAVED_VRS, 200 201 // SPE register save area (overlaps Vector save area). 202 {PPC::S31, -8}, 203 {PPC::S30, -16}, 204 {PPC::S29, -24}, 205 {PPC::S28, -32}, 206 {PPC::S27, -40}, 207 {PPC::S26, -48}, 208 {PPC::S25, -56}, 209 {PPC::S24, -64}, 210 {PPC::S23, -72}, 211 {PPC::S22, -80}, 212 {PPC::S21, -88}, 213 {PPC::S20, -96}, 214 {PPC::S19, -104}, 215 {PPC::S18, -112}, 216 {PPC::S17, -120}, 217 {PPC::S16, -128}, 218 {PPC::S15, -136}, 219 {PPC::S14, -144}}; 220 221 static const SpillSlot ELFOffsets64[] = { 222 CALLEE_SAVED_FPRS, 223 CALLEE_SAVED_GPRS64, 224 225 // VRSAVE save area offset. 226 {PPC::VRSAVE, -4}, 227 CALLEE_SAVED_VRS 228 }; 229 230 static const SpillSlot AIXOffsets32[] = { 231 CALLEE_SAVED_FPRS, 232 CALLEE_SAVED_GPRS32, 233 // Add AIX's extra CSR. 234 {PPC::R13, -76}, 235 // TODO: Update when we add vector support for AIX. 236 }; 237 238 static const SpillSlot AIXOffsets64[] = { 239 CALLEE_SAVED_FPRS, 240 CALLEE_SAVED_GPRS64, 241 // TODO: Update when we add vector support for AIX. 242 }; 243 244 if (Subtarget.is64BitELFABI()) { 245 NumEntries = array_lengthof(ELFOffsets64); 246 return ELFOffsets64; 247 } 248 249 if (Subtarget.is32BitELFABI()) { 250 NumEntries = array_lengthof(ELFOffsets32); 251 return ELFOffsets32; 252 } 253 254 assert(Subtarget.isAIXABI() && "Unexpected ABI."); 255 256 if (Subtarget.isPPC64()) { 257 NumEntries = array_lengthof(AIXOffsets64); 258 return AIXOffsets64; 259 } 260 261 NumEntries = array_lengthof(AIXOffsets32); 262 return AIXOffsets32; 263 } 264 265 /// RemoveVRSaveCode - We have found that this function does not need any code 266 /// to manipulate the VRSAVE register, even though it uses vector registers. 267 /// This can happen when the only registers used are known to be live in or out 268 /// of the function. Remove all of the VRSAVE related code from the function. 269 /// FIXME: The removal of the code results in a compile failure at -O0 when the 270 /// function contains a function call, as the GPR containing original VRSAVE 271 /// contents is spilled and reloaded around the call. Without the prolog code, 272 /// the spill instruction refers to an undefined register. This code needs 273 /// to account for all uses of that GPR. 274 static void RemoveVRSaveCode(MachineInstr &MI) { 275 MachineBasicBlock *Entry = MI.getParent(); 276 MachineFunction *MF = Entry->getParent(); 277 278 // We know that the MTVRSAVE instruction immediately follows MI. Remove it. 279 MachineBasicBlock::iterator MBBI = MI; 280 ++MBBI; 281 assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE); 282 MBBI->eraseFromParent(); 283 284 bool RemovedAllMTVRSAVEs = true; 285 // See if we can find and remove the MTVRSAVE instruction from all of the 286 // epilog blocks. 287 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) { 288 // If last instruction is a return instruction, add an epilogue 289 if (I->isReturnBlock()) { 290 bool FoundIt = false; 291 for (MBBI = I->end(); MBBI != I->begin(); ) { 292 --MBBI; 293 if (MBBI->getOpcode() == PPC::MTVRSAVE) { 294 MBBI->eraseFromParent(); // remove it. 295 FoundIt = true; 296 break; 297 } 298 } 299 RemovedAllMTVRSAVEs &= FoundIt; 300 } 301 } 302 303 // If we found and removed all MTVRSAVE instructions, remove the read of 304 // VRSAVE as well. 305 if (RemovedAllMTVRSAVEs) { 306 MBBI = MI; 307 assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?"); 308 --MBBI; 309 assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?"); 310 MBBI->eraseFromParent(); 311 } 312 313 // Finally, nuke the UPDATE_VRSAVE. 314 MI.eraseFromParent(); 315 } 316 317 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the 318 // instruction selector. Based on the vector registers that have been used, 319 // transform this into the appropriate ORI instruction. 320 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) { 321 MachineFunction *MF = MI.getParent()->getParent(); 322 const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo(); 323 DebugLoc dl = MI.getDebugLoc(); 324 325 const MachineRegisterInfo &MRI = MF->getRegInfo(); 326 unsigned UsedRegMask = 0; 327 for (unsigned i = 0; i != 32; ++i) 328 if (MRI.isPhysRegModified(VRRegNo[i])) 329 UsedRegMask |= 1 << (31-i); 330 331 // Live in and live out values already must be in the mask, so don't bother 332 // marking them. 333 for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) { 334 unsigned RegNo = TRI->getEncodingValue(LI.first); 335 if (VRRegNo[RegNo] == LI.first) // If this really is a vector reg. 336 UsedRegMask &= ~(1 << (31-RegNo)); // Doesn't need to be marked. 337 } 338 339 // Live out registers appear as use operands on return instructions. 340 for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end(); 341 UsedRegMask != 0 && BI != BE; ++BI) { 342 const MachineBasicBlock &MBB = *BI; 343 if (!MBB.isReturnBlock()) 344 continue; 345 const MachineInstr &Ret = MBB.back(); 346 for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) { 347 const MachineOperand &MO = Ret.getOperand(I); 348 if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg())) 349 continue; 350 unsigned RegNo = TRI->getEncodingValue(MO.getReg()); 351 UsedRegMask &= ~(1 << (31-RegNo)); 352 } 353 } 354 355 // If no registers are used, turn this into a copy. 356 if (UsedRegMask == 0) { 357 // Remove all VRSAVE code. 358 RemoveVRSaveCode(MI); 359 return; 360 } 361 362 Register SrcReg = MI.getOperand(1).getReg(); 363 Register DstReg = MI.getOperand(0).getReg(); 364 365 if ((UsedRegMask & 0xFFFF) == UsedRegMask) { 366 if (DstReg != SrcReg) 367 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 368 .addReg(SrcReg) 369 .addImm(UsedRegMask); 370 else 371 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 372 .addReg(SrcReg, RegState::Kill) 373 .addImm(UsedRegMask); 374 } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) { 375 if (DstReg != SrcReg) 376 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 377 .addReg(SrcReg) 378 .addImm(UsedRegMask >> 16); 379 else 380 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 381 .addReg(SrcReg, RegState::Kill) 382 .addImm(UsedRegMask >> 16); 383 } else { 384 if (DstReg != SrcReg) 385 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 386 .addReg(SrcReg) 387 .addImm(UsedRegMask >> 16); 388 else 389 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg) 390 .addReg(SrcReg, RegState::Kill) 391 .addImm(UsedRegMask >> 16); 392 393 BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg) 394 .addReg(DstReg, RegState::Kill) 395 .addImm(UsedRegMask & 0xFFFF); 396 } 397 398 // Remove the old UPDATE_VRSAVE instruction. 399 MI.eraseFromParent(); 400 } 401 402 static bool spillsCR(const MachineFunction &MF) { 403 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 404 return FuncInfo->isCRSpilled(); 405 } 406 407 static bool spillsVRSAVE(const MachineFunction &MF) { 408 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 409 return FuncInfo->isVRSAVESpilled(); 410 } 411 412 static bool hasSpills(const MachineFunction &MF) { 413 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 414 return FuncInfo->hasSpills(); 415 } 416 417 static bool hasNonRISpills(const MachineFunction &MF) { 418 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 419 return FuncInfo->hasNonRISpills(); 420 } 421 422 /// MustSaveLR - Return true if this function requires that we save the LR 423 /// register onto the stack in the prolog and restore it in the epilog of the 424 /// function. 425 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 426 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 427 428 // We need a save/restore of LR if there is any def of LR (which is 429 // defined by calls, including the PIC setup sequence), or if there is 430 // some use of the LR stack slot (e.g. for builtin_return_address). 431 // (LR comes in 32 and 64 bit versions.) 432 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 433 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 434 } 435 436 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 437 /// call frame size. Update the MachineFunction object with the stack size. 438 unsigned 439 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 440 bool UseEstimate) const { 441 unsigned NewMaxCallFrameSize = 0; 442 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 443 &NewMaxCallFrameSize); 444 MF.getFrameInfo().setStackSize(FrameSize); 445 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 446 return FrameSize; 447 } 448 449 /// determineFrameLayout - Determine the size of the frame and maximum call 450 /// frame size. 451 unsigned 452 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 453 bool UseEstimate, 454 unsigned *NewMaxCallFrameSize) const { 455 const MachineFrameInfo &MFI = MF.getFrameInfo(); 456 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 457 458 // Get the number of bytes to allocate from the FrameInfo 459 unsigned FrameSize = 460 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 461 462 // Get stack alignments. The frame must be aligned to the greatest of these: 463 Align TargetAlign = getStackAlign(); // alignment required per the ABI 464 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 465 Align Alignment = std::max(TargetAlign, MaxAlign); 466 467 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 468 469 unsigned LR = RegInfo->getRARegister(); 470 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 471 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 472 !MFI.adjustsStack() && // No calls. 473 !MustSaveLR(MF, LR) && // No need to save LR. 474 !FI->mustSaveTOC() && // No need to save TOC. 475 !RegInfo->hasBasePointer(MF); // No special alignment. 476 477 // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless 478 // code if all local vars are reg-allocated. 479 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 480 481 // Check whether we can skip adjusting the stack pointer (by using red zone) 482 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 483 // No need for frame 484 return 0; 485 } 486 487 // Get the maximum call frame size of all the calls. 488 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 489 490 // Maximum call frame needs to be at least big enough for linkage area. 491 unsigned minCallFrameSize = getLinkageSize(); 492 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 493 494 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 495 // that allocations will be aligned. 496 if (MFI.hasVarSizedObjects()) 497 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 498 499 // Update the new max call frame size if the caller passes in a valid pointer. 500 if (NewMaxCallFrameSize) 501 *NewMaxCallFrameSize = maxCallFrameSize; 502 503 // Include call frame size in total. 504 FrameSize += maxCallFrameSize; 505 506 // Make sure the frame is aligned. 507 FrameSize = alignTo(FrameSize, Alignment); 508 509 return FrameSize; 510 } 511 512 // hasFP - Return true if the specified function actually has a dedicated frame 513 // pointer register. 514 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 515 const MachineFrameInfo &MFI = MF.getFrameInfo(); 516 // FIXME: This is pretty much broken by design: hasFP() might be called really 517 // early, before the stack layout was calculated and thus hasFP() might return 518 // true or false here depending on the time of call. 519 return (MFI.getStackSize()) && needsFP(MF); 520 } 521 522 // needsFP - Return true if the specified function should have a dedicated frame 523 // pointer register. This is true if the function has variable sized allocas or 524 // if frame pointer elimination is disabled. 525 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 526 const MachineFrameInfo &MFI = MF.getFrameInfo(); 527 528 // Naked functions have no stack frame pushed, so we don't have a frame 529 // pointer. 530 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 531 return false; 532 533 return MF.getTarget().Options.DisableFramePointerElim(MF) || 534 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 535 (MF.getTarget().Options.GuaranteedTailCallOpt && 536 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 537 } 538 539 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 540 bool is31 = needsFP(MF); 541 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 542 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 543 544 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 545 bool HasBP = RegInfo->hasBasePointer(MF); 546 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 547 unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg; 548 549 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 550 BI != BE; ++BI) 551 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 552 --MBBI; 553 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 554 MachineOperand &MO = MBBI->getOperand(I); 555 if (!MO.isReg()) 556 continue; 557 558 switch (MO.getReg()) { 559 case PPC::FP: 560 MO.setReg(FPReg); 561 break; 562 case PPC::FP8: 563 MO.setReg(FP8Reg); 564 break; 565 case PPC::BP: 566 MO.setReg(BPReg); 567 break; 568 case PPC::BP8: 569 MO.setReg(BP8Reg); 570 break; 571 572 } 573 } 574 } 575 } 576 577 /* This function will do the following: 578 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 579 respectively (defaults recommended by the ABI) and return true 580 - If MBB is not an entry block, initialize the register scavenger and look 581 for available registers. 582 - If the defaults (R0/R12) are available, return true 583 - If TwoUniqueRegsRequired is set to true, it looks for two unique 584 registers. Otherwise, look for a single available register. 585 - If the required registers are found, set SR1 and SR2 and return true. 586 - If the required registers are not found, set SR2 or both SR1 and SR2 to 587 PPC::NoRegister and return false. 588 589 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 590 is not set, this function will attempt to find two different registers, but 591 still return true if only one register is available (and set SR1 == SR2). 592 */ 593 bool 594 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 595 bool UseAtEnd, 596 bool TwoUniqueRegsRequired, 597 Register *SR1, 598 Register *SR2) const { 599 RegScavenger RS; 600 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 601 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 602 603 // Set the defaults for the two scratch registers. 604 if (SR1) 605 *SR1 = R0; 606 607 if (SR2) { 608 assert (SR1 && "Asking for the second scratch register but not the first?"); 609 *SR2 = R12; 610 } 611 612 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 613 if ((UseAtEnd && MBB->isReturnBlock()) || 614 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 615 return true; 616 617 RS.enterBasicBlock(*MBB); 618 619 if (UseAtEnd && !MBB->empty()) { 620 // The scratch register will be used at the end of the block, so must 621 // consider all registers used within the block 622 623 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 624 // If no terminator, back iterator up to previous instruction. 625 if (MBBI == MBB->end()) 626 MBBI = std::prev(MBBI); 627 628 if (MBBI != MBB->begin()) 629 RS.forward(MBBI); 630 } 631 632 // If the two registers are available, we're all good. 633 // Note that we only return here if both R0 and R12 are available because 634 // although the function may not require two unique registers, it may benefit 635 // from having two so we should try to provide them. 636 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 637 return true; 638 639 // Get the list of callee-saved registers for the target. 640 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 641 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 642 643 // Get all the available registers in the block. 644 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 645 &PPC::GPRCRegClass); 646 647 // We shouldn't use callee-saved registers as scratch registers as they may be 648 // available when looking for a candidate block for shrink wrapping but not 649 // available when the actual prologue/epilogue is being emitted because they 650 // were added as live-in to the prologue block by PrologueEpilogueInserter. 651 for (int i = 0; CSRegs[i]; ++i) 652 BV.reset(CSRegs[i]); 653 654 // Set the first scratch register to the first available one. 655 if (SR1) { 656 int FirstScratchReg = BV.find_first(); 657 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 658 } 659 660 // If there is another one available, set the second scratch register to that. 661 // Otherwise, set it to either PPC::NoRegister if this function requires two 662 // or to whatever SR1 is set to if this function doesn't require two. 663 if (SR2) { 664 int SecondScratchReg = BV.find_next(*SR1); 665 if (SecondScratchReg != -1) 666 *SR2 = SecondScratchReg; 667 else 668 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1; 669 } 670 671 // Now that we've done our best to provide both registers, double check 672 // whether we were unable to provide enough. 673 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 674 return false; 675 676 return true; 677 } 678 679 // We need a scratch register for spilling LR and for spilling CR. By default, 680 // we use two scratch registers to hide latency. However, if only one scratch 681 // register is available, we can adjust for that by not overlapping the spill 682 // code. However, if we need to realign the stack (i.e. have a base pointer) 683 // and the stack frame is large, we need two scratch registers. 684 bool 685 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 686 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 687 MachineFunction &MF = *(MBB->getParent()); 688 bool HasBP = RegInfo->hasBasePointer(MF); 689 unsigned FrameSize = determineFrameLayout(MF); 690 int NegFrameSize = -FrameSize; 691 bool IsLargeFrame = !isInt<16>(NegFrameSize); 692 MachineFrameInfo &MFI = MF.getFrameInfo(); 693 Align MaxAlign = MFI.getMaxAlign(); 694 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 695 696 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 697 } 698 699 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 700 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 701 702 return findScratchRegister(TmpMBB, false, 703 twoUniqueScratchRegsRequired(TmpMBB)); 704 } 705 706 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 707 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 708 709 return findScratchRegister(TmpMBB, true); 710 } 711 712 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { 713 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 714 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 715 716 // Abort if there is no register info or function info. 717 if (!RegInfo || !FI) 718 return false; 719 720 // Only move the stack update on ELFv2 ABI and PPC64. 721 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) 722 return false; 723 724 // Check the frame size first and return false if it does not fit the 725 // requirements. 726 // We need a non-zero frame size as well as a frame that will fit in the red 727 // zone. This is because by moving the stack pointer update we are now storing 728 // to the red zone until the stack pointer is updated. If we get an interrupt 729 // inside the prologue but before the stack update we now have a number of 730 // stores to the red zone and those stores must all fit. 731 MachineFrameInfo &MFI = MF.getFrameInfo(); 732 unsigned FrameSize = MFI.getStackSize(); 733 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 734 return false; 735 736 // Frame pointers and base pointers complicate matters so don't do anything 737 // if we have them. For example having a frame pointer will sometimes require 738 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 739 // difficult. 740 if (hasFP(MF) || RegInfo->hasBasePointer(MF)) 741 return false; 742 743 // Calls to fast_cc functions use different rules for passing parameters on 744 // the stack from the ABI and using PIC base in the function imposes 745 // similar restrictions to using the base pointer. It is not generally safe 746 // to move the stack pointer update in these situations. 747 if (FI->hasFastCall() || FI->usesPICBase()) 748 return false; 749 750 // Finally we can move the stack update if we do not require register 751 // scavenging. Register scavenging can introduce more spills and so 752 // may make the frame size larger than we have computed. 753 return !RegInfo->requiresFrameIndexScavenging(MF); 754 } 755 756 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 757 MachineBasicBlock &MBB) const { 758 MachineBasicBlock::iterator MBBI = MBB.begin(); 759 MachineFrameInfo &MFI = MF.getFrameInfo(); 760 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 761 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 762 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 763 764 MachineModuleInfo &MMI = MF.getMMI(); 765 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 766 DebugLoc dl; 767 // AIX assembler does not support cfi directives. 768 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 769 770 // Get processor type. 771 bool isPPC64 = Subtarget.isPPC64(); 772 // Get the ABI. 773 bool isSVR4ABI = Subtarget.isSVR4ABI(); 774 bool isAIXABI = Subtarget.isAIXABI(); 775 bool isELFv2ABI = Subtarget.isELFv2ABI(); 776 assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); 777 778 // Scan the prolog, looking for an UPDATE_VRSAVE instruction. If we find it, 779 // process it. 780 if (!isSVR4ABI) 781 for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) { 782 if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) { 783 if (isAIXABI) 784 report_fatal_error("UPDATE_VRSAVE is unexpected on AIX."); 785 HandleVRSaveUpdate(*MBBI, TII); 786 break; 787 } 788 } 789 790 // Move MBBI back to the beginning of the prologue block. 791 MBBI = MBB.begin(); 792 793 // Work out frame sizes. 794 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 795 int NegFrameSize = -FrameSize; 796 if (!isInt<32>(NegFrameSize)) 797 llvm_unreachable("Unhandled stack size!"); 798 799 if (MFI.isFrameAddressTaken()) 800 replaceFPWithRealFP(MF); 801 802 // Check if the link register (LR) must be saved. 803 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 804 bool MustSaveLR = FI->mustSaveLR(); 805 bool MustSaveTOC = FI->mustSaveTOC(); 806 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 807 bool MustSaveCR = !MustSaveCRs.empty(); 808 // Do we have a frame pointer and/or base pointer for this function? 809 bool HasFP = hasFP(MF); 810 bool HasBP = RegInfo->hasBasePointer(MF); 811 bool HasRedZone = isPPC64 || !isSVR4ABI; 812 813 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 814 Register BPReg = RegInfo->getBaseRegister(MF); 815 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 816 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 817 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 818 Register ScratchReg; 819 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 820 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 821 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 822 : PPC::MFLR ); 823 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 824 : PPC::STW ); 825 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 826 : PPC::STWU ); 827 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 828 : PPC::STWUX); 829 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 830 : PPC::LIS ); 831 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8 832 : PPC::ORI ); 833 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 834 : PPC::OR ); 835 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 836 : PPC::SUBFC); 837 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 838 : PPC::SUBFIC); 839 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 840 : PPC::MFCR); 841 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 842 843 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 844 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 845 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 846 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 847 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 848 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 849 850 // Using the same bool variable as below to suppress compiler warnings. 851 // Stack probe requires two scratch registers, one for old sp, one for large 852 // frame and large probe size. 853 bool SingleScratchReg = findScratchRegister( 854 &MBB, false, 855 twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF), 856 &ScratchReg, &TempReg); 857 assert(SingleScratchReg && 858 "Required number of registers not available in this block"); 859 860 SingleScratchReg = ScratchReg == TempReg; 861 862 int LROffset = getReturnSaveOffset(); 863 864 int FPOffset = 0; 865 if (HasFP) { 866 if (isSVR4ABI) { 867 MachineFrameInfo &MFI = MF.getFrameInfo(); 868 int FPIndex = FI->getFramePointerSaveIndex(); 869 assert(FPIndex && "No Frame Pointer Save Slot!"); 870 FPOffset = MFI.getObjectOffset(FPIndex); 871 } else { 872 FPOffset = getFramePointerSaveOffset(); 873 } 874 } 875 876 int BPOffset = 0; 877 if (HasBP) { 878 if (isSVR4ABI) { 879 MachineFrameInfo &MFI = MF.getFrameInfo(); 880 int BPIndex = FI->getBasePointerSaveIndex(); 881 assert(BPIndex && "No Base Pointer Save Slot!"); 882 BPOffset = MFI.getObjectOffset(BPIndex); 883 } else { 884 BPOffset = getBasePointerSaveOffset(); 885 } 886 } 887 888 int PBPOffset = 0; 889 if (FI->usesPICBase()) { 890 MachineFrameInfo &MFI = MF.getFrameInfo(); 891 int PBPIndex = FI->getPICBasePointerSaveIndex(); 892 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 893 PBPOffset = MFI.getObjectOffset(PBPIndex); 894 } 895 896 // Get stack alignments. 897 Align MaxAlign = MFI.getMaxAlign(); 898 if (HasBP && MaxAlign > 1) 899 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 900 901 // Frames of 32KB & larger require special handling because they cannot be 902 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 903 bool isLargeFrame = !isInt<16>(NegFrameSize); 904 905 // Check if we can move the stack update instruction (stdu) down the prologue 906 // past the callee saves. Hopefully this will avoid the situation where the 907 // saves are waiting for the update on the store with update to complete. 908 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 909 bool MovingStackUpdateDown = false; 910 911 // Check if we can move the stack update. 912 if (stackUpdateCanBeMoved(MF)) { 913 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 914 for (CalleeSavedInfo CSI : Info) { 915 int FrIdx = CSI.getFrameIdx(); 916 // If the frame index is not negative the callee saved info belongs to a 917 // stack object that is not a fixed stack object. We ignore non-fixed 918 // stack objects because we won't move the stack update pointer past them. 919 if (FrIdx >= 0) 920 continue; 921 922 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 923 StackUpdateLoc++; 924 MovingStackUpdateDown = true; 925 } else { 926 // We need all of the Frame Indices to meet these conditions. 927 // If they do not, abort the whole operation. 928 StackUpdateLoc = MBBI; 929 MovingStackUpdateDown = false; 930 break; 931 } 932 } 933 934 // If the operation was not aborted then update the object offset. 935 if (MovingStackUpdateDown) { 936 for (CalleeSavedInfo CSI : Info) { 937 int FrIdx = CSI.getFrameIdx(); 938 if (FrIdx < 0) 939 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 940 } 941 } 942 } 943 944 // Where in the prologue we move the CR fields depends on how many scratch 945 // registers we have, and if we need to save the link register or not. This 946 // lambda is to avoid duplicating the logic in 2 places. 947 auto BuildMoveFromCR = [&]() { 948 if (isELFv2ABI && MustSaveCRs.size() == 1) { 949 // In the ELFv2 ABI, we are not required to save all CR fields. 950 // If only one CR field is clobbered, it is more efficient to use 951 // mfocrf to selectively save just that field, because mfocrf has short 952 // latency compares to mfcr. 953 assert(isPPC64 && "V2 ABI is 64-bit only."); 954 MachineInstrBuilder MIB = 955 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 956 MIB.addReg(MustSaveCRs[0], RegState::Kill); 957 } else { 958 MachineInstrBuilder MIB = 959 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 960 for (unsigned CRfield : MustSaveCRs) 961 MIB.addReg(CRfield, RegState::ImplicitKill); 962 } 963 }; 964 965 // If we need to spill the CR and the LR but we don't have two separate 966 // registers available, we must spill them one at a time 967 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 968 BuildMoveFromCR(); 969 BuildMI(MBB, MBBI, dl, StoreWordInst) 970 .addReg(TempReg, getKillRegState(true)) 971 .addImm(CRSaveOffset) 972 .addReg(SPReg); 973 } 974 975 if (MustSaveLR) 976 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 977 978 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 979 BuildMoveFromCR(); 980 981 if (HasRedZone) { 982 if (HasFP) 983 BuildMI(MBB, MBBI, dl, StoreInst) 984 .addReg(FPReg) 985 .addImm(FPOffset) 986 .addReg(SPReg); 987 if (FI->usesPICBase()) 988 BuildMI(MBB, MBBI, dl, StoreInst) 989 .addReg(PPC::R30) 990 .addImm(PBPOffset) 991 .addReg(SPReg); 992 if (HasBP) 993 BuildMI(MBB, MBBI, dl, StoreInst) 994 .addReg(BPReg) 995 .addImm(BPOffset) 996 .addReg(SPReg); 997 } 998 999 if (MustSaveLR) 1000 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 1001 .addReg(ScratchReg, getKillRegState(true)) 1002 .addImm(LROffset) 1003 .addReg(SPReg); 1004 1005 if (MustSaveCR && 1006 !(SingleScratchReg && MustSaveLR)) { 1007 assert(HasRedZone && "A red zone is always available on PPC64"); 1008 BuildMI(MBB, MBBI, dl, StoreWordInst) 1009 .addReg(TempReg, getKillRegState(true)) 1010 .addImm(CRSaveOffset) 1011 .addReg(SPReg); 1012 } 1013 1014 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 1015 if (!FrameSize) 1016 return; 1017 1018 // Adjust stack pointer: r1 += NegFrameSize. 1019 // If there is a preferred stack alignment, align R1 now 1020 1021 if (HasBP && HasRedZone) { 1022 // Save a copy of r1 as the base pointer. 1023 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1024 .addReg(SPReg) 1025 .addReg(SPReg); 1026 } 1027 1028 // Have we generated a STUX instruction to claim stack frame? If so, 1029 // the negated frame size will be placed in ScratchReg. 1030 bool HasSTUX = false; 1031 1032 // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain 1033 // pointer is always stored at SP, we will get a free probe due to an essential 1034 // STU(X) instruction. 1035 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { 1036 // To be consistent with other targets, a pseudo instruction is emitted and 1037 // will be later expanded in `inlineStackProbe`. 1038 BuildMI(MBB, MBBI, dl, 1039 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 1040 : PPC::PROBED_STACKALLOC_32)) 1041 .addDef(ScratchReg) 1042 .addDef(TempReg) // TempReg stores the old sp. 1043 .addImm(NegFrameSize); 1044 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we 1045 // update the ScratchReg to meet the assumption that ScratchReg contains 1046 // the NegFrameSize. This solution is rather tricky. 1047 if (!HasRedZone) { 1048 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1049 .addReg(TempReg) 1050 .addReg(SPReg); 1051 HasSTUX = true; 1052 } 1053 } else { 1054 // This condition must be kept in sync with canUseAsPrologue. 1055 if (HasBP && MaxAlign > 1) { 1056 if (isPPC64) 1057 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 1058 .addReg(SPReg) 1059 .addImm(0) 1060 .addImm(64 - Log2(MaxAlign)); 1061 else // PPC32... 1062 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 1063 .addReg(SPReg) 1064 .addImm(0) 1065 .addImm(32 - Log2(MaxAlign)) 1066 .addImm(31); 1067 if (!isLargeFrame) { 1068 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 1069 .addReg(ScratchReg, RegState::Kill) 1070 .addImm(NegFrameSize); 1071 } else { 1072 assert(!SingleScratchReg && "Only a single scratch reg available"); 1073 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 1074 .addImm(NegFrameSize >> 16); 1075 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 1076 .addReg(TempReg, RegState::Kill) 1077 .addImm(NegFrameSize & 0xFFFF); 1078 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 1079 .addReg(ScratchReg, RegState::Kill) 1080 .addReg(TempReg, RegState::Kill); 1081 } 1082 1083 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1084 .addReg(SPReg, RegState::Kill) 1085 .addReg(SPReg) 1086 .addReg(ScratchReg); 1087 HasSTUX = true; 1088 1089 } else if (!isLargeFrame) { 1090 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 1091 .addReg(SPReg) 1092 .addImm(NegFrameSize) 1093 .addReg(SPReg); 1094 1095 } else { 1096 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1097 .addImm(NegFrameSize >> 16); 1098 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1099 .addReg(ScratchReg, RegState::Kill) 1100 .addImm(NegFrameSize & 0xFFFF); 1101 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 1102 .addReg(SPReg, RegState::Kill) 1103 .addReg(SPReg) 1104 .addReg(ScratchReg); 1105 HasSTUX = true; 1106 } 1107 } 1108 1109 // Save the TOC register after the stack pointer update if a prologue TOC 1110 // save is required for the function. 1111 if (MustSaveTOC) { 1112 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 1113 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 1114 .addReg(TOCReg, getKillRegState(true)) 1115 .addImm(TOCSaveOffset) 1116 .addReg(SPReg); 1117 } 1118 1119 if (!HasRedZone) { 1120 assert(!isPPC64 && "A red zone is always available on PPC64"); 1121 if (HasSTUX) { 1122 // The negated frame size is in ScratchReg, and the SPReg has been 1123 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 1124 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 1125 // the stack frame (i.e. the old SP), ideally, we would put the old 1126 // SP into a register and use it as the base for the stores. The 1127 // problem is that the only available register may be ScratchReg, 1128 // which could be R0, and R0 cannot be used as a base address. 1129 1130 // First, set ScratchReg to the old SP. This may need to be modified 1131 // later. 1132 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1133 .addReg(ScratchReg, RegState::Kill) 1134 .addReg(SPReg); 1135 1136 if (ScratchReg == PPC::R0) { 1137 // R0 cannot be used as a base register, but it can be used as an 1138 // index in a store-indexed. 1139 int LastOffset = 0; 1140 if (HasFP) { 1141 // R0 += (FPOffset-LastOffset). 1142 // Need addic, since addi treats R0 as 0. 1143 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1144 .addReg(ScratchReg) 1145 .addImm(FPOffset-LastOffset); 1146 LastOffset = FPOffset; 1147 // Store FP into *R0. 1148 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1149 .addReg(FPReg, RegState::Kill) // Save FP. 1150 .addReg(PPC::ZERO) 1151 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1152 } 1153 if (FI->usesPICBase()) { 1154 // R0 += (PBPOffset-LastOffset). 1155 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1156 .addReg(ScratchReg) 1157 .addImm(PBPOffset-LastOffset); 1158 LastOffset = PBPOffset; 1159 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1160 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1161 .addReg(PPC::ZERO) 1162 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1163 } 1164 if (HasBP) { 1165 // R0 += (BPOffset-LastOffset). 1166 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1167 .addReg(ScratchReg) 1168 .addImm(BPOffset-LastOffset); 1169 LastOffset = BPOffset; 1170 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1171 .addReg(BPReg, RegState::Kill) // Save BP. 1172 .addReg(PPC::ZERO) 1173 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1174 // BP = R0-LastOffset 1175 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1176 .addReg(ScratchReg, RegState::Kill) 1177 .addImm(-LastOffset); 1178 } 1179 } else { 1180 // ScratchReg is not R0, so use it as the base register. It is 1181 // already set to the old SP, so we can use the offsets directly. 1182 1183 // Now that the stack frame has been allocated, save all the necessary 1184 // registers using ScratchReg as the base address. 1185 if (HasFP) 1186 BuildMI(MBB, MBBI, dl, StoreInst) 1187 .addReg(FPReg) 1188 .addImm(FPOffset) 1189 .addReg(ScratchReg); 1190 if (FI->usesPICBase()) 1191 BuildMI(MBB, MBBI, dl, StoreInst) 1192 .addReg(PPC::R30) 1193 .addImm(PBPOffset) 1194 .addReg(ScratchReg); 1195 if (HasBP) { 1196 BuildMI(MBB, MBBI, dl, StoreInst) 1197 .addReg(BPReg) 1198 .addImm(BPOffset) 1199 .addReg(ScratchReg); 1200 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1201 .addReg(ScratchReg, RegState::Kill) 1202 .addReg(ScratchReg); 1203 } 1204 } 1205 } else { 1206 // The frame size is a known 16-bit constant (fitting in the immediate 1207 // field of STWU). To be here we have to be compiling for PPC32. 1208 // Since the SPReg has been decreased by FrameSize, add it back to each 1209 // offset. 1210 if (HasFP) 1211 BuildMI(MBB, MBBI, dl, StoreInst) 1212 .addReg(FPReg) 1213 .addImm(FrameSize + FPOffset) 1214 .addReg(SPReg); 1215 if (FI->usesPICBase()) 1216 BuildMI(MBB, MBBI, dl, StoreInst) 1217 .addReg(PPC::R30) 1218 .addImm(FrameSize + PBPOffset) 1219 .addReg(SPReg); 1220 if (HasBP) { 1221 BuildMI(MBB, MBBI, dl, StoreInst) 1222 .addReg(BPReg) 1223 .addImm(FrameSize + BPOffset) 1224 .addReg(SPReg); 1225 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1226 .addReg(SPReg) 1227 .addImm(FrameSize); 1228 } 1229 } 1230 } 1231 1232 // Add Call Frame Information for the instructions we generated above. 1233 if (needsCFI) { 1234 unsigned CFIIndex; 1235 1236 if (HasBP) { 1237 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1238 // because if the stack needed aligning then CFA won't be at a fixed 1239 // offset from FP/SP. 1240 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1241 CFIIndex = MF.addFrameInst( 1242 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1243 } else { 1244 // Adjust the definition of CFA to account for the change in SP. 1245 assert(NegFrameSize); 1246 CFIIndex = MF.addFrameInst( 1247 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); 1248 } 1249 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1250 .addCFIIndex(CFIIndex); 1251 1252 if (HasFP) { 1253 // Describe where FP was saved, at a fixed offset from CFA. 1254 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1255 CFIIndex = MF.addFrameInst( 1256 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1257 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1258 .addCFIIndex(CFIIndex); 1259 } 1260 1261 if (FI->usesPICBase()) { 1262 // Describe where FP was saved, at a fixed offset from CFA. 1263 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1264 CFIIndex = MF.addFrameInst( 1265 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1266 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1267 .addCFIIndex(CFIIndex); 1268 } 1269 1270 if (HasBP) { 1271 // Describe where BP was saved, at a fixed offset from CFA. 1272 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1273 CFIIndex = MF.addFrameInst( 1274 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1275 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1276 .addCFIIndex(CFIIndex); 1277 } 1278 1279 if (MustSaveLR) { 1280 // Describe where LR was saved, at a fixed offset from CFA. 1281 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1282 CFIIndex = MF.addFrameInst( 1283 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1284 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1285 .addCFIIndex(CFIIndex); 1286 } 1287 } 1288 1289 // If there is a frame pointer, copy R1 into R31 1290 if (HasFP) { 1291 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1292 .addReg(SPReg) 1293 .addReg(SPReg); 1294 1295 if (!HasBP && needsCFI) { 1296 // Change the definition of CFA from SP+offset to FP+offset, because SP 1297 // will change at every alloca. 1298 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1299 unsigned CFIIndex = MF.addFrameInst( 1300 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1301 1302 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1303 .addCFIIndex(CFIIndex); 1304 } 1305 } 1306 1307 if (needsCFI) { 1308 // Describe where callee saved registers were saved, at fixed offsets from 1309 // CFA. 1310 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1311 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1312 unsigned Reg = CSI[I].getReg(); 1313 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1314 1315 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1316 // subregisters of CR2. We just need to emit a move of CR2. 1317 if (PPC::CRBITRCRegClass.contains(Reg)) 1318 continue; 1319 1320 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1321 continue; 1322 1323 // For SVR4, don't emit a move for the CR spill slot if we haven't 1324 // spilled CRs. 1325 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1326 && !MustSaveCR) 1327 continue; 1328 1329 // For 64-bit SVR4 when we have spilled CRs, the spill location 1330 // is SP+8, not a frame-relative slot. 1331 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1332 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1333 // the whole CR word. In the ELFv2 ABI, every CR that was 1334 // actually saved gets its own CFI record. 1335 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1336 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1337 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1338 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1339 .addCFIIndex(CFIIndex); 1340 continue; 1341 } 1342 1343 if (CSI[I].isSpilledToReg()) { 1344 unsigned SpilledReg = CSI[I].getDstReg(); 1345 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1346 nullptr, MRI->getDwarfRegNum(Reg, true), 1347 MRI->getDwarfRegNum(SpilledReg, true))); 1348 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1349 .addCFIIndex(CFIRegister); 1350 } else { 1351 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1352 // We have changed the object offset above but we do not want to change 1353 // the actual offsets in the CFI instruction so we have to undo the 1354 // offset change here. 1355 if (MovingStackUpdateDown) 1356 Offset -= NegFrameSize; 1357 1358 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1359 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1360 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1361 .addCFIIndex(CFIIndex); 1362 } 1363 } 1364 } 1365 } 1366 1367 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, 1368 MachineBasicBlock &PrologMBB) const { 1369 // TODO: Generate CFI instructions. 1370 bool isPPC64 = Subtarget.isPPC64(); 1371 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 1372 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1373 MachineFrameInfo &MFI = MF.getFrameInfo(); 1374 MachineModuleInfo &MMI = MF.getMMI(); 1375 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 1376 // AIX assembler does not support cfi directives. 1377 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 1378 auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) { 1379 int Opc = MI.getOpcode(); 1380 return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32; 1381 }); 1382 if (StackAllocMIPos == PrologMBB.end()) 1383 return; 1384 const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); 1385 DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos); 1386 MachineInstr &MI = *StackAllocMIPos; 1387 int64_t NegFrameSize = MI.getOperand(2).getImm(); 1388 int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF); 1389 assert(isInt<32>(NegProbeSize) && "Unhandled probe size"); 1390 int64_t NumBlocks = NegFrameSize / NegProbeSize; 1391 int64_t NegResidualSize = NegFrameSize % NegProbeSize; 1392 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1393 Register ScratchReg = MI.getOperand(0).getReg(); 1394 Register FPReg = MI.getOperand(1).getReg(); 1395 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1396 bool HasBP = RegInfo->hasBasePointer(MF); 1397 Align MaxAlign = MFI.getMaxAlign(); 1398 // Initialize current frame pointer. 1399 const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); 1400 BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); 1401 // Subroutines to generate .cfi_* directives. 1402 auto buildDefCFAReg = [&](MachineBasicBlock &MBB, 1403 MachineBasicBlock::iterator MBBI, Register Reg) { 1404 unsigned RegNum = MRI->getDwarfRegNum(Reg, true); 1405 unsigned CFIIndex = MF.addFrameInst( 1406 MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); 1407 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1408 .addCFIIndex(CFIIndex); 1409 }; 1410 auto buildDefCFA = [&](MachineBasicBlock &MBB, 1411 MachineBasicBlock::iterator MBBI, Register Reg, 1412 int Offset) { 1413 unsigned RegNum = MRI->getDwarfRegNum(Reg, true); 1414 unsigned CFIIndex = MBB.getParent()->addFrameInst( 1415 MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset)); 1416 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1417 .addCFIIndex(CFIIndex); 1418 }; 1419 // Subroutine to determine if we can use the Imm as part of d-form. 1420 auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; }; 1421 // Subroutine to materialize the Imm into TempReg. 1422 auto MaterializeImm = [&](MachineBasicBlock &MBB, 1423 MachineBasicBlock::iterator MBBI, int64_t Imm, 1424 Register &TempReg) { 1425 assert(isInt<32>(Imm) && "Unhandled imm"); 1426 if (isInt<16>(Imm)) 1427 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg) 1428 .addImm(Imm); 1429 else { 1430 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg) 1431 .addImm(Imm >> 16); 1432 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg) 1433 .addReg(TempReg) 1434 .addImm(Imm & 0xFFFF); 1435 } 1436 }; 1437 // Subroutine to store frame pointer and decrease stack pointer by probe size. 1438 auto allocateAndProbe = [&](MachineBasicBlock &MBB, 1439 MachineBasicBlock::iterator MBBI, int64_t NegSize, 1440 Register NegSizeReg, bool UseDForm) { 1441 if (UseDForm) 1442 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg) 1443 .addReg(FPReg) 1444 .addImm(NegSize) 1445 .addReg(SPReg); 1446 else 1447 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) 1448 .addReg(FPReg) 1449 .addReg(SPReg) 1450 .addReg(NegSizeReg); 1451 }; 1452 // Use FPReg to calculate CFA. 1453 if (needsCFI) 1454 buildDefCFA(PrologMBB, {MI}, FPReg, 0); 1455 // For case HasBP && MaxAlign > 1, we have to align the SP by performing 1456 // SP = SP - SP % MaxAlign. 1457 if (HasBP && MaxAlign > 1) { 1458 if (isPPC64) 1459 BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) 1460 .addReg(FPReg) 1461 .addImm(0) 1462 .addImm(64 - Log2(MaxAlign)); 1463 else 1464 BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) 1465 .addReg(FPReg) 1466 .addImm(0) 1467 .addImm(32 - Log2(MaxAlign)) 1468 .addImm(31); 1469 BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), 1470 SPReg) 1471 .addReg(FPReg) 1472 .addReg(SPReg) 1473 .addReg(ScratchReg); 1474 } 1475 // Probe residual part. 1476 if (NegResidualSize) { 1477 bool ResidualUseDForm = CanUseDForm(NegResidualSize); 1478 if (!ResidualUseDForm) 1479 MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg); 1480 allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg, 1481 ResidualUseDForm); 1482 } 1483 bool UseDForm = CanUseDForm(NegProbeSize); 1484 // If number of blocks is small, just probe them directly. 1485 if (NumBlocks < 3) { 1486 if (!UseDForm) 1487 MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); 1488 for (int i = 0; i < NumBlocks; ++i) 1489 allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm); 1490 if (needsCFI) { 1491 // Restore using SPReg to calculate CFA. 1492 buildDefCFAReg(PrologMBB, {MI}, SPReg); 1493 } 1494 } else { 1495 // Since CTR is a volatile register and current shrinkwrap implementation 1496 // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a 1497 // CTR loop to probe. 1498 // Calculate trip count and stores it in CTRReg. 1499 MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg); 1500 BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) 1501 .addReg(ScratchReg, RegState::Kill); 1502 if (!UseDForm) 1503 MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); 1504 // Create MBBs of the loop. 1505 MachineFunction::iterator MBBInsertPoint = 1506 std::next(PrologMBB.getIterator()); 1507 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); 1508 MF.insert(MBBInsertPoint, LoopMBB); 1509 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); 1510 MF.insert(MBBInsertPoint, ExitMBB); 1511 // Synthesize the loop body. 1512 allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, 1513 UseDForm); 1514 BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) 1515 .addMBB(LoopMBB); 1516 LoopMBB->addSuccessor(ExitMBB); 1517 LoopMBB->addSuccessor(LoopMBB); 1518 // Synthesize the exit MBB. 1519 ExitMBB->splice(ExitMBB->end(), &PrologMBB, 1520 std::next(MachineBasicBlock::iterator(MI)), 1521 PrologMBB.end()); 1522 ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB); 1523 PrologMBB.addSuccessor(LoopMBB); 1524 if (needsCFI) { 1525 // Restore using SPReg to calculate CFA. 1526 buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); 1527 } 1528 // Update liveins. 1529 recomputeLiveIns(*LoopMBB); 1530 recomputeLiveIns(*ExitMBB); 1531 } 1532 ++NumPrologProbed; 1533 MI.eraseFromParent(); 1534 } 1535 1536 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1537 MachineBasicBlock &MBB) const { 1538 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1539 DebugLoc dl; 1540 1541 if (MBBI != MBB.end()) 1542 dl = MBBI->getDebugLoc(); 1543 1544 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1545 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1546 1547 // Get alignment info so we know how to restore the SP. 1548 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1549 1550 // Get the number of bytes allocated from the FrameInfo. 1551 int FrameSize = MFI.getStackSize(); 1552 1553 // Get processor type. 1554 bool isPPC64 = Subtarget.isPPC64(); 1555 // Get the ABI. 1556 bool isSVR4ABI = Subtarget.isSVR4ABI(); 1557 1558 // Check if the link register (LR) has been saved. 1559 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1560 bool MustSaveLR = FI->mustSaveLR(); 1561 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 1562 bool MustSaveCR = !MustSaveCRs.empty(); 1563 // Do we have a frame pointer and/or base pointer for this function? 1564 bool HasFP = hasFP(MF); 1565 bool HasBP = RegInfo->hasBasePointer(MF); 1566 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1567 1568 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1569 Register BPReg = RegInfo->getBaseRegister(MF); 1570 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1571 Register ScratchReg; 1572 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1573 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1574 : PPC::MTLR ); 1575 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1576 : PPC::LWZ ); 1577 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1578 : PPC::LIS ); 1579 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1580 : PPC::OR ); 1581 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1582 : PPC::ORI ); 1583 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1584 : PPC::ADDI ); 1585 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1586 : PPC::ADD4 ); 1587 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 1588 : PPC::LWZ); 1589 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 1590 : PPC::MTOCRF); 1591 int LROffset = getReturnSaveOffset(); 1592 1593 int FPOffset = 0; 1594 1595 // Using the same bool variable as below to suppress compiler warnings. 1596 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1597 &TempReg); 1598 assert(SingleScratchReg && 1599 "Could not find an available scratch register"); 1600 1601 SingleScratchReg = ScratchReg == TempReg; 1602 1603 if (HasFP) { 1604 if (isSVR4ABI) { 1605 int FPIndex = FI->getFramePointerSaveIndex(); 1606 assert(FPIndex && "No Frame Pointer Save Slot!"); 1607 FPOffset = MFI.getObjectOffset(FPIndex); 1608 } else { 1609 FPOffset = getFramePointerSaveOffset(); 1610 } 1611 } 1612 1613 int BPOffset = 0; 1614 if (HasBP) { 1615 if (isSVR4ABI) { 1616 int BPIndex = FI->getBasePointerSaveIndex(); 1617 assert(BPIndex && "No Base Pointer Save Slot!"); 1618 BPOffset = MFI.getObjectOffset(BPIndex); 1619 } else { 1620 BPOffset = getBasePointerSaveOffset(); 1621 } 1622 } 1623 1624 int PBPOffset = 0; 1625 if (FI->usesPICBase()) { 1626 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1627 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1628 PBPOffset = MFI.getObjectOffset(PBPIndex); 1629 } 1630 1631 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1632 1633 if (IsReturnBlock) { 1634 unsigned RetOpcode = MBBI->getOpcode(); 1635 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1636 RetOpcode == PPC::TCRETURNdi || 1637 RetOpcode == PPC::TCRETURNai || 1638 RetOpcode == PPC::TCRETURNri8 || 1639 RetOpcode == PPC::TCRETURNdi8 || 1640 RetOpcode == PPC::TCRETURNai8; 1641 1642 if (UsesTCRet) { 1643 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1644 MachineOperand &StackAdjust = MBBI->getOperand(1); 1645 assert(StackAdjust.isImm() && "Expecting immediate value."); 1646 // Adjust stack pointer. 1647 int StackAdj = StackAdjust.getImm(); 1648 int Delta = StackAdj - MaxTCRetDelta; 1649 assert((Delta >= 0) && "Delta must be positive"); 1650 if (MaxTCRetDelta>0) 1651 FrameSize += (StackAdj +Delta); 1652 else 1653 FrameSize += StackAdj; 1654 } 1655 } 1656 1657 // Frames of 32KB & larger require special handling because they cannot be 1658 // indexed into with a simple LD/LWZ immediate offset operand. 1659 bool isLargeFrame = !isInt<16>(FrameSize); 1660 1661 // On targets without red zone, the SP needs to be restored last, so that 1662 // all live contents of the stack frame are upwards of the SP. This means 1663 // that we cannot restore SP just now, since there may be more registers 1664 // to restore from the stack frame (e.g. R31). If the frame size is not 1665 // a simple immediate value, we will need a spare register to hold the 1666 // restored SP. If the frame size is known and small, we can simply adjust 1667 // the offsets of the registers to be restored, and still use SP to restore 1668 // them. In such case, the final update of SP will be to add the frame 1669 // size to it. 1670 // To simplify the code, set RBReg to the base register used to restore 1671 // values from the stack, and set SPAdd to the value that needs to be added 1672 // to the SP at the end. The default values are as if red zone was present. 1673 unsigned RBReg = SPReg; 1674 unsigned SPAdd = 0; 1675 1676 // Check if we can move the stack update instruction up the epilogue 1677 // past the callee saves. This will allow the move to LR instruction 1678 // to be executed before the restores of the callee saves which means 1679 // that the callee saves can hide the latency from the MTLR instrcution. 1680 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1681 if (stackUpdateCanBeMoved(MF)) { 1682 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1683 for (CalleeSavedInfo CSI : Info) { 1684 int FrIdx = CSI.getFrameIdx(); 1685 // If the frame index is not negative the callee saved info belongs to a 1686 // stack object that is not a fixed stack object. We ignore non-fixed 1687 // stack objects because we won't move the update of the stack pointer 1688 // past them. 1689 if (FrIdx >= 0) 1690 continue; 1691 1692 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1693 StackUpdateLoc--; 1694 else { 1695 // Abort the operation as we can't update all CSR restores. 1696 StackUpdateLoc = MBBI; 1697 break; 1698 } 1699 } 1700 } 1701 1702 if (FrameSize) { 1703 // In the prologue, the loaded (or persistent) stack pointer value is 1704 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1705 // zone add this offset back now. 1706 1707 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1708 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1709 // call which invalidates the stack pointer value in SP(0). So we use the 1710 // value of R31 in this case. 1711 if (FI->hasFastCall()) { 1712 assert(HasFP && "Expecting a valid frame pointer."); 1713 if (!HasRedZone) 1714 RBReg = FPReg; 1715 if (!isLargeFrame) { 1716 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1717 .addReg(FPReg).addImm(FrameSize); 1718 } else { 1719 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1720 .addImm(FrameSize >> 16); 1721 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1722 .addReg(ScratchReg, RegState::Kill) 1723 .addImm(FrameSize & 0xFFFF); 1724 BuildMI(MBB, MBBI, dl, AddInst) 1725 .addReg(RBReg) 1726 .addReg(FPReg) 1727 .addReg(ScratchReg); 1728 } 1729 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1730 if (HasRedZone) { 1731 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1732 .addReg(SPReg) 1733 .addImm(FrameSize); 1734 } else { 1735 // Make sure that adding FrameSize will not overflow the max offset 1736 // size. 1737 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1738 "Local offsets should be negative"); 1739 SPAdd = FrameSize; 1740 FPOffset += FrameSize; 1741 BPOffset += FrameSize; 1742 PBPOffset += FrameSize; 1743 } 1744 } else { 1745 // We don't want to use ScratchReg as a base register, because it 1746 // could happen to be R0. Use FP instead, but make sure to preserve it. 1747 if (!HasRedZone) { 1748 // If FP is not saved, copy it to ScratchReg. 1749 if (!HasFP) 1750 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1751 .addReg(FPReg) 1752 .addReg(FPReg); 1753 RBReg = FPReg; 1754 } 1755 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1756 .addImm(0) 1757 .addReg(SPReg); 1758 } 1759 } 1760 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1761 // If there is no red zone, ScratchReg may be needed for holding a useful 1762 // value (although not the base register). Make sure it is not overwritten 1763 // too early. 1764 1765 // If we need to restore both the LR and the CR and we only have one 1766 // available scratch register, we must do them one at a time. 1767 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1768 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1769 // is live here. 1770 assert(HasRedZone && "Expecting red zone"); 1771 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1772 .addImm(CRSaveOffset) 1773 .addReg(SPReg); 1774 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1775 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1776 .addReg(TempReg, getKillRegState(i == e-1)); 1777 } 1778 1779 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1780 // LR is stored in the caller's stack frame. ScratchReg will be needed 1781 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1782 // a base register anyway, because it may happen to be R0. 1783 bool LoadedLR = false; 1784 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1785 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1786 .addImm(LROffset+SPAdd) 1787 .addReg(RBReg); 1788 LoadedLR = true; 1789 } 1790 1791 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1792 assert(RBReg == SPReg && "Should be using SP as a base register"); 1793 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1794 .addImm(CRSaveOffset) 1795 .addReg(RBReg); 1796 } 1797 1798 if (HasFP) { 1799 // If there is red zone, restore FP directly, since SP has already been 1800 // restored. Otherwise, restore the value of FP into ScratchReg. 1801 if (HasRedZone || RBReg == SPReg) 1802 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1803 .addImm(FPOffset) 1804 .addReg(SPReg); 1805 else 1806 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1807 .addImm(FPOffset) 1808 .addReg(RBReg); 1809 } 1810 1811 if (FI->usesPICBase()) 1812 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1813 .addImm(PBPOffset) 1814 .addReg(RBReg); 1815 1816 if (HasBP) 1817 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1818 .addImm(BPOffset) 1819 .addReg(RBReg); 1820 1821 // There is nothing more to be loaded from the stack, so now we can 1822 // restore SP: SP = RBReg + SPAdd. 1823 if (RBReg != SPReg || SPAdd != 0) { 1824 assert(!HasRedZone && "This should not happen with red zone"); 1825 // If SPAdd is 0, generate a copy. 1826 if (SPAdd == 0) 1827 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1828 .addReg(RBReg) 1829 .addReg(RBReg); 1830 else 1831 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1832 .addReg(RBReg) 1833 .addImm(SPAdd); 1834 1835 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1836 if (RBReg == FPReg) 1837 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1838 .addReg(ScratchReg) 1839 .addReg(ScratchReg); 1840 1841 // Now load the LR from the caller's stack frame. 1842 if (MustSaveLR && !LoadedLR) 1843 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1844 .addImm(LROffset) 1845 .addReg(SPReg); 1846 } 1847 1848 if (MustSaveCR && 1849 !(SingleScratchReg && MustSaveLR)) 1850 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1851 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1852 .addReg(TempReg, getKillRegState(i == e-1)); 1853 1854 if (MustSaveLR) 1855 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1856 1857 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1858 // call optimization 1859 if (IsReturnBlock) { 1860 unsigned RetOpcode = MBBI->getOpcode(); 1861 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1862 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1863 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1864 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1865 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1866 1867 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1868 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1869 .addReg(SPReg).addImm(CallerAllocatedAmt); 1870 } else { 1871 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1872 .addImm(CallerAllocatedAmt >> 16); 1873 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1874 .addReg(ScratchReg, RegState::Kill) 1875 .addImm(CallerAllocatedAmt & 0xFFFF); 1876 BuildMI(MBB, MBBI, dl, AddInst) 1877 .addReg(SPReg) 1878 .addReg(FPReg) 1879 .addReg(ScratchReg); 1880 } 1881 } else { 1882 createTailCallBranchInstr(MBB); 1883 } 1884 } 1885 } 1886 1887 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1888 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1889 1890 // If we got this far a first terminator should exist. 1891 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1892 1893 DebugLoc dl = MBBI->getDebugLoc(); 1894 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1895 1896 // Create branch instruction for pseudo tail call return instruction. 1897 // The TCRETURNdi variants are direct calls. Valid targets for those are 1898 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel 1899 // since we can tail call external functions with PC-Rel (i.e. we don't need 1900 // to worry about different TOC pointers). Some of the external functions will 1901 // be MO_GlobalAddress while others like memcpy for example, are going to 1902 // be MO_ExternalSymbol. 1903 unsigned RetOpcode = MBBI->getOpcode(); 1904 if (RetOpcode == PPC::TCRETURNdi) { 1905 MBBI = MBB.getLastNonDebugInstr(); 1906 MachineOperand &JumpTarget = MBBI->getOperand(0); 1907 if (JumpTarget.isGlobal()) 1908 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1909 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1910 else if (JumpTarget.isSymbol()) 1911 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1912 addExternalSymbol(JumpTarget.getSymbolName()); 1913 else 1914 llvm_unreachable("Expecting Global or External Symbol"); 1915 } else if (RetOpcode == PPC::TCRETURNri) { 1916 MBBI = MBB.getLastNonDebugInstr(); 1917 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1918 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1919 } else if (RetOpcode == PPC::TCRETURNai) { 1920 MBBI = MBB.getLastNonDebugInstr(); 1921 MachineOperand &JumpTarget = MBBI->getOperand(0); 1922 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1923 } else if (RetOpcode == PPC::TCRETURNdi8) { 1924 MBBI = MBB.getLastNonDebugInstr(); 1925 MachineOperand &JumpTarget = MBBI->getOperand(0); 1926 if (JumpTarget.isGlobal()) 1927 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1928 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1929 else if (JumpTarget.isSymbol()) 1930 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1931 addExternalSymbol(JumpTarget.getSymbolName()); 1932 else 1933 llvm_unreachable("Expecting Global or External Symbol"); 1934 } else if (RetOpcode == PPC::TCRETURNri8) { 1935 MBBI = MBB.getLastNonDebugInstr(); 1936 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1937 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1938 } else if (RetOpcode == PPC::TCRETURNai8) { 1939 MBBI = MBB.getLastNonDebugInstr(); 1940 MachineOperand &JumpTarget = MBBI->getOperand(0); 1941 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1942 } 1943 } 1944 1945 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1946 BitVector &SavedRegs, 1947 RegScavenger *RS) const { 1948 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1949 1950 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1951 1952 // Save and clear the LR state. 1953 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1954 unsigned LR = RegInfo->getRARegister(); 1955 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1956 SavedRegs.reset(LR); 1957 1958 // Save R31 if necessary 1959 int FPSI = FI->getFramePointerSaveIndex(); 1960 const bool isPPC64 = Subtarget.isPPC64(); 1961 MachineFrameInfo &MFI = MF.getFrameInfo(); 1962 1963 // If the frame pointer save index hasn't been defined yet. 1964 if (!FPSI && needsFP(MF)) { 1965 // Find out what the fix offset of the frame pointer save area. 1966 int FPOffset = getFramePointerSaveOffset(); 1967 // Allocate the frame index for frame pointer save area. 1968 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1969 // Save the result. 1970 FI->setFramePointerSaveIndex(FPSI); 1971 } 1972 1973 int BPSI = FI->getBasePointerSaveIndex(); 1974 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1975 int BPOffset = getBasePointerSaveOffset(); 1976 // Allocate the frame index for the base pointer save area. 1977 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1978 // Save the result. 1979 FI->setBasePointerSaveIndex(BPSI); 1980 } 1981 1982 // Reserve stack space for the PIC Base register (R30). 1983 // Only used in SVR4 32-bit. 1984 if (FI->usesPICBase()) { 1985 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1986 FI->setPICBasePointerSaveIndex(PBPSI); 1987 } 1988 1989 // Make sure we don't explicitly spill r31, because, for example, we have 1990 // some inline asm which explicitly clobbers it, when we otherwise have a 1991 // frame pointer and are using r31's spill slot for the prologue/epilogue 1992 // code. Same goes for the base pointer and the PIC base register. 1993 if (needsFP(MF)) 1994 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1995 if (RegInfo->hasBasePointer(MF)) 1996 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1997 if (FI->usesPICBase()) 1998 SavedRegs.reset(PPC::R30); 1999 2000 // Reserve stack space to move the linkage area to in case of a tail call. 2001 int TCSPDelta = 0; 2002 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2003 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 2004 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 2005 } 2006 2007 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. 2008 // For 64-bit SVR4, and all flavors of AIX we create a FixedStack 2009 // object at the offset of the CR-save slot in the linkage area. The actual 2010 // save and restore of the condition register will be created as part of the 2011 // prologue and epilogue insertion, but the FixedStack object is needed to 2012 // keep the CalleSavedInfo valid. 2013 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || 2014 SavedRegs.test(PPC::CR4))) { 2015 const uint64_t SpillSize = 4; // Condition register is always 4 bytes. 2016 const int64_t SpillOffset = 2017 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4; 2018 int FrameIdx = 2019 MFI.CreateFixedObject(SpillSize, SpillOffset, 2020 /* IsImmutable */ true, /* IsAliased */ false); 2021 FI->setCRSpillFrameIndex(FrameIdx); 2022 } 2023 } 2024 2025 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 2026 RegScavenger *RS) const { 2027 // Get callee saved register information. 2028 MachineFrameInfo &MFI = MF.getFrameInfo(); 2029 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 2030 2031 // If the function is shrink-wrapped, and if the function has a tail call, the 2032 // tail call might not be in the new RestoreBlock, so real branch instruction 2033 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 2034 // RestoreBlock. So we handle this case here. 2035 if (MFI.getSavePoint() && MFI.hasTailCall()) { 2036 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 2037 for (MachineBasicBlock &MBB : MF) { 2038 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 2039 createTailCallBranchInstr(MBB); 2040 } 2041 } 2042 2043 // Early exit if no callee saved registers are modified! 2044 if (CSI.empty() && !needsFP(MF)) { 2045 addScavengingSpillSlot(MF, RS); 2046 return; 2047 } 2048 2049 unsigned MinGPR = PPC::R31; 2050 unsigned MinG8R = PPC::X31; 2051 unsigned MinFPR = PPC::F31; 2052 unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31; 2053 2054 bool HasGPSaveArea = false; 2055 bool HasG8SaveArea = false; 2056 bool HasFPSaveArea = false; 2057 bool HasVRSAVESaveArea = false; 2058 bool HasVRSaveArea = false; 2059 2060 SmallVector<CalleeSavedInfo, 18> GPRegs; 2061 SmallVector<CalleeSavedInfo, 18> G8Regs; 2062 SmallVector<CalleeSavedInfo, 18> FPRegs; 2063 SmallVector<CalleeSavedInfo, 18> VRegs; 2064 2065 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2066 unsigned Reg = CSI[i].getReg(); 2067 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 2068 (Reg != PPC::X2 && Reg != PPC::R2)) && 2069 "Not expecting to try to spill R2 in a function that must save TOC"); 2070 if (PPC::GPRCRegClass.contains(Reg)) { 2071 HasGPSaveArea = true; 2072 2073 GPRegs.push_back(CSI[i]); 2074 2075 if (Reg < MinGPR) { 2076 MinGPR = Reg; 2077 } 2078 } else if (PPC::G8RCRegClass.contains(Reg)) { 2079 HasG8SaveArea = true; 2080 2081 G8Regs.push_back(CSI[i]); 2082 2083 if (Reg < MinG8R) { 2084 MinG8R = Reg; 2085 } 2086 } else if (PPC::F8RCRegClass.contains(Reg)) { 2087 HasFPSaveArea = true; 2088 2089 FPRegs.push_back(CSI[i]); 2090 2091 if (Reg < MinFPR) { 2092 MinFPR = Reg; 2093 } 2094 } else if (PPC::CRBITRCRegClass.contains(Reg) || 2095 PPC::CRRCRegClass.contains(Reg)) { 2096 ; // do nothing, as we already know whether CRs are spilled 2097 } else if (PPC::VRSAVERCRegClass.contains(Reg)) { 2098 HasVRSAVESaveArea = true; 2099 } else if (PPC::VRRCRegClass.contains(Reg) || 2100 PPC::SPERCRegClass.contains(Reg)) { 2101 // Altivec and SPE are mutually exclusive, but have the same stack 2102 // alignment requirements, so overload the save area for both cases. 2103 HasVRSaveArea = true; 2104 2105 VRegs.push_back(CSI[i]); 2106 2107 if (Reg < MinVR) { 2108 MinVR = Reg; 2109 } 2110 } else { 2111 llvm_unreachable("Unknown RegisterClass!"); 2112 } 2113 } 2114 2115 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 2116 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 2117 2118 int64_t LowerBound = 0; 2119 2120 // Take into account stack space reserved for tail calls. 2121 int TCSPDelta = 0; 2122 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2123 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 2124 LowerBound = TCSPDelta; 2125 } 2126 2127 // The Floating-point register save area is right below the back chain word 2128 // of the previous stack frame. 2129 if (HasFPSaveArea) { 2130 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 2131 int FI = FPRegs[i].getFrameIdx(); 2132 2133 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2134 } 2135 2136 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 2137 } 2138 2139 // Check whether the frame pointer register is allocated. If so, make sure it 2140 // is spilled to the correct offset. 2141 if (needsFP(MF)) { 2142 int FI = PFI->getFramePointerSaveIndex(); 2143 assert(FI && "No Frame Pointer Save Slot!"); 2144 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2145 // FP is R31/X31, so no need to update MinGPR/MinG8R. 2146 HasGPSaveArea = true; 2147 } 2148 2149 if (PFI->usesPICBase()) { 2150 int FI = PFI->getPICBasePointerSaveIndex(); 2151 assert(FI && "No PIC Base Pointer Save Slot!"); 2152 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2153 2154 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 2155 HasGPSaveArea = true; 2156 } 2157 2158 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2159 if (RegInfo->hasBasePointer(MF)) { 2160 int FI = PFI->getBasePointerSaveIndex(); 2161 assert(FI && "No Base Pointer Save Slot!"); 2162 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2163 2164 Register BP = RegInfo->getBaseRegister(MF); 2165 if (PPC::G8RCRegClass.contains(BP)) { 2166 MinG8R = std::min<unsigned>(MinG8R, BP); 2167 HasG8SaveArea = true; 2168 } else if (PPC::GPRCRegClass.contains(BP)) { 2169 MinGPR = std::min<unsigned>(MinGPR, BP); 2170 HasGPSaveArea = true; 2171 } 2172 } 2173 2174 // General register save area starts right below the Floating-point 2175 // register save area. 2176 if (HasGPSaveArea || HasG8SaveArea) { 2177 // Move general register save area spill slots down, taking into account 2178 // the size of the Floating-point register save area. 2179 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 2180 if (!GPRegs[i].isSpilledToReg()) { 2181 int FI = GPRegs[i].getFrameIdx(); 2182 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2183 } 2184 } 2185 2186 // Move general register save area spill slots down, taking into account 2187 // the size of the Floating-point register save area. 2188 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 2189 if (!G8Regs[i].isSpilledToReg()) { 2190 int FI = G8Regs[i].getFrameIdx(); 2191 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2192 } 2193 } 2194 2195 unsigned MinReg = 2196 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 2197 TRI->getEncodingValue(MinG8R)); 2198 2199 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; 2200 LowerBound -= (31 - MinReg + 1) * GPRegSize; 2201 } 2202 2203 // For 32-bit only, the CR save area is below the general register 2204 // save area. For 64-bit SVR4, the CR save area is addressed relative 2205 // to the stack pointer and hence does not need an adjustment here. 2206 // Only CR2 (the first nonvolatile spilled) has an associated frame 2207 // index so that we have a single uniform save area. 2208 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 2209 // Adjust the frame index of the CR spill slot. 2210 for (const auto &CSInfo : CSI) { 2211 if (CSInfo.getReg() == PPC::CR2) { 2212 int FI = CSInfo.getFrameIdx(); 2213 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2214 break; 2215 } 2216 } 2217 2218 LowerBound -= 4; // The CR save area is always 4 bytes long. 2219 } 2220 2221 if (HasVRSAVESaveArea) { 2222 // FIXME SVR4: Is it actually possible to have multiple elements in CSI 2223 // which have the VRSAVE register class? 2224 // Adjust the frame index of the VRSAVE spill slot. 2225 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2226 unsigned Reg = CSI[i].getReg(); 2227 2228 if (PPC::VRSAVERCRegClass.contains(Reg)) { 2229 int FI = CSI[i].getFrameIdx(); 2230 2231 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2232 } 2233 } 2234 2235 LowerBound -= 4; // The VRSAVE save area is always 4 bytes long. 2236 } 2237 2238 // Both Altivec and SPE have the same alignment and padding requirements 2239 // within the stack frame. 2240 if (HasVRSaveArea) { 2241 // Insert alignment padding, we need 16-byte alignment. Note: for positive 2242 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2243 // we are using negative number here (the stack grows downward). We should 2244 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2245 // is the alignment size ( n = 16 here) and y is the size after aligning. 2246 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2247 LowerBound &= ~(15); 2248 2249 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2250 int FI = VRegs[i].getFrameIdx(); 2251 2252 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2253 } 2254 } 2255 2256 addScavengingSpillSlot(MF, RS); 2257 } 2258 2259 void 2260 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2261 RegScavenger *RS) const { 2262 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2263 // a large stack, which will require scavenging a register to materialize a 2264 // large offset. 2265 2266 // We need to have a scavenger spill slot for spills if the frame size is 2267 // large. In case there is no free register for large-offset addressing, 2268 // this slot is used for the necessary emergency spill. Also, we need the 2269 // slot for dynamic stack allocations. 2270 2271 // The scavenger might be invoked if the frame offset does not fit into 2272 // the 16-bit immediate. We don't know the complete frame size here 2273 // because we've not yet computed callee-saved register spills or the 2274 // needed alignment padding. 2275 unsigned StackSize = determineFrameLayout(MF, true); 2276 MachineFrameInfo &MFI = MF.getFrameInfo(); 2277 if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) || 2278 hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) { 2279 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2280 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2281 const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC; 2282 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2283 unsigned Size = TRI.getSpillSize(RC); 2284 Align Alignment = TRI.getSpillAlign(RC); 2285 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false)); 2286 2287 // Might we have over-aligned allocas? 2288 bool HasAlVars = 2289 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2290 2291 // These kinds of spills might need two registers. 2292 if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars) 2293 RS->addScavengingFrameIndex( 2294 MFI.CreateStackObject(Size, Alignment, false)); 2295 } 2296 } 2297 2298 // This function checks if a callee saved gpr can be spilled to a volatile 2299 // vector register. This occurs for leaf functions when the option 2300 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2301 // which were not spilled to vectors, return false so the target independent 2302 // code can handle them by assigning a FrameIdx to a stack slot. 2303 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2304 MachineFunction &MF, const TargetRegisterInfo *TRI, 2305 std::vector<CalleeSavedInfo> &CSI) const { 2306 2307 if (CSI.empty()) 2308 return true; // Early exit if no callee saved registers are modified! 2309 2310 // Early exit if cannot spill gprs to volatile vector registers. 2311 MachineFrameInfo &MFI = MF.getFrameInfo(); 2312 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2313 return false; 2314 2315 // Build a BitVector of VSRs that can be used for spilling GPRs. 2316 BitVector BVAllocatable = TRI->getAllocatableSet(MF); 2317 BitVector BVCalleeSaved(TRI->getNumRegs()); 2318 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 2319 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF); 2320 for (unsigned i = 0; CSRegs[i]; ++i) 2321 BVCalleeSaved.set(CSRegs[i]); 2322 2323 for (unsigned Reg : BVAllocatable.set_bits()) { 2324 // Set to 0 if the register is not a volatile VF/F8 register, or if it is 2325 // used in the function. 2326 if (BVCalleeSaved[Reg] || 2327 (!PPC::F8RCRegClass.contains(Reg) && 2328 !PPC::VFRCRegClass.contains(Reg)) || 2329 (MF.getRegInfo().isPhysRegUsed(Reg))) 2330 BVAllocatable.reset(Reg); 2331 } 2332 2333 bool AllSpilledToReg = true; 2334 for (auto &CS : CSI) { 2335 if (BVAllocatable.none()) 2336 return false; 2337 2338 unsigned Reg = CS.getReg(); 2339 if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) { 2340 AllSpilledToReg = false; 2341 continue; 2342 } 2343 2344 unsigned VolatileVFReg = BVAllocatable.find_first(); 2345 if (VolatileVFReg < BVAllocatable.size()) { 2346 CS.setDstReg(VolatileVFReg); 2347 BVAllocatable.reset(VolatileVFReg); 2348 } else { 2349 AllSpilledToReg = false; 2350 } 2351 } 2352 return AllSpilledToReg; 2353 } 2354 2355 bool PPCFrameLowering::spillCalleeSavedRegisters( 2356 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2357 ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2358 2359 MachineFunction *MF = MBB.getParent(); 2360 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2361 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2362 bool MustSaveTOC = FI->mustSaveTOC(); 2363 DebugLoc DL; 2364 bool CRSpilled = false; 2365 MachineInstrBuilder CRMIB; 2366 2367 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2368 unsigned Reg = CSI[i].getReg(); 2369 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2370 if (Reg == PPC::VRSAVE) 2371 continue; 2372 2373 // CR2 through CR4 are the nonvolatile CR fields. 2374 bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4; 2375 2376 // Add the callee-saved register as live-in; it's killed at the spill. 2377 // Do not do this for callee-saved registers that are live-in to the 2378 // function because they will already be marked live-in and this will be 2379 // adding it for a second time. It is an error to add the same register 2380 // to the set more than once. 2381 const MachineRegisterInfo &MRI = MF->getRegInfo(); 2382 bool IsLiveIn = MRI.isLiveIn(Reg); 2383 if (!IsLiveIn) 2384 MBB.addLiveIn(Reg); 2385 2386 if (CRSpilled && IsCRField) { 2387 CRMIB.addReg(Reg, RegState::ImplicitKill); 2388 continue; 2389 } 2390 2391 // The actual spill will happen in the prologue. 2392 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2393 continue; 2394 2395 // Insert the spill to the stack frame. 2396 if (IsCRField) { 2397 PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>(); 2398 if (!Subtarget.is32BitELFABI()) { 2399 // The actual spill will happen at the start of the prologue. 2400 FuncInfo->addMustSaveCR(Reg); 2401 } else { 2402 CRSpilled = true; 2403 FuncInfo->setSpillsCR(); 2404 2405 // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have 2406 // the same frame index in PPCRegisterInfo::hasReservedSpillSlot. 2407 CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12) 2408 .addReg(Reg, RegState::ImplicitKill); 2409 2410 MBB.insert(MI, CRMIB); 2411 MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW)) 2412 .addReg(PPC::R12, 2413 getKillRegState(true)), 2414 CSI[i].getFrameIdx())); 2415 } 2416 } else { 2417 if (CSI[i].isSpilledToReg()) { 2418 NumPESpillVSR++; 2419 BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg()) 2420 .addReg(Reg, getKillRegState(true)); 2421 } else { 2422 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2423 // Use !IsLiveIn for the kill flag. 2424 // We do not want to kill registers that are live in this function 2425 // before their use because they will become undefined registers. 2426 // Functions without NoUnwind need to preserve the order of elements in 2427 // saved vector registers. 2428 if (Subtarget.needsSwapsForVSXMemOps() && 2429 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2430 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2431 CSI[i].getFrameIdx(), RC, TRI); 2432 else 2433 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), 2434 RC, TRI); 2435 } 2436 } 2437 } 2438 return true; 2439 } 2440 2441 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2442 bool CR4Spilled, MachineBasicBlock &MBB, 2443 MachineBasicBlock::iterator MI, 2444 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2445 2446 MachineFunction *MF = MBB.getParent(); 2447 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2448 DebugLoc DL; 2449 unsigned MoveReg = PPC::R12; 2450 2451 // 32-bit: FP-relative 2452 MBB.insert(MI, 2453 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2454 CSI[CSIIndex].getFrameIdx())); 2455 2456 unsigned RestoreOp = PPC::MTOCRF; 2457 if (CR2Spilled) 2458 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2459 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2460 2461 if (CR3Spilled) 2462 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2463 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2464 2465 if (CR4Spilled) 2466 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2467 .addReg(MoveReg, getKillRegState(true))); 2468 } 2469 2470 MachineBasicBlock::iterator PPCFrameLowering:: 2471 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2472 MachineBasicBlock::iterator I) const { 2473 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2474 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2475 I->getOpcode() == PPC::ADJCALLSTACKUP) { 2476 // Add (actually subtract) back the amount the callee popped on return. 2477 if (int CalleeAmt = I->getOperand(1).getImm()) { 2478 bool is64Bit = Subtarget.isPPC64(); 2479 CalleeAmt *= -1; 2480 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2481 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2482 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2483 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2484 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2485 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2486 const DebugLoc &dl = I->getDebugLoc(); 2487 2488 if (isInt<16>(CalleeAmt)) { 2489 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2490 .addReg(StackReg, RegState::Kill) 2491 .addImm(CalleeAmt); 2492 } else { 2493 MachineBasicBlock::iterator MBBI = I; 2494 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2495 .addImm(CalleeAmt >> 16); 2496 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2497 .addReg(TmpReg, RegState::Kill) 2498 .addImm(CalleeAmt & 0xFFFF); 2499 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2500 .addReg(StackReg, RegState::Kill) 2501 .addReg(TmpReg); 2502 } 2503 } 2504 } 2505 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 2506 return MBB.erase(I); 2507 } 2508 2509 static bool isCalleeSavedCR(unsigned Reg) { 2510 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2511 } 2512 2513 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2514 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2515 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2516 MachineFunction *MF = MBB.getParent(); 2517 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2518 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2519 bool MustSaveTOC = FI->mustSaveTOC(); 2520 bool CR2Spilled = false; 2521 bool CR3Spilled = false; 2522 bool CR4Spilled = false; 2523 unsigned CSIIndex = 0; 2524 2525 // Initialize insertion-point logic; we will be restoring in reverse 2526 // order of spill. 2527 MachineBasicBlock::iterator I = MI, BeforeI = I; 2528 bool AtStart = I == MBB.begin(); 2529 2530 if (!AtStart) 2531 --BeforeI; 2532 2533 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2534 unsigned Reg = CSI[i].getReg(); 2535 2536 // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used. 2537 if (Reg == PPC::VRSAVE) 2538 continue; 2539 2540 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2541 continue; 2542 2543 // Restore of callee saved condition register field is handled during 2544 // epilogue insertion. 2545 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2546 continue; 2547 2548 if (Reg == PPC::CR2) { 2549 CR2Spilled = true; 2550 // The spill slot is associated only with CR2, which is the 2551 // first nonvolatile spilled. Save it here. 2552 CSIIndex = i; 2553 continue; 2554 } else if (Reg == PPC::CR3) { 2555 CR3Spilled = true; 2556 continue; 2557 } else if (Reg == PPC::CR4) { 2558 CR4Spilled = true; 2559 continue; 2560 } else { 2561 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2562 // least one CR register, restore all spilled CRs together. 2563 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2564 bool is31 = needsFP(*MF); 2565 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2566 CSIIndex); 2567 CR2Spilled = CR3Spilled = CR4Spilled = false; 2568 } 2569 2570 if (CSI[i].isSpilledToReg()) { 2571 DebugLoc DL; 2572 NumPEReloadVSR++; 2573 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg) 2574 .addReg(CSI[i].getDstReg(), getKillRegState(true)); 2575 } else { 2576 // Default behavior for non-CR saves. 2577 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2578 2579 // Functions without NoUnwind need to preserve the order of elements in 2580 // saved vector registers. 2581 if (Subtarget.needsSwapsForVSXMemOps() && 2582 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2583 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2584 TRI); 2585 else 2586 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); 2587 2588 assert(I != MBB.begin() && 2589 "loadRegFromStackSlot didn't insert any code!"); 2590 } 2591 } 2592 2593 // Insert in reverse order. 2594 if (AtStart) 2595 I = MBB.begin(); 2596 else { 2597 I = BeforeI; 2598 ++I; 2599 } 2600 } 2601 2602 // If we haven't yet spilled the CRs, do so now. 2603 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2604 assert(Subtarget.is32BitELFABI() && 2605 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2606 bool is31 = needsFP(*MF); 2607 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2608 } 2609 2610 return true; 2611 } 2612 2613 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2614 return TOCSaveOffset; 2615 } 2616 2617 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2618 return FramePointerSaveOffset; 2619 } 2620 2621 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2622 return BasePointerSaveOffset; 2623 } 2624 2625 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2626 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2627 return false; 2628 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2629 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2630 } 2631