1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===// 2 // 3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 // See https://llvm.org/LICENSE.txt for license information. 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 // 7 //===----------------------------------------------------------------------===// 8 // 9 // This file contains the PPC implementation of TargetFrameLowering class. 10 // 11 //===----------------------------------------------------------------------===// 12 13 #include "MCTargetDesc/PPCPredicates.h" 14 #include "PPCFrameLowering.h" 15 #include "PPCInstrBuilder.h" 16 #include "PPCInstrInfo.h" 17 #include "PPCMachineFunctionInfo.h" 18 #include "PPCSubtarget.h" 19 #include "PPCTargetMachine.h" 20 #include "llvm/ADT/Statistic.h" 21 #include "llvm/CodeGen/MachineFrameInfo.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineInstrBuilder.h" 24 #include "llvm/CodeGen/MachineModuleInfo.h" 25 #include "llvm/CodeGen/MachineRegisterInfo.h" 26 #include "llvm/CodeGen/RegisterScavenging.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/Target/TargetOptions.h" 29 30 using namespace llvm; 31 32 #define DEBUG_TYPE "framelowering" 33 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue"); 34 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue"); 35 STATISTIC(NumPrologProbed, "Number of prologues probed"); 36 37 static cl::opt<bool> 38 EnablePEVectorSpills("ppc-enable-pe-vector-spills", 39 cl::desc("Enable spills in prologue to vector registers."), 40 cl::init(false), cl::Hidden); 41 42 /// VRRegNo - Map from a numbered VR register to its enum value. 
43 /// 44 static const MCPhysReg VRRegNo[] = { 45 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , 46 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, 47 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, 48 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31 49 }; 50 51 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) { 52 if (STI.isAIXABI()) 53 return STI.isPPC64() ? 16 : 8; 54 // SVR4 ABI: 55 return STI.isPPC64() ? 16 : 4; 56 } 57 58 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) { 59 if (STI.isAIXABI()) 60 return STI.isPPC64() ? 40 : 20; 61 return STI.isELFv2ABI() ? 24 : 40; 62 } 63 64 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) { 65 // First slot in the general register save area. 66 return STI.isPPC64() ? -8U : -4U; 67 } 68 69 static unsigned computeLinkageSize(const PPCSubtarget &STI) { 70 if (STI.isAIXABI() || STI.isPPC64()) 71 return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4); 72 73 // 32-bit SVR4 ABI: 74 return 8; 75 } 76 77 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) { 78 // Third slot in the general purpose register save area. 79 if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent()) 80 return -12U; 81 82 // Second slot in the general purpose register save area. 83 return STI.isPPC64() ? -16U : -8U; 84 } 85 86 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) { 87 return (STI.isAIXABI() && !STI.isPPC64()) ? 
4 : 8; 88 } 89 90 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI) 91 : TargetFrameLowering(TargetFrameLowering::StackGrowsDown, 92 STI.getPlatformStackAlignment(), 0), 93 Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)), 94 TOCSaveOffset(computeTOCSaveOffset(Subtarget)), 95 FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)), 96 LinkageSize(computeLinkageSize(Subtarget)), 97 BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)), 98 CRSaveOffset(computeCRSaveOffset(Subtarget)) {} 99 100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack. 101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots( 102 unsigned &NumEntries) const { 103 104 // Floating-point register save area offsets. 105 #define CALLEE_SAVED_FPRS \ 106 {PPC::F31, -8}, \ 107 {PPC::F30, -16}, \ 108 {PPC::F29, -24}, \ 109 {PPC::F28, -32}, \ 110 {PPC::F27, -40}, \ 111 {PPC::F26, -48}, \ 112 {PPC::F25, -56}, \ 113 {PPC::F24, -64}, \ 114 {PPC::F23, -72}, \ 115 {PPC::F22, -80}, \ 116 {PPC::F21, -88}, \ 117 {PPC::F20, -96}, \ 118 {PPC::F19, -104}, \ 119 {PPC::F18, -112}, \ 120 {PPC::F17, -120}, \ 121 {PPC::F16, -128}, \ 122 {PPC::F15, -136}, \ 123 {PPC::F14, -144} 124 125 // 32-bit general purpose register save area offsets shared by ELF and 126 // AIX. AIX has an extra CSR with r13. 127 #define CALLEE_SAVED_GPRS32 \ 128 {PPC::R31, -4}, \ 129 {PPC::R30, -8}, \ 130 {PPC::R29, -12}, \ 131 {PPC::R28, -16}, \ 132 {PPC::R27, -20}, \ 133 {PPC::R26, -24}, \ 134 {PPC::R25, -28}, \ 135 {PPC::R24, -32}, \ 136 {PPC::R23, -36}, \ 137 {PPC::R22, -40}, \ 138 {PPC::R21, -44}, \ 139 {PPC::R20, -48}, \ 140 {PPC::R19, -52}, \ 141 {PPC::R18, -56}, \ 142 {PPC::R17, -60}, \ 143 {PPC::R16, -64}, \ 144 {PPC::R15, -68}, \ 145 {PPC::R14, -72} 146 147 // 64-bit general purpose register save area offsets. 
148 #define CALLEE_SAVED_GPRS64 \ 149 {PPC::X31, -8}, \ 150 {PPC::X30, -16}, \ 151 {PPC::X29, -24}, \ 152 {PPC::X28, -32}, \ 153 {PPC::X27, -40}, \ 154 {PPC::X26, -48}, \ 155 {PPC::X25, -56}, \ 156 {PPC::X24, -64}, \ 157 {PPC::X23, -72}, \ 158 {PPC::X22, -80}, \ 159 {PPC::X21, -88}, \ 160 {PPC::X20, -96}, \ 161 {PPC::X19, -104}, \ 162 {PPC::X18, -112}, \ 163 {PPC::X17, -120}, \ 164 {PPC::X16, -128}, \ 165 {PPC::X15, -136}, \ 166 {PPC::X14, -144} 167 168 // Vector register save area offsets. 169 #define CALLEE_SAVED_VRS \ 170 {PPC::V31, -16}, \ 171 {PPC::V30, -32}, \ 172 {PPC::V29, -48}, \ 173 {PPC::V28, -64}, \ 174 {PPC::V27, -80}, \ 175 {PPC::V26, -96}, \ 176 {PPC::V25, -112}, \ 177 {PPC::V24, -128}, \ 178 {PPC::V23, -144}, \ 179 {PPC::V22, -160}, \ 180 {PPC::V21, -176}, \ 181 {PPC::V20, -192} 182 183 // Note that the offsets here overlap, but this is fixed up in 184 // processFunctionBeforeFrameFinalized. 185 186 static const SpillSlot ELFOffsets32[] = { 187 CALLEE_SAVED_FPRS, 188 CALLEE_SAVED_GPRS32, 189 190 // CR save area offset. We map each of the nonvolatile CR fields 191 // to the slot for CR2, which is the first of the nonvolatile CR 192 // fields to be assigned, so that we only allocate one save slot. 193 // See PPCRegisterInfo::hasReservedSpillSlot() for more information. 194 {PPC::CR2, -4}, 195 196 // VRSAVE save area offset. 197 {PPC::VRSAVE, -4}, 198 199 CALLEE_SAVED_VRS, 200 201 // SPE register save area (overlaps Vector save area). 202 {PPC::S31, -8}, 203 {PPC::S30, -16}, 204 {PPC::S29, -24}, 205 {PPC::S28, -32}, 206 {PPC::S27, -40}, 207 {PPC::S26, -48}, 208 {PPC::S25, -56}, 209 {PPC::S24, -64}, 210 {PPC::S23, -72}, 211 {PPC::S22, -80}, 212 {PPC::S21, -88}, 213 {PPC::S20, -96}, 214 {PPC::S19, -104}, 215 {PPC::S18, -112}, 216 {PPC::S17, -120}, 217 {PPC::S16, -128}, 218 {PPC::S15, -136}, 219 {PPC::S14, -144}}; 220 221 static const SpillSlot ELFOffsets64[] = { 222 CALLEE_SAVED_FPRS, 223 CALLEE_SAVED_GPRS64, 224 225 // VRSAVE save area offset. 
226 {PPC::VRSAVE, -4}, 227 CALLEE_SAVED_VRS 228 }; 229 230 static const SpillSlot AIXOffsets32[] = { 231 CALLEE_SAVED_FPRS, 232 CALLEE_SAVED_GPRS32, 233 // Add AIX's extra CSR. 234 {PPC::R13, -76}, 235 // TODO: Update when we add vector support for AIX. 236 }; 237 238 static const SpillSlot AIXOffsets64[] = { 239 CALLEE_SAVED_FPRS, 240 CALLEE_SAVED_GPRS64, 241 // TODO: Update when we add vector support for AIX. 242 }; 243 244 if (Subtarget.is64BitELFABI()) { 245 NumEntries = array_lengthof(ELFOffsets64); 246 return ELFOffsets64; 247 } 248 249 if (Subtarget.is32BitELFABI()) { 250 NumEntries = array_lengthof(ELFOffsets32); 251 return ELFOffsets32; 252 } 253 254 assert(Subtarget.isAIXABI() && "Unexpected ABI."); 255 256 if (Subtarget.isPPC64()) { 257 NumEntries = array_lengthof(AIXOffsets64); 258 return AIXOffsets64; 259 } 260 261 NumEntries = array_lengthof(AIXOffsets32); 262 return AIXOffsets32; 263 } 264 265 static bool spillsCR(const MachineFunction &MF) { 266 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 267 return FuncInfo->isCRSpilled(); 268 } 269 270 static bool hasSpills(const MachineFunction &MF) { 271 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 272 return FuncInfo->hasSpills(); 273 } 274 275 static bool hasNonRISpills(const MachineFunction &MF) { 276 const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>(); 277 return FuncInfo->hasNonRISpills(); 278 } 279 280 /// MustSaveLR - Return true if this function requires that we save the LR 281 /// register onto the stack in the prolog and restore it in the epilog of the 282 /// function. 283 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) { 284 const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>(); 285 286 // We need a save/restore of LR if there is any def of LR (which is 287 // defined by calls, including the PIC setup sequence), or if there is 288 // some use of the LR stack slot (e.g. for builtin_return_address). 
289 // (LR comes in 32 and 64 bit versions.) 290 MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR); 291 return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired(); 292 } 293 294 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum 295 /// call frame size. Update the MachineFunction object with the stack size. 296 unsigned 297 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 298 bool UseEstimate) const { 299 unsigned NewMaxCallFrameSize = 0; 300 unsigned FrameSize = determineFrameLayout(MF, UseEstimate, 301 &NewMaxCallFrameSize); 302 MF.getFrameInfo().setStackSize(FrameSize); 303 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 304 return FrameSize; 305 } 306 307 /// determineFrameLayout - Determine the size of the frame and maximum call 308 /// frame size. 309 unsigned 310 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 311 bool UseEstimate, 312 unsigned *NewMaxCallFrameSize) const { 313 const MachineFrameInfo &MFI = MF.getFrameInfo(); 314 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 315 316 // Get the number of bytes to allocate from the FrameInfo 317 unsigned FrameSize = 318 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 319 320 // Get stack alignments. The frame must be aligned to the greatest of these: 321 Align TargetAlign = getStackAlign(); // alignment required per the ABI 322 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 323 Align Alignment = std::max(TargetAlign, MaxAlign); 324 325 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 326 327 unsigned LR = RegInfo->getRARegister(); 328 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 329 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 330 !MFI.adjustsStack() && // No calls. 331 !MustSaveLR(MF, LR) && // No need to save LR. 332 !FI->mustSaveTOC() && // No need to save TOC. 
333 !RegInfo->hasBasePointer(MF); // No special alignment. 334 335 // Note: for PPC32 SVR4ABI, we can still generate stackless 336 // code if all local vars are reg-allocated. 337 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 338 339 // Check whether we can skip adjusting the stack pointer (by using red zone) 340 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 341 // No need for frame 342 return 0; 343 } 344 345 // Get the maximum call frame size of all the calls. 346 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 347 348 // Maximum call frame needs to be at least big enough for linkage area. 349 unsigned minCallFrameSize = getLinkageSize(); 350 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 351 352 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 353 // that allocations will be aligned. 354 if (MFI.hasVarSizedObjects()) 355 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 356 357 // Update the new max call frame size if the caller passes in a valid pointer. 358 if (NewMaxCallFrameSize) 359 *NewMaxCallFrameSize = maxCallFrameSize; 360 361 // Include call frame size in total. 362 FrameSize += maxCallFrameSize; 363 364 // Make sure the frame is aligned. 365 FrameSize = alignTo(FrameSize, Alignment); 366 367 return FrameSize; 368 } 369 370 // hasFP - Return true if the specified function actually has a dedicated frame 371 // pointer register. 372 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 373 const MachineFrameInfo &MFI = MF.getFrameInfo(); 374 // FIXME: This is pretty much broken by design: hasFP() might be called really 375 // early, before the stack layout was calculated and thus hasFP() might return 376 // true or false here depending on the time of call. 377 return (MFI.getStackSize()) && needsFP(MF); 378 } 379 380 // needsFP - Return true if the specified function should have a dedicated frame 381 // pointer register. 
This is true if the function has variable sized allocas or 382 // if frame pointer elimination is disabled. 383 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const { 384 const MachineFrameInfo &MFI = MF.getFrameInfo(); 385 386 // Naked functions have no stack frame pushed, so we don't have a frame 387 // pointer. 388 if (MF.getFunction().hasFnAttribute(Attribute::Naked)) 389 return false; 390 391 return MF.getTarget().Options.DisableFramePointerElim(MF) || 392 MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() || 393 (MF.getTarget().Options.GuaranteedTailCallOpt && 394 MF.getInfo<PPCFunctionInfo>()->hasFastCall()); 395 } 396 397 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const { 398 bool is31 = needsFP(MF); 399 unsigned FPReg = is31 ? PPC::R31 : PPC::R1; 400 unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1; 401 402 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 403 bool HasBP = RegInfo->hasBasePointer(MF); 404 unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg; 405 unsigned BP8Reg = HasBP ? 
(unsigned) PPC::X30 : FP8Reg; 406 407 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end(); 408 BI != BE; ++BI) 409 for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) { 410 --MBBI; 411 for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) { 412 MachineOperand &MO = MBBI->getOperand(I); 413 if (!MO.isReg()) 414 continue; 415 416 switch (MO.getReg()) { 417 case PPC::FP: 418 MO.setReg(FPReg); 419 break; 420 case PPC::FP8: 421 MO.setReg(FP8Reg); 422 break; 423 case PPC::BP: 424 MO.setReg(BPReg); 425 break; 426 case PPC::BP8: 427 MO.setReg(BP8Reg); 428 break; 429 430 } 431 } 432 } 433 } 434 435 /* This function will do the following: 436 - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12 437 respectively (defaults recommended by the ABI) and return true 438 - If MBB is not an entry block, initialize the register scavenger and look 439 for available registers. 440 - If the defaults (R0/R12) are available, return true 441 - If TwoUniqueRegsRequired is set to true, it looks for two unique 442 registers. Otherwise, look for a single available register. 443 - If the required registers are found, set SR1 and SR2 and return true. 444 - If the required registers are not found, set SR2 or both SR1 and SR2 to 445 PPC::NoRegister and return false. 446 447 Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired 448 is not set, this function will attempt to find two different registers, but 449 still return true if only one register is available (and set SR1 == SR2). 450 */ 451 bool 452 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB, 453 bool UseAtEnd, 454 bool TwoUniqueRegsRequired, 455 Register *SR1, 456 Register *SR2) const { 457 RegScavenger RS; 458 Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0; 459 Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12; 460 461 // Set the defaults for the two scratch registers. 
462 if (SR1) 463 *SR1 = R0; 464 465 if (SR2) { 466 assert (SR1 && "Asking for the second scratch register but not the first?"); 467 *SR2 = R12; 468 } 469 470 // If MBB is an entry or exit block, use R0 and R12 as the scratch registers. 471 if ((UseAtEnd && MBB->isReturnBlock()) || 472 (!UseAtEnd && (&MBB->getParent()->front() == MBB))) 473 return true; 474 475 RS.enterBasicBlock(*MBB); 476 477 if (UseAtEnd && !MBB->empty()) { 478 // The scratch register will be used at the end of the block, so must 479 // consider all registers used within the block 480 481 MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator(); 482 // If no terminator, back iterator up to previous instruction. 483 if (MBBI == MBB->end()) 484 MBBI = std::prev(MBBI); 485 486 if (MBBI != MBB->begin()) 487 RS.forward(MBBI); 488 } 489 490 // If the two registers are available, we're all good. 491 // Note that we only return here if both R0 and R12 are available because 492 // although the function may not require two unique registers, it may benefit 493 // from having two so we should try to provide them. 494 if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12)) 495 return true; 496 497 // Get the list of callee-saved registers for the target. 498 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 499 const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent()); 500 501 // Get all the available registers in the block. 502 BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass : 503 &PPC::GPRCRegClass); 504 505 // We shouldn't use callee-saved registers as scratch registers as they may be 506 // available when looking for a candidate block for shrink wrapping but not 507 // available when the actual prologue/epilogue is being emitted because they 508 // were added as live-in to the prologue block by PrologueEpilogueInserter. 509 for (int i = 0; CSRegs[i]; ++i) 510 BV.reset(CSRegs[i]); 511 512 // Set the first scratch register to the first available one. 
513 if (SR1) { 514 int FirstScratchReg = BV.find_first(); 515 *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg; 516 } 517 518 // If there is another one available, set the second scratch register to that. 519 // Otherwise, set it to either PPC::NoRegister if this function requires two 520 // or to whatever SR1 is set to if this function doesn't require two. 521 if (SR2) { 522 int SecondScratchReg = BV.find_next(*SR1); 523 if (SecondScratchReg != -1) 524 *SR2 = SecondScratchReg; 525 else 526 *SR2 = TwoUniqueRegsRequired ? Register() : *SR1; 527 } 528 529 // Now that we've done our best to provide both registers, double check 530 // whether we were unable to provide enough. 531 if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U)) 532 return false; 533 534 return true; 535 } 536 537 // We need a scratch register for spilling LR and for spilling CR. By default, 538 // we use two scratch registers to hide latency. However, if only one scratch 539 // register is available, we can adjust for that by not overlapping the spill 540 // code. However, if we need to realign the stack (i.e. have a base pointer) 541 // and the stack frame is large, we need two scratch registers. 
542 bool 543 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const { 544 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 545 MachineFunction &MF = *(MBB->getParent()); 546 bool HasBP = RegInfo->hasBasePointer(MF); 547 unsigned FrameSize = determineFrameLayout(MF); 548 int NegFrameSize = -FrameSize; 549 bool IsLargeFrame = !isInt<16>(NegFrameSize); 550 MachineFrameInfo &MFI = MF.getFrameInfo(); 551 Align MaxAlign = MFI.getMaxAlign(); 552 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 553 554 return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1; 555 } 556 557 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const { 558 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 559 560 return findScratchRegister(TmpMBB, false, 561 twoUniqueScratchRegsRequired(TmpMBB)); 562 } 563 564 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { 565 MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB); 566 567 return findScratchRegister(TmpMBB, true); 568 } 569 570 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const { 571 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 572 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 573 574 // Abort if there is no register info or function info. 575 if (!RegInfo || !FI) 576 return false; 577 578 // Only move the stack update on ELFv2 ABI and PPC64. 579 if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64()) 580 return false; 581 582 // Check the frame size first and return false if it does not fit the 583 // requirements. 584 // We need a non-zero frame size as well as a frame that will fit in the red 585 // zone. This is because by moving the stack pointer update we are now storing 586 // to the red zone until the stack pointer is updated. 
If we get an interrupt 587 // inside the prologue but before the stack update we now have a number of 588 // stores to the red zone and those stores must all fit. 589 MachineFrameInfo &MFI = MF.getFrameInfo(); 590 unsigned FrameSize = MFI.getStackSize(); 591 if (!FrameSize || FrameSize > Subtarget.getRedZoneSize()) 592 return false; 593 594 // Frame pointers and base pointers complicate matters so don't do anything 595 // if we have them. For example having a frame pointer will sometimes require 596 // a copy of r1 into r31 and that makes keeping track of updates to r1 more 597 // difficult. 598 if (hasFP(MF) || RegInfo->hasBasePointer(MF)) 599 return false; 600 601 // Calls to fast_cc functions use different rules for passing parameters on 602 // the stack from the ABI and using PIC base in the function imposes 603 // similar restrictions to using the base pointer. It is not generally safe 604 // to move the stack pointer update in these situations. 605 if (FI->hasFastCall() || FI->usesPICBase()) 606 return false; 607 608 // Finally we can move the stack update if we do not require register 609 // scavenging. Register scavenging can introduce more spills and so 610 // may make the frame size larger than we have computed. 611 return !RegInfo->requiresFrameIndexScavenging(MF); 612 } 613 614 void PPCFrameLowering::emitPrologue(MachineFunction &MF, 615 MachineBasicBlock &MBB) const { 616 MachineBasicBlock::iterator MBBI = MBB.begin(); 617 MachineFrameInfo &MFI = MF.getFrameInfo(); 618 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 619 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 620 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 621 622 MachineModuleInfo &MMI = MF.getMMI(); 623 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 624 DebugLoc dl; 625 // AIX assembler does not support cfi directives. 626 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 627 628 // Get processor type. 
629 bool isPPC64 = Subtarget.isPPC64(); 630 // Get the ABI. 631 bool isSVR4ABI = Subtarget.isSVR4ABI(); 632 bool isAIXABI = Subtarget.isAIXABI(); 633 bool isELFv2ABI = Subtarget.isELFv2ABI(); 634 assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI."); 635 636 // Work out frame sizes. 637 unsigned FrameSize = determineFrameLayoutAndUpdate(MF); 638 int NegFrameSize = -FrameSize; 639 if (!isInt<32>(NegFrameSize)) 640 llvm_unreachable("Unhandled stack size!"); 641 642 if (MFI.isFrameAddressTaken()) 643 replaceFPWithRealFP(MF); 644 645 // Check if the link register (LR) must be saved. 646 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 647 bool MustSaveLR = FI->mustSaveLR(); 648 bool MustSaveTOC = FI->mustSaveTOC(); 649 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 650 bool MustSaveCR = !MustSaveCRs.empty(); 651 // Do we have a frame pointer and/or base pointer for this function? 652 bool HasFP = hasFP(MF); 653 bool HasBP = RegInfo->hasBasePointer(MF); 654 bool HasRedZone = isPPC64 || !isSVR4ABI; 655 656 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 657 Register BPReg = RegInfo->getBaseRegister(MF); 658 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 659 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 660 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 661 Register ScratchReg; 662 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 663 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 664 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 665 : PPC::MFLR ); 666 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 667 : PPC::STW ); 668 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU 669 : PPC::STWU ); 670 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 671 : PPC::STWUX); 672 const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8 673 : PPC::LIS ); 674 const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? 
PPC::ORI8 675 : PPC::ORI ); 676 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 677 : PPC::OR ); 678 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 679 : PPC::SUBFC); 680 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 681 : PPC::SUBFIC); 682 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 683 : PPC::MFCR); 684 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 685 686 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 687 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 688 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 689 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 690 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 691 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 692 693 // Using the same bool variable as below to suppress compiler warnings. 694 // Stack probe requires two scratch registers, one for old sp, one for large 695 // frame and large probe size. 
696 bool SingleScratchReg = findScratchRegister( 697 &MBB, false, 698 twoUniqueScratchRegsRequired(&MBB) || TLI.hasInlineStackProbe(MF), 699 &ScratchReg, &TempReg); 700 assert(SingleScratchReg && 701 "Required number of registers not available in this block"); 702 703 SingleScratchReg = ScratchReg == TempReg; 704 705 int LROffset = getReturnSaveOffset(); 706 707 int FPOffset = 0; 708 if (HasFP) { 709 MachineFrameInfo &MFI = MF.getFrameInfo(); 710 int FPIndex = FI->getFramePointerSaveIndex(); 711 assert(FPIndex && "No Frame Pointer Save Slot!"); 712 FPOffset = MFI.getObjectOffset(FPIndex); 713 } 714 715 int BPOffset = 0; 716 if (HasBP) { 717 MachineFrameInfo &MFI = MF.getFrameInfo(); 718 int BPIndex = FI->getBasePointerSaveIndex(); 719 assert(BPIndex && "No Base Pointer Save Slot!"); 720 BPOffset = MFI.getObjectOffset(BPIndex); 721 } 722 723 int PBPOffset = 0; 724 if (FI->usesPICBase()) { 725 MachineFrameInfo &MFI = MF.getFrameInfo(); 726 int PBPIndex = FI->getPICBasePointerSaveIndex(); 727 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 728 PBPOffset = MFI.getObjectOffset(PBPIndex); 729 } 730 731 // Get stack alignments. 732 Align MaxAlign = MFI.getMaxAlign(); 733 if (HasBP && MaxAlign > 1) 734 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 735 736 // Frames of 32KB & larger require special handling because they cannot be 737 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 738 bool isLargeFrame = !isInt<16>(NegFrameSize); 739 740 // Check if we can move the stack update instruction (stdu) down the prologue 741 // past the callee saves. Hopefully this will avoid the situation where the 742 // saves are waiting for the update on the store with update to complete. 743 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 744 bool MovingStackUpdateDown = false; 745 746 // Check if we can move the stack update. 
747 if (stackUpdateCanBeMoved(MF)) { 748 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 749 for (CalleeSavedInfo CSI : Info) { 750 int FrIdx = CSI.getFrameIdx(); 751 // If the frame index is not negative the callee saved info belongs to a 752 // stack object that is not a fixed stack object. We ignore non-fixed 753 // stack objects because we won't move the stack update pointer past them. 754 if (FrIdx >= 0) 755 continue; 756 757 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 758 StackUpdateLoc++; 759 MovingStackUpdateDown = true; 760 } else { 761 // We need all of the Frame Indices to meet these conditions. 762 // If they do not, abort the whole operation. 763 StackUpdateLoc = MBBI; 764 MovingStackUpdateDown = false; 765 break; 766 } 767 } 768 769 // If the operation was not aborted then update the object offset. 770 if (MovingStackUpdateDown) { 771 for (CalleeSavedInfo CSI : Info) { 772 int FrIdx = CSI.getFrameIdx(); 773 if (FrIdx < 0) 774 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 775 } 776 } 777 } 778 779 // Where in the prologue we move the CR fields depends on how many scratch 780 // registers we have, and if we need to save the link register or not. This 781 // lambda is to avoid duplicating the logic in 2 places. 782 auto BuildMoveFromCR = [&]() { 783 if (isELFv2ABI && MustSaveCRs.size() == 1) { 784 // In the ELFv2 ABI, we are not required to save all CR fields. 785 // If only one CR field is clobbered, it is more efficient to use 786 // mfocrf to selectively save just that field, because mfocrf has short 787 // latency compares to mfcr. 
788 assert(isPPC64 && "V2 ABI is 64-bit only."); 789 MachineInstrBuilder MIB = 790 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 791 MIB.addReg(MustSaveCRs[0], RegState::Kill); 792 } else { 793 MachineInstrBuilder MIB = 794 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 795 for (unsigned CRfield : MustSaveCRs) 796 MIB.addReg(CRfield, RegState::ImplicitKill); 797 } 798 }; 799 800 // If we need to spill the CR and the LR but we don't have two separate 801 // registers available, we must spill them one at a time 802 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 803 BuildMoveFromCR(); 804 BuildMI(MBB, MBBI, dl, StoreWordInst) 805 .addReg(TempReg, getKillRegState(true)) 806 .addImm(CRSaveOffset) 807 .addReg(SPReg); 808 } 809 810 if (MustSaveLR) 811 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 812 813 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 814 BuildMoveFromCR(); 815 816 if (HasRedZone) { 817 if (HasFP) 818 BuildMI(MBB, MBBI, dl, StoreInst) 819 .addReg(FPReg) 820 .addImm(FPOffset) 821 .addReg(SPReg); 822 if (FI->usesPICBase()) 823 BuildMI(MBB, MBBI, dl, StoreInst) 824 .addReg(PPC::R30) 825 .addImm(PBPOffset) 826 .addReg(SPReg); 827 if (HasBP) 828 BuildMI(MBB, MBBI, dl, StoreInst) 829 .addReg(BPReg) 830 .addImm(BPOffset) 831 .addReg(SPReg); 832 } 833 834 if (MustSaveLR) 835 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 836 .addReg(ScratchReg, getKillRegState(true)) 837 .addImm(LROffset) 838 .addReg(SPReg); 839 840 if (MustSaveCR && 841 !(SingleScratchReg && MustSaveLR)) { 842 assert(HasRedZone && "A red zone is always available on PPC64"); 843 BuildMI(MBB, MBBI, dl, StoreWordInst) 844 .addReg(TempReg, getKillRegState(true)) 845 .addImm(CRSaveOffset) 846 .addReg(SPReg); 847 } 848 849 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 850 if (!FrameSize) 851 return; 852 853 // Adjust stack pointer: r1 += NegFrameSize. 
854 // If there is a preferred stack alignment, align R1 now 855 856 if (HasBP && HasRedZone) { 857 // Save a copy of r1 as the base pointer. 858 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 859 .addReg(SPReg) 860 .addReg(SPReg); 861 } 862 863 // Have we generated a STUX instruction to claim stack frame? If so, 864 // the negated frame size will be placed in ScratchReg. 865 bool HasSTUX = false; 866 867 // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain 868 // pointer is always stored at SP, we will get a free probe due to an essential 869 // STU(X) instruction. 870 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { 871 // To be consistent with other targets, a pseudo instruction is emitted and 872 // will be later expanded in `inlineStackProbe`. 873 BuildMI(MBB, MBBI, dl, 874 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 875 : PPC::PROBED_STACKALLOC_32)) 876 .addDef(ScratchReg) 877 .addDef(TempReg) // TempReg stores the old sp. 878 .addImm(NegFrameSize); 879 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we 880 // update the ScratchReg to meet the assumption that ScratchReg contains 881 // the NegFrameSize. This solution is rather tricky. 882 if (!HasRedZone) { 883 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 884 .addReg(TempReg) 885 .addReg(SPReg); 886 HasSTUX = true; 887 } 888 } else { 889 // This condition must be kept in sync with canUseAsPrologue. 890 if (HasBP && MaxAlign > 1) { 891 if (isPPC64) 892 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 893 .addReg(SPReg) 894 .addImm(0) 895 .addImm(64 - Log2(MaxAlign)); 896 else // PPC32... 
897 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 898 .addReg(SPReg) 899 .addImm(0) 900 .addImm(32 - Log2(MaxAlign)) 901 .addImm(31); 902 if (!isLargeFrame) { 903 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 904 .addReg(ScratchReg, RegState::Kill) 905 .addImm(NegFrameSize); 906 } else { 907 assert(!SingleScratchReg && "Only a single scratch reg available"); 908 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg) 909 .addImm(NegFrameSize >> 16); 910 BuildMI(MBB, MBBI, dl, OrImmInst, TempReg) 911 .addReg(TempReg, RegState::Kill) 912 .addImm(NegFrameSize & 0xFFFF); 913 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 914 .addReg(ScratchReg, RegState::Kill) 915 .addReg(TempReg, RegState::Kill); 916 } 917 918 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 919 .addReg(SPReg, RegState::Kill) 920 .addReg(SPReg) 921 .addReg(ScratchReg); 922 HasSTUX = true; 923 924 } else if (!isLargeFrame) { 925 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 926 .addReg(SPReg) 927 .addImm(NegFrameSize) 928 .addReg(SPReg); 929 930 } else { 931 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 932 .addImm(NegFrameSize >> 16); 933 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 934 .addReg(ScratchReg, RegState::Kill) 935 .addImm(NegFrameSize & 0xFFFF); 936 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 937 .addReg(SPReg, RegState::Kill) 938 .addReg(SPReg) 939 .addReg(ScratchReg); 940 HasSTUX = true; 941 } 942 } 943 944 // Save the TOC register after the stack pointer update if a prologue TOC 945 // save is required for the function. 
946 if (MustSaveTOC) { 947 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 948 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 949 .addReg(TOCReg, getKillRegState(true)) 950 .addImm(TOCSaveOffset) 951 .addReg(SPReg); 952 } 953 954 if (!HasRedZone) { 955 assert(!isPPC64 && "A red zone is always available on PPC64"); 956 if (HasSTUX) { 957 // The negated frame size is in ScratchReg, and the SPReg has been 958 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 959 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 960 // the stack frame (i.e. the old SP), ideally, we would put the old 961 // SP into a register and use it as the base for the stores. The 962 // problem is that the only available register may be ScratchReg, 963 // which could be R0, and R0 cannot be used as a base address. 964 965 // First, set ScratchReg to the old SP. This may need to be modified 966 // later. 967 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 968 .addReg(ScratchReg, RegState::Kill) 969 .addReg(SPReg); 970 971 if (ScratchReg == PPC::R0) { 972 // R0 cannot be used as a base register, but it can be used as an 973 // index in a store-indexed. 974 int LastOffset = 0; 975 if (HasFP) { 976 // R0 += (FPOffset-LastOffset). 977 // Need addic, since addi treats R0 as 0. 978 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 979 .addReg(ScratchReg) 980 .addImm(FPOffset-LastOffset); 981 LastOffset = FPOffset; 982 // Store FP into *R0. 983 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 984 .addReg(FPReg, RegState::Kill) // Save FP. 985 .addReg(PPC::ZERO) 986 .addReg(ScratchReg); // This will be the index (R0 is ok here). 987 } 988 if (FI->usesPICBase()) { 989 // R0 += (PBPOffset-LastOffset). 
990 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 991 .addReg(ScratchReg) 992 .addImm(PBPOffset-LastOffset); 993 LastOffset = PBPOffset; 994 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 995 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 996 .addReg(PPC::ZERO) 997 .addReg(ScratchReg); // This will be the index (R0 is ok here). 998 } 999 if (HasBP) { 1000 // R0 += (BPOffset-LastOffset). 1001 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1002 .addReg(ScratchReg) 1003 .addImm(BPOffset-LastOffset); 1004 LastOffset = BPOffset; 1005 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1006 .addReg(BPReg, RegState::Kill) // Save BP. 1007 .addReg(PPC::ZERO) 1008 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1009 // BP = R0-LastOffset 1010 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1011 .addReg(ScratchReg, RegState::Kill) 1012 .addImm(-LastOffset); 1013 } 1014 } else { 1015 // ScratchReg is not R0, so use it as the base register. It is 1016 // already set to the old SP, so we can use the offsets directly. 1017 1018 // Now that the stack frame has been allocated, save all the necessary 1019 // registers using ScratchReg as the base address. 1020 if (HasFP) 1021 BuildMI(MBB, MBBI, dl, StoreInst) 1022 .addReg(FPReg) 1023 .addImm(FPOffset) 1024 .addReg(ScratchReg); 1025 if (FI->usesPICBase()) 1026 BuildMI(MBB, MBBI, dl, StoreInst) 1027 .addReg(PPC::R30) 1028 .addImm(PBPOffset) 1029 .addReg(ScratchReg); 1030 if (HasBP) { 1031 BuildMI(MBB, MBBI, dl, StoreInst) 1032 .addReg(BPReg) 1033 .addImm(BPOffset) 1034 .addReg(ScratchReg); 1035 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1036 .addReg(ScratchReg, RegState::Kill) 1037 .addReg(ScratchReg); 1038 } 1039 } 1040 } else { 1041 // The frame size is a known 16-bit constant (fitting in the immediate 1042 // field of STWU). To be here we have to be compiling for PPC32. 1043 // Since the SPReg has been decreased by FrameSize, add it back to each 1044 // offset. 
1045 if (HasFP) 1046 BuildMI(MBB, MBBI, dl, StoreInst) 1047 .addReg(FPReg) 1048 .addImm(FrameSize + FPOffset) 1049 .addReg(SPReg); 1050 if (FI->usesPICBase()) 1051 BuildMI(MBB, MBBI, dl, StoreInst) 1052 .addReg(PPC::R30) 1053 .addImm(FrameSize + PBPOffset) 1054 .addReg(SPReg); 1055 if (HasBP) { 1056 BuildMI(MBB, MBBI, dl, StoreInst) 1057 .addReg(BPReg) 1058 .addImm(FrameSize + BPOffset) 1059 .addReg(SPReg); 1060 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1061 .addReg(SPReg) 1062 .addImm(FrameSize); 1063 } 1064 } 1065 } 1066 1067 // Add Call Frame Information for the instructions we generated above. 1068 if (needsCFI) { 1069 unsigned CFIIndex; 1070 1071 if (HasBP) { 1072 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1073 // because if the stack needed aligning then CFA won't be at a fixed 1074 // offset from FP/SP. 1075 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1076 CFIIndex = MF.addFrameInst( 1077 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1078 } else { 1079 // Adjust the definition of CFA to account for the change in SP. 1080 assert(NegFrameSize); 1081 CFIIndex = MF.addFrameInst( 1082 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); 1083 } 1084 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1085 .addCFIIndex(CFIIndex); 1086 1087 if (HasFP) { 1088 // Describe where FP was saved, at a fixed offset from CFA. 1089 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1090 CFIIndex = MF.addFrameInst( 1091 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1092 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1093 .addCFIIndex(CFIIndex); 1094 } 1095 1096 if (FI->usesPICBase()) { 1097 // Describe where FP was saved, at a fixed offset from CFA. 
1098 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1099 CFIIndex = MF.addFrameInst( 1100 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1101 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1102 .addCFIIndex(CFIIndex); 1103 } 1104 1105 if (HasBP) { 1106 // Describe where BP was saved, at a fixed offset from CFA. 1107 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1108 CFIIndex = MF.addFrameInst( 1109 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1110 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1111 .addCFIIndex(CFIIndex); 1112 } 1113 1114 if (MustSaveLR) { 1115 // Describe where LR was saved, at a fixed offset from CFA. 1116 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1117 CFIIndex = MF.addFrameInst( 1118 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1119 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1120 .addCFIIndex(CFIIndex); 1121 } 1122 } 1123 1124 // If there is a frame pointer, copy R1 into R31 1125 if (HasFP) { 1126 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1127 .addReg(SPReg) 1128 .addReg(SPReg); 1129 1130 if (!HasBP && needsCFI) { 1131 // Change the definition of CFA from SP+offset to FP+offset, because SP 1132 // will change at every alloca. 1133 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1134 unsigned CFIIndex = MF.addFrameInst( 1135 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1136 1137 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1138 .addCFIIndex(CFIIndex); 1139 } 1140 } 1141 1142 if (needsCFI) { 1143 // Describe where callee saved registers were saved, at fixed offsets from 1144 // CFA. 1145 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1146 for (unsigned I = 0, E = CSI.size(); I != E; ++I) { 1147 unsigned Reg = CSI[I].getReg(); 1148 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1149 1150 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1151 // subregisters of CR2. 
We just need to emit a move of CR2. 1152 if (PPC::CRBITRCRegClass.contains(Reg)) 1153 continue; 1154 1155 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1156 continue; 1157 1158 // For SVR4, don't emit a move for the CR spill slot if we haven't 1159 // spilled CRs. 1160 if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4) 1161 && !MustSaveCR) 1162 continue; 1163 1164 // For 64-bit SVR4 when we have spilled CRs, the spill location 1165 // is SP+8, not a frame-relative slot. 1166 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1167 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1168 // the whole CR word. In the ELFv2 ABI, every CR that was 1169 // actually saved gets its own CFI record. 1170 unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2; 1171 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1172 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1173 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1174 .addCFIIndex(CFIIndex); 1175 continue; 1176 } 1177 1178 if (CSI[I].isSpilledToReg()) { 1179 unsigned SpilledReg = CSI[I].getDstReg(); 1180 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1181 nullptr, MRI->getDwarfRegNum(Reg, true), 1182 MRI->getDwarfRegNum(SpilledReg, true))); 1183 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1184 .addCFIIndex(CFIRegister); 1185 } else { 1186 int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx()); 1187 // We have changed the object offset above but we do not want to change 1188 // the actual offsets in the CFI instruction so we have to undo the 1189 // offset change here. 
1190 if (MovingStackUpdateDown) 1191 Offset -= NegFrameSize; 1192 1193 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1194 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1195 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1196 .addCFIIndex(CFIIndex); 1197 } 1198 } 1199 } 1200 } 1201 1202 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, 1203 MachineBasicBlock &PrologMBB) const { 1204 // TODO: Generate CFI instructions. 1205 bool isPPC64 = Subtarget.isPPC64(); 1206 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 1207 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1208 MachineFrameInfo &MFI = MF.getFrameInfo(); 1209 MachineModuleInfo &MMI = MF.getMMI(); 1210 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 1211 // AIX assembler does not support cfi directives. 1212 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 1213 auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) { 1214 int Opc = MI.getOpcode(); 1215 return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32; 1216 }); 1217 if (StackAllocMIPos == PrologMBB.end()) 1218 return; 1219 const BasicBlock *ProbedBB = PrologMBB.getBasicBlock(); 1220 DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos); 1221 MachineInstr &MI = *StackAllocMIPos; 1222 int64_t NegFrameSize = MI.getOperand(2).getImm(); 1223 int64_t NegProbeSize = -(int64_t)TLI.getStackProbeSize(MF); 1224 assert(isInt<32>(NegProbeSize) && "Unhandled probe size"); 1225 int64_t NumBlocks = NegFrameSize / NegProbeSize; 1226 int64_t NegResidualSize = NegFrameSize % NegProbeSize; 1227 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1228 Register ScratchReg = MI.getOperand(0).getReg(); 1229 Register FPReg = MI.getOperand(1).getReg(); 1230 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1231 bool HasBP = RegInfo->hasBasePointer(MF); 1232 Align MaxAlign = MFI.getMaxAlign(); 1233 // Initialize current frame pointer. 
1234 const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR); 1235 BuildMI(PrologMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg); 1236 // Subroutines to generate .cfi_* directives. 1237 auto buildDefCFAReg = [&](MachineBasicBlock &MBB, 1238 MachineBasicBlock::iterator MBBI, Register Reg) { 1239 unsigned RegNum = MRI->getDwarfRegNum(Reg, true); 1240 unsigned CFIIndex = MF.addFrameInst( 1241 MCCFIInstruction::createDefCfaRegister(nullptr, RegNum)); 1242 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1243 .addCFIIndex(CFIIndex); 1244 }; 1245 auto buildDefCFA = [&](MachineBasicBlock &MBB, 1246 MachineBasicBlock::iterator MBBI, Register Reg, 1247 int Offset) { 1248 unsigned RegNum = MRI->getDwarfRegNum(Reg, true); 1249 unsigned CFIIndex = MBB.getParent()->addFrameInst( 1250 MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset)); 1251 BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1252 .addCFIIndex(CFIIndex); 1253 }; 1254 // Subroutine to determine if we can use the Imm as part of d-form. 1255 auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; }; 1256 // Subroutine to materialize the Imm into TempReg. 1257 auto MaterializeImm = [&](MachineBasicBlock &MBB, 1258 MachineBasicBlock::iterator MBBI, int64_t Imm, 1259 Register &TempReg) { 1260 assert(isInt<32>(Imm) && "Unhandled imm"); 1261 if (isInt<16>(Imm)) 1262 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg) 1263 .addImm(Imm); 1264 else { 1265 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg) 1266 .addImm(Imm >> 16); 1267 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg) 1268 .addReg(TempReg) 1269 .addImm(Imm & 0xFFFF); 1270 } 1271 }; 1272 // Subroutine to store frame pointer and decrease stack pointer by probe size. 
1273 auto allocateAndProbe = [&](MachineBasicBlock &MBB, 1274 MachineBasicBlock::iterator MBBI, int64_t NegSize, 1275 Register NegSizeReg, bool UseDForm) { 1276 if (UseDForm) 1277 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg) 1278 .addReg(FPReg) 1279 .addImm(NegSize) 1280 .addReg(SPReg); 1281 else 1282 BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg) 1283 .addReg(FPReg) 1284 .addReg(SPReg) 1285 .addReg(NegSizeReg); 1286 }; 1287 // Use FPReg to calculate CFA. 1288 if (needsCFI) 1289 buildDefCFA(PrologMBB, {MI}, FPReg, 0); 1290 // For case HasBP && MaxAlign > 1, we have to align the SP by performing 1291 // SP = SP - SP % MaxAlign. 1292 if (HasBP && MaxAlign > 1) { 1293 if (isPPC64) 1294 BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg) 1295 .addReg(FPReg) 1296 .addImm(0) 1297 .addImm(64 - Log2(MaxAlign)); 1298 else 1299 BuildMI(PrologMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg) 1300 .addReg(FPReg) 1301 .addImm(0) 1302 .addImm(32 - Log2(MaxAlign)) 1303 .addImm(31); 1304 BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBFC8 : PPC::SUBFC), 1305 SPReg) 1306 .addReg(ScratchReg) 1307 .addReg(SPReg); 1308 } 1309 // Probe residual part. 1310 if (NegResidualSize) { 1311 bool ResidualUseDForm = CanUseDForm(NegResidualSize); 1312 if (!ResidualUseDForm) 1313 MaterializeImm(PrologMBB, {MI}, NegResidualSize, ScratchReg); 1314 allocateAndProbe(PrologMBB, {MI}, NegResidualSize, ScratchReg, 1315 ResidualUseDForm); 1316 } 1317 bool UseDForm = CanUseDForm(NegProbeSize); 1318 // If number of blocks is small, just probe them directly. 1319 if (NumBlocks < 3) { 1320 if (!UseDForm) 1321 MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); 1322 for (int i = 0; i < NumBlocks; ++i) 1323 allocateAndProbe(PrologMBB, {MI}, NegProbeSize, ScratchReg, UseDForm); 1324 if (needsCFI) { 1325 // Restore using SPReg to calculate CFA. 
1326 buildDefCFAReg(PrologMBB, {MI}, SPReg); 1327 } 1328 } else { 1329 // Since CTR is a volatile register and current shrinkwrap implementation 1330 // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a 1331 // CTR loop to probe. 1332 // Calculate trip count and stores it in CTRReg. 1333 MaterializeImm(PrologMBB, {MI}, NumBlocks, ScratchReg); 1334 BuildMI(PrologMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR)) 1335 .addReg(ScratchReg, RegState::Kill); 1336 if (!UseDForm) 1337 MaterializeImm(PrologMBB, {MI}, NegProbeSize, ScratchReg); 1338 // Create MBBs of the loop. 1339 MachineFunction::iterator MBBInsertPoint = 1340 std::next(PrologMBB.getIterator()); 1341 MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB); 1342 MF.insert(MBBInsertPoint, LoopMBB); 1343 MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB); 1344 MF.insert(MBBInsertPoint, ExitMBB); 1345 // Synthesize the loop body. 1346 allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg, 1347 UseDForm); 1348 BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ)) 1349 .addMBB(LoopMBB); 1350 LoopMBB->addSuccessor(ExitMBB); 1351 LoopMBB->addSuccessor(LoopMBB); 1352 // Synthesize the exit MBB. 1353 ExitMBB->splice(ExitMBB->end(), &PrologMBB, 1354 std::next(MachineBasicBlock::iterator(MI)), 1355 PrologMBB.end()); 1356 ExitMBB->transferSuccessorsAndUpdatePHIs(&PrologMBB); 1357 PrologMBB.addSuccessor(LoopMBB); 1358 if (needsCFI) { 1359 // Restore using SPReg to calculate CFA. 1360 buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg); 1361 } 1362 // Update liveins. 
1363 recomputeLiveIns(*LoopMBB); 1364 recomputeLiveIns(*ExitMBB); 1365 } 1366 ++NumPrologProbed; 1367 MI.eraseFromParent(); 1368 } 1369 1370 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1371 MachineBasicBlock &MBB) const { 1372 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1373 DebugLoc dl; 1374 1375 if (MBBI != MBB.end()) 1376 dl = MBBI->getDebugLoc(); 1377 1378 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1379 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1380 1381 // Get alignment info so we know how to restore the SP. 1382 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1383 1384 // Get the number of bytes allocated from the FrameInfo. 1385 int FrameSize = MFI.getStackSize(); 1386 1387 // Get processor type. 1388 bool isPPC64 = Subtarget.isPPC64(); 1389 1390 // Check if the link register (LR) has been saved. 1391 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1392 bool MustSaveLR = FI->mustSaveLR(); 1393 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 1394 bool MustSaveCR = !MustSaveCRs.empty(); 1395 // Do we have a frame pointer and/or base pointer for this function? 1396 bool HasFP = hasFP(MF); 1397 bool HasBP = RegInfo->hasBasePointer(MF); 1398 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1399 1400 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1401 Register BPReg = RegInfo->getBaseRegister(MF); 1402 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1403 Register ScratchReg; 1404 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1405 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1406 : PPC::MTLR ); 1407 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1408 : PPC::LWZ ); 1409 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1410 : PPC::LIS ); 1411 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 1412 : PPC::OR ); 1413 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? 
PPC::ORI8 1414 : PPC::ORI ); 1415 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1416 : PPC::ADDI ); 1417 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1418 : PPC::ADD4 ); 1419 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 1420 : PPC::LWZ); 1421 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 1422 : PPC::MTOCRF); 1423 int LROffset = getReturnSaveOffset(); 1424 1425 int FPOffset = 0; 1426 1427 // Using the same bool variable as below to suppress compiler warnings. 1428 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1429 &TempReg); 1430 assert(SingleScratchReg && 1431 "Could not find an available scratch register"); 1432 1433 SingleScratchReg = ScratchReg == TempReg; 1434 1435 if (HasFP) { 1436 int FPIndex = FI->getFramePointerSaveIndex(); 1437 assert(FPIndex && "No Frame Pointer Save Slot!"); 1438 FPOffset = MFI.getObjectOffset(FPIndex); 1439 } 1440 1441 int BPOffset = 0; 1442 if (HasBP) { 1443 int BPIndex = FI->getBasePointerSaveIndex(); 1444 assert(BPIndex && "No Base Pointer Save Slot!"); 1445 BPOffset = MFI.getObjectOffset(BPIndex); 1446 } 1447 1448 int PBPOffset = 0; 1449 if (FI->usesPICBase()) { 1450 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1451 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1452 PBPOffset = MFI.getObjectOffset(PBPIndex); 1453 } 1454 1455 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1456 1457 if (IsReturnBlock) { 1458 unsigned RetOpcode = MBBI->getOpcode(); 1459 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1460 RetOpcode == PPC::TCRETURNdi || 1461 RetOpcode == PPC::TCRETURNai || 1462 RetOpcode == PPC::TCRETURNri8 || 1463 RetOpcode == PPC::TCRETURNdi8 || 1464 RetOpcode == PPC::TCRETURNai8; 1465 1466 if (UsesTCRet) { 1467 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1468 MachineOperand &StackAdjust = MBBI->getOperand(1); 1469 assert(StackAdjust.isImm() && "Expecting immediate value."); 1470 // Adjust stack pointer. 
1471 int StackAdj = StackAdjust.getImm(); 1472 int Delta = StackAdj - MaxTCRetDelta; 1473 assert((Delta >= 0) && "Delta must be positive"); 1474 if (MaxTCRetDelta>0) 1475 FrameSize += (StackAdj +Delta); 1476 else 1477 FrameSize += StackAdj; 1478 } 1479 } 1480 1481 // Frames of 32KB & larger require special handling because they cannot be 1482 // indexed into with a simple LD/LWZ immediate offset operand. 1483 bool isLargeFrame = !isInt<16>(FrameSize); 1484 1485 // On targets without red zone, the SP needs to be restored last, so that 1486 // all live contents of the stack frame are upwards of the SP. This means 1487 // that we cannot restore SP just now, since there may be more registers 1488 // to restore from the stack frame (e.g. R31). If the frame size is not 1489 // a simple immediate value, we will need a spare register to hold the 1490 // restored SP. If the frame size is known and small, we can simply adjust 1491 // the offsets of the registers to be restored, and still use SP to restore 1492 // them. In such case, the final update of SP will be to add the frame 1493 // size to it. 1494 // To simplify the code, set RBReg to the base register used to restore 1495 // values from the stack, and set SPAdd to the value that needs to be added 1496 // to the SP at the end. The default values are as if red zone was present. 1497 unsigned RBReg = SPReg; 1498 unsigned SPAdd = 0; 1499 1500 // Check if we can move the stack update instruction up the epilogue 1501 // past the callee saves. This will allow the move to LR instruction 1502 // to be executed before the restores of the callee saves which means 1503 // that the callee saves can hide the latency from the MTLR instrcution. 
1504 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1505 if (stackUpdateCanBeMoved(MF)) { 1506 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1507 for (CalleeSavedInfo CSI : Info) { 1508 int FrIdx = CSI.getFrameIdx(); 1509 // If the frame index is not negative the callee saved info belongs to a 1510 // stack object that is not a fixed stack object. We ignore non-fixed 1511 // stack objects because we won't move the update of the stack pointer 1512 // past them. 1513 if (FrIdx >= 0) 1514 continue; 1515 1516 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1517 StackUpdateLoc--; 1518 else { 1519 // Abort the operation as we can't update all CSR restores. 1520 StackUpdateLoc = MBBI; 1521 break; 1522 } 1523 } 1524 } 1525 1526 if (FrameSize) { 1527 // In the prologue, the loaded (or persistent) stack pointer value is 1528 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1529 // zone add this offset back now. 1530 1531 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1532 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1533 // call which invalidates the stack pointer value in SP(0). So we use the 1534 // value of R31 in this case. 
1535 if (FI->hasFastCall()) { 1536 assert(HasFP && "Expecting a valid frame pointer."); 1537 if (!HasRedZone) 1538 RBReg = FPReg; 1539 if (!isLargeFrame) { 1540 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1541 .addReg(FPReg).addImm(FrameSize); 1542 } else { 1543 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1544 .addImm(FrameSize >> 16); 1545 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1546 .addReg(ScratchReg, RegState::Kill) 1547 .addImm(FrameSize & 0xFFFF); 1548 BuildMI(MBB, MBBI, dl, AddInst) 1549 .addReg(RBReg) 1550 .addReg(FPReg) 1551 .addReg(ScratchReg); 1552 } 1553 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1554 if (HasRedZone) { 1555 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1556 .addReg(SPReg) 1557 .addImm(FrameSize); 1558 } else { 1559 // Make sure that adding FrameSize will not overflow the max offset 1560 // size. 1561 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1562 "Local offsets should be negative"); 1563 SPAdd = FrameSize; 1564 FPOffset += FrameSize; 1565 BPOffset += FrameSize; 1566 PBPOffset += FrameSize; 1567 } 1568 } else { 1569 // We don't want to use ScratchReg as a base register, because it 1570 // could happen to be R0. Use FP instead, but make sure to preserve it. 1571 if (!HasRedZone) { 1572 // If FP is not saved, copy it to ScratchReg. 1573 if (!HasFP) 1574 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1575 .addReg(FPReg) 1576 .addReg(FPReg); 1577 RBReg = FPReg; 1578 } 1579 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1580 .addImm(0) 1581 .addReg(SPReg); 1582 } 1583 } 1584 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1585 // If there is no red zone, ScratchReg may be needed for holding a useful 1586 // value (although not the base register). Make sure it is not overwritten 1587 // too early. 1588 1589 // If we need to restore both the LR and the CR and we only have one 1590 // available scratch register, we must do them one at a time. 
1591 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1592 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1593 // is live here. 1594 assert(HasRedZone && "Expecting red zone"); 1595 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1596 .addImm(CRSaveOffset) 1597 .addReg(SPReg); 1598 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1599 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1600 .addReg(TempReg, getKillRegState(i == e-1)); 1601 } 1602 1603 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1604 // LR is stored in the caller's stack frame. ScratchReg will be needed 1605 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1606 // a base register anyway, because it may happen to be R0. 1607 bool LoadedLR = false; 1608 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1609 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1610 .addImm(LROffset+SPAdd) 1611 .addReg(RBReg); 1612 LoadedLR = true; 1613 } 1614 1615 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1616 assert(RBReg == SPReg && "Should be using SP as a base register"); 1617 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1618 .addImm(CRSaveOffset) 1619 .addReg(RBReg); 1620 } 1621 1622 if (HasFP) { 1623 // If there is red zone, restore FP directly, since SP has already been 1624 // restored. Otherwise, restore the value of FP into ScratchReg. 
1625 if (HasRedZone || RBReg == SPReg) 1626 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1627 .addImm(FPOffset) 1628 .addReg(SPReg); 1629 else 1630 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1631 .addImm(FPOffset) 1632 .addReg(RBReg); 1633 } 1634 1635 if (FI->usesPICBase()) 1636 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1637 .addImm(PBPOffset) 1638 .addReg(RBReg); 1639 1640 if (HasBP) 1641 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1642 .addImm(BPOffset) 1643 .addReg(RBReg); 1644 1645 // There is nothing more to be loaded from the stack, so now we can 1646 // restore SP: SP = RBReg + SPAdd. 1647 if (RBReg != SPReg || SPAdd != 0) { 1648 assert(!HasRedZone && "This should not happen with red zone"); 1649 // If SPAdd is 0, generate a copy. 1650 if (SPAdd == 0) 1651 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1652 .addReg(RBReg) 1653 .addReg(RBReg); 1654 else 1655 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1656 .addReg(RBReg) 1657 .addImm(SPAdd); 1658 1659 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1660 if (RBReg == FPReg) 1661 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1662 .addReg(ScratchReg) 1663 .addReg(ScratchReg); 1664 1665 // Now load the LR from the caller's stack frame. 1666 if (MustSaveLR && !LoadedLR) 1667 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1668 .addImm(LROffset) 1669 .addReg(SPReg); 1670 } 1671 1672 if (MustSaveCR && 1673 !(SingleScratchReg && MustSaveLR)) 1674 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1675 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1676 .addReg(TempReg, getKillRegState(i == e-1)); 1677 1678 if (MustSaveLR) 1679 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1680 1681 // Callee pop calling convention. Pop parameter/linkage area. 
Used for tail 1682 // call optimization 1683 if (IsReturnBlock) { 1684 unsigned RetOpcode = MBBI->getOpcode(); 1685 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1686 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1687 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1688 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1689 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1690 1691 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1692 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1693 .addReg(SPReg).addImm(CallerAllocatedAmt); 1694 } else { 1695 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1696 .addImm(CallerAllocatedAmt >> 16); 1697 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1698 .addReg(ScratchReg, RegState::Kill) 1699 .addImm(CallerAllocatedAmt & 0xFFFF); 1700 BuildMI(MBB, MBBI, dl, AddInst) 1701 .addReg(SPReg) 1702 .addReg(FPReg) 1703 .addReg(ScratchReg); 1704 } 1705 } else { 1706 createTailCallBranchInstr(MBB); 1707 } 1708 } 1709 } 1710 1711 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1712 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1713 1714 // If we got this far a first terminator should exist. 1715 assert(MBBI != MBB.end() && "Failed to find the first terminator."); 1716 1717 DebugLoc dl = MBBI->getDebugLoc(); 1718 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1719 1720 // Create branch instruction for pseudo tail call return instruction. 1721 // The TCRETURNdi variants are direct calls. Valid targets for those are 1722 // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel 1723 // since we can tail call external functions with PC-Rel (i.e. we don't need 1724 // to worry about different TOC pointers). Some of the external functions will 1725 // be MO_GlobalAddress while others like memcpy for example, are going to 1726 // be MO_ExternalSymbol. 
1727 unsigned RetOpcode = MBBI->getOpcode(); 1728 if (RetOpcode == PPC::TCRETURNdi) { 1729 MBBI = MBB.getLastNonDebugInstr(); 1730 MachineOperand &JumpTarget = MBBI->getOperand(0); 1731 if (JumpTarget.isGlobal()) 1732 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1733 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1734 else if (JumpTarget.isSymbol()) 1735 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)). 1736 addExternalSymbol(JumpTarget.getSymbolName()); 1737 else 1738 llvm_unreachable("Expecting Global or External Symbol"); 1739 } else if (RetOpcode == PPC::TCRETURNri) { 1740 MBBI = MBB.getLastNonDebugInstr(); 1741 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1742 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR)); 1743 } else if (RetOpcode == PPC::TCRETURNai) { 1744 MBBI = MBB.getLastNonDebugInstr(); 1745 MachineOperand &JumpTarget = MBBI->getOperand(0); 1746 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm()); 1747 } else if (RetOpcode == PPC::TCRETURNdi8) { 1748 MBBI = MBB.getLastNonDebugInstr(); 1749 MachineOperand &JumpTarget = MBBI->getOperand(0); 1750 if (JumpTarget.isGlobal()) 1751 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 1752 addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset()); 1753 else if (JumpTarget.isSymbol()) 1754 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)). 
1755 addExternalSymbol(JumpTarget.getSymbolName()); 1756 else 1757 llvm_unreachable("Expecting Global or External Symbol"); 1758 } else if (RetOpcode == PPC::TCRETURNri8) { 1759 MBBI = MBB.getLastNonDebugInstr(); 1760 assert(MBBI->getOperand(0).isReg() && "Expecting register operand."); 1761 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8)); 1762 } else if (RetOpcode == PPC::TCRETURNai8) { 1763 MBBI = MBB.getLastNonDebugInstr(); 1764 MachineOperand &JumpTarget = MBBI->getOperand(0); 1765 BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm()); 1766 } 1767 } 1768 1769 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF, 1770 BitVector &SavedRegs, 1771 RegScavenger *RS) const { 1772 TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS); 1773 1774 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1775 1776 // Save and clear the LR state. 1777 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1778 unsigned LR = RegInfo->getRARegister(); 1779 FI->setMustSaveLR(MustSaveLR(MF, LR)); 1780 SavedRegs.reset(LR); 1781 1782 // Save R31 if necessary 1783 int FPSI = FI->getFramePointerSaveIndex(); 1784 const bool isPPC64 = Subtarget.isPPC64(); 1785 MachineFrameInfo &MFI = MF.getFrameInfo(); 1786 1787 // If the frame pointer save index hasn't been defined yet. 1788 if (!FPSI && needsFP(MF)) { 1789 // Find out what the fix offset of the frame pointer save area. 1790 int FPOffset = getFramePointerSaveOffset(); 1791 // Allocate the frame index for frame pointer save area. 1792 FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true); 1793 // Save the result. 1794 FI->setFramePointerSaveIndex(FPSI); 1795 } 1796 1797 int BPSI = FI->getBasePointerSaveIndex(); 1798 if (!BPSI && RegInfo->hasBasePointer(MF)) { 1799 int BPOffset = getBasePointerSaveOffset(); 1800 // Allocate the frame index for the base pointer save area. 1801 BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true); 1802 // Save the result. 
1803 FI->setBasePointerSaveIndex(BPSI); 1804 } 1805 1806 // Reserve stack space for the PIC Base register (R30). 1807 // Only used in SVR4 32-bit. 1808 if (FI->usesPICBase()) { 1809 int PBPSI = MFI.CreateFixedObject(4, -8, true); 1810 FI->setPICBasePointerSaveIndex(PBPSI); 1811 } 1812 1813 // Make sure we don't explicitly spill r31, because, for example, we have 1814 // some inline asm which explicitly clobbers it, when we otherwise have a 1815 // frame pointer and are using r31's spill slot for the prologue/epilogue 1816 // code. Same goes for the base pointer and the PIC base register. 1817 if (needsFP(MF)) 1818 SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31); 1819 if (RegInfo->hasBasePointer(MF)) 1820 SavedRegs.reset(RegInfo->getBaseRegister(MF)); 1821 if (FI->usesPICBase()) 1822 SavedRegs.reset(PPC::R30); 1823 1824 // Reserve stack space to move the linkage area to in case of a tail call. 1825 int TCSPDelta = 0; 1826 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1827 (TCSPDelta = FI->getTailCallSPDelta()) < 0) { 1828 MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true); 1829 } 1830 1831 // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4. 1832 // For 64-bit SVR4, and all flavors of AIX we create a FixedStack 1833 // object at the offset of the CR-save slot in the linkage area. The actual 1834 // save and restore of the condition register will be created as part of the 1835 // prologue and epilogue insertion, but the FixedStack object is needed to 1836 // keep the CalleSavedInfo valid. 1837 if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) || 1838 SavedRegs.test(PPC::CR4))) { 1839 const uint64_t SpillSize = 4; // Condition register is always 4 bytes. 1840 const int64_t SpillOffset = 1841 Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 
4 : -4; 1842 int FrameIdx = 1843 MFI.CreateFixedObject(SpillSize, SpillOffset, 1844 /* IsImmutable */ true, /* IsAliased */ false); 1845 FI->setCRSpillFrameIndex(FrameIdx); 1846 } 1847 } 1848 1849 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF, 1850 RegScavenger *RS) const { 1851 // Get callee saved register information. 1852 MachineFrameInfo &MFI = MF.getFrameInfo(); 1853 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1854 1855 // If the function is shrink-wrapped, and if the function has a tail call, the 1856 // tail call might not be in the new RestoreBlock, so real branch instruction 1857 // won't be generated by emitEpilogue(), because shrink-wrap has chosen new 1858 // RestoreBlock. So we handle this case here. 1859 if (MFI.getSavePoint() && MFI.hasTailCall()) { 1860 MachineBasicBlock *RestoreBlock = MFI.getRestorePoint(); 1861 for (MachineBasicBlock &MBB : MF) { 1862 if (MBB.isReturnBlock() && (&MBB) != RestoreBlock) 1863 createTailCallBranchInstr(MBB); 1864 } 1865 } 1866 1867 // Early exit if no callee saved registers are modified! 1868 if (CSI.empty() && !needsFP(MF)) { 1869 addScavengingSpillSlot(MF, RS); 1870 return; 1871 } 1872 1873 unsigned MinGPR = PPC::R31; 1874 unsigned MinG8R = PPC::X31; 1875 unsigned MinFPR = PPC::F31; 1876 unsigned MinVR = Subtarget.hasSPE() ? 
PPC::S31 : PPC::V31; 1877 1878 bool HasGPSaveArea = false; 1879 bool HasG8SaveArea = false; 1880 bool HasFPSaveArea = false; 1881 bool HasVRSaveArea = false; 1882 1883 SmallVector<CalleeSavedInfo, 18> GPRegs; 1884 SmallVector<CalleeSavedInfo, 18> G8Regs; 1885 SmallVector<CalleeSavedInfo, 18> FPRegs; 1886 SmallVector<CalleeSavedInfo, 18> VRegs; 1887 1888 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 1889 unsigned Reg = CSI[i].getReg(); 1890 assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() || 1891 (Reg != PPC::X2 && Reg != PPC::R2)) && 1892 "Not expecting to try to spill R2 in a function that must save TOC"); 1893 if (PPC::GPRCRegClass.contains(Reg)) { 1894 HasGPSaveArea = true; 1895 1896 GPRegs.push_back(CSI[i]); 1897 1898 if (Reg < MinGPR) { 1899 MinGPR = Reg; 1900 } 1901 } else if (PPC::G8RCRegClass.contains(Reg)) { 1902 HasG8SaveArea = true; 1903 1904 G8Regs.push_back(CSI[i]); 1905 1906 if (Reg < MinG8R) { 1907 MinG8R = Reg; 1908 } 1909 } else if (PPC::F8RCRegClass.contains(Reg)) { 1910 HasFPSaveArea = true; 1911 1912 FPRegs.push_back(CSI[i]); 1913 1914 if (Reg < MinFPR) { 1915 MinFPR = Reg; 1916 } 1917 } else if (PPC::CRBITRCRegClass.contains(Reg) || 1918 PPC::CRRCRegClass.contains(Reg)) { 1919 ; // do nothing, as we already know whether CRs are spilled 1920 } else if (PPC::VRRCRegClass.contains(Reg) || 1921 PPC::SPERCRegClass.contains(Reg)) { 1922 // Altivec and SPE are mutually exclusive, but have the same stack 1923 // alignment requirements, so overload the save area for both cases. 1924 HasVRSaveArea = true; 1925 1926 VRegs.push_back(CSI[i]); 1927 1928 if (Reg < MinVR) { 1929 MinVR = Reg; 1930 } 1931 } else { 1932 llvm_unreachable("Unknown RegisterClass!"); 1933 } 1934 } 1935 1936 PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>(); 1937 const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo(); 1938 1939 int64_t LowerBound = 0; 1940 1941 // Take into account stack space reserved for tail calls. 
1942 int TCSPDelta = 0; 1943 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1944 (TCSPDelta = PFI->getTailCallSPDelta()) < 0) { 1945 LowerBound = TCSPDelta; 1946 } 1947 1948 // The Floating-point register save area is right below the back chain word 1949 // of the previous stack frame. 1950 if (HasFPSaveArea) { 1951 for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) { 1952 int FI = FPRegs[i].getFrameIdx(); 1953 1954 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1955 } 1956 1957 LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8; 1958 } 1959 1960 // Check whether the frame pointer register is allocated. If so, make sure it 1961 // is spilled to the correct offset. 1962 if (needsFP(MF)) { 1963 int FI = PFI->getFramePointerSaveIndex(); 1964 assert(FI && "No Frame Pointer Save Slot!"); 1965 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1966 // FP is R31/X31, so no need to update MinGPR/MinG8R. 1967 HasGPSaveArea = true; 1968 } 1969 1970 if (PFI->usesPICBase()) { 1971 int FI = PFI->getPICBasePointerSaveIndex(); 1972 assert(FI && "No PIC Base Pointer Save Slot!"); 1973 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1974 1975 MinGPR = std::min<unsigned>(MinGPR, PPC::R30); 1976 HasGPSaveArea = true; 1977 } 1978 1979 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1980 if (RegInfo->hasBasePointer(MF)) { 1981 int FI = PFI->getBasePointerSaveIndex(); 1982 assert(FI && "No Base Pointer Save Slot!"); 1983 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 1984 1985 Register BP = RegInfo->getBaseRegister(MF); 1986 if (PPC::G8RCRegClass.contains(BP)) { 1987 MinG8R = std::min<unsigned>(MinG8R, BP); 1988 HasG8SaveArea = true; 1989 } else if (PPC::GPRCRegClass.contains(BP)) { 1990 MinGPR = std::min<unsigned>(MinGPR, BP); 1991 HasGPSaveArea = true; 1992 } 1993 } 1994 1995 // General register save area starts right below the Floating-point 1996 // register save area. 
1997 if (HasGPSaveArea || HasG8SaveArea) { 1998 // Move general register save area spill slots down, taking into account 1999 // the size of the Floating-point register save area. 2000 for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) { 2001 if (!GPRegs[i].isSpilledToReg()) { 2002 int FI = GPRegs[i].getFrameIdx(); 2003 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2004 } 2005 } 2006 2007 // Move general register save area spill slots down, taking into account 2008 // the size of the Floating-point register save area. 2009 for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) { 2010 if (!G8Regs[i].isSpilledToReg()) { 2011 int FI = G8Regs[i].getFrameIdx(); 2012 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2013 } 2014 } 2015 2016 unsigned MinReg = 2017 std::min<unsigned>(TRI->getEncodingValue(MinGPR), 2018 TRI->getEncodingValue(MinG8R)); 2019 2020 const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4; 2021 LowerBound -= (31 - MinReg + 1) * GPRegSize; 2022 } 2023 2024 // For 32-bit only, the CR save area is below the general register 2025 // save area. For 64-bit SVR4, the CR save area is addressed relative 2026 // to the stack pointer and hence does not need an adjustment here. 2027 // Only CR2 (the first nonvolatile spilled) has an associated frame 2028 // index so that we have a single uniform save area. 2029 if (spillsCR(MF) && Subtarget.is32BitELFABI()) { 2030 // Adjust the frame index of the CR spill slot. 2031 for (const auto &CSInfo : CSI) { 2032 if (CSInfo.getReg() == PPC::CR2) { 2033 int FI = CSInfo.getFrameIdx(); 2034 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2035 break; 2036 } 2037 } 2038 2039 LowerBound -= 4; // The CR save area is always 4 bytes long. 2040 } 2041 2042 // Both Altivec and SPE have the same alignment and padding requirements 2043 // within the stack frame. 2044 if (HasVRSaveArea) { 2045 // Insert alignment padding, we need 16-byte alignment. 
Note: for positive 2046 // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since 2047 // we are using negative number here (the stack grows downward). We should 2048 // use formula : y = x & (~(n-1)). Where x is the size before aligning, n 2049 // is the alignment size ( n = 16 here) and y is the size after aligning. 2050 assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!"); 2051 LowerBound &= ~(15); 2052 2053 for (unsigned i = 0, e = VRegs.size(); i != e; ++i) { 2054 int FI = VRegs[i].getFrameIdx(); 2055 2056 MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI)); 2057 } 2058 } 2059 2060 addScavengingSpillSlot(MF, RS); 2061 } 2062 2063 void 2064 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF, 2065 RegScavenger *RS) const { 2066 // Reserve a slot closest to SP or frame pointer if we have a dynalloc or 2067 // a large stack, which will require scavenging a register to materialize a 2068 // large offset. 2069 2070 // We need to have a scavenger spill slot for spills if the frame size is 2071 // large. In case there is no free register for large-offset addressing, 2072 // this slot is used for the necessary emergency spill. Also, we need the 2073 // slot for dynamic stack allocations. 2074 2075 // The scavenger might be invoked if the frame offset does not fit into 2076 // the 16-bit immediate. We don't know the complete frame size here 2077 // because we've not yet computed callee-saved register spills or the 2078 // needed alignment padding. 2079 unsigned StackSize = determineFrameLayout(MF, true); 2080 MachineFrameInfo &MFI = MF.getFrameInfo(); 2081 if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) || 2082 (hasSpills(MF) && !isInt<16>(StackSize))) { 2083 const TargetRegisterClass &GPRC = PPC::GPRCRegClass; 2084 const TargetRegisterClass &G8RC = PPC::G8RCRegClass; 2085 const TargetRegisterClass &RC = Subtarget.isPPC64() ? 
G8RC : GPRC; 2086 const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo(); 2087 unsigned Size = TRI.getSpillSize(RC); 2088 Align Alignment = TRI.getSpillAlign(RC); 2089 RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false)); 2090 2091 // Might we have over-aligned allocas? 2092 bool HasAlVars = 2093 MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign(); 2094 2095 // These kinds of spills might need two registers. 2096 if (spillsCR(MF) || HasAlVars) 2097 RS->addScavengingFrameIndex( 2098 MFI.CreateStackObject(Size, Alignment, false)); 2099 } 2100 } 2101 2102 // This function checks if a callee saved gpr can be spilled to a volatile 2103 // vector register. This occurs for leaf functions when the option 2104 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers 2105 // which were not spilled to vectors, return false so the target independent 2106 // code can handle them by assigning a FrameIdx to a stack slot. 2107 bool PPCFrameLowering::assignCalleeSavedSpillSlots( 2108 MachineFunction &MF, const TargetRegisterInfo *TRI, 2109 std::vector<CalleeSavedInfo> &CSI) const { 2110 2111 if (CSI.empty()) 2112 return true; // Early exit if no callee saved registers are modified! 2113 2114 // Early exit if cannot spill gprs to volatile vector registers. 2115 MachineFrameInfo &MFI = MF.getFrameInfo(); 2116 if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector()) 2117 return false; 2118 2119 // Build a BitVector of VSRs that can be used for spilling GPRs. 
/// Emit the spill code for the callee-saved registers in \p CSI at \p MI in
/// \p MBB.
///
/// Per entry:
///  - the register is added as a block live-in unless it is already a
///    function live-in;
///  - the TOC register (X2/R2) is skipped when the function must save the TOC;
///    that spill is done in the prologue;
///  - a nonvolatile CR field (CR2..CR4) is either queued on the function info
///    for the prologue (everything but 32-bit ELF) or, on 32-bit ELF, saved by
///    one MFCR into R12 followed by an STW to the entry's frame index; later
///    CR fields only add an implicit-kill operand to that same MFCR;
///  - any other register is moved to its destination register with MTVSRD
///    when it was spilled to a register, and stored to its stack slot
///    otherwise.
///
/// Always returns true.
bool PPCFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  // Set once the 32-bit ELF MFCR has been emitted; CRMIB then refers to it so
  // that further CR fields can be attached to the same instruction.
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
      MBB.addLiveIn(Reg);

    // A CR save is already in flight: just record this field as killed by the
    // existing MFCR instead of emitting another save.
    if (CRSpilled && IsCRField) {
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (!Subtarget.is32BitELFABI()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                  .addReg(Reg, RegState::ImplicitKill);

        // Copy the CR into R12, then store R12 to the CR spill slot.
        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                         .addReg(PPC::R12,
                                                 getKillRegState(true)),
                                         CSI[i].getFrameIdx()));
      }
    } else {
      if (CSI[i].isSpilledToReg()) {
        // The save lives in a register rather than on the stack; count it in
        // the NumPESpillVSR statistic.
        NumPESpillVSR++;
        BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
          .addReg(Reg, getKillRegState(true));
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       CSI[i].getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
                                  RC, TRI);
      }
    }
  }
  return true;
}
2225 // We do not want to kill registers that are live in this function 2226 // before their use because they will become undefined registers. 2227 // Functions without NoUnwind need to preserve the order of elements in 2228 // saved vector registers. 2229 if (Subtarget.needsSwapsForVSXMemOps() && 2230 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2231 TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn, 2232 CSI[i].getFrameIdx(), RC, TRI); 2233 else 2234 TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(), 2235 RC, TRI); 2236 } 2237 } 2238 } 2239 return true; 2240 } 2241 2242 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled, 2243 bool CR4Spilled, MachineBasicBlock &MBB, 2244 MachineBasicBlock::iterator MI, 2245 ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) { 2246 2247 MachineFunction *MF = MBB.getParent(); 2248 const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo(); 2249 DebugLoc DL; 2250 unsigned MoveReg = PPC::R12; 2251 2252 // 32-bit: FP-relative 2253 MBB.insert(MI, 2254 addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg), 2255 CSI[CSIIndex].getFrameIdx())); 2256 2257 unsigned RestoreOp = PPC::MTOCRF; 2258 if (CR2Spilled) 2259 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2) 2260 .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled))); 2261 2262 if (CR3Spilled) 2263 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3) 2264 .addReg(MoveReg, getKillRegState(!CR4Spilled))); 2265 2266 if (CR4Spilled) 2267 MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4) 2268 .addReg(MoveReg, getKillRegState(true))); 2269 } 2270 2271 MachineBasicBlock::iterator PPCFrameLowering:: 2272 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, 2273 MachineBasicBlock::iterator I) const { 2274 const TargetInstrInfo &TII = *Subtarget.getInstrInfo(); 2275 if (MF.getTarget().Options.GuaranteedTailCallOpt && 2276 I->getOpcode() == PPC::ADJCALLSTACKUP) 
{ 2277 // Add (actually subtract) back the amount the callee popped on return. 2278 if (int CalleeAmt = I->getOperand(1).getImm()) { 2279 bool is64Bit = Subtarget.isPPC64(); 2280 CalleeAmt *= -1; 2281 unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1; 2282 unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0; 2283 unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI; 2284 unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4; 2285 unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS; 2286 unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI; 2287 const DebugLoc &dl = I->getDebugLoc(); 2288 2289 if (isInt<16>(CalleeAmt)) { 2290 BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg) 2291 .addReg(StackReg, RegState::Kill) 2292 .addImm(CalleeAmt); 2293 } else { 2294 MachineBasicBlock::iterator MBBI = I; 2295 BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg) 2296 .addImm(CalleeAmt >> 16); 2297 BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg) 2298 .addReg(TmpReg, RegState::Kill) 2299 .addImm(CalleeAmt & 0xFFFF); 2300 BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg) 2301 .addReg(StackReg, RegState::Kill) 2302 .addReg(TmpReg); 2303 } 2304 } 2305 } 2306 // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions. 
2307 return MBB.erase(I); 2308 } 2309 2310 static bool isCalleeSavedCR(unsigned Reg) { 2311 return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4; 2312 } 2313 2314 bool PPCFrameLowering::restoreCalleeSavedRegisters( 2315 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, 2316 MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const { 2317 MachineFunction *MF = MBB.getParent(); 2318 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 2319 PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>(); 2320 bool MustSaveTOC = FI->mustSaveTOC(); 2321 bool CR2Spilled = false; 2322 bool CR3Spilled = false; 2323 bool CR4Spilled = false; 2324 unsigned CSIIndex = 0; 2325 2326 // Initialize insertion-point logic; we will be restoring in reverse 2327 // order of spill. 2328 MachineBasicBlock::iterator I = MI, BeforeI = I; 2329 bool AtStart = I == MBB.begin(); 2330 2331 if (!AtStart) 2332 --BeforeI; 2333 2334 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2335 unsigned Reg = CSI[i].getReg(); 2336 2337 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2338 continue; 2339 2340 // Restore of callee saved condition register field is handled during 2341 // epilogue insertion. 2342 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2343 continue; 2344 2345 if (Reg == PPC::CR2) { 2346 CR2Spilled = true; 2347 // The spill slot is associated only with CR2, which is the 2348 // first nonvolatile spilled. Save it here. 2349 CSIIndex = i; 2350 continue; 2351 } else if (Reg == PPC::CR3) { 2352 CR3Spilled = true; 2353 continue; 2354 } else if (Reg == PPC::CR4) { 2355 CR4Spilled = true; 2356 continue; 2357 } else { 2358 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2359 // least one CR register, restore all spilled CRs together. 
/// Emit the reload code for the callee-saved registers in \p CSI at \p MI in
/// \p MBB, restoring in reverse order of spill.
///
/// Per entry:
///  - the TOC register (X2/R2) is skipped when the function must save the TOC;
///  - CR2..CR4 are skipped on everything but 32-bit ELF, where their restore
///    is handled during epilogue insertion;
///  - on 32-bit ELF, CR fields are only noted, then restored together via
///    restoreCRs() when the next non-CR register is reached, or after the loop
///    if none follows;
///  - any other register is moved back from its destination register with
///    MFVSRD when it was spilled to a register, and loaded from its stack slot
///    otherwise.
///
/// Always returns true.
bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  // CR fields seen since the last batch restore, and the CSI entry (CR2's)
  // whose frame index holds the saved CR word.
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
  MachineBasicBlock::iterator I = MI, BeforeI = I;
  bool AtStart = I == MBB.begin();

  // BeforeI remembers the instruction in front of the original insertion
  // point (when there is one) so the point can be re-derived after each
  // reload.
  if (!AtStart)
    --BeforeI;

  for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
    unsigned Reg = CSI[i].getReg();

    // Nothing to reload here when the function must save the TOC.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Restore of callee saved condition register field is handled during
    // epilogue insertion.
    if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
      continue;

    if (Reg == PPC::CR2) {
      CR2Spilled = true;
      // The spill slot is associated only with CR2, which is the
      // first nonvolatile spilled. Save it here.
      CSIIndex = i;
      continue;
    } else if (Reg == PPC::CR3) {
      CR3Spilled = true;
      continue;
    } else if (Reg == PPC::CR4) {
      CR4Spilled = true;
      continue;
    } else {
      // On 32-bit ELF when we first encounter a non-CR register after seeing at
      // least one CR register, restore all spilled CRs together.
      if (CR2Spilled || CR3Spilled || CR4Spilled) {
        bool is31 = needsFP(*MF);
        restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
                   CSIIndex);
        CR2Spilled = CR3Spilled = CR4Spilled = false;
      }

      if (CSI[i].isSpilledToReg()) {
        // The save lives in a register rather than on the stack; count it in
        // the NumPEReloadVSR statistic.
        DebugLoc DL;
        NumPEReloadVSR++;
        BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
            .addReg(CSI[i].getDstReg(), getKillRegState(true));
      } else {
        // Default behavior for non-CR saves.
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);

        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
                                        TRI);
        else
          TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);

        assert(I != MBB.begin() &&
               "loadRegFromStackSlot didn't insert any code!");
      }
    }

    // Insert in reverse order: move the insertion point back to just after
    // BeforeI (or to the start of the block), so the next reload lands in
    // front of the code emitted so far.
    if (AtStart)
      I = MBB.begin();
    else {
      I = BeforeI;
      ++I;
    }
  }

  // If we haven't yet restored the CRs, do so now.
  if (CR2Spilled || CR3Spilled || CR4Spilled) {
    assert(Subtarget.is32BitELFABI() &&
           "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
    bool is31 = needsFP(*MF);
    restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
  }

  return true;
}
2400 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2401 assert(Subtarget.is32BitELFABI() && 2402 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2403 bool is31 = needsFP(*MF); 2404 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2405 } 2406 2407 return true; 2408 } 2409 2410 unsigned PPCFrameLowering::getTOCSaveOffset() const { 2411 return TOCSaveOffset; 2412 } 2413 2414 unsigned PPCFrameLowering::getFramePointerSaveOffset() const { 2415 return FramePointerSaveOffset; 2416 } 2417 2418 unsigned PPCFrameLowering::getBasePointerSaveOffset() const { 2419 return BasePointerSaveOffset; 2420 } 2421 2422 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2423 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2424 return false; 2425 return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() && 2426 MF.getSubtarget<PPCSubtarget>().isPPC64()); 2427 } 2428