//===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains the PPC implementation of TargetFrameLowering class.
//
//===----------------------------------------------------------------------===//

#include "PPCFrameLowering.h"
#include "MCTargetDesc/PPCPredicates.h"
#include "PPCInstrBuilder.h"
#include "PPCInstrInfo.h"
#include "PPCMachineFunctionInfo.h"
#include "PPCSubtarget.h"
#include "PPCTargetMachine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/CodeGen/LivePhysRegs.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"
#include "llvm/IR/Function.h"
#include "llvm/Target/TargetOptions.h"

using namespace llvm;

#define DEBUG_TYPE "framelowering"
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
STATISTIC(NumPrologProbed, "Number of prologues probed");

// Off by default; when enabled, callee-saved GPRs may be spilled to vector
// registers in the prologue instead of to stack slots.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);

/// Offset (relative to the stack pointer on entry) of the slot where the
/// link register is saved. The offset is positive: the slot lives in the
/// caller's frame.
static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 16 : 8;
  // SVR4 ABI:
  return STI.isPPC64() ? 16 : 4;
}

/// Offset of the TOC save slot within the linkage area.
static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
  if (STI.isAIXABI())
    return STI.isPPC64() ? 40 : 20;
  return STI.isELFv2ABI() ? 24 : 40;
}

/// SP-relative offset of the frame-pointer save slot, expressed as the
/// unsigned wrap of a negative offset (-8 for 64-bit, -4 for 32-bit).
static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
  // First slot in the general register save area.
  return STI.isPPC64() ? -8U : -4U;
}

/// Size in bytes of the fixed linkage area at the low end of every frame:
/// 4 slots on ELFv2, 6 slots otherwise, where a slot is pointer-sized.
static unsigned computeLinkageSize(const PPCSubtarget &STI) {
  if (STI.isAIXABI() || STI.isPPC64())
    return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);

  // 32-bit SVR4 ABI:
  return 8;
}

/// SP-relative offset of the base-pointer save slot (negative, as an
/// unsigned wrap, like computeFramePointerSaveOffset above).
static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
  // Third slot in the general purpose register save area.
  if (STI.is32BitELFABI() && STI.getTargetMachine().isPositionIndependent())
    return -12U;

  // Second slot in the general purpose register save area.
  return STI.isPPC64() ? -16U : -8U;
}

/// SP-relative offset at which the condition register is spilled:
/// 4 on 32-bit AIX, 8 everywhere else.
static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
  return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
}

// All ABI-dependent save offsets and sizes are computed once here and cached
// in the PPCFrameLowering object for the lifetime of the subtarget.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}

// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {

  // Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

  // 32-bit general purpose register save area offsets shared by ELF and
  // AIX. AIX has an extra CSR with r13.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

  // 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

  // Vector register save area offsets (16 bytes per vector register).
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  static const SpillSlot ELFOffsets32[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset. We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  static const SpillSlot ELFOffsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},
      CALLEE_SAVED_VRS
  };

  static const SpillSlot AIXOffsets32[] = {CALLEE_SAVED_FPRS,
                                           CALLEE_SAVED_GPRS32,
                                           // Add AIX's extra CSR.
                                           {PPC::R13, -76},
                                           CALLEE_SAVED_VRS};

  static const SpillSlot AIXOffsets64[] = {
      CALLEE_SAVED_FPRS, CALLEE_SAVED_GPRS64, CALLEE_SAVED_VRS};

  // Select the table matching the current ABI and word size; NumEntries is
  // an out-parameter reporting the table length to the caller.
  if (Subtarget.is64BitELFABI()) {
    NumEntries = std::size(ELFOffsets64);
    return ELFOffsets64;
  }

  if (Subtarget.is32BitELFABI()) {
    NumEntries = std::size(ELFOffsets32);
    return ELFOffsets32;
  }

  assert(Subtarget.isAIXABI() && "Unexpected ABI.");

  if (Subtarget.isPPC64()) {
    NumEntries = std::size(AIXOffsets64);
    return AIXOffsets64;
  }

  NumEntries = std::size(AIXOffsets32);
  return AIXOffsets32;
}

/// True if this function spills the condition register (per function info).
static bool spillsCR(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->isCRSpilled();
}

/// True if this function performs any register spills.
static bool hasSpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasSpills();
}

/// True if this function has spills that cannot use a reg+imm addressing
/// form (per function info).
static bool hasNonRISpills(const MachineFunction &MF) {
  const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
  return FuncInfo->hasNonRISpills();
}

/// MustSaveLR - Return true if this function requires that we save the LR
/// register onto the stack in the prolog and restore it in the epilog of the
/// function.
static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
  const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();

  // We need a save/restore of LR if there is any def of LR (which is
  // defined by calls, including the PIC setup sequence), or if there is
  // some use of the LR stack slot (e.g. for builtin_return_address).
  // (LR comes in 32 and 64 bit versions.)
  MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
  return RI != MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
}

/// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
/// call frame size.
Update the MachineFunction object with the stack size. 283 uint64_t 284 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF, 285 bool UseEstimate) const { 286 unsigned NewMaxCallFrameSize = 0; 287 uint64_t FrameSize = determineFrameLayout(MF, UseEstimate, 288 &NewMaxCallFrameSize); 289 MF.getFrameInfo().setStackSize(FrameSize); 290 MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize); 291 return FrameSize; 292 } 293 294 /// determineFrameLayout - Determine the size of the frame and maximum call 295 /// frame size. 296 uint64_t 297 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF, 298 bool UseEstimate, 299 unsigned *NewMaxCallFrameSize) const { 300 const MachineFrameInfo &MFI = MF.getFrameInfo(); 301 const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 302 303 // Get the number of bytes to allocate from the FrameInfo 304 uint64_t FrameSize = 305 UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize(); 306 307 // Get stack alignments. The frame must be aligned to the greatest of these: 308 Align TargetAlign = getStackAlign(); // alignment required per the ABI 309 Align MaxAlign = MFI.getMaxAlign(); // algmt required by data in frame 310 Align Alignment = std::max(TargetAlign, MaxAlign); 311 312 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 313 314 unsigned LR = RegInfo->getRARegister(); 315 bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone); 316 bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca. 317 !MFI.adjustsStack() && // No calls. 318 !MustSaveLR(MF, LR) && // No need to save LR. 319 !FI->mustSaveTOC() && // No need to save TOC. 320 !RegInfo->hasBasePointer(MF) && // No special alignment. 321 !MFI.isFrameAddressTaken(); 322 323 // Note: for PPC32 SVR4ABI, we can still generate stackless 324 // code if all local vars are reg-allocated. 
325 bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize(); 326 327 // Check whether we can skip adjusting the stack pointer (by using red zone) 328 if (!DisableRedZone && CanUseRedZone && FitsInRedZone) { 329 // No need for frame 330 return 0; 331 } 332 333 // Get the maximum call frame size of all the calls. 334 unsigned maxCallFrameSize = MFI.getMaxCallFrameSize(); 335 336 // Maximum call frame needs to be at least big enough for linkage area. 337 unsigned minCallFrameSize = getLinkageSize(); 338 maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize); 339 340 // If we have dynamic alloca then maxCallFrameSize needs to be aligned so 341 // that allocations will be aligned. 342 if (MFI.hasVarSizedObjects()) 343 maxCallFrameSize = alignTo(maxCallFrameSize, Alignment); 344 345 // Update the new max call frame size if the caller passes in a valid pointer. 346 if (NewMaxCallFrameSize) 347 *NewMaxCallFrameSize = maxCallFrameSize; 348 349 // Include call frame size in total. 350 FrameSize += maxCallFrameSize; 351 352 // Make sure the frame is aligned. 353 FrameSize = alignTo(FrameSize, Alignment); 354 355 return FrameSize; 356 } 357 358 // hasFP - Return true if the specified function actually has a dedicated frame 359 // pointer register. 360 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const { 361 const MachineFrameInfo &MFI = MF.getFrameInfo(); 362 // FIXME: This is pretty much broken by design: hasFP() might be called really 363 // early, before the stack layout was calculated and thus hasFP() might return 364 // true or false here depending on the time of call. 365 return (MFI.getStackSize()) && needsFP(MF); 366 } 367 368 // needsFP - Return true if the specified function should have a dedicated frame 369 // pointer register. This is true if the function has variable sized allocas or 370 // if frame pointer elimination is disabled. 
bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
  const MachineFrameInfo &MFI = MF.getFrameInfo();

  // Naked functions have no stack frame pushed, so we don't have a frame
  // pointer.
  if (MF.getFunction().hasFnAttribute(Attribute::Naked))
    return false;

  // A frame pointer is required when FP elimination is disabled, when the
  // frame size is not static (allocas, stackmaps/patchpoints), after
  // returns_twice calls (setjmp), or for guaranteed tail calls that use the
  // fast calling convention.
  return MF.getTarget().Options.DisableFramePointerElim(MF) ||
         MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
         MF.exposesReturnsTwice() ||
         (MF.getTarget().Options.GuaranteedTailCallOpt &&
          MF.getInfo<PPCFunctionInfo>()->hasFastCall());
}

// Replace the abstract frame-pointer/base-pointer placeholder registers
// (PPC::FP/FP8/BP/BP8) in every instruction with the concrete physical
// registers chosen for this function.
void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
  // When there is dynamic alloca in this function, we can not use the frame
  // pointer X31/R31 for the frameaddress lowering. In this case, only X1/R1
  // always points to the backchain.
  bool is31 = needsFP(MF) && !MF.getFrameInfo().hasVarSizedObjects();
  unsigned FPReg = is31 ? PPC::R31 : PPC::R1;
  unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned BPReg = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
  unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;

  // Walk each block backwards and rewrite any placeholder register operands.
  for (MachineBasicBlock &MBB : MF)
    for (MachineBasicBlock::iterator MBBI = MBB.end(); MBBI != MBB.begin();) {
      --MBBI;
      for (MachineOperand &MO : MBBI->operands()) {
        if (!MO.isReg())
          continue;

        switch (MO.getReg()) {
        case PPC::FP:
          MO.setReg(FPReg);
          break;
        case PPC::FP8:
          MO.setReg(FP8Reg);
          break;
        case PPC::BP:
          MO.setReg(BPReg);
          break;
        case PPC::BP8:
          MO.setReg(BP8Reg);
          break;

        }
      }
    }
}

/* This function will do the following:
   - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
     respectively (defaults recommended by the ABI) and return true
   - If MBB is not an entry block, initialize the register scavenger and look
     for available registers.
   - If the defaults (R0/R12) are available, return true
   - If TwoUniqueRegsRequired is set to true, it looks for two unique
     registers. Otherwise, look for a single available register.
   - If the required registers are found, set SR1 and SR2 and return true.
   - If the required registers are not found, set SR2 or both SR1 and SR2 to
     PPC::NoRegister and return false.

   Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
   is not set, this function will attempt to find two different registers, but
   still return true if only one register is available (and set SR1 == SR2).
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      Register *SR1,
                                      Register *SR2) const {
  RegScavenger RS;
  Register R0 = Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  Register R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  if (UseAtEnd) {
    // The scratch register will be used before the first terminator (or at the
    // end of the block if there are no terminators).
    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    if (MBBI == MBB->begin()) {
      RS.enterBasicBlock(*MBB);
    } else {
      // Scavenge backwards from the end of the block to just before the first
      // terminator, so liveness is accurate at the point of use.
      RS.enterBasicBlockEnd(*MBB);
      RS.backward(MBBI);
    }
  } else {
    // The scratch register will be used at the start of the block.
    RS.enterBasicBlock(*MBB);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      // Register() is PPC::NoRegister.
      *SR2 = TwoUniqueRegsRequired ? Register() : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}

// We need a scratch register for spilling LR and for spilling CR. By default,
// we use two scratch registers to hide latency. However, if only one scratch
// register is available, we can adjust for that by not overlapping the spill
// code. However, if we need to realign the stack (i.e. have a base pointer)
// and the stack frame is large, we need two scratch registers.
// Also, stack probe requires two scratch registers, one for old sp, one for
// large frame and large probe size.
bool
PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  MachineFunction &MF = *(MBB->getParent());
  bool HasBP = RegInfo->hasBasePointer(MF);
  unsigned FrameSize = determineFrameLayout(MF);
  int NegFrameSize = -FrameSize;
  // A frame is "large" when its negated size does not fit in a signed
  // 16-bit immediate (i.e. cannot be encoded directly in STDU/STWU).
  bool IsLargeFrame = !isInt<16>(NegFrameSize);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  // Two unique registers are needed for realignment with a base pointer
  // (when the frame is large or there is no red zone), or for inline stack
  // probing.
  return ((IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1) ||
         TLI.hasInlineStackProbe(MF);
}

// A block can host the prologue only if the scratch registers the prologue
// needs can actually be found in it.
bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, false,
                             twoUniqueScratchRegsRequired(TmpMBB));
}

// A block can host the epilogue only if a scratch register is available at
// its end.
bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
  MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);

  return findScratchRegister(TmpMBB, true);
}

// Decide whether it is safe to move the stack-pointer update instruction
// down past the callee-saved register saves in the prologue.
bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();

  // Abort if there is no register info or function info.
  if (!RegInfo || !FI)
    return false;

  // Only move the stack update on ELFv2 ABI and PPC64.
  if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
    return false;

  // Check the frame size first and return false if it does not fit the
  // requirements.
  // We need a non-zero frame size as well as a frame that will fit in the red
  // zone. This is because by moving the stack pointer update we are now storing
  // to the red zone until the stack pointer is updated. If we get an interrupt
  // inside the prologue but before the stack update we now have a number of
  // stores to the red zone and those stores must all fit.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  unsigned FrameSize = MFI.getStackSize();
  if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
    return false;

  // Frame pointers and base pointers complicate matters so don't do anything
  // if we have them. For example having a frame pointer will sometimes require
  // a copy of r1 into r31 and that makes keeping track of updates to r1 more
  // difficult. Similar situation exists with setjmp.
  if (hasFP(MF) || RegInfo->hasBasePointer(MF) || MF.exposesReturnsTwice())
    return false;

  // Calls to fast_cc functions use different rules for passing parameters on
  // the stack from the ABI and using PIC base in the function imposes
  // similar restrictions to using the base pointer. It is not generally safe
  // to move the stack pointer update in these situations.
  if (FI->hasFastCall() || FI->usesPICBase())
    return false;

  // Finally we can move the stack update if we do not require register
  // scavenging. Register scavenging can introduce more spills and so
  // may make the frame size larger than we have computed.
  return !RegInfo->requiresFrameIndexScavenging(MF);
}

void PPCFrameLowering::emitPrologue(MachineFunction &MF,
                                    MachineBasicBlock &MBB) const {
  MachineBasicBlock::iterator MBBI = MBB.begin();
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const PPCTargetLowering &TLI = *Subtarget.getTargetLowering();

  MachineModuleInfo &MMI = MF.getMMI();
  const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
  // No source location is attached to prologue instructions.
  DebugLoc dl;
  // AIX assembler does not support cfi directives.
620 const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI(); 621 622 const bool HasFastMFLR = Subtarget.hasFastMFLR(); 623 624 // Get processor type. 625 bool isPPC64 = Subtarget.isPPC64(); 626 // Get the ABI. 627 bool isSVR4ABI = Subtarget.isSVR4ABI(); 628 bool isELFv2ABI = Subtarget.isELFv2ABI(); 629 assert((isSVR4ABI || Subtarget.isAIXABI()) && "Unsupported PPC ABI."); 630 631 // Work out frame sizes. 632 uint64_t FrameSize = determineFrameLayoutAndUpdate(MF); 633 int64_t NegFrameSize = -FrameSize; 634 if (!isPPC64 && (!isInt<32>(FrameSize) || !isInt<32>(NegFrameSize))) 635 llvm_unreachable("Unhandled stack size!"); 636 637 if (MFI.isFrameAddressTaken()) 638 replaceFPWithRealFP(MF); 639 640 // Check if the link register (LR) must be saved. 641 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 642 bool MustSaveLR = FI->mustSaveLR(); 643 bool MustSaveTOC = FI->mustSaveTOC(); 644 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 645 bool MustSaveCR = !MustSaveCRs.empty(); 646 // Do we have a frame pointer and/or base pointer for this function? 647 bool HasFP = hasFP(MF); 648 bool HasBP = RegInfo->hasBasePointer(MF); 649 bool HasRedZone = isPPC64 || !isSVR4ABI; 650 bool HasROPProtect = Subtarget.hasROPProtect(); 651 bool HasPrivileged = Subtarget.hasPrivileged(); 652 653 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 654 Register BPReg = RegInfo->getBaseRegister(MF); 655 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 656 Register LRReg = isPPC64 ? PPC::LR8 : PPC::LR; 657 Register TOCReg = isPPC64 ? PPC::X2 : PPC::R2; 658 Register ScratchReg; 659 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 660 // ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.) 661 const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8 662 : PPC::MFLR ); 663 const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD 664 : PPC::STW ); 665 const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? 
PPC::STDU 666 : PPC::STWU ); 667 const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX 668 : PPC::STWUX); 669 const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8 670 : PPC::OR ); 671 const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8 672 : PPC::SUBFC); 673 const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8 674 : PPC::SUBFIC); 675 const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8 676 : PPC::MFCR); 677 const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW); 678 const MCInstrDesc &HashST = 679 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHSTP8 : PPC::HASHST8) 680 : (HasPrivileged ? PPC::HASHSTP : PPC::HASHST)); 681 682 // Regarding this assert: Even though LR is saved in the caller's frame (i.e., 683 // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no 684 // Red Zone, an asynchronous event (a form of "callee") could claim a frame & 685 // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR. 686 assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) && 687 "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4."); 688 689 // Using the same bool variable as below to suppress compiler warnings. 
690 bool SingleScratchReg = findScratchRegister( 691 &MBB, false, twoUniqueScratchRegsRequired(&MBB), &ScratchReg, &TempReg); 692 assert(SingleScratchReg && 693 "Required number of registers not available in this block"); 694 695 SingleScratchReg = ScratchReg == TempReg; 696 697 int64_t LROffset = getReturnSaveOffset(); 698 699 int64_t FPOffset = 0; 700 if (HasFP) { 701 MachineFrameInfo &MFI = MF.getFrameInfo(); 702 int FPIndex = FI->getFramePointerSaveIndex(); 703 assert(FPIndex && "No Frame Pointer Save Slot!"); 704 FPOffset = MFI.getObjectOffset(FPIndex); 705 } 706 707 int64_t BPOffset = 0; 708 if (HasBP) { 709 MachineFrameInfo &MFI = MF.getFrameInfo(); 710 int BPIndex = FI->getBasePointerSaveIndex(); 711 assert(BPIndex && "No Base Pointer Save Slot!"); 712 BPOffset = MFI.getObjectOffset(BPIndex); 713 } 714 715 int64_t PBPOffset = 0; 716 if (FI->usesPICBase()) { 717 MachineFrameInfo &MFI = MF.getFrameInfo(); 718 int PBPIndex = FI->getPICBasePointerSaveIndex(); 719 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 720 PBPOffset = MFI.getObjectOffset(PBPIndex); 721 } 722 723 // Get stack alignments. 724 Align MaxAlign = MFI.getMaxAlign(); 725 if (HasBP && MaxAlign > 1) 726 assert(Log2(MaxAlign) < 16 && "Invalid alignment!"); 727 728 // Frames of 32KB & larger require special handling because they cannot be 729 // indexed into with a simple STDU/STWU/STD/STW immediate offset operand. 730 bool isLargeFrame = !isInt<16>(NegFrameSize); 731 732 // Check if we can move the stack update instruction (stdu) down the prologue 733 // past the callee saves. Hopefully this will avoid the situation where the 734 // saves are waiting for the update on the store with update to complete. 735 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 736 bool MovingStackUpdateDown = false; 737 738 // Check if we can move the stack update. 
739 if (stackUpdateCanBeMoved(MF)) { 740 const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo(); 741 for (CalleeSavedInfo CSI : Info) { 742 // If the callee saved register is spilled to a register instead of the 743 // stack then the spill no longer uses the stack pointer. 744 // This can lead to two consequences: 745 // 1) We no longer need to update the stack because the function does not 746 // spill any callee saved registers to stack. 747 // 2) We have a situation where we still have to update the stack pointer 748 // even though some registers are spilled to other registers. In 749 // this case the current code moves the stack update to an incorrect 750 // position. 751 // In either case we should abort moving the stack update operation. 752 if (CSI.isSpilledToReg()) { 753 StackUpdateLoc = MBBI; 754 MovingStackUpdateDown = false; 755 break; 756 } 757 758 int FrIdx = CSI.getFrameIdx(); 759 // If the frame index is not negative the callee saved info belongs to a 760 // stack object that is not a fixed stack object. We ignore non-fixed 761 // stack objects because we won't move the stack update pointer past them. 762 if (FrIdx >= 0) 763 continue; 764 765 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) { 766 StackUpdateLoc++; 767 MovingStackUpdateDown = true; 768 } else { 769 // We need all of the Frame Indices to meet these conditions. 770 // If they do not, abort the whole operation. 771 StackUpdateLoc = MBBI; 772 MovingStackUpdateDown = false; 773 break; 774 } 775 } 776 777 // If the operation was not aborted then update the object offset. 778 if (MovingStackUpdateDown) { 779 for (CalleeSavedInfo CSI : Info) { 780 int FrIdx = CSI.getFrameIdx(); 781 if (FrIdx < 0) 782 MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize); 783 } 784 } 785 } 786 787 // Where in the prologue we move the CR fields depends on how many scratch 788 // registers we have, and if we need to save the link register or not. 
This 789 // lambda is to avoid duplicating the logic in 2 places. 790 auto BuildMoveFromCR = [&]() { 791 if (isELFv2ABI && MustSaveCRs.size() == 1) { 792 // In the ELFv2 ABI, we are not required to save all CR fields. 793 // If only one CR field is clobbered, it is more efficient to use 794 // mfocrf to selectively save just that field, because mfocrf has short 795 // latency compares to mfcr. 796 assert(isPPC64 && "V2 ABI is 64-bit only."); 797 MachineInstrBuilder MIB = 798 BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg); 799 MIB.addReg(MustSaveCRs[0], RegState::Kill); 800 } else { 801 MachineInstrBuilder MIB = 802 BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg); 803 for (unsigned CRfield : MustSaveCRs) 804 MIB.addReg(CRfield, RegState::ImplicitKill); 805 } 806 }; 807 808 // If we need to spill the CR and the LR but we don't have two separate 809 // registers available, we must spill them one at a time 810 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 811 BuildMoveFromCR(); 812 BuildMI(MBB, MBBI, dl, StoreWordInst) 813 .addReg(TempReg, getKillRegState(true)) 814 .addImm(CRSaveOffset) 815 .addReg(SPReg); 816 } 817 818 if (MustSaveLR) 819 BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg); 820 821 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) 822 BuildMoveFromCR(); 823 824 if (HasRedZone) { 825 if (HasFP) 826 BuildMI(MBB, MBBI, dl, StoreInst) 827 .addReg(FPReg) 828 .addImm(FPOffset) 829 .addReg(SPReg); 830 if (FI->usesPICBase()) 831 BuildMI(MBB, MBBI, dl, StoreInst) 832 .addReg(PPC::R30) 833 .addImm(PBPOffset) 834 .addReg(SPReg); 835 if (HasBP) 836 BuildMI(MBB, MBBI, dl, StoreInst) 837 .addReg(BPReg) 838 .addImm(BPOffset) 839 .addReg(SPReg); 840 } 841 842 // Generate the instruction to store the LR. In the case where ROP protection 843 // is required the register holding the LR should not be killed as it will be 844 // used by the hash store instruction. 
845 auto SaveLR = [&](int64_t Offset) { 846 assert(MustSaveLR && "LR is not required to be saved!"); 847 BuildMI(MBB, StackUpdateLoc, dl, StoreInst) 848 .addReg(ScratchReg, getKillRegState(!HasROPProtect)) 849 .addImm(Offset) 850 .addReg(SPReg); 851 852 // Add the ROP protection Hash Store instruction. 853 // NOTE: This is technically a violation of the ABI. The hash can be saved 854 // up to 512 bytes into the Protected Zone. This can be outside of the 855 // initial 288 byte volatile program storage region in the Protected Zone. 856 // However, this restriction will be removed in an upcoming revision of the 857 // ABI. 858 if (HasROPProtect) { 859 const int SaveIndex = FI->getROPProtectionHashSaveIndex(); 860 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex); 861 assert((ImmOffset <= -8 && ImmOffset >= -512) && 862 "ROP hash save offset out of range."); 863 assert(((ImmOffset & 0x7) == 0) && 864 "ROP hash save offset must be 8 byte aligned."); 865 BuildMI(MBB, StackUpdateLoc, dl, HashST) 866 .addReg(ScratchReg, getKillRegState(true)) 867 .addImm(ImmOffset) 868 .addReg(SPReg); 869 } 870 }; 871 872 if (MustSaveLR && HasFastMFLR) 873 SaveLR(LROffset); 874 875 if (MustSaveCR && 876 !(SingleScratchReg && MustSaveLR)) { 877 assert(HasRedZone && "A red zone is always available on PPC64"); 878 BuildMI(MBB, MBBI, dl, StoreWordInst) 879 .addReg(TempReg, getKillRegState(true)) 880 .addImm(CRSaveOffset) 881 .addReg(SPReg); 882 } 883 884 // Skip the rest if this is a leaf function & all spills fit in the Red Zone. 885 if (!FrameSize) { 886 if (MustSaveLR && !HasFastMFLR) 887 SaveLR(LROffset); 888 return; 889 } 890 891 // Adjust stack pointer: r1 += NegFrameSize. 892 // If there is a preferred stack alignment, align R1 now 893 894 if (HasBP && HasRedZone) { 895 // Save a copy of r1 as the base pointer. 896 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 897 .addReg(SPReg) 898 .addReg(SPReg); 899 } 900 901 // Have we generated a STUX instruction to claim stack frame? 
If so, 902 // the negated frame size will be placed in ScratchReg. 903 bool HasSTUX = 904 (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) || 905 (HasBP && MaxAlign > 1) || isLargeFrame; 906 907 // If we use STUX to update the stack pointer, we need the two scratch 908 // registers TempReg and ScratchReg, we have to save LR here which is stored 909 // in ScratchReg. 910 // If the offset can not be encoded into the store instruction, we also have 911 // to save LR here. 912 if (MustSaveLR && !HasFastMFLR && 913 (HasSTUX || !isInt<16>(FrameSize + LROffset))) 914 SaveLR(LROffset); 915 916 // If FrameSize <= TLI.getStackProbeSize(MF), as POWER ABI requires backchain 917 // pointer is always stored at SP, we will get a free probe due to an essential 918 // STU(X) instruction. 919 if (TLI.hasInlineStackProbe(MF) && FrameSize > TLI.getStackProbeSize(MF)) { 920 // To be consistent with other targets, a pseudo instruction is emitted and 921 // will be later expanded in `inlineStackProbe`. 922 BuildMI(MBB, MBBI, dl, 923 TII.get(isPPC64 ? PPC::PROBED_STACKALLOC_64 924 : PPC::PROBED_STACKALLOC_32)) 925 .addDef(TempReg) 926 .addDef(ScratchReg) // ScratchReg stores the old sp. 927 .addImm(NegFrameSize); 928 // FIXME: HasSTUX is only read if HasRedZone is not set, in such case, we 929 // update the ScratchReg to meet the assumption that ScratchReg contains 930 // the NegFrameSize. This solution is rather tricky. 931 if (!HasRedZone) { 932 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 933 .addReg(ScratchReg) 934 .addReg(SPReg); 935 } 936 } else { 937 // This condition must be kept in sync with canUseAsPrologue. 938 if (HasBP && MaxAlign > 1) { 939 if (isPPC64) 940 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg) 941 .addReg(SPReg) 942 .addImm(0) 943 .addImm(64 - Log2(MaxAlign)); 944 else // PPC32... 
945 BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg) 946 .addReg(SPReg) 947 .addImm(0) 948 .addImm(32 - Log2(MaxAlign)) 949 .addImm(31); 950 if (!isLargeFrame) { 951 BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg) 952 .addReg(ScratchReg, RegState::Kill) 953 .addImm(NegFrameSize); 954 } else { 955 assert(!SingleScratchReg && "Only a single scratch reg available"); 956 TII.materializeImmPostRA(MBB, MBBI, dl, TempReg, NegFrameSize); 957 BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg) 958 .addReg(ScratchReg, RegState::Kill) 959 .addReg(TempReg, RegState::Kill); 960 } 961 962 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 963 .addReg(SPReg, RegState::Kill) 964 .addReg(SPReg) 965 .addReg(ScratchReg); 966 } else if (!isLargeFrame) { 967 BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg) 968 .addReg(SPReg) 969 .addImm(NegFrameSize) 970 .addReg(SPReg); 971 } else { 972 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, NegFrameSize); 973 BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg) 974 .addReg(SPReg, RegState::Kill) 975 .addReg(SPReg) 976 .addReg(ScratchReg); 977 } 978 } 979 980 // Save the TOC register after the stack pointer update if a prologue TOC 981 // save is required for the function. 982 if (MustSaveTOC) { 983 assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2"); 984 BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD)) 985 .addReg(TOCReg, getKillRegState(true)) 986 .addImm(TOCSaveOffset) 987 .addReg(SPReg); 988 } 989 990 if (!HasRedZone) { 991 assert(!isPPC64 && "A red zone is always available on PPC64"); 992 if (HasSTUX) { 993 // The negated frame size is in ScratchReg, and the SPReg has been 994 // decremented by the frame size: SPReg = old SPReg + ScratchReg. 995 // Since FPOffset, PBPOffset, etc. are relative to the beginning of 996 // the stack frame (i.e. the old SP), ideally, we would put the old 997 // SP into a register and use it as the base for the stores. 
The 998 // problem is that the only available register may be ScratchReg, 999 // which could be R0, and R0 cannot be used as a base address. 1000 1001 // First, set ScratchReg to the old SP. This may need to be modified 1002 // later. 1003 BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg) 1004 .addReg(ScratchReg, RegState::Kill) 1005 .addReg(SPReg); 1006 1007 if (ScratchReg == PPC::R0) { 1008 // R0 cannot be used as a base register, but it can be used as an 1009 // index in a store-indexed. 1010 int LastOffset = 0; 1011 if (HasFP) { 1012 // R0 += (FPOffset-LastOffset). 1013 // Need addic, since addi treats R0 as 0. 1014 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1015 .addReg(ScratchReg) 1016 .addImm(FPOffset-LastOffset); 1017 LastOffset = FPOffset; 1018 // Store FP into *R0. 1019 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1020 .addReg(FPReg, RegState::Kill) // Save FP. 1021 .addReg(PPC::ZERO) 1022 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1023 } 1024 if (FI->usesPICBase()) { 1025 // R0 += (PBPOffset-LastOffset). 1026 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1027 .addReg(ScratchReg) 1028 .addImm(PBPOffset-LastOffset); 1029 LastOffset = PBPOffset; 1030 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1031 .addReg(PPC::R30, RegState::Kill) // Save PIC base pointer. 1032 .addReg(PPC::ZERO) 1033 .addReg(ScratchReg); // This will be the index (R0 is ok here). 1034 } 1035 if (HasBP) { 1036 // R0 += (BPOffset-LastOffset). 1037 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg) 1038 .addReg(ScratchReg) 1039 .addImm(BPOffset-LastOffset); 1040 LastOffset = BPOffset; 1041 BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX)) 1042 .addReg(BPReg, RegState::Kill) // Save BP. 1043 .addReg(PPC::ZERO) 1044 .addReg(ScratchReg); // This will be the index (R0 is ok here). 
1045 // BP = R0-LastOffset 1046 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg) 1047 .addReg(ScratchReg, RegState::Kill) 1048 .addImm(-LastOffset); 1049 } 1050 } else { 1051 // ScratchReg is not R0, so use it as the base register. It is 1052 // already set to the old SP, so we can use the offsets directly. 1053 1054 // Now that the stack frame has been allocated, save all the necessary 1055 // registers using ScratchReg as the base address. 1056 if (HasFP) 1057 BuildMI(MBB, MBBI, dl, StoreInst) 1058 .addReg(FPReg) 1059 .addImm(FPOffset) 1060 .addReg(ScratchReg); 1061 if (FI->usesPICBase()) 1062 BuildMI(MBB, MBBI, dl, StoreInst) 1063 .addReg(PPC::R30) 1064 .addImm(PBPOffset) 1065 .addReg(ScratchReg); 1066 if (HasBP) { 1067 BuildMI(MBB, MBBI, dl, StoreInst) 1068 .addReg(BPReg) 1069 .addImm(BPOffset) 1070 .addReg(ScratchReg); 1071 BuildMI(MBB, MBBI, dl, OrInst, BPReg) 1072 .addReg(ScratchReg, RegState::Kill) 1073 .addReg(ScratchReg); 1074 } 1075 } 1076 } else { 1077 // The frame size is a known 16-bit constant (fitting in the immediate 1078 // field of STWU). To be here we have to be compiling for PPC32. 1079 // Since the SPReg has been decreased by FrameSize, add it back to each 1080 // offset. 1081 if (HasFP) 1082 BuildMI(MBB, MBBI, dl, StoreInst) 1083 .addReg(FPReg) 1084 .addImm(FrameSize + FPOffset) 1085 .addReg(SPReg); 1086 if (FI->usesPICBase()) 1087 BuildMI(MBB, MBBI, dl, StoreInst) 1088 .addReg(PPC::R30) 1089 .addImm(FrameSize + PBPOffset) 1090 .addReg(SPReg); 1091 if (HasBP) { 1092 BuildMI(MBB, MBBI, dl, StoreInst) 1093 .addReg(BPReg) 1094 .addImm(FrameSize + BPOffset) 1095 .addReg(SPReg); 1096 BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg) 1097 .addReg(SPReg) 1098 .addImm(FrameSize); 1099 } 1100 } 1101 } 1102 1103 // Save the LR now. 1104 if (!HasSTUX && MustSaveLR && !HasFastMFLR && isInt<16>(FrameSize + LROffset)) 1105 SaveLR(LROffset + FrameSize); 1106 1107 // Add Call Frame Information for the instructions we generated above. 
1108 if (needsCFI) { 1109 unsigned CFIIndex; 1110 1111 if (HasBP) { 1112 // Define CFA in terms of BP. Do this in preference to using FP/SP, 1113 // because if the stack needed aligning then CFA won't be at a fixed 1114 // offset from FP/SP. 1115 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1116 CFIIndex = MF.addFrameInst( 1117 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1118 } else { 1119 // Adjust the definition of CFA to account for the change in SP. 1120 assert(NegFrameSize); 1121 CFIIndex = MF.addFrameInst( 1122 MCCFIInstruction::cfiDefCfaOffset(nullptr, -NegFrameSize)); 1123 } 1124 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1125 .addCFIIndex(CFIIndex); 1126 1127 if (HasFP) { 1128 // Describe where FP was saved, at a fixed offset from CFA. 1129 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1130 CFIIndex = MF.addFrameInst( 1131 MCCFIInstruction::createOffset(nullptr, Reg, FPOffset)); 1132 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1133 .addCFIIndex(CFIIndex); 1134 } 1135 1136 if (FI->usesPICBase()) { 1137 // Describe where FP was saved, at a fixed offset from CFA. 1138 unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true); 1139 CFIIndex = MF.addFrameInst( 1140 MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset)); 1141 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1142 .addCFIIndex(CFIIndex); 1143 } 1144 1145 if (HasBP) { 1146 // Describe where BP was saved, at a fixed offset from CFA. 1147 unsigned Reg = MRI->getDwarfRegNum(BPReg, true); 1148 CFIIndex = MF.addFrameInst( 1149 MCCFIInstruction::createOffset(nullptr, Reg, BPOffset)); 1150 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1151 .addCFIIndex(CFIIndex); 1152 } 1153 1154 if (MustSaveLR) { 1155 // Describe where LR was saved, at a fixed offset from CFA. 
1156 unsigned Reg = MRI->getDwarfRegNum(LRReg, true); 1157 CFIIndex = MF.addFrameInst( 1158 MCCFIInstruction::createOffset(nullptr, Reg, LROffset)); 1159 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1160 .addCFIIndex(CFIIndex); 1161 } 1162 } 1163 1164 // If there is a frame pointer, copy R1 into R31 1165 if (HasFP) { 1166 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1167 .addReg(SPReg) 1168 .addReg(SPReg); 1169 1170 if (!HasBP && needsCFI) { 1171 // Change the definition of CFA from SP+offset to FP+offset, because SP 1172 // will change at every alloca. 1173 unsigned Reg = MRI->getDwarfRegNum(FPReg, true); 1174 unsigned CFIIndex = MF.addFrameInst( 1175 MCCFIInstruction::createDefCfaRegister(nullptr, Reg)); 1176 1177 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1178 .addCFIIndex(CFIIndex); 1179 } 1180 } 1181 1182 if (needsCFI) { 1183 // Describe where callee saved registers were saved, at fixed offsets from 1184 // CFA. 1185 const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo(); 1186 for (const CalleeSavedInfo &I : CSI) { 1187 Register Reg = I.getReg(); 1188 if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue; 1189 1190 // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just 1191 // subregisters of CR2. We just need to emit a move of CR2. 1192 if (PPC::CRBITRCRegClass.contains(Reg)) 1193 continue; 1194 1195 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 1196 continue; 1197 1198 // For 64-bit SVR4 when we have spilled CRs, the spill location 1199 // is SP+8, not a frame-relative slot. 1200 if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) { 1201 // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for 1202 // the whole CR word. In the ELFv2 ABI, every CR that was 1203 // actually saved gets its own CFI record. 1204 Register CRReg = isELFv2ABI? 
Reg : PPC::CR2; 1205 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1206 nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset)); 1207 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1208 .addCFIIndex(CFIIndex); 1209 continue; 1210 } 1211 1212 if (I.isSpilledToReg()) { 1213 unsigned SpilledReg = I.getDstReg(); 1214 unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister( 1215 nullptr, MRI->getDwarfRegNum(Reg, true), 1216 MRI->getDwarfRegNum(SpilledReg, true))); 1217 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1218 .addCFIIndex(CFIRegister); 1219 } else { 1220 int64_t Offset = MFI.getObjectOffset(I.getFrameIdx()); 1221 // We have changed the object offset above but we do not want to change 1222 // the actual offsets in the CFI instruction so we have to undo the 1223 // offset change here. 1224 if (MovingStackUpdateDown) 1225 Offset -= NegFrameSize; 1226 1227 unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset( 1228 nullptr, MRI->getDwarfRegNum(Reg, true), Offset)); 1229 BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION)) 1230 .addCFIIndex(CFIIndex); 1231 } 1232 } 1233 } 1234 } 1235 1236 void PPCFrameLowering::inlineStackProbe(MachineFunction &MF, 1237 MachineBasicBlock &PrologMBB) const { 1238 bool isPPC64 = Subtarget.isPPC64(); 1239 const PPCTargetLowering &TLI = *Subtarget.getTargetLowering(); 1240 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1241 MachineFrameInfo &MFI = MF.getFrameInfo(); 1242 MachineModuleInfo &MMI = MF.getMMI(); 1243 const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); 1244 // AIX assembler does not support cfi directives. 
  // Expansion of the PROBED_STACKALLOC_{32,64} pseudo emitted by emitPrologue:
  // replace the single pseudo with real store-with-update instructions that
  // touch (probe) every ProbeSize-d chunk of the new stack frame.
  // CFI is only emitted when frame moves are needed; the AIX assembler does
  // not support cfi directives, so CFI is suppressed there.
  const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
  // Locate the probe pseudo in the prologue block; if none exists there is
  // nothing to expand.
  auto StackAllocMIPos = llvm::find_if(PrologMBB, [](MachineInstr &MI) {
    int Opc = MI.getOpcode();
    return Opc == PPC::PROBED_STACKALLOC_64 || Opc == PPC::PROBED_STACKALLOC_32;
  });
  if (StackAllocMIPos == PrologMBB.end())
    return;
  const BasicBlock *ProbedBB = PrologMBB.getBasicBlock();
  MachineBasicBlock *CurrentMBB = &PrologMBB;
  DebugLoc DL = PrologMBB.findDebugLoc(StackAllocMIPos);
  MachineInstr &MI = *StackAllocMIPos;
  // Pseudo operands: (0) scratch def, (1) def holding the old SP / final FP,
  // (2) the (negative) frame size immediate.
  int64_t NegFrameSize = MI.getOperand(2).getImm();
  unsigned ProbeSize = TLI.getStackProbeSize(MF);
  int64_t NegProbeSize = -(int64_t)ProbeSize;
  assert(isInt<32>(NegProbeSize) && "Unhandled probe size");
  // Split the frame into NumBlocks full probe-sized chunks plus a residual
  // (both negative quantities, consistent with the downward-growing stack).
  int64_t NumBlocks = NegFrameSize / NegProbeSize;
  int64_t NegResidualSize = NegFrameSize % NegProbeSize;
  Register SPReg = isPPC64 ? PPC::X1 : PPC::R1;
  Register ScratchReg = MI.getOperand(0).getReg();
  Register FPReg = MI.getOperand(1).getReg();
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  bool HasBP = RegInfo->hasBasePointer(MF);
  Register BPReg = RegInfo->getBaseRegister(MF);
  Align MaxAlign = MFI.getMaxAlign();
  bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
  const MCInstrDesc &CopyInst = TII.get(isPPC64 ? PPC::OR8 : PPC::OR);
  // Subroutines to generate .cfi_* directives.
  // Emit ".cfi_def_cfa_register Reg" before MBBI.
  auto buildDefCFAReg = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, Register Reg) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MF.addFrameInst(
        MCCFIInstruction::createDefCfaRegister(nullptr, RegNum));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Emit ".cfi_def_cfa Reg, Offset" before MBBI.
  auto buildDefCFA = [&](MachineBasicBlock &MBB,
                         MachineBasicBlock::iterator MBBI, Register Reg,
                         int Offset) {
    unsigned RegNum = MRI->getDwarfRegNum(Reg, true);
    unsigned CFIIndex = MBB.getParent()->addFrameInst(
        MCCFIInstruction::cfiDefCfa(nullptr, RegNum, Offset));
    BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::CFI_INSTRUCTION))
        .addCFIIndex(CFIIndex);
  };
  // Subroutine to determine if we can use the Imm as part of d-form.
  // (16-bit signed and 4-byte aligned, as required by DS-form displacements.)
  auto CanUseDForm = [](int64_t Imm) { return isInt<16>(Imm) && Imm % 4 == 0; };
  // Subroutine to materialize the Imm into TempReg.
  // Uses LI for 16-bit values, otherwise an LIS/ORI pair for full 32 bits.
  auto MaterializeImm = [&](MachineBasicBlock &MBB,
                            MachineBasicBlock::iterator MBBI, int64_t Imm,
                            Register &TempReg) {
    assert(isInt<32>(Imm) && "Unhandled imm");
    if (isInt<16>(Imm))
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LI8 : PPC::LI), TempReg)
          .addImm(Imm);
    else {
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::LIS8 : PPC::LIS), TempReg)
          .addImm(Imm >> 16);
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::ORI8 : PPC::ORI), TempReg)
          .addReg(TempReg)
          .addImm(Imm & 0xFFFF);
    }
  };
  // Subroutine to store frame pointer and decrease stack pointer by probe size.
  // Emits STDU/STWU (d-form) or STDUX/STWUX (x-form with NegSizeReg); the
  // store-with-update writes the back chain at the new SP, which is the probe.
  auto allocateAndProbe = [&](MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator MBBI, int64_t NegSize,
                              Register NegSizeReg, bool UseDForm,
                              Register StoreReg) {
    if (UseDForm)
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDU : PPC::STWU), SPReg)
          .addReg(StoreReg)
          .addImm(NegSize)
          .addReg(SPReg);
    else
      BuildMI(MBB, MBBI, DL, TII.get(isPPC64 ? PPC::STDUX : PPC::STWUX), SPReg)
          .addReg(StoreReg)
          .addReg(SPReg)
          .addReg(NegSizeReg);
  };
  // Used to probe stack when realignment is required.
  // Note that, according to ABI's requirement, *sp must always equals the
  // value of back-chain pointer, only st(w|d)u(x) can be used to update sp.
  // Following is pseudo code:
  // final_sp = (sp & align) + negframesize;
  // neg_gap = final_sp - sp;
  // while (neg_gap < negprobesize) {
  //   stdu fp, negprobesize(sp);
  //   neg_gap -= negprobesize;
  // }
  // stdux fp, sp, neg_gap
  //
  // When HasBP & HasRedzone, back-chain pointer is already saved in BPReg
  // before probe code, we don't need to save it, so we get one additional reg
  // that can be used to materialize the probeside if needed to use xform.
  // Otherwise, we can NOT materialize probeside, so we can only use Dform for
  // now.
  //
  // The allocations are:
  // if (HasBP && HasRedzone) {
  //   r0: materialize the probesize if needed so that we can use xform.
  //   r12: `neg_gap`
  // } else {
  //   r0: back-chain pointer
  //   r12: `neg_gap`.
  // }
  // On entry, TempReg holds the desired final (aligned) SP; returns the exit
  // block that now holds everything following the probe pseudo.
  auto probeRealignedStack = [&](MachineBasicBlock &MBB,
                                 MachineBasicBlock::iterator MBBI,
                                 Register ScratchReg, Register TempReg) {
    assert(HasBP && "The function is supposed to have base pointer when its "
                    "stack is realigned.");
    assert(isPowerOf2_64(ProbeSize) && "Probe size should be power of 2");

    // FIXME: We can eliminate this limitation if we get more infomation about
    // which part of redzone are already used. Used redzone can be treated
    // probed. But there might be `holes' in redzone probed, this could
    // complicate the implementation.
    assert(ProbeSize >= Subtarget.getRedZoneSize() &&
           "Probe size should be larger or equal to the size of red-zone so "
           "that red-zone is not clobbered by probing.");

    Register &FinalStackPtr = TempReg;
    // FIXME: We only support NegProbeSize materializable by DForm currently.
    // When HasBP && HasRedzone, we can use xform if we have an additional idle
    // register.
    // NOTE: this clamps the captured outer NegProbeSize (lambda captures by
    // reference), so the value persists after this lambda returns.
    NegProbeSize = std::max(NegProbeSize, -((int64_t)1 << 15));
    assert(isInt<16>(NegProbeSize) &&
           "NegProbeSize should be materializable by DForm");
    Register CRReg = PPC::CR0;
    // Layout of output assembly kinda like:
    // bb.0:
    //   ...
    //   sub $scratchreg, $finalsp, r1
    //   cmpdi $scratchreg, <negprobesize>
    //   bge bb.2
    // bb.1:
    //   stdu <backchain>, <negprobesize>(r1)
    //   sub $scratchreg, $scratchreg, negprobesize
    //   cmpdi $scratchreg, <negprobesize>
    //   blt bb.1
    // bb.2:
    //   stdux <backchain>, r1, $scratchreg
    MachineFunction::iterator MBBInsertPoint = std::next(MBB.getIterator());
    MachineBasicBlock *ProbeLoopBodyMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeLoopBodyMBB);
    MachineBasicBlock *ProbeExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
    MF.insert(MBBInsertPoint, ProbeExitMBB);
    // bb.2
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      allocateAndProbe(*ProbeExitMBB, ProbeExitMBB->end(), 0, ScratchReg, false,
                       BackChainPointer);
      if (HasRedZone)
        // PROBED_STACKALLOC_64 assumes Operand(1) stores the old sp, copy BPReg
        // to TempReg to satisfy it.
        BuildMI(*ProbeExitMBB, ProbeExitMBB->end(), DL, CopyInst, TempReg)
            .addReg(BPReg)
            .addReg(BPReg);
      // Move everything after the pseudo into the exit block.
      ProbeExitMBB->splice(ProbeExitMBB->end(), &MBB, MBBI, MBB.end());
      ProbeExitMBB->transferSuccessorsAndUpdatePHIs(&MBB);
    }
    // bb.0
    {
      // ScratchReg = neg_gap = final_sp - sp.
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF), ScratchReg)
          .addReg(SPReg)
          .addReg(FinalStackPtr);
      if (!HasRedZone)
        // Preserve the old SP in TempReg; it is the back chain to store.
        BuildMI(&MBB, DL, CopyInst, TempReg).addReg(SPReg).addReg(SPReg);
      BuildMI(&MBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI), CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      BuildMI(&MBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_GE)
          .addReg(CRReg)
          .addMBB(ProbeExitMBB);
      MBB.addSuccessor(ProbeLoopBodyMBB);
      MBB.addSuccessor(ProbeExitMBB);
    }
    // bb.1
    {
      Register BackChainPointer = HasRedZone ? BPReg : TempReg;
      allocateAndProbe(*ProbeLoopBodyMBB, ProbeLoopBodyMBB->end(), NegProbeSize,
                       0, true /*UseDForm*/, BackChainPointer);
      // neg_gap -= negprobesize (adding the positive probe size).
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::ADDI8 : PPC::ADDI),
              ScratchReg)
          .addReg(ScratchReg)
          .addImm(-NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(isPPC64 ? PPC::CMPDI : PPC::CMPWI),
              CRReg)
          .addReg(ScratchReg)
          .addImm(NegProbeSize);
      BuildMI(ProbeLoopBodyMBB, DL, TII.get(PPC::BCC))
          .addImm(PPC::PRED_LT)
          .addReg(CRReg)
          .addMBB(ProbeLoopBodyMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeExitMBB);
      ProbeLoopBodyMBB->addSuccessor(ProbeLoopBodyMBB);
    }
    // Update liveins.
    fullyRecomputeLiveIns({ProbeExitMBB, ProbeLoopBodyMBB});
    return ProbeExitMBB;
  };
  // For case HasBP && MaxAlign > 1, we have to realign the SP by performing
  // SP = SP - SP % MaxAlign, thus make the probe more like dynamic probe since
  // the offset subtracted from SP is determined by SP's runtime value.
  if (HasBP && MaxAlign > 1) {
    // Calculate final stack pointer.
    // ScratchReg = SP % MaxAlign (mask out all but the low alignment bits).
    if (isPPC64)
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLDICL), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(64 - Log2(MaxAlign));
    else
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(PPC::RLWINM), ScratchReg)
          .addReg(SPReg)
          .addImm(0)
          .addImm(32 - Log2(MaxAlign))
          .addImm(31);
    // FPReg = SP - (SP % MaxAlign), i.e. SP aligned down.
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::SUBF8 : PPC::SUBF),
            FPReg)
        .addReg(ScratchReg)
        .addReg(SPReg);
    // FPReg = aligned SP + NegFrameSize = the final stack pointer.
    MaterializeImm(*CurrentMBB, {MI}, NegFrameSize, ScratchReg);
    BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::ADD8 : PPC::ADD4),
            FPReg)
        .addReg(ScratchReg)
        .addReg(FPReg);
    CurrentMBB = probeRealignedStack(*CurrentMBB, {MI}, ScratchReg, FPReg);
    if (needsCFI)
      buildDefCFAReg(*CurrentMBB, {MI}, FPReg);
  } else {
    // Initialize current frame pointer.
    BuildMI(*CurrentMBB, {MI}, DL, CopyInst, FPReg).addReg(SPReg).addReg(SPReg);
    // Use FPReg to calculate CFA.
    if (needsCFI)
      buildDefCFA(*CurrentMBB, {MI}, FPReg, 0);
    // Probe residual part.
    if (NegResidualSize) {
      bool ResidualUseDForm = CanUseDForm(NegResidualSize);
      if (!ResidualUseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegResidualSize, ScratchReg);
      allocateAndProbe(*CurrentMBB, {MI}, NegResidualSize, ScratchReg,
                       ResidualUseDForm, FPReg);
    }
    bool UseDForm = CanUseDForm(NegProbeSize);
    // If number of blocks is small, just probe them directly.
    if (NumBlocks < 3) {
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      for (int i = 0; i < NumBlocks; ++i)
        allocateAndProbe(*CurrentMBB, {MI}, NegProbeSize, ScratchReg, UseDForm,
                         FPReg);
      if (needsCFI) {
        // Restore using SPReg to calculate CFA.
        buildDefCFAReg(*CurrentMBB, {MI}, SPReg);
      }
    } else {
      // Since CTR is a volatile register and current shrinkwrap implementation
      // won't choose an MBB in a loop as the PrologMBB, it's safe to synthesize a
      // CTR loop to probe.
      // Calculate trip count and stores it in CTRReg.
      MaterializeImm(*CurrentMBB, {MI}, NumBlocks, ScratchReg);
      BuildMI(*CurrentMBB, {MI}, DL, TII.get(isPPC64 ? PPC::MTCTR8 : PPC::MTCTR))
          .addReg(ScratchReg, RegState::Kill);
      if (!UseDForm)
        MaterializeImm(*CurrentMBB, {MI}, NegProbeSize, ScratchReg);
      // Create MBBs of the loop.
      MachineFunction::iterator MBBInsertPoint =
          std::next(CurrentMBB->getIterator());
      MachineBasicBlock *LoopMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, LoopMBB);
      MachineBasicBlock *ExitMBB = MF.CreateMachineBasicBlock(ProbedBB);
      MF.insert(MBBInsertPoint, ExitMBB);
      // Synthesize the loop body.
      allocateAndProbe(*LoopMBB, LoopMBB->end(), NegProbeSize, ScratchReg,
                       UseDForm, FPReg);
      BuildMI(LoopMBB, DL, TII.get(isPPC64 ? PPC::BDNZ8 : PPC::BDNZ))
          .addMBB(LoopMBB);
      LoopMBB->addSuccessor(ExitMBB);
      LoopMBB->addSuccessor(LoopMBB);
      // Synthesize the exit MBB.
      ExitMBB->splice(ExitMBB->end(), CurrentMBB,
                      std::next(MachineBasicBlock::iterator(MI)),
                      CurrentMBB->end());
      ExitMBB->transferSuccessorsAndUpdatePHIs(CurrentMBB);
      CurrentMBB->addSuccessor(LoopMBB);
      if (needsCFI) {
        // Restore using SPReg to calculate CFA.
        buildDefCFAReg(*ExitMBB, ExitMBB->begin(), SPReg);
      }
      // Update liveins.
1534 fullyRecomputeLiveIns({ExitMBB, LoopMBB}); 1535 } 1536 } 1537 ++NumPrologProbed; 1538 MI.eraseFromParent(); 1539 } 1540 1541 void PPCFrameLowering::emitEpilogue(MachineFunction &MF, 1542 MachineBasicBlock &MBB) const { 1543 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1544 DebugLoc dl; 1545 1546 if (MBBI != MBB.end()) 1547 dl = MBBI->getDebugLoc(); 1548 1549 const PPCInstrInfo &TII = *Subtarget.getInstrInfo(); 1550 const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo(); 1551 1552 // Get alignment info so we know how to restore the SP. 1553 const MachineFrameInfo &MFI = MF.getFrameInfo(); 1554 1555 // Get the number of bytes allocated from the FrameInfo. 1556 int64_t FrameSize = MFI.getStackSize(); 1557 1558 // Get processor type. 1559 bool isPPC64 = Subtarget.isPPC64(); 1560 1561 // Check if the link register (LR) has been saved. 1562 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1563 bool MustSaveLR = FI->mustSaveLR(); 1564 const SmallVectorImpl<Register> &MustSaveCRs = FI->getMustSaveCRs(); 1565 bool MustSaveCR = !MustSaveCRs.empty(); 1566 // Do we have a frame pointer and/or base pointer for this function? 1567 bool HasFP = hasFP(MF); 1568 bool HasBP = RegInfo->hasBasePointer(MF); 1569 bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI(); 1570 bool HasROPProtect = Subtarget.hasROPProtect(); 1571 bool HasPrivileged = Subtarget.hasPrivileged(); 1572 1573 Register SPReg = isPPC64 ? PPC::X1 : PPC::R1; 1574 Register BPReg = RegInfo->getBaseRegister(MF); 1575 Register FPReg = isPPC64 ? PPC::X31 : PPC::R31; 1576 Register ScratchReg; 1577 Register TempReg = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg 1578 const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8 1579 : PPC::MTLR ); 1580 const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD 1581 : PPC::LWZ ); 1582 const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8 1583 : PPC::LIS ); 1584 const MCInstrDesc& OrInst = TII.get(isPPC64 ? 
PPC::OR8 1585 : PPC::OR ); 1586 const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8 1587 : PPC::ORI ); 1588 const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8 1589 : PPC::ADDI ); 1590 const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8 1591 : PPC::ADD4 ); 1592 const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8 1593 : PPC::LWZ); 1594 const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8 1595 : PPC::MTOCRF); 1596 const MCInstrDesc &HashChk = 1597 TII.get(isPPC64 ? (HasPrivileged ? PPC::HASHCHKP8 : PPC::HASHCHK8) 1598 : (HasPrivileged ? PPC::HASHCHKP : PPC::HASHCHK)); 1599 int64_t LROffset = getReturnSaveOffset(); 1600 1601 int64_t FPOffset = 0; 1602 1603 // Using the same bool variable as below to suppress compiler warnings. 1604 bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg, 1605 &TempReg); 1606 assert(SingleScratchReg && 1607 "Could not find an available scratch register"); 1608 1609 SingleScratchReg = ScratchReg == TempReg; 1610 1611 if (HasFP) { 1612 int FPIndex = FI->getFramePointerSaveIndex(); 1613 assert(FPIndex && "No Frame Pointer Save Slot!"); 1614 FPOffset = MFI.getObjectOffset(FPIndex); 1615 } 1616 1617 int64_t BPOffset = 0; 1618 if (HasBP) { 1619 int BPIndex = FI->getBasePointerSaveIndex(); 1620 assert(BPIndex && "No Base Pointer Save Slot!"); 1621 BPOffset = MFI.getObjectOffset(BPIndex); 1622 } 1623 1624 int64_t PBPOffset = 0; 1625 if (FI->usesPICBase()) { 1626 int PBPIndex = FI->getPICBasePointerSaveIndex(); 1627 assert(PBPIndex && "No PIC Base Pointer Save Slot!"); 1628 PBPOffset = MFI.getObjectOffset(PBPIndex); 1629 } 1630 1631 bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn()); 1632 1633 if (IsReturnBlock) { 1634 unsigned RetOpcode = MBBI->getOpcode(); 1635 bool UsesTCRet = RetOpcode == PPC::TCRETURNri || 1636 RetOpcode == PPC::TCRETURNdi || 1637 RetOpcode == PPC::TCRETURNai || 1638 RetOpcode == PPC::TCRETURNri8 || 1639 RetOpcode == PPC::TCRETURNdi8 || 1640 
RetOpcode == PPC::TCRETURNai8; 1641 1642 if (UsesTCRet) { 1643 int MaxTCRetDelta = FI->getTailCallSPDelta(); 1644 MachineOperand &StackAdjust = MBBI->getOperand(1); 1645 assert(StackAdjust.isImm() && "Expecting immediate value."); 1646 // Adjust stack pointer. 1647 int StackAdj = StackAdjust.getImm(); 1648 int Delta = StackAdj - MaxTCRetDelta; 1649 assert((Delta >= 0) && "Delta must be positive"); 1650 if (MaxTCRetDelta>0) 1651 FrameSize += (StackAdj +Delta); 1652 else 1653 FrameSize += StackAdj; 1654 } 1655 } 1656 1657 // Frames of 32KB & larger require special handling because they cannot be 1658 // indexed into with a simple LD/LWZ immediate offset operand. 1659 bool isLargeFrame = !isInt<16>(FrameSize); 1660 1661 // On targets without red zone, the SP needs to be restored last, so that 1662 // all live contents of the stack frame are upwards of the SP. This means 1663 // that we cannot restore SP just now, since there may be more registers 1664 // to restore from the stack frame (e.g. R31). If the frame size is not 1665 // a simple immediate value, we will need a spare register to hold the 1666 // restored SP. If the frame size is known and small, we can simply adjust 1667 // the offsets of the registers to be restored, and still use SP to restore 1668 // them. In such case, the final update of SP will be to add the frame 1669 // size to it. 1670 // To simplify the code, set RBReg to the base register used to restore 1671 // values from the stack, and set SPAdd to the value that needs to be added 1672 // to the SP at the end. The default values are as if red zone was present. 1673 unsigned RBReg = SPReg; 1674 uint64_t SPAdd = 0; 1675 1676 // Check if we can move the stack update instruction up the epilogue 1677 // past the callee saves. This will allow the move to LR instruction 1678 // to be executed before the restores of the callee saves which means 1679 // that the callee saves can hide the latency from the MTLR instrcution. 
1680 MachineBasicBlock::iterator StackUpdateLoc = MBBI; 1681 if (stackUpdateCanBeMoved(MF)) { 1682 const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo(); 1683 for (CalleeSavedInfo CSI : Info) { 1684 // If the callee saved register is spilled to another register abort the 1685 // stack update movement. 1686 if (CSI.isSpilledToReg()) { 1687 StackUpdateLoc = MBBI; 1688 break; 1689 } 1690 int FrIdx = CSI.getFrameIdx(); 1691 // If the frame index is not negative the callee saved info belongs to a 1692 // stack object that is not a fixed stack object. We ignore non-fixed 1693 // stack objects because we won't move the update of the stack pointer 1694 // past them. 1695 if (FrIdx >= 0) 1696 continue; 1697 1698 if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) 1699 StackUpdateLoc--; 1700 else { 1701 // Abort the operation as we can't update all CSR restores. 1702 StackUpdateLoc = MBBI; 1703 break; 1704 } 1705 } 1706 } 1707 1708 if (FrameSize) { 1709 // In the prologue, the loaded (or persistent) stack pointer value is 1710 // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red 1711 // zone add this offset back now. 1712 1713 // If the function has a base pointer, the stack pointer has been copied 1714 // to it so we can restore it by copying in the other direction. 1715 if (HasRedZone && HasBP) { 1716 BuildMI(MBB, MBBI, dl, OrInst, RBReg). 1717 addReg(BPReg). 1718 addReg(BPReg); 1719 } 1720 // If this function contained a fastcc call and GuaranteedTailCallOpt is 1721 // enabled (=> hasFastCall()==true) the fastcc call might contain a tail 1722 // call which invalidates the stack pointer value in SP(0). So we use the 1723 // value of R31 in this case. Similar situation exists with setjmp. 
1724 else if (FI->hasFastCall() || MF.exposesReturnsTwice()) { 1725 assert(HasFP && "Expecting a valid frame pointer."); 1726 if (!HasRedZone) 1727 RBReg = FPReg; 1728 if (!isLargeFrame) { 1729 BuildMI(MBB, MBBI, dl, AddImmInst, RBReg) 1730 .addReg(FPReg).addImm(FrameSize); 1731 } else { 1732 TII.materializeImmPostRA(MBB, MBBI, dl, ScratchReg, FrameSize); 1733 BuildMI(MBB, MBBI, dl, AddInst) 1734 .addReg(RBReg) 1735 .addReg(FPReg) 1736 .addReg(ScratchReg); 1737 } 1738 } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) { 1739 if (HasRedZone) { 1740 BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg) 1741 .addReg(SPReg) 1742 .addImm(FrameSize); 1743 } else { 1744 // Make sure that adding FrameSize will not overflow the max offset 1745 // size. 1746 assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 && 1747 "Local offsets should be negative"); 1748 SPAdd = FrameSize; 1749 FPOffset += FrameSize; 1750 BPOffset += FrameSize; 1751 PBPOffset += FrameSize; 1752 } 1753 } else { 1754 // We don't want to use ScratchReg as a base register, because it 1755 // could happen to be R0. Use FP instead, but make sure to preserve it. 1756 if (!HasRedZone) { 1757 // If FP is not saved, copy it to ScratchReg. 1758 if (!HasFP) 1759 BuildMI(MBB, MBBI, dl, OrInst, ScratchReg) 1760 .addReg(FPReg) 1761 .addReg(FPReg); 1762 RBReg = FPReg; 1763 } 1764 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg) 1765 .addImm(0) 1766 .addReg(SPReg); 1767 } 1768 } 1769 assert(RBReg != ScratchReg && "Should have avoided ScratchReg"); 1770 // If there is no red zone, ScratchReg may be needed for holding a useful 1771 // value (although not the base register). Make sure it is not overwritten 1772 // too early. 1773 1774 // If we need to restore both the LR and the CR and we only have one 1775 // available scratch register, we must do them one at a time. 
1776 if (MustSaveCR && SingleScratchReg && MustSaveLR) { 1777 // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg 1778 // is live here. 1779 assert(HasRedZone && "Expecting red zone"); 1780 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1781 .addImm(CRSaveOffset) 1782 .addReg(SPReg); 1783 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1784 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1785 .addReg(TempReg, getKillRegState(i == e-1)); 1786 } 1787 1788 // Delay restoring of the LR if ScratchReg is needed. This is ok, since 1789 // LR is stored in the caller's stack frame. ScratchReg will be needed 1790 // if RBReg is anything other than SP. We shouldn't use ScratchReg as 1791 // a base register anyway, because it may happen to be R0. 1792 bool LoadedLR = false; 1793 if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) { 1794 BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg) 1795 .addImm(LROffset+SPAdd) 1796 .addReg(RBReg); 1797 LoadedLR = true; 1798 } 1799 1800 if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) { 1801 assert(RBReg == SPReg && "Should be using SP as a base register"); 1802 BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg) 1803 .addImm(CRSaveOffset) 1804 .addReg(RBReg); 1805 } 1806 1807 if (HasFP) { 1808 // If there is red zone, restore FP directly, since SP has already been 1809 // restored. Otherwise, restore the value of FP into ScratchReg. 
1810 if (HasRedZone || RBReg == SPReg) 1811 BuildMI(MBB, MBBI, dl, LoadInst, FPReg) 1812 .addImm(FPOffset) 1813 .addReg(SPReg); 1814 else 1815 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1816 .addImm(FPOffset) 1817 .addReg(RBReg); 1818 } 1819 1820 if (FI->usesPICBase()) 1821 BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30) 1822 .addImm(PBPOffset) 1823 .addReg(RBReg); 1824 1825 if (HasBP) 1826 BuildMI(MBB, MBBI, dl, LoadInst, BPReg) 1827 .addImm(BPOffset) 1828 .addReg(RBReg); 1829 1830 // There is nothing more to be loaded from the stack, so now we can 1831 // restore SP: SP = RBReg + SPAdd. 1832 if (RBReg != SPReg || SPAdd != 0) { 1833 assert(!HasRedZone && "This should not happen with red zone"); 1834 // If SPAdd is 0, generate a copy. 1835 if (SPAdd == 0) 1836 BuildMI(MBB, MBBI, dl, OrInst, SPReg) 1837 .addReg(RBReg) 1838 .addReg(RBReg); 1839 else 1840 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1841 .addReg(RBReg) 1842 .addImm(SPAdd); 1843 1844 assert(RBReg != ScratchReg && "Should be using FP or SP as base register"); 1845 if (RBReg == FPReg) 1846 BuildMI(MBB, MBBI, dl, OrInst, FPReg) 1847 .addReg(ScratchReg) 1848 .addReg(ScratchReg); 1849 1850 // Now load the LR from the caller's stack frame. 1851 if (MustSaveLR && !LoadedLR) 1852 BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg) 1853 .addImm(LROffset) 1854 .addReg(SPReg); 1855 } 1856 1857 if (MustSaveCR && 1858 !(SingleScratchReg && MustSaveLR)) 1859 for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i) 1860 BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i]) 1861 .addReg(TempReg, getKillRegState(i == e-1)); 1862 1863 if (MustSaveLR) { 1864 // If ROP protection is required, an extra instruction is added to compute a 1865 // hash and then compare it to the hash stored in the prologue. 
1866 if (HasROPProtect) { 1867 const int SaveIndex = FI->getROPProtectionHashSaveIndex(); 1868 const int64_t ImmOffset = MFI.getObjectOffset(SaveIndex); 1869 assert((ImmOffset <= -8 && ImmOffset >= -512) && 1870 "ROP hash check location offset out of range."); 1871 assert(((ImmOffset & 0x7) == 0) && 1872 "ROP hash check location offset must be 8 byte aligned."); 1873 BuildMI(MBB, StackUpdateLoc, dl, HashChk) 1874 .addReg(ScratchReg) 1875 .addImm(ImmOffset) 1876 .addReg(SPReg); 1877 } 1878 BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg); 1879 } 1880 1881 // Callee pop calling convention. Pop parameter/linkage area. Used for tail 1882 // call optimization 1883 if (IsReturnBlock) { 1884 unsigned RetOpcode = MBBI->getOpcode(); 1885 if (MF.getTarget().Options.GuaranteedTailCallOpt && 1886 (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) && 1887 MF.getFunction().getCallingConv() == CallingConv::Fast) { 1888 PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>(); 1889 unsigned CallerAllocatedAmt = FI->getMinReservedArea(); 1890 1891 if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) { 1892 BuildMI(MBB, MBBI, dl, AddImmInst, SPReg) 1893 .addReg(SPReg).addImm(CallerAllocatedAmt); 1894 } else { 1895 BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg) 1896 .addImm(CallerAllocatedAmt >> 16); 1897 BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg) 1898 .addReg(ScratchReg, RegState::Kill) 1899 .addImm(CallerAllocatedAmt & 0xFFFF); 1900 BuildMI(MBB, MBBI, dl, AddInst) 1901 .addReg(SPReg) 1902 .addReg(FPReg) 1903 .addReg(ScratchReg); 1904 } 1905 } else { 1906 createTailCallBranchInstr(MBB); 1907 } 1908 } 1909 } 1910 1911 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const { 1912 MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator(); 1913 1914 // If we got this far a first terminator should exist. 
// (continuation of PPCFrameLowering::createTailCallBranchInstr)
  // Callers only invoke this on blocks that end in a (pseudo) return, so a
  // first terminator must exist.
  assert(MBBI != MBB.end() && "Failed to find the first terminator.");

  DebugLoc dl = MBBI->getDebugLoc();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();

  // Create branch instruction for pseudo tail call return instruction.
  // The TCRETURNdi variants are direct calls. Valid targets for those are
  // MO_GlobalAddress operands as well as MO_ExternalSymbol with PC-Rel
  // since we can tail call external functions with PC-Rel (i.e. we don't need
  // to worry about different TOC pointers). Some of the external functions will
  // be MO_GlobalAddress while others like memcpy for example, are going to
  // be MO_ExternalSymbol.
  unsigned RetOpcode = MBBI->getOpcode();
  if (RetOpcode == PPC::TCRETURNdi) {
    // 32-bit direct tail call: lower to TAILB with the original call target.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri) {
    // 32-bit indirect tail call through CTR.
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
  } else if (RetOpcode == PPC::TCRETURNai) {
    // 32-bit tail call to an absolute address.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
  } else if (RetOpcode == PPC::TCRETURNdi8) {
    // 64-bit direct tail call: lower to TAILB8.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    if (JumpTarget.isGlobal())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
    else if (JumpTarget.isSymbol())
      BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
        addExternalSymbol(JumpTarget.getSymbolName());
    else
      llvm_unreachable("Expecting Global or External Symbol");
  } else if (RetOpcode == PPC::TCRETURNri8) {
    // 64-bit indirect tail call through CTR.
    MBBI = MBB.getLastNonDebugInstr();
    assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
  } else if (RetOpcode == PPC::TCRETURNai8) {
    // 64-bit tail call to an absolute address.
    MBBI = MBB.getLastNonDebugInstr();
    MachineOperand &JumpTarget = MBBI->getOperand(0);
    BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
  }
}

/// Decide which callee-saved registers actually need saving for this function
/// and allocate the fixed stack slots (FP, BP, PIC base, nonvolatile CR) that
/// the prologue/epilogue code expects.
void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            BitVector &SavedRegs,
                                            RegScavenger *RS) const {
  TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
  if (Subtarget.isAIXABI())
    updateCalleeSaves(MF, SavedRegs);

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();

  // Do not explicitly save the callee saved VSRp registers.
  // The individual VSR subregisters will be saved instead.
  SavedRegs.reset(PPC::VSRp26);
  SavedRegs.reset(PPC::VSRp27);
  SavedRegs.reset(PPC::VSRp28);
  SavedRegs.reset(PPC::VSRp29);
  SavedRegs.reset(PPC::VSRp30);
  SavedRegs.reset(PPC::VSRp31);

  // Save and clear the LR state. LR is handled specially by the
  // prologue/epilogue emitters, not via a generic CSR spill slot.
  PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
  unsigned LR = RegInfo->getRARegister();
  FI->setMustSaveLR(MustSaveLR(MF, LR));
  SavedRegs.reset(LR);

  // Save R31 if necessary
  int FPSI = FI->getFramePointerSaveIndex();
  const bool isPPC64 = Subtarget.isPPC64();
  MachineFrameInfo &MFI = MF.getFrameInfo();

  // If the frame pointer save index hasn't been defined yet.
  if (!FPSI && needsFP(MF)) {
    // Find out what the fixed offset of the frame pointer save area is.
// (continuation of PPCFrameLowering::determineCalleeSaves)
    int FPOffset = getFramePointerSaveOffset();
    // Allocate the frame index for frame pointer save area.
    FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
    // Save the result.
    FI->setFramePointerSaveIndex(FPSI);
  }

  int BPSI = FI->getBasePointerSaveIndex();
  if (!BPSI && RegInfo->hasBasePointer(MF)) {
    int BPOffset = getBasePointerSaveOffset();
    // Allocate the frame index for the base pointer save area.
    BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
    // Save the result.
    FI->setBasePointerSaveIndex(BPSI);
  }

  // Reserve stack space for the PIC Base register (R30).
  // Only used in SVR4 32-bit.
  if (FI->usesPICBase()) {
    int PBPSI = MFI.CreateFixedObject(4, -8, true);
    FI->setPICBasePointerSaveIndex(PBPSI);
  }

  // Make sure we don't explicitly spill r31, because, for example, we have
  // some inline asm which explicitly clobbers it, when we otherwise have a
  // frame pointer and are using r31's spill slot for the prologue/epilogue
  // code. Same goes for the base pointer and the PIC base register.
  if (needsFP(MF))
    SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
  if (RegInfo->hasBasePointer(MF))
    SavedRegs.reset(RegInfo->getBaseRegister(MF));
  if (FI->usesPICBase())
    SavedRegs.reset(PPC::R30);

  // Reserve stack space to move the linkage area to in case of a tail call.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
    MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
  }

  // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
  // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
  // object at the offset of the CR-save slot in the linkage area. The actual
  // save and restore of the condition register will be created as part of the
  // prologue and epilogue insertion, but the FixedStack object is needed to
  // keep the CalleeSavedInfo valid.
  if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
       SavedRegs.test(PPC::CR4))) {
    const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
    const int64_t SpillOffset =
        Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
    int FrameIdx =
        MFI.CreateFixedObject(SpillSize, SpillOffset,
                              /* IsImmutable */ true, /* IsAliased */ false);
    FI->setCRSpillFrameIndex(FrameIdx);
  }
}

/// Assign final stack-frame offsets to the callee-saved register spill slots
/// (FPR, GPR/G8R, CR and VR/SPE save areas) and add the register-scavenger
/// emergency slot(s).
void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
                                                     RegScavenger *RS) const {
  // Get callee saved register information.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();

  // If the function is shrink-wrapped, and if the function has a tail call, the
  // tail call might not be in the new RestoreBlock, so real branch instruction
  // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
  // RestoreBlock. So we handle this case here.
  if (MFI.getSavePoint() && MFI.hasTailCall()) {
    MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
    for (MachineBasicBlock &MBB : MF) {
      if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
        createTailCallBranchInstr(MBB);
    }
  }

  // Early exit if no callee saved registers are modified!
  if (CSI.empty() && !needsFP(MF)) {
    addScavengingSpillSlot(MF, RS);
    return;
  }

  // Track the lowest-numbered saved register of each class; the save areas
  // extend from that register up to r31/f31/v31.
  unsigned MinGPR = PPC::R31;
  unsigned MinG8R = PPC::X31;
  unsigned MinFPR = PPC::F31;
  unsigned MinVR = Subtarget.hasSPE() ?
// (continuation of PPCFrameLowering::processFunctionBeforeFrameFinalized)
                     PPC::S31 : PPC::V31;

  bool HasGPSaveArea = false;
  bool HasG8SaveArea = false;
  bool HasFPSaveArea = false;
  bool HasVRSaveArea = false;

  SmallVector<CalleeSavedInfo, 18> GPRegs;
  SmallVector<CalleeSavedInfo, 18> G8Regs;
  SmallVector<CalleeSavedInfo, 18> FPRegs;
  SmallVector<CalleeSavedInfo, 18> VRegs;

  // Bucket every callee-saved register by class and remember the minimum
  // register of each class seen.
  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();
    assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
            (Reg != PPC::X2 && Reg != PPC::R2)) &&
           "Not expecting to try to spill R2 in a function that must save TOC");
    if (PPC::GPRCRegClass.contains(Reg)) {
      HasGPSaveArea = true;

      GPRegs.push_back(I);

      if (Reg < MinGPR) {
        MinGPR = Reg;
      }
    } else if (PPC::G8RCRegClass.contains(Reg)) {
      HasG8SaveArea = true;

      G8Regs.push_back(I);

      if (Reg < MinG8R) {
        MinG8R = Reg;
      }
    } else if (PPC::F8RCRegClass.contains(Reg)) {
      HasFPSaveArea = true;

      FPRegs.push_back(I);

      if (Reg < MinFPR) {
        MinFPR = Reg;
      }
    } else if (PPC::CRBITRCRegClass.contains(Reg) ||
               PPC::CRRCRegClass.contains(Reg)) {
      ; // do nothing, as we already know whether CRs are spilled
    } else if (PPC::VRRCRegClass.contains(Reg) ||
               PPC::SPERCRegClass.contains(Reg)) {
      // Altivec and SPE are mutually exclusive, but have the same stack
      // alignment requirements, so overload the save area for both cases.
      HasVRSaveArea = true;

      VRegs.push_back(I);

      if (Reg < MinVR) {
        MinVR = Reg;
      }
    } else {
      llvm_unreachable("Unknown RegisterClass!");
    }
  }

  PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
  const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();

  // LowerBound walks downward as each save area is laid out below the
  // previous one (the stack grows down, so offsets are <= 0).
  int64_t LowerBound = 0;

  // Take into account stack space reserved for tail calls.
  int TCSPDelta = 0;
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
    LowerBound = TCSPDelta;
  }

  // The Floating-point register save area is right below the back chain word
  // of the previous stack frame.
  if (HasFPSaveArea) {
    for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
      int FI = FPRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }

    LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
  }

  // Check whether the frame pointer register is allocated. If so, make sure it
  // is spilled to the correct offset.
  if (needsFP(MF)) {
    int FI = PFI->getFramePointerSaveIndex();
    assert(FI && "No Frame Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    // FP is R31/X31, so no need to update MinGPR/MinG8R.
    HasGPSaveArea = true;
  }

  if (PFI->usesPICBase()) {
    int FI = PFI->getPICBasePointerSaveIndex();
    assert(FI && "No PIC Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
    HasGPSaveArea = true;
  }

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  if (RegInfo->hasBasePointer(MF)) {
    int FI = PFI->getBasePointerSaveIndex();
    assert(FI && "No Base Pointer Save Slot!");
    MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));

    Register BP = RegInfo->getBaseRegister(MF);
    if (PPC::G8RCRegClass.contains(BP)) {
      MinG8R = std::min<unsigned>(MinG8R, BP);
      HasG8SaveArea = true;
    } else if (PPC::GPRCRegClass.contains(BP)) {
      MinGPR = std::min<unsigned>(MinGPR, BP);
      HasGPSaveArea = true;
    }
  }

  // General register save area starts right below the Floating-point
  // register save area.
  if (HasGPSaveArea || HasG8SaveArea) {
    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
      if (!GPRegs[i].isSpilledToReg()) {
        int FI = GPRegs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    // Move general register save area spill slots down, taking into account
    // the size of the Floating-point register save area.
    for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
      if (!G8Regs[i].isSpilledToReg()) {
        int FI = G8Regs[i].getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
      }
    }

    unsigned MinReg =
      std::min<unsigned>(TRI->getEncodingValue(MinGPR),
                         TRI->getEncodingValue(MinG8R));

    const unsigned GPRegSize = Subtarget.isPPC64() ? 8 : 4;
    LowerBound -= (31 - MinReg + 1) * GPRegSize;
  }

  // For 32-bit only, the CR save area is below the general register
  // save area. For 64-bit SVR4, the CR save area is addressed relative
  // to the stack pointer and hence does not need an adjustment here.
  // Only CR2 (the first nonvolatile spilled) has an associated frame
  // index so that we have a single uniform save area.
  if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
    // Adjust the frame index of the CR spill slot.
    for (const auto &CSInfo : CSI) {
      if (CSInfo.getReg() == PPC::CR2) {
        int FI = CSInfo.getFrameIdx();
        MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
        break;
      }
    }

    LowerBound -= 4; // The CR save area is always 4 bytes long.
  }

  // Both Altivec and SPE have the same alignment and padding requirements
  // within the stack frame.
  if (HasVRSaveArea) {
    // Insert alignment padding, we need 16-byte alignment. Note: for positive
    // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
    // we are using negative number here (the stack grows downward). We should
    // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
    // is the alignment size ( n = 16 here) and y is the size after aligning.
    assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
    LowerBound &= ~(15);

    for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
      int FI = VRegs[i].getFrameIdx();

      MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
    }
  }

  addScavengingSpillSlot(MF, RS);
}

void
PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
                                         RegScavenger *RS) const {
  // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
  // a large stack, which will require scavenging a register to materialize a
  // large offset.

  // We need to have a scavenger spill slot for spills if the frame size is
  // large. In case there is no free register for large-offset addressing,
  // this slot is used for the necessary emergency spill. Also, we need the
  // slot for dynamic stack allocations.

  // The scavenger might be invoked if the frame offset does not fit into
  // the 16-bit immediate in case of not SPE and 8-bit in case of SPE.
  // We don't know the complete frame size here because we've not yet computed
  // callee-saved register spills or the needed alignment padding.
  unsigned StackSize = determineFrameLayout(MF, true);
  MachineFrameInfo &MFI = MF.getFrameInfo();
  bool NeedSpills = Subtarget.hasSPE() ?
// (continuation of PPCFrameLowering::addScavengingSpillSlot)
                    !isInt<8>(StackSize) : !isInt<16>(StackSize);

  if (MFI.hasVarSizedObjects() || spillsCR(MF) || hasNonRISpills(MF) ||
      (hasSpills(MF) && NeedSpills)) {
    const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
    const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
    const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
    const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
    unsigned Size = TRI.getSpillSize(RC);
    Align Alignment = TRI.getSpillAlign(RC);
    RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Alignment, false));

    // Might we have over-aligned allocas?
    bool HasAlVars =
        MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();

    // These kinds of spills might need two registers.
    if (spillsCR(MF) || HasAlVars)
      RS->addScavengingFrameIndex(
          MFI.CreateStackObject(Size, Alignment, false));
  }
}

// This function checks if a callee saved gpr can be spilled to a volatile
// vector register. This occurs for leaf functions when the option
// ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
// which were not spilled to vectors, return false so the target independent
// code can handle them by assigning a FrameIdx to a stack slot.
bool PPCFrameLowering::assignCalleeSavedSpillSlots(
    MachineFunction &MF, const TargetRegisterInfo *TRI,
    std::vector<CalleeSavedInfo> &CSI) const {

  if (CSI.empty())
    return true; // Early exit if no callee saved registers are modified!

  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
  const MachineRegisterInfo &MRI = MF.getRegInfo();

  if (Subtarget.hasSPE()) {
    // In case of SPE we only have SuperRegs and CRs
    // in our CalleeSavedInfo vector.

    for (auto &CalleeSaveReg : CSI) {
      MCPhysReg Reg = CalleeSaveReg.getReg();
      MCPhysReg Lower = RegInfo->getSubReg(Reg, 1);
      MCPhysReg Higher = RegInfo->getSubReg(Reg, 2);

      if ( // Check only for SuperRegs.
          Lower &&
          // Replace Reg if only lower-32 bits modified
          !MRI.isPhysRegModified(Higher))
        CalleeSaveReg = CalleeSavedInfo(Lower);
    }
  }

  // Early exit if cannot spill gprs to volatile vector registers.
  MachineFrameInfo &MFI = MF.getFrameInfo();
  if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
    return false;

  // Build a BitVector of VSRs that can be used for spilling GPRs.
  BitVector BVAllocatable = TRI->getAllocatableSet(MF);
  BitVector BVCalleeSaved(TRI->getNumRegs());
  for (unsigned i = 0; CSRegs[i]; ++i)
    BVCalleeSaved.set(CSRegs[i]);

  for (unsigned Reg : BVAllocatable.set_bits()) {
    // Set to 0 if the register is not a volatile VSX register, or if it is
    // used in the function.
    if (BVCalleeSaved[Reg] || !PPC::VSRCRegClass.contains(Reg) ||
        MRI.isPhysRegUsed(Reg))
      BVAllocatable.reset(Reg);
  }

  bool AllSpilledToReg = true;
  unsigned LastVSRUsedForSpill = 0;
  for (auto &CS : CSI) {
    if (BVAllocatable.none())
      return false;

    Register Reg = CS.getReg();

    if (!PPC::G8RCRegClass.contains(Reg)) {
      AllSpilledToReg = false;
      continue;
    }

    // For P9, we can reuse LastVSRUsedForSpill to spill two GPRs
    // into one VSR using the mtvsrdd instruction.
    if (LastVSRUsedForSpill != 0) {
      CS.setDstReg(LastVSRUsedForSpill);
      BVAllocatable.reset(LastVSRUsedForSpill);
      LastVSRUsedForSpill = 0;
      continue;
    }

    unsigned VolatileVFReg = BVAllocatable.find_first();
    if (VolatileVFReg < BVAllocatable.size()) {
      CS.setDstReg(VolatileVFReg);
      LastVSRUsedForSpill = VolatileVFReg;
    } else {
      AllSpilledToReg = false;
    }
  }
  return AllSpilledToReg;
}

/// Emit the callee-saved register spills at the given insertion point.
/// GPR pairs marked by assignCalleeSavedSpillSlots go to VSRs via
/// mtvsrdd/mtvsrd; nonvolatile CR fields go through R12; everything else is
/// stored to its assigned stack slot.
bool PPCFrameLowering::spillCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  DebugLoc DL;
  bool CRSpilled = false;
  MachineInstrBuilder CRMIB;
  BitVector Spilled(TRI->getNumRegs());

  VSRContainingGPRs.clear();

  // Map each VSR to GPRs to be spilled with into it. Single VSR can contain one
  // or two GPRs, so we need table to record information for later save/restore.
  for (const CalleeSavedInfo &Info : CSI) {
    if (Info.isSpilledToReg()) {
      auto &SpilledVSR =
          VSRContainingGPRs.FindAndConstruct(Info.getDstReg()).second;
      assert(SpilledVSR.second == 0 &&
             "Can't spill more than two GPRs into VSR!");
      if (SpilledVSR.first == 0)
        SpilledVSR.first = Info.getReg();
      else
        SpilledVSR.second = Info.getReg();
    }
  }

  for (const CalleeSavedInfo &I : CSI) {
    Register Reg = I.getReg();

    // CR2 through CR4 are the nonvolatile CR fields.
    bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;

    // Add the callee-saved register as live-in; it's killed at the spill.
// (continuation of PPCFrameLowering::spillCalleeSavedRegisters)
    // Do not do this for callee-saved registers that are live-in to the
    // function because they will already be marked live-in and this will be
    // adding it for a second time. It is an error to add the same register
    // to the set more than once.
    const MachineRegisterInfo &MRI = MF->getRegInfo();
    bool IsLiveIn = MRI.isLiveIn(Reg);
    if (!IsLiveIn)
      MBB.addLiveIn(Reg);

    if (CRSpilled && IsCRField) {
      // The MFCR built for the first CR field already covers this one; just
      // record the extra implicit kill on that instruction.
      CRMIB.addReg(Reg, RegState::ImplicitKill);
      continue;
    }

    // The actual spill will happen in the prologue.
    if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
      continue;

    // Insert the spill to the stack frame.
    if (IsCRField) {
      PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
      if (!Subtarget.is32BitELFABI()) {
        // The actual spill will happen at the start of the prologue.
        FuncInfo->addMustSaveCR(Reg);
      } else {
        CRSpilled = true;
        FuncInfo->setSpillsCR();

        // 32-bit: FP-relative. Note that we made sure CR2-CR4 all have
        // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
        CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
                    .addReg(Reg, RegState::ImplicitKill);

        MBB.insert(MI, CRMIB);
        MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
                                             .addReg(PPC::R12,
                                                     getKillRegState(true)),
                                         I.getFrameIdx()));
      }
    } else {
      if (I.isSpilledToReg()) {
        unsigned Dst = I.getDstReg();

        // A VSR that holds two GPRs is filled by a single instruction; skip
        // the second GPR once its destination VSR has been written.
        if (Spilled[Dst])
          continue;

        if (VSRContainingGPRs[Dst].second != 0) {
          assert(Subtarget.hasP9Vector() &&
                 "mtvsrdd is unavailable on pre-P9 targets.");

          NumPESpillVSR += 2;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRDD), Dst)
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true))
              .addReg(VSRContainingGPRs[Dst].second, getKillRegState(true));
        } else if (VSRContainingGPRs[Dst].second == 0) {
          assert(Subtarget.hasP8Vector() &&
                 "Can't move GPR to VSR on pre-P8 targets.");

          ++NumPESpillVSR;
          BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD),
                  TRI->getSubReg(Dst, PPC::sub_64))
              .addReg(VSRContainingGPRs[Dst].first, getKillRegState(true));
        } else {
          llvm_unreachable("More than two GPRs spilled to a VSR!");
        }
        Spilled.set(Dst);
      } else {
        const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
        // Use !IsLiveIn for the kill flag.
        // We do not want to kill registers that are live in this function
        // before their use because they will become undefined registers.
        // Functions without NoUnwind need to preserve the order of elements in
        // saved vector registers.
        if (Subtarget.needsSwapsForVSXMemOps() &&
            !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
          TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
                                       I.getFrameIdx(), RC, TRI);
        else
          TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, I.getFrameIdx(), RC,
                                  TRI, Register());
      }
    }
  }
  return true;
}

/// Reload the nonvolatile CR fields (CR2-CR4) from the single shared spill
/// slot (32-bit ELF only): one LWZ into R12, then one MTOCRF per spilled
/// field. The is31 parameter is currently unused here — presumably a
/// historical FP-vs-R31 flag; confirm with callers before removing.
static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
                       bool CR4Spilled, MachineBasicBlock &MBB,
                       MachineBasicBlock::iterator MI,
                       ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {

  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
  DebugLoc DL;
  unsigned MoveReg = PPC::R12;

  // 32-bit: FP-relative
  MBB.insert(MI,
             addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
                               CSI[CSIIndex].getFrameIdx()));

  unsigned RestoreOp = PPC::MTOCRF;
  // R12 is killed by whichever MTOCRF is the last one emitted.
  if (CR2Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
               .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));

  if (CR3Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
               .addReg(MoveReg, getKillRegState(!CR4Spilled)));

  if (CR4Spilled)
    MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
               .addReg(MoveReg, getKillRegState(true)));
}

/// Remove ADJCALLSTACKDOWN/ADJCALLSTACKUP pseudos. With GuaranteedTailCallOpt,
/// re-apply the stack adjustment the callee popped on return (immediate ADDI
/// if it fits in 16 bits, otherwise LIS/ORI/ADD through r0).
MachineBasicBlock::iterator PPCFrameLowering::
eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
                              MachineBasicBlock::iterator I) const {
  const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
  if (MF.getTarget().Options.GuaranteedTailCallOpt &&
      I->getOpcode() == PPC::ADJCALLSTACKUP) {
    // Add (actually subtract) back the amount the callee popped on return.
    if (int CalleeAmt = I->getOperand(1).getImm()) {
      bool is64Bit = Subtarget.isPPC64();
      CalleeAmt *= -1;
      unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
      unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
      unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
      unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
      unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
      unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
      const DebugLoc &dl = I->getDebugLoc();

      if (isInt<16>(CalleeAmt)) {
        BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
            .addReg(StackReg, RegState::Kill)
            .addImm(CalleeAmt);
      } else {
        // Amount doesn't fit a 16-bit immediate: materialize it in TmpReg.
        MachineBasicBlock::iterator MBBI = I;
        BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
            .addImm(CalleeAmt >> 16);
        BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
            .addReg(TmpReg, RegState::Kill)
            .addImm(CalleeAmt & 0xFFFF);
        BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
            .addReg(StackReg, RegState::Kill)
            .addReg(TmpReg);
      }
    }
  }
  // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
  return MBB.erase(I);
}

/// True for the nonvolatile condition-register fields CR2-CR4.
static bool isCalleeSavedCR(unsigned Reg) {
  return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
}

bool PPCFrameLowering::restoreCalleeSavedRegisters(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
    MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
  MachineFunction *MF = MBB.getParent();
  const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
  PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
  bool MustSaveTOC = FI->mustSaveTOC();
  bool CR2Spilled = false;
  bool CR3Spilled = false;
  bool CR4Spilled = false;
  unsigned CSIIndex = 0;
  BitVector Restored(TRI->getNumRegs());

  // Initialize insertion-point logic; we will be restoring in reverse
  // order of spill.
2610 MachineBasicBlock::iterator I = MI, BeforeI = I; 2611 bool AtStart = I == MBB.begin(); 2612 2613 if (!AtStart) 2614 --BeforeI; 2615 2616 for (unsigned i = 0, e = CSI.size(); i != e; ++i) { 2617 Register Reg = CSI[i].getReg(); 2618 2619 if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC) 2620 continue; 2621 2622 // Restore of callee saved condition register field is handled during 2623 // epilogue insertion. 2624 if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI()) 2625 continue; 2626 2627 if (Reg == PPC::CR2) { 2628 CR2Spilled = true; 2629 // The spill slot is associated only with CR2, which is the 2630 // first nonvolatile spilled. Save it here. 2631 CSIIndex = i; 2632 continue; 2633 } else if (Reg == PPC::CR3) { 2634 CR3Spilled = true; 2635 continue; 2636 } else if (Reg == PPC::CR4) { 2637 CR4Spilled = true; 2638 continue; 2639 } else { 2640 // On 32-bit ELF when we first encounter a non-CR register after seeing at 2641 // least one CR register, restore all spilled CRs together. 
2642 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2643 bool is31 = needsFP(*MF); 2644 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, 2645 CSIIndex); 2646 CR2Spilled = CR3Spilled = CR4Spilled = false; 2647 } 2648 2649 if (CSI[i].isSpilledToReg()) { 2650 DebugLoc DL; 2651 unsigned Dst = CSI[i].getDstReg(); 2652 2653 if (Restored[Dst]) 2654 continue; 2655 2656 if (VSRContainingGPRs[Dst].second != 0) { 2657 assert(Subtarget.hasP9Vector()); 2658 NumPEReloadVSR += 2; 2659 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRLD), 2660 VSRContainingGPRs[Dst].second) 2661 .addReg(Dst); 2662 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), 2663 VSRContainingGPRs[Dst].first) 2664 .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); 2665 } else if (VSRContainingGPRs[Dst].second == 0) { 2666 assert(Subtarget.hasP8Vector()); 2667 ++NumPEReloadVSR; 2668 BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), 2669 VSRContainingGPRs[Dst].first) 2670 .addReg(TRI->getSubReg(Dst, PPC::sub_64), getKillRegState(true)); 2671 } else { 2672 llvm_unreachable("More than two GPRs spilled to a VSR!"); 2673 } 2674 2675 Restored.set(Dst); 2676 2677 } else { 2678 // Default behavior for non-CR saves. 2679 const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); 2680 2681 // Functions without NoUnwind need to preserve the order of elements in 2682 // saved vector registers. 2683 if (Subtarget.needsSwapsForVSXMemOps() && 2684 !MF->getFunction().hasFnAttribute(Attribute::NoUnwind)) 2685 TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC, 2686 TRI); 2687 else 2688 TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI, 2689 Register()); 2690 2691 assert(I != MBB.begin() && 2692 "loadRegFromStackSlot didn't insert any code!"); 2693 } 2694 } 2695 2696 // Insert in reverse order. 2697 if (AtStart) 2698 I = MBB.begin(); 2699 else { 2700 I = BeforeI; 2701 ++I; 2702 } 2703 } 2704 2705 // If we haven't yet spilled the CRs, do so now. 
2706 if (CR2Spilled || CR3Spilled || CR4Spilled) { 2707 assert(Subtarget.is32BitELFABI() && 2708 "Only set CR[2|3|4]Spilled on 32-bit SVR4."); 2709 bool is31 = needsFP(*MF); 2710 restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex); 2711 } 2712 2713 return true; 2714 } 2715 2716 uint64_t PPCFrameLowering::getTOCSaveOffset() const { 2717 return TOCSaveOffset; 2718 } 2719 2720 uint64_t PPCFrameLowering::getFramePointerSaveOffset() const { 2721 return FramePointerSaveOffset; 2722 } 2723 2724 uint64_t PPCFrameLowering::getBasePointerSaveOffset() const { 2725 return BasePointerSaveOffset; 2726 } 2727 2728 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { 2729 if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled()) 2730 return false; 2731 return !MF.getSubtarget<PPCSubtarget>().is32BitELFABI(); 2732 } 2733 2734 void PPCFrameLowering::updateCalleeSaves(const MachineFunction &MF, 2735 BitVector &SavedRegs) const { 2736 // The AIX ABI uses traceback tables for EH which require that if callee-saved 2737 // register N is used, all registers N-31 must be saved/restored. 2738 // NOTE: The check for AIX is not actually what is relevant. Traceback tables 2739 // on Linux have the same requirements. It is just that AIX is the only ABI 2740 // for which we actually use traceback tables. If another ABI needs to be 2741 // supported that also uses them, we can add a check such as 2742 // Subtarget.usesTraceBackTables(). 2743 assert(Subtarget.isAIXABI() && 2744 "Function updateCalleeSaves should only be called for AIX."); 2745 2746 // If there are no callee saves then there is nothing to do. 
2747 if (SavedRegs.none()) 2748 return; 2749 2750 const MCPhysReg *CSRegs = 2751 Subtarget.getRegisterInfo()->getCalleeSavedRegs(&MF); 2752 MCPhysReg LowestGPR = PPC::R31; 2753 MCPhysReg LowestG8R = PPC::X31; 2754 MCPhysReg LowestFPR = PPC::F31; 2755 MCPhysReg LowestVR = PPC::V31; 2756 2757 // Traverse the CSRs twice so as not to rely on ascending ordering of 2758 // registers in the array. The first pass finds the lowest numbered 2759 // register and the second pass marks all higher numbered registers 2760 // for spilling. 2761 for (int i = 0; CSRegs[i]; i++) { 2762 // Get the lowest numbered register for each class that actually needs 2763 // to be saved. 2764 MCPhysReg Cand = CSRegs[i]; 2765 if (!SavedRegs.test(Cand)) 2766 continue; 2767 if (PPC::GPRCRegClass.contains(Cand) && Cand < LowestGPR) 2768 LowestGPR = Cand; 2769 else if (PPC::G8RCRegClass.contains(Cand) && Cand < LowestG8R) 2770 LowestG8R = Cand; 2771 else if ((PPC::F4RCRegClass.contains(Cand) || 2772 PPC::F8RCRegClass.contains(Cand)) && 2773 Cand < LowestFPR) 2774 LowestFPR = Cand; 2775 else if (PPC::VRRCRegClass.contains(Cand) && Cand < LowestVR) 2776 LowestVR = Cand; 2777 } 2778 2779 for (int i = 0; CSRegs[i]; i++) { 2780 MCPhysReg Cand = CSRegs[i]; 2781 if ((PPC::GPRCRegClass.contains(Cand) && Cand > LowestGPR) || 2782 (PPC::G8RCRegClass.contains(Cand) && Cand > LowestG8R) || 2783 ((PPC::F4RCRegClass.contains(Cand) || 2784 PPC::F8RCRegClass.contains(Cand)) && 2785 Cand > LowestFPR) || 2786 (PPC::VRRCRegClass.contains(Cand) && Cand > LowestVR)) 2787 SavedRegs.set(Cand); 2788 } 2789 } 2790 2791 uint64_t PPCFrameLowering::getStackThreshold() const { 2792 // On PPC64, we use `stux r1, r1, <scratch_reg>` to extend the stack; 2793 // use `add r1, r1, <scratch_reg>` to release the stack frame. 2794 // Scratch register contains a signed 64-bit number, which is negative 2795 // when extending the stack and is positive when releasing the stack frame. 
2796 // To make `stux` and `add` paired, the absolute value of the number contained 2797 // in the scratch register should be the same. Thus the maximum stack size 2798 // is (2^63)-1, i.e., LONG_MAX. 2799 if (Subtarget.isPPC64()) 2800 return LONG_MAX; 2801 2802 return TargetFrameLowering::getStackThreshold(); 2803 } 2804