xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 9bd22fec0d7bee6fa32479ba090b9c89656c0a3c)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
34 
35 static cl::opt<bool>
36 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
37                      cl::desc("Enable spills in prologue to vector registers."),
38                      cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isDarwinABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   return STI.isELFv2ABI() ? 24 : 40;
58 }
59 
60 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
61   // For the Darwin ABI:
62   // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
63   // for saving the frame pointer (if needed.)  While the published ABI has
64   // not used this slot since at least MacOSX 10.2, there is older code
65   // around that does use it, and that needs to continue to work.
66   if (STI.isDarwinABI())
67     return STI.isPPC64() ? -8U : -4U;
68 
69   // SVR4 ABI: First slot in the general register save area.
70   return STI.isPPC64() ? -8U : -4U;
71 }
72 
73 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
74   if ((STI.isDarwinABI() || STI.isAIXABI()) || STI.isPPC64())
75     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
76 
77   // 32-bit SVR4 ABI:
78   return 8;
79 }
80 
81 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
82   if (STI.isDarwinABI())
83     return STI.isPPC64() ? -16U : -8U;
84 
85   // SVR4 ABI: First slot in the general register save area.
86   return STI.isPPC64()
87              ? -16U
88              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
89 }
90 
/// Offset used for the condition register save slot.
static unsigned computeCRSaveOffset() {
  // The condition register save offset needs to be updated for AIX PPC32.
  const unsigned CRSlotOffset = 8;
  return CRSlotOffset;
}
95 
96 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
97     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
98                           STI.getPlatformStackAlignment(), 0),
99       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
100       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
101       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
102       LinkageSize(computeLinkageSize(Subtarget)),
103       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
104       CRSaveOffset(computeCRSaveOffset()) {}
105 
106 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
107 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
108     unsigned &NumEntries) const {
109   if (Subtarget.isDarwinABI()) {
110     NumEntries = 1;
111     if (Subtarget.isPPC64()) {
112       static const SpillSlot darwin64Offsets = {PPC::X31, -8};
113       return &darwin64Offsets;
114     } else {
115       static const SpillSlot darwinOffsets = {PPC::R31, -4};
116       return &darwinOffsets;
117     }
118   }
119 
120   // Early exit if not using the SVR4 ABI.
121   if (!Subtarget.isSVR4ABI()) {
122     NumEntries = 0;
123     return nullptr;
124   }
125 
126   // Note that the offsets here overlap, but this is fixed up in
127   // processFunctionBeforeFrameFinalized.
128 
129   static const SpillSlot Offsets[] = {
130       // Floating-point register save area offsets.
131       {PPC::F31, -8},
132       {PPC::F30, -16},
133       {PPC::F29, -24},
134       {PPC::F28, -32},
135       {PPC::F27, -40},
136       {PPC::F26, -48},
137       {PPC::F25, -56},
138       {PPC::F24, -64},
139       {PPC::F23, -72},
140       {PPC::F22, -80},
141       {PPC::F21, -88},
142       {PPC::F20, -96},
143       {PPC::F19, -104},
144       {PPC::F18, -112},
145       {PPC::F17, -120},
146       {PPC::F16, -128},
147       {PPC::F15, -136},
148       {PPC::F14, -144},
149 
150       // General register save area offsets.
151       {PPC::R31, -4},
152       {PPC::R30, -8},
153       {PPC::R29, -12},
154       {PPC::R28, -16},
155       {PPC::R27, -20},
156       {PPC::R26, -24},
157       {PPC::R25, -28},
158       {PPC::R24, -32},
159       {PPC::R23, -36},
160       {PPC::R22, -40},
161       {PPC::R21, -44},
162       {PPC::R20, -48},
163       {PPC::R19, -52},
164       {PPC::R18, -56},
165       {PPC::R17, -60},
166       {PPC::R16, -64},
167       {PPC::R15, -68},
168       {PPC::R14, -72},
169 
170       // CR save area offset.  We map each of the nonvolatile CR fields
171       // to the slot for CR2, which is the first of the nonvolatile CR
172       // fields to be assigned, so that we only allocate one save slot.
173       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
174       {PPC::CR2, -4},
175 
176       // VRSAVE save area offset.
177       {PPC::VRSAVE, -4},
178 
179       // Vector register save area
180       {PPC::V31, -16},
181       {PPC::V30, -32},
182       {PPC::V29, -48},
183       {PPC::V28, -64},
184       {PPC::V27, -80},
185       {PPC::V26, -96},
186       {PPC::V25, -112},
187       {PPC::V24, -128},
188       {PPC::V23, -144},
189       {PPC::V22, -160},
190       {PPC::V21, -176},
191       {PPC::V20, -192},
192 
193       // SPE register save area (overlaps Vector save area).
194       {PPC::S31, -8},
195       {PPC::S30, -16},
196       {PPC::S29, -24},
197       {PPC::S28, -32},
198       {PPC::S27, -40},
199       {PPC::S26, -48},
200       {PPC::S25, -56},
201       {PPC::S24, -64},
202       {PPC::S23, -72},
203       {PPC::S22, -80},
204       {PPC::S21, -88},
205       {PPC::S20, -96},
206       {PPC::S19, -104},
207       {PPC::S18, -112},
208       {PPC::S17, -120},
209       {PPC::S16, -128},
210       {PPC::S15, -136},
211       {PPC::S14, -144}};
212 
213   static const SpillSlot Offsets64[] = {
214       // Floating-point register save area offsets.
215       {PPC::F31, -8},
216       {PPC::F30, -16},
217       {PPC::F29, -24},
218       {PPC::F28, -32},
219       {PPC::F27, -40},
220       {PPC::F26, -48},
221       {PPC::F25, -56},
222       {PPC::F24, -64},
223       {PPC::F23, -72},
224       {PPC::F22, -80},
225       {PPC::F21, -88},
226       {PPC::F20, -96},
227       {PPC::F19, -104},
228       {PPC::F18, -112},
229       {PPC::F17, -120},
230       {PPC::F16, -128},
231       {PPC::F15, -136},
232       {PPC::F14, -144},
233 
234       // General register save area offsets.
235       {PPC::X31, -8},
236       {PPC::X30, -16},
237       {PPC::X29, -24},
238       {PPC::X28, -32},
239       {PPC::X27, -40},
240       {PPC::X26, -48},
241       {PPC::X25, -56},
242       {PPC::X24, -64},
243       {PPC::X23, -72},
244       {PPC::X22, -80},
245       {PPC::X21, -88},
246       {PPC::X20, -96},
247       {PPC::X19, -104},
248       {PPC::X18, -112},
249       {PPC::X17, -120},
250       {PPC::X16, -128},
251       {PPC::X15, -136},
252       {PPC::X14, -144},
253 
254       // VRSAVE save area offset.
255       {PPC::VRSAVE, -4},
256 
257       // Vector register save area
258       {PPC::V31, -16},
259       {PPC::V30, -32},
260       {PPC::V29, -48},
261       {PPC::V28, -64},
262       {PPC::V27, -80},
263       {PPC::V26, -96},
264       {PPC::V25, -112},
265       {PPC::V24, -128},
266       {PPC::V23, -144},
267       {PPC::V22, -160},
268       {PPC::V21, -176},
269       {PPC::V20, -192}};
270 
271   if (Subtarget.isPPC64()) {
272     NumEntries = array_lengthof(Offsets64);
273 
274     return Offsets64;
275   } else {
276     NumEntries = array_lengthof(Offsets);
277 
278     return Offsets;
279   }
280 }
281 
282 /// RemoveVRSaveCode - We have found that this function does not need any code
283 /// to manipulate the VRSAVE register, even though it uses vector registers.
284 /// This can happen when the only registers used are known to be live in or out
285 /// of the function.  Remove all of the VRSAVE related code from the function.
286 /// FIXME: The removal of the code results in a compile failure at -O0 when the
287 /// function contains a function call, as the GPR containing original VRSAVE
288 /// contents is spilled and reloaded around the call.  Without the prolog code,
289 /// the spill instruction refers to an undefined register.  This code needs
290 /// to account for all uses of that GPR.
291 static void RemoveVRSaveCode(MachineInstr &MI) {
292   MachineBasicBlock *Entry = MI.getParent();
293   MachineFunction *MF = Entry->getParent();
294 
295   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
296   MachineBasicBlock::iterator MBBI = MI;
297   ++MBBI;
298   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
299   MBBI->eraseFromParent();
300 
301   bool RemovedAllMTVRSAVEs = true;
302   // See if we can find and remove the MTVRSAVE instruction from all of the
303   // epilog blocks.
304   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
305     // If last instruction is a return instruction, add an epilogue
306     if (I->isReturnBlock()) {
307       bool FoundIt = false;
308       for (MBBI = I->end(); MBBI != I->begin(); ) {
309         --MBBI;
310         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
311           MBBI->eraseFromParent();  // remove it.
312           FoundIt = true;
313           break;
314         }
315       }
316       RemovedAllMTVRSAVEs &= FoundIt;
317     }
318   }
319 
320   // If we found and removed all MTVRSAVE instructions, remove the read of
321   // VRSAVE as well.
322   if (RemovedAllMTVRSAVEs) {
323     MBBI = MI;
324     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
325     --MBBI;
326     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
327     MBBI->eraseFromParent();
328   }
329 
330   // Finally, nuke the UPDATE_VRSAVE.
331   MI.eraseFromParent();
332 }
333 
334 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
335 // instruction selector.  Based on the vector registers that have been used,
336 // transform this into the appropriate ORI instruction.
337 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
338   MachineFunction *MF = MI.getParent()->getParent();
339   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
340   DebugLoc dl = MI.getDebugLoc();
341 
342   const MachineRegisterInfo &MRI = MF->getRegInfo();
343   unsigned UsedRegMask = 0;
344   for (unsigned i = 0; i != 32; ++i)
345     if (MRI.isPhysRegModified(VRRegNo[i]))
346       UsedRegMask |= 1 << (31-i);
347 
348   // Live in and live out values already must be in the mask, so don't bother
349   // marking them.
350   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
351     unsigned RegNo = TRI->getEncodingValue(LI.first);
352     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
353       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
354   }
355 
356   // Live out registers appear as use operands on return instructions.
357   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
358        UsedRegMask != 0 && BI != BE; ++BI) {
359     const MachineBasicBlock &MBB = *BI;
360     if (!MBB.isReturnBlock())
361       continue;
362     const MachineInstr &Ret = MBB.back();
363     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
364       const MachineOperand &MO = Ret.getOperand(I);
365       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
366         continue;
367       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
368       UsedRegMask &= ~(1 << (31-RegNo));
369     }
370   }
371 
372   // If no registers are used, turn this into a copy.
373   if (UsedRegMask == 0) {
374     // Remove all VRSAVE code.
375     RemoveVRSaveCode(MI);
376     return;
377   }
378 
379   unsigned SrcReg = MI.getOperand(1).getReg();
380   unsigned DstReg = MI.getOperand(0).getReg();
381 
382   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
383     if (DstReg != SrcReg)
384       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
385           .addReg(SrcReg)
386           .addImm(UsedRegMask);
387     else
388       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
389           .addReg(SrcReg, RegState::Kill)
390           .addImm(UsedRegMask);
391   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
392     if (DstReg != SrcReg)
393       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
394           .addReg(SrcReg)
395           .addImm(UsedRegMask >> 16);
396     else
397       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
398           .addReg(SrcReg, RegState::Kill)
399           .addImm(UsedRegMask >> 16);
400   } else {
401     if (DstReg != SrcReg)
402       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
403           .addReg(SrcReg)
404           .addImm(UsedRegMask >> 16);
405     else
406       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
407           .addReg(SrcReg, RegState::Kill)
408           .addImm(UsedRegMask >> 16);
409 
410     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
411         .addReg(DstReg, RegState::Kill)
412         .addImm(UsedRegMask & 0xFFFF);
413   }
414 
415   // Remove the old UPDATE_VRSAVE instruction.
416   MI.eraseFromParent();
417 }
418 
419 static bool spillsCR(const MachineFunction &MF) {
420   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
421   return FuncInfo->isCRSpilled();
422 }
423 
424 static bool spillsVRSAVE(const MachineFunction &MF) {
425   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
426   return FuncInfo->isVRSAVESpilled();
427 }
428 
429 static bool hasSpills(const MachineFunction &MF) {
430   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
431   return FuncInfo->hasSpills();
432 }
433 
434 static bool hasNonRISpills(const MachineFunction &MF) {
435   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
436   return FuncInfo->hasNonRISpills();
437 }
438 
439 /// MustSaveLR - Return true if this function requires that we save the LR
440 /// register onto the stack in the prolog and restore it in the epilog of the
441 /// function.
442 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
443   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
444 
445   // We need a save/restore of LR if there is any def of LR (which is
446   // defined by calls, including the PIC setup sequence), or if there is
447   // some use of the LR stack slot (e.g. for builtin_return_address).
448   // (LR comes in 32 and 64 bit versions.)
449   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
450   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
451 }
452 
453 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
454 /// call frame size. Update the MachineFunction object with the stack size.
455 unsigned
456 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
457                                                 bool UseEstimate) const {
458   unsigned NewMaxCallFrameSize = 0;
459   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
460                                             &NewMaxCallFrameSize);
461   MF.getFrameInfo().setStackSize(FrameSize);
462   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
463   return FrameSize;
464 }
465 
466 /// determineFrameLayout - Determine the size of the frame and maximum call
467 /// frame size.
468 unsigned
469 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
470                                        bool UseEstimate,
471                                        unsigned *NewMaxCallFrameSize) const {
472   const MachineFrameInfo &MFI = MF.getFrameInfo();
473   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
474 
475   // Get the number of bytes to allocate from the FrameInfo
476   unsigned FrameSize =
477     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
478 
479   // Get stack alignments. The frame must be aligned to the greatest of these:
480   unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
481   unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
482   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
483 
484   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
485 
486   unsigned LR = RegInfo->getRARegister();
487   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
488   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
489                        !MFI.adjustsStack() &&       // No calls.
490                        !MustSaveLR(MF, LR) &&       // No need to save LR.
491                        !FI->mustSaveTOC() &&        // No need to save TOC.
492                        !RegInfo->hasBasePointer(MF); // No special alignment.
493 
494   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
495   // code if all local vars are reg-allocated.
496   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
497 
498   // Check whether we can skip adjusting the stack pointer (by using red zone)
499   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
500     // No need for frame
501     return 0;
502   }
503 
504   // Get the maximum call frame size of all the calls.
505   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
506 
507   // Maximum call frame needs to be at least big enough for linkage area.
508   unsigned minCallFrameSize = getLinkageSize();
509   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
510 
511   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
512   // that allocations will be aligned.
513   if (MFI.hasVarSizedObjects())
514     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
515 
516   // Update the new max call frame size if the caller passes in a valid pointer.
517   if (NewMaxCallFrameSize)
518     *NewMaxCallFrameSize = maxCallFrameSize;
519 
520   // Include call frame size in total.
521   FrameSize += maxCallFrameSize;
522 
523   // Make sure the frame is aligned.
524   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
525 
526   return FrameSize;
527 }
528 
529 // hasFP - Return true if the specified function actually has a dedicated frame
530 // pointer register.
531 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
532   const MachineFrameInfo &MFI = MF.getFrameInfo();
533   // FIXME: This is pretty much broken by design: hasFP() might be called really
534   // early, before the stack layout was calculated and thus hasFP() might return
535   // true or false here depending on the time of call.
536   return (MFI.getStackSize()) && needsFP(MF);
537 }
538 
539 // needsFP - Return true if the specified function should have a dedicated frame
540 // pointer register.  This is true if the function has variable sized allocas or
541 // if frame pointer elimination is disabled.
542 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
543   const MachineFrameInfo &MFI = MF.getFrameInfo();
544 
545   // Naked functions have no stack frame pushed, so we don't have a frame
546   // pointer.
547   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
548     return false;
549 
550   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
551     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
552     (MF.getTarget().Options.GuaranteedTailCallOpt &&
553      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
554 }
555 
556 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
557   bool is31 = needsFP(MF);
558   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
559   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
560 
561   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
562   bool HasBP = RegInfo->hasBasePointer(MF);
563   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
564   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
565 
566   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
567        BI != BE; ++BI)
568     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
569       --MBBI;
570       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
571         MachineOperand &MO = MBBI->getOperand(I);
572         if (!MO.isReg())
573           continue;
574 
575         switch (MO.getReg()) {
576         case PPC::FP:
577           MO.setReg(FPReg);
578           break;
579         case PPC::FP8:
580           MO.setReg(FP8Reg);
581           break;
582         case PPC::BP:
583           MO.setReg(BPReg);
584           break;
585         case PPC::BP8:
586           MO.setReg(BP8Reg);
587           break;
588 
589         }
590       }
591     }
592 }
593 
594 /*  This function will do the following:
595     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
596       respectively (defaults recommended by the ABI) and return true
597     - If MBB is not an entry block, initialize the register scavenger and look
598       for available registers.
599     - If the defaults (R0/R12) are available, return true
600     - If TwoUniqueRegsRequired is set to true, it looks for two unique
601       registers. Otherwise, look for a single available register.
602       - If the required registers are found, set SR1 and SR2 and return true.
603       - If the required registers are not found, set SR2 or both SR1 and SR2 to
604         PPC::NoRegister and return false.
605 
606     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
607     is not set, this function will attempt to find two different registers, but
608     still return true if only one register is available (and set SR1 == SR2).
609 */
610 bool
611 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
612                                       bool UseAtEnd,
613                                       bool TwoUniqueRegsRequired,
614                                       unsigned *SR1,
615                                       unsigned *SR2) const {
616   RegScavenger RS;
617   unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
618   unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
619 
620   // Set the defaults for the two scratch registers.
621   if (SR1)
622     *SR1 = R0;
623 
624   if (SR2) {
625     assert (SR1 && "Asking for the second scratch register but not the first?");
626     *SR2 = R12;
627   }
628 
629   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
630   if ((UseAtEnd && MBB->isReturnBlock()) ||
631       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
632     return true;
633 
634   RS.enterBasicBlock(*MBB);
635 
636   if (UseAtEnd && !MBB->empty()) {
637     // The scratch register will be used at the end of the block, so must
638     // consider all registers used within the block
639 
640     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
641     // If no terminator, back iterator up to previous instruction.
642     if (MBBI == MBB->end())
643       MBBI = std::prev(MBBI);
644 
645     if (MBBI != MBB->begin())
646       RS.forward(MBBI);
647   }
648 
649   // If the two registers are available, we're all good.
650   // Note that we only return here if both R0 and R12 are available because
651   // although the function may not require two unique registers, it may benefit
652   // from having two so we should try to provide them.
653   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
654     return true;
655 
656   // Get the list of callee-saved registers for the target.
657   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
658   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
659 
660   // Get all the available registers in the block.
661   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
662                                      &PPC::GPRCRegClass);
663 
664   // We shouldn't use callee-saved registers as scratch registers as they may be
665   // available when looking for a candidate block for shrink wrapping but not
666   // available when the actual prologue/epilogue is being emitted because they
667   // were added as live-in to the prologue block by PrologueEpilogueInserter.
668   for (int i = 0; CSRegs[i]; ++i)
669     BV.reset(CSRegs[i]);
670 
671   // Set the first scratch register to the first available one.
672   if (SR1) {
673     int FirstScratchReg = BV.find_first();
674     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
675   }
676 
677   // If there is another one available, set the second scratch register to that.
678   // Otherwise, set it to either PPC::NoRegister if this function requires two
679   // or to whatever SR1 is set to if this function doesn't require two.
680   if (SR2) {
681     int SecondScratchReg = BV.find_next(*SR1);
682     if (SecondScratchReg != -1)
683       *SR2 = SecondScratchReg;
684     else
685       *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
686   }
687 
688   // Now that we've done our best to provide both registers, double check
689   // whether we were unable to provide enough.
690   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
691     return false;
692 
693   return true;
694 }
695 
696 // We need a scratch register for spilling LR and for spilling CR. By default,
697 // we use two scratch registers to hide latency. However, if only one scratch
698 // register is available, we can adjust for that by not overlapping the spill
699 // code. However, if we need to realign the stack (i.e. have a base pointer)
700 // and the stack frame is large, we need two scratch registers.
701 bool
702 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
703   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
704   MachineFunction &MF = *(MBB->getParent());
705   bool HasBP = RegInfo->hasBasePointer(MF);
706   unsigned FrameSize = determineFrameLayout(MF);
707   int NegFrameSize = -FrameSize;
708   bool IsLargeFrame = !isInt<16>(NegFrameSize);
709   MachineFrameInfo &MFI = MF.getFrameInfo();
710   unsigned MaxAlign = MFI.getMaxAlignment();
711   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
712 
713   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
714 }
715 
716 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
717   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
718 
719   return findScratchRegister(TmpMBB, false,
720                              twoUniqueScratchRegsRequired(TmpMBB));
721 }
722 
723 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
724   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
725 
726   return findScratchRegister(TmpMBB, true);
727 }
728 
729 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
730   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
731   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
732 
733   // Abort if there is no register info or function info.
734   if (!RegInfo || !FI)
735     return false;
736 
737   // Only move the stack update on ELFv2 ABI and PPC64.
738   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
739     return false;
740 
741   // Check the frame size first and return false if it does not fit the
742   // requirements.
743   // We need a non-zero frame size as well as a frame that will fit in the red
744   // zone. This is because by moving the stack pointer update we are now storing
745   // to the red zone until the stack pointer is updated. If we get an interrupt
746   // inside the prologue but before the stack update we now have a number of
747   // stores to the red zone and those stores must all fit.
748   MachineFrameInfo &MFI = MF.getFrameInfo();
749   unsigned FrameSize = MFI.getStackSize();
750   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
751     return false;
752 
753   // Frame pointers and base pointers complicate matters so don't do anything
754   // if we have them. For example having a frame pointer will sometimes require
755   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
756   // difficult.
757   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
758     return false;
759 
760   // Calls to fast_cc functions use different rules for passing parameters on
761   // the stack from the ABI and using PIC base in the function imposes
762   // similar restrictions to using the base pointer. It is not generally safe
763   // to move the stack pointer update in these situations.
764   if (FI->hasFastCall() || FI->usesPICBase())
765     return false;
766 
767   // Finally we can move the stack update if we do not require register
768   // scavenging. Register scavenging can introduce more spills and so
769   // may make the frame size larger than we have computed.
770   return !RegInfo->requiresFrameIndexScavenging(MF);
771 }
772 
/// emitPrologue - Insert the prologue code for MF into MBB.
///
/// In order, this routine:
///  - on non-SVR4 ABIs, processes the first UPDATE_VRSAVE found in MBB;
///  - computes the frame size and finds the scratch register(s) to use;
///  - if stackUpdateCanBeMoved(MF) allows it, picks an insertion point past
///    the fixed-slot callee saves for the stack pointer update;
///  - emits the saves of CR and LR and, when a red zone is available, of the
///    frame pointer, PIC base and base pointer;
///  - returns early when the frame size is zero;
///  - allocates the frame with a store-with-update (immediate form for small
///    frames, indexed form for large or realigned frames);
///  - saves the TOC register when FI->mustSaveTOC() is set;
///  - when no red zone is available, saves the frame pointer, PIC base and
///    base pointer after the frame has been allocated;
///  - emits CFI instructions when needsCFI is set and copies SP into the
///    frame pointer register when the function has a frame pointer.
773 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
774                                     MachineBasicBlock &MBB) const {
775   MachineBasicBlock::iterator MBBI = MBB.begin();
776   MachineFrameInfo &MFI = MF.getFrameInfo();
777   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
778   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
779 
780   MachineModuleInfo &MMI = MF.getMMI();
781   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
782   DebugLoc dl;
      // CFI is emitted when the module has debug info or the function needs an
      // unwind table entry.
783   bool needsCFI = MMI.hasDebugInfo() ||
784     MF.getFunction().needsUnwindTableEntry();
785 
786   // Get processor type.
787   bool isPPC64 = Subtarget.isPPC64();
788   // Get the ABI.
789   bool isSVR4ABI = Subtarget.isSVR4ABI();
790   bool isELFv2ABI = Subtarget.isELFv2ABI();
791   assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
792          "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
793 
794   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
795   // process it.
      // NOTE(review): the counter `i` is not read inside the loop body.
796   if (!isSVR4ABI)
797     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
798       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
799         HandleVRSaveUpdate(*MBBI, TII);
800         break;
801       }
802     }
803 
804   // Move MBBI back to the beginning of the prologue block.
805   MBBI = MBB.begin();
806 
807   // Work out frame sizes.
808   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
      // NOTE(review): NegFrameSize is declared as int, so the isInt<32> check
      // below presumably cannot fail where int is 32 bits wide -- confirm.
809   int NegFrameSize = -FrameSize;
810   if (!isInt<32>(NegFrameSize))
811     llvm_unreachable("Unhandled stack size!");
812 
813   if (MFI.isFrameAddressTaken())
814     replaceFPWithRealFP(MF);
815 
816   // Check if the link register (LR) must be saved.
817   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
818   bool MustSaveLR = FI->mustSaveLR();
819   bool MustSaveTOC = FI->mustSaveTOC();
820   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
821   bool MustSaveCR = !MustSaveCRs.empty();
822   // Do we have a frame pointer and/or base pointer for this function?
823   bool HasFP = hasFP(MF);
824   bool HasBP = RegInfo->hasBasePointer(MF);
      // Only 32-bit SVR4 is treated as having no red zone.
825   bool HasRedZone = isPPC64 || !isSVR4ABI;
826 
827   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
828   unsigned BPReg       = RegInfo->getBaseRegister(MF);
829   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
830   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
831   unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
832   unsigned ScratchReg  = 0;
833   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
834   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
      // Select the 32-bit or 64-bit form of each opcode used below.
835   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
836                                                 : PPC::MFLR );
837   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
838                                                  : PPC::STW );
839   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
840                                                      : PPC::STWU );
841   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
842                                                         : PPC::STWUX);
843   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
844                                                           : PPC::LIS );
845   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
846                                                  : PPC::ORI );
847   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
848                                               : PPC::OR );
849   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
850                                                             : PPC::SUBFC);
851   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
852                                                                : PPC::SUBFIC);
853 
854   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
855   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
856   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
857   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
858   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
859          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
860 
861   // Using the same bool variable as below to suppress compiler warnings.
      // The value returned here is only consumed by the assert; the variable is
      // then reassigned to record whether both scratch registers are the same.
862   bool SingleScratchReg =
863     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
864                         &ScratchReg, &TempReg);
865   assert(SingleScratchReg &&
866          "Required number of registers not available in this block");
867 
868   SingleScratchReg = ScratchReg == TempReg;
869 
870   int LROffset = getReturnSaveOffset();
871 
      // Offset of the frame pointer save slot: taken from the frame object on
      // SVR4, from getFramePointerSaveOffset() otherwise.
872   int FPOffset = 0;
873   if (HasFP) {
874     if (isSVR4ABI) {
          // NOTE(review): this MFI shadows the function-level MFI declared above;
          // both are bound to MF.getFrameInfo(). The same pattern repeats for the
          // base pointer and PIC base pointer below.
875       MachineFrameInfo &MFI = MF.getFrameInfo();
876       int FPIndex = FI->getFramePointerSaveIndex();
877       assert(FPIndex && "No Frame Pointer Save Slot!");
878       FPOffset = MFI.getObjectOffset(FPIndex);
879     } else {
880       FPOffset = getFramePointerSaveOffset();
881     }
882   }
883 
      // Offset of the base pointer save slot, determined the same way.
884   int BPOffset = 0;
885   if (HasBP) {
886     if (isSVR4ABI) {
887       MachineFrameInfo &MFI = MF.getFrameInfo();
888       int BPIndex = FI->getBasePointerSaveIndex();
889       assert(BPIndex && "No Base Pointer Save Slot!");
890       BPOffset = MFI.getObjectOffset(BPIndex);
891     } else {
892       BPOffset = getBasePointerSaveOffset();
893     }
894   }
895 
      // Offset of the PIC base pointer save slot (always a frame object).
896   int PBPOffset = 0;
897   if (FI->usesPICBase()) {
898     MachineFrameInfo &MFI = MF.getFrameInfo();
899     int PBPIndex = FI->getPICBasePointerSaveIndex();
900     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
901     PBPOffset = MFI.getObjectOffset(PBPIndex);
902   }
903 
904   // Get stack alignments.
905   unsigned MaxAlign = MFI.getMaxAlignment();
906   if (HasBP && MaxAlign > 1)
907     assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
908            "Invalid alignment!");
909 
910   // Frames of 32KB & larger require special handling because they cannot be
911   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
912   bool isLargeFrame = !isInt<16>(NegFrameSize);
913 
914   assert((isPPC64 || !MustSaveCR) &&
915          "Prologue CR saving supported only in 64-bit mode");
916 
917   // Check if we can move the stack update instruction (stdu) down the prologue
918   // past the callee saves. Hopefully this will avoid the situation where the
919   // saves are waiting for the update on the store with update to complete.
920   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
921   bool MovingStackUpdateDown = false;
922 
923   // Check if we can move the stack update.
924   if (stackUpdateCanBeMoved(MF)) {
925     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
926     for (CalleeSavedInfo CSI : Info) {
927       int FrIdx = CSI.getFrameIdx();
928       // If the frame index is not negative the callee saved info belongs to a
929       // stack object that is not a fixed stack object. We ignore non-fixed
930       // stack objects because we won't move the stack update pointer past them.
931       if (FrIdx >= 0)
932         continue;
933 
          // Advance the insertion point by one instruction for every fixed
          // callee-save slot located at a negative offset.
934       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
935         StackUpdateLoc++;
936         MovingStackUpdateDown = true;
937       } else {
938         // We need all of the Frame Indices to meet these conditions.
939         // If they do not, abort the whole operation.
940         StackUpdateLoc = MBBI;
941         MovingStackUpdateDown = false;
942         break;
943       }
944     }
945 
946     // If the operation was not aborted then update the object offset.
        // Every callee-save slot with a negative frame index is rebased by
        // NegFrameSize; the CFI code at the end of this function subtracts
        // NegFrameSize again before describing these slots.
947     if (MovingStackUpdateDown) {
948       for (CalleeSavedInfo CSI : Info) {
949         int FrIdx = CSI.getFrameIdx();
950         if (FrIdx < 0)
951           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
952       }
953     }
954   }
955 
956   // If we need to spill the CR and the LR but we don't have two separate
957   // registers available, we must spill them one at a time
958   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
959     // In the ELFv2 ABI, we are not required to save all CR fields.
960     // If only a single CR field is clobbered (the case tested below), it is
961     // more efficient to use mfocrf to selectively save just that field,
962     // because mfocrf has a shorter latency than mfcr.
963     unsigned MfcrOpcode = PPC::MFCR8;
964     unsigned CrState = RegState::ImplicitKill;
965     if (isELFv2ABI && MustSaveCRs.size() == 1) {
966       MfcrOpcode = PPC::MFOCRF8;
967       CrState = RegState::Kill;
968     }
969     MachineInstrBuilder MIB =
970       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
971     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
972       MIB.addReg(MustSaveCRs[i], CrState);
973     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
974       .addReg(TempReg, getKillRegState(true))
975       .addImm(getCRSaveOffset())
976       .addReg(SPReg);
977   }
978 
979   if (MustSaveLR)
980     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
981 
982   if (MustSaveCR &&
983       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
984     // In the ELFv2 ABI, we are not required to save all CR fields.
985     // If only a single CR field is clobbered (the case tested below), it is
986     // more efficient to use mfocrf to selectively save just that field,
987     // because mfocrf has a shorter latency than mfcr.
988     unsigned MfcrOpcode = PPC::MFCR8;
989     unsigned CrState = RegState::ImplicitKill;
990     if (isELFv2ABI && MustSaveCRs.size() == 1) {
991       MfcrOpcode = PPC::MFOCRF8;
992       CrState = RegState::Kill;
993     }
994     MachineInstrBuilder MIB =
995       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
996     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
997       MIB.addReg(MustSaveCRs[i], CrState);
998   }
999 
       // With a red zone, FP, the PIC base (R30) and BP are stored relative to
       // the incoming SP before the frame is allocated. Without one, the
       // equivalent stores are emitted after the allocation (see !HasRedZone
       // below).
1000   if (HasRedZone) {
1001     if (HasFP)
1002       BuildMI(MBB, MBBI, dl, StoreInst)
1003         .addReg(FPReg)
1004         .addImm(FPOffset)
1005         .addReg(SPReg);
1006     if (FI->usesPICBase())
1007       BuildMI(MBB, MBBI, dl, StoreInst)
1008         .addReg(PPC::R30)
1009         .addImm(PBPOffset)
1010         .addReg(SPReg);
1011     if (HasBP)
1012       BuildMI(MBB, MBBI, dl, StoreInst)
1013         .addReg(BPReg)
1014         .addImm(BPOffset)
1015         .addReg(SPReg);
1016   }
1017 
       // The LR store is inserted at StackUpdateLoc, the same insertion point
       // that is used further down for the small-frame stack pointer update.
1018   if (MustSaveLR)
1019     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1020       .addReg(ScratchReg, getKillRegState(true))
1021       .addImm(LROffset)
1022       .addReg(SPReg);
1023 
       // Store the CR value that was moved into TempReg above.
1024   if (MustSaveCR &&
1025       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
1026     assert(HasRedZone && "A red zone is always available on PPC64");
1027     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
1028       .addReg(TempReg, getKillRegState(true))
1029       .addImm(getCRSaveOffset())
1030       .addReg(SPReg);
1031   }
1032 
1033   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
       // Note that this return also skips all of the CFI emission below.
1034   if (!FrameSize)
1035     return;
1036 
1037   // Adjust stack pointer: r1 += NegFrameSize.
1038   // If there is a preferred stack alignment, align R1 now
1039 
1040   if (HasBP && HasRedZone) {
1041     // Save a copy of r1 as the base pointer.
1042     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1043       .addReg(SPReg)
1044       .addReg(SPReg);
1045   }
1046 
1047   // Have we generated a STUX instruction to claim stack frame? If so,
1048   // the negated frame size will be placed in ScratchReg.
1049   bool HasSTUX = false;
1050 
1051   // This condition must be kept in sync with canUseAsPrologue.
1052   if (HasBP && MaxAlign > 1) {
         // Isolate the low Log2(MaxAlign) bits of SP in ScratchReg; they are
         // combined with NegFrameSize below to form the index operand of the
         // store-with-update.
1053     if (isPPC64)
1054       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1055         .addReg(SPReg)
1056         .addImm(0)
1057         .addImm(64 - Log2_32(MaxAlign));
1058     else // PPC32...
1059       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1060         .addReg(SPReg)
1061         .addImm(0)
1062         .addImm(32 - Log2_32(MaxAlign))
1063         .addImm(31);
1064     if (!isLargeFrame) {
1065       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1066         .addReg(ScratchReg, RegState::Kill)
1067         .addImm(NegFrameSize);
1068     } else {
           // NegFrameSize does not fit in a 16-bit immediate, so it is built in
           // TempReg with a load-immediate-shifted / or-immediate pair; this
           // requires a second scratch register.
1069       assert(!SingleScratchReg && "Only a single scratch reg available");
1070       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1071         .addImm(NegFrameSize >> 16);
1072       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1073         .addReg(TempReg, RegState::Kill)
1074         .addImm(NegFrameSize & 0xFFFF);
1075       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1076         .addReg(ScratchReg, RegState::Kill)
1077         .addReg(TempReg, RegState::Kill);
1078     }
1079 
1080     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1081       .addReg(SPReg, RegState::Kill)
1082       .addReg(SPReg)
1083       .addReg(ScratchReg);
1084     HasSTUX = true;
1085 
1086   } else if (!isLargeFrame) {
         // Small frame without realignment: a single store-with-update with an
         // immediate offset, inserted at StackUpdateLoc (which may have been
         // advanced past the callee saves above).
1087     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1088       .addReg(SPReg)
1089       .addImm(NegFrameSize)
1090       .addReg(SPReg);
1091 
1092   } else {
         // Large frame without realignment: materialize NegFrameSize in
         // ScratchReg and use the indexed store-with-update.
1093     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1094       .addImm(NegFrameSize >> 16);
1095     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1096       .addReg(ScratchReg, RegState::Kill)
1097       .addImm(NegFrameSize & 0xFFFF);
1098     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1099       .addReg(SPReg, RegState::Kill)
1100       .addReg(SPReg)
1101       .addReg(ScratchReg);
1102     HasSTUX = true;
1103   }
1104 
1105   // Save the TOC register after the stack pointer update if a prologue TOC
1106   // save is required for the function.
1107   if (MustSaveTOC) {
1108     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1109     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1110       .addReg(TOCReg, getKillRegState(true))
1111       .addImm(TOCSaveOffset)
1112       .addReg(SPReg);
1113   }
1114 
1115   if (!HasRedZone) {
1116     assert(!isPPC64 && "A red zone is always available on PPC64");
1117     if (HasSTUX) {
1118       // The negated frame size is in ScratchReg, and the SPReg has been
1119       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1120       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1121       // the stack frame (i.e. the old SP), ideally, we would put the old
1122       // SP into a register and use it as the base for the stores. The
1123       // problem is that the only available register may be ScratchReg,
1124       // which could be R0, and R0 cannot be used as a base address.
1125 
1126       // First, set ScratchReg to the old SP. This may need to be modified
1127       // later.
1128       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1129         .addReg(ScratchReg, RegState::Kill)
1130         .addReg(SPReg);
1131 
1132       if (ScratchReg == PPC::R0) {
1133         // R0 cannot be used as a base register, but it can be used as an
1134         // index in a store-indexed.
             // LastOffset tracks the displacement already added to R0, so each
             // ADDIC only applies the difference to the next slot's offset.
1135         int LastOffset = 0;
1136         if (HasFP)  {
1137           // R0 += (FPOffset-LastOffset).
1138           // Need addic, since addi treats R0 as 0.
1139           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1140             .addReg(ScratchReg)
1141             .addImm(FPOffset-LastOffset);
1142           LastOffset = FPOffset;
1143           // Store FP into *R0.
1144           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1145             .addReg(FPReg, RegState::Kill)  // Save FP.
1146             .addReg(PPC::ZERO)
1147             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1148         }
1149         if (FI->usesPICBase()) {
1150           // R0 += (PBPOffset-LastOffset).
1151           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1152             .addReg(ScratchReg)
1153             .addImm(PBPOffset-LastOffset);
1154           LastOffset = PBPOffset;
1155           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1156             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1157             .addReg(PPC::ZERO)
1158             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1159         }
1160         if (HasBP) {
1161           // R0 += (BPOffset-LastOffset).
1162           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1163             .addReg(ScratchReg)
1164             .addImm(BPOffset-LastOffset);
1165           LastOffset = BPOffset;
1166           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1167             .addReg(BPReg, RegState::Kill)  // Save BP.
1168             .addReg(PPC::ZERO)
1169             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1170           // BP = R0-LastOffset
1171           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1172             .addReg(ScratchReg, RegState::Kill)
1173             .addImm(-LastOffset);
1174         }
1175       } else {
1176         // ScratchReg is not R0, so use it as the base register. It is
1177         // already set to the old SP, so we can use the offsets directly.
1178 
1179         // Now that the stack frame has been allocated, save all the necessary
1180         // registers using ScratchReg as the base address.
1181         if (HasFP)
1182           BuildMI(MBB, MBBI, dl, StoreInst)
1183             .addReg(FPReg)
1184             .addImm(FPOffset)
1185             .addReg(ScratchReg);
1186         if (FI->usesPICBase())
1187           BuildMI(MBB, MBBI, dl, StoreInst)
1188             .addReg(PPC::R30)
1189             .addImm(PBPOffset)
1190             .addReg(ScratchReg);
1191         if (HasBP) {
1192           BuildMI(MBB, MBBI, dl, StoreInst)
1193             .addReg(BPReg)
1194             .addImm(BPOffset)
1195             .addReg(ScratchReg);
               // Copy the old SP (held in ScratchReg) into the base pointer.
1196           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1197             .addReg(ScratchReg, RegState::Kill)
1198             .addReg(ScratchReg);
1199         }
1200       }
1201     } else {
1202       // The frame size is a known 16-bit constant (fitting in the immediate
1203       // field of STWU). To be here we have to be compiling for PPC32.
1204       // Since the SPReg has been decreased by FrameSize, add it back to each
1205       // offset.
1206       if (HasFP)
1207         BuildMI(MBB, MBBI, dl, StoreInst)
1208           .addReg(FPReg)
1209           .addImm(FrameSize + FPOffset)
1210           .addReg(SPReg);
1211       if (FI->usesPICBase())
1212         BuildMI(MBB, MBBI, dl, StoreInst)
1213           .addReg(PPC::R30)
1214           .addImm(FrameSize + PBPOffset)
1215           .addReg(SPReg);
1216       if (HasBP) {
1217         BuildMI(MBB, MBBI, dl, StoreInst)
1218           .addReg(BPReg)
1219           .addImm(FrameSize + BPOffset)
1220           .addReg(SPReg);
             // BP = SP + FrameSize, i.e. the value SP had before the update.
1221         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1222           .addReg(SPReg)
1223           .addImm(FrameSize);
1224       }
1225     }
1226   }
1227 
1228   // Add Call Frame Information for the instructions we generated above.
1229   if (needsCFI) {
1230     unsigned CFIIndex;
1231 
1232     if (HasBP) {
1233       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1234       // because if the stack needed aligning then CFA won't be at a fixed
1235       // offset from FP/SP.
1236       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1237       CFIIndex = MF.addFrameInst(
1238           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1239     } else {
1240       // Adjust the definition of CFA to account for the change in SP.
1241       assert(NegFrameSize);
1242       CFIIndex = MF.addFrameInst(
1243           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1244     }
1245     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1246         .addCFIIndex(CFIIndex);
1247 
1248     if (HasFP) {
1249       // Describe where FP was saved, at a fixed offset from CFA.
1250       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1251       CFIIndex = MF.addFrameInst(
1252           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1253       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1254           .addCFIIndex(CFIIndex);
1255     }
1256 
1257     if (FI->usesPICBase()) {
1258       // Describe where the PIC base register (R30) was saved, at a fixed
           // offset from CFA.
1259       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1260       CFIIndex = MF.addFrameInst(
1261           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1262       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1263           .addCFIIndex(CFIIndex);
1264     }
1265 
1266     if (HasBP) {
1267       // Describe where BP was saved, at a fixed offset from CFA.
1268       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1269       CFIIndex = MF.addFrameInst(
1270           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1271       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1272           .addCFIIndex(CFIIndex);
1273     }
1274 
1275     if (MustSaveLR) {
1276       // Describe where LR was saved, at a fixed offset from CFA.
1277       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1278       CFIIndex = MF.addFrameInst(
1279           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1280       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1281           .addCFIIndex(CFIIndex);
1282     }
1283   }
1284 
1285   // If there is a frame pointer, copy R1 into R31
1286   if (HasFP) {
1287     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1288       .addReg(SPReg)
1289       .addReg(SPReg);
1290 
1291     if (!HasBP && needsCFI) {
1292       // Change the definition of CFA from SP+offset to FP+offset, because SP
1293       // will change at every alloca.
1294       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1295       unsigned CFIIndex = MF.addFrameInst(
1296           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1297 
1298       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1299           .addCFIIndex(CFIIndex);
1300     }
1301   }
1302 
1303   if (needsCFI) {
1304     // Describe where callee saved registers were saved, at fixed offsets from
1305     // CFA.
1306     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1307     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1308       unsigned Reg = CSI[I].getReg();
           // No CFI record is emitted here for LR, LR8 or RM.
1309       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1310 
1311       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1312       // subregisters of CR2. We just need to emit a move of CR2.
1313       if (PPC::CRBITRCRegClass.contains(Reg))
1314         continue;
1315 
           // Skip the TOC register when it was saved by the prologue TOC save
           // above.
1316       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1317         continue;
1318 
1319       // For SVR4, don't emit a move for the CR spill slot if we haven't
1320       // spilled CRs.
1321       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1322           && !MustSaveCR)
1323         continue;
1324 
1325       // For 64-bit SVR4 when we have spilled CRs, the spill location
1326       // is SP+8, not a frame-relative slot.
1327       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1328         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1329         // the whole CR word.  In the ELFv2 ABI, every CR that was
1330         // actually saved gets its own CFI record.
1331         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1332         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1333             nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
1334         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1335             .addCFIIndex(CFIIndex);
1336         continue;
1337       }
1338 
1339       if (CSI[I].isSpilledToReg()) {
             // The register was saved in another register rather than on the
             // stack; record the register-to-register mapping.
1340         unsigned SpilledReg = CSI[I].getDstReg();
1341         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1342             nullptr, MRI->getDwarfRegNum(Reg, true),
1343             MRI->getDwarfRegNum(SpilledReg, true)));
1344         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1345           .addCFIIndex(CFIRegister);
1346       } else {
1347         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1348         // We have changed the object offset above but we do not want to change
1349         // the actual offsets in the CFI instruction so we have to undo the
1350         // offset change here.
1351         if (MovingStackUpdateDown)
1352           Offset -= NegFrameSize;
1353 
1354         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1355             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1356         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1357             .addCFIIndex(CFIIndex);
1358       }
1359     }
1360   }
1361 }
1362 
1363 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1364                                     MachineBasicBlock &MBB) const {
1365   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1366   DebugLoc dl;
1367 
1368   if (MBBI != MBB.end())
1369     dl = MBBI->getDebugLoc();
1370 
1371   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1372   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1373 
1374   // Get alignment info so we know how to restore the SP.
1375   const MachineFrameInfo &MFI = MF.getFrameInfo();
1376 
1377   // Get the number of bytes allocated from the FrameInfo.
1378   int FrameSize = MFI.getStackSize();
1379 
1380   // Get processor type.
1381   bool isPPC64 = Subtarget.isPPC64();
1382   // Get the ABI.
1383   bool isSVR4ABI = Subtarget.isSVR4ABI();
1384 
1385   // Check if the link register (LR) has been saved.
1386   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1387   bool MustSaveLR = FI->mustSaveLR();
1388   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1389   bool MustSaveCR = !MustSaveCRs.empty();
1390   // Do we have a frame pointer and/or base pointer for this function?
1391   bool HasFP = hasFP(MF);
1392   bool HasBP = RegInfo->hasBasePointer(MF);
1393   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1394 
1395   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1396   unsigned BPReg      = RegInfo->getBaseRegister(MF);
1397   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1398   unsigned ScratchReg = 0;
1399   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1400   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1401                                                  : PPC::MTLR );
1402   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1403                                                  : PPC::LWZ );
1404   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1405                                                            : PPC::LIS );
1406   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1407                                               : PPC::OR );
1408   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1409                                                   : PPC::ORI );
1410   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1411                                                    : PPC::ADDI );
1412   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1413                                                 : PPC::ADD4 );
1414 
1415   int LROffset = getReturnSaveOffset();
1416 
1417   int FPOffset = 0;
1418 
1419   // Using the same bool variable as below to suppress compiler warnings.
1420   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1421                                               &TempReg);
1422   assert(SingleScratchReg &&
1423          "Could not find an available scratch register");
1424 
1425   SingleScratchReg = ScratchReg == TempReg;
1426 
1427   if (HasFP) {
1428     if (isSVR4ABI) {
1429       int FPIndex = FI->getFramePointerSaveIndex();
1430       assert(FPIndex && "No Frame Pointer Save Slot!");
1431       FPOffset = MFI.getObjectOffset(FPIndex);
1432     } else {
1433       FPOffset = getFramePointerSaveOffset();
1434     }
1435   }
1436 
1437   int BPOffset = 0;
1438   if (HasBP) {
1439     if (isSVR4ABI) {
1440       int BPIndex = FI->getBasePointerSaveIndex();
1441       assert(BPIndex && "No Base Pointer Save Slot!");
1442       BPOffset = MFI.getObjectOffset(BPIndex);
1443     } else {
1444       BPOffset = getBasePointerSaveOffset();
1445     }
1446   }
1447 
1448   int PBPOffset = 0;
1449   if (FI->usesPICBase()) {
1450     int PBPIndex = FI->getPICBasePointerSaveIndex();
1451     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1452     PBPOffset = MFI.getObjectOffset(PBPIndex);
1453   }
1454 
1455   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1456 
1457   if (IsReturnBlock) {
1458     unsigned RetOpcode = MBBI->getOpcode();
1459     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1460                       RetOpcode == PPC::TCRETURNdi ||
1461                       RetOpcode == PPC::TCRETURNai ||
1462                       RetOpcode == PPC::TCRETURNri8 ||
1463                       RetOpcode == PPC::TCRETURNdi8 ||
1464                       RetOpcode == PPC::TCRETURNai8;
1465 
1466     if (UsesTCRet) {
1467       int MaxTCRetDelta = FI->getTailCallSPDelta();
1468       MachineOperand &StackAdjust = MBBI->getOperand(1);
1469       assert(StackAdjust.isImm() && "Expecting immediate value.");
1470       // Adjust stack pointer.
1471       int StackAdj = StackAdjust.getImm();
1472       int Delta = StackAdj - MaxTCRetDelta;
1473       assert((Delta >= 0) && "Delta must be positive");
1474       if (MaxTCRetDelta>0)
1475         FrameSize += (StackAdj +Delta);
1476       else
1477         FrameSize += StackAdj;
1478     }
1479   }
1480 
1481   // Frames of 32KB & larger require special handling because they cannot be
1482   // indexed into with a simple LD/LWZ immediate offset operand.
1483   bool isLargeFrame = !isInt<16>(FrameSize);
1484 
1485   // On targets without red zone, the SP needs to be restored last, so that
1486   // all live contents of the stack frame are upwards of the SP. This means
1487   // that we cannot restore SP just now, since there may be more registers
1488   // to restore from the stack frame (e.g. R31). If the frame size is not
1489   // a simple immediate value, we will need a spare register to hold the
1490   // restored SP. If the frame size is known and small, we can simply adjust
1491   // the offsets of the registers to be restored, and still use SP to restore
1492   // them. In such case, the final update of SP will be to add the frame
1493   // size to it.
1494   // To simplify the code, set RBReg to the base register used to restore
1495   // values from the stack, and set SPAdd to the value that needs to be added
1496   // to the SP at the end. The default values are as if red zone was present.
1497   unsigned RBReg = SPReg;
1498   unsigned SPAdd = 0;
1499 
1500   // Check if we can move the stack update instruction up the epilogue
1501   // past the callee saves. This will allow the move to LR instruction
1502   // to be executed before the restores of the callee saves which means
1503   // that the callee saves can hide the latency from the MTLR instrcution.
1504   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1505   if (stackUpdateCanBeMoved(MF)) {
1506     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1507     for (CalleeSavedInfo CSI : Info) {
1508       int FrIdx = CSI.getFrameIdx();
1509       // If the frame index is not negative the callee saved info belongs to a
1510       // stack object that is not a fixed stack object. We ignore non-fixed
1511       // stack objects because we won't move the update of the stack pointer
1512       // past them.
1513       if (FrIdx >= 0)
1514         continue;
1515 
1516       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1517         StackUpdateLoc--;
1518       else {
1519         // Abort the operation as we can't update all CSR restores.
1520         StackUpdateLoc = MBBI;
1521         break;
1522       }
1523     }
1524   }
1525 
1526   if (FrameSize) {
1527     // In the prologue, the loaded (or persistent) stack pointer value is
1528     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1529     // zone add this offset back now.
1530 
1531     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1532     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1533     // call which invalidates the stack pointer value in SP(0). So we use the
1534     // value of R31 in this case.
1535     if (FI->hasFastCall()) {
1536       assert(HasFP && "Expecting a valid frame pointer.");
1537       if (!HasRedZone)
1538         RBReg = FPReg;
1539       if (!isLargeFrame) {
1540         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1541           .addReg(FPReg).addImm(FrameSize);
1542       } else {
1543         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1544           .addImm(FrameSize >> 16);
1545         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1546           .addReg(ScratchReg, RegState::Kill)
1547           .addImm(FrameSize & 0xFFFF);
1548         BuildMI(MBB, MBBI, dl, AddInst)
1549           .addReg(RBReg)
1550           .addReg(FPReg)
1551           .addReg(ScratchReg);
1552       }
1553     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1554       if (HasRedZone) {
1555         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1556           .addReg(SPReg)
1557           .addImm(FrameSize);
1558       } else {
1559         // Make sure that adding FrameSize will not overflow the max offset
1560         // size.
1561         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1562                "Local offsets should be negative");
1563         SPAdd = FrameSize;
1564         FPOffset += FrameSize;
1565         BPOffset += FrameSize;
1566         PBPOffset += FrameSize;
1567       }
1568     } else {
1569       // We don't want to use ScratchReg as a base register, because it
1570       // could happen to be R0. Use FP instead, but make sure to preserve it.
1571       if (!HasRedZone) {
1572         // If FP is not saved, copy it to ScratchReg.
1573         if (!HasFP)
1574           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1575             .addReg(FPReg)
1576             .addReg(FPReg);
1577         RBReg = FPReg;
1578       }
1579       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1580         .addImm(0)
1581         .addReg(SPReg);
1582     }
1583   }
1584   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1585   // If there is no red zone, ScratchReg may be needed for holding a useful
1586   // value (although not the base register). Make sure it is not overwritten
1587   // too early.
1588 
1589   assert((isPPC64 || !MustSaveCR) &&
1590          "Epilogue CR restoring supported only in 64-bit mode");
1591 
1592   // If we need to restore both the LR and the CR and we only have one
1593   // available scratch register, we must do them one at a time.
1594   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1595     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1596     // is live here.
1597     assert(HasRedZone && "Expecting red zone");
1598     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1599       .addImm(getCRSaveOffset())
1600       .addReg(SPReg);
1601     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1602       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1603         .addReg(TempReg, getKillRegState(i == e-1));
1604   }
1605 
1606   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1607   // LR is stored in the caller's stack frame. ScratchReg will be needed
1608   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1609   // a base register anyway, because it may happen to be R0.
1610   bool LoadedLR = false;
1611   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1612     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1613       .addImm(LROffset+SPAdd)
1614       .addReg(RBReg);
1615     LoadedLR = true;
1616   }
1617 
1618   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1619     // This will only occur for PPC64.
1620     assert(isPPC64 && "Expecting 64-bit mode");
1621     assert(RBReg == SPReg && "Should be using SP as a base register");
1622     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1623       .addImm(getCRSaveOffset())
1624       .addReg(RBReg);
1625   }
1626 
1627   if (HasFP) {
1628     // If there is red zone, restore FP directly, since SP has already been
1629     // restored. Otherwise, restore the value of FP into ScratchReg.
1630     if (HasRedZone || RBReg == SPReg)
1631       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1632         .addImm(FPOffset)
1633         .addReg(SPReg);
1634     else
1635       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1636         .addImm(FPOffset)
1637         .addReg(RBReg);
1638   }
1639 
1640   if (FI->usesPICBase())
1641     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1642       .addImm(PBPOffset)
1643       .addReg(RBReg);
1644 
1645   if (HasBP)
1646     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1647       .addImm(BPOffset)
1648       .addReg(RBReg);
1649 
1650   // There is nothing more to be loaded from the stack, so now we can
1651   // restore SP: SP = RBReg + SPAdd.
1652   if (RBReg != SPReg || SPAdd != 0) {
1653     assert(!HasRedZone && "This should not happen with red zone");
1654     // If SPAdd is 0, generate a copy.
1655     if (SPAdd == 0)
1656       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1657         .addReg(RBReg)
1658         .addReg(RBReg);
1659     else
1660       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1661         .addReg(RBReg)
1662         .addImm(SPAdd);
1663 
1664     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1665     if (RBReg == FPReg)
1666       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1667         .addReg(ScratchReg)
1668         .addReg(ScratchReg);
1669 
1670     // Now load the LR from the caller's stack frame.
1671     if (MustSaveLR && !LoadedLR)
1672       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1673         .addImm(LROffset)
1674         .addReg(SPReg);
1675   }
1676 
1677   if (MustSaveCR &&
1678       !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1679     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1680       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1681         .addReg(TempReg, getKillRegState(i == e-1));
1682 
1683   if (MustSaveLR)
1684     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1685 
1686   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1687   // call optimization
1688   if (IsReturnBlock) {
1689     unsigned RetOpcode = MBBI->getOpcode();
1690     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1691         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1692         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1693       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1694       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1695 
1696       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1697         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1698           .addReg(SPReg).addImm(CallerAllocatedAmt);
1699       } else {
1700         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1701           .addImm(CallerAllocatedAmt >> 16);
1702         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1703           .addReg(ScratchReg, RegState::Kill)
1704           .addImm(CallerAllocatedAmt & 0xFFFF);
1705         BuildMI(MBB, MBBI, dl, AddInst)
1706           .addReg(SPReg)
1707           .addReg(FPReg)
1708           .addReg(ScratchReg);
1709       }
1710     } else {
1711       createTailCallBranchInstr(MBB);
1712     }
1713   }
1714 }
1715 
1716 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1717   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1718 
1719   // If we got this far a first terminator should exist.
1720   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1721 
1722   DebugLoc dl = MBBI->getDebugLoc();
1723   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1724 
1725   // Create branch instruction for pseudo tail call return instruction
1726   unsigned RetOpcode = MBBI->getOpcode();
1727   if (RetOpcode == PPC::TCRETURNdi) {
1728     MBBI = MBB.getLastNonDebugInstr();
1729     MachineOperand &JumpTarget = MBBI->getOperand(0);
1730     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1731       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1732   } else if (RetOpcode == PPC::TCRETURNri) {
1733     MBBI = MBB.getLastNonDebugInstr();
1734     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1735     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1736   } else if (RetOpcode == PPC::TCRETURNai) {
1737     MBBI = MBB.getLastNonDebugInstr();
1738     MachineOperand &JumpTarget = MBBI->getOperand(0);
1739     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1740   } else if (RetOpcode == PPC::TCRETURNdi8) {
1741     MBBI = MBB.getLastNonDebugInstr();
1742     MachineOperand &JumpTarget = MBBI->getOperand(0);
1743     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1744       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1745   } else if (RetOpcode == PPC::TCRETURNri8) {
1746     MBBI = MBB.getLastNonDebugInstr();
1747     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1748     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1749   } else if (RetOpcode == PPC::TCRETURNai8) {
1750     MBBI = MBB.getLastNonDebugInstr();
1751     MachineOperand &JumpTarget = MBBI->getOperand(0);
1752     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1753   }
1754 }
1755 
1756 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1757                                             BitVector &SavedRegs,
1758                                             RegScavenger *RS) const {
1759   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1760 
1761   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1762 
1763   //  Save and clear the LR state.
1764   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1765   unsigned LR = RegInfo->getRARegister();
1766   FI->setMustSaveLR(MustSaveLR(MF, LR));
1767   SavedRegs.reset(LR);
1768 
1769   //  Save R31 if necessary
1770   int FPSI = FI->getFramePointerSaveIndex();
1771   bool isPPC64 = Subtarget.isPPC64();
1772   bool isDarwinABI  = Subtarget.isDarwinABI();
1773   MachineFrameInfo &MFI = MF.getFrameInfo();
1774 
1775   // If the frame pointer save index hasn't been defined yet.
1776   if (!FPSI && needsFP(MF)) {
1777     // Find out what the fix offset of the frame pointer save area.
1778     int FPOffset = getFramePointerSaveOffset();
1779     // Allocate the frame index for frame pointer save area.
1780     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1781     // Save the result.
1782     FI->setFramePointerSaveIndex(FPSI);
1783   }
1784 
1785   int BPSI = FI->getBasePointerSaveIndex();
1786   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1787     int BPOffset = getBasePointerSaveOffset();
1788     // Allocate the frame index for the base pointer save area.
1789     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1790     // Save the result.
1791     FI->setBasePointerSaveIndex(BPSI);
1792   }
1793 
1794   // Reserve stack space for the PIC Base register (R30).
1795   // Only used in SVR4 32-bit.
1796   if (FI->usesPICBase()) {
1797     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1798     FI->setPICBasePointerSaveIndex(PBPSI);
1799   }
1800 
1801   // Make sure we don't explicitly spill r31, because, for example, we have
1802   // some inline asm which explicitly clobbers it, when we otherwise have a
1803   // frame pointer and are using r31's spill slot for the prologue/epilogue
1804   // code. Same goes for the base pointer and the PIC base register.
1805   if (needsFP(MF))
1806     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1807   if (RegInfo->hasBasePointer(MF))
1808     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1809   if (FI->usesPICBase())
1810     SavedRegs.reset(PPC::R30);
1811 
1812   // Reserve stack space to move the linkage area to in case of a tail call.
1813   int TCSPDelta = 0;
1814   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1815       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1816     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1817   }
1818 
1819   // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1820   // function uses CR 2, 3, or 4.
1821   if (!isPPC64 && !isDarwinABI &&
1822       (SavedRegs.test(PPC::CR2) ||
1823        SavedRegs.test(PPC::CR3) ||
1824        SavedRegs.test(PPC::CR4))) {
1825     int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
1826     FI->setCRSpillFrameIndex(FrameIdx);
1827   }
1828 }
1829 
1830 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1831                                                        RegScavenger *RS) const {
1832   // Early exit if not using the SVR4 ABI.
1833   if (!Subtarget.isSVR4ABI()) {
1834     addScavengingSpillSlot(MF, RS);
1835     return;
1836   }
1837 
1838   // Get callee saved register information.
1839   MachineFrameInfo &MFI = MF.getFrameInfo();
1840   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1841 
1842   // If the function is shrink-wrapped, and if the function has a tail call, the
1843   // tail call might not be in the new RestoreBlock, so real branch instruction
1844   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1845   // RestoreBlock. So we handle this case here.
1846   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1847     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1848     for (MachineBasicBlock &MBB : MF) {
1849       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1850         createTailCallBranchInstr(MBB);
1851     }
1852   }
1853 
1854   // Early exit if no callee saved registers are modified!
1855   if (CSI.empty() && !needsFP(MF)) {
1856     addScavengingSpillSlot(MF, RS);
1857     return;
1858   }
1859 
1860   unsigned MinGPR = PPC::R31;
1861   unsigned MinG8R = PPC::X31;
1862   unsigned MinFPR = PPC::F31;
1863   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1864 
1865   bool HasGPSaveArea = false;
1866   bool HasG8SaveArea = false;
1867   bool HasFPSaveArea = false;
1868   bool HasVRSAVESaveArea = false;
1869   bool HasVRSaveArea = false;
1870 
1871   SmallVector<CalleeSavedInfo, 18> GPRegs;
1872   SmallVector<CalleeSavedInfo, 18> G8Regs;
1873   SmallVector<CalleeSavedInfo, 18> FPRegs;
1874   SmallVector<CalleeSavedInfo, 18> VRegs;
1875 
1876   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1877     unsigned Reg = CSI[i].getReg();
1878     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1879             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1880            "Not expecting to try to spill R2 in a function that must save TOC");
1881     if (PPC::GPRCRegClass.contains(Reg) ||
1882         PPC::SPE4RCRegClass.contains(Reg)) {
1883       HasGPSaveArea = true;
1884 
1885       GPRegs.push_back(CSI[i]);
1886 
1887       if (Reg < MinGPR) {
1888         MinGPR = Reg;
1889       }
1890     } else if (PPC::G8RCRegClass.contains(Reg)) {
1891       HasG8SaveArea = true;
1892 
1893       G8Regs.push_back(CSI[i]);
1894 
1895       if (Reg < MinG8R) {
1896         MinG8R = Reg;
1897       }
1898     } else if (PPC::F8RCRegClass.contains(Reg)) {
1899       HasFPSaveArea = true;
1900 
1901       FPRegs.push_back(CSI[i]);
1902 
1903       if (Reg < MinFPR) {
1904         MinFPR = Reg;
1905       }
1906     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1907                PPC::CRRCRegClass.contains(Reg)) {
1908       ; // do nothing, as we already know whether CRs are spilled
1909     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1910       HasVRSAVESaveArea = true;
1911     } else if (PPC::VRRCRegClass.contains(Reg) ||
1912                PPC::SPERCRegClass.contains(Reg)) {
1913       // Altivec and SPE are mutually exclusive, but have the same stack
1914       // alignment requirements, so overload the save area for both cases.
1915       HasVRSaveArea = true;
1916 
1917       VRegs.push_back(CSI[i]);
1918 
1919       if (Reg < MinVR) {
1920         MinVR = Reg;
1921       }
1922     } else {
1923       llvm_unreachable("Unknown RegisterClass!");
1924     }
1925   }
1926 
1927   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1928   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1929 
1930   int64_t LowerBound = 0;
1931 
1932   // Take into account stack space reserved for tail calls.
1933   int TCSPDelta = 0;
1934   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1935       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1936     LowerBound = TCSPDelta;
1937   }
1938 
1939   // The Floating-point register save area is right below the back chain word
1940   // of the previous stack frame.
1941   if (HasFPSaveArea) {
1942     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1943       int FI = FPRegs[i].getFrameIdx();
1944 
1945       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1946     }
1947 
1948     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1949   }
1950 
1951   // Check whether the frame pointer register is allocated. If so, make sure it
1952   // is spilled to the correct offset.
1953   if (needsFP(MF)) {
1954     int FI = PFI->getFramePointerSaveIndex();
1955     assert(FI && "No Frame Pointer Save Slot!");
1956     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1957     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1958     HasGPSaveArea = true;
1959   }
1960 
1961   if (PFI->usesPICBase()) {
1962     int FI = PFI->getPICBasePointerSaveIndex();
1963     assert(FI && "No PIC Base Pointer Save Slot!");
1964     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1965 
1966     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1967     HasGPSaveArea = true;
1968   }
1969 
1970   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1971   if (RegInfo->hasBasePointer(MF)) {
1972     int FI = PFI->getBasePointerSaveIndex();
1973     assert(FI && "No Base Pointer Save Slot!");
1974     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1975 
1976     unsigned BP = RegInfo->getBaseRegister(MF);
1977     if (PPC::G8RCRegClass.contains(BP)) {
1978       MinG8R = std::min<unsigned>(MinG8R, BP);
1979       HasG8SaveArea = true;
1980     } else if (PPC::GPRCRegClass.contains(BP)) {
1981       MinGPR = std::min<unsigned>(MinGPR, BP);
1982       HasGPSaveArea = true;
1983     }
1984   }
1985 
1986   // General register save area starts right below the Floating-point
1987   // register save area.
1988   if (HasGPSaveArea || HasG8SaveArea) {
1989     // Move general register save area spill slots down, taking into account
1990     // the size of the Floating-point register save area.
1991     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1992       if (!GPRegs[i].isSpilledToReg()) {
1993         int FI = GPRegs[i].getFrameIdx();
1994         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1995       }
1996     }
1997 
1998     // Move general register save area spill slots down, taking into account
1999     // the size of the Floating-point register save area.
2000     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
2001       if (!G8Regs[i].isSpilledToReg()) {
2002         int FI = G8Regs[i].getFrameIdx();
2003         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2004       }
2005     }
2006 
2007     unsigned MinReg =
2008       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
2009                          TRI->getEncodingValue(MinG8R));
2010 
2011     if (Subtarget.isPPC64()) {
2012       LowerBound -= (31 - MinReg + 1) * 8;
2013     } else {
2014       LowerBound -= (31 - MinReg + 1) * 4;
2015     }
2016   }
2017 
2018   // For 32-bit only, the CR save area is below the general register
2019   // save area.  For 64-bit SVR4, the CR save area is addressed relative
2020   // to the stack pointer and hence does not need an adjustment here.
2021   // Only CR2 (the first nonvolatile spilled) has an associated frame
2022   // index so that we have a single uniform save area.
2023   if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
2024     // Adjust the frame index of the CR spill slot.
2025     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2026       unsigned Reg = CSI[i].getReg();
2027 
2028       if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
2029           // Leave Darwin logic as-is.
2030           || (!Subtarget.isSVR4ABI() &&
2031               (PPC::CRBITRCRegClass.contains(Reg) ||
2032                PPC::CRRCRegClass.contains(Reg)))) {
2033         int FI = CSI[i].getFrameIdx();
2034 
2035         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2036       }
2037     }
2038 
2039     LowerBound -= 4; // The CR save area is always 4 bytes long.
2040   }
2041 
2042   if (HasVRSAVESaveArea) {
2043     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2044     //             which have the VRSAVE register class?
2045     // Adjust the frame index of the VRSAVE spill slot.
2046     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2047       unsigned Reg = CSI[i].getReg();
2048 
2049       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2050         int FI = CSI[i].getFrameIdx();
2051 
2052         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2053       }
2054     }
2055 
2056     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2057   }
2058 
2059   // Both Altivec and SPE have the same alignment and padding requirements
2060   // within the stack frame.
2061   if (HasVRSaveArea) {
2062     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2063     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2064     // we are using negative number here (the stack grows downward). We should
2065     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2066     // is the alignment size ( n = 16 here) and y is the size after aligning.
2067     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2068     LowerBound &= ~(15);
2069 
2070     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2071       int FI = VRegs[i].getFrameIdx();
2072 
2073       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2074     }
2075   }
2076 
2077   addScavengingSpillSlot(MF, RS);
2078 }
2079 
2080 void
2081 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2082                                          RegScavenger *RS) const {
2083   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2084   // a large stack, which will require scavenging a register to materialize a
2085   // large offset.
2086 
2087   // We need to have a scavenger spill slot for spills if the frame size is
2088   // large. In case there is no free register for large-offset addressing,
2089   // this slot is used for the necessary emergency spill. Also, we need the
2090   // slot for dynamic stack allocations.
2091 
2092   // The scavenger might be invoked if the frame offset does not fit into
2093   // the 16-bit immediate. We don't know the complete frame size here
2094   // because we've not yet computed callee-saved register spills or the
2095   // needed alignment padding.
2096   unsigned StackSize = determineFrameLayout(MF, true);
2097   MachineFrameInfo &MFI = MF.getFrameInfo();
2098   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2099       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2100     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2101     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2102     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2103     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2104     unsigned Size = TRI.getSpillSize(RC);
2105     unsigned Align = TRI.getSpillAlignment(RC);
2106     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2107 
2108     // Might we have over-aligned allocas?
2109     bool HasAlVars = MFI.hasVarSizedObjects() &&
2110                      MFI.getMaxAlignment() > getStackAlignment();
2111 
2112     // These kinds of spills might need two registers.
2113     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2114       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2115 
2116   }
2117 }
2118 
2119 // This function checks if a callee saved gpr can be spilled to a volatile
2120 // vector register. This occurs for leaf functions when the option
2121 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2122 // which were not spilled to vectors, return false so the target independent
2123 // code can handle them by assigning a FrameIdx to a stack slot.
2124 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2125     MachineFunction &MF, const TargetRegisterInfo *TRI,
2126     std::vector<CalleeSavedInfo> &CSI) const {
2127 
2128   if (CSI.empty())
2129     return true; // Early exit if no callee saved registers are modified!
2130 
2131   // Early exit if cannot spill gprs to volatile vector registers.
2132   MachineFrameInfo &MFI = MF.getFrameInfo();
2133   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2134     return false;
2135 
2136   // Build a BitVector of VSRs that can be used for spilling GPRs.
2137   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2138   BitVector BVCalleeSaved(TRI->getNumRegs());
2139   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2140   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2141   for (unsigned i = 0; CSRegs[i]; ++i)
2142     BVCalleeSaved.set(CSRegs[i]);
2143 
2144   for (unsigned Reg : BVAllocatable.set_bits()) {
2145     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2146     // used in the function.
2147     if (BVCalleeSaved[Reg] ||
2148         (!PPC::F8RCRegClass.contains(Reg) &&
2149          !PPC::VFRCRegClass.contains(Reg)) ||
2150         (MF.getRegInfo().isPhysRegUsed(Reg)))
2151       BVAllocatable.reset(Reg);
2152   }
2153 
2154   bool AllSpilledToReg = true;
2155   for (auto &CS : CSI) {
2156     if (BVAllocatable.none())
2157       return false;
2158 
2159     unsigned Reg = CS.getReg();
2160     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2161       AllSpilledToReg = false;
2162       continue;
2163     }
2164 
2165     unsigned VolatileVFReg = BVAllocatable.find_first();
2166     if (VolatileVFReg < BVAllocatable.size()) {
2167       CS.setDstReg(VolatileVFReg);
2168       BVAllocatable.reset(VolatileVFReg);
2169     } else {
2170       AllSpilledToReg = false;
2171     }
2172   }
2173   return AllSpilledToReg;
2174 }
2175 
2176 
2177 bool
2178 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2179                                      MachineBasicBlock::iterator MI,
2180                                      const std::vector<CalleeSavedInfo> &CSI,
2181                                      const TargetRegisterInfo *TRI) const {
2182 
2183   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2184   // Return false otherwise to maintain pre-existing behavior.
2185   if (!Subtarget.isSVR4ABI())
2186     return false;
2187 
2188   MachineFunction *MF = MBB.getParent();
2189   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2190   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2191   bool MustSaveTOC = FI->mustSaveTOC();
2192   DebugLoc DL;
2193   bool CRSpilled = false;
2194   MachineInstrBuilder CRMIB;
2195 
2196   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2197     unsigned Reg = CSI[i].getReg();
2198     // Only Darwin actually uses the VRSAVE register, but it can still appear
2199     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2200     // Darwin, ignore it.
2201     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2202       continue;
2203 
2204     // CR2 through CR4 are the nonvolatile CR fields.
2205     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2206 
2207     // Add the callee-saved register as live-in; it's killed at the spill.
2208     // Do not do this for callee-saved registers that are live-in to the
2209     // function because they will already be marked live-in and this will be
2210     // adding it for a second time. It is an error to add the same register
2211     // to the set more than once.
2212     const MachineRegisterInfo &MRI = MF->getRegInfo();
2213     bool IsLiveIn = MRI.isLiveIn(Reg);
2214     if (!IsLiveIn)
2215        MBB.addLiveIn(Reg);
2216 
2217     if (CRSpilled && IsCRField) {
2218       CRMIB.addReg(Reg, RegState::ImplicitKill);
2219       continue;
2220     }
2221 
2222     // The actual spill will happen in the prologue.
2223     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2224       continue;
2225 
2226     // Insert the spill to the stack frame.
2227     if (IsCRField) {
2228       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2229       if (Subtarget.isPPC64()) {
2230         // The actual spill will happen at the start of the prologue.
2231         FuncInfo->addMustSaveCR(Reg);
2232       } else {
2233         CRSpilled = true;
2234         FuncInfo->setSpillsCR();
2235 
2236         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2237         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2238         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2239                   .addReg(Reg, RegState::ImplicitKill);
2240 
2241         MBB.insert(MI, CRMIB);
2242         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2243                                          .addReg(PPC::R12,
2244                                                  getKillRegState(true)),
2245                                          CSI[i].getFrameIdx()));
2246       }
2247     } else {
2248       if (CSI[i].isSpilledToReg()) {
2249         NumPESpillVSR++;
2250         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2251           .addReg(Reg, getKillRegState(true));
2252       } else {
2253         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2254         // Use !IsLiveIn for the kill flag.
2255         // We do not want to kill registers that are live in this function
2256         // before their use because they will become undefined registers.
2257         TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
2258                                 CSI[i].getFrameIdx(), RC, TRI);
2259       }
2260     }
2261   }
2262   return true;
2263 }
2264 
2265 static void
2266 restoreCRs(bool isPPC64, bool is31,
2267            bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
2268            MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2269            const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
2270 
2271   MachineFunction *MF = MBB.getParent();
2272   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2273   DebugLoc DL;
2274   unsigned RestoreOp, MoveReg;
2275 
2276   if (isPPC64)
2277     // This is handled during epilogue generation.
2278     return;
2279   else {
2280     // 32-bit:  FP-relative
2281     MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
2282                                              PPC::R12),
2283                                      CSI[CSIIndex].getFrameIdx()));
2284     RestoreOp = PPC::MTOCRF;
2285     MoveReg = PPC::R12;
2286   }
2287 
2288   if (CR2Spilled)
2289     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2290                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2291 
2292   if (CR3Spilled)
2293     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2294                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2295 
2296   if (CR4Spilled)
2297     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2298                .addReg(MoveReg, getKillRegState(true)));
2299 }
2300 
2301 MachineBasicBlock::iterator PPCFrameLowering::
2302 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2303                               MachineBasicBlock::iterator I) const {
2304   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2305   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2306       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2307     // Add (actually subtract) back the amount the callee popped on return.
2308     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2309       bool is64Bit = Subtarget.isPPC64();
2310       CalleeAmt *= -1;
2311       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2312       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2313       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2314       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2315       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2316       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2317       const DebugLoc &dl = I->getDebugLoc();
2318 
2319       if (isInt<16>(CalleeAmt)) {
2320         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2321           .addReg(StackReg, RegState::Kill)
2322           .addImm(CalleeAmt);
2323       } else {
2324         MachineBasicBlock::iterator MBBI = I;
2325         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2326           .addImm(CalleeAmt >> 16);
2327         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2328           .addReg(TmpReg, RegState::Kill)
2329           .addImm(CalleeAmt & 0xFFFF);
2330         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2331           .addReg(StackReg, RegState::Kill)
2332           .addReg(TmpReg);
2333       }
2334     }
2335   }
2336   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2337   return MBB.erase(I);
2338 }
2339 
2340 bool
2341 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2342                                         MachineBasicBlock::iterator MI,
2343                                         std::vector<CalleeSavedInfo> &CSI,
2344                                         const TargetRegisterInfo *TRI) const {
2345 
2346   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2347   // Return false otherwise to maintain pre-existing behavior.
2348   if (!Subtarget.isSVR4ABI())
2349     return false;
2350 
2351   MachineFunction *MF = MBB.getParent();
2352   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2353   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2354   bool MustSaveTOC = FI->mustSaveTOC();
2355   bool CR2Spilled = false;
2356   bool CR3Spilled = false;
2357   bool CR4Spilled = false;
2358   unsigned CSIIndex = 0;
2359 
2360   // Initialize insertion-point logic; we will be restoring in reverse
2361   // order of spill.
2362   MachineBasicBlock::iterator I = MI, BeforeI = I;
2363   bool AtStart = I == MBB.begin();
2364 
2365   if (!AtStart)
2366     --BeforeI;
2367 
2368   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2369     unsigned Reg = CSI[i].getReg();
2370 
2371     // Only Darwin actually uses the VRSAVE register, but it can still appear
2372     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2373     // Darwin, ignore it.
2374     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2375       continue;
2376 
2377     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2378       continue;
2379 
2380     if (Reg == PPC::CR2) {
2381       CR2Spilled = true;
2382       // The spill slot is associated only with CR2, which is the
2383       // first nonvolatile spilled.  Save it here.
2384       CSIIndex = i;
2385       continue;
2386     } else if (Reg == PPC::CR3) {
2387       CR3Spilled = true;
2388       continue;
2389     } else if (Reg == PPC::CR4) {
2390       CR4Spilled = true;
2391       continue;
2392     } else {
2393       // When we first encounter a non-CR register after seeing at
2394       // least one CR register, restore all spilled CRs together.
2395       if ((CR2Spilled || CR3Spilled || CR4Spilled)
2396           && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
2397         bool is31 = needsFP(*MF);
2398         restoreCRs(Subtarget.isPPC64(), is31,
2399                    CR2Spilled, CR3Spilled, CR4Spilled,
2400                    MBB, I, CSI, CSIIndex);
2401         CR2Spilled = CR3Spilled = CR4Spilled = false;
2402       }
2403 
2404       if (CSI[i].isSpilledToReg()) {
2405         DebugLoc DL;
2406         NumPEReloadVSR++;
2407         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2408             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2409       } else {
2410        // Default behavior for non-CR saves.
2411         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2412         TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2413         assert(I != MBB.begin() &&
2414                "loadRegFromStackSlot didn't insert any code!");
2415       }
2416     }
2417 
2418     // Insert in reverse order.
2419     if (AtStart)
2420       I = MBB.begin();
2421     else {
2422       I = BeforeI;
2423       ++I;
2424     }
2425   }
2426 
2427   // If we haven't yet spilled the CRs, do so now.
2428   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2429     bool is31 = needsFP(*MF);
2430     restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
2431                MBB, I, CSI, CSIIndex);
2432   }
2433 
2434   return true;
2435 }
2436 
2437 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2438   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2439     return false;
2440   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2441           MF.getSubtarget<PPCSubtarget>().isPPC64());
2442 }
2443