xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 4fdaac0e1eb8e75fe59de0bd01cf72329dacbdb4)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
34 
35 static cl::opt<bool>
36 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
37                      cl::desc("Enable spills in prologue to vector registers."),
38                      cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   if (STI.isAIXABI())
58     return STI.isPPC64() ? 40 : 20;
59   return STI.isELFv2ABI() ? 24 : 40;
60 }
61 
62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63   // First slot in the general register save area.
64   return STI.isPPC64() ? -8U : -4U;
65 }
66 
67 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
68   if (STI.isAIXABI() || STI.isPPC64())
69     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
70 
71   // 32-bit SVR4 ABI:
72   return 8;
73 }
74 
75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
76   // SVR4 ABI: First slot in the general register save area.
77   return STI.isPPC64()
78              ? -16U
79              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
80 }
81 
/// Offset of the condition register save slot.
static unsigned computeCRSaveOffset() {
  // The condition register save offset needs to be updated for AIX PPC32.
  const unsigned CRSlotOffset = 8;
  return CRSlotOffset;
}
86 
87 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
88     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
89                           STI.getPlatformStackAlignment(), 0),
90       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
91       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
92       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
93       LinkageSize(computeLinkageSize(Subtarget)),
94       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
95       CRSaveOffset(computeCRSaveOffset()) {}
96 
97 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
98 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
99     unsigned &NumEntries) const {
100   // Early exit if not using the SVR4 ABI.
101   if (!Subtarget.isSVR4ABI()) {
102     NumEntries = 0;
103     return nullptr;
104   }
105 
106 // Floating-point register save area offsets.
107 #define CALLEE_SAVED_FPRS \
108       {PPC::F31, -8},     \
109       {PPC::F30, -16},    \
110       {PPC::F29, -24},    \
111       {PPC::F28, -32},    \
112       {PPC::F27, -40},    \
113       {PPC::F26, -48},    \
114       {PPC::F25, -56},    \
115       {PPC::F24, -64},    \
116       {PPC::F23, -72},    \
117       {PPC::F22, -80},    \
118       {PPC::F21, -88},    \
119       {PPC::F20, -96},    \
120       {PPC::F19, -104},   \
121       {PPC::F18, -112},   \
122       {PPC::F17, -120},   \
123       {PPC::F16, -128},   \
124       {PPC::F15, -136},   \
125       {PPC::F14, -144}
126 
127 // 32-bit general purpose register save area offsets.
128 #define CALLEE_SAVED_GPRS32 \
129       {PPC::R31, -4},       \
130       {PPC::R30, -8},       \
131       {PPC::R29, -12},      \
132       {PPC::R28, -16},      \
133       {PPC::R27, -20},      \
134       {PPC::R26, -24},      \
135       {PPC::R25, -28},      \
136       {PPC::R24, -32},      \
137       {PPC::R23, -36},      \
138       {PPC::R22, -40},      \
139       {PPC::R21, -44},      \
140       {PPC::R20, -48},      \
141       {PPC::R19, -52},      \
142       {PPC::R18, -56},      \
143       {PPC::R17, -60},      \
144       {PPC::R16, -64},      \
145       {PPC::R15, -68},      \
146       {PPC::R14, -72}
147 
148 // 64-bit general purpose register save area offsets.
149 #define CALLEE_SAVED_GPRS64 \
150       {PPC::X31, -8},       \
151       {PPC::X30, -16},      \
152       {PPC::X29, -24},      \
153       {PPC::X28, -32},      \
154       {PPC::X27, -40},      \
155       {PPC::X26, -48},      \
156       {PPC::X25, -56},      \
157       {PPC::X24, -64},      \
158       {PPC::X23, -72},      \
159       {PPC::X22, -80},      \
160       {PPC::X21, -88},      \
161       {PPC::X20, -96},      \
162       {PPC::X19, -104},     \
163       {PPC::X18, -112},     \
164       {PPC::X17, -120},     \
165       {PPC::X16, -128},     \
166       {PPC::X15, -136},     \
167       {PPC::X14, -144}
168 
169 // Vector register save area offsets.
170 #define CALLEE_SAVED_VRS \
171       {PPC::V31, -16},   \
172       {PPC::V30, -32},   \
173       {PPC::V29, -48},   \
174       {PPC::V28, -64},   \
175       {PPC::V27, -80},   \
176       {PPC::V26, -96},   \
177       {PPC::V25, -112},  \
178       {PPC::V24, -128},  \
179       {PPC::V23, -144},  \
180       {PPC::V22, -160},  \
181       {PPC::V21, -176},  \
182       {PPC::V20, -192}
183 
184   // Note that the offsets here overlap, but this is fixed up in
185   // processFunctionBeforeFrameFinalized.
186 
187   static const SpillSlot Offsets[] = {
188       CALLEE_SAVED_FPRS,
189       CALLEE_SAVED_GPRS32,
190 
191       // CR save area offset.  We map each of the nonvolatile CR fields
192       // to the slot for CR2, which is the first of the nonvolatile CR
193       // fields to be assigned, so that we only allocate one save slot.
194       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
195       {PPC::CR2, -4},
196 
197       // VRSAVE save area offset.
198       {PPC::VRSAVE, -4},
199 
200       CALLEE_SAVED_VRS,
201 
202       // SPE register save area (overlaps Vector save area).
203       {PPC::S31, -8},
204       {PPC::S30, -16},
205       {PPC::S29, -24},
206       {PPC::S28, -32},
207       {PPC::S27, -40},
208       {PPC::S26, -48},
209       {PPC::S25, -56},
210       {PPC::S24, -64},
211       {PPC::S23, -72},
212       {PPC::S22, -80},
213       {PPC::S21, -88},
214       {PPC::S20, -96},
215       {PPC::S19, -104},
216       {PPC::S18, -112},
217       {PPC::S17, -120},
218       {PPC::S16, -128},
219       {PPC::S15, -136},
220       {PPC::S14, -144}};
221 
222   static const SpillSlot Offsets64[] = {
223       CALLEE_SAVED_FPRS,
224       CALLEE_SAVED_GPRS64,
225 
226       // VRSAVE save area offset.
227       {PPC::VRSAVE, -4},
228 
229       CALLEE_SAVED_VRS
230   };
231 
232   if (Subtarget.isPPC64()) {
233     NumEntries = array_lengthof(Offsets64);
234 
235     return Offsets64;
236   } else {
237     NumEntries = array_lengthof(Offsets);
238 
239     return Offsets;
240   }
241 }
242 
243 /// RemoveVRSaveCode - We have found that this function does not need any code
244 /// to manipulate the VRSAVE register, even though it uses vector registers.
245 /// This can happen when the only registers used are known to be live in or out
246 /// of the function.  Remove all of the VRSAVE related code from the function.
247 /// FIXME: The removal of the code results in a compile failure at -O0 when the
248 /// function contains a function call, as the GPR containing original VRSAVE
249 /// contents is spilled and reloaded around the call.  Without the prolog code,
250 /// the spill instruction refers to an undefined register.  This code needs
251 /// to account for all uses of that GPR.
252 static void RemoveVRSaveCode(MachineInstr &MI) {
253   MachineBasicBlock *Entry = MI.getParent();
254   MachineFunction *MF = Entry->getParent();
255 
256   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
257   MachineBasicBlock::iterator MBBI = MI;
258   ++MBBI;
259   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
260   MBBI->eraseFromParent();
261 
262   bool RemovedAllMTVRSAVEs = true;
263   // See if we can find and remove the MTVRSAVE instruction from all of the
264   // epilog blocks.
265   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
266     // If last instruction is a return instruction, add an epilogue
267     if (I->isReturnBlock()) {
268       bool FoundIt = false;
269       for (MBBI = I->end(); MBBI != I->begin(); ) {
270         --MBBI;
271         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
272           MBBI->eraseFromParent();  // remove it.
273           FoundIt = true;
274           break;
275         }
276       }
277       RemovedAllMTVRSAVEs &= FoundIt;
278     }
279   }
280 
281   // If we found and removed all MTVRSAVE instructions, remove the read of
282   // VRSAVE as well.
283   if (RemovedAllMTVRSAVEs) {
284     MBBI = MI;
285     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
286     --MBBI;
287     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
288     MBBI->eraseFromParent();
289   }
290 
291   // Finally, nuke the UPDATE_VRSAVE.
292   MI.eraseFromParent();
293 }
294 
295 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
296 // instruction selector.  Based on the vector registers that have been used,
297 // transform this into the appropriate ORI instruction.
298 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
299   MachineFunction *MF = MI.getParent()->getParent();
300   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
301   DebugLoc dl = MI.getDebugLoc();
302 
303   const MachineRegisterInfo &MRI = MF->getRegInfo();
304   unsigned UsedRegMask = 0;
305   for (unsigned i = 0; i != 32; ++i)
306     if (MRI.isPhysRegModified(VRRegNo[i]))
307       UsedRegMask |= 1 << (31-i);
308 
309   // Live in and live out values already must be in the mask, so don't bother
310   // marking them.
311   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
312     unsigned RegNo = TRI->getEncodingValue(LI.first);
313     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
314       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
315   }
316 
317   // Live out registers appear as use operands on return instructions.
318   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
319        UsedRegMask != 0 && BI != BE; ++BI) {
320     const MachineBasicBlock &MBB = *BI;
321     if (!MBB.isReturnBlock())
322       continue;
323     const MachineInstr &Ret = MBB.back();
324     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
325       const MachineOperand &MO = Ret.getOperand(I);
326       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
327         continue;
328       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
329       UsedRegMask &= ~(1 << (31-RegNo));
330     }
331   }
332 
333   // If no registers are used, turn this into a copy.
334   if (UsedRegMask == 0) {
335     // Remove all VRSAVE code.
336     RemoveVRSaveCode(MI);
337     return;
338   }
339 
340   Register SrcReg = MI.getOperand(1).getReg();
341   Register DstReg = MI.getOperand(0).getReg();
342 
343   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
344     if (DstReg != SrcReg)
345       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
346           .addReg(SrcReg)
347           .addImm(UsedRegMask);
348     else
349       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
350           .addReg(SrcReg, RegState::Kill)
351           .addImm(UsedRegMask);
352   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
353     if (DstReg != SrcReg)
354       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
355           .addReg(SrcReg)
356           .addImm(UsedRegMask >> 16);
357     else
358       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
359           .addReg(SrcReg, RegState::Kill)
360           .addImm(UsedRegMask >> 16);
361   } else {
362     if (DstReg != SrcReg)
363       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
364           .addReg(SrcReg)
365           .addImm(UsedRegMask >> 16);
366     else
367       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
368           .addReg(SrcReg, RegState::Kill)
369           .addImm(UsedRegMask >> 16);
370 
371     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
372         .addReg(DstReg, RegState::Kill)
373         .addImm(UsedRegMask & 0xFFFF);
374   }
375 
376   // Remove the old UPDATE_VRSAVE instruction.
377   MI.eraseFromParent();
378 }
379 
380 static bool spillsCR(const MachineFunction &MF) {
381   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
382   return FuncInfo->isCRSpilled();
383 }
384 
385 static bool spillsVRSAVE(const MachineFunction &MF) {
386   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
387   return FuncInfo->isVRSAVESpilled();
388 }
389 
390 static bool hasSpills(const MachineFunction &MF) {
391   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
392   return FuncInfo->hasSpills();
393 }
394 
395 static bool hasNonRISpills(const MachineFunction &MF) {
396   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
397   return FuncInfo->hasNonRISpills();
398 }
399 
400 /// MustSaveLR - Return true if this function requires that we save the LR
401 /// register onto the stack in the prolog and restore it in the epilog of the
402 /// function.
403 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
404   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
405 
406   // We need a save/restore of LR if there is any def of LR (which is
407   // defined by calls, including the PIC setup sequence), or if there is
408   // some use of the LR stack slot (e.g. for builtin_return_address).
409   // (LR comes in 32 and 64 bit versions.)
410   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
411   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
412 }
413 
414 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
415 /// call frame size. Update the MachineFunction object with the stack size.
416 unsigned
417 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
418                                                 bool UseEstimate) const {
419   unsigned NewMaxCallFrameSize = 0;
420   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
421                                             &NewMaxCallFrameSize);
422   MF.getFrameInfo().setStackSize(FrameSize);
423   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
424   return FrameSize;
425 }
426 
427 /// determineFrameLayout - Determine the size of the frame and maximum call
428 /// frame size.
429 unsigned
430 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
431                                        bool UseEstimate,
432                                        unsigned *NewMaxCallFrameSize) const {
433   const MachineFrameInfo &MFI = MF.getFrameInfo();
434   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
435 
436   // Get the number of bytes to allocate from the FrameInfo
437   unsigned FrameSize =
438     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
439 
440   // Get stack alignments. The frame must be aligned to the greatest of these:
441   unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
442   unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
443   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
444 
445   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
446 
447   unsigned LR = RegInfo->getRARegister();
448   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
449   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
450                        !MFI.adjustsStack() &&       // No calls.
451                        !MustSaveLR(MF, LR) &&       // No need to save LR.
452                        !FI->mustSaveTOC() &&        // No need to save TOC.
453                        !RegInfo->hasBasePointer(MF); // No special alignment.
454 
455   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
456   // code if all local vars are reg-allocated.
457   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
458 
459   // Check whether we can skip adjusting the stack pointer (by using red zone)
460   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
461     // No need for frame
462     return 0;
463   }
464 
465   // Get the maximum call frame size of all the calls.
466   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
467 
468   // Maximum call frame needs to be at least big enough for linkage area.
469   unsigned minCallFrameSize = getLinkageSize();
470   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
471 
472   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
473   // that allocations will be aligned.
474   if (MFI.hasVarSizedObjects())
475     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
476 
477   // Update the new max call frame size if the caller passes in a valid pointer.
478   if (NewMaxCallFrameSize)
479     *NewMaxCallFrameSize = maxCallFrameSize;
480 
481   // Include call frame size in total.
482   FrameSize += maxCallFrameSize;
483 
484   // Make sure the frame is aligned.
485   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
486 
487   return FrameSize;
488 }
489 
490 // hasFP - Return true if the specified function actually has a dedicated frame
491 // pointer register.
492 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
493   const MachineFrameInfo &MFI = MF.getFrameInfo();
494   // FIXME: This is pretty much broken by design: hasFP() might be called really
495   // early, before the stack layout was calculated and thus hasFP() might return
496   // true or false here depending on the time of call.
497   return (MFI.getStackSize()) && needsFP(MF);
498 }
499 
500 // needsFP - Return true if the specified function should have a dedicated frame
501 // pointer register.  This is true if the function has variable sized allocas or
502 // if frame pointer elimination is disabled.
503 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
504   const MachineFrameInfo &MFI = MF.getFrameInfo();
505 
506   // Naked functions have no stack frame pushed, so we don't have a frame
507   // pointer.
508   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
509     return false;
510 
511   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
512     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
513     (MF.getTarget().Options.GuaranteedTailCallOpt &&
514      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
515 }
516 
517 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
518   bool is31 = needsFP(MF);
519   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
520   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
521 
522   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
523   bool HasBP = RegInfo->hasBasePointer(MF);
524   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
525   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
526 
527   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
528        BI != BE; ++BI)
529     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
530       --MBBI;
531       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
532         MachineOperand &MO = MBBI->getOperand(I);
533         if (!MO.isReg())
534           continue;
535 
536         switch (MO.getReg()) {
537         case PPC::FP:
538           MO.setReg(FPReg);
539           break;
540         case PPC::FP8:
541           MO.setReg(FP8Reg);
542           break;
543         case PPC::BP:
544           MO.setReg(BPReg);
545           break;
546         case PPC::BP8:
547           MO.setReg(BP8Reg);
548           break;
549 
550         }
551       }
552     }
553 }
554 
555 /*  This function will do the following:
556     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
557       respectively (defaults recommended by the ABI) and return true
558     - If MBB is not an entry block, initialize the register scavenger and look
559       for available registers.
560     - If the defaults (R0/R12) are available, return true
561     - If TwoUniqueRegsRequired is set to true, it looks for two unique
562       registers. Otherwise, look for a single available register.
563       - If the required registers are found, set SR1 and SR2 and return true.
564       - If the required registers are not found, set SR2 or both SR1 and SR2 to
565         PPC::NoRegister and return false.
566 
567     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
568     is not set, this function will attempt to find two different registers, but
569     still return true if only one register is available (and set SR1 == SR2).
570 */
571 bool
572 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
573                                       bool UseAtEnd,
574                                       bool TwoUniqueRegsRequired,
575                                       unsigned *SR1,
576                                       unsigned *SR2) const {
577   RegScavenger RS;
578   unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
579   unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
580 
581   // Set the defaults for the two scratch registers.
582   if (SR1)
583     *SR1 = R0;
584 
585   if (SR2) {
586     assert (SR1 && "Asking for the second scratch register but not the first?");
587     *SR2 = R12;
588   }
589 
590   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
591   if ((UseAtEnd && MBB->isReturnBlock()) ||
592       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
593     return true;
594 
595   RS.enterBasicBlock(*MBB);
596 
597   if (UseAtEnd && !MBB->empty()) {
598     // The scratch register will be used at the end of the block, so must
599     // consider all registers used within the block
600 
601     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
602     // If no terminator, back iterator up to previous instruction.
603     if (MBBI == MBB->end())
604       MBBI = std::prev(MBBI);
605 
606     if (MBBI != MBB->begin())
607       RS.forward(MBBI);
608   }
609 
610   // If the two registers are available, we're all good.
611   // Note that we only return here if both R0 and R12 are available because
612   // although the function may not require two unique registers, it may benefit
613   // from having two so we should try to provide them.
614   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
615     return true;
616 
617   // Get the list of callee-saved registers for the target.
618   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
619   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
620 
621   // Get all the available registers in the block.
622   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
623                                      &PPC::GPRCRegClass);
624 
625   // We shouldn't use callee-saved registers as scratch registers as they may be
626   // available when looking for a candidate block for shrink wrapping but not
627   // available when the actual prologue/epilogue is being emitted because they
628   // were added as live-in to the prologue block by PrologueEpilogueInserter.
629   for (int i = 0; CSRegs[i]; ++i)
630     BV.reset(CSRegs[i]);
631 
632   // Set the first scratch register to the first available one.
633   if (SR1) {
634     int FirstScratchReg = BV.find_first();
635     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
636   }
637 
638   // If there is another one available, set the second scratch register to that.
639   // Otherwise, set it to either PPC::NoRegister if this function requires two
640   // or to whatever SR1 is set to if this function doesn't require two.
641   if (SR2) {
642     int SecondScratchReg = BV.find_next(*SR1);
643     if (SecondScratchReg != -1)
644       *SR2 = SecondScratchReg;
645     else
646       *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
647   }
648 
649   // Now that we've done our best to provide both registers, double check
650   // whether we were unable to provide enough.
651   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
652     return false;
653 
654   return true;
655 }
656 
657 // We need a scratch register for spilling LR and for spilling CR. By default,
658 // we use two scratch registers to hide latency. However, if only one scratch
659 // register is available, we can adjust for that by not overlapping the spill
660 // code. However, if we need to realign the stack (i.e. have a base pointer)
661 // and the stack frame is large, we need two scratch registers.
662 bool
663 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
664   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
665   MachineFunction &MF = *(MBB->getParent());
666   bool HasBP = RegInfo->hasBasePointer(MF);
667   unsigned FrameSize = determineFrameLayout(MF);
668   int NegFrameSize = -FrameSize;
669   bool IsLargeFrame = !isInt<16>(NegFrameSize);
670   MachineFrameInfo &MFI = MF.getFrameInfo();
671   unsigned MaxAlign = MFI.getMaxAlignment();
672   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
673 
674   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
675 }
676 
677 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
678   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
679 
680   return findScratchRegister(TmpMBB, false,
681                              twoUniqueScratchRegsRequired(TmpMBB));
682 }
683 
684 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
685   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
686 
687   return findScratchRegister(TmpMBB, true);
688 }
689 
690 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
691   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
692   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
693 
694   // Abort if there is no register info or function info.
695   if (!RegInfo || !FI)
696     return false;
697 
698   // Only move the stack update on ELFv2 ABI and PPC64.
699   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
700     return false;
701 
702   // Check the frame size first and return false if it does not fit the
703   // requirements.
704   // We need a non-zero frame size as well as a frame that will fit in the red
705   // zone. This is because by moving the stack pointer update we are now storing
706   // to the red zone until the stack pointer is updated. If we get an interrupt
707   // inside the prologue but before the stack update we now have a number of
708   // stores to the red zone and those stores must all fit.
709   MachineFrameInfo &MFI = MF.getFrameInfo();
710   unsigned FrameSize = MFI.getStackSize();
711   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
712     return false;
713 
714   // Frame pointers and base pointers complicate matters so don't do anything
715   // if we have them. For example having a frame pointer will sometimes require
716   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
717   // difficult.
718   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
719     return false;
720 
721   // Calls to fast_cc functions use different rules for passing parameters on
722   // the stack from the ABI and using PIC base in the function imposes
723   // similar restrictions to using the base pointer. It is not generally safe
724   // to move the stack pointer update in these situations.
725   if (FI->hasFastCall() || FI->usesPICBase())
726     return false;
727 
728   // Finally we can move the stack update if we do not require register
729   // scavenging. Register scavenging can introduce more spills and so
730   // may make the frame size larger than we have computed.
731   return !RegInfo->requiresFrameIndexScavenging(MF);
732 }
733 
734 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
735                                     MachineBasicBlock &MBB) const {
736   MachineBasicBlock::iterator MBBI = MBB.begin();
737   MachineFrameInfo &MFI = MF.getFrameInfo();
738   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
739   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
740 
741   MachineModuleInfo &MMI = MF.getMMI();
742   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
743   DebugLoc dl;
744   bool needsCFI = MF.needsFrameMoves();
745 
746   // Get processor type.
747   bool isPPC64 = Subtarget.isPPC64();
748   // Get the ABI.
749   bool isSVR4ABI = Subtarget.isSVR4ABI();
750   bool isAIXABI = Subtarget.isAIXABI();
751   bool isELFv2ABI = Subtarget.isELFv2ABI();
752   assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
753 
754   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
755   // process it.
756   if (!isSVR4ABI)
757     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
758       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
759         if (isAIXABI)
760           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
761         HandleVRSaveUpdate(*MBBI, TII);
762         break;
763       }
764     }
765 
766   // Move MBBI back to the beginning of the prologue block.
767   MBBI = MBB.begin();
768 
769   // Work out frame sizes.
770   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
771   int NegFrameSize = -FrameSize;
772   if (!isInt<32>(NegFrameSize))
773     llvm_unreachable("Unhandled stack size!");
774 
775   if (MFI.isFrameAddressTaken())
776     replaceFPWithRealFP(MF);
777 
778   // Check if the link register (LR) must be saved.
779   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
780   bool MustSaveLR = FI->mustSaveLR();
781   bool MustSaveTOC = FI->mustSaveTOC();
782   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
783   bool MustSaveCR = !MustSaveCRs.empty();
784   // Do we have a frame pointer and/or base pointer for this function?
785   bool HasFP = hasFP(MF);
786   bool HasBP = RegInfo->hasBasePointer(MF);
787   bool HasRedZone = isPPC64 || !isSVR4ABI;
788 
789   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
790   Register BPReg = RegInfo->getBaseRegister(MF);
791   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
792   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
793   unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
794   unsigned ScratchReg  = 0;
795   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
796   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
797   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
798                                                 : PPC::MFLR );
799   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
800                                                  : PPC::STW );
801   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
802                                                      : PPC::STWU );
803   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
804                                                         : PPC::STWUX);
805   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
806                                                           : PPC::LIS );
807   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
808                                                  : PPC::ORI );
809   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
810                                               : PPC::OR );
811   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
812                                                             : PPC::SUBFC);
813   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
814                                                                : PPC::SUBFIC);
815 
816   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
817   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
818   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
819   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
820   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
821          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
822 
823   // Using the same bool variable as below to suppress compiler warnings.
824   bool SingleScratchReg =
825     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
826                         &ScratchReg, &TempReg);
827   assert(SingleScratchReg &&
828          "Required number of registers not available in this block");
829 
830   SingleScratchReg = ScratchReg == TempReg;
831 
832   int LROffset = getReturnSaveOffset();
833 
834   int FPOffset = 0;
835   if (HasFP) {
836     if (isSVR4ABI) {
837       MachineFrameInfo &MFI = MF.getFrameInfo();
838       int FPIndex = FI->getFramePointerSaveIndex();
839       assert(FPIndex && "No Frame Pointer Save Slot!");
840       FPOffset = MFI.getObjectOffset(FPIndex);
841     } else {
842       FPOffset = getFramePointerSaveOffset();
843     }
844   }
845 
846   int BPOffset = 0;
847   if (HasBP) {
848     if (isSVR4ABI) {
849       MachineFrameInfo &MFI = MF.getFrameInfo();
850       int BPIndex = FI->getBasePointerSaveIndex();
851       assert(BPIndex && "No Base Pointer Save Slot!");
852       BPOffset = MFI.getObjectOffset(BPIndex);
853     } else {
854       BPOffset = getBasePointerSaveOffset();
855     }
856   }
857 
858   int PBPOffset = 0;
859   if (FI->usesPICBase()) {
860     MachineFrameInfo &MFI = MF.getFrameInfo();
861     int PBPIndex = FI->getPICBasePointerSaveIndex();
862     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
863     PBPOffset = MFI.getObjectOffset(PBPIndex);
864   }
865 
866   // Get stack alignments.
867   unsigned MaxAlign = MFI.getMaxAlignment();
868   if (HasBP && MaxAlign > 1)
869     assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
870            "Invalid alignment!");
871 
872   // Frames of 32KB & larger require special handling because they cannot be
873   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
874   bool isLargeFrame = !isInt<16>(NegFrameSize);
875 
876   assert((isPPC64 || !MustSaveCR) &&
877          "Prologue CR saving supported only in 64-bit mode");
878 
879   if (MustSaveCR && isAIXABI)
880     report_fatal_error("Prologue CR saving is unimplemented on AIX.");
881 
882   // Check if we can move the stack update instruction (stdu) down the prologue
883   // past the callee saves. Hopefully this will avoid the situation where the
884   // saves are waiting for the update on the store with update to complete.
885   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
886   bool MovingStackUpdateDown = false;
887 
888   // Check if we can move the stack update.
889   if (stackUpdateCanBeMoved(MF)) {
890     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
891     for (CalleeSavedInfo CSI : Info) {
892       int FrIdx = CSI.getFrameIdx();
893       // If the frame index is not negative the callee saved info belongs to a
894       // stack object that is not a fixed stack object. We ignore non-fixed
895       // stack objects because we won't move the stack update pointer past them.
896       if (FrIdx >= 0)
897         continue;
898 
899       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
900         StackUpdateLoc++;
901         MovingStackUpdateDown = true;
902       } else {
903         // We need all of the Frame Indices to meet these conditions.
904         // If they do not, abort the whole operation.
905         StackUpdateLoc = MBBI;
906         MovingStackUpdateDown = false;
907         break;
908       }
909     }
910 
911     // If the operation was not aborted then update the object offset.
912     if (MovingStackUpdateDown) {
913       for (CalleeSavedInfo CSI : Info) {
914         int FrIdx = CSI.getFrameIdx();
915         if (FrIdx < 0)
916           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
917       }
918     }
919   }
920 
921   // If we need to spill the CR and the LR but we don't have two separate
922   // registers available, we must spill them one at a time
923   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
924     // In the ELFv2 ABI, we are not required to save all CR fields.
925     // If only one or two CR fields are clobbered, it is more efficient to use
926     // mfocrf to selectively save just those fields, because mfocrf has short
927     // latency compares to mfcr.
928     unsigned MfcrOpcode = PPC::MFCR8;
929     unsigned CrState = RegState::ImplicitKill;
930     if (isELFv2ABI && MustSaveCRs.size() == 1) {
931       MfcrOpcode = PPC::MFOCRF8;
932       CrState = RegState::Kill;
933     }
934     MachineInstrBuilder MIB =
935       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
936     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
937       MIB.addReg(MustSaveCRs[i], CrState);
938     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
939       .addReg(TempReg, getKillRegState(true))
940       .addImm(getCRSaveOffset())
941       .addReg(SPReg);
942   }
943 
944   if (MustSaveLR)
945     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
946 
947   if (MustSaveCR &&
948       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
949     // In the ELFv2 ABI, we are not required to save all CR fields.
950     // If only one or two CR fields are clobbered, it is more efficient to use
951     // mfocrf to selectively save just those fields, because mfocrf has short
952     // latency compares to mfcr.
953     unsigned MfcrOpcode = PPC::MFCR8;
954     unsigned CrState = RegState::ImplicitKill;
955     if (isELFv2ABI && MustSaveCRs.size() == 1) {
956       MfcrOpcode = PPC::MFOCRF8;
957       CrState = RegState::Kill;
958     }
959     MachineInstrBuilder MIB =
960       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
961     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
962       MIB.addReg(MustSaveCRs[i], CrState);
963   }
964 
965   if (HasRedZone) {
966     if (HasFP)
967       BuildMI(MBB, MBBI, dl, StoreInst)
968         .addReg(FPReg)
969         .addImm(FPOffset)
970         .addReg(SPReg);
971     if (FI->usesPICBase())
972       BuildMI(MBB, MBBI, dl, StoreInst)
973         .addReg(PPC::R30)
974         .addImm(PBPOffset)
975         .addReg(SPReg);
976     if (HasBP)
977       BuildMI(MBB, MBBI, dl, StoreInst)
978         .addReg(BPReg)
979         .addImm(BPOffset)
980         .addReg(SPReg);
981   }
982 
983   if (MustSaveLR)
984     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
985       .addReg(ScratchReg, getKillRegState(true))
986       .addImm(LROffset)
987       .addReg(SPReg);
988 
989   if (MustSaveCR &&
990       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
991     assert(HasRedZone && "A red zone is always available on PPC64");
992     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
993       .addReg(TempReg, getKillRegState(true))
994       .addImm(getCRSaveOffset())
995       .addReg(SPReg);
996   }
997 
998   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
999   if (!FrameSize)
1000     return;
1001 
1002   // Adjust stack pointer: r1 += NegFrameSize.
1003   // If there is a preferred stack alignment, align R1 now
1004 
1005   if (HasBP && HasRedZone) {
1006     // Save a copy of r1 as the base pointer.
1007     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1008       .addReg(SPReg)
1009       .addReg(SPReg);
1010   }
1011 
1012   // Have we generated a STUX instruction to claim stack frame? If so,
1013   // the negated frame size will be placed in ScratchReg.
1014   bool HasSTUX = false;
1015 
1016   // This condition must be kept in sync with canUseAsPrologue.
1017   if (HasBP && MaxAlign > 1) {
1018     if (isPPC64)
1019       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1020         .addReg(SPReg)
1021         .addImm(0)
1022         .addImm(64 - Log2_32(MaxAlign));
1023     else // PPC32...
1024       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1025         .addReg(SPReg)
1026         .addImm(0)
1027         .addImm(32 - Log2_32(MaxAlign))
1028         .addImm(31);
1029     if (!isLargeFrame) {
1030       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1031         .addReg(ScratchReg, RegState::Kill)
1032         .addImm(NegFrameSize);
1033     } else {
1034       assert(!SingleScratchReg && "Only a single scratch reg available");
1035       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1036         .addImm(NegFrameSize >> 16);
1037       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1038         .addReg(TempReg, RegState::Kill)
1039         .addImm(NegFrameSize & 0xFFFF);
1040       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1041         .addReg(ScratchReg, RegState::Kill)
1042         .addReg(TempReg, RegState::Kill);
1043     }
1044 
1045     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1046       .addReg(SPReg, RegState::Kill)
1047       .addReg(SPReg)
1048       .addReg(ScratchReg);
1049     HasSTUX = true;
1050 
1051   } else if (!isLargeFrame) {
1052     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1053       .addReg(SPReg)
1054       .addImm(NegFrameSize)
1055       .addReg(SPReg);
1056 
1057   } else {
1058     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1059       .addImm(NegFrameSize >> 16);
1060     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1061       .addReg(ScratchReg, RegState::Kill)
1062       .addImm(NegFrameSize & 0xFFFF);
1063     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1064       .addReg(SPReg, RegState::Kill)
1065       .addReg(SPReg)
1066       .addReg(ScratchReg);
1067     HasSTUX = true;
1068   }
1069 
1070   // Save the TOC register after the stack pointer update if a prologue TOC
1071   // save is required for the function.
1072   if (MustSaveTOC) {
1073     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1074     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1075       .addReg(TOCReg, getKillRegState(true))
1076       .addImm(TOCSaveOffset)
1077       .addReg(SPReg);
1078   }
1079 
1080   if (!HasRedZone) {
1081     assert(!isPPC64 && "A red zone is always available on PPC64");
1082     if (HasSTUX) {
1083       // The negated frame size is in ScratchReg, and the SPReg has been
1084       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1085       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1086       // the stack frame (i.e. the old SP), ideally, we would put the old
1087       // SP into a register and use it as the base for the stores. The
1088       // problem is that the only available register may be ScratchReg,
1089       // which could be R0, and R0 cannot be used as a base address.
1090 
1091       // First, set ScratchReg to the old SP. This may need to be modified
1092       // later.
1093       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1094         .addReg(ScratchReg, RegState::Kill)
1095         .addReg(SPReg);
1096 
1097       if (ScratchReg == PPC::R0) {
1098         // R0 cannot be used as a base register, but it can be used as an
1099         // index in a store-indexed.
1100         int LastOffset = 0;
1101         if (HasFP)  {
1102           // R0 += (FPOffset-LastOffset).
1103           // Need addic, since addi treats R0 as 0.
1104           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1105             .addReg(ScratchReg)
1106             .addImm(FPOffset-LastOffset);
1107           LastOffset = FPOffset;
1108           // Store FP into *R0.
1109           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1110             .addReg(FPReg, RegState::Kill)  // Save FP.
1111             .addReg(PPC::ZERO)
1112             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1113         }
1114         if (FI->usesPICBase()) {
1115           // R0 += (PBPOffset-LastOffset).
1116           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1117             .addReg(ScratchReg)
1118             .addImm(PBPOffset-LastOffset);
1119           LastOffset = PBPOffset;
1120           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1121             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1122             .addReg(PPC::ZERO)
1123             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1124         }
1125         if (HasBP) {
1126           // R0 += (BPOffset-LastOffset).
1127           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1128             .addReg(ScratchReg)
1129             .addImm(BPOffset-LastOffset);
1130           LastOffset = BPOffset;
1131           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1132             .addReg(BPReg, RegState::Kill)  // Save BP.
1133             .addReg(PPC::ZERO)
1134             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1135           // BP = R0-LastOffset
1136           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1137             .addReg(ScratchReg, RegState::Kill)
1138             .addImm(-LastOffset);
1139         }
1140       } else {
1141         // ScratchReg is not R0, so use it as the base register. It is
1142         // already set to the old SP, so we can use the offsets directly.
1143 
1144         // Now that the stack frame has been allocated, save all the necessary
1145         // registers using ScratchReg as the base address.
1146         if (HasFP)
1147           BuildMI(MBB, MBBI, dl, StoreInst)
1148             .addReg(FPReg)
1149             .addImm(FPOffset)
1150             .addReg(ScratchReg);
1151         if (FI->usesPICBase())
1152           BuildMI(MBB, MBBI, dl, StoreInst)
1153             .addReg(PPC::R30)
1154             .addImm(PBPOffset)
1155             .addReg(ScratchReg);
1156         if (HasBP) {
1157           BuildMI(MBB, MBBI, dl, StoreInst)
1158             .addReg(BPReg)
1159             .addImm(BPOffset)
1160             .addReg(ScratchReg);
1161           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1162             .addReg(ScratchReg, RegState::Kill)
1163             .addReg(ScratchReg);
1164         }
1165       }
1166     } else {
1167       // The frame size is a known 16-bit constant (fitting in the immediate
1168       // field of STWU). To be here we have to be compiling for PPC32.
1169       // Since the SPReg has been decreased by FrameSize, add it back to each
1170       // offset.
1171       if (HasFP)
1172         BuildMI(MBB, MBBI, dl, StoreInst)
1173           .addReg(FPReg)
1174           .addImm(FrameSize + FPOffset)
1175           .addReg(SPReg);
1176       if (FI->usesPICBase())
1177         BuildMI(MBB, MBBI, dl, StoreInst)
1178           .addReg(PPC::R30)
1179           .addImm(FrameSize + PBPOffset)
1180           .addReg(SPReg);
1181       if (HasBP) {
1182         BuildMI(MBB, MBBI, dl, StoreInst)
1183           .addReg(BPReg)
1184           .addImm(FrameSize + BPOffset)
1185           .addReg(SPReg);
1186         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1187           .addReg(SPReg)
1188           .addImm(FrameSize);
1189       }
1190     }
1191   }
1192 
1193   // Add Call Frame Information for the instructions we generated above.
1194   if (needsCFI) {
1195     unsigned CFIIndex;
1196 
1197     if (HasBP) {
1198       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1199       // because if the stack needed aligning then CFA won't be at a fixed
1200       // offset from FP/SP.
1201       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1202       CFIIndex = MF.addFrameInst(
1203           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1204     } else {
1205       // Adjust the definition of CFA to account for the change in SP.
1206       assert(NegFrameSize);
1207       CFIIndex = MF.addFrameInst(
1208           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1209     }
1210     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1211         .addCFIIndex(CFIIndex);
1212 
1213     if (HasFP) {
1214       // Describe where FP was saved, at a fixed offset from CFA.
1215       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1216       CFIIndex = MF.addFrameInst(
1217           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1218       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1219           .addCFIIndex(CFIIndex);
1220     }
1221 
1222     if (FI->usesPICBase()) {
1223       // Describe where FP was saved, at a fixed offset from CFA.
1224       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1225       CFIIndex = MF.addFrameInst(
1226           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1227       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1228           .addCFIIndex(CFIIndex);
1229     }
1230 
1231     if (HasBP) {
1232       // Describe where BP was saved, at a fixed offset from CFA.
1233       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1234       CFIIndex = MF.addFrameInst(
1235           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1236       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1237           .addCFIIndex(CFIIndex);
1238     }
1239 
1240     if (MustSaveLR) {
1241       // Describe where LR was saved, at a fixed offset from CFA.
1242       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1243       CFIIndex = MF.addFrameInst(
1244           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1245       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1246           .addCFIIndex(CFIIndex);
1247     }
1248   }
1249 
1250   // If there is a frame pointer, copy R1 into R31
1251   if (HasFP) {
1252     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1253       .addReg(SPReg)
1254       .addReg(SPReg);
1255 
1256     if (!HasBP && needsCFI) {
1257       // Change the definition of CFA from SP+offset to FP+offset, because SP
1258       // will change at every alloca.
1259       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1260       unsigned CFIIndex = MF.addFrameInst(
1261           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1262 
1263       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1264           .addCFIIndex(CFIIndex);
1265     }
1266   }
1267 
1268   if (needsCFI) {
1269     // Describe where callee saved registers were saved, at fixed offsets from
1270     // CFA.
1271     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1272     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1273       unsigned Reg = CSI[I].getReg();
1274       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1275 
1276       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1277       // subregisters of CR2. We just need to emit a move of CR2.
1278       if (PPC::CRBITRCRegClass.contains(Reg))
1279         continue;
1280 
1281       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1282         continue;
1283 
1284       // For SVR4, don't emit a move for the CR spill slot if we haven't
1285       // spilled CRs.
1286       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1287           && !MustSaveCR)
1288         continue;
1289 
1290       // For 64-bit SVR4 when we have spilled CRs, the spill location
1291       // is SP+8, not a frame-relative slot.
1292       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1293         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1294         // the whole CR word.  In the ELFv2 ABI, every CR that was
1295         // actually saved gets its own CFI record.
1296         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1297         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1298             nullptr, MRI->getDwarfRegNum(CRReg, true), getCRSaveOffset()));
1299         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1300             .addCFIIndex(CFIIndex);
1301         continue;
1302       }
1303 
1304       if (CSI[I].isSpilledToReg()) {
1305         unsigned SpilledReg = CSI[I].getDstReg();
1306         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1307             nullptr, MRI->getDwarfRegNum(Reg, true),
1308             MRI->getDwarfRegNum(SpilledReg, true)));
1309         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1310           .addCFIIndex(CFIRegister);
1311       } else {
1312         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1313         // We have changed the object offset above but we do not want to change
1314         // the actual offsets in the CFI instruction so we have to undo the
1315         // offset change here.
1316         if (MovingStackUpdateDown)
1317           Offset -= NegFrameSize;
1318 
1319         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1320             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1321         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1322             .addCFIIndex(CFIIndex);
1323       }
1324     }
1325   }
1326 }
1327 
1328 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1329                                     MachineBasicBlock &MBB) const {
1330   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1331   DebugLoc dl;
1332 
1333   if (MBBI != MBB.end())
1334     dl = MBBI->getDebugLoc();
1335 
1336   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1337   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1338 
1339   // Get alignment info so we know how to restore the SP.
1340   const MachineFrameInfo &MFI = MF.getFrameInfo();
1341 
1342   // Get the number of bytes allocated from the FrameInfo.
1343   int FrameSize = MFI.getStackSize();
1344 
1345   // Get processor type.
1346   bool isPPC64 = Subtarget.isPPC64();
1347   // Get the ABI.
1348   bool isSVR4ABI = Subtarget.isSVR4ABI();
1349 
1350   // Check if the link register (LR) has been saved.
1351   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1352   bool MustSaveLR = FI->mustSaveLR();
1353   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1354   bool MustSaveCR = !MustSaveCRs.empty();
1355   // Do we have a frame pointer and/or base pointer for this function?
1356   bool HasFP = hasFP(MF);
1357   bool HasBP = RegInfo->hasBasePointer(MF);
1358   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1359 
1360   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1361   Register BPReg = RegInfo->getBaseRegister(MF);
1362   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1363   unsigned ScratchReg = 0;
1364   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1365   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1366                                                  : PPC::MTLR );
1367   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1368                                                  : PPC::LWZ );
1369   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1370                                                            : PPC::LIS );
1371   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1372                                               : PPC::OR );
1373   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1374                                                   : PPC::ORI );
1375   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1376                                                    : PPC::ADDI );
1377   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1378                                                 : PPC::ADD4 );
1379 
1380   int LROffset = getReturnSaveOffset();
1381 
1382   int FPOffset = 0;
1383 
1384   // Using the same bool variable as below to suppress compiler warnings.
1385   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1386                                               &TempReg);
1387   assert(SingleScratchReg &&
1388          "Could not find an available scratch register");
1389 
1390   SingleScratchReg = ScratchReg == TempReg;
1391 
1392   if (HasFP) {
1393     if (isSVR4ABI) {
1394       int FPIndex = FI->getFramePointerSaveIndex();
1395       assert(FPIndex && "No Frame Pointer Save Slot!");
1396       FPOffset = MFI.getObjectOffset(FPIndex);
1397     } else {
1398       FPOffset = getFramePointerSaveOffset();
1399     }
1400   }
1401 
1402   int BPOffset = 0;
1403   if (HasBP) {
1404     if (isSVR4ABI) {
1405       int BPIndex = FI->getBasePointerSaveIndex();
1406       assert(BPIndex && "No Base Pointer Save Slot!");
1407       BPOffset = MFI.getObjectOffset(BPIndex);
1408     } else {
1409       BPOffset = getBasePointerSaveOffset();
1410     }
1411   }
1412 
1413   int PBPOffset = 0;
1414   if (FI->usesPICBase()) {
1415     int PBPIndex = FI->getPICBasePointerSaveIndex();
1416     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1417     PBPOffset = MFI.getObjectOffset(PBPIndex);
1418   }
1419 
1420   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1421 
1422   if (IsReturnBlock) {
1423     unsigned RetOpcode = MBBI->getOpcode();
1424     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1425                       RetOpcode == PPC::TCRETURNdi ||
1426                       RetOpcode == PPC::TCRETURNai ||
1427                       RetOpcode == PPC::TCRETURNri8 ||
1428                       RetOpcode == PPC::TCRETURNdi8 ||
1429                       RetOpcode == PPC::TCRETURNai8;
1430 
1431     if (UsesTCRet) {
1432       int MaxTCRetDelta = FI->getTailCallSPDelta();
1433       MachineOperand &StackAdjust = MBBI->getOperand(1);
1434       assert(StackAdjust.isImm() && "Expecting immediate value.");
1435       // Adjust stack pointer.
1436       int StackAdj = StackAdjust.getImm();
1437       int Delta = StackAdj - MaxTCRetDelta;
1438       assert((Delta >= 0) && "Delta must be positive");
1439       if (MaxTCRetDelta>0)
1440         FrameSize += (StackAdj +Delta);
1441       else
1442         FrameSize += StackAdj;
1443     }
1444   }
1445 
1446   // Frames of 32KB & larger require special handling because they cannot be
1447   // indexed into with a simple LD/LWZ immediate offset operand.
1448   bool isLargeFrame = !isInt<16>(FrameSize);
1449 
1450   // On targets without red zone, the SP needs to be restored last, so that
1451   // all live contents of the stack frame are upwards of the SP. This means
1452   // that we cannot restore SP just now, since there may be more registers
1453   // to restore from the stack frame (e.g. R31). If the frame size is not
1454   // a simple immediate value, we will need a spare register to hold the
1455   // restored SP. If the frame size is known and small, we can simply adjust
1456   // the offsets of the registers to be restored, and still use SP to restore
1457   // them. In such case, the final update of SP will be to add the frame
1458   // size to it.
1459   // To simplify the code, set RBReg to the base register used to restore
1460   // values from the stack, and set SPAdd to the value that needs to be added
1461   // to the SP at the end. The default values are as if red zone was present.
1462   unsigned RBReg = SPReg;
1463   unsigned SPAdd = 0;
1464 
1465   // Check if we can move the stack update instruction up the epilogue
1466   // past the callee saves. This will allow the move to LR instruction
1467   // to be executed before the restores of the callee saves which means
1468   // that the callee saves can hide the latency from the MTLR instruction.
1469   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1470   if (stackUpdateCanBeMoved(MF)) {
1471     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1472     for (CalleeSavedInfo CSI : Info) {
1473       int FrIdx = CSI.getFrameIdx();
1474       // If the frame index is not negative the callee saved info belongs to a
1475       // stack object that is not a fixed stack object. We ignore non-fixed
1476       // stack objects because we won't move the update of the stack pointer
1477       // past them.
1478       if (FrIdx >= 0)
1479         continue;
1480 
1481       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1482         StackUpdateLoc--;
1483       else {
1484         // Abort the operation as we can't update all CSR restores.
1485         StackUpdateLoc = MBBI;
1486         break;
1487       }
1488     }
1489   }
1490 
1491   if (FrameSize) {
1492     // In the prologue, the loaded (or persistent) stack pointer value is
1493     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1494     // zone add this offset back now.
1495 
1496     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1497     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1498     // call which invalidates the stack pointer value in SP(0). So we use the
1499     // value of R31 in this case.
1500     if (FI->hasFastCall()) {
1501       assert(HasFP && "Expecting a valid frame pointer.");
1502       if (!HasRedZone)
1503         RBReg = FPReg;
1504       if (!isLargeFrame) {
1505         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1506           .addReg(FPReg).addImm(FrameSize);
1507       } else {
1508         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1509           .addImm(FrameSize >> 16);
1510         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1511           .addReg(ScratchReg, RegState::Kill)
1512           .addImm(FrameSize & 0xFFFF);
1513         BuildMI(MBB, MBBI, dl, AddInst)
1514           .addReg(RBReg)
1515           .addReg(FPReg)
1516           .addReg(ScratchReg);
1517       }
1518     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1519       if (HasRedZone) {
1520         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1521           .addReg(SPReg)
1522           .addImm(FrameSize);
1523       } else {
1524         // Make sure that adding FrameSize will not overflow the max offset
1525         // size.
1526         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1527                "Local offsets should be negative");
1528         SPAdd = FrameSize;
1529         FPOffset += FrameSize;
1530         BPOffset += FrameSize;
1531         PBPOffset += FrameSize;
1532       }
1533     } else {
1534       // We don't want to use ScratchReg as a base register, because it
1535       // could happen to be R0. Use FP instead, but make sure to preserve it.
1536       if (!HasRedZone) {
1537         // If FP is not saved, copy it to ScratchReg.
1538         if (!HasFP)
1539           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1540             .addReg(FPReg)
1541             .addReg(FPReg);
1542         RBReg = FPReg;
1543       }
1544       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1545         .addImm(0)
1546         .addReg(SPReg);
1547     }
1548   }
1549   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1550   // If there is no red zone, ScratchReg may be needed for holding a useful
1551   // value (although not the base register). Make sure it is not overwritten
1552   // too early.
1553 
1554   assert((isPPC64 || !MustSaveCR) &&
1555          "Epilogue CR restoring supported only in 64-bit mode");
1556 
1557   // If we need to restore both the LR and the CR and we only have one
1558   // available scratch register, we must do them one at a time.
1559   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1560     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1561     // is live here.
1562     assert(HasRedZone && "Expecting red zone");
1563     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1564       .addImm(getCRSaveOffset())
1565       .addReg(SPReg);
1566     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1567       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1568         .addReg(TempReg, getKillRegState(i == e-1));
1569   }
1570 
1571   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1572   // LR is stored in the caller's stack frame. ScratchReg will be needed
1573   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1574   // a base register anyway, because it may happen to be R0.
1575   bool LoadedLR = false;
1576   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1577     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1578       .addImm(LROffset+SPAdd)
1579       .addReg(RBReg);
1580     LoadedLR = true;
1581   }
1582 
1583   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1584     // This will only occur for PPC64.
1585     assert(isPPC64 && "Expecting 64-bit mode");
1586     assert(RBReg == SPReg && "Should be using SP as a base register");
1587     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1588       .addImm(getCRSaveOffset())
1589       .addReg(RBReg);
1590   }
1591 
1592   if (HasFP) {
1593     // If there is red zone, restore FP directly, since SP has already been
1594     // restored. Otherwise, restore the value of FP into ScratchReg.
1595     if (HasRedZone || RBReg == SPReg)
1596       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1597         .addImm(FPOffset)
1598         .addReg(SPReg);
1599     else
1600       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1601         .addImm(FPOffset)
1602         .addReg(RBReg);
1603   }
1604 
1605   if (FI->usesPICBase())
1606     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1607       .addImm(PBPOffset)
1608       .addReg(RBReg);
1609 
1610   if (HasBP)
1611     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1612       .addImm(BPOffset)
1613       .addReg(RBReg);
1614 
1615   // There is nothing more to be loaded from the stack, so now we can
1616   // restore SP: SP = RBReg + SPAdd.
1617   if (RBReg != SPReg || SPAdd != 0) {
1618     assert(!HasRedZone && "This should not happen with red zone");
1619     // If SPAdd is 0, generate a copy.
1620     if (SPAdd == 0)
1621       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1622         .addReg(RBReg)
1623         .addReg(RBReg);
1624     else
1625       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1626         .addReg(RBReg)
1627         .addImm(SPAdd);
1628 
1629     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1630     if (RBReg == FPReg)
1631       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1632         .addReg(ScratchReg)
1633         .addReg(ScratchReg);
1634 
1635     // Now load the LR from the caller's stack frame.
1636     if (MustSaveLR && !LoadedLR)
1637       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1638         .addImm(LROffset)
1639         .addReg(SPReg);
1640   }
1641 
1642   if (MustSaveCR &&
1643       !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1644     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1645       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1646         .addReg(TempReg, getKillRegState(i == e-1));
1647 
1648   if (MustSaveLR)
1649     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1650 
1651   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1652   // call optimization
1653   if (IsReturnBlock) {
1654     unsigned RetOpcode = MBBI->getOpcode();
1655     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1656         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1657         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1658       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1659       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1660 
1661       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1662         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1663           .addReg(SPReg).addImm(CallerAllocatedAmt);
1664       } else {
1665         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1666           .addImm(CallerAllocatedAmt >> 16);
1667         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1668           .addReg(ScratchReg, RegState::Kill)
1669           .addImm(CallerAllocatedAmt & 0xFFFF);
1670         BuildMI(MBB, MBBI, dl, AddInst)
1671           .addReg(SPReg)
1672           .addReg(FPReg)
1673           .addReg(ScratchReg);
1674       }
1675     } else {
1676       createTailCallBranchInstr(MBB);
1677     }
1678   }
1679 }
1680 
1681 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1682   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1683 
1684   // If we got this far a first terminator should exist.
1685   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1686 
1687   DebugLoc dl = MBBI->getDebugLoc();
1688   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1689 
1690   // Create branch instruction for pseudo tail call return instruction
1691   unsigned RetOpcode = MBBI->getOpcode();
1692   if (RetOpcode == PPC::TCRETURNdi) {
1693     MBBI = MBB.getLastNonDebugInstr();
1694     MachineOperand &JumpTarget = MBBI->getOperand(0);
1695     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1696       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1697   } else if (RetOpcode == PPC::TCRETURNri) {
1698     MBBI = MBB.getLastNonDebugInstr();
1699     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1700     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1701   } else if (RetOpcode == PPC::TCRETURNai) {
1702     MBBI = MBB.getLastNonDebugInstr();
1703     MachineOperand &JumpTarget = MBBI->getOperand(0);
1704     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1705   } else if (RetOpcode == PPC::TCRETURNdi8) {
1706     MBBI = MBB.getLastNonDebugInstr();
1707     MachineOperand &JumpTarget = MBBI->getOperand(0);
1708     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1709       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1710   } else if (RetOpcode == PPC::TCRETURNri8) {
1711     MBBI = MBB.getLastNonDebugInstr();
1712     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1713     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1714   } else if (RetOpcode == PPC::TCRETURNai8) {
1715     MBBI = MBB.getLastNonDebugInstr();
1716     MachineOperand &JumpTarget = MBBI->getOperand(0);
1717     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1718   }
1719 }
1720 
1721 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1722                                             BitVector &SavedRegs,
1723                                             RegScavenger *RS) const {
1724   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1725 
1726   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1727 
1728   //  Save and clear the LR state.
1729   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1730   unsigned LR = RegInfo->getRARegister();
1731   FI->setMustSaveLR(MustSaveLR(MF, LR));
1732   SavedRegs.reset(LR);
1733 
1734   //  Save R31 if necessary
1735   int FPSI = FI->getFramePointerSaveIndex();
1736   const bool isPPC64 = Subtarget.isPPC64();
1737   MachineFrameInfo &MFI = MF.getFrameInfo();
1738 
1739   // If the frame pointer save index hasn't been defined yet.
1740   if (!FPSI && needsFP(MF)) {
1741     // Find out what the fix offset of the frame pointer save area.
1742     int FPOffset = getFramePointerSaveOffset();
1743     // Allocate the frame index for frame pointer save area.
1744     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1745     // Save the result.
1746     FI->setFramePointerSaveIndex(FPSI);
1747   }
1748 
1749   int BPSI = FI->getBasePointerSaveIndex();
1750   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1751     int BPOffset = getBasePointerSaveOffset();
1752     // Allocate the frame index for the base pointer save area.
1753     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1754     // Save the result.
1755     FI->setBasePointerSaveIndex(BPSI);
1756   }
1757 
1758   // Reserve stack space for the PIC Base register (R30).
1759   // Only used in SVR4 32-bit.
1760   if (FI->usesPICBase()) {
1761     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1762     FI->setPICBasePointerSaveIndex(PBPSI);
1763   }
1764 
1765   // Make sure we don't explicitly spill r31, because, for example, we have
1766   // some inline asm which explicitly clobbers it, when we otherwise have a
1767   // frame pointer and are using r31's spill slot for the prologue/epilogue
1768   // code. Same goes for the base pointer and the PIC base register.
1769   if (needsFP(MF))
1770     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1771   if (RegInfo->hasBasePointer(MF))
1772     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1773   if (FI->usesPICBase())
1774     SavedRegs.reset(PPC::R30);
1775 
1776   // Reserve stack space to move the linkage area to in case of a tail call.
1777   int TCSPDelta = 0;
1778   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1779       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1780     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1781   }
1782 
1783   // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1784   // function uses CR 2, 3, or 4. For 64-bit SVR4 we create a FixedStack
1785   // object at the offset of the CR-save slot in the linkage area. The actual
1786   // save and restore of the condition register will be created as part of the
1787   // prologue and epilogue insertion, but the FixedStack object is needed to
1788   // keep the CalleSavedInfo valid.
1789   if (Subtarget.isSVR4ABI() &&
1790       (SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1791        SavedRegs.test(PPC::CR4))) {
1792     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1793     const int64_t SpillOffset = Subtarget.isPPC64() ? 8 : -4;
1794     int FrameIdx =
1795         MFI.CreateFixedObject(SpillSize, SpillOffset,
1796                               /* IsImmutable */ true, /* IsAliased */ false);
1797     FI->setCRSpillFrameIndex(FrameIdx);
1798   }
1799 }
1800 
1801 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1802                                                        RegScavenger *RS) const {
1803   // Early exit if not using the SVR4 ABI.
1804   if (!Subtarget.isSVR4ABI()) {
1805     addScavengingSpillSlot(MF, RS);
1806     return;
1807   }
1808 
1809   // Get callee saved register information.
1810   MachineFrameInfo &MFI = MF.getFrameInfo();
1811   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1812 
1813   // If the function is shrink-wrapped, and if the function has a tail call, the
1814   // tail call might not be in the new RestoreBlock, so real branch instruction
1815   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1816   // RestoreBlock. So we handle this case here.
1817   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1818     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1819     for (MachineBasicBlock &MBB : MF) {
1820       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1821         createTailCallBranchInstr(MBB);
1822     }
1823   }
1824 
1825   // Early exit if no callee saved registers are modified!
1826   if (CSI.empty() && !needsFP(MF)) {
1827     addScavengingSpillSlot(MF, RS);
1828     return;
1829   }
1830 
1831   unsigned MinGPR = PPC::R31;
1832   unsigned MinG8R = PPC::X31;
1833   unsigned MinFPR = PPC::F31;
1834   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1835 
1836   bool HasGPSaveArea = false;
1837   bool HasG8SaveArea = false;
1838   bool HasFPSaveArea = false;
1839   bool HasVRSAVESaveArea = false;
1840   bool HasVRSaveArea = false;
1841 
1842   SmallVector<CalleeSavedInfo, 18> GPRegs;
1843   SmallVector<CalleeSavedInfo, 18> G8Regs;
1844   SmallVector<CalleeSavedInfo, 18> FPRegs;
1845   SmallVector<CalleeSavedInfo, 18> VRegs;
1846 
1847   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1848     unsigned Reg = CSI[i].getReg();
1849     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1850             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1851            "Not expecting to try to spill R2 in a function that must save TOC");
1852     if (PPC::GPRCRegClass.contains(Reg)) {
1853       HasGPSaveArea = true;
1854 
1855       GPRegs.push_back(CSI[i]);
1856 
1857       if (Reg < MinGPR) {
1858         MinGPR = Reg;
1859       }
1860     } else if (PPC::G8RCRegClass.contains(Reg)) {
1861       HasG8SaveArea = true;
1862 
1863       G8Regs.push_back(CSI[i]);
1864 
1865       if (Reg < MinG8R) {
1866         MinG8R = Reg;
1867       }
1868     } else if (PPC::F8RCRegClass.contains(Reg)) {
1869       HasFPSaveArea = true;
1870 
1871       FPRegs.push_back(CSI[i]);
1872 
1873       if (Reg < MinFPR) {
1874         MinFPR = Reg;
1875       }
1876     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1877                PPC::CRRCRegClass.contains(Reg)) {
1878       ; // do nothing, as we already know whether CRs are spilled
1879     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1880       HasVRSAVESaveArea = true;
1881     } else if (PPC::VRRCRegClass.contains(Reg) ||
1882                PPC::SPERCRegClass.contains(Reg)) {
1883       // Altivec and SPE are mutually exclusive, but have the same stack
1884       // alignment requirements, so overload the save area for both cases.
1885       HasVRSaveArea = true;
1886 
1887       VRegs.push_back(CSI[i]);
1888 
1889       if (Reg < MinVR) {
1890         MinVR = Reg;
1891       }
1892     } else {
1893       llvm_unreachable("Unknown RegisterClass!");
1894     }
1895   }
1896 
1897   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1898   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1899 
1900   int64_t LowerBound = 0;
1901 
1902   // Take into account stack space reserved for tail calls.
1903   int TCSPDelta = 0;
1904   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1905       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1906     LowerBound = TCSPDelta;
1907   }
1908 
1909   // The Floating-point register save area is right below the back chain word
1910   // of the previous stack frame.
1911   if (HasFPSaveArea) {
1912     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1913       int FI = FPRegs[i].getFrameIdx();
1914 
1915       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1916     }
1917 
1918     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1919   }
1920 
1921   // Check whether the frame pointer register is allocated. If so, make sure it
1922   // is spilled to the correct offset.
1923   if (needsFP(MF)) {
1924     int FI = PFI->getFramePointerSaveIndex();
1925     assert(FI && "No Frame Pointer Save Slot!");
1926     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1927     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1928     HasGPSaveArea = true;
1929   }
1930 
1931   if (PFI->usesPICBase()) {
1932     int FI = PFI->getPICBasePointerSaveIndex();
1933     assert(FI && "No PIC Base Pointer Save Slot!");
1934     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1935 
1936     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1937     HasGPSaveArea = true;
1938   }
1939 
1940   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1941   if (RegInfo->hasBasePointer(MF)) {
1942     int FI = PFI->getBasePointerSaveIndex();
1943     assert(FI && "No Base Pointer Save Slot!");
1944     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1945 
1946     Register BP = RegInfo->getBaseRegister(MF);
1947     if (PPC::G8RCRegClass.contains(BP)) {
1948       MinG8R = std::min<unsigned>(MinG8R, BP);
1949       HasG8SaveArea = true;
1950     } else if (PPC::GPRCRegClass.contains(BP)) {
1951       MinGPR = std::min<unsigned>(MinGPR, BP);
1952       HasGPSaveArea = true;
1953     }
1954   }
1955 
1956   // General register save area starts right below the Floating-point
1957   // register save area.
1958   if (HasGPSaveArea || HasG8SaveArea) {
1959     // Move general register save area spill slots down, taking into account
1960     // the size of the Floating-point register save area.
1961     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1962       if (!GPRegs[i].isSpilledToReg()) {
1963         int FI = GPRegs[i].getFrameIdx();
1964         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1965       }
1966     }
1967 
1968     // Move general register save area spill slots down, taking into account
1969     // the size of the Floating-point register save area.
1970     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1971       if (!G8Regs[i].isSpilledToReg()) {
1972         int FI = G8Regs[i].getFrameIdx();
1973         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1974       }
1975     }
1976 
1977     unsigned MinReg =
1978       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1979                          TRI->getEncodingValue(MinG8R));
1980 
1981     if (Subtarget.isPPC64()) {
1982       LowerBound -= (31 - MinReg + 1) * 8;
1983     } else {
1984       LowerBound -= (31 - MinReg + 1) * 4;
1985     }
1986   }
1987 
1988   // For 32-bit only, the CR save area is below the general register
1989   // save area.  For 64-bit SVR4, the CR save area is addressed relative
1990   // to the stack pointer and hence does not need an adjustment here.
1991   // Only CR2 (the first nonvolatile spilled) has an associated frame
1992   // index so that we have a single uniform save area.
1993   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
1994     // Adjust the frame index of the CR spill slot.
1995     for (const auto &CSInfo : CSI) {
1996       if (CSInfo.getReg() == PPC::CR2) {
1997         int FI = CSInfo.getFrameIdx();
1998         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1999         break;
2000       }
2001     }
2002 
2003     LowerBound -= 4; // The CR save area is always 4 bytes long.
2004   }
2005 
2006   if (HasVRSAVESaveArea) {
2007     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2008     //             which have the VRSAVE register class?
2009     // Adjust the frame index of the VRSAVE spill slot.
2010     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2011       unsigned Reg = CSI[i].getReg();
2012 
2013       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2014         int FI = CSI[i].getFrameIdx();
2015 
2016         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2017       }
2018     }
2019 
2020     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2021   }
2022 
2023   // Both Altivec and SPE have the same alignment and padding requirements
2024   // within the stack frame.
2025   if (HasVRSaveArea) {
2026     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2027     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2028     // we are using negative number here (the stack grows downward). We should
2029     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2030     // is the alignment size ( n = 16 here) and y is the size after aligning.
2031     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2032     LowerBound &= ~(15);
2033 
2034     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2035       int FI = VRegs[i].getFrameIdx();
2036 
2037       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2038     }
2039   }
2040 
2041   addScavengingSpillSlot(MF, RS);
2042 }
2043 
2044 void
2045 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2046                                          RegScavenger *RS) const {
2047   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2048   // a large stack, which will require scavenging a register to materialize a
2049   // large offset.
2050 
2051   // We need to have a scavenger spill slot for spills if the frame size is
2052   // large. In case there is no free register for large-offset addressing,
2053   // this slot is used for the necessary emergency spill. Also, we need the
2054   // slot for dynamic stack allocations.
2055 
2056   // The scavenger might be invoked if the frame offset does not fit into
2057   // the 16-bit immediate. We don't know the complete frame size here
2058   // because we've not yet computed callee-saved register spills or the
2059   // needed alignment padding.
2060   unsigned StackSize = determineFrameLayout(MF, true);
2061   MachineFrameInfo &MFI = MF.getFrameInfo();
2062   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2063       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2064     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2065     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2066     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2067     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2068     unsigned Size = TRI.getSpillSize(RC);
2069     unsigned Align = TRI.getSpillAlignment(RC);
2070     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2071 
2072     // Might we have over-aligned allocas?
2073     bool HasAlVars = MFI.hasVarSizedObjects() &&
2074                      MFI.getMaxAlignment() > getStackAlignment();
2075 
2076     // These kinds of spills might need two registers.
2077     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2078       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2079 
2080   }
2081 }
2082 
2083 // This function checks if a callee saved gpr can be spilled to a volatile
2084 // vector register. This occurs for leaf functions when the option
2085 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2086 // which were not spilled to vectors, return false so the target independent
2087 // code can handle them by assigning a FrameIdx to a stack slot.
2088 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2089     MachineFunction &MF, const TargetRegisterInfo *TRI,
2090     std::vector<CalleeSavedInfo> &CSI) const {
2091 
2092   if (CSI.empty())
2093     return true; // Early exit if no callee saved registers are modified!
2094 
2095   // Early exit if cannot spill gprs to volatile vector registers.
2096   MachineFrameInfo &MFI = MF.getFrameInfo();
2097   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2098     return false;
2099 
2100   // Build a BitVector of VSRs that can be used for spilling GPRs.
2101   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2102   BitVector BVCalleeSaved(TRI->getNumRegs());
2103   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2104   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2105   for (unsigned i = 0; CSRegs[i]; ++i)
2106     BVCalleeSaved.set(CSRegs[i]);
2107 
2108   for (unsigned Reg : BVAllocatable.set_bits()) {
2109     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2110     // used in the function.
2111     if (BVCalleeSaved[Reg] ||
2112         (!PPC::F8RCRegClass.contains(Reg) &&
2113          !PPC::VFRCRegClass.contains(Reg)) ||
2114         (MF.getRegInfo().isPhysRegUsed(Reg)))
2115       BVAllocatable.reset(Reg);
2116   }
2117 
2118   bool AllSpilledToReg = true;
2119   for (auto &CS : CSI) {
2120     if (BVAllocatable.none())
2121       return false;
2122 
2123     unsigned Reg = CS.getReg();
2124     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2125       AllSpilledToReg = false;
2126       continue;
2127     }
2128 
2129     unsigned VolatileVFReg = BVAllocatable.find_first();
2130     if (VolatileVFReg < BVAllocatable.size()) {
2131       CS.setDstReg(VolatileVFReg);
2132       BVAllocatable.reset(VolatileVFReg);
2133     } else {
2134       AllSpilledToReg = false;
2135     }
2136   }
2137   return AllSpilledToReg;
2138 }
2139 
2140 bool PPCFrameLowering::spillCalleeSavedRegisters(
2141     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2142     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2143 
2144   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2145   // Return false otherwise to maintain pre-existing behavior.
2146   if (!Subtarget.isSVR4ABI())
2147     return false;
2148 
2149   MachineFunction *MF = MBB.getParent();
2150   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2151   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2152   bool MustSaveTOC = FI->mustSaveTOC();
2153   DebugLoc DL;
2154   bool CRSpilled = false;
2155   MachineInstrBuilder CRMIB;
2156 
2157   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2158     unsigned Reg = CSI[i].getReg();
2159     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2160     if (Reg == PPC::VRSAVE)
2161       continue;
2162 
2163     // CR2 through CR4 are the nonvolatile CR fields.
2164     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2165 
2166     // Add the callee-saved register as live-in; it's killed at the spill.
2167     // Do not do this for callee-saved registers that are live-in to the
2168     // function because they will already be marked live-in and this will be
2169     // adding it for a second time. It is an error to add the same register
2170     // to the set more than once.
2171     const MachineRegisterInfo &MRI = MF->getRegInfo();
2172     bool IsLiveIn = MRI.isLiveIn(Reg);
2173     if (!IsLiveIn)
2174        MBB.addLiveIn(Reg);
2175 
2176     if (CRSpilled && IsCRField) {
2177       CRMIB.addReg(Reg, RegState::ImplicitKill);
2178       continue;
2179     }
2180 
2181     // The actual spill will happen in the prologue.
2182     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2183       continue;
2184 
2185     // Insert the spill to the stack frame.
2186     if (IsCRField) {
2187       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2188       if (Subtarget.isPPC64()) {
2189         // The actual spill will happen at the start of the prologue.
2190         FuncInfo->addMustSaveCR(Reg);
2191       } else {
2192         CRSpilled = true;
2193         FuncInfo->setSpillsCR();
2194 
2195         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2196         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2197         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2198                   .addReg(Reg, RegState::ImplicitKill);
2199 
2200         MBB.insert(MI, CRMIB);
2201         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2202                                          .addReg(PPC::R12,
2203                                                  getKillRegState(true)),
2204                                          CSI[i].getFrameIdx()));
2205       }
2206     } else {
2207       if (CSI[i].isSpilledToReg()) {
2208         NumPESpillVSR++;
2209         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2210           .addReg(Reg, getKillRegState(true));
2211       } else {
2212         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2213         // Use !IsLiveIn for the kill flag.
2214         // We do not want to kill registers that are live in this function
2215         // before their use because they will become undefined registers.
2216         // Functions without NoUnwind need to preserve the order of elements in
2217         // saved vector registers.
2218         if (Subtarget.needsSwapsForVSXMemOps() &&
2219             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2220           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2221                                        CSI[i].getFrameIdx(), RC, TRI);
2222         else
2223           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2224                                   RC, TRI);
2225       }
2226     }
2227   }
2228   return true;
2229 }
2230 
2231 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2232                        bool CR4Spilled, MachineBasicBlock &MBB,
2233                        MachineBasicBlock::iterator MI,
2234                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2235 
2236   MachineFunction *MF = MBB.getParent();
2237   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2238   DebugLoc DL;
2239   unsigned MoveReg = PPC::R12;
2240 
2241   // 32-bit:  FP-relative
2242   MBB.insert(MI,
2243              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2244                                CSI[CSIIndex].getFrameIdx()));
2245 
2246   unsigned RestoreOp = PPC::MTOCRF;
2247   if (CR2Spilled)
2248     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2249                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2250 
2251   if (CR3Spilled)
2252     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2253                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2254 
2255   if (CR4Spilled)
2256     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2257                .addReg(MoveReg, getKillRegState(true)));
2258 }
2259 
2260 MachineBasicBlock::iterator PPCFrameLowering::
2261 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2262                               MachineBasicBlock::iterator I) const {
2263   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2264   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2265       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2266     // Add (actually subtract) back the amount the callee popped on return.
2267     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2268       bool is64Bit = Subtarget.isPPC64();
2269       CalleeAmt *= -1;
2270       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2271       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2272       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2273       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2274       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2275       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2276       const DebugLoc &dl = I->getDebugLoc();
2277 
2278       if (isInt<16>(CalleeAmt)) {
2279         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2280           .addReg(StackReg, RegState::Kill)
2281           .addImm(CalleeAmt);
2282       } else {
2283         MachineBasicBlock::iterator MBBI = I;
2284         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2285           .addImm(CalleeAmt >> 16);
2286         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2287           .addReg(TmpReg, RegState::Kill)
2288           .addImm(CalleeAmt & 0xFFFF);
2289         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2290           .addReg(StackReg, RegState::Kill)
2291           .addReg(TmpReg);
2292       }
2293     }
2294   }
2295   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2296   return MBB.erase(I);
2297 }
2298 
2299 static bool isCalleeSavedCR(unsigned Reg) {
2300   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2301 }
2302 
2303 bool
2304 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2305                                         MachineBasicBlock::iterator MI,
2306                                         std::vector<CalleeSavedInfo> &CSI,
2307                                         const TargetRegisterInfo *TRI) const {
2308 
2309   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2310   // Return false otherwise to maintain pre-existing behavior.
2311   if (!Subtarget.isSVR4ABI())
2312     return false;
2313 
2314   MachineFunction *MF = MBB.getParent();
2315   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2316   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2317   bool MustSaveTOC = FI->mustSaveTOC();
2318   bool CR2Spilled = false;
2319   bool CR3Spilled = false;
2320   bool CR4Spilled = false;
2321   unsigned CSIIndex = 0;
2322 
2323   // Initialize insertion-point logic; we will be restoring in reverse
2324   // order of spill.
2325   MachineBasicBlock::iterator I = MI, BeforeI = I;
2326   bool AtStart = I == MBB.begin();
2327 
2328   if (!AtStart)
2329     --BeforeI;
2330 
2331   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2332     unsigned Reg = CSI[i].getReg();
2333 
2334     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2335     if (Reg == PPC::VRSAVE)
2336       continue;
2337 
2338     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2339       continue;
2340 
2341     // Restore of callee saved condition register field is handled during
2342     // epilogue insertion.
2343     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2344       continue;
2345 
2346     if (Reg == PPC::CR2) {
2347       CR2Spilled = true;
2348       // The spill slot is associated only with CR2, which is the
2349       // first nonvolatile spilled.  Save it here.
2350       CSIIndex = i;
2351       continue;
2352     } else if (Reg == PPC::CR3) {
2353       CR3Spilled = true;
2354       continue;
2355     } else if (Reg == PPC::CR4) {
2356       CR4Spilled = true;
2357       continue;
2358     } else {
2359       // When we first encounter a non-CR register after seeing at
2360       // least one CR register, restore all spilled CRs together.
2361       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2362         bool is31 = needsFP(*MF);
2363         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2364                    CSIIndex);
2365         CR2Spilled = CR3Spilled = CR4Spilled = false;
2366       }
2367 
2368       if (CSI[i].isSpilledToReg()) {
2369         DebugLoc DL;
2370         NumPEReloadVSR++;
2371         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2372             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2373       } else {
2374        // Default behavior for non-CR saves.
2375         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2376 
2377         // Functions without NoUnwind need to preserve the order of elements in
2378         // saved vector registers.
2379         if (Subtarget.needsSwapsForVSXMemOps() &&
2380             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2381           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2382                                         TRI);
2383         else
2384           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2385 
2386         assert(I != MBB.begin() &&
2387                "loadRegFromStackSlot didn't insert any code!");
2388       }
2389     }
2390 
2391     // Insert in reverse order.
2392     if (AtStart)
2393       I = MBB.begin();
2394     else {
2395       I = BeforeI;
2396       ++I;
2397     }
2398   }
2399 
2400   // If we haven't yet spilled the CRs, do so now.
2401   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2402     assert(Subtarget.is32BitELFABI() &&
2403            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2404     bool is31 = needsFP(*MF);
2405     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2406   }
2407 
2408   return true;
2409 }
2410 
2411 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2412   return TOCSaveOffset;
2413 }
2414 
2415 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2416   return FramePointerSaveOffset;
2417 }
2418 
2419 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2420   if (Subtarget.isAIXABI())
2421     report_fatal_error("BasePointer is not implemented on AIX yet.");
2422   return BasePointerSaveOffset;
2423 }
2424 
2425 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2426   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2427     return false;
2428   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2429           MF.getSubtarget<PPCSubtarget>().isPPC64());
2430 }
2431