xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision a073a18460b4e1083005bc82a412bbce82e98d3d)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
34 
35 static cl::opt<bool>
36 EnablePEVectorSpills("ppc-enable-pe-vector-spills",
37                      cl::desc("Enable spills in prologue to vector registers."),
38                      cl::init(false), cl::Hidden);
39 
40 /// VRRegNo - Map from a numbered VR register to its enum value.
41 ///
42 static const MCPhysReg VRRegNo[] = {
43  PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
44  PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
45  PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
46  PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
47 };
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isDarwinABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   return STI.isELFv2ABI() ? 24 : 40;
58 }
59 
60 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
61   // For the Darwin ABI:
62   // We cannot use the TOC save slot (offset +20) in the PowerPC linkage area
63   // for saving the frame pointer (if needed.)  While the published ABI has
64   // not used this slot since at least MacOSX 10.2, there is older code
65   // around that does use it, and that needs to continue to work.
66   if (STI.isDarwinABI())
67     return STI.isPPC64() ? -8U : -4U;
68 
69   // SVR4 ABI: First slot in the general register save area.
70   return STI.isPPC64() ? -8U : -4U;
71 }
72 
73 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
74   if (STI.isDarwinABI() || STI.isPPC64())
75     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
76 
77   // SVR4 ABI:
78   return 8;
79 }
80 
81 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
82   if (STI.isDarwinABI())
83     return STI.isPPC64() ? -16U : -8U;
84 
85   // SVR4 ABI: First slot in the general register save area.
86   return STI.isPPC64()
87              ? -16U
88              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
89 }
90 
91 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
92     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
93                           STI.getPlatformStackAlignment(), 0),
94       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
95       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
96       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
97       LinkageSize(computeLinkageSize(Subtarget)),
98       BasePointerSaveOffset(computeBasePointerSaveOffset(STI)) {}
99 
100 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
101 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
102     unsigned &NumEntries) const {
103   if (Subtarget.isDarwinABI()) {
104     NumEntries = 1;
105     if (Subtarget.isPPC64()) {
106       static const SpillSlot darwin64Offsets = {PPC::X31, -8};
107       return &darwin64Offsets;
108     } else {
109       static const SpillSlot darwinOffsets = {PPC::R31, -4};
110       return &darwinOffsets;
111     }
112   }
113 
114   // Early exit if not using the SVR4 ABI.
115   if (!Subtarget.isSVR4ABI()) {
116     NumEntries = 0;
117     return nullptr;
118   }
119 
120   // Note that the offsets here overlap, but this is fixed up in
121   // processFunctionBeforeFrameFinalized.
122 
123   static const SpillSlot Offsets[] = {
124       // Floating-point register save area offsets.
125       {PPC::F31, -8},
126       {PPC::F30, -16},
127       {PPC::F29, -24},
128       {PPC::F28, -32},
129       {PPC::F27, -40},
130       {PPC::F26, -48},
131       {PPC::F25, -56},
132       {PPC::F24, -64},
133       {PPC::F23, -72},
134       {PPC::F22, -80},
135       {PPC::F21, -88},
136       {PPC::F20, -96},
137       {PPC::F19, -104},
138       {PPC::F18, -112},
139       {PPC::F17, -120},
140       {PPC::F16, -128},
141       {PPC::F15, -136},
142       {PPC::F14, -144},
143 
144       // General register save area offsets.
145       {PPC::R31, -4},
146       {PPC::R30, -8},
147       {PPC::R29, -12},
148       {PPC::R28, -16},
149       {PPC::R27, -20},
150       {PPC::R26, -24},
151       {PPC::R25, -28},
152       {PPC::R24, -32},
153       {PPC::R23, -36},
154       {PPC::R22, -40},
155       {PPC::R21, -44},
156       {PPC::R20, -48},
157       {PPC::R19, -52},
158       {PPC::R18, -56},
159       {PPC::R17, -60},
160       {PPC::R16, -64},
161       {PPC::R15, -68},
162       {PPC::R14, -72},
163 
164       // CR save area offset.  We map each of the nonvolatile CR fields
165       // to the slot for CR2, which is the first of the nonvolatile CR
166       // fields to be assigned, so that we only allocate one save slot.
167       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
168       {PPC::CR2, -4},
169 
170       // VRSAVE save area offset.
171       {PPC::VRSAVE, -4},
172 
173       // Vector register save area
174       {PPC::V31, -16},
175       {PPC::V30, -32},
176       {PPC::V29, -48},
177       {PPC::V28, -64},
178       {PPC::V27, -80},
179       {PPC::V26, -96},
180       {PPC::V25, -112},
181       {PPC::V24, -128},
182       {PPC::V23, -144},
183       {PPC::V22, -160},
184       {PPC::V21, -176},
185       {PPC::V20, -192},
186 
187       // SPE register save area (overlaps Vector save area).
188       {PPC::S31, -8},
189       {PPC::S30, -16},
190       {PPC::S29, -24},
191       {PPC::S28, -32},
192       {PPC::S27, -40},
193       {PPC::S26, -48},
194       {PPC::S25, -56},
195       {PPC::S24, -64},
196       {PPC::S23, -72},
197       {PPC::S22, -80},
198       {PPC::S21, -88},
199       {PPC::S20, -96},
200       {PPC::S19, -104},
201       {PPC::S18, -112},
202       {PPC::S17, -120},
203       {PPC::S16, -128},
204       {PPC::S15, -136},
205       {PPC::S14, -144}};
206 
207   static const SpillSlot Offsets64[] = {
208       // Floating-point register save area offsets.
209       {PPC::F31, -8},
210       {PPC::F30, -16},
211       {PPC::F29, -24},
212       {PPC::F28, -32},
213       {PPC::F27, -40},
214       {PPC::F26, -48},
215       {PPC::F25, -56},
216       {PPC::F24, -64},
217       {PPC::F23, -72},
218       {PPC::F22, -80},
219       {PPC::F21, -88},
220       {PPC::F20, -96},
221       {PPC::F19, -104},
222       {PPC::F18, -112},
223       {PPC::F17, -120},
224       {PPC::F16, -128},
225       {PPC::F15, -136},
226       {PPC::F14, -144},
227 
228       // General register save area offsets.
229       {PPC::X31, -8},
230       {PPC::X30, -16},
231       {PPC::X29, -24},
232       {PPC::X28, -32},
233       {PPC::X27, -40},
234       {PPC::X26, -48},
235       {PPC::X25, -56},
236       {PPC::X24, -64},
237       {PPC::X23, -72},
238       {PPC::X22, -80},
239       {PPC::X21, -88},
240       {PPC::X20, -96},
241       {PPC::X19, -104},
242       {PPC::X18, -112},
243       {PPC::X17, -120},
244       {PPC::X16, -128},
245       {PPC::X15, -136},
246       {PPC::X14, -144},
247 
248       // VRSAVE save area offset.
249       {PPC::VRSAVE, -4},
250 
251       // Vector register save area
252       {PPC::V31, -16},
253       {PPC::V30, -32},
254       {PPC::V29, -48},
255       {PPC::V28, -64},
256       {PPC::V27, -80},
257       {PPC::V26, -96},
258       {PPC::V25, -112},
259       {PPC::V24, -128},
260       {PPC::V23, -144},
261       {PPC::V22, -160},
262       {PPC::V21, -176},
263       {PPC::V20, -192}};
264 
265   if (Subtarget.isPPC64()) {
266     NumEntries = array_lengthof(Offsets64);
267 
268     return Offsets64;
269   } else {
270     NumEntries = array_lengthof(Offsets);
271 
272     return Offsets;
273   }
274 }
275 
276 /// RemoveVRSaveCode - We have found that this function does not need any code
277 /// to manipulate the VRSAVE register, even though it uses vector registers.
278 /// This can happen when the only registers used are known to be live in or out
279 /// of the function.  Remove all of the VRSAVE related code from the function.
280 /// FIXME: The removal of the code results in a compile failure at -O0 when the
281 /// function contains a function call, as the GPR containing original VRSAVE
282 /// contents is spilled and reloaded around the call.  Without the prolog code,
283 /// the spill instruction refers to an undefined register.  This code needs
284 /// to account for all uses of that GPR.
285 static void RemoveVRSaveCode(MachineInstr &MI) {
286   MachineBasicBlock *Entry = MI.getParent();
287   MachineFunction *MF = Entry->getParent();
288 
289   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
290   MachineBasicBlock::iterator MBBI = MI;
291   ++MBBI;
292   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
293   MBBI->eraseFromParent();
294 
295   bool RemovedAllMTVRSAVEs = true;
296   // See if we can find and remove the MTVRSAVE instruction from all of the
297   // epilog blocks.
298   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
299     // If last instruction is a return instruction, add an epilogue
300     if (I->isReturnBlock()) {
301       bool FoundIt = false;
302       for (MBBI = I->end(); MBBI != I->begin(); ) {
303         --MBBI;
304         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
305           MBBI->eraseFromParent();  // remove it.
306           FoundIt = true;
307           break;
308         }
309       }
310       RemovedAllMTVRSAVEs &= FoundIt;
311     }
312   }
313 
314   // If we found and removed all MTVRSAVE instructions, remove the read of
315   // VRSAVE as well.
316   if (RemovedAllMTVRSAVEs) {
317     MBBI = MI;
318     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
319     --MBBI;
320     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
321     MBBI->eraseFromParent();
322   }
323 
324   // Finally, nuke the UPDATE_VRSAVE.
325   MI.eraseFromParent();
326 }
327 
328 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
329 // instruction selector.  Based on the vector registers that have been used,
330 // transform this into the appropriate ORI instruction.
331 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
332   MachineFunction *MF = MI.getParent()->getParent();
333   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
334   DebugLoc dl = MI.getDebugLoc();
335 
336   const MachineRegisterInfo &MRI = MF->getRegInfo();
337   unsigned UsedRegMask = 0;
338   for (unsigned i = 0; i != 32; ++i)
339     if (MRI.isPhysRegModified(VRRegNo[i]))
340       UsedRegMask |= 1 << (31-i);
341 
342   // Live in and live out values already must be in the mask, so don't bother
343   // marking them.
344   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
345     unsigned RegNo = TRI->getEncodingValue(LI.first);
346     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
347       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
348   }
349 
350   // Live out registers appear as use operands on return instructions.
351   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
352        UsedRegMask != 0 && BI != BE; ++BI) {
353     const MachineBasicBlock &MBB = *BI;
354     if (!MBB.isReturnBlock())
355       continue;
356     const MachineInstr &Ret = MBB.back();
357     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
358       const MachineOperand &MO = Ret.getOperand(I);
359       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
360         continue;
361       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
362       UsedRegMask &= ~(1 << (31-RegNo));
363     }
364   }
365 
366   // If no registers are used, turn this into a copy.
367   if (UsedRegMask == 0) {
368     // Remove all VRSAVE code.
369     RemoveVRSaveCode(MI);
370     return;
371   }
372 
373   unsigned SrcReg = MI.getOperand(1).getReg();
374   unsigned DstReg = MI.getOperand(0).getReg();
375 
376   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
377     if (DstReg != SrcReg)
378       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
379           .addReg(SrcReg)
380           .addImm(UsedRegMask);
381     else
382       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
383           .addReg(SrcReg, RegState::Kill)
384           .addImm(UsedRegMask);
385   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
386     if (DstReg != SrcReg)
387       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
388           .addReg(SrcReg)
389           .addImm(UsedRegMask >> 16);
390     else
391       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
392           .addReg(SrcReg, RegState::Kill)
393           .addImm(UsedRegMask >> 16);
394   } else {
395     if (DstReg != SrcReg)
396       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
397           .addReg(SrcReg)
398           .addImm(UsedRegMask >> 16);
399     else
400       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
401           .addReg(SrcReg, RegState::Kill)
402           .addImm(UsedRegMask >> 16);
403 
404     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
405         .addReg(DstReg, RegState::Kill)
406         .addImm(UsedRegMask & 0xFFFF);
407   }
408 
409   // Remove the old UPDATE_VRSAVE instruction.
410   MI.eraseFromParent();
411 }
412 
413 static bool spillsCR(const MachineFunction &MF) {
414   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
415   return FuncInfo->isCRSpilled();
416 }
417 
418 static bool spillsVRSAVE(const MachineFunction &MF) {
419   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
420   return FuncInfo->isVRSAVESpilled();
421 }
422 
423 static bool hasSpills(const MachineFunction &MF) {
424   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
425   return FuncInfo->hasSpills();
426 }
427 
428 static bool hasNonRISpills(const MachineFunction &MF) {
429   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
430   return FuncInfo->hasNonRISpills();
431 }
432 
433 /// MustSaveLR - Return true if this function requires that we save the LR
434 /// register onto the stack in the prolog and restore it in the epilog of the
435 /// function.
436 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
437   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
438 
439   // We need a save/restore of LR if there is any def of LR (which is
440   // defined by calls, including the PIC setup sequence), or if there is
441   // some use of the LR stack slot (e.g. for builtin_return_address).
442   // (LR comes in 32 and 64 bit versions.)
443   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
444   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
445 }
446 
447 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
448 /// call frame size. Update the MachineFunction object with the stack size.
449 unsigned
450 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
451                                                 bool UseEstimate) const {
452   unsigned NewMaxCallFrameSize = 0;
453   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
454                                             &NewMaxCallFrameSize);
455   MF.getFrameInfo().setStackSize(FrameSize);
456   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
457   return FrameSize;
458 }
459 
460 /// determineFrameLayout - Determine the size of the frame and maximum call
461 /// frame size.
462 unsigned
463 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
464                                        bool UseEstimate,
465                                        unsigned *NewMaxCallFrameSize) const {
466   const MachineFrameInfo &MFI = MF.getFrameInfo();
467 
468   // Get the number of bytes to allocate from the FrameInfo
469   unsigned FrameSize =
470     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
471 
472   // Get stack alignments. The frame must be aligned to the greatest of these:
473   unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
474   unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
475   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
476 
477   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
478 
479   unsigned LR = RegInfo->getRARegister();
480   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
481   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
482                        !MFI.adjustsStack() &&       // No calls.
483                        !MustSaveLR(MF, LR) &&       // No need to save LR.
484                        !RegInfo->hasBasePointer(MF); // No special alignment.
485 
486   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
487   // code if all local vars are reg-allocated.
488   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
489 
490   // Check whether we can skip adjusting the stack pointer (by using red zone)
491   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
492     // No need for frame
493     return 0;
494   }
495 
496   // Get the maximum call frame size of all the calls.
497   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
498 
499   // Maximum call frame needs to be at least big enough for linkage area.
500   unsigned minCallFrameSize = getLinkageSize();
501   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
502 
503   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
504   // that allocations will be aligned.
505   if (MFI.hasVarSizedObjects())
506     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
507 
508   // Update the new max call frame size if the caller passes in a valid pointer.
509   if (NewMaxCallFrameSize)
510     *NewMaxCallFrameSize = maxCallFrameSize;
511 
512   // Include call frame size in total.
513   FrameSize += maxCallFrameSize;
514 
515   // Make sure the frame is aligned.
516   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
517 
518   return FrameSize;
519 }
520 
521 // hasFP - Return true if the specified function actually has a dedicated frame
522 // pointer register.
523 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
524   const MachineFrameInfo &MFI = MF.getFrameInfo();
525   // FIXME: This is pretty much broken by design: hasFP() might be called really
526   // early, before the stack layout was calculated and thus hasFP() might return
527   // true or false here depending on the time of call.
528   return (MFI.getStackSize()) && needsFP(MF);
529 }
530 
531 // needsFP - Return true if the specified function should have a dedicated frame
532 // pointer register.  This is true if the function has variable sized allocas or
533 // if frame pointer elimination is disabled.
534 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
535   const MachineFrameInfo &MFI = MF.getFrameInfo();
536 
537   // Naked functions have no stack frame pushed, so we don't have a frame
538   // pointer.
539   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
540     return false;
541 
542   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
543     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
544     (MF.getTarget().Options.GuaranteedTailCallOpt &&
545      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
546 }
547 
548 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
549   bool is31 = needsFP(MF);
550   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
551   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
552 
553   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
554   bool HasBP = RegInfo->hasBasePointer(MF);
555   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
556   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
557 
558   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
559        BI != BE; ++BI)
560     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
561       --MBBI;
562       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
563         MachineOperand &MO = MBBI->getOperand(I);
564         if (!MO.isReg())
565           continue;
566 
567         switch (MO.getReg()) {
568         case PPC::FP:
569           MO.setReg(FPReg);
570           break;
571         case PPC::FP8:
572           MO.setReg(FP8Reg);
573           break;
574         case PPC::BP:
575           MO.setReg(BPReg);
576           break;
577         case PPC::BP8:
578           MO.setReg(BP8Reg);
579           break;
580 
581         }
582       }
583     }
584 }
585 
586 /*  This function will do the following:
587     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
588       respectively (defaults recommended by the ABI) and return true
589     - If MBB is not an entry block, initialize the register scavenger and look
590       for available registers.
591     - If the defaults (R0/R12) are available, return true
592     - If TwoUniqueRegsRequired is set to true, it looks for two unique
593       registers. Otherwise, look for a single available register.
594       - If the required registers are found, set SR1 and SR2 and return true.
595       - If the required registers are not found, set SR2 or both SR1 and SR2 to
596         PPC::NoRegister and return false.
597 
598     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
599     is not set, this function will attempt to find two different registers, but
600     still return true if only one register is available (and set SR1 == SR2).
601 */
602 bool
603 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
604                                       bool UseAtEnd,
605                                       bool TwoUniqueRegsRequired,
606                                       unsigned *SR1,
607                                       unsigned *SR2) const {
608   RegScavenger RS;
609   unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
610   unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
611 
612   // Set the defaults for the two scratch registers.
613   if (SR1)
614     *SR1 = R0;
615 
616   if (SR2) {
617     assert (SR1 && "Asking for the second scratch register but not the first?");
618     *SR2 = R12;
619   }
620 
621   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
622   if ((UseAtEnd && MBB->isReturnBlock()) ||
623       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
624     return true;
625 
626   RS.enterBasicBlock(*MBB);
627 
628   if (UseAtEnd && !MBB->empty()) {
629     // The scratch register will be used at the end of the block, so must
630     // consider all registers used within the block
631 
632     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
633     // If no terminator, back iterator up to previous instruction.
634     if (MBBI == MBB->end())
635       MBBI = std::prev(MBBI);
636 
637     if (MBBI != MBB->begin())
638       RS.forward(MBBI);
639   }
640 
641   // If the two registers are available, we're all good.
642   // Note that we only return here if both R0 and R12 are available because
643   // although the function may not require two unique registers, it may benefit
644   // from having two so we should try to provide them.
645   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
646     return true;
647 
648   // Get the list of callee-saved registers for the target.
649   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
650   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
651 
652   // Get all the available registers in the block.
653   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
654                                      &PPC::GPRCRegClass);
655 
656   // We shouldn't use callee-saved registers as scratch registers as they may be
657   // available when looking for a candidate block for shrink wrapping but not
658   // available when the actual prologue/epilogue is being emitted because they
659   // were added as live-in to the prologue block by PrologueEpilogueInserter.
660   for (int i = 0; CSRegs[i]; ++i)
661     BV.reset(CSRegs[i]);
662 
663   // Set the first scratch register to the first available one.
664   if (SR1) {
665     int FirstScratchReg = BV.find_first();
666     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
667   }
668 
669   // If there is another one available, set the second scratch register to that.
670   // Otherwise, set it to either PPC::NoRegister if this function requires two
671   // or to whatever SR1 is set to if this function doesn't require two.
672   if (SR2) {
673     int SecondScratchReg = BV.find_next(*SR1);
674     if (SecondScratchReg != -1)
675       *SR2 = SecondScratchReg;
676     else
677       *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
678   }
679 
680   // Now that we've done our best to provide both registers, double check
681   // whether we were unable to provide enough.
682   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
683     return false;
684 
685   return true;
686 }
687 
688 // We need a scratch register for spilling LR and for spilling CR. By default,
689 // we use two scratch registers to hide latency. However, if only one scratch
690 // register is available, we can adjust for that by not overlapping the spill
691 // code. However, if we need to realign the stack (i.e. have a base pointer)
692 // and the stack frame is large, we need two scratch registers.
693 bool
694 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
695   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
696   MachineFunction &MF = *(MBB->getParent());
697   bool HasBP = RegInfo->hasBasePointer(MF);
698   unsigned FrameSize = determineFrameLayout(MF);
699   int NegFrameSize = -FrameSize;
700   bool IsLargeFrame = !isInt<16>(NegFrameSize);
701   MachineFrameInfo &MFI = MF.getFrameInfo();
702   unsigned MaxAlign = MFI.getMaxAlignment();
703   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
704 
705   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
706 }
707 
708 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
709   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
710 
711   return findScratchRegister(TmpMBB, false,
712                              twoUniqueScratchRegsRequired(TmpMBB));
713 }
714 
715 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
716   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
717 
718   return findScratchRegister(TmpMBB, true);
719 }
720 
721 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
722   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
723   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
724 
725   // Abort if there is no register info or function info.
726   if (!RegInfo || !FI)
727     return false;
728 
729   // Only move the stack update on ELFv2 ABI and PPC64.
730   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
731     return false;
732 
733   // Check the frame size first and return false if it does not fit the
734   // requirements.
735   // We need a non-zero frame size as well as a frame that will fit in the red
736   // zone. This is because by moving the stack pointer update we are now storing
737   // to the red zone until the stack pointer is updated. If we get an interrupt
738   // inside the prologue but before the stack update we now have a number of
739   // stores to the red zone and those stores must all fit.
740   MachineFrameInfo &MFI = MF.getFrameInfo();
741   unsigned FrameSize = MFI.getStackSize();
742   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
743     return false;
744 
745   // Frame pointers and base pointers complicate matters so don't do anything
746   // if we have them. For example having a frame pointer will sometimes require
747   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
748   // difficult.
749   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
750     return false;
751 
752   // Calls to fast_cc functions use different rules for passing parameters on
753   // the stack from the ABI and using PIC base in the function imposes
754   // similar restrictions to using the base pointer. It is not generally safe
755   // to move the stack pointer update in these situations.
756   if (FI->hasFastCall() || FI->usesPICBase())
757     return false;
758 
759   // Finally we can move the stack update if we do not require regiser
760   // scavenging. Register scavenging can introduce more spills and so
761   // may make the frame size larger than we have computed.
762   return !RegInfo->requiresFrameIndexScavenging(MF);
763 }
764 
/// emitPrologue - Insert the function prologue for MF into MBB.
///
/// In outline, this routine:
///  - on non-SVR4 ABIs, looks for an UPDATE_VRSAVE pseudo and hands it to
///    HandleVRSaveUpdate;
///  - obtains the frame size from determineFrameLayoutAndUpdate;
///  - copies LR (and the CR fields listed in MustSaveCRs) into scratch
///    registers and stores them to their save slots;
///  - stores FP / PIC base (R30) / BP to their save slots, before the stack
///    pointer update when a red zone exists and after it otherwise;
///  - allocates the frame with a store-with-update of the stack pointer
///    (immediate form for small frames without realignment, indexed form
///    otherwise);
///  - emits CFI_INSTRUCTIONs describing the CFA and the saved registers when
///    needsCFI is set;
///  - copies the stack pointer into the frame pointer when hasFP(MF) holds.
/// If the frame size is zero the routine returns right after the LR/CR/FP
/// saves, without emitting a stack update or any CFI.
///
/// MF  - the function whose prologue is being emitted.
/// MBB - the block that receives the prologue instructions.
765 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
766                                     MachineBasicBlock &MBB) const {
767   MachineBasicBlock::iterator MBBI = MBB.begin();
768   MachineFrameInfo &MFI = MF.getFrameInfo();
769   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
770   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
771 
772   MachineModuleInfo &MMI = MF.getMMI();
773   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
      // dl is never assigned in this function, so every BuildMI below receives
      // a default-constructed DebugLoc.
774   DebugLoc dl;
      // CFI directives are emitted when the module has debug info or the
      // function needs an unwind table entry.
775   bool needsCFI = MMI.hasDebugInfo() ||
776     MF.getFunction().needsUnwindTableEntry();
777 
778   // Get processor type.
779   bool isPPC64 = Subtarget.isPPC64();
780   // Get the ABI.
781   bool isSVR4ABI = Subtarget.isSVR4ABI();
782   bool isELFv2ABI = Subtarget.isELFv2ABI();
783   assert((Subtarget.isDarwinABI() || isSVR4ABI) &&
784          "Currently only Darwin and SVR4 ABIs are supported for PowerPC.");
785 
786   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
787   // process it.
      // NOTE(review): the counter `i` is incremented but never read.
788   if (!isSVR4ABI)
789     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
790       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
791         HandleVRSaveUpdate(*MBBI, TII);
792         break;
793       }
794     }
795 
796   // Move MBBI back to the beginning of the prologue block.
797   MBBI = MBB.begin();
798 
799   // Work out frame sizes.
800   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
801   int NegFrameSize = -FrameSize;
      // NOTE(review): NegFrameSize is already an int, so this isInt<32> check
      // looks like it can never fail - confirm whether a wider type was meant.
802   if (!isInt<32>(NegFrameSize))
803     llvm_unreachable("Unhandled stack size!");
804 
805   if (MFI.isFrameAddressTaken())
806     replaceFPWithRealFP(MF);
807 
808   // Check if the link register (LR) must be saved.
809   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
810   bool MustSaveLR = FI->mustSaveLR();
811   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
812   bool MustSaveCR = !MustSaveCRs.empty();
813   // Do we have a frame pointer and/or base pointer for this function?
814   bool HasFP = hasFP(MF);
815   bool HasBP = RegInfo->hasBasePointer(MF);
      // Only 32-bit SVR4 is treated as having no red zone.
816   bool HasRedZone = isPPC64 || !isSVR4ABI;
817 
818   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
819   unsigned BPReg       = RegInfo->getBaseRegister(MF);
820   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
821   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
822   unsigned ScratchReg  = 0;
823   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
824   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
      // Select the 64-bit or 32-bit flavour of each opcode used below once, up
      // front.
825   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
826                                                 : PPC::MFLR );
827   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
828                                                  : PPC::STW );
829   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
830                                                      : PPC::STWU );
831   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
832                                                         : PPC::STWUX);
833   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
834                                                           : PPC::LIS );
835   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
836                                                  : PPC::ORI );
837   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
838                                               : PPC::OR );
839   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
840                                                             : PPC::SUBFC);
841   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
842                                                                : PPC::SUBFIC);
843 
844   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
845   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
846   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
847   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
848   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
849          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
850 
851   // Using the same bool variable as below to suppress compiler warnings.
      // The call's result is consumed only by the assert; ScratchReg and TempReg
      // are passed by pointer (presumably filled in by findScratchRegister).
852   bool SingleScratchReg =
853     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
854                         &ScratchReg, &TempReg);
855   assert(SingleScratchReg &&
856          "Required number of registers not available in this block");
857 
      // From here on the flag means "both scratch slots name the same register".
858   SingleScratchReg = ScratchReg == TempReg;
859 
860   int LROffset = getReturnSaveOffset();
861 
      // Offset of the frame pointer save slot; stays 0 when there is no FP.
862   int FPOffset = 0;
863   if (HasFP) {
864     if (isSVR4ABI) {
          // NOTE(review): shadows the MFI reference declared at the top of the
          // function (same object, also obtained from MF.getFrameInfo()).
865       MachineFrameInfo &MFI = MF.getFrameInfo();
866       int FPIndex = FI->getFramePointerSaveIndex();
867       assert(FPIndex && "No Frame Pointer Save Slot!");
868       FPOffset = MFI.getObjectOffset(FPIndex);
869     } else {
870       FPOffset = getFramePointerSaveOffset();
871     }
872   }
873 
      // Offset of the base pointer save slot; stays 0 when there is no BP.
874   int BPOffset = 0;
875   if (HasBP) {
876     if (isSVR4ABI) {
          // NOTE(review): shadows the outer MFI (same object).
877       MachineFrameInfo &MFI = MF.getFrameInfo();
878       int BPIndex = FI->getBasePointerSaveIndex();
879       assert(BPIndex && "No Base Pointer Save Slot!");
880       BPOffset = MFI.getObjectOffset(BPIndex);
881     } else {
882       BPOffset = getBasePointerSaveOffset();
883     }
884   }
885 
      // Offset of the PIC base pointer (R30) save slot; stays 0 when unused.
886   int PBPOffset = 0;
887   if (FI->usesPICBase()) {
        // NOTE(review): shadows the outer MFI (same object).
888     MachineFrameInfo &MFI = MF.getFrameInfo();
889     int PBPIndex = FI->getPICBasePointerSaveIndex();
890     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
891     PBPOffset = MFI.getObjectOffset(PBPIndex);
892   }
893 
894   // Get stack alignments.
895   unsigned MaxAlign = MFI.getMaxAlignment();
896   if (HasBP && MaxAlign > 1)
897     assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
898            "Invalid alignment!");
899 
900   // Frames of 32KB & larger require special handling because they cannot be
901   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
902   bool isLargeFrame = !isInt<16>(NegFrameSize);
903 
904   assert((isPPC64 || !MustSaveCR) &&
905          "Prologue CR saving supported only in 64-bit mode");
906 
907   // Check if we can move the stack update instruction (stdu) down the prologue
908   // past the callee saves. Hopefully this will avoid the situation where the
909   // saves are waiting for the update on the store with update to complete.
      // StackUpdateLoc is the insertion point used for the LR store and for the
      // small-frame stack update; everything else is inserted at MBBI.
910   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
911   bool MovingStackUpdateDown = false;
912 
913   // Check if we can move the stack update.
914   if (stackUpdateCanBeMoved(MF)) {
915     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
        // NOTE(review): iterates by value, copying each CalleeSavedInfo.
916     for (CalleeSavedInfo CSI : Info) {
917       int FrIdx = CSI.getFrameIdx();
918       // If the frame index is not negative the callee saved info belongs to a
919       // stack object that is not a fixed stack object. We ignore non-fixed
920       // stack objects because we won't move the stack update pointer past them.
921       if (FrIdx >= 0)
922         continue;
923 
          // Advance the insertion point by one instruction for every fixed
          // object with a negative offset (presumably stepping over that
          // entry's spill instruction already present in MBB - confirm).
924       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
925         StackUpdateLoc++;
926         MovingStackUpdateDown = true;
927       } else {
928         // We need all of the Frame Indices to meet these conditions.
929         // If they do not, abort the whole operation.
930         StackUpdateLoc = MBBI;
931         MovingStackUpdateDown = false;
932         break;
933       }
934     }
935 
936     // If the operation was not aborted then update the object offset.
        // Each fixed callee-save offset is biased by NegFrameSize here; the CFI
        // loop at the end of this function removes the bias again.
937     if (MovingStackUpdateDown) {
          // NOTE(review): iterates by value, copying each CalleeSavedInfo.
938       for (CalleeSavedInfo CSI : Info) {
939         int FrIdx = CSI.getFrameIdx();
940         if (FrIdx < 0)
941           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
942       }
943     }
944   }
945 
946   // If we need to spill the CR and the LR but we don't have two separate
947   // registers available, we must spill them one at a time
      // Here TempReg == ScratchReg, so the CR image is stored to SP+8 before the
      // MFLR below reuses the same register.
948   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
949     // In the ELFv2 ABI, we are not required to save all CR fields.
950     // If only one or two CR fields are clobbered, it is more efficient to use
951     // mfocrf to selectively save just those fields, because mfocrf has a
952     // shorter latency compared to mfcr.
        // NOTE(review): the code below only selects mfocrf when exactly one CR
        // field must be saved.
953     unsigned MfcrOpcode = PPC::MFCR8;
954     unsigned CrState = RegState::ImplicitKill;
955     if (isELFv2ABI && MustSaveCRs.size() == 1) {
956       MfcrOpcode = PPC::MFOCRF8;
957       CrState = RegState::Kill;
958     }
959     MachineInstrBuilder MIB =
960       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
961     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
962       MIB.addReg(MustSaveCRs[i], CrState);
963     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
964       .addReg(TempReg, getKillRegState(true))
965       .addImm(8)
966       .addReg(SPReg);
967   }
968 
      // Copy LR into ScratchReg; it is stored to its save slot further down.
969   if (MustSaveLR)
970     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
971 
      // Two distinct scratch registers (or no LR save): read CR into TempReg
      // now; the matching store is emitted after the LR store below.
972   if (MustSaveCR &&
973       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
974     // In the ELFv2 ABI, we are not required to save all CR fields.
975     // If only one or two CR fields are clobbered, it is more efficient to use
976     // mfocrf to selectively save just those fields, because mfocrf has a
977     // shorter latency compared to mfcr.
        // NOTE(review): the code below only selects mfocrf when exactly one CR
        // field must be saved.
978     unsigned MfcrOpcode = PPC::MFCR8;
979     unsigned CrState = RegState::ImplicitKill;
980     if (isELFv2ABI && MustSaveCRs.size() == 1) {
981       MfcrOpcode = PPC::MFOCRF8;
982       CrState = RegState::Kill;
983     }
984     MachineInstrBuilder MIB =
985       BuildMI(MBB, MBBI, dl, TII.get(MfcrOpcode), TempReg);
986     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
987       MIB.addReg(MustSaveCRs[i], CrState);
988   }
989 
      // With a red zone, FP / PIC base / BP are stored relative to the incoming
      // SP before the frame is allocated. Without one, these stores are emitted
      // after the stack update (see the !HasRedZone block below).
990   if (HasRedZone) {
991     if (HasFP)
992       BuildMI(MBB, MBBI, dl, StoreInst)
993         .addReg(FPReg)
994         .addImm(FPOffset)
995         .addReg(SPReg);
996     if (FI->usesPICBase())
997       BuildMI(MBB, MBBI, dl, StoreInst)
998         .addReg(PPC::R30)
999         .addImm(PBPOffset)
1000         .addReg(SPReg);
1001     if (HasBP)
1002       BuildMI(MBB, MBBI, dl, StoreInst)
1003         .addReg(BPReg)
1004         .addImm(BPOffset)
1005         .addReg(SPReg);
1006   }
1007 
       // Store the LR copy at LROffset from SP. Note the insertion point is
       // StackUpdateLoc, not MBBI.
1008   if (MustSaveLR)
1009     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
1010       .addReg(ScratchReg, getKillRegState(true))
1011       .addImm(LROffset)
1012       .addReg(SPReg);
1013 
       // Store the CR image read into TempReg above at SP+8.
1014   if (MustSaveCR &&
1015       !(SingleScratchReg && MustSaveLR)) { // will only occur for PPC64
1016     assert(HasRedZone && "A red zone is always available on PPC64");
1017     BuildMI(MBB, MBBI, dl, TII.get(PPC::STW8))
1018       .addReg(TempReg, getKillRegState(true))
1019       .addImm(8)
1020       .addReg(SPReg);
1021   }
1022 
1023   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
1024   if (!FrameSize)
1025     return;
1026 
1027   // Adjust stack pointer: r1 += NegFrameSize.
1028   // If there is a preferred stack alignment, align R1 now
1029 
1030   if (HasBP && HasRedZone) {
1031     // Save a copy of r1 as the base pointer.
1032     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1033       .addReg(SPReg)
1034       .addReg(SPReg);
1035   }
1036 
1037   // Have we generated a STUX instruction to claim stack frame? If so,
1038   // the negated frame size will be placed in ScratchReg.
1039   bool HasSTUX = false;
1040 
1041   // This condition must be kept in sync with canUseAsPrologue.
1042   if (HasBP && MaxAlign > 1) {
         // Realigning allocation. Per the Power ISA semantics of these rotate
         // forms, ScratchReg receives the low Log2_32(MaxAlign) bits of SP.
1043     if (isPPC64)
1044       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1045         .addReg(SPReg)
1046         .addImm(0)
1047         .addImm(64 - Log2_32(MaxAlign));
1048     else // PPC32...
1049       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1050         .addReg(SPReg)
1051         .addImm(0)
1052         .addImm(32 - Log2_32(MaxAlign))
1053         .addImm(31);
1054     if (!isLargeFrame) {
           // ScratchReg = NegFrameSize - ScratchReg (subtract-from-immediate).
1055       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1056         .addReg(ScratchReg, RegState::Kill)
1057         .addImm(NegFrameSize);
1058     } else {
           // NegFrameSize does not fit in 16 bits: materialise it in TempReg
           // with lis/ori, which requires a second, distinct scratch register.
1059       assert(!SingleScratchReg && "Only a single scratch reg available");
1060       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1061         .addImm(NegFrameSize >> 16);
1062       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1063         .addReg(TempReg, RegState::Kill)
1064         .addImm(NegFrameSize & 0xFFFF);
           // ScratchReg = TempReg - ScratchReg.
1065       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1066         .addReg(ScratchReg, RegState::Kill)
1067         .addReg(TempReg, RegState::Kill);
1068     }
1069 
         // Indexed store-with-update: stores the old SP at SP + ScratchReg and
         // updates SP to that address.
1070     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1071       .addReg(SPReg, RegState::Kill)
1072       .addReg(SPReg)
1073       .addReg(ScratchReg);
1074     HasSTUX = true;
1075 
1076   } else if (!isLargeFrame) {
         // Small frame, no realignment: one immediate-form store-with-update,
         // inserted at StackUpdateLoc rather than MBBI.
1077     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1078       .addReg(SPReg)
1079       .addImm(NegFrameSize)
1080       .addReg(SPReg);
1081 
1082   } else {
         // Large frame, no realignment: build NegFrameSize in ScratchReg with
         // lis/ori, then use the indexed store-with-update.
1083     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1084       .addImm(NegFrameSize >> 16);
1085     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1086       .addReg(ScratchReg, RegState::Kill)
1087       .addImm(NegFrameSize & 0xFFFF);
1088     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1089       .addReg(SPReg, RegState::Kill)
1090       .addReg(SPReg)
1091       .addReg(ScratchReg);
1092     HasSTUX = true;
1093   }
1094 
       // Without a red zone the FP / PIC base / BP saves were deferred until
       // now, after the frame has been allocated.
1095   if (!HasRedZone) {
1096     assert(!isPPC64 && "A red zone is always available on PPC64");
1097     if (HasSTUX) {
1098       // The negated frame size is in ScratchReg, and the SPReg has been
1099       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1100       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1101       // the stack frame (i.e. the old SP), ideally, we would put the old
1102       // SP into a register and use it as the base for the stores. The
1103       // problem is that the only available register may be ScratchReg,
1104       // which could be R0, and R0 cannot be used as a base address.
1105 
1106       // First, set ScratchReg to the old SP. This may need to be modified
1107       // later.
1108       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1109         .addReg(ScratchReg, RegState::Kill)
1110         .addReg(SPReg);
1111 
1112       if (ScratchReg == PPC::R0) {
1113         // R0 cannot be used as a base register, but it can be used as an
1114         // index in a store-indexed.
             // LastOffset tracks the offset currently folded into R0, so each
             // step only adds the delta to the next save slot.
1115         int LastOffset = 0;
1116         if (HasFP)  {
1117           // R0 += (FPOffset-LastOffset).
1118           // Need addic, since addi treats R0 as 0.
1119           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1120             .addReg(ScratchReg)
1121             .addImm(FPOffset-LastOffset);
1122           LastOffset = FPOffset;
1123           // Store FP into *R0.
1124           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1125             .addReg(FPReg, RegState::Kill)  // Save FP.
1126             .addReg(PPC::ZERO)
1127             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1128         }
1129         if (FI->usesPICBase()) {
1130           // R0 += (PBPOffset-LastOffset).
1131           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1132             .addReg(ScratchReg)
1133             .addImm(PBPOffset-LastOffset);
1134           LastOffset = PBPOffset;
1135           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1136             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1137             .addReg(PPC::ZERO)
1138             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1139         }
1140         if (HasBP) {
1141           // R0 += (BPOffset-LastOffset).
1142           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1143             .addReg(ScratchReg)
1144             .addImm(BPOffset-LastOffset);
1145           LastOffset = BPOffset;
1146           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1147             .addReg(BPReg, RegState::Kill)  // Save BP.
1148             .addReg(PPC::ZERO)
1149             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1150           // BP = R0-LastOffset
               // i.e. BP is set to the old SP again.
1151           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1152             .addReg(ScratchReg, RegState::Kill)
1153             .addImm(-LastOffset);
1154         }
1155       } else {
1156         // ScratchReg is not R0, so use it as the base register. It is
1157         // already set to the old SP, so we can use the offsets directly.
1158 
1159         // Now that the stack frame has been allocated, save all the necessary
1160         // registers using ScratchReg as the base address.
1161         if (HasFP)
1162           BuildMI(MBB, MBBI, dl, StoreInst)
1163             .addReg(FPReg)
1164             .addImm(FPOffset)
1165             .addReg(ScratchReg);
1166         if (FI->usesPICBase())
1167           BuildMI(MBB, MBBI, dl, StoreInst)
1168             .addReg(PPC::R30)
1169             .addImm(PBPOffset)
1170             .addReg(ScratchReg);
1171         if (HasBP) {
1172           BuildMI(MBB, MBBI, dl, StoreInst)
1173             .addReg(BPReg)
1174             .addImm(BPOffset)
1175             .addReg(ScratchReg);
               // BP = ScratchReg (the old SP), via OR of the register with itself.
1176           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1177             .addReg(ScratchReg, RegState::Kill)
1178             .addReg(ScratchReg);
1179         }
1180       }
1181     } else {
1182       // The frame size is a known 16-bit constant (fitting in the immediate
1183       // field of STWU). To be here we have to be compiling for PPC32.
1184       // Since the SPReg has been decreased by FrameSize, add it back to each
1185       // offset.
1186       if (HasFP)
1187         BuildMI(MBB, MBBI, dl, StoreInst)
1188           .addReg(FPReg)
1189           .addImm(FrameSize + FPOffset)
1190           .addReg(SPReg);
1191       if (FI->usesPICBase())
1192         BuildMI(MBB, MBBI, dl, StoreInst)
1193           .addReg(PPC::R30)
1194           .addImm(FrameSize + PBPOffset)
1195           .addReg(SPReg);
1196       if (HasBP) {
1197         BuildMI(MBB, MBBI, dl, StoreInst)
1198           .addReg(BPReg)
1199           .addImm(FrameSize + BPOffset)
1200           .addReg(SPReg);
             // BP = SP + FrameSize, i.e. the SP value before the stack update.
1201         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1202           .addReg(SPReg)
1203           .addImm(FrameSize);
1204       }
1205     }
1206   }
1207 
1208   // Add Call Frame Information for the instructions we generated above.
1209   if (needsCFI) {
1210     unsigned CFIIndex;
1211 
1212     if (HasBP) {
1213       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1214       // because if the stack needed aligning then CFA won't be at a fixed
1215       // offset from FP/SP.
1216       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1217       CFIIndex = MF.addFrameInst(
1218           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1219     } else {
1220       // Adjust the definition of CFA to account for the change in SP.
1221       assert(NegFrameSize);
1222       CFIIndex = MF.addFrameInst(
1223           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1224     }
1225     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1226         .addCFIIndex(CFIIndex);
1227 
1228     if (HasFP) {
1229       // Describe where FP was saved, at a fixed offset from CFA.
1230       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1231       CFIIndex = MF.addFrameInst(
1232           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1233       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1234           .addCFIIndex(CFIIndex);
1235     }
1236 
1237     if (FI->usesPICBase()) {
1238       // Describe where the PIC base pointer (R30) was saved, at a fixed
           // offset from CFA.
1239       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1240       CFIIndex = MF.addFrameInst(
1241           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1242       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1243           .addCFIIndex(CFIIndex);
1244     }
1245 
1246     if (HasBP) {
1247       // Describe where BP was saved, at a fixed offset from CFA.
1248       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1249       CFIIndex = MF.addFrameInst(
1250           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1251       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1252           .addCFIIndex(CFIIndex);
1253     }
1254 
1255     if (MustSaveLR) {
1256       // Describe where LR was saved, at a fixed offset from CFA.
1257       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1258       CFIIndex = MF.addFrameInst(
1259           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1260       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1261           .addCFIIndex(CFIIndex);
1262     }
1263   }
1264 
1265   // If there is a frame pointer, copy R1 into R31
1266   if (HasFP) {
1267     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1268       .addReg(SPReg)
1269       .addReg(SPReg);
1270 
1271     if (!HasBP && needsCFI) {
1272       // Change the definition of CFA from SP+offset to FP+offset, because SP
1273       // will change at every alloca.
1274       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1275       unsigned CFIIndex = MF.addFrameInst(
1276           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1277 
1278       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1279           .addCFIIndex(CFIIndex);
1280     }
1281   }
1282 
1283   if (needsCFI) {
1284     // Describe where callee saved registers were saved, at fixed offsets from
1285     // CFA.
1286     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1287     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1288       unsigned Reg = CSI[I].getReg();
           // LR already got its own record above (when MustSaveLR); LR, LR8 and
           // RM entries produce no CFI record in this loop.
1289       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1290 
1291       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1292       // subregisters of CR2. We just need to emit a move of CR2.
1293       if (PPC::CRBITRCRegClass.contains(Reg))
1294         continue;
1295 
1296       // For SVR4, don't emit a move for the CR spill slot if we haven't
1297       // spilled CRs.
1298       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1299           && !MustSaveCR)
1300         continue;
1301 
1302       // For 64-bit SVR4 when we have spilled CRs, the spill location
1303       // is SP+8, not a frame-relative slot.
1304       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1305         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1306         // the whole CR word.  In the ELFv2 ABI, every CR that was
1307         // actually saved gets its own CFI record.
1308         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1309         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1310             nullptr, MRI->getDwarfRegNum(CRReg, true), 8));
1311         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1312             .addCFIIndex(CFIIndex);
1313         continue;
1314       }
1315 
1316       if (CSI[I].isSpilledToReg()) {
             // The callee-saved register lives in another register rather than
             // in a stack slot: record a register-to-register CFI rule.
1317         unsigned SpilledReg = CSI[I].getDstReg();
1318         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1319             nullptr, MRI->getDwarfRegNum(Reg, true),
1320             MRI->getDwarfRegNum(SpilledReg, true)));
1321         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1322           .addCFIIndex(CFIRegister);
1323       } else {
1324         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1325         // We have changed the object offset above but we do not want to change
1326         // the actual offsets in the CFI instruction so we have to undo the
1327         // offset change here.
1328         if (MovingStackUpdateDown)
1329           Offset -= NegFrameSize;
1330 
1331         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1332             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1333         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1334             .addCFIIndex(CFIIndex);
1335       }
1336     }
1337   }
1338 }
1339 
1340 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1341                                     MachineBasicBlock &MBB) const {
1342   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1343   DebugLoc dl;
1344 
1345   if (MBBI != MBB.end())
1346     dl = MBBI->getDebugLoc();
1347 
1348   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1349   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1350 
1351   // Get alignment info so we know how to restore the SP.
1352   const MachineFrameInfo &MFI = MF.getFrameInfo();
1353 
1354   // Get the number of bytes allocated from the FrameInfo.
1355   int FrameSize = MFI.getStackSize();
1356 
1357   // Get processor type.
1358   bool isPPC64 = Subtarget.isPPC64();
1359   // Get the ABI.
1360   bool isSVR4ABI = Subtarget.isSVR4ABI();
1361 
1362   // Check if the link register (LR) has been saved.
1363   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1364   bool MustSaveLR = FI->mustSaveLR();
1365   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1366   bool MustSaveCR = !MustSaveCRs.empty();
1367   // Do we have a frame pointer and/or base pointer for this function?
1368   bool HasFP = hasFP(MF);
1369   bool HasBP = RegInfo->hasBasePointer(MF);
1370   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1371 
1372   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1373   unsigned BPReg      = RegInfo->getBaseRegister(MF);
1374   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1375   unsigned ScratchReg = 0;
1376   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1377   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1378                                                  : PPC::MTLR );
1379   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1380                                                  : PPC::LWZ );
1381   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1382                                                            : PPC::LIS );
1383   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1384                                               : PPC::OR );
1385   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1386                                                   : PPC::ORI );
1387   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1388                                                    : PPC::ADDI );
1389   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1390                                                 : PPC::ADD4 );
1391 
1392   int LROffset = getReturnSaveOffset();
1393 
1394   int FPOffset = 0;
1395 
1396   // Using the same bool variable as below to suppress compiler warnings.
1397   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1398                                               &TempReg);
1399   assert(SingleScratchReg &&
1400          "Could not find an available scratch register");
1401 
1402   SingleScratchReg = ScratchReg == TempReg;
1403 
1404   if (HasFP) {
1405     if (isSVR4ABI) {
1406       int FPIndex = FI->getFramePointerSaveIndex();
1407       assert(FPIndex && "No Frame Pointer Save Slot!");
1408       FPOffset = MFI.getObjectOffset(FPIndex);
1409     } else {
1410       FPOffset = getFramePointerSaveOffset();
1411     }
1412   }
1413 
1414   int BPOffset = 0;
1415   if (HasBP) {
1416     if (isSVR4ABI) {
1417       int BPIndex = FI->getBasePointerSaveIndex();
1418       assert(BPIndex && "No Base Pointer Save Slot!");
1419       BPOffset = MFI.getObjectOffset(BPIndex);
1420     } else {
1421       BPOffset = getBasePointerSaveOffset();
1422     }
1423   }
1424 
1425   int PBPOffset = 0;
1426   if (FI->usesPICBase()) {
1427     int PBPIndex = FI->getPICBasePointerSaveIndex();
1428     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1429     PBPOffset = MFI.getObjectOffset(PBPIndex);
1430   }
1431 
1432   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1433 
1434   if (IsReturnBlock) {
1435     unsigned RetOpcode = MBBI->getOpcode();
1436     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1437                       RetOpcode == PPC::TCRETURNdi ||
1438                       RetOpcode == PPC::TCRETURNai ||
1439                       RetOpcode == PPC::TCRETURNri8 ||
1440                       RetOpcode == PPC::TCRETURNdi8 ||
1441                       RetOpcode == PPC::TCRETURNai8;
1442 
1443     if (UsesTCRet) {
1444       int MaxTCRetDelta = FI->getTailCallSPDelta();
1445       MachineOperand &StackAdjust = MBBI->getOperand(1);
1446       assert(StackAdjust.isImm() && "Expecting immediate value.");
1447       // Adjust stack pointer.
1448       int StackAdj = StackAdjust.getImm();
1449       int Delta = StackAdj - MaxTCRetDelta;
1450       assert((Delta >= 0) && "Delta must be positive");
1451       if (MaxTCRetDelta>0)
1452         FrameSize += (StackAdj +Delta);
1453       else
1454         FrameSize += StackAdj;
1455     }
1456   }
1457 
1458   // Frames of 32KB & larger require special handling because they cannot be
1459   // indexed into with a simple LD/LWZ immediate offset operand.
1460   bool isLargeFrame = !isInt<16>(FrameSize);
1461 
1462   // On targets without red zone, the SP needs to be restored last, so that
1463   // all live contents of the stack frame are upwards of the SP. This means
1464   // that we cannot restore SP just now, since there may be more registers
1465   // to restore from the stack frame (e.g. R31). If the frame size is not
1466   // a simple immediate value, we will need a spare register to hold the
1467   // restored SP. If the frame size is known and small, we can simply adjust
1468   // the offsets of the registers to be restored, and still use SP to restore
1469   // them. In such case, the final update of SP will be to add the frame
1470   // size to it.
1471   // To simplify the code, set RBReg to the base register used to restore
1472   // values from the stack, and set SPAdd to the value that needs to be added
1473   // to the SP at the end. The default values are as if red zone was present.
1474   unsigned RBReg = SPReg;
1475   unsigned SPAdd = 0;
1476 
1477   // Check if we can move the stack update instruction up the epilogue
1478   // past the callee saves. This will allow the move to LR instruction
1479   // to be executed before the restores of the callee saves which means
1480   // that the callee saves can hide the latency from the MTLR instruction.
1481   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1482   if (stackUpdateCanBeMoved(MF)) {
1483     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1484     for (CalleeSavedInfo CSI : Info) {
1485       int FrIdx = CSI.getFrameIdx();
1486       // If the frame index is not negative the callee saved info belongs to a
1487       // stack object that is not a fixed stack object. We ignore non-fixed
1488       // stack objects because we won't move the update of the stack pointer
1489       // past them.
1490       if (FrIdx >= 0)
1491         continue;
1492 
1493       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1494         StackUpdateLoc--;
1495       else {
1496         // Abort the operation as we can't update all CSR restores.
1497         StackUpdateLoc = MBBI;
1498         break;
1499       }
1500     }
1501   }
1502 
1503   if (FrameSize) {
1504     // In the prologue, the loaded (or persistent) stack pointer value is
1505     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1506     // zone add this offset back now.
1507 
1508     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1509     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1510     // call which invalidates the stack pointer value in SP(0). So we use the
1511     // value of R31 in this case.
1512     if (FI->hasFastCall()) {
1513       assert(HasFP && "Expecting a valid frame pointer.");
1514       if (!HasRedZone)
1515         RBReg = FPReg;
1516       if (!isLargeFrame) {
1517         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1518           .addReg(FPReg).addImm(FrameSize);
1519       } else {
1520         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1521           .addImm(FrameSize >> 16);
1522         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1523           .addReg(ScratchReg, RegState::Kill)
1524           .addImm(FrameSize & 0xFFFF);
1525         BuildMI(MBB, MBBI, dl, AddInst)
1526           .addReg(RBReg)
1527           .addReg(FPReg)
1528           .addReg(ScratchReg);
1529       }
1530     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1531       if (HasRedZone) {
1532         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1533           .addReg(SPReg)
1534           .addImm(FrameSize);
1535       } else {
1536         // Make sure that adding FrameSize will not overflow the max offset
1537         // size.
1538         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1539                "Local offsets should be negative");
1540         SPAdd = FrameSize;
1541         FPOffset += FrameSize;
1542         BPOffset += FrameSize;
1543         PBPOffset += FrameSize;
1544       }
1545     } else {
1546       // We don't want to use ScratchReg as a base register, because it
1547       // could happen to be R0. Use FP instead, but make sure to preserve it.
1548       if (!HasRedZone) {
1549         // If FP is not saved, copy it to ScratchReg.
1550         if (!HasFP)
1551           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1552             .addReg(FPReg)
1553             .addReg(FPReg);
1554         RBReg = FPReg;
1555       }
1556       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1557         .addImm(0)
1558         .addReg(SPReg);
1559     }
1560   }
1561   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1562   // If there is no red zone, ScratchReg may be needed for holding a useful
1563   // value (although not the base register). Make sure it is not overwritten
1564   // too early.
1565 
1566   assert((isPPC64 || !MustSaveCR) &&
1567          "Epilogue CR restoring supported only in 64-bit mode");
1568 
1569   // If we need to restore both the LR and the CR and we only have one
1570   // available scratch register, we must do them one at a time.
1571   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1572     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1573     // is live here.
1574     assert(HasRedZone && "Expecting red zone");
1575     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1576       .addImm(8)
1577       .addReg(SPReg);
1578     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1579       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1580         .addReg(TempReg, getKillRegState(i == e-1));
1581   }
1582 
1583   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1584   // LR is stored in the caller's stack frame. ScratchReg will be needed
1585   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1586   // a base register anyway, because it may happen to be R0.
1587   bool LoadedLR = false;
1588   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1589     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1590       .addImm(LROffset+SPAdd)
1591       .addReg(RBReg);
1592     LoadedLR = true;
1593   }
1594 
1595   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1596     // This will only occur for PPC64.
1597     assert(isPPC64 && "Expecting 64-bit mode");
1598     assert(RBReg == SPReg && "Should be using SP as a base register");
1599     BuildMI(MBB, MBBI, dl, TII.get(PPC::LWZ8), TempReg)
1600       .addImm(8)
1601       .addReg(RBReg);
1602   }
1603 
1604   if (HasFP) {
1605     // If there is red zone, restore FP directly, since SP has already been
1606     // restored. Otherwise, restore the value of FP into ScratchReg.
1607     if (HasRedZone || RBReg == SPReg)
1608       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1609         .addImm(FPOffset)
1610         .addReg(SPReg);
1611     else
1612       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1613         .addImm(FPOffset)
1614         .addReg(RBReg);
1615   }
1616 
1617   if (FI->usesPICBase())
1618     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1619       .addImm(PBPOffset)
1620       .addReg(RBReg);
1621 
1622   if (HasBP)
1623     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1624       .addImm(BPOffset)
1625       .addReg(RBReg);
1626 
1627   // There is nothing more to be loaded from the stack, so now we can
1628   // restore SP: SP = RBReg + SPAdd.
1629   if (RBReg != SPReg || SPAdd != 0) {
1630     assert(!HasRedZone && "This should not happen with red zone");
1631     // If SPAdd is 0, generate a copy.
1632     if (SPAdd == 0)
1633       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1634         .addReg(RBReg)
1635         .addReg(RBReg);
1636     else
1637       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1638         .addReg(RBReg)
1639         .addImm(SPAdd);
1640 
1641     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1642     if (RBReg == FPReg)
1643       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1644         .addReg(ScratchReg)
1645         .addReg(ScratchReg);
1646 
1647     // Now load the LR from the caller's stack frame.
1648     if (MustSaveLR && !LoadedLR)
1649       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1650         .addImm(LROffset)
1651         .addReg(SPReg);
1652   }
1653 
1654   if (MustSaveCR &&
1655       !(SingleScratchReg && MustSaveLR)) // will only occur for PPC64
1656     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1657       BuildMI(MBB, MBBI, dl, TII.get(PPC::MTOCRF8), MustSaveCRs[i])
1658         .addReg(TempReg, getKillRegState(i == e-1));
1659 
1660   if (MustSaveLR)
1661     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1662 
1663   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1664   // call optimization
1665   if (IsReturnBlock) {
1666     unsigned RetOpcode = MBBI->getOpcode();
1667     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1668         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1669         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1670       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1671       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1672 
1673       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1674         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1675           .addReg(SPReg).addImm(CallerAllocatedAmt);
1676       } else {
1677         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1678           .addImm(CallerAllocatedAmt >> 16);
1679         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1680           .addReg(ScratchReg, RegState::Kill)
1681           .addImm(CallerAllocatedAmt & 0xFFFF);
1682         BuildMI(MBB, MBBI, dl, AddInst)
1683           .addReg(SPReg)
1684           .addReg(FPReg)
1685           .addReg(ScratchReg);
1686       }
1687     } else {
1688       createTailCallBranchInstr(MBB);
1689     }
1690   }
1691 }
1692 
1693 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1694   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1695 
1696   // If we got this far a first terminator should exist.
1697   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1698 
1699   DebugLoc dl = MBBI->getDebugLoc();
1700   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1701 
1702   // Create branch instruction for pseudo tail call return instruction
1703   unsigned RetOpcode = MBBI->getOpcode();
1704   if (RetOpcode == PPC::TCRETURNdi) {
1705     MBBI = MBB.getLastNonDebugInstr();
1706     MachineOperand &JumpTarget = MBBI->getOperand(0);
1707     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1708       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1709   } else if (RetOpcode == PPC::TCRETURNri) {
1710     MBBI = MBB.getLastNonDebugInstr();
1711     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1712     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1713   } else if (RetOpcode == PPC::TCRETURNai) {
1714     MBBI = MBB.getLastNonDebugInstr();
1715     MachineOperand &JumpTarget = MBBI->getOperand(0);
1716     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1717   } else if (RetOpcode == PPC::TCRETURNdi8) {
1718     MBBI = MBB.getLastNonDebugInstr();
1719     MachineOperand &JumpTarget = MBBI->getOperand(0);
1720     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1721       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1722   } else if (RetOpcode == PPC::TCRETURNri8) {
1723     MBBI = MBB.getLastNonDebugInstr();
1724     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1725     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1726   } else if (RetOpcode == PPC::TCRETURNai8) {
1727     MBBI = MBB.getLastNonDebugInstr();
1728     MachineOperand &JumpTarget = MBBI->getOperand(0);
1729     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1730   }
1731 }
1732 
1733 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1734                                             BitVector &SavedRegs,
1735                                             RegScavenger *RS) const {
1736   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1737 
1738   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1739 
1740   //  Save and clear the LR state.
1741   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1742   unsigned LR = RegInfo->getRARegister();
1743   FI->setMustSaveLR(MustSaveLR(MF, LR));
1744   SavedRegs.reset(LR);
1745 
1746   //  Save R31 if necessary
1747   int FPSI = FI->getFramePointerSaveIndex();
1748   bool isPPC64 = Subtarget.isPPC64();
1749   bool isDarwinABI  = Subtarget.isDarwinABI();
1750   MachineFrameInfo &MFI = MF.getFrameInfo();
1751 
1752   // If the frame pointer save index hasn't been defined yet.
1753   if (!FPSI && needsFP(MF)) {
1754     // Find out what the fix offset of the frame pointer save area.
1755     int FPOffset = getFramePointerSaveOffset();
1756     // Allocate the frame index for frame pointer save area.
1757     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1758     // Save the result.
1759     FI->setFramePointerSaveIndex(FPSI);
1760   }
1761 
1762   int BPSI = FI->getBasePointerSaveIndex();
1763   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1764     int BPOffset = getBasePointerSaveOffset();
1765     // Allocate the frame index for the base pointer save area.
1766     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1767     // Save the result.
1768     FI->setBasePointerSaveIndex(BPSI);
1769   }
1770 
1771   // Reserve stack space for the PIC Base register (R30).
1772   // Only used in SVR4 32-bit.
1773   if (FI->usesPICBase()) {
1774     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1775     FI->setPICBasePointerSaveIndex(PBPSI);
1776   }
1777 
1778   // Make sure we don't explicitly spill r31, because, for example, we have
1779   // some inline asm which explicitly clobbers it, when we otherwise have a
1780   // frame pointer and are using r31's spill slot for the prologue/epilogue
1781   // code. Same goes for the base pointer and the PIC base register.
1782   if (needsFP(MF))
1783     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1784   if (RegInfo->hasBasePointer(MF))
1785     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1786   if (FI->usesPICBase())
1787     SavedRegs.reset(PPC::R30);
1788 
1789   // Reserve stack space to move the linkage area to in case of a tail call.
1790   int TCSPDelta = 0;
1791   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1792       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1793     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1794   }
1795 
1796   // For 32-bit SVR4, allocate the nonvolatile CR spill slot iff the
1797   // function uses CR 2, 3, or 4.
1798   if (!isPPC64 && !isDarwinABI &&
1799       (SavedRegs.test(PPC::CR2) ||
1800        SavedRegs.test(PPC::CR3) ||
1801        SavedRegs.test(PPC::CR4))) {
1802     int FrameIdx = MFI.CreateFixedObject((uint64_t)4, (int64_t)-4, true);
1803     FI->setCRSpillFrameIndex(FrameIdx);
1804   }
1805 }
1806 
1807 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1808                                                        RegScavenger *RS) const {
1809   // Early exit if not using the SVR4 ABI.
1810   if (!Subtarget.isSVR4ABI()) {
1811     addScavengingSpillSlot(MF, RS);
1812     return;
1813   }
1814 
1815   // Get callee saved register information.
1816   MachineFrameInfo &MFI = MF.getFrameInfo();
1817   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1818 
1819   // If the function is shrink-wrapped, and if the function has a tail call, the
1820   // tail call might not be in the new RestoreBlock, so real branch instruction
1821   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1822   // RestoreBlock. So we handle this case here.
1823   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1824     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1825     for (MachineBasicBlock &MBB : MF) {
1826       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1827         createTailCallBranchInstr(MBB);
1828     }
1829   }
1830 
1831   // Early exit if no callee saved registers are modified!
1832   if (CSI.empty() && !needsFP(MF)) {
1833     addScavengingSpillSlot(MF, RS);
1834     return;
1835   }
1836 
1837   unsigned MinGPR = PPC::R31;
1838   unsigned MinG8R = PPC::X31;
1839   unsigned MinFPR = PPC::F31;
1840   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1841 
1842   bool HasGPSaveArea = false;
1843   bool HasG8SaveArea = false;
1844   bool HasFPSaveArea = false;
1845   bool HasVRSAVESaveArea = false;
1846   bool HasVRSaveArea = false;
1847 
1848   SmallVector<CalleeSavedInfo, 18> GPRegs;
1849   SmallVector<CalleeSavedInfo, 18> G8Regs;
1850   SmallVector<CalleeSavedInfo, 18> FPRegs;
1851   SmallVector<CalleeSavedInfo, 18> VRegs;
1852 
1853   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1854     unsigned Reg = CSI[i].getReg();
1855     if (PPC::GPRCRegClass.contains(Reg) ||
1856         PPC::SPE4RCRegClass.contains(Reg)) {
1857       HasGPSaveArea = true;
1858 
1859       GPRegs.push_back(CSI[i]);
1860 
1861       if (Reg < MinGPR) {
1862         MinGPR = Reg;
1863       }
1864     } else if (PPC::G8RCRegClass.contains(Reg)) {
1865       HasG8SaveArea = true;
1866 
1867       G8Regs.push_back(CSI[i]);
1868 
1869       if (Reg < MinG8R) {
1870         MinG8R = Reg;
1871       }
1872     } else if (PPC::F8RCRegClass.contains(Reg)) {
1873       HasFPSaveArea = true;
1874 
1875       FPRegs.push_back(CSI[i]);
1876 
1877       if (Reg < MinFPR) {
1878         MinFPR = Reg;
1879       }
1880     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1881                PPC::CRRCRegClass.contains(Reg)) {
1882       ; // do nothing, as we already know whether CRs are spilled
1883     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1884       HasVRSAVESaveArea = true;
1885     } else if (PPC::VRRCRegClass.contains(Reg) ||
1886                PPC::SPERCRegClass.contains(Reg)) {
1887       // Altivec and SPE are mutually exclusive, but have the same stack
1888       // alignment requirements, so overload the save area for both cases.
1889       HasVRSaveArea = true;
1890 
1891       VRegs.push_back(CSI[i]);
1892 
1893       if (Reg < MinVR) {
1894         MinVR = Reg;
1895       }
1896     } else {
1897       llvm_unreachable("Unknown RegisterClass!");
1898     }
1899   }
1900 
1901   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1902   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1903 
1904   int64_t LowerBound = 0;
1905 
1906   // Take into account stack space reserved for tail calls.
1907   int TCSPDelta = 0;
1908   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1909       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1910     LowerBound = TCSPDelta;
1911   }
1912 
1913   // The Floating-point register save area is right below the back chain word
1914   // of the previous stack frame.
1915   if (HasFPSaveArea) {
1916     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1917       int FI = FPRegs[i].getFrameIdx();
1918 
1919       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1920     }
1921 
1922     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1923   }
1924 
1925   // Check whether the frame pointer register is allocated. If so, make sure it
1926   // is spilled to the correct offset.
1927   if (needsFP(MF)) {
1928     int FI = PFI->getFramePointerSaveIndex();
1929     assert(FI && "No Frame Pointer Save Slot!");
1930     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1931     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1932     HasGPSaveArea = true;
1933   }
1934 
1935   if (PFI->usesPICBase()) {
1936     int FI = PFI->getPICBasePointerSaveIndex();
1937     assert(FI && "No PIC Base Pointer Save Slot!");
1938     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1939 
1940     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1941     HasGPSaveArea = true;
1942   }
1943 
1944   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1945   if (RegInfo->hasBasePointer(MF)) {
1946     int FI = PFI->getBasePointerSaveIndex();
1947     assert(FI && "No Base Pointer Save Slot!");
1948     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1949 
1950     unsigned BP = RegInfo->getBaseRegister(MF);
1951     if (PPC::G8RCRegClass.contains(BP)) {
1952       MinG8R = std::min<unsigned>(MinG8R, BP);
1953       HasG8SaveArea = true;
1954     } else if (PPC::GPRCRegClass.contains(BP)) {
1955       MinGPR = std::min<unsigned>(MinGPR, BP);
1956       HasGPSaveArea = true;
1957     }
1958   }
1959 
1960   // General register save area starts right below the Floating-point
1961   // register save area.
1962   if (HasGPSaveArea || HasG8SaveArea) {
1963     // Move general register save area spill slots down, taking into account
1964     // the size of the Floating-point register save area.
1965     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1966       if (!GPRegs[i].isSpilledToReg()) {
1967         int FI = GPRegs[i].getFrameIdx();
1968         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1969       }
1970     }
1971 
1972     // Move general register save area spill slots down, taking into account
1973     // the size of the Floating-point register save area.
1974     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1975       if (!G8Regs[i].isSpilledToReg()) {
1976         int FI = G8Regs[i].getFrameIdx();
1977         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1978       }
1979     }
1980 
1981     unsigned MinReg =
1982       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1983                          TRI->getEncodingValue(MinG8R));
1984 
1985     if (Subtarget.isPPC64()) {
1986       LowerBound -= (31 - MinReg + 1) * 8;
1987     } else {
1988       LowerBound -= (31 - MinReg + 1) * 4;
1989     }
1990   }
1991 
1992   // For 32-bit only, the CR save area is below the general register
1993   // save area.  For 64-bit SVR4, the CR save area is addressed relative
1994   // to the stack pointer and hence does not need an adjustment here.
1995   // Only CR2 (the first nonvolatile spilled) has an associated frame
1996   // index so that we have a single uniform save area.
1997   if (spillsCR(MF) && !(Subtarget.isPPC64() && Subtarget.isSVR4ABI())) {
1998     // Adjust the frame index of the CR spill slot.
1999     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2000       unsigned Reg = CSI[i].getReg();
2001 
2002       if ((Subtarget.isSVR4ABI() && Reg == PPC::CR2)
2003           // Leave Darwin logic as-is.
2004           || (!Subtarget.isSVR4ABI() &&
2005               (PPC::CRBITRCRegClass.contains(Reg) ||
2006                PPC::CRRCRegClass.contains(Reg)))) {
2007         int FI = CSI[i].getFrameIdx();
2008 
2009         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2010       }
2011     }
2012 
2013     LowerBound -= 4; // The CR save area is always 4 bytes long.
2014   }
2015 
2016   if (HasVRSAVESaveArea) {
2017     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
2018     //             which have the VRSAVE register class?
2019     // Adjust the frame index of the VRSAVE spill slot.
2020     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2021       unsigned Reg = CSI[i].getReg();
2022 
2023       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2024         int FI = CSI[i].getFrameIdx();
2025 
2026         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2027       }
2028     }
2029 
2030     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2031   }
2032 
2033   // Both Altivec and SPE have the same alignment and padding requirements
2034   // within the stack frame.
2035   if (HasVRSaveArea) {
2036     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2037     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2038     // we are using negative number here (the stack grows downward). We should
2039     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2040     // is the alignment size ( n = 16 here) and y is the size after aligning.
2041     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2042     LowerBound &= ~(15);
2043 
2044     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2045       int FI = VRegs[i].getFrameIdx();
2046 
2047       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2048     }
2049   }
2050 
2051   addScavengingSpillSlot(MF, RS);
2052 }
2053 
2054 void
2055 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2056                                          RegScavenger *RS) const {
2057   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2058   // a large stack, which will require scavenging a register to materialize a
2059   // large offset.
2060 
2061   // We need to have a scavenger spill slot for spills if the frame size is
2062   // large. In case there is no free register for large-offset addressing,
2063   // this slot is used for the necessary emergency spill. Also, we need the
2064   // slot for dynamic stack allocations.
2065 
2066   // The scavenger might be invoked if the frame offset does not fit into
2067   // the 16-bit immediate. We don't know the complete frame size here
2068   // because we've not yet computed callee-saved register spills or the
2069   // needed alignment padding.
2070   unsigned StackSize = determineFrameLayout(MF, true);
2071   MachineFrameInfo &MFI = MF.getFrameInfo();
2072   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2073       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2074     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2075     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2076     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2077     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2078     unsigned Size = TRI.getSpillSize(RC);
2079     unsigned Align = TRI.getSpillAlignment(RC);
2080     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2081 
2082     // Might we have over-aligned allocas?
2083     bool HasAlVars = MFI.hasVarSizedObjects() &&
2084                      MFI.getMaxAlignment() > getStackAlignment();
2085 
2086     // These kinds of spills might need two registers.
2087     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2088       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2089 
2090   }
2091 }
2092 
2093 // This function checks if a callee saved gpr can be spilled to a volatile
2094 // vector register. This occurs for leaf functions when the option
2095 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2096 // which were not spilled to vectors, return false so the target independent
2097 // code can handle them by assigning a FrameIdx to a stack slot.
2098 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2099     MachineFunction &MF, const TargetRegisterInfo *TRI,
2100     std::vector<CalleeSavedInfo> &CSI) const {
2101 
2102   if (CSI.empty())
2103     return true; // Early exit if no callee saved registers are modified!
2104 
2105   // Early exit if cannot spill gprs to volatile vector registers.
2106   MachineFrameInfo &MFI = MF.getFrameInfo();
2107   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2108     return false;
2109 
2110   // Build a BitVector of VSRs that can be used for spilling GPRs.
2111   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2112   BitVector BVCalleeSaved(TRI->getNumRegs());
2113   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2114   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2115   for (unsigned i = 0; CSRegs[i]; ++i)
2116     BVCalleeSaved.set(CSRegs[i]);
2117 
2118   for (unsigned Reg : BVAllocatable.set_bits()) {
2119     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2120     // used in the function.
2121     if (BVCalleeSaved[Reg] ||
2122         (!PPC::F8RCRegClass.contains(Reg) &&
2123          !PPC::VFRCRegClass.contains(Reg)) ||
2124         (MF.getRegInfo().isPhysRegUsed(Reg)))
2125       BVAllocatable.reset(Reg);
2126   }
2127 
2128   bool AllSpilledToReg = true;
2129   for (auto &CS : CSI) {
2130     if (BVAllocatable.none())
2131       return false;
2132 
2133     unsigned Reg = CS.getReg();
2134     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2135       AllSpilledToReg = false;
2136       continue;
2137     }
2138 
2139     unsigned VolatileVFReg = BVAllocatable.find_first();
2140     if (VolatileVFReg < BVAllocatable.size()) {
2141       CS.setDstReg(VolatileVFReg);
2142       BVAllocatable.reset(VolatileVFReg);
2143     } else {
2144       AllSpilledToReg = false;
2145     }
2146   }
2147   return AllSpilledToReg;
2148 }
2149 
2150 
2151 bool
2152 PPCFrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB,
2153                                      MachineBasicBlock::iterator MI,
2154                                      const std::vector<CalleeSavedInfo> &CSI,
2155                                      const TargetRegisterInfo *TRI) const {
2156 
2157   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2158   // Return false otherwise to maintain pre-existing behavior.
2159   if (!Subtarget.isSVR4ABI())
2160     return false;
2161 
2162   MachineFunction *MF = MBB.getParent();
2163   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2164   DebugLoc DL;
2165   bool CRSpilled = false;
2166   MachineInstrBuilder CRMIB;
2167 
2168   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2169     unsigned Reg = CSI[i].getReg();
2170     // Only Darwin actually uses the VRSAVE register, but it can still appear
2171     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2172     // Darwin, ignore it.
2173     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2174       continue;
2175 
2176     // CR2 through CR4 are the nonvolatile CR fields.
2177     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2178 
2179     // Add the callee-saved register as live-in; it's killed at the spill.
2180     // Do not do this for callee-saved registers that are live-in to the
2181     // function because they will already be marked live-in and this will be
2182     // adding it for a second time. It is an error to add the same register
2183     // to the set more than once.
2184     const MachineRegisterInfo &MRI = MF->getRegInfo();
2185     bool IsLiveIn = MRI.isLiveIn(Reg);
2186     if (!IsLiveIn)
2187        MBB.addLiveIn(Reg);
2188 
2189     if (CRSpilled && IsCRField) {
2190       CRMIB.addReg(Reg, RegState::ImplicitKill);
2191       continue;
2192     }
2193 
2194     // Insert the spill to the stack frame.
2195     if (IsCRField) {
2196       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2197       if (Subtarget.isPPC64()) {
2198         // The actual spill will happen at the start of the prologue.
2199         FuncInfo->addMustSaveCR(Reg);
2200       } else {
2201         CRSpilled = true;
2202         FuncInfo->setSpillsCR();
2203 
2204         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2205         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2206         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2207                   .addReg(Reg, RegState::ImplicitKill);
2208 
2209         MBB.insert(MI, CRMIB);
2210         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2211                                          .addReg(PPC::R12,
2212                                                  getKillRegState(true)),
2213                                          CSI[i].getFrameIdx()));
2214       }
2215     } else {
2216       if (CSI[i].isSpilledToReg()) {
2217         NumPESpillVSR++;
2218         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2219           .addReg(Reg, getKillRegState(true));
2220       } else {
2221         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2222         // Use !IsLiveIn for the kill flag.
2223         // We do not want to kill registers that are live in this function
2224         // before their use because they will become undefined registers.
2225         TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn,
2226                                 CSI[i].getFrameIdx(), RC, TRI);
2227       }
2228     }
2229   }
2230   return true;
2231 }
2232 
2233 static void
2234 restoreCRs(bool isPPC64, bool is31,
2235            bool CR2Spilled, bool CR3Spilled, bool CR4Spilled,
2236            MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2237            const std::vector<CalleeSavedInfo> &CSI, unsigned CSIIndex) {
2238 
2239   MachineFunction *MF = MBB.getParent();
2240   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2241   DebugLoc DL;
2242   unsigned RestoreOp, MoveReg;
2243 
2244   if (isPPC64)
2245     // This is handled during epilogue generation.
2246     return;
2247   else {
2248     // 32-bit:  FP-relative
2249     MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ),
2250                                              PPC::R12),
2251                                      CSI[CSIIndex].getFrameIdx()));
2252     RestoreOp = PPC::MTOCRF;
2253     MoveReg = PPC::R12;
2254   }
2255 
2256   if (CR2Spilled)
2257     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2258                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2259 
2260   if (CR3Spilled)
2261     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2262                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2263 
2264   if (CR4Spilled)
2265     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2266                .addReg(MoveReg, getKillRegState(true)));
2267 }
2268 
2269 MachineBasicBlock::iterator PPCFrameLowering::
2270 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2271                               MachineBasicBlock::iterator I) const {
2272   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2273   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2274       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2275     // Add (actually subtract) back the amount the callee popped on return.
2276     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2277       bool is64Bit = Subtarget.isPPC64();
2278       CalleeAmt *= -1;
2279       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2280       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2281       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2282       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2283       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2284       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2285       const DebugLoc &dl = I->getDebugLoc();
2286 
2287       if (isInt<16>(CalleeAmt)) {
2288         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2289           .addReg(StackReg, RegState::Kill)
2290           .addImm(CalleeAmt);
2291       } else {
2292         MachineBasicBlock::iterator MBBI = I;
2293         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2294           .addImm(CalleeAmt >> 16);
2295         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2296           .addReg(TmpReg, RegState::Kill)
2297           .addImm(CalleeAmt & 0xFFFF);
2298         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2299           .addReg(StackReg, RegState::Kill)
2300           .addReg(TmpReg);
2301       }
2302     }
2303   }
2304   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2305   return MBB.erase(I);
2306 }
2307 
2308 bool
2309 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2310                                         MachineBasicBlock::iterator MI,
2311                                         std::vector<CalleeSavedInfo> &CSI,
2312                                         const TargetRegisterInfo *TRI) const {
2313 
2314   // Currently, this function only handles SVR4 32- and 64-bit ABIs.
2315   // Return false otherwise to maintain pre-existing behavior.
2316   if (!Subtarget.isSVR4ABI())
2317     return false;
2318 
2319   MachineFunction *MF = MBB.getParent();
2320   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2321   bool CR2Spilled = false;
2322   bool CR3Spilled = false;
2323   bool CR4Spilled = false;
2324   unsigned CSIIndex = 0;
2325 
2326   // Initialize insertion-point logic; we will be restoring in reverse
2327   // order of spill.
2328   MachineBasicBlock::iterator I = MI, BeforeI = I;
2329   bool AtStart = I == MBB.begin();
2330 
2331   if (!AtStart)
2332     --BeforeI;
2333 
2334   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2335     unsigned Reg = CSI[i].getReg();
2336 
2337     // Only Darwin actually uses the VRSAVE register, but it can still appear
2338     // here if, for example, @llvm.eh.unwind.init() is used.  If we're not on
2339     // Darwin, ignore it.
2340     if (Reg == PPC::VRSAVE && !Subtarget.isDarwinABI())
2341       continue;
2342 
2343     if (Reg == PPC::CR2) {
2344       CR2Spilled = true;
2345       // The spill slot is associated only with CR2, which is the
2346       // first nonvolatile spilled.  Save it here.
2347       CSIIndex = i;
2348       continue;
2349     } else if (Reg == PPC::CR3) {
2350       CR3Spilled = true;
2351       continue;
2352     } else if (Reg == PPC::CR4) {
2353       CR4Spilled = true;
2354       continue;
2355     } else {
2356       // When we first encounter a non-CR register after seeing at
2357       // least one CR register, restore all spilled CRs together.
2358       if ((CR2Spilled || CR3Spilled || CR4Spilled)
2359           && !(PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
2360         bool is31 = needsFP(*MF);
2361         restoreCRs(Subtarget.isPPC64(), is31,
2362                    CR2Spilled, CR3Spilled, CR4Spilled,
2363                    MBB, I, CSI, CSIIndex);
2364         CR2Spilled = CR3Spilled = CR4Spilled = false;
2365       }
2366 
2367       if (CSI[i].isSpilledToReg()) {
2368         DebugLoc DL;
2369         NumPEReloadVSR++;
2370         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2371             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2372       } else {
2373        // Default behavior for non-CR saves.
2374         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2375         TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2376         assert(I != MBB.begin() &&
2377                "loadRegFromStackSlot didn't insert any code!");
2378       }
2379     }
2380 
2381     // Insert in reverse order.
2382     if (AtStart)
2383       I = MBB.begin();
2384     else {
2385       I = BeforeI;
2386       ++I;
2387     }
2388   }
2389 
2390   // If we haven't yet spilled the CRs, do so now.
2391   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2392     bool is31 = needsFP(*MF);
2393     restoreCRs(Subtarget.isPPC64(), is31, CR2Spilled, CR3Spilled, CR4Spilled,
2394                MBB, I, CSI, CSIIndex);
2395   }
2396 
2397   return true;
2398 }
2399 
2400 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2401   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2402     return false;
2403   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2404           MF.getSubtarget<PPCSubtarget>().isPPC64());
2405 }
2406