xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision 8efc2f5723b0892d0518bdac441c674b7d850ac6)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
31 #define DEBUG_TYPE "framelowering"
32 STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
33 STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");
34 
// Hidden command-line option "ppc-enable-pe-vector-spills"; it is off by
// default. Per its description string, it enables spills in the prologue to
// vector registers.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
39 
/// VRRegNo - Map from a numbered VR register to its enum value.
///
/// The table has 32 entries, V0 through V31, so VRRegNo[N] is the enum value
/// of vector register number N. Callers index it with values in [0, 31].
static const MCPhysReg VRRegNo[] = {
 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   if (STI.isAIXABI())
58     return STI.isPPC64() ? 40 : 20;
59   return STI.isELFv2ABI() ? 24 : 40;
60 }
61 
62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63   // First slot in the general register save area.
64   return STI.isPPC64() ? -8U : -4U;
65 }
66 
67 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
68   if (STI.isAIXABI() || STI.isPPC64())
69     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
70 
71   // 32-bit SVR4 ABI:
72   return 8;
73 }
74 
75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
76   // SVR4 ABI: First slot in the general register save area.
77   return STI.isPPC64()
78              ? -16U
79              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
80 }
81 
82 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
83   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
84 }
85 
/// Construct the PPC frame lowering for subtarget \p STI.
///
/// The base class is initialised for a downward-growing stack using the
/// platform stack alignment reported by the subtarget and a third argument of
/// 0. The save offsets and the linkage size are computed once here from the
/// subtarget by the file-local compute*() helpers and cached in members.
PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
    : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
                          STI.getPlatformStackAlignment(), 0),
      Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
      TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
      FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
      LinkageSize(computeLinkageSize(Subtarget)),
      BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
      CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
95 
// With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
//
// Returns a pointer to a static table of {register, offset} pairs and stores
// the number of table entries in NumEntries. The table is selected by
// pointer width (64-bit vs. 32-bit). When the subtarget does not use the
// SVR4 ABI, NumEntries is set to 0 and nullptr is returned.
const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
    unsigned &NumEntries) const {
  // Early exit if not using the SVR4 ABI.
  if (!Subtarget.isSVR4ABI()) {
    NumEntries = 0;
    return nullptr;
  }

// The CALLEE_SAVED_* macros below each expand to a comma-separated list of
// {register, offset} initializers so the same lists can be shared between
// the 32-bit and 64-bit tables.

// Floating-point register save area offsets.
#define CALLEE_SAVED_FPRS \
      {PPC::F31, -8},     \
      {PPC::F30, -16},    \
      {PPC::F29, -24},    \
      {PPC::F28, -32},    \
      {PPC::F27, -40},    \
      {PPC::F26, -48},    \
      {PPC::F25, -56},    \
      {PPC::F24, -64},    \
      {PPC::F23, -72},    \
      {PPC::F22, -80},    \
      {PPC::F21, -88},    \
      {PPC::F20, -96},    \
      {PPC::F19, -104},   \
      {PPC::F18, -112},   \
      {PPC::F17, -120},   \
      {PPC::F16, -128},   \
      {PPC::F15, -136},   \
      {PPC::F14, -144}

// 32-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS32 \
      {PPC::R31, -4},       \
      {PPC::R30, -8},       \
      {PPC::R29, -12},      \
      {PPC::R28, -16},      \
      {PPC::R27, -20},      \
      {PPC::R26, -24},      \
      {PPC::R25, -28},      \
      {PPC::R24, -32},      \
      {PPC::R23, -36},      \
      {PPC::R22, -40},      \
      {PPC::R21, -44},      \
      {PPC::R20, -48},      \
      {PPC::R19, -52},      \
      {PPC::R18, -56},      \
      {PPC::R17, -60},      \
      {PPC::R16, -64},      \
      {PPC::R15, -68},      \
      {PPC::R14, -72}

// 64-bit general purpose register save area offsets.
#define CALLEE_SAVED_GPRS64 \
      {PPC::X31, -8},       \
      {PPC::X30, -16},      \
      {PPC::X29, -24},      \
      {PPC::X28, -32},      \
      {PPC::X27, -40},      \
      {PPC::X26, -48},      \
      {PPC::X25, -56},      \
      {PPC::X24, -64},      \
      {PPC::X23, -72},      \
      {PPC::X22, -80},      \
      {PPC::X21, -88},      \
      {PPC::X20, -96},      \
      {PPC::X19, -104},     \
      {PPC::X18, -112},     \
      {PPC::X17, -120},     \
      {PPC::X16, -128},     \
      {PPC::X15, -136},     \
      {PPC::X14, -144}

// Vector register save area offsets.
#define CALLEE_SAVED_VRS \
      {PPC::V31, -16},   \
      {PPC::V30, -32},   \
      {PPC::V29, -48},   \
      {PPC::V28, -64},   \
      {PPC::V27, -80},   \
      {PPC::V26, -96},   \
      {PPC::V25, -112},  \
      {PPC::V24, -128},  \
      {PPC::V23, -144},  \
      {PPC::V22, -160},  \
      {PPC::V21, -176},  \
      {PPC::V20, -192}

  // Note that the offsets here overlap, but this is fixed up in
  // processFunctionBeforeFrameFinalized.

  // 32-bit table: FPRs, 32-bit GPRs, the CR and VRSAVE slots, vector
  // registers and the SPE registers.
  static const SpillSlot Offsets[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS32,

      // CR save area offset.  We map each of the nonvolatile CR fields
      // to the slot for CR2, which is the first of the nonvolatile CR
      // fields to be assigned, so that we only allocate one save slot.
      // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
      {PPC::CR2, -4},

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS,

      // SPE register save area (overlaps Vector save area).
      {PPC::S31, -8},
      {PPC::S30, -16},
      {PPC::S29, -24},
      {PPC::S28, -32},
      {PPC::S27, -40},
      {PPC::S26, -48},
      {PPC::S25, -56},
      {PPC::S24, -64},
      {PPC::S23, -72},
      {PPC::S22, -80},
      {PPC::S21, -88},
      {PPC::S20, -96},
      {PPC::S19, -104},
      {PPC::S18, -112},
      {PPC::S17, -120},
      {PPC::S16, -128},
      {PPC::S15, -136},
      {PPC::S14, -144}};

  // 64-bit table: FPRs, 64-bit GPRs, the VRSAVE slot and vector registers.
  // Unlike the 32-bit table it has no CR2 entry and no SPE entries.
  static const SpillSlot Offsets64[] = {
      CALLEE_SAVED_FPRS,
      CALLEE_SAVED_GPRS64,

      // VRSAVE save area offset.
      {PPC::VRSAVE, -4},

      CALLEE_SAVED_VRS
  };

  // Hand back the table matching the pointer width, together with its length.
  if (Subtarget.isPPC64()) {
    NumEntries = array_lengthof(Offsets64);

    return Offsets64;
  } else {
    NumEntries = array_lengthof(Offsets);

    return Offsets;
  }
}
241 
242 /// RemoveVRSaveCode - We have found that this function does not need any code
243 /// to manipulate the VRSAVE register, even though it uses vector registers.
244 /// This can happen when the only registers used are known to be live in or out
245 /// of the function.  Remove all of the VRSAVE related code from the function.
246 /// FIXME: The removal of the code results in a compile failure at -O0 when the
247 /// function contains a function call, as the GPR containing original VRSAVE
248 /// contents is spilled and reloaded around the call.  Without the prolog code,
249 /// the spill instruction refers to an undefined register.  This code needs
250 /// to account for all uses of that GPR.
251 static void RemoveVRSaveCode(MachineInstr &MI) {
252   MachineBasicBlock *Entry = MI.getParent();
253   MachineFunction *MF = Entry->getParent();
254 
255   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
256   MachineBasicBlock::iterator MBBI = MI;
257   ++MBBI;
258   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
259   MBBI->eraseFromParent();
260 
261   bool RemovedAllMTVRSAVEs = true;
262   // See if we can find and remove the MTVRSAVE instruction from all of the
263   // epilog blocks.
264   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
265     // If last instruction is a return instruction, add an epilogue
266     if (I->isReturnBlock()) {
267       bool FoundIt = false;
268       for (MBBI = I->end(); MBBI != I->begin(); ) {
269         --MBBI;
270         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
271           MBBI->eraseFromParent();  // remove it.
272           FoundIt = true;
273           break;
274         }
275       }
276       RemovedAllMTVRSAVEs &= FoundIt;
277     }
278   }
279 
280   // If we found and removed all MTVRSAVE instructions, remove the read of
281   // VRSAVE as well.
282   if (RemovedAllMTVRSAVEs) {
283     MBBI = MI;
284     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
285     --MBBI;
286     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
287     MBBI->eraseFromParent();
288   }
289 
290   // Finally, nuke the UPDATE_VRSAVE.
291   MI.eraseFromParent();
292 }
293 
294 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
295 // instruction selector.  Based on the vector registers that have been used,
296 // transform this into the appropriate ORI instruction.
297 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
298   MachineFunction *MF = MI.getParent()->getParent();
299   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
300   DebugLoc dl = MI.getDebugLoc();
301 
302   const MachineRegisterInfo &MRI = MF->getRegInfo();
303   unsigned UsedRegMask = 0;
304   for (unsigned i = 0; i != 32; ++i)
305     if (MRI.isPhysRegModified(VRRegNo[i]))
306       UsedRegMask |= 1 << (31-i);
307 
308   // Live in and live out values already must be in the mask, so don't bother
309   // marking them.
310   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
311     unsigned RegNo = TRI->getEncodingValue(LI.first);
312     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
313       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
314   }
315 
316   // Live out registers appear as use operands on return instructions.
317   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
318        UsedRegMask != 0 && BI != BE; ++BI) {
319     const MachineBasicBlock &MBB = *BI;
320     if (!MBB.isReturnBlock())
321       continue;
322     const MachineInstr &Ret = MBB.back();
323     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
324       const MachineOperand &MO = Ret.getOperand(I);
325       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
326         continue;
327       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
328       UsedRegMask &= ~(1 << (31-RegNo));
329     }
330   }
331 
332   // If no registers are used, turn this into a copy.
333   if (UsedRegMask == 0) {
334     // Remove all VRSAVE code.
335     RemoveVRSaveCode(MI);
336     return;
337   }
338 
339   Register SrcReg = MI.getOperand(1).getReg();
340   Register DstReg = MI.getOperand(0).getReg();
341 
342   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
343     if (DstReg != SrcReg)
344       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
345           .addReg(SrcReg)
346           .addImm(UsedRegMask);
347     else
348       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
349           .addReg(SrcReg, RegState::Kill)
350           .addImm(UsedRegMask);
351   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
352     if (DstReg != SrcReg)
353       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
354           .addReg(SrcReg)
355           .addImm(UsedRegMask >> 16);
356     else
357       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
358           .addReg(SrcReg, RegState::Kill)
359           .addImm(UsedRegMask >> 16);
360   } else {
361     if (DstReg != SrcReg)
362       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
363           .addReg(SrcReg)
364           .addImm(UsedRegMask >> 16);
365     else
366       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
367           .addReg(SrcReg, RegState::Kill)
368           .addImm(UsedRegMask >> 16);
369 
370     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
371         .addReg(DstReg, RegState::Kill)
372         .addImm(UsedRegMask & 0xFFFF);
373   }
374 
375   // Remove the old UPDATE_VRSAVE instruction.
376   MI.eraseFromParent();
377 }
378 
379 static bool spillsCR(const MachineFunction &MF) {
380   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
381   return FuncInfo->isCRSpilled();
382 }
383 
384 static bool spillsVRSAVE(const MachineFunction &MF) {
385   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
386   return FuncInfo->isVRSAVESpilled();
387 }
388 
389 static bool hasSpills(const MachineFunction &MF) {
390   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
391   return FuncInfo->hasSpills();
392 }
393 
394 static bool hasNonRISpills(const MachineFunction &MF) {
395   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
396   return FuncInfo->hasNonRISpills();
397 }
398 
399 /// MustSaveLR - Return true if this function requires that we save the LR
400 /// register onto the stack in the prolog and restore it in the epilog of the
401 /// function.
402 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
403   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
404 
405   // We need a save/restore of LR if there is any def of LR (which is
406   // defined by calls, including the PIC setup sequence), or if there is
407   // some use of the LR stack slot (e.g. for builtin_return_address).
408   // (LR comes in 32 and 64 bit versions.)
409   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
410   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
411 }
412 
413 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
414 /// call frame size. Update the MachineFunction object with the stack size.
415 unsigned
416 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
417                                                 bool UseEstimate) const {
418   unsigned NewMaxCallFrameSize = 0;
419   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
420                                             &NewMaxCallFrameSize);
421   MF.getFrameInfo().setStackSize(FrameSize);
422   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
423   return FrameSize;
424 }
425 
426 /// determineFrameLayout - Determine the size of the frame and maximum call
427 /// frame size.
428 unsigned
429 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
430                                        bool UseEstimate,
431                                        unsigned *NewMaxCallFrameSize) const {
432   const MachineFrameInfo &MFI = MF.getFrameInfo();
433   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
434 
435   // Get the number of bytes to allocate from the FrameInfo
436   unsigned FrameSize =
437     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
438 
439   // Get stack alignments. The frame must be aligned to the greatest of these:
440   unsigned TargetAlign = getStackAlignment(); // alignment required per the ABI
441   unsigned MaxAlign = MFI.getMaxAlignment(); // algmt required by data in frame
442   unsigned AlignMask = std::max(MaxAlign, TargetAlign) - 1;
443 
444   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
445 
446   unsigned LR = RegInfo->getRARegister();
447   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
448   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
449                        !MFI.adjustsStack() &&       // No calls.
450                        !MustSaveLR(MF, LR) &&       // No need to save LR.
451                        !FI->mustSaveTOC() &&        // No need to save TOC.
452                        !RegInfo->hasBasePointer(MF); // No special alignment.
453 
454   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
455   // code if all local vars are reg-allocated.
456   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
457 
458   // Check whether we can skip adjusting the stack pointer (by using red zone)
459   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
460     // No need for frame
461     return 0;
462   }
463 
464   // Get the maximum call frame size of all the calls.
465   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
466 
467   // Maximum call frame needs to be at least big enough for linkage area.
468   unsigned minCallFrameSize = getLinkageSize();
469   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
470 
471   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
472   // that allocations will be aligned.
473   if (MFI.hasVarSizedObjects())
474     maxCallFrameSize = (maxCallFrameSize + AlignMask) & ~AlignMask;
475 
476   // Update the new max call frame size if the caller passes in a valid pointer.
477   if (NewMaxCallFrameSize)
478     *NewMaxCallFrameSize = maxCallFrameSize;
479 
480   // Include call frame size in total.
481   FrameSize += maxCallFrameSize;
482 
483   // Make sure the frame is aligned.
484   FrameSize = (FrameSize + AlignMask) & ~AlignMask;
485 
486   return FrameSize;
487 }
488 
489 // hasFP - Return true if the specified function actually has a dedicated frame
490 // pointer register.
491 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
492   const MachineFrameInfo &MFI = MF.getFrameInfo();
493   // FIXME: This is pretty much broken by design: hasFP() might be called really
494   // early, before the stack layout was calculated and thus hasFP() might return
495   // true or false here depending on the time of call.
496   return (MFI.getStackSize()) && needsFP(MF);
497 }
498 
499 // needsFP - Return true if the specified function should have a dedicated frame
500 // pointer register.  This is true if the function has variable sized allocas or
501 // if frame pointer elimination is disabled.
502 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
503   const MachineFrameInfo &MFI = MF.getFrameInfo();
504 
505   // Naked functions have no stack frame pushed, so we don't have a frame
506   // pointer.
507   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
508     return false;
509 
510   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
511     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
512     (MF.getTarget().Options.GuaranteedTailCallOpt &&
513      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
514 }
515 
516 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
517   bool is31 = needsFP(MF);
518   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
519   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
520 
521   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
522   bool HasBP = RegInfo->hasBasePointer(MF);
523   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
524   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
525 
526   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
527        BI != BE; ++BI)
528     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
529       --MBBI;
530       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
531         MachineOperand &MO = MBBI->getOperand(I);
532         if (!MO.isReg())
533           continue;
534 
535         switch (MO.getReg()) {
536         case PPC::FP:
537           MO.setReg(FPReg);
538           break;
539         case PPC::FP8:
540           MO.setReg(FP8Reg);
541           break;
542         case PPC::BP:
543           MO.setReg(BPReg);
544           break;
545         case PPC::BP8:
546           MO.setReg(BP8Reg);
547           break;
548 
549         }
550       }
551     }
552 }
553 
/*  This function will do the following:
    - Set SR1 and SR2 (when provided) to the defaults R0 and R12 respectively
      (X0 and X12 on 64-bit; defaults recommended by the ABI).
    - If the registers are wanted at the end of MBB (UseAtEnd) and MBB is a
      return block, or wanted at the start and MBB is the function's entry
      block, keep the defaults and return true.
    - Otherwise, initialize the register scavenger and look for available
      registers.
    - If the defaults (R0/R12) are both available, return true
    - If TwoUniqueRegsRequired is set to true, it looks for two unique
      registers. Otherwise, look for a single available register.
      - If the required registers are found, set SR1 and SR2 and return true.
      - If the required registers are not found, set SR2 or both SR1 and SR2 to
        PPC::NoRegister and return false.

    Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
    is not set, this function will attempt to find two different registers, but
    still return true if only one register is available (and set SR1 == SR2).

    SR1 and SR2 may be null; SR2 may only be non-null when SR1 is as well.
*/
bool
PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
                                      bool UseAtEnd,
                                      bool TwoUniqueRegsRequired,
                                      unsigned *SR1,
                                      unsigned *SR2) const {
  RegScavenger RS;
  unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
  unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;

  // Set the defaults for the two scratch registers.
  if (SR1)
    *SR1 = R0;

  if (SR2) {
    assert (SR1 && "Asking for the second scratch register but not the first?");
    *SR2 = R12;
  }

  // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
  if ((UseAtEnd && MBB->isReturnBlock()) ||
      (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
    return true;

  RS.enterBasicBlock(*MBB);

  if (UseAtEnd && !MBB->empty()) {
    // The scratch register will be used at the end of the block, so must
    // consider all registers used within the block

    MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
    // If no terminator, back iterator up to previous instruction.
    if (MBBI == MBB->end())
      MBBI = std::prev(MBBI);

    // Advance the scavenger to that instruction unless it is the first one in
    // the block.
    if (MBBI != MBB->begin())
      RS.forward(MBBI);
  }

  // If the two registers are available, we're all good.
  // Note that we only return here if both R0 and R12 are available because
  // although the function may not require two unique registers, it may benefit
  // from having two so we should try to provide them.
  if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
    return true;

  // Get the list of callee-saved registers for the target.
  const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
  const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());

  // Get all the available registers in the block.
  BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
                                     &PPC::GPRCRegClass);

  // We shouldn't use callee-saved registers as scratch registers as they may be
  // available when looking for a candidate block for shrink wrapping but not
  // available when the actual prologue/epilogue is being emitted because they
  // were added as live-in to the prologue block by PrologueEpilogueInserter.
  // (The callee-saved list is terminated by a zero entry.)
  for (int i = 0; CSRegs[i]; ++i)
    BV.reset(CSRegs[i]);

  // Set the first scratch register to the first available one.
  if (SR1) {
    int FirstScratchReg = BV.find_first();
    *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
  }

  // If there is another one available, set the second scratch register to that.
  // Otherwise, set it to either PPC::NoRegister if this function requires two
  // or to whatever SR1 is set to if this function doesn't require two.
  if (SR2) {
    int SecondScratchReg = BV.find_next(*SR1);
    if (SecondScratchReg != -1)
      *SR2 = SecondScratchReg;
    else
      *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
  }

  // Now that we've done our best to provide both registers, double check
  // whether we were unable to provide enough.
  if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
    return false;

  return true;
}
655 
656 // We need a scratch register for spilling LR and for spilling CR. By default,
657 // we use two scratch registers to hide latency. However, if only one scratch
658 // register is available, we can adjust for that by not overlapping the spill
659 // code. However, if we need to realign the stack (i.e. have a base pointer)
660 // and the stack frame is large, we need two scratch registers.
661 bool
662 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
663   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
664   MachineFunction &MF = *(MBB->getParent());
665   bool HasBP = RegInfo->hasBasePointer(MF);
666   unsigned FrameSize = determineFrameLayout(MF);
667   int NegFrameSize = -FrameSize;
668   bool IsLargeFrame = !isInt<16>(NegFrameSize);
669   MachineFrameInfo &MFI = MF.getFrameInfo();
670   unsigned MaxAlign = MFI.getMaxAlignment();
671   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
672 
673   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
674 }
675 
676 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
677   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
678 
679   return findScratchRegister(TmpMBB, false,
680                              twoUniqueScratchRegsRequired(TmpMBB));
681 }
682 
683 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
684   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
685 
686   return findScratchRegister(TmpMBB, true);
687 }
688 
689 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
690   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
691   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
692 
693   // Abort if there is no register info or function info.
694   if (!RegInfo || !FI)
695     return false;
696 
697   // Only move the stack update on ELFv2 ABI and PPC64.
698   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
699     return false;
700 
701   // Check the frame size first and return false if it does not fit the
702   // requirements.
703   // We need a non-zero frame size as well as a frame that will fit in the red
704   // zone. This is because by moving the stack pointer update we are now storing
705   // to the red zone until the stack pointer is updated. If we get an interrupt
706   // inside the prologue but before the stack update we now have a number of
707   // stores to the red zone and those stores must all fit.
708   MachineFrameInfo &MFI = MF.getFrameInfo();
709   unsigned FrameSize = MFI.getStackSize();
710   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
711     return false;
712 
713   // Frame pointers and base pointers complicate matters so don't do anything
714   // if we have them. For example having a frame pointer will sometimes require
715   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
716   // difficult.
717   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
718     return false;
719 
720   // Calls to fast_cc functions use different rules for passing parameters on
721   // the stack from the ABI and using PIC base in the function imposes
722   // similar restrictions to using the base pointer. It is not generally safe
723   // to move the stack pointer update in these situations.
724   if (FI->hasFastCall() || FI->usesPICBase())
725     return false;
726 
727   // Finally we can move the stack update if we do not require register
728   // scavenging. Register scavenging can introduce more spills and so
729   // may make the frame size larger than we have computed.
730   return !RegInfo->requiresFrameIndexScavenging(MF);
731 }
732 
/// Emit the prologue for \p MF into \p MBB.
///
/// Roughly in order, this routine: looks for an UPDATE_VRSAVE pseudo when the
/// ABI is not SVR4; computes the frame size; obtains scratch registers; saves
/// CR and LR; saves FP, the PIC base register and BP relative to the incoming
/// SP when a red zone exists; claims the frame with a store-with-update of SP;
/// optionally saves the TOC register; saves FP, the PIC base register and BP
/// after the SP update when there is no red zone; emits CFI instructions when
/// needsCFI is true; and copies SP into FP when the function has a frame
/// pointer.
///
/// If the computed frame size is zero the routine returns right after the
/// register saves, before any SP update or CFI instruction is emitted.
///
/// \param MF  Function whose prologue is being emitted.
/// \param MBB Block that receives the prologue. Most instructions are inserted
///            in front of the instruction that was first in the block on
///            entry; the LR store, the immediate-form SP update and the TOC
///            save are inserted at StackUpdateLoc, which may lie further down.
733 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
734                                     MachineBasicBlock &MBB) const {
735   MachineBasicBlock::iterator MBBI = MBB.begin();
736   MachineFrameInfo &MFI = MF.getFrameInfo();
737   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
738   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
739 
740   MachineModuleInfo &MMI = MF.getMMI();
741   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
      // Left default-constructed and never assigned: every instruction built
      // below carries this empty DebugLoc.
742   DebugLoc dl;
743   // AIX assembler does not support cfi directives.
744   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
745 
746   // Get processor type.
747   bool isPPC64 = Subtarget.isPPC64();
748   // Get the ABI.
749   bool isSVR4ABI = Subtarget.isSVR4ABI();
750   bool isAIXABI = Subtarget.isAIXABI();
751   bool isELFv2ABI = Subtarget.isELFv2ABI();
752   assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
753 
754   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
755   // process it.
      // NOTE(review): this scan only runs when the ABI is not SVR4. If the assert
      // above holds, that leaves AIX, where finding the pseudo is a fatal error,
      // so HandleVRSaveUpdate looks unreachable here -- confirm. The loop counter
      // i is never read.
756   if (!isSVR4ABI)
757     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
758       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
759         if (isAIXABI)
760           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
761         HandleVRSaveUpdate(*MBBI, TII);
762         break;
763       }
764     }
765 
766   // Move MBBI back to the beginning of the prologue block.
767   MBBI = MBB.begin();
768 
769   // Work out frame sizes.
770   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
771   int NegFrameSize = -FrameSize;
      // NOTE(review): NegFrameSize is already an int at this point, so this range
      // check looks like it can only fire where int is wider than 32 bits --
      // confirm whether a wider type was intended.
772   if (!isInt<32>(NegFrameSize))
773     llvm_unreachable("Unhandled stack size!");
774 
775   if (MFI.isFrameAddressTaken())
776     replaceFPWithRealFP(MF);
777 
778   // Check if the link register (LR) must be saved.
779   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
780   bool MustSaveLR = FI->mustSaveLR();
781   bool MustSaveTOC = FI->mustSaveTOC();
782   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
783   bool MustSaveCR = !MustSaveCRs.empty();
784   // Do we have a frame pointer and/or base pointer for this function?
785   bool HasFP = hasFP(MF);
786   bool HasBP = RegInfo->hasBasePointer(MF);
      // Only 32-bit SVR4 is treated as having no red zone.
787   bool HasRedZone = isPPC64 || !isSVR4ABI;
788 
789   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
790   Register BPReg = RegInfo->getBaseRegister(MF);
791   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
792   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
793   unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
      // ScratchReg and TempReg are handed to findScratchRegister by address
      // below; presumably that call picks the final registers -- confirm.
794   unsigned ScratchReg  = 0;
795   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
796   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
      // Opcode descriptors, chosen once according to the 32-bit or 64-bit
      // target so the emission code below is width-agnostic.
797   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
798                                                 : PPC::MFLR );
799   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
800                                                  : PPC::STW );
801   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
802                                                      : PPC::STWU );
803   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
804                                                         : PPC::STWUX);
805   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
806                                                           : PPC::LIS );
807   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
808                                                  : PPC::ORI );
809   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
810                                               : PPC::OR );
811   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
812                                                             : PPC::SUBFC);
813   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
814                                                                : PPC::SUBFIC);
815   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
816                                                            : PPC::MFCR);
817   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
818 
819   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
820   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
821   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
822   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
823   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
824          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
825 
826   // Using the same bool variable as below to suppress compiler warnings.
      // SingleScratchReg first receives the result of findScratchRegister, which
      // is consumed only by the assert, and is then reused to record whether the
      // two scratch registers turned out to be the same register.
827   bool SingleScratchReg =
828     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
829                         &ScratchReg, &TempReg);
830   assert(SingleScratchReg &&
831          "Required number of registers not available in this block");
832 
833   SingleScratchReg = ScratchReg == TempReg;
834 
835   int LROffset = getReturnSaveOffset();
836 
      // Save-slot offsets for FP, BP and the PIC base register. Under SVR4 the
      // FP and BP offsets are read from their frame objects; otherwise fixed
      // offsets are used. Each stays 0 when the register is not in use.
      // NOTE(review): the MFI locals declared in the three blocks below shadow
      // the function-level MFI; both are obtained from MF.getFrameInfo().
837   int FPOffset = 0;
838   if (HasFP) {
839     if (isSVR4ABI) {
840       MachineFrameInfo &MFI = MF.getFrameInfo();
841       int FPIndex = FI->getFramePointerSaveIndex();
842       assert(FPIndex && "No Frame Pointer Save Slot!");
843       FPOffset = MFI.getObjectOffset(FPIndex);
844     } else {
845       FPOffset = getFramePointerSaveOffset();
846     }
847   }
848 
849   int BPOffset = 0;
850   if (HasBP) {
851     if (isSVR4ABI) {
852       MachineFrameInfo &MFI = MF.getFrameInfo();
853       int BPIndex = FI->getBasePointerSaveIndex();
854       assert(BPIndex && "No Base Pointer Save Slot!");
855       BPOffset = MFI.getObjectOffset(BPIndex);
856     } else {
857       BPOffset = getBasePointerSaveOffset();
858     }
859   }
860 
861   int PBPOffset = 0;
862   if (FI->usesPICBase()) {
863     MachineFrameInfo &MFI = MF.getFrameInfo();
864     int PBPIndex = FI->getPICBasePointerSaveIndex();
865     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
866     PBPOffset = MFI.getObjectOffset(PBPIndex);
867   }
868 
869   // Get stack alignments.
870   unsigned MaxAlign = MFI.getMaxAlignment();
871   if (HasBP && MaxAlign > 1)
872     assert(isPowerOf2_32(MaxAlign) && isInt<16>(MaxAlign) &&
873            "Invalid alignment!");
874 
875   // Frames of 32KB & larger require special handling because they cannot be
876   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
877   bool isLargeFrame = !isInt<16>(NegFrameSize);
878 
879   // Check if we can move the stack update instruction (stdu) down the prologue
880   // past the callee saves. Hopefully this will avoid the situation where the
881   // saves are waiting for the update on the store with update to complete.
      // StackUpdateLoc starts at the top of the block and is advanced one
      // instruction per qualifying callee save in the loop below.
882   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
883   bool MovingStackUpdateDown = false;
884 
885   // Check if we can move the stack update.
886   if (stackUpdateCanBeMoved(MF)) {
887     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
        // NOTE(review): both loops over Info copy each CalleeSavedInfo by value;
        // only getFrameIdx() is read from the copy.
888     for (CalleeSavedInfo CSI : Info) {
889       int FrIdx = CSI.getFrameIdx();
890       // If the frame index is not negative the callee saved info belongs to a
891       // stack object that is not a fixed stack object. We ignore non-fixed
892       // stack objects because we won't move the stack update pointer past them.
893       if (FrIdx >= 0)
894         continue;
895 
896       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
897         StackUpdateLoc++;
898         MovingStackUpdateDown = true;
899       } else {
900         // We need all of the Frame Indices to meet these conditions.
901         // If they do not, abort the whole operation.
902         StackUpdateLoc = MBBI;
903         MovingStackUpdateDown = false;
904         break;
905       }
906     }
907 
908     // If the operation was not aborted then update the object offset.
        // Each negative-index object is biased by NegFrameSize; the callee-saved
        // CFI code at the end of this function subtracts the bias again.
909     if (MovingStackUpdateDown) {
910       for (CalleeSavedInfo CSI : Info) {
911         int FrIdx = CSI.getFrameIdx();
912         if (FrIdx < 0)
913           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
914       }
915     }
916   }
917 
918   // Where in the prologue we move the CR fields depends on how many scratch
919   // registers we have, and if we need to save the link register or not. This
920   // lambda is to avoid duplicating the logic in 2 places.
      // Either way the CR contents end up in TempReg. The single-field form names
      // the CR field as an explicit killed operand; the full form lists every
      // field in MustSaveCRs as an implicit killed operand.
921   auto BuildMoveFromCR = [&]() {
922     if (isELFv2ABI && MustSaveCRs.size() == 1) {
923     // In the ELFv2 ABI, we are not required to save all CR fields.
924     // If only one CR field is clobbered, it is more efficient to use
925     // mfocrf to selectively save just that field, because mfocrf has short
926     // latency compared to mfcr.
927       assert(isPPC64 && "V2 ABI is 64-bit only.");
928       MachineInstrBuilder MIB =
929           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
930       MIB.addReg(MustSaveCRs[0], RegState::Kill);
931     } else {
932       MachineInstrBuilder MIB =
933           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
934       for (unsigned CRfield : MustSaveCRs)
935         MIB.addReg(CRfield, RegState::ImplicitKill);
936     }
937   };
938 
939   // If we need to spill the CR and the LR but we don't have two separate
940   // registers available, we must spill them one at a time
      // Here TempReg and ScratchReg are the same register, so the CR value is
      // stored (and the register killed) before the LR move below reuses it.
941   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
942     BuildMoveFromCR();
943     BuildMI(MBB, MBBI, dl, StoreWordInst)
944         .addReg(TempReg, getKillRegState(true))
945         .addImm(CRSaveOffset)
946         .addReg(SPReg);
947   }
948 
      // Move LR into ScratchReg; it is stored further down.
949   if (MustSaveLR)
950     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
951 
      // CR was not handled by the one-at-a-time path above: move it to TempReg
      // now and store it after the LR store below.
952   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
953     BuildMoveFromCR();
954 
      // With a red zone, FP, the PIC base register and BP are stored at their
      // save offsets from SP here, ahead of the SP update emitted below.
955   if (HasRedZone) {
956     if (HasFP)
957       BuildMI(MBB, MBBI, dl, StoreInst)
958         .addReg(FPReg)
959         .addImm(FPOffset)
960         .addReg(SPReg);
961     if (FI->usesPICBase())
962       BuildMI(MBB, MBBI, dl, StoreInst)
963         .addReg(PPC::R30)
964         .addImm(PBPOffset)
965         .addReg(SPReg);
966     if (HasBP)
967       BuildMI(MBB, MBBI, dl, StoreInst)
968         .addReg(BPReg)
969         .addImm(BPOffset)
970         .addReg(SPReg);
971   }
972 
      // Store LR (held in ScratchReg). Unlike the stores above, this one is
      // inserted at StackUpdateLoc rather than at MBBI.
973   if (MustSaveLR)
974     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
975       .addReg(ScratchReg, getKillRegState(true))
976       .addImm(LROffset)
977       .addReg(SPReg);
978 
979   if (MustSaveCR &&
980       !(SingleScratchReg && MustSaveLR)) {
981     assert(HasRedZone && "A red zone is always available on PPC64");
982     BuildMI(MBB, MBBI, dl, StoreWordInst)
983       .addReg(TempReg, getKillRegState(true))
984       .addImm(CRSaveOffset)
985       .addReg(SPReg);
986   }
987 
988   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
      // Note that this early return also skips all CFI emission below.
989   if (!FrameSize)
990     return;
991 
992   // Adjust stack pointer: r1 += NegFrameSize.
993   // If there is a preferred stack alignment, align R1 now
994 
995   if (HasBP && HasRedZone) {
996     // Save a copy of r1 as the base pointer.
997     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
998       .addReg(SPReg)
999       .addReg(SPReg);
1000   }
1001 
1002   // Have we generated a STUX instruction to claim stack frame? If so,
1003   // the negated frame size will be placed in ScratchReg.
1004   bool HasSTUX = false;
1005 
1006   // This condition must be kept in sync with canUseAsPrologue.
       // Three ways to claim the frame: an aligned frame (BP in use and
       // MaxAlign > 1), a small frame whose size fits a 16-bit immediate, and a
       // large frame whose size must first be materialized in a register.
1007   if (HasBP && MaxAlign > 1) {
         // Rotate-and-mask of SP into ScratchReg; presumably this keeps only the
         // low Log2(MaxAlign) bits of SP, i.e. its misalignment -- confirm
         // against the ISA.
1008     if (isPPC64)
1009       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1010         .addReg(SPReg)
1011         .addImm(0)
1012         .addImm(64 - Log2_32(MaxAlign));
1013     else // PPC32...
1014       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1015         .addReg(SPReg)
1016         .addImm(0)
1017         .addImm(32 - Log2_32(MaxAlign))
1018         .addImm(31);
         // Combine ScratchReg with NegFrameSize; presumably the result is
         // NegFrameSize minus the masked SP bits, i.e. the full SP adjustment
         // including alignment padding -- confirm against the ISA.
1019     if (!isLargeFrame) {
1020       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1021         .addReg(ScratchReg, RegState::Kill)
1022         .addImm(NegFrameSize);
1023     } else {
1024       assert(!SingleScratchReg && "Only a single scratch reg available");
           // NegFrameSize does not fit a 16-bit immediate: build it in TempReg
           // from its upper 16 bits (LIS) and its lower 16 bits (ORI).
1025       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1026         .addImm(NegFrameSize >> 16);
1027       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1028         .addReg(TempReg, RegState::Kill)
1029         .addImm(NegFrameSize & 0xFFFF);
1030       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1031         .addReg(ScratchReg, RegState::Kill)
1032         .addReg(TempReg, RegState::Kill);
1033     }
1034 
         // Indexed store-with-update of SP, using ScratchReg as the index.
1035     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1036       .addReg(SPReg, RegState::Kill)
1037       .addReg(SPReg)
1038       .addReg(ScratchReg);
1039     HasSTUX = true;
1040 
1041   } else if (!isLargeFrame) {
         // Small frame: a single store-with-update with an immediate offset,
         // inserted at StackUpdateLoc so it can sit below the callee saves.
1042     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1043       .addReg(SPReg)
1044       .addImm(NegFrameSize)
1045       .addReg(SPReg);
1046 
1047   } else {
         // Large frame without realignment: build NegFrameSize in ScratchReg
         // (LIS for the upper 16 bits, ORI for the lower 16 bits) and use it as
         // the index of a store-with-update.
1048     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1049       .addImm(NegFrameSize >> 16);
1050     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1051       .addReg(ScratchReg, RegState::Kill)
1052       .addImm(NegFrameSize & 0xFFFF);
1053     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1054       .addReg(SPReg, RegState::Kill)
1055       .addReg(SPReg)
1056       .addReg(ScratchReg);
1057     HasSTUX = true;
1058   }
1059 
1060   // Save the TOC register after the stack pointer update if a prologue TOC
1061   // save is required for the function.
1062   if (MustSaveTOC) {
1063     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1064     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1065       .addReg(TOCReg, getKillRegState(true))
1066       .addImm(TOCSaveOffset)
1067       .addReg(SPReg);
1068   }
1069 
       // Without a red zone, FP, the PIC base register and BP were not stored
       // before the SP update, so they are stored now. When BP is in use it is
       // also set up here, since the red-zone-only copy above was skipped.
1070   if (!HasRedZone) {
1071     assert(!isPPC64 && "A red zone is always available on PPC64");
1072     if (HasSTUX) {
1073       // The negated frame size is in ScratchReg, and the SPReg has been
1074       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1075       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1076       // the stack frame (i.e. the old SP), ideally, we would put the old
1077       // SP into a register and use it as the base for the stores. The
1078       // problem is that the only available register may be ScratchReg,
1079       // which could be R0, and R0 cannot be used as a base address.
1080 
1081       // First, set ScratchReg to the old SP. This may need to be modified
1082       // later.
1083       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1084         .addReg(ScratchReg, RegState::Kill)
1085         .addReg(SPReg);
1086 
1087       if (ScratchReg == PPC::R0) {
1088         // R0 cannot be used as a base register, but it can be used as an
1089         // index in a store-indexed.
             // LastOffset tracks the offset already folded into R0, so each step
             // adds only the difference to reach the next save slot.
1090         int LastOffset = 0;
1091         if (HasFP)  {
1092           // R0 += (FPOffset-LastOffset).
1093           // Need addic, since addi treats R0 as 0.
1094           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1095             .addReg(ScratchReg)
1096             .addImm(FPOffset-LastOffset);
1097           LastOffset = FPOffset;
1098           // Store FP into *R0.
1099           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1100             .addReg(FPReg, RegState::Kill)  // Save FP.
1101             .addReg(PPC::ZERO)
1102             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1103         }
1104         if (FI->usesPICBase()) {
1105           // R0 += (PBPOffset-LastOffset).
1106           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1107             .addReg(ScratchReg)
1108             .addImm(PBPOffset-LastOffset);
1109           LastOffset = PBPOffset;
1110           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1111             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1112             .addReg(PPC::ZERO)
1113             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1114         }
1115         if (HasBP) {
1116           // R0 += (BPOffset-LastOffset).
1117           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1118             .addReg(ScratchReg)
1119             .addImm(BPOffset-LastOffset);
1120           LastOffset = BPOffset;
1121           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1122             .addReg(BPReg, RegState::Kill)  // Save BP.
1123             .addReg(PPC::ZERO)
1124             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1125           // BP = R0-LastOffset
1126           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1127             .addReg(ScratchReg, RegState::Kill)
1128             .addImm(-LastOffset);
1129         }
1130       } else {
1131         // ScratchReg is not R0, so use it as the base register. It is
1132         // already set to the old SP, so we can use the offsets directly.
1133 
1134         // Now that the stack frame has been allocated, save all the necessary
1135         // registers using ScratchReg as the base address.
1136         if (HasFP)
1137           BuildMI(MBB, MBBI, dl, StoreInst)
1138             .addReg(FPReg)
1139             .addImm(FPOffset)
1140             .addReg(ScratchReg);
1141         if (FI->usesPICBase())
1142           BuildMI(MBB, MBBI, dl, StoreInst)
1143             .addReg(PPC::R30)
1144             .addImm(PBPOffset)
1145             .addReg(ScratchReg);
1146         if (HasBP) {
1147           BuildMI(MBB, MBBI, dl, StoreInst)
1148             .addReg(BPReg)
1149             .addImm(BPOffset)
1150             .addReg(ScratchReg);
               // Copy ScratchReg (the old SP) into BP.
1151           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1152             .addReg(ScratchReg, RegState::Kill)
1153             .addReg(ScratchReg);
1154         }
1155       }
1156     } else {
1157       // The frame size is a known 16-bit constant (fitting in the immediate
1158       // field of STWU). To be here we have to be compiling for PPC32.
1159       // Since the SPReg has been decreased by FrameSize, add it back to each
1160       // offset.
1161       if (HasFP)
1162         BuildMI(MBB, MBBI, dl, StoreInst)
1163           .addReg(FPReg)
1164           .addImm(FrameSize + FPOffset)
1165           .addReg(SPReg);
1166       if (FI->usesPICBase())
1167         BuildMI(MBB, MBBI, dl, StoreInst)
1168           .addReg(PPC::R30)
1169           .addImm(FrameSize + PBPOffset)
1170           .addReg(SPReg);
1171       if (HasBP) {
1172         BuildMI(MBB, MBBI, dl, StoreInst)
1173           .addReg(BPReg)
1174           .addImm(FrameSize + BPOffset)
1175           .addReg(SPReg);
             // BP = SP + FrameSize, i.e. the SP value from before the update.
1176         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1177           .addReg(SPReg)
1178           .addImm(FrameSize);
1179       }
1180     }
1181   }
1182 
1183   // Add Call Frame Information for the instructions we generated above.
1184   if (needsCFI) {
1185     unsigned CFIIndex;
1186 
1187     if (HasBP) {
1188       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1189       // because if the stack needed aligning then CFA won't be at a fixed
1190       // offset from FP/SP.
1191       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1192       CFIIndex = MF.addFrameInst(
1193           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1194     } else {
1195       // Adjust the definition of CFA to account for the change in SP.
1196       assert(NegFrameSize);
1197       CFIIndex = MF.addFrameInst(
1198           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1199     }
1200     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1201         .addCFIIndex(CFIIndex);
1202 
1203     if (HasFP) {
1204       // Describe where FP was saved, at a fixed offset from CFA.
1205       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1206       CFIIndex = MF.addFrameInst(
1207           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1208       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1209           .addCFIIndex(CFIIndex);
1210     }
1211 
1212     if (FI->usesPICBase()) {
1213       // Describe where the PIC base register (R30) was saved, at a fixed
           // offset from CFA.
1214       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1215       CFIIndex = MF.addFrameInst(
1216           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1217       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1218           .addCFIIndex(CFIIndex);
1219     }
1220 
1221     if (HasBP) {
1222       // Describe where BP was saved, at a fixed offset from CFA.
1223       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1224       CFIIndex = MF.addFrameInst(
1225           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1226       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1227           .addCFIIndex(CFIIndex);
1228     }
1229 
1230     if (MustSaveLR) {
1231       // Describe where LR was saved, at a fixed offset from CFA.
1232       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1233       CFIIndex = MF.addFrameInst(
1234           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1235       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1236           .addCFIIndex(CFIIndex);
1237     }
1238   }
1239 
1240   // If there is a frame pointer, copy R1 into R31
1241   if (HasFP) {
1242     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1243       .addReg(SPReg)
1244       .addReg(SPReg);
1245 
1246     if (!HasBP && needsCFI) {
1247       // Change the definition of CFA from SP+offset to FP+offset, because SP
1248       // will change at every alloca.
1249       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1250       unsigned CFIIndex = MF.addFrameInst(
1251           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1252 
1253       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1254           .addCFIIndex(CFIIndex);
1255     }
1256   }
1257 
1258   if (needsCFI) {
1259     // Describe where callee saved registers were saved, at fixed offsets from
1260     // CFA.
1261     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1262     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1263       unsigned Reg = CSI[I].getReg();
           // LR (described above when MustSaveLR is set) and RM get no record
           // from this loop.
1264       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1265 
1266       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1267       // subregisters of CR2. We just need to emit a move of CR2.
1268       if (PPC::CRBITRCRegClass.contains(Reg))
1269         continue;
1270 
           // The TOC register gets no record from this loop when this prologue
           // saved it itself.
1271       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1272         continue;
1273 
1274       // For SVR4, don't emit a move for the CR spill slot if we haven't
1275       // spilled CRs.
1276       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1277           && !MustSaveCR)
1278         continue;
1279 
1280       // For 64-bit SVR4 when we have spilled CRs, the spill location
1281       // is SP+8, not a frame-relative slot.
1282       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1283         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1284         // the whole CR word.  In the ELFv2 ABI, every CR that was
1285         // actually saved gets its own CFI record.
1286         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1287         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1288             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1289         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1290             .addCFIIndex(CFIIndex);
1291         continue;
1292       }
1293 
1294       if (CSI[I].isSpilledToReg()) {
             // The register was spilled into another register rather than to
             // the stack: record a register-to-register CFI rule.
1295         unsigned SpilledReg = CSI[I].getDstReg();
1296         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1297             nullptr, MRI->getDwarfRegNum(Reg, true),
1298             MRI->getDwarfRegNum(SpilledReg, true)));
1299         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1300           .addCFIIndex(CFIRegister);
1301       } else {
1302         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1303         // We have changed the object offset above but we do not want to change
1304         // the actual offsets in the CFI instruction so we have to undo the
1305         // offset change here.
1306         if (MovingStackUpdateDown)
1307           Offset -= NegFrameSize;
1308 
1309         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1310             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1311         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1312             .addCFIIndex(CFIIndex);
1313       }
1314     }
1315   }
1316 }
1317 
1318 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1319                                     MachineBasicBlock &MBB) const {
1320   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1321   DebugLoc dl;
1322 
1323   if (MBBI != MBB.end())
1324     dl = MBBI->getDebugLoc();
1325 
1326   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1327   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1328 
1329   // Get alignment info so we know how to restore the SP.
1330   const MachineFrameInfo &MFI = MF.getFrameInfo();
1331 
1332   // Get the number of bytes allocated from the FrameInfo.
1333   int FrameSize = MFI.getStackSize();
1334 
1335   // Get processor type.
1336   bool isPPC64 = Subtarget.isPPC64();
1337   // Get the ABI.
1338   bool isSVR4ABI = Subtarget.isSVR4ABI();
1339 
1340   // Check if the link register (LR) has been saved.
1341   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1342   bool MustSaveLR = FI->mustSaveLR();
1343   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1344   bool MustSaveCR = !MustSaveCRs.empty();
1345   // Do we have a frame pointer and/or base pointer for this function?
1346   bool HasFP = hasFP(MF);
1347   bool HasBP = RegInfo->hasBasePointer(MF);
1348   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1349 
1350   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1351   Register BPReg = RegInfo->getBaseRegister(MF);
1352   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1353   unsigned ScratchReg = 0;
1354   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1355   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1356                                                  : PPC::MTLR );
1357   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1358                                                  : PPC::LWZ );
1359   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1360                                                            : PPC::LIS );
1361   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1362                                               : PPC::OR );
1363   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1364                                                   : PPC::ORI );
1365   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1366                                                    : PPC::ADDI );
1367   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1368                                                 : PPC::ADD4 );
1369   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1370                                                      : PPC::LWZ);
1371   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1372                                                      : PPC::MTOCRF);
1373   int LROffset = getReturnSaveOffset();
1374 
1375   int FPOffset = 0;
1376 
1377   // Using the same bool variable as below to suppress compiler warnings.
1378   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1379                                               &TempReg);
1380   assert(SingleScratchReg &&
1381          "Could not find an available scratch register");
1382 
1383   SingleScratchReg = ScratchReg == TempReg;
1384 
1385   if (HasFP) {
1386     if (isSVR4ABI) {
1387       int FPIndex = FI->getFramePointerSaveIndex();
1388       assert(FPIndex && "No Frame Pointer Save Slot!");
1389       FPOffset = MFI.getObjectOffset(FPIndex);
1390     } else {
1391       FPOffset = getFramePointerSaveOffset();
1392     }
1393   }
1394 
1395   int BPOffset = 0;
1396   if (HasBP) {
1397     if (isSVR4ABI) {
1398       int BPIndex = FI->getBasePointerSaveIndex();
1399       assert(BPIndex && "No Base Pointer Save Slot!");
1400       BPOffset = MFI.getObjectOffset(BPIndex);
1401     } else {
1402       BPOffset = getBasePointerSaveOffset();
1403     }
1404   }
1405 
1406   int PBPOffset = 0;
1407   if (FI->usesPICBase()) {
1408     int PBPIndex = FI->getPICBasePointerSaveIndex();
1409     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1410     PBPOffset = MFI.getObjectOffset(PBPIndex);
1411   }
1412 
1413   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1414 
1415   if (IsReturnBlock) {
1416     unsigned RetOpcode = MBBI->getOpcode();
1417     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1418                       RetOpcode == PPC::TCRETURNdi ||
1419                       RetOpcode == PPC::TCRETURNai ||
1420                       RetOpcode == PPC::TCRETURNri8 ||
1421                       RetOpcode == PPC::TCRETURNdi8 ||
1422                       RetOpcode == PPC::TCRETURNai8;
1423 
1424     if (UsesTCRet) {
1425       int MaxTCRetDelta = FI->getTailCallSPDelta();
1426       MachineOperand &StackAdjust = MBBI->getOperand(1);
1427       assert(StackAdjust.isImm() && "Expecting immediate value.");
1428       // Adjust stack pointer.
1429       int StackAdj = StackAdjust.getImm();
1430       int Delta = StackAdj - MaxTCRetDelta;
1431       assert((Delta >= 0) && "Delta must be positive");
1432       if (MaxTCRetDelta>0)
1433         FrameSize += (StackAdj +Delta);
1434       else
1435         FrameSize += StackAdj;
1436     }
1437   }
1438 
1439   // Frames of 32KB & larger require special handling because they cannot be
1440   // indexed into with a simple LD/LWZ immediate offset operand.
1441   bool isLargeFrame = !isInt<16>(FrameSize);
1442 
1443   // On targets without red zone, the SP needs to be restored last, so that
1444   // all live contents of the stack frame are upwards of the SP. This means
1445   // that we cannot restore SP just now, since there may be more registers
1446   // to restore from the stack frame (e.g. R31). If the frame size is not
1447   // a simple immediate value, we will need a spare register to hold the
1448   // restored SP. If the frame size is known and small, we can simply adjust
1449   // the offsets of the registers to be restored, and still use SP to restore
1450   // them. In such case, the final update of SP will be to add the frame
1451   // size to it.
1452   // To simplify the code, set RBReg to the base register used to restore
1453   // values from the stack, and set SPAdd to the value that needs to be added
1454   // to the SP at the end. The default values are as if red zone was present.
1455   unsigned RBReg = SPReg;
1456   unsigned SPAdd = 0;
1457 
1458   // Check if we can move the stack update instruction up the epilogue
1459   // past the callee saves. This will allow the move to LR instruction
1460   // to be executed before the restores of the callee saves which means
1461   // that the callee saves can hide the latency from the MTLR instrcution.
1462   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1463   if (stackUpdateCanBeMoved(MF)) {
1464     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1465     for (CalleeSavedInfo CSI : Info) {
1466       int FrIdx = CSI.getFrameIdx();
1467       // If the frame index is not negative the callee saved info belongs to a
1468       // stack object that is not a fixed stack object. We ignore non-fixed
1469       // stack objects because we won't move the update of the stack pointer
1470       // past them.
1471       if (FrIdx >= 0)
1472         continue;
1473 
1474       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1475         StackUpdateLoc--;
1476       else {
1477         // Abort the operation as we can't update all CSR restores.
1478         StackUpdateLoc = MBBI;
1479         break;
1480       }
1481     }
1482   }
1483 
1484   if (FrameSize) {
1485     // In the prologue, the loaded (or persistent) stack pointer value is
1486     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1487     // zone add this offset back now.
1488 
1489     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1490     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1491     // call which invalidates the stack pointer value in SP(0). So we use the
1492     // value of R31 in this case.
1493     if (FI->hasFastCall()) {
1494       assert(HasFP && "Expecting a valid frame pointer.");
1495       if (!HasRedZone)
1496         RBReg = FPReg;
1497       if (!isLargeFrame) {
1498         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1499           .addReg(FPReg).addImm(FrameSize);
1500       } else {
1501         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1502           .addImm(FrameSize >> 16);
1503         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1504           .addReg(ScratchReg, RegState::Kill)
1505           .addImm(FrameSize & 0xFFFF);
1506         BuildMI(MBB, MBBI, dl, AddInst)
1507           .addReg(RBReg)
1508           .addReg(FPReg)
1509           .addReg(ScratchReg);
1510       }
1511     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1512       if (HasRedZone) {
1513         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1514           .addReg(SPReg)
1515           .addImm(FrameSize);
1516       } else {
1517         // Make sure that adding FrameSize will not overflow the max offset
1518         // size.
1519         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1520                "Local offsets should be negative");
1521         SPAdd = FrameSize;
1522         FPOffset += FrameSize;
1523         BPOffset += FrameSize;
1524         PBPOffset += FrameSize;
1525       }
1526     } else {
1527       // We don't want to use ScratchReg as a base register, because it
1528       // could happen to be R0. Use FP instead, but make sure to preserve it.
1529       if (!HasRedZone) {
1530         // If FP is not saved, copy it to ScratchReg.
1531         if (!HasFP)
1532           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1533             .addReg(FPReg)
1534             .addReg(FPReg);
1535         RBReg = FPReg;
1536       }
1537       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1538         .addImm(0)
1539         .addReg(SPReg);
1540     }
1541   }
1542   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1543   // If there is no red zone, ScratchReg may be needed for holding a useful
1544   // value (although not the base register). Make sure it is not overwritten
1545   // too early.
1546 
1547   // If we need to restore both the LR and the CR and we only have one
1548   // available scratch register, we must do them one at a time.
1549   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1550     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1551     // is live here.
1552     assert(HasRedZone && "Expecting red zone");
1553     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1554       .addImm(CRSaveOffset)
1555       .addReg(SPReg);
1556     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1557       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1558         .addReg(TempReg, getKillRegState(i == e-1));
1559   }
1560 
1561   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1562   // LR is stored in the caller's stack frame. ScratchReg will be needed
1563   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1564   // a base register anyway, because it may happen to be R0.
1565   bool LoadedLR = false;
1566   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1567     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1568       .addImm(LROffset+SPAdd)
1569       .addReg(RBReg);
1570     LoadedLR = true;
1571   }
1572 
1573   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1574     assert(RBReg == SPReg && "Should be using SP as a base register");
1575     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1576       .addImm(CRSaveOffset)
1577       .addReg(RBReg);
1578   }
1579 
1580   if (HasFP) {
1581     // If there is red zone, restore FP directly, since SP has already been
1582     // restored. Otherwise, restore the value of FP into ScratchReg.
1583     if (HasRedZone || RBReg == SPReg)
1584       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1585         .addImm(FPOffset)
1586         .addReg(SPReg);
1587     else
1588       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1589         .addImm(FPOffset)
1590         .addReg(RBReg);
1591   }
1592 
1593   if (FI->usesPICBase())
1594     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1595       .addImm(PBPOffset)
1596       .addReg(RBReg);
1597 
1598   if (HasBP)
1599     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1600       .addImm(BPOffset)
1601       .addReg(RBReg);
1602 
1603   // There is nothing more to be loaded from the stack, so now we can
1604   // restore SP: SP = RBReg + SPAdd.
1605   if (RBReg != SPReg || SPAdd != 0) {
1606     assert(!HasRedZone && "This should not happen with red zone");
1607     // If SPAdd is 0, generate a copy.
1608     if (SPAdd == 0)
1609       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1610         .addReg(RBReg)
1611         .addReg(RBReg);
1612     else
1613       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1614         .addReg(RBReg)
1615         .addImm(SPAdd);
1616 
1617     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1618     if (RBReg == FPReg)
1619       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1620         .addReg(ScratchReg)
1621         .addReg(ScratchReg);
1622 
1623     // Now load the LR from the caller's stack frame.
1624     if (MustSaveLR && !LoadedLR)
1625       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1626         .addImm(LROffset)
1627         .addReg(SPReg);
1628   }
1629 
1630   if (MustSaveCR &&
1631       !(SingleScratchReg && MustSaveLR))
1632     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1633       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1634         .addReg(TempReg, getKillRegState(i == e-1));
1635 
1636   if (MustSaveLR)
1637     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1638 
1639   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1640   // call optimization
1641   if (IsReturnBlock) {
1642     unsigned RetOpcode = MBBI->getOpcode();
1643     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1644         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1645         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1646       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1647       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1648 
1649       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1650         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1651           .addReg(SPReg).addImm(CallerAllocatedAmt);
1652       } else {
1653         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1654           .addImm(CallerAllocatedAmt >> 16);
1655         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1656           .addReg(ScratchReg, RegState::Kill)
1657           .addImm(CallerAllocatedAmt & 0xFFFF);
1658         BuildMI(MBB, MBBI, dl, AddInst)
1659           .addReg(SPReg)
1660           .addReg(FPReg)
1661           .addReg(ScratchReg);
1662       }
1663     } else {
1664       createTailCallBranchInstr(MBB);
1665     }
1666   }
1667 }
1668 
1669 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1670   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1671 
1672   // If we got this far a first terminator should exist.
1673   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1674 
1675   DebugLoc dl = MBBI->getDebugLoc();
1676   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1677 
1678   // Create branch instruction for pseudo tail call return instruction
1679   unsigned RetOpcode = MBBI->getOpcode();
1680   if (RetOpcode == PPC::TCRETURNdi) {
1681     MBBI = MBB.getLastNonDebugInstr();
1682     MachineOperand &JumpTarget = MBBI->getOperand(0);
1683     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1684       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1685   } else if (RetOpcode == PPC::TCRETURNri) {
1686     MBBI = MBB.getLastNonDebugInstr();
1687     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1688     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1689   } else if (RetOpcode == PPC::TCRETURNai) {
1690     MBBI = MBB.getLastNonDebugInstr();
1691     MachineOperand &JumpTarget = MBBI->getOperand(0);
1692     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1693   } else if (RetOpcode == PPC::TCRETURNdi8) {
1694     MBBI = MBB.getLastNonDebugInstr();
1695     MachineOperand &JumpTarget = MBBI->getOperand(0);
1696     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1697       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1698   } else if (RetOpcode == PPC::TCRETURNri8) {
1699     MBBI = MBB.getLastNonDebugInstr();
1700     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1701     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1702   } else if (RetOpcode == PPC::TCRETURNai8) {
1703     MBBI = MBB.getLastNonDebugInstr();
1704     MachineOperand &JumpTarget = MBBI->getOperand(0);
1705     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1706   }
1707 }
1708 
1709 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1710                                             BitVector &SavedRegs,
1711                                             RegScavenger *RS) const {
1712   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1713 
1714   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1715 
1716   //  Save and clear the LR state.
1717   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1718   unsigned LR = RegInfo->getRARegister();
1719   FI->setMustSaveLR(MustSaveLR(MF, LR));
1720   SavedRegs.reset(LR);
1721 
1722   //  Save R31 if necessary
1723   int FPSI = FI->getFramePointerSaveIndex();
1724   const bool isPPC64 = Subtarget.isPPC64();
1725   MachineFrameInfo &MFI = MF.getFrameInfo();
1726 
1727   // If the frame pointer save index hasn't been defined yet.
1728   if (!FPSI && needsFP(MF)) {
1729     // Find out what the fix offset of the frame pointer save area.
1730     int FPOffset = getFramePointerSaveOffset();
1731     // Allocate the frame index for frame pointer save area.
1732     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1733     // Save the result.
1734     FI->setFramePointerSaveIndex(FPSI);
1735   }
1736 
1737   int BPSI = FI->getBasePointerSaveIndex();
1738   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1739     int BPOffset = getBasePointerSaveOffset();
1740     // Allocate the frame index for the base pointer save area.
1741     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1742     // Save the result.
1743     FI->setBasePointerSaveIndex(BPSI);
1744   }
1745 
1746   // Reserve stack space for the PIC Base register (R30).
1747   // Only used in SVR4 32-bit.
1748   if (FI->usesPICBase()) {
1749     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1750     FI->setPICBasePointerSaveIndex(PBPSI);
1751   }
1752 
1753   // Make sure we don't explicitly spill r31, because, for example, we have
1754   // some inline asm which explicitly clobbers it, when we otherwise have a
1755   // frame pointer and are using r31's spill slot for the prologue/epilogue
1756   // code. Same goes for the base pointer and the PIC base register.
1757   if (needsFP(MF))
1758     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1759   if (RegInfo->hasBasePointer(MF))
1760     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1761   if (FI->usesPICBase())
1762     SavedRegs.reset(PPC::R30);
1763 
1764   // Reserve stack space to move the linkage area to in case of a tail call.
1765   int TCSPDelta = 0;
1766   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1767       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1768     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1769   }
1770 
1771   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
1772   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
1773   // object at the offset of the CR-save slot in the linkage area. The actual
1774   // save and restore of the condition register will be created as part of the
1775   // prologue and epilogue insertion, but the FixedStack object is needed to
1776   // keep the CalleSavedInfo valid.
1777   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1778        SavedRegs.test(PPC::CR4))) {
1779     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1780     const int64_t SpillOffset =
1781         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
1782     int FrameIdx =
1783         MFI.CreateFixedObject(SpillSize, SpillOffset,
1784                               /* IsImmutable */ true, /* IsAliased */ false);
1785     FI->setCRSpillFrameIndex(FrameIdx);
1786   }
1787 }
1788 
1789 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1790                                                        RegScavenger *RS) const {
1791   // Early exit if not using the SVR4 ABI.
1792   if (!Subtarget.isSVR4ABI()) {
1793     addScavengingSpillSlot(MF, RS);
1794     return;
1795   }
1796 
1797   // Get callee saved register information.
1798   MachineFrameInfo &MFI = MF.getFrameInfo();
1799   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1800 
1801   // If the function is shrink-wrapped, and if the function has a tail call, the
1802   // tail call might not be in the new RestoreBlock, so real branch instruction
1803   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1804   // RestoreBlock. So we handle this case here.
1805   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1806     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1807     for (MachineBasicBlock &MBB : MF) {
1808       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1809         createTailCallBranchInstr(MBB);
1810     }
1811   }
1812 
1813   // Early exit if no callee saved registers are modified!
1814   if (CSI.empty() && !needsFP(MF)) {
1815     addScavengingSpillSlot(MF, RS);
1816     return;
1817   }
1818 
1819   unsigned MinGPR = PPC::R31;
1820   unsigned MinG8R = PPC::X31;
1821   unsigned MinFPR = PPC::F31;
1822   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1823 
1824   bool HasGPSaveArea = false;
1825   bool HasG8SaveArea = false;
1826   bool HasFPSaveArea = false;
1827   bool HasVRSAVESaveArea = false;
1828   bool HasVRSaveArea = false;
1829 
1830   SmallVector<CalleeSavedInfo, 18> GPRegs;
1831   SmallVector<CalleeSavedInfo, 18> G8Regs;
1832   SmallVector<CalleeSavedInfo, 18> FPRegs;
1833   SmallVector<CalleeSavedInfo, 18> VRegs;
1834 
1835   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1836     unsigned Reg = CSI[i].getReg();
1837     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1838             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1839            "Not expecting to try to spill R2 in a function that must save TOC");
1840     if (PPC::GPRCRegClass.contains(Reg)) {
1841       HasGPSaveArea = true;
1842 
1843       GPRegs.push_back(CSI[i]);
1844 
1845       if (Reg < MinGPR) {
1846         MinGPR = Reg;
1847       }
1848     } else if (PPC::G8RCRegClass.contains(Reg)) {
1849       HasG8SaveArea = true;
1850 
1851       G8Regs.push_back(CSI[i]);
1852 
1853       if (Reg < MinG8R) {
1854         MinG8R = Reg;
1855       }
1856     } else if (PPC::F8RCRegClass.contains(Reg)) {
1857       HasFPSaveArea = true;
1858 
1859       FPRegs.push_back(CSI[i]);
1860 
1861       if (Reg < MinFPR) {
1862         MinFPR = Reg;
1863       }
1864     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1865                PPC::CRRCRegClass.contains(Reg)) {
1866       ; // do nothing, as we already know whether CRs are spilled
1867     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1868       HasVRSAVESaveArea = true;
1869     } else if (PPC::VRRCRegClass.contains(Reg) ||
1870                PPC::SPERCRegClass.contains(Reg)) {
1871       // Altivec and SPE are mutually exclusive, but have the same stack
1872       // alignment requirements, so overload the save area for both cases.
1873       HasVRSaveArea = true;
1874 
1875       VRegs.push_back(CSI[i]);
1876 
1877       if (Reg < MinVR) {
1878         MinVR = Reg;
1879       }
1880     } else {
1881       llvm_unreachable("Unknown RegisterClass!");
1882     }
1883   }
1884 
1885   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1886   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1887 
1888   int64_t LowerBound = 0;
1889 
1890   // Take into account stack space reserved for tail calls.
1891   int TCSPDelta = 0;
1892   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1893       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1894     LowerBound = TCSPDelta;
1895   }
1896 
1897   // The Floating-point register save area is right below the back chain word
1898   // of the previous stack frame.
1899   if (HasFPSaveArea) {
1900     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1901       int FI = FPRegs[i].getFrameIdx();
1902 
1903       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1904     }
1905 
1906     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1907   }
1908 
1909   // Check whether the frame pointer register is allocated. If so, make sure it
1910   // is spilled to the correct offset.
1911   if (needsFP(MF)) {
1912     int FI = PFI->getFramePointerSaveIndex();
1913     assert(FI && "No Frame Pointer Save Slot!");
1914     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1915     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1916     HasGPSaveArea = true;
1917   }
1918 
1919   if (PFI->usesPICBase()) {
1920     int FI = PFI->getPICBasePointerSaveIndex();
1921     assert(FI && "No PIC Base Pointer Save Slot!");
1922     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1923 
1924     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1925     HasGPSaveArea = true;
1926   }
1927 
1928   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1929   if (RegInfo->hasBasePointer(MF)) {
1930     int FI = PFI->getBasePointerSaveIndex();
1931     assert(FI && "No Base Pointer Save Slot!");
1932     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1933 
1934     Register BP = RegInfo->getBaseRegister(MF);
1935     if (PPC::G8RCRegClass.contains(BP)) {
1936       MinG8R = std::min<unsigned>(MinG8R, BP);
1937       HasG8SaveArea = true;
1938     } else if (PPC::GPRCRegClass.contains(BP)) {
1939       MinGPR = std::min<unsigned>(MinGPR, BP);
1940       HasGPSaveArea = true;
1941     }
1942   }
1943 
1944   // General register save area starts right below the Floating-point
1945   // register save area.
1946   if (HasGPSaveArea || HasG8SaveArea) {
1947     // Move general register save area spill slots down, taking into account
1948     // the size of the Floating-point register save area.
1949     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1950       if (!GPRegs[i].isSpilledToReg()) {
1951         int FI = GPRegs[i].getFrameIdx();
1952         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1953       }
1954     }
1955 
1956     // Move general register save area spill slots down, taking into account
1957     // the size of the Floating-point register save area.
1958     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1959       if (!G8Regs[i].isSpilledToReg()) {
1960         int FI = G8Regs[i].getFrameIdx();
1961         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1962       }
1963     }
1964 
1965     unsigned MinReg =
1966       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1967                          TRI->getEncodingValue(MinG8R));
1968 
1969     if (Subtarget.isPPC64()) {
1970       LowerBound -= (31 - MinReg + 1) * 8;
1971     } else {
1972       LowerBound -= (31 - MinReg + 1) * 4;
1973     }
1974   }
1975 
1976   // For 32-bit only, the CR save area is below the general register
1977   // save area.  For 64-bit SVR4, the CR save area is addressed relative
1978   // to the stack pointer and hence does not need an adjustment here.
1979   // Only CR2 (the first nonvolatile spilled) has an associated frame
1980   // index so that we have a single uniform save area.
1981   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
1982     // Adjust the frame index of the CR spill slot.
1983     for (const auto &CSInfo : CSI) {
1984       if (CSInfo.getReg() == PPC::CR2) {
1985         int FI = CSInfo.getFrameIdx();
1986         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1987         break;
1988       }
1989     }
1990 
1991     LowerBound -= 4; // The CR save area is always 4 bytes long.
1992   }
1993 
1994   if (HasVRSAVESaveArea) {
1995     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
1996     //             which have the VRSAVE register class?
1997     // Adjust the frame index of the VRSAVE spill slot.
1998     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1999       unsigned Reg = CSI[i].getReg();
2000 
2001       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2002         int FI = CSI[i].getFrameIdx();
2003 
2004         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2005       }
2006     }
2007 
2008     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2009   }
2010 
2011   // Both Altivec and SPE have the same alignment and padding requirements
2012   // within the stack frame.
2013   if (HasVRSaveArea) {
2014     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2015     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2016     // we are using negative number here (the stack grows downward). We should
2017     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2018     // is the alignment size ( n = 16 here) and y is the size after aligning.
2019     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2020     LowerBound &= ~(15);
2021 
2022     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2023       int FI = VRegs[i].getFrameIdx();
2024 
2025       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2026     }
2027   }
2028 
2029   addScavengingSpillSlot(MF, RS);
2030 }
2031 
2032 void
2033 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2034                                          RegScavenger *RS) const {
2035   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2036   // a large stack, which will require scavenging a register to materialize a
2037   // large offset.
2038 
2039   // We need to have a scavenger spill slot for spills if the frame size is
2040   // large. In case there is no free register for large-offset addressing,
2041   // this slot is used for the necessary emergency spill. Also, we need the
2042   // slot for dynamic stack allocations.
2043 
2044   // The scavenger might be invoked if the frame offset does not fit into
2045   // the 16-bit immediate. We don't know the complete frame size here
2046   // because we've not yet computed callee-saved register spills or the
2047   // needed alignment padding.
2048   unsigned StackSize = determineFrameLayout(MF, true);
2049   MachineFrameInfo &MFI = MF.getFrameInfo();
2050   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2051       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2052     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2053     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2054     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2055     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2056     unsigned Size = TRI.getSpillSize(RC);
2057     unsigned Align = TRI.getSpillAlignment(RC);
2058     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2059 
2060     // Might we have over-aligned allocas?
2061     bool HasAlVars = MFI.hasVarSizedObjects() &&
2062                      MFI.getMaxAlignment() > getStackAlignment();
2063 
2064     // These kinds of spills might need two registers.
2065     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2066       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2067 
2068   }
2069 }
2070 
2071 // This function checks if a callee saved gpr can be spilled to a volatile
2072 // vector register. This occurs for leaf functions when the option
2073 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2074 // which were not spilled to vectors, return false so the target independent
2075 // code can handle them by assigning a FrameIdx to a stack slot.
2076 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2077     MachineFunction &MF, const TargetRegisterInfo *TRI,
2078     std::vector<CalleeSavedInfo> &CSI) const {
2079 
2080   if (CSI.empty())
2081     return true; // Early exit if no callee saved registers are modified!
2082 
2083   // Early exit if cannot spill gprs to volatile vector registers.
2084   MachineFrameInfo &MFI = MF.getFrameInfo();
2085   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2086     return false;
2087 
2088   // Build a BitVector of VSRs that can be used for spilling GPRs.
2089   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2090   BitVector BVCalleeSaved(TRI->getNumRegs());
2091   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2092   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2093   for (unsigned i = 0; CSRegs[i]; ++i)
2094     BVCalleeSaved.set(CSRegs[i]);
2095 
2096   for (unsigned Reg : BVAllocatable.set_bits()) {
2097     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2098     // used in the function.
2099     if (BVCalleeSaved[Reg] ||
2100         (!PPC::F8RCRegClass.contains(Reg) &&
2101          !PPC::VFRCRegClass.contains(Reg)) ||
2102         (MF.getRegInfo().isPhysRegUsed(Reg)))
2103       BVAllocatable.reset(Reg);
2104   }
2105 
2106   bool AllSpilledToReg = true;
2107   for (auto &CS : CSI) {
2108     if (BVAllocatable.none())
2109       return false;
2110 
2111     unsigned Reg = CS.getReg();
2112     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2113       AllSpilledToReg = false;
2114       continue;
2115     }
2116 
2117     unsigned VolatileVFReg = BVAllocatable.find_first();
2118     if (VolatileVFReg < BVAllocatable.size()) {
2119       CS.setDstReg(VolatileVFReg);
2120       BVAllocatable.reset(VolatileVFReg);
2121     } else {
2122       AllSpilledToReg = false;
2123     }
2124   }
2125   return AllSpilledToReg;
2126 }
2127 
2128 bool PPCFrameLowering::spillCalleeSavedRegisters(
2129     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2130     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2131 
2132   MachineFunction *MF = MBB.getParent();
2133   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2134   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2135   bool MustSaveTOC = FI->mustSaveTOC();
2136   DebugLoc DL;
2137   bool CRSpilled = false;
2138   MachineInstrBuilder CRMIB;
2139 
2140   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2141     unsigned Reg = CSI[i].getReg();
2142     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2143     if (Reg == PPC::VRSAVE)
2144       continue;
2145 
2146     // CR2 through CR4 are the nonvolatile CR fields.
2147     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2148 
2149     // Add the callee-saved register as live-in; it's killed at the spill.
2150     // Do not do this for callee-saved registers that are live-in to the
2151     // function because they will already be marked live-in and this will be
2152     // adding it for a second time. It is an error to add the same register
2153     // to the set more than once.
2154     const MachineRegisterInfo &MRI = MF->getRegInfo();
2155     bool IsLiveIn = MRI.isLiveIn(Reg);
2156     if (!IsLiveIn)
2157        MBB.addLiveIn(Reg);
2158 
2159     if (CRSpilled && IsCRField) {
2160       CRMIB.addReg(Reg, RegState::ImplicitKill);
2161       continue;
2162     }
2163 
2164     // The actual spill will happen in the prologue.
2165     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2166       continue;
2167 
2168     // Insert the spill to the stack frame.
2169     if (IsCRField) {
2170       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2171       if (!Subtarget.is32BitELFABI()) {
2172         // The actual spill will happen at the start of the prologue.
2173         FuncInfo->addMustSaveCR(Reg);
2174       } else {
2175         CRSpilled = true;
2176         FuncInfo->setSpillsCR();
2177 
2178         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2179         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2180         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2181                   .addReg(Reg, RegState::ImplicitKill);
2182 
2183         MBB.insert(MI, CRMIB);
2184         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2185                                          .addReg(PPC::R12,
2186                                                  getKillRegState(true)),
2187                                          CSI[i].getFrameIdx()));
2188       }
2189     } else {
2190       if (CSI[i].isSpilledToReg()) {
2191         NumPESpillVSR++;
2192         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2193           .addReg(Reg, getKillRegState(true));
2194       } else {
2195         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2196         // Use !IsLiveIn for the kill flag.
2197         // We do not want to kill registers that are live in this function
2198         // before their use because they will become undefined registers.
2199         // Functions without NoUnwind need to preserve the order of elements in
2200         // saved vector registers.
2201         if (Subtarget.needsSwapsForVSXMemOps() &&
2202             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2203           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2204                                        CSI[i].getFrameIdx(), RC, TRI);
2205         else
2206           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2207                                   RC, TRI);
2208       }
2209     }
2210   }
2211   return true;
2212 }
2213 
2214 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2215                        bool CR4Spilled, MachineBasicBlock &MBB,
2216                        MachineBasicBlock::iterator MI,
2217                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2218 
2219   MachineFunction *MF = MBB.getParent();
2220   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2221   DebugLoc DL;
2222   unsigned MoveReg = PPC::R12;
2223 
2224   // 32-bit:  FP-relative
2225   MBB.insert(MI,
2226              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2227                                CSI[CSIIndex].getFrameIdx()));
2228 
2229   unsigned RestoreOp = PPC::MTOCRF;
2230   if (CR2Spilled)
2231     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2232                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2233 
2234   if (CR3Spilled)
2235     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2236                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2237 
2238   if (CR4Spilled)
2239     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2240                .addReg(MoveReg, getKillRegState(true)));
2241 }
2242 
2243 MachineBasicBlock::iterator PPCFrameLowering::
2244 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2245                               MachineBasicBlock::iterator I) const {
2246   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2247   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2248       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2249     // Add (actually subtract) back the amount the callee popped on return.
2250     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2251       bool is64Bit = Subtarget.isPPC64();
2252       CalleeAmt *= -1;
2253       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2254       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2255       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2256       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2257       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2258       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2259       const DebugLoc &dl = I->getDebugLoc();
2260 
2261       if (isInt<16>(CalleeAmt)) {
2262         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2263           .addReg(StackReg, RegState::Kill)
2264           .addImm(CalleeAmt);
2265       } else {
2266         MachineBasicBlock::iterator MBBI = I;
2267         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2268           .addImm(CalleeAmt >> 16);
2269         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2270           .addReg(TmpReg, RegState::Kill)
2271           .addImm(CalleeAmt & 0xFFFF);
2272         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2273           .addReg(StackReg, RegState::Kill)
2274           .addReg(TmpReg);
2275       }
2276     }
2277   }
2278   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2279   return MBB.erase(I);
2280 }
2281 
2282 static bool isCalleeSavedCR(unsigned Reg) {
2283   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2284 }
2285 
2286 bool
2287 PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
2288                                         MachineBasicBlock::iterator MI,
2289                                         std::vector<CalleeSavedInfo> &CSI,
2290                                         const TargetRegisterInfo *TRI) const {
2291   MachineFunction *MF = MBB.getParent();
2292   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2293   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2294   bool MustSaveTOC = FI->mustSaveTOC();
2295   bool CR2Spilled = false;
2296   bool CR3Spilled = false;
2297   bool CR4Spilled = false;
2298   unsigned CSIIndex = 0;
2299 
2300   // Initialize insertion-point logic; we will be restoring in reverse
2301   // order of spill.
2302   MachineBasicBlock::iterator I = MI, BeforeI = I;
2303   bool AtStart = I == MBB.begin();
2304 
2305   if (!AtStart)
2306     --BeforeI;
2307 
2308   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2309     unsigned Reg = CSI[i].getReg();
2310 
2311     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2312     if (Reg == PPC::VRSAVE)
2313       continue;
2314 
2315     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2316       continue;
2317 
2318     // Restore of callee saved condition register field is handled during
2319     // epilogue insertion.
2320     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2321       continue;
2322 
2323     if (Reg == PPC::CR2) {
2324       CR2Spilled = true;
2325       // The spill slot is associated only with CR2, which is the
2326       // first nonvolatile spilled.  Save it here.
2327       CSIIndex = i;
2328       continue;
2329     } else if (Reg == PPC::CR3) {
2330       CR3Spilled = true;
2331       continue;
2332     } else if (Reg == PPC::CR4) {
2333       CR4Spilled = true;
2334       continue;
2335     } else {
2336       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2337       // least one CR register, restore all spilled CRs together.
2338       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2339         bool is31 = needsFP(*MF);
2340         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2341                    CSIIndex);
2342         CR2Spilled = CR3Spilled = CR4Spilled = false;
2343       }
2344 
2345       if (CSI[i].isSpilledToReg()) {
2346         DebugLoc DL;
2347         NumPEReloadVSR++;
2348         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2349             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2350       } else {
2351        // Default behavior for non-CR saves.
2352         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2353 
2354         // Functions without NoUnwind need to preserve the order of elements in
2355         // saved vector registers.
2356         if (Subtarget.needsSwapsForVSXMemOps() &&
2357             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2358           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2359                                         TRI);
2360         else
2361           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2362 
2363         assert(I != MBB.begin() &&
2364                "loadRegFromStackSlot didn't insert any code!");
2365       }
2366     }
2367 
2368     // Insert in reverse order.
2369     if (AtStart)
2370       I = MBB.begin();
2371     else {
2372       I = BeforeI;
2373       ++I;
2374     }
2375   }
2376 
2377   // If we haven't yet spilled the CRs, do so now.
2378   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2379     assert(Subtarget.is32BitELFABI() &&
2380            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2381     bool is31 = needsFP(*MF);
2382     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2383   }
2384 
2385   return true;
2386 }
2387 
/// Returns the value of the TOCSaveOffset member (presumably the frame offset
/// of the TOC save slot; the member is initialised outside this section).
unsigned PPCFrameLowering::getTOCSaveOffset() const {
  return TOCSaveOffset;
}
2391 
/// Returns the value of the FramePointerSaveOffset member (presumably the
/// frame offset of the frame pointer save slot; the member is initialised
/// outside this section).
unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
  return FramePointerSaveOffset;
}
2395 
2396 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2397   if (Subtarget.isAIXABI())
2398     report_fatal_error("BasePointer is not implemented on AIX yet.");
2399   return BasePointerSaveOffset;
2400 }
2401 
2402 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2403   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2404     return false;
2405   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2406           MF.getSubtarget<PPCSubtarget>().isPPC64());
2407 }
2408