xref: /llvm-project/llvm/lib/Target/PowerPC/PPCFrameLowering.cpp (revision d000655a8cd58c8449a86a1761038c8c1dd78d87)
1 //===-- PPCFrameLowering.cpp - PPC Frame Information ----------------------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // This file contains the PPC implementation of TargetFrameLowering class.
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "PPCFrameLowering.h"
14 #include "PPCInstrBuilder.h"
15 #include "PPCInstrInfo.h"
16 #include "PPCMachineFunctionInfo.h"
17 #include "PPCSubtarget.h"
18 #include "PPCTargetMachine.h"
19 #include "llvm/ADT/Statistic.h"
20 #include "llvm/CodeGen/MachineFrameInfo.h"
21 #include "llvm/CodeGen/MachineFunction.h"
22 #include "llvm/CodeGen/MachineInstrBuilder.h"
23 #include "llvm/CodeGen/MachineModuleInfo.h"
24 #include "llvm/CodeGen/MachineRegisterInfo.h"
25 #include "llvm/CodeGen/RegisterScavenging.h"
26 #include "llvm/IR/Function.h"
27 #include "llvm/Target/TargetOptions.h"
28 
29 using namespace llvm;
30 
// Debug/statistics category name used by this file.
#define DEBUG_TYPE "framelowering"
// Counters for prologue spills to, and epilogue reloads from, vector
// registers (see the description strings).
STATISTIC(NumPESpillVSR, "Number of spills to vector in prologue");
STATISTIC(NumPEReloadVSR, "Number of reloads from vector in epilogue");

// Hidden command-line option "ppc-enable-pe-vector-spills"; it is off by
// default and enables prologue spills to vector registers.
static cl::opt<bool>
EnablePEVectorSpills("ppc-enable-pe-vector-spills",
                     cl::desc("Enable spills in prologue to vector registers."),
                     cl::init(false), cl::Hidden);
39 
/// VRRegNo - Map from a numbered VR register to its enum value.
///
/// The table has exactly 32 entries; entry N holds PPC::V<N>, so it must only
/// be indexed with values in the range [0, 31].
static const MCPhysReg VRRegNo[] = {
 PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 ,
 PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15,
 PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23,
 PPC::V24, PPC::V25, PPC::V26, PPC::V27, PPC::V28, PPC::V29, PPC::V30, PPC::V31
};
48 
49 static unsigned computeReturnSaveOffset(const PPCSubtarget &STI) {
50   if (STI.isAIXABI())
51     return STI.isPPC64() ? 16 : 8;
52   // SVR4 ABI:
53   return STI.isPPC64() ? 16 : 4;
54 }
55 
56 static unsigned computeTOCSaveOffset(const PPCSubtarget &STI) {
57   if (STI.isAIXABI())
58     return STI.isPPC64() ? 40 : 20;
59   return STI.isELFv2ABI() ? 24 : 40;
60 }
61 
62 static unsigned computeFramePointerSaveOffset(const PPCSubtarget &STI) {
63   // First slot in the general register save area.
64   return STI.isPPC64() ? -8U : -4U;
65 }
66 
67 static unsigned computeLinkageSize(const PPCSubtarget &STI) {
68   if (STI.isAIXABI() || STI.isPPC64())
69     return (STI.isELFv2ABI() ? 4 : 6) * (STI.isPPC64() ? 8 : 4);
70 
71   // 32-bit SVR4 ABI:
72   return 8;
73 }
74 
75 static unsigned computeBasePointerSaveOffset(const PPCSubtarget &STI) {
76   // SVR4 ABI: First slot in the general register save area.
77   return STI.isPPC64()
78              ? -16U
79              : STI.getTargetMachine().isPositionIndependent() ? -12U : -8U;
80 }
81 
82 static unsigned computeCRSaveOffset(const PPCSubtarget &STI) {
83   return (STI.isAIXABI() && !STI.isPPC64()) ? 4 : 8;
84 }
85 
86 PPCFrameLowering::PPCFrameLowering(const PPCSubtarget &STI)
87     : TargetFrameLowering(TargetFrameLowering::StackGrowsDown,
88                           STI.getPlatformStackAlignment(), 0),
89       Subtarget(STI), ReturnSaveOffset(computeReturnSaveOffset(Subtarget)),
90       TOCSaveOffset(computeTOCSaveOffset(Subtarget)),
91       FramePointerSaveOffset(computeFramePointerSaveOffset(Subtarget)),
92       LinkageSize(computeLinkageSize(Subtarget)),
93       BasePointerSaveOffset(computeBasePointerSaveOffset(Subtarget)),
94       CRSaveOffset(computeCRSaveOffset(Subtarget)) {}
95 
96 // With the SVR4 ABI, callee-saved registers have fixed offsets on the stack.
97 const PPCFrameLowering::SpillSlot *PPCFrameLowering::getCalleeSavedSpillSlots(
98     unsigned &NumEntries) const {
99   // Early exit if not using the SVR4 ABI.
100   if (!Subtarget.isSVR4ABI()) {
101     NumEntries = 0;
102     return nullptr;
103   }
104 
105 // Floating-point register save area offsets.
106 #define CALLEE_SAVED_FPRS \
107       {PPC::F31, -8},     \
108       {PPC::F30, -16},    \
109       {PPC::F29, -24},    \
110       {PPC::F28, -32},    \
111       {PPC::F27, -40},    \
112       {PPC::F26, -48},    \
113       {PPC::F25, -56},    \
114       {PPC::F24, -64},    \
115       {PPC::F23, -72},    \
116       {PPC::F22, -80},    \
117       {PPC::F21, -88},    \
118       {PPC::F20, -96},    \
119       {PPC::F19, -104},   \
120       {PPC::F18, -112},   \
121       {PPC::F17, -120},   \
122       {PPC::F16, -128},   \
123       {PPC::F15, -136},   \
124       {PPC::F14, -144}
125 
126 // 32-bit general purpose register save area offsets.
127 #define CALLEE_SAVED_GPRS32 \
128       {PPC::R31, -4},       \
129       {PPC::R30, -8},       \
130       {PPC::R29, -12},      \
131       {PPC::R28, -16},      \
132       {PPC::R27, -20},      \
133       {PPC::R26, -24},      \
134       {PPC::R25, -28},      \
135       {PPC::R24, -32},      \
136       {PPC::R23, -36},      \
137       {PPC::R22, -40},      \
138       {PPC::R21, -44},      \
139       {PPC::R20, -48},      \
140       {PPC::R19, -52},      \
141       {PPC::R18, -56},      \
142       {PPC::R17, -60},      \
143       {PPC::R16, -64},      \
144       {PPC::R15, -68},      \
145       {PPC::R14, -72}
146 
147 // 64-bit general purpose register save area offsets.
148 #define CALLEE_SAVED_GPRS64 \
149       {PPC::X31, -8},       \
150       {PPC::X30, -16},      \
151       {PPC::X29, -24},      \
152       {PPC::X28, -32},      \
153       {PPC::X27, -40},      \
154       {PPC::X26, -48},      \
155       {PPC::X25, -56},      \
156       {PPC::X24, -64},      \
157       {PPC::X23, -72},      \
158       {PPC::X22, -80},      \
159       {PPC::X21, -88},      \
160       {PPC::X20, -96},      \
161       {PPC::X19, -104},     \
162       {PPC::X18, -112},     \
163       {PPC::X17, -120},     \
164       {PPC::X16, -128},     \
165       {PPC::X15, -136},     \
166       {PPC::X14, -144}
167 
168 // Vector register save area offsets.
169 #define CALLEE_SAVED_VRS \
170       {PPC::V31, -16},   \
171       {PPC::V30, -32},   \
172       {PPC::V29, -48},   \
173       {PPC::V28, -64},   \
174       {PPC::V27, -80},   \
175       {PPC::V26, -96},   \
176       {PPC::V25, -112},  \
177       {PPC::V24, -128},  \
178       {PPC::V23, -144},  \
179       {PPC::V22, -160},  \
180       {PPC::V21, -176},  \
181       {PPC::V20, -192}
182 
183   // Note that the offsets here overlap, but this is fixed up in
184   // processFunctionBeforeFrameFinalized.
185 
186   static const SpillSlot Offsets[] = {
187       CALLEE_SAVED_FPRS,
188       CALLEE_SAVED_GPRS32,
189 
190       // CR save area offset.  We map each of the nonvolatile CR fields
191       // to the slot for CR2, which is the first of the nonvolatile CR
192       // fields to be assigned, so that we only allocate one save slot.
193       // See PPCRegisterInfo::hasReservedSpillSlot() for more information.
194       {PPC::CR2, -4},
195 
196       // VRSAVE save area offset.
197       {PPC::VRSAVE, -4},
198 
199       CALLEE_SAVED_VRS,
200 
201       // SPE register save area (overlaps Vector save area).
202       {PPC::S31, -8},
203       {PPC::S30, -16},
204       {PPC::S29, -24},
205       {PPC::S28, -32},
206       {PPC::S27, -40},
207       {PPC::S26, -48},
208       {PPC::S25, -56},
209       {PPC::S24, -64},
210       {PPC::S23, -72},
211       {PPC::S22, -80},
212       {PPC::S21, -88},
213       {PPC::S20, -96},
214       {PPC::S19, -104},
215       {PPC::S18, -112},
216       {PPC::S17, -120},
217       {PPC::S16, -128},
218       {PPC::S15, -136},
219       {PPC::S14, -144}};
220 
221   static const SpillSlot Offsets64[] = {
222       CALLEE_SAVED_FPRS,
223       CALLEE_SAVED_GPRS64,
224 
225       // VRSAVE save area offset.
226       {PPC::VRSAVE, -4},
227 
228       CALLEE_SAVED_VRS
229   };
230 
231   if (Subtarget.isPPC64()) {
232     NumEntries = array_lengthof(Offsets64);
233 
234     return Offsets64;
235   } else {
236     NumEntries = array_lengthof(Offsets);
237 
238     return Offsets;
239   }
240 }
241 
242 /// RemoveVRSaveCode - We have found that this function does not need any code
243 /// to manipulate the VRSAVE register, even though it uses vector registers.
244 /// This can happen when the only registers used are known to be live in or out
245 /// of the function.  Remove all of the VRSAVE related code from the function.
246 /// FIXME: The removal of the code results in a compile failure at -O0 when the
247 /// function contains a function call, as the GPR containing original VRSAVE
248 /// contents is spilled and reloaded around the call.  Without the prolog code,
249 /// the spill instruction refers to an undefined register.  This code needs
250 /// to account for all uses of that GPR.
251 static void RemoveVRSaveCode(MachineInstr &MI) {
252   MachineBasicBlock *Entry = MI.getParent();
253   MachineFunction *MF = Entry->getParent();
254 
255   // We know that the MTVRSAVE instruction immediately follows MI.  Remove it.
256   MachineBasicBlock::iterator MBBI = MI;
257   ++MBBI;
258   assert(MBBI != Entry->end() && MBBI->getOpcode() == PPC::MTVRSAVE);
259   MBBI->eraseFromParent();
260 
261   bool RemovedAllMTVRSAVEs = true;
262   // See if we can find and remove the MTVRSAVE instruction from all of the
263   // epilog blocks.
264   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
265     // If last instruction is a return instruction, add an epilogue
266     if (I->isReturnBlock()) {
267       bool FoundIt = false;
268       for (MBBI = I->end(); MBBI != I->begin(); ) {
269         --MBBI;
270         if (MBBI->getOpcode() == PPC::MTVRSAVE) {
271           MBBI->eraseFromParent();  // remove it.
272           FoundIt = true;
273           break;
274         }
275       }
276       RemovedAllMTVRSAVEs &= FoundIt;
277     }
278   }
279 
280   // If we found and removed all MTVRSAVE instructions, remove the read of
281   // VRSAVE as well.
282   if (RemovedAllMTVRSAVEs) {
283     MBBI = MI;
284     assert(MBBI != Entry->begin() && "UPDATE_VRSAVE is first instr in block?");
285     --MBBI;
286     assert(MBBI->getOpcode() == PPC::MFVRSAVE && "VRSAVE instrs wandered?");
287     MBBI->eraseFromParent();
288   }
289 
290   // Finally, nuke the UPDATE_VRSAVE.
291   MI.eraseFromParent();
292 }
293 
294 // HandleVRSaveUpdate - MI is the UPDATE_VRSAVE instruction introduced by the
295 // instruction selector.  Based on the vector registers that have been used,
296 // transform this into the appropriate ORI instruction.
297 static void HandleVRSaveUpdate(MachineInstr &MI, const TargetInstrInfo &TII) {
298   MachineFunction *MF = MI.getParent()->getParent();
299   const TargetRegisterInfo *TRI = MF->getSubtarget().getRegisterInfo();
300   DebugLoc dl = MI.getDebugLoc();
301 
302   const MachineRegisterInfo &MRI = MF->getRegInfo();
303   unsigned UsedRegMask = 0;
304   for (unsigned i = 0; i != 32; ++i)
305     if (MRI.isPhysRegModified(VRRegNo[i]))
306       UsedRegMask |= 1 << (31-i);
307 
308   // Live in and live out values already must be in the mask, so don't bother
309   // marking them.
310   for (std::pair<unsigned, unsigned> LI : MF->getRegInfo().liveins()) {
311     unsigned RegNo = TRI->getEncodingValue(LI.first);
312     if (VRRegNo[RegNo] == LI.first)        // If this really is a vector reg.
313       UsedRegMask &= ~(1 << (31-RegNo));   // Doesn't need to be marked.
314   }
315 
316   // Live out registers appear as use operands on return instructions.
317   for (MachineFunction::const_iterator BI = MF->begin(), BE = MF->end();
318        UsedRegMask != 0 && BI != BE; ++BI) {
319     const MachineBasicBlock &MBB = *BI;
320     if (!MBB.isReturnBlock())
321       continue;
322     const MachineInstr &Ret = MBB.back();
323     for (unsigned I = 0, E = Ret.getNumOperands(); I != E; ++I) {
324       const MachineOperand &MO = Ret.getOperand(I);
325       if (!MO.isReg() || !PPC::VRRCRegClass.contains(MO.getReg()))
326         continue;
327       unsigned RegNo = TRI->getEncodingValue(MO.getReg());
328       UsedRegMask &= ~(1 << (31-RegNo));
329     }
330   }
331 
332   // If no registers are used, turn this into a copy.
333   if (UsedRegMask == 0) {
334     // Remove all VRSAVE code.
335     RemoveVRSaveCode(MI);
336     return;
337   }
338 
339   Register SrcReg = MI.getOperand(1).getReg();
340   Register DstReg = MI.getOperand(0).getReg();
341 
342   if ((UsedRegMask & 0xFFFF) == UsedRegMask) {
343     if (DstReg != SrcReg)
344       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
345           .addReg(SrcReg)
346           .addImm(UsedRegMask);
347     else
348       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
349           .addReg(SrcReg, RegState::Kill)
350           .addImm(UsedRegMask);
351   } else if ((UsedRegMask & 0xFFFF0000) == UsedRegMask) {
352     if (DstReg != SrcReg)
353       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
354           .addReg(SrcReg)
355           .addImm(UsedRegMask >> 16);
356     else
357       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
358           .addReg(SrcReg, RegState::Kill)
359           .addImm(UsedRegMask >> 16);
360   } else {
361     if (DstReg != SrcReg)
362       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
363           .addReg(SrcReg)
364           .addImm(UsedRegMask >> 16);
365     else
366       BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORIS), DstReg)
367           .addReg(SrcReg, RegState::Kill)
368           .addImm(UsedRegMask >> 16);
369 
370     BuildMI(*MI.getParent(), MI, dl, TII.get(PPC::ORI), DstReg)
371         .addReg(DstReg, RegState::Kill)
372         .addImm(UsedRegMask & 0xFFFF);
373   }
374 
375   // Remove the old UPDATE_VRSAVE instruction.
376   MI.eraseFromParent();
377 }
378 
379 static bool spillsCR(const MachineFunction &MF) {
380   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
381   return FuncInfo->isCRSpilled();
382 }
383 
384 static bool spillsVRSAVE(const MachineFunction &MF) {
385   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
386   return FuncInfo->isVRSAVESpilled();
387 }
388 
389 static bool hasSpills(const MachineFunction &MF) {
390   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
391   return FuncInfo->hasSpills();
392 }
393 
394 static bool hasNonRISpills(const MachineFunction &MF) {
395   const PPCFunctionInfo *FuncInfo = MF.getInfo<PPCFunctionInfo>();
396   return FuncInfo->hasNonRISpills();
397 }
398 
399 /// MustSaveLR - Return true if this function requires that we save the LR
400 /// register onto the stack in the prolog and restore it in the epilog of the
401 /// function.
402 static bool MustSaveLR(const MachineFunction &MF, unsigned LR) {
403   const PPCFunctionInfo *MFI = MF.getInfo<PPCFunctionInfo>();
404 
405   // We need a save/restore of LR if there is any def of LR (which is
406   // defined by calls, including the PIC setup sequence), or if there is
407   // some use of the LR stack slot (e.g. for builtin_return_address).
408   // (LR comes in 32 and 64 bit versions.)
409   MachineRegisterInfo::def_iterator RI = MF.getRegInfo().def_begin(LR);
410   return RI !=MF.getRegInfo().def_end() || MFI->isLRStoreRequired();
411 }
412 
413 /// determineFrameLayoutAndUpdate - Determine the size of the frame and maximum
414 /// call frame size. Update the MachineFunction object with the stack size.
415 unsigned
416 PPCFrameLowering::determineFrameLayoutAndUpdate(MachineFunction &MF,
417                                                 bool UseEstimate) const {
418   unsigned NewMaxCallFrameSize = 0;
419   unsigned FrameSize = determineFrameLayout(MF, UseEstimate,
420                                             &NewMaxCallFrameSize);
421   MF.getFrameInfo().setStackSize(FrameSize);
422   MF.getFrameInfo().setMaxCallFrameSize(NewMaxCallFrameSize);
423   return FrameSize;
424 }
425 
426 /// determineFrameLayout - Determine the size of the frame and maximum call
427 /// frame size.
428 unsigned
429 PPCFrameLowering::determineFrameLayout(const MachineFunction &MF,
430                                        bool UseEstimate,
431                                        unsigned *NewMaxCallFrameSize) const {
432   const MachineFrameInfo &MFI = MF.getFrameInfo();
433   const PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
434 
435   // Get the number of bytes to allocate from the FrameInfo
436   unsigned FrameSize =
437     UseEstimate ? MFI.estimateStackSize(MF) : MFI.getStackSize();
438 
439   // Get stack alignments. The frame must be aligned to the greatest of these:
440   Align TargetAlign = getStackAlign(); // alignment required per the ABI
441   Align MaxAlign = MFI.getMaxAlign();  // algmt required by data in frame
442   Align Alignment = std::max(TargetAlign, MaxAlign);
443 
444   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
445 
446   unsigned LR = RegInfo->getRARegister();
447   bool DisableRedZone = MF.getFunction().hasFnAttribute(Attribute::NoRedZone);
448   bool CanUseRedZone = !MFI.hasVarSizedObjects() && // No dynamic alloca.
449                        !MFI.adjustsStack() &&       // No calls.
450                        !MustSaveLR(MF, LR) &&       // No need to save LR.
451                        !FI->mustSaveTOC() &&        // No need to save TOC.
452                        !RegInfo->hasBasePointer(MF); // No special alignment.
453 
454   // Note: for PPC32 SVR4ABI (Non-DarwinABI), we can still generate stackless
455   // code if all local vars are reg-allocated.
456   bool FitsInRedZone = FrameSize <= Subtarget.getRedZoneSize();
457 
458   // Check whether we can skip adjusting the stack pointer (by using red zone)
459   if (!DisableRedZone && CanUseRedZone && FitsInRedZone) {
460     // No need for frame
461     return 0;
462   }
463 
464   // Get the maximum call frame size of all the calls.
465   unsigned maxCallFrameSize = MFI.getMaxCallFrameSize();
466 
467   // Maximum call frame needs to be at least big enough for linkage area.
468   unsigned minCallFrameSize = getLinkageSize();
469   maxCallFrameSize = std::max(maxCallFrameSize, minCallFrameSize);
470 
471   // If we have dynamic alloca then maxCallFrameSize needs to be aligned so
472   // that allocations will be aligned.
473   if (MFI.hasVarSizedObjects())
474     maxCallFrameSize = alignTo(maxCallFrameSize, Alignment);
475 
476   // Update the new max call frame size if the caller passes in a valid pointer.
477   if (NewMaxCallFrameSize)
478     *NewMaxCallFrameSize = maxCallFrameSize;
479 
480   // Include call frame size in total.
481   FrameSize += maxCallFrameSize;
482 
483   // Make sure the frame is aligned.
484   FrameSize = alignTo(FrameSize, Alignment);
485 
486   return FrameSize;
487 }
488 
489 // hasFP - Return true if the specified function actually has a dedicated frame
490 // pointer register.
491 bool PPCFrameLowering::hasFP(const MachineFunction &MF) const {
492   const MachineFrameInfo &MFI = MF.getFrameInfo();
493   // FIXME: This is pretty much broken by design: hasFP() might be called really
494   // early, before the stack layout was calculated and thus hasFP() might return
495   // true or false here depending on the time of call.
496   return (MFI.getStackSize()) && needsFP(MF);
497 }
498 
499 // needsFP - Return true if the specified function should have a dedicated frame
500 // pointer register.  This is true if the function has variable sized allocas or
501 // if frame pointer elimination is disabled.
502 bool PPCFrameLowering::needsFP(const MachineFunction &MF) const {
503   const MachineFrameInfo &MFI = MF.getFrameInfo();
504 
505   // Naked functions have no stack frame pushed, so we don't have a frame
506   // pointer.
507   if (MF.getFunction().hasFnAttribute(Attribute::Naked))
508     return false;
509 
510   return MF.getTarget().Options.DisableFramePointerElim(MF) ||
511     MFI.hasVarSizedObjects() || MFI.hasStackMap() || MFI.hasPatchPoint() ||
512     (MF.getTarget().Options.GuaranteedTailCallOpt &&
513      MF.getInfo<PPCFunctionInfo>()->hasFastCall());
514 }
515 
516 void PPCFrameLowering::replaceFPWithRealFP(MachineFunction &MF) const {
517   bool is31 = needsFP(MF);
518   unsigned FPReg  = is31 ? PPC::R31 : PPC::R1;
519   unsigned FP8Reg = is31 ? PPC::X31 : PPC::X1;
520 
521   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
522   bool HasBP = RegInfo->hasBasePointer(MF);
523   unsigned BPReg  = HasBP ? (unsigned) RegInfo->getBaseRegister(MF) : FPReg;
524   unsigned BP8Reg = HasBP ? (unsigned) PPC::X30 : FP8Reg;
525 
526   for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
527        BI != BE; ++BI)
528     for (MachineBasicBlock::iterator MBBI = BI->end(); MBBI != BI->begin(); ) {
529       --MBBI;
530       for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) {
531         MachineOperand &MO = MBBI->getOperand(I);
532         if (!MO.isReg())
533           continue;
534 
535         switch (MO.getReg()) {
536         case PPC::FP:
537           MO.setReg(FPReg);
538           break;
539         case PPC::FP8:
540           MO.setReg(FP8Reg);
541           break;
542         case PPC::BP:
543           MO.setReg(BPReg);
544           break;
545         case PPC::BP8:
546           MO.setReg(BP8Reg);
547           break;
548 
549         }
550       }
551     }
552 }
553 
554 /*  This function will do the following:
555     - If MBB is an entry or exit block, set SR1 and SR2 to R0 and R12
556       respectively (defaults recommended by the ABI) and return true
557     - If MBB is not an entry block, initialize the register scavenger and look
558       for available registers.
559     - If the defaults (R0/R12) are available, return true
560     - If TwoUniqueRegsRequired is set to true, it looks for two unique
561       registers. Otherwise, look for a single available register.
562       - If the required registers are found, set SR1 and SR2 and return true.
563       - If the required registers are not found, set SR2 or both SR1 and SR2 to
564         PPC::NoRegister and return false.
565 
566     Note that if both SR1 and SR2 are valid parameters and TwoUniqueRegsRequired
567     is not set, this function will attempt to find two different registers, but
568     still return true if only one register is available (and set SR1 == SR2).
569 */
570 bool
571 PPCFrameLowering::findScratchRegister(MachineBasicBlock *MBB,
572                                       bool UseAtEnd,
573                                       bool TwoUniqueRegsRequired,
574                                       unsigned *SR1,
575                                       unsigned *SR2) const {
576   RegScavenger RS;
577   unsigned R0 =  Subtarget.isPPC64() ? PPC::X0 : PPC::R0;
578   unsigned R12 = Subtarget.isPPC64() ? PPC::X12 : PPC::R12;
579 
580   // Set the defaults for the two scratch registers.
581   if (SR1)
582     *SR1 = R0;
583 
584   if (SR2) {
585     assert (SR1 && "Asking for the second scratch register but not the first?");
586     *SR2 = R12;
587   }
588 
589   // If MBB is an entry or exit block, use R0 and R12 as the scratch registers.
590   if ((UseAtEnd && MBB->isReturnBlock()) ||
591       (!UseAtEnd && (&MBB->getParent()->front() == MBB)))
592     return true;
593 
594   RS.enterBasicBlock(*MBB);
595 
596   if (UseAtEnd && !MBB->empty()) {
597     // The scratch register will be used at the end of the block, so must
598     // consider all registers used within the block
599 
600     MachineBasicBlock::iterator MBBI = MBB->getFirstTerminator();
601     // If no terminator, back iterator up to previous instruction.
602     if (MBBI == MBB->end())
603       MBBI = std::prev(MBBI);
604 
605     if (MBBI != MBB->begin())
606       RS.forward(MBBI);
607   }
608 
609   // If the two registers are available, we're all good.
610   // Note that we only return here if both R0 and R12 are available because
611   // although the function may not require two unique registers, it may benefit
612   // from having two so we should try to provide them.
613   if (!RS.isRegUsed(R0) && !RS.isRegUsed(R12))
614     return true;
615 
616   // Get the list of callee-saved registers for the target.
617   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
618   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(MBB->getParent());
619 
620   // Get all the available registers in the block.
621   BitVector BV = RS.getRegsAvailable(Subtarget.isPPC64() ? &PPC::G8RCRegClass :
622                                      &PPC::GPRCRegClass);
623 
624   // We shouldn't use callee-saved registers as scratch registers as they may be
625   // available when looking for a candidate block for shrink wrapping but not
626   // available when the actual prologue/epilogue is being emitted because they
627   // were added as live-in to the prologue block by PrologueEpilogueInserter.
628   for (int i = 0; CSRegs[i]; ++i)
629     BV.reset(CSRegs[i]);
630 
631   // Set the first scratch register to the first available one.
632   if (SR1) {
633     int FirstScratchReg = BV.find_first();
634     *SR1 = FirstScratchReg == -1 ? (unsigned)PPC::NoRegister : FirstScratchReg;
635   }
636 
637   // If there is another one available, set the second scratch register to that.
638   // Otherwise, set it to either PPC::NoRegister if this function requires two
639   // or to whatever SR1 is set to if this function doesn't require two.
640   if (SR2) {
641     int SecondScratchReg = BV.find_next(*SR1);
642     if (SecondScratchReg != -1)
643       *SR2 = SecondScratchReg;
644     else
645       *SR2 = TwoUniqueRegsRequired ? (unsigned)PPC::NoRegister : *SR1;
646   }
647 
648   // Now that we've done our best to provide both registers, double check
649   // whether we were unable to provide enough.
650   if (BV.count() < (TwoUniqueRegsRequired ? 2U : 1U))
651     return false;
652 
653   return true;
654 }
655 
656 // We need a scratch register for spilling LR and for spilling CR. By default,
657 // we use two scratch registers to hide latency. However, if only one scratch
658 // register is available, we can adjust for that by not overlapping the spill
659 // code. However, if we need to realign the stack (i.e. have a base pointer)
660 // and the stack frame is large, we need two scratch registers.
661 bool
662 PPCFrameLowering::twoUniqueScratchRegsRequired(MachineBasicBlock *MBB) const {
663   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
664   MachineFunction &MF = *(MBB->getParent());
665   bool HasBP = RegInfo->hasBasePointer(MF);
666   unsigned FrameSize = determineFrameLayout(MF);
667   int NegFrameSize = -FrameSize;
668   bool IsLargeFrame = !isInt<16>(NegFrameSize);
669   MachineFrameInfo &MFI = MF.getFrameInfo();
670   Align MaxAlign = MFI.getMaxAlign();
671   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
672 
673   return (IsLargeFrame || !HasRedZone) && HasBP && MaxAlign > 1;
674 }
675 
676 bool PPCFrameLowering::canUseAsPrologue(const MachineBasicBlock &MBB) const {
677   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
678 
679   return findScratchRegister(TmpMBB, false,
680                              twoUniqueScratchRegsRequired(TmpMBB));
681 }
682 
683 bool PPCFrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const {
684   MachineBasicBlock *TmpMBB = const_cast<MachineBasicBlock *>(&MBB);
685 
686   return findScratchRegister(TmpMBB, true);
687 }
688 
689 bool PPCFrameLowering::stackUpdateCanBeMoved(MachineFunction &MF) const {
690   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
691   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
692 
693   // Abort if there is no register info or function info.
694   if (!RegInfo || !FI)
695     return false;
696 
697   // Only move the stack update on ELFv2 ABI and PPC64.
698   if (!Subtarget.isELFv2ABI() || !Subtarget.isPPC64())
699     return false;
700 
701   // Check the frame size first and return false if it does not fit the
702   // requirements.
703   // We need a non-zero frame size as well as a frame that will fit in the red
704   // zone. This is because by moving the stack pointer update we are now storing
705   // to the red zone until the stack pointer is updated. If we get an interrupt
706   // inside the prologue but before the stack update we now have a number of
707   // stores to the red zone and those stores must all fit.
708   MachineFrameInfo &MFI = MF.getFrameInfo();
709   unsigned FrameSize = MFI.getStackSize();
710   if (!FrameSize || FrameSize > Subtarget.getRedZoneSize())
711     return false;
712 
713   // Frame pointers and base pointers complicate matters so don't do anything
714   // if we have them. For example having a frame pointer will sometimes require
715   // a copy of r1 into r31 and that makes keeping track of updates to r1 more
716   // difficult.
717   if (hasFP(MF) || RegInfo->hasBasePointer(MF))
718     return false;
719 
720   // Calls to fast_cc functions use different rules for passing parameters on
721   // the stack from the ABI and using PIC base in the function imposes
722   // similar restrictions to using the base pointer. It is not generally safe
723   // to move the stack pointer update in these situations.
724   if (FI->hasFastCall() || FI->usesPICBase())
725     return false;
726 
727   // Finally we can move the stack update if we do not require register
728   // scavenging. Register scavenging can introduce more spills and so
729   // may make the frame size larger than we have computed.
730   return !RegInfo->requiresFrameIndexScavenging(MF);
731 }
732 
733 void PPCFrameLowering::emitPrologue(MachineFunction &MF,
734                                     MachineBasicBlock &MBB) const {
735   MachineBasicBlock::iterator MBBI = MBB.begin();
736   MachineFrameInfo &MFI = MF.getFrameInfo();
737   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
738   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
739 
740   MachineModuleInfo &MMI = MF.getMMI();
741   const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo();
742   DebugLoc dl;
743   // AIX assembler does not support cfi directives.
744   const bool needsCFI = MF.needsFrameMoves() && !Subtarget.isAIXABI();
745 
746   // Get processor type.
747   bool isPPC64 = Subtarget.isPPC64();
748   // Get the ABI.
749   bool isSVR4ABI = Subtarget.isSVR4ABI();
750   bool isAIXABI = Subtarget.isAIXABI();
751   bool isELFv2ABI = Subtarget.isELFv2ABI();
752   assert((isSVR4ABI || isAIXABI) && "Unsupported PPC ABI.");
753 
754   // Scan the prolog, looking for an UPDATE_VRSAVE instruction.  If we find it,
755   // process it.
756   if (!isSVR4ABI)
757     for (unsigned i = 0; MBBI != MBB.end(); ++i, ++MBBI) {
758       if (MBBI->getOpcode() == PPC::UPDATE_VRSAVE) {
759         if (isAIXABI)
760           report_fatal_error("UPDATE_VRSAVE is unexpected on AIX.");
761         HandleVRSaveUpdate(*MBBI, TII);
762         break;
763       }
764     }
765 
766   // Move MBBI back to the beginning of the prologue block.
767   MBBI = MBB.begin();
768 
769   // Work out frame sizes.
770   unsigned FrameSize = determineFrameLayoutAndUpdate(MF);
771   int NegFrameSize = -FrameSize;
772   if (!isInt<32>(NegFrameSize))
773     llvm_unreachable("Unhandled stack size!");
774 
775   if (MFI.isFrameAddressTaken())
776     replaceFPWithRealFP(MF);
777 
778   // Check if the link register (LR) must be saved.
779   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
780   bool MustSaveLR = FI->mustSaveLR();
781   bool MustSaveTOC = FI->mustSaveTOC();
782   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
783   bool MustSaveCR = !MustSaveCRs.empty();
784   // Do we have a frame pointer and/or base pointer for this function?
785   bool HasFP = hasFP(MF);
786   bool HasBP = RegInfo->hasBasePointer(MF);
787   bool HasRedZone = isPPC64 || !isSVR4ABI;
788 
789   unsigned SPReg       = isPPC64 ? PPC::X1  : PPC::R1;
790   Register BPReg = RegInfo->getBaseRegister(MF);
791   unsigned FPReg       = isPPC64 ? PPC::X31 : PPC::R31;
792   unsigned LRReg       = isPPC64 ? PPC::LR8 : PPC::LR;
793   unsigned TOCReg      = isPPC64 ? PPC::X2 :  PPC::R2;
794   unsigned ScratchReg  = 0;
795   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
796   //  ...(R12/X12 is volatile in both Darwin & SVR4, & can't be a function arg.)
797   const MCInstrDesc& MFLRInst = TII.get(isPPC64 ? PPC::MFLR8
798                                                 : PPC::MFLR );
799   const MCInstrDesc& StoreInst = TII.get(isPPC64 ? PPC::STD
800                                                  : PPC::STW );
801   const MCInstrDesc& StoreUpdtInst = TII.get(isPPC64 ? PPC::STDU
802                                                      : PPC::STWU );
803   const MCInstrDesc& StoreUpdtIdxInst = TII.get(isPPC64 ? PPC::STDUX
804                                                         : PPC::STWUX);
805   const MCInstrDesc& LoadImmShiftedInst = TII.get(isPPC64 ? PPC::LIS8
806                                                           : PPC::LIS );
807   const MCInstrDesc& OrImmInst = TII.get(isPPC64 ? PPC::ORI8
808                                                  : PPC::ORI );
809   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
810                                               : PPC::OR );
811   const MCInstrDesc& SubtractCarryingInst = TII.get(isPPC64 ? PPC::SUBFC8
812                                                             : PPC::SUBFC);
813   const MCInstrDesc& SubtractImmCarryingInst = TII.get(isPPC64 ? PPC::SUBFIC8
814                                                                : PPC::SUBFIC);
815   const MCInstrDesc &MoveFromCondRegInst = TII.get(isPPC64 ? PPC::MFCR8
816                                                            : PPC::MFCR);
817   const MCInstrDesc &StoreWordInst = TII.get(isPPC64 ? PPC::STW8 : PPC::STW);
818 
819   // Regarding this assert: Even though LR is saved in the caller's frame (i.e.,
820   // LROffset is positive), that slot is callee-owned. Because PPC32 SVR4 has no
821   // Red Zone, an asynchronous event (a form of "callee") could claim a frame &
822   // overwrite it, so PPC32 SVR4 must claim at least a minimal frame to save LR.
823   assert((isPPC64 || !isSVR4ABI || !(!FrameSize && (MustSaveLR || HasFP))) &&
824          "FrameSize must be >0 to save/restore the FP or LR for 32-bit SVR4.");
825 
826   // Using the same bool variable as below to suppress compiler warnings.
827   bool SingleScratchReg =
828     findScratchRegister(&MBB, false, twoUniqueScratchRegsRequired(&MBB),
829                         &ScratchReg, &TempReg);
830   assert(SingleScratchReg &&
831          "Required number of registers not available in this block");
832 
833   SingleScratchReg = ScratchReg == TempReg;
834 
835   int LROffset = getReturnSaveOffset();
836 
837   int FPOffset = 0;
838   if (HasFP) {
839     if (isSVR4ABI) {
840       MachineFrameInfo &MFI = MF.getFrameInfo();
841       int FPIndex = FI->getFramePointerSaveIndex();
842       assert(FPIndex && "No Frame Pointer Save Slot!");
843       FPOffset = MFI.getObjectOffset(FPIndex);
844     } else {
845       FPOffset = getFramePointerSaveOffset();
846     }
847   }
848 
849   int BPOffset = 0;
850   if (HasBP) {
851     if (isSVR4ABI) {
852       MachineFrameInfo &MFI = MF.getFrameInfo();
853       int BPIndex = FI->getBasePointerSaveIndex();
854       assert(BPIndex && "No Base Pointer Save Slot!");
855       BPOffset = MFI.getObjectOffset(BPIndex);
856     } else {
857       BPOffset = getBasePointerSaveOffset();
858     }
859   }
860 
861   int PBPOffset = 0;
862   if (FI->usesPICBase()) {
863     MachineFrameInfo &MFI = MF.getFrameInfo();
864     int PBPIndex = FI->getPICBasePointerSaveIndex();
865     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
866     PBPOffset = MFI.getObjectOffset(PBPIndex);
867   }
868 
869   // Get stack alignments.
870   Align MaxAlign = MFI.getMaxAlign();
871   if (HasBP && MaxAlign > 1)
872     assert(Log2(MaxAlign) < 16 && "Invalid alignment!");
873 
874   // Frames of 32KB & larger require special handling because they cannot be
875   // indexed into with a simple STDU/STWU/STD/STW immediate offset operand.
876   bool isLargeFrame = !isInt<16>(NegFrameSize);
877 
878   // Check if we can move the stack update instruction (stdu) down the prologue
879   // past the callee saves. Hopefully this will avoid the situation where the
880   // saves are waiting for the update on the store with update to complete.
881   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
882   bool MovingStackUpdateDown = false;
883 
884   // Check if we can move the stack update.
885   if (stackUpdateCanBeMoved(MF)) {
886     const std::vector<CalleeSavedInfo> &Info = MFI.getCalleeSavedInfo();
887     for (CalleeSavedInfo CSI : Info) {
888       int FrIdx = CSI.getFrameIdx();
889       // If the frame index is not negative the callee saved info belongs to a
890       // stack object that is not a fixed stack object. We ignore non-fixed
891       // stack objects because we won't move the stack update pointer past them.
892       if (FrIdx >= 0)
893         continue;
894 
895       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0) {
896         StackUpdateLoc++;
897         MovingStackUpdateDown = true;
898       } else {
899         // We need all of the Frame Indices to meet these conditions.
900         // If they do not, abort the whole operation.
901         StackUpdateLoc = MBBI;
902         MovingStackUpdateDown = false;
903         break;
904       }
905     }
906 
907     // If the operation was not aborted then update the object offset.
908     if (MovingStackUpdateDown) {
909       for (CalleeSavedInfo CSI : Info) {
910         int FrIdx = CSI.getFrameIdx();
911         if (FrIdx < 0)
912           MFI.setObjectOffset(FrIdx, MFI.getObjectOffset(FrIdx) + NegFrameSize);
913       }
914     }
915   }
916 
917   // Where in the prologue we move the CR fields depends on how many scratch
918   // registers we have, and if we need to save the link register or not. This
919   // lambda is to avoid duplicating the logic in 2 places.
920   auto BuildMoveFromCR = [&]() {
921     if (isELFv2ABI && MustSaveCRs.size() == 1) {
922     // In the ELFv2 ABI, we are not required to save all CR fields.
923     // If only one CR field is clobbered, it is more efficient to use
924     // mfocrf to selectively save just that field, because mfocrf has short
925     // latency compares to mfcr.
926       assert(isPPC64 && "V2 ABI is 64-bit only.");
927       MachineInstrBuilder MIB =
928           BuildMI(MBB, MBBI, dl, TII.get(PPC::MFOCRF8), TempReg);
929       MIB.addReg(MustSaveCRs[0], RegState::Kill);
930     } else {
931       MachineInstrBuilder MIB =
932           BuildMI(MBB, MBBI, dl, MoveFromCondRegInst, TempReg);
933       for (unsigned CRfield : MustSaveCRs)
934         MIB.addReg(CRfield, RegState::ImplicitKill);
935     }
936   };
937 
938   // If we need to spill the CR and the LR but we don't have two separate
939   // registers available, we must spill them one at a time
940   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
941     BuildMoveFromCR();
942     BuildMI(MBB, MBBI, dl, StoreWordInst)
943         .addReg(TempReg, getKillRegState(true))
944         .addImm(CRSaveOffset)
945         .addReg(SPReg);
946   }
947 
948   if (MustSaveLR)
949     BuildMI(MBB, MBBI, dl, MFLRInst, ScratchReg);
950 
951   if (MustSaveCR && !(SingleScratchReg && MustSaveLR))
952     BuildMoveFromCR();
953 
954   if (HasRedZone) {
955     if (HasFP)
956       BuildMI(MBB, MBBI, dl, StoreInst)
957         .addReg(FPReg)
958         .addImm(FPOffset)
959         .addReg(SPReg);
960     if (FI->usesPICBase())
961       BuildMI(MBB, MBBI, dl, StoreInst)
962         .addReg(PPC::R30)
963         .addImm(PBPOffset)
964         .addReg(SPReg);
965     if (HasBP)
966       BuildMI(MBB, MBBI, dl, StoreInst)
967         .addReg(BPReg)
968         .addImm(BPOffset)
969         .addReg(SPReg);
970   }
971 
972   if (MustSaveLR)
973     BuildMI(MBB, StackUpdateLoc, dl, StoreInst)
974       .addReg(ScratchReg, getKillRegState(true))
975       .addImm(LROffset)
976       .addReg(SPReg);
977 
978   if (MustSaveCR &&
979       !(SingleScratchReg && MustSaveLR)) {
980     assert(HasRedZone && "A red zone is always available on PPC64");
981     BuildMI(MBB, MBBI, dl, StoreWordInst)
982       .addReg(TempReg, getKillRegState(true))
983       .addImm(CRSaveOffset)
984       .addReg(SPReg);
985   }
986 
987   // Skip the rest if this is a leaf function & all spills fit in the Red Zone.
988   if (!FrameSize)
989     return;
990 
991   // Adjust stack pointer: r1 += NegFrameSize.
992   // If there is a preferred stack alignment, align R1 now
993 
994   if (HasBP && HasRedZone) {
995     // Save a copy of r1 as the base pointer.
996     BuildMI(MBB, MBBI, dl, OrInst, BPReg)
997       .addReg(SPReg)
998       .addReg(SPReg);
999   }
1000 
1001   // Have we generated a STUX instruction to claim stack frame? If so,
1002   // the negated frame size will be placed in ScratchReg.
1003   bool HasSTUX = false;
1004 
1005   // This condition must be kept in sync with canUseAsPrologue.
1006   if (HasBP && MaxAlign > 1) {
1007     if (isPPC64)
1008       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLDICL), ScratchReg)
1009           .addReg(SPReg)
1010           .addImm(0)
1011           .addImm(64 - Log2(MaxAlign));
1012     else // PPC32...
1013       BuildMI(MBB, MBBI, dl, TII.get(PPC::RLWINM), ScratchReg)
1014           .addReg(SPReg)
1015           .addImm(0)
1016           .addImm(32 - Log2(MaxAlign))
1017           .addImm(31);
1018     if (!isLargeFrame) {
1019       BuildMI(MBB, MBBI, dl, SubtractImmCarryingInst, ScratchReg)
1020         .addReg(ScratchReg, RegState::Kill)
1021         .addImm(NegFrameSize);
1022     } else {
1023       assert(!SingleScratchReg && "Only a single scratch reg available");
1024       BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, TempReg)
1025         .addImm(NegFrameSize >> 16);
1026       BuildMI(MBB, MBBI, dl, OrImmInst, TempReg)
1027         .addReg(TempReg, RegState::Kill)
1028         .addImm(NegFrameSize & 0xFFFF);
1029       BuildMI(MBB, MBBI, dl, SubtractCarryingInst, ScratchReg)
1030         .addReg(ScratchReg, RegState::Kill)
1031         .addReg(TempReg, RegState::Kill);
1032     }
1033 
1034     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1035       .addReg(SPReg, RegState::Kill)
1036       .addReg(SPReg)
1037       .addReg(ScratchReg);
1038     HasSTUX = true;
1039 
1040   } else if (!isLargeFrame) {
1041     BuildMI(MBB, StackUpdateLoc, dl, StoreUpdtInst, SPReg)
1042       .addReg(SPReg)
1043       .addImm(NegFrameSize)
1044       .addReg(SPReg);
1045 
1046   } else {
1047     BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1048       .addImm(NegFrameSize >> 16);
1049     BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1050       .addReg(ScratchReg, RegState::Kill)
1051       .addImm(NegFrameSize & 0xFFFF);
1052     BuildMI(MBB, MBBI, dl, StoreUpdtIdxInst, SPReg)
1053       .addReg(SPReg, RegState::Kill)
1054       .addReg(SPReg)
1055       .addReg(ScratchReg);
1056     HasSTUX = true;
1057   }
1058 
1059   // Save the TOC register after the stack pointer update if a prologue TOC
1060   // save is required for the function.
1061   if (MustSaveTOC) {
1062     assert(isELFv2ABI && "TOC saves in the prologue only supported on ELFv2");
1063     BuildMI(MBB, StackUpdateLoc, dl, TII.get(PPC::STD))
1064       .addReg(TOCReg, getKillRegState(true))
1065       .addImm(TOCSaveOffset)
1066       .addReg(SPReg);
1067   }
1068 
1069   if (!HasRedZone) {
1070     assert(!isPPC64 && "A red zone is always available on PPC64");
1071     if (HasSTUX) {
1072       // The negated frame size is in ScratchReg, and the SPReg has been
1073       // decremented by the frame size: SPReg = old SPReg + ScratchReg.
1074       // Since FPOffset, PBPOffset, etc. are relative to the beginning of
1075       // the stack frame (i.e. the old SP), ideally, we would put the old
1076       // SP into a register and use it as the base for the stores. The
1077       // problem is that the only available register may be ScratchReg,
1078       // which could be R0, and R0 cannot be used as a base address.
1079 
1080       // First, set ScratchReg to the old SP. This may need to be modified
1081       // later.
1082       BuildMI(MBB, MBBI, dl, TII.get(PPC::SUBF), ScratchReg)
1083         .addReg(ScratchReg, RegState::Kill)
1084         .addReg(SPReg);
1085 
1086       if (ScratchReg == PPC::R0) {
1087         // R0 cannot be used as a base register, but it can be used as an
1088         // index in a store-indexed.
1089         int LastOffset = 0;
1090         if (HasFP)  {
1091           // R0 += (FPOffset-LastOffset).
1092           // Need addic, since addi treats R0 as 0.
1093           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1094             .addReg(ScratchReg)
1095             .addImm(FPOffset-LastOffset);
1096           LastOffset = FPOffset;
1097           // Store FP into *R0.
1098           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1099             .addReg(FPReg, RegState::Kill)  // Save FP.
1100             .addReg(PPC::ZERO)
1101             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1102         }
1103         if (FI->usesPICBase()) {
1104           // R0 += (PBPOffset-LastOffset).
1105           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1106             .addReg(ScratchReg)
1107             .addImm(PBPOffset-LastOffset);
1108           LastOffset = PBPOffset;
1109           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1110             .addReg(PPC::R30, RegState::Kill)  // Save PIC base pointer.
1111             .addReg(PPC::ZERO)
1112             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1113         }
1114         if (HasBP) {
1115           // R0 += (BPOffset-LastOffset).
1116           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), ScratchReg)
1117             .addReg(ScratchReg)
1118             .addImm(BPOffset-LastOffset);
1119           LastOffset = BPOffset;
1120           BuildMI(MBB, MBBI, dl, TII.get(PPC::STWX))
1121             .addReg(BPReg, RegState::Kill)  // Save BP.
1122             .addReg(PPC::ZERO)
1123             .addReg(ScratchReg);  // This will be the index (R0 is ok here).
1124           // BP = R0-LastOffset
1125           BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDIC), BPReg)
1126             .addReg(ScratchReg, RegState::Kill)
1127             .addImm(-LastOffset);
1128         }
1129       } else {
1130         // ScratchReg is not R0, so use it as the base register. It is
1131         // already set to the old SP, so we can use the offsets directly.
1132 
1133         // Now that the stack frame has been allocated, save all the necessary
1134         // registers using ScratchReg as the base address.
1135         if (HasFP)
1136           BuildMI(MBB, MBBI, dl, StoreInst)
1137             .addReg(FPReg)
1138             .addImm(FPOffset)
1139             .addReg(ScratchReg);
1140         if (FI->usesPICBase())
1141           BuildMI(MBB, MBBI, dl, StoreInst)
1142             .addReg(PPC::R30)
1143             .addImm(PBPOffset)
1144             .addReg(ScratchReg);
1145         if (HasBP) {
1146           BuildMI(MBB, MBBI, dl, StoreInst)
1147             .addReg(BPReg)
1148             .addImm(BPOffset)
1149             .addReg(ScratchReg);
1150           BuildMI(MBB, MBBI, dl, OrInst, BPReg)
1151             .addReg(ScratchReg, RegState::Kill)
1152             .addReg(ScratchReg);
1153         }
1154       }
1155     } else {
1156       // The frame size is a known 16-bit constant (fitting in the immediate
1157       // field of STWU). To be here we have to be compiling for PPC32.
1158       // Since the SPReg has been decreased by FrameSize, add it back to each
1159       // offset.
1160       if (HasFP)
1161         BuildMI(MBB, MBBI, dl, StoreInst)
1162           .addReg(FPReg)
1163           .addImm(FrameSize + FPOffset)
1164           .addReg(SPReg);
1165       if (FI->usesPICBase())
1166         BuildMI(MBB, MBBI, dl, StoreInst)
1167           .addReg(PPC::R30)
1168           .addImm(FrameSize + PBPOffset)
1169           .addReg(SPReg);
1170       if (HasBP) {
1171         BuildMI(MBB, MBBI, dl, StoreInst)
1172           .addReg(BPReg)
1173           .addImm(FrameSize + BPOffset)
1174           .addReg(SPReg);
1175         BuildMI(MBB, MBBI, dl, TII.get(PPC::ADDI), BPReg)
1176           .addReg(SPReg)
1177           .addImm(FrameSize);
1178       }
1179     }
1180   }
1181 
1182   // Add Call Frame Information for the instructions we generated above.
1183   if (needsCFI) {
1184     unsigned CFIIndex;
1185 
1186     if (HasBP) {
1187       // Define CFA in terms of BP. Do this in preference to using FP/SP,
1188       // because if the stack needed aligning then CFA won't be at a fixed
1189       // offset from FP/SP.
1190       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1191       CFIIndex = MF.addFrameInst(
1192           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1193     } else {
1194       // Adjust the definition of CFA to account for the change in SP.
1195       assert(NegFrameSize);
1196       CFIIndex = MF.addFrameInst(
1197           MCCFIInstruction::createDefCfaOffset(nullptr, NegFrameSize));
1198     }
1199     BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1200         .addCFIIndex(CFIIndex);
1201 
1202     if (HasFP) {
1203       // Describe where FP was saved, at a fixed offset from CFA.
1204       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1205       CFIIndex = MF.addFrameInst(
1206           MCCFIInstruction::createOffset(nullptr, Reg, FPOffset));
1207       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1208           .addCFIIndex(CFIIndex);
1209     }
1210 
1211     if (FI->usesPICBase()) {
1212       // Describe where FP was saved, at a fixed offset from CFA.
1213       unsigned Reg = MRI->getDwarfRegNum(PPC::R30, true);
1214       CFIIndex = MF.addFrameInst(
1215           MCCFIInstruction::createOffset(nullptr, Reg, PBPOffset));
1216       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1217           .addCFIIndex(CFIIndex);
1218     }
1219 
1220     if (HasBP) {
1221       // Describe where BP was saved, at a fixed offset from CFA.
1222       unsigned Reg = MRI->getDwarfRegNum(BPReg, true);
1223       CFIIndex = MF.addFrameInst(
1224           MCCFIInstruction::createOffset(nullptr, Reg, BPOffset));
1225       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1226           .addCFIIndex(CFIIndex);
1227     }
1228 
1229     if (MustSaveLR) {
1230       // Describe where LR was saved, at a fixed offset from CFA.
1231       unsigned Reg = MRI->getDwarfRegNum(LRReg, true);
1232       CFIIndex = MF.addFrameInst(
1233           MCCFIInstruction::createOffset(nullptr, Reg, LROffset));
1234       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1235           .addCFIIndex(CFIIndex);
1236     }
1237   }
1238 
1239   // If there is a frame pointer, copy R1 into R31
1240   if (HasFP) {
1241     BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1242       .addReg(SPReg)
1243       .addReg(SPReg);
1244 
1245     if (!HasBP && needsCFI) {
1246       // Change the definition of CFA from SP+offset to FP+offset, because SP
1247       // will change at every alloca.
1248       unsigned Reg = MRI->getDwarfRegNum(FPReg, true);
1249       unsigned CFIIndex = MF.addFrameInst(
1250           MCCFIInstruction::createDefCfaRegister(nullptr, Reg));
1251 
1252       BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1253           .addCFIIndex(CFIIndex);
1254     }
1255   }
1256 
1257   if (needsCFI) {
1258     // Describe where callee saved registers were saved, at fixed offsets from
1259     // CFA.
1260     const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1261     for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
1262       unsigned Reg = CSI[I].getReg();
1263       if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
1264 
1265       // This is a bit of a hack: CR2LT, CR2GT, CR2EQ and CR2UN are just
1266       // subregisters of CR2. We just need to emit a move of CR2.
1267       if (PPC::CRBITRCRegClass.contains(Reg))
1268         continue;
1269 
1270       if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
1271         continue;
1272 
1273       // For SVR4, don't emit a move for the CR spill slot if we haven't
1274       // spilled CRs.
1275       if (isSVR4ABI && (PPC::CR2 <= Reg && Reg <= PPC::CR4)
1276           && !MustSaveCR)
1277         continue;
1278 
1279       // For 64-bit SVR4 when we have spilled CRs, the spill location
1280       // is SP+8, not a frame-relative slot.
1281       if (isSVR4ABI && isPPC64 && (PPC::CR2 <= Reg && Reg <= PPC::CR4)) {
1282         // In the ELFv1 ABI, only CR2 is noted in CFI and stands in for
1283         // the whole CR word.  In the ELFv2 ABI, every CR that was
1284         // actually saved gets its own CFI record.
1285         unsigned CRReg = isELFv2ABI? Reg : (unsigned) PPC::CR2;
1286         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1287             nullptr, MRI->getDwarfRegNum(CRReg, true), CRSaveOffset));
1288         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1289             .addCFIIndex(CFIIndex);
1290         continue;
1291       }
1292 
1293       if (CSI[I].isSpilledToReg()) {
1294         unsigned SpilledReg = CSI[I].getDstReg();
1295         unsigned CFIRegister = MF.addFrameInst(MCCFIInstruction::createRegister(
1296             nullptr, MRI->getDwarfRegNum(Reg, true),
1297             MRI->getDwarfRegNum(SpilledReg, true)));
1298         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1299           .addCFIIndex(CFIRegister);
1300       } else {
1301         int Offset = MFI.getObjectOffset(CSI[I].getFrameIdx());
1302         // We have changed the object offset above but we do not want to change
1303         // the actual offsets in the CFI instruction so we have to undo the
1304         // offset change here.
1305         if (MovingStackUpdateDown)
1306           Offset -= NegFrameSize;
1307 
1308         unsigned CFIIndex = MF.addFrameInst(MCCFIInstruction::createOffset(
1309             nullptr, MRI->getDwarfRegNum(Reg, true), Offset));
1310         BuildMI(MBB, MBBI, dl, TII.get(TargetOpcode::CFI_INSTRUCTION))
1311             .addCFIIndex(CFIIndex);
1312       }
1313     }
1314   }
1315 }
1316 
1317 void PPCFrameLowering::emitEpilogue(MachineFunction &MF,
1318                                     MachineBasicBlock &MBB) const {
1319   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1320   DebugLoc dl;
1321 
1322   if (MBBI != MBB.end())
1323     dl = MBBI->getDebugLoc();
1324 
1325   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1326   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1327 
1328   // Get alignment info so we know how to restore the SP.
1329   const MachineFrameInfo &MFI = MF.getFrameInfo();
1330 
1331   // Get the number of bytes allocated from the FrameInfo.
1332   int FrameSize = MFI.getStackSize();
1333 
1334   // Get processor type.
1335   bool isPPC64 = Subtarget.isPPC64();
1336   // Get the ABI.
1337   bool isSVR4ABI = Subtarget.isSVR4ABI();
1338 
1339   // Check if the link register (LR) has been saved.
1340   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1341   bool MustSaveLR = FI->mustSaveLR();
1342   const SmallVectorImpl<unsigned> &MustSaveCRs = FI->getMustSaveCRs();
1343   bool MustSaveCR = !MustSaveCRs.empty();
1344   // Do we have a frame pointer and/or base pointer for this function?
1345   bool HasFP = hasFP(MF);
1346   bool HasBP = RegInfo->hasBasePointer(MF);
1347   bool HasRedZone = Subtarget.isPPC64() || !Subtarget.isSVR4ABI();
1348 
1349   unsigned SPReg      = isPPC64 ? PPC::X1  : PPC::R1;
1350   Register BPReg = RegInfo->getBaseRegister(MF);
1351   unsigned FPReg      = isPPC64 ? PPC::X31 : PPC::R31;
1352   unsigned ScratchReg = 0;
1353   unsigned TempReg     = isPPC64 ? PPC::X12 : PPC::R12; // another scratch reg
1354   const MCInstrDesc& MTLRInst = TII.get( isPPC64 ? PPC::MTLR8
1355                                                  : PPC::MTLR );
1356   const MCInstrDesc& LoadInst = TII.get( isPPC64 ? PPC::LD
1357                                                  : PPC::LWZ );
1358   const MCInstrDesc& LoadImmShiftedInst = TII.get( isPPC64 ? PPC::LIS8
1359                                                            : PPC::LIS );
1360   const MCInstrDesc& OrInst = TII.get(isPPC64 ? PPC::OR8
1361                                               : PPC::OR );
1362   const MCInstrDesc& OrImmInst = TII.get( isPPC64 ? PPC::ORI8
1363                                                   : PPC::ORI );
1364   const MCInstrDesc& AddImmInst = TII.get( isPPC64 ? PPC::ADDI8
1365                                                    : PPC::ADDI );
1366   const MCInstrDesc& AddInst = TII.get( isPPC64 ? PPC::ADD8
1367                                                 : PPC::ADD4 );
1368   const MCInstrDesc& LoadWordInst = TII.get( isPPC64 ? PPC::LWZ8
1369                                                      : PPC::LWZ);
1370   const MCInstrDesc& MoveToCRInst = TII.get( isPPC64 ? PPC::MTOCRF8
1371                                                      : PPC::MTOCRF);
1372   int LROffset = getReturnSaveOffset();
1373 
1374   int FPOffset = 0;
1375 
1376   // Using the same bool variable as below to suppress compiler warnings.
1377   bool SingleScratchReg = findScratchRegister(&MBB, true, false, &ScratchReg,
1378                                               &TempReg);
1379   assert(SingleScratchReg &&
1380          "Could not find an available scratch register");
1381 
1382   SingleScratchReg = ScratchReg == TempReg;
1383 
1384   if (HasFP) {
1385     if (isSVR4ABI) {
1386       int FPIndex = FI->getFramePointerSaveIndex();
1387       assert(FPIndex && "No Frame Pointer Save Slot!");
1388       FPOffset = MFI.getObjectOffset(FPIndex);
1389     } else {
1390       FPOffset = getFramePointerSaveOffset();
1391     }
1392   }
1393 
1394   int BPOffset = 0;
1395   if (HasBP) {
1396     if (isSVR4ABI) {
1397       int BPIndex = FI->getBasePointerSaveIndex();
1398       assert(BPIndex && "No Base Pointer Save Slot!");
1399       BPOffset = MFI.getObjectOffset(BPIndex);
1400     } else {
1401       BPOffset = getBasePointerSaveOffset();
1402     }
1403   }
1404 
1405   int PBPOffset = 0;
1406   if (FI->usesPICBase()) {
1407     int PBPIndex = FI->getPICBasePointerSaveIndex();
1408     assert(PBPIndex && "No PIC Base Pointer Save Slot!");
1409     PBPOffset = MFI.getObjectOffset(PBPIndex);
1410   }
1411 
1412   bool IsReturnBlock = (MBBI != MBB.end() && MBBI->isReturn());
1413 
1414   if (IsReturnBlock) {
1415     unsigned RetOpcode = MBBI->getOpcode();
1416     bool UsesTCRet =  RetOpcode == PPC::TCRETURNri ||
1417                       RetOpcode == PPC::TCRETURNdi ||
1418                       RetOpcode == PPC::TCRETURNai ||
1419                       RetOpcode == PPC::TCRETURNri8 ||
1420                       RetOpcode == PPC::TCRETURNdi8 ||
1421                       RetOpcode == PPC::TCRETURNai8;
1422 
1423     if (UsesTCRet) {
1424       int MaxTCRetDelta = FI->getTailCallSPDelta();
1425       MachineOperand &StackAdjust = MBBI->getOperand(1);
1426       assert(StackAdjust.isImm() && "Expecting immediate value.");
1427       // Adjust stack pointer.
1428       int StackAdj = StackAdjust.getImm();
1429       int Delta = StackAdj - MaxTCRetDelta;
1430       assert((Delta >= 0) && "Delta must be positive");
1431       if (MaxTCRetDelta>0)
1432         FrameSize += (StackAdj +Delta);
1433       else
1434         FrameSize += StackAdj;
1435     }
1436   }
1437 
1438   // Frames of 32KB & larger require special handling because they cannot be
1439   // indexed into with a simple LD/LWZ immediate offset operand.
1440   bool isLargeFrame = !isInt<16>(FrameSize);
1441 
1442   // On targets without red zone, the SP needs to be restored last, so that
1443   // all live contents of the stack frame are upwards of the SP. This means
1444   // that we cannot restore SP just now, since there may be more registers
1445   // to restore from the stack frame (e.g. R31). If the frame size is not
1446   // a simple immediate value, we will need a spare register to hold the
1447   // restored SP. If the frame size is known and small, we can simply adjust
1448   // the offsets of the registers to be restored, and still use SP to restore
1449   // them. In such case, the final update of SP will be to add the frame
1450   // size to it.
1451   // To simplify the code, set RBReg to the base register used to restore
1452   // values from the stack, and set SPAdd to the value that needs to be added
1453   // to the SP at the end. The default values are as if red zone was present.
1454   unsigned RBReg = SPReg;
1455   unsigned SPAdd = 0;
1456 
1457   // Check if we can move the stack update instruction up the epilogue
1458   // past the callee saves. This will allow the move to LR instruction
1459   // to be executed before the restores of the callee saves which means
1460   // that the callee saves can hide the latency from the MTLR instrcution.
1461   MachineBasicBlock::iterator StackUpdateLoc = MBBI;
1462   if (stackUpdateCanBeMoved(MF)) {
1463     const std::vector<CalleeSavedInfo> & Info = MFI.getCalleeSavedInfo();
1464     for (CalleeSavedInfo CSI : Info) {
1465       int FrIdx = CSI.getFrameIdx();
1466       // If the frame index is not negative the callee saved info belongs to a
1467       // stack object that is not a fixed stack object. We ignore non-fixed
1468       // stack objects because we won't move the update of the stack pointer
1469       // past them.
1470       if (FrIdx >= 0)
1471         continue;
1472 
1473       if (MFI.isFixedObjectIndex(FrIdx) && MFI.getObjectOffset(FrIdx) < 0)
1474         StackUpdateLoc--;
1475       else {
1476         // Abort the operation as we can't update all CSR restores.
1477         StackUpdateLoc = MBBI;
1478         break;
1479       }
1480     }
1481   }
1482 
1483   if (FrameSize) {
1484     // In the prologue, the loaded (or persistent) stack pointer value is
1485     // offset by the STDU/STDUX/STWU/STWUX instruction. For targets with red
1486     // zone add this offset back now.
1487 
1488     // If this function contained a fastcc call and GuaranteedTailCallOpt is
1489     // enabled (=> hasFastCall()==true) the fastcc call might contain a tail
1490     // call which invalidates the stack pointer value in SP(0). So we use the
1491     // value of R31 in this case.
1492     if (FI->hasFastCall()) {
1493       assert(HasFP && "Expecting a valid frame pointer.");
1494       if (!HasRedZone)
1495         RBReg = FPReg;
1496       if (!isLargeFrame) {
1497         BuildMI(MBB, MBBI, dl, AddImmInst, RBReg)
1498           .addReg(FPReg).addImm(FrameSize);
1499       } else {
1500         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1501           .addImm(FrameSize >> 16);
1502         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1503           .addReg(ScratchReg, RegState::Kill)
1504           .addImm(FrameSize & 0xFFFF);
1505         BuildMI(MBB, MBBI, dl, AddInst)
1506           .addReg(RBReg)
1507           .addReg(FPReg)
1508           .addReg(ScratchReg);
1509       }
1510     } else if (!isLargeFrame && !HasBP && !MFI.hasVarSizedObjects()) {
1511       if (HasRedZone) {
1512         BuildMI(MBB, StackUpdateLoc, dl, AddImmInst, SPReg)
1513           .addReg(SPReg)
1514           .addImm(FrameSize);
1515       } else {
1516         // Make sure that adding FrameSize will not overflow the max offset
1517         // size.
1518         assert(FPOffset <= 0 && BPOffset <= 0 && PBPOffset <= 0 &&
1519                "Local offsets should be negative");
1520         SPAdd = FrameSize;
1521         FPOffset += FrameSize;
1522         BPOffset += FrameSize;
1523         PBPOffset += FrameSize;
1524       }
1525     } else {
1526       // We don't want to use ScratchReg as a base register, because it
1527       // could happen to be R0. Use FP instead, but make sure to preserve it.
1528       if (!HasRedZone) {
1529         // If FP is not saved, copy it to ScratchReg.
1530         if (!HasFP)
1531           BuildMI(MBB, MBBI, dl, OrInst, ScratchReg)
1532             .addReg(FPReg)
1533             .addReg(FPReg);
1534         RBReg = FPReg;
1535       }
1536       BuildMI(MBB, StackUpdateLoc, dl, LoadInst, RBReg)
1537         .addImm(0)
1538         .addReg(SPReg);
1539     }
1540   }
1541   assert(RBReg != ScratchReg && "Should have avoided ScratchReg");
1542   // If there is no red zone, ScratchReg may be needed for holding a useful
1543   // value (although not the base register). Make sure it is not overwritten
1544   // too early.
1545 
1546   // If we need to restore both the LR and the CR and we only have one
1547   // available scratch register, we must do them one at a time.
1548   if (MustSaveCR && SingleScratchReg && MustSaveLR) {
1549     // Here TempReg == ScratchReg, and in the absence of red zone ScratchReg
1550     // is live here.
1551     assert(HasRedZone && "Expecting red zone");
1552     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1553       .addImm(CRSaveOffset)
1554       .addReg(SPReg);
1555     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1556       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1557         .addReg(TempReg, getKillRegState(i == e-1));
1558   }
1559 
1560   // Delay restoring of the LR if ScratchReg is needed. This is ok, since
1561   // LR is stored in the caller's stack frame. ScratchReg will be needed
1562   // if RBReg is anything other than SP. We shouldn't use ScratchReg as
1563   // a base register anyway, because it may happen to be R0.
1564   bool LoadedLR = false;
1565   if (MustSaveLR && RBReg == SPReg && isInt<16>(LROffset+SPAdd)) {
1566     BuildMI(MBB, StackUpdateLoc, dl, LoadInst, ScratchReg)
1567       .addImm(LROffset+SPAdd)
1568       .addReg(RBReg);
1569     LoadedLR = true;
1570   }
1571 
1572   if (MustSaveCR && !(SingleScratchReg && MustSaveLR)) {
1573     assert(RBReg == SPReg && "Should be using SP as a base register");
1574     BuildMI(MBB, MBBI, dl, LoadWordInst, TempReg)
1575       .addImm(CRSaveOffset)
1576       .addReg(RBReg);
1577   }
1578 
1579   if (HasFP) {
1580     // If there is red zone, restore FP directly, since SP has already been
1581     // restored. Otherwise, restore the value of FP into ScratchReg.
1582     if (HasRedZone || RBReg == SPReg)
1583       BuildMI(MBB, MBBI, dl, LoadInst, FPReg)
1584         .addImm(FPOffset)
1585         .addReg(SPReg);
1586     else
1587       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1588         .addImm(FPOffset)
1589         .addReg(RBReg);
1590   }
1591 
1592   if (FI->usesPICBase())
1593     BuildMI(MBB, MBBI, dl, LoadInst, PPC::R30)
1594       .addImm(PBPOffset)
1595       .addReg(RBReg);
1596 
1597   if (HasBP)
1598     BuildMI(MBB, MBBI, dl, LoadInst, BPReg)
1599       .addImm(BPOffset)
1600       .addReg(RBReg);
1601 
1602   // There is nothing more to be loaded from the stack, so now we can
1603   // restore SP: SP = RBReg + SPAdd.
1604   if (RBReg != SPReg || SPAdd != 0) {
1605     assert(!HasRedZone && "This should not happen with red zone");
1606     // If SPAdd is 0, generate a copy.
1607     if (SPAdd == 0)
1608       BuildMI(MBB, MBBI, dl, OrInst, SPReg)
1609         .addReg(RBReg)
1610         .addReg(RBReg);
1611     else
1612       BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1613         .addReg(RBReg)
1614         .addImm(SPAdd);
1615 
1616     assert(RBReg != ScratchReg && "Should be using FP or SP as base register");
1617     if (RBReg == FPReg)
1618       BuildMI(MBB, MBBI, dl, OrInst, FPReg)
1619         .addReg(ScratchReg)
1620         .addReg(ScratchReg);
1621 
1622     // Now load the LR from the caller's stack frame.
1623     if (MustSaveLR && !LoadedLR)
1624       BuildMI(MBB, MBBI, dl, LoadInst, ScratchReg)
1625         .addImm(LROffset)
1626         .addReg(SPReg);
1627   }
1628 
1629   if (MustSaveCR &&
1630       !(SingleScratchReg && MustSaveLR))
1631     for (unsigned i = 0, e = MustSaveCRs.size(); i != e; ++i)
1632       BuildMI(MBB, MBBI, dl, MoveToCRInst, MustSaveCRs[i])
1633         .addReg(TempReg, getKillRegState(i == e-1));
1634 
1635   if (MustSaveLR)
1636     BuildMI(MBB, StackUpdateLoc, dl, MTLRInst).addReg(ScratchReg);
1637 
1638   // Callee pop calling convention. Pop parameter/linkage area. Used for tail
1639   // call optimization
1640   if (IsReturnBlock) {
1641     unsigned RetOpcode = MBBI->getOpcode();
1642     if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1643         (RetOpcode == PPC::BLR || RetOpcode == PPC::BLR8) &&
1644         MF.getFunction().getCallingConv() == CallingConv::Fast) {
1645       PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1646       unsigned CallerAllocatedAmt = FI->getMinReservedArea();
1647 
1648       if (CallerAllocatedAmt && isInt<16>(CallerAllocatedAmt)) {
1649         BuildMI(MBB, MBBI, dl, AddImmInst, SPReg)
1650           .addReg(SPReg).addImm(CallerAllocatedAmt);
1651       } else {
1652         BuildMI(MBB, MBBI, dl, LoadImmShiftedInst, ScratchReg)
1653           .addImm(CallerAllocatedAmt >> 16);
1654         BuildMI(MBB, MBBI, dl, OrImmInst, ScratchReg)
1655           .addReg(ScratchReg, RegState::Kill)
1656           .addImm(CallerAllocatedAmt & 0xFFFF);
1657         BuildMI(MBB, MBBI, dl, AddInst)
1658           .addReg(SPReg)
1659           .addReg(FPReg)
1660           .addReg(ScratchReg);
1661       }
1662     } else {
1663       createTailCallBranchInstr(MBB);
1664     }
1665   }
1666 }
1667 
1668 void PPCFrameLowering::createTailCallBranchInstr(MachineBasicBlock &MBB) const {
1669   MachineBasicBlock::iterator MBBI = MBB.getFirstTerminator();
1670 
1671   // If we got this far a first terminator should exist.
1672   assert(MBBI != MBB.end() && "Failed to find the first terminator.");
1673 
1674   DebugLoc dl = MBBI->getDebugLoc();
1675   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
1676 
1677   // Create branch instruction for pseudo tail call return instruction
1678   unsigned RetOpcode = MBBI->getOpcode();
1679   if (RetOpcode == PPC::TCRETURNdi) {
1680     MBBI = MBB.getLastNonDebugInstr();
1681     MachineOperand &JumpTarget = MBBI->getOperand(0);
1682     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB)).
1683       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1684   } else if (RetOpcode == PPC::TCRETURNri) {
1685     MBBI = MBB.getLastNonDebugInstr();
1686     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1687     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR));
1688   } else if (RetOpcode == PPC::TCRETURNai) {
1689     MBBI = MBB.getLastNonDebugInstr();
1690     MachineOperand &JumpTarget = MBBI->getOperand(0);
1691     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA)).addImm(JumpTarget.getImm());
1692   } else if (RetOpcode == PPC::TCRETURNdi8) {
1693     MBBI = MBB.getLastNonDebugInstr();
1694     MachineOperand &JumpTarget = MBBI->getOperand(0);
1695     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILB8)).
1696       addGlobalAddress(JumpTarget.getGlobal(), JumpTarget.getOffset());
1697   } else if (RetOpcode == PPC::TCRETURNri8) {
1698     MBBI = MBB.getLastNonDebugInstr();
1699     assert(MBBI->getOperand(0).isReg() && "Expecting register operand.");
1700     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBCTR8));
1701   } else if (RetOpcode == PPC::TCRETURNai8) {
1702     MBBI = MBB.getLastNonDebugInstr();
1703     MachineOperand &JumpTarget = MBBI->getOperand(0);
1704     BuildMI(MBB, MBBI, dl, TII.get(PPC::TAILBA8)).addImm(JumpTarget.getImm());
1705   }
1706 }
1707 
1708 void PPCFrameLowering::determineCalleeSaves(MachineFunction &MF,
1709                                             BitVector &SavedRegs,
1710                                             RegScavenger *RS) const {
1711   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
1712 
1713   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1714 
1715   //  Save and clear the LR state.
1716   PPCFunctionInfo *FI = MF.getInfo<PPCFunctionInfo>();
1717   unsigned LR = RegInfo->getRARegister();
1718   FI->setMustSaveLR(MustSaveLR(MF, LR));
1719   SavedRegs.reset(LR);
1720 
1721   //  Save R31 if necessary
1722   int FPSI = FI->getFramePointerSaveIndex();
1723   const bool isPPC64 = Subtarget.isPPC64();
1724   MachineFrameInfo &MFI = MF.getFrameInfo();
1725 
1726   // If the frame pointer save index hasn't been defined yet.
1727   if (!FPSI && needsFP(MF)) {
1728     // Find out what the fix offset of the frame pointer save area.
1729     int FPOffset = getFramePointerSaveOffset();
1730     // Allocate the frame index for frame pointer save area.
1731     FPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, FPOffset, true);
1732     // Save the result.
1733     FI->setFramePointerSaveIndex(FPSI);
1734   }
1735 
1736   int BPSI = FI->getBasePointerSaveIndex();
1737   if (!BPSI && RegInfo->hasBasePointer(MF)) {
1738     int BPOffset = getBasePointerSaveOffset();
1739     // Allocate the frame index for the base pointer save area.
1740     BPSI = MFI.CreateFixedObject(isPPC64? 8 : 4, BPOffset, true);
1741     // Save the result.
1742     FI->setBasePointerSaveIndex(BPSI);
1743   }
1744 
1745   // Reserve stack space for the PIC Base register (R30).
1746   // Only used in SVR4 32-bit.
1747   if (FI->usesPICBase()) {
1748     int PBPSI = MFI.CreateFixedObject(4, -8, true);
1749     FI->setPICBasePointerSaveIndex(PBPSI);
1750   }
1751 
1752   // Make sure we don't explicitly spill r31, because, for example, we have
1753   // some inline asm which explicitly clobbers it, when we otherwise have a
1754   // frame pointer and are using r31's spill slot for the prologue/epilogue
1755   // code. Same goes for the base pointer and the PIC base register.
1756   if (needsFP(MF))
1757     SavedRegs.reset(isPPC64 ? PPC::X31 : PPC::R31);
1758   if (RegInfo->hasBasePointer(MF))
1759     SavedRegs.reset(RegInfo->getBaseRegister(MF));
1760   if (FI->usesPICBase())
1761     SavedRegs.reset(PPC::R30);
1762 
1763   // Reserve stack space to move the linkage area to in case of a tail call.
1764   int TCSPDelta = 0;
1765   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1766       (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
1767     MFI.CreateFixedObject(-1 * TCSPDelta, TCSPDelta, true);
1768   }
1769 
1770   // Allocate the nonvolatile CR spill slot iff the function uses CR 2, 3, or 4.
1771   // For 64-bit SVR4, and all flavors of AIX we create a FixedStack
1772   // object at the offset of the CR-save slot in the linkage area. The actual
1773   // save and restore of the condition register will be created as part of the
1774   // prologue and epilogue insertion, but the FixedStack object is needed to
1775   // keep the CalleSavedInfo valid.
1776   if ((SavedRegs.test(PPC::CR2) || SavedRegs.test(PPC::CR3) ||
1777        SavedRegs.test(PPC::CR4))) {
1778     const uint64_t SpillSize = 4; // Condition register is always 4 bytes.
1779     const int64_t SpillOffset =
1780         Subtarget.isPPC64() ? 8 : Subtarget.isAIXABI() ? 4 : -4;
1781     int FrameIdx =
1782         MFI.CreateFixedObject(SpillSize, SpillOffset,
1783                               /* IsImmutable */ true, /* IsAliased */ false);
1784     FI->setCRSpillFrameIndex(FrameIdx);
1785   }
1786 }
1787 
1788 void PPCFrameLowering::processFunctionBeforeFrameFinalized(MachineFunction &MF,
1789                                                        RegScavenger *RS) const {
1790   // Early exit if not using the SVR4 ABI.
1791   if (!Subtarget.isSVR4ABI()) {
1792     addScavengingSpillSlot(MF, RS);
1793     return;
1794   }
1795 
1796   // Get callee saved register information.
1797   MachineFrameInfo &MFI = MF.getFrameInfo();
1798   const std::vector<CalleeSavedInfo> &CSI = MFI.getCalleeSavedInfo();
1799 
1800   // If the function is shrink-wrapped, and if the function has a tail call, the
1801   // tail call might not be in the new RestoreBlock, so real branch instruction
1802   // won't be generated by emitEpilogue(), because shrink-wrap has chosen new
1803   // RestoreBlock. So we handle this case here.
1804   if (MFI.getSavePoint() && MFI.hasTailCall()) {
1805     MachineBasicBlock *RestoreBlock = MFI.getRestorePoint();
1806     for (MachineBasicBlock &MBB : MF) {
1807       if (MBB.isReturnBlock() && (&MBB) != RestoreBlock)
1808         createTailCallBranchInstr(MBB);
1809     }
1810   }
1811 
1812   // Early exit if no callee saved registers are modified!
1813   if (CSI.empty() && !needsFP(MF)) {
1814     addScavengingSpillSlot(MF, RS);
1815     return;
1816   }
1817 
1818   unsigned MinGPR = PPC::R31;
1819   unsigned MinG8R = PPC::X31;
1820   unsigned MinFPR = PPC::F31;
1821   unsigned MinVR = Subtarget.hasSPE() ? PPC::S31 : PPC::V31;
1822 
1823   bool HasGPSaveArea = false;
1824   bool HasG8SaveArea = false;
1825   bool HasFPSaveArea = false;
1826   bool HasVRSAVESaveArea = false;
1827   bool HasVRSaveArea = false;
1828 
1829   SmallVector<CalleeSavedInfo, 18> GPRegs;
1830   SmallVector<CalleeSavedInfo, 18> G8Regs;
1831   SmallVector<CalleeSavedInfo, 18> FPRegs;
1832   SmallVector<CalleeSavedInfo, 18> VRegs;
1833 
1834   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1835     unsigned Reg = CSI[i].getReg();
1836     assert((!MF.getInfo<PPCFunctionInfo>()->mustSaveTOC() ||
1837             (Reg != PPC::X2 && Reg != PPC::R2)) &&
1838            "Not expecting to try to spill R2 in a function that must save TOC");
1839     if (PPC::GPRCRegClass.contains(Reg)) {
1840       HasGPSaveArea = true;
1841 
1842       GPRegs.push_back(CSI[i]);
1843 
1844       if (Reg < MinGPR) {
1845         MinGPR = Reg;
1846       }
1847     } else if (PPC::G8RCRegClass.contains(Reg)) {
1848       HasG8SaveArea = true;
1849 
1850       G8Regs.push_back(CSI[i]);
1851 
1852       if (Reg < MinG8R) {
1853         MinG8R = Reg;
1854       }
1855     } else if (PPC::F8RCRegClass.contains(Reg)) {
1856       HasFPSaveArea = true;
1857 
1858       FPRegs.push_back(CSI[i]);
1859 
1860       if (Reg < MinFPR) {
1861         MinFPR = Reg;
1862       }
1863     } else if (PPC::CRBITRCRegClass.contains(Reg) ||
1864                PPC::CRRCRegClass.contains(Reg)) {
1865       ; // do nothing, as we already know whether CRs are spilled
1866     } else if (PPC::VRSAVERCRegClass.contains(Reg)) {
1867       HasVRSAVESaveArea = true;
1868     } else if (PPC::VRRCRegClass.contains(Reg) ||
1869                PPC::SPERCRegClass.contains(Reg)) {
1870       // Altivec and SPE are mutually exclusive, but have the same stack
1871       // alignment requirements, so overload the save area for both cases.
1872       HasVRSaveArea = true;
1873 
1874       VRegs.push_back(CSI[i]);
1875 
1876       if (Reg < MinVR) {
1877         MinVR = Reg;
1878       }
1879     } else {
1880       llvm_unreachable("Unknown RegisterClass!");
1881     }
1882   }
1883 
1884   PPCFunctionInfo *PFI = MF.getInfo<PPCFunctionInfo>();
1885   const TargetRegisterInfo *TRI = Subtarget.getRegisterInfo();
1886 
1887   int64_t LowerBound = 0;
1888 
1889   // Take into account stack space reserved for tail calls.
1890   int TCSPDelta = 0;
1891   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
1892       (TCSPDelta = PFI->getTailCallSPDelta()) < 0) {
1893     LowerBound = TCSPDelta;
1894   }
1895 
1896   // The Floating-point register save area is right below the back chain word
1897   // of the previous stack frame.
1898   if (HasFPSaveArea) {
1899     for (unsigned i = 0, e = FPRegs.size(); i != e; ++i) {
1900       int FI = FPRegs[i].getFrameIdx();
1901 
1902       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1903     }
1904 
1905     LowerBound -= (31 - TRI->getEncodingValue(MinFPR) + 1) * 8;
1906   }
1907 
1908   // Check whether the frame pointer register is allocated. If so, make sure it
1909   // is spilled to the correct offset.
1910   if (needsFP(MF)) {
1911     int FI = PFI->getFramePointerSaveIndex();
1912     assert(FI && "No Frame Pointer Save Slot!");
1913     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1914     // FP is R31/X31, so no need to update MinGPR/MinG8R.
1915     HasGPSaveArea = true;
1916   }
1917 
1918   if (PFI->usesPICBase()) {
1919     int FI = PFI->getPICBasePointerSaveIndex();
1920     assert(FI && "No PIC Base Pointer Save Slot!");
1921     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1922 
1923     MinGPR = std::min<unsigned>(MinGPR, PPC::R30);
1924     HasGPSaveArea = true;
1925   }
1926 
1927   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
1928   if (RegInfo->hasBasePointer(MF)) {
1929     int FI = PFI->getBasePointerSaveIndex();
1930     assert(FI && "No Base Pointer Save Slot!");
1931     MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1932 
1933     Register BP = RegInfo->getBaseRegister(MF);
1934     if (PPC::G8RCRegClass.contains(BP)) {
1935       MinG8R = std::min<unsigned>(MinG8R, BP);
1936       HasG8SaveArea = true;
1937     } else if (PPC::GPRCRegClass.contains(BP)) {
1938       MinGPR = std::min<unsigned>(MinGPR, BP);
1939       HasGPSaveArea = true;
1940     }
1941   }
1942 
1943   // General register save area starts right below the Floating-point
1944   // register save area.
1945   if (HasGPSaveArea || HasG8SaveArea) {
1946     // Move general register save area spill slots down, taking into account
1947     // the size of the Floating-point register save area.
1948     for (unsigned i = 0, e = GPRegs.size(); i != e; ++i) {
1949       if (!GPRegs[i].isSpilledToReg()) {
1950         int FI = GPRegs[i].getFrameIdx();
1951         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1952       }
1953     }
1954 
1955     // Move general register save area spill slots down, taking into account
1956     // the size of the Floating-point register save area.
1957     for (unsigned i = 0, e = G8Regs.size(); i != e; ++i) {
1958       if (!G8Regs[i].isSpilledToReg()) {
1959         int FI = G8Regs[i].getFrameIdx();
1960         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1961       }
1962     }
1963 
1964     unsigned MinReg =
1965       std::min<unsigned>(TRI->getEncodingValue(MinGPR),
1966                          TRI->getEncodingValue(MinG8R));
1967 
1968     if (Subtarget.isPPC64()) {
1969       LowerBound -= (31 - MinReg + 1) * 8;
1970     } else {
1971       LowerBound -= (31 - MinReg + 1) * 4;
1972     }
1973   }
1974 
1975   // For 32-bit only, the CR save area is below the general register
1976   // save area.  For 64-bit SVR4, the CR save area is addressed relative
1977   // to the stack pointer and hence does not need an adjustment here.
1978   // Only CR2 (the first nonvolatile spilled) has an associated frame
1979   // index so that we have a single uniform save area.
1980   if (spillsCR(MF) && Subtarget.is32BitELFABI()) {
1981     // Adjust the frame index of the CR spill slot.
1982     for (const auto &CSInfo : CSI) {
1983       if (CSInfo.getReg() == PPC::CR2) {
1984         int FI = CSInfo.getFrameIdx();
1985         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
1986         break;
1987       }
1988     }
1989 
1990     LowerBound -= 4; // The CR save area is always 4 bytes long.
1991   }
1992 
1993   if (HasVRSAVESaveArea) {
1994     // FIXME SVR4: Is it actually possible to have multiple elements in CSI
1995     //             which have the VRSAVE register class?
1996     // Adjust the frame index of the VRSAVE spill slot.
1997     for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
1998       unsigned Reg = CSI[i].getReg();
1999 
2000       if (PPC::VRSAVERCRegClass.contains(Reg)) {
2001         int FI = CSI[i].getFrameIdx();
2002 
2003         MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2004       }
2005     }
2006 
2007     LowerBound -= 4; // The VRSAVE save area is always 4 bytes long.
2008   }
2009 
2010   // Both Altivec and SPE have the same alignment and padding requirements
2011   // within the stack frame.
2012   if (HasVRSaveArea) {
2013     // Insert alignment padding, we need 16-byte alignment. Note: for positive
2014     // number the alignment formula is : y = (x + (n-1)) & (~(n-1)). But since
2015     // we are using negative number here (the stack grows downward). We should
2016     // use formula : y = x & (~(n-1)). Where x is the size before aligning, n
2017     // is the alignment size ( n = 16 here) and y is the size after aligning.
2018     assert(LowerBound <= 0 && "Expect LowerBound have a non-positive value!");
2019     LowerBound &= ~(15);
2020 
2021     for (unsigned i = 0, e = VRegs.size(); i != e; ++i) {
2022       int FI = VRegs[i].getFrameIdx();
2023 
2024       MFI.setObjectOffset(FI, LowerBound + MFI.getObjectOffset(FI));
2025     }
2026   }
2027 
2028   addScavengingSpillSlot(MF, RS);
2029 }
2030 
2031 void
2032 PPCFrameLowering::addScavengingSpillSlot(MachineFunction &MF,
2033                                          RegScavenger *RS) const {
2034   // Reserve a slot closest to SP or frame pointer if we have a dynalloc or
2035   // a large stack, which will require scavenging a register to materialize a
2036   // large offset.
2037 
2038   // We need to have a scavenger spill slot for spills if the frame size is
2039   // large. In case there is no free register for large-offset addressing,
2040   // this slot is used for the necessary emergency spill. Also, we need the
2041   // slot for dynamic stack allocations.
2042 
2043   // The scavenger might be invoked if the frame offset does not fit into
2044   // the 16-bit immediate. We don't know the complete frame size here
2045   // because we've not yet computed callee-saved register spills or the
2046   // needed alignment padding.
2047   unsigned StackSize = determineFrameLayout(MF, true);
2048   MachineFrameInfo &MFI = MF.getFrameInfo();
2049   if (MFI.hasVarSizedObjects() || spillsCR(MF) || spillsVRSAVE(MF) ||
2050       hasNonRISpills(MF) || (hasSpills(MF) && !isInt<16>(StackSize))) {
2051     const TargetRegisterClass &GPRC = PPC::GPRCRegClass;
2052     const TargetRegisterClass &G8RC = PPC::G8RCRegClass;
2053     const TargetRegisterClass &RC = Subtarget.isPPC64() ? G8RC : GPRC;
2054     const TargetRegisterInfo &TRI = *Subtarget.getRegisterInfo();
2055     unsigned Size = TRI.getSpillSize(RC);
2056     unsigned Align = TRI.getSpillAlignment(RC);
2057     RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2058 
2059     // Might we have over-aligned allocas?
2060     bool HasAlVars =
2061         MFI.hasVarSizedObjects() && MFI.getMaxAlign() > getStackAlign();
2062 
2063     // These kinds of spills might need two registers.
2064     if (spillsCR(MF) || spillsVRSAVE(MF) || HasAlVars)
2065       RS->addScavengingFrameIndex(MFI.CreateStackObject(Size, Align, false));
2066 
2067   }
2068 }
2069 
2070 // This function checks if a callee saved gpr can be spilled to a volatile
2071 // vector register. This occurs for leaf functions when the option
2072 // ppc-enable-pe-vector-spills is enabled. If there are any remaining registers
2073 // which were not spilled to vectors, return false so the target independent
2074 // code can handle them by assigning a FrameIdx to a stack slot.
2075 bool PPCFrameLowering::assignCalleeSavedSpillSlots(
2076     MachineFunction &MF, const TargetRegisterInfo *TRI,
2077     std::vector<CalleeSavedInfo> &CSI) const {
2078 
2079   if (CSI.empty())
2080     return true; // Early exit if no callee saved registers are modified!
2081 
2082   // Early exit if cannot spill gprs to volatile vector registers.
2083   MachineFrameInfo &MFI = MF.getFrameInfo();
2084   if (!EnablePEVectorSpills || MFI.hasCalls() || !Subtarget.hasP9Vector())
2085     return false;
2086 
2087   // Build a BitVector of VSRs that can be used for spilling GPRs.
2088   BitVector BVAllocatable = TRI->getAllocatableSet(MF);
2089   BitVector BVCalleeSaved(TRI->getNumRegs());
2090   const PPCRegisterInfo *RegInfo = Subtarget.getRegisterInfo();
2091   const MCPhysReg *CSRegs = RegInfo->getCalleeSavedRegs(&MF);
2092   for (unsigned i = 0; CSRegs[i]; ++i)
2093     BVCalleeSaved.set(CSRegs[i]);
2094 
2095   for (unsigned Reg : BVAllocatable.set_bits()) {
2096     // Set to 0 if the register is not a volatile VF/F8 register, or if it is
2097     // used in the function.
2098     if (BVCalleeSaved[Reg] ||
2099         (!PPC::F8RCRegClass.contains(Reg) &&
2100          !PPC::VFRCRegClass.contains(Reg)) ||
2101         (MF.getRegInfo().isPhysRegUsed(Reg)))
2102       BVAllocatable.reset(Reg);
2103   }
2104 
2105   bool AllSpilledToReg = true;
2106   for (auto &CS : CSI) {
2107     if (BVAllocatable.none())
2108       return false;
2109 
2110     unsigned Reg = CS.getReg();
2111     if (!PPC::G8RCRegClass.contains(Reg) && !PPC::GPRCRegClass.contains(Reg)) {
2112       AllSpilledToReg = false;
2113       continue;
2114     }
2115 
2116     unsigned VolatileVFReg = BVAllocatable.find_first();
2117     if (VolatileVFReg < BVAllocatable.size()) {
2118       CS.setDstReg(VolatileVFReg);
2119       BVAllocatable.reset(VolatileVFReg);
2120     } else {
2121       AllSpilledToReg = false;
2122     }
2123   }
2124   return AllSpilledToReg;
2125 }
2126 
2127 bool PPCFrameLowering::spillCalleeSavedRegisters(
2128     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2129     ArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2130 
2131   MachineFunction *MF = MBB.getParent();
2132   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2133   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2134   bool MustSaveTOC = FI->mustSaveTOC();
2135   DebugLoc DL;
2136   bool CRSpilled = false;
2137   MachineInstrBuilder CRMIB;
2138 
2139   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2140     unsigned Reg = CSI[i].getReg();
2141     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2142     if (Reg == PPC::VRSAVE)
2143       continue;
2144 
2145     // CR2 through CR4 are the nonvolatile CR fields.
2146     bool IsCRField = PPC::CR2 <= Reg && Reg <= PPC::CR4;
2147 
2148     // Add the callee-saved register as live-in; it's killed at the spill.
2149     // Do not do this for callee-saved registers that are live-in to the
2150     // function because they will already be marked live-in and this will be
2151     // adding it for a second time. It is an error to add the same register
2152     // to the set more than once.
2153     const MachineRegisterInfo &MRI = MF->getRegInfo();
2154     bool IsLiveIn = MRI.isLiveIn(Reg);
2155     if (!IsLiveIn)
2156        MBB.addLiveIn(Reg);
2157 
2158     if (CRSpilled && IsCRField) {
2159       CRMIB.addReg(Reg, RegState::ImplicitKill);
2160       continue;
2161     }
2162 
2163     // The actual spill will happen in the prologue.
2164     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2165       continue;
2166 
2167     // Insert the spill to the stack frame.
2168     if (IsCRField) {
2169       PPCFunctionInfo *FuncInfo = MF->getInfo<PPCFunctionInfo>();
2170       if (!Subtarget.is32BitELFABI()) {
2171         // The actual spill will happen at the start of the prologue.
2172         FuncInfo->addMustSaveCR(Reg);
2173       } else {
2174         CRSpilled = true;
2175         FuncInfo->setSpillsCR();
2176 
2177         // 32-bit:  FP-relative.  Note that we made sure CR2-CR4 all have
2178         // the same frame index in PPCRegisterInfo::hasReservedSpillSlot.
2179         CRMIB = BuildMI(*MF, DL, TII.get(PPC::MFCR), PPC::R12)
2180                   .addReg(Reg, RegState::ImplicitKill);
2181 
2182         MBB.insert(MI, CRMIB);
2183         MBB.insert(MI, addFrameReference(BuildMI(*MF, DL, TII.get(PPC::STW))
2184                                          .addReg(PPC::R12,
2185                                                  getKillRegState(true)),
2186                                          CSI[i].getFrameIdx()));
2187       }
2188     } else {
2189       if (CSI[i].isSpilledToReg()) {
2190         NumPESpillVSR++;
2191         BuildMI(MBB, MI, DL, TII.get(PPC::MTVSRD), CSI[i].getDstReg())
2192           .addReg(Reg, getKillRegState(true));
2193       } else {
2194         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2195         // Use !IsLiveIn for the kill flag.
2196         // We do not want to kill registers that are live in this function
2197         // before their use because they will become undefined registers.
2198         // Functions without NoUnwind need to preserve the order of elements in
2199         // saved vector registers.
2200         if (Subtarget.needsSwapsForVSXMemOps() &&
2201             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2202           TII.storeRegToStackSlotNoUpd(MBB, MI, Reg, !IsLiveIn,
2203                                        CSI[i].getFrameIdx(), RC, TRI);
2204         else
2205           TII.storeRegToStackSlot(MBB, MI, Reg, !IsLiveIn, CSI[i].getFrameIdx(),
2206                                   RC, TRI);
2207       }
2208     }
2209   }
2210   return true;
2211 }
2212 
2213 static void restoreCRs(bool is31, bool CR2Spilled, bool CR3Spilled,
2214                        bool CR4Spilled, MachineBasicBlock &MBB,
2215                        MachineBasicBlock::iterator MI,
2216                        ArrayRef<CalleeSavedInfo> CSI, unsigned CSIIndex) {
2217 
2218   MachineFunction *MF = MBB.getParent();
2219   const PPCInstrInfo &TII = *MF->getSubtarget<PPCSubtarget>().getInstrInfo();
2220   DebugLoc DL;
2221   unsigned MoveReg = PPC::R12;
2222 
2223   // 32-bit:  FP-relative
2224   MBB.insert(MI,
2225              addFrameReference(BuildMI(*MF, DL, TII.get(PPC::LWZ), MoveReg),
2226                                CSI[CSIIndex].getFrameIdx()));
2227 
2228   unsigned RestoreOp = PPC::MTOCRF;
2229   if (CR2Spilled)
2230     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR2)
2231                .addReg(MoveReg, getKillRegState(!CR3Spilled && !CR4Spilled)));
2232 
2233   if (CR3Spilled)
2234     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR3)
2235                .addReg(MoveReg, getKillRegState(!CR4Spilled)));
2236 
2237   if (CR4Spilled)
2238     MBB.insert(MI, BuildMI(*MF, DL, TII.get(RestoreOp), PPC::CR4)
2239                .addReg(MoveReg, getKillRegState(true)));
2240 }
2241 
2242 MachineBasicBlock::iterator PPCFrameLowering::
2243 eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
2244                               MachineBasicBlock::iterator I) const {
2245   const TargetInstrInfo &TII = *Subtarget.getInstrInfo();
2246   if (MF.getTarget().Options.GuaranteedTailCallOpt &&
2247       I->getOpcode() == PPC::ADJCALLSTACKUP) {
2248     // Add (actually subtract) back the amount the callee popped on return.
2249     if (int CalleeAmt =  I->getOperand(1).getImm()) {
2250       bool is64Bit = Subtarget.isPPC64();
2251       CalleeAmt *= -1;
2252       unsigned StackReg = is64Bit ? PPC::X1 : PPC::R1;
2253       unsigned TmpReg = is64Bit ? PPC::X0 : PPC::R0;
2254       unsigned ADDIInstr = is64Bit ? PPC::ADDI8 : PPC::ADDI;
2255       unsigned ADDInstr = is64Bit ? PPC::ADD8 : PPC::ADD4;
2256       unsigned LISInstr = is64Bit ? PPC::LIS8 : PPC::LIS;
2257       unsigned ORIInstr = is64Bit ? PPC::ORI8 : PPC::ORI;
2258       const DebugLoc &dl = I->getDebugLoc();
2259 
2260       if (isInt<16>(CalleeAmt)) {
2261         BuildMI(MBB, I, dl, TII.get(ADDIInstr), StackReg)
2262           .addReg(StackReg, RegState::Kill)
2263           .addImm(CalleeAmt);
2264       } else {
2265         MachineBasicBlock::iterator MBBI = I;
2266         BuildMI(MBB, MBBI, dl, TII.get(LISInstr), TmpReg)
2267           .addImm(CalleeAmt >> 16);
2268         BuildMI(MBB, MBBI, dl, TII.get(ORIInstr), TmpReg)
2269           .addReg(TmpReg, RegState::Kill)
2270           .addImm(CalleeAmt & 0xFFFF);
2271         BuildMI(MBB, MBBI, dl, TII.get(ADDInstr), StackReg)
2272           .addReg(StackReg, RegState::Kill)
2273           .addReg(TmpReg);
2274       }
2275     }
2276   }
2277   // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
2278   return MBB.erase(I);
2279 }
2280 
2281 static bool isCalleeSavedCR(unsigned Reg) {
2282   return PPC::CR2 == Reg || Reg == PPC::CR3 || Reg == PPC::CR4;
2283 }
2284 
2285 bool PPCFrameLowering::restoreCalleeSavedRegisters(
2286     MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
2287     MutableArrayRef<CalleeSavedInfo> CSI, const TargetRegisterInfo *TRI) const {
2288   MachineFunction *MF = MBB.getParent();
2289   const PPCInstrInfo &TII = *Subtarget.getInstrInfo();
2290   PPCFunctionInfo *FI = MF->getInfo<PPCFunctionInfo>();
2291   bool MustSaveTOC = FI->mustSaveTOC();
2292   bool CR2Spilled = false;
2293   bool CR3Spilled = false;
2294   bool CR4Spilled = false;
2295   unsigned CSIIndex = 0;
2296 
2297   // Initialize insertion-point logic; we will be restoring in reverse
2298   // order of spill.
2299   MachineBasicBlock::iterator I = MI, BeforeI = I;
2300   bool AtStart = I == MBB.begin();
2301 
2302   if (!AtStart)
2303     --BeforeI;
2304 
2305   for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
2306     unsigned Reg = CSI[i].getReg();
2307 
2308     // VRSAVE can appear here if, for example, @llvm.eh.unwind.init() is used.
2309     if (Reg == PPC::VRSAVE)
2310       continue;
2311 
2312     if ((Reg == PPC::X2 || Reg == PPC::R2) && MustSaveTOC)
2313       continue;
2314 
2315     // Restore of callee saved condition register field is handled during
2316     // epilogue insertion.
2317     if (isCalleeSavedCR(Reg) && !Subtarget.is32BitELFABI())
2318       continue;
2319 
2320     if (Reg == PPC::CR2) {
2321       CR2Spilled = true;
2322       // The spill slot is associated only with CR2, which is the
2323       // first nonvolatile spilled.  Save it here.
2324       CSIIndex = i;
2325       continue;
2326     } else if (Reg == PPC::CR3) {
2327       CR3Spilled = true;
2328       continue;
2329     } else if (Reg == PPC::CR4) {
2330       CR4Spilled = true;
2331       continue;
2332     } else {
2333       // On 32-bit ELF when we first encounter a non-CR register after seeing at
2334       // least one CR register, restore all spilled CRs together.
2335       if (CR2Spilled || CR3Spilled || CR4Spilled) {
2336         bool is31 = needsFP(*MF);
2337         restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI,
2338                    CSIIndex);
2339         CR2Spilled = CR3Spilled = CR4Spilled = false;
2340       }
2341 
2342       if (CSI[i].isSpilledToReg()) {
2343         DebugLoc DL;
2344         NumPEReloadVSR++;
2345         BuildMI(MBB, I, DL, TII.get(PPC::MFVSRD), Reg)
2346             .addReg(CSI[i].getDstReg(), getKillRegState(true));
2347       } else {
2348        // Default behavior for non-CR saves.
2349         const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg);
2350 
2351         // Functions without NoUnwind need to preserve the order of elements in
2352         // saved vector registers.
2353         if (Subtarget.needsSwapsForVSXMemOps() &&
2354             !MF->getFunction().hasFnAttribute(Attribute::NoUnwind))
2355           TII.loadRegFromStackSlotNoUpd(MBB, I, Reg, CSI[i].getFrameIdx(), RC,
2356                                         TRI);
2357         else
2358           TII.loadRegFromStackSlot(MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI);
2359 
2360         assert(I != MBB.begin() &&
2361                "loadRegFromStackSlot didn't insert any code!");
2362       }
2363     }
2364 
2365     // Insert in reverse order.
2366     if (AtStart)
2367       I = MBB.begin();
2368     else {
2369       I = BeforeI;
2370       ++I;
2371     }
2372   }
2373 
2374   // If we haven't yet spilled the CRs, do so now.
2375   if (CR2Spilled || CR3Spilled || CR4Spilled) {
2376     assert(Subtarget.is32BitELFABI() &&
2377            "Only set CR[2|3|4]Spilled on 32-bit SVR4.");
2378     bool is31 = needsFP(*MF);
2379     restoreCRs(is31, CR2Spilled, CR3Spilled, CR4Spilled, MBB, I, CSI, CSIIndex);
2380   }
2381 
2382   return true;
2383 }
2384 
2385 unsigned PPCFrameLowering::getTOCSaveOffset() const {
2386   return TOCSaveOffset;
2387 }
2388 
2389 unsigned PPCFrameLowering::getFramePointerSaveOffset() const {
2390   return FramePointerSaveOffset;
2391 }
2392 
2393 unsigned PPCFrameLowering::getBasePointerSaveOffset() const {
2394   if (Subtarget.isAIXABI())
2395     report_fatal_error("BasePointer is not implemented on AIX yet.");
2396   return BasePointerSaveOffset;
2397 }
2398 
2399 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
2400   if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
2401     return false;
2402   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
2403           MF.getSubtarget<PPCSubtarget>().isPPC64());
2404 }
2405